예제 #1
0
def getReverseOneHotDict(mapping):
    """Invert a one-hot encoding dict for decoding predictions.

    Args:
        mapping: dict of label -> encoded value (any value with a stable
            ``str()`` representation, e.g. a one-hot list).

    Returns:
        dict keyed by ``str(value)`` with the original labels as values.
    """
    # Parameter renamed from ``dict`` so the builtin type is not shadowed.
    return {str(value): key for key, value in mapping.items()}


print("started")
# Load the trained vector-output model and the one-hot encoder mapping.
model = p.load("vecTrained")
encoderDict = p.load("oneHotDict")
reverseEncoderDict = getReverseOneHotDict(encoderDict)

# Pick a random line of the training CSV to use as the generation seed.
num_samples = p.load('numSamples')
targetLine = math.floor(random.random() * num_samples)

# Advance the reader so ``row`` ends up holding the chosen line.
# Bug fix: the file handle was never closed -- use a with-block.
row = []
with open('training_data.csv', 'r') as myfile:
    reader = csv.reader(myfile, delimiter=',')
    for _ in range(targetLine):
        row = next(reader)

# Column 0 of a row is the target token; columns 1.. are the seed window.
song = convertW2V(row[1:], window_size)
for _ in range(500):
    # Predict the next embedding from the last 100 timesteps.
    tempSong = song[-100:]
    model.reset_states()
    outputVec = model.predict(np.array(tempSong).reshape(1, 100, 20))
    # Snap the raw prediction to the nearest known vector and extend the song.
    song.append(closestVec(outputVec[0]))

# NOTE(review): slicing from 99 keeps the last seed vector plus everything
# generated -- confirm the intended seed/window offset (seed is 100 long).
p.save(song[99:], 'song')
예제 #2
0
import ezPickle as p
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Per-series summary statistics are the features; the pickled output list
# holds one-hot rows that we collapse to integer class indices.
series_stats = pd.read_csv('../data/summary_stats.csv')
outputs = p.load('output_list')
# Idiomatic comprehension instead of the index-based in-place loop.
outputs = [row.index(1) for row in outputs]

# Fixed 3000-row train/test split.
# NOTE(review): hard-coded; assumes >= 3000 rows and that row order is
# already shuffled -- confirm against the data-generation step.
clf = RandomForestClassifier(n_estimators=500, max_depth=7, random_state=0)
clf.fit(series_stats.values[0:3000], outputs[0:3000])
p.save(clf, 'rf_clf')
print(clf.score(series_stats.values[3000:], outputs[3000:]))
예제 #3
0
import keras, math, os, glob
import pandas as pd
import numpy as np
import ezPickle as p

from scipy.misc import imread  # NOTE(review): removed in SciPy >= 1.2 (use imageio on newer stacks)
from sklearn import preprocessing  # bug fix: LabelEncoder below was a NameError without this import

# Restrict visible GPUs before any CUDA context is created.
os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3"

train_curated = pd.read_csv('train_curated.csv')
files = train_curated['fname']
labels = train_curated['labels']

# Each row's label cell is a comma-separated list; flatten to single labels.
labels = [item for label_list in labels for item in label_list.split(',')]
le = preprocessing.LabelEncoder()
le.fit(labels)
print(len(le.classes_.tolist()))
p.save(le, 'le')

data = []
max_len = 0
max_shape = None  # bug fix: was unbound (NameError at print) when no file is wider than 0

# Scan every curated spectrogram for the widest one (time axis = dim 1).
for file_name in files:
    spectrogram = imread("mel_spec_curated/" + file_name + ".png")
    if spectrogram.shape[1] > max_len:
        max_len = spectrogram.shape[1]
        max_shape = spectrogram.shape
print(max_len)
print(max_shape)
예제 #4
0
    # (fragment: the enclosing create_model() definition starts above this view)
    # Stacked LSTM encoder: two sequence-to-sequence layers feeding a final
    # sequence-to-vector layer, each followed by 20% dropout.
    model.add(
        LSTM(256, return_sequences=True, input_shape=p.load('inputShape')))
    model.add(Dropout(.2))
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(.2))
    # return_sequences=False: only the last timestep's output feeds the Dense.
    model.add(LSTM(128, return_sequences=False))
    model.add(Dropout(.2))
    # ``size``-dim sigmoid output -- regression-style vector prediction.
    model.add(Dense(size, activation='sigmoid'))
    return model


# Build the template model on the CPU so the multi-GPU replicas share weights.
with tf.device("/cpu:0"):
    model = create_model()

# Replicate across 3 GPUs and train with RMSprop on an MSE objective.
p_model = multi_gpu_model(model, gpus=3)
optimizer = RMSprop()
p_model.compile(
    loss='mean_squared_error', optimizer=optimizer,
    metrics=['mean_squared_error'])

print("Fitting")
num_samples = p.load('numSamples')
steps = math.ceil(num_samples / batch_size)
p_model.fit_generator(
    songBatchGenerator(batch_size), epochs=50, verbose=1,
    shuffle=False, steps_per_epoch=steps)
p.save(p_model, 'vecTrained')
print("Saved")
예제 #5
0
from scipy.misc import imsave  # NOTE(review): removed in SciPy >= 1.2 (use imageio on newer stacks)

# Convert every audio file in the chosen input folder to a mel-spectrogram
# PNG and record each spectrogram's width (time frames) for later padding.
print("Please enter name of input folder")
in_folder = input()
print("Please enter name of output folder")
out_folder = input()
le = p.load('le')
lengths = []
count = 0
files = glob.glob(in_folder + "/*")
for file_name in files:
    print(file_name)
    rate, data = wavfile.read(file_name)
    print("\t read")
    print('\t', data.shape, " Frames at ", rate)
    # Progress fraction (printed before this file is processed).
    print('\t', count / len(files))
    spectrogram = librosa.feature.melspectrogram(y=data.astype(float), sr=rate)
    print("\t Converted")
    print('\t', spectrogram.shape)
    # Bug fix: skip past the '/' itself so the output path has no '//'.
    imsave(out_folder + '/' + file_name[file_name.index('/') + 1:] + ".png",
           spectrogram)
    print("\twrote")
    lengths.append(spectrogram.shape[1])
    count += 1
# Summarize the width distribution for choosing a padding length.
series = pd.Series(lengths)
p.save(series, 'series')
print(series.describe())
예제 #6
0
from gensim.models import Word2Vec
import ezPickle as p

# Dimensionality of the learned token embeddings.
size = 20

songs = p.load('songList')

# Skip-gram (sg=1) embeddings over the tokenized songs; min_count=0 keeps
# every token so later lookups never miss a vocabulary entry.
embedder = Word2Vec(sg=1, seed=1, size=size, window=8, min_count=0, workers=2)
embedder.build_vocab(songs)
embedder.train(songs, total_examples=embedder.corpus_count, epochs=100)

p.save(embedder, 'w2v')
p.save(size, "size")
print(len(embedder.wv.vocab))
예제 #7
0
    # (fragment: the enclosing for-loop over songList starts above this view)
    # Track the longest song seen so far.
    if len(song) > maxLen:
        maxLen = len(song)
print(maxLen)
print(len(songList))

inputData = []
outputData = []
window_size = 100
print("Writing Data")
import csv

# Slide a window of ``window_size`` tokens over every sufficiently long song;
# each CSV row is [target token, *window tokens].
num_samples = 0
# Bug fix: the CSV handle was never closed, so buffered rows could be lost
# before later scripts re-read the file -- use a with-block.
with open('training_data.csv', mode='w') as data_file:
    file_writer = csv.writer(data_file, delimiter=',')
    for song in [item for item in songList if len(item) > window_size]:
        for i in range(0, len(song) - window_size):
            file_writer.writerow([song[i + window_size]] + song[i:i + window_size])
            num_samples += 1

print("done")
# NOTE(review): inputData is still empty here, so this reshape builds an
# empty (0, maxLen, 20) array -- looks like leftover from a pre-CSV version.
inputData = np.array(inputData).reshape(len(inputData), maxLen, 20)
inputShape = (window_size, size)
outputSize = len(encoderDict.keys())
print("Saving")
p.save(encoderDict, 'oneHotDict')
p.save(inputShape, 'inputShape')
p.save(outputSize, 'outputSize')
p.save(num_samples, 'numSamples')
p.save(window_size, 'window_size')

예제 #8
0
#split the training data into batches
import ezPickle as p
import math

print("loading data")
inputData = p.load('inputData')
outputData = p.load('outputData')
print('Size is : ', len(inputData))

# Split into sets of 12 songs each, pickled as inputData<i>/outputData<i>.
numSongs = 12
sets = math.floor(len(inputData) / numSongs)
for i in range(sets):
    start = i * numSongs
    end = (i + 1) * numSongs
    if i == sets - 1:
        # Bug fix: when len(inputData) is not divisible by numSongs, the
        # original dropped the leftover songs entirely (its tail branch only
        # fired when end == len exactly). Fold the remainder into the last set.
        end = len(inputData)
    p.save(inputData[start:end], 'inputData' + str(i))
    p.save(outputData[start:end], 'outputData' + str(i))
print(inputData[0])
예제 #9
0
        # (fragment: the enclosing generator over the training CSV starts above)
        for row in reader:
            # Emit a batch once ``batch_size`` rows have been accumulated.
            if current_line >= end:
                #print(np.array(o_d)[0])
                yield (np.array(i_d), np.array(o_d))
                start = end
                end = start + batch_size
                i_d = []
                o_d = []
                # Clamp the final batch boundary to the sample count.
                if end > num_samples:
                    end = num_samples - 1

            # row[0] is the target token key; row[1:] is the input window.
            i_d.append(convertW2V(row[1:], window_size))
            o_d.append(np.array(encoderDict[row[0]].copy()))
            current_line += 1

p_model = p.load('kerasTrained')
rms = RMSprop()

#p_model.compile(loss='categorical_crossentropy',optimizer=rms, metrics=['categorical_accuracy'])

print("Fitting")
num_samples = p.load('numSamples')
p_model.fit_generator(songBatchGenerator(batch_size),
                      epochs=100,
                      verbose=1,
                      shuffle=False,
                      steps_per_epoch=math.ceil(num_samples / batch_size))
p.save(p_model, 'kerasTrained')
print("Saved")
예제 #10
0
    # (fragment: the enclosing create_model() definition starts above this view)
    # NOTE(review): return_sequences=True on this last LSTM makes the Dense
    # layer apply per-timestep -- confirm the loss expects a sequence output.
    model.add(LSTM(128, return_sequences=True))

    # Softmax over the one-hot token vocabulary.
    model.add(Dense(outputSize, activation='softmax'))
    return model


#with tf.device("/cpu:0"):
model = create_model()

# make the model parallel
#p_model = multi_gpu_model(model, gpus=3)
rms = RMSprop()
try:
    model.compile(loss='categorical_crossentropy',
                  optimizer=rms,
                  metrics=['categorical_accuracy'])

    print("Fitting")
    batch_size = 12
    epochs = 100
    songList = p.load('songList')
    #model.fit_generator(songBatchGenerator(songList,batch_size), epochs=10,  verbose=1,  shuffle=False, steps_per_epoch=math.ceil(len(songList)/batch_size),max_queue_size=2)
    for inp, out in songBatchGenerator(
            songList, batch_size,
            epochs * math.ceil(len(songList) / batch_size)):
        model.train_on_batch(inp, out)
    p.save(model, 'kerasTrainedNoDropout')
    print("Saved")
except MemoryError:
    print("Memory whyyyyyy")
예제 #11
0
	# (fragment: the enclosing generator definition starts above this view)
	# Round-robin: alternate yields between generators ``g`` and ``ng``.
	parity = True
	while True:
		if parity:
			yield next(g)
		else:
			yield next(ng)
		parity = not parity
		
# 2-D CNN over (128 x max_len x 1) mel-spectrogram images.
model = Sequential()
model.add(Conv2D(64, 3, strides=(3, 3), activation='relu',
                 input_shape=(128, max_len, 1)))
model.add(Conv2D(128, 3, strides=(1, 1), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=None))
#model.add(Conv2D(16, 3, strides=(1,1), activation='relu'))
#model.add(Conv2D(16, 3, strides=(1,1), activation='relu'))
#model.add(MaxPooling2D(pool_size=(2,2), strides=None))
model.add(Flatten())
model.add(Dense(256, activation='sigmoid'))
# One sigmoid unit per label class.
model.add(Dense(len(le.classes_.tolist()), activation='sigmoid'))

opt = Adadelta()

# Replicate across 3 GPUs; MSE on the sigmoid outputs.
p_model = multi_gpu_model(model, gpus=3)
p_model.compile(loss='mean_squared_error', optimizer=opt,
                metrics=['mean_squared_error'])
batch_size = 42
#p_model = p.load('conv_model2')
p_model.fit_generator(generator(batch_size), epochs=10, verbose=1,
                      shuffle=False,
                      steps_per_epoch=math.ceil((len(files)) / batch_size),
                      max_queue_size=1, callbacks=[cb])
p_model.save_weights("model.h5")
p.save(p_model, 'conv_model1')
예제 #12
0
import ezPickle as p
import pandas as pd
from sklearn import svm

# Features: per-series summary statistics; targets: one-hot rows collapsed
# to integer class indices.
series_stats = pd.read_csv('../data/summary_stats.csv')
test_stats = pd.read_csv('../data/test_summary_stats.csv')
outputs = p.load('output_list')
print('load data done..')
# Idiomatic comprehension instead of the index-based in-place loop.
outputs = [row.index(1) for row in outputs]
clf = svm.SVC(gamma='scale', decision_function_shape='ovo')
print('train...')
clf.fit(series_stats.values, outputs)
p.save(clf, 'clf')
print('train done...')
predictions = clf.predict(test_stats.values)

print('predict done...')

def getReverseOneHotDict(mapping):
    """Invert a one-hot encoding dict for decoding predictions.

    Args:
        mapping: dict of label -> encoded value (any value with a stable
            ``str()`` representation, e.g. a one-hot list).

    Returns:
        dict keyed by ``str(value)`` with the original labels as values.
    """
    # Parameter renamed from ``dict`` so the builtin type is not shadowed.
    return {str(value): key for key, value in mapping.items()}
encoder_dict = p.load('encoder_dict')
decoder_dict = getReverseOneHotDict(encoder_dict)

predictions = predictions.tolist()

# Re-expand each predicted class index into a one-hot row.
# NOTE(review): class count 9 is hard-coded -- should presumably match
# len(encoder_dict); confirm against the encoder.
predictions = [[1 if j == pred else 0 for j in range(9)]
               for pred in predictions]
예제 #13
0
        # (fragment: the enclosing padding helper starts above this view)
        # Pad with 20-dim zero vectors, then return the padded list.
        l.append(np.zeros(20))
    return l
def getReverseOneHotDict(mapping):
    """Invert a one-hot encoding dict for decoding predictions.

    Args:
        mapping: dict of label -> encoded value (any value with a stable
            ``str()`` representation, e.g. a one-hot list).

    Returns:
        dict keyed by ``str(value)`` with the original labels as values.
    """
    # Parameter renamed from ``dict`` so the builtin type is not shadowed.
    return {str(value): key for key, value in mapping.items()}

print("started")
# Load the trained classifier model and the one-hot encoder mapping.
model = p.load("kerasTrained")
encoderDict = p.load("oneHotDict")
reverseEncoderDict = getReverseOneHotDict(encoderDict)

# Pick a random line of the training CSV to use as the generation seed.
num_samples = p.load('numSamples')
targetLine = math.floor(random.random() * num_samples)

# Advance the reader so ``row`` ends up holding the chosen line.
# Bug fix: the file handle was never closed -- use a with-block.
row = []
with open('training_data.csv', 'r') as myfile:
    reader = csv.reader(myfile, delimiter=',')
    for _ in range(targetLine):
        row = next(reader)

song = convertW2V(row[1:], window_size)
for _ in range(100):
    tempSong = song[-100:]
    model.reset_states()
    outputHot = list(model.predict(np.array(tempSong).reshape(1, 100, 20)))
    # NOTE(review): str() of the raw prediction must exactly equal a key of
    # reverseEncoderDict (built from str(one-hot list)); a float softmax
    # output will not round-trip -- this likely needs an argmax/one-hot
    # step before the lookup. Confirm against the encoder format.
    song.append(w2v[reverseEncoderDict[str(outputHot)]])
p.save(song, 'song')

예제 #14
0
	# (fragment: the enclosing create_model() definition starts above this view)
	# Stacked LSTMs with 20% dropout between layers.
	# NOTE(review): the final LSTM keeps return_sequences=True, so the softmax
	# Dense applies per-timestep -- confirm the loss expects sequence output.
	model.add(LSTM(256,  return_sequences=True))
	model.add(Dropout(.2))
	model.add(LSTM(128, return_sequences=True))
	model.add(Dropout(.2))
	model.add(LSTM(128, return_sequences=True))
	model.add(Dropout(.2))
	model.add(Dense(outputSize,  activation='softmax'))
	return model

#with tf.device("/cpu:0"):
model = create_model()

# make the model parallel
#p_model = multi_gpu_model(model, gpus=3)
rms = RMSprop()
try:
    model.compile(loss='categorical_crossentropy', optimizer=rms,
                  metrics=['categorical_accuracy'])

    print("Fitting")
    batch_size = 12
    epochs = 100
    songList = p.load('songList')
    #model.fit_generator(songBatchGenerator(songList,batch_size), epochs=10,  verbose=1,  shuffle=False, steps_per_epoch=math.ceil(len(songList)/batch_size),max_queue_size=2)
    total_batches = epochs * math.ceil(len(songList) / batch_size)
    # Manual batch loop instead of fit_generator's prefetch queue.
    for inp, out in songBatchGenerator(songList, batch_size, total_batches):
        model.train_on_batch(inp, out)
    p.save(model, 'kerasTrained2')
    print("Saved")
except MemoryError:
    print("Memory whyyyyyy")

				# (fragment: the enclosing audio_generator definition starts above)
				# Emit the accumulated batch, then reset the accumulators.
				yield x , y 
				data_list = []
				label_list = []


#for x, y in audio_generator(10):
#	print(x, y)
#	os.sleep(5)
batch_size = 9

# Masked LSTM stack over variable-length sequences padded with 0.
model = Sequential()
model.add(Masking(mask_value=0, input_shape=(max_len, 1)))
model.add(LSTM(256, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(.2))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(.2))
# Softmax over the label classes known to the LabelEncoder.
model.add(Dense(len(le.classes_.tolist()), activation='softmax'))

rms = RMSprop()

p_model = multi_gpu_model(model, gpus=3)

p_model.compile(loss='categorical_crossentropy', optimizer=rms,
                metrics=['categorical_accuracy'])


p_model.fit_generator(audio_generator(batch_size), epochs=10, verbose=1,
                      shuffle=False,
                      steps_per_epoch=math.ceil(len(files) / batch_size),
                      max_queue_size=1)
p.save(p_model, 'p_model')
#sequence = pad_sequences(sequences, maxlen=max_len, dtype='int32', padding='pre', truncating='pre', value=-1)
예제 #16
0
import ezPickle as p

print("Number of Songs is: ", len(glob.glob("songs/*.mid")))
songs = []
#This section is largely based on a web tutorial
# Tokenize every MIDI file into a list of "pitch:duration" strings.
for midi_path in glob.glob("songs/*.mid"):  # renamed from ``file``: shadowed the builtin
    song = converter.parse(midi_path)
    parts = instrument.partitionByInstrument(song)
    if parts:
        # Use the first instrument part when the score is partitioned.
        notes = parts.parts[0].recurse()
    else:
        notes = song.flat.notes
    currentSong = []

    for item in notes:
        if isinstance(item, note.Note):
            # NOTE(review): ``item.isNote`` looks always-true for Note
            # instances, which would make the 'rest:' branch unreachable --
            # rests are separate note.Rest objects and never enter this
            # isinstance. Confirm whether rests were meant to be captured.
            if item.isNote:
                currentSong.append(
                    str(item.pitch) + ":" + str(item.quarterLength)
                )  #For melody I am recording octave information
            else:
                currentSong.append('rest:' + str(item.quarterLength))
        elif isinstance(item, chord.Chord):  # elif: an item is never both Note and Chord
            currentSong.append(
                '.'.join(str(n) for n in item.normalOrder) + ":" +
                str(item.quarterLength)
            )  #For harmony I am only recording the 12 tone values
    songs.append(currentSong)
p.save(songs, 'songList')
#print(songs[0:3])