def train_initial(name, n_seq, n_labels, n_dimension, n_hidden_1, n_hidden_2, epochs, save):
    usage_ratio = 1
    # epochs = 150

    print '========================= Reading ========================='
    X_train, y_train, X_test, y_test = test.read_data(name=name, n_seq=n_seq,
                                                      n_labels=n_labels, n_dimension=n_dimension)
    data = (X_train, y_train, X_test, y_test)

    print '========================= Modeling ========================='
    model = lstm.build_model(n_dimension=n_dimension, n_labels=n_labels, n_seq=n_seq,
                             n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2)

    print '========================= Training ========================='
    model = lstm.run_network(model=model, data=data, epochs=epochs,
                             usage_ratio=usage_ratio, save=True, save_name=name)

    print '========================= Testing =========================='
    test.test_all_metrics(model, data=data, usage_ratio=usage_ratio)
def train_initial_divide(marker, n_dimension, n_seq, n_hidden_1, n_hidden_2, epochs, save):
    usage_ratio = 1
    # epochs = 150

    print '========================= Reading ========================='
    # str() so numeric n_dimension / n_seq concatenate cleanly into the save name
    X_train, y_train, X_test, y_test = pretraining.divide_save(
        filename='../data/torque_participants/S01_valid_' + marker + '.csv',
        savename=marker + '_' + str(n_dimension) + '_' + str(n_seq),
        n_seq=n_seq)
    data = (X_train, y_train, X_test, y_test)

    print '========================= Modeling ========================='
    model = lstm.build_model(n_dimension=n_dimension, n_seq=n_seq,
                             n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2)

    print '========================= Training ========================='
    model = lstm.run_network(model=model, data=data, epochs=epochs,
                             usage_ratio=usage_ratio, save=True,
                             save_name=marker + '_' + str(n_dimension) + '_' + str(n_seq))

    print '========================= Testing =========================='
    test.test_all_metrics(model, data=data, usage_ratio=usage_ratio)
def do_system_testing(dataset, result_path, feature_path, model_path, feature_params,
                      dataset_evaluation_mode='folds', classifier_method='gmm', overwrite=False):
    """System testing.

    If extracted features are not found from disk, they are extracted but not saved.

    Parameters
    ----------
    dataset : class
        dataset class

    result_path : str
        path where the results are saved.

    feature_path : str
        path where the features are saved.

    model_path : str
        path where the models are saved.

    feature_params : dict
        parameter dict

    dataset_evaluation_mode : str ['folds', 'full']
        evaluation mode, 'full' all material available is considered to belong to one fold.
        (Default value='folds')

    classifier_method : str ['gmm', 'lstm', 'dnn']
        classifier method
        (Default value='gmm')

    overwrite : bool
        overwrite existing models
        (Default value=False)

    Returns
    -------
    nothing

    Raises
    -------
    ValueError
        classifier_method is unknown.

    IOError
        Model file not found.
        Audio file not found.
    """

    if classifier_method not in ['gmm', 'lstm', 'dnn']:
        raise ValueError("Unknown classifier method [" + classifier_method + "]")

    # Check that target path exists, create if not
    check_path(result_path)

    for fold in dataset.folds(mode=dataset_evaluation_mode):
        current_result_file = get_result_filename(fold=fold, path=result_path)
        if not os.path.isfile(current_result_file) or overwrite:
            results = []

            # Load class model container
            model_filename = get_model_filename(fold=fold, path=model_path)
            if os.path.isfile(model_filename):
                model_container = load_data(model_filename)
                if classifier_method == 'lstm':
                    predict = lstm.build_model(model_container['models'])
            else:
                raise IOError("Model file not found [%s]" % model_filename)

            file_count = len(dataset.test(fold))
            for file_id, item in enumerate(dataset.test(fold)):
                progress(title_text='Testing',
                         fold=fold,
                         percentage=(float(file_id) / file_count),
                         note=os.path.split(item['file'])[1])

                # Load features
                feature_filename = get_feature_filename(audio_file=item['file'], path=feature_path)
                if os.path.isfile(feature_filename):
                    feature_data = load_data(feature_filename)['feat']
                else:
                    # On-the-fly feature extraction for missing feature files was
                    # elided in this excerpt.
                    raise IOError("Features not found [%s]" % item['file'])

                # Scale features
                feature_data = model_container['normalizer'].normalize(feature_data)

                # Do classification for the block
                if classifier_method == 'gmm':
                    current_result = do_classification_gmm(feature_data, model_container['models'])
                elif classifier_method == 'lstm':
                    current_result = lstm.do_classification_lstm(feature_data, predict)
                elif classifier_method == 'dnn':
                    current_result = dnn.do_classification_dnn(feature_data, **classifier_params)
                else:
                    raise ValueError("Unknown classifier method [" + classifier_method + "]")

                # Store the result
                results.append((dataset.absolute_to_relative(item['file']), current_result))

            # Save testing results
            with open(current_result_file, 'wt') as f:
                writer = csv.writer(f, delimiter='\t')
                for result_item in results:
                    writer.writerow(result_item)
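# do_classification_gmm is called in the testing loop above but is not defined in
# this excerpt. A minimal sketch of such a helper, assuming the older
# sklearn.mixture.GMM API used elsewhere in this section (an illustration, not the
# original implementation): score the segment's frames against each class GMM and
# return the label with the highest accumulated log-likelihood.
import numpy


def do_classification_gmm(feature_data, models):
    # 'models' maps scene label -> fitted mixture.GMM (i.e. model_container['models'])
    logls = {}
    for label in models:
        # GMM.score returns per-frame log-likelihoods; sum them over the segment
        logls[label] = numpy.sum(models[label].score(feature_data))
    return max(logls, key=logls.get)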
outfn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs)
if (N_epochs == 1):
    outfn += '_epoch.midi'
else:
    outfn += '_epochs.midi'

# musical settings
bpm = 130

# get data
chords, abstract_grammars = get_musical_data(fn)
corpus, val_indices, indices_val = get_corpus_data(abstract_grammars)
values = set(corpus)
print('corpus length:', len(corpus))
print('total # of values:', len(values))

# build model
model = lstm.build_model(corpus=corpus, val_indices=val_indices,
                         maxlen=maxlen, N_epochs=N_epochs)

# set up audio stream
out_stream = stream.Stream()
play = lambda x: midi.realtime.StreamPlayer(x).play()
stop = lambda: pygame.mixer.music.stop()

# generation loop
curr_offset = 0.0
loopEnd = len(chords)
for loopIndex in range(1, loopEnd):
    # get chords from file
    curr_chords = stream.Voice()
    for j in chords[loopIndex]:
        curr_chords.insert((j.offset % 4), j)
plt.show()


# Main Run Thread
if __name__ == '__main__':
    global_start_time = time.time()
    epochs = 1
    seq_len = 50

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = lstm.load_data('data/sp500.csv', seq_len, True)
    print('> Data Loaded. Compiling...', X_train.shape, y_train.shape)

    model = lstm.build_model([1, 50, 100, 1])
    model.fit(X_train, y_train, batch_size=512, nb_epoch=epochs, validation_split=0.05)

    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    # predicted = lstm.predict_sequence_full(model, X_test, seq_len)
    # predicted = lstm.predict_point_by_point(model, X_test)

    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 50)
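# The sp500 scripts in this section call plot_results_multiple without defining it.
# A minimal sketch of such a plotting helper, inferred from the call signature
# (an assumption, not necessarily the original implementation): plot the true series
# and overlay each predicted window, shifted to where its prediction starts.
import matplotlib.pyplot as plt


def plot_results_multiple(predicted_data, true_data, prediction_len):
    plt.plot(true_data, label='True Data')
    for i, data in enumerate(predicted_data):
        # pad with None so each predicted window lines up with its start index
        padding = [None] * (i * prediction_len)
        plt.plot(padding + list(data), label='Prediction')
    plt.legend()
    plt.show()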
def do_system_training(dataset, model_path, feature_normalizer_path, feature_path,
                       classifier_params, dataset_evaluation_mode='folds',
                       classifier_method='gmm', overwrite=False):
    """System training

    model container format:

    {
        'normalizer': normalizer class
        'models' :
            {
                'office' : mixture.GMM class
                'home' : mixture.GMM class
                ...
            }
    }

    Parameters
    ----------
    dataset : class
        dataset class

    model_path : str
        path where the models are saved.

    feature_normalizer_path : str
        path where the feature normalizers are saved.

    feature_path : str
        path where the features are saved.

    classifier_params : dict
        parameter dict

    dataset_evaluation_mode : str ['folds', 'full']
        evaluation mode, 'full' all material available is considered to belong to one fold.
        (Default value='folds')

    classifier_method : str ['gmm', 'lstm', 'dnn']
        classifier method
        (Default value='gmm')

    overwrite : bool
        overwrite existing models
        (Default value=False)

    Returns
    -------
    nothing

    Raises
    -------
    ValueError
        classifier_method is unknown.

    IOError
        Feature normalizer not found.
        Feature file not found.
    """
    import lstm
    # pdb.set_trace()

    if classifier_method not in ['gmm', 'lstm', 'dnn']:
        raise ValueError("Unknown classifier method [" + classifier_method + "]")

    # Check that target path exists, create if not
    check_path(model_path)

    for fold in dataset.folds(mode=dataset_evaluation_mode):
        # for fold in [1]:
        current_model_file = get_model_filename(fold=fold, path=model_path)
        if not os.path.isfile(current_model_file) or overwrite:
            # Load normalizer
            feature_normalizer_filename = get_feature_normalizer_filename(fold=fold, path=feature_normalizer_path)
            if os.path.isfile(feature_normalizer_filename):
                normalizer = load_data(feature_normalizer_filename)
            else:
                raise IOError("Feature normalizer not found [%s]" % feature_normalizer_filename)

            # Initialize model container
            model_container = {'normalizer': normalizer, 'models': {}}
            if os.path.isfile(current_model_file):
                model_container = load_data(current_model_file)
            else:
                print "No file named %s" % current_model_file

            # Collect training examples
            file_count = len(dataset.train(fold))
            data = {}
            for item_id, item in enumerate(dataset.train(fold)):
                progress(title_text='Collecting data',
                         fold=fold,
                         percentage=(float(item_id) / file_count),
                         note=os.path.split(item['file'])[1])

                # Load features
                feature_filename = get_feature_filename(audio_file=item['file'], path=feature_path)
                if os.path.isfile(feature_filename):
                    feature_data = load_data(feature_filename)['feat']
                else:
                    raise IOError("Features not found [%s]" % (item['file']))

                # Scale features
                feature_data = model_container['normalizer'].normalize(feature_data)

                # Store features per class label
                if item['scene_label'] not in data:
                    data[item['scene_label']] = feature_data
                else:
                    data[item['scene_label']] = numpy.vstack((data[item['scene_label']], feature_data))

            # Collect validation examples
            file_count = len(dataset.val(fold))
            data_val = {}
            for item_id, item in enumerate(dataset.val(fold)):
                progress(title_text='Collecting data_val',
                         fold=fold,
                         percentage=(float(item_id) / file_count),
                         note=os.path.split(item['file'])[1])

                # Load features
                feature_filename = get_feature_filename(audio_file=item['file'], path=feature_path)
                if os.path.isfile(feature_filename):
                    feature_data = load_data(feature_filename)['feat']
                else:
                    raise IOError("Features not found [%s]" % (item['file']))

                # Scale features
                feature_data = model_container['normalizer'].normalize(feature_data)

                # Store features per class label
                if item['scene_label'] not in data_val:
                    data_val[item['scene_label']] = feature_data
                    # data_val[item['scene_label']] = [feature_data]
                else:
                    data_val[item['scene_label']] = numpy.vstack((data_val[item['scene_label']], feature_data))
                    # data_val[item['scene_label']].append(feature_data)

            print classifier_params
            if classifier_method == 'gmm':
                # Train models for each class
                for label in data:
                    progress(title_text='Train models', fold=fold, note=label)
                    model_container['models'][label] = mixture.GMM(**classifier_params).fit(data[label])
            elif classifier_method == 'lstm':
                predict = lstm.build_model(model_container['models'])
                lstm.validate(data, data_val, predict)  # add training log
            elif classifier_method == 'dnn':
                model_container['models'] = dnn.do_train(data, data_val, **classifier_params)
            else:
                raise ValueError("Unknown classifier method [" + classifier_method + "]")
plt.show()


# Main Run Thread
if __name__ == '__main__':
    global_start_time = time.time()
    epochs = 1
    seq_len = 10

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = lstm.load_data('sp500.csv', seq_len, True)
    print('> Data Loaded. Compiling...')

    model = lstm.build_model([1, seq_len, 100, 1])
    model.fit(X_train, y_train, batch_size=512, nb_epoch=epochs, validation_split=0.05)

    # predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    predictions = lstm.predict_sequence_full(model, X_test, seq_len)
    # predictions = lstm.predict_point_by_point(model, X_test)

    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 50)
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
print([x_train, y_train, x_test, y_test])

X_train, y_train, X_test, y_test = lstm.load_data(input_data_filename, seq_len, True)
print("TRAINING ROWS: {0} TEST ROWS: {1}".format(X_train.shape[0], X_test.shape[0]))
print('> Data Loaded. Compiling...')

# model = lstm.build_model([1, 50, 100, 1])
# Don't hardcode "50" but use seq_len instead, because seq_len is the lookback length
# original model layers were [1, 50, 100, 1]
model = lstm.build_model([1, seq_len, seq_len * 2, 1])
model.fit(X_train, y_train, batch_size=512, nb_epoch=epochs, validation_split=0.05)

# For now, set our prediction length to our (input) sequence length
# prediction_len = seq_len
predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, prediction_len)
# predicted = lstm.predict_sequence_full(model, X_test, seq_len)
# predicted = lstm.predict_point_by_point(model, X_test)
seq_len = 10
train_samples = 100000
test_samples = 2000

x_raw, y_raw, info = lstm.load_data(path="../2014-04-01_1m_172800.csv",
                                    sequence_length=seq_len,
                                    row_start_ind=0,
                                    in_column_ind=[0, 1, 2, 3, 4, 5, 6],
                                    out_column_ind=[7, 8, 9, 10, 11, 12],
                                    do_normalize=True)
# print(info)

x_dim = x_raw.shape[2]
y_dim = y_raw.shape[2]

x_train, y_train = x_raw[:train_samples, :, :], y_raw[:train_samples, :, :]
x_test, y_test = x_raw[-test_samples:, :, :], y_raw[-test_samples:, :, :]

m_ = lstm.build_model(1, seq_len, x_dim, 100, 1, y_dim, False)
# m_.load_weights("./save_model/env.h5")
m_.fit(x_train, y_train, batch_size=1, nb_epoch=10)
m_.save_weights("./save_model/env.h5")

y_pred = lstm.predict_sequence(m_, x_test, batch_size=1)
for i in range(y_dim):
    plot_results(y_pred.reshape(-1, y_dim).transpose()[i],
                 y_test.reshape(-1, y_dim).transpose()[i])
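# plot_results is used above but not defined in this excerpt. A minimal sketch of a
# single-series comparison plot, assumed from how it is called (one predicted and one
# true 1-D series per output dimension):
import matplotlib.pyplot as plt


def plot_results(predicted_data, true_data):
    plt.plot(true_data, label='True Data')
    plt.plot(predicted_data, label='Prediction')
    plt.legend()
    plt.show()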
def generate(data_fn, out_fn, N_epochs):
    # model settings
    max_len = 20
    max_tries = 1000
    diversity = 0.5

    # musical settings
    bpm = 130

    # get data
    abstract_grammars = get_musical_data(data_fn)
    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
    print('corpus length:', len(corpus))
    print('total # of values:', len(values))

    # build model
    model = lstm.build_model(corpus=corpus, val_indices=val_indices,
                             max_len=max_len, N_epochs=N_epochs)

    # set up audio stream
    out_stream = stream.Stream()

    # generation loop
    curr_offset = 0.0
    loopEnd = len(abstract_grammars)
    print(loopEnd)
    for loopIndex in range(1, loopEnd):
        # generate grammar
        curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                          abstract_grammars=abstract_grammars,
                                          values=values, val_indices=val_indices,
                                          indices_val=indices_val, max_len=max_len,
                                          max_tries=max_tries, diversity=diversity)
        curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')
        curr_grammar = curr_grammar.replace('0X', '0 X')

        # Pruning #1: smoothing measure
        curr_grammar = prune_grammar(curr_grammar)

        # Get notes from grammar and chords
        curr_notes = unparse_grammar(curr_grammar)

        # Pruning #2: removing repeated and too close together notes
        curr_notes = prune_notes(curr_notes)

        # quality assurance: clean up notes
        curr_notes = clean_up_notes(curr_notes)

        # print # of notes in curr_notes
        print('After pruning: %s notes' % (len([i for i in curr_notes
                                                if isinstance(i, note.Note)])))

        # insert into the output stream
        for m in curr_notes:
            out_stream.insert(curr_offset + m.offset, m)

        curr_offset += 4.0

    out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

    # save stream
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open(out_fn, 'wb')
    mf.write()
    mf.close()
                 stock=stock):
    return [seq_len, layers, gap, batch, start, end, stock]


if __name__ == '__main__':
    global_start_time = time.time()
    # print('> Data Loaded. Compiling...')

    X_train, y_train, X_test, y_test, ender = lstm.get_data(seq_len=seq_len, split=.8, gap=gap,
                                                            start=start, end=end, stock=stock)
    model = lstm.build_model(layers, batch=batch_sizes, steps=seq_len, learning_rate=learning_rate)
    print(model.summary())
    plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

    print("neurons: " + str(layers[0] * layers[1]) + ", " + str(layers[2]))
    print("Learning Rate: " + str(learning_rate))
    print("X_train shape: " + str(X_train.shape))
    print("X_test shape : " + str(X_test.shape))

    call = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=patience,
                                         verbose=0,
if __name__ == '__main__':
    global_start_time = time.time()
    seq_len = 100

    X_train, y_train, X_test, y_test = lstm.load_data('small_data.csv', seq_len, True)

    # Train the model
    filepath = "model.h5"
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint]

    model = lstm.build_model([1, 100, 200, 1])
    model.fit(X_train, y_train, batch_size=512, nb_epoch=1, validation_split=0.05,
              callbacks=callbacks_list)
    print(model.summary())

    # Load the model
    # model = load_model("model.h5")

    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 100)
    print('duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 100)
elif seq_len == 2:
    valid_ratio = 0.5

if seq_len <= 2:
    short.append(i)
    epochs = 20
    print("extremely short sequence for item No." + str(i))

"""
X_train
X_train.shape
maxList
"""

nodes = seq_len * 2

# build LSTM model; activation function = "linear" or "tanh"
model = lstm.build_model([1, nodes, nodes * 2, fwd_len], act_fnc)
model.fit(X_train, y_train, batch_size=nodes * 2, nb_epoch=epochs, validation_split=valid_ratio)
print('y shape', y_train.shape, '; X shape', X_train.shape)

curr = y_train[-1]
# decurr = lstm.denormalise_windows(list(curr), minList[-1], maxList[-1])
p3 = model.predict(curr[newaxis, :, newaxis])
dep3 = lstm.denormalise_windows(list(p3[0]), minList[-1], maxList[-1])
# util.plot_results(dep3, decurr, seq_len, i)
p3
plt.legend()
plt.show()


# Main Run Thread
if __name__ == '__main__':
    global_start_time = time.time()
    epochs = 1
    seq_len = 50

    print('> Loading data... ')
    X_train, y_train, X_test, y_test = lstm.load_data('data/sp500.csv', seq_len, True)
    print('> Data Loaded. Compiling...', X_train.shape, y_train.shape)

    model = lstm.build_model([1, 50, 100, 1])
    model.fit(X_train, y_train, batch_size=512, nb_epoch=epochs, validation_split=0.05)

    predictions = lstm.predict_sequences_multiple(model, X_test, seq_len, 50)
    # predicted = lstm.predict_sequence_full(model, X_test, seq_len)
    # predicted = lstm.predict_point_by_point(model, X_test)

    print('Training duration (s) : ', time.time() - global_start_time)
    plot_results_multiple(predictions, y_test, 50)
def generate(data_fn, out_fn, N_epochs): """ Generates musical sequence based on the given data filename and settings. Plays then stores (MIDI file) the generated output. """ # model settings max_len = 20 max_tries = 1000 diversity = 0.5 # musical settings bpm = 130 # get data chords, abstract_grammars = get_musical_data(data_fn) corpus, values, val_indices, indices_val = get_corpus_data( abstract_grammars) print('corpus length:', len(corpus)) print('total # of values:', len(values)) ### embed() ### # build model model = lstm.build_model(corpus=corpus, val_indices=val_indices, max_len=max_len, N_epochs=N_epochs) # set up audio stream out_stream = stream.Stream() # generation loop curr_offset = 0.0 loopEnd = len(chords) for loopIndex in range(1, loopEnd): # get chords from file curr_chords = stream.Voice() for j in chords[loopIndex]: curr_chords.insert((j.offset % 4), j) # generate grammar curr_grammar = __generate_grammar(model=model, corpus=corpus, abstract_grammars=abstract_grammars, values=values, val_indices=val_indices, indices_val=indices_val, max_len=max_len, max_tries=max_tries, diversity=diversity) curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C') # Pruning #1: smoothing measure curr_grammar = prune_grammar(curr_grammar) # Get notes from grammar and chords curr_notes = unparse_grammar(curr_grammar, curr_chords) # Pruning #2: removing repeated and too close together notes curr_notes = prune_notes(curr_notes) # quality assurance: clean up notes curr_notes = clean_up_notes(curr_notes) # print # of notes in curr_notes print('After pruning: %s notes' % (len([i for i in curr_notes if isinstance(i, note.Note)]))) # insert into the output stream for m in curr_notes: out_stream.insert(curr_offset + m.offset, m) for mc in curr_chords: out_stream.insert(curr_offset + mc.offset, mc) curr_offset += 4.0 out_stream.insert(0.0, tempo.MetronomeMark(number=bpm)) # Play the final stream through output (see 'play' lambda function above) play = lambda x: midi.realtime.StreamPlayer(x).play() play(out_stream) # save stream mf = midi.translate.streamToMidiFile(out_stream) mf.open(out_fn, 'wb') mf.write() mf.close()
def generate(data_fn, out_fn, N_epochs):
    # model settings
    max_len = 20
    max_tries = 1000
    diversity = 0.5

    # musical settings
    bpm = 130

    # get data
    chords, abstract_grammars = get_musical_data(data_fn)
    corpus, values, val_indices, indices_val = get_corpus_data(abstract_grammars)
    print('corpus length:', len(corpus))
    print('total # of values:', len(values))

    # build model
    model = lstm.build_model(corpus=corpus, val_indices=val_indices,
                             max_len=max_len, N_epochs=N_epochs)

    # set up audio stream
    out_stream = stream.Stream()

    # generation loop
    curr_offset = 0.0
    loopEnd = len(chords)
    for loopIndex in range(1, loopEnd):
        # get chords from file
        curr_chords = stream.Voice()
        for j in chords[loopIndex]:
            curr_chords.insert((j.offset % 4), j)

        # generate grammar
        curr_grammar = __generate_grammar(model=model, corpus=corpus,
                                          abstract_grammars=abstract_grammars,
                                          values=values, val_indices=val_indices,
                                          indices_val=indices_val, max_len=max_len,
                                          max_tries=max_tries, diversity=diversity)
        curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C')

        # Pruning #1: smoothing measure
        curr_grammar = prune_grammar(curr_grammar)

        # Get notes from grammar and chords
        curr_notes = unparse_grammar(curr_grammar, curr_chords)

        # Pruning #2: removing repeated and too close together notes
        curr_notes = prune_notes(curr_notes)

        # quality assurance: clean up notes
        curr_notes = clean_up_notes(curr_notes)

        # print # of notes in curr_notes
        print('After pruning: %s notes' % (len([i for i in curr_notes
                                                if isinstance(i, note.Note)])))

        # insert into the output stream
        for m in curr_notes:
            out_stream.insert(curr_offset + m.offset, m)
        for mc in curr_chords:
            out_stream.insert(curr_offset + mc.offset, mc)

        curr_offset += 4.0

    out_stream.insert(0.0, tempo.MetronomeMark(number=bpm))

    # Play the final stream through output (see 'play' lambda function above)
    play = lambda x: midi.realtime.StreamPlayer(x).play()
    play(out_stream)

    # save stream
    mf = midi.translate.streamToMidiFile(out_stream)
    mf.open(out_fn, 'wb')
    mf.write()
    mf.close()