    format = args.format
    docs = file_utils.read_file(infile, n)
    N = len(docs)
    np.random.shuffle(docs)

    # split data
    tr_i, val_i = int(N * .8), int(N * .9)
    train, val1, val2 = docs[:tr_i], docs[tr_i:val_i], docs[val_i:]

    training_routine = training_routines_dict[train_type]
    probs = training_routine(train, val1, val2, n)

    # save model in specified format
    file_utils.save_model(format, probs, lang, n)
    print('\'{}\' model saved to data folder.'.format(format))

elif task == 'generate':
    lang = args.language
    n = args.n
    format = args.format
    probs = file_utils.read_model(format, lang, n)
    w_gen = lang_model.generate_from_LM(300, probs, n)
    print(w_gen)

elif task == 'perp':
    infile = args.document_file
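# For context, a minimal sketch of what a character-level sampler such as
# lang_model.generate_from_LM(300, probs, n) might do. This is an illustrative
# assumption, not the project's actual implementation: it presumes `probs`
# maps an (n-1)-character history string to a {next_char: probability} dict,
# and that n >= 2.
import numpy as np

def generate_from_lm_sketch(num_chars, probs, n, pad='#'):
    history = pad * (n - 1)                 # seed with padding characters
    out = []
    for _ in range(num_chars):
        dist = probs.get(history)
        if not dist:                        # unseen history: stop early
            break
        chars, ps = zip(*dist.items())
        c = np.random.choice(chars, p=ps)   # sample the next char from the model
        out.append(c)
        history = (history + c)[-(n - 1):]  # slide the history window
    return ''.join(out)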
def main_theano_sign_lang_var_len_adadelta():
    """
    :description: trains a model on the sign language data, accounting for
        variable-length sequences and processing minibatches.
    """
    print('loading data...')
    n_input_at_each_timestep = 10
    n_classes = 97  # labels are not zero-based; the dataset has 98 signs, so this may need adjusting

    X, y = sign_lang.load_data_from_aggregate_file()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)
    X = X.astype(theano.config.floatX)
    masks = masks.astype(theano.config.floatX)
    # reorder to (time, batch, features) so the recurrence iterates over timesteps
    X = np.swapaxes(X, 0, 1)
    masks = np.swapaxes(masks, 0, 1)

    # 80/20 train/test split; shared variables keep the data on the device,
    # and minibatches are selected by index through `givens` below
    split_idx = int(.8 * X.shape[1])
    X = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=theano.config.floatX), borrow=True)
    y = theano.shared(y, borrow=True)
    trainset_masks = masks[:, :split_idx, :]
    testset_masks = masks[:, split_idx:, :]
    trainset_X, trainset_y = X[:, :split_idx, :], y[:split_idx]
    testset_X, testset_y = X[:, split_idx:, :], y[split_idx:]

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.lvector('target')
    # debugging probes (swap in for x/target to print values during a run)
    print_x = theano.printing.Print('\nx')(x)
    print_target = theano.printing.Print('target')(target)
    mask = T.tensor3('mask')

    print('building model...')
    lstm_1_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    lstm_2_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_2.save'
    lstm_3_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_3.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'
    lstm_1 = load_model(lstm_1_filepath)
    # lstm_2 = load_model(lstm_2_filepath)
    # lstm_3 = load_model(lstm_3_filepath)
    softmax = load_model(softmax_filepath)
    # lstm_1 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_1', return_indices=[-1], dropout_prob=0.3)
    # lstm_2 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_2', return_indices=None, dropout_prob=0.3)
    # lstm_3 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_3', return_indices=[-1], dropout_prob=0.3)
    # softmax = variable_length_sequence_lstm.Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)

    # layers = [lstm_1, lstm_2, lstm_3, softmax]
    layers = [lstm_1, softmax]
    cost_expr = variable_length_sequence_lstm.Softmax.negative_log_likelihood
    rnn = variable_length_sequence_lstm.MLP(layers, cost=cost_expr, return_indices=[-1])
    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.005)

    batch_size = 10
    print('building trainer...')
    trainer = theano.function(
        [index],
        [cost],
        updates=updates,
        givens={
            x: trainset_X[:, index * batch_size:(index + 1) * batch_size],
            target: trainset_y[index * batch_size:(index + 1) * batch_size],
            mask: trainset_masks[:, index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: testset_X[:, index * batch_size:(index + 1) * batch_size],
            target: testset_y[index * batch_size:(index + 1) * batch_size],
            mask: testset_masks[:, index * batch_size:(index + 1) * batch_size]
        },
        mode='FAST_RUN')

    print('training model...')
    n_train_examples = trainset_X.shape.eval()[1]
    n_test_examples = testset_X.shape.eval()[1]
    n_epochs = 1000
    lowest_cost = -1  # sentinel: no cost recorded yet
    run_validation = False
    n_train_batches = int(trainset_X.shape.eval()[1] / float(batch_size))
    n_validation_batches = int(testset_X.shape.eval()[1] / float(batch_size))
    for epoch in range(n_epochs):
        costs = []
        # random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=100)
        for sample_idx in range(n_train_batches):
        # for sample_idx in random_indices:
            costs.append(trainer(sample_idx)[0])
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))
        # checkpoint and schedule a validation pass whenever the average cost
        # improves on the best so far by at least 1%
        if lowest_cost == -1 or avg_cost < lowest_cost * 0.99:
            lowest_cost = avg_cost
            run_validation = True
            save_model(lstm_1, lstm_1_filepath)
            # save_model(lstm_2, lstm_2_filepath)
            # save_model(lstm_3, lstm_3_filepath)
            save_model(softmax, softmax_filepath)

        predictions = []
        if run_validation:
            print('\nvalidation')
            for sample_idx in range(n_validation_batches):
                predictions.append(validate_model(sample_idx)[1])
            accuracy = (1 - np.mean(predictions)) * 100  # mean error rate -> percent accuracy
            print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))
            run_validation = False

    # print('finished training, final stats:\nfinal cost: {0}\naccuracy: {1}%'.format(np.mean(costs), accuracy))
    print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))
    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
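# save_model and load_model are imported from elsewhere in the project; a
# minimal pickle-based sketch of what they might do (an assumption about the
# project's implementation, not a copy of it):
import pickle

def save_model_sketch(model, filepath):
    # serialize the layer object, weights included, to disk
    with open(filepath, 'wb') as f:
        pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_model_sketch(filepath):
    # restore a previously pickled layer object
    with open(filepath, 'rb') as f:
        return pickle.load(f)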
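# get_random_indices is referenced in the commented-out random-sampling loops
# in both training routines; a plausible sketch of its behavior (an assumption,
# not the project's actual implementation):
def get_random_indices_sketch(max_index, samples_per_epoch):
    # draw samples_per_epoch indices uniformly from [0, max_index] without replacement
    return np.random.choice(max_index + 1, size=samples_per_epoch, replace=False)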
def main_theano_sign_lang():
    print('loading data...')
    n_input_at_each_timestep = 10
    n_classes = 97  # labels are not zero-based; the dataset has 98 signs, so this may need adjusting
    dataset_sequence_length = 31

    X, y = sign_lang.load_data_from_aggregate_file()
    # fixed-length variant: truncate every sequence to the shortest one
    X = chest_accel.truncate_to_smallest(X)
    split_idx = int(.8 * X.shape[0])
    X = theano.shared(np.asarray(X, dtype=theano.config.floatX), borrow=True)
    y = theano.shared(y, borrow=True)
    trainset_X, trainset_y = X[:split_idx], y[:split_idx]
    testset_X, testset_y = X[split_idx:], y[split_idx:]

    index = T.lscalar()
    x = T.matrix('x')
    target = T.lscalar('target')
    # debugging probes (swap in for x/target to print values during a run)
    print_x = theano.printing.Print('\nx')(x)
    print_target = theano.printing.Print('target')(target)

    print('building model...')
    # layers = [EncDecRecurrent(n_vis=n_input_at_each_timestep, n_hid=rec_n_hid, return_indices=[-1]), Softmax(n_vis=rec_n_hid, n_classes=n_classes)]
    # single layer
    # layers = [EncDecRecurrent(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='recurrent', return_indices=[-1]), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]
    # double layer
    # layers = [EncDecRecurrent(n_vis=n_input_at_each_timestep, n_hid=rec_n_hid, layer_name='rec_1'), EncDecRecurrent(n_vis=rec_n_hid, n_hid=n_input_at_each_timestep, layer_name='rec_2', return_indices=[-1]), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]
    # lstm
    # layers = [LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm', return_indices=[-1], dropout_prob=0.3), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]
    # 2*lstm
    # layers = [LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_1', dropout_prob=0.2), LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_2', dropout_prob=0.2, return_indices=[-1]), Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    encoding_rec_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/enc_dec_overlap_1.save'
    lstm_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'
    encoding_rec = load_model(encoding_rec_filepath)
    # recurrent_1 = load_model(lstm_filepath)
    # softmax = load_model(softmax_filepath)
    # encoding_rec = encoding_recurrent_overlap.EncodingRecurrentOverlap(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='enc_1')

    # print('building pretrainer...')
    # pre_cost, pre_updates = encoding_rec.get_pretraining_cost_updates(x, learning_rate=0.001)
    # pretrainer = theano.function(
    #     [index],
    #     [pre_cost],
    #     updates=pre_updates,
    #     givens={
    #         x: trainset_X[index]
    #     },
    #     mode='FAST_RUN'
    # )

    # print('pretraining model...')
    # n_epochs = 20
    # n_train_examples = trainset_X.shape.eval()[0]
    # for epoch in range(n_epochs):
    #     costs = []
    #     # random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=10)
    #     for sample_idx in range(n_train_examples):
    #     # for sample_idx in random_indices:
    #         costs.append(pretrainer(sample_idx)[0])
    #     print('training cost for epoch {0}: {1}'.format(epoch, np.mean(costs)))
    #     for param in encoding_rec.reconstruction_params:
    #         print('{}: {}'.format(param.name, param.get_value()))
    #     save_model(encoding_rec, encoding_rec_filepath)

    recurrent_1 = LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_1', return_indices=[-1], dropout_prob=0.3)
    # recurrent_2 = LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_2', return_indices=[-1], dropout_prob=0.2)
    softmax = Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)

    # 1*encoding + recurrent
    layers = [encoding_rec, recurrent_1, softmax]
    # layers = [recurrent_1, softmax]
    # 3*lstm
    # layers = [LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_1'),
    #           LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_2'),
    #           LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='lstm_3', return_indices=[-1]),
    #           Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)]

    # rnn = EncDecRNN(layers, cost=model_cost, return_indices=[-1])
    cost_fn = Softmax.negative_log_likelihood  # renamed from `cost` to avoid shadowing the compiled cost below
    rnn = EncDecRNN(layers, cost=cost_fn, return_indices=[-1])
    # cost, updates = rnn.get_cost_updates((x, print_target))
    cost, updates = rnn.get_cost_updates((x, target))

    print('building trainer...')
    trainer = theano.function(
        [index],
        [cost],
        updates=updates,
        givens={
            x: trainset_X[index],
            target: trainset_y[index]
        },
        mode='FAST_RUN')

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: testset_X[index],
            target: testset_y[index]
        },
        mode='FAST_RUN')

    print('training model...')
    n_train_examples = trainset_X.shape.eval()[0]
    n_test_examples = testset_X.shape.eval()[0]
    n_epochs = 100
    lowest_cost = -1  # sentinel: no cost recorded yet
    run_validation = False
    for epoch in range(n_epochs):
        costs = []
        # random_indices = get_random_indices(max_index=n_train_examples - 1, samples_per_epoch=100)
        for sample_idx in range(n_train_examples):
        # for sample_idx in random_indices:
            costs.append(trainer(sample_idx)[0])
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))
        # checkpoint and schedule a validation pass whenever the average cost
        # improves on the best so far by at least 2%
        if lowest_cost == -1 or avg_cost < lowest_cost * 0.98:
            lowest_cost = avg_cost
            run_validation = True
            save_model(recurrent_1, lstm_filepath)
            save_model(softmax, softmax_filepath)

        predictions = []
        if run_validation:
            print('\nvalidation')
            for sample_idx in range(n_test_examples):
                predictions.append(validate_model(sample_idx)[1])
            accuracy = (1 - np.mean(predictions)) * 100  # mean error rate -> percent accuracy
            print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))
            run_validation = False

    # print('finished training, final stats:\nfinal cost: {0}\naccuracy: {1}%'.format(np.mean(costs), accuracy))
    print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))
    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
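# Hypothetical entry point, assumed here so the module can be run directly
# (the original may select a routine elsewhere); swap in main_theano_sign_lang()
# to run the fixed-length variant instead.
if __name__ == '__main__':
    main_theano_sign_lang_var_len_adadelta()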