def run_hotel_review():
    """
    :description: trains an encoder-decoder LSTM on the hotel review data.
        The network input is X_reverse and the training target is X. The
        encoder and decoder are saved every time the average epoch cost
        drops more than 1% below the best cost seen so far, and all layer
        parameters are printed once training finishes.
    """
    print('loading data...')
    X = hotel_review_data_utils.load_data_train()
    # NOTE(review): reverse_X() is called without arguments and lives on
    # hotel_review_utils rather than hotel_review_data_utils -- confirm that
    # this is the intended helper.
    X_reverse = hotel_review_utils.reverse_X()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)
    X_reverse, _ = sign_lang.pad_data_to_max_sample_length(X_reverse)

    float_type = theano.config.floatX
    # swap the first two axes so that axis 1 indexes the examples; the
    # mini-batches below are sliced along that axis
    X = np.swapaxes(X.astype(float_type), 0, 1)
    X_reverse = np.swapaxes(X_reverse.astype(float_type), 0, 1)
    masks = np.swapaxes(masks.astype(float_type), 0, 1)
    # read the example count from the numpy array, before it is wrapped in
    # a shared variable, so no symbolic shape has to be evaluated later
    n_examples = X.shape[1]

    X = theano.shared(np.asarray(X, dtype=float_type), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=float_type), borrow=True)
    X_reverse = theano.shared(np.asarray(X_reverse, dtype=float_type), borrow=True)

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.tensor3('target')
    mask = T.tensor3('mask')

    print('building model...')
    # these paths must be defined: the training loop saves to them whenever
    # the cost improves
    encoder_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/enc.save'
    decoder_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/dec.save'
    # to resume from previously saved layers instead of building new ones:
    # encoder = load_model(encoder_filepath)
    # decoder = load_model(decoder_filepath)

    # the number of words in the dictionary, including the marker for end-of-document
    n_classes = 25000
    n_hidden = 1000
    encoder = variable_length_sequence_lstm.LSTM(n_vis=n_classes, n_hid=n_hidden, layer_name='enc', return_indices=[-1])
    decoder = hotel_review_enc_dec_rnn.DecoderLSTM(n_hid=n_hidden, n_classes=n_classes, layer_name='dec')
    rnn = hotel_review_enc_dec_rnn.EncoderDecoderRNN(encoder, decoder)
    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.1)

    batch_size = 10
    print('building trainer...')
    # symbolic slice selecting mini-batch number `index` along axis 1
    batch = slice(index * batch_size, (index + 1) * batch_size)
    trainer = theano.function(
        [index],
        [cost],
        updates=updates,
        givens={
            x: X_reverse[:, batch],
            target: X[:, batch],
            mask: masks[:, batch],
        },
        mode='FAST_RUN'
    )

    print('training model...')
    # floor division: a trailing partial batch is not trained on
    n_batches = int(n_examples // batch_size)
    n_epochs = 100
    lowest_cost = None  # no epoch has finished yet
    for epoch in range(n_epochs):
        costs = [trainer(batch_idx)[0] for batch_idx in range(n_batches)]
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))
        # only checkpoint on an improvement of more than 1%
        if lowest_cost is None or avg_cost < lowest_cost * 0.99:
            lowest_cost = avg_cost
            save_model(encoder, encoder_filepath)
            save_model(decoder, decoder_filepath)

    print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))
    for layer in (rnn.encoder, rnn.decoder):
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
def main_theano_sign_lang_var_len_adadelta():
    """
    :description: this trains a model on the sign language data as well, but
        accounts for variable length sequences and processes batches.
        Previously saved layers (one LSTM and one softmax) are loaded from
        disk and trained further on the first 80% of the examples. Every
        time the average epoch cost drops more than 1% below the best cost
        seen so far, both layers are saved and the accuracy on the
        remaining 20% is printed.
    """
    print('loading data...')
    X, y = sign_lang.load_data_from_aggregate_file()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)

    float_type = theano.config.floatX
    # swap the first two axes so that axis 1 indexes the examples; the
    # train/test split and the mini-batches are sliced along that axis
    X = np.swapaxes(X.astype(float_type), 0, 1)
    masks = np.swapaxes(masks.astype(float_type), 0, 1)

    # read the counts from the numpy array, before it is wrapped in a shared
    # variable, so no symbolic shape has to be evaluated later
    n_examples = X.shape[1]
    split_idx = int(.8 * n_examples)
    n_train_examples = split_idx
    n_test_examples = n_examples - split_idx

    X = theano.shared(np.asarray(X, dtype=float_type), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=float_type), borrow=True)
    # NOTE(review): `target` below is an lvector; presumably y is already an
    # int64 array, otherwise the givens substitution will be rejected -- confirm.
    y = theano.shared(y, borrow=True)

    trainset_masks = masks[:, :split_idx, :]
    testset_masks = masks[:, split_idx:, :]
    trainset_X, trainset_y = X[:, :split_idx, :], y[:split_idx]
    testset_X, testset_y = X[:, split_idx:, :], y[split_idx:]

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.lvector('target')
    mask = T.tensor3('mask')

    print('building model...')
    lstm_1_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'
    lstm_1 = load_model(lstm_1_filepath)
    softmax = load_model(softmax_filepath)
    # To train from scratch instead of resuming, build the layers directly:
    # n_input_at_each_timestep = 10
    # n_classes = 97  # original note: "no base 0 considered, there are just 98 of them. May need to be 97"
    # lstm_1 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_1', return_indices=[-1], dropout_prob=0.3)
    # softmax = variable_length_sequence_lstm.Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)
    # Earlier experiments stacked two further LSTM layers ('rec_2', 'rec_3',
    # saved as lstm_2.save / lstm_3.save) between lstm_1 and softmax.

    layers = [lstm_1, softmax]
    cost_expr = variable_length_sequence_lstm.Softmax.negative_log_likelihood
    rnn = variable_length_sequence_lstm.MLP(layers, cost=cost_expr, return_indices=[-1])
    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.005)

    batch_size = 10
    print('building trainer...')
    # symbolic slice selecting mini-batch number `index`
    batch = slice(index * batch_size, (index + 1) * batch_size)
    trainer = theano.function(
        [index],
        [cost],
        updates=updates,
        givens={
            x: trainset_X[:, batch],
            target: trainset_y[batch],
            mask: trainset_masks[:, batch],
        },
        mode='FAST_RUN'
    )

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: testset_X[:, batch],
            target: testset_y[batch],
            mask: testset_masks[:, batch],
        },
        mode='FAST_RUN'
    )

    print('training model...')
    n_epochs = 1000
    # floor division: trailing partial batches are neither trained on nor validated
    n_train_batches = int(n_train_examples // batch_size)
    n_validation_batches = int(n_test_examples // batch_size)
    lowest_cost = None  # no epoch has finished yet
    for epoch in range(n_epochs):
        costs = [trainer(batch_idx)[0] for batch_idx in range(n_train_batches)]
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))

        # only checkpoint and validate on an improvement of more than 1%
        if lowest_cost is not None and not avg_cost < lowest_cost * 0.99:
            continue
        lowest_cost = avg_cost
        save_model(lstm_1, lstm_1_filepath)
        save_model(softmax, softmax_filepath)

        print('\nvalidation')
        # the second output of validate_model is the `errors` expression
        batch_errors = [validate_model(batch_idx)[1] for batch_idx in range(n_validation_batches)]
        accuracy = (1 - np.mean(batch_errors)) * 100
        print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))

    print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))
    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
def run_hotel_review():
    """
    :description: trains an encoder-decoder LSTM on the hotel review data.
        The network input is X_reverse and the training target is X. The
        encoder and decoder are saved every time the average epoch cost
        drops more than 1% below the best cost seen so far, and all layer
        parameters are printed once training finishes.
    """
    print('loading data...')
    X = hotel_review_data_utils.load_data_train()
    # NOTE(review): reverse_X() is called without arguments and lives on
    # hotel_review_utils rather than hotel_review_data_utils -- confirm that
    # this is the intended helper.
    X_reverse = hotel_review_utils.reverse_X()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)
    X_reverse, _ = sign_lang.pad_data_to_max_sample_length(X_reverse)

    float_type = theano.config.floatX
    # swap the first two axes so that axis 1 indexes the examples; the
    # mini-batches below are sliced along that axis
    X = np.swapaxes(X.astype(float_type), 0, 1)
    X_reverse = np.swapaxes(X_reverse.astype(float_type), 0, 1)
    masks = np.swapaxes(masks.astype(float_type), 0, 1)
    # read the example count from the numpy array, before it is wrapped in
    # a shared variable, so no symbolic shape has to be evaluated later
    n_examples = X.shape[1]

    X = theano.shared(np.asarray(X, dtype=float_type), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=float_type), borrow=True)
    X_reverse = theano.shared(np.asarray(X_reverse, dtype=float_type), borrow=True)

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.tensor3('target')
    mask = T.tensor3('mask')

    print('building model...')
    # these paths must be defined: the training loop saves to them whenever
    # the cost improves
    encoder_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/enc.save'
    decoder_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/dec.save'
    # to resume from previously saved layers instead of building new ones:
    # encoder = load_model(encoder_filepath)
    # decoder = load_model(decoder_filepath)

    # the number of words in the dictionary, including the marker for end-of-document
    n_classes = 25000
    n_hidden = 1000
    encoder = variable_length_sequence_lstm.LSTM(n_vis=n_classes, n_hid=n_hidden, layer_name='enc', return_indices=[-1])
    decoder = hotel_review_enc_dec_rnn.DecoderLSTM(n_hid=n_hidden, n_classes=n_classes, layer_name='dec')
    rnn = hotel_review_enc_dec_rnn.EncoderDecoderRNN(encoder, decoder)
    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.1)

    batch_size = 10
    print('building trainer...')
    # symbolic slice selecting mini-batch number `index` along axis 1
    batch = slice(index * batch_size, (index + 1) * batch_size)
    trainer = theano.function(
        [index],
        [cost],
        updates=updates,
        givens={
            x: X_reverse[:, batch],
            target: X[:, batch],
            mask: masks[:, batch],
        },
        mode='FAST_RUN'
    )

    print('training model...')
    # floor division: a trailing partial batch is not trained on
    n_batches = int(n_examples // batch_size)
    n_epochs = 100
    lowest_cost = None  # no epoch has finished yet
    for epoch in range(n_epochs):
        costs = [trainer(batch_idx)[0] for batch_idx in range(n_batches)]
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))
        # only checkpoint on an improvement of more than 1%
        if lowest_cost is None or avg_cost < lowest_cost * 0.99:
            lowest_cost = avg_cost
            save_model(encoder, encoder_filepath)
            save_model(decoder, decoder_filepath)

    print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))
    for layer in (rnn.encoder, rnn.decoder):
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))
def main_theano_sign_lang_var_len_adadelta():
    """
    :description: this trains a model on the sign language data as well, but
        accounts for variable length sequences and processes batches.
        Previously saved layers (one LSTM and one softmax) are loaded from
        disk and trained further on the first 80% of the examples. Every
        time the average epoch cost drops more than 1% below the best cost
        seen so far, both layers are saved and the accuracy on the
        remaining 20% is printed.
    """
    print('loading data...')
    X, y = sign_lang.load_data_from_aggregate_file()
    X, masks = sign_lang.pad_data_to_max_sample_length(X)

    float_type = theano.config.floatX
    # swap the first two axes so that axis 1 indexes the examples; the
    # train/test split and the mini-batches are sliced along that axis
    X = np.swapaxes(X.astype(float_type), 0, 1)
    masks = np.swapaxes(masks.astype(float_type), 0, 1)

    # read the counts from the numpy array, before it is wrapped in a shared
    # variable, so no symbolic shape has to be evaluated later
    n_examples = X.shape[1]
    split_idx = int(.8 * n_examples)
    n_train_examples = split_idx
    n_test_examples = n_examples - split_idx

    X = theano.shared(np.asarray(X, dtype=float_type), borrow=True)
    masks = theano.shared(np.asarray(masks, dtype=float_type), borrow=True)
    # NOTE(review): `target` below is an lvector; presumably y is already an
    # int64 array, otherwise the givens substitution will be rejected -- confirm.
    y = theano.shared(y, borrow=True)

    trainset_masks = masks[:, :split_idx, :]
    testset_masks = masks[:, split_idx:, :]
    trainset_X, trainset_y = X[:, :split_idx, :], y[:split_idx]
    testset_X, testset_y = X[:, split_idx:, :], y[split_idx:]

    index = T.lscalar()
    x = T.tensor3('x')
    target = T.lvector('target')
    mask = T.tensor3('mask')

    print('building model...')
    lstm_1_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/lstm_1.save'
    softmax_filepath = '/Users/wulfe/Dropbox/Start/scripts/machine_learning/stacked_enc_dec_rnn/models/softmax_1.save'
    lstm_1 = load_model(lstm_1_filepath)
    softmax = load_model(softmax_filepath)
    # To train from scratch instead of resuming, build the layers directly:
    # n_input_at_each_timestep = 10
    # n_classes = 97  # original note: "no base 0 considered, there are just 98 of them. May need to be 97"
    # lstm_1 = variable_length_sequence_lstm.LSTM(n_vis=n_input_at_each_timestep, n_hid=n_input_at_each_timestep, layer_name='rec_1', return_indices=[-1], dropout_prob=0.3)
    # softmax = variable_length_sequence_lstm.Softmax(n_vis=n_input_at_each_timestep, n_classes=n_classes)
    # Earlier experiments stacked two further LSTM layers ('rec_2', 'rec_3',
    # saved as lstm_2.save / lstm_3.save) between lstm_1 and softmax.

    layers = [lstm_1, softmax]
    cost_expr = variable_length_sequence_lstm.Softmax.negative_log_likelihood
    rnn = variable_length_sequence_lstm.MLP(layers, cost=cost_expr, return_indices=[-1])
    cost, updates = rnn.get_cost_updates(x, target, mask, learning_rate=0.005)

    batch_size = 10
    print('building trainer...')
    # symbolic slice selecting mini-batch number `index`
    batch = slice(index * batch_size, (index + 1) * batch_size)
    trainer = theano.function(
        [index],
        [cost],
        updates=updates,
        givens={
            x: trainset_X[:, batch],
            target: trainset_y[batch],
            mask: trainset_masks[:, batch],
        },
        mode='FAST_RUN'
    )

    errors = rnn.layers[-1].errors(target)
    validate_model = theano.function(
        inputs=[index],
        outputs=[cost, errors],
        givens={
            x: testset_X[:, batch],
            target: testset_y[batch],
            mask: testset_masks[:, batch],
        },
        mode='FAST_RUN'
    )

    print('training model...')
    n_epochs = 1000
    # floor division: trailing partial batches are neither trained on nor validated
    n_train_batches = int(n_train_examples // batch_size)
    n_validation_batches = int(n_test_examples // batch_size)
    lowest_cost = None  # no epoch has finished yet
    for epoch in range(n_epochs):
        costs = [trainer(batch_idx)[0] for batch_idx in range(n_train_batches)]
        avg_cost = np.mean(costs)
        print('training cost for epoch {0}: {1}'.format(epoch, avg_cost))

        # only checkpoint and validate on an improvement of more than 1%
        if lowest_cost is not None and not avg_cost < lowest_cost * 0.99:
            continue
        lowest_cost = avg_cost
        save_model(lstm_1, lstm_1_filepath)
        save_model(softmax, softmax_filepath)

        print('\nvalidation')
        # the second output of validate_model is the `errors` expression
        batch_errors = [validate_model(batch_idx)[1] for batch_idx in range(n_validation_batches)]
        accuracy = (1 - np.mean(batch_errors)) * 100
        print('accuracy for epoch {0}: {1}%'.format(epoch, accuracy))

    print('finished training, final stats:\nfinal cost: {0}'.format(np.mean(costs)))
    for layer in rnn.layers:
        for param in layer.params:
            print('{}: {}'.format(param.name, param.get_value()))