import json
import timeit

import numpy as np
import theano.tensor as T

# SerialLSTM, MaskedInputLayer, make_multilayer_net_from_layers, NeuralNet,
# and AdagradTrainer are project-local classes/helpers defined elsewhere in
# this repository.


def build_lstm_network(data_triplet, num_hidden_layers, num_hidden_units,
                       proj_type, learning_rate=0.001, lr_smoother=0.01):
    """Build a two-arm LSTM network over an argument pair plus its Adagrad trainer."""
    rng = np.random.RandomState(100)
    num_units = data_triplet.training_data[0].shape[2]

    # One LSTM per argument; each output sequence is pooled (per proj_type)
    # over the unmasked time steps before entering the feed-forward classifier.
    arg1_model = SerialLSTM(rng, num_units, proj_type)
    arg2_model = SerialLSTM(rng, num_units, proj_type)
    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
                                   arg1_model.activation_train,
                                   arg1_model.mask, arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
                                   arg2_model.activation_train,
                                   arg2_model.mask, arg2_model.c_mask)
    _, pred_layers = make_multilayer_net_from_layers(
        input_layers=[arg1_pooled, arg2_pooled],
        Y=T.lvector(),
        use_sparse=False,
        num_hidden_layers=num_hidden_layers,
        num_hidden_units=num_hidden_units,
        num_output_units=data_triplet.output_dimensions()[0],
        output_activation_fn=T.nnet.softmax,
        dropout=False)

    net = NeuralNet([arg1_model, arg2_model] + pred_layers)
    net.input = arg1_model.input + arg2_model.input
    trainer = AdagradTrainer(net, net.crossentropy, learning_rate,
                             lr_smoother, data_triplet,
                             SerialLSTM.make_givens)
    return net, trainer
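
# Hypothetical usage sketch (not part of the original file): how
# build_lstm_network might be wired up. `load_data_triplet` and the
# 'mean_pool' projection name are assumptions for illustration; the actual
# data loader and the valid proj_type values live elsewhere in the project.
#
#     data_triplet = load_data_triplet('implicit_relations')  # hypothetical loader
#     net, trainer = build_lstm_network(data_triplet,
#                                       num_hidden_layers=1,
#                                       num_hidden_units=200,
#                                       proj_type='mean_pool')
#     trainer.train_minibatch_triplet(minibatch_size=32, n_epochs=50)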
def _net_experiment_lstm_helper(experiment_name, json_file, data_triplet,
                                num_units, num_reps, LSTMModel,
                                num_hidden_layers, num_hidden_units,
                                use_hinge, proj_type, use_bl,
                                arg_shared_weights):
    """Train the LSTM network num_reps times and log one JSON result line per rep."""
    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, num_units)
    if arg_shared_weights:
        # Tie the two argument LSTMs by sharing their weight matrices.
        arg2_model = LSTMModel(rng, num_units,
                               W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, num_units)
    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
                                   arg1_model.h, arg1_model.mask,
                                   arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
                                   arg2_model.h, arg2_model.mask,
                                   arg2_model.c_mask)
    if use_bl:
        raise ValueError('bilinear is not yet supported')
    else:
        _, pred_layers = make_multilayer_net_from_layers(
            input_layers=[arg1_pooled, arg2_pooled],
            Y=T.lvector(),
            use_sparse=False,
            num_hidden_layers=num_hidden_layers,
            num_hidden_units=num_hidden_units,
            num_output_units=data_triplet.output_dimensions()[0],
            output_activation_fn=T.nnet.softmax,
            dropout=False)

    # Group all layers into one net to make sure that the parameters are in
    # the same place.
    nn = NeuralNet([arg1_model, arg2_model] + pred_layers)
    nn.input = arg1_model.input + arg2_model.input

    learning_rate = 0.001
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn,
                             nn.hinge_loss if use_hinge else nn.crossentropy,
                             learning_rate, lr_smoother, data_triplet,
                             LSTMModel.make_givens)

    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; ' % best_iter + \
            'best dev accuracy = %s; ' % best_dev_acc + \
            'test accuracy = %s' % best_test_acc

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'hinge loss' if use_hinge else 'crossentropy',
            'projection': proj_type,
            'dropout': False,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))
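
# Hypothetical driver sketch (an assumption, not from the original file): the
# helper appends one JSON line per repetition, so a typical caller opens the
# results file once and passes the handle in. The file name, SerialLSTM as the
# LSTMModel argument, and the parameter values are only illustrative.
#
#     with open('lstm_experiment_results.json', 'w') as json_file:
#         _net_experiment_lstm_helper('lstm_baseline', json_file, data_triplet,
#                                     num_units=50, num_reps=10,
#                                     LSTMModel=SerialLSTM,
#                                     num_hidden_layers=1, num_hidden_units=200,
#                                     use_hinge=False, proj_type='mean_pool',
#                                     use_bl=False, arg_shared_weights=True)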