def _copy_tlstm_net(data_list, nn, proj_type):
    """Clone a trained tree-LSTM net onto new data (e.g. dev or test sets)."""
    arg1_model = nn.layers[0].copy(data_list[0])
    arg2_model = nn.layers[1].copy(data_list[1])
    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    # Re-wire the hidden stack on top of the copied LSTM projections.
    X_list = proj_variables
    new_hidden_layers = []
    for hidden_layer in nn.layers[2:-1]:
        new_hidden_layer = hidden_layer.copy(X_list)
        X_list = [new_hidden_layer.activation]
        new_hidden_layers.append(new_hidden_layer)
    output_layer = nn.layers[-1].copy(X_list)

    new_nn = NeuralNet()
    new_nn.layers = [arg1_model, arg2_model] + new_hidden_layers + [output_layer]
    new_nn.input.extend(arg1_model.input)
    new_nn.output.extend(output_layer.output)
    new_nn.predict = output_layer.predict
    new_nn.hinge_loss = output_layer.hinge_loss
    new_nn.crossentropy = output_layer.crossentropy
    return new_nn
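# The proj_type dispatch above recurs verbatim in several builders below. A
# shared helper could collapse that duplication; a minimal sketch follows,
# where the name _select_proj_variables is hypothetical and not part of this
# codebase:
def _select_proj_variables(arg1_model, arg2_model, proj_type):
    """Return the pooled (or top) hidden-state pair for the given projection."""
    attr_by_proj = {
        'max_pool': 'max_pooled_h',
        'mean_pool': 'mean_pooled_h',
        'sum_pool': 'sum_pooled_h',
        'top': 'top_h',
    }
    if proj_type not in attr_by_proj:
        raise ValueError('Invalid projection type: %s' % proj_type)
    attr = attr_by_proj[proj_type]
    return [getattr(arg1_model, attr), getattr(arg2_model, attr)]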
def build_lstm_network(data_triplet, num_hidden_layers, num_hidden_units,
        proj_type, learning_rate=0.001, lr_smoother=0.01):
    """Build a serial LSTM net over both arguments plus an Adagrad trainer."""
    rng = np.random.RandomState(100)
    num_units = data_triplet.training_data[0].shape[2]

    arg1_model = SerialLSTM(rng, num_units, proj_type)
    arg2_model = SerialLSTM(rng, num_units, proj_type)
    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg1_model.activation_train, arg1_model.mask, arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg2_model.activation_train, arg2_model.mask, arg2_model.c_mask)
    _, pred_layers = make_multilayer_net_from_layers(
            input_layers=[arg1_pooled, arg2_pooled],
            Y=T.lvector(),
            use_sparse=False,
            num_hidden_layers=num_hidden_layers,
            num_hidden_units=num_hidden_units,
            num_output_units=data_triplet.output_dimensions()[0],
            output_activation_fn=T.nnet.softmax,
            dropout=False)

    net = NeuralNet([arg1_model, arg2_model] + pred_layers)
    net.input = arg1_model.input + arg2_model.input
    trainer = AdagradTrainer(net, net.crossentropy, learning_rate, lr_smoother,
            data_triplet, SerialLSTM.make_givens)
    return net, trainer
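# A minimal usage sketch for build_lstm_network. This demo function is
# hypothetical and not called anywhere in this module; it assumes
# `data_triplet` has been loaded elsewhere, and the minibatch size and epoch
# count are illustrative rather than tuned values.
def _demo_build_lstm_network(data_triplet):
    net, trainer = build_lstm_network(data_triplet, num_hidden_layers=1,
            num_hidden_units=50, proj_type='max_pool')
    best_iter, best_dev_acc, best_test_acc = \
            trainer.train_minibatch_triplet(32, 50)
    print best_iter, best_dev_acc, best_test_acc
    return net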
def _make_tlstm_net(data_list, wbm, num_output_units, num_hidden_layers,
        num_hidden_units, use_hinge, proj_type):
    """Build a forest LSTM net over the two arguments' binarized parses."""
    rng = np.random.RandomState(100)
    indices = T.lvector()
    arg1_model = BinaryForestLSTM(data_list[0], rng, wbm, X_list=[indices])
    arg2_model = BinaryForestLSTM(data_list[1], rng, wbm, X_list=[indices])

    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    hidden_layers = []
    n_in_list = [wbm.num_units, wbm.num_units]
    X_list = proj_variables
    for i in range(num_hidden_layers):
        hidden_layer = LinearLayer(rng, n_in_list=n_in_list,
                n_out=num_hidden_units, use_sparse=False,
                X_list=X_list, activation_fn=T.tanh)
        n_in_list = [num_hidden_units]
        X_list = [hidden_layer.activation]
        hidden_layers.append(hidden_layer)
    output_layer = LinearLayer(rng, n_in_list=n_in_list,
            n_out=num_output_units, use_sparse=False,
            X_list=X_list, Y=T.lvector(),
            activation_fn=None if use_hinge else T.nnet.softmax)

    nn = NeuralNet()
    nn.layers = [arg1_model, arg2_model] + hidden_layers + [output_layer]
    nn.params.extend(arg1_model.params)
    nn.params.extend(arg2_model.params)
    nn.params.extend(output_layer.params)
    for hidden_layer in hidden_layers:
        nn.params.extend(hidden_layer.params)
    nn.input.extend(arg1_model.input)
    nn.output.extend(output_layer.output)
    nn.predict = output_layer.predict
    nn.hinge_loss = output_layer.hinge_loss
    nn.crossentropy = output_layer.crossentropy
    return nn
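# A hypothetical sketch pairing _make_tlstm_net with _copy_tlstm_net above:
# build the net on the training data, then clone it onto the dev data so the
# copied layers share the trained weights. The demo name and hyperparameter
# values are illustrative only.
def _demo_tlstm_copy(training_data_list, dev_data_list, wbm, num_output_units):
    nn = _make_tlstm_net(training_data_list, wbm, num_output_units,
            num_hidden_layers=1, num_hidden_units=50,
            use_hinge=False, proj_type='max_pool')
    dev_nn = _copy_tlstm_net(dev_data_list, nn, 'max_pool')
    return nn, dev_nn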
def _net_experiment_lstm_helper(experiment_name, json_file, data_triplet, wbm,
        num_reps, LSTMModel, num_hidden_layers, num_hidden_units, use_hinge,
        proj_type, use_bl, arg_shared_weights):
    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, wbm.num_units)
    if arg_shared_weights:
        # Tie the second argument's recurrent weights to the first's.
        arg2_model = LSTMModel(rng, wbm.num_units,
                W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, wbm.num_units)

    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    hidden_layers = []
    if use_bl:
        output_layer = BilinearLayer(rng,
                n_in1=wbm.num_units,
                n_in2=wbm.num_units,
                n_out=data_triplet.output_dimensions()[0],
                X1=proj_variables[0],
                X2=proj_variables[1],
                Y=T.lvector(),
                activation_fn=None if use_hinge else T.nnet.softmax)
    else:
        n_in_list = [wbm.num_units, wbm.num_units]
        X_list = proj_variables
        for i in range(num_hidden_layers):
            hidden_layer = LinearLayer(rng, n_in_list=n_in_list,
                    n_out=num_hidden_units, use_sparse=False,
                    X_list=X_list, activation_fn=T.tanh)
            n_in_list = [num_hidden_units]
            X_list = [hidden_layer.activation]
            hidden_layers.append(hidden_layer)
        output_layer = LinearLayer(rng, n_in_list=n_in_list,
                n_out=data_triplet.output_dimensions()[0],
                use_sparse=False, X_list=X_list, Y=T.lvector(),
                activation_fn=None if use_hinge else T.nnet.softmax)

    nn = NeuralNet()
    layers = [arg1_model, arg2_model, output_layer] + hidden_layers
    nn.params.extend(arg1_model.params)
    if not arg_shared_weights:
        nn.params.extend(arg2_model.params)
    nn.params.extend(output_layer.params)
    for hidden_layer in hidden_layers:
        nn.params.extend(hidden_layer.params)
    nn.layers = layers
    nn.input.extend(arg1_model.input)
    nn.input.extend(arg2_model.input)
    nn.output.extend(output_layer.output)
    nn.predict = output_layer.predict
    nn.hinge_loss = output_layer.hinge_loss
    nn.crossentropy = output_layer.crossentropy

    learning_rate = 0.001
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn,
            nn.hinge_loss if use_hinge else nn.crossentropy,
            learning_rate, lr_smoother, data_triplet, LSTMModel.make_givens)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        for layer in layers:
            layer.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
                trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; best dev accuracy = %s; test accuracy = %s' % \
                (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'hinge loss' if use_hinge else 'crossentropy',
            'projection': proj_type,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))
def _construct_net(data_triplet, wbm, num_hidden_layers, num_hidden_units,
        proj_type):
    rng = np.random.RandomState(100)
    arg1_model = BinaryTreeLSTM(rng, wbm.num_units)
    arg2_model = BinaryTreeLSTM(rng, wbm.num_units)
    # Auxiliary per-node label supervision over each argument's tree.
    arg1_node_label_layer = LinearLayerTensorOutput(rng,
            n_in=wbm.num_units,
            n_out=data_triplet.output_dimensions()[0],
            X=arg1_model.h)
    arg2_node_label_layer = LinearLayerTensorOutput(rng,
            n_in=wbm.num_units,
            n_out=data_triplet.output_dimensions()[1],
            X=arg2_model.h)

    if proj_type == 'max_pool':
        proj_variables = [arg1_model.max_pooled_h, arg2_model.max_pooled_h]
    elif proj_type == 'mean_pool':
        proj_variables = [arg1_model.mean_pooled_h, arg2_model.mean_pooled_h]
    elif proj_type == 'sum_pool':
        proj_variables = [arg1_model.sum_pooled_h, arg2_model.sum_pooled_h]
    elif proj_type == 'top':
        proj_variables = [arg1_model.top_h, arg2_model.top_h]
    else:
        raise ValueError('Invalid projection type: %s' % proj_type)

    hidden_layers = []
    n_in_list = [wbm.num_units, wbm.num_units]
    X_list = proj_variables
    for i in range(num_hidden_layers):
        hidden_layer = LinearLayer(rng, n_in_list=n_in_list,
                n_out=num_hidden_units, use_sparse=False,
                X_list=X_list, activation_fn=T.tanh)
        n_in_list = [num_hidden_units]
        X_list = [hidden_layer.activation]
        hidden_layers.append(hidden_layer)
    label_output_layer = LinearLayer(rng, n_in_list=n_in_list,
            n_out=data_triplet.output_dimensions()[2],
            use_sparse=False, X_list=X_list, Y=T.lvector(),
            activation_fn=T.nnet.softmax)

    nn = NeuralNet()
    layers = [arg1_model, arg2_model,
            arg1_node_label_layer, arg2_node_label_layer,
            label_output_layer] + hidden_layers
    for layer in layers:
        nn.params.extend(layer.params)
    nn.layers = layers
    nn.input.extend(arg1_model.input)
    nn.input.extend(arg2_model.input)
    nn.output.extend(arg1_node_label_layer.output +
            arg2_node_label_layer.output + label_output_layer.output)
    nn.predict = label_output_layer.predict
    # Joint objective: relation label loss plus down-weighted node label losses.
    nn.crossentropy = label_output_layer.crossentropy + \
            0.5 * arg1_node_label_layer.crossentropy + \
            0.5 * arg2_node_label_layer.crossentropy
    nn.misc_function = arg1_node_label_layer.miscs + arg2_node_label_layer.miscs
    return nn
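# A hypothetical sketch showing how the jointly supervised net above might be
# trained: build it, then hand its combined cross-entropy to an Adagrad
# trainer. BinaryTreeLSTM.make_givens is assumed to exist, as it does for the
# other LSTM classes used with AdagradTrainer in this module, and the
# hyperparameters are illustrative.
def _demo_construct_net(data_triplet, wbm):
    nn = _construct_net(data_triplet, wbm, num_hidden_layers=1,
            num_hidden_units=50, proj_type='max_pool')
    trainer = AdagradTrainer(nn, nn.crossentropy, 0.001, 0.01,
            data_triplet, BinaryTreeLSTM.make_givens)
    return nn, trainer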
def _net_experiment_lstm_helper(experiment_name, json_file, data_triplet,
        num_units, num_reps, LSTMModel, num_hidden_layers, num_hidden_units,
        use_hinge, proj_type, use_bl, arg_shared_weights):
    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, num_units)
    if arg_shared_weights:
        # Tie the second argument's recurrent weights to the first's.
        arg2_model = LSTMModel(rng, num_units,
                W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, num_units)
    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg1_model.h, arg1_model.mask, arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg2_model.h, arg2_model.mask, arg2_model.c_mask)
    if use_bl:
        raise ValueError('bilinear is not yet supported')
    else:
        _, pred_layers = make_multilayer_net_from_layers(
                input_layers=[arg1_pooled, arg2_pooled],
                Y=T.lvector(),
                use_sparse=False,
                num_hidden_layers=num_hidden_layers,
                num_hidden_units=num_hidden_units,
                num_output_units=data_triplet.output_dimensions()[0],
                output_activation_fn=T.nnet.softmax,
                dropout=False)

    # Wrap all layers in one net to make sure the parameters are in the same place.
    nn = NeuralNet([arg1_model, arg2_model] + pred_layers)
    nn.input = arg1_model.input + arg2_model.input

    learning_rate = 0.001
    lr_smoother = 0.01
    trainer = AdagradTrainer(nn,
            nn.hinge_loss if use_hinge else nn.crossentropy,
            learning_rate, lr_smoother, data_triplet, LSTMModel.make_givens)
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
                trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s; best dev accuracy = %s; test accuracy = %s' % \
                (best_iter, best_dev_acc, best_test_acc)

        result_dict = {
            'test accuracy': best_test_acc,
            'best dev accuracy': best_dev_acc,
            'best iter': best_iter,
            'random seed': random_seed,
            'minibatch size': minibatch_size,
            'learning rate': learning_rate,
            'lr smoother': lr_smoother,
            'experiment name': experiment_name,
            'num hidden units': num_hidden_units,
            'num hidden layers': num_hidden_layers,
            'cost function': 'hinge loss' if use_hinge else 'crossentropy',
            'projection': proj_type,
            'dropout': False,
        }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))
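# A hypothetical driver for _net_experiment_lstm_helper above. The class
# passed as lstm_model_class is assumed to take (rng, num_units) in its
# constructor and to expose h, mask, c_mask, and make_givens, as the helper
# requires; the experiment name and all settings here are illustrative.
def _run_lstm_experiment(json_path, data_triplet, num_units, lstm_model_class):
    with open(json_path, 'a') as json_file:
        _net_experiment_lstm_helper('lstm_demo', json_file, data_triplet,
                num_units, num_reps=5, LSTMModel=lstm_model_class,
                num_hidden_layers=1, num_hidden_units=50, use_hinge=False,
                proj_type='max_pool', use_bl=False, arg_shared_weights=False)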