コード例 #1
0
def build_lstm_network(data_triplet, num_hidden_layers, num_hidden_units, proj_type,
        learning_rate=0.001, lr_smoother=0.01):
    """Assemble a two-argument LSTM classifier together with its trainer.

    Two SerialLSTM encoders (one per argument) are created, each is wrapped
    in a MaskedInputLayer, and the two wrapped layers feed a multilayer
    softmax network built by make_multilayer_net_from_layers.

    Args:
        data_triplet: data container; this function reads
            ``training_data[0].shape[2]`` and ``output_dimensions()[0]``
            from it and hands the object itself to the trainer.
        num_hidden_layers: forwarded to make_multilayer_net_from_layers.
        num_hidden_units: forwarded to make_multilayer_net_from_layers.
        proj_type: forwarded to both SerialLSTM and MaskedInputLayer.
        learning_rate: forwarded to AdagradTrainer.
        lr_smoother: forwarded to AdagradTrainer.

    Returns:
        A ``(net, trainer)`` tuple: the NeuralNet and its AdagradTrainer.
    """
    # Fixed seed; the same generator is shared by every layer constructor
    # below, so the construction order is significant.
    rng = np.random.RandomState(100)
    # NOTE(review): presumably axis 2 of the first training array is the
    # per-token feature dimension -- confirm against the data loader.
    num_units = data_triplet.training_data[0].shape[2]

    # Both encoders are created before either pooling layer.
    encoders = [SerialLSTM(rng, num_units, proj_type) for _ in (1, 2)]
    pooled_layers = [
        MaskedInputLayer(rng, num_units, proj_type,
                encoder.activation_train, encoder.mask, encoder.c_mask)
        for encoder in encoders
    ]

    _unused_net, pred_layers = make_multilayer_net_from_layers(
            input_layers=pooled_layers,
            Y=T.lvector(), use_sparse=False,
            num_hidden_layers=num_hidden_layers,
            num_hidden_units=num_hidden_units,
            num_output_units=data_triplet.output_dimensions()[0],
            output_activation_fn=T.nnet.softmax,
            dropout=False)

    # The encoders come first in the layer list, followed by the
    # prediction layers.
    net = NeuralNet(encoders + pred_layers)
    first_encoder, second_encoder = encoders
    net.input = first_encoder.input + second_encoder.input

    trainer = AdagradTrainer(net, net.crossentropy,
            learning_rate, lr_smoother, data_triplet, SerialLSTM.make_givens)
    return net, trainer
コード例 #2
0
def _net_experiment_lstm_helper(experiment_name,
        json_file, data_triplet, num_units, num_reps, 
        LSTMModel, num_hidden_layers, num_hidden_units, use_hinge, proj_type, 
        use_bl, arg_shared_weights):

    rng = np.random.RandomState(100)
    arg1_model = LSTMModel(rng, num_units)
    if arg_shared_weights:
        arg2_model = LSTMModel(rng, num_units, 
                W=arg1_model.W, U=arg1_model.U, b=arg1_model.b)
    else:
        arg2_model = LSTMModel(rng, num_units)


    arg1_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg1_model.h, arg1_model.mask, arg1_model.c_mask)
    arg2_pooled = MaskedInputLayer(rng, num_units, proj_type,
            arg2_model.h, arg2_model.mask, arg2_model.c_mask)

    if use_bl:
        raise ValueError('bilinear is not yet supported')
    else:
        _, pred_layers = make_multilayer_net_from_layers(
                input_layers=[arg1_pooled, arg2_pooled],
                Y=T.lvector(), use_sparse=False,
                num_hidden_layers=num_hidden_layers,
                num_hidden_units=num_hidden_units,
                num_output_units=data_triplet.output_dimensions()[0],
                output_activation_fn=T.nnet.softmax,
                dropout=False)
    # to make sure that the parameters are in the same place
    nn = NeuralNet([arg1_model, arg2_model] + pred_layers)
    nn.input = arg1_model.input + arg2_model.input

    learning_rate = 0.001
    lr_smoother = 0.01

    trainer = AdagradTrainer(nn,
            nn.hinge_loss if use_hinge else nn.crossentropy,
            learning_rate, lr_smoother, 
            data_triplet, LSTMModel.make_givens)
    
    for rep in xrange(num_reps):
        random_seed = rep
        rng = np.random.RandomState(random_seed)
        nn.reset(rng)
        trainer.reset()
        
        minibatch_size = np.random.randint(20, 60)
        n_epochs = 50

        start_time = timeit.default_timer()
        best_iter, best_dev_acc, best_test_acc = \
                trainer.train_minibatch_triplet(minibatch_size, n_epochs)
        end_time = timeit.default_timer()
        print 'Training process takes %s seconds' % (end_time - start_time)
        print 'Best iteration is %s;' % best_iter + \
                'Best dev accuracy = %s' % best_dev_acc + \
                'Test accuracy =%s' % best_test_acc
        result_dict = {
                'test accuracy': best_test_acc,
                'best dev accuracy': best_dev_acc,
                'best iter': best_iter,
                'random seed': random_seed,
                'minibatch size': minibatch_size,
                'learning rate': learning_rate,
                'lr smoother': lr_smoother,
                'experiment name': experiment_name,
                'num hidden units': num_hidden_units,
                'num hidden layers': num_hidden_layers,
                'cost function': 'hinge loss' if use_hinge else 'crossentropy',
                'projection' : proj_type,
                'dropout' : False
                }
        json_file.write('%s\n' % json.dumps(result_dict, sort_keys=True))