Example No. 1
def test_binary_accuracy(colvect):
    from lasagne.objectives import binary_accuracy
    p = theano.tensor.vector('p')
    t = theano.tensor.ivector('t')
    if not colvect:
        c = binary_accuracy(p, t)
    else:
        c = binary_accuracy(p.dimshuffle(0, 'x'), t)[:, 0]
    # numeric version
    floatX = theano.config.floatX
    predictions = np.random.rand(10, ).astype(floatX) > 0.5
    targets = np.random.random_integers(0, 1, (10, )).astype("int8")
    accuracy = predictions == targets
    # compare
    assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
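The test relies on binary_accuracy thresholding its predictions at 0.5 and comparing them elementwise with the targets. A minimal NumPy reference of that behaviour (an illustrative sketch, not the Lasagne source):

import numpy as np

def binary_accuracy_ref(predictions, targets, threshold=0.5):
    # elementwise 0/1 accuracy: binarize the scores, then compare
    return (predictions >= threshold) == targets

scores = np.array([0.1, 0.9, 0.6, 0.4])
labels = np.array([0, 1, 0, 0])
print(binary_accuracy_ref(scores, labels))  # [ True  True False  True]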
Example No. 3
def predict(model_path):
    # open in binary mode: the model was pickled with a binary protocol
    with open(model_path, 'rb') as f:
        network = cPickle.load(f)

    target_var = T.imatrix('y')
    predict_prediction = get_output(network, deterministic=True)
    predict_acc = binary_accuracy(predict_prediction, target_var).mean()

    # calculate win rate
    win_rate_result1 = []
    win_rate_result2 = []
    for win_rate_threshold in [0.5, 0.6, 0.7, 0.8, 0.9]:
        tmp1 = T.sum(T.switch(T.and_(T.gt(predict_prediction, win_rate_threshold), T.eq(target_var, 1)), 1, 0),
                     dtype=theano.config.floatX)
        tmp2 = T.sum(T.switch(T.gt(predict_prediction, win_rate_threshold), 1, 0), dtype=theano.config.floatX)
        test_win_rate = (tmp1 + 0.00001) / (tmp2 + 0.00001)
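        # (test_win_rate above is the precision at this threshold, i.e.
        # true positives / predicted positives; the 1e-5 terms keep the
        # ratio defined when nothing crosses the threshold)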
        win_rate_result1.append(test_win_rate)
        win_rate_result2.append(tmp1)

    input_layer = get_all_layers(network)[0]
    # use a distinct name so the compiled function does not shadow predict()
    predict_fn = theano.function(inputs=[input_layer.input_var, target_var],
                                 outputs=[predict_prediction, predict_acc,
                                          T.as_tensor_variable(win_rate_result1),
                                          T.as_tensor_variable(win_rate_result2)],
                                 on_unused_input='warn')
    X, y, labels, values, _, _, _, _, _, _ = load_dataset('../../data/predict.txt')
    predict_prediction, predict_acc, win_rate_result1, win_rate_result2 = predict_fn(X, y)

    for ix in range(len([0.5, 0.6, 0.7, 0.8, 0.9])):
        sys.stdout.write("  predict win rate loss:\t\t\t{}\n".format(win_rate_result1[ix]))
        sys.stdout.write("  predict possitive num:\t\t\t{}\n".format(win_rate_result2[ix]))
    sys.stdout.write("  predict accuracy:\t\t\t{} %\n".format(predict_acc * 100))

    # output prediction results
    with open('../../data/prediction', 'w') as f:
        for ix in xrange(len(labels)):
            line = str(labels[ix]) + '\t' + str(values[ix]) + '\t' + str(predict_prediction[ix][0]) + '\n'
            f.write(line)
    sys.stdout.flush()
Example No. 4
def calc_accuracy_multi(prediction, targets):

    #we can use the lasagne objective binary_accuracy to determine the multi label accuracy
    a = T.mean(objectives.binary_accuracy(prediction, targets))

    return a
Example No. 5
def clip(x):  # hypothetical wrapper: the snippet begins mid-function
    return T.clip(x, 1e-8, 1 - 1e-8)

# Create expressions for getting the network outputs
output_discriminator = lasagne.layers.get_output(discriminator)
output_generator_deterministic = lasagne.layers.get_output(generator, deterministic=True)
output_generator = lasagne.layers.get_output(generator)
# Expression to get discriminator output, given generator output
output_discriminator_fake = lasagne.layers.get_output(discriminator, inputs=output_generator)

# Discriminator total loss (from original and fake data)
loss_discriminator = -T.log(output_discriminator + TINY).mean() -  T.log(1. - output_discriminator_fake + TINY).mean()
# Discriminator loss on fake data only
loss_discriminator_fake = -T.log(output_discriminator_fake + TINY).mean()

# Discriminator accuracy on fake data
accuracy_discriminator_fake = binary_accuracy(output_discriminator_fake, T.zeros_like(output_discriminator_fake)).mean()
# Discriminator accuracy on real data
accuracy_discriminator = binary_accuracy(output_discriminator, T.ones_like(output_discriminator)).mean()
# Overall discriminator accuracy
accuracy_discriminator = (accuracy_discriminator + accuracy_discriminator_fake)/2

# Get discriminator's parameters
params_discriminator = lasagne.layers.get_all_params(discriminator, trainable=True)
# Get generator's parameters
params_generator = lasagne.layers.get_all_params(generator, trainable=True)

# Discriminator and generator optimisers
updates_generator = lasagne.updates.adam(loss_discriminator_fake, params_generator, learning_rate=1e-3,  beta1=0.5)
updates_discriminator = lasagne.updates.adam(loss_discriminator, params_discriminator, learning_rate=1e-4,  beta1=0.5)

# Compile theano functions to train the two networks
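The snippet ends just before the compilation step announced by its last comment. A plausible sketch of that step, assuming the input variables can be recovered from each network's InputLayer (an assumption; the original code is truncated here):

# hypothetical compilation step; recovering the input variables from the
# networks' input layers is an assumption, not the original code
real_var = lasagne.layers.get_all_layers(discriminator)[0].input_var
noise_var = lasagne.layers.get_all_layers(generator)[0].input_var

train_discriminator = theano.function(
    [real_var, noise_var],
    [loss_discriminator, accuracy_discriminator],
    updates=updates_discriminator)
train_generator = theano.function(
    [noise_var], loss_discriminator_fake, updates=updates_generator)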
Example No. 6
def test_binary_accuracy_invalid():
    from lasagne.objectives import binary_accuracy
    with pytest.raises(TypeError) as exc:
        binary_accuracy(theano.tensor.matrix(),
                        theano.tensor.vector())
    assert 'rank mismatch' in exc.value.args[0]
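The 'rank mismatch' TypeError that this test expects comes from a plain ndim comparison inside binary_accuracy; schematically (paraphrased from the Lasagne source, so treat it as a sketch):

import theano.tensor

def binary_accuracy_sketch(predictions, targets, threshold=0.5):
    # the validation exercised by the test above
    if targets.ndim != predictions.ndim:
        raise TypeError('rank mismatch between targets and predictions')
    predictions = theano.tensor.ge(predictions, threshold)
    return theano.tensor.eq(predictions, targets)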
Example No. 7
def multi_task_classifier(args,
                          input_var,
                          target_var,
                          wordEmbeddings,
                          seqlen,
                          num_feats,
                          lambda_val=0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen - kw + 1
    stride = 1
    filter_size = wordDim
    pool_size = num_filters

    input = InputLayer((None, seqlen, num_feats), input_var=input_var)
    batchsize, _, _ = input.input_var.shape

    #span
    emb1 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape1 = ReshapeLayer(emb1, (batchsize, seqlen, num_feats * wordDim))
    conv1d_1 = DimshuffleLayer(
        Conv1DLayer(reshape1,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)
    hid_1 = DenseLayer(maxpool_1,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)
    """
    #DocTimeRel
    emb2 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape2 = ReshapeLayer(emb2, (batchsize, seqlen, num_feats*wordDim))
    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape2, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=5, nonlinearity=softmax)
    """

    #Type
    emb3 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape3 = ReshapeLayer(emb3, (batchsize, seqlen, num_feats * wordDim))
    conv1d_3 = DimshuffleLayer(
        Conv1DLayer(reshape3,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)
    hid_3 = DenseLayer(maxpool_3,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=4, nonlinearity=softmax)

    #Degree
    emb4 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape4 = ReshapeLayer(emb4, (batchsize, seqlen, num_feats * wordDim))
    conv1d_4 = DimshuffleLayer(
        Conv1DLayer(reshape4,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)
    hid_4 = DenseLayer(maxpool_4,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=4, nonlinearity=softmax)

    #Polarity
    emb5 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape5 = ReshapeLayer(emb5, (batchsize, seqlen, num_feats * wordDim))
    conv1d_5 = DimshuffleLayer(
        Conv1DLayer(reshape5,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)
    hid_5 = DenseLayer(maxpool_5,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=3, nonlinearity=softmax)

    #ContextualModality
    emb6 = EmbeddingLayer(input,
                          input_size=vocab_size,
                          output_size=wordDim,
                          W=wordEmbeddings.T)
    reshape6 = ReshapeLayer(emb6, (batchsize, seqlen, num_feats * wordDim))
    conv1d_6 = DimshuffleLayer(
        Conv1DLayer(reshape6,
                    num_filters=num_filters,
                    filter_size=wordDim,
                    stride=1,
                    nonlinearity=tanh,
                    W=GlorotUniform()), (0, 2, 1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)
    hid_6 = DenseLayer(maxpool_6,
                       num_units=args.hiddenDim,
                       nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=5, nonlinearity=softmax)
    """
    #ContextualAspect
    emb7 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape7 = ReshapeLayer(emb7, (batchsize, seqlen, num_feats*wordDim))
    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape7, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=4, nonlinearity=softmax)
    """
    """
    #Permanence
    emb8 = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape8 = ReshapeLayer(emb8, (batchsize, seqlen, num_feats*wordDim))
    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape8, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=4, nonlinearity=softmax)
    """

    # Is this important?
    """
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])
    """
    network_1_out = get_output(network_1)
    network_3_out = get_output(network_3)
    network_4_out = get_output(network_4)
    network_5_out = get_output(network_5)
    network_6_out = get_output(network_6)

    loss_1 = T.mean(binary_crossentropy(
        network_1_out, target_var)) + regularize_layer_params_weighted(
            {
                emb1: lambda_val,
                conv1d_1: lambda_val,
                hid_1: lambda_val,
                network_1: lambda_val
            }, l2)
    updates_1 = adagrad(loss_1,
                        get_all_params(network_1, trainable=True),
                        learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var],
                                 loss_1,
                                 updates=updates_1,
                                 allow_input_downcast=True)
    val_acc_1 = T.mean(
        binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var],
                               val_acc_1,
                               allow_input_downcast=True)
    """
    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb2:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)
    """

    loss_3 = T.mean(categorical_crossentropy(
        network_3_out, target_var)) + regularize_layer_params_weighted(
            {
                emb3: lambda_val,
                conv1d_3: lambda_val,
                hid_3: lambda_val,
                network_3: lambda_val
            }, l2)
    updates_3 = adagrad(loss_3,
                        get_all_params(network_3, trainable=True),
                        learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var],
                                 loss_3,
                                 updates=updates_3,
                                 allow_input_downcast=True)
    val_acc_3 = T.mean(
        categorical_accuracy(get_output(network_3, deterministic=True),
                             target_var))
    val_fn_3 = theano.function([input_var, target_var],
                               val_acc_3,
                               allow_input_downcast=True)

    loss_4 = T.mean(categorical_crossentropy(
        network_4_out, target_var)) + regularize_layer_params_weighted(
            {
                emb4: lambda_val,
                conv1d_4: lambda_val,
                hid_4: lambda_val,
                network_4: lambda_val
            }, l2)
    updates_4 = adagrad(loss_4,
                        get_all_params(network_4, trainable=True),
                        learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var],
                                 loss_4,
                                 updates=updates_4,
                                 allow_input_downcast=True)
    val_acc_4 = T.mean(
        categorical_accuracy(get_output(network_4, deterministic=True),
                             target_var))
    val_fn_4 = theano.function([input_var, target_var],
                               val_acc_4,
                               allow_input_downcast=True)

    loss_5 = T.mean(categorical_crossentropy(
        network_5_out, target_var)) + regularize_layer_params_weighted(
            {
                emb5: lambda_val,
                conv1d_5: lambda_val,
                hid_5: lambda_val,
                network_5: lambda_val
            }, l2)
    updates_5 = adagrad(loss_5,
                        get_all_params(network_5, trainable=True),
                        learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var],
                                 loss_5,
                                 updates=updates_5,
                                 allow_input_downcast=True)
    val_acc_5 = T.mean(
        categorical_accuracy(get_output(network_5, deterministic=True),
                             target_var))
    val_fn_5 = theano.function([input_var, target_var],
                               val_acc_5,
                               allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(
        network_6_out, target_var)) + regularize_layer_params_weighted(
            {
                emb6: lambda_val,
                conv1d_6: lambda_val,
                hid_6: lambda_val,
                network_6: lambda_val
            }, l2)
    updates_6 = adagrad(loss_6,
                        get_all_params(network_6, trainable=True),
                        learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var],
                                 loss_6,
                                 updates=updates_6,
                                 allow_input_downcast=True)
    val_acc_6 = T.mean(
        categorical_accuracy(get_output(network_6, deterministic=True),
                             target_var))
    val_fn_6 = theano.function([input_var, target_var],
                               val_acc_6,
                               allow_input_downcast=True)
    """
    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb7:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb8:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)
    """
    """
    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
    """
    return train_fn_1, val_fn_1, network_1, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6
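Each task gets its own (train_fn, val_fn) pair over the shared input. A hypothetical training-loop fragment for one task (the minibatch iterator and the per-task target arrays are assumptions, not shown in the source):

# hypothetical driver for the per-task functions returned above
for epoch in range(num_epochs):
    for inputs, span_targets in iterate_minibatches(X_train, y_span, 128):
        span_loss = train_fn_1(inputs, span_targets)
    print("span validation accuracy: {}".format(val_fn_1(X_val, y_span_val)))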
Example No. 8
def compile_update_softmax(nnet, inputs, targets):
    """
    create a softmax loss for network given in argument
    """

    floatX = Cfg.floatX
    C = Cfg.C

    final_layer = nnet.all_layers[-1]
    trainable_params = lasagne.layers.get_all_params(final_layer,
                                                     trainable=True)

    # Regularization
    if Cfg.weight_decay:
        l2_penalty = (floatX(0.5) / C) * get_l2_penalty(nnet)
    else:
        l2_penalty = T.cast(0, dtype='float32')

    # Backpropagation
    prediction = lasagne.layers.get_output(final_layer,
                                           inputs=inputs,
                                           deterministic=False)

    if Cfg.ad_experiment:
        train_loss = T.mean(l_objectives.binary_crossentropy(
            prediction.flatten(), targets),
                            dtype='float32')
        train_acc = T.mean(l_objectives.binary_accuracy(
            prediction.flatten(), targets),
                           dtype='float32')
    else:
        train_loss = T.mean(l_objectives.categorical_crossentropy(
            prediction, targets),
                            dtype='float32')
        train_acc = T.mean(T.eq(T.argmax(prediction, axis=1), targets),
                           dtype='float32')

    train_obj = T.cast(train_loss + l2_penalty, dtype='float32')
    updates = get_updates(nnet,
                          train_obj,
                          trainable_params,
                          solver=nnet.solver)
    nnet.backprop = theano.function([inputs, targets], [train_obj, train_acc],
                                    updates=updates)

    # Forwardpropagation
    test_prediction = lasagne.layers.get_output(final_layer,
                                                inputs=inputs,
                                                deterministic=True)

    if Cfg.ad_experiment:
        test_loss = T.mean(l_objectives.binary_crossentropy(
            test_prediction.flatten(), targets),
                           dtype='float32')
        test_acc = T.mean(l_objectives.binary_accuracy(
            test_prediction.flatten(), targets),
                          dtype='float32')
    else:
        test_loss = T.mean(l_objectives.categorical_crossentropy(
            test_prediction, targets),
                           dtype='float32')
        test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), targets),
                          dtype='float32')

    test_obj = T.cast(test_loss + l2_penalty, dtype='float32')
    nnet.forward = theano.function(
        [inputs, targets],
        [test_obj, test_acc, test_prediction, l2_penalty, test_loss])
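The compiled functions are attached to the nnet object rather than returned. A hypothetical fragment showing how they would be driven (the batch iterator and test arrays are assumptions):

# hypothetical usage of the functions compiled above
for inputs_batch, targets_batch in train_batches:
    train_obj, train_acc = nnet.backprop(inputs_batch, targets_batch)
test_obj, test_acc, test_pred, l2, test_loss = nnet.forward(X_test, y_test)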
Example No. 9
def multi_task_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats, lambda_val=0.5 * 1e-4):

    print("Building multi task model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 
    filter_size=wordDim
    pool_size=num_filters

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))


    conv1d_1 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_1 = MaxPool1DLayer(conv1d_1, pool_size=pool_size)  
    hid_1 = DenseLayer(maxpool_1, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_1 = DenseLayer(hid_1, num_units=2, nonlinearity=softmax)


    conv1d_2 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_2 = MaxPool1DLayer(conv1d_2, pool_size=pool_size)  
    hid_2 = DenseLayer(maxpool_2, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_2 = DenseLayer(hid_2, num_units=4, nonlinearity=softmax)

    conv1d_3 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_3 = MaxPool1DLayer(conv1d_3, pool_size=pool_size)  
    hid_3 = DenseLayer(maxpool_3, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_3 = DenseLayer(hid_3, num_units=3, nonlinearity=softmax)

    conv1d_4 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_4 = MaxPool1DLayer(conv1d_4, pool_size=pool_size)  
    hid_4 = DenseLayer(maxpool_4, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_4 = DenseLayer(hid_4, num_units=3, nonlinearity=softmax)

    conv1d_5 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_5 = MaxPool1DLayer(conv1d_5, pool_size=pool_size)  
    hid_5 = DenseLayer(maxpool_5, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_5 = DenseLayer(hid_5, num_units=2, nonlinearity=softmax)

    conv1d_6 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_6 = MaxPool1DLayer(conv1d_6, pool_size=pool_size)  
    hid_6 = DenseLayer(maxpool_6, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_6 = DenseLayer(hid_6, num_units=4, nonlinearity=softmax)


    conv1d_7 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_7 = MaxPool1DLayer(conv1d_7, pool_size=pool_size)  
    hid_7 = DenseLayer(maxpool_7, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_7 = DenseLayer(hid_7, num_units=3, nonlinearity=softmax)

    conv1d_8 = DimshuffleLayer(Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()), (0,2,1))
    maxpool_8 = MaxPool1DLayer(conv1d_8, pool_size=pool_size)  
    hid_8 = DenseLayer(maxpool_8, num_units=args.hiddenDim, nonlinearity=sigmoid)
    network_8 = DenseLayer(hid_8, num_units=3, nonlinearity=softmax)


    # Is this important?
    network_1_out, network_2_out, network_3_out, network_4_out, \
    network_5_out, network_6_out, network_7_out, network_8_out = \
    get_output([network_1, network_2, network_3, network_4, network_5, network_6, network_7, network_8])

    loss_1 = T.mean(binary_crossentropy(network_1_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_1:lambda_val, 
                hid_1:lambda_val, network_1:lambda_val} , l2)
    updates_1 = adagrad(loss_1, get_all_params(network_1, trainable=True), learning_rate=args.step)
    train_fn_1 = theano.function([input_var, target_var], loss_1, updates=updates_1, allow_input_downcast=True)
    val_acc_1 =  T.mean(binary_accuracy(get_output(network_1, deterministic=True), target_var))
    val_fn_1 = theano.function([input_var, target_var], val_acc_1, allow_input_downcast=True)


    loss_2 = T.mean(categorical_crossentropy(network_2_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_2:lambda_val, 
                hid_2:lambda_val, network_2:lambda_val} , l2)
    updates_2 = adagrad(loss_2, get_all_params(network_2, trainable=True), learning_rate=args.step)
    train_fn_2 = theano.function([input_var, target_var], loss_2, updates=updates_2, allow_input_downcast=True)
    val_acc_2 =  T.mean(categorical_accuracy(get_output(network_2, deterministic=True), target_var))
    val_fn_2 = theano.function([input_var, target_var], val_acc_2, allow_input_downcast=True)


    loss_3 = T.mean(categorical_crossentropy(network_3_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_3:lambda_val, 
                hid_3:lambda_val, network_3:lambda_val} , l2)
    updates_3 = adagrad(loss_3, get_all_params(network_3, trainable=True), learning_rate=args.step)
    train_fn_3 = theano.function([input_var, target_var], loss_3, updates=updates_3, allow_input_downcast=True)
    val_acc_3 =  T.mean(categorical_accuracy(get_output(network_3, deterministic=True), target_var))
    val_fn_3 = theano.function([input_var, target_var], val_acc_3, allow_input_downcast=True)


    loss_4 = T.mean(categorical_crossentropy(network_4_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_4:lambda_val, 
                hid_4:lambda_val, network_4:lambda_val} , l2)
    updates_4 = adagrad(loss_4, get_all_params(network_4, trainable=True), learning_rate=args.step)
    train_fn_4 = theano.function([input_var, target_var], loss_4, updates=updates_4, allow_input_downcast=True)
    val_acc_4 =  T.mean(categorical_accuracy(get_output(network_4, deterministic=True), target_var))
    val_fn_4 = theano.function([input_var, target_var], val_acc_4, allow_input_downcast=True)

    loss_5 = T.mean(binary_crossentropy(network_5_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_5:lambda_val, 
                hid_5:lambda_val, network_5:lambda_val} , l2)
    updates_5 = adagrad(loss_5, get_all_params(network_5, trainable=True), learning_rate=args.step)
    train_fn_5 = theano.function([input_var, target_var], loss_5, updates=updates_5, allow_input_downcast=True)
    val_acc_5 =  T.mean(binary_accuracy(get_output(network_5, deterministic=True), target_var))
    val_fn_5 = theano.function([input_var, target_var], val_acc_5, allow_input_downcast=True)

    loss_6 = T.mean(categorical_crossentropy(network_6_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_6:lambda_val, 
                hid_6:lambda_val, network_6:lambda_val} , l2)
    updates_6 = adagrad(loss_6, get_all_params(network_6, trainable=True), learning_rate=args.step)
    train_fn_6 = theano.function([input_var, target_var], loss_6, updates=updates_6, allow_input_downcast=True)
    val_acc_6 =  T.mean(categorical_accuracy(get_output(network_6, deterministic=True), target_var))
    val_fn_6 = theano.function([input_var, target_var], val_acc_6, allow_input_downcast=True)

    loss_7 = T.mean(categorical_crossentropy(network_7_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_7:lambda_val, 
                hid_7:lambda_val, network_7:lambda_val} , l2)
    updates_7 = adagrad(loss_7, get_all_params(network_7, trainable=True), learning_rate=args.step)
    train_fn_7 = theano.function([input_var, target_var], loss_7, updates=updates_7, allow_input_downcast=True)
    val_acc_7 =  T.mean(categorical_accuracy(get_output(network_7, deterministic=True), target_var))
    val_fn_7 = theano.function([input_var, target_var], val_acc_7, allow_input_downcast=True)

    loss_8 = T.mean(categorical_crossentropy(network_8_out,target_var)) + regularize_layer_params_weighted({emb:lambda_val, conv1d_8:lambda_val, 
                hid_8:lambda_val, network_8:lambda_val} , l2)
    updates_8 = adagrad(loss_8, get_all_params(network_8, trainable=True), learning_rate=args.step)
    train_fn_8 = theano.function([input_var, target_var], loss_8, updates=updates_8, allow_input_downcast=True)
    val_acc_8 =  T.mean(categorical_accuracy(get_output(network_8, deterministic=True), target_var))
    val_fn_8 = theano.function([input_var, target_var], val_acc_8, allow_input_downcast=True)


    return train_fn_1, val_fn_1, network_1, train_fn_2, val_fn_2, network_2, train_fn_3, val_fn_3, \
            network_3, train_fn_4, val_fn_4, network_4, train_fn_5, val_fn_5, network_5, \
            train_fn_6, val_fn_6, network_6, train_fn_7, val_fn_7, network_7, train_fn_8, val_fn_8, network_8
Example No. 10
def event_span_classifier(args, input_var, input_mask_var, target_var, wordEmbeddings, seqlen):

    print("Building model with LSTM")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    GRAD_CLIP = wordDim

    args.lstmDim = 150

    input = InputLayer((None, seqlen),input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    input_mask = InputLayer((None, seqlen),input_var=input_mask_var)
    
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb_1.W].remove('trainable')

    lstm = LSTMLayer(emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh)

    lstm_back = LSTMLayer(
        emb, num_units=args.lstmDim, mask_input=input_mask, grad_clipping=GRAD_CLIP,
        nonlinearity=tanh, backwards=True)

    slice_forward = SliceLayer(lstm, indices=-1, axis=1) # out_shape (None, args.lstmDim)
    slice_backward = SliceLayer(lstm_back, indices=0, axis=1) # out_shape (None, args.lstmDim)

    concat = ConcatLayer([slice_forward, slice_backward])

    hid = DenseLayer(concat, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, lstm:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))


    train_fn = theano.function([input_var, input_mask_var,target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))

    val_fn = theano.function([input_var, input_mask_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Example No. 11
def event_span_classifier(args, input_var, target_var, wordEmbeddings, seqlen, num_feats):

    print("Building model with 1D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    kw = 2
    num_filters = seqlen-kw+1
    stride = 1 

    #important context words as channels
 
    #CNN_sentence config
    filter_size=wordDim
    pool_size=seqlen-filter_size+1

    input = InputLayer((None, seqlen, num_feats),input_var=input_var)
    batchsize, _, _ = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    #emb.params[emb.W].remove('trainable') #(batchsize, seqlen, wordDim)

    #print get_output_shape(emb)
    reshape = ReshapeLayer(emb, (batchsize, seqlen, num_feats*wordDim))
    #print get_output_shape(reshape)

    conv1d = Conv1DLayer(reshape, num_filters=num_filters, filter_size=wordDim, stride=1, 
        nonlinearity=tanh,W=GlorotUniform()) #nOutputFrame = num_flters, 
                                            #nOutputFrameSize = (num_feats*wordDim-filter_size)/stride +1

    #print get_output_shape(conv1d)

    conv1d = DimshuffleLayer(conv1d, (0,2,1))

    #print get_output_shape(conv1d)

    pool_size=num_filters

    maxpool = MaxPool1DLayer(conv1d, pool_size=pool_size) 

    #print get_output_shape(maxpool)
  
    #forward = FlattenLayer(maxpool) 

    #print get_output_shape(forward)
 
    hid = DenseLayer(maxpool, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)
    
    loss = T.mean(binary_crossentropy(prediction,target_var))
    lambda_val = 0.5 * 1e-4

    layers = {emb:lambda_val, conv1d:lambda_val, hid:lambda_val, network:lambda_val} 
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty


    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"
 
    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction,target_var))

    train_fn = theano.function([input_var, target_var], 
        loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn, network
Example No. 12
def build_network_2dconv(args, input_var, target_var, wordEmbeddings, maxlen=60):

    print("Building model with 2D Convolution")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]

    num_filters = 100
    stride = 1

    # CNN_sentence config
    filter_size = (3, wordDim)
    pool_size = (maxlen - 3 + 1, 1)

    input = InputLayer((None, maxlen), input_var=input_var)
    batchsize, seqlen = input.input_var.shape
    emb = EmbeddingLayer(input, input_size=vocab_size, output_size=wordDim, W=wordEmbeddings.T)
    emb.params[emb.W].remove("trainable")  # (batchsize, maxlen, wordDim)

    reshape = ReshapeLayer(emb, (batchsize, 1, maxlen, wordDim))

    conv2d = Conv2DLayer(
        reshape,
        num_filters=num_filters,
        filter_size=(filter_size),
        stride=stride,
        nonlinearity=rectify,
        W=GlorotUniform(),
    )  # (None, 100, 34, 1)
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 100, 1, 1)

    forward = FlattenLayer(maxpool)  # (None, 100) #(None, 50400)

    hid = DenseLayer(forward, num_units=args.hiddenDim, nonlinearity=sigmoid)

    network = DenseLayer(hid, num_units=2, nonlinearity=softmax)

    prediction = get_output(network)

    loss = T.mean(binary_crossentropy(prediction, target_var))
    lambda_val = 0.5 * 1e-4

    layers = {conv2d: lambda_val, hid: lambda_val, network: lambda_val}
    penalty = regularize_layer_params_weighted(layers, l2)
    loss = loss + penalty

    params = get_all_params(network, trainable=True)

    if args.optimizer == "sgd":
        updates = sgd(loss, params, learning_rate=args.step)
    elif args.optimizer == "adagrad":
        updates = adagrad(loss, params, learning_rate=args.step)
    elif args.optimizer == "adadelta":
        updates = adadelta(loss, params, learning_rate=args.step)
    elif args.optimizer == "nesterov":
        updates = nesterov_momentum(loss, params, learning_rate=args.step)
    elif args.optimizer == "rms":
        updates = rmsprop(loss, params, learning_rate=args.step)
    elif args.optimizer == "adam":
        updates = adam(loss, params, learning_rate=args.step)
    else:
        raise "Need set optimizer correctly"

    test_prediction = get_output(network, deterministic=True)
    test_loss = T.mean(binary_crossentropy(test_prediction, target_var))

    train_fn = theano.function([input_var, target_var], loss, updates=updates, allow_input_downcast=True)

    test_acc = T.mean(binary_accuracy(test_prediction, target_var))
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc], allow_input_downcast=True)

    return train_fn, val_fn
Example No. 14
avg_pool = GlobalPoolLayer(bn_post_relu)
dense_layer = DenseLayer(avg_pool,
                         num_units=128,
                         W=HeNormal(gain='relu'),
                         nonlinearity=rectify)
dist_layer = ExpressionLayer(
    dense_layer,
    lambda I: T.abs_(I[:I.shape[0] / 2] - I[I.shape[0] / 2:]),
    output_shape='auto')
l_y = DenseLayer(dist_layer, num_units=1, nonlinearity=sigmoid)

prediction = get_output(l_y)
prediction_clean = get_output(l_y, deterministic=True)

loss = T.mean(binary_crossentropy(prediction, y))
accuracy = T.mean(binary_accuracy(prediction_clean, y))

all_layers = get_all_layers(l_y)
l2_penalty = 0.0001 * regularize_layer_params(all_layers,
                                              lasagne.regularization.l2)
loss = loss + l2_penalty

params = get_all_params(l_y, trainable=True)
updates = adam(loss, params, learning_rate=learning_rate)

meta_data["num_param"] = lasagne.layers.count_params(l_y)
print "number of parameters: ", meta_data["num_param"]

print "... compiling"
train_fn = theano.function(inputs=[X, y], outputs=loss, updates=updates)
val_fn = theano.function(inputs=[X, y], outputs=[loss, accuracy])
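The ExpressionLayer above compares the first and second halves of each batch, so batches must be assembled as stacked pairs. A hypothetical usage fragment (the array names are assumptions):

# hypothetical: sample i pairs with sample i + n, and pair_labels[i] says
# whether the two halves of pair i match
batch = np.concatenate([left_images, right_images], axis=0)  # shape (2n, ...)
pair_labels = np.asarray(match_flags, dtype='int8')          # shape (n,)
batch_loss = train_fn(batch, pair_labels)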
Example No. 16
def run_dnn(learning_rate=0.001, dnn_strategy='mix', possitive_punishment=1):
    #input_var = T.TensorType('float32', ((False,) * 3))()        # Notice the () at the end
    input_var = T.ftensor3('X')
    target_var = T.imatrix('y')

    features_type = 16
    period = 20
    features_dim = features_type * period
    # build the default 'mix' network, then replace it if another strategy is chosen
    network = build_mix(input_var, 1, features_type, features_dim, period, activity=sigmoid)
    if dnn_strategy == 'dnn':
        network = build_dnn(input_var, 1, features_type, features_dim, period, activity=sigmoid)
    elif dnn_strategy == 'conv1d':
        network = build_conv1d(input_var, 1, features_type, features_dim, period, activity=sigmoid)
    elif dnn_strategy == 'cascade':
        network = build_cascade(input_var, 1, features_type, features_dim, period, activity=sigmoid)
    elif dnn_strategy == 'lstm':
        network = build_lstm(input_var, 1, features_type, features_dim, period, activity=sigmoid)
    elif dnn_strategy == 'partitioned':
        network = build_partitioned(input_var, 1, features_type, features_dim, period, activity=sigmoid)
    elif dnn_strategy == 'mix':
        pass
    else:
        raise AttributeError("This dnn_strategy is not supported!")

    l_output = get_output(network)
    loss = self_binary_crossentropy(l_output, target_var, possitive_punishment=possitive_punishment).mean()
    train_acc = binary_accuracy(l_output, target_var).mean()
    all_params = get_all_params(network, trainable=True)
    updates = adagrad(loss, all_params, learning_rate=learning_rate)
    train = theano.function([input_var, target_var], [loss, train_acc], updates=updates)

    test_prediction = get_output(network, deterministic=True)
    test_loss = self_binary_crossentropy(test_prediction, target_var, possitive_punishment=possitive_punishment).mean()
    test_acc = binary_accuracy(test_prediction, target_var).mean()

    #calculate win rate
    win_rate_result1 = []
    win_rate_result2 = []
    for win_rate_threshold in [0.5, 0.6, 0.7, 0.8, 0.9]:
        tmp1 = T.sum(T.switch(T.and_(T.gt(test_prediction, win_rate_threshold), T.eq(target_var, 1)), 1, 0), dtype=theano.config.floatX)
        tmp2 = T.sum(T.switch(T.gt(test_prediction, win_rate_threshold), 1, 0), dtype=theano.config.floatX)
        test_win_rate = (tmp1 + 0.00001) / (tmp2 + 0.00001)
        win_rate_result1.append(test_win_rate)
        win_rate_result2.append(tmp1)

    val = theano.function([input_var, target_var], [test_prediction, test_loss, test_acc, T.as_tensor_variable(win_rate_result1), T.as_tensor_variable(win_rate_result2)])

    _, _, _, _, X_train, y_train, X_val, y_val, _, _ = load_dataset('../../data/800core')
    '''
    test_data_list = []
    test_label_list = []
    for ix in range(103):
        file_name = '../../data/test_dis/data_' + str(ix) + '.txt'
        tmp_test_data, tmp_test_label, _, _, _, _, _, _ = load_dataset(file_name)
        test_data_list.append(tmp_test_data)
        test_label_list.append(tmp_test_label)
    '''

    num_epochs = 150
    batch_size = 128
    for epoch in xrange(num_epochs):
        train_err = 0
        train_acc = 0
        train_batches = 0
        start_time = time.time()

        #train
        for batch in iterate_minibatches(X_train, y_train, batch_size):
            inputs, targets = batch
            err, acc = train(inputs, targets)
            train_err += err
            train_acc += acc
            train_batches += 1

        #validate
        _, val_err, val_acc, val_wr1, val_wr2 = val(X_val, y_val)

        # Then we print the results for this epoch:
        for ix in range(len([0.5, 0.6, 0.7, 0.8, 0.9])):
            sys.stdout.write("  validation win rate :\t\t{}\n".format(val_wr1[ix]))
            sys.stdout.write("  validation possitive num:\t\t{}\n".format(val_wr2[ix]))
        sys.stdout.write("Epoch {} of {} took {:.3f}s\n".format(
            epoch + 1, num_epochs, time.time() - start_time))
        sys.stdout.write("  training loss:\t\t{}\n".format(train_err / train_batches))
        sys.stdout.write("  training accuracy:\t\t{}\n".format(train_acc / train_batches))
        sys.stdout.write("  validation loss:\t\t{}\n".format(val_err/1))
        sys.stdout.write("  validation accuracy:\t\t{} %\n".format(val_acc * 100))
        sys.stdout.write('\n')
        sys.stdout.flush()

        # store the model for this epoch (binary mode for cPickle)
        with open('../../model/' + dnn_strategy + '/' + 'learning_rate' + str(learning_rate) + '_punishment' + str(possitive_punishment) + '_epoch' + str(epoch) + '.model', 'wb') as f:
            cPickle.dump(network, f, protocol=cPickle.HIGHEST_PROTOCOL)

    print 'Done!'
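run_dnn depends on an iterate_minibatches helper that is not shown. A minimal sketch in the style of the Lasagne MNIST tutorial (an assumption, not the project's actual helper):

import numpy as np

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    # yield successive (inputs, targets) minibatches; the remainder is dropped
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start:start + batchsize]
        yield inputs[excerpt], targets[excerpt]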