Ejemplo n.º 1
0
    Time per epoch on CPU (Core i7): ~150s.
'''

max_features = 3
maxlen = 19200  # cut texts after this number of words (among top max_features most common words)
batch_size = 10
nb_epoch = 1
row_count = 120
column_count = 160
number_of_training_data = 455
DEBUG = False

N_HIDDEN = 20
X_train, y_train, X_val, y_val, X_test, y_test = dt.load_dataset(
    number_of_training_data, row_count, column_count, max_features, DEBUG)
dt.saveImage(y_train[0], "gt.png", row_count, column_count, 3, True)

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print("Pad sequences (samples x time)")
#X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
#X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)
print('Build model...')

model = Graph()
model.add_input(name='input', input_shape=(None, max_features), dtype='float')
Ejemplo n.º 2
0
def main(num_epochs=NUM_EPOCHS):
    print("Building network ...")


    l_in = lasagne.layers.InputLayer((None, MAX_LENGTH, 3))
    batchsize, seqlen, _ = l_in.input_var.shape

    l_forward = lasagne.layers.LSTMLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh)

    l_backward = lasagne.layers.LSTMLayer(
        l_in, N_HIDDEN, grad_clipping=GRAD_CLIP,
        nonlinearity=lasagne.nonlinearities.tanh, backwards=True)

    l_recurrent = lasagne.layers.ElemwiseMergeLayer([l_forward, l_backward], T.mul)

    softmax = lasagne.nonlinearities.softmax

    l_reshape = lasagne.layers.ReshapeLayer(l_recurrent,(-1, N_HIDDEN))

    l_drop_out = lasagne.layers.DropoutLayer(l_reshape, p=0.95)

    l_dense = lasagne.layers.DenseLayer(l_drop_out, num_units=1, nonlinearity=lasagne.nonlinearities.tanh)

    l_drop_out_2 = lasagne.layers.DropoutLayer(l_dense, p=0.95)

    #l_drop_out_2 = lasagne.layers.DropoutLayer(l_reshape, p=0.5)

    l_softmax = lasagne.layers.DenseLayer(l_drop_out_2, num_units=2, nonlinearity = softmax)

    l_out = lasagne.layers.ReshapeLayer(l_softmax, (batchsize, seqlen, 2)) 

    # Now, we'll concatenate the outputs to combine them.
    #l_sum = lasagne.layers.ConcatLayer([l_forward, l_backward], 2)

    #l_shp = lasagne.layers.ReshapeLayer(l_sum, (-1, N_HIDDEN))

    # Our output layer is a simple dense connection, with 1 output unit
    #l_final = lasagne.layers.DenseLayer(l_shp, num_units=1, nonlinearity=lasagne.nonlinearities.tanh)
    
    #l_out = lasagne.layers.ReshapeLayer(l_final, (batchsize, seqlen, 1))

    target_values = T.tensor3('target_output')

    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(l_out)

    # The value we care about is the final value produced  for each sequence
    #predicted_values = T.argmax(network_output, axis = 2, keepdims = True)
    predicted_values = network_output

    # Our cost will be mean-squared error
    cost = T.mean((T.argmax(predicted_values, axis = 2, keepdims = True) - target_values)**2)
    #cost = lasagne.objectives.squared_error(T.argmax(predicted_values, axis = 2)+1, target_values).mean()
    #cost = cost.mean()

    acc = T.mean(T.eq(T.argmax(predicted_values, axis = 2, keepdims = True), target_values),
                      dtype=theano.config.floatX)

    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out)

    # Compute SGD updates for training
    print("Computing updates ...")
    updates = lasagne.updates.nesterov_momentum(cost, all_params, learning_rate=LEARNING_RATE)

    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values],
                            cost, updates=updates)
    compute_cost = theano.function(
        [l_in.input_var, target_values], cost)
    compute_acc = theano.function(
        [l_in.input_var, target_values], acc)

    get_out = T.argmax(predicted_values, axis = 2, keepdims = True)
    get_prediction = theano.function([l_in.input_var], get_out)
    get_prediction_2 = theano.function([l_in.input_var], predicted_values)

    # We'll use this "validation set" to periodically check progress
    X_train, y_train, X_val, y_val, X_test, y_test = dt.load_dataset(BATCH_SIZE, row_count, column_count, plane_count, DEBUG)
 
    print("Training ...")
    #print(get_prediction(X_train[0:1]))

    try:
        index = 0 #*len(dt.labels_rev)
        dt.saveImage(y_test[0], "results/y_GT.png",row_count, column_count,  plane_count)
        for epoch in range(num_epochs):
            X = X_train[EPOCH_SIZE*epoch:EPOCH_SIZE*(epoch+1)]
            y = y_train[EPOCH_SIZE*epoch:EPOCH_SIZE*(epoch+1)]
            train(X, y)

            cost_val = compute_cost(X_val, y_val)
            cost_test = compute_acc(X_test, y_test)*100
            #print(y_test[0]) 
            #print(get_prediction(X_test)[0])
            #print(get_prediction_2(X_test)[0]) 
            print("Epoch {} validation cost = {}  test acc = {} %".format(epoch, cost_val, cost_test))

            dt.saveImage(get_prediction(X_test)[0], "results/y_output_{}.png".format(epoch), row_count, column_count,  plane_count, True)

        dt.saveImage(get_prediction(X_test)[0], "results/y_output.png", row_count, column_count,  plane_count, True)

    except KeyboardInterrupt:
        pass
Ejemplo n.º 3
0
    Output after 4 epochs on CPU: ~0.8146
    Time per epoch on CPU (Core i7): ~150s.
'''

max_features = 3
maxlen = 19200  # cut texts after this number of words (among top max_features most common words)
batch_size = 10
nb_epoch = 1
row_count = 120
column_count = 160
number_of_training_data = 455
DEBUG = False 

N_HIDDEN = 20
X_train, y_train, X_val, y_val, X_test, y_test = dt.load_dataset(number_of_training_data, row_count, column_count, max_features, DEBUG)
dt.saveImage(y_train[0], "gt.png", row_count, column_count,  3, True)

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print("Pad sequences (samples x time)")
#X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
#X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)
print('Build model...')