# Example 1
def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
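    # scale by a raw Theano expression between layers -- the "theano mix"
    # this test exercises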
    l1_o = .999 * l1_o
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
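Each snippet on this page is a standalone usage example and assumes module-level fixtures (X, y, n_classes, minibatch sizes, and so on) plus imports along the following lines. A minimal sketch, assuming a dagbldr-style layout for these helpers; the exact module paths are not stated in the snippets themselves:

# Hedged sketch -- module paths are assumptions, not taken from the snippets.
import numpy as np
import theano
from collections import OrderedDict
from dagbldr.utils import add_datasets_to_graph, get_params_and_grads
from dagbldr.utils import early_stopping_trainer
from dagbldr.nodes import linear_layer, softmax_layer
from dagbldr.nodes import categorical_crossentropy
from dagbldr.optimizers import sgd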
# Example 2
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)

    # softmax output over characters
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state=random_state)

    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)


    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
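    # the fit path is disabled above, so the cost function stands in for
    # both the fit and cost arguments here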
    early_stopping_trainer(cost_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict,
                           [X, y],
                           minibatch_size,
                           list_of_minibatch_functions=[text_minibatch_func],
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
# Example 3
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)

    # softmax output over characters
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state)

    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)


    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
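    # the fit path is disabled above, so the cost function stands in for
    # both the fit and cost arguments here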
    early_stopping_trainer(cost_function, cost_function, checkpoint_dict,
                           [X, y],
                           minibatch_size, train_indices, valid_indices,
                           list_of_minibatch_functions=[text_minibatch_func],
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
# Example 4
def test_softmax_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    softmax = softmax_layer([X_sym], graph, 'softmax', proj_dim=20,
                            random_state=random_state)
    samp = softmax_sample_layer([softmax], graph, 'softmax_sample',
                                random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
# Example 5
def test_softmax_zeros_layer():
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    single_o = softmax_zeros_layer([X_sym], graph, 'single', proj_dim=5)
    concat_o = softmax_zeros_layer([X_sym, y_sym], graph, 'concat', proj_dim=5)
    # Check that things can be reused
    repeated_o = softmax_layer([X_sym], graph, 'single', strict=False)

    # Check that strict mode raises an error if repeated
    assert_raises(AttributeError, softmax_layer, [X_sym], graph, 'concat')

    f = theano.function([X_sym, y_sym], [single_o, concat_o, repeated_o],
                        mode="FAST_COMPILE")
    single, concat, repeat = f(X, y)
    assert_almost_equal(single, repeat)

# Example 6
def test_feedforward_classifier():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_o = linear_layer([X_sym], graph, "l1", proj_dim=20, random_state=random_state)
    y_pred = softmax_layer([l1_o], graph, "pred", n_classes, random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    train_function = theano.function([X_sym, y_sym], [cost], updates=updates, mode="FAST_COMPILE")

    iterate_function(train_function, [X, y], minibatch_size, list_of_output_names=["cost"], n_epochs=1)
# Example 7
def test_softmax_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    softmax = softmax_layer([X_sym],
                            graph,
                            'softmax',
                            proj_dim=20,
                            random_state=random_state)
    samp = softmax_sample_layer([softmax],
                                graph,
                                'softmax_sample',
                                random_state=random_state)
    out = linear_layer([samp],
                       graph,
                       'out',
                       proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
# Example 8
vocab = data["vocabulary"]
train_indices = data["train_indices"]
valid_indices = train_indices

X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, len(X)))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, len(y)))

n_hid = 256
n_out = vocab_size + 1

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(datasets_list, names_list, graph)

h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, "l1_rec", random_state=random_state)
y_pred = softmax_layer([h], graph, "l2_proj", n_out, random_state=random_state)

cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)

checkpoint_dict = {}

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym], [y_pred])


def prediction_strings(y_pred):
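    # NOTE: the original snippet is truncated at this definition. The body
    # below is only a hedged sketch of a typical CTC-style greedy decoder,
    # not the author's code. It assumes `vocab` maps class indices to
    # characters and that the extra class (index len(vocab)) is the blank.
    indices = y_pred.argmax(axis=-1)  # (time, minibatch)
    strings = []
    for seq in indices.T:
        chars = []
        prev = None
        for k in seq:
            if k != prev and k < len(vocab):  # merge repeats, drop blanks
                chars.append(vocab[k])
            prev = k
        strings.append("".join(chars))
    return strings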
# Example 9
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 100
n_enc_layer = [200, 200, 200]
n_dec_layer = [200, 200]
width = 48
height = 48
n_input = width * height

# q(y_pred | x)
y_l1_enc = softplus_layer([X_sym], graph, 'y_l1_enc', n_enc_layer[0],
                          random_state)
y_pred = softmax_layer([y_l1_enc], graph, 'y_pred', n_targets, random_state)

# partial q(z | x, y_pred)
X_l1_enc = softplus_layer([X_sym, y_pred], graph, 'X_l1_enc', n_enc_layer[1],
                          random_state)

# combined q(y_pred | x) and partial q(z | x) for q(z | x, y_pred)
l2_enc = softplus_layer([X_l1_enc], graph, 'l2_enc', n_enc_layer[2],
                        random_state)

# code layer
code_mu = linear_layer([l2_enc], graph, 'code_mu', n_code, random_state)
code_log_sigma = linear_layer([l2_enc], graph, 'code_log_sigma', n_code,
                              random_state)
kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean()
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp',
                                 random_state)
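# NOTE: the example ends here, truncated before the decoder. In a full
# VAE-style setup the sampled code `samp` would feed decoder layers and the
# final cost would combine the reconstruction negative log likelihood with
# the KL term, roughly cost = reconstruction_nll + kl; the decoder layers and
# reconstruction term are assumptions, not part of the original snippet.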
# Example 10
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = maxout_layer([X_sym], graph, 'l1', n_hid, random_state=random_state)
h = bidirectional_gru_recurrent_layer([l1],
                                      X_mask_sym,
                                      n_hid,
                                      graph,
                                      'l1_rec',
                                      random_state=random_state)
l2 = maxout_layer([h], graph, 'l2', n_hid, random_state=random_state)
y_pred = softmax_layer([l2],
                       graph,
                       'softmax',
                       n_out,
                       random_state=random_state)

cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)

checkpoint_dict = {}

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym], [y_pred])
# Example 11
X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, minibatch_size))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, minibatch_size))

n_hid = 500
n_out = vocab_size + 1

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = maxout_layer([X_sym], graph, 'l1', n_hid, random_state=random_state)
h = bidirectional_gru_recurrent_layer([l1], X_mask_sym, n_hid, graph,
                                      'l1_rec', random_state=random_state)
l2 = maxout_layer([h], graph, 'l2', n_hid, random_state=random_state)
y_pred = softmax_layer([l2], graph, 'softmax', n_out, random_state=random_state)

cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)

checkpoint_dict = {}

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym], [y_pred])

# Example 12
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 100
n_enc_layer = 200
n_dec_layer = 200
width = 28
height = 28
n_input = width * height

# q(y_pred | x)
y_l1_enc = softplus_layer([X_sym], graph, 'y_l1_enc', n_enc_layer,
                          random_state=random_state)
y_l2_enc = softplus_layer([y_l1_enc], graph, 'y_l2_enc',  n_targets,
                          random_state=random_state)
y_pred = softmax_layer([y_l2_enc], graph, 'y_pred',  n_targets,
                       random_state=random_state)

# partial q(z | x)
X_l1_enc = softplus_layer([X_sym], graph, 'X_l1_enc', n_enc_layer,
                          random_state=random_state)
X_l2_enc = softplus_layer([X_l1_enc], graph, 'X_l2_enc', n_enc_layer,
                          random_state=random_state)


# combined q(y_pred | x) and partial q(z | x) for q(z | x, y_pred)
l3_enc = softplus_layer([X_l2_enc, y_pred], graph, 'l3_enc', n_enc_layer,
                        random_state=random_state)
l4_enc = softplus_layer([l3_enc], graph, 'l4_enc', n_enc_layer,
                        random_state=random_state)

# code layer
# Example 13
n_hid = 256
n_out = vocab_size + 1

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

h = gru_recurrent_layer([X_sym],
                        X_mask_sym,
                        n_hid,
                        graph,
                        'l1_rec',
                        random_state=random_state)
y_pred = softmax_layer([h], graph, 'l2_proj', n_out, random_state=random_state)

cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)

checkpoint_dict = {}

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym], [y_pred])
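# Hedged sketch: one way to drive the compiled functions above over masked
# minibatches. `make_masked_minibatch`, `X`, `y`, and `minibatch_size` come
# from the surrounding script (see the other snippets on this page); the loop
# itself is an assumption, not part of the original example.
for start in range(0, len(X), minibatch_size):
    sl = slice(start, start + minibatch_size)
    X_mb, X_mb_mask = make_masked_minibatch(X, sl)
    y_mb, y_mb_mask = make_masked_minibatch(y, sl)
    train_cost, = fit_function(X_mb, X_mb_mask, y_mb, y_mb_mask)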

# Example 14
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_hid = 512

# q(y_pred | x)
l1 = relu_layer([X_sym], graph, 'l1', n_hid, random_state)
l2 = relu_layer([l1], graph, 'l2', n_hid, random_state)
y_pred = softmax_layer([l2], graph, 'y_pred',  n_targets, random_state)
nll = categorical_crossentropy(y_pred, y_sym).mean()
cost = nll

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0002
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
    cost_function = checkpoint_dict["cost_function"]
    predict_function = checkpoint_dict["predict_function"]
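# NOTE: the snippet is cut off inside the try block above. A hedged sketch of
# the usual fallback when no checkpoint exists -- the compile calls mirror the
# other snippets on this page, and the exact exception type is an assumption:
except Exception:
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function([X_sym, y_sym], [cost])
    predict_function = theano.function([X_sym], [y_pred])
    checkpoint_dict = {"fit_function": fit_function,
                       "cost_function": cost_function,
                       "predict_function": predict_function}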
# Example 15
(X_story_syms, X_query_syms), (X_story_mask_sym, X_query_mask_sym) = r

y_sym = add_datasets_to_graph([y_answer], ["y"], graph)


l1_story = embedding_layer(X_story_syms, vocab_size, n_emb, graph, 'l1_story',
                           random_state=random_state)
masked_story = X_story_mask_sym.dimshuffle(0, 1, 'x') * l1_story
h_story = gru_recurrent_layer([masked_story], X_story_mask_sym, n_hid, graph,
                              'story_rec', random_state)

l1_query = embedding_layer(X_query_syms, vocab_size, n_emb, graph, 'l1_query',
                           random_state)
h_query = gru_recurrent_layer([l1_query], X_query_mask_sym, n_hid, graph,
                              'query_rec', random_state)
y_pred = softmax_layer([h_query[-1], h_story[-1]], graph, 'y_pred',
                       y_answer.shape[1], random_state=random_state)
cost = categorical_crossentropy(y_pred, y_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)
print("Compiling fit...")
fit_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms
                               + [X_query_mask_sym, y_sym], [cost],
                               updates=updates)
print("Compiling cost...")
cost_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms
                                + [X_query_mask_sym, y_sym], [cost])
print("Compiling predict...")
predict_function = theano.function(X_story_syms + [X_story_mask_sym] +
                                   X_query_syms + [X_query_mask_sym], [y_pred])
# Example 16
# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X[:minibatch_size], y[:minibatch_size]],
                                     ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

l1 = conv2d_layer([X_sym], graph, 'conv1', 8, random_state=random_state)
l2 = pool2d_layer([l1], graph, 'pool1')
l3 = conv2d_layer([l2], graph, 'conv2', 16, random_state=random_state)
l4 = pool2d_layer([l3], graph, 'pool2')
l5 = l4.reshape((l4.shape[0], -1))
y_pred = softmax_layer([l5],
                       graph,
                       'y_pred',
                       n_targets,
                       random_state=random_state)
nll = categorical_crossentropy(y_pred, y_sym).mean()
cost = nll

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.001
momentum = 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])
# Example 17
(X_story_syms, X_query_syms), (X_story_mask_sym, X_query_mask_sym) = r

y_sym = add_datasets_to_graph([y_answer], ["y"], graph)


l1_story = embedding_layer(X_story_syms, vocab_size, n_emb, graph, 'l1_story',
                           random_state)
masked_story = X_story_mask_sym.dimshuffle(0, 1, 'x') * l1_story
h_story = gru_recurrent_layer([masked_story], X_story_mask_sym, n_hid, graph,
                              'story_rec', random_state)

l1_query = embedding_layer(X_query_syms, vocab_size, n_emb, graph, 'l1_query',
                           random_state)
h_query = gru_recurrent_layer([l1_query], X_query_mask_sym, n_hid, graph,
                              'query_rec', random_state)
y_pred = softmax_layer([h_query[-1], h_story[-1]], graph, 'y_pred',
                       y_answer.shape[1], random_state)
cost = categorical_crossentropy(y_pred, y_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adam(params)
learning_rate = 0.001
updates = opt.updates(params, grads, learning_rate)
print("Compiling fit...")
fit_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms
                               + [X_query_mask_sym, y_sym], [cost],
                               updates=updates)
print("Compiling cost...")
cost_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms
                                + [X_query_mask_sym, y_sym], [cost])
print("Compiling predict...")
predict_function = theano.function(X_story_syms + [X_story_mask_sym] +
                                   X_query_syms + [X_query_mask_sym], [y_pred])