Example #1
def test_vae():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_enc = relu_layer([X_sym, y_sym], graph, 'l1_enc', proj_dim=20,
                        random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=10,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=10,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = relu_layer([samp], graph, 'l1_dec', proj_dim=20,
                        random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")

    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
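Most of these snippets are test functions for a Theano-based library (dagbldr, referenced later in this section). They rely on module-level fixtures and imports (X, y, n_classes, minibatch_size, and the layer/optimizer helpers) that are defined once at the top of the test file rather than inside each function. A minimal sketch of that shared setup is shown below; the choice of scikit-learn's digits data and the exact preprocessing are assumptions for illustration, not part of the original examples.

# Hypothetical shared setup assumed by the snippets (dataset choice is an assumption)
from collections import OrderedDict
import numpy as np
import theano
from sklearn.datasets import load_digits

digits = load_digits()
X = (digits.data / 16.).astype(theano.config.floatX)  # features scaled to [0, 1]
y = digits.target  # integer labels; the categorical cost may expect a one-hot encoding
n_classes = len(np.unique(y))
minibatch_size = 100
# add_datasets_to_graph, linear_layer, sgd, and the other helpers are imported from
# dagbldr; their exact module paths depend on the library version.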
Example #2
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid, random_state)

    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out, random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.001
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function,
                           cost_function,
                           checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices,
                           valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
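The recurrent examples additionally assume time-major sequence fixtures: X and y with shape (n_timesteps, n_sequences, n_features) and float masks marking which timesteps are valid, which is why the train/valid indices above run over X.shape[1]. A plausible sketch of such fixtures (the shapes and the reversed-sequence target are assumptions) is:

# Hypothetical sequence fixtures for the recurrent tests (shapes are assumptions)
import numpy as np

n_timesteps, n_sequences, n_features = 20, 50, 5
random_state = np.random.RandomState(1999)
X = random_state.randn(n_timesteps, n_sequences, n_features).astype("float32")
y = X[::-1]  # e.g. learn to output the time-reversed sequence
# 1. where a timestep holds real data, 0. where it is padding
X_mask = np.ones((n_timesteps, n_sequences), dtype="float32")
y_mask = np.ones((n_timesteps, n_sequences), dtype="float32")
minibatch_size = 10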
Example #3
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', proj_dim=n_hid,
                      random_state=random_state)

    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', proj_dim=n_out,
                         random_state=random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates, mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict,
                           [X, y], minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
Example #4
def test_gaussian_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    mu = linear_layer([X_sym], graph, 'mu', proj_dim=20,
                      random_state=random_state)
    sigma = softplus_layer([X_sym], graph, 'sigma', proj_dim=20,
                           random_state=random_state)
    samp = gaussian_sample_layer([mu], [sigma], graph, 'gaussian_sample',
                                 random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
Example #5
def test_feedforward_theano_mix():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_o = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                        random_state=random_state)
    # plain Theano arithmetic can be mixed freely with dagbldr layer outputs
    l1_o = .999 * l1_o
    y_pred = softmax_layer([l1_o], graph, 'pred', n_classes,
                           random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function, checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices, valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
Example #6
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym = add_datasets_to_graph([X], ["X"], graph)

    l1_enc = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000  # zero learning rate, so sgd leaves the parameters unchanged
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym], [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict, [X],
                           minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
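Everything needed for inspection already lives in the graph built above, so additional Theano functions can be compiled from the same symbolic variables. As a sketch (not part of the original test), reconstructions from the decoder path could be obtained with:

# Hypothetical extra function compiled from the same graph: input -> reconstruction
reconstruct_function = theano.function([X_sym], [out], mode="FAST_COMPILE")
X_rec, = reconstruct_function(X[:minibatch_size])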
Example #7
def test_softmax_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    softmax = softmax_layer([X_sym], graph, 'softmax', proj_dim=20,
                            random_state=random_state)
    samp = softmax_sample_layer([softmax], graph, 'softmax_sample',
                                random_state=random_state)
    out = linear_layer([samp], graph, 'out', proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
Example #8
def test_gaussian_log_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    mu = linear_layer([X_sym],
                      graph,
                      'mu',
                      proj_dim=20,
                      random_state=random_state)
    log_sigma = linear_layer([X_sym],
                             graph,
                             'log_sigma',
                             proj_dim=20,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma],
                                     graph,
                                     'gaussian_sample',
                                     random_state=random_state)
    out = linear_layer([samp],
                       graph,
                       'out',
                       proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
Example #9
def test_feedforward_classifier():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_o = linear_layer([X_sym], graph, "l1", proj_dim=20,
                        random_state=random_state)
    y_pred = softmax_layer([l1_o], graph, "pred", n_classes,
                           random_state=random_state)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")

    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
Example #10
def test_embedding_layer():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    max_index = 100
    proj_dim = 12
    fake_str_int = [[1, 5, 7, 1, 6, 2], [2, 3, 6, 2], [3, 3, 3, 3, 3, 3, 3]]
    minibatch, mask = make_embedding_minibatch(fake_str_int, slice(0, 3))
    (emb_slices, ), (emb_mask, ) = add_embedding_datasets_to_graph([minibatch],
                                                                   [mask],
                                                                   "emb",
                                                                   graph)
    emb = embedding_layer(emb_slices, max_index, proj_dim, graph, 'emb',
                          random_state)
    followup_dim = 17
    proj = linear_layer([emb], graph, 'proj', followup_dim, random_state)
    f = theano.function(emb_slices, [proj], mode="FAST_COMPILE")
    out, = f(*minibatch)
    # output has followup_dim features and one row per input sequence
    assert (out.shape[-1] == 17)
    assert (out.shape[-2] == len(fake_str_int))
Example #11
def test_softmax_sample_layer():
    random_state = np.random.RandomState(42)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    softmax = softmax_layer([X_sym],
                            graph,
                            'softmax',
                            proj_dim=20,
                            random_state=random_state)
    samp = softmax_sample_layer([softmax],
                                graph,
                                'softmax_sample',
                                random_state=random_state)
    out = linear_layer([samp],
                       graph,
                       'out',
                       proj_dim=10,
                       random_state=random_state)
    f = theano.function([X_sym], [out], mode="FAST_COMPILE")
Example #12
def test_embedding_layer():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    max_index = 100
    proj_dim = 12
    fake_str_int = [[1, 5, 7, 1, 6, 2], [2, 3, 6, 2], [3, 3, 3, 3, 3, 3, 3]]
    minibatch, mask = make_embedding_minibatch(
        fake_str_int, slice(0, 3))
    (emb_slices,), (emb_mask,) = add_embedding_datasets_to_graph(
        [minibatch], [mask], "emb", graph)
    emb = embedding_layer(emb_slices, max_index, proj_dim, graph,
                          'emb', random_state)
    followup_dim = 17
    proj = linear_layer([emb], graph, 'proj', followup_dim,
                        random_state=random_state)
    f = theano.function(emb_slices, [proj], mode="FAST_COMPILE")
    out, = f(*minibatch)
    assert(out.shape[-1] == 17)
    assert(out.shape[-2] == len(fake_str_int))
Example #13
def test_fixed_projection_layer():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)

    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    out = fixed_projection_layer([X_sym], rand_projection, graph, 'proj')
    out2 = fixed_projection_layer([X_sym],
                                  rand_projection,
                                  graph,
                                  'proj',
                                  pre=rand_projection[:, 0])
    out3 = fixed_projection_layer([X_sym],
                                  rand_projection,
                                  graph,
                                  'proj',
                                  post=rand_projection[0])
    final = linear_layer([out2],
                         graph,
                         'linear',
                         17,
                         random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final], mode="FAST_COMPILE")

    # Test updates
    params, grads = get_params_and_grads(graph, final.mean())
    opt = sgd(params)
    updates = opt.updates(params, grads, .1)
    f2 = theano.function([X_sym], [out2, final], updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)

    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)

    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
Example #14
def test_fixed_projection_layer():
    random_state = np.random.RandomState(1999)
    rand_projection = random_state.randn(64, 12)

    graph = OrderedDict()
    X_sym = add_datasets_to_graph([X], ["X"], graph)
    out = fixed_projection_layer([X_sym], rand_projection,
                                 graph, 'proj')
    out2 = fixed_projection_layer([X_sym], rand_projection,
                                  graph, 'proj',
                                  pre=rand_projection[:, 0])
    out3 = fixed_projection_layer([X_sym], rand_projection,
                                  graph, 'proj',
                                  post=rand_projection[0])
    final = linear_layer([out2], graph, 'linear', 17,
                         random_state=random_state)
    # Test that it compiles with and without bias
    f = theano.function([X_sym], [out, out2, out3, final],
                        mode="FAST_COMPILE")

    # Test updates
    params, grads = get_params_and_grads(
        graph, final.mean())
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    f2 = theano.function([X_sym], [out2, final],
                         updates=updates)
    ret = f(np.ones_like(X))[0]
    assert ret.shape[1] != X.shape[1]
    ret2 = f(np.ones_like(X))[1]
    assert ret2.shape[1] != X.shape[1]
    out1, final1 = f2(X)
    out2, final2 = f2(X)

    # Make sure fixed basis is unchanged
    assert_almost_equal(out1, out2)

    # Make sure linear layer is updated
    assert_raises(AssertionError, assert_almost_equal, final1, final2)
Example #15
def test_vae():
    minibatch_size = 10
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym = add_datasets_to_graph([X], ["X"], graph)

    l1_enc = softplus_layer([X_sym],
                            graph,
                            'l1_enc',
                            proj_dim=100,
                            random_state=random_state)
    mu = linear_layer([l1_enc],
                      graph,
                      'mu',
                      proj_dim=50,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc],
                             graph,
                             'log_sigma',
                             proj_dim=50,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma],
                                     graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = softplus_layer([samp],
                            graph,
                            'l1_dec',
                            proj_dim=100,
                            random_state=random_state)
    out = sigmoid_layer([l1_dec],
                        graph,
                        'out',
                        proj_dim=X.shape[1],
                        random_state=random_state)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.00000  # zero learning rate, so sgd leaves the parameters unchanged
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym], [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(fit_function,
                           cost_function,
                           checkpoint_dict, [X],
                           minibatch_size,
                           train_indices,
                           valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
Example #16
valid_indices = np.arange(len(sine_y))
X = sine_x
y = sine_y

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = len(sine_y)
n_hid = 20
n_out = 1

l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid, random_state=random_state)
y_pred = linear_layer([l1], graph, 'y_pred',  proj_dim=n_out,
                      random_state=random_state)
cost = ((y_pred - y_sym) ** 2).mean()
# Can also define cost this way using dagbldr
# cost = squared_error(y_pred, y_sym).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-3
momentum = 0.8
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())
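The snippet ends after compiling the fit, cost, and predict functions and collecting the locals into a checkpoint dictionary; the rest of the script presumably hands these to one of the library's training loops. As a minimal sketch (not the original code), the compiled functions can also be driven by a plain minibatch loop:

# Hand-rolled training loop sketch; the original script likely uses a dagbldr trainer instead
n_epochs = 100
for epoch in range(n_epochs):
    epoch_costs = []
    for i in range(0, len(X), minibatch_size):
        batch_cost, = fit_function(X[i:i + minibatch_size], y[i:i + minibatch_size])
        epoch_costs.append(batch_cost)
    print("epoch %d, mean train cost %f" % (epoch, np.mean(epoch_costs)))
y_hat, = predict_function(X)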
Example #17
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 400
n_enc_layer = [600, 600]
n_dec_layer = [600, 600]
width = 48
height = 48
n_input = width * height

# encode path aka q
l1_enc = softplus_layer([X_sym], graph, 'l1_enc', n_enc_layer[0], random_state)
l2_enc = softplus_layer([l1_enc], graph, 'l2_enc',  n_enc_layer[1],
                        random_state)
code_mu = linear_layer([l2_enc], graph, 'code_mu', n_code, random_state)
code_log_sigma = linear_layer([l2_enc], graph, 'code_log_sigma', n_code,
                              random_state)
kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean()
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp',
                                 random_state)

# decode path aka p
l1_dec = softplus_layer([samp], graph, 'l1_dec',  n_dec_layer[0], random_state)
l2_dec = softplus_layer([l1_dec], graph, 'l2_dec', n_dec_layer[1], random_state)
out = linear_layer([l2_dec], graph, 'out', n_input, random_state)

nll = squared_error(out, X_sym).mean()
# log p(x) = -nll so swap sign
# want to minimize cost in optimization so multiply by -1
cost = -1 * (-nll - kl)
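Since -1 * (-nll - kl) simplifies to nll + kl, the cost in this example (and in the nearly identical one that follows) is the negative evidence lower bound: the reconstruction term plus the KL between the approximate posterior and the prior. Written directly:

cost = nll + kl  # negative ELBO: reconstruction cost plus KL(q(z|x) || p(z))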
Example #18
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_code = 100
n_enc_layer = [200, 200]
n_dec_layer = [200, 200]
width = 28
height = 28
n_input = width * height

# encode path aka q
l1_enc = softplus_layer([X_sym], graph, 'l1_enc', n_enc_layer[0], random_state)
l2_enc = softplus_layer([l1_enc], graph, 'l2_enc',  n_enc_layer[1],
                        random_state)
code_mu = linear_layer([l2_enc], graph, 'code_mu', n_code, random_state)
code_log_sigma = linear_layer([l2_enc], graph, 'code_log_sigma', n_code,
                              random_state)
kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean()
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp',
                                 random_state)

# decode path aka p
l1_dec = softplus_layer([samp], graph, 'l1_dec',  n_dec_layer[0], random_state)
l2_dec = softplus_layer([l1_dec], graph, 'l2_dec', n_dec_layer[1], random_state)
out = sigmoid_layer([l2_dec], graph, 'out', n_input, random_state)

nll = binary_crossentropy(out, X_sym).mean()
# log p(x) = -nll so swap sign
# want to minimize cost in optimization so multiply by -1
cost = -1 * (-nll - kl)
Example #19
# combined q(y_pred | x) and partial q(z | x) for q(z | x, y_pred)
l3_enc = softplus_layer([X_l2_enc, y_pred],
                        graph,
                        'l3_enc',
                        n_enc_layer,
                        random_state=random_state)
l4_enc = softplus_layer([l3_enc],
                        graph,
                        'l4_enc',
                        n_enc_layer,
                        random_state=random_state)

# code layer
code_mu = linear_layer([l4_enc],
                       graph,
                       'code_mu',
                       n_code,
                       random_state=random_state)
code_log_sigma = linear_layer([l4_enc],
                              graph,
                              'code_log_sigma',
                              n_code,
                              random_state=random_state)
kl = gaussian_log_kl([code_mu], [code_log_sigma], graph, 'kl').mean()
samp = gaussian_log_sample_layer([code_mu], [code_log_sigma], graph, 'samp',
                                 random_state)

# decode path aka p(x | z, y) for labeled data
l1_dec = softplus_layer([samp, y_sym],
                        graph,
                        'l1_dec',