Beispiel #1
0
def test_batch_normalization():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                         list_of_test_values=[X, y])
    on_off = tensor.iscalar()
    on_off.tag.test_value = 1
    l1 = relu_layer([X_sym], graph, "proj", proj_dim=5,
                    batch_normalize=True, mode_switch=on_off,
                    random_state=random_state)
    l2 = relu_layer([l1], graph, "proj2", proj_dim=5,
                    batch_normalize=True, mode_switch=on_off,
                    random_state=random_state)
    f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE")
    params, grads = get_params_and_grads(graph, l2.mean())
    opt = sgd(params, .1)
    updates = opt.updates(params, grads)
    train_f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE",
                              updates=updates)
    valid_f = theano.function([X_sym, on_off], [l2], mode="FAST_COMPILE")
    X1 = random_state.rand(*X.shape)
    X2 = np.vstack([X1, .5 * X1])
    t1 = train_f(X1, 0)[0]
    t2 = valid_f(X1, 1)[0]
    t3 = train_f(X2, 0)[0]
    t4 = valid_f(X1, 1)[0]
    t5 = valid_f(X1, 1)[0]
    assert_almost_equal(t4, t5)
    assert_raises(AssertionError, assert_almost_equal, t2, t4)
Beispiel #2
0
def test_vae():
    minibatch_size = 100
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    l1_enc = relu_layer([X_sym, y_sym], graph, 'l1_enc', proj_dim=20,
                        random_state=random_state)
    mu = linear_layer([l1_enc], graph, 'mu', proj_dim=10,
                      random_state=random_state)
    log_sigma = linear_layer([l1_enc], graph, 'log_sigma', proj_dim=10,
                             random_state=random_state)
    samp = gaussian_log_sample_layer([mu], [log_sigma], graph,
                                     'gaussian_log_sample',
                                     random_state=random_state)
    l1_dec = relu_layer([samp], graph, 'l1_dec', proj_dim=20,
                        random_state=random_state)
    out = sigmoid_layer([l1_dec], graph, 'out', proj_dim=X.shape[1],
                        random_state=random_state)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    cost = binary_crossentropy(out, X_sym).mean() + kl
    params, grads = get_params_and_grads(graph, cost)
    learning_rate = 0.001
    opt = sgd(params)
    updates = opt.updates(params, grads, learning_rate)

    train_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                     mode="FAST_COMPILE")

    iterate_function(train_function, [X, y], minibatch_size,
                     list_of_output_names=["cost"], n_epochs=1)
Beispiel #3
0
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym],
                graph,
                'l1',
                proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w**2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)
Beispiel #4
0
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid, random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred',  proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2


params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
Beispiel #5
0
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_hid = 512

# q(y_pred | x)
l1 = relu_layer([X_sym], graph, 'l1', n_hid, random_state)
l2 = relu_layer([l1], graph, 'l2', n_hid, random_state)
y_pred = softmax_layer([l2], graph, 'y_pred', n_targets, random_state)
nll = categorical_crossentropy(y_pred, y_sym).mean()
cost = nll

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0002
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
Beispiel #6
0
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph,
                                     list_of_test_values=[X[:10], y[:10]])
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 128
n_hid = 1000

on_off = tensor.iscalar()
on_off.tag.test_value = 0
l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                batch_normalize=True, mode_switch=on_off,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred',  proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2


params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.1
momentum = 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)
Beispiel #7
0
X = mnist["data"]
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 100
n_hid = 512

# q(y_pred | x)
l1 = relu_layer([X_sym], graph, 'l1', n_hid, random_state)
l2 = relu_layer([l1], graph, 'l2', n_hid, random_state)
y_pred = softmax_layer([l2], graph, 'y_pred',  n_targets, random_state)
nll = categorical_crossentropy(y_pred, y_sym).mean()
cost = nll

params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0002
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]