Example #1
from collections import OrderedDict

import theano
from numpy.testing import assert_almost_equal, assert_raises

# dagbldr-style imports; the exact module paths are assumed, not confirmed
from dagbldr.nodes import softmax_zeros_layer
from dagbldr.utils import add_datasets_to_graph


def test_softmax_zeros_layer():
    # X and y are module-level test fixtures (data matrix and one-hot targets)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    single_o = softmax_zeros_layer([X_sym], graph, 'single', proj_dim=5)
    concat_o = softmax_zeros_layer([X_sym, y_sym], graph, 'concat', proj_dim=5)
    # Check that things can be reused
    repeated_o = softmax_zeros_layer([X_sym], graph, 'single', strict=False)

    # Check that strict mode raises an error if a name is repeated
    assert_raises(AttributeError, softmax_zeros_layer, [X_sym], graph, 'concat')

    f = theano.function([X_sym, y_sym], [single_o, concat_o, repeated_o],
                        mode="FAST_COMPILE")
    single, concat, repeat = f(X, y)
    assert_almost_equal(single, repeat)
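
The assertions above pin down the reuse contract: calling a layer again with an existing name and strict=False shares the previously created parameters (which is why single and repeated_o compare equal), while the default strict mode raises AttributeError on any name collision.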
Example #2
# Assumes X, y (one-hot targets) and n_targets are already in scope, along
# with imports as in Example #1 plus relu_layer, categorical_crossentropy,
# get_weights_from_graph, get_params_and_grads and rmsprop from the same
# dagbldr-style library.
# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = relu_layer([X_sym],
                graph,
                'l1',
                proj_dim=n_hid,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
# L2 penalty over every weight matrix in the graph
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])
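
A minimal sketch of how these compiled functions might be driven; the epoch count and the plain slicing scheme below are assumptions for illustration, not part of the original script:

n_epochs = 10  # assumed for illustration
for epoch in range(n_epochs):
    epoch_costs = []
    for i in range(0, len(X), minibatch_size):
        X_mb = X[i:i + minibatch_size]
        y_mb = y[i:i + minibatch_size]
        # fit_function returns [cost] and applies the rmsprop updates
        epoch_costs.append(fit_function(X_mb, y_mb)[0])
    print("epoch %d: mean train cost %f" % (epoch, np.mean(epoch_costs)))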
Example #3
# mnist is assumed to be a dict-like dataset loaded earlier (e.g. by a
# fetch_mnist()-style helper); imports as in the previous examples.
X = mnist["data"]
y = mnist["target"]
n_targets = 10
# integer labels become one-hot rows, e.g. 2 -> [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
y = convert_to_one_hot(y, n_targets)

# graph holds information necessary to build layers from parents
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
# random state so script is deterministic
random_state = np.random.RandomState(1999)

minibatch_size = 20
n_hid = 1000

l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid, random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + .0001 * L2

params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
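
This script stops at the cost function; a hedged sketch of adding prediction and an error rate on top of it, following the pattern of the other examples:

predict_function = theano.function([X_sym], [y_pred])
probs = predict_function(X)[0]
# fraction of rows whose argmax disagrees with the one-hot target's argmax
error_rate = 1 - np.mean(np.argmax(probs, axis=1) == np.argmax(y, axis=1))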
Example #4
def test_loop():
    # X, y, n_targets, train_indices and valid_indices are module-level test
    # fixtures; TrainingLoop and sgd come from the same dagbldr-style library.
    # graph holds information necessary to build layers from parents
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)

    minibatch_size = 10

    y_pred = softmax_zeros_layer([X_sym], graph, "y_pred", proj_dim=n_targets)
    nll = categorical_crossentropy(y_pred, y_sym).mean()
    # weights are fetched but left unpenalized; the cost is the plain NLL
    weights = get_weights_from_graph(graph)
    cost = nll

    params, grads = get_params_and_grads(graph, cost)

    learning_rate = 0.13
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function([X_sym, y_sym], [cost])
    predict_function = theano.function([X_sym], [y_pred])

    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "predict_function": predict_function,
    }

    def error(*args):
        # all leading args are network inputs; the last is the one-hot targets
        xargs = args[:-1]
        y = args[-1]
        y_pred = predict_function(*xargs)[0]
        return 1 - np.mean(np.argmax(y_pred, axis=1).ravel() ==
                           np.argmax(y, axis=1).ravel())

    TL1 = TrainingLoop(
        fit_function,
        error,
        train_indices[:10],
        valid_indices[:10],
        minibatch_size,
        checkpoint_dict=checkpoint_dict,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_error",
        n_epochs=1,
        optimizer_object=opt,
    )
    epoch_results1 = TL1.run([X, y])
    TL1.train_indices = train_indices[10:20]
    TL1.valid_indices = valid_indices[10:20]
    epoch_results1 = TL1.run([X, y])

    TL2 = TrainingLoop(
        fit_function,
        error,
        train_indices[:20],
        valid_indices[:20],
        minibatch_size,
        checkpoint_dict=checkpoint_dict,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_error",
        n_epochs=1,
        optimizer_object=opt,
    )
    epoch_results2 = TL2.run([X, y])

    r1 = TL1.checkpoint_dict["previous_results"]["train_cost"][-1]
    r2 = TL2.checkpoint_dict["previous_results"]["train_cost"][-1]
    assert r1 == r2
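
As a worked check of the argmax-based error used by the test's error callback: a row counts as correct when the prediction's argmax matches the one-hot target's argmax.

import numpy as np

y_true = np.array([[0, 1], [1, 0], [0, 1]])
y_prob = np.array([[0.2, 0.8], [0.6, 0.4], [0.9, 0.1]])
# rows 1 and 2 are correct, row 3 is not, so the error rate is 1/3
err = 1 - np.mean(np.argmax(y_prob, axis=1) == np.argmax(y_true, axis=1))
assert abs(err - 1.0 / 3) < 1e-12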