Beispiel #1
0
def test_rnn_correlated_mixture_density():
    """Smoke-test a tanh -> GRU -> Bernoulli/correlated-mixture graph.

    Builds a dataset by repeating ``bernoulli_X`` / ``bernoulli_y``
    ``minibatch_size`` times, turns it into masked minibatches, wires up the
    layers and the masked mean cost, compiles one Theano function and runs
    ``fixed_n_epochs_trainer`` for a single epoch.

    The test contains no assertions: it passes when nothing raises.
    """
    # graph holds information necessary to build layers from parents
    # Seeded generator shared by every layer constructor below.
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    minibatch_size = 5
    # The same example is repeated minibatch_size times to form the dataset.
    X_seq = np.array([bernoulli_X for i in range(minibatch_size)])
    y_seq = np.array([bernoulli_y for i in range(minibatch_size)])
    # make_masked_minibatch yields a (data, mask) pair for the given slice;
    # the slice covers all minibatch_size entries.
    X_mb, X_mb_mask = make_masked_minibatch(X_seq, slice(0, minibatch_size))
    y_mb, y_mb_mask = make_masked_minibatch(y_seq, slice(0, minibatch_size))
    datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    # One symbolic variable comes back per (dataset, name) pair.
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)
    n_hid = 5
    # Train and valid indices are identical: both cover every sequence.
    train_indices = np.arange(len(X_seq))
    valid_indices = np.arange(len(X_seq))

    # Layers are built in order; each call receives the same random_state and
    # graph objects, so reordering these statements may change the result.
    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    h = gru_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                            random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [h], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    # The output layer returns five values, all of which feed the cost.
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym)
    # Apply the target mask, then reduce to a scalar with the mean.
    cost = masked_cost(cost, y_mask_sym).mean()
    # Compiled without `updates`: evaluating it only returns the cost.
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")

    # NOTE(review): the checkpoint dict is built from this function's local
    # namespace, so renaming/adding locals above may change its contents --
    # confirm against create_checkpoint_dict before refactoring.
    checkpoint_dict = create_checkpoint_dict(locals())

    # cost_function is supplied twice (presumably as both the training and the
    # validation function -- TODO confirm against fixed_n_epochs_trainer).
    # One minibatch function is given per entry of [X_seq, y_seq].
    epoch_results = fixed_n_epochs_trainer(
        cost_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [X_seq, y_seq],
        minibatch_size,
        list_of_minibatch_functions=[make_masked_minibatch,
                                     make_masked_minibatch],
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
Beispiel #2
0
def test_correlated_mixture_density():
    """Smoke-test a tanh -> Bernoulli/correlated-mixture graph trained by SGD.

    Registers ``bernoulli_X`` / ``bernoulli_y`` in the graph, builds one tanh
    layer and the mixture output layer, derives SGD updates from the mean
    cost, compiles a fitting function (with updates) and a cost-only function,
    and runs ``fixed_n_epochs_trainer`` for a single epoch.

    The test contains no assertions: it passes when nothing raises.
    """
    # graph holds information necessary to build layers from parents
    # Seeded generator shared by every layer constructor below.
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([bernoulli_X, bernoulli_y], ["X", "y"],
                                         graph)
    n_hid = 20
    # A single minibatch spans the whole dataset.
    minibatch_size = len(bernoulli_X)
    # Train and valid indices are identical: both cover every example.
    train_indices = np.arange(len(bernoulli_X))
    valid_indices = np.arange(len(bernoulli_X))

    # Layers are built in order; each call receives the same random_state and
    # graph objects, so reordering these statements may change the result.
    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [l1], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    # The output layer returns five values, all of which feed the cost.
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)

    learning_rate = 1E-6
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    # fit_function applies `updates` on every call; cost_function only
    # evaluates the cost.
    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    # NOTE(review): the checkpoint dict is built from this function's local
    # namespace, so renaming/adding locals above may change its contents --
    # confirm against create_checkpoint_dict before refactoring.
    checkpoint_dict = create_checkpoint_dict(locals())

    # No minibatch functions are passed here, unlike the masked/recurrent
    # case; the trainer's default handling is used (see its definition).
    epoch_results = fixed_n_epochs_trainer(
        fit_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [bernoulli_X, bernoulli_y],
        minibatch_size,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
Beispiel #3
0
# Held-out indices and the raw arrays are read from the preloaded `mnist`
# mapping.
valid_indices = mnist["valid_indices"]
X = mnist["data"]
y = mnist["target"]

# Re-encode the targets with convert_to_one_hot; n_targets is also the
# projection size of the output layer built below.
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# `graph` holds the information layers need to be built from their parents.
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph(
    [X, y],
    ["X", "y"],
    graph,
)

# Seeded generator, so every run of the script draws the same numbers.
random_state = np.random.RandomState(1999)

minibatch_size, n_hid = 20, 1000

# Model: one tanh hidden layer feeding the output layer.
l1 = tanh_layer(
    [X_sym],
    graph,
    'l1',
    proj_dim=n_hid,
    random_state=random_state,
)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)

# Objective: mean categorical cross-entropy plus 1e-4 times the summed
# squares of every weight returned by get_weights_from_graph.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 1e-4 * L2

params, grads = get_params_and_grads(graph, cost)

# Optimizer hyper-parameters; the optimizer object turns (params, grads)
# into the update rules handed to theano.function.
learning_rate, momentum = 1e-4, 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

# Every call evaluates the cost and applies `updates`.
fit_function = theano.function(
    [X_sym, y_sym],
    [cost],
    updates=updates,
)
Beispiel #4
0
# Targets come from the preloaded `mnist` mapping and are re-encoded with
# convert_to_one_hot; n_targets also sizes the output layer below.
y = mnist["target"]
n_targets = 10
y = convert_to_one_hot(y, n_targets)

# `graph` holds the information layers need to be built from their parents.
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

# Seeded generator, so every run of the script draws the same numbers.
random_state = np.random.RandomState(1999)

minibatch_size, n_hid = 20, 1000

# Model: one tanh hidden layer feeding the output layer.
l1 = tanh_layer([X_sym], graph, 'l1',
                proj_dim=n_hid, random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)

# Objective: mean categorical cross-entropy plus 1e-4 times the summed
# squares of every weight returned by get_weights_from_graph.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 1e-4 * L2

params, grads = get_params_and_grads(graph, cost)

# Optimizer hyper-parameters; the optimizer object turns (params, grads)
# into update rules.
learning_rate, momentum = 1e-4, 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)
Beispiel #5
0
# `graph` holds the information layers need to be built from their parents.
# The first ten rows of X and y are registered as test values.
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph(
    [X, y], ["X", "y"], graph, list_of_test_values=[X[:10], y[:10]])

# Seeded generator, so every run of the script draws the same numbers.
random_state = np.random.RandomState(1999)

minibatch_size, n_hid = 128, 1000

# Symbolic integer scalar handed to the hidden layer as `mode_switch`; its
# test value is 0.
# NOTE(review): presumably selects the batch-normalization mode -- confirm
# against tanh_layer.
on_off = tensor.iscalar()
on_off.tag.test_value = 0

# Model: one batch-normalized tanh hidden layer feeding the output layer.
l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                batch_normalize=True, mode_switch=on_off,
                random_state=random_state)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)

# Objective: mean categorical cross-entropy plus 1e-4 times the summed
# squares of every weight returned by get_weights_from_graph.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 1e-4 * L2

params, grads = get_params_and_grads(graph, cost)

# Optimizer hyper-parameters; the optimizer object turns (params, grads)
# into update rules.
learning_rate, momentum = 0.1, 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)
Beispiel #6
0
# Re-encode the targets with convert_to_one_hot; n_targets also sizes the
# output layer below.
y = convert_to_one_hot(y, n_targets)

# `graph` holds the information layers need to be built from their parents.
# The first ten rows of X and y are registered as test values.
graph = OrderedDict()
X_sym, y_sym = add_datasets_to_graph(
    [X, y],
    ["X", "y"],
    graph,
    list_of_test_values=[X[:10], y[:10]],
)

# Seeded generator, so every run of the script draws the same numbers.
random_state = np.random.RandomState(1999)

minibatch_size, n_hid = 128, 1000

# Symbolic integer scalar handed to the hidden layer as `mode_switch`; its
# test value is 0. It is also the third input of the compiled function.
# NOTE(review): presumably selects the batch-normalization mode -- confirm
# against tanh_layer.
on_off = tensor.iscalar()
on_off.tag.test_value = 0

# Model: one batch-normalized tanh hidden layer feeding the output layer.
l1 = tanh_layer(
    [X_sym],
    graph,
    'l1',
    proj_dim=n_hid,
    batch_normalize=True,
    mode_switch=on_off,
    random_state=random_state,
)
y_pred = softmax_zeros_layer([l1], graph, 'y_pred', proj_dim=n_targets)

# Objective: mean categorical cross-entropy plus 1e-4 times the summed
# squares of every weight returned by get_weights_from_graph.
nll = categorical_crossentropy(y_pred, y_sym).mean()
weights = get_weights_from_graph(graph)
L2 = sum([(w ** 2).sum() for w in weights])
cost = nll + 1e-4 * L2

params, grads = get_params_and_grads(graph, cost)

# Optimizer hyper-parameters; the optimizer object turns (params, grads)
# into the update rules handed to theano.function.
learning_rate, momentum = 0.1, 0.9
opt = sgd_nesterov(params, learning_rate, momentum)
updates = opt.updates(params, grads)

# Every call evaluates the cost and applies `updates`.
fit_function = theano.function(
    [X_sym, y_sym, on_off],
    [cost],
    updates=updates,
)