Example #1
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid, random_state)

    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out, random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.001
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function,
                           cost_function,
                           checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices,
                           valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
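These tests assume inputs laid out as (time, minibatch, features), with a companion mask marking which time steps are real data rather than padding. The minibatch helpers themselves are not shown; below is a minimal numpy sketch of that convention, where pad_and_mask is a hypothetical name rather than a library function.

import numpy as np

def pad_and_mask(sequences):
    # Hypothetical helper: pad variable-length sequences into a single
    # (time, minibatch, features) array plus a (time, minibatch) mask
    # of 1s for real steps and 0s for padding.
    max_len = max(len(s) for s in sequences)
    n_feat = sequences[0].shape[-1]
    X = np.zeros((max_len, len(sequences), n_feat), dtype="float32")
    X_mask = np.zeros((max_len, len(sequences)), dtype="float32")
    for i, s in enumerate(sequences):
        X[:len(s), i] = s
        X_mask[:len(s), i] = 1.
    return X, X_mask

seqs = [np.ones((3, 2), dtype="float32"), np.ones((5, 2), dtype="float32")]
X_demo, X_demo_mask = pad_and_mask(seqs)
print(X_demo.shape, X_demo_mask.shape)  # (5, 2, 2) (5, 2)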
Example #2
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)

    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state=random_state)

    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
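    # NOTE: the fit/update path below is left disabled inside a string
    # literal; this test only compiles and evaluates cost_function.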
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)


    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict,
                           [X, y],
                           minibatch_size,
                           list_of_minibatch_functions=[text_minibatch_func],
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
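shift_layer delays the targets by one time step so the decoder is conditioned on y_{t-1} when predicting y_t (teacher forcing). A numpy sketch of the assumed semantics, prepending a zero step and dropping the last:

import numpy as np

# Assumed shift_layer semantics: shift targets forward one step in time
y_demo = np.arange(1, 5, dtype="float32")[:, None, None]  # (time, 1, 1)
shifted = np.zeros_like(y_demo)
shifted[1:] = y_demo[:-1]
print(y_demo.ravel())   # [1. 2. 3. 4.]
print(shifted.ravel())  # [0. 1. 2. 3.]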
Example #3
def test_conditional_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph, 'l1_end',
                            random_state)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context = conditional_gru_recurrent_layer([y_sym], [h], y_mask_sym,
                                                     n_hid, graph, 'l2_dec',
                                                     random_state)

    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state)

    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
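    # NOTE: as in Example #2, the fit/update path below is disabled
    # inside a string literal; only cost_function is compiled.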
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)


    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function, checkpoint_dict,
                           [X, y],
                           minibatch_size, train_indices, valid_indices,
                           list_of_minibatch_functions=[text_minibatch_func],
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
Example #4
def test_tanh_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Setup weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', proj_dim=n_hid,
                      random_state=random_state)

    h = tanh_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', proj_dim=n_out,
                         random_state=random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    learning_rate = 0.001
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates, mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict,
                           [X, y], minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
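The two copies of this test pass learning_rate at different points (to sgd itself, or to opt.updates), but either way the compiled updates implement the standard SGD step p <- p - learning_rate * g. A conceptual standalone sketch, not the library's actual sgd class:

def sgd_updates(params, grads, learning_rate):
    # One (shared_variable, new_value) pair per parameter, in the form
    # Theano expects for the updates= argument of theano.function
    return [(p, p - learning_rate * g) for p, g in zip(params, grads)]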
Example #5
def test_rnn_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    minibatch_size = 5
    X_seq = np.array([bernoulli_X for i in range(minibatch_size)])
    y_seq = np.array([bernoulli_y for i in range(minibatch_size)])
    X_mb, X_mb_mask = make_masked_minibatch(X_seq, slice(0, minibatch_size))
    y_mb, y_mb_mask = make_masked_minibatch(y_seq, slice(0, minibatch_size))
    datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)
    n_hid = 5
    train_indices = np.arange(len(X_seq))
    valid_indices = np.arange(len(X_seq))

    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    h = gru_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                            random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [h], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = create_checkpoint_dict(locals())

    epoch_results = fixed_n_epochs_trainer(
        cost_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [X_seq, y_seq],
        minibatch_size,
        list_of_minibatch_functions=[make_masked_minibatch,
                                     make_masked_minibatch],
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
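masked_cost presumably zeroes the per-step cost wherever the mask is 0, so padded steps contribute nothing to the loss. A numpy sketch under that assumption; note that a plain .mean() afterwards still divides by all steps, masked or not:

import numpy as np

# Assumed masked_cost behavior: elementwise product with the mask
step_cost = np.array([[1.], [2.], [3.]])  # (time, minibatch) costs
mask = np.array([[1.], [1.], [0.]])       # final step is padding
masked = step_cost * mask
print(masked.mean())              # (1 + 2 + 0) / 3 = 1.0
print(masked.sum(axis=0).mean())  # 3.0, per-sequence summed cost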
Example #6
def test_masked_cost():
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)
    cost = gaussian_error(.5 * X_sym, .5 * X_sym, X_sym)
    masked = masked_cost(X_sym, y_sym)
    theano.function([X_sym, y_sym], [cost, masked], mode="FAST_COMPILE")
Example #7
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph, list_of_test_values=datasets_list)

n_hid = 256
n_out = 8

h = location_attention_tanh_recurrent_layer(
    [X_sym], [y_sym], X_mask_sym, y_mask_sym, n_hid, graph, 'l1_att_rec',
    random_state=random_state)

X_hat = sigmoid_layer([h], graph, 'output', proj_dim=n_out,
                      random_state=random_state)
cost = binary_crossentropy(X_hat, X_sym)
cost = masked_cost(cost, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)
opt = adadelta(params)
updates = opt.updates(params, grads)
fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                               [cost], updates=updates)
valid_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])

checkpoint_dict = {}
checkpoint_dict["fit_function"] = fit_function
checkpoint_dict["valid_function"] = valid_function
TL = TrainingLoop(fit_function, valid_function, train_itr, valid_itr,
                  checkpoint_dict=checkpoint_dict,
                  list_of_train_output_names=["train_cost"],
                  valid_output_name="valid_cost",
                  n_epochs=500)
Example #8
def test_masked_cost():
    cost = gaussian_error(.5 * X_sym, .5 * X_sym, X_sym)
    masked = masked_cost(X_sym, y_sym)
    theano.function([X_sym, y_sym], [cost, masked], mode="FAST_COMPILE")
graph = OrderedDict()
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
h = lstm_recurrent_layer([l1], X_mask_sym, rnn_dim, graph, 'l1_rec',
                         random_state=random_state)
l2 = relu_layer([h], graph, 'l2', proj_dim=n_hid,
                random_state=random_state)
rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
    [l2], graph, 'hw', proj_dim=2, n_components=20, random_state=random_state)
binary, coeffs, mus, sigmas, corr = rval
cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
    binary, coeffs, mus, sigmas, corr, y_sym)
cost = masked_cost(cost, y_mask_sym).sum(axis=0).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
clipped_grads = gradient_clipping(grads)
updates = opt.updates(params, clipped_grads)

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym],
                                   [binary, coeffs, mus, sigmas, corr])

checkpoint_dict = create_checkpoint_dict(locals())
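gradient_clipping guards the mixture density training against exploding gradients. Its exact behavior is not shown in this snippet; a common scheme, sketched in numpy under that assumption, rescales all gradients together when their global L2 norm exceeds a threshold:

import numpy as np

def clip_by_global_norm(grads, max_norm=1.0):
    # Assumed clipping scheme: rescale jointly when the global norm is large
    total_norm = np.sqrt(sum((g ** 2).sum() for g in grads))
    scale = min(1.0, max_norm / (total_norm + 1e-8))
    return [g * scale for g in grads]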
Example #10
def test_masked_cost():
    cost = gaussian_error(.5 * X_sym, .5 * X_sym, X_sym)
    masked = masked_cost(X_sym, y_sym)
    theano.function([X_sym, y_sym], [cost, masked], mode="FAST_COMPILE")
graph = OrderedDict()
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
h = lstm_recurrent_layer([l1], X_mask_sym, rnn_dim, graph, 'l1_rec',
                         random_state=random_state)
l2 = relu_layer([h], graph, 'l2', proj_dim=n_hid,
                random_state=random_state)
rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
    [l2], graph, 'hw', proj_dim=2, n_components=20, random_state=random_state)
binary, coeffs, mus, sigmas, corr = rval
cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
    binary, coeffs, mus, sigmas, corr, y_sym)
cost = masked_cost(cost, y_mask_sym).sum(axis=0).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
clipped_grads = gradient_clipping(grads)
updates = opt.updates(params, clipped_grads)

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym],
                                   [binary, coeffs, mus, sigmas, corr])

valid_itr = list_iterator([X, y], minibatch_size, axis=1, make_mask=True,
                          start_index=train_end)
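list_iterator slices aligned minibatches from each array along axis=1 (the minibatch axis in the (time, minibatch, features) layout), optionally building masks, with start_index used here to carve out the validation split. A simplified sketch of just the slicing, where iter_minibatches is a hypothetical name:

def iter_minibatches(arrays, minibatch_size, start_index=0):
    # Hypothetical simplified iterator: no masks, no shuffling
    n = arrays[0].shape[1]
    for i in range(start_index, n, minibatch_size):
        yield [a[:, i:i + minibatch_size] for a in arrays]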