def test_feedforward_theano_mix():
    """Run one training epoch on a linear + softmax classifier.

    The hidden projection is multiplied by a constant outside of any layer
    helper before it reaches the softmax layer, so the graph mixes layer
    helpers with a hand-written expression.  ``X``, ``y`` and ``n_classes``
    are not defined here and are taken from the enclosing scope.
    """
    rng = np.random.RandomState(1999)
    graph = OrderedDict()
    batch_size = 100

    X_sym, y_sym = add_datasets_to_graph([X, y], ["X", "y"], graph)

    hidden = linear_layer([X_sym], graph, 'l1', proj_dim=20,
                          random_state=rng)
    # Hand-written scaling applied between the two layer helpers.
    scaled_hidden = .999 * hidden
    y_pred = softmax_layer([scaled_hidden], graph, 'pred', n_classes,
                           random_state=rng)

    cost = categorical_crossentropy(y_pred, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)
    step_size = 0.001
    updates = sgd(params).updates(params, grads, step_size)

    # The fit function applies the SGD updates; the cost function only
    # evaluates the same cost expression.
    fit_function = theano.function(
        [X_sym, y_sym], [cost], updates=updates, mode="FAST_COMPILE")
    cost_function = theano.function(
        [X_sym, y_sym], [cost], mode="FAST_COMPILE")

    # Training and validation both use every index of X.
    n_samples = len(X)
    checkpoint_dict = {}
    early_stopping_trainer(
        fit_function, cost_function, checkpoint_dict, [X, y], batch_size,
        np.arange(n_samples), np.arange(n_samples),
        fit_function_output_names=["cost"],
        cost_function_output_name="valid_cost",
        n_epochs=1)
Beispiel #2
0
def test_conditional_gru_recurrent():
    """Build a GRU layer feeding a conditional GRU layer and run one epoch.

    No training function is compiled: the update code is disabled inside an
    inert string literal, and ``cost_function`` (compiled without updates)
    is handed to the trainer as both the fit and the validation function.
    Data arrays, ``n_chars``, ``minibatch_size`` and ``text_minibatch_func``
    are taken from the enclosing scope.
    """
    rng = np.random.RandomState(1999)
    graph = OrderedDict()
    hidden_dim = 5
    output_dim = n_chars

    # input (where first dimension is time)
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        [X_mb, X_mask, y_mb, y_mask],
        ["X", "X_mask", "y", "y_mask"],
        graph)

    encoded = gru_recurrent_layer([X_sym], X_mask_sym, hidden_dim, graph,
                                  'l1_end', rng)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context = conditional_gru_recurrent_layer(
        [y_sym], [encoded], y_mask_sym, hidden_dim, graph, 'l2_dec', rng)

    # softmax output fed by the decoder state, the context and shifted y
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          output_dim, random_state=rng)

    # masked error between output and target, averaged
    per_step_cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(per_step_cost, y_mask_sym).mean()
    # Disabled training code, kept as a no-op string expression.
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)


    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym], [cost], mode="FAST_COMPILE")

    n_samples = len(X)
    train_indices = np.arange(n_samples)
    valid_indices = np.arange(n_samples)
    checkpoint_dict = {}
    early_stopping_trainer(
        cost_function, cost_function,
        train_indices, valid_indices,
        checkpoint_dict, [X, y], minibatch_size,
        list_of_minibatch_functions=[text_minibatch_func],
        list_of_train_output_names=["cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
Beispiel #3
0
def test_tanh_rnn():
    """Train a projection + tanh recurrent + linear model for one epoch.

    Uses a masked squared-error cost optimized with SGD.  The data arrays
    and ``minibatch_size`` are taken from the enclosing scope.
    """
    # fixed seed so the test is deterministic
    rng = np.random.RandomState(1999)
    # holds the computational graph
    graph = OrderedDict()

    hidden_dim = 10
    # the output has as many features as the last axis of X
    output_dim = X.shape[-1]

    # input (where first dimension is time)
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        [X, X_mask, y, y_mask],
        ["X", "X_mask", "y", "y_mask"],
        graph,
        list_of_test_values=[X, X_mask, y, y_mask])

    # input projection followed by the recurrent layer
    projected = linear_layer([X_sym], graph, 'l1_proj', hidden_dim, rng)
    recurrent = tanh_recurrent_layer([projected], X_mask_sym, hidden_dim,
                                     graph, 'l1_rec', rng)

    # linear output activation
    y_hat = linear_layer([recurrent], graph, 'l2_proj', output_dim, rng)

    # masked error between output and target, averaged
    cost = masked_cost(squared_error(y_hat, y_sym), y_mask_sym).mean()
    params, grads = get_params_and_grads(graph, cost)

    # stochastic gradient descent updates
    step_size = 0.001
    updates = sgd(params).updates(params, grads, step_size)

    fit_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
        updates=updates, mode="FAST_COMPILE")
    cost_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
        mode="FAST_COMPILE")

    # Indices run over axis 1 of X; training and validation share them.
    n_along_axis_1 = X.shape[1]
    train_indices = np.arange(n_along_axis_1)
    valid_indices = np.arange(n_along_axis_1)
    checkpoint_dict = {}
    early_stopping_trainer(
        fit_function, cost_function, checkpoint_dict, [X, y],
        minibatch_size, train_indices, valid_indices,
        fit_function_output_names=["cost"],
        cost_function_output_name="valid_cost",
        n_epochs=1)
Beispiel #4
0
def test_conditional_gru_recurrent():
    """Build a GRU layer feeding a conditional GRU layer and run one epoch.

    The parameter-update code is disabled inside an inert string literal,
    so ``cost_function`` (compiled without updates) serves as both the fit
    and the validation function.  Data arrays, ``n_chars``,
    ``minibatch_size`` and ``text_minibatch_func`` are taken from the
    enclosing scope.
    """
    rng = np.random.RandomState(1999)
    graph = OrderedDict()
    hidden_dim = 5
    output_dim = n_chars

    # input (where first dimension is time)
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        [X_mb, X_mask, y_mb, y_mask],
        ["X", "X_mask", "y", "y_mask"],
        graph)

    encoded = gru_recurrent_layer([X_sym], X_mask_sym, hidden_dim, graph,
                                  'l1_end', rng)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context = conditional_gru_recurrent_layer(
        [y_sym], [encoded], y_mask_sym, hidden_dim, graph, 'l2_dec', rng)

    # softmax output fed by the decoder state, the context and shifted y
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          output_dim, rng)

    # masked error between output and target, averaged
    per_step_cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(per_step_cost, y_mask_sym).mean()
    # Disabled training code, kept as a no-op string expression.
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)


    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym], [cost], mode="FAST_COMPILE")

    n_samples = len(X)
    train_indices = np.arange(n_samples)
    valid_indices = np.arange(n_samples)
    checkpoint_dict = {}
    early_stopping_trainer(
        cost_function, cost_function, checkpoint_dict, [X, y],
        minibatch_size, train_indices, valid_indices,
        list_of_minibatch_functions=[text_minibatch_func],
        fit_function_output_names=["cost"],
        cost_function_output_name="valid_cost",
        n_epochs=1)
Beispiel #5
0
def test_tanh_rnn():
    """Train a projection + tanh recurrent + linear model for one epoch.

    Uses a masked squared-error cost; the learning rate is given to ``sgd``
    at construction time.  The data arrays and ``minibatch_size`` are taken
    from the enclosing scope.
    """
    # fixed seed so the test is deterministic
    rng = np.random.RandomState(1999)
    # holds the computational graph
    graph = OrderedDict()

    hidden_dim = 10
    # the output has as many features as the last axis of X
    output_dim = X.shape[-1]

    # input (where first dimension is time)
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        [X, X_mask, y, y_mask],
        ["X", "X_mask", "y", "y_mask"],
        graph,
        list_of_test_values=[X, X_mask, y, y_mask])

    # input projection followed by the recurrent layer
    projected = linear_layer([X_sym], graph, 'l1_proj', proj_dim=hidden_dim,
                             random_state=rng)
    recurrent = tanh_recurrent_layer([projected], X_mask_sym, hidden_dim,
                                     graph, 'l1_rec', rng)

    # linear output activation
    y_hat = linear_layer([recurrent], graph, 'l2_proj', proj_dim=output_dim,
                         random_state=rng)

    # masked error between output and target, averaged
    cost = masked_cost(squared_error(y_hat, y_sym), y_mask_sym).mean()
    params, grads = get_params_and_grads(graph, cost)

    # stochastic gradient descent updates
    step_size = 0.001
    updates = sgd(params, step_size).updates(params, grads)

    fit_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
        updates=updates, mode="FAST_COMPILE")
    cost_function = theano.function(
        [X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
        mode="FAST_COMPILE")

    # Indices run over axis 1 of X; training and validation share them.
    n_along_axis_1 = X.shape[1]
    train_indices = np.arange(n_along_axis_1)
    valid_indices = np.arange(n_along_axis_1)
    checkpoint_dict = {}
    early_stopping_trainer(
        fit_function, cost_function,
        train_indices, valid_indices,
        checkpoint_dict, [X, y], minibatch_size,
        list_of_train_output_names=["cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
Beispiel #6
0
def test_vae():
    """Build a small variational autoencoder graph and run one epoch.

    The cost is the mean binary cross-entropy between the output and the
    input plus the mean Gaussian KL term.  The learning rate is zero, and
    ``X`` is taken from the enclosing scope.
    """
    rng = np.random.RandomState(1999)
    graph = OrderedDict()
    batch_size = 10

    X_sym = add_datasets_to_graph([X], ["X"], graph)

    # encoder: hidden layer, then mean and log-sigma projections
    enc_hidden = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                                random_state=rng)
    mu = linear_layer([enc_hidden], graph, 'mu', proj_dim=50,
                      random_state=rng)
    log_sigma = linear_layer([enc_hidden], graph, 'log_sigma', proj_dim=50,
                             random_state=rng)
    samp = gaussian_log_sample_layer(
        [mu], [log_sigma], graph, 'gaussian_log_sample', random_state=rng)

    # decoder: hidden layer, then a sigmoid output as wide as X.shape[1]
    dec_hidden = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                                random_state=rng)
    out = sigmoid_layer([dec_hidden], graph, 'out', proj_dim=X.shape[1],
                        random_state=rng)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    reconstruction = binary_crossentropy(out, X_sym).mean()
    cost = reconstruction + kl
    params, grads = get_params_and_grads(graph, cost)
    step_size = 0.00000
    updates = sgd(params, step_size).updates(params, grads)

    fit_function = theano.function(
        [X_sym], [cost], updates=updates, mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")

    # Training and validation both use every index of X.
    n_samples = len(X)
    train_indices = np.arange(n_samples)
    valid_indices = np.arange(n_samples)
    checkpoint_dict = {}
    early_stopping_trainer(
        fit_function, cost_function,
        train_indices, valid_indices,
        checkpoint_dict, [X], batch_size,
        list_of_train_output_names=["cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
Beispiel #7
0
# Compile the training (with updates), evaluation and prediction functions.
fit_function = theano.function(
    [X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function(
    [X_sym, y_sym], [cost])
predict_function = theano.function(
    [X_sym], [y_pred])

# Collect the compiled functions in the dict that is handed to the trainer.
checkpoint_dict = {
    "fit_function": fit_function,
    "cost_function": cost_function,
    "predict_function": predict_function,
}
previous_results = None


def error(X_mb, y_mb):
    """Return the fraction of rows where prediction and target disagree.

    The first output of ``predict_function(X_mb)`` and ``y_mb`` are each
    reduced with an argmax over axis 1; the result is one minus the mean
    agreement of the two index vectors.
    """
    scores = predict_function(X_mb)[0]
    predicted_labels = np.argmax(scores, axis=1).ravel()
    target_labels = np.argmax(y_mb, axis=1).ravel()
    agreement = predicted_labels == target_labels
    return 1 - np.mean(agreement)


# Train with fit_function; validation is scored by ``error`` rather than by
# the compiled cost function.
epoch_results = early_stopping_trainer(
    fit_function, error, train_indices, valid_indices,
    checkpoint_dict, [X, y], minibatch_size,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_error",
    n_epochs=1000,
    optimizer_object=opt,
    previous_results=previous_results)
Beispiel #8
0
updates = opt.updates(params, grads, learning_rate)

# Checkpointing: reuse the functions stored in the last checkpoint, or
# compile everything from scratch when a KeyError is raised.
# NOTE(review): presumably load_last_checkpoint raises KeyError when no
# checkpoint exists -- confirm against its implementation.
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
    cost_function = checkpoint_dict["cost_function"]
    encode_function = checkpoint_dict["encode_function"]
    decode_function = checkpoint_dict["decode_function"]
    previous_epoch_results = checkpoint_dict["previous_epoch_results"]
except KeyError:
    # fit_function reports nll, kl and their sum while applying the updates.
    fit_function = theano.function(
        [X_sym], [nll, kl, nll + kl], updates=updates)
    cost_function = theano.function(
        [X_sym], [nll + kl])
    encode_function = theano.function(
        [X_sym], [code_mu, code_log_sigma])
    decode_function = theano.function(
        [samp], [out])
    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "encode_function": encode_function,
        "decode_function": decode_function,
    }
    previous_epoch_results = None

epoch_results = early_stopping_trainer(
    fit_function,
    cost_function,
    checkpoint_dict,
    [X],
    minibatch_size,
    train_indices,
    valid_indices,
    fit_function_output_names=["nll", "kl", "lower_bound"],
    cost_function_output_name="valid_lower_bound",
    n_epochs=500,
    previous_epoch_results=previous_epoch_results,
    shuffle=True,
    random_state=random_state)
Beispiel #9
0
# Optimizer hyperparameters handed to rmsprop below.
learning_rate = 1E-4
momentum = 0.95
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

# fit_function applies the optimizer updates; cost_function and
# predict_function only evaluate their outputs.
fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

# locals() snapshots every name bound in this scope so far, so adding or
# renaming variables above changes what create_checkpoint_dict receives.
checkpoint_dict = create_checkpoint_dict(locals())


def error(*args):
    """Return the fraction of rows where prediction and target disagree.

    The last positional argument is the target array; all preceding
    arguments are forwarded to ``predict_function``.  Its first output and
    the target are each reduced with an argmax over axis 1 before they are
    compared.
    """
    targets = args[-1]
    scores = predict_function(*args[:-1])[0]
    predicted_labels = np.argmax(scores, axis=1).ravel()
    target_labels = np.argmax(targets, axis=1).ravel()
    agreement = predicted_labels == target_labels
    return 1 - np.mean(agreement)


# Train with fit_function; validation is scored by ``error`` rather than by
# the compiled cost function.
epoch_results = early_stopping_trainer(
    fit_function,
    error,
    train_indices,
    valid_indices,
    checkpoint_dict,
    [X, y],
    minibatch_size,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_error",
    n_epochs=1000,
    optimizer_object=opt)
Beispiel #10
0
# Checkpointing: reuse the functions stored in the last checkpoint, or
# compile everything from scratch when a KeyError is raised.
# NOTE(review): presumably load_last_checkpoint raises KeyError when no
# checkpoint exists -- confirm against its implementation.
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
    cost_function = checkpoint_dict["cost_function"]
    predict_function = checkpoint_dict["predict_function"]
    previous_epoch_results = checkpoint_dict["previous_epoch_results"]
except KeyError:
    fit_function = theano.function(
        [X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function(
        [X_sym, y_sym], [cost])
    predict_function = theano.function(
        [X_sym], [y_pred])
    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "predict_function": predict_function,
    }
    previous_epoch_results = None

epoch_results = early_stopping_trainer(
    fit_function, cost_function, checkpoint_dict, [X, y],
    minibatch_size, train_indices, valid_indices,
    fit_function_output_names=["cost"],
    cost_function_output_name="valid_cost",
    n_epochs=100, previous_epoch_results=previous_epoch_results,
    shuffle=True, random_state=random_state)
Beispiel #11
0
updates = opt.updates(params, grads, learning_rate)

# The fit and cost functions take the story symbols, the story mask, the
# query symbols, the query mask and the target, in that order; the predict
# function takes the same inputs without the target.
print("Compiling fit...")
fit_function = theano.function(
    X_story_syms + [X_story_mask_sym] + X_query_syms
    + [X_query_mask_sym, y_sym],
    [cost],
    updates=updates)
print("Compiling cost...")
cost_function = theano.function(
    X_story_syms + [X_story_mask_sym] + X_query_syms
    + [X_query_mask_sym, y_sym],
    [cost])
print("Compiling predict...")
predict_function = theano.function(
    X_story_syms + [X_story_mask_sym] + X_query_syms
    + [X_query_mask_sym],
    [y_pred])


def accuracy(*args):
    """Return the fraction of rows where prediction and target agree.

    The last positional argument is the target array; all preceding
    arguments are forwarded to ``predict_function``.  Its first output and
    the target are each reduced with an argmax over axis 1 before they are
    compared.
    """
    targets = args[-1]
    scores = predict_function(*args[:-1])[0]
    predicted_labels = np.argmax(scores, axis=1).ravel()
    target_labels = np.argmax(targets, axis=1).ravel()
    return np.mean(predicted_labels == target_labels)

checkpoint_dict = {}
# One minibatch function is supplied per dataset in the list below.
# NOTE(review): ``accuracy`` is higher-is-better but its output is named
# "valid_cost" -- confirm how early_stopping_trainer interprets this value.
epoch_results = early_stopping_trainer(
    fit_function,
    accuracy,
    checkpoint_dict,
    [X_story, X_query, y_answer],
    minibatch_size,
    train_indices,
    valid_indices,
    list_of_minibatch_functions=[
        make_embedding_minibatch,
        make_embedding_minibatch,
        make_minibatch,
    ],
    fit_function_output_names=["cost"],
    cost_function_output_name="valid_cost",
    n_epochs=20)
Beispiel #12
0
learning_rate = 0.0002
opt = adam(params)
updates = opt.updates(params, grads, learning_rate)

# Checkpointing: reuse the functions stored in the last checkpoint, or
# compile everything from scratch when a KeyError is raised.
# NOTE(review): presumably load_last_checkpoint raises KeyError when no
# checkpoint exists -- confirm against its implementation.
try:
    checkpoint_dict = load_last_checkpoint()
    fit_function = checkpoint_dict["fit_function"]
    cost_function = checkpoint_dict["cost_function"]
    predict_function = checkpoint_dict["predict_function"]
    previous_epoch_results = checkpoint_dict["previous_epoch_results"]
except KeyError:
    fit_function = theano.function(
        [X_sym, y_sym], [cost], updates=updates)
    cost_function = theano.function(
        [X_sym, y_sym], [cost])
    predict_function = theano.function(
        [X_sym], [y_pred])
    checkpoint_dict = {
        "fit_function": fit_function,
        "cost_function": cost_function,
        "predict_function": predict_function,
    }
    previous_epoch_results = None

epoch_results = early_stopping_trainer(
    fit_function,
    cost_function,
    checkpoint_dict,
    [X, y],
    minibatch_size,
    train_indices,
    valid_indices,
    fit_function_output_names=["cost"],
    cost_function_output_name="valid_cost",
    n_epochs=100,
    previous_epoch_results=previous_epoch_results,
    shuffle=True,
    random_state=random_state)
Beispiel #13
0
def test_vae():
    """Build a small variational autoencoder graph and run one epoch.

    The cost is the mean binary cross-entropy between the output and the
    input plus the mean Gaussian KL term.  The learning rate passed to the
    update rule is zero, and ``X`` is taken from the enclosing scope.
    """
    rng = np.random.RandomState(1999)
    graph = OrderedDict()
    batch_size = 10

    X_sym = add_datasets_to_graph([X], ["X"], graph)

    # encoder: hidden layer, then mean and log-sigma projections
    enc_hidden = softplus_layer([X_sym], graph, 'l1_enc', proj_dim=100,
                                random_state=rng)
    mu = linear_layer([enc_hidden], graph, 'mu', proj_dim=50,
                      random_state=rng)
    log_sigma = linear_layer([enc_hidden], graph, 'log_sigma', proj_dim=50,
                             random_state=rng)
    samp = gaussian_log_sample_layer(
        [mu], [log_sigma], graph, 'gaussian_log_sample', random_state=rng)

    # decoder: hidden layer, then a sigmoid output as wide as X.shape[1]
    dec_hidden = softplus_layer([samp], graph, 'l1_dec', proj_dim=100,
                                random_state=rng)
    out = sigmoid_layer([dec_hidden], graph, 'out', proj_dim=X.shape[1],
                        random_state=rng)

    kl = gaussian_log_kl([mu], [log_sigma], graph, 'gaussian_kl').mean()
    reconstruction = binary_crossentropy(out, X_sym).mean()
    cost = reconstruction + kl
    params, grads = get_params_and_grads(graph, cost)
    step_size = 0.00000
    updates = sgd(params).updates(params, grads, step_size)

    fit_function = theano.function(
        [X_sym], [cost], updates=updates, mode="FAST_COMPILE")
    cost_function = theano.function([X_sym], [cost], mode="FAST_COMPILE")

    # Training and validation both use every index of X.
    n_samples = len(X)
    train_indices = np.arange(n_samples)
    valid_indices = np.arange(n_samples)
    checkpoint_dict = {}
    early_stopping_trainer(
        fit_function, cost_function, checkpoint_dict, [X], batch_size,
        train_indices, valid_indices,
        fit_function_output_names=["cost"],
        cost_function_output_name="valid_cost",
        n_epochs=1)