Example #1
def test_rnn_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    minibatch_size = 5
    X_seq = np.array([bernoulli_X for i in range(minibatch_size)])
    y_seq = np.array([bernoulli_y for i in range(minibatch_size)])
    X_mb, X_mb_mask = make_masked_minibatch(X_seq, slice(0, minibatch_size))
    y_mb, y_mb_mask = make_masked_minibatch(y_seq, slice(0, minibatch_size))
    datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)
    n_hid = 5
    train_indices = np.arange(len(X_seq))
    valid_indices = np.arange(len(X_seq))

    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    h = gru_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                            random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [h], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = create_checkpoint_dict(locals())

    # note: no updates are applied; the compiled cost function serves as both
    # the "fit" and the validation function in this test
    epoch_results = fixed_n_epochs_trainer(
        cost_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [X_seq, y_seq],
        minibatch_size,
        list_of_minibatch_functions=[make_masked_minibatch,
                                     make_masked_minibatch],
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
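
A minimal sketch of what a masked-minibatch helper of this kind typically
produces: variable-length sequences padded to a common length, plus a binary
mask marking the valid steps. The real make_masked_minibatch also takes a
slice of indices and may differ in layout and dtype; the names below are
illustrative only.

import numpy as np

def masked_minibatch_sketch(sequences):
    # sequences: list of (length_i, n_features) arrays
    max_len = max(len(s) for s in sequences)
    n_features = sequences[0].shape[1]
    X = np.zeros((max_len, len(sequences), n_features), dtype="float32")
    X_mask = np.zeros((max_len, len(sequences)), dtype="float32")
    for i, s in enumerate(sequences):
        X[:len(s), i] = s
        X_mask[:len(s), i] = 1.
    return X, X_mask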
Example #2
def test_correlated_mixture_density():
    # graph holds information necessary to build layers from parents
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    X_sym, y_sym = add_datasets_to_graph([bernoulli_X, bernoulli_y], ["X", "y"],
                                         graph)
    n_hid = 20
    minibatch_size = len(bernoulli_X)
    train_indices = np.arange(len(bernoulli_X))
    valid_indices = np.arange(len(bernoulli_X))

    l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                    random_state=random_state)
    rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
        [l1], graph, 'hw', proj_dim=2, n_components=3,
        random_state=random_state)
    binary, coeffs, mus, log_sigmas, corr = rval
    cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
        binary, coeffs, mus, log_sigmas, corr, y_sym).mean()
    params, grads = get_params_and_grads(graph, cost)

    learning_rate = 1E-6
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, y_sym], [cost], updates=updates,
                                   mode="FAST_COMPILE")
    cost_function = theano.function([X_sym, y_sym], [cost],
                                    mode="FAST_COMPILE")

    checkpoint_dict = create_checkpoint_dict(locals())

    epoch_results = fixed_n_epochs_trainer(
        fit_function, cost_function, train_indices, valid_indices,
        checkpoint_dict, [bernoulli_X, bernoulli_y],
        minibatch_size,
        list_of_train_output_names=["train_cost"],
        valid_output_name="valid_cost",
        n_epochs=1)
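
Plain SGD, as used here with a very small learning rate, applies the update
p <- p - learning_rate * grad to every parameter. A minimal numpy sketch of
one step (dagbldr's sgd class is only assumed to wrap this rule):

import numpy as np

def sgd_step(params, grads, learning_rate):
    # one in-place stochastic gradient descent update
    for p, g in zip(params, grads):
        p -= learning_rate * g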
Example #3
l1 = tanh_layer([X_sym], graph, 'l1', proj_dim=n_hid, random_state=random_state)
y_pred = linear_layer([l1], graph, 'y_pred',  proj_dim=n_out,
                      random_state=random_state)
cost = ((y_pred - y_sym) ** 2).mean()
# Can also define cost this way using dagbldr
# cost = squared_error(y_pred, y_sym).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 1E-3
momentum = 0.8
opt = rmsprop(params, learning_rate, momentum)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())

epoch_results = fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_cost",
    n_epochs=1000)

# pred_sine_y = predict_function(sine_x)[0]
# plt.plot(sine_x, pred_sine_y, "o", color="red", alpha=0.3)
# plt.plot(sine_x, sine_y, "o", color="steelblue", alpha=0.3)
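
rmsprop divides each gradient by a running root-mean-square of its recent
magnitudes, which keeps step sizes comparable across parameters. Combined
with momentum it looks roughly like the sketch below (a standard
formulation; the decay and epsilon values are assumptions, not dagbldr's
exact code):

import numpy as np

def rmsprop_momentum_step(p, g, acc, vel, lr=1E-3, mom=0.8,
                          decay=0.9, eps=1E-8):
    # acc: running average of squared gradients; vel: momentum buffer
    acc[:] = decay * acc + (1 - decay) * g ** 2
    vel[:] = mom * vel - lr * g / (np.sqrt(acc) + eps)
    p += vel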
Example #4
    # remove repeats (CTC-style collapse of consecutive duplicate indices)
    not_same = np.where((indices[1:] != indices[:-1]))[0]
    last_char = ""
    if len(not_same) > 0:
        last_char = vocab[indices[-1]]
        indices = indices[not_same]
    s = "".join([vocab[i] for i in indices])
    ctc_string = s + last_char
    return ctc_string, non_ctc_string


def print_ctc_prediction(X_sym, X_mask_sym, y_sym, y_mask_sym):
    all_y_pred = predict_function(X_sym, X_mask_sym)[0]
    for n in range(all_y_pred.shape[1]):
        y_pred = all_y_pred[:, n]
        ctc_string, non_ctc_string = prediction_strings(y_pred)
        print(ctc_string)
        print(non_ctc_string)

fixed_n_epochs_trainer(fit_function, cost_function,
                       train_indices, valid_indices,
                       checkpoint_dict, [X, y], minibatch_size,
                       monitor_function=print_ctc_prediction,
                       list_of_minibatch_functions=[
                           make_masked_minibatch,
                           make_masked_minibatch],
                       list_of_train_output_names=["cost"],
                       valid_output_name="valid_cost",
                       valid_frequency=10,
                       n_epochs=1000)
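
The example above begins inside a truncated prediction_strings helper. A
plausible reconstruction of the whole function is sketched below, assuming
y_pred holds per-timestep class probabilities and vocab maps integer indices
to characters; only the tail after "# remove repeats" appears in the
original, so the head (and any blank-symbol handling) is an assumption:

def prediction_strings(y_pred):
    # most probable symbol at every timestep
    indices = np.argmax(y_pred, axis=1)
    non_ctc_string = "".join([vocab[i] for i in indices])
    # CTC-style collapse: keep one symbol per run of repeats
    not_same = np.where((indices[1:] != indices[:-1]))[0]
    last_char = ""
    if len(not_same) > 0:
        last_char = vocab[indices[-1]]
        indices = indices[not_same]
    s = "".join([vocab[i] for i in indices])
    ctc_string = s + last_char
    return ctc_string, non_ctc_string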
Example #5
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0001
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

fit_function = theano.function([X_sym, y_sym], [nll, kl, nll + kl],
                               updates=updates)
cost_function = theano.function([X_sym, y_sym], [nll + kl])
predict_function = theano.function([X_sym], [y_pred])
encode_function = theano.function([X_sym], [code_mu, code_log_sigma])
decode_function = theano.function([samp, y_sym], [out])
checkpoint_dict = {}
checkpoint_dict["fit_function"] = fit_function
checkpoint_dict["cost_function"] = cost_function
checkpoint_dict["predict_function"] = predict_function
checkpoint_dict["encode_function"] = encode_function
checkpoint_dict["decode_function"] = decode_function
previous_results = None

epoch_results = fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_train_output_names=["nll", "kl", "lower_bound"],
    valid_output_name="valid_lower_bound",
    valid_frequency="train_length",
    n_epochs=2000, previous_results=previous_results,
    shuffle=True, random_state=random_state)
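
The nll + kl objective monitored above is the usual negative variational
lower bound. For a diagonal Gaussian encoder against a standard normal
prior, the KL term has the closed form sketched below (standard VAE math;
that this is exactly what code_mu and code_log_sigma feed into is an
assumption):

import numpy as np

def gaussian_kl(code_mu, code_log_sigma):
    # KL(N(mu, sigma^2) || N(0, I)), summed over latent dimensions
    return -0.5 * np.sum(1 + 2 * code_log_sigma
                         - code_mu ** 2
                         - np.exp(2 * code_log_sigma), axis=-1)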
Example #6
rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
    [l2], graph, 'hw', proj_dim=2, n_components=20, random_state=random_state)
binary, coeffs, mus, sigmas, corr = rval
cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
    binary, coeffs, mus, sigmas, corr, y_sym)
cost = masked_cost(cost, y_mask_sym).sum(axis=0).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
clipped_grads = gradient_clipping(grads)
updates = opt.updates(params, clipped_grads)

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym],
                                   [binary, coeffs, mus, sigmas, corr])

checkpoint_dict = create_checkpoint_dict(locals())

epoch_results = fixed_n_epochs_trainer(
    fit_function, cost_function, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_minibatch_functions=[make_masked_minibatch, make_masked_minibatch],
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_cost",
    valid_frequency="train_length",
    n_epochs=20)
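
gradient_clipping above presumably guards the recurrent layers against
exploding gradients. A minimal numpy sketch of the standard global-norm
rescaling (the threshold, and whether dagbldr clips by norm or by value,
are assumptions):

import numpy as np

def clip_gradients(grads, max_norm=10.):
    # rescale all gradients together if their global L2 norm is too large
    total_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if total_norm > max_norm:
        grads = [g * (max_norm / total_norm) for g in grads]
    return grads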
Example #7
print("Compiling fit...")
fit_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms
                               + [X_query_mask_sym, y_sym], [cost],
                               updates=updates)
print("Compiling cost...")
cost_function = theano.function(X_story_syms + [X_story_mask_sym] + X_query_syms
                                + [X_query_mask_sym, y_sym], [cost])
print("Compiling predict...")
predict_function = theano.function(X_story_syms + [X_story_mask_sym] +
                                   X_query_syms + [X_query_mask_sym], [y_pred])


def error(*args):
    # every argument but the last is a network input; the last is the
    # one-hot target matrix
    xargs = args[:-1]
    y = args[-1]
    y_pred = predict_function(*xargs)[0]
    # misclassification rate over argmax class predictions
    return 1 - np.mean(np.argmax(y_pred, axis=1).ravel() ==
                       np.argmax(y, axis=1).ravel())

checkpoint_dict = {}
epoch_results = fixed_n_epochs_trainer(
    fit_function, error, train_indices, valid_indices, checkpoint_dict,
    [X_story, X_query, y_answer],
    minibatch_size,
    list_of_minibatch_functions=[make_embedding_minibatch,
                                 make_embedding_minibatch,
                                 make_minibatch],
    list_of_train_output_names=["cost"],
    valid_output_name="valid_error", n_epochs=100)
Example #8
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.13
opt = sgd(params, learning_rate)
updates = opt.updates(params, grads)


fit_function = theano.function([X_sym, y_sym], [cost], updates=updates)
cost_function = theano.function([X_sym, y_sym], [cost])
predict_function = theano.function([X_sym], [y_pred])

checkpoint_dict = create_checkpoint_dict(locals())


def error(*args):
    xargs = args[:-1]
    y = args[-1]
    y_pred = predict_function(*xargs)[0]
    return 1 - np.mean(np.argmax(y_pred, axis=1).ravel() ==
                       np.argmax(y, axis=1).ravel())

epoch_results = fixed_n_epochs_trainer(
    fit_function, error, train_indices, valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    list_of_train_output_names=["train_cost"],
    valid_output_name="valid_error",
    n_epochs=100,
    optimizer_object=opt)
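
create_checkpoint_dict(locals()) plays the same role as the dictionary
assembled by hand in Example #5: it collects the objects worth serializing
from the calling scope. A rough sketch of the idea (the real helper's
selection rules are an assumption; here compiled functions are picked up
by naming convention):

def checkpoint_dict_sketch(local_vars):
    # keep anything that looks like a compiled theano function
    return {k: v for k, v in local_vars.items()
            if k.endswith("_function")}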
Example #9
    if len(not_same) > 0:
        last_char = vocab[indices[-1]]
        indices = indices[not_same]
    s = "".join([vocab[i] for i in indices])
    ctc_string = s + last_char
    return ctc_string, non_ctc_string


def print_ctc_prediction(X_sym, X_mask_sym, y_sym, y_mask_sym):
    all_y_pred = predict_function(X_sym, X_mask_sym)[0]
    for n in range(all_y_pred.shape[1]):
        y_pred = all_y_pred[:, n]
        ctc_string, non_ctc_string = prediction_strings(y_pred)
        print(ctc_string)
        print(non_ctc_string)


fixed_n_epochs_trainer(
    fit_function,
    cost_function,
    train_indices,
    valid_indices,
    checkpoint_dict, [X, y],
    minibatch_size,
    monitor_function=print_ctc_prediction,
    list_of_minibatch_functions=[make_masked_minibatch, make_masked_minibatch],
    list_of_train_output_names=["cost"],
    valid_output_name="valid_cost",
    valid_frequency=10,
    n_epochs=1000)