Example #1
def test_lstm_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Set up weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid, random_state)

    h = lstm_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out, random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.01
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost],
                                   updates=updates,
                                   mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost],
                                    mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function,
                           cost_function,
                           checkpoint_dict, [X, y],
                           minibatch_size,
                           train_indices,
                           valid_indices,
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
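
This test refers to several names that are not defined inside the function: X, X_mask, y, y_mask, and minibatch_size are module-level globals in the original test file, and the layer, cost, and trainer helpers (add_datasets_to_graph, linear_layer, lstm_recurrent_layer, squared_error, masked_cost, get_params_and_grads, sgd, early_stopping_trainer) are assumed to be imported from the library under test. The "first dimension is time" comment means the data is laid out time-major. A minimal sketch of what such globals could look like, using only NumPy; every shape and name below is an illustrative assumption, not taken from the original file:

import numpy as np

# Hypothetical shapes: 20 timesteps, 8 sequences, 2 features per step
n_timesteps, n_samples, n_features = 20, 8, 2
minibatch_size = 4
rs = np.random.RandomState(1999)

# time-major padded sequences, shape (time, samples, features)
X = rs.randn(n_timesteps, n_samples, n_features).astype("float32")
y = rs.randn(n_timesteps, n_samples, n_features).astype("float32")

# masks flag the valid timesteps of each sequence, shape (time, samples)
X_mask = np.ones((n_timesteps, n_samples), dtype="float32")
y_mask = np.ones((n_timesteps, n_samples), dtype="float32")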
Example #2
def test_lstm_rnn():
    # random state so script is deterministic
    random_state = np.random.RandomState(1999)
    # home of the computational graph
    graph = OrderedDict()

    # number of hidden features
    n_hid = 10
    # number of output_features = input_features
    n_out = X.shape[-1]

    # input (where first dimension is time)
    datasets_list = [X, X_mask, y, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    test_values_list = [X, X_mask, y, y_mask]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph, list_of_test_values=test_values_list)

    # Set up weights
    l1 = linear_layer([X_sym], graph, 'l1_proj', n_hid,
                      random_state=random_state)

    h = lstm_recurrent_layer([l1], X_mask_sym, n_hid, graph, 'l1_rec',
                             random_state)

    # linear output activation
    y_hat = linear_layer([h], graph, 'l2_proj', n_out,
                         random_state=random_state)

    # error between output and target
    cost = squared_error(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    learning_rate = 0.01
    opt = sgd(params, learning_rate)
    updates = opt.updates(params, grads)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates, mode="FAST_COMPILE")

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")
    checkpoint_dict = {}
    train_indices = np.arange(X.shape[1])
    valid_indices = np.arange(X.shape[1])
    early_stopping_trainer(fit_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict,
                           [X, y], minibatch_size,
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
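
Examples #1 and #2 build the same model; they differ only in where the learning rate is passed (sgd(params) with opt.updates(params, grads, learning_rate) versus sgd(params, learning_rate) with opt.updates(params, grads)) and in the keyword names given to early_stopping_trainer, which suggests they target different revisions of the same API. Either way, the compiled Theano functions can also be driven by hand. A hedged sketch, assuming X_mb, X_mb_mask, y_mb, y_mb_mask are NumPy minibatch arrays shaped like the symbolic inputs:

# Illustrative only: one manual training / evaluation step with the compiled functions.
train_cost, = fit_function(X_mb, X_mb_mask, y_mb, y_mb_mask)   # applies the SGD updates
valid_cost, = cost_function(X_mb, X_mb_mask, y_mb, y_mb_mask)  # evaluation, no parameter updates
print("train %f valid %f" % (train_cost, valid_cost))

The snippets below scale the same pattern up to a larger recurrent model with a mixture density output.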
n_hid = 300
rnn_dim = 1200

# build one masked minibatch from the raw sequences; these arrays seed the symbolic graph
X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, minibatch_size))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, minibatch_size))


datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
graph = OrderedDict()
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

# relu input projection -> LSTM recurrence -> relu output projection
l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
h = lstm_recurrent_layer([l1], X_mask_sym, rnn_dim, graph, 'l1_rec',
                         random_state=random_state)
l2 = relu_layer([h], graph, 'l2', proj_dim=n_hid,
                random_state=random_state)
# output layer: a Bernoulli component plus a correlated 2D log-Gaussian mixture with 20 components
rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
    [l2], graph, 'hw', proj_dim=2, n_components=20, random_state=random_state)
binary, coeffs, mus, sigmas, corr = rval
# mixture cost against y_sym; mask, sum over time, average over the minibatch
cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
    binary, coeffs, mus, sigmas, corr, y_sym)
cost = masked_cost(cost, y_mask_sym).sum(axis=0).mean()
params, grads = get_params_and_grads(graph, cost)

# Adam optimizer with gradient clipping applied before the updates
learning_rate = 0.0003
opt = adam(params, learning_rate)
clipped_grads = gradient_clipping(grads)
updates = opt.updates(params, clipped_grads)
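
The excerpt stops after building the updates. Following the pattern of the test functions above, the next step would be to compile training and evaluation functions; a sketch under that assumption (not part of the original snippet):

# Sketch only: compile fit/cost functions the same way the tests above do.
fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                               [cost], updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                [cost])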

# variant: draw the sample minibatch from a list_iterator (axis=1 is the sample axis)
# rather than slicing the arrays directly
train_itr = list_iterator([X, y], minibatch_size, axis=1, make_mask=True,
                          stop_index=train_end)
X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr)
train_itr.reset()

# rebuild the same graph, layers, cost, and optimizer with the iterator's minibatch
datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
graph = OrderedDict()
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = relu_layer([X_sym], graph, 'l1', proj_dim=n_hid,
                random_state=random_state)
h = lstm_recurrent_layer([l1], X_mask_sym, rnn_dim, graph, 'l1_rec',
                         random_state=random_state)
l2 = relu_layer([h], graph, 'l2', proj_dim=n_hid,
                random_state=random_state)
rval = bernoulli_and_correlated_log_gaussian_mixture_layer(
    [l2], graph, 'hw', proj_dim=2, n_components=20, random_state=random_state)
binary, coeffs, mus, sigmas, corr = rval
cost = bernoulli_and_correlated_log_gaussian_mixture_cost(
    binary, coeffs, mus, sigmas, corr, y_sym)
cost = masked_cost(cost, y_mask_sym).sum(axis=0).mean()
params, grads = get_params_and_grads(graph, cost)

learning_rate = 0.0003
opt = adam(params, learning_rate)
clipped_grads = gradient_clipping(grads)
updates = opt.updates(params, clipped_grads)
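
With the list_iterator variant, a plain training loop would repeatedly draw masked minibatches and feed them to a compiled fit function. This is a hedged sketch rather than code from the original: it assumes fit_function was compiled from this graph as sketched after the previous snippet, and that the iterator signals the end of a pass with StopIteration (the excerpt only shows next() and reset()).

# Hypothetical training loop over the iterator; n_epochs is an illustrative value.
n_epochs = 10
for epoch in range(n_epochs):
    epoch_costs = []
    while True:
        try:
            X_mb, X_mb_mask, y_mb, y_mb_mask = next(train_itr)
        except StopIteration:
            train_itr.reset()
            break
        mb_cost, = fit_function(X_mb, X_mb_mask, y_mb, y_mb_mask)
        epoch_costs.append(mb_cost)
    print("epoch %d mean cost %f" % (epoch, np.mean(epoch_costs)))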