Ejemplo n.º 1
0
def test_conditional_attention_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = bidirectional_gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph,
                                          'l1_end', random_state)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context, attention = conditional_attention_gru_recurrent_layer(
        [y_sym], [h], y_mask_sym, X_mask_sym, n_hid, graph, 'l2_dec',
        random_state)

    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state=random_state)

    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function,
                           train_indices, valid_indices,
                           checkpoint_dict,
                           [X, y],
                           minibatch_size,
                           list_of_minibatch_functions=[text_minibatch_func],
                           list_of_train_output_names=["cost"],
                           valid_output_name="valid_cost",
                           n_epochs=1)
Ejemplo n.º 2
0
def test_conditional_attention_gru_recurrent():
    random_state = np.random.RandomState(1999)
    graph = OrderedDict()
    n_hid = 5
    n_out = n_chars

    # input (where first dimension is time)
    datasets_list = [X_mb, X_mask, y_mb, y_mask]
    names_list = ["X", "X_mask", "y", "y_mask"]
    X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
        datasets_list, names_list, graph)

    h = bidirectional_gru_recurrent_layer([X_sym], X_mask_sym, n_hid, graph,
                                          'l1_end', random_state)

    shifted_y_sym = shift_layer([y_sym], graph, 'shift')

    h_dec, context, attention = conditional_attention_gru_recurrent_layer(
        [y_sym], [h], y_mask_sym, X_mask_sym, n_hid, graph, 'l2_dec',
        random_state)

    # linear output activation
    y_hat = softmax_layer([h_dec, context, shifted_y_sym], graph, 'l2_proj',
                          n_out, random_state)

    # error between output and target
    cost = categorical_crossentropy(y_hat, y_sym)
    cost = masked_cost(cost, y_mask_sym).mean()
    # Parameters of the model
    """
    params, grads = get_params_and_grads(graph, cost)

    # Use stochastic gradient descent to optimize
    opt = sgd(params)
    learning_rate = 0.00000
    updates = opt.updates(params, grads, learning_rate)

    fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                   [cost], updates=updates,
                                   mode="FAST_COMPILE")
    """

    cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym],
                                    [cost], mode="FAST_COMPILE")

    checkpoint_dict = {}
    train_indices = np.arange(len(X))
    valid_indices = np.arange(len(X))
    early_stopping_trainer(cost_function, cost_function, checkpoint_dict,
                           [X, y],
                           minibatch_size, train_indices, valid_indices,
                           list_of_minibatch_functions=[text_minibatch_func],
                           fit_function_output_names=["cost"],
                           cost_function_output_name="valid_cost",
                           n_epochs=1)
Ejemplo n.º 3
0
minibatch_size = 10

X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, minibatch_size))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, minibatch_size))

n_hid = 500
n_out = vocab_size + 1

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = maxout_layer([X_sym], graph, 'l1', n_hid, random_state=random_state)
h = bidirectional_gru_recurrent_layer([l1], X_mask_sym, n_hid, graph,
                                      'l1_rec', random_state=random_state)
l2 = maxout_layer([h], graph, 'l2', n_hid, random_state=random_state)
y_pred = softmax_layer([l2], graph, 'softmax', n_out, random_state=random_state)

cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)

checkpoint_dict = {}

fit_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost],
                               updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, y_sym, y_mask_sym], [cost])
predict_function = theano.function([X_sym, X_mask_sym], [y_pred])
Ejemplo n.º 4
0
X_mb, X_mb_mask = make_masked_minibatch(X, slice(0, minibatch_size))
y_mb, y_mb_mask = make_masked_minibatch(y, slice(0, minibatch_size))

n_hid = 500
n_out = vocab_size + 1

datasets_list = [X_mb, X_mb_mask, y_mb, y_mb_mask]
names_list = ["X", "X_mask", "y", "y_mask"]
X_sym, X_mask_sym, y_sym, y_mask_sym = add_datasets_to_graph(
    datasets_list, names_list, graph)

l1 = maxout_layer([X_sym], graph, 'l1', n_hid, random_state=random_state)
h = bidirectional_gru_recurrent_layer([l1],
                                      X_mask_sym,
                                      n_hid,
                                      graph,
                                      'l1_rec',
                                      random_state=random_state)
l2 = maxout_layer([h], graph, 'l2', n_hid, random_state=random_state)
y_pred = softmax_layer([l2],
                       graph,
                       'softmax',
                       n_out,
                       random_state=random_state)

cost = log_ctc_cost(y_sym, y_mask_sym, y_pred, X_mask_sym).mean()
params, grads = get_params_and_grads(graph, cost)

opt = adadelta(params)
updates = opt.updates(params, grads)