Example #1
    def __init__(self,
                 n_classes,
                 n_visible,
                 n_hidden=150,
                 n_hidden_recurrent=100,
                 lr=0.0001):
        (v, v_sample, cost, monitor, params, updates_train, v_t,
         updates_generate) = build_lstm_softmax_rbm(n_classes, n_visible,
                                                    n_hidden,
                                                    n_hidden_recurrent)

        # Treat the Gibbs sample as a constant so gradients do not flow
        # back through the sampling chain (contrastive-divergence style)
        grads = tensor.grad(cost, params, consider_constant=[v_sample])

        # Alternative optimizers, kept disabled:
        # opt = sgd(params, lr)
        #
        # opt = adadelta(params)
        # grads = gradient_clipping(grads, 10.)

        opt = adam(params, lr)
        grads = gradient_clipping(grads, 10.)
        updates = opt.updates(params, grads)
        updates_train.update(updates)
        self.train_function = theano.function([v],
                                              monitor,
                                              updates=updates_train)
        """
Example #2
    def __init__(self, n_visible=88, n_hidden=150, n_hidden_recurrent=100,
                 lr=0.0001):
        (v, v_sample, cost, monitor, params, updates_train, v_t,
         updates_generate) = build_lstmrbm(n_visible, n_hidden,
                                           n_hidden_recurrent)

        grads = tensor.grad(cost, params, consider_constant=[v_sample])
        """
        opt = sgd(params, lr)
        """

        """
        opt = adadelta(params)
        grads = gradient_clipping(grads, 10.)
        """

        opt = adam(params, lr)
        grads = gradient_clipping(grads, 10.)
        updates = opt.updates(params, grads)
        updates_train.update(updates)
        self.train_function = theano.function([v], monitor,
                                               updates=updates_train)
        self.generate_function = theano.function([], v_t,
                                                 updates=updates_generate)
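
The adam optimizer object is likewise defined elsewhere. A self-contained sketch of the interface these examples rely on, opt = adam(params, lr) followed by opt.updates(params, grads), assuming the standard update rule of Kingma & Ba (2015) (hypothetical implementation, not the codebase's own):

    import numpy as np
    import theano
    import theano.tensor as tensor

    class adam(object):
        def __init__(self, params, learning_rate=0.0001,
                     b1=0.9, b2=0.999, eps=1e-8):
            self.lr, self.b1, self.b2, self.eps = learning_rate, b1, b2, eps

        def updates(self, params, grads):
            # Shared step counter plus per-parameter moment estimates
            t = theano.shared(np.asarray(0., dtype=theano.config.floatX))
            t_new = t + 1.
            ups = [(t, t_new)]
            for p, g in zip(params, grads):
                m = theano.shared(p.get_value() * 0.)
                v = theano.shared(p.get_value() * 0.)
                m_new = self.b1 * m + (1. - self.b1) * g
                v_new = self.b2 * v + (1. - self.b2) * g ** 2
                # Bias-corrected moments, then the parameter step
                m_hat = m_new / (1. - self.b1 ** t_new)
                v_hat = v_new / (1. - self.b2 ** t_new)
                step = self.lr * m_hat / (tensor.sqrt(v_hat) + self.eps)
                ups += [(m, m_new), (v, v_new), (p, p - step)]
            return ups
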
Example #3
    # sum over sequence length and features, mean over minibatch
    cost = cost.dimshuffle(0, 2, 1)
    cost = cost.reshape((-1, cost.shape[2]))
    cost = cost.sum(axis=0).mean()

    # L2 weight decay on every non-bias parameter
    l2_penalty = 0
    for p in list(set(params) - set(biases)):
        l2_penalty += (p**2).sum()

    cost = cost + 1E-3 * l2_penalty
    grads = tensor.grad(cost, params)
    grads = gradient_clipping(grads, 10.)

    learning_rate = 1E-4

    opt = adam(params, learning_rate)
    updates = opt.updates(params, grads)

    if args.cont is not None:
        print("Continuing training from saved model")
        continue_path = args.cont
        if not os.path.exists(continue_path):
            raise ValueError("Continue model %s, path not "
                             "found" % continue_path)
        saved_checkpoint = load_checkpoint(continue_path)
        checkpoint_dict = saved_checkpoint
        train_function = checkpoint_dict["train_function"]
        cost_function = checkpoint_dict["cost_function"]
        predict_function = checkpoint_dict["predict_function"]
        attention_function = checkpoint_dict["attention_function"]
        sample_function = checkpoint_dict["sample_function"]
    # Symbolic number of sampling steps; the test value supports
    # debugging with theano.config.compute_test_value
    n_steps_sym = tensor.iscalar()
    n_steps_sym.tag.test_value = 10
    (sampled, h1_s, h2_s, h3_s, k_s, w_s, stop_s, stop_h), supdates = theano.scan(
        fn=sample_step,
        n_steps=n_steps_sym,
        sequences=[],
        outputs_info=[init_x, init_h1, init_h2, init_h3,
                      init_kappa, init_w, None, None],
        non_sequences=[context])
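    # theano.scan returns the stacked per-step outputs together with an
    # updates dictionary (supdates); when the step uses random streams,
    # supdates must be passed to theano.function so the sampler's RNG
    # state advances between calls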
    """

    grads = gradient_clipping(grads, 10.)

    learning_rate = 1E-4

    opt = adam(params, learning_rate)
    updates = opt.updates(params, grads)

    train_function = theano.function([X_sym, X_mask_sym, c_sym, c_mask_sym,
                                      init_h1, init_h2, init_kappa,
                                      init_w], # bias_sym],
                                     [cost, h1, h2, kappa, w],
                                     updates=updates)
    cost_function = theano.function([X_sym, X_mask_sym, c_sym, c_mask_sym,
                                     init_h1, init_h2, init_kappa,
                                     init_w], # bias_sym],
                                    [cost, h1, h2, kappa, w])
    predict_function = theano.function([X_sym, X_mask_sym, c_sym, c_mask_sym,
                                        init_h1, init_h2, init_kappa,
                                        init_w], # bias_sym],
                                       [preds])
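
All three examples follow the same compilation pattern: build a symbolic cost, differentiate it with tensor.grad, map the gradients onto shared-variable updates, and compile with theano.function. A minimal self-contained instance of that pattern (illustrative only, not taken from the examples above):

    import numpy as np
    import theano
    import theano.tensor as tensor

    # Toy quadratic cost over a single shared parameter vector
    x = tensor.vector("x")
    w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name="w")
    cost = ((x - w) ** 2).sum()
    grad_w = tensor.grad(cost, w)

    # Each call computes the cost, then applies the SGD update to w
    train_step = theano.function([x], cost,
                                 updates=[(w, w - 0.1 * grad_w)])

    for _ in range(100):
        train_step(np.ones(3, dtype=theano.config.floatX))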