def __init__(self, n_classes, n_visible, n_hidden=150,
             n_hidden_recurrent=100, lr=0.0001):
    # Build the symbolic training graph for the LSTM + softmax RBM
    (v, v_sample, cost, monitor, params, updates_train, v_t,
     updates_generate) = build_lstm_softmax_rbm(n_classes, n_visible,
                                                n_hidden, n_hidden_recurrent)
    # Treat the Gibbs sample as constant so gradients do not
    # backpropagate through the sampling chain
    grads = tensor.grad(cost, params, consider_constant=[v_sample])
    # Alternative optimizers, kept for reference:
    # opt = sgd(params, lr)
    #
    # opt = adadelta(params)
    # grads = gradient_clipping(grads, 10.)
    opt = adam(params, lr)
    grads = gradient_clipping(grads, 10.)
    updates = opt.updates(params, grads)
    # Merge the optimizer updates with the model's own training updates
    updates_train.update(updates)
    self.train_function = theano.function([v], monitor,
                                          updates=updates_train)
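# `gradient_clipping` and the optimizer constructors (sgd, adadelta, adam)
# are helpers from this codebase. As a point of reference, a minimal
# hypothetical sketch of global-norm clipping with the same call signature
# follows; the repo's actual helper may differ.
def _gradient_clipping_sketch(grads, threshold=10.):
    # rescale all gradients together when the global norm exceeds threshold
    grad_norm = tensor.sqrt(sum((g ** 2).sum() for g in grads))
    scale = threshold / tensor.maximum(grad_norm, threshold)
    return [g * scale for g in grads]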
def __init__(self, n_visible=88, n_hidden=150, n_hidden_recurrent=100,
             lr=0.0001):
    # Same setup as above for the plain LSTM-RBM; the default n_visible=88
    # matches the 88 keys of a standard piano-roll encoding
    (v, v_sample, cost, monitor, params, updates_train, v_t,
     updates_generate) = build_lstmrbm(n_visible, n_hidden,
                                       n_hidden_recurrent)
    grads = tensor.grad(cost, params, consider_constant=[v_sample])
    # Alternative optimizers, kept for reference:
    # opt = sgd(params, lr)
    #
    # opt = adadelta(params)
    # grads = gradient_clipping(grads, 10.)
    opt = adam(params, lr)
    grads = gradient_clipping(grads, 10.)
    updates = opt.updates(params, grads)
    updates_train.update(updates)
    self.train_function = theano.function([v], monitor,
                                          updates=updates_train)
    self.generate_function = theano.function([], v_t,
                                             updates=updates_generate)
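# Hypothetical usage sketch; `MusicLSTMRBM` (a stand-in name for the class
# this __init__ belongs to), `dataset`, the epoch count, and the
# (timesteps, 88) binary piano-roll layout are assumptions, not from the
# source.
model = MusicLSTMRBM(n_visible=88, n_hidden=150, n_hidden_recurrent=100)
for epoch in range(200):
    for sequence in dataset:
        # one gradient step per sequence; returns the monitored cost
        monitor_cost = model.train_function(sequence)
# generation advances the recurrent sampling state via updates_generate
generated_roll = model.generate_function()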
# Sum over sequence length and features, mean over minibatch
# (assuming cost arrives shaped (sequence_length, minibatch, features))
cost = cost.dimshuffle(0, 2, 1)
cost = cost.reshape((-1, cost.shape[2]))
cost = cost.sum(axis=0).mean()

# L2 penalty on all non-bias parameters
l2_penalty = 0
for p in list(set(params) - set(biases)):
    l2_penalty += (p ** 2).sum()
cost = cost + 1E-3 * l2_penalty

grads = tensor.grad(cost, params)
grads = gradient_clipping(grads, 10.)
learning_rate = 1E-4
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

if args.cont is not None:
    print("Continuing training from saved model")
    continue_path = args.cont
    if not os.path.exists(continue_path):
        raise ValueError("Continue model %s, path not "
                         "found" % continue_path)
    saved_checkpoint = load_checkpoint(continue_path)
    checkpoint_dict = saved_checkpoint
    # Reuse the compiled functions stored in the checkpoint instead of
    # recompiling the graph
    train_function = checkpoint_dict["train_function"]
    cost_function = checkpoint_dict["cost_function"]
    predict_function = checkpoint_dict["predict_function"]
    attention_function = checkpoint_dict["attention_function"]
    sample_function = checkpoint_dict["sample_function"]
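# `load_checkpoint` is a helper from this codebase. A minimal hypothetical
# save/load pair is sketched below (not the repo's implementation); compiled
# Theano functions pickle cleanly, so the whole dict of compiled functions
# can round-trip through a single file.
import pickle

def save_checkpoint_sketch(path, checkpoint_dict):
    with open(path, "wb") as f:
        pickle.dump(checkpoint_dict, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_checkpoint_sketch(path):
    with open(path, "rb") as f:
        return pickle.load(f)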
n_steps_sym = tensor.iscalar()
n_steps_sym.tag.test_value = 10
# Iterate the sampling step n_steps times, carrying the sampled output,
# the three hidden states, and the attention window state (kappa, w);
# stop_s and stop_h are produced fresh each step (outputs_info of None)
(sampled, h1_s, h2_s, h3_s, k_s, w_s,
 stop_s, stop_h), supdates = theano.scan(
    fn=sample_step,
    n_steps=n_steps_sym,
    sequences=[],
    outputs_info=[init_x, init_h1, init_h2, init_h3,
                  init_kappa, init_w, None, None],
    non_sequences=[context])

grads = gradient_clipping(grads, 10.)
learning_rate = 1E-4
opt = adam(params, learning_rate)
updates = opt.updates(params, grads)

train_function = theano.function([X_sym, X_mask_sym, c_sym, c_mask_sym,
                                  init_h1, init_h2, init_kappa, init_w],
                                 # bias_sym],
                                 [cost, h1, h2, kappa, w],
                                 updates=updates)
cost_function = theano.function([X_sym, X_mask_sym, c_sym, c_mask_sym,
                                 init_h1, init_h2, init_kappa, init_w],
                                # bias_sym],
                                [cost, h1, h2, kappa, w])
predict_function = theano.function([X_sym, X_mask_sym, c_sym, c_mask_sym,
                                    init_h1, init_h2, init_kappa, init_w],
                                   # bias_sym],
                                   [preds],