import time

import numpy as np

import cgt
from cgt import nn
from cgt.distributions import categorical

import nnbuilder  # project-local model-building helpers
from example_utils import fmt_row  # assumed location of the table formatter


def test_rmsprop():
    results = []
    for scale in scales:
        A = cgt.shared(1.0)
        B = cgt.shared(1.0)
        updates = nn.rmsprop(f(A, scale) + f(B, scale), [A, B], learning_rate=0.01)
        do_update = cgt.function([], [], updates=updates)
        for _ in range(10):
            do_update()
        # Symmetric losses and identical initial values: A and B must track each other.
        assert np.allclose(A.op.get_value(), B.op.get_value())
        results.append(A.op.get_value().copy())
    assert np.allclose(results, torch_values['rmsprop'])
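# `scales`, `f`, and `torch_values` are defined elsewhere in the test suite.
# A minimal sketch of plausible definitions (hypothetical, not the originals):
# a set of loss scales to sweep and a scaled quadratic objective per parameter.
# `torch_values['rmsprop']` is assumed to hold reference parameter values
# recorded from a Torch run of the same update rule, so it is not sketched here.
scales = [0.1, 1.0, 10.0]  # hypothetical

def f(x, scale):
    # Hypothetical per-parameter loss term: a scaled quadratic bowl with its
    # minimum at zero, so the optimizer should drive x toward 0.
    return scale * x * x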
def main():
    print("Loading data...")
    X = cgt.matrix("X", fixed_shape=(None, 28*28))
    y = cgt.vector("y", dtype='i8')
    model = build_model(X, 0.0)
    loss = -cgt.mean(categorical.loglik(y, model))
    updates = nn.rmsprop(loss, nn.get_parameters(loss), 0.01)
    train = cgt.function(inputs=[X, y], outputs=[], updates=updates)
    y_nodrop = cgt.argmax(model, axis=1)
    cost_nodrop = -cgt.mean(categorical.loglik(y, model))
    err_nodrop = cgt.cast(cgt.not_equal(y_nodrop, y), cgt.floatX).mean()
    computeloss = cgt.function(inputs=[X, y], outputs=[err_nodrop, cost_nodrop])

    batch_size = 128
    Xdata, ydata = load_data()
    # Standard MNIST split: 60k training images, 10k test images.
    Xtrain = Xdata[0:60000]
    ytrain = ydata[0:60000]
    Xtest = Xdata[60000:70000]
    ytest = ydata[60000:70000]
    # Shuffle the training set.
    sortinds = np.random.permutation(60000)
    Xtrain = Xtrain[sortinds]
    ytrain = ytrain[sortinds]

    print(fmt_row(10, ["Epoch", "Train NLL", "Train Err", "Test NLL", "Test Err", "Epoch Time"]))
    for i_epoch in xrange(3):
        tstart = time.time()
        for start in xrange(0, Xtrain.shape[0], batch_size):
            end = start + batch_size
            train(Xtrain[start:end], ytrain[start:end])
        elapsed = time.time() - tstart
        # Evaluate on a training subset the same size as the test set, for comparable cost.
        trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        testerr, testloss = computeloss(Xtest, ytest)
        print(fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed]))
    nnbuilder.save_weights(model, 'mnist')
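# `build_model` and `load_data` are defined elsewhere in the project. A minimal
# sketch of what they are assumed to do, inferred from the call sites above
# (hypothetical, not the originals): `build_model(X, p_drop)` returns class
# probabilities for a dropout MLP over flattened MNIST images, and `load_data()`
# returns the images as a (70000, 784) float array with integer labels.

def build_model(X, p_drop):
    # Hypothetical two-layer MLP: 784 -> 256 -> 10, dropout on the hidden layer.
    h = nn.rectify(nn.Affine(28*28, 256)(X))
    h = nn.dropout(h, p_drop)
    return nn.softmax(nn.Affine(256, 10)(h))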
def main(num_epochs=NUM_EPOCHS):
    #cgt.set_precision('half')
    print("Building network ...")
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    X = cgt.tensor3(name='X', fixed_shape=(N_BATCH, MAX_LENGTH, 2))
    l_forward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN)
    l_backward = nnbuilder.recurrentLayer(nn_input=X, num_units=N_HIDDEN, backwards=True)
    #l_forward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid)
    #l_backward = nnbuilder.LSTMLayer(nn_input=X, num_units=N_HIDDEN, activation=cgt.sigmoid, backwards=True)
    #l_forward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify)
    #l_backward = nnbuilder.GRULayer(nn_input=X, num_units=N_HIDDEN, activation=nn.rectify, backwards=True)
    l_forward_slice = l_forward[:, MAX_LENGTH-1, :]  # Take the last element of the forward pass along the time dimension
    l_backward_slice = l_backward[:, 0, :]  # and the first element of the backward pass
    l_sum = cgt.concatenate([l_forward_slice, l_backward_slice], axis=1)
    l_out = nnbuilder.denseLayer(l_sum, num_units=1, activation=cgt.tanh)
    target_values = cgt.vector('target_output')
    predicted_values = l_out[:, 0]  # For this task we only need the last value
    cost = cgt.mean((predicted_values - target_values)**2)

    # Compute SGD updates for training
    print("Computing updates ...")
    updates = nn.rmsprop(cost, nn.get_parameters(l_out), LEARNING_RATE)
    #updates = nn.nesterov_momentum(cost, nn.get_parameters(l_out), 0.05)

    # cgt functions for training and computing cost
    print("Compiling functions ...")
    train = cgt.function([X, target_values], cost, updates=updates)
    compute_cost = cgt.function([X, target_values], cost)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = gen_data()

    print("Training ...")
    time_start = time.time()
    try:
        for epoch in range(num_epochs):
            for _ in range(EPOCH_SIZE):
                X, y, m = gen_data()
                train(X, y)
            cost_val = compute_cost(X_val, y_val)
            print("Epoch {} validation cost = {}".format(epoch+1, cost_val))
            print('Epoch took ' + str(time.time() - time_start))
            time_start = time.time()
    except KeyboardInterrupt:
        pass
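# The constants and `gen_data` are defined elsewhere (presumably above main() in
# the original script, since NUM_EPOCHS is evaluated when main is defined). A
# minimal sketch with hypothetical values, assuming the usual "sum two masked
# numbers" toy task for bidirectional RNNs: feature channel 0 carries random
# values, channel 1 is a 0/1 mask marking the two positions to add.
N_BATCH, MAX_LENGTH, N_HIDDEN = 100, 55, 100      # hypothetical sizes
LEARNING_RATE, EPOCH_SIZE, NUM_EPOCHS = 0.001, 100, 10

def gen_data(batch_size=N_BATCH, length=MAX_LENGTH):
    X = np.zeros((batch_size, length, 2), dtype='float32')
    X[:, :, 0] = np.random.uniform(size=(batch_size, length))
    for i in range(batch_size):
        sel = np.random.choice(length, size=2, replace=False)
        X[i, sel, 1] = 1.0  # mark the two positions to be summed
    y = (X[:, :, 0] * X[:, :, 1]).sum(axis=1).astype('float32')
    mask = X[:, :, 1]  # returned for API parity with the call sites above
    return X, y, mask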
def test_the_test_problem():
    # Works
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 20  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription. Includes the start character.
    num_out_classes = 27
    num_out_classes_true = num_out_classes + 2
    num_batches = 756
    num_epochs = 30

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print('initializing temporal dense layer')
    d1 = nnbuilder.temporalDenseLayer(feats, num_units=128, activation=cgt.sigmoid)
    #d2 = nnbuilder.temporalDenseLayer(d1, num_units=128, activation=cgt.sigmoid)
    d3 = nnbuilder.temporalDenseLayer(d1, num_units=num_out_classes_true, activation=nnbuilder.linear)
    out = nn.three_d_softmax(d3, axis=2)

    # Accumulate the log-probability of the ground-truth character at each time step.
    log_probs = None
    for iter_step in range(0, max_label_length):
        this_character_dist_bc = out[:, iter_step, :]
        prev_out_bc = ground_labels_basis[:, iter_step, :]
        log_probs_pre = prev_out_bc * this_character_dist_bc
        log_probs_pre = cgt.log(cgt.sum(log_probs_pre, axis=1))
        if log_probs is None:
            log_probs = cgt.sum(log_probs_pre)
        else:
            log_probs += cgt.sum(log_probs_pre)
    log_probs = -log_probs  # negative log-likelihood, to be minimized

    print('that took ' + str(time.time() - last_time) + ' seconds')
    last_time = time.time()
    print('compiling objective function')
    updates = nn.rmsprop(log_probs, nn.get_parameters(log_probs), learning_rate=0.01)
    pred_train = cgt.function([feats, ground_labels_basis], [], updates=updates)
    pred_fun = cgt.function([feats, ground_labels_basis], [log_probs])
    most_likely_chars = cgt.argmax(out, axis=1)
    actual_predictions = cgt.function([feats, ground_labels_basis], [most_likely_chars])
    print('that took ' + str(time.time() - last_time) + ' seconds')

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)  # standard deviation for input normalization

    print('now training')
    for one_epoch in range(0, num_epochs):
        trained = 0
        last_time = time.time()
        print('starting epoch ' + str(one_epoch))
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean,
                                                             data_sd, test_labels, num_out_classes_true)
            pred_train(batch, labels_basis)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean,
                                                             data_sd, test_labels, num_out_classes_true)
            trained += pred_fun(batch, labels_basis)[0]
        trained = trained / num_batches  # average loss over all batches
        print('train loss is ' + str(trained))
        print('that took ' + str(time.time() - last_time) + ' seconds')
        act_pred = actual_predictions(batch, labels_basis)[0]
        print('an actual prediction is ')
        print(act_pred)
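# `normalize_batch_and_labels` is defined elsewhere. A minimal sketch of the
# assumed behavior, inferred from the call sites (hypothetical): pick one
# pre-batched slice of the data, standardize it with the precomputed mean and
# standard deviation, and one-hot encode the matching integer character labels.

def normalize_batch_and_labels(data, batch_iter, t_steps, mean, sd, labels, num_classes):
    # Data is assumed pre-batched: (num_batches, batch_size, time, features).
    batch = (data[batch_iter, :, 0:t_steps, :] - mean) / sd
    # Labels are assumed to be integer character ids: (num_batches, batch_size, time).
    ids = labels[batch_iter, :, 0:t_steps].astype('i8')
    labels_basis = np.eye(num_classes, dtype='float32')[ids]  # one-hot basis vectors
    return batch.astype('float32'), labels_basis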
def test_seq_2_seq():
    batch_size = 32  # How many samples do you want to batch.
    feat_t_steps = 3  # How many 10ms sound clips.
    feat_num_features = 10  # The dimension of the 10ms clips.
    max_label_length = feat_t_steps  # The maximal label length of the transcription.
    num_out_classes = 27  # 26 letters and space.
    num_out_classes_true = 27 + 2  # Start and end tokens are added.
    num_batches = 512  # 1032
    num_epochs = 40

    feats = cgt.tensor3(fixed_shape=(batch_size, feat_t_steps, feat_num_features))
    ground_labels_basis = cgt.tensor3(fixed_shape=(batch_size, max_label_length, num_out_classes_true))

    last_time = time.time()
    print('initializing seq2seq')
    seq2seq = nnbuilder.Seq2Seq(nn_input_btf=feats, num_out_classes=num_out_classes)
    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('making train objective')
    train_objective = seq2seq.get_train_objective(max_label_length=max_label_length,
                                                  ground_labels_basis_btc=ground_labels_basis)
    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('making updates')
    updates = nn.rmsprop(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001)
    #updates = nn.nesterov_momentum(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001, mu=0.4)
    #updates = nn.momentum(train_objective, nn.get_parameters(train_objective), learning_rate=0.00001, mu=0.4)
    #updates = nn.adadelta(train_objective, nn.get_parameters(train_objective), learning_rate=0.0001, rho=0.95)
    print('that took ' + str(time.time() - last_time) + ' seconds')

    last_time = time.time()
    print('compiling train function, test function, and prediction output function')
    train_function = cgt.function([feats, ground_labels_basis], [], updates=updates)
    test_function = cgt.function([feats, ground_labels_basis], [train_objective])
    pred = seq2seq.make_prediction(ground_labels_basis_btc=ground_labels_basis, max_label_length=feat_t_steps)
    pred_fun = cgt.function([feats, ground_labels_basis], [pred])
    print('that took ' + str(time.time() - last_time) + ' seconds')

    test_data = np.load('test_data.npy')
    test_labels = np.load('test_labels.npy')
    data_mean = np.mean(test_data)
    data_sd = np.std(test_data)

    print('now training')
    last_time = time.time()
    for one_epoch in range(0, num_epochs):
        tested = 0
        print('starting epoch ' + str(one_epoch))
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean,
                                                             data_sd, test_labels, num_out_classes_true)
            train_function(batch, labels_basis)
        for batch_iter in range(0, num_batches):
            batch, labels_basis = normalize_batch_and_labels(test_data, batch_iter, feat_t_steps, data_mean,
                                                             data_sd, test_labels, num_out_classes_true)
            tested += test_function(batch, labels_basis)[0]
        tested = tested / num_batches  # average loss over all batches
        print('train loss is ' + str(tested))
        print('an actual prediction is ')
        print(pred_fun(batch, labels_basis)[0])
        print('the truth is')
        print(test_labels[batch_iter, :, 0:feat_t_steps])
        print('that took ' + str(time.time() - last_time) + ' seconds')
        last_time = time.time()

    prediction_final = pred_fun(batch, labels_basis)[0]
    print(prediction_final)