def test_train(self):
    """Train a small SimpleRnnLayer classifier on randomly drawn examples.

    Every iteration calls ``rnn_node.set_initial_state_to_zero()``, draws one
    example from ``self.name_ds.random_training_example()`` and takes one
    optimizer step on the softmax cross-entropy loss.  After each window of
    ``every`` iterations the window's average loss and wall-clock time are
    logged through ``info``.  Nothing is asserted.
    """
    num_iter = 100000
    every = 500  # size of the logging window, in iterations

    x_node = n.VarNode('x')
    y_target_node = n.VarNode('y_target')
    rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 15)
    loss_node = loss.SoftmaxCrossEntropy(rnn_node, y_target_node)

    # SGD is the optimizer actually used by this test.  An AdamOptimizer was
    # previously constructed on the line before and thrown away immediately;
    # swap in autodiff_optim.AdamOptimizer() here to try Adam instead.
    optimizer_func = autodiff_optim.SGDOptimizer(lr=0.0001)
    optimizer = autodiff_optim.OptimizerIterator([x_node, y_target_node],
                                                 loss_node, optimizer_func)
    # Placeholder values; both entries are overwritten on every iteration.
    ctx = n.ComputeContext({'x': "", 'y_target': ""})
    log_at_info()

    window_losses = []
    window_start = time.time()
    for i in range(1, num_iter + 1):
        rnn_node.set_initial_state_to_zero()
        # The first two returned values are not needed for training.
        _, _, category_index, name_tensor = \
            self.name_ds.random_training_example()
        cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
        ctx['x'] = name_tensor
        ctx['y_target'] = cat_tensor
        ctx['i'] = i
        window_losses.append(optimizer.step(ctx, 1.0))

        if i % every != 0:
            continue

        # The list is cleared after each report, so at this point it holds
        # exactly the losses of the last `every` iterations.
        elapsed = time.time() - window_start
        av = np.average(window_losses)
        info("[{:06d}] Avg. loss = {:10.6f}"
             " | {:04.2f}s per {} | Total Iters set to:{}".format(
                 i, av, elapsed, every, num_iter))
        window_losses = []
        window_start = time.time()
def test_linear_fit(self):
    """Fit one dense layer followed by softmax on Iris and print accuracy.

    Trains with Adam (lr=0.001) on batches from ``iris.train_iterator``,
    logging the loss every 100 steps, then evaluates on 40 samples from
    ``iris.test_iterator`` and prints the percentage classified correctly.
    Nothing is asserted.
    """
    epochs = 2000
    iris = Iris()

    # Model: x -> Dense(3) -> Softmax, trained against cross entropy.
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 3)
    softmax = act.Softmax(dense)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)

    optimizer_func = core.np.Optimization.AdamOptimizer(lr=0.001)
    optimizer = core.np.Optimization.OptimizerIterator(
        [x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    ctx = node.ComputeContext()
    for step, (x, y) in enumerate(iris.train_iterator(epochs, 8)):
        ctx['x'] = x
        ctx['yt'] = y
        loss_now = optimizer.step(ctx, 1.0)
        if step % 100 == 0:
            info("[{}]\tloss_now = {}".format(step, loss_now))

    f = node.make_evaluator([x_node, yt_node], softmax)
    total = 40
    correct = 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        ctx['x'] = x
        ctx['yt'] = y_actual
        y_predicted = f.at(ctx)
        # The predicted class is the index of the largest softmax output.
        if np.argmax(y_predicted) == y_actual:
            correct += 1

    print("Correct= {}%".format(correct * 100 / total))
def test_linear_training_tf_fast(self):
    r"""
    Train a Dense(100)-ReLU-Dense(200)-ReLU-Dense(10) network on the data
    returned by ``mn.load_dataset(flatten=True)`` and check validation
    accuracy.

    Runs 5 epochs of Adam with a batch size of 64 (the inline note suggests
    25 epochs when switching to SGD).  After every epoch the validation
    accuracy, an average loss and the epoch time are logged; after the last
    epoch the validation accuracy must exceed 95%.
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First")
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second")
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third")
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

    # Set up optimizers and params
    batch_size = 64
    epochs = 5  # use 25 for SGD
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node],
                                                 cross_entropy,
                                                 optimizer_func)
    log_at_info()
    losses = []

    # The test split is loaded but not used by this test.
    x_train, y_train, x_val, y_val, _x_test, _y_test = mn.load_dataset(
        flatten=True)
    predictor = node.make_evaluator([x_node, yt_node], linear3)
    total_start = time.time()
    ctx = node.ComputeContext({})
    for epoch in range(epochs):
        epoch_start = time.time()
        for x, y in iterate_over_minibatches(x_train, y_train,
                                             batch_size=batch_size):
            ctx['x'], ctx['yt'] = x.T, to_one_hot(y, max_cat_num=9)
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
        epoch_time = time.time() - epoch_start

        # NOTE(review): `losses` accumulates across epochs and this slice
        # keeps everything except the most recent batch_size - 1 entries.
        # If the intent was "average of the latest losses", the slice should
        # probably be losses[-batch_size:] -- confirm before changing.
        loss_av = np.mean(np.array(losses[:-batch_size + 1]))

        ctx['x'], ctx['yt'] = x_val.T, to_one_hot(y_val, max_cat_num=9)
        y_predicted = predictor(ctx)
        # argmax over axis 0 yields one predicted label per validation column.
        arg_max = np.argmax(y_predicted, axis=0)
        correct = arg_max == y_val
        percent = np.mean(correct) * 100
        info("Epoch {:2d}:: Validation "
             "accuracy:[{:5.2f}%] loss av={:01.8f}, time:{:2.3f}s".format(
                 epoch, percent, loss_av, epoch_time))

    # Only the accuracy measured after the final epoch is asserted.
    self.assertTrue(percent > 95)
    total_time = time.time() - total_start
    # The label now names this method rather than test_linear_training().
    info("[Mnist784DsTest.test_linear_training_tf_fast()] "
         "total_time = {:5.3f} s".format(total_time))
def test_multi_layer(self):
    r"""
    Train Dense(16)-Tanh-Dense(10)-ReLU-Dense(3)-Softmax on Iris with SGD
    and require more than 95% accuracy on 100 test samples.

    Author's note: this actually performs better with SGD and normal
    initialization, reaching almost 99%.
    :return:
    """
    iris = Iris()

    # Network definition.
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    hidden1 = act.TanhNode(node.DenseLayer(x_node, 16))
    hidden2 = act.RelUNode(node.DenseLayer(hidden1, 10))
    softmax = act.Softmax(node.DenseLayer(hidden2, 3))
    cross_entropy = loss.CrossEntropy(softmax, yt_node)

    #optimizer_func = core.np.Optimization.AdamOptimizer()
    optimizer_func = core.np.Optimization.SGDOptimizer(lr=0.01)
    optimizer = core.np.Optimization.OptimizerIterator(
        [x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    epochs = 10000
    batch_size = 8
    ctx = node.ComputeContext(weight_initializer=None)
    batches = iris.train_iterator(epochs, batch_size)
    for step, (x, y) in enumerate(batches):
        ctx['x'] = x
        ctx['yt'] = y
        loss_now = optimizer.step(ctx, 1.0) / batch_size
        if step % 500 == 0:
            info("[{}]\tloss_now = {}".format(step, loss_now))

    f = node.make_evaluator([x_node, yt_node], softmax)
    total = 100
    correct = 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        y_predicted = f({'x': x, 'yt': y_actual})
        max_idx = np.argmax(y_predicted)
        if max_idx == y_actual:
            correct += 1
            mark = u'\u2713'
        else:
            mark = 'x'
        print("X:{}, y_pred:{}, Actual={}, Predicted:{}  {}".format(
            x.T, y_predicted.T, y_actual[0], max_idx, mark))

    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
    self.assertTrue(percent > 95)
def test_dropout_simple_input(self):
    """Feed a fixed 2x3 array through a default ``reg.Dropout`` node and log
    both the input and the node's output.  Nothing is asserted."""
    source = np.array([[1, 2, 3], [3, 4, 1]])
    var = node.VarNode('x')
    dropout = reg.Dropout(var)

    # Forwarding the variable node is what triggers the dropout computation.
    var.forward(node.ComputeContext({'x': source}))
    dropped = dropout.value()

    info(
        "[BatchNormalizationTest.test_dropout_simple_input()] input   value = np.{}"
        .format(repr(source)))
    info(
        "[BatchNormalizationTest.test_dropout_simple_input()] dropout value = np.{}"
        .format(repr(dropped)))
def test_dropout_with_dense(self):
    """Check gradient masking and non-training behaviour of Dropout on a
    DenseLayer.

    The forward pass is repeated until the dropout output has an all-zero
    row ``row_to_check``; an exception is raised once more than 100 passes
    have been made.  After back-propagating ones, the same row of the dense
    layer's weight and bias gradients must be all zero.  Finally, with
    training switched off on the context, the dropout output must equal the
    dense output multiplied by ``p``.
    """
    weights = np.array([[1, 3, -1], [0, -4, 2.]])
    bias = np.array([-3, 2.]).reshape((2, 1))
    x_node = node.VarNode('x')
    dense = node.DenseLayer(x_node,
                            output_dim=2,
                            initial_w=weights,
                            initial_b=bias)
    p = .6
    dropout = reg.Dropout(dense, dropout_prob=p)
    ctx = node.ComputeContext({'x': np.array([[1, -1], [2, 3], [-1, -2.]])})

    row_to_check = 0
    count = 0
    while True:
        x_node.forward(ctx)
        output = dropout.value()
        row_sq_sums = np.sum(np.square(output), axis=1)
        row_is_zero = row_sq_sums[row_to_check] == 0
        count += 1
        # The attempt limit is checked before the success condition, so a
        # hit on the 101st pass still raises.
        if count > 100:
            raise Exception(
                "Could not get 0's in first row after {} iterations.".
                format(count))
        if row_is_zero:
            break

    info("[DenseLayerStandAlone.test_single_step()] output = np.{}".format(
        repr(output)))
    dropout.backward(np.ones_like(output), self, ctx)

    w_grad = dense.get_w_grad()
    info("[DenseLayerStandAlone.test_single_step()] w_grad = np.{}".format(
        repr(w_grad)))
    b_grad = dense.get_b_grad()
    info("[DenseLayerStandAlone.test_single_step()] b_grad = np.{}".format(
        repr(b_grad)))

    # A zeroed output row must contribute no gradient to W or b.
    self.assertEqual(0, np.sum(np.square(w_grad), axis=1)[row_to_check])
    self.assertEqual(0, np.sum(np.square(b_grad), axis=1)[row_to_check])

    # Test validation time (not training time)
    ctx.set_is_training(False)
    x_node.forward(ctx)
    test_output = dropout.value()
    np.testing.assert_array_almost_equal(test_output, dense.value() * p)
def test_rnn_layer_with_loss(self):
    """Smoke-test a SimpleRnnLayer followed by LogitsCrossEntropy.

    Logs the ``line_to_numpy`` encodings of two sample strings, draws random
    training examples from ``self.name_ds``, then runs a forward pass and a
    backward pass (seed gradient 1.0) and logs the layer output, the loss and
    the gradient arriving at the RNN node.  Nothing is asserted.
    """
    debug(
        "[RnnLayerFullTests.test_rnn_layer_with_loss()] self.data_dir = {}"
        .format(self.data_dir))
    x = self.name_ds.line_to_numpy('ABCD')
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] ABCD: x = np.{}".
          format(repr(x)))
    debug("------------------------------------------------------")
    x = self.name_ds.line_to_numpy('Albert')
    debug(
        "[RnnLayerFullTests.test_rnn_layer_with_loss()] x = np.{}".format(
            repr(x)))
    debug("------------------------------------------------------")
    log_at_info()
    # NOTE(review): the extent of this loop body is reconstructed from
    # whitespace-collapsed text.  Here only the sampling and its debug output
    # are repeated, and the network below runs once on the last example
    # drawn -- confirm against the original formatting.
    for i in range(5):
        c, l, category_index, name_tensor = self.name_ds.random_training_example(
        )
        debug("[{}]:{}".format(c, l))
        cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
        debug(
            "[RnnLayerFullTests.test_rnn_layer_with_loss()] cat_tensor = np.{}"
            .format(repr(cat_tensor)))
    x_node = n.VarNode('x')
    y_target_node = n.VarNode('y_target')
    ctx = n.ComputeContext({'x': name_tensor, 'y_target': cat_tensor})
    rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 128)
    loss_node = loss.LogitsCrossEntropy(rnn_node, y_target_node)
    # Both variable nodes are forwarded before the RNN and loss values are read.
    x_node.forward(ctx)
    y_target_node.forward(ctx)
    y = rnn_node.value()
    info(
        "[RnnLayerFullTests.test_rnn_layer_with_loss()]  y = np.{}".format(
            repr(y)))
    loss_value = loss_node.value()
    info("[RnnLayerFullTests.test_rnn_layer_with_loss()] loss = np.{}".
         format(repr(loss_value)))
    # Back-propagate from the loss with a seed gradient of 1.0.
    loss_node.backward(1.0, self, ctx)
    grads = rnn_node.total_incoming_gradient()
    info(grads)
def test_softmax_cross_entropy(self):
    """Check the value and input gradient of ``SoftmaxCrossEntropy``.

    Predictions and one-hot targets are supplied as 4x2 arrays.  The loss is
    compared against a reference value and the gradient arriving at the
    prediction node (after ``backward(1.0, ...)``) against a reference
    array, both to the default tolerance of NumPy's "almost equal" helpers.
    """
    predicted = np.array([[1, 3, -1, 0], [0, 9, 1, 3.]]).T.reshape(4, 2)
    debug("[SoftmaxTestCase.test_batch()] predicted = np.{}".format(repr(predicted)))
    target = np.array([[0, 1, 0, 0], [0, 0, 0, 1]]).T.reshape(4, 2)

    ctx = node.ComputeContext({'pred': predicted, 'target': target})
    pred_node = node.VarNode('pred')
    target_node = node.VarNode('target')
    sx = SoftmaxCrossEntropy(pred_node, target_node)
    pred_node.forward(ctx)
    target_node.forward(ctx)

    loss_value = sx.value()
    debug("[SimpleCrossEntryTestCase.test_softmax_cross_entropy()] loss = {}".format(repr(loss_value)))
    # The loss is a computed float: compare with a tolerance rather than
    # bit-for-bit, so harmless changes in operation order or NumPy version
    # do not break the test.
    np.testing.assert_almost_equal(loss_value, 6.188115770824936)

    sx.backward(1.0, self, ctx)
    grad_at_p = pred_node.total_incoming_gradient()
    debug("[SimpleCrossEntryTestCase.test_softmax_cross_entropy()] grad_at_p = np.{}".format(repr(grad_at_p)))
    expected_grad = np.array([[1.12457214e-01, 1.23048334e-04],
                              [-1.69047339e-01, 9.97070980e-01],
                              [1.52194289e-02, 3.34480051e-04],
                              [4.13706969e-02, -9.97528508e-01]])
    np.testing.assert_array_almost_equal(expected_grad, grad_at_p)
def test_single_step(self):
    """Run one forward and one backward pass through a DenseLayer built with
    fixed weights and bias, logging the output and both gradients.
    Nothing is asserted."""
    weights = np.array([[1, 3, -1], [0, -4, 2.]])
    bias = np.array([-3, 2.]).reshape((2, 1))
    x_node = node.VarNode('x')
    dense = node.DenseLayer(x_node,
                            output_dim=2,
                            initial_w=weights,
                            initial_b=bias)

    batch = np.array([[1, -1], [2, 3], [-1, -2.]])
    ctx = node.ComputeContext({'x': batch})
    x_node.forward(ctx)
    output = dense.value()
    info("[DenseLayerStandAlone.test_single_step()] output = np.{}".format(
        repr(output)))

    # Back-propagate a gradient of ones with the same shape as the output.
    dense.backward(np.ones_like(output), self, ctx)
    info("[DenseLayerStandAlone.test_single_step()] w_grad = np.{}".format(
        repr(dense.get_w_grad())))
    info("[DenseLayerStandAlone.test_single_step()] b_grad = np.{}".format(
        repr(dense.get_b_grad())))
def test_linear_training(self):
    r"""
    Train a Dense(100)-ReLU-Dense(200)-ReLU-Dense(10) network on the
    ``Mnist784`` dataset and log validation performance after each epoch.

    For fastest results, use batch size of 64, adam optimizer
    and 3 epochs. You should get more than 97% accuracy
    (nothing is asserted here; the figures are only logged).
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First", weight_scale=0.01)
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second", weight_scale=0.01)
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third", weight_scale=0.01)
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

    # Set up optimizers and params
    batch_size = 64
    epochs = 3
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node],
                                                 cross_entropy,
                                                 optimizer_func)
    log_at_info()
    losses = []

    predictor = node.make_evaluator([x_node, yt_node], linear3)
    ctx = node.ComputeContext({})
    mnist = Mnist784()
    total_start = time.time()
    for epoch in range(epochs):
        epoch_start = time.time()
        batches = mnist.train_iterator_seq(batch_size=batch_size)
        # Counting from 1 with enumerate replaces a hand-maintained counter
        # that was named `iter` and shadowed the builtin.
        for batch_no, (x, y) in enumerate(batches, start=1):
            ctx['x'], ctx['yt'] = x, y
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
            if batch_no % 100 == 0:
                print("iter:{}".format(batch_no))

        # NOTE(review): `losses` accumulates across epochs and this slice
        # keeps everything except the most recent batch_size - 1 entries.
        # If the intent was "average of the latest losses", the slice should
        # probably be losses[-batch_size:] -- confirm before changing.
        loss_av = np.mean(np.array(losses[:-batch_size + 1]))

        # The first element of the returned triple is not used.
        _, xv, yv = mnist.test_iterator(1, batch_size=-1, one_hot=False)
        ctx['x'], ctx['yt'] = xv, yv
        percent = self.measure_validation_perf(predictor, ctx, yv)
        epoch_time = time.time() - epoch_start
        info("Iter {:2d}:: Val:{:2.4f}% , loss av={:01.8f}, time:{:2.3f}s".
             format(epoch, percent, loss_av, epoch_time))

    total_time = time.time() - total_start
    info("Total time taken:{:4.4f}".format(total_time))