def test_linear_training_tf_fast(self):
    r"""
    For fastest results, use a batch size of 64, the Adam optimizer and 3 epochs.
    You should get more than 97% accuracy.
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First")
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second")
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third")
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

    # Set up optimizer and hyperparameters
    batch_size = 64
    epochs = 5  # use 25 for SGD
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    losses = []
    x_train, y_train, x_val, y_val, x_test, y_test = mn.load_dataset(flatten=True)
    iter_count = 1
    predictor = node.make_evaluator([x_node, yt_node], linear3)
    total_time = time.time()
    ctx = node.ComputeContext({})
    for epoch in range(epochs):
        epoch_time = time.time()
        for x, y in iterate_over_minibatches(x_train, y_train, batch_size=batch_size):
            ctx['x'], ctx['yt'] = x.T, to_one_hot(y, max_cat_num=9)
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
            iter_count += 1
        epoch_time = time.time() - epoch_time
        loss_av = np.mean(np.array(losses[:-batch_size + 1]))
        # Validation accuracy at the end of each epoch
        ctx['x'], ctx['yt'] = x_val.T, to_one_hot(y_val, max_cat_num=9)
        y_predicted = predictor(ctx)
        arg_max = np.argmax(y_predicted, axis=0)
        correct = arg_max == y_val
        percent = np.mean(correct) * 100
        info("Epoch {:2d}:: Validation "
             "accuracy:[{:5.2f}%] loss av={:01.8f}, time:{:2.3f}s".format(
                 epoch, percent, loss_av, epoch_time))
        self.assertTrue(percent > 95)
    total_time = time.time() - total_time
    info("[Mnist784DsTest.test_linear_training()] total_time = {:5.3f} s".format(total_time))
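# The helpers iterate_over_minibatches and to_one_hot used above are not defined in
# this file. The sketch below is an illustrative guess at their behavior, assuming y
# holds integer class labels and x holds one sample per row; it is not the project's
# actual implementation.
def _to_one_hot_sketch(y, max_cat_num):
    # One column per sample, one row per category (max_cat_num is the largest label).
    one_hot = np.zeros((max_cat_num + 1, len(y)))
    one_hot[y, np.arange(len(y))] = 1.0
    return one_hot


def _iterate_over_minibatches_sketch(x, y, batch_size):
    # Yield successive (x, y) mini-batches; the trailing partial batch is dropped.
    for start in range(0, len(y) - batch_size + 1, batch_size):
        yield x[start:start + batch_size], y[start:start + batch_size]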
def test_multi_layer(self):
    r"""
    This actually performs better with SGD and normal initialization, reaching
    almost 99% accuracy with that combination.
    :return:
    """
    iris = Iris()
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 16)
    tanh = act.TanhNode(dense)
    dense2 = node.DenseLayer(tanh, 10)
    relu = act.RelUNode(dense2)
    dense3 = node.DenseLayer(relu, 3)
    softmax = act.Softmax(dense3)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)

    # optimizer_func = core.np.Optimization.AdamOptimizer()
    optimizer_func = core.np.Optimization.SGDOptimizer(lr=0.01)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    epoch = 0
    epochs = 10000
    batch_size = 8
    ctx = node.ComputeContext(weight_initializer=None)
    for x, y in iris.train_iterator(epochs, batch_size):
        ctx['x'], ctx['yt'] = x, y
        loss_now = optimizer.step(ctx, 1.0) / batch_size
        if epoch % 500 == 0:
            info("[{}]\tloss_now = {}".format(epoch, loss_now))
        epoch += 1

    f = node.make_evaluator([x_node, yt_node], softmax)
    total, correct = 100, 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        var_map = {'x': x, 'yt': y_actual}
        y_predicted = f(var_map)
        max_idx = np.argmax(y_predicted)
        mark = 'x'
        if max_idx == y_actual:
            correct += 1
            mark = u'\u2713'
        print("X:{}, y_pred:{}, Actual={}, Predicted:{} {}".format(
            x.T, y_predicted.T, y_actual[0], max_idx, mark))
    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
    self.assertTrue(percent > 95)
def test_linear_fit(self):
    epochs = 2000
    iris = Iris()
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 3)
    softmax = act.Softmax(dense)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)

    optimizer_func = core.np.Optimization.AdamOptimizer(lr=0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    epoch = 0
    ctx = node.ComputeContext()
    for x, y in iris.train_iterator(epochs, 8):
        ctx['x'], ctx['yt'] = x, y
        loss_now = optimizer.step(ctx, 1.0)
        if epoch % 100 == 0:
            info("[{}]\tloss_now = {}".format(epoch, loss_now))
        epoch += 1

    f = node.make_evaluator([x_node, yt_node], softmax)
    total, correct = 40, 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        ctx['x'], ctx['yt'] = x, y_actual
        y_predicted = f.at(ctx)
        max_idx = np.argmax(y_predicted)
        if max_idx == y_actual:
            correct += 1
    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
def test_basic_op(self):
    np.random.seed(100)
    x = np.array([[1, -1], [2, 3], [-1, -2]], dtype=float)
    y = np.array([[-1, 1], [-3, -1]], dtype=float)

    x_node = node.VarNode('x')
    y_target = node.VarNode('y_target')
    dense = node.DenseLayer(x_node, 2, self.model_w, self.model_b)
    l2_node = L2DistanceSquaredNorm(dense, y_target)

    var_map = {'x': x, 'y_target': y}
    x_node.forward(var_map)
    y_target.forward(var_map)

    log_at_info()
    value = dense.value()
    info("------------------------------------------")
    info("Predicted value = np.{}".format(repr(value)))
    info("Target value = np.{}".format(repr(y)))
    value = l2_node.value()
    info("L2 node value (loss):{}".format(value))
    info("------------------------------------------")
    info("Printing weights (not updated yet)")
    info("------------------------------------------")
    info("Linear layer weight = np.{}".format(repr(dense.get_w())))
    info("Linear layer bias = np.{}".format(repr(dense.get_b())))

    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, y_target], l2_node, optim_func)
    optimizer.step(var_map, 1.0)

    np.set_printoptions(precision=64, floatmode='maxprec_equal')
    info("------------------------------------------")
    info("Printing after updating weights")
    info("------------------------------------------")
    info("Linear layer weight:{}".format(repr(dense.get_w())))
    info("Linear layer bias:{}".format(repr(dense.get_b())))
    info("w_grad = np.{}".format(repr(dense.get_w_grad())))
    info("b_grad = np.{}".format(repr(dense.get_b_grad())))

    expected_weight = np.array([[1.0000, 2.9850, -0.9910],
                                [-0.0040, -3.9755, 1.9845]])
    expected_bias = np.array([[-3.006], [2.009]])
    expected_w_grad = np.array([[0.0, 15.0, -9.0],
                                [4.0, -24.5, 15.5]])
    expected_b_grad = np.array([[6.], [-9.]])

    np.testing.assert_almost_equal(expected_weight, dense.get_w())
    np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
    np.testing.assert_almost_equal(expected_bias, dense.get_b())
    np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())
def test_something(self):
    var = node.VarNode('')
    dense = node.DenseLayer(var, 100)
    print(dense.__class__.__name__)
    if isinstance(dense, node.MComputeNode):
        print("Is a compute node")
    else:
        print("Is not a compute node")

    # The same check, but using a class object obtained from an instance
    m_compute_node = node.MComputeNode()
    class_obj = m_compute_node.__class__
    if isinstance(dense, class_obj):
        print("OK .. is a compute node")
    else:
        print("Not a compute node..")
def test_dropout_with_dense(self):
    model_w = np.array([[1, 3, -1], [0, -4, 2.]])
    model_b = np.array([-3, 2.]).reshape((2, 1))
    x_node = node.VarNode('x')
    dense = node.DenseLayer(x_node, output_dim=2, initial_w=model_w, initial_b=model_b)
    p = .6
    dropout = reg.Dropout(dense, dropout_prob=p)
    x = np.array([[1, -1], [2, 3], [-1, -2.]])
    ctx = node.ComputeContext({'x': x})

    # Keep running forward passes until the dropout mask zeroes out the first row.
    found_0 = False
    count = 0
    row_to_check = 0
    while not found_0:
        x_node.forward(ctx)
        output = dropout.value()
        sq = np.sum(np.square(output), axis=1)
        found_0 = sq[row_to_check] == 0
        count += 1
        if count > 100:
            raise Exception("Could not get 0's in first row after {} iterations.".format(count))

    info("[DenseLayerStandAlone.test_dropout_with_dense()] output = np.{}".format(repr(output)))
    dropout.backward(np.ones_like(output), self, ctx)
    w_grad = dense.get_w_grad()
    info("[DenseLayerStandAlone.test_dropout_with_dense()] w_grad = np.{}".format(repr(w_grad)))
    b_grad = dense.get_b_grad()
    info("[DenseLayerStandAlone.test_dropout_with_dense()] b_grad = np.{}".format(repr(b_grad)))

    # The gradients flowing into the dropped row must be zero as well.
    wg_sq_sum = np.sum(np.square(w_grad), axis=1)
    self.assertEqual(0, wg_sq_sum[row_to_check])
    bg_sum_sq = np.sum(np.square(b_grad), axis=1)
    self.assertEqual(0, bg_sum_sq[row_to_check])

    # Test validation time (not training time)
    ctx.set_is_training(False)
    x_node.forward(ctx)
    test_output = dropout.value()
    np.testing.assert_array_almost_equal(test_output, dense.value() * p)
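# The assertions above rely on two properties of reg.Dropout: during training,
# dropped activations are zero and so are the gradients flowing through them; at
# evaluation time the layer only rescales its input by the keep probability. A
# minimal NumPy sketch of that behaviour, assuming dropout_prob is the keep
# probability p and masking is element-wise (this mirrors the checks in the test,
# not the library's actual implementation):
def _dropout_forward_sketch(activations, p, is_training, rng=np.random):
    if is_training:
        mask = (rng.uniform(size=activations.shape) < p).astype(activations.dtype)
        return activations * mask, mask   # mask is reused in backward: grad * mask
    return activations * p, None          # evaluation: scale instead of dropping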
def do_linear_optimization(self, optim_func, epochs=25000, batch_size=8, do_assert=True):
    np.random.seed(100)
    x_node = node.VarNode('x')
    y_node = node.VarNode('y')

    net_w = np.array([[-1, -3, 1], [0, 4, -2]])
    net_b = np.array([3, -2]).reshape((2, 1))
    dense = node.DenseLayer(x_node, 2, net_w, net_b)
    l2_node = L2DistanceSquaredNorm(dense, y_node)

    # optim_func = self.rate_adjustable_optimizer_func(0.01)
    # adam = core.np.Optimization.AdamOptimizer()
    optimizer = core.np.Optimization.OptimizerIterator([x_node, y_node], l2_node, optim_func)
    log_at_info()

    epoch = 0
    losses = []
    for x, y in self.model.data(epochs, batch_size):
        var_map = {'x': x, 'y': y}
        loss = optimizer.step(var_map, 1.0)
        # losses.append(loss)
        if epoch % 100 == 0:
            losses.append([epoch, loss])
        if epoch % 1000 == 0:
            info("[{}] Loss:{}".format(epoch, loss))
        epoch += 1
    info("[{}] Loss:{}".format(epoch, loss))

    dense_w = dense.get_w()
    dense_b = dense.get_b()
    info("w = np.{}".format(repr(dense_w)))
    info("b = np.{}".format(repr(dense_b)))
    if do_assert:
        np.testing.assert_array_almost_equal(dense_w, self.model_w, 3)
        np.testing.assert_array_almost_equal(dense_b, self.model_b, 3)
    return np.array(losses)
def test_single_step(self):
    model_w = np.array([[1, 3, -1], [0, -4, 2.]])
    model_b = np.array([-3, 2.]).reshape((2, 1))
    x_node = node.VarNode('x')
    dense = node.DenseLayer(x_node, output_dim=2, initial_w=model_w, initial_b=model_b)
    x = np.array([[1, -1], [2, 3], [-1, -2.]])
    ctx = node.ComputeContext({'x': x})

    x_node.forward(ctx)
    output = dense.value()
    info("[DenseLayerStandAlone.test_single_step()] output = np.{}".format(repr(output)))

    dense.backward(np.ones_like(output), self, ctx)
    w_grad = dense.get_w_grad()
    info("[DenseLayerStandAlone.test_single_step()] w_grad = np.{}".format(repr(w_grad)))
    b_grad = dense.get_b_grad()
    info("[DenseLayerStandAlone.test_single_step()] b_grad = np.{}".format(repr(b_grad)))
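# For reference, with an upstream gradient of all ones the dense layer's gradients
# should follow the usual formulas dW = G @ x.T and db = sum of G over the batch
# columns, assuming the layer computes y = W @ x + b on column-major batches and
# does not normalize gradients by batch size (adjust if it does). A small sketch
# for comparing against the values logged above:
def _dense_grad_check_sketch(x, output):
    g = np.ones_like(output)                        # upstream gradient, as in the test
    expected_w_grad = g @ x.T                       # shape (output_dim, input_dim)
    expected_b_grad = g.sum(axis=1, keepdims=True)  # shape (output_dim, 1)
    return expected_w_grad, expected_b_grad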
def test_linear_training(self):
    r"""
    For fastest results, use a batch size of 64, the Adam optimizer and 3 epochs.
    You should get more than 97% accuracy.
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First", weight_scale=0.01)
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second", weight_scale=0.01)
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third", weight_scale=0.01)
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

    # Set up optimizer and hyperparameters
    batch_size = 64
    epochs = 3
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()

    losses = []
    predictor = node.make_evaluator([x_node, yt_node], linear3)
    ctx = node.ComputeContext({})
    mnist = Mnist784()
    total_time = time.time()
    for epoch in range(epochs):
        epoch_time = time.time()
        iter_count = 0
        for x, y in mnist.train_iterator_seq(batch_size=batch_size):
            ctx['x'], ctx['yt'] = x, y
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
            iter_count += 1
            if iter_count % 100 == 0:
                print("iter:{}".format(iter_count))

        loss_av = np.mean(np.array(losses[:-batch_size + 1]))
        # Validation accuracy at the end of each epoch
        e, xv, yv = mnist.test_iterator(1, batch_size=-1, one_hot=False)
        ctx['x'], ctx['yt'] = xv, yv
        percent = self.measure_validation_perf(predictor, ctx, yv)
        epoch_time = time.time() - epoch_time
        info("Epoch {:2d}:: Val:{:2.4f}% , loss av={:01.8f}, time:{:2.3f}s".format(
            epoch, percent, loss_av, epoch_time))
    total_time = time.time() - total_time
    info("Total time taken:{:4.4f}".format(total_time))
def test_basic_op_large_matrix(self):
    r"""
    Runs the test for a slightly larger matrix.
    :return:
    """
    x = np.array([[0.54566752, 0.66921034, 0.35265542, 0.32324271, 0.35036963, 0.05317591],
                  [0.97433629, 0.5027976, 0.15637831, 0.72948084, 0.42097552, 0.52522781],
                  [0.41793729, 0.48112345, 0.46862087, 0.88918467, 0.48792933, 0.32439625],
                  [0.4775774, 0.58105899, 0.35079832, 0.79657794, 0.3910011, 0.72908915]])
    w = np.array([[0.61013274, 0.86914947, 0.95211922, 0.96385655],
                  [0.64290252, 0.2717017, 0.193146, 0.05004571],
                  [0.14360354, 0.54256991, 0.90870491, 0.06577582]])
    b = np.array([[0.76026806], [0.32982798], [0.01258297]])

    pred = w @ x + b
    target = np.ones_like(pred)

    x_node = node.VarNode('x')
    target_node = node.VarNode('y_target')
    dense = node.DenseLayer(x_node, 3, w, b)
    l2_node = L2DistanceSquaredNorm(dense, target_node)

    var_map = {'x': x, 'y_target': target}
    x_node.forward(var_map)
    target_node.forward(var_map)

    log_at_info()
    predicted = dense.value()
    info("------------------------------------------")
    info("Predicted value = np.{}".format(repr(predicted)))
    info("Target value = np.{}".format(repr(target)))
    loss = l2_node.value()
    info("L2 node value (loss):{}".format(loss))
    info("------------------------------------------")
    info("Printing weights (not updated yet)")
    info("------------------------------------------")
    info("Linear layer weight = np.{}".format(repr(dense.get_w())))
    info("Linear layer bias = np.{}".format(repr(dense.get_b())))

    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, target_node], l2_node, optim_func)
    optimizer.step(var_map, 1.0)

    np.set_printoptions(precision=64, floatmode='maxprec_equal')
    info("------------------------------------------")
    info("Printing after updating weights")
    info("------------------------------------------")
    info("weight=np.{}".format(repr(dense.get_w())))
    info("w_grad = np.{}".format(repr(dense.get_w_grad())))
    info("bias = np.{}".format(repr(dense.get_b())))
    info("b_grad = np.{}".format(repr(dense.get_b_grad())))

    # These are values from pytorch
    expected_weight = np.array([[0.60973525, 0.86854088, 0.95157486, 0.96327269],
                                [0.64292222, 0.27173772, 0.19318908, 0.05009926],
                                [0.14362818, 0.54258782, 0.90872669, 0.06581017]])
    expected_w_grad = np.array([[0.39752683, 0.60859025, 0.54437733, 0.58387089],
                                [-0.01970989, -0.03603142, -0.04307830, -0.05355303],
                                [-0.02465229, -0.01786957, -0.02174304, -0.03434603]])
    expected_bias = np.array([[0.75927186, 0.32992661, 0.01267095]]).T
    expected_b_grad = np.array([[0.99619532, -0.09862594, -0.08797690]]).T

    np.testing.assert_almost_equal(expected_weight, dense.get_w())
    np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
    np.testing.assert_almost_equal(expected_bias, dense.get_b())
    np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())
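# The expected values above are noted as coming from PyTorch. The sketch below is
# one way such reference numbers could be produced, assuming L2DistanceSquaredNorm
# behaves like PyTorch's element-wise mean squared error and that a single SGD step
# with lr=0.001 is taken; it is an illustrative reconstruction, not the script that
# generated the numbers.
def _pytorch_reference_sketch(x, w, b, target, lr=0.001):
    import torch
    wt = torch.tensor(w, requires_grad=True)
    bt = torch.tensor(b, requires_grad=True)
    xt = torch.tensor(x)
    pred = wt @ xt + bt
    loss = torch.nn.functional.mse_loss(pred, torch.tensor(target))  # mean over all elements
    loss.backward()
    with torch.no_grad():
        new_w = wt - lr * wt.grad   # single vanilla SGD update
        new_b = bt - lr * bt.grad
    return wt.grad.numpy(), bt.grad.numpy(), new_w.numpy(), new_b.numpy()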