def test_cross_entropy(self):
    predicted = np.array([[1, 3, -1, 0], [0, 9, 1, 3.]]).T
    target = np.array([[0, 1, 0, 0], [0, 0, 0, 1]]).T
    predicted_node = node.VarNode('predicted')
    target_node = node.VarNode('target')
    softmax = Softmax(predicted_node)
    cross_entropy = CrossEntropy(softmax, target_node)
    var_map = {'predicted': predicted, 'target': target}
    predicted_node.forward(var_map)
    target_node.forward(var_map)
    loss = cross_entropy.value()
    debug("loss = {}".format(loss))
    expected_loss = 6.188115770824936
    self.assertAlmostEqual(expected_loss, loss)
    cross_entropy.backward(1.0, self, var_map)
    x_grad = predicted_node.total_incoming_gradient()
    debug("x_grad = np.{}".format(repr(x_grad)))
    # Note: this gradient is 1/8 of the value reported by PyTorch because
    # PyTorch's CrossEntropy does not average inside the softmax, whereas this
    # graph uses a separate Softmax node that does.
    expected_grad = np.array([[1.40571517e-02, 1.53810418e-05],
                              [-2.11309174e-02, 1.24633872e-01],
                              [1.90242861e-03, 4.18100064e-05],
                              [5.17133712e-03, -1.24691064e-01]])
    np.testing.assert_array_almost_equal(expected_grad, x_grad)
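# Hedged cross-check, not part of the test: expected_loss above should match
# PyTorch's summed cross entropy for the same logits (assumption: a PyTorch
# install is available in the dev environment; it is not a dependency here).
#
#   import torch
#   import torch.nn.functional as F
#   logits = torch.tensor([[1., 3., -1., 0.], [0., 9., 1., 3.]])
#   targets = torch.tensor([1, 3])
#   F.cross_entropy(logits, targets, reduction='sum')   # ~6.1881, matches expected_loss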
def test_convolution2d_plotting(self):
    image_path = self.get_image('Vd-Orig.png')
    image = plt.imread(image_path)
    shape = image.shape
    print("shape {}".format(shape))
    img_node = node.VarNode('x')
    x_image = self.rgb2gray(image * 20)
    print(x_image.shape)
    plt.imshow(x_image)
    plt.show()
    debug("Now showing ..")
    var_map = {'x': x_image}
    x_shape = (image.shape[0], image.shape[1])
    conv_node = conv.Convolution2D(img_node, x_shape)
    img_node.forward(var_map)
    final_image = conv_node.value()
    plt.imshow(final_image)
    plt.show()
    edge_kernel = np.array([[-1, -1, -1],
                            [-1, 8, -1],
                            [-1, -1, -1]])
    img_node = node.VarNode('x')
    conv_node = conv.Convolution2D(img_node, x_shape, kernel=edge_kernel)
    img_node.forward(var_map)
    edge_img = conv_node.value()
    plt.imshow(edge_img)
    plt.show()
def test_train(self):
    num_iter = 100000
    x_node = n.VarNode('x')
    y_target_node = n.VarNode('y_target')
    rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 15)
    loss_node = loss.SoftmaxCrossEntropy(rnn_node, y_target_node)
    all_losses = []
    # optimizer_func = autodiff_optim.AdamOptimizer()
    optimizer_func = autodiff_optim.SGDOptimizer(lr=0.0001)
    optimizer = autodiff_optim.OptimizerIterator([x_node, y_target_node], loss_node, optimizer_func)
    ctx = n.ComputeContext({'x': "", 'y_target': ""})
    log_at_info()
    every = 500
    t = time.time()
    for i in range(1, num_iter + 1):
        rnn_node.set_initial_state_to_zero()
        c, l, category_index, name_tensor = self.name_ds.random_training_example()
        cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
        ctx['x'] = name_tensor
        ctx['y_target'] = cat_tensor
        ctx['i'] = i
        loss_value = optimizer.step(ctx, 1.0)
        all_losses.append(loss_value)
        if i % every == 0:
            t = time.time() - t
            last_losses = all_losses[-every:]
            av = np.average(last_losses)
            info("[{:06d}] Avg. loss = {:10.6f}"
                 " | {:04.2f}s per {} | Total Iters set to:{}".format(i, av, t, every, num_iter))
            all_losses = []
            t = time.time()
def test_sigmoid(self):
    r"""
    See TestActivations.Sigmoid.ipynb for the corresponding pytorch calculations
    :return:
    """
    x = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3]])
    x_node = node.VarNode('x')
    target = np.zeros(x.shape)
    target_node = node.VarNode('target')
    var_map = {'x': x, 'target': target}
    sigmoid = SigmoidNode(x_node)
    l2loss = L2DistanceSquaredNorm(sigmoid, target_node)
    x_node.forward(var_map)
    target_node.forward(var_map)
    value = sigmoid.value()
    expected_value = np.array([[0.73105858, 0.88079708, 0.95257413, 0.98201379],
                               [0.95257413, 0.98201379, 0.99330715, 0.99752738],
                               [0.26894142, 0.5, 0.73105858, 0.95257413]])
    np.testing.assert_almost_equal(expected_value, value)
    loss = l2loss.value()
    info("L2 Loss:{}".format(loss))
    log_at_info()
    l2loss.backward(1.0, self, var_map)
    x_grad = x_node.total_incoming_gradient()
    expected_x_grad = np.array([[0.28746968, 0.18495609, 0.08606823, 0.03469004],
                                [0.08606823, 0.03469004, 0.01320712, 0.00492082],
                                [0.10575419, 0.25, 0.28746968, 0.08606823]])
    info("-------------------------------------------------------------")
    info("x_grad = np.{}".format(repr(x_grad)))
    info("x_grad_expected= np.{}".format(repr(expected_x_grad)))
    np.testing.assert_almost_equal(expected_x_grad, x_grad)
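# A hedged numpy sanity sketch (standalone, not part of this test): with a zero
# target, the grids above are consistent with the closed forms
#     expected_value  == 1 / (1 + exp(-x))
#     expected_x_grad == 2 * sigmoid(x) * sigmoid(x) * (1 - sigmoid(x))
# e.g. for x = 0: sigmoid = 0.5 and 2 * 0.5 * 0.5 * 0.5 = 0.25, matching the grid.
#
#   s = 1.0 / (1.0 + np.exp(-x))          # matches expected_value
#   g = 2.0 * (s - 0.0) * s * (1.0 - s)   # matches expected_x_grad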
def test_softmax(self):
    r"""
    See TestActivations.Softmax.ipynb for corresponding pytorch calculations
    :return:
    """
    x = np.array([[1, 3, -1, 0], [0, 9, 1, 3]]).T
    x_node = node.VarNode('x')
    softmax = Softmax(x_node)
    target = np.zeros(x.shape)
    target_node = node.VarNode('target')
    var_map = {'x': x, 'target': target}
    l2loss = L2DistanceSquaredNorm(softmax, target_node)
    x_node.forward(var_map)
    target_node.forward(var_map)
    expected_value = np.array([[1.12457214e-01, 1.23048334e-04],
                               [8.30952661e-01, 9.97070980e-01],
                               [1.52194289e-02, 3.34480051e-04],
                               [4.13706969e-02, 2.47149186e-03]])
    value = softmax.value()
    np.testing.assert_almost_equal(expected_value, value)
    loss_value = l2loss.value()
    debug("Loss = {}".format(loss_value))
    l2loss.backward(1.0, self, var_map)
    x_grad = x_node.total_incoming_gradient()
    expected_grad = np.array([[-0.01666096, -0.00003058],
                              [0.02615019, 0.00072642],
                              [-0.00262479, -0.0000831],
                              [-0.00686445, -0.00061274]])
    debug("x_grad = np.{}".format(repr(x_grad)))
    np.testing.assert_almost_equal(expected_grad, x_grad)
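# Hedged numpy check (assumption: Softmax here normalizes each column, i.e. each
# sample is a column): the expected_value grid above matches
#
#   e = np.exp(x)
#   e / e.sum(axis=0)    # column-wise softmax; each column sums to 1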
def test_linear_fit(self):
    epochs = 2000
    iris = Iris()
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 3)
    softmax = act.Softmax(dense)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)
    optimizer_func = core.np.Optimization.AdamOptimizer(lr=0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()
    epoch = 0
    ctx = node.ComputeContext()
    for x, y in iris.train_iterator(epochs, 8):
        ctx['x'], ctx['yt'] = x, y
        loss_now = optimizer.step(ctx, 1.0)
        if epoch % 100 == 0:
            info("[{}]\tloss_now = {}".format(epoch, loss_now))
        epoch += 1
    f = node.make_evaluator([x_node, yt_node], softmax)
    total, correct = 40, 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        ctx['x'], ctx['yt'] = x, y_actual
        y_predicted = f.at(ctx)
        max_idx = np.argmax(y_predicted)
        if max_idx == y_actual:
            correct += 1
    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
def test_linear_transformation(self):
    np.random.seed(100)
    w_node = node.VarNode('w')
    x_node = node.VarNode('x')
    ya_node = node.VarNode('y_a')
    w = np.array([[1, 2, 1], [2, 0, -1]])
    x = np.array([[0.54340494, 0.27836939, 0.42451759, 0.84477613, 0.00471886],
                  [0.12156912, 0.67074908, 0.82585276, 0.13670659, 0.57509333],
                  [0.89132195, 0.20920212, 0.18532822, 0.10837689, 0.21969749]])
    y_act = np.array([[0.97862378, 0.81168315, 0.17194101, 0.81622475, 0.27407375],
                      [0.43170418, 0.94002982, 0.81764938, 0.33611195, 0.17541045]])
    info("Printing x...")
    info(x)
    info("Printing y_act...")
    info(y_act)
    var_map = {'w': w, 'x': x, 'y_a': y_act}
    wx_node = node.MatrixMultiplication(w_node, x_node)
    l2_node = L2DistanceSquaredNorm(wx_node, ya_node)
    log_at_info()
    w_node.forward(var_map)
    x_node.forward(var_map)
    ya_node.forward(var_map)
    l2_node.backward(1.0, self, var_map)
    info(wx_node.value())
    info("grad...")
    info(wx_node.total_incoming_gradient())
def make__two_layer_model():
    r"""
    Designed to be used only with the TestFullNetwork class; variable names
    etc. are referenced from the debugging/test code.
    """
    w_node = node.VarNode('w', True)
    x_node = node.VarNode('x')
    ya_node = node.VarNode('y_a')
    b_node = node.VarNode('b', True)
    w2_node = node.VarNode('w2', True)
    b2_node = node.VarNode('b2', True)
    start_nodes = [w_node, x_node, b_node, ya_node, w2_node, b2_node]
    w = np.array([[1, 3], [0, 1]])
    x = (np.array([[1, -1, 2]])).T
    b = np.array([[-2, -3]]).T
    y_act = np.array([[.5, .7]]).T
    w2 = np.array([[.1, .2], [.3, .07]])
    b2 = np.array([[.02, .3]]).T
    var_map = {'w': w, 'x': x, 'y_a': y_act, 'b': b, 'w2': w2, 'b2': b2}
    wx_node = node.MatrixMultiplication(w_node, x_node, "wx")
    sum_node = node.MatrixAddition(wx_node, b_node, "wx+b")
    sigmoid_node = SigmoidNode(sum_node, "sig")
    wx2_node = node.MatrixMultiplication(w2_node, sigmoid_node, "wx2")
    sum2_node = node.MatrixAddition(wx2_node, b2_node, "wx2+b2")
    l2_node = L2DistanceSquaredNorm(sum2_node, ya_node, "l2")
    return var_map, start_nodes, l2_node
def test_matrix_prd(self):
    w_node = node.VarNode('w')
    x_node = node.VarNode('x')
    w = np.array([[1, 3, 0], [0, 1, -1]])
    x = (np.array([[1, -1, 2]])).T
    w_grad_expected = np.array([x[:, 0], x[:, 0]])
    local_grad = np.array([[1, 1]]).T
    x_grad_expected = np.multiply(w, local_grad).sum(axis=0).T
    x_grad_expected = np.reshape(x_grad_expected, (len(x_grad_expected), 1))
    mult_node = node.MatrixMultiplication(w_node, x_node, name="wx")
    var_map = {'x': x, 'w': w}
    x_node.forward(var_map)
    self.assertIsNone(mult_node.value())
    w_node.forward(var_map)
    value = mult_node.value()
    expected = w @ x
    np.testing.assert_array_almost_equal(expected, value)
    mult_node.backward(local_grad, self, var_map)
    w_grad = w_node.total_incoming_gradient()
    print("---- printing w_grad ---")
    print(w_grad)
    np.testing.assert_array_almost_equal(w_grad, w_grad_expected)
    print("---- end printing ----")
    x_grad = x_node.total_incoming_gradient()
    print("---- printing x_grad ---")
    print(x_grad)
    np.testing.assert_array_almost_equal(x_grad_expected, x_grad)
    print("---- end printing ----")
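# For reference, the standard matrix-multiplication backprop identities, which are
# what the expected arrays above encode: with incoming gradient g for y = W @ x,
#     dL/dW = g @ x.T   -> here [[1], [1]] @ [[1, -1, 2]] = [[1, -1, 2], [1, -1, 2]]
#     dL/dx = W.T @ g   -> here [[1], [4], [-1]]
# i.e. exactly w_grad_expected and x_grad_expected as computed in the test.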
def test_basic(self):
    w_node = node.VarNode('w')
    x_node = node.VarNode('x')
    w = np.array([[1, 3, 0], [0, 1, -1]])
    x = np.array([[1, -1], [0, 2], [9, 1]])
    mult_node = node.MatrixMultiplication(w_node, x_node)
    var_map = {'x': x, 'w': w}
    x_node.forward(var_map)
    self.assertIsNone(mult_node.value())
    w_node.forward(var_map)
    value = mult_node.value()
    expected = w @ x
    np.testing.assert_array_almost_equal(expected, value)
    print(value)
    self.assertIsNotNone(x_node.value())
    mult_node.reset_network_fwd()
    # Just checking
    # Not None because fwd should start from start vars
    self.assertIsNotNone(x_node.value())
    b_node = node.VarNode('b')
    b = np.array([-1, -1])
    var_map['b'] = b
    sum_node = node.MatrixAddition(mult_node, b_node)
    var_nodes = [x_node, w_node, b_node]
    for var_node in var_nodes:
        var_node.forward(var_map)
    expected = expected + b
    np.testing.assert_array_almost_equal(expected, sum_node.value())
    print(sum_node.value())
def test_basic_op(self):
    np.random.seed(100)
    x = np.array([[1, -1], [2, 3], [-1, -2]], dtype=float)
    y = np.array([[-1, 1], [-3, -1]], dtype=float)
    x_node = node.VarNode('x')
    y_target = node.VarNode('y_target')
    dense = node.DenseLayer(x_node, 2, self.model_w, self.model_b)
    l2_node = L2DistanceSquaredNorm(dense, y_target)
    var_map = {'x': x, 'y_target': y}
    x_node.forward(var_map)
    y_target.forward(var_map)
    log_at_info()
    value = dense.value()
    info("------------------------------------------")
    info("Predicted value = np.{}".format(repr(value)))
    info("Target value = np.{}".format(repr(y)))
    value = l2_node.value()
    info("L2 node value (loss):{}".format(value))
    info("------------------------------------------")
    info("Printing weights (not updated yet)")
    info("------------------------------------------")
    info("Linear layer weight = np.{}".format(repr(dense.get_w())))
    info("Linear layer bias = np.{}".format(repr(dense.get_b())))
    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, y_target], l2_node, optim_func)
    optimizer.step(var_map, 1.0)
    np.set_printoptions(precision=64, floatmode='maxprec_equal')
    info("------------------------------------------")
    info("Printing after updating weights")
    info("------------------------------------------")
    info("Linear layer weight:{}".format(repr(dense.get_w())))
    info("Linear layer bias:{}".format(repr(dense.get_b())))
    info("w_grad = np.{}".format(repr(dense.get_w_grad())))
    info("b_grad = np.{}".format(repr(dense.get_b_grad())))
    expected_weight = np.array([[1.0000, 2.9850, -0.9910],
                                [-0.0040, -3.9755, 1.9845]])
    expected_bias = np.array([[-3.006], [2.009]])
    expected_w_grad = np.array([[0.0, 15.0, -9.0],
                                [4.0, -24.5, 15.5]])
    expected_b_grad = np.array([[6.], [-9.]])
    np.testing.assert_almost_equal(expected_weight, dense.get_w())
    np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
    np.testing.assert_almost_equal(expected_bias, dense.get_b())
    np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())
def test_convolution_with_l2(self):
    img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]])
    kernel = np.array([[1, -1], [0, 2]])
    y = np.ones((3, 3))
    img_node = node.VarNode('img')
    c2d = conv.Convolution2D(img_node, input_shape=(4, 4), kernel=kernel)
    target_node = node.VarNode('y')
    l2 = L2DistanceSquaredNorm(c2d, target_node)
    var_map = {'img': img, 'y': y}
    img_node.forward(var_map)
    target_node.forward(var_map)
    info("Original x into the convolution layer")
    info(repr(img))
    output_image = c2d.value()
    info("Output of the convolution layer")
    expected_output = np.array([[7., 9., 11.],
                                [-1., 1., 5.],
                                [3., -3., 6.]])
    np.testing.assert_array_almost_equal(expected_output, output_image)
    info(repr(output_image))
    log_at_info()
    info("Kernel before gradient descent")
    info(repr(c2d.get_kernel()))
    optim_func = self.rate_adjustable_optimizer_func(0.001)
    optimizer = core.np.Optimization.OptimizerIterator([img_node, target_node], l2, optim_func)
    loss = optimizer.step(var_map, 1.0)
    info("Took a single gradient descent step - calculated weights and updated gradients")
    info("<<<<Printing loss matrix after single step>>>>")
    info(repr(loss))
    info("Printing kernel:")
    info(repr(c2d.get_kernel()))
    info("--------------------------------------")
    info("Printing kernel gradient:")
    info(repr(c2d.get_kernel_grad()))
    info("-------------------------")
    info("Bias :{}".format(c2d.get_bias()))
    info("Bias gradient :{}".format(c2d.get_bias_grad()))
    expected_kernel = np.array([[0.98466667, -1.02288889],
                                [-0.02066667, 1.96355556]])
    np.testing.assert_array_almost_equal(expected_kernel, c2d.get_kernel())
    expected_kernel_grad = np.array([[15.33333333, 22.88888889],
                                     [20.66666667, 36.44444444]])
    np.testing.assert_array_almost_equal(expected_kernel_grad, c2d.get_kernel_grad())
    expected_bias = -0.0064444444444444445
    expected_bias_grad = 6.444444444444445
    np.testing.assert_almost_equal(expected_bias, c2d.get_bias())
    np.testing.assert_almost_equal(expected_bias_grad, c2d.get_bias_grad())
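# Worked arithmetic for the first convolution output above: sliding the 2x2 kernel
# [[1, -1], [0, 2]] over the top-left patch [[1, 2], [3, 4]] of img gives
# 1*1 + (-1)*2 + 0*3 + 2*4 = 7, which is expected_output[0, 0].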
def test_dropout_simple_input(self):
    x = np.array([[1, 2, 3], [3, 4, 1]])
    x_node = node.VarNode('x')
    dropout = reg.Dropout(x_node)
    ctx = node.ComputeContext({'x': x})
    x_node.forward(ctx)
    value = dropout.value()
    info("[BatchNormalizationTest.test_dropout_simple_input()] input value = np.{}".format(repr(x)))
    info("[BatchNormalizationTest.test_dropout_simple_input()] dropout value = np.{}".format(repr(value)))
def test_something(self):
    var = node.VarNode('')
    dense = node.DenseLayer(var, 100)
    print(dense.__class__.__name__)
    if isinstance(dense, node.MComputeNode):
        print("Is a compute node")
    else:
        print("Is not a compute node")
    mcompute_node = node.MComputeNode()
    class_obj = mcompute_node.__class__
    if isinstance(dense, class_obj):
        print("OK .. is a compute node")
    else:
        print("Not a compute node..")
def step(self, var_map, incoming_grad=1.0):
    r"""
    Resets the network, runs forward and backward propagation, and then applies
    the gradient update. The loss returned is the one computed before that update.
    :param var_map: dictionary mapping variable names to their current values
    :param incoming_grad: starting gradient, typically ones
    :return: loss computed on this forward pass, before the gradient update
    """
    for start_node in self.start_nodes:
        start_node.reset_network_fwd()
    self.end_node.reset_network_back()
    for start_node in self.start_nodes:
        start_node.forward(var_map)
    self.end_node.backward(incoming_grad, self, var_map)
    loss = self.end_node.value()
    self.end_node.optimizer_step(self.optimizer_function, var_map)
    return loss
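# A minimal usage sketch (assumption: nodes and optimizer constructed as in the
# surrounding tests, with VarNodes feeding a terminal loss node):
#
#   optimizer = OptimizerIterator([x_node, y_node], l2_node, optim_func)
#   loss = optimizer.step(var_map, 1.0)   # reset, forward, backward, then parameter update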
def test_dropout_with_dense(self):
    model_w = np.array([[1, 3, -1], [0, -4, 2.]])
    model_b = np.array([-3, 2.]).reshape((2, 1))
    x_node = node.VarNode('x')
    dense = node.DenseLayer(x_node, output_dim=2, initial_w=model_w, initial_b=model_b)
    p = .6
    dropout = reg.Dropout(dense, dropout_prob=p)
    x = np.array([[1, -1], [2, 3], [-1, -2.]])
    ctx = node.ComputeContext({'x': x})
    found_0 = False
    count = 0
    row_to_check = 0
    while not found_0:
        x_node.forward(ctx)
        output = dropout.value()
        sq = np.sum(np.square(output), axis=1)
        found_0 = sq[row_to_check] == 0
        count += 1
        if count > 100:
            raise Exception("Could not get 0's in first row after {} iterations.".format(count))
    info("[DenseLayerStandAlone.test_dropout_with_dense()] output = np.{}".format(repr(output)))
    dropout.backward(np.ones_like(output), self, ctx)
    w_grad = dense.get_w_grad()
    info("[DenseLayerStandAlone.test_dropout_with_dense()] w_grad = np.{}".format(repr(w_grad)))
    b_grad = dense.get_b_grad()
    info("[DenseLayerStandAlone.test_dropout_with_dense()] b_grad = np.{}".format(repr(b_grad)))
    wg_sq_sum = np.sum(np.square(w_grad), axis=1)
    self.assertEqual(0, wg_sq_sum[row_to_check])
    bg_sum_sq = np.sum(np.square(b_grad), axis=1)
    self.assertEqual(0, bg_sum_sq[row_to_check])
    # Test validation time (not training time)
    ctx.set_is_training(False)
    x_node.forward(ctx)
    test_output = dropout.value()
    np.testing.assert_array_almost_equal(test_output, dense.value() * p)
def setUp(self):
    x = np.array([[1, 2, -1, 4], [2, -1, 3, 1], [4, 9, -4, 5]])
    debug("x = np.{}".format(repr(x)))
    self.x_node = node.VarNode('x')
    self.var_map = {'x': x}
    self.max_pool_node = conv.MaxPool2D(self.x_node, pool_size=(2, 2), name="maxpool")
def test_linear_training_tf_fast(self):
    r"""
    For fastest results, use a batch size of 64, the Adam optimizer, and 3 epochs.
    You should get more than 97% accuracy.
    :return:
    """
    # Build the network
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    linear1 = node.DenseLayer(x_node, 100, name="Dense-First")
    relu1 = act.RelUNode(linear1, name="RelU-First")
    linear2 = node.DenseLayer(relu1, 200, name="Dense-Second")
    relu2 = act.RelUNode(linear2, name="RelU-Second")
    linear3 = node.DenseLayer(relu2, 10, name="Dense-Third")
    cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")
    # Set up optimizers and params
    batch_size = 64
    epochs = 5  # use 25 for SGD
    optimizer_func = autodiff_optim.AdamOptimizer()
    # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
    optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()
    losses = []
    x_train, y_train, x_val, y_val, x_test, y_test = mn.load_dataset(flatten=True)
    iter_count = 1
    predictor = node.make_evaluator([x_node, yt_node], linear3)
    total_time = time.time()
    ctx = node.ComputeContext({})
    for epoch in range(epochs):
        epoch_time = time.time()
        for x, y in iterate_over_minibatches(x_train, y_train, batch_size=batch_size):
            ctx['x'], ctx['yt'] = x.T, to_one_hot(y, max_cat_num=9)
            iter_loss = optimizer.step(ctx, 1.0) / batch_size
            losses.append(iter_loss)
            iter_count += 1
        epoch_time = time.time() - epoch_time
        loss_av = np.array(losses[:-batch_size + 1])
        loss_av = np.mean(loss_av)
        ctx['x'], ctx['yt'] = x_val.T, to_one_hot(y_val, max_cat_num=9)
        y_predicted = predictor(ctx)
        arg_max = np.argmax(y_predicted, axis=0)
        correct = arg_max == y_val
        percent = np.mean(correct) * 100
        info("Epoch {:2d}:: Validation "
             "accuracy:[{:5.2f}%] loss av={:01.8f}, time:{:2.3f}s".format(epoch, percent, loss_av, epoch_time))
        self.assertTrue(percent > 95)
    total_time = time.time() - total_time
    info("[Mnist784DsTest.test_linear_training()] total_time = {:5.3f} s".format(total_time))
def test_rnn_layer_with_loss(self):
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] self.data_dir = {}".format(self.data_dir))
    x = self.name_ds.line_to_numpy('ABCD')
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] ABCD: x = np.{}".format(repr(x)))
    debug("------------------------------------------------------")
    x = self.name_ds.line_to_numpy('Albert')
    debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] x = np.{}".format(repr(x)))
    debug("------------------------------------------------------")
    log_at_info()
    for i in range(5):
        c, l, category_index, name_tensor = self.name_ds.random_training_example()
        debug("[{}]:{}".format(c, l))
        cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
        debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] cat_tensor = np.{}".format(repr(cat_tensor)))
        x_node = n.VarNode('x')
        y_target_node = n.VarNode('y_target')
        ctx = n.ComputeContext({'x': name_tensor, 'y_target': cat_tensor})
        rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 128)
        loss_node = loss.LogitsCrossEntropy(rnn_node, y_target_node)
        x_node.forward(ctx)
        y_target_node.forward(ctx)
        y = rnn_node.value()
        info("[RnnLayerFullTests.test_rnn_layer_with_loss()] y = np.{}".format(repr(y)))
        loss_value = loss_node.value()
        info("[RnnLayerFullTests.test_rnn_layer_with_loss()] loss = np.{}".format(repr(loss_value)))
        loss_node.backward(1.0, self, ctx)
        grads = rnn_node.total_incoming_gradient()
        info(grads)
def do_linear_optimization(self, optim_func, epochs=25000, batch_size=8, do_assert=True):
    np.random.seed(100)
    x_node = node.VarNode('x')
    y_node = node.VarNode('y')
    net_w = np.array([[-1, -3, 1], [0, 4, -2]])
    net_b = np.array([3, -2]).reshape((2, 1))
    dense = node.DenseLayer(x_node, 2, net_w, net_b)
    l2_node = L2DistanceSquaredNorm(dense, y_node)
    # optim_func = self.rate_adjustable_optimizer_func(0.01)
    # adam = core.np.Optimization.AdamOptimizer()
    optimizer = core.np.Optimization.OptimizerIterator([x_node, y_node], l2_node, optim_func)
    log_at_info()
    epoch = 0
    losses = []
    for x, y in self.model.data(epochs, batch_size):
        var_map = {'x': x, 'y': y}
        loss = optimizer.step(var_map, 1.0)
        # losses.append(loss)
        if epoch % 100 == 0:
            losses.append([epoch, loss])
        if epoch % 1000 == 0:
            info("[{}] Loss:{}".format(epoch, loss))
        epoch += 1
    info("[{}] Loss:{}".format(epoch, loss))
    dense_w = dense.get_w()
    dense_b = dense.get_b()
    info("w = np.{}".format(repr(dense_w)))
    info("b = np.{}".format(repr(dense_b)))
    if do_assert:
        np.testing.assert_array_almost_equal(dense_w, self.model_w, 3)
        np.testing.assert_array_almost_equal(dense_b, self.model_b, 3)
    return np.array(losses)
def test_softmax_cross_entropy(self):
    predicted = np.array([[1, 3, -1, 0], [0, 9, 1, 3.]]).T.reshape(4, 2)
    debug("[SimpleCrossEntryTestCase.test_softmax_cross_entropy()] predicted = np.{}".format(repr(predicted)))
    target = np.array([[0, 1, 0, 0], [0, 0, 0, 1]]).T.reshape(4, 2)
    ctx = node.ComputeContext({'pred': predicted, 'target': target})
    pred_node = node.VarNode('pred')
    target_node = node.VarNode('target')
    sx = SoftmaxCrossEntropy(pred_node, target_node)
    pred_node.forward(ctx)
    target_node.forward(ctx)
    loss = sx.value()
    debug("[SimpleCrossEntryTestCase.test_softmax_cross_entropy()] loss = {}".format(repr(loss)))
    np.testing.assert_equal(6.188115770824936, loss)
    sx.backward(1.0, self, ctx)
    grad_at_p = pred_node.total_incoming_gradient()
    debug("[SimpleCrossEntryTestCase.test_softmax_cross_entropy()] grad_at_p = np.{}".format(repr(grad_at_p)))
    expected_grad = np.array([[1.12457214e-01, 1.23048334e-04],
                              [-1.69047339e-01, 9.97070980e-01],
                              [1.52194289e-02, 3.34480051e-04],
                              [4.13706969e-02, -9.97528508e-01]])
    np.testing.assert_array_almost_equal(expected_grad, grad_at_p)
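# Hedged PyTorch cross-check (assumption: PyTorch available; not a dependency here):
# with sum reduction, F.cross_entropy reproduces both numbers above, since the
# gradient of the summed loss w.r.t. the logits is softmax(logits) - one_hot.
#
#   import torch
#   import torch.nn.functional as F
#   logits = torch.tensor([[1., 3., -1., 0.], [0., 9., 1., 3.]], requires_grad=True)
#   lss = F.cross_entropy(logits, torch.tensor([1, 3]), reduction='sum')   # ~6.1881
#   lss.backward()
#   logits.grad    # transpose of expected_grad above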
def test_single_step(self):
    model_w = np.array([[1, 3, -1], [0, -4, 2.]])
    model_b = np.array([-3, 2.]).reshape((2, 1))
    x_node = node.VarNode('x')
    dense = node.DenseLayer(x_node, output_dim=2, initial_w=model_w, initial_b=model_b)
    x = np.array([[1, -1], [2, 3], [-1, -2.]])
    ctx = node.ComputeContext({'x': x})
    x_node.forward(ctx)
    output = dense.value()
    info("[DenseLayerStandAlone.test_single_step()] output = np.{}".format(repr(output)))
    dense.backward(np.ones_like(output), self, ctx)
    w_grad = dense.get_w_grad()
    info("[DenseLayerStandAlone.test_single_step()] w_grad = np.{}".format(repr(w_grad)))
    b_grad = dense.get_b_grad()
    info("[DenseLayerStandAlone.test_single_step()] b_grad = np.{}".format(repr(b_grad)))
def test_full_sigmoid_node(self):
    w_node = node.VarNode('w', True)
    x_node = node.VarNode('x')
    ya_node = node.VarNode('y_a')
    b_node = node.VarNode('b', True)
    start_nodes = [w_node, x_node, b_node, ya_node]
    w = np.array([[1, 3, 0], [0, 1, -1]])
    x = (np.array([[1, -1, 2]])).T
    b = np.array([[-2, -3]]).T
    y_act = np.array([[.5, .7]]).T
    var_map = {'w': w, 'x': x, 'y_a': y_act, 'b': b}
    wx_node = node.MatrixMultiplication(w_node, x_node)
    sum_node = node.MatrixAddition(wx_node, b_node)
    sigmoid_node = SigmoidNode(sum_node)
    l2_node = L2DistanceSquaredNorm(sigmoid_node, ya_node)
    optim_func = self.rate_adjustable_optimizer_func(0.01)
    optimizer = core.np.Optimization.OptimizerIterator(start_nodes, l2_node, optim_func)
    log_at_info()
    losses = []
    for i in range(100):
        loss = optimizer.step(var_map, 1.0)
        losses.append(loss)
        if i % 10 == 0:
            print("[{}] Loss:{}".format(i, loss))
    print("Final loss:{}".format(loss))
    print("w:{}".format(var_map['w']))
    print("b:{}".format(var_map['b']))
def test_rnn_layer(self):
    x = np.array([[1, 2, 1], [-1, 0, -.5]]).T
    x = x.reshape((3, 1, 2))
    input_node = n.VarNode('x')
    var_map = {'x': x}
    rnn_layer = rnn.SimpleRnnLayer(input_node, 4, 2)
    input_node.forward(var_map)
    y = rnn_layer.value()
    dely = y * .1
    rnn_layer.backward(dely, self, var_map)
    x_grad = input_node.total_incoming_gradient()
    debug("[SimpleRnnCellTests.test_rnn_layer()] x_grad = np.{}".format(repr(x_grad)))
def test_matrix_sum(self):
    a_node = node.VarNode('a')
    b_node = node.VarNode('b')
    a = np.array([[1, 2, 3], [-1, -3, 0]])
    b = np.array([[0, 1, -1], [2, 0, 1]])
    sum_node = node.MatrixAddition(a_node, b_node)
    var_map = {'a': a, 'b': b}
    a_node.forward(var_map)
    b_node.forward(var_map)
    matrix_sum = sum_node.value()
    expected_sum = a + b
    print(matrix_sum)
    np.testing.assert_array_almost_equal(expected_sum, matrix_sum)
    start_grad = np.ones_like(a)
    sum_node.backward(start_grad, self, var_map)
    grad_at_a = a_node.total_incoming_gradient()
    grad_at_b = b_node.total_incoming_gradient()
    print(grad_at_a)
    print("-------------")
    print(grad_at_b)
    np.testing.assert_array_almost_equal(grad_at_a, start_grad)
    np.testing.assert_array_almost_equal(grad_at_b, start_grad)
def test_forward(self):
    input_x_node = n.VarNode('x')
    rnn_cell = rnn.RnnCell(input_x_node, None, self.w_param, self.wb_param,
                           self.u_param, self.ub_param, self.h)
    input_x_node.forward(self.var_map)
    y, h = rnn_cell.value()
    debug("[SimpleRnnCellTests.test_forward()] y = np.{}".format(repr(y)))
    debug("[SimpleRnnCellTests.test_forward()] h = np.{}".format(repr(h)))
    dely, delh = y * .1, h * .1
    rnn_cell.backward((dely, delh), self, self.var_map)
    grad_x = input_x_node.total_incoming_gradient()
    debug("[SimpleRnnCellTests.test_forward()] grad_x = np.{}".format(repr(grad_x)))
def test_multi_layer(self):
    r"""
    This actually performs better with SGD and normal initialization,
    reaching almost 99% accuracy.
    :return:
    """
    iris = Iris()
    x_node = node.VarNode('x')
    yt_node = node.VarNode('yt')
    dense = node.DenseLayer(x_node, 16)
    tanh = act.TanhNode(dense)
    dense2 = node.DenseLayer(tanh, 10)
    relu = act.RelUNode(dense2)
    dense3 = node.DenseLayer(relu, 3)
    softmax = act.Softmax(dense3)
    cross_entropy = loss.CrossEntropy(softmax, yt_node)
    # optimizer_func = core.np.Optimization.AdamOptimizer()
    optimizer_func = core.np.Optimization.SGDOptimizer(lr=0.01)
    optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node], cross_entropy, optimizer_func)
    log_at_info()
    epoch = 0
    epochs = 10000
    batch_size = 8
    ctx = node.ComputeContext(weight_initializer=None)
    for x, y in iris.train_iterator(epochs, batch_size):
        ctx['x'], ctx['yt'] = x, y
        loss_now = optimizer.step(ctx, 1.0) / batch_size
        if epoch % 500 == 0:
            info("[{}]\tloss_now = {}".format(epoch, loss_now))
        epoch += 1
    f = node.make_evaluator([x_node, yt_node], softmax)
    total, correct = 100, 0
    for x, y_actual in iris.test_iterator(total, one_hot=False):
        var_map = {'x': x, 'yt': y_actual}
        y_predicted = f(var_map)
        max_idx = np.argmax(y_predicted)
        mark = 'x'
        if max_idx == y_actual:
            correct += 1
            mark = u'\u2713'
        print("X:{}, y_pred:{}, Actual={}, Predicted:{} {}".format(
            x.T, y_predicted.T, y_actual[0], max_idx, mark))
    percent = correct * 100 / total
    print("Correct= {}%".format(percent))
    self.assertTrue(percent > 95)
def test_l2norm(self):
    y_pred = np.array([[1, 2, 3]]).T
    y_act = np.array([[1, 1, 1]]).T
    y_del = y_pred - y_act
    expected_norm = np.sum(np.square(y_del)) / y_del.size
    y_p_node = node.VarNode('y_p')
    y_a_node = node.VarNode('y_a')
    var_map = {'y_p': y_pred, 'y_a': y_act}
    l2norm_node = L2DistanceSquaredNorm(y_p_node, y_a_node)
    y_p_node.forward(var_map)
    y_a_node.forward(var_map)
    l2norm = l2norm_node.value()
    print(l2norm)
    self.assertEqual(l2norm, expected_norm)
    ones = np.ones_like(y_pred)
    l2norm_node.backward(ones, self, var_map)
    grad_at_yp = y_p_node.total_incoming_gradient()
    print("start print grad at y_p:")
    print(grad_at_yp)
    print("end print grad at y_p")
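# Worked arithmetic: y_del = [[0], [1], [2]], so the mean squared distance is
# (0 + 1 + 4) / 3 = 5/3 ~ 1.6667, which is the expected_norm asserted above.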
def test_sigmoid_node(self):
    x_node = node.VarNode('x')
    x = (np.array([[1, -1, 2]])).T
    var_map = {'x': x}
    sigmoid = SigmoidNode(x_node)
    x_node.forward(var_map)
    value = sigmoid.value()
    expected_value = 1 / (1 + np.exp(-x))
    np.testing.assert_array_almost_equal(expected_value, value)
    debug(value)
    sigmoid.backward(np.ones_like(value), self, var_map)
    grad = x_node.total_incoming_gradient()
    expected_grad = expected_value * (1 - expected_value)
    debug(grad)
    np.testing.assert_array_almost_equal(expected_grad / expected_grad.size, grad)
def test_logit_cross_entropy(self):
    logits = np.array([[2, 1, 4, -1], [3, 2, 1, -9]])
    target_values = np.array([2, 0])
    one_hot_target = to_one_hot(target_values, logits.shape[1] - 1)
    debug(" [SimpleActivationTests.test_logit_cross_entropy()] one_hot_target = np.{}".format(repr(one_hot_target)))
    pred_node = node.VarNode('yp')
    target_node = node.VarNode('yt')
    var_map = {'yp': logits.T, 'yt': one_hot_target}
    lx = LogitsCrossEntropy(pred_node, target_node)
    pred_node.forward(var_map)
    target_node.forward(var_map)
    value = lx.value()
    expected = 0.2915627072172198
    debug(" [SimpleActivationTests.test_logit_cross_entropy()] value = {}".format(repr(value)))
    self.assertAlmostEqual(expected, value)
    lx.backward(1.0, self, var_map)
    grad = pred_node.total_incoming_gradient()
    debug(" [LogitCrossEntropyTests.test_logit_cross_entropy()] grad = np.{}".format(repr(grad)))
    expected_grad = np.array([[5.67748097e-02, -1.67380882e-01],
                              [2.08862853e-02, 1.22363735e-01],
                              [-8.04877463e-02, 4.50151026e-02],
                              [2.82665133e-03, 2.04368250e-06]])
    debug(" [LogitCrossEntropyTests.test_logit_cross_entropy()] expected_grad = np.{}".format(repr(expected_grad)))
    np.testing.assert_array_almost_equal(expected_grad, grad)
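# Hedged PyTorch cross-check (assumption: PyTorch available; not a dependency here):
# with the default mean reduction over the batch of 2, F.cross_entropy reproduces the
# expected value, and the logits gradient is (softmax - one_hot) / 2:
#
#   import torch
#   import torch.nn.functional as F
#   lg = torch.tensor([[2., 1., 4., -1.], [3., 2., 1., -9.]], requires_grad=True)
#   lss = F.cross_entropy(lg, torch.tensor([2, 0]))   # ~0.29156
#   lss.backward()
#   lg.grad    # transpose of expected_grad above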