Example #1
    def test_full_sgmoid_node(self):
        w_node = node.VarNode('w', True)
        x_node = node.VarNode('x')
        ya_node = node.VarNode('y_a')
        b_node = node.VarNode('b', True)
        start_nodes = [w_node, x_node, b_node, ya_node]

        w = np.array([[1, 3, 0], [0, 1, -1]])
        x = (np.array([[1, -1, 2]])).T
        b = np.array([[-2, -3]]).T
        y_act = np.array([[.5, .7]]).T
        var_map = {'w': w, 'x': x, 'y_a': y_act, 'b': b}
        wx_node = node.MatrixMultiplication(w_node, x_node)
        sum_node = node.MatrixAddition(wx_node, b_node)
        sigmoid_node = SigmoidNode(sum_node)
        l2_node = L2DistanceSquaredNorm(sigmoid_node, ya_node)

        optim_func = self.rate_adjustable_optimizer_func(0.01)
        optimizer = core.np.Optimization.OptimizerIterator(
            start_nodes, l2_node, optim_func)
        log_at_info()
        losses = []
        for i in range(100):
            loss = optimizer.step(var_map, 1.0)
            losses.append(loss)
            if i % 10 == 0:
                print("[{}] Loss:{}".format(i, loss))
        print("Final loss:{}".format(loss))
        print("w:{}".format(var_map['w']))
        print("b:{}".format(var_map['b']))
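
Several examples on this page call self.rate_adjustable_optimizer_func(...), a test helper that is not shown here. A minimal sketch of what it plausibly returns, assuming it builds a plain SGD update with the same (weight, gradient, local storage) signature as the inline optimizer_function in Example #14; the actual helper may differ.

    def rate_adjustable_optimizer_func(self, lr):
        # Assumed sketch: return a plain SGD update rule that OptimizerIterator
        # applies to each trainable parameter (signature mirrors the inline
        # optimizer_function in Example #14).
        def optimizer_function(w, grad, local_node_storage={}):
            return w - lr * grad

        return optimizer_function
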
Example #2
 def test_train(self):
     num_iter = 100000
     x_node = n.VarNode('x')
     y_target_node = n.VarNode('y_target')
     rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 15)
     loss_node = loss.SoftmaxCrossEntropy(rnn_node, y_target_node)
     all_losses = []
     # optimizer_func = autodiff_optim.AdamOptimizer()
     optimizer_func = autodiff_optim.SGDOptimizer(lr=0.0001)
     optimizer = autodiff_optim.OptimizerIterator([x_node, y_target_node],
                                                  loss_node, optimizer_func)
     ctx = n.ComputeContext({'x': "", 'y_target': ""})
     log_at_info()
     every = 500
     t = time.time()
     for i in range(1, num_iter + 1):
         rnn_node.set_initial_state_to_zero()
         c, l, category_index, name_tensor = self.name_ds.random_training_example(
         )
         cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
         ctx['x'] = name_tensor
         ctx['y_target'] = cat_tensor
         ctx['i'] = i
         loss_value = optimizer.step(ctx, 1.0)
         all_losses.append(loss_value)
         if i % every == 0:
             t = time.time() - t
             last_10 = all_losses[-every:]
             av = np.average(last_10)
             info("[{:06d}] Avg. loss = {:10.6f}"
                  " | {:04.2f}s per {}  | Total Iters set to:{}".format(
                      i, av, t, every, num_iter))
             all_losses = []
             t = time.time()
Example #3
    def test_linear_transformation(self):
        np.random.seed(100)
        w_node = node.VarNode('w')
        x_node = node.VarNode('x')
        ya_node = node.VarNode('y_a')

        w = np.array([[1, 2, 1], [2, 0, -1]])
        x = np.array(
            [[0.54340494, 0.27836939, 0.42451759, 0.84477613, 0.00471886],
             [0.12156912, 0.67074908, 0.82585276, 0.13670659, 0.57509333],
             [0.89132195, 0.20920212, 0.18532822, 0.10837689, 0.21969749]])
        y_act = np.array(
            [[0.97862378, 0.81168315, 0.17194101, 0.81622475, 0.27407375],
             [0.43170418, 0.94002982, 0.81764938, 0.33611195, 0.17541045]])
        info("Printing x...")
        info(x)
        info("Printing y_act...")
        info(y_act)

        var_map = {'w': w, 'x': x, 'y_a': y_act}

        wx_node = node.MatrixMultiplication(w_node, x_node)
        l2_node = L2DistanceSquaredNorm(wx_node, ya_node)
        log_at_info()
        w_node.forward(var_map)
        x_node.forward(var_map)
        ya_node.forward(var_map)
        l2_node.backward(1.0, self, var_map)
        info(wx_node.value())
        info("grad...")
        info(wx_node.total_incoming_gradient())
Example #4
 def test_sigmoid(self):
     r"""
     See TestActivations.Sigmoid.ipynb for the corresponding pytorch calculations
     :return:
     """
     x = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3]])
     x_node = node.VarNode('x')
     target = np.zeros(x.shape)
     target_node = node.VarNode('target')
     var_map = {'x': x, 'target': target}
     sigmoid = SigmoidNode(x_node)
     l2loss = L2DistanceSquaredNorm(sigmoid, target_node)
     x_node.forward(var_map)
     target_node.forward(var_map)
     value = sigmoid.value()
     expected_value = np.array([[0.73105858, 0.88079708, 0.95257413, 0.98201379],
                                [0.95257413, 0.98201379, 0.99330715, 0.99752738],
                                [0.26894142, 0.5, 0.73105858, 0.95257413]])
     np.testing.assert_almost_equal(expected_value, value)
     loss = l2loss.value()
     info("L2 Loss:{}".format(loss))
     log_at_info()
     l2loss.backward(1.0, self, var_map)
     x_grad = x_node.total_incoming_gradient()
     expected_x_grad = np.array([[0.28746968, 0.18495609, 0.08606823, 0.03469004],
                                 [0.08606823, 0.03469004, 0.01320712, 0.00492082],
                                 [0.10575419, 0.25, 0.28746968, 0.08606823]])
     info("-------------------------------------------------------------")
     info("x_grad = np.{}".format(repr(x_grad)))
     info("x_grad_expected= np.{}".format(repr(expected_x_grad)))
     np.testing.assert_almost_equal(expected_x_grad, x_grad)
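
The expected values above can be reproduced with plain NumPy. The gradient numbers are consistent with treating L2DistanceSquaredNorm here as an unnormalized sum of squared differences, so dL/dx = 2*(s - target)*s*(1 - s); the sketch below checks both arrays under that assumption.

    import numpy as np

    x = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3]], dtype=float)
    target = np.zeros(x.shape)

    s = 1.0 / (1.0 + np.exp(-x))                 # sigmoid forward pass
    x_grad = 2.0 * (s - target) * s * (1.0 - s)  # chain rule through the sigmoid

    np.testing.assert_almost_equal(
        s, np.array([[0.73105858, 0.88079708, 0.95257413, 0.98201379],
                     [0.95257413, 0.98201379, 0.99330715, 0.99752738],
                     [0.26894142, 0.5, 0.73105858, 0.95257413]]))
    np.testing.assert_almost_equal(
        x_grad, np.array([[0.28746968, 0.18495609, 0.08606823, 0.03469004],
                          [0.08606823, 0.03469004, 0.01320712, 0.00492082],
                          [0.10575419, 0.25, 0.28746968, 0.08606823]]))
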
Example #5
    def test_linear_fit(self):
        epochs = 2000
        iris = Iris()
        x_node = node.VarNode('x')
        yt_node = node.VarNode('yt')
        dense = node.DenseLayer(x_node, 3)
        softmax = act.Softmax(dense)
        cross_entropy = loss.CrossEntropy(softmax, yt_node)
        optimizer_func = core.np.Optimization.AdamOptimizer(lr=0.001)
        optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node],
                                                           cross_entropy,
                                                           optimizer_func)
        log_at_info()
        epoch = 0
        ctx = node.ComputeContext()
        for x, y in iris.train_iterator(epochs, 8):
            ctx['x'], ctx['yt'] = x, y
            loss_now = optimizer.step(ctx, 1.0)
            if epoch % 100 == 0:
                info("[{}]\tloss_now = {}".format(epoch, loss_now))
            epoch += 1

        f = node.make_evaluator([x_node, yt_node], softmax)
        total, correct = 40, 0
        for x, y_actual in iris.test_iterator(total, one_hot=False):
            ctx['x'], ctx['yt'] = x, y_actual
            y_predicted = f.at(ctx)
            max_idx = np.argmax(y_predicted)
            if max_idx == y_actual:
                correct += 1
        percent = correct * 100 / total
        print("Correct= {}%".format(percent))
Example #6
    def test_linear_training_tf_fast(self):
        r"""
        For fastest results, use batch size of 64, adam optimizer
        and 3 epochs. You should get more than 97% accuracy
        :return:
        """
        # Build the network
        x_node = node.VarNode('x')
        yt_node = node.VarNode('yt')
        linear1 = node.DenseLayer(x_node, 100, name="Dense-First")
        relu1 = act.RelUNode(linear1, name="RelU-First")
        linear2 = node.DenseLayer(relu1, 200, name="Dense-Second")
        relu2 = act.RelUNode(linear2, name="RelU-Second")
        linear3 = node.DenseLayer(relu2, 10, name="Dense-Third")
        cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

        # Set up optimizers and params
        batch_size = 64
        epochs = 5  # use 25 for SGD
        optimizer_func = autodiff_optim.AdamOptimizer()
        # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
        optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node],
                                                     cross_entropy,
                                                     optimizer_func)

        log_at_info()
        losses = []

        x_train, y_train, x_val, y_val, x_test, y_test = mn.load_dataset(
            flatten=True)
        iter_count = 1
        predictor = node.make_evaluator([x_node, yt_node], linear3)
        total_time = time.time()
        ctx = node.ComputeContext({})
        for epoch in range(epochs):
            epoch_time = time.time()
            for x, y in iterate_over_minibatches(x_train,
                                                 y_train,
                                                 batch_size=batch_size):
                ctx['x'], ctx['yt'] = x.T, to_one_hot(y, max_cat_num=9)
                iter_loss = optimizer.step(ctx, 1.0) / batch_size
                losses.append(iter_loss)
                iter_count += 1
            epoch_time = time.time() - epoch_time
            loss_av = np.array(losses[:-batch_size + 1])
            loss_av = np.mean(loss_av)
            ctx['x'], ctx['yt'] = x_val.T, to_one_hot(y_val, max_cat_num=9)
            y_predicted = predictor(ctx)
            arg_max = np.argmax(y_predicted, axis=0)
            correct = arg_max == y_val
            percent = np.mean(correct) * 100
            info("Epoch {:2d}:: Validation "
                 "accuracy:[{:5.2f}%] loss av={:01.8f}, time:{:2.3f}s".format(
                     epoch, percent, loss_av, epoch_time))
        self.assertTrue(percent > 95)
        total_time = time.time() - total_time
        info("[Mnist784DsTest.test_linear_training()] total_time = {:5.3f} s".
             format(total_time))
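
to_one_hot and iterate_over_minibatches are helpers imported by this test and not shown on this page. Since the example feeds x.T into the network and takes np.argmax(..., axis=0) over the predictions, targets are evidently encoded one column per sample. A hypothetical sketch of such an encoder (only the call signature is taken from the example; the body is an assumption):

    import numpy as np

    def to_one_hot(y, max_cat_num=9):
        # Hypothetical sketch: one row per category 0..max_cat_num,
        # one column per sample, matching the column-major layout above.
        y = np.asarray(y, dtype=int).ravel()
        one_hot = np.zeros((max_cat_num + 1, y.size))
        one_hot[y, np.arange(y.size)] = 1.0
        return one_hot
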
Example #7
    def test_multi_layer(self):
        r"""
        This actually performs better with SGD and normal initialization,
        reaching almost 99% accuracy.
        :return:
        """

        iris = Iris()
        x_node = node.VarNode('x')
        yt_node = node.VarNode('yt')
        dense = node.DenseLayer(x_node, 16)
        tanh = act.TanhNode(dense)

        dense2 = node.DenseLayer(tanh, 10)
        relu = act.RelUNode(dense2)

        dense3 = node.DenseLayer(relu, 3)
        softmax = act.Softmax(dense3)

        cross_entropy = loss.CrossEntropy(softmax, yt_node)
        #optimizer_func = core.np.Optimization.AdamOptimizer()
        optimizer_func = core.np.Optimization.SGDOptimizer(lr=0.01)
        optimizer = core.np.Optimization.OptimizerIterator([x_node, yt_node],
                                                           cross_entropy,
                                                           optimizer_func)
        log_at_info()

        epoch = 0
        epochs = 10000
        batch_size = 8
        ctx = node.ComputeContext(weight_initializer=None)
        for x, y in iris.train_iterator(epochs, batch_size):
            ctx['x'], ctx['yt'] = x, y
            loss_now = optimizer.step(ctx, 1.0) / batch_size
            if epoch % 500 == 0:
                info("[{}]\tloss_now = {}".format(epoch, loss_now))
            epoch += 1

        f = node.make_evaluator([x_node, yt_node], softmax)
        total, correct = 100, 0
        for x, y_actual in iris.test_iterator(total, one_hot=False):
            var_map = {'x': x, 'yt': y_actual}
            y_predicted = f(var_map)
            max_idx = np.argmax(y_predicted)
            mark = 'x'
            if max_idx == y_actual:
                correct += 1
                mark = u'\u2713'
            print("X:{}, y_pred:{}, Actual={}, Predicted:{}  {}".format(
                x.T, y_predicted.T, y_actual[0], max_idx, mark))
        percent = correct * 100 / total
        print("Correct= {}%".format(percent))
        self.assertTrue(percent > 95)
Example #8
    def test_basic_op(self):
        np.random.seed(100)

        x = np.array([[1, -1], [2, 3], [-1, -2]], dtype=float)
        y = np.array([[-1, 1], [-3, -1]], dtype=float)

        x_node = node.VarNode('x')
        y_target = node.VarNode('y_target')

        dense = node.DenseLayer(x_node, 2, self.model_w, self.model_b)
        l2_node = L2DistanceSquaredNorm(dense, y_target)

        var_map = {'x': x, 'y_target': y}
        x_node.forward(var_map)
        y_target.forward(var_map)

        log_at_info()
        value = dense.value()
        info("------------------------------------------")
        info("Predicted value = np.{}".format(repr(value)))
        info("Target    value = np.{}".format(repr(y)))
        value = l2_node.value()
        info("L2 node value (loss):{}".format(value))

        info("------------------------------------------")
        info("Printing weights (not updated yet)")
        info("------------------------------------------")
        info("Linear layer weight = np.{}".format(repr(dense.get_w())))
        info("Linear layer bias   = np.{}".format(repr(dense.get_b())))

        optim_func = self.rate_adjustable_optimizer_func(0.001)

        optimizer = core.np.Optimization.OptimizerIterator([x_node, y_target],
                                                           l2_node, optim_func)
        optimizer.step(var_map, 1.0)
        np.set_printoptions(precision=64, floatmode='maxprec_equal')
        info("------------------------------------------")
        info("Printing after updating weights")
        info("------------------------------------------")
        info("Linear layer weight:{}".format(repr(dense.get_w())))
        info("Linear layer bias:{}".format(repr(dense.get_b())))
        info("w_grad = np.{}".format(repr(dense.get_w_grad())))
        info("b_grad = np.{}".format(repr(dense.get_b_grad())))
        expected_weight = np.array([[1.0000, 2.9850, -0.9910],
                                    [-0.0040, -3.9755, 1.9845]])
        expected_bias = np.array([[-3.006], [2.009]])
        expected_w_grad = np.array([[0.0, 15.0, -9.0], [4.0, -24.5, 15.5]])
        expected_b_grad = np.array([[6.], [-9.]])

        np.testing.assert_almost_equal(expected_weight, dense.get_w())
        np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
        np.testing.assert_almost_equal(expected_bias, dense.get_b())
        np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())
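
self.model_w and self.model_b are not shown on this page. The expected arrays above are mutually consistent with initial weights [[1, 3, -1], [0, -4, 2]], bias [[-3], [2]], a gradient of the form (pred - y) @ x.T / batch_size, and a single SGD step at the 0.001 learning rate. A plain-NumPy sketch of that consistency check (the initial parameters and the gradient normalization are inferred, not taken from the source):

    import numpy as np

    x = np.array([[1, -1], [2, 3], [-1, -2]], dtype=float)
    y = np.array([[-1, 1], [-3, -1]], dtype=float)

    # Inferred initial parameters (not shown on this page).
    w0 = np.array([[1., 3., -1.], [0., -4., 2.]])
    b0 = np.array([[-3.], [2.]])
    batch_size = x.shape[1]

    pred = w0 @ x + b0
    d_pred = (pred - y) / batch_size          # upstream gradient assumed by this check
    w_grad = d_pred @ x.T
    b_grad = d_pred.sum(axis=1, keepdims=True)

    np.testing.assert_almost_equal(w_grad, np.array([[0.0, 15.0, -9.0], [4.0, -24.5, 15.5]]))
    np.testing.assert_almost_equal(b_grad, np.array([[6.], [-9.]]))
    # One SGD step at lr=0.001 reproduces the expected updated parameters.
    np.testing.assert_almost_equal(
        w0 - 0.001 * w_grad,
        np.array([[1.0000, 2.9850, -0.9910], [-0.0040, -3.9755, 1.9845]]))
    np.testing.assert_almost_equal(b0 - 0.001 * b_grad, np.array([[-3.006], [2.009]]))
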
Example #9
    def test_convolution_with_l2(self):
        img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]])
        kernel = np.array([[1, -1], [0, 2]])
        y = np.ones((3, 3))

        img_node = node.VarNode('img')
        c2d = conv.Convolution2D(img_node, input_shape=(4, 4), kernel=kernel)
        target_node = node.VarNode('y')
        l2 = L2DistanceSquaredNorm(c2d, target_node)

        var_map = {'img': img, 'y': y}
        img_node.forward(var_map)
        target_node.forward(var_map)

        info("Original x into the convolution layer")
        info(repr(img))
        output_image = c2d.value()
        info("Output of the convolution layer")
        expected_output = np.array([[7., 9., 11.],
                                    [-1., 1., 5.],
                                    [3., -3., 6.]])
        np.testing.assert_array_almost_equal(expected_output, output_image)
        info(repr(output_image))
        log_at_info()
        info("Kernel before gradient descent")
        info(repr(c2d.get_kernel()))

        optim_func = self.rate_adjustable_optimizer_func(0.001)

        optimizer = core.np.Optimization.OptimizerIterator([img_node, target_node], l2, optim_func)
        loss = optimizer.step(var_map, 1.0)
        info("Took a single gradient descent step - calculated weights and updated gradients")
        info("<<<<Printing loss matrix after single step>>>>")
        info(repr(loss))
        info("Printing kernel:")
        info(repr(c2d.get_kernel()))
        info("--------------------------------------")
        info("Printing kernel gradient:")
        info(repr(c2d.get_kernel_grad()))
        info("-------------------------")
        info("Bias :{}".format(c2d.get_bias()))
        info("Bias gradient :{}".format(c2d.get_bias_grad()))
        expected_kernel = np.array([[0.98466667, -1.02288889],
                                    [-0.02066667, 1.96355556]])
        np.testing.assert_array_almost_equal(expected_kernel, c2d.get_kernel())
        expected_kernel_grad = np.array([[15.33333333, 22.88888889],
                                         [20.66666667, 36.44444444]])
        np.testing.assert_array_almost_equal(expected_kernel_grad, c2d.get_kernel_grad())
        expected_bias = -0.0064444444444444445
        expected_bias_grad = 6.444444444444445
        np.testing.assert_almost_equal(expected_bias, c2d.get_bias())
        np.testing.assert_almost_equal(expected_bias_grad, c2d.get_bias_grad())
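
The expected 3x3 output above is simply the 'valid' cross-correlation of the 4x4 image with the 2x2 kernel, and the expected kernel and bias gradients are consistent with an L2 loss that averages the squared differences over the nine output elements. A plain-NumPy check of both, under that assumption about the loss normalization:

    import numpy as np

    img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]], dtype=float)
    kernel = np.array([[1, -1], [0, 2]], dtype=float)
    y = np.ones((3, 3))

    # Forward: 'valid' cross-correlation, one output per 2x2 patch.
    out = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            out[i, j] = np.sum(img[i:i + 2, j:j + 2] * kernel)
    np.testing.assert_array_almost_equal(
        out, np.array([[7., 9., 11.], [-1., 1., 5.], [3., -3., 6.]]))

    # Backward, assuming loss = mean((out - y)**2): upstream gradient 2*(out - y)/9.
    d_out = 2.0 * (out - y) / out.size
    kernel_grad = np.zeros_like(kernel)
    for i in range(2):
        for j in range(2):
            kernel_grad[i, j] = np.sum(d_out * img[i:i + 3, j:j + 3])
    bias_grad = np.sum(d_out)
    np.testing.assert_array_almost_equal(
        kernel_grad,
        np.array([[15.33333333, 22.88888889], [20.66666667, 36.44444444]]))
    np.testing.assert_almost_equal(bias_grad, 6.444444444444445)
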
Example #10
    def run_model(self, model, optimizer_func, epochs):
        var_map, start_nodes, l2_node = models.make__two_layer_model()
        optimizer = core.np.Optimization.OptimizerIterator(
            start_nodes, l2_node, optimizer_func)
        log_at_info()
        count = 0
        losses = []
        sum_losses = 0
        av = []
        x_axis = []
        for (x, y) in model.data(epochs, 2):
            # print("count:{}".format(count))
            var_map['x'] = x
            var_map['y_a'] = y
            loss = optimizer.step(var_map, 1.0)
            losses.append(loss)
            x_axis.append(count)
            sum_losses += loss
            if count % 500 == 0:
                last_100 = losses[-100:]
                average_l100 = sum(last_100) / len(last_100)
                av.append([count, average_l100])
                print("[{}] Current loss:{} Average loss so far:{}".format(
                    count, loss, average_l100))

            count += 1

        last_100 = losses[-100:]
        average_l100 = sum(last_100) / len(last_100)
        av.append([count, average_l100])

        info("Now printing w and b ..W:")
        info(var_map['w'])
        info("-------------b:")
        info(var_map['b'])
        info("---- print w2 and b2...  W2:")
        info(var_map['w2'])
        info("----- b2 ----")
        info(var_map['b2'])

        info("[{}] Current loss:{} Average loss so far:{}".format(
            count, loss, average_l100))
Example #11
    def test_rnn_layer_with_loss(self):
        debug(
            "[RnnLayerFullTests.test_rnn_layer_with_loss()] self.data_dir = {}"
            .format(self.data_dir))
        x = self.name_ds.line_to_numpy('ABCD')
        debug("[RnnLayerFullTests.test_rnn_layer_with_loss()] ABCD: x = np.{}".
              format(repr(x)))
        debug("------------------------------------------------------")
        x = self.name_ds.line_to_numpy('Albert')
        debug(
            "[RnnLayerFullTests.test_rnn_layer_with_loss()] x = np.{}".format(
                repr(x)))
        debug("------------------------------------------------------")
        log_at_info()
        for i in range(5):
            c, l, category_index, name_tensor = self.name_ds.random_training_example(
            )
            debug("[{}]:{}".format(c, l))
            cat_tensor = self.name_ds.category_idx_to_tensor([category_index])
            debug(
                "[RnnLayerFullTests.test_rnn_layer_with_loss()] cat_tensor = np.{}"
                .format(repr(cat_tensor)))

        x_node = n.VarNode('x')
        y_target_node = n.VarNode('y_target')

        ctx = n.ComputeContext({'x': name_tensor, 'y_target': cat_tensor})
        rnn_node = rnn.SimpleRnnLayer(x_node, self.name_ds.n_categories, 128)
        loss_node = loss.LogitsCrossEntropy(rnn_node, y_target_node)

        x_node.forward(ctx)
        y_target_node.forward(ctx)
        y = rnn_node.value()
        info(
            "[RnnLayerFullTests.test_rnn_layer_with_loss()]  y = np.{}".format(
                repr(y)))
        loss_value = loss_node.value()
        info("[RnnLayerFullTests.test_rnn_layer_with_loss()] loss = np.{}".
             format(repr(loss_value)))
        loss_node.backward(1.0, self, ctx)
        grads = rnn_node.total_incoming_gradient()
        info(grads)
Example #12
    def do_linear_optimization(self,
                               optim_func,
                               epochs=25000,
                               batch_size=8,
                               do_assert=True):
        np.random.seed(100)
        x_node = node.VarNode('x')
        y_node = node.VarNode('y')

        net_w = np.array([[-1, -3, 1], [0, 4, -2]])
        net_b = np.array([3, -2]).reshape((2, 1))

        dense = node.DenseLayer(x_node, 2, net_w, net_b)
        l2_node = L2DistanceSquaredNorm(dense, y_node)

        # optim_func = self.rate_adjustable_optimizer_func(0.01)
        # adam = core.np.Optimization.AdamOptimizer()
        optimizer = core.np.Optimization.OptimizerIterator([x_node, y_node],
                                                           l2_node, optim_func)
        log_at_info()
        epoch = 0
        losses = []
        for x, y in self.model.data(epochs, batch_size):
            var_map = {'x': x, 'y': y}
            loss = optimizer.step(var_map, 1.0)
            # losses.append(loss)
            if epoch % 100 == 0:
                losses.append([epoch, loss])
            if epoch % 1000 == 0:
                info("[{}] Loss:{}".format(epoch, loss))
            epoch += 1
        info("[{}] Loss:{}".format(epoch, loss))

        dense_w = dense.get_w()
        dense_b = dense.get_b()
        info("w = np.{}".format(repr(dense_w)))
        info("b = np.{}".format(repr(dense_b)))
        if do_assert:
            np.testing.assert_array_almost_equal(dense_w, self.model_w, 3)
            np.testing.assert_array_almost_equal(dense_b, self.model_b, 3)
        return np.array(losses)
Example #13
    def test_network_optimizer(self):
        w_node = node.VarNode('w', True)
        x_node = node.VarNode('x')
        ya_node = node.VarNode('y_a')
        b_node = node.VarNode('b', True)
        start_nodes = [w_node, x_node, b_node, ya_node]

        w = np.array([[1, 3, 0], [0, 1, -1]])
        x = (np.array([[1, -1, 2]])).T
        b = np.array([[-2, -3]]).T
        y_act = np.array([[1, 2]]).T
        var_map = {'w': w, 'x': x, 'y_a': y_act, 'b': b}

        wx_node = node.MatrixMultiplication(w_node, x_node)
        sum_node = node.MatrixAddition(wx_node, b_node)
        l2_node = L2DistanceSquaredNorm(sum_node, ya_node)
        optimizer = optim.OptimizerIterator(start_nodes, l2_node)
        log_at_info()
        for _ in range(500):
            loss = optimizer.step(var_map, 1.0)
        info("Final loss:{}".format(loss))
        self.assertTrue(math.fabs(loss) < 1e-25)
Example #14
    def test_convolution_small(self):
        img = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [-1, 0, 1, 3], [0, 2, -1, 4]])
        kernel = np.array([[1, -1], [0, 2]])
        img_node = node.VarNode('img')

        c2d = conv.Convolution2D(img_node, input_shape=(4, 4), kernel=kernel)
        var_map = {'img': img}
        img_node.forward(var_map)
        info("Original x into the convolution layer")
        info(repr(img))
        output_image = c2d.value()
        info("Output of the convolution layer")
        expected_output = np.array([[7., 9., 11.],
                                    [-1., 1., 5.],
                                    [3., -3., 6.]])
        np.testing.assert_array_almost_equal(expected_output, output_image)
        info(repr(output_image))
        log_at_info()
        c2d.backward(output_image * 0.1, self, var_map)
        info("Kernel before gradient descent")
        info(repr(c2d.get_kernel()))

        def optimizer_function(_w, grad, local_node_storage={}):
            return _w - 0.001 * grad

        optimizer = core.np.Optimization.OptimizerIterator([img_node], c2d, optimizer_function)
        loss = optimizer.step(var_map, np.ones_like(output_image))
        info("Printing loss matrix - not really loss but just the output of the last node")
        info(repr(loss))
        info("Printing kernel after gradient descent")
        info(repr(c2d.get_kernel()))
        expected_kernel = np.array([[0.998, -1.003111],
                                    [-1.444444444e-3, 1.9973333]])
        info("kernel gradient:{}".format(repr(c2d.kernel_grad)))
        np.testing.assert_array_almost_equal(expected_kernel, c2d.get_kernel())
        self.assertAlmostEqual(-0.001, c2d.get_bias())
        info("Bias after gradient descent:{}".format(c2d.get_bias()))
        info("Gradient of bias :{}".format(c2d.bias_grad))
Example #15
    def test_linear_training(self):
        r"""
        For fastest results, use batch size of 64, adam optimizer
        and 3 epochs. You should get more than 97% accuracy
        :return:
        """
        # Build the network
        x_node = node.VarNode('x')
        yt_node = node.VarNode('yt')
        linear1 = node.DenseLayer(x_node,
                                  100,
                                  name="Dense-First",
                                  weight_scale=0.01)
        relu1 = act.RelUNode(linear1, name="RelU-First")
        linear2 = node.DenseLayer(relu1,
                                  200,
                                  name="Dense-Second",
                                  weight_scale=0.01)
        relu2 = act.RelUNode(linear2, name="RelU-Second")
        linear3 = node.DenseLayer(relu2,
                                  10,
                                  name="Dense-Third",
                                  weight_scale=0.01)
        cross_entropy = loss.LogitsCrossEntropy(linear3, yt_node, name="XEnt")

        # Set up optimizers and params
        batch_size = 64
        epochs = 3
        optimizer_func = autodiff_optim.AdamOptimizer()
        # optimizer_func = autodiff_optim.SGDOptimizer(lr=.1)
        optimizer = autodiff_optim.OptimizerIterator([x_node, yt_node],
                                                     cross_entropy,
                                                     optimizer_func)

        log_at_info()
        losses = []

        iter_count = 1
        predictor = node.make_evaluator([x_node, yt_node], linear3)

        ctx = node.ComputeContext({})
        mnist = Mnist784()
        total_time = time.time()
        for epoch in range(epochs):
            epoch_time = time.time()
            iter = 0
            for x, y in mnist.train_iterator_seq(batch_size=batch_size):
                ctx['x'], ctx['yt'] = x, y
                iter_loss = optimizer.step(ctx, 1.0) / batch_size
                losses.append(iter_loss)
                iter += 1
                if iter % 100 == 0:
                    print("iter:{}".format(iter))

            loss_av = np.array(losses[:-batch_size + 1])
            loss_av = np.mean(loss_av)
            e, xv, yv = mnist.test_iterator(1, batch_size=-1, one_hot=False)
            ctx['x'], ctx['yt'] = xv, yv
            percent = self.measure_validation_perf(predictor, ctx, yv)
            epoch_time = time.time() - epoch_time
            info("Iter {:2d}:: Val:{:2.4f}% , loss av={:01.8f}, time:{:2.3f}s".
                 format(epoch, percent, loss_av, epoch_time))
        total_time = time.time() - total_time
        info("Total time taken:{:4.4f}".format(total_time))
Example #16
    def test_basic_op_large_matrix(self):
        r"""
        Runs test for a slightly larger matrix
        :return:
        """
        x = np.array([[0.54566752, 0.66921034, 0.35265542, 0.32324271, 0.35036963, 0.05317591],
                      [0.97433629, 0.5027976, 0.15637831, 0.72948084, 0.42097552, 0.52522781],
                      [0.41793729, 0.48112345, 0.46862087, 0.88918467, 0.48792933, 0.32439625],
                      [0.4775774, 0.58105899, 0.35079832, 0.79657794, 0.3910011, 0.72908915]])
        w = np.array([[0.61013274, 0.86914947, 0.95211922, 0.96385655],
                      [0.64290252, 0.2717017, 0.193146, 0.05004571],
                      [0.14360354, 0.54256991, 0.90870491, 0.06577582]])
        b = np.array([[0.76026806], [0.32982798], [0.01258297]])
        pred = w @ x + b
        target = np.ones_like(pred)

        x_node = node.VarNode('x')
        target_node = node.VarNode('y_target')

        dense = node.DenseLayer(x_node, 3, w, b)
        l2_node = L2DistanceSquaredNorm(dense, target_node)

        var_map = {'x': x, 'y_target': target}
        x_node.forward(var_map)
        target_node.forward(var_map)

        log_at_info()
        predicted = dense.value()
        info("------------------------------------------")
        info("Predicted value = np.{}".format(repr(predicted)))
        info("Target    value = np.{}".format(repr(target)))
        loss = l2_node.value()
        info("L2 node value (loss):{}".format(loss))

        info("------------------------------------------")
        info("Printing weights (not updated yet)")
        info("------------------------------------------")
        info("Linear layer weight = np.{}".format(repr(dense.get_w())))
        info("Linear layer bias   = np.{}".format(repr(dense.get_b())))

        optim_func = self.rate_adjustable_optimizer_func(0.001)

        optimizer = core.np.Optimization.OptimizerIterator(
            [x_node, target_node], l2_node, optim_func)
        optimizer.step(var_map, 1.0)
        np.set_printoptions(precision=64, floatmode='maxprec_equal')
        info("------------------------------------------")
        info("Printing after updating weights")
        info("------------------------------------------")
        info("weight=np.{}".format(repr(dense.get_w())))
        info("w_grad = np.{}".format(repr(dense.get_w_grad())))
        info("bias = np.{}".format(repr(dense.get_b())))
        info("b_grad = np.{}".format(repr(dense.get_b_grad())))

        # These are values from pytorch
        expected_weight = np.array(
            [[0.60973525, 0.86854088, 0.95157486, 0.96327269],
             [0.64292222, 0.27173772, 0.19318908, 0.05009926],
             [0.14362818, 0.54258782, 0.90872669, 0.06581017]])
        expected_w_grad = np.array(
            [[0.39752683, 0.60859025, 0.54437733, 0.58387089],
             [-0.01970989, -0.03603142, -0.04307830, -0.05355303],
             [-0.02465229, -0.01786957, -0.02174304, -0.03434603]])
        expected_bias = np.array([[0.75927186, 0.32992661, 0.01267095]]).T
        expected_b_grad = np.array([[0.99619532, -0.09862594, -0.08797690]]).T

        np.testing.assert_almost_equal(expected_weight, dense.get_w())
        np.testing.assert_almost_equal(expected_w_grad, dense.get_w_grad())
        np.testing.assert_almost_equal(expected_bias, dense.get_b())
        np.testing.assert_almost_equal(expected_b_grad, dense.get_b_grad())
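
The updated weights and bias above are consistent with a single plain SGD step at the 0.001 learning rate handed to rate_adjustable_optimizer_func, i.e. new_value = old_value - 0.001 * gradient. A short check of that relationship for the bias, reusing the PyTorch-derived numbers from this example:

    import numpy as np

    lr = 0.001
    b = np.array([[0.76026806], [0.32982798], [0.01258297]])
    expected_b_grad = np.array([[0.99619532], [-0.09862594], [-0.08797690]])
    expected_bias = np.array([[0.75927186], [0.32992661], [0.01267095]])

    # One SGD step on the bias reproduces the expected post-update value.
    np.testing.assert_almost_equal(expected_bias, b - lr * expected_b_grad)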