Esempio n. 1
0
    def test_ModelTest_CheckSaveLoad_Insufficient(self):
        """Check that loading fails when the target model lacks a parameter.

        A model holding parameter ``p`` and a submodel ``sm`` (which holds its
        own ``p``) is saved. It is then loaded into a model whose submodel has
        no parameter registered, and ``load`` is expected to raise
        ``RuntimeError``.
        """
        shape = Shape([2, 2])
        values1 = [1, 2, 3, 4]
        values2 = [5, 6, 7, 8]

        # Use the temporary file as a context manager so that it is closed
        # even when the test fails part-way through.
        with tempfile.NamedTemporaryFile() as tmp:
            m1 = Model()
            m2 = Model()
            p1 = Parameter(shape, I.Constant(0))
            p1.value += tF.raw_input(shape, values1)
            p2 = Parameter(shape, I.Constant(0))
            p2.value += tF.raw_input(shape, values2)
            m1.add("p", p1)
            m2.add("p", p2)
            m1.add("sm", m2)

            m1.save(tmp.name)

            # Rebuild the hierarchy, but leave the submodel without "p".
            m1 = Model()
            m2 = Model()
            p1 = Parameter()
            m1.add("p", p1)
            m1.add("sm", m2)

            with self.assertRaises(RuntimeError):
                m1.load(tmp.name)
Esempio n. 2
0
def main():
    """Train a two-layer network on four 2-D samples and print progress.

    Four parameters (two weight matrices, two biases) are registered with an
    SGD trainer. For 100 epochs a fresh graph is built, the four predictions
    and the batch-mean squared error are printed, and the parameters are
    updated.
    """
    with DefaultScopeDevice(CPUDevice()):
        pw1 = Parameter("w1", [8, 2], I.XavierUniform())
        pb1 = Parameter("b1", [8], I.Constant(0))
        pw2 = Parameter("w2", [1, 8], I.XavierUniform())
        pb2 = Parameter("b2", [], I.Constant(0))

        trainer = T.SGD(0.1)

        trainer.add_parameter(pw1)
        trainer.add_parameter(pb1)
        trainer.add_parameter(pw2)
        trainer.add_parameter(pb2)

        input_data = np.array(
            [
                [1, 1],  # Sample 1
                [1, -1],  # Sample 2
                [-1, 1],  # Sample 3
                [-1, -1],  # Sample 4
            ],
            dtype=np.float32)

        output_data = np.array(
            [
                1,  # Label 1
                -1,  # Label 2
                -1,  # Label 3
                1,  # Label 4
            ],
            dtype=np.float32)

        for i in range(100):
            g = Graph()
            with DefaultScopeGraph(g):
                # Builds a computation graph.
                #x = F.input(shape=Shape([2], 4), data=input_data)
                x = F.input(data=input_data)
                w1 = F.input(param=pw1)
                b1 = F.input(param=pb1)
                w2 = F.input(param=pw2)
                b2 = F.input(param=pb2)
                h = F.tanh(F.matmul(w1, x) + b1)
                y = F.matmul(w2, h) + b2

                # Calculates values.
                y_val = g.forward(y).to_list()
                print("epoch ", i, ":")
                for j in range(4):
                    print("  [", j, "]: ", y_val[j])

                # Builds the target node once per epoch. It used to sit inside
                # the printing loop above, which added four identical input
                # nodes to the graph on every epoch.
                #t = F.input(shape=Shape([], 4), data=output_data)
                t = F.input(data=output_data)
                diff = t - y
                loss = F.batch.mean(diff * diff)
                loss_val = g.forward(loss).to_list()[0]
                print("  loss: ", loss_val)
                trainer.reset_gradients()
                g.backward(loss)
                trainer.update()
Esempio n. 3
0
    def test_ModelTest_CheckSaveLoad_Same(self):
        """Check that a model round-trips through save/load unchanged.

        A model holding parameter ``p`` and a submodel ``sm`` (with its own
        ``p``) is saved and then loaded into a freshly built hierarchy of the
        same layout. The loaded parameters must be valid and carry the saved
        shape and values.
        """
        shape = Shape([2, 2])
        values1 = [1, 2, 3, 4]
        values2 = [5, 6, 7, 8]

        # Use the temporary file as a context manager so that it is closed
        # even when the test fails part-way through.
        with tempfile.NamedTemporaryFile() as tmp:
            m1 = Model()
            m2 = Model()
            p1 = Parameter(shape, I.Constant(0))
            p1.value += tF.raw_input(shape, values1)
            p2 = Parameter(shape, I.Constant(0))
            p2.value += tF.raw_input(shape, values2)
            m1.add("p", p1)
            m2.add("p", p2)
            m1.add("sm", m2)

            m1.save(tmp.name)

            # Rebuild the same hierarchy with uninitialized parameters.
            m1 = Model()
            m2 = Model()
            p1 = Parameter()
            p2 = Parameter()
            m1.add("p", p1)
            m2.add("p", p2)
            m1.add("sm", m2)

            m1.load(tmp.name)

        self.assertTrue(p1.valid())
        self.assertTrue(p2.valid())
        self.assertEqual(shape, p1.shape())
        self.assertEqual(shape, p2.shape())
        self.assertEqual(values1, p1.value.to_list())
        self.assertEqual(values2, p2.value.to_list())
Esempio n. 4
0
 def __init__(self, in_size, out_size, trainer):
     """Create three parameters and register each with ``trainer``.

     ``pw_`` has shape ``[3 * out_size, in_size]`` and is initialized with
     ``I.Uniform(-0.1, 0.1)``; ``pbf_`` and ``pbr_`` both have shape
     ``[out_size]`` and start at zero.
     """
     self.out_size_ = out_size

     stacked_rows = 3 * out_size
     self.pw_ = Parameter([stacked_rows, in_size], I.Uniform(-0.1, 0.1))
     self.pbf_ = Parameter([out_size], I.Constant(0))
     self.pbr_ = Parameter([out_size], I.Constant(0))

     # Registration order matches creation order.
     for param in (self.pw_, self.pbf_, self.pbr_):
         trainer.add_parameter(param)
Esempio n. 5
0
 def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
     """Initialize the parameters and LSTMs of the AttentionalEncoderDecoder.

     Each parameter is initialized in a fixed order with the shape and
     initializer listed below; afterwards the two source LSTMs and the
     target LSTM are initialized.
     """
     layout = (
         (self.psrc_lookup, [embed_size, src_vocab_size], I.XavierUniform()),
         (self.ptrg_lookup, [embed_size, trg_vocab_size], I.XavierUniform()),
         (self.pwhj, [embed_size, 2 * hidden_size], I.XavierUniform()),
         (self.pbj, [embed_size], I.Constant(0)),
         (self.pwjy, [trg_vocab_size, embed_size], I.XavierUniform()),
         (self.pby, [trg_vocab_size], I.Constant(0)),
     )
     for param, shape, initializer in layout:
         param.init(shape, initializer)

     # Both source LSTMs take embed_size inputs; the target LSTM takes twice
     # that.
     for lstm, input_size in (
             (self.src_fw_lstm, embed_size),
             (self.src_bw_lstm, embed_size),
             (self.trg_lstm, 2 * embed_size),
     ):
         lstm.init(input_size, hidden_size)
Esempio n. 6
0
 def test_model_load_save(self):
     """Round-trip a parent model with a nested submodel through save/load.

     The source hierarchy is filled with known values and saved to a
     temporary file; an identically structured hierarchy of empty parameters
     is loaded from it and every parameter is compared element-wise.
     """
     p1_rows = [[0, 1], [2, 3], [4, 5], [6, 7]]
     p2_rows = [[9, 8], [7, 6], [5, 4], [3, 2]]
     sp1_rows = [[0, 1, 2, 3], [4, 5, 6, 7]]
     sp2_rows = [[9, 8, 7, 6], [5, 4, 3, 2]]

     # Source hierarchy: submodel first, then its parent.
     src_sub = TestModel()
     src_sub.sp1 = Parameter([2, 4], I.Constant(0))
     src_sub.sp1.value = tF.input(np.array(sp1_rows))
     src_sub.sp2 = Parameter([2, 4], I.Constant(0))
     src_sub.sp2.value = tF.input(np.array(sp2_rows))
     src_sub.add("sp1", src_sub.sp1)
     src_sub.add("sp2", src_sub.sp2)

     src_parent = TestModel()
     src_parent.p1 = Parameter([4, 2], I.Constant(0))
     src_parent.p1.value = tF.input(np.array(p1_rows))
     src_parent.p2 = Parameter([4, 2], I.Constant(0))
     src_parent.p2.value = tF.input(np.array(p2_rows))
     src_parent.sub = src_sub
     src_parent.add("p1", src_parent.p1)
     src_parent.add("p2", src_parent.p2)
     src_parent.add("sub", src_parent.sub)

     # Destination hierarchy: same layout, uninitialized parameters.
     dst_sub = TestModel()
     dst_sub.sp1 = Parameter()
     dst_sub.sp2 = Parameter()
     dst_sub.add("sp1", dst_sub.sp1)
     dst_sub.add("sp2", dst_sub.sp2)

     dst_parent = TestModel()
     dst_parent.p1 = Parameter()
     dst_parent.p2 = Parameter()
     dst_parent.sub = dst_sub
     dst_parent.add("p1", dst_parent.p1)
     dst_parent.add("p2", dst_parent.p2)
     dst_parent.add("sub", dst_parent.sub)

     with tempfile.NamedTemporaryFile() as tmp_file:
         src_parent.save(tmp_file.name)
         dst_parent.load(tmp_file.name)

     checks = (
         (dst_parent.p1, p1_rows),
         (dst_parent.p2, p2_rows),
         (dst_parent.sub.sp1, sp1_rows),
         (dst_parent.sub.sp2, sp2_rows),
     )
     for loaded, rows in checks:
         restored = loaded.value.to_ndarrays()[0]
         self.assertTrue((restored == np.array(rows)).all())
Esempio n. 7
0
 def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
     """Initialize every parameter and the three LSTM sub-modules.

     Parameters are initialized in the order listed in ``layout``; the two
     source LSTMs take ``embed_size`` inputs and the target LSTM takes
     ``embed_size + 2 * hidden_size`` inputs.
     """
     double_hidden = hidden_size * 2
     layout = (
         (self.psrc_lookup_, [embed_size, src_vocab_size], I.XavierUniform()),
         (self.ptrg_lookup_, [embed_size, trg_vocab_size], I.XavierUniform()),
         (self.pwfbw_, [double_hidden, hidden_size], I.XavierUniform()),
         (self.pwhw_, [hidden_size, hidden_size], I.XavierUniform()),
         (self.pwwe_, [hidden_size], I.XavierUniform()),
         (self.pwhj_, [embed_size, hidden_size], I.XavierUniform()),
         (self.pbj_, [embed_size], I.Constant(0)),
         (self.pwjy_, [trg_vocab_size, embed_size], I.XavierUniform()),
         (self.pby_, [trg_vocab_size], I.Constant(0)),
     )
     for param, shape, initializer in layout:
         param.init(shape, initializer)

     for lstm, input_size in (
             (self.src_fw_lstm_, embed_size),
             (self.src_bw_lstm_, embed_size),
             (self.trg_lstm_, embed_size + double_hidden),
     ):
         lstm.init(input_size, hidden_size)
    def test_device_instance(self):
        """Check that device accessors return the identical device object.

        Covers the default device, tensors and nodes created without an
        explicit device, a node created on a separate ``Naive`` device (both
        via the node and via the graph), and a newly created parameter.
        """
        self.assertIs(Device.get_default(), self.device)

        # Tensor created on the default device.
        default_tensor = tF.raw_input([], [0])
        self.assertIs(default_tensor.device(), self.device)

        # Node created on the default device.
        default_node = F.raw_input([], [0])
        self.assertIs(default_node.device(), self.device)

        # A second device must be a distinct object.
        other_device = Naive()
        self.assertIsNot(other_device, self.device)

        # Node explicitly placed on the second device.
        placed_node = F.raw_input([], [0], device=other_device)
        self.assertIs(placed_node.device(), other_device)
        self.assertIs(self.graph.get_device(placed_node), other_device)

        # Parameter created without an explicit device.
        scalar_param = Parameter([], I.Constant(1))
        self.assertIs(scalar_param.device(), self.device)
Esempio n. 9
0
    def test_Parameter_argument(self):
        """Exercise the argument combinations accepted by ``Parameter``.

        Checks construction from a shape alone, a shape with an initializer,
        a shape with a list of floats, an ndarray with and without a shape,
        and that a bare list without a shape raises ``TypeError``.
        """
        # Shape only.
        shape_only = Parameter(Shape([2, 3]))
        self.assertEqual(shape_only.shape(), Shape([2, 3]))

        # Shape plus Initializer.
        from_initializer = Parameter(Shape([4, 3]), I.Constant(1))
        self.assertEqual(from_initializer.shape(), Shape([4, 3]))
        self.assertEqual(from_initializer.value.to_list(), [1] * 12)

        # Shape plus list[float].
        from_list = Parameter(Shape([4, 3]), self.list_data[:12])
        self.assertEqual(from_list.shape(), Shape([4, 3]))
        self.assertEqual(from_list.value.to_list(), self.list_data[:12])

        # ndarray, shape taken from the array.
        from_array = Parameter(init=self.ndarray_data[0])
        self.assertEqual(from_array.shape(), Shape([4, 3]))
        self.assertEqual(from_array.value.to_list(), self.list_data[:12])

        # ndarray with an explicit shape.
        reshaped = Parameter(Shape([2, 6]), init=self.ndarray_data[0])
        self.assertEqual(reshaped.shape(), Shape([2, 6]))
        self.assertEqual(reshaped.value.to_list(), self.list_data[:12])

        # list[float] without a shape is rejected.
        with self.assertRaises(TypeError):
            Parameter(init=self.list_data[:12])
Esempio n. 10
0
 def __init__(self, in_size, out_size, trainer):
     """Create three parameters and register each with ``trainer``.

     ``pwxh_`` is ``[4 * out_size, in_size]`` and ``pwhh_`` is
     ``[4 * out_size, out_size]``, both drawn from ``I.Uniform(-0.1, 0.1)``;
     ``pbh_`` is ``[4 * out_size]`` and starts at zero.
     """
     self.out_size_ = out_size

     stacked_rows = 4 * out_size
     self.pwxh_ = Parameter([stacked_rows, in_size], I.Uniform(-0.1, 0.1))
     self.pwhh_ = Parameter([stacked_rows, out_size], I.Uniform(-0.1, 0.1))
     self.pbh_ = Parameter([stacked_rows], I.Constant(0))

     # Registration order matches creation order.
     for param in (self.pwxh_, self.pwhh_, self.pbh_):
         trainer.add_parameter(param)
Esempio n. 11
0
 def init(self, src_vocab_size, trg_vocab_size, embed_size, hidden_size):
     """Initialize the parameters and LSTMs of the EncoderDecoder.

     The four parameters are initialized in the order listed in ``layout``;
     the source and target LSTMs are then initialized with
     ``(embed_size, hidden_size)``.
     """
     layout = (
         (self.psrc_lookup, [embed_size, src_vocab_size], I.XavierUniform()),
         (self.ptrg_lookup, [embed_size, trg_vocab_size], I.XavierUniform()),
         (self.pwhy, [trg_vocab_size, hidden_size], I.XavierUniform()),
         (self.pby, [trg_vocab_size], I.Constant(0)),
     )
     for param, shape, initializer in layout:
         param.init(shape, initializer)

     for lstm in (self.src_lstm, self.trg_lstm):
         lstm.init(embed_size, hidden_size)
Esempio n. 12
0
    def test_Parameter_argument(self):
        """Check ``Parameter`` built with no arguments and with an initializer.

        A parameter built without arguments must report itself as not valid;
        one built from a shape and ``I.Constant(1)`` must have that shape and
        hold twelve ones.
        """
        # No argument.
        uninitialized = Parameter()
        self.assertFalse(uninitialized.valid())

        # Shape plus Initializer.
        constant_filled = Parameter(Shape([4, 3]), I.Constant(1))
        self.assertEqual(constant_filled.shape(), Shape([4, 3]))
        self.assertEqual(constant_filled.value.to_list(), [1] * 12)
Esempio n. 13
0
    def test_ModelTest_CheckSaveLoadWithStats(self):
        """Check that values and named stats survive a save/load round trip.

        Two parameters, each with one named stats tensor, are stored in a
        model and a submodel, saved, and loaded into a freshly built
        hierarchy of the same layout. The loaded parameters must be valid
        and carry the saved shape, values and stats.
        """
        shape = Shape([2, 2])
        values1 = [1, 2, 3, 4]
        values2 = [5, 6, 7, 8]
        stats1 = [10, 20, 30, 40]
        stats2 = [50, 60, 70, 80]

        # Use the temporary file as a context manager so that it is closed
        # even when the test fails part-way through.
        with tempfile.NamedTemporaryFile() as tmp:
            m1 = Model()
            m2 = Model()
            p1 = Parameter(shape, I.Constant(0))
            p1.value += tF.raw_input(shape, values1)
            p2 = Parameter(shape, I.Constant(0))
            p2.value += tF.raw_input(shape, values2)
            p1.add_stats("a", shape)
            p2.add_stats("b", shape)
            p1.stats["a"].reset_by_vector(stats1)
            p2.stats["b"].reset_by_vector(stats2)
            m1.add("p", p1)
            m2.add("p", p2)
            m1.add("sm", m2)

            m1.save(tmp.name)

            # Rebuild the same hierarchy with uninitialized parameters.
            m1 = Model()
            m2 = Model()
            p1 = Parameter()
            p2 = Parameter()
            m1.add("p", p1)
            m2.add("p", p2)
            m1.add("sm", m2)

            m1.load(tmp.name)

        self.assertTrue(p1.valid())
        self.assertTrue(p2.valid())
        self.assertEqual(shape, p1.shape())
        self.assertEqual(shape, p2.shape())
        self.assertEqual(values1, p1.value.to_list())
        self.assertEqual(values2, p2.value.to_list())
        # assertIn reports the missing key on failure.
        self.assertIn("a", p1.stats)
        self.assertIn("b", p2.stats)
        self.assertEqual(stats1, p1.stats["a"].to_list())
        self.assertEqual(stats2, p2.stats["b"].to_list())
Esempio n. 14
0
 def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
              hidden_size, dropout_rate):
     """Store the configuration and create all parameters and LSTMs.

     Six parameters are created (two ``[embed_size, vocab]`` tables, a
     ``[embed_size, 2 * hidden_size]`` matrix, a ``[trg_vocab_size,
     embed_size]`` matrix and two zero-initialized vectors), followed by
     three ``LSTM`` objects whose names are derived from ``name``.
     """
     self.name_ = name
     self.embed_size_ = embed_size
     self.dropout_rate_ = dropout_rate

     src_table_shape = [embed_size, src_vocab_size]
     trg_table_shape = [embed_size, trg_vocab_size]
     self.psrc_lookup_ = Parameter(src_table_shape, I.XavierUniform())
     self.ptrg_lookup_ = Parameter(trg_table_shape, I.XavierUniform())

     whj_shape = [embed_size, 2 * hidden_size]
     self.pwhj_ = Parameter(whj_shape, I.XavierUniform())
     self.pbj_ = Parameter([embed_size], I.Constant(0))

     wjy_shape = [trg_vocab_size, embed_size]
     self.pwjy_ = Parameter(wjy_shape, I.XavierUniform())
     self.pby_ = Parameter([trg_vocab_size], I.Constant(0))

     # The two source LSTMs take embed_size inputs; the target LSTM takes
     # twice that.
     self.src_fw_lstm_ = LSTM(name + "_src_fw_lstm", embed_size, hidden_size)
     self.src_bw_lstm_ = LSTM(name + "_src_bw_lstm", embed_size, hidden_size)
     self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size * 2, hidden_size)
Esempio n. 15
0
def train_func(trainer):
    """Run ten training steps of a small network and return its parameters.

    A ``D.Naive(12345)`` device and a new graph are installed as defaults,
    four parameters are created and registered with ``trainer``, and ten
    updates are performed on a fixed batch of four samples using a
    batch-mean squared-error loss.

    Returns:
        A list with the flattened values of ``pw1``, ``pb1``, ``pw2`` and
        ``pb2``, in that order.
    """
    device = D.Naive(12345)
    Device.set_default(device)
    graph = Graph()
    Graph.set_default(graph)

    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([1], I.Constant(0))
    params = (pw1, pb1, pw2, pb2)

    for param in params:
        trainer.add_parameter(param)

    input_data = [1, 1, 1, -1, -1, 1, -1, -1]
    output_data = [1, -1, -1, 1]

    for _ in range(10):
        graph.clear()

        # Forward pass.
        x = F.input(input_data, Shape([2], 4))
        w1, b1, w2, b2 = (F.parameter(param) for param in params)
        hidden = F.tanh(w1 @ x + b1)
        y = w2 @ hidden + b2

        # Batch-mean squared error against the targets.
        target = F.input(output_data, Shape([], 4))
        error = target - y
        loss = F.batch.mean(error * error)

        trainer.reset_gradients()
        loss.backward()
        trainer.update()

    return [param.value.to_list() for param in params]
Esempio n. 16
0
 def test_optimizer_add(self):
     """Check that ``reset_gradients`` zeroes everything added to an optimizer.

     A ``TestModel`` and a standalone parameter are added to an Adam
     optimizer; their gradients must be untouched by ``add`` and all zero
     after ``reset_gradients``.
     """
     initial_gradient = [1, 2, 3, 4, 5]
     cleared_gradient = [0, 0, 0, 0, 0]

     model = TestModel()
     standalone = Parameter([5], I.Constant(0))
     standalone.gradient = tF.raw_input([5], [1, 2, 3, 4, 5])

     optimizer = O.Adam()
     optimizer.set_weight_decay(1e-6)
     optimizer.set_gradient_clipping(5)
     optimizer.add(model)
     optimizer.add(standalone)

     # Adding to the optimizer leaves the gradients as they were.
     self.assertEqual(standalone.gradient.to_list(), initial_gradient)
     self.assertEqual(model.param.gradient.to_list(), initial_gradient)

     optimizer.reset_gradients()

     self.assertEqual(standalone.gradient.to_list(), cleared_gradient)
     self.assertEqual(model.param.gradient.to_list(), cleared_gradient)
Esempio n. 17
0
    def test_tensor_instance(self):
        """Check object identity of tensors across access, copy and arithmetic.

        Repeated attribute access and in-place multiplication must keep the
        same object; copy construction and out-of-place multiplication must
        yield a new one.
        """
        param = Parameter([], I.Constant(1))
        t_origin = param.gradient
        t = param.gradient
        self.assertIs(t, t_origin)

        # A copy holds the same values as the original but is a distinct
        # object. The values were previously compared against the copy
        # itself, which could never fail.
        t = Tensor(t_origin)
        self.assertEqual(t.to_list(), t_origin.to_list())
        self.assertIsNot(t, t_origin)

        # In-place multiplication keeps the identity.
        t = t_origin
        t *= 2
        self.assertIs(t, t_origin)

        # Out-of-place multiplication creates a new tensor.
        t = t * 2
        self.assertIsNot(t, t_origin)
Esempio n. 18
0
 def __init__(self, in_size, out_size):
     """Create three parameters, then call ``add_all_parameters``.

     ``pw`` has shape ``[3 * out_size, in_size]`` and is initialized with
     ``I.Uniform(-0.1, 0.1)``; ``pbf`` and ``pbr`` both have shape
     ``[out_size]`` and start at zero.
     """
     self.out_size = out_size

     stacked_rows = 3 * out_size
     self.pw = Parameter([stacked_rows, in_size], I.Uniform(-0.1, 0.1))
     self.pbf = Parameter([out_size], I.Constant(0))
     self.pbr = Parameter([out_size], I.Constant(0))

     self.add_all_parameters()
Esempio n. 19
0
 def __init__(self, name, in_size, out_size):
     """Store the name and output size and create three parameters.

     ``pwxh_`` is ``[4 * out_size, in_size]`` and ``pwhh_`` is
     ``[4 * out_size, out_size]``, both with ``I.XavierUniform()``;
     ``pbh_`` is ``[4 * out_size]`` and starts at zero.
     """
     self.name_ = name
     self.out_size_ = out_size

     stacked_rows = 4 * out_size
     self.pwxh_ = Parameter([stacked_rows, in_size], I.XavierUniform())
     self.pwhh_ = Parameter([stacked_rows, out_size], I.XavierUniform())
     self.pbh_ = Parameter([stacked_rows], I.Constant(0))
Esempio n. 20
0
 def init(self, in_size, out_size):
     """Initialize the three LSTM parameters.

     ``_pwxh`` becomes ``[4 * out_size, in_size]`` and ``_pwhh`` becomes
     ``[4 * out_size, out_size]``, both with ``I.XavierUniform()``;
     ``_pbh`` becomes ``[4 * out_size]`` filled with zeros.
     """
     stacked_rows = 4 * out_size
     self._pwxh.init([stacked_rows, in_size], I.XavierUniform())
     self._pwhh.init([stacked_rows, out_size], I.XavierUniform())
     self._pbh.init([stacked_rows], I.Constant(0))
Esempio n. 21
0
 def __init__(self, in_size, out_size):
     """Create three parameters, then call ``scan_attributes``.

     ``pw`` has shape ``[3 * out_size, in_size]`` and is initialized with
     ``I.Uniform(-0.1, 0.1)``; ``pbf`` and ``pbr`` both have shape
     ``[out_size]`` and start at zero.
     """
     self.out_size = out_size

     stacked_rows = 3 * out_size
     self.pw = Parameter([stacked_rows, in_size], I.Uniform(-0.1, 0.1))
     self.pbf = Parameter([out_size], I.Constant(0))
     self.pbr = Parameter([out_size], I.Constant(0))

     self.scan_attributes()
Esempio n. 22
0
def main():
    """Train and evaluate a two-layer classifier split across two GPUs.

    The first layer's parameters live on ``D.CUDA(0)`` and the second
    layer's on ``D.CUDA(1)``; the hidden activations are copied between the
    devices. Each epoch shuffles the training samples, runs SGD over every
    training batch, then prints the accuracy on the test set.
    """
    # Load the datasets.
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Two device objects, one per GPU.
    gpu0 = D.CUDA(0)
    gpu1 = D.CUDA(1)

    # First-layer parameters on GPU 0.
    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform(), gpu0)
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0), gpu0)

    # Second-layer parameters on GPU 1.
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform(), gpu1)
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0), gpu1)

    trainer = T.SGD(.1)
    for param in (pw1, pb1, pw2, pb2):
        trainer.add_parameter(param)

    def make_graph(inputs):
        """Build the forward network for ``inputs`` and return its output node."""
        # Inputs are stored explicitly on GPU 0.
        x = F.input(inputs, device=gpu0)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        # The hidden layer is computed on GPU 0 ...
        hidden_gpu0 = F.relu(w1 @ x + b1)
        # ... transferred to GPU 1 by `copy()` ...
        hidden_gpu1 = F.copy(hidden_gpu0, gpu1)
        # ... and the output layer is computed on GPU 1.
        return w2 @ hidden_gpu1 + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    graph = Graph()
    Graph.set_default(graph)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training pass.
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            first = batch * BATCH_SIZE
            inputs = [train_inputs[ids[first + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[first + i]] for i in range(BATCH_SIZE)]

            graph.clear()

            logits = make_graph(inputs)
            loss = F.softmax_cross_entropy(logits, labels, 0)
            avg_loss = F.batch.mean(loss)

            trainer.reset_gradients()
            avg_loss.backward()
            trainer.update()

        print()

        match = 0

        # Evaluation pass.
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            first = batch * BATCH_SIZE
            inputs = [test_inputs[first + i] for i in range(BATCH_SIZE)]

            graph.clear()

            y_val = make_graph(inputs).to_list()
            for i in range(BATCH_SIZE):
                # Arg-max over this sample's scores; the first maximum wins.
                best_score = -1e10
                predicted = -1
                offset = i * NUM_OUTPUT_UNITS
                for j in range(NUM_OUTPUT_UNITS):
                    score = y_val[j + offset]
                    if score > best_score:
                        best_score, predicted = score, j
                if predicted == test_labels[i + first]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Esempio n. 23
0
 def __init__(self):
     """Create a 5-element parameter with a preset gradient and scan attributes.

     ``param`` is zero-initialized with shape ``[5]`` and its gradient is
     set to ``[1, 2, 3, 4, 5]`` before ``scan_attributes`` is called.
     """
     size = 5
     self.param = Parameter([size], I.Constant(0))
     self.param.gradient = tF.raw_input([size], list(range(1, size + 1)))
     self.scan_attributes()
Esempio n. 24
0
def main():
    """Train a two-layer network on four 2-D samples and print progress.

    Uses a ``D.Naive`` default device (``D.CUDA(gpuid)`` is the alternative
    mentioned in the original), an SGD optimizer with learning rate 0.1 and
    ten epochs. Each epoch prints the four predictions and the batch-mean
    squared error, then updates the parameters.
    """
    device = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(device)

    # Parameters
    pw1 = Parameter([8, 2], I.XavierUniform())
    pb1 = Parameter([8], I.Constant(0))
    pw2 = Parameter([1, 8], I.XavierUniform())
    pb2 = Parameter([], I.Constant(0))

    # Optimizer, with every parameter registered in creation order.
    optimizer = O.SGD(0.1)
    for param in (pw1, pb1, pw2, pb2):
        optimizer.add_parameter(param)

    # Training data
    input_data = [
        np.array([1, 1], dtype=np.float32),  # Sample 1
        np.array([1, -1], dtype=np.float32),  # Sample 2
        np.array([-1, 1], dtype=np.float32),  # Sample 3
        np.array([-1, -1], dtype=np.float32),  # Sample 4
    ]
    output_data = [
        np.array([1], dtype=np.float32),  # Label 1
        np.array([-1], dtype=np.float32),  # Label 2
        np.array([-1], dtype=np.float32),  # Label 3
        np.array([1], dtype=np.float32),  # Label 4
    ]

    graph = Graph()
    Graph.set_default(graph)

    for epoch in range(10):
        graph.clear()

        # Forward network.
        x = F.input(input_data)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        hidden = F.tanh(w1 @ x + b1)
        y = w2 @ hidden + b2

        # Report the current predictions.
        y_val = y.to_list()
        print("epoch ", epoch, ":")
        for j in range(4):
            print("  [", j, "]: ", y_val[j])

        # Extend the graph with the loss.
        target = F.input(output_data)
        error = target - y
        loss = F.batch.mean(error * error)

        # Report the loss.
        loss_val = loss.to_float()
        print("  loss: ", loss_val)

        # Update the parameters.
        optimizer.reset_gradients()
        loss.backward()
        optimizer.update()
Esempio n. 25
0
    def primitiv_xor_test(self):
        """Train a small tanh network for 1000 epochs and return its outputs.

        Parameters are loaded from ``output/xor/*.data`` when all four files
        exist, and written back to the same files after training. Every
        epoch prints the current outputs and the loss computed by
        ``self.calc_loss``.

        Returns:
            The output values of the last epoch's forward pass, as a list.
        """
        device = D.Naive()
        Device.set_default(device)
        graph = Graph()
        Graph.set_default(graph)

        input_data = [
            np.array([[1], [1]]),
            np.array([[-1], [1]]),
            np.array([[-1], [-1]]),
            np.array([[1], [-1]]),
        ]

        label_data = [
            np.array([1]),
            np.array([-1]),
            np.array([1]),
            np.array([-1]),
        ]

        N = 8
        pw = Parameter([1, N], I.XavierUniform())
        pb = Parameter([], I.Constant(0))
        pu = Parameter([N, 2], I.XavierUniform())
        pc = Parameter([N], I.Constant(0))

        # Each parameter paired with the file it is persisted in.
        checkpoints = (
            (pw, 'output/xor/pw.data'),
            (pb, 'output/xor/pb.data'),
            (pu, 'output/xor/pu.data'),
            (pc, 'output/xor/pc.data'),
        )

        # Resume only when the complete set of files is present.
        if all(os.path.isfile(path) for _, path in checkpoints):
            for param, path in checkpoints:
                param.load(path)

        optimizer = O.SGD(0.01)
        optimizer.add(pw, pb, pu, pc)

        for epoch in range(1000):
            print(epoch, end=' ')

            graph.clear()

            x = F.input(input_data)
            w = F.parameter(pw)
            b = F.parameter(pb)
            u = F.parameter(pu)
            c = F.parameter(pc)
            hidden = F.tanh(u @ x + c)
            y = F.tanh(w @ hidden + b)

            for val in y.to_list():
                print('{:+.6f},'.format(val), end=' ')

            loss = self.calc_loss(y, label_data)
            print('loss={:.6f}'.format(loss.to_float()))

            optimizer.reset_gradients()
            loss.backward()
            optimizer.update()

        for param, path in checkpoints:
            param.save(path)

        return y.to_list()
Esempio n. 26
0
def main():
    """Train and evaluate a two-layer classifier with dropout.

    Uses a ``D.Naive`` default device (``D.CUDA(gpuid)`` is the alternative
    mentioned in the original) and SGD with learning rate 0.5. Each epoch
    shuffles the training samples, trains over every batch with dropout
    enabled, then prints the accuracy on the test set with dropout disabled.
    """
    # Load the datasets.
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    device = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(device)

    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    optimizer = O.SGD(.5)
    optimizer.add(pw1, pb1, pw2, pb2)

    def make_graph(inputs, train):
        """Build the forward network; ``train`` is passed on to ``F.dropout``."""
        x = F.input(inputs)

        # Hidden layer.
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        hidden = F.relu(w1 @ x + b1)

        hidden = F.dropout(hidden, .5, train)

        # Output layer.
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ hidden + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    graph = Graph()
    Graph.set_default(graph)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training pass.
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            first = batch * BATCH_SIZE
            inputs = [train_inputs[ids[first + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[first + i]] for i in range(BATCH_SIZE)]

            graph.clear()

            logits = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(logits, labels, 0)
            avg_loss = F.batch.mean(loss)

            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Evaluation pass.
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            first = batch * BATCH_SIZE
            inputs = [test_inputs[first + i] for i in range(BATCH_SIZE)]

            graph.clear()

            y_val = make_graph(inputs, False).to_list()
            for i in range(BATCH_SIZE):
                # Arg-max over this sample's scores; the first maximum wins.
                best_score = -1e10
                predicted = -1
                offset = i * NUM_OUTPUT_UNITS
                for j in range(NUM_OUTPUT_UNITS):
                    score = y_val[j + offset]
                    if score > best_score:
                        best_score, predicted = score, j
                if predicted == test_labels[i + first]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Esempio n. 27
0
 def setUp(self):
     """Install a ``D.Naive`` default device and create the test parameter.

     ``self.p`` is an 8-element parameter whose value is reset to
     ``[1, 2, 3, 4, 5, 6, 7, 8]``.
     """
     device = D.Naive()
     self.dev = device
     Device.set_default(self.dev)

     self.p = Parameter([8], I.Constant(0))
     self.p.value.reset_by_vector(list(range(1, 9)))
Esempio n. 28
0
def main():
    """Train and evaluate a two-convolution, two-FC-layer classifier.

    Runs on ``D.CUDA(0)`` with SGD (learning rate 0.1). Each epoch shuffles
    the training samples, trains over every batch with dropout enabled,
    then prints the accuracy on the test set with dropout disabled.

    Returns:
        ``0`` once all epochs have finished.
    """
    # Load the datasets.
    train_inputs = load_images("data/train-images-idx3-ubyte",
                               NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte",
                               NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    device = D.CUDA(0)
    Device.set_default(device)
    graph = Graph()
    Graph.set_default(graph)

    # Convolution kernels.
    # Shape: {kernel_height, kernel_width, in_channels, out_channels}
    cnn1_shape = Shape([KERNEL_SIZE1, KERNEL_SIZE1, 1, NUM_CHANNELS1])
    pw_cnn1 = Parameter(cnn1_shape, I.XavierUniformConv2D())
    cnn2_shape = Shape(
        [KERNEL_SIZE2, KERNEL_SIZE2, NUM_CHANNELS1, NUM_CHANNELS2])
    pw_cnn2 = Parameter(cnn2_shape, I.XavierUniformConv2D())

    # Fully connected layers: weights first, then biases.
    fc1_shape = Shape([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS])
    pw_fc1 = Parameter(fc1_shape, I.XavierUniform())
    fc2_shape = Shape([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS])
    pw_fc2 = Parameter(fc2_shape, I.XavierUniform())
    pb_fc1 = Parameter(Shape([NUM_HIDDEN_UNITS]), I.Constant(0))
    pb_fc2 = Parameter(Shape([NUM_OUTPUT_UNITS]), I.Constant(0))

    # Optimizer
    optimizer = O.SGD(.1)
    optimizer.add(pw_cnn1, pw_cnn2, pw_fc1, pw_fc2, pb_fc1, pb_fc2)

    def make_graph(inputs, train):
        """Build the predictor; ``train`` is passed on to ``F.dropout``."""
        # Input and parameter nodes.
        x = F.input(inputs)
        w_cnn1 = F.parameter(pw_cnn1)
        w_cnn2 = F.parameter(pw_cnn2)
        w_fc1 = F.parameter(pw_fc1)
        w_fc2 = F.parameter(pw_fc2)
        b_fc1 = F.parameter(pb_fc1)
        b_fc2 = F.parameter(pb_fc2)

        # First convolution + pooling stage.
        conv1 = F.conv2d(x, w_cnn1, PADDING1, PADDING1, 1, 1, 1, 1)
        pooled1 = F.max_pool2d(F.relu(conv1), 2, 2, 0, 0, 2, 2)

        # Second convolution + pooling stage.
        conv2 = F.conv2d(pooled1, w_cnn2, PADDING2, PADDING2, 1, 1, 1, 1)
        pooled2 = F.max_pool2d(F.relu(conv2), 2, 2, 0, 0, 2, 2)

        # Fully connected layers with dropout before each.
        fc_input = F.dropout(F.flatten(pooled2), .5, train)
        fc_hidden = F.relu(F.matmul(w_fc1, fc_input) + b_fc1)
        fc_hidden = F.dropout(fc_hidden, .5, train)
        return F.matmul(w_fc2, fc_hidden) + b_fc2

    # Sample IDs, reshuffled every epoch.
    ids = list(range(NUM_TRAIN_SAMPLES))

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training pass.
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES),
                  end="")
            # Assemble the training minibatch.
            first = batch * BATCH_SIZE
            inputs = [train_inputs[ids[first + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[first + i]] for i in range(BATCH_SIZE)]

            # Build the graph.
            graph.clear()
            logits = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(logits, labels, 0)
            avg_loss = F.batch.mean(loss)

            # Implicit forward, then backward and parameter update.
            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Evaluation pass.
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES),
                  end="")
            # Assemble the test minibatch.
            first = batch * BATCH_SIZE
            inputs = [test_inputs[first + i] for i in range(BATCH_SIZE)]

            # Build the graph.
            graph.clear()
            y_val = make_graph(inputs, False).to_list()

            # Arg-max per sample (first maximum wins), compared to the label.
            for i in range(BATCH_SIZE):
                best_score = -1e10
                predicted = -1
                offset = i * NUM_OUTPUT_UNITS
                for j in range(NUM_OUTPUT_UNITS):
                    score = y_val[j + offset]
                    if score > best_score:
                        best_score, predicted = score, j

                if predicted == test_labels[i + first]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("epoch %d: accuracy: %.2f%%" % (epoch, accuracy))

    return 0
Esempio n. 29
0
 def init(self, d_model):
     """Initialize ``pgain`` to ones and ``pbias`` to zeros.

     Both parameters get the shape ``[1, d_model]``.
     """
     for param, fill_value in ((self.pgain, 1), (self.pbias, 0)):
         param.init([1, d_model], I.Constant(fill_value))