def loss(self, trg_batch, train):
     losses = []
     for i in range(len(trg_batch) - 1):
         y = self.decode_step(trg_batch[i], train)
         loss = F.softmax_cross_entropy(y, trg_batch[i + 1], 0)
         losses.append(loss)
     return F.batch.mean(F.sum(losses))
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Initializes 2 device objects which manage different GPUs.
    dev0 = D.CUDA(0)
    dev1 = D.CUDA(1)

    # Parameters on GPU 0.
    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform(), dev0)
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0), dev0)

    # Parameters on GPU 1.
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform(), dev1)
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0), dev1)

    trainer = T.SGD(.1)
    trainer.add_parameter(pw1)
    trainer.add_parameter(pb1)
    trainer.add_parameter(pw2)
    trainer.add_parameter(pb2)

    def make_graph(inputs):
        # We first store input values explicitly on GPU 0.
        x = F.input(inputs, device=dev0)
        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        # The hidden layer is calculated and implicitly stored on GPU 0.
        h_on_gpu0 = F.relu(w1 @ x + b1)
        # `copy()` transfers the hiddne layer to GPU 1.
        h_on_gpu1 = F.copy(h_on_gpu0, dev1)
        # The output layer is calculated and implicitly stored on GPU 1.
        return w2 @ h_on_gpu1 + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            trainer.reset_gradients()
            avg_loss.backward()
            trainer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs)
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if (v > maxval):
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Exemple #3
0
 def loss(self, outputs, inputs):
     losses = [
         F.softmax_cross_entropy(outputs[i], inputs[i + 1], 0)
         for i in range(len(outputs))
     ]
     return F.batch.mean(F.sum(losses))
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte",
                               NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte",
                               NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    # Uses GPU.
    #dev = CUDADevice(0)
    with DefaultScopeDevice(CPUDevice()):

        # Parameters for the multilayer perceptron.
        pw1 = Parameter("w1", [NUM_HIDDEN_UNITS, NUM_INPUT_UNITS],
                        XavierUniform())
        pb1 = Parameter("b1", [NUM_HIDDEN_UNITS], Constant(0))
        pw2 = Parameter("w2", [NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS],
                        XavierUniform())
        pb2 = Parameter("b2", [NUM_OUTPUT_UNITS], Constant(0))

        # Parameters for batch normalization.
        #Parameter pbeta("beta", {NUM_HIDDEN_UNITS}, Constant(0));
        #Parameter pgamma("gamma", {NUM_HIDDEN_UNITS}, Constant(1));

        # Trainer
        trainer = SGD(.5)
        trainer.add_parameter(pw1)
        trainer.add_parameter(pb1)
        trainer.add_parameter(pw2)
        trainer.add_parameter(pb2)

        #trainer.add_parameter(&pbeta);
        #trainer.add_parameter(&pgamma);

        # Helper lambda to construct the predictor network.
        def make_graph(inputs, train):
            # Stores input values.
            x = F.input(data=inputs)
            # Calculates the hidden layer.
            w1 = F.input(param=pw1)
            b1 = F.input(param=pb1)
            h = F.relu(F.matmul(w1, x) + b1)
            # Batch normalization
            #Node beta = F::input(pbeta);
            #Node gamma = F::input(pgamma);
            #h = F::batch::normalize(h) * gamma + beta;
            # Dropout
            h = F.dropout(h, .5, train)
            # Calculates the output layer.
            w2 = F.input(param=pw2)
            b2 = F.input(param=pb2)
            return F.matmul(w2, h) + b2

        ids = list(range(NUM_TRAIN_SAMPLES))

        for epoch in range(MAX_EPOCH):
            # Shuffles sample IDs.
            random.shuffle(ids)

            # Training loop
            for batch in range(NUM_TRAIN_BATCHES):
                print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES),
                      end="")
                inputs = train_inputs[ids[batch * BATCH_SIZE:(batch + 1) *
                                          BATCH_SIZE]]
                labels = train_labels[ids[batch * BATCH_SIZE:(batch + 1) *
                                          BATCH_SIZE]]

                trainer.reset_gradients()

                # Constructs the graph.
                g = Graph()
                with DefaultScopeGraph(g):
                    y = make_graph(inputs, True)
                    loss = F.softmax_cross_entropy(y, labels, 0)
                    avg_loss = F.batch.mean(loss)

                    # Dump computation graph at the first time.
                    #if (epoch == 0 && batch == 0) g.dump();

                    # Forward, backward, and updates parameters.
                    g.forward(avg_loss)
                    g.backward(avg_loss)

                    trainer.update()

            print()

            match = 0

            # Test loop
            for batch in range(NUM_TEST_BATCHES):
                print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES),
                      end="")
                # Makes a test minibatch.
                inputs = test_inputs[batch * BATCH_SIZE:(batch + 1) *
                                     BATCH_SIZE]

                # Constructs the graph.
                with Graph() as g:
                    y = make_graph(inputs, False)

                    # Gets outputs, argmax, and compares them with the label.
                    y_val = g.forward(y).to_list()
                    for i in range(BATCH_SIZE):
                        maxval = -1e10
                        argmax = -1
                        for j in range(NUM_OUTPUT_UNITS):
                            v = y_val[j + i * NUM_OUTPUT_UNITS]
                            if (v > maxval):
                                maxval = v
                                argmax = j
                        if argmax == test_labels[i + batch * BATCH_SIZE]:
                            match += 1

            accuracy = 100.0 * match / NUM_TEST_SAMPLES
            print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))
Exemple #5
0
def main():
    # Loads data
    train_inputs = load_images("data/train-images-idx3-ubyte", NUM_TRAIN_SAMPLES)
    train_labels = load_labels("data/train-labels-idx1-ubyte", NUM_TRAIN_SAMPLES)
    test_inputs = load_images("data/t10k-images-idx3-ubyte", NUM_TEST_SAMPLES)
    test_labels = load_labels("data/t10k-labels-idx1-ubyte", NUM_TEST_SAMPLES)

    dev = D.Naive()  # or D.CUDA(gpuid)
    Device.set_default(dev)

    pw1 = Parameter([NUM_HIDDEN_UNITS, NUM_INPUT_UNITS], I.XavierUniform())
    pb1 = Parameter([NUM_HIDDEN_UNITS], I.Constant(0))
    pw2 = Parameter([NUM_OUTPUT_UNITS, NUM_HIDDEN_UNITS], I.XavierUniform())
    pb2 = Parameter([NUM_OUTPUT_UNITS], I.Constant(0))

    optimizer = O.SGD(.5)
    optimizer.add_parameter(pw1)
    optimizer.add_parameter(pb1)
    optimizer.add_parameter(pw2)
    optimizer.add_parameter(pb2)

    def make_graph(inputs, train):
        x = F.input(inputs)

        w1 = F.parameter(pw1)
        b1 = F.parameter(pb1)
        h = F.relu(w1 @ x + b1)

        h = F.dropout(h, .5, train)

        w2 = F.parameter(pw2)
        b2 = F.parameter(pb2)
        return w2 @ h + b2

    ids = list(range(NUM_TRAIN_SAMPLES))

    g = Graph()
    Graph.set_default(g)

    for epoch in range(MAX_EPOCH):
        random.shuffle(ids)

        # Training loop
        for batch in range(NUM_TRAIN_BATCHES):
            print("\rTraining... %d / %d" % (batch + 1, NUM_TRAIN_BATCHES), end="")
            inputs = [train_inputs[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]
            labels = [train_labels[ids[batch * BATCH_SIZE + i]] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, True)
            loss = F.softmax_cross_entropy(y, labels, 0)
            avg_loss = F.batch.mean(loss)

            optimizer.reset_gradients()
            avg_loss.backward()
            optimizer.update()

        print()

        match = 0

        # Test loop
        for batch in range(NUM_TEST_BATCHES):
            print("\rTesting... %d / %d" % (batch + 1, NUM_TEST_BATCHES), end="")
            inputs = [test_inputs[batch * BATCH_SIZE + i] for i in range(BATCH_SIZE)]

            g.clear()

            y = make_graph(inputs, False)
            y_val = y.to_list()
            for i in range(BATCH_SIZE):
                maxval = -1e10
                argmax = -1
                for j in range(NUM_OUTPUT_UNITS):
                    v = y_val[j + i * NUM_OUTPUT_UNITS]
                    if (v > maxval):
                        maxval = v
                        argmax = j
                if argmax == test_labels[i + batch * BATCH_SIZE]:
                    match += 1

        accuracy = 100.0 * match / NUM_TEST_SAMPLES
        print("\nepoch %d: accuracy: %.2f%%\n" % (epoch, accuracy))