Example #2
        def lstm_forward():
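            # Run the LSTM over the whole input sequence and sum softmax cross-entropy
            # over every output step; inputs, target, h0 and c0 are assumed to be
            # prepared by the enclosing test fixture.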
            hs, _, _ = rnn(inputs, (h0, c0))

            loss = autograd.softmax_cross_entropy(hs[0], target[0])
            for i in range(1, len(hs)):
                l = autograd.softmax_cross_entropy(hs[i], target[i])
                loss = autograd.add(loss, l)
            return loss
Example #4
        def valinna_rnn_forward():
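            # Same pattern for a vanilla RNN: one cross-entropy term per output step,
            # accumulated with autograd.add so every step contributes to the final loss.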
            hs, _ = rnn(inputs, h0)

            loss = autograd.softmax_cross_entropy(hs[0], target[0])
            for i in range(1, len(hs)):
                l = autograd.softmax_cross_entropy(hs[i], target[i])
                loss = autograd.add(loss, l)
            #grads = autograd.gradients(loss)
            return loss
Example #5
    def test_lstm_model(self, dev=gpu_dev):
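        # End-to-end check of the LSTM model class: a small hand-written batch is
        # overfitted for 1000 steps and the loss is printed every 100 iterations.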
        hidden_size = 3
        seq_length = 2
        batch_size = 4
        feature_size = 3
        bidirectional = False
        directions = 2 if bidirectional else 1
        num_layers = 2
        out_size = hidden_size
        return_sequences = False
        batch_first = True
        rnn_mode = "lstm"

        # manual test case
        x_data = np.array(
            [[[0, 0, 1], [0, 1, 0]], [[0, 1, 0], [1, 0, 0]],
             [[0, 0, 1], [0, 1, 0]], [[1, 0, 0], [0, 0, 1]]],
            dtype=np.float32).reshape(batch_size, seq_length,
                                      hidden_size)  # bs, seq, fea
        if return_sequences:
            y_data = np.array([[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [0, 0, 1]],
                               [[0, 1, 0], [1, 0, 0]], [[0, 0, 1], [0, 1, 0]]],
                              dtype=np.float32).reshape(
                                  batch_size, seq_length,
                                  hidden_size)  # bs, hidden
            y_data = y_data.reshape(batch_size, -1)
        else:
            y_data = np.array([[1, 0, 0], [0, 0, 1], [1, 0, 0], [0, 1, 0]],
                              dtype=np.float32).reshape(
                                  batch_size, hidden_size)  # bs, hidden

        x = tensor.Tensor(device=dev, data=x_data)
        y_t = tensor.Tensor(device=dev, data=y_data)

        m = LSTMModel(hidden_size, seq_length, batch_size, bidirectional,
                      num_layers, return_sequences, rnn_mode, batch_first)
        m.compile([x], is_train=True, use_graph=False, sequential=False)

        m.train()
        for i in range(1000):
            y = m.forward(x)
            assert y.shape == y_t.shape
            loss = autograd.softmax_cross_entropy(y, y_t)
            if i % 100 == 0:
                print("loss", loss)
            m.optimizer(loss)

        m.eval()
        y = m.forward(x)
        loss = autograd.softmax_cross_entropy(y, y_t)
        print("eval loss", loss)
Example #7
    def test_vanillaRNN_gpu_tiny_ops_shape_check(self):
        # gradients shape check.
        inputs, target, h0 = prepare_inputs_targets_for_rnn_test()
        rnn = autograd.RNN(3, 2)

        hs, _ = rnn(inputs, h0)

        loss = autograd.softmax_cross_entropy(hs[0], target[0])
        for i in range(1, len(hs)):
            l = autograd.softmax_cross_entropy(hs[i], target[i])
            loss = autograd.add(loss, l)
        # d=autograd.infer_dependency(loss.creator)
        # print(d)
        for t, dt in autograd.backward(loss):
            self.check_shape(t.shape, dt.shape)
Example #8
def transfer_learning(sg_ir,
                      x,
                      y,
                      epochs=1,
                      batch_size=64,
                      dev=device.get_default_device()):
    batch_number = x.shape[0] // batch_size

    trans_model = Trans(sg_ir, -1)
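    # Trans presumably rebuilds a SINGA model from the ONNX graph (sg_ir), reusing its
    # layers up to the given index so new layers can be trained on top of them.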

    for i in range(epochs):
        for b in range(batch_number):
            l_idx = b * batch_size
            r_idx = (b + 1) * batch_size

            x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
            target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])
            output_batch = trans_model.forward(x_batch)

            loss = autograd.softmax_cross_entropy(output_batch, target_batch)
            accuracy_rate = accuracy(tensor.to_numpy(output_batch),
                                     tensor.to_numpy(target_batch))

            sgd = opt.SGD(lr=0.07)
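            # Note: a fresh SGD instance is created for every batch; with no momentum
            # this behaves the same as a single optimizer hoisted out of the loop.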
            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
            sgd.step()

            if b % 1e2 == 0:
                print("acc %6.2f loss, %6.2f" %
                      (accuracy_rate, tensor.to_numpy(loss)[0]))
    print("transfer-learning completed")
    return trans_model
Example #9
def onnx_to_singa(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    model = sonnx.load("mlp.onnx")
    backend = sonnx.prepare(model, device=dev)
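    # The ONNX model is imported with sonnx and re-trained in SINGA: backend.run()
    # produces the forward output and autograd.backward() yields (param, grad) pairs
    # for SGD. data and label are assumed to be defined at module level.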
    sgd = opt.SGD(0.1)
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    for i in range(100):
        y = backend.run([inputs])[0]
        loss = autograd.softmax_cross_entropy(y, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)
        loss_rate = tensor.to_numpy(loss)[0]
        accuracy_rate = accuracy(tensor.to_numpy(y), label)

        print("Iter {}, accurate={}, loss={}".format(i, accuracy_rate, loss_rate))
Example #10
def train(model,
          x,
          y,
          epochs=1,
          batch_size=64,
          dev=device.get_default_device()):
    batch_number = x.shape[0] // batch_size
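    # Plain training loop: slice a mini-batch, run the model forward, compute softmax
    # cross-entropy, then apply SGD to each (param, grad) pair from autograd.backward.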

    for i in range(epochs):
        for b in range(batch_number):
            l_idx = b * batch_size
            r_idx = (b + 1) * batch_size

            x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
            target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])

            output_batch = model.forward(x_batch)
            # onnx_model = sonnx.to_onnx([x_batch], [y])
            # print('The model is:\n{}'.format(onnx_model))

            loss = autograd.softmax_cross_entropy(output_batch, target_batch)
            accuracy_rate = accuracy(tensor.to_numpy(output_batch),
                                     tensor.to_numpy(target_batch))

            sgd = opt.SGD(lr=0.001)
            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
            sgd.step()

            if b % 1e2 == 0:
                print("acc %6.2f loss, %6.2f" %
                      (accuracy_rate, tensor.to_numpy(loss)[0]))
    print("training completed")
    return x_batch, output_batch
Example #12
    def test_LSTM_gpu_tiny_ops_shape_check(self):
        # gradients shape check.
        inputs, target, h0 = prepare_inputs_targets_for_rnn_test()
        c_0 = np.random.random((2, 1)).astype(np.float32)
        c0 = tensor.Tensor(device=gpu_dev, data=c_0)

        rnn = autograd.LSTM(3, 2)

        hs, _, _ = rnn(inputs, (h0, c0))
        loss = autograd.softmax_cross_entropy(hs[0], target[0])

        for i in range(1, len(hs)):
            l = autograd.softmax_cross_entropy(hs[i], target[i])
            loss = autograd.add(loss, l)
        # d=autograd.infer_dependency(loss.creator)
        # print(d)
        for t, dt in autograd.backward(loss):
            self.check_shape(t.shape, dt.shape)
Example #13
 def forward(x, t):
     y = conv1(x)
     y = autograd.relu(y)
     y = conv2(y)
     y = autograd.relu(y)
     y = pooling(y)
     y = autograd.flatten(y)
     y = linear(y)
     loss = autograd.softmax_cross_entropy(y, t)
     return loss, y
Example #14
 def forward(x, t):
     y = conv1(x)
     y = autograd.relu(y)
     y1 = conv21(y)
     y2 = conv22(y)
     y = autograd.cat((y1, y2), 1)
     y = autograd.relu(y)
     y = autograd.flatten(y)
     y = linear(y)
     loss = autograd.softmax_cross_entropy(y, t)
     return loss, y
Example #15
def evaluate(model, data, batch_size, seq_length, dev, inputs, labels):
    model.eval()
    val_loss = 0.0
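    # Validation pass: the model states are reset per batch and the accumulated
    # cross-entropy is averaged over all test batches and the sequence length.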
    for b in range(data.num_test_batch):
        batch = data.val_dat[b * batch_size:(b + 1) * batch_size]
        inputs, labels = convert(batch, batch_size, seq_length,
                                 data.vocab_size, dev, inputs, labels)
        model.reset_states(dev)
        y = model(inputs)
        loss = autograd.softmax_cross_entropy(y, labels)[0]
        val_loss += tensor.to_numpy(loss)[0]
    print('            validation loss is %f' %
          (val_loss / data.num_test_batch / seq_length))
Example #16
 def forward(x, t):
     y = conv1(x)
     y = autograd.relu(y)
     y = bn1(y)
     y = pooling1(y)
     y1 = conv21(y)
     y2 = conv22(y)
     y = autograd.cat((y1, y2), 1)
     y = bn2(y)
     y = autograd.relu(y)
     y = bn2(y)
     y = pooling2(y)
     y = autograd.flatten(y)
     y = linear(y)
     loss = autograd.softmax_cross_entropy(y, t)
     return loss, y
Example #17
def singa_to_onnx(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    w0 = Tensor(shape=(2, 3), device=dev, requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, 0.1)
    b0 = Tensor(shape=(3,), device=dev, requires_grad=True, stores_grad=True)
    b0.set_value(0.0)

    w1 = Tensor(shape=(3, 2), device=dev, requires_grad=True, stores_grad=True)
    w1.gaussian(0.0, 0.1)
    b1 = Tensor(shape=(2,), device=dev, requires_grad=True, stores_grad=True)
    b1.set_value(0.0)

    sgd = opt.SGD(0.1)
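    # A two-layer MLP is assembled directly from autograd ops, trained for 100 steps,
    # and the resulting graph is exported to mlp.onnx via sonnx.export.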
    # training process
    for i in range(100):
        x = autograd.matmul(inputs, w0)
        x = autograd.add_bias(x, b0)
        x = autograd.relu(x)
        x = autograd.matmul(x, w1)
        x = autograd.add_bias(x, b1)
        loss = autograd.softmax_cross_entropy(x, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)

        print("training loss = ", tensor.to_numpy(loss)[0])
    sonnx.export([inputs], [x], file_path="mlp.onnx")
Example #18
 def train_one_batch(self, x, y, dist_option, spars):
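     # dist_option selects how the distributed optimizer applies gradients: full
     # fp32 or fp16 all-reduce, partial parameter updates, or sparse updates
     # selected by top-K or by an absolute threshold.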
     out = self.forward(x)
     loss = autograd.softmax_cross_entropy(out, y)
     if dist_option == 'fp32':
         self.optimizer.backward_and_update(loss)
     elif dist_option == 'fp16':
         self.optimizer.backward_and_update_half(loss)
     elif dist_option == 'partialUpdate':
         self.optimizer.backward_and_partial_update(loss)
     elif dist_option == 'sparseTopK':
         self.optimizer.backward_and_sparse_update(loss,
                                                   topK=True,
                                                   spars=spars)
     elif dist_option == 'sparseThreshold':
         self.optimizer.backward_and_sparse_update(loss,
                                                   topK=False,
                                                   spars=spars)
     return out, loss
Example #19
    def forward(x, t):
        y = conv1(x)
        y = autograd.tanh(y)
        y1 = conv21(y)
        y2 = conv22(y)
        y = autograd.cat((y1, y2), 1)
        y = autograd.sigmoid(y)
        y = bn(y)
        y = autograd.relu(y)
        y = autograd.mul(y, y)
        y = pooling1(y)
        y = autograd.sigmoid(y)

        y = pooling2(y)

        print(tensor.to_numpy(y).shape)
        y = autograd.flatten(y)
        y = linear(y)
        print(tensor.to_numpy(y).shape)
        loss = autograd.softmax_cross_entropy(y, t)
        return loss, y
Example #20
def onnx_to_singa(epochs, use_cpu=False, batchsize=32):
    (x_train, y_train), (x_test, y_test), dev = common(use_cpu)
    model = sonnx.load("cnn.onnx")
    backend = sonnx.prepare(model, dev)
    autograd.training = True
    sgd = opt.SGD(lr=0.01)
    niter = x_train.shape[0] // batchsize
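    # The ONNX CNN is executed through sonnx.prepare and fine-tuned with SGD;
    # accuracy and loss are accumulated per batch and averaged over each epoch.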
    for epoch in range(epochs):
        accuracy_rate = 0.0
        loss_rate = 0.0
        for i in range(niter):
            inputs = tensor.Tensor(
                device=dev,
                data=x_train[i * batchsize : (i + 1) * batchsize],
                stores_grad=False,
                name="input",
            )
            targets = tensor.Tensor(
                device=dev,
                data=y_train[i * batchsize : (i + 1) * batchsize],
                requires_grad=False,
                stores_grad=False,
                name="target",
            )
            y = backend.run([inputs])[0]
            loss = autograd.softmax_cross_entropy(y, targets)

            accuracy_rate += accuracy(
                tensor.to_numpy(y), y_train[i * batchsize : (i + 1) * batchsize]
            )
            loss_rate += tensor.to_numpy(loss)[0]

            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)

        print("accuracy is {}, loss is {}".format(accuracy_rate / niter, loss_rate / niter))
Example #21
    import time

    dev.Sync()
    start = time.time()
    fd = 0
    softmax = 0
    update = 0
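    # fd, softmax and update accumulate wall-clock time for the forward pass, the
    # loss computation and the gradient all-reduce calls; dev.Sync() brackets each
    # timed region so asynchronous GPU work is included in the measurement.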
    with trange(niters) as t:
        for _ in t:
            dev.Sync()
            tick = time.time()
            x = model(tx)
            dev.Sync()
            fd += time.time() - tick
            tick = time.time()
            loss = autograd.softmax_cross_entropy(x, ty)
            dev.Sync()
            softmax += time.time() - tick
            plist = []
            for p, g in autograd.backward(loss):
                #dev.Sync()  # this Sync affects the concurrency and hence omitted
                tick = time.time()
                sgd.all_reduce(g)
                #dev.Sync()  # this Sync affects the concurrency and hence omitted
                update += time.time() - tick
                plist.append((p, g))
            sgd.wait()
            for p, g in plist:
                sgd.update(p, g)

    dev.Sync()
Example #22
 def train_one_batch(self, x, y):
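     # Minimal train_one_batch: calling the attached optimizer on the scalar loss
     # presumably runs backward() and the parameter update in a single step.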
     out = self.forward(x)
     loss = autograd.softmax_cross_entropy(out, y)
     self.optimizer(loss)
     return out, loss
Example #23
def train_mnist_cnn(DIST=False,
                    local_rank=None,
                    world_size=None,
                    nccl_id=None,
                    spars=0,
                    topK=False,
                    corr=True):

    # Define the hyperparameters for the mnist_cnn
    max_epoch = 10
    batch_size = 64
    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
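    # With DIST=True the SGD optimizer is wrapped in opt.DistOpt (NCCL all-reduce),
    # the dataset is partitioned by rank, and the initial parameters are synchronized
    # with one dummy forward/backward pass before training starts.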

    # Prepare training and validation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          local_rank=local_rank,
                          world_size=world_size)
        dev = device.create_cuda_gpu_on(sgd.local_rank)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
                                          sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
                                        sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.create_cuda_gpu()
        world_size = 1

    # create model
    model = CNN()

    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        # Synchronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        for p, g in autograd.backward(loss):
            synchronize(p, sgd)

    # Training and Evaluation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)

        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
            if DIST:
                if (spars == 0):
                    sgd.backward_and_update(loss, threshold=50000)
                else:
                    sgd.backward_and_sparse_update(loss,
                                                   spars=spars,
                                                   topK=topK,
                                                   corr=corr)
            else:
                sgd.backward_and_update(loss)

        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1,), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model.forward(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the Evaluation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_test_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)
Example #24
def train_mnist_cnn(DIST=False,
                    local_rank=None,
                    world_size=None,
                    nccl_id=None,
                    spars=0,
                    topK=False,
                    corr=True):

    # Define the hyperparameters for the mnist_cnn
    max_epoch = 10
    batch_size = 128
    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)
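    # This variant replaces the NCCL DistOpt with a parameter-server style kvstore
    # (singa_kvstore); parameter updates go through singa_kvstore.backward_and_update
    # instead of an all-reduce on the gradients.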

    # Prepare training and validation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        '''
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          local_rank=local_rank,
                          world_size=world_size)
        '''
        dev = device.get_default_device()
        # create kvstore
        kv_type = 'dist_sync'  #set synchronization mode
        lr = 0.005
        kv = singa_kvstore.create_kvstore(kv_type,
                                          'SingaSGD',
                                          lr=0.005,
                                          momentum=0.9,
                                          weight_decay=1e-5)
        global_rank = kv.rank
        world_size = kv.num_workers
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, global_rank,
                                          world_size)
        test_x, test_y = data_partition(test_x, test_y, global_rank,
                                        world_size)

    # create model
    model = CNN()
    ''' 
    num_channels = train_x.shape[1]
    image_size = train_x.shape[2]
    data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
    num_classes = (np.max(train_y) + 1).item()
    model = resnet.resnet18(num_channels=1, num_classes=num_classes)
    '''
    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev,
                       tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    '''
    if DIST:
        #Initial a batch to help obtain model parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        #Initial kv store for workers of ps-architecture
        key = 0
        for p, g in autograd.backward(loss):
            kv.init(key, mx.nd.array(tensor.to_numpy(p)))
            key += 1
     '''

    # Training and Evaluation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if (DIST == True):
            print('^_^Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)
        time_start = time.time()
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
            singa_kvstore.backward_and_update(kv, loss)
            '''
            if DIST:
                #push
                kv_pairs = []
                key = 0
                for p, g in autograd.backward(loss):
                    kv.push(key,mx.nd.array(tensor.to_numpy(g)))
                    kv_pairs.append((key,p,g))
                    key += 1
                #pull
                for key,p,g in kv_pairs:
                    out_buf = mx.nd.zeros(p.shape)
                    kv.pull(key,out=out_buf)
                    p.copy_from_numpy(out_buf.asnumpy())
             '''

            # Evaluation Phase
            if b % 20 != 0:
                continue
            autograd.training = False
            num_test_batch_inside = 20
            test_correct = 0
            for b in range(num_test_batch_inside):
                x = test_x[b * batch_size:(b + 1) * batch_size]
                y = test_y[b * batch_size:(b + 1) * batch_size]
                tx.copy_from_numpy(x)
                ty.copy_from_numpy(y)
                out_test = model.forward(tx)
                test_correct += accuracy(tensor.to_numpy(out_test), y)
            print('Evaluation accuracy = %f' %
                  (test_correct / (batch_size * num_test_batch_inside)),
                  flush=True)
            autograd.training = True
        print('epoch time is %f' % (time.time() - time_start))
Example #25
def train_mnist_cnn(sgd,
                    max_epoch,
                    batch_size,
                    DIST=False,
                    data_partition=None,
                    gpu_num=None,
                    gpu_per_node=None,
                    nccl_id=None):

    # Prepare training and validation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          gpu_num=gpu_num,
                          gpu_per_node=gpu_per_node)
        dev = device.create_cuda_gpu_on(sgd.rank_in_local)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.rank_in_global,
                                          sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.rank_in_global,
                                        sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.create_cuda_gpu()
        world_size = 1

    # create model
    model = CNN()

    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev,
                       tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        # Synchronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        for p, g in autograd.backward(loss):
            sychronize(p, sgd)

    # Training and Evaluation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)

        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
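            # Gradients are all-reduced explicitly (sgd.all_reduce + sgd.wait) and the
            # parameter updates are applied afterwards, instead of using a fused
            # backward_and_update call.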
            plist = []
            for p, g in autograd.backward(loss):
                if DIST:
                    sgd.all_reduce(g)
                plist.append((p, g))
            if DIST:
                sgd.wait()
            for p, g in plist:
                sgd.update(p, g)

        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1, ), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model.forward(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the Evaluation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_test_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)
Example #26
    epochs = 1

    sgd = opt.SGD(lr=0.00)

    x_train = preprocess(train[0])
    y_train = to_categorical(train[1], num_classes)

    x_test = preprocess(test[0])
    y_test = to_categorical(test[1], num_classes)
    print('the shape of training data is', x_train.shape)
    print('the shape of training label is', y_train.shape)
    print('the shape of testing data is', x_test.shape)
    print('the shape of testing label is', y_test.shape)

    model = onnx.load('cnn.onnx')
    rep = sonnx.prepare(model, dev)
    print('finish init')
    autograd.training = True
    # training process
    for epoch in range(1):
        inputs = tensor.Tensor(device=dev,
                               data=x_train[0:100],
                               stores_grad=False)
        targets = tensor.Tensor(device=dev,
                                data=y_train[0:100],
                                requires_grad=False,
                                stores_grad=False)
        y0 = rep.run([inputs])[0]
        loss = autograd.softmax_cross_entropy(y0, targets)
        print('outputs', tensor.to_numpy(loss)[0])
Example #27
def loss(out, y):
    return autograd.softmax_cross_entropy(out, y)
Example #28
        return x


if __name__ == '__main__':
    model = Xception(num_classes=1000)
    print('Start initialization............')
    dev = device.create_cuda_gpu_on(0)
    #dev = device.create_cuda_gpu()

    niters = 20
    batch_size = 16
    IMG_SIZE = 299
    sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)
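    # Benchmark-style loop: random inputs and labels are used only to exercise the
    # forward pass, softmax cross-entropy and SGD updates on the Xception graph.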

    tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    autograd.training = True
    x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
    y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
    tx.copy_from_numpy(x)
    ty.copy_from_numpy(y)

    with trange(niters) as t:
        for b in t:
            x = model(tx)
            loss = autograd.softmax_cross_entropy(x, ty)
            for p, g in autograd.backward(loss):
                # print(p.shape, g.shape)
                sgd.update(p, g)
                # pass
Example #29
 def loss(self, out, ty):
     return autograd.softmax_cross_entropy(out, ty)
Example #30
 def loss(self, out, ty):
     ty = autograd.reshape(ty, (-1, 1))
     return autograd.softmax_cross_entropy(out, ty)
Example #31
def train_cifar10(sgd, max_epoch, batch_size, DIST=False, data_partition=None, gpu_num=None, gpu_per_node=None, nccl_id=None, partial_update=False):

    train_x, train_y = load_train_data()
    test_x, test_y = load_test_data()
    train_x, test_x = normalize_for_resnet(train_x, test_x)
    IMG_SIZE = 224
    num_classes=10
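    # ResNet-50 on CIFAR-10 resized to 224x224. With partial_update the optimizer
    # presumably updates only part of the parameter set per step, so all parameters
    # are synchronized again before the evaluation phase.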

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd, nccl_id=nccl_id, gpu_num=gpu_num, gpu_per_node=gpu_per_node)
        dev = device.create_cuda_gpu_on(sgd.rank_in_local)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.rank_in_global, sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.rank_in_global, sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.create_cuda_gpu()
        world_size = 1

    from resnet import resnet50
    model = resnet50(num_classes=num_classes)

    tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        # Synchronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
        y = np.zeros( shape=(batch_size,), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        param = []
        for p, _ in autograd.backward(loss):
            sychronize(p, sgd)
            param.append(p)

    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Starting Epoch %d:' % (epoch))

        #Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1],dtype=np.float32)
        test_correct = np.zeros(shape=[1],dtype=np.float32)
        train_loss = np.zeros(shape=[1],dtype=np.float32)

        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            x = resize_dataset(x,IMG_SIZE)
            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model(tx)
            loss = autograd.softmax_cross_entropy(out, ty)               
            train_correct += accuracy(tensor.to_numpy(out), to_categorical(y, num_classes)).astype(np.float32)
            train_loss += tensor.to_numpy(loss)[0]
            if not partial_update:
                sgd.backward_and_update(loss)
            else:
                sgd.backward_and_partial_update(loss)

        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1,), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Training loss = %f, training accuracy = %f' % (train_loss, train_correct / (num_train_batch*batch_size*world_size)), flush=True)

        if partial_update:
            # synchronize parameters before evaluation phase
            for p in param:
                sychronize(p, sgd)

        # Evaluation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size: (b + 1) * batch_size]
            x = resize_dataset(x,IMG_SIZE)
            y = test_y[b * batch_size: (b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), to_categorical(y, num_classes))

        if DIST:
            # Reduce the Evaluation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' % (test_correct / (num_test_batch*batch_size*world_size), time.time() - start_time ), flush=True)