Example #1
def onnx_to_singa(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    model = sonnx.load("mlp.onnx")
    backend = sonnx.prepare(model, device=dev)
    sgd = opt.SGD(0.1)
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    for i in range(100):
        y = backend.run([inputs])[0]
        loss = autograd.softmax_cross_entropy(y, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)
        loss_rate = tensor.to_numpy(loss)[0]
        accuracy_rate = accuracy(tensor.to_numpy(y), label)

        print("Iter {}, accurate={}, loss={}".format(i, accuracy_rate, loss_rate))
Example #2
def serve(agent, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()

    print('Start intialization............')
    net = create_net((3, 224, 224), parameter_file)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                img = imread(val['image'], mode='RGB').astype(np.float32)
                height,width = img.shape[:2]
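                # preprocessing: subtract the ImageNet per-channel means (RGB order),
                # swap RGB to BGR, move channels first, then center-crop to 224x224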
                img[:, :, 0] -= 123.68
                img[:, :, 1] -= 116.779
                img[:, :, 2] -= 103.939
                img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                img = img[:, (height-224)//2:(height+224)//2,\
                          (width-224)//2:(width+224)//2]
                images = np.expand_dims(img, axis=0)

                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except Exception:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            except SystemExit:
                traceback.print_exc()
                response = "Sorry, error triggered sys.exit() during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")
Example #3
def train(data,
          net,
          max_epoch,
          get_lr,
          weight_decay,
          batch_size=100,
          use_cpu=False):
    print('Start initialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a

        print('test loss = %f, test accuracy = %f' % ((loss / num_test_batch),
                                                      (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
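
Examples #3 and #8 take a get_lr callable that maps the current epoch to a learning rate; it is not shown in this listing. A minimal step-decay sketch (purely illustrative, not the original schedule):

def get_lr(epoch, base_lr=0.01):
    # illustrative step decay: divide the learning rate by 10 every 30 epochs
    return base_lr * (0.1 ** (epoch // 30))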
Example #4
def transfer_learning(sg_ir,
                      x,
                      y,
                      epochs=1,
                      batch_size=64,
                      dev=device.get_default_device()):
    batch_number = x.shape[0] // batch_size

    trans_model = Trans(sg_ir, -1)

    for i in range(epochs):
        for b in range(batch_number):
            l_idx = b * batch_size
            r_idx = (b + 1) * batch_size

            x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
            target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])
            output_batch = trans_model.forward(x_batch)

            loss = autograd.softmax_cross_entropy(output_batch, target_batch)
            accuracy_rate = accuracy(tensor.to_numpy(output_batch),
                                     tensor.to_numpy(target_batch))

            sgd = opt.SGD(lr=0.07)
            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
            sgd.step()

            if b % 1e2 == 0:
                print("acc %6.2f loss, %6.2f" %
                      (accuracy_rate, tensor.to_numpy(loss)[0]))
    print("transfer-learning completed")
    return trans_model
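
A hedged usage sketch for Example #4, reusing the ONNX loading pattern from Example #1 (the Trans class, x, y and dev are defined elsewhere in the original file):

onnx_model = sonnx.load("mlp.onnx")            # load an exported model, as in Example #1
sg_ir = sonnx.prepare(onnx_model, device=dev)  # SINGA representation of the ONNX graph
new_model = transfer_learning(sg_ir, x, y, epochs=1, batch_size=64, dev=dev)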
Example #5
def train(model,
          x,
          y,
          epochs=1,
          batch_size=64,
          dev=device.get_default_device()):
    batch_number = x.shape[0] // batch_size

    for i in range(epochs):
        for b in range(batch_number):
            l_idx = b * batch_size
            r_idx = (b + 1) * batch_size

            x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
            target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])

            output_batch = model.forward(x_batch)
            # onnx_model = sonnx.to_onnx([x_batch], [y])
            # print('The model is:\n{}'.format(onnx_model))

            loss = autograd.softmax_cross_entropy(output_batch, target_batch)
            accuracy_rate = accuracy(tensor.to_numpy(output_batch),
                                     tensor.to_numpy(target_batch))

            sgd = opt.SGD(lr=0.001)
            for p, gp in autograd.backward(loss):
                sgd.update(p, gp)
            sgd.step()

            if b % 1e2 == 0:
                print("acc %6.2f loss, %6.2f" %
                      (accuracy_rate, tensor.to_numpy(loss)[0]))
    print("training completed")
    return x_batch, output_batch
Example #6
def serve(agent, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    agent = agent

    print('Start intialization............')
    net = create_net((3, 224, 224), parameter_file)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                img = imread(val['image'], mode='RGB').astype(np.float32)
                height,width = img.shape[:2]
                img[:, :, 0] -= 123.68
                img[:, :, 1] -= 116.779
                img[:, :, 2] -= 103.939
                img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                img = img[:, (height-224)//2:(height+224)//2,\
                          (width-224)//2:(width+224)//2]
                images = np.expand_dims(img, axis=0)

                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except Exception:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")
Example #7
    def __init__(self, length=20, **knobs):
        super().__init__(**knobs)
        # onnx model url
        self.model_url = 'https://github.com/onnx/models/raw/master/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.tar.gz'

        # model path in the downloaded tar file
        self.model_path = 'GPT-2-LM-HEAD/model.onnx'
        self.dev = device.get_default_device()
        self.length = length
        self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
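
Example #7 only records the model URL and the path of the .onnx file inside the archive. A hedged sketch of how the tarball might be fetched and loaded with the sonnx frontend from Example #1 (the helper name and destination directory are illustrative):

import os
import tarfile
import urllib.request

from singa import sonnx

def download_and_load(model_url, model_path, dest="/tmp/gpt2-lm-head"):
    os.makedirs(dest, exist_ok=True)
    archive = os.path.join(dest, os.path.basename(model_url))
    if not os.path.exists(archive):
        urllib.request.urlretrieve(model_url, archive)   # fetch gpt2-lm-head-10.tar.gz
    with tarfile.open(archive) as tar:
        tar.extractall(dest)                             # unpacks GPT-2-LM-HEAD/model.onnx
    return sonnx.load(os.path.join(dest, model_path))    # parsed ONNX model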
Example #8
def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
          use_cpu=False):
    print('Start initialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size: (b + 1) * batch_size]
            y = test_y[b * batch_size: (b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a

        print('test loss = %f, test accuracy = %f' %
              ((loss / num_test_batch), (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
Example #9
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="VGG inference")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="")
        parser.add_argument("--depth",
                            type=int,
                            choices=[11, 13, 16, 19],
                            default=11)
        parser.add_argument("--batchnorm",
                            action='store_true',
                            help='use batchnorm or not')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, 1000, args.batchnorm, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
Example #10
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Wide residual network")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
        parser.add_argument("--model",
                            choices=['resnet', 'wrn', 'preact', 'addbn'],
                            default='wrn')
        parser.add_argument("--depth",
                            type=int,
                            choices=[18, 34, 50, 101, 152, 200],
                            default=50)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.model, args.depth, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
Example #11
def main():
    '''Command line options'''
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Train CNN Readmission Model")
        parser.add_argument('-inputfolder', type=str, help='inputfolder')
        parser.add_argument('-outputfolder', type=str, help='outputfolder')
        parser.add_argument('-visfolder', type=str, help='visfolder')
        parser.add_argument('-trainratio',
                            type=float,
                            help='ratio of train samples')
        parser.add_argument('-validationratio',
                            type=float,
                            help='ratio of validation samples')
        parser.add_argument('-testratio',
                            type=float,
                            help='ratio of test samples')
        parser.add_argument('-p',
                            '--port',
                            default=9989,
                            help='listening port')
        parser.add_argument('-C', '--use_cpu', action="store_true")
        parser.add_argument('--max_epoch', default=100)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        use_cpu = args.use_cpu
        if use_cpu:
            print("runing with cpu")
            dev = device.get_default_device()
        else:
            print("runing with gpu")
            dev = device.create_cuda_gpu()

        # start to train
        agent = Agent(port)
        train(args.inputfolder, args.outputfolder, args.visfolder,
              args.trainratio, args.validationratio, args.testratio, dev,
              agent, args.max_epoch, use_cpu)
        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        # p.terminate()
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
Example #12
def test(model, x, y, batch_size=64, dev=device.get_default_device()):
    batch_number = x.shape[0] // batch_size

    result = 0
    for b in range(batch_number):
        l_idx = b * batch_size
        r_idx = (b + 1) * batch_size

        x_batch = tensor.Tensor(device=dev, data=x[l_idx:r_idx])
        target_batch = tensor.Tensor(device=dev, data=y[l_idx:r_idx])

        output_batch = model.forward(x_batch)
        result += accuracy(tensor.to_numpy(output_batch),
                           tensor.to_numpy(target_batch))

    print("testing acc %6.2f" % (result / batch_number))
Example #13
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description='DenseNet inference')

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="densenet-121.pickle")
        parser.add_argument("--depth",
                            type=int,
                            choices=[121, 169, 201, 161],
                            default=121)

        parser.add_argument('--nb_classes', default=1000, type=int)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, args.nb_classes, 0, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        print('start to load parameter_file')
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)
        # wait the agent finish handling http request
        agent.stop()

    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
Example #14
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Wide residual network")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu", action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
        parser.add_argument("--model", choices=['resnet', 'wrn', 'preact',
                                                'addbn'], default='wrn')
        parser.add_argument("--depth", type=int, choices=[18, 34, 50, 101,
                                                          152, 200],
                            default=50)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.model, args.depth, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
Example #15
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="VGG inference")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu", action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="")
        parser.add_argument("--depth", type=int, choices=[11, 13, 16, 19],
                            default=11)
        parser.add_argument("--batchnorm", action='store_true',
                            help='use batchnorm or not')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, 1000, args.batchnorm, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
Example #16
def singa_to_onnx(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    w0 = Tensor(shape=(2, 3), device=dev, requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, 0.1)
    b0 = Tensor(shape=(3,), device=dev, requires_grad=True, stores_grad=True)
    b0.set_value(0.0)

    w1 = Tensor(shape=(3, 2), device=dev, requires_grad=True, stores_grad=True)
    w1.gaussian(0.0, 0.1)
    b1 = Tensor(shape=(2,), device=dev, requires_grad=True, stores_grad=True)
    b1.set_value(0.0)

    sgd = opt.SGD(0.1)
    # training process
    for i in range(100):
        x = autograd.matmul(inputs, w0)
        x = autograd.add_bias(x, b0)
        x = autograd.relu(x)
        x = autograd.matmul(x, w1)
        x = autograd.add_bias(x, b1)
        loss = autograd.softmax_cross_entropy(x, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)

        print("training loss = ", tensor.to_numpy(loss)[0])
    sonnx.export([inputs], [x], file_path="mlp.onnx")
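
Examples #16 and #1 form a round trip: #16 trains a small MLP with autograd and exports it to mlp.onnx, which #1 then reloads and continues training. A hedged sketch of running them back to back:

singa_to_onnx(100, use_cpu=True)   # writes mlp.onnx
onnx_to_singa(100, use_cpu=True)   # reloads mlp.onnx and keeps training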
Example #17
def train():
    """Start the training procedure 
    """
    num_epochs = 1
    learning_rate = 0.05
    batch_size = 8

    data_loader = DataLoader(os.path.join("data", "fetal_health.csv"))
    data_loader.standardize_column("baseline value")
    x_train, y_train = data_loader.load_data(subset="train")
    x_valid, y_valid = data_loader.load_data(subset="valid")

    num_classes = len(np.unique(y_train))
    num_samples, num_features = x_train.shape

    assert x_train.shape[1] == x_valid.shape[
        1], "Number of features should be equal!"
    assert x_train.shape[0] == y_train.shape[
        0], "Number of training samples should be equal!"
    assert x_valid.shape[0] == y_valid.shape[
        0], "Number of validation samples should be equal!"

    dev = get_default_device()
    tx = tensor.Tensor((num_samples, num_features), dev, tensor.float32)
    ty = tensor.Tensor((num_samples, ), dev, tensor.int32)

    sgd = opt.SGD(learning_rate)
    model = create_MLP_model(perceptron_size=10, num_classes=num_classes)
    model.set_optimizer(sgd)
    model.compile([tx], is_train=True, use_graph=True, sequential=False)
    model.train()

    for i in range(num_epochs):
        tx.copy_from_numpy(x_train.astype(np.float32))
        ty.copy_from_numpy(y_train.astype(np.int32))
        out, loss = model(tx, ty, 'fp32', spars=None)

        # TODO: Add metric evaluation on validation data
        if i % 10 == 0:
            print("training loss = {:.3f}".format(tensor.to_numpy(loss)[0]))
Example #18
def common(use_cpu):
    file_path = "mnist.npz"
    assert os.path.exists(
        file_path
    ), "Pls download the MNIST dataset from https://s3.amazonaws.com/img-datasets/mnist.npz"
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()

    train, test = load_data(file_path)
    print(train[0].shape)
    x_train = preprocess(train[0])
    y_train = to_categorical(train[1], 10)

    x_test = preprocess(test[0])
    y_test = to_categorical(test[1], 10)
    print("the shape of training data is", x_train.shape)
    print("the shape of training label is", y_train.shape)
    print("the shape of testing data is", x_test.shape)
    print("the shape of testing label is", y_test.shape)
    return (x_train, y_train), (x_test, y_test), dev
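
to_categorical, preprocess and load_data come from the surrounding example file. A minimal sketch of the one-hot encoder, assuming integer class labels in the range [0, num_classes):

def to_categorical(y, num_classes):
    # illustrative one-hot encoding: (N,) integer labels -> (N, num_classes) float matrix
    y = np.asarray(y, dtype="int32")
    onehot = np.zeros((y.shape[0], num_classes), dtype=np.float32)
    onehot[np.arange(y.shape[0]), y] = 1.0
    return onehot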
Example #19
import unittest
from builtins import str

from singa import tensor
from singa import singa_wrap as singa
from singa import device
from singa import autograd

autograd.training = True

CTensor = singa.Tensor

gpu_dev = device.create_cuda_gpu()
cpu_dev = device.get_default_device()

dy = CTensor([2, 1, 2, 2])
singa.Gaussian(0.0, 1.0, dy)


def _tuple_to_string(t):
    lt = [str(x) for x in t]
    return '(' + ', '.join(lt) + ')'


class TestPythonOperation(unittest.TestCase):

    def check_shape(self, actual, expect):
        self.assertEqual(actual, expect, 'shape mismatch, actual shape is %s'
                         ' expected is %s' % (_tuple_to_string(actual),
                                              _tuple_to_string(expect))
                         )
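
    # A hedged sketch (not part of the original test file) of how a test method in
    # this class might exercise both cpu_dev and gpu_dev and reuse check_shape:
    def test_relu_shape(self):
        for dev in [cpu_dev, gpu_dev]:
            x = tensor.Tensor(shape=(2, 3), device=dev)
            x.gaussian(0.0, 1.0)
            y = autograd.relu(x)
            self.check_shape(y.shape, (2, 3))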
Example #20
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    print 'Start initialization............'
    lr = 0.1   # Learning rate
    weight_decay  = 0.0002
    hdim = 1000
    vdim = 784
    opt = optimizer.SGD(momentum=0.8, weight_decay=weight_decay)

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype = np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype = np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print 'Loading data ..................'
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    num_train_batch = train_x.shape[0] / batch_size
    print "num_train_batch = %d " % (num_train_batch)
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print 'Epoch %d' % epoch
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                    train_x[(b * batch_size):((b + 1) * batch_size), : ])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob.add_row(thbias)
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata.add_row(tvbias)
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob.add_row(thbias)
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            tgweight = tensor.mult(tnegdata.T(), tneghidprob) -\
                    tensor.mult(tdata.T(), tposhidprob)
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        print 'training errorsum = %f' % (trainerrorsum)

        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob.add_row(thbias)
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata.add_row(tvbias)
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print 'valid errorsum = %f' % (validerrorsum)
Example #21
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    print('Start initialization............')
    lr = 0.1  # Learning rate
    weight_decay = 0.0002
    hdim = 1000
    vdim = 784

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype=np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype=np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print('Loading data ..................')
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    num_train_batch = train_x.shape[0] // batch_size
    print("num_train_batch = %d " % (num_train_batch))
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                train_x[(b * batch_size):((b + 1) * batch_size), :])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob = tposhidprob + thbias
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata = tnegdata + tvbias
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob = tneghidprob + thbias
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            tgweight = tensor.mult(tnegdata.T(), tneghidprob) \
                - tensor.mult(tdata.T(), tposhidprob)
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        print('training erroraverage = %f' %
              (tensor.to_numpy(trainerrorsum) / train_x.shape[0]))

        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob = tvalidposhidprob + thbias
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata = tvalidnegdata + tvbias
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print('valid erroraverage = %f' %
              (tensor.to_numpy(validerrorsum) / valid_x.shape[0]))
Example #22
def serve(agent, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    agent = agent

    print('Start intialization............')
    net, _ = model.create_net(is_training=False)
    net.load(parameter_file, use_pickle=True)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t').tolist()
    labels.insert(0, 'empty background')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                ratio = 0.875
                img = image_tool.load_img(val['image'])
                height, width = img.size[0], img.size[1]
                print(img.size)
                crop_h, crop_w = int(height * ratio), int(width * ratio)
                img = np.array(
                    image_tool.crop(img, (crop_h, crop_w), 'center').resize(
                        (299, 299))).astype(np.float32) / float(255)
                img -= 0.5
                img *= 2
                # img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except Exception:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")
Example #23
def train(lr,
          ssfolder,
          meta_train,
          meta_test,
          data,
          net,
          mean,
          max_epoch,
          get_lr,
          weight_decay,
          input_shape,
          batch_size=100,
          use_cpu=False):

    print 'Start initialization............'
    if use_cpu:
        print 'Using CPU'
        dev = device.get_default_device()
    else:
        print 'Using GPU'
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    dl_train = dt.MImageBatchIter(meta_train,
                                  batch_size,
                                  dt.load_from_img,
                                  shuffle=True,
                                  delimiter=' ',
                                  image_folder=data,
                                  capacity=10)
    dl_train.start()
    dl_test = dt.MImageBatchIter(meta_test,
                                 batch_size,
                                 dt.load_from_img,
                                 shuffle=False,
                                 delimiter=' ',
                                 image_folder=data,
                                 capacity=10)
    dl_test.start()
    num_train = dl_train.num_samples
    num_train_batch = num_train / batch_size
    num_test = dl_test.num_samples
    num_test_batch = num_test / batch_size
    remainder = num_test % batch_size

    best_acc = 0.0
    best_loss = 0.0
    nb_epoch_for_best_acc = 0
    tx = tensor.Tensor((batch_size, ) + input_shape, dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    for epoch in range(max_epoch):
        loss, acc = 0.0, 0.0
        print 'Epoch %d' % epoch
        for b in range(num_train_batch):
            t1 = time.time()
            x, y = dl_train.next()
            #print 'x.norm: ', np.linalg.norm(x)
            x -= mean
            t2 = time.time()
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            #print 'copy tx ty ok'
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, lr, g, p, str(s), b)
            t3 = time.time()
            # update progress bar
            info = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            + ', batch %d: training loss = %f, accuracy = %f, load_time = %.4f, training_time = %.4f' % (b, l, a, t2-t1, t3-t2)
            print info
            #utils.update_progress(b * 1.0 / num_train_batch, info)

        disp = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
        + ', epoch %d: training loss = %f, training accuracy = %f, lr = %f' \
            % (epoch, loss / num_train_batch, acc / num_train_batch, lr)
        logging.info(disp)
        print disp

        if epoch % 50 == 0 and epoch > 0:
            try:
                net.save(os.path.join(ssfolder, 'model-%d' % epoch),
                         buffer_size=200)
            except Exception as e:
                print e
                net.save(os.path.join(ssfolder, 'model-%d' % epoch),
                         buffer_size=300)
            sinfo = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            + ', epoch %d: save model in %s' % (epoch, os.path.join(ssfolder, 'model-%d.bin' % epoch))
            logging.info(sinfo)
            print sinfo

        loss, acc = 0.0, 0.0
        #dominator = num_test_batch
        #print 'num_test_batch: ', num_test_batch
        for b in range(num_test_batch):
            x, y = dl_test.next()
            x -= mean
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l * batch_size
            acc += a * batch_size
            #print datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            #+ ' batch %d, test loss = %f, test accuracy = %f' % (b, l, a)

        if remainder > 0:
            #print 'remainder: ', remainder
            x, y = dl_test.next()
            x -= mean
            tx_rmd = tensor.Tensor((remainder, ) + input_shape, dev)
            ty_rmd = tensor.Tensor((remainder, ), dev, core_pb2.kInt)
            tx_rmd.copy_from_numpy(x[0:remainder, :, :])
            ty_rmd.copy_from_numpy(y[0:remainder, ])
            l, a = net.evaluate(tx_rmd, ty_rmd)
            loss += l * remainder
            acc += a * remainder
            #dominator += 1
            #print datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            #+ ' test loss = %f, test accuracy = %f' % (l, a)
        acc /= num_test
        loss /= num_test
        disp = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
        + ', epoch %d: test loss = %f, test accuracy = %f' % (epoch, loss, acc)
        logging.info(disp)
        print disp

        if acc > best_acc + 0.005:
            best_acc = acc
            best_loss = loss
            nb_epoch_for_best_acc = 0
        else:
            nb_epoch_for_best_acc += 1
            if nb_epoch_for_best_acc > 8:
                break
            elif nb_epoch_for_best_acc % 4 == 0:
                lr /= 10
                logging.info("Decay the learning rate from %f to %f" %
                             (lr * 10, lr))

    try:
        net.save(str(os.path.join(ssfolder, 'model')), buffer_size=200)
    except Exception as e:
        net.save(str(os.path.join(ssfolder, 'model')), buffer_size=300)
    sinfo = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
    + ', save final model in %s' % os.path.join(ssfolder, 'model.bin')
    logging.info(sinfo)
    print sinfo

    dl_train.end()
    dl_test.end()
    return (best_acc, best_loss)
Example #24
    def train(self, data, max_epoch, model_path='model'):
        if self.use_cpu:
            print 'Using CPU'
            self.dev = device.get_default_device()
        else:
            print 'Using GPU'
            self.dev = device.create_cuda_gpu()

        self.net.to_device(self.dev)
        opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
        # opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
        for (p, n) in zip(self.net.param_values(), self.net.param_names()):
            if 'var' in n:
                p.set_value(1.0)
            elif 'gamma' in n:
                p.uniform(0, 1)
            elif 'weight' in n:
                p.gaussian(0, 0.01)
            else:
                p.set_value(0.0)
            print n, p.shape, p.l1()

        tx = tensor.Tensor((self.batch_size, self.maxlen, self.vocab_size),
                           self.dev)
        ty = tensor.Tensor((self.batch_size, ), self.dev, core_pb2.kInt)
        train_x, train_y, test_x, test_y = data
        num_train_batch = train_x.shape[0] / self.batch_size
        num_test_batch = test_x.shape[0] / self.batch_size
        idx = np.arange(train_x.shape[0], dtype=np.int32)
        for epoch in range(max_epoch):
            np.random.shuffle(idx)
            loss, acc = 0.0, 0.0
            print '\nEpoch %d' % epoch
            start = time()
            for b in range(num_train_batch):
                batch_loss, batch_acc = 0.0, 0.0
                grads = []
                x = train_x[
                    idx[b * self.batch_size:(b + 1) *
                        self.batch_size]]  # x.shape = (batch_size, maxlen)
                y = train_y[idx[b * self.batch_size:(b + 1) *
                                self.batch_size]]  # y.shape = (batch_size,)
                # for input as (batch_size, max_len, vocab_size)
                sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                           self.dev)
                tx.copy_from_numpy(sam_arrs)
                ty.copy_from_numpy(np.array(y, dtype='int32'))
                grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
                for (s, p, g) in zip(self.net.param_names(),
                                     self.net.param_values(), grads):
                    opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
                # update progress bar
                utils.update_progress(
                    b * 1.0 / num_train_batch,
                    'training loss = %f, accuracy = %f' %
                    (batch_loss, batch_acc))
                loss += batch_loss
                acc += batch_acc

            print "\ntraining time = ", time() - start
            info = 'training loss = %f, training accuracy = %f, lr = %f' \
                   % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
            print info

            loss, acc = 0.0, 0.0
            start = time()
            for b in range(num_test_batch):
                batch_loss, batch_acc = 0.0, 0.0
                x = test_x[b * self.batch_size:(b + 1) *
                           self.batch_size]  # x.shape = (batch_size, maxlen)
                y = test_y[b * self.batch_size:(b + 1) * self.batch_size]
                sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                           self.dev)
                tx.copy_from_numpy(sam_arrs)
                ty.copy_from_numpy(np.array(y, dtype='int32'))
                grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
                loss += batch_loss
                acc += batch_acc

            print "evaluation time = ", time() - start
            print 'test loss = %f, test accuracy = %f \n' \
                  % (loss / num_test_batch, acc / num_test_batch)

            if (epoch % 2) == 1 or epoch + 1 == max_epoch:
                # checkpoint the file model
                with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                    print 'saving model to %s_%d.bin' % (model_path, epoch)
                    d = {}
                    for name, w in zip(self.net.param_names(),
                                       self.net.param_values()):
                        w.to_host()
                        d[name] = tensor.to_numpy(w)
                        w.to_device(self.dev)
                    pickle.dump(d, fd)
Example #25
def train_mnist_cnn(DIST=False,
                    local_rank=None,
                    world_size=None,
                    nccl_id=None,
                    spars=0,
                    topK=False,
                    corr=True):

    # Define the hyperparameters for the mnist_cnn
    max_epoch = 10
    batch_size = 128
    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)

    # Prepare the training and validation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        '''
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          local_rank=local_rank,
                          world_size=world_size)
        '''
        dev = device.get_default_device()
        # create kvstore
        kv_type = 'dist_sync'  #set synchronization mode
        lr = 0.005
        kv = singa_kvstore.create_kvstore(kv_type,
                                          'SingaSGD',
                                          lr=0.005,
                                          momentum=0.9,
                                          weight_decay=1e-5)
        global_rank = kv.rank
        world_size = kv.num_workers
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, global_rank,
                                          world_size)
        test_x, test_y = data_partition(test_x, test_y, global_rank,
                                        world_size)
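    # Note: the rest of this example assumes DIST=True, since `dev` and the kvstore
    # handle `kv` are only created inside the branch above.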

    # create model
    model = CNN()
    ''' 
    num_channels = train_x.shape[1]
    image_size = train_x.shape[2]
    data_size = np.prod(train_x.shape[1:train_x.ndim]).item()
    num_classes = (np.max(train_y) + 1).item()
    model = resnet.resnet18(num_channels=1, num_classes=num_classes)
    '''
    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev,
                       tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    '''
    if DIST:
        #Initial a batch to help obtain model parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        #Initial kv store for workers of ps-architecture
        key = 0
        for p, g in autograd.backward(loss):
            kv.init(key, mx.nd.array(tensor.to_numpy(p)))
            key += 1
     '''

    # Training and Evaluation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if (DIST == True):
            print('^_^Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)
        time_start = time.time()
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
            singa_kvstore.backward_and_update(kv, loss)
            '''
            if DIST:
                #push
                kv_pairs = []
                key = 0
                for p, g in autograd.backward(loss):
                    kv.push(key,mx.nd.array(tensor.to_numpy(g)))
                    kv_pairs.append((key,p,g))
                    key += 1
                #pull
                for key,p,g in kv_pairs:
                    out_buf = mx.nd.zeros(p.shape)
                    kv.pull(key,out=out_buf)
                    p.copy_from_numpy(out_buf.asnumpy())
             '''

            # Evaluation Phase
            if b % 20 != 0:
                continue
            autograd.training = False
            num_test_batch_inside = 20
            test_correct = 0
            for b in range(num_test_batch_inside):
                x = test_x[b * batch_size:(b + 1) * batch_size]
                y = test_y[b * batch_size:(b + 1) * batch_size]
                tx.copy_from_numpy(x)
                ty.copy_from_numpy(y)
                out_test = model.forward(tx)
                test_correct += accuracy(tensor.to_numpy(out_test), y)
            print('Evaluation accuracy = %f' %
                  (test_correct / (batch_size * num_test_batch_inside)),
                  flush=True)
            autograd.training = True
        print('epoch time is %f' % (time.time() - time_start))
Example #26
def train_mnist_cnn(DIST=False,
                    local_rank=None,
                    world_size=None,
                    nccl_id=None,
                    spars=0,
                    topK=False,
                    corr=True):

    # Define the hyperparameters for the mnist_cnn
    max_epoch = 10
    batch_size = 64
    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)

    # Prepare the training and validation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          local_rank=local_rank,
                          world_size=world_size)
        dev = device.get_default_device(sgd.local_rank)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
                                          sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
                                        sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.get_default_device()
        world_size = 1

    # create model
    model = CNN()

    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev,
                       tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        # Synchronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        for p, g in autograd.backward(loss):
            print('tensor.data.type is %s' % type(p.data).__name__)
            synchronize(p, sgd)

    # Training and Evaluation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)
        time_start = time.time()
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
            if DIST:
                if (spars == 0):
                    sgd.backward_and_update(loss, threshold=50000)
                else:
                    sgd.backward_and_sparse_update(loss,
                                                   spars=spars,
                                                   topK=topK,
                                                   corr=corr)
            else:
                sgd.backward_and_update(loss)

            # Evaluation Phase
            if b % 20 != 0:
                continue
            autograd.training = False
            num_test_batch_inside = 20
            test_correct = 0
            for b in range(num_test_batch_inside):
                x = test_x[b * batch_size:(b + 1) * batch_size]
                y = test_y[b * batch_size:(b + 1) * batch_size]
                tx.copy_from_numpy(x)
                ty.copy_from_numpy(y)
                out_test = model.forward(tx)
                test_correct += accuracy(tensor.to_numpy(out_test), y)
            print('Evaluation accuracy = %f' %
                  (test_correct / (batch_size * num_test_batch_inside)),
                  flush=True)
            autograd.training = True

        print('epoch time is %f' % (time.time() - time_start))
        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1, ), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model.forward(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the Evaluation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_test_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)
Example #27
import unittest
from builtins import str

from singa import tensor
from singa import singa_wrap as singa
from singa import device
from singa import autograd

import numpy as np

autograd.training = True

CTensor = singa.Tensor

gpu_dev = device.create_cuda_gpu()
cpu_dev = device.get_default_device()

dy = CTensor([2, 1, 2, 2])
singa.Gaussian(0.0, 1.0, dy)


def _tuple_to_string(t):
    lt = [str(x) for x in t]
    return '(' + ', '.join(lt) + ')'


def prepare_inputs_targets_for_rnn_test():
    x_0 = np.random.random((2, 3)).astype(np.float32)
    x_1 = np.random.random((2, 3)).astype(np.float32)
    x_2 = np.random.random((2, 3)).astype(np.float32)
Example #28
def serve(agent, net, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    agent = agent

    print('Start intialization............')
    # fix the bug when creating net
    if net == 'v3':
        model = inception_v3
    else:
        model = inception_v4
    net, _ = model.create_net(is_training=False)
    net.load(parameter_file, use_pickle=True)
    net.to_device(dev)
    print('End initialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t').tolist()
    labels.insert(0, 'empty background')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                ratio = 0.875
                img = image_tool.load_img(val['image'])
                height, width = img.size[0], img.size[1]
                print(img.size)
                crop_h, crop_w = int(height * ratio), int(width * ratio)
                img = np.array(image_tool.crop(img,\
                      (crop_h, crop_w), 'center').\
                      resize((299, 299))).astype(np.float32) / float(255)
                img -= 0.5
                img *= 2
                # img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except Exception:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")