예제 #1
0
def train(data_dir, net, num_epoch=20, batch_size=250):
    
    print 'Start intialization............'
    cuda = device.create_cuda_gpu()
    net.to_device(cuda)
    opt = optimizer.SGD(momentum=0.9,weight_decay=0.04)
    for (p, specs) in zip(net.param_values(), net.param_specs()):
        filler = specs.filler
        if filler.type == 'gaussian':
            initializer.gaussian(p, filler.mean, filler.std)
        else:
            p.set_value(0)
        opt.register(p, specs)
        print specs.name, filler.type, p.l1()
    print 'Loading data ..................'
    train_x, train_y = load_dataset(data_dir,1)
    test_x, test_y = load_dataset(data_dir,2)
    
    tx = tensor.Tensor((batch_size,3), cuda)
    ty = tensor.Tensor((batch_size,),cuda, core_pb2.kInt)
    #ta = tensor.Tensor((batch_size,3), cuda)
    #tb = tensor.Tensor((batch_size,),cuda, core_pb2.kInt)
    num_train_batch = train_x.shape[0]/batch_size 
    num_test_batch = test_x.shape[0]/batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    id  = np.arange(test_x.shape[0],dtype=np.int32)
    for epoch in range(num_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.000,0.000
        print 'Epoch %d' % epoch
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b+1)* batch_size]]
            y = train_y[idx[b * batch_size:(b+1)* batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
            # update progress bar
            	utils.update_progress(b * 1.0 / num_train_batch,
                                 'training loss = %f, accuracy = %f' % (l, a))
                info = '\ntraining loss = %f, training accuracy = %f' \
                % (loss/num_train_batch, acc/num_train_batch)
        print info
        
        loss,acc=0.000,0.000
        np.random.shuffle(id)
        for b in range(num_test_batch):
         	x = test_x[b * batch_size:(b+1) * batch_size]
            	y = test_y[b * batch_size:(b+1) * batch_size]
                tx.copy_from_numpy(x)
            	ty.copy_from_numpy(y)
            	l, a = net.evaluate(tx, ty)
		loss += l
                acc += a
 	print 'test loss = %f, test accuracy = %f' \
            % (loss / num_test_batch, acc / num_test_batch)
    net.save('model.bin')  # save model params into checkpoint file
예제 #2
0
def train(data,
          max_epoch,
          hidden_size=100,
          seq_length=100,
          batch_size=16,
          model_path='model'):
    # SGD with L2 gradient normalization
    cuda = device.create_cuda_gpu()
    model = CharRNN(data.vocab_size, hidden_size)
    model.graph(True, False)

    inputs, labels = None, None

    for epoch in range(max_epoch):
        model.train()
        train_loss = 0
        for b in tqdm(range(data.num_train_batch)):
            batch = data.train_dat[b * batch_size:(b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda, inputs, labels)
            out, loss = model(inputs, labels)
            model.reset_states(cuda)
            train_loss += tensor.to_numpy(loss)[0]

        print('\nEpoch %d, train loss is %f' %
              (epoch, train_loss / data.num_train_batch / seq_length))

        evaluate(model, data, batch_size, seq_length, cuda, inputs, labels)
        sample(model, data, cuda)
예제 #3
0
def serve(agent, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()

    print('Start intialization............')
    net = create_net((3, 224, 224), parameter_file)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                img = imread(val['image'], mode='RGB').astype(np.float32)
                height,width = img.shape[:2]
                img[:, :, 0] -= 123.68
                img[:, :, 1] -= 116.779
                img[:, :, 2] -= 103.939
                img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                img = img[:, (height-224)//2:(height+224)//2,\
                          (width-224)//2:(width+224)//2]
                images = np.expand_dims(img, axis=0)

                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except Exception:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            except SystemExit:
                traceback.print_exc()
                response = "Sorry, error triggered sys.exit() during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
                print('get stop command')
                agent.push(MsgType.kStatus, "success")
                break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")
예제 #4
0
def onnx_to_singa(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    model = sonnx.load("mlp.onnx")
    backend = sonnx.prepare(model, device=dev)
    sgd = opt.SGD(0.1)
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    for i in range(100):
        y = backend.run([inputs])[0]
        loss = autograd.softmax_cross_entropy(y, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)
        loss_rate = tensor.to_numpy(loss)[0]
        accuracy_rate = accuracy(tensor.to_numpy(y), label)

        print("Iter {}, accurate={}, loss={}".format(i, accuracy_rate, loss_rate))
예제 #5
0
def train(data,
          net,
          max_epoch,
          get_lr,
          weight_decay,
          batch_size=100,
          use_cpu=False):
    print('Start intialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a

        print('test loss = %f, test accuracy = %f' % ((loss / num_test_batch),
                                                      (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
예제 #6
0
def serve(agent, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    agent = agent

    print('Start intialization............')
    net = create_net((3, 224, 224), parameter_file)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                img = imread(val['image'], mode='RGB').astype(np.float32)
                height,width = img.shape[:2]
                img[:, :, 0] -= 123.68
                img[:, :, 1] -= 116.779
                img[:, :, 2] -= 103.939
                img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                img = img[:, (height-224)//2:(height+224)//2,\
                          (width-224)//2:(width+224)//2]
                images = np.expand_dims(img, axis=0)

                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
                print('get stop command')
                agent.push(MsgType.kStatus, "success")
                break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")
예제 #7
0
def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
          use_cpu=False):
    print('Start intialization............')
    if use_cpu:
        print('Using CPU')
        dev = device.get_default_device()
    else:
        print('Using GPU')
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    train_x, train_y, test_x, test_y = data
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)
    for epoch in range(max_epoch):
        np.random.shuffle(idx)
        loss, acc = 0.0, 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
            # update progress bar
            utils.update_progress(b * 1.0 / num_train_batch,
                                  'training loss = %f, accuracy = %f' % (l, a))
        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
            % ((loss / num_train_batch), (acc / num_train_batch), get_lr(epoch))
        print(info)

        loss, acc = 0.0, 0.0
        for b in range(num_test_batch):
            x = test_x[b * batch_size: (b + 1) * batch_size]
            y = test_y[b * batch_size: (b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l
            acc += a

        print('test loss = %f, test accuracy = %f' %
              ((loss / num_test_batch), (acc / num_test_batch)))
    net.save('model', 20)  # save model params into checkpoint file
예제 #8
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Wide residual network")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
        parser.add_argument("--model",
                            choices=['resnet', 'wrn', 'preact', 'addbn'],
                            default='wrn')
        parser.add_argument("--depth",
                            type=int,
                            choices=[18, 34, 50, 101, 152, 200],
                            default='50')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.model, args.depth, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #9
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="VGG inference")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="")
        parser.add_argument("--depth",
                            type=int,
                            choices=[11, 13, 16, 19],
                            default='11')
        parser.add_argument("--batchnorm",
                            action='store_true',
                            help='use batchnorm or not')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, 1000, args.batchnorm, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
def main():
    '''Command line options'''
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Train CNN Readmission Model")
        parser.add_argument('-inputfolder', type=str, help='inputfolder')
        parser.add_argument('-outputfolder', type=str, help='outputfolder')
        parser.add_argument('-visfolder', type=str, help='visfolder')
        parser.add_argument('-trainratio',
                            type=float,
                            help='ratio of train samples')
        parser.add_argument('-validationratio',
                            type=float,
                            help='ratio of validation samples')
        parser.add_argument('-testratio',
                            type=float,
                            help='ratio of test samples')
        parser.add_argument('-p',
                            '--port',
                            default=9989,
                            help='listening port')
        parser.add_argument('-C', '--use_cpu', action="store_true")
        parser.add_argument('--max_epoch', default=100)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        use_cpu = args.use_cpu
        if use_cpu:
            print("runing with cpu")
            dev = device.get_default_device()
        else:
            print("runing with gpu")
            dev = device.create_cuda_gpu()

        # start to train
        agent = Agent(port)
        train(args.inputfolder, args.outputfolder, args.visfolder,
              args.trainratio, args.validationratio, args.testratio, dev,
              agent, args.max_epoch, use_cpu)
        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        # p.terminate()
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
예제 #11
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description='DenseNet inference')

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu",
                            action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="densenet-121.pickle")
        parser.add_argument("--depth",
                            type=int,
                            choices=[121, 169, 201, 161],
                            default=121)

        parser.add_argument('--nb_classes', default=1000, type=int)

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, args.nb_classes, 0, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        print('start to load parameter_file')
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)
        # wait the agent finish handling http request
        agent.stop()

    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #12
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="Wide residual network")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu", action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
        parser.add_argument("--model", choices=['resnet', 'wrn', 'preact',
                                                'addbn'], default='wrn')
        parser.add_argument("--depth", type=int, choices=[18, 34, 50, 101,
                                                          152, 200],
                            default='50')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.model, args.depth, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #13
0
def main():
    try:
        # Setup argument parser
        parser = ArgumentParser(description="VGG inference")

        parser.add_argument("--port", default=9999, help="listen port")
        parser.add_argument("--use_cpu", action="store_true",
                            help="If set, load models onto CPU devices")
        parser.add_argument("--parameter_file", default="")
        parser.add_argument("--depth", type=int, choices=[11, 13, 16, 19],
                            default='11')
        parser.add_argument("--batchnorm", action='store_true',
                            help='use batchnorm or not')

        # Process arguments
        args = parser.parse_args()
        port = args.port

        # start to train
        agent = Agent(port)

        net = model.create_net(args.depth, 1000, args.batchnorm, args.use_cpu)
        if args.use_cpu:
            print('Using CPU')
            dev = device.get_default_device()
        else:
            print('Using GPU')
            dev = device.create_cuda_gpu()
            net.to_device(dev)
        model.init_params(net, args.parameter_file)
        print('Finish loading models')

        labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
        serve(net, labels, dev, agent)

        # acc = evaluate(net, '../val_list.txt',  'image/val', dev)
        # print acc

        # wait the agent finish handling http request
        agent.stop()
    except SystemExit:
        return
    except:
        traceback.print_exc()
        sys.stderr.write("  for help use --help \n\n")
        return 2
예제 #14
0
def singa_to_onnx(niter, use_cpu=False):
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()
    inputs = Tensor(
        data=data,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="input",
    )
    target = Tensor(
        data=label,
        device=dev,
        requires_grad=False,
        stores_grad=False,
        name="target",
    )

    w0 = Tensor(shape=(2, 3), device=dev, requires_grad=True, stores_grad=True)
    w0.gaussian(0.0, 0.1)
    b0 = Tensor(shape=(3,), device=dev, requires_grad=True, stores_grad=True)
    b0.set_value(0.0)

    w1 = Tensor(shape=(3, 2), device=dev, requires_grad=True, stores_grad=True)
    w1.gaussian(0.0, 0.1)
    b1 = Tensor(shape=(2,), device=dev, requires_grad=True, stores_grad=True)
    b1.set_value(0.0)

    sgd = opt.SGD(0.1)
    # training process
    for i in range(100):
        x = autograd.matmul(inputs, w0)
        x = autograd.add_bias(x, b0)
        x = autograd.relu(x)
        x = autograd.matmul(x, w1)
        x = autograd.add_bias(x, b1)
        loss = autograd.softmax_cross_entropy(x, target)
        for p, gp in autograd.backward(loss):
            sgd.update(p, gp)

        print("training loss = ", tensor.to_numpy(loss)[0])
    sonnx.export([inputs], [x], file_path="mlp.onnx")
예제 #15
0
def common(use_cpu):
    file_path = "mnist.npz"
    assert os.path.exists(
        file_path
    ), "Pls download the MNIST dataset from https://s3.amazonaws.com/img-datasets/mnist.npz"
    if use_cpu:
        print("Using CPU")
        dev = device.get_default_device()
    else:
        print("Using GPU")
        dev = device.create_cuda_gpu()

    train, test = load_data(file_path)
    print(train[0].shape)
    x_train = preprocess(train[0])
    y_train = to_categorical(train[1], 10)

    x_test = preprocess(test[0])
    y_test = to_categorical(test[1], 10)
    print("the shape of training data is", x_train.shape)
    print("the shape of training label is", y_train.shape)
    print("the shape of testing data is", x_test.shape)
    print("the shape of testing label is", y_test.shape)
    return (x_train, y_train), (x_test, y_test), dev
예제 #16
0
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    print 'Start intialization............'
    lr = 0.1   # Learning rate
    weight_decay  = 0.0002
    hdim = 1000
    vdim = 784
    opt = optimizer.SGD(momentum=0.8, weight_decay=weight_decay)

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype = np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype = np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print 'Loading data ..................'
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    num_train_batch = train_x.shape[0] / batch_size
    print "num_train_batch = %d " % (num_train_batch)
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print 'Epoch %d' % epoch
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                    train_x[(b * batch_size):((b + 1) * batch_size), : ])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob.add_row(thbias)
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata.add_row(tvbias)
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob.add_row(thbias)
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            tgweight = tensor.mult(tnegdata.T(), tneghidprob) -\
                    tensor.mult(tdata.T(), tposhidprob)
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        print 'training errorsum = %f' % (trainerrorsum)

        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob.add_row(thbias)
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata.add_row(tvbias)
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print 'valid errorsum = %f' % (validerrorsum)
예제 #17
0
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from singa import device
from singa import singa_wrap

# avoid singleton error
gpu_dev = None
if singa_wrap.USE_CUDA:
    gpu_dev = device.create_cuda_gpu(set_default=False)
cpu_dev = device.get_default_device()
예제 #18
0
파일: mnist_cnn.py 프로젝트: zxr8192/singa
def train_mnist_cnn(DIST=False,
                    local_rank=None,
                    world_size=None,
                    nccl_id=None,
                    spars=0,
                    topK=False,
                    corr=True):

    # Define the hypermeters good for the mnist_cnn
    max_epoch = 10
    batch_size = 64
    sgd = opt.SGD(lr=0.005, momentum=0.9, weight_decay=1e-5)

    # Prepare training and valadiation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          local_rank=local_rank,
                          world_size=world_size)
        dev = device.create_cuda_gpu_on(sgd.local_rank)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.global_rank,
                                          sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.global_rank,
                                        sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.create_cuda_gpu()
        world_size = 1

    # create model
    model = CNN()

    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        #Sychronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        for p, g in autograd.backward(loss):
            synchronize(p, sgd)

    # Training and Evaulation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)

        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
            if DIST:
                if (spars == 0):
                    sgd.backward_and_update(loss, threshold=50000)
                else:
                    sgd.backward_and_sparse_update(loss,
                                                   spars=spars,
                                                   topK=topK,
                                                   corr=corr)
            else:
                sgd.backward_and_update(loss)

        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1,), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model.forward(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the Evaulation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.global_rank == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_test_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)
예제 #19
0
파일: train.py 프로젝트: zxr8192/singa
def train(data_file, use_gpu, num_epoch=10, batch_size=100):
    print('Start intialization............')
    lr = 0.1  # Learning rate
    weight_decay = 0.0002
    hdim = 1000
    vdim = 784

    tweight = tensor.Tensor((vdim, hdim))
    tweight.gaussian(0.0, 0.1)
    tvbias = tensor.from_numpy(np.zeros(vdim, dtype=np.float32))
    thbias = tensor.from_numpy(np.zeros(hdim, dtype=np.float32))
    opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)

    print('Loading data ..................')
    train_x, valid_x = load_train_data(data_file)

    if use_gpu:
        dev = device.create_cuda_gpu()
    else:
        dev = device.get_default_device()

    for t in [tweight, tvbias, thbias]:
        t.to_device(dev)

    num_train_batch = train_x.shape[0] // batch_size
    print("num_train_batch = %d " % (num_train_batch))
    for epoch in range(num_epoch):
        trainerrorsum = 0.0
        print('Epoch %d' % epoch)
        for b in range(num_train_batch):
            # positive phase
            tdata = tensor.from_numpy(
                train_x[(b * batch_size):((b + 1) * batch_size), :])
            tdata.to_device(dev)
            tposhidprob = tensor.mult(tdata, tweight)
            tposhidprob = tposhidprob + thbias
            tposhidprob = tensor.sigmoid(tposhidprob)
            tposhidrandom = tensor.Tensor(tposhidprob.shape, dev)
            tposhidrandom.uniform(0.0, 1.0)
            tposhidsample = tensor.gt(tposhidprob, tposhidrandom)

            # negative phase
            tnegdata = tensor.mult(tposhidsample, tweight.T())
            tnegdata = tnegdata + tvbias
            tnegdata = tensor.sigmoid(tnegdata)

            tneghidprob = tensor.mult(tnegdata, tweight)
            tneghidprob = tneghidprob + thbias
            tneghidprob = tensor.sigmoid(tneghidprob)
            error = tensor.sum(tensor.square((tdata - tnegdata)))
            trainerrorsum = error + trainerrorsum

            tgweight = tensor.mult(tnegdata.T(), tneghidprob) \
                - tensor.mult(tdata.T(), tposhidprob)
            tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
            tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)

            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')

        print('training erroraverage = %f' %
              (tensor.to_numpy(trainerrorsum) / train_x.shape[0]))

        tvaliddata = tensor.from_numpy(valid_x)
        tvaliddata.to_device(dev)
        tvalidposhidprob = tensor.mult(tvaliddata, tweight)
        tvalidposhidprob = tvalidposhidprob + thbias
        tvalidposhidprob = tensor.sigmoid(tvalidposhidprob)
        tvalidposhidrandom = tensor.Tensor(tvalidposhidprob.shape, dev)
        initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
        tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)

        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
        tvalidnegdata = tvalidnegdata + tvbias
        tvalidnegdata = tensor.sigmoid(tvalidnegdata)

        validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
        print('valid erroraverage = %f' %
              (tensor.to_numpy(validerrorsum) / valid_x.shape[0]))
예제 #20
0
def sample(model_path, nsamples=100, seed_text='', do_sample=True):
    with open(model_path, 'rb') as fd:
        d = pickle.load(fd)
        rnn_w = tensor.from_numpy(d['rnn_w'])
        idx_to_char = d['idx_to_char']
        char_to_idx = d['char_to_idx']
        vocab_size = len(idx_to_char)
        dense_w = tensor.from_numpy(d['dense_w'])
        dense_b = tensor.from_numpy(d['dense_b'])
        hidden_size = d['hidden_size']
        num_stacks = d['num_stacks']
        dropout = d['dropout']

    cuda = device.create_cuda_gpu()
    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size,
                     num_stacks=num_stacks, dropout=dropout,
                     input_sample_shape=(len(idx_to_char),))
    rnn.to_device(cuda)
    rnn.param_values()[0].copy_data(rnn_w)
    dense = layer.Dense('dense', vocab_size, input_sample_shape=(hidden_size,))
    dense.to_device(cuda)
    dense.param_values()[0].copy_data(dense_w)
    dense.param_values()[1].copy_data(dense_b)
    hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
    cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
    hx.set_value(0.0)
    cx.set_value(0.0)
    if len(seed_text) > 0:
        for c in seed_text:
            x = np.zeros((1, vocab_size), dtype=np.float32)
            x[0, char_to_idx[c]] = 1
            tx = tensor.from_numpy(x)
            tx.to_device(cuda)
            inputs = [tx, hx, cx]
            outputs = rnn.forward(False, inputs)
            y = dense.forward(False, outputs[0])
            y = tensor.softmax(y)
            hx = outputs[1]
            cx = outputs[2]
        sys.stdout.write(seed_text)
    else:
        y = tensor.Tensor((1, vocab_size), cuda)
        y.set_value(1.0 / vocab_size)

    for i in range(nsamples):
        y.to_host()
        prob = tensor.to_numpy(y)[0]
        if do_sample:
            cur = np.random.choice(vocab_size, 1, p=prob)[0]
        else:
            cur = np.argmax(prob)
        sys.stdout.write(idx_to_char[cur])
        x = np.zeros((1, vocab_size), dtype=np.float32)
        x[0, cur] = 1
        tx = tensor.from_numpy(x)
        tx.to_device(cuda)
        inputs = [tx, hx, cx]
        outputs = rnn.forward(False, inputs)
        y = dense.forward(False, outputs[0])
        y = tensor.softmax(y)
        hx = outputs[1]
        cx = outputs[2]
    print('')
예제 #21
0
def serve(agent, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    agent = agent

    print('Start intialization............')
    net, _ = model.create_net(is_training=False)
    net.load(parameter_file, use_pickle=True)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t').tolist()
    labels.insert(0, 'empty background')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                ratio = 0.875
                img = image_tool.load_img(val['image'])
                height, width = img.size[0], img.size[1]
                print(img.size)
                crop_h, crop_w = int(height * ratio), int(width * ratio)
                img = np.array(
                    image_tool.crop(img, (crop_h, crop_w), 'center').resize(
                        (299, 299))).astype(np.float32) / float(255)
                img -= 0.5
                img *= 2
                # img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
            print('get stop command')
            agent.push(MsgType.kStatus, "success")
            break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")
예제 #22
0
def train_mnist_cnn(sgd,
                    max_epoch,
                    batch_size,
                    DIST=False,
                    data_partition=None,
                    gpu_num=None,
                    gpu_per_node=None,
                    nccl_id=None):

    # Prepare training and valadiation data
    train_x, train_y, test_x, test_y = load_dataset()
    IMG_SIZE = 28
    num_classes = 10
    train_y = to_categorical(train_y, num_classes)
    test_y = to_categorical(test_y, num_classes)

    # Normalization
    train_x = train_x / 255
    test_x = test_x / 255

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd,
                          nccl_id=nccl_id,
                          gpu_num=gpu_num,
                          gpu_per_node=gpu_per_node)
        dev = device.create_cuda_gpu_on(sgd.rank_in_local)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.rank_in_global,
                                          sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.rank_in_global,
                                        sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.create_cuda_gpu()
        world_size = 1

    # create model
    model = CNN()

    tx = tensor.Tensor((batch_size, 1, IMG_SIZE, IMG_SIZE), dev,
                       tensor.float32)
    ty = tensor.Tensor((batch_size, num_classes), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        #Sychronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 1, IMG_SIZE,
                            IMG_SIZE).astype(np.float32)
        y = np.zeros(shape=(batch_size, num_classes), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model.forward(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        for p, g in autograd.backward(loss):
            sychronize(p, sgd)

    # Training and Evaulation Loop
    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Starting Epoch %d:' % (epoch))

        # Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1], dtype=np.float32)
        test_correct = np.zeros(shape=[1], dtype=np.float32)
        train_loss = np.zeros(shape=[1], dtype=np.float32)

        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size:(b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            y = train_y[idx[b * batch_size:(b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model.forward(tx)
            loss = autograd.softmax_cross_entropy(out, ty)
            train_correct += accuracy(tensor.to_numpy(out), y)
            train_loss += tensor.to_numpy(loss)[0]
            plist = []
            for p, g in autograd.backward(loss):
                if DIST:
                    sgd.all_reduce(g)
                plist.append((p, g))
            if DIST:
                sgd.wait()
            for p, g in plist:
                sgd.update(p, g)

        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1, ), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Training loss = %f, training accuracy = %f' %
                  (train_loss, train_correct /
                   (num_train_batch * batch_size * world_size)),
                  flush=True)

        # Evaluation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size:(b + 1) * batch_size]
            y = test_y[b * batch_size:(b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model.forward(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), y)

        if DIST:
            # Reduce the Evaulation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' %
                  (test_correct / (num_test_batch * batch_size * world_size),
                   time.time() - start_time),
                  flush=True)
예제 #23
0
def train(lr,
          ssfolder,
          meta_train,
          meta_test,
          data,
          net,
          mean,
          max_epoch,
          get_lr,
          weight_decay,
          input_shape,
          batch_size=100,
          use_cpu=False):

    print 'Start intialization............'
    if use_cpu:
        print 'Using CPU'
        dev = device.get_default_device()
    else:
        print 'Using GPU'
        dev = device.create_cuda_gpu()

    net.to_device(dev)
    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
    for (p, specs) in zip(net.param_names(), net.param_specs()):
        opt.register(p, specs)

    dl_train = dt.MImageBatchIter(meta_train,
                                  batch_size,
                                  dt.load_from_img,
                                  shuffle=True,
                                  delimiter=' ',
                                  image_folder=data,
                                  capacity=10)
    dl_train.start()
    dl_test = dt.MImageBatchIter(meta_test,
                                 batch_size,
                                 dt.load_from_img,
                                 shuffle=False,
                                 delimiter=' ',
                                 image_folder=data,
                                 capacity=10)
    dl_test.start()
    num_train = dl_train.num_samples
    num_train_batch = num_train / batch_size
    num_test = dl_test.num_samples
    num_test_batch = num_test / batch_size
    remainder = num_test % batch_size

    best_acc = 0.0
    best_loss = 0.0
    nb_epoch_for_best_acc = 0
    tx = tensor.Tensor((batch_size, ) + input_shape, dev)
    ty = tensor.Tensor((batch_size, ), dev, core_pb2.kInt)
    for epoch in range(max_epoch):
        loss, acc = 0.0, 0.0
        print 'Epoch %d' % epoch
        for b in range(num_train_batch):
            t1 = time.time()
            x, y = dl_train.next()
            #print 'x.norm: ', np.linalg.norm(x)
            x -= mean
            t2 = time.time()
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            #print 'copy tx ty ok'
            grads, (l, a) = net.train(tx, ty)
            loss += l
            acc += a
            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
                opt.apply_with_lr(epoch, lr, g, p, str(s), b)
            t3 = time.time()
            # update progress bar
            info = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            + ', batch %d: training loss = %f, accuracy = %f, load_time = %.4f, training_time = %.4f' % (b, l, a, t2-t1, t3-t2)
            print info
            #utils.update_progress(b * 1.0 / num_train_batch, info)

        disp = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
        + ', epoch %d: training loss = %f, training accuracy = %f, lr = %f' \
            % (epoch, loss / num_train_batch, acc / num_train_batch, lr)
        logging.info(disp)
        print disp

        if epoch % 50 == 0 and epoch > 0:
            try:
                net.save(os.path.join(ssfolder, 'model-%d' % epoch),
                         buffer_size=200)
            except Exception as e:
                print e
                net.save(os.path.join(ssfolder, 'model-%d' % epoch),
                         buffer_size=300)
            sinfo = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            + ', epoch %d: save model in %s' % (epoch, os.path.join(ssfolder, 'model-%d.bin' % epoch))
            logging.info(sinfo)
            print sinfo

        loss, acc = 0.0, 0.0
        #dominator = num_test_batch
        #print 'num_test_batch: ', num_test_batch
        for b in range(num_test_batch):
            x, y = dl_test.next()
            x -= mean
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            l, a = net.evaluate(tx, ty)
            loss += l * batch_size
            acc += a * batch_size
            #print datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            #+ ' batch %d, test loss = %f, test accuracy = %f' % (b, l, a)

        if remainder > 0:
            #print 'remainder: ', remainder
            x, y = dl_test.next()
            x -= mean
            tx_rmd = tensor.Tensor((remainder, ) + input_shape, dev)
            ty_rmd = tensor.Tensor((remainder, ), dev, core_pb2.kInt)
            tx_rmd.copy_from_numpy(x[0:remainder, :, :])
            ty_rmd.copy_from_numpy(y[0:remainder, ])
            l, a = net.evaluate(tx_rmd, ty_rmd)
            loss += l * remainder
            acc += a * remainder
            #dominator += 1
            #print datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
            #+ ' test loss = %f, test accuracy = %f' % (l, a)
        acc /= num_test
        loss /= num_test
        disp = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
        + ', epoch %d: test loss = %f, test accuracy = %f' % (epoch, loss, acc)
        logging.info(disp)
        print disp

        if acc > best_acc + 0.005:
            best_acc = acc
            best_loss = loss
            nb_epoch_for_best_acc = 0
        else:
            nb_epoch_for_best_acc += 1
            if nb_epoch_for_best_acc > 8:
                break
            elif nb_epoch_for_best_acc % 4 == 0:
                lr /= 10
                logging.info("Decay the learning rate from %f to %f" %
                             (lr * 10, lr))

    try:
        net.save(str(os.path.join(ssfolder, 'model')), buffer_size=200)
    except Exception as e:
        net.save(str(os.path.join(ssfolder, 'model')), buffer_size=300)
    sinfo = datetime.datetime.now().strftime('%b-%d-%y %H:%M:%S') \
    + ', save final model in %s' % os.path.join(ssfolder, 'model.bin')
    logging.info(sinfo)
    print sinfo

    dl_train.end()
    dl_test.end()
    return (best_acc, best_loss)
예제 #24
0
def train(data,
          max_epoch,
          hidden_size=100,
          seq_length=100,
          batch_size=16,
          num_stacks=1,
          dropout=0.5,
          model_path='model'):
    # SGD with L2 gradient normalization
    opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
    cuda = device.create_cuda_gpu()
    rnn = layer.LSTM(name='lstm',
                     hidden_size=hidden_size,
                     num_stacks=num_stacks,
                     dropout=dropout,
                     input_sample_shape=(data.vocab_size, ))
    rnn.to_device(cuda)
    print 'created rnn'
    rnn_w = rnn.param_values()[0]
    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
    print 'rnn weight l1 = %f' % (rnn_w.l1())
    dense = layer.Dense('dense',
                        data.vocab_size,
                        input_sample_shape=(hidden_size, ))
    dense.to_device(cuda)
    dense_w = dense.param_values()[0]
    dense_b = dense.param_values()[1]
    print 'dense w ', dense_w.shape
    print 'dense b ', dense_b.shape
    initializer.uniform(dense_w, dense_w.shape[0], 0)
    print 'dense weight l1 = %f' % (dense_w.l1())
    dense_b.set_value(0)
    print 'dense b l1 = %f' % (dense_b.l1())

    g_dense_w = tensor.Tensor(dense_w.shape, cuda)
    g_dense_b = tensor.Tensor(dense_b.shape, cuda)

    lossfun = loss.SoftmaxCrossEntropy()
    for epoch in range(max_epoch):
        train_loss = 0
        for b in range(data.num_train_batch):
            batch = data.train_dat[b * batch_size:(b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())

            outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
            grads = []
            batch_loss = 0
            g_dense_w.set_value(0.0)
            g_dense_b.set_value(0.0)
            for output, label in zip(outputs, labels):
                act = dense.forward(model_pb2.kTrain, output)
                lvalue = lossfun.forward(model_pb2.kTrain, act, label)
                batch_loss += lvalue.l1()
                grad = lossfun.backward()
                grad /= batch_size
                grad, gwb = dense.backward(model_pb2.kTrain, grad)
                grads.append(grad)
                g_dense_w += gwb[0]
                g_dense_b += gwb[1]
                # print output.l1(), act.l1()
            utils.update_progress(
                b * 1.0 / data.num_train_batch,
                'training loss = %f' % (batch_loss / seq_length))
            train_loss += batch_loss

            grads.append(tensor.Tensor())
            grads.append(tensor.Tensor())
            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
            dense_w, dense_b = dense.param_values()
            opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w,
                              'dense_w')
            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b,
                              'dense_b')
        print '\nEpoch %d, train loss is %f' % \
            (epoch, train_loss / data.num_train_batch / seq_length)

        eval_loss = 0
        for b in range(data.num_test_batch):
            batch = data.val_dat[b * batch_size:(b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())
            outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
            for output, label in zip(outputs, labels):
                output = dense.forward(model_pb2.kEval, output)
                eval_loss += lossfun.forward(model_pb2.kEval, output,
                                             label).l1()
        print 'Epoch %d, evaluation loss is %f' % \
            (epoch, eval_loss / data.num_test_batch / seq_length)

        if (epoch + 1) % 30 == 0:
            # checkpoint the file model
            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                print 'saving model to %s' % model_path
                d = {}
                for name, w in zip(['rnn_w', 'dense_w', 'dense_b'],
                                   [rnn_w, dense_w, dense_b]):
                    w.to_host()
                    d[name] = tensor.to_numpy(w)
                    w.to_device(cuda)
                d['idx_to_char'] = data.idx_to_char
                d['char_to_idx'] = data.char_to_idx
                d['hidden_size'] = hidden_size
                d['num_stacks'] = num_stacks
                d['dropout'] = dropout

                pickle.dump(d, fd)
예제 #25
0
def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
          num_stacks=1, dropout=0.5, model_path='model'):
    # SGD with L2 gradient normalization
    opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
    cuda = device.create_cuda_gpu()
    rnn = layer.LSTM(
        name='lstm',
        hidden_size=hidden_size,
        num_stacks=num_stacks,
        dropout=dropout,
        input_sample_shape=(
            data.vocab_size,
        ))
    rnn.to_device(cuda)
    print 'created rnn'
    rnn_w = rnn.param_values()[0]
    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
    print 'rnn weight l1 = %f' % (rnn_w.l1())
    dense = layer.Dense(
        'dense',
        data.vocab_size,
        input_sample_shape=(
            hidden_size,
        ))
    dense.to_device(cuda)
    dense_w = dense.param_values()[0]
    dense_b = dense.param_values()[1]
    print 'dense w ', dense_w.shape
    print 'dense b ', dense_b.shape
    initializer.uniform(dense_w, dense_w.shape[0], 0)
    print 'dense weight l1 = %f' % (dense_w.l1())
    dense_b.set_value(0)
    print 'dense b l1 = %f' % (dense_b.l1())

    g_dense_w = tensor.Tensor(dense_w.shape, cuda)
    g_dense_b = tensor.Tensor(dense_b.shape, cuda)

    lossfun = loss.SoftmaxCrossEntropy()
    for epoch in range(max_epoch):
        train_loss = 0
        for b in range(data.num_train_batch):
            batch = data.train_dat[b * batch_size: (b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())

            outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
            grads = []
            batch_loss = 0
            g_dense_w.set_value(0.0)
            g_dense_b.set_value(0.0)
            for output, label in zip(outputs, labels):
                act = dense.forward(model_pb2.kTrain, output)
                lvalue = lossfun.forward(model_pb2.kTrain, act, label)
                batch_loss += lvalue.l1()
                grad = lossfun.backward()
                grad /= batch_size
                grad, gwb = dense.backward(model_pb2.kTrain, grad)
                grads.append(grad)
                g_dense_w += gwb[0]
                g_dense_b += gwb[1]
                # print output.l1(), act.l1()
            utils.update_progress(
                b * 1.0 / data.num_train_batch, 'training loss = %f' %
                (batch_loss / seq_length))
            train_loss += batch_loss

            grads.append(tensor.Tensor())
            grads.append(tensor.Tensor())
            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
            dense_w, dense_b = dense.param_values()
            opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
            opt.apply_with_lr(
                epoch, get_lr(epoch),
                g_dense_w, dense_w, 'dense_w')
            opt.apply_with_lr(
                epoch, get_lr(epoch),
                g_dense_b, dense_b, 'dense_b')
        print '\nEpoch %d, train loss is %f' % \
            (epoch, train_loss / data.num_train_batch / seq_length)

        eval_loss = 0
        for b in range(data.num_test_batch):
            batch = data.val_dat[b * batch_size: (b + 1) * batch_size]
            inputs, labels = convert(batch, batch_size, seq_length,
                                     data.vocab_size, cuda)
            inputs.append(tensor.Tensor())
            inputs.append(tensor.Tensor())
            outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
            for output, label in zip(outputs, labels):
                output = dense.forward(model_pb2.kEval, output)
                eval_loss += lossfun.forward(model_pb2.kEval,
                                             output, label).l1()
        print 'Epoch %d, evaluation loss is %f' % \
            (epoch, eval_loss / data.num_test_batch / seq_length)

        if (epoch + 1) % 30 == 0:
            # checkpoint the file model
            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                print 'saving model to %s' % model_path
                d = {}
                for name, w in zip(
                        ['rnn_w', 'dense_w', 'dense_b'],
                        [rnn_w, dense_w, dense_b]):
                    w.to_host()
                    d[name] = tensor.to_numpy(w)
                    w.to_device(cuda)
                d['idx_to_char'] = data.idx_to_char
                d['char_to_idx'] = data.char_to_idx
                d['hidden_size'] = hidden_size
                d['num_stacks'] = num_stacks
                d['dropout'] = dropout

                pickle.dump(d, fd)
예제 #26
0
    def train(self, data, max_epoch, model_path='model'):
        if self.use_cpu:
            print 'Using CPU'
            self.dev = device.get_default_device()
        else:
            print 'Using GPU'
            self.dev = device.create_cuda_gpu()

        self.net.to_device(self.dev)
        opt = optimizer.SGD(momentum=0.9, weight_decay=1e-4)
        # opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
        for (p, n) in zip(self.net.param_values(), self.net.param_names()):
            if 'var' in n:
                p.set_value(1.0)
            elif 'gamma' in n:
                p.uniform(0, 1)
            elif 'weight' in n:
                p.gaussian(0, 0.01)
            else:
                p.set_value(0.0)
            print n, p.shape, p.l1()

        tx = tensor.Tensor((self.batch_size, self.maxlen, self.vocab_size),
                           self.dev)
        ty = tensor.Tensor((self.batch_size, ), self.dev, core_pb2.kInt)
        train_x, train_y, test_x, test_y = data
        num_train_batch = train_x.shape[0] / self.batch_size
        num_test_batch = test_x.shape[0] / self.batch_size
        idx = np.arange(train_x.shape[0], dtype=np.int32)
        for epoch in range(max_epoch):
            np.random.shuffle(idx)
            loss, acc = 0.0, 0.0
            print '\nEpoch %d' % epoch
            start = time()
            for b in range(num_train_batch):
                batch_loss, batch_acc = 0.0, 0.0
                grads = []
                x = train_x[
                    idx[b * self.batch_size:(b + 1) *
                        self.batch_size]]  # x.shape = (batch_size, maxlen)
                y = train_y[idx[b * self.batch_size:(b + 1) *
                                self.batch_size]]  # y.shape = (batch_size,)
                # for input as (batch_size, max_len, vocab_size)
                sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                           self.dev)
                tx.copy_from_numpy(sam_arrs)
                ty.copy_from_numpy(np.array(y, dtype='int32'))
                grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
                for (s, p, g) in zip(self.net.param_names(),
                                     self.net.param_values(), grads):
                    opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s), b)
                # update progress bar
                utils.update_progress(
                    b * 1.0 / num_train_batch,
                    'training loss = %f, accuracy = %f' %
                    (batch_loss, batch_acc))
                loss += batch_loss
                acc += batch_acc

            print "\ntraining time = ", time() - start
            info = 'training loss = %f, training accuracy = %f, lr = %f' \
                   % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
            print info

            loss, acc = 0.0, 0.0
            start = time()
            for b in range(num_test_batch):
                batch_loss, batch_acc = 0.0, 0.0
                x = test_x[b * self.batch_size:(b + 1) *
                           self.batch_size]  # x.shape = (batch_size, maxlen)
                y = test_y[b * self.batch_size:(b + 1) * self.batch_size]
                sam_arrs = convert_samples(x, x.shape[1], self.vocab_size,
                                           self.dev)
                tx.copy_from_numpy(sam_arrs)
                ty.copy_from_numpy(np.array(y, dtype='int32'))
                grads, (batch_loss, batch_acc) = self.net.train(tx, ty)
                loss += batch_loss
                acc += batch_acc

            print "evaluation time = ", time() - start
            print 'test loss = %f, test accuracy = %f \n' \
                  % (loss / num_test_batch, acc / num_test_batch)

            if (epoch % 2) == 1 or epoch + 1 == max_epoch:
                # checkpoint the file model
                with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
                    print 'saving model to %s_%d.bin' % (model_path, epoch)
                    d = {}
                    for name, w in zip(self.net.param_names(),
                                       self.net.param_values()):
                        w.to_host()
                        d[name] = tensor.to_numpy(w)
                        w.to_device(self.dev)
                    pickle.dump(d, fd)
예제 #27
0
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
# =============================================================================
import sys
import os
import unittest
import numpy as np

sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))

import singa.tensor as tensor
import singa.optimizer as opt
import singa.device as device

cuda = device.create_cuda_gpu()


class TestOptimizer(unittest.TestCase):

    def setUp(self):
        self.np_W = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)
        self.W = tensor.from_numpy(self.np_W)
        self.np_g = np.array([0.1, 0.3, 0.1, 0.2], dtype=np.float32)
        self.g = tensor.from_numpy(self.np_g)

    def to_cuda(self):
        self.W.to_device(cuda)
        self.g.to_device(cuda)

    def test_sgd(self):
예제 #28
0
import unittest
from builtins import str

from singa import tensor
from singa import singa_wrap as singa
from singa import device
from singa import autograd

autograd.training = True

CTensor = singa.Tensor

gpu_dev = device.create_cuda_gpu()
cpu_dev = device.get_default_device()

dy = CTensor([2, 1, 2, 2])
singa.Gaussian(0.0, 1.0, dy)


def _tuple_to_string(t):
    lt = [str(x) for x in t]
    return '(' + ', '.join(lt) + ')'


class TestPythonOperation(unittest.TestCase):

    def check_shape(self, actual, expect):
        self.assertEqual(actual, expect, 'shape mismatch, actual shape is %s'
                         ' exepcted is %s' % (_tuple_to_string(actual),
                                              _tuple_to_string(expect))
                         )
예제 #29
0
파일: sample.py 프로젝트: yutong010/singa
def sample(model_path, nsamples=100, seed_text='', do_sample=True):
    with open(model_path, 'rb') as fd:
        d = pickle.load(fd)
        rnn_w = tensor.from_numpy(d['rnn_w'])
        idx_to_char = d['idx_to_char']
        char_to_idx = d['char_to_idx']
        vocab_size = len(idx_to_char)
        dense_w = tensor.from_numpy(d['dense_w'])
        dense_b = tensor.from_numpy(d['dense_b'])
        hidden_size = d['hidden_size']
        num_stacks = d['num_stacks']
        dropout = d['dropout']

    cuda = device.create_cuda_gpu()
    rnn = layer.LSTM(name='lstm',
                     hidden_size=hidden_size,
                     num_stacks=num_stacks,
                     dropout=dropout,
                     input_sample_shape=(len(idx_to_char), ))
    rnn.to_device(cuda)
    rnn.param_values()[0].copy_data(rnn_w)
    dense = layer.Dense('dense',
                        vocab_size,
                        input_sample_shape=(hidden_size, ))
    dense.to_device(cuda)
    dense.param_values()[0].copy_data(dense_w)
    dense.param_values()[1].copy_data(dense_b)
    hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
    cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)
    hx.set_value(0.0)
    cx.set_value(0.0)
    if len(seed_text) > 0:
        for c in seed_text:
            x = np.zeros((1, vocab_size), dtype=np.float32)
            x[0, char_to_idx[c]] = 1
            tx = tensor.from_numpy(x)
            tx.to_device(cuda)
            inputs = [tx, hx, cx]
            outputs = rnn.forward(False, inputs)
            y = dense.forward(False, outputs[0])
            y = tensor.softmax(y)
            hx = outputs[1]
            cx = outputs[2]
        sys.stdout.write(seed_text)
    else:
        y = tensor.Tensor((1, vocab_size), cuda)
        y.set_value(1.0 / vocab_size)

    for i in range(nsamples):
        y.to_host()
        prob = tensor.to_numpy(y)[0]
        if do_sample:
            cur = np.random.choice(vocab_size, 1, p=prob)[0]
        else:
            cur = np.argmax(prob)
        sys.stdout.write(idx_to_char[cur])
        x = np.zeros((1, vocab_size), dtype=np.float32)
        x[0, cur] = 1
        tx = tensor.from_numpy(x)
        tx.to_device(cuda)
        inputs = [tx, hx, cx]
        outputs = rnn.forward(False, inputs)
        y = dense.forward(False, outputs[0])
        y = tensor.softmax(y)
        hx = outputs[1]
        cx = outputs[2]
    print('')
예제 #30
0
def train_cifar10(sgd, max_epoch, batch_size, DIST=False, data_partition=None, gpu_num=None, gpu_per_node=None, nccl_id=None, partial_update=False):

    train_x, train_y = load_train_data()
    test_x, test_y = load_test_data()
    train_x, test_x = normalize_for_resnet(train_x, test_x)
    IMG_SIZE = 224
    num_classes=10

    if DIST:
        # For Distributed GPU Training
        sgd = opt.DistOpt(sgd, nccl_id=nccl_id, gpu_num=gpu_num, gpu_per_node=gpu_per_node)
        dev = device.create_cuda_gpu_on(sgd.rank_in_local)
        # Dataset partition for distributed training
        train_x, train_y = data_partition(train_x, train_y, sgd.rank_in_global, sgd.world_size)
        test_x, test_y = data_partition(test_x, test_y, sgd.rank_in_global, sgd.world_size)
        world_size = sgd.world_size
    else:
        # For Single GPU
        dev = device.create_cuda_gpu()
        world_size = 1

    from resnet import resnet50
    model = resnet50(num_classes=num_classes)

    tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev, tensor.float32)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    num_train_batch = train_x.shape[0] // batch_size
    num_test_batch = test_x.shape[0] // batch_size
    idx = np.arange(train_x.shape[0], dtype=np.int32)

    if DIST:
        #Sychronize the initial parameters
        autograd.training = True
        x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
        y = np.zeros( shape=(batch_size,), dtype=np.int32)
        tx.copy_from_numpy(x)
        ty.copy_from_numpy(y)
        out = model(tx)
        loss = autograd.softmax_cross_entropy(out, ty)
        param = []
        for p, _ in autograd.backward(loss):
            sychronize(p, sgd)
            param.append(p)

    for epoch in range(max_epoch):
        start_time = time.time()
        np.random.shuffle(idx)

        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Starting Epoch %d:' % (epoch))

        #Training Phase
        autograd.training = True
        train_correct = np.zeros(shape=[1],dtype=np.float32)
        test_correct = np.zeros(shape=[1],dtype=np.float32)
        train_loss = np.zeros(shape=[1],dtype=np.float32)

        for b in range(num_train_batch):
            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
            x = augmentation(x, batch_size)
            x = resize_dataset(x,IMG_SIZE)
            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out = model(tx)
            loss = autograd.softmax_cross_entropy(out, ty)               
            train_correct += accuracy(tensor.to_numpy(out), to_categorical(y, num_classes)).astype(np.float32)
            train_loss += tensor.to_numpy(loss)[0]
            if not partial_update:
                sgd.backward_and_update(loss)
            else:
                sgd.backward_and_partial_update(loss)

        if DIST:
            # Reduce the Evaluation Accuracy and Loss from Multiple Devices
            reducer = tensor.Tensor((1,), dev, tensor.float32)
            train_correct = reduce_variable(train_correct, sgd, reducer)
            train_loss = reduce_variable(train_loss, sgd, reducer)

        # Output the Training Loss and Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Training loss = %f, training accuracy = %f' % (train_loss, train_correct / (num_train_batch*batch_size*world_size)), flush=True)

        if partial_update:
            # sychronize parameters before evaluation phase
            for p in param:
                sychronize(p, sgd)

        #Evaulation Phase
        autograd.training = False
        for b in range(num_test_batch):
            x = test_x[b * batch_size: (b + 1) * batch_size]
            x = resize_dataset(x,IMG_SIZE)
            y = test_y[b * batch_size: (b + 1) * batch_size]
            tx.copy_from_numpy(x)
            ty.copy_from_numpy(y)
            out_test = model(tx)
            test_correct += accuracy(tensor.to_numpy(out_test), to_categorical(y, num_classes))

        if DIST:
            # Reduce the Evaulation Accuracy from Multiple Devices
            test_correct = reduce_variable(test_correct, sgd, reducer)

        # Output the Evaluation Accuracy
        if ((DIST == False) or (sgd.rank_in_global == 0)):
            print('Evaluation accuracy = %f, Elapsed Time = %fs' % (test_correct / (num_test_batch*batch_size*world_size), time.time() - start_time ), flush=True)
예제 #31
0
    model_path = os.path.join(download_dir, 'model', 'test_shufflenetv2',
                              'model.onnx')

    logging.info("onnx load model...")
    download_model(url)
    onnx_model = onnx.load(model_path)

    # inference
    logging.info("preprocessing...")
    img, labels = get_image_labe()
    img = preprocess(img)
    # sg_ir = sonnx.prepare(onnx_model) # run without graph
    # y = sg_ir.run([img])

    logging.info("model compling...")
    dev = device.create_cuda_gpu()
    x = tensor.PlaceHolder(img.shape, device=dev)
    model = MyModel(onnx_model)
    model.compile([x], is_train=False, use_graph=True, sequential=True)

    # verifty the test
    # from utils import load_dataset
    # inputs, ref_outputs = load_dataset(os.path.join('/tmp', 'model', 'test_shufflenetv2',
    #                           'model.onnx'))
    # x_batch = tensor.Tensor(device=dev, data=inputs[0])
    # outputs = sg_ir.run([x_batch])
    # for ref_o, o in zip(ref_outputs, outputs):
    #     np.testing.assert_almost_equal(ref_o, tensor.to_numpy(o), 4)

    logging.info("model running...")
    x = tensor.Tensor(device=dev, data=img)
예제 #32
0
def serve(agent, net, use_cpu, parameter_file, topk=5):
    if use_cpu:
        print('running with cpu')
        dev = device.get_default_device()
        layer.engine = 'singacpp'
    else:
        print("runing with gpu")
        dev = device.create_cuda_gpu()
    agent = agent

    print('Start intialization............')
    # fix the bug when creating net
    if net == 'v3':
        model = inception_v3
    else:
        model = inception_v4
    net, _ = model.create_net(is_training=False)
    net.load(parameter_file, use_pickle=True)
    net.to_device(dev)
    print('End intialization............')

    labels = np.loadtxt('synset_words.txt', str, delimiter='\t').tolist()
    labels.insert(0, 'empty background')
    while True:
        key, val = agent.pull()
        if key is None:
            time.sleep(0.1)
            continue
        msg_type = MsgType.parse(key)
        if msg_type.is_request():
            try:
                response = ""
                ratio = 0.875
                img = image_tool.load_img(val['image'])
                height, width = img.size[0], img.size[1]
                print(img.size)
                crop_h, crop_w = int(height * ratio), int(width * ratio)
                img = np.array(image_tool.crop(img,\
                      (crop_h, crop_w), 'center').\
                      resize((299, 299))).astype(np.float32) / float(255)
                img -= 0.5
                img *= 2
                # img[:,:,[0,1,2]] = img[:,:,[2,1,0]]
                img = img.transpose((2, 0, 1))
                images = np.expand_dims(img, axis=0)
                x = tensor.from_numpy(images.astype(np.float32))
                x.to_device(dev)
                y = net.predict(x)
                prob = np.average(tensor.to_numpy(y), 0)
                # sort and reverse
                idx = np.argsort(-prob)[0:topk]
                for i in idx:
                    response += "%s:%s<br/>" % (labels[i], prob[i])
            except:
                traceback.print_exc()
                response = "Sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif MsgType.kCommandStop.equal(msg_type):
                print('get stop command')
                agent.push(MsgType.kStatus, "success")
                break
        else:
            print('get unsupported message %s' % str(msg_type))
            agent.push(MsgType.kStatus, "Unknown command")
            break
        # while loop
    print("server stop")