Example 1
def infer(topology, data_dir, model_path, word_dict_path, label_dict_path,
          batch_size):
    def _infer_a_batch(inferer, test_batch, ids_2_word, ids_2_label):
        probs = inferer.infer(input=test_batch, field=["value"])
        assert len(probs) == len(test_batch)
        for word_ids, prob in zip(test_batch, probs):
            word_text = " ".join([ids_2_word[id] for id in word_ids[0]])
            print("%s\t%s\t%s" % (ids_2_label[prob.argmax()],
                                  " ".join(["{:0.4f}".format(p)
                                            for p in prob]), word_text))

    logger.info("begin to predict...")
    use_default_data = (data_dir is None)

    if use_default_data:
        word_dict = paddle.dataset.imdb.word_dict()
        word_reverse_dict = dict((value, key)
                                 for key, value in word_dict.iteritems())
        label_reverse_dict = {0: "positive", 1: "negative"}
        test_reader = paddle.dataset.imdb.test(word_dict)
    else:
        assert os.path.exists(
            word_dict_path), "the word dictionary file does not exist"
        assert os.path.exists(
            label_dict_path), "the label dictionary file does not exist"

        word_dict = load_dict(word_dict_path)
        word_reverse_dict = load_reverse_dict(word_dict_path)
        label_reverse_dict = load_reverse_dict(label_dict_path)

        test_reader = reader.test_reader(data_dir, word_dict)()

    dict_dim = len(word_dict)
    class_num = len(label_reverse_dict)
    prob_layer = topology(dict_dim, class_num, is_infer=True)

    # initialize PaddlePaddle
    paddle.init(use_gpu=False, trainer_count=1)

    # load the trained model
    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open(model_path, "r"))
    inferer = paddle.inference.Inference(
        output_layer=prob_layer, parameters=parameters)

    test_batch = []
    for idx, item in enumerate(test_reader):
        test_batch.append([item[0]])
        if len(test_batch) == batch_size:
            _infer_a_batch(inferer, test_batch, word_reverse_dict,
                           label_reverse_dict)
            test_batch = []

    if len(test_batch):
        _infer_a_batch(inferer, test_batch, word_reverse_dict,
                       label_reverse_dict)
        test_batch = []
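Example 1 depends on `load_dict` and `load_reverse_dict` helpers that are not shown. A minimal sketch of what they might look like, assuming a dictionary file with one token per line (the tab-split mirrors the format used elsewhere on this page, but is an assumption):

def load_dict(dict_path):
    """Load a dictionary file (one token per line) into a token -> id dict."""
    with open(dict_path, "r") as fin:
        return dict((line.strip().split("\t")[0], idx)
                    for idx, line in enumerate(fin))


def load_reverse_dict(dict_path):
    """Load the same file into an id -> token dict."""
    with open(dict_path, "r") as fin:
        return dict((idx, line.strip().split("\t")[0])
                    for idx, line in enumerate(fin))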
Example 2
def infer(args):
    id2word_dict = reader.load_dict(args.word_dict_path)
    word2id_dict = reader.load_reverse_dict(args.word_dict_path)

    id2label_dict = reader.load_dict(args.label_dict_path)
    label2id_dict = reader.load_reverse_dict(args.label_dict_path)
    q2b_dict = reader.load_dict(args.word_rep_dict_path)
    test_data = paddle.batch(reader.test_reader(args.test_data_dir,
                                                word2id_dict, label2id_dict,
                                                q2b_dict),
                             batch_size=args.batch_size)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)
        for data in test_data():
            full_out_str = ""
            word_idx = to_lodtensor([x[0] for x in data], place)
            word_list = [x[1] for x in data]
            (crf_decode, ) = exe.run(inference_program,
                                     feed={"word": word_idx},
                                     fetch_list=fetch_targets,
                                     return_numpy=False)
            lod_info = (crf_decode.lod())[0]
            np_data = np.array(crf_decode)
            assert len(data) == len(lod_info) - 1
            for sen_index in xrange(len(data)):
                assert len(
                    data[sen_index][0]) == lod_info[sen_index +
                                                    1] - lod_info[sen_index]
                word_index = 0
                outstr = ""
                cur_full_word = ""
                cur_full_tag = ""
                words = word_list[sen_index]
                for tag_index in xrange(lod_info[sen_index],
                                        lod_info[sen_index + 1]):
                    cur_word = words[word_index]
                    cur_tag = id2label_dict[str(np_data[tag_index][0])]
                    if cur_tag.endswith("-B") or cur_tag.endswith("O"):
                        if len(cur_full_word) != 0:
                            outstr += cur_full_word.encode(
                                'utf8') + "/" + cur_full_tag.encode(
                                    'utf8') + " "
                        cur_full_word = cur_word
                        cur_full_tag = get_real_tag(cur_tag)
                    else:
                        cur_full_word += cur_word
                    word_index += 1
                outstr += cur_full_word.encode(
                    'utf8') + "/" + cur_full_tag.encode('utf8') + " "
                outstr = outstr.strip()
                full_out_str += outstr + "\n"
            print full_out_str.strip()
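Example 2 calls two helpers that are not shown, `to_lodtensor` and `get_real_tag`. A sketch of both, following the conventions visible in the snippet (variable-length int64 id sequences packed into a fluid LoDTensor, and tags such as "PER-B"/"PER-I" whose two-character position suffix is stripped); treat the details as assumptions:

import numpy as np
import paddle.fluid as fluid

def to_lodtensor(data, place):
    """Pack a batch of variable-length id sequences into one LoDTensor."""
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened = np.concatenate(data, axis=0).astype("int64")
    flattened = flattened.reshape([len(flattened), 1])
    res = fluid.core.LoDTensor()
    res.set(flattened, place)
    res.set_lod([lod])
    return res

def get_real_tag(tag):
    """Strip the position suffix, e.g. "PER-B" -> "PER"; keep "O" as-is."""
    if tag == "O":
        return "O"
    return tag[:-2]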
Example 3
def main(args):
    input, output, data_size = construct_sample(args)
    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
    exe = Executor(places[0])
    [inference_program, feed_target_names,
     fetch_targets] = (fluid.io.load_inference_model(dirname="./model",
                                                     executor=exe))
    feat, lod = test_reader()
    result = exe.run(inference_program,
                     feed={
                         feed_target_names[0]: feat,
                         feed_target_names[1]: lod
                     },
                     fetch_list=fetch_targets)
    print(result[0].shape)
    output_final_result(result[0])
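Example 3 leaves `construct_sample`, `test_reader`, and `output_final_result` undefined. As a purely hypothetical illustration of the contract `test_reader` must satisfy here (a feature matrix plus the sequence-offset information fed as the second input), one might write:

import numpy as np

def test_reader():
    # Hypothetical: two utterances of 30 and 70 frames with 40-dim features.
    # The shapes, dtypes, and offset convention are assumptions, not taken
    # from the original model.
    feat = np.random.random((100, 40)).astype("float32")
    lod = np.array([0, 30, 100]).astype("int64")
    return feat, lod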
Example 4
def test(model_path, vocabs_path, test_data_path, batch_size, beam_size,
         save_file, use_gpu):
    assert os.path.exists(model_path), "The given model does not exist."
    assert os.path.exists(
        test_data_path), "The given test data does not exist."

    with gzip.open(model_path, "r") as f:
        parameters = paddle.parameters.Parameters.from_tar(f)

    id_to_text = {}
    assert os.path.exists(
        vocabs_path), "The given word dictionary path does not exist."
    with open(vocabs_path, "r") as f:
        for i, line in enumerate(f):
            id_to_text[i] = line.strip().split("\t")[0]

    paddle.init(use_gpu=use_gpu, trainer_count=1)
    beam_gen = encoder_decoder_network(word_count=len(id_to_text),
                                       emb_dim=512,
                                       encoder_depth=3,
                                       encoder_hidden_dim=512,
                                       decoder_depth=3,
                                       decoder_hidden_dim=512,
                                       bos_id=0,
                                       eos_id=1,
                                       max_length=50,
                                       beam_size=beam_size,
                                       is_generating=True)

    inferer = paddle.inference.Inference(output_layer=beam_gen,
                                         parameters=parameters)

    start = time.clock()  # begin timing inference
    test_batch = []
    with open(save_file, "w") as fout:
        for idx, item in enumerate(
                reader.test_reader(test_data_path, vocabs_path)()):
            test_batch.append([item])
            if len(test_batch) == batch_size:
                infer_a_batch(inferer, test_batch, beam_size, id_to_text, fout)
                test_batch = []

        if len(test_batch):
            infer_a_batch(inferer, test_batch, beam_size, id_to_text, fout)
            test_batch = []
    end = time.clock()
    print('time:{}'.format(end - start))
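`infer_a_batch` is not shown in Example 4. A hypothetical sketch, assuming the v2 beam-search inferer returns per-candidate scores plus a flat id stream in which each generated sequence ends with -1 (the convention of the old PaddlePaddle generation demos); everything here is an assumption:

import numpy as np

def infer_a_batch(inferer, test_batch, beam_size, id_to_text, fout):
    # field=["prob", "id"]: candidate scores and a flat id stream in which
    # every generated sequence is terminated by -1 (assumed convention).
    probs, ids = inferer.infer(input=test_batch, field=["prob", "id"])
    sequences, cur = [], []
    for token_id in ids:
        if token_id == -1:
            sequences.append(cur)
            cur = []
        else:
            cur.append(int(token_id))
    # Each source sequence owns beam_size consecutive candidates.
    for i in range(len(test_batch)):
        for k in range(beam_size):
            seq = sequences[i * beam_size + k]
            fout.write(" ".join(id_to_text[w] for w in seq) + "\n")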
Example 5
cost = fluid.layers.square_error_cost(input=model, label=label)
avg_cost = fluid.layers.mean(cost)

# Get the training and test programs
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.001)

opts = optimizer.minimize(avg_cost)

# Load the custom data
train_reader = paddle.batch(reader=reader.train_reader(train_list, crop_size,
                                                       resize_size),
                            batch_size=32)
test_reader = paddle.batch(reader=reader.test_reader(test_list, crop_size),
                           batch_size=32)

# Define the executor
place = fluid.CUDAPlace(0)  # place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Define the input data feeder
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Train
all_test_cost = []
for pass_id in range(100):
    # Run training
Example 6
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)

# Get the training and test programs
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-3,
                                          regularization=fluid.regularizer.L2Decay(
                                              regularization_coeff=0.001))
opts = optimizer.minimize(avg_cost)

# Load the custom data
train_reader = reader.train_reader('dataset/train', batch_size=32)
test_reader = reader.test_reader('dataset/test', batch_size=32)

# Define an executor that uses the GPU
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Load the initial model, if one is given
if init_model:
    fluid.load(program=fluid.default_main_program(),
               model_path=init_model,
               executor=exe,
               var_list=fluid.io.get_program_parameter(fluid.default_main_program()))
    print("Init model from: %s." % init_model)
Example 7
acc = fluid.layers.accuracy(input=model, label=label)

# Get the training and test programs
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method
l2 = fluid.regularizer.L2DecayRegularizer(1e-4)
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-3,
                                          regularization=l2)
opts = optimizer.minimize(avg_cost)

# Load the custom data
train_reader = paddle.batch(reader=reader.train_reader('images/train.list',
                                                       crop_size, resize_size),
                            batch_size=32)
test_reader = paddle.batch(reader=reader.test_reader('images/test.list',
                                                     crop_size),
                           batch_size=32)

# Define an executor that uses the GPU
place = fluid.CUDAPlace(0)
# place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Define the input data feeder
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Train for 100 passes
for pass_id in range(100):
    # Run training
Example 8
def main():
    # parse the argument
    parser = argparse.ArgumentParser()
    parser.add_argument('-m',
                        '--model',
                        help='The model for image classification',
                        choices=[
                            'alexnet', 'vgg13', 'vgg16', 'vgg19', 'resnet',
                            'googlenet', 'inception-resnet-v2', 'inception_v4',
                            'xception'
                        ])
    parser.add_argument(
        '-r',
        '--retrain_file',
        type=str,
        default='',
        help="The model file to retrain, none is for train from scratch")
    args = parser.parse_args()

    # PaddlePaddle init
    paddle.init(use_gpu=True, trainer_count=1)

    image = paddle.layer.data(name="image",
                              type=paddle.data_type.dense_vector(DATA_DIM))
    lbl = paddle.layer.data(name="label",
                            type=paddle.data_type.integer_value(CLASS_DIM))

    extra_layers = None
    learning_rate = 0.0001
    if args.model == 'alexnet':
        out = alexnet.alexnet(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg13':
        out = vgg.vgg13(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg16':
        out = vgg.vgg16(image, class_dim=CLASS_DIM)
    elif args.model == 'vgg19':
        out = vgg.vgg19(image, class_dim=CLASS_DIM)
    elif args.model == 'resnet':
        conv, pool, out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM)
        learning_rate = 0.1
    elif args.model == 'googlenet':
        out, out1, out2 = googlenet.googlenet(image, class_dim=CLASS_DIM)
        loss1 = paddle.layer.cross_entropy_cost(input=out1,
                                                label=lbl,
                                                coeff=0.3)
        paddle.evaluator.classification_error(input=out1, label=lbl)
        loss2 = paddle.layer.cross_entropy_cost(input=out2,
                                                label=lbl,
                                                coeff=0.3)
        paddle.evaluator.classification_error(input=out2, label=lbl)
        extra_layers = [loss1, loss2]
    elif args.model == 'inception-resnet-v2':
        assert DATA_DIM == 3 * 331 * 331 or DATA_DIM == 3 * 299 * 299
        out = inception_resnet_v2.inception_resnet_v2(image,
                                                      class_dim=CLASS_DIM,
                                                      dropout_rate=0.5,
                                                      data_dim=DATA_DIM)
    elif args.model == 'inception_v4':
        conv, pool, out = inception_v4.inception_v4(image, class_dim=CLASS_DIM)
    elif args.model == 'xception':
        out = xception.xception(image, class_dim=CLASS_DIM)

    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)
    for k, v in parameters.__param_conf__.items():
        print(" config key {0}\t\t\tval{1}".format(k, v))
    print("-" * 50)
    #print(parameters.__param_conf__[0])

    if args.retrain_file:
        print("restore parameters from {0}".format(args.retrain_file))
        exclude_params = [
            param for param in parameters.names()
            if param.startswith('___fc_layer_0__')
        ]
        parameters.init_from_tar(gzip.open(args.retrain_file), exclude_params)

    # Create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                         BATCH_SIZE),
        learning_rate=learning_rate / BATCH_SIZE,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=128000 * 35,
        learning_rate_schedule="discexp",
    )

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            # To use the flowers dataset instead, replace the line below
            # with flowers.train()
            reader.train_reader('valid_train0.lst'),
            buf_size=2048),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        # To use the flowers dataset instead, replace the line below
        # with flowers.valid()
        reader.test_reader('valid_val.lst'),
        batch_size=BATCH_SIZE)

    # Create trainer
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 extra_layers=extra_layers)

    # End batch and end pass event handler
    def event_handler(event):
        global step
        global start
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    time.time() - start)
                start = time.time()
                loss_scalar.add_record(step, event.cost)
                acc_scalar.add_record(
                    step, 1 - event.metrics['classification_error_evaluator'])
                step += 1
            if event.batch_id % 100 == 0:
                with gzip.open('params_pass_%d.tar.gz' % event.pass_id,
                               'w') as f:
                    trainer.save_parameter_to_tar(f)

        if isinstance(event, paddle.event.EndPass):
            with gzip.open('params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)
            result = trainer.test(reader=test_reader)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    trainer.train(reader=train_reader,
                  num_passes=200,
                  event_handler=event_handler)
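The `event_handler` in Example 8 reads the globals `step`, `start`, `loss_scalar`, and `acc_scalar`, none of which are defined in the snippet. A hedged sketch of the module-level setup it implies, assuming the scalar curves were written with the old VisualDL 1.x `LogWriter` API (the log directory and tag names are assumptions):

import time
from visualdl import LogWriter

step = 0
start = time.time()

# Scalar writers for the loss and accuracy curves (assumed VisualDL 1.x API).
log_writer = LogWriter("./vdl_log", sync_cycle=10)
with log_writer.mode("train") as train_logger:
    loss_scalar = train_logger.scalar("loss")
    acc_scalar = train_logger.scalar("accuracy")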
Example 9
cost = paddle.layer.classification_cost(input=out, label=lbl)

# Create parameters
# parameters = paddle.parameters.create(cost)
with gzip.open('/book/working/models/params_pass_47.tar.gz', 'r') as f:
    parameters = paddle.parameters.Parameters.from_tar(f)

# Read training data
train_reader = paddle.batch(
    paddle.reader.shuffle(
        reader.train_reader('/book/working/data/train.list', buffered_size=1024),
        buf_size=20000),
    batch_size=BATCH_SIZE)
# Read testing data
test_reader = paddle.batch(
    reader.test_reader('/book/working/data/val.list', buffered_size=1024),
    batch_size=BATCH_SIZE)

# End batch and end pass event handler
def event_handler(event):
    # Report result of batch.
    if isinstance(event, paddle.event.EndIteration):
        print "\nPass %d, Batch %d, Cost %f, %s" % (
            event.pass_id, event.batch_id, event.cost, event.metrics)
    # Report result of pass.
    if isinstance(event, paddle.event.EndPass):
        if (event.pass_id + 1) % 4 == 0:
            # Save parameters
            with gzip.open('/book/working/models/params_pass_%d.tar.gz' % event.pass_id, 'w') as f:
                parameters.to_tar(f)
            # Report validation accuracy
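            # (The snippet breaks off above. A hypothetical continuation,
            # assuming a paddle.trainer.SGD instance named `trainer` as in
            # Example 8 -- `trainer` is not defined in this example:)
            result = trainer.test(reader=test_reader)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)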
Example 10
# Get the training and test programs
test_program = fluid.default_main_program().clone(for_test=True)

# Define the optimization method
l2 = fluid.regularizer.L2DecayRegularizer(1e-4)
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-3,
                                          regularization=l2)
opts = optimizer.minimize(avg_cost)

# Load the custom data
train_reader = paddle.batch(
    reader=reader.train_reader('images/train.list',
                               crop_size, resize_size), batch_size=32)
test_reader = paddle.batch(
    reader=reader.test_reader('images/test.list',
                              crop_size), batch_size=32)

# Define an executor that uses the GPU
place = fluid.CUDAPlace(0)
# place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Define the input data feeder
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Train for 100 passes
for pass_id in range(100):
    # Run training
    for batch_id, data in enumerate(train_reader()):
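        # (The snippet breaks off above. A minimal sketch of the loop body
        # that Examples 5, 7, and 10 all lead into, assuming the avg_cost
        # and feeder variables defined earlier in each example:)
        train_cost = exe.run(program=fluid.default_main_program(),
                             feed=feeder.feed(data),
                             fetch_list=[avg_cost])
        if batch_id % 100 == 0:
            print('Pass: %d, Batch: %d, Cost: %0.5f' %
                  (pass_id, batch_id, train_cost[0][0]))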