Example #1
    def _prepare(self):
        # prepare reader
        self.test_reader = paddle.batch(
                           reader=self.test_reader.create_reader(),
                           batch_size=self.args.batch_size)

        # init paddle
        paddle.init(use_gpu=self.args.use_gpu,
                    trainer_count=self.args.trainer_count)

        # create parameters and trainer
        model_out = self.model()
        out_names = [x.name for x in model_out] \
                if isinstance(model_out, collections.Iterable) \
                else model_out.name
        self.logger.info("out type: {}".format(model_out))
        self.logger.info("out names: {}".format(out_names))
        try:
            self.parameters = paddle.parameters.Parameters.from_tar(
                              gzip.open(self.args.model_file, 'r'))
        except IOError:
            raise IOError('can not find: {}'.format(self.args.model_file))
        self.inferer = paddle.inference.Inference(
                       output_layer=model_out,
                       parameters=self.parameters)
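A minimal sketch of how the prepared reader and inferer might be driven afterwards. The _infer method below is hypothetical, but the infer(input=batch) call mirrors the inference examples further down this page:

    def _infer(self):
        # hypothetical driver: run every test batch through the inferer
        for batch_id, batch in enumerate(self.test_reader()):
            # one row of output-layer activations per input sample
            res = self.inferer.infer(input=batch)
            self.logger.info("batch {}: {} outputs".format(batch_id, len(res)))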
Example #2
def main():
    # init
    paddle.init(use_gpu=False, trainer_count=1)

    # network config
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.mse_cost(input=y_predict, label=y)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer of new remote updater to pserver
    optimizer = paddle.optimizer.Momentum(momentum=0, learning_rate=1e-3)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer,
                                 is_local=False,
                                 pserver_spec=etcd_endpoints,
                                 use_etcd=True)

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            # FIXME: for cloud data reader, pass number is managed by master
            # should print the server side pass number
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)

    # training
    # NOTE: use uci_housing.train() as reader for non-paddlecloud training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                cloud_reader(
                    ["/pfs/dlnel/public/dataset/uci_housing/uci_housing*"],
                    etcd_endpoints),
                buf_size=500),
            batch_size=2),
        feeding={'x': 0,
                 'y': 1},
        event_handler=event_handler,
        num_passes=30)
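The NOTE above points at swapping the cloud reader for a local dataset when running outside PaddleCloud; a sketch of that substitution, borrowing the train call from the near-identical local run in Example #11 below:

    # local (non-PaddleCloud) variant; see Example #11
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        feeding={'x': 0, 'y': 1},
        event_handler=event_handler,
        num_passes=30)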
Example #3
    def test_ifelse(self):
        kwargs = {'startup_program': Program(), 'main_program': Program()}
        image = layers.data(name='x', shape=[784], dtype='float32', **kwargs)

        label = layers.data(name='y', shape=[1], dtype='int64', **kwargs)

        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0, **kwargs)

        cond = layers.less_than(x=label, y=limit, **kwargs)

        ie = layers.IfElse(cond, **kwargs)

        with ie.true_block():
            true_image = ie.input(image)
            hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs)
            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
            ie.output(prob)

        with ie.false_block():
            false_image = ie.input(image)
            hidden = layers.fc(input=false_image,
                               size=200,
                               act='tanh',
                               **kwargs)
            prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs)
            ie.output(prob)

        prob = ie()
        loss = layers.cross_entropy(input=prob[0], label=label, **kwargs)
        avg_loss = layers.mean(x=loss, **kwargs)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, kwargs['startup_program'])
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(kwargs['startup_program'])
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array(map(lambda x: x[0], data)).astype("float32")
                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
                y_data = y_data.reshape((y_data.shape[0], 1))

                outs = exe.run(kwargs['main_program'],
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print outs[0]
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #4
 def event_handler(event):
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 100 == 0:
             result = trainer.test(
                 paddle.batch(
                     paddle.dataset.imikolov.test(word_dict, N), 32))
             print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                 event.pass_id, event.batch_id, event.cost, event.metrics,
                 result.metrics)
Example #5
 def event_handler(event):
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 100 == 0:
             result = trainer.test(
                 paddle.batch(
                     cluster_reader_cluster(cluster_test_file, node_id), 32))
             print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                 event.pass_id, event.batch_id, event.cost, event.metrics,
                 result.metrics)
Example #6
def main():
    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adam_optimizer)
    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=30,
        event_handler=event_handler)
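This snippet relies on module-level names (embsize, hiddensize, N) and a wordemb helper that are not shown. A sketch of plausible definitions, modeled on the PaddlePaddle book's word2vec example; the concrete values are assumptions:

embsize = 32      # word embedding dimension (assumed)
hiddensize = 256  # hidden layer size (assumed)
N = 5             # 5-gram language model (assumed)

def wordemb(inlayer):
    # all context words share one embedding table named "_proj"
    return paddle.layer.embedding(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))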
Example #7
 def event_handler(event):
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 100 == 0:
             with gzip.open("batch-" + str(event.batch_id) + ".tar.gz",
                            'w') as f:
                 trainer.save_parameter_to_tar(f)
             result = trainer.test(
                 paddle.batch(
                     paddle.dataset.imikolov.test(word_dict, N), 32))
             print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                 event.pass_id, event.batch_id, event.cost, event.metrics,
                 result.metrics)
Example #8
def infer():
    feeding_dict, predicted_ids = seq_to_seq_net(
        embedding_dim=args.embedding_dim,
        encoder_size=args.encoder_size,
        decoder_size=args.decoder_size,
        source_dict_dim=args.dict_size,
        target_dict_dim=args.dict_size,
        is_generating=True,
        beam_size=args.beam_size,
        max_generation_length=args.max_generation_length)

    src_dict, trg_dict = paddle.dataset.wmt14.get_dict(args.dict_size)
    test_batch_generator = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
        batch_size=args.batch_size)

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    with tf.Session(config=config) as sess:
        restore(sess, './checkpoint/tf_seq2seq-1500')
        for batch_id, data in enumerate(test_batch_generator()):
            src_seq = map(lambda x: x[0], data)

            source_language_seq = [
                src_dict[item] for seq in src_seq for item in seq
            ]

            src_sequence_length = np.array(
                [len(seq) for seq in src_seq]).astype('int32')
            src_seq_maxlen = np.max(src_sequence_length)
            src_seq = np.array([
                padding_data(seq, src_seq_maxlen, END_TOKEN_IDX)
                for seq in src_seq
            ]).astype('int32')

            outputs = sess.run([predicted_ids],
                               feed_dict={
                                   feeding_dict['src_word_idx']: src_seq,
                                   feeding_dict['src_sequence_length']:
                                   src_sequence_length
                               })

            print("\nDecoder result comparison: ")
            source_language_seq = ' '.join(source_language_seq).lstrip(
                '<s>').rstrip('<e>').strip()
            inference_seq = ''
            print(" --> source: " + source_language_seq)
            for item in outputs[0][0]:
                if item[0] == END_TOKEN_IDX: break
                inference_seq += ' ' + trg_dict.get(item[0], '<unk>')
            print(" --> inference: " + inference_seq)
Example #9
 def setUp(self):
     with fluid.program_guard(fluid.Program(), fluid.Program()):
         reader = paddle.batch(mnist.train(), batch_size=32)
         feeder = fluid.DataFeeder(
             feed_list=[  # order is image and label
                 fluid.layers.data(
                     name='image', shape=[784]),
                 fluid.layers.data(
                     name='label', shape=[1], dtype='int64'),
             ],
             place=fluid.CPUPlace())
         self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file(
             './mnist_for_preprocessor_test.recordio', reader, feeder)
Example #10
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 1000 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

        elif isinstance(event, paddle.event.EndPass):
            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
                event.pass_id, result.cost, result.metrics)
            lists.append((event.pass_id, result.cost,
                          result.metrics['classification_error_evaluator']))
Example #11
def main():
    # init
    paddle.init(use_gpu=False, trainer_count=1)

    # network config
    x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13))
    y_predict = paddle.layer.fc(input=x,
                                param_attr=paddle.attr.Param(name='w'),
                                size=1,
                                act=paddle.activation.Linear(),
                                bias_attr=paddle.attr.Param(name='b'))
    y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
    cost = paddle.layer.mse_cost(input=y_predict, label=y)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)

    # training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                uci_housing.train(), buf_size=500),
            batch_size=2),
        feeding={'x': 0,
                 'y': 1},
        event_handler=event_handler,
        num_passes=30)
Example #12
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)
Example #13
def main():
    paddle.init(use_gpu=False, trainer_count=1)

    # define network topology
    crf_cost, crf_dec = db_lstm()

    # create parameters
    parameters = paddle.parameters.create([crf_cost, crf_dec])

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

    trainer = paddle.trainer.SGD(cost=crf_cost,
                                 parameters=parameters,
                                 update_equation=optimizer)
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    trn_reader = paddle.batch(
        paddle.reader.shuffle(
            conll05.test(), buf_size=8192), batch_size=10)

    feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    trainer.train(
        reader=trn_reader,
        event_handler=event_handler,
        num_passes=10000,
        feeding=feeding)
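The feeding dict maps each paddle.layer.data name to a column index in the tuples the reader yields, so every conll05 sample must be a 9-tuple in exactly that order. A purely hypothetical sample for illustration (the ids are made up):

# shape of one sample as the reader must yield it (indices match feeding):
# (word_data, ctx_n2_data, ctx_n1_data, ctx_0_data, ctx_p1_data,
#  ctx_p2_data, verb_data, mark_data, target)
sample = ([12, 7, 3], [4, 4, 4], [9, 9, 9], [1, 1, 1], [2, 2, 2],
          [5, 5, 5], [8, 8, 8], [0, 1, 0], [6, 2, 6])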
Example #14
 def event_handler(event):
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 100 == 0:
             print "\nPass %d, Batch %d, Cost %f, %s" % (
                 event.pass_id, event.batch_id, event.cost, event.metrics)
         else:
             sys.stdout.write('.')
             sys.stdout.flush()
     if isinstance(event, paddle.event.EndPass):
         result = trainer.test(
             reader=paddle.batch(
                 paddle.dataset.cifar.test10(), batch_size=128),
             feeding={'image': 0,
                      'label': 1})
         print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
Example #15
    def _prepare(self):
        # prepare reader
        self.train_reader = paddle.batch(
                            reader=self.train_reader.create_reader(),
                            batch_size=self.args.batch_size)
        self.test_reader = paddle.batch(
                           reader=self.test_reader.create_reader(),
                           batch_size=self.args.batch_size)

        # init paddle
        paddle.init(use_gpu=self.args.use_gpu,
                    trainer_count=self.args.trainer_count)

        # create optimizer
        optimizer = paddle.optimizer.RMSProp(
                    learning_rate=self.args.learning_rate,
                    regularization=opt.L2Regularization(rate=self.args.l2))

        # create parameters and trainer
        model_out = self.model()
        if self.args.init_from:
            self.parameters = paddle.parameters.Parameters.from_tar(
                              gzip.open(self.args.init_from, 'r'))
        else:
            self.parameters = paddle.parameters.create(model_out)
        if self.args.pre_emb.strip() != '':
            embeddings = np.loadtxt(self.args.pre_emb,
                    dtype=float)[:self.args.vocab_size]
            self.parameters.set(self.model.name + '.embs', embeddings)
            self.logger.info('init emb from {} to {}'.format(
                self.args.pre_emb,
                self.model.name + '.embs'))

        self.trainer = paddle.trainer.SGD(cost=model_out,
                                          parameters=self.parameters,
                                          update_equation=optimizer)
Example #16
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            # FIXME: for cloud data reader, pass number is managed by master
            # should print the server side pass number
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            if (event.pass_id + 1) % 10 == 0:
                result = trainer.test(
                    reader=paddle.batch(
                        uci_housing.test(), batch_size=2),
                    feeding={'x': 0,
                             'y': 1})
                print "Test %d, %.2f" % (event.pass_id, result.cost)
Example #17
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
        if isinstance(event, paddle.event.EndPass):
            # save the parameters
            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                parameters.to_tar(f)

            result = trainer.test(reader=paddle.batch(
                paddle.dataset.mnist.test(), batch_size=128))
            print "Test with Pass %d, Cost %f, %s\n" % (
                event.pass_id, result.cost, result.metrics)
            lists.append((event.pass_id, result.cost,
                          result.metrics['classification_error_evaluator']))
Example #18
def eval_test(exe, batch_acc, batch_size_tensor, inference_program):
    test_reader = paddle.batch(paddle.dataset.mnist.test(),
                               batch_size=args.batch_size)
    test_pass_acc = fluid.average.WeightedAverage()
    for batch_id, data in enumerate(test_reader()):
        img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
                                data)).astype(DTYPE)
        y_data = np.array(map(lambda x: x[1], data)).astype("int64")
        y_data = y_data.reshape([len(y_data), 1])

        acc, weight = exe.run(inference_program,
                              feed={
                                  "pixel": img_data,
                                  "label": y_data
                              },
                              fetch_list=[batch_acc, batch_size_tensor])
        test_pass_acc.add(value=acc, weight=weight)
        pass_acc = test_pass_acc.eval()
    return pass_acc
Example #19
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % conf.log_period == 0:
                print("Pass %d, batch %d, Samples %d, Cost %f, Eval %s" %
                      (event.pass_id, event.batch_id, event.batch_id *
                       conf.batch_size, event.cost, event.metrics))

        if isinstance(event, paddle.event.EndPass):
            # Here, because the training and testing data share the same format,
            # we still use reader.train_reader to read the testing data.
            result = trainer.test(reader=paddle.batch(
                data_generator.train_reader(test_file_list),
                batch_size=conf.batch_size),
                                  feeding=feeding)
            print("Test %d, Cost %f, Eval %s" %
                  (event.pass_id, result.cost, result.metrics))
            with gzip.open(os.path.join(model_save_dir, "params_pass.tar.gz"),
                           "w") as f:
                trainer.save_parameter_to_tar(f)
Example #20
 def event_handler(event):
     """
     handle paddle event
     """
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 100 == 0:
             print "Pass %d, Batch %d, Cost %f" % (
                 event.pass_id, event.batch_id, event.cost)
     if isinstance(event, paddle.event.EndPass):
         if event.pass_id % 10 == 0:
             filename = './output/model/params_pass_%d_%s.tar' % (
                 event.pass_id, trainer_id_str)
             with open(filename, "w") as f:
                 trainer.save_parameter_to_tar(f)
         test_reader = paddle.batch(paddle.reader.shuffle(
             cluster_data_reader(cluster_test_dir), buf_size=500),
                                    batch_size=2)
         result = trainer.test(reader=test_reader, feeding=feeding)
         print "Test %d, Cost %f" % (event.pass_id, result.cost)
Example #21
def main(task="train", use_gpu=False, trainer_count=1, save_dir="models"):
    if task == "train":
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)
        cost = network()
        parameters = paddle.parameters.create(cost)
        optimizer = paddle.optimizer.Momentum(
            learning_rate=0.1 / 128.0,
            momentum=0.9,
            regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))

        trainer = paddle.trainer.SGD(cost=cost,
                                     parameters=parameters,
                                     update_equation=optimizer)

        def event_handler(event):
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 100 == 0:
                    logger.info("Pass %d, Batch %d, Cost %f, %s" %
                                (event.pass_id, event.batch_id, event.cost,
                                 event.metrics))
            if isinstance(event, paddle.event.EndPass):
                with gzip.open(
                        os.path.join(save_dir, "params_pass_%d.tar" %
                                     event.pass_id), "w") as f:
                    trainer.save_parameter_to_tar(f)

        trainer.train(
            reader=paddle.batch(
                paddle.reader.shuffle(
                    paddle.dataset.mnist.train(), buf_size=8192),
                batch_size=128),
            event_handler=event_handler,
            num_passes=5)
    elif task == "dump_config":
        predict = network(is_infer=True)
        dump_v2_config(predict, "trainer_config.bin", True)
    else:
        raise RuntimeError(("Error value for parameter task. "
                            "Available options are: train and dump_config."))
Example #22
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 50 == 0:
                if not isinstance(event.cost, list):
                    cost = [event.cost]
                else:
                    cost = event.cost
                print "\nPass %d, Batch %d, " % (event.pass_id,
                                                 event.batch_id),
                for i in range(len(cost)):
                    print "%s: %f, " % (cost_name[i], cost[i]),
                print "\n"
            else:
                sys.stdout.write('.')
                sys.stdout.flush()

        elif (isinstance(event, paddle.event.EndPass) and \
              event.pass_id % 4 == 1):
            print "Testing",
            result = trainer.test(reader=paddle.batch(
                sun3d.test_upsampler(params['test_scene'][0:5],
                                     rate=params['sample_rate'],
                                     height=params['size'][0],
                                     width=params['size'][1]),
                batch_size=params['batch_size']),
                                  feeding=feeding)

            print "\nTask upsample, Pass %d," % (event.pass_id),
            if not isinstance(result.cost, list):
                cost = [result.cost]
            else:
                cost = result.cost
            for i in range(len(cost)):
                print "%s: %f, " % (cost_name[i], cost[i]),

            folder = params['output_path'] + '/upsampler/'
            uts.mkdir_if_need(folder)
            model_name = folder + '/upsample_model_' + \
                         FLAGS.suffix + '.tar.gz'
            with gzip.open(model_name, 'w') as f:
                parameters.to_tar(f)
            print "model saved at %s" % model_name
Example #23
 def event_handler_plot(event):
     """
     定义event_handler_plot事件处理函数:
         事件处理器,可以根据训练过程的信息做相应操作:包括绘图和输出训练结果信息
     Args:
         event -- 事件对象,包含event.pass_id, event.batch_id, event.cost等信息
     Return:
     """
     global STEP
     if isinstance(event, paddle.event.EndIteration):
         # every 100 batches, add one plot point
         if STEP % 100 == 0:
             cost_ploter.append(TRAIN_TITLE_COST, STEP, event.cost)
             # plot the cost curve and save it as 'train_test_cost.png'
             cost_ploter.plot('./train_test_cost')
             error_ploter.append(
                 TRAIN_TITLE_ERROR, STEP,
                 event.metrics['classification_error_evaluator'])
             # plot the error-rate curve and save it as 'train_test_error_rate.png'
             error_ploter.plot('./train_test_error_rate')
         STEP += 1
         # every 100 batches, print the training results
         if event.batch_id % 100 == 0:
             print "Pass %d, Batch %d, Cost %f, %s" % (
                 event.pass_id, event.batch_id, event.cost, event.metrics)
     if isinstance(event, paddle.event.EndPass):
         # save the parameters to a file
         with open('params_pass_%d.tar' % event.pass_id, 'w') as param_f:
             trainer.save_parameter_to_tar(param_f)
         # evaluate on the test data
         result = trainer.test(reader=paddle.batch(
             paddle.dataset.mnist.test(), batch_size=128))
         print "Test with Pass %d, Cost %f, %s\n" % (
             event.pass_id, result.cost, result.metrics)
         # add plot points for the test cost and error_rate
         cost_ploter.append(TEST_TITLE_COST, STEP, result.cost)
         error_ploter.append(
             TEST_TITLE_ERROR, STEP,
             result.metrics['classification_error_evaluator'])
         # store the test cost and error_rate values
         lists.append((event.pass_id, result.cost,
                       result.metrics['classification_error_evaluator']))
Example #24
def main(task="train", use_gpu=False, trainer_count=1, save_dir="models"):
    if task == "train":
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)

        paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)
        cost = network()
        parameters = paddle.parameters.create(cost)
        optimizer = paddle.optimizer.Momentum(
            learning_rate=0.1 / 128.0,
            momentum=0.9,
            regularization=paddle.optimizer.L2Regularization(rate=0.0005 *
                                                             128))

        trainer = paddle.trainer.SGD(cost=cost,
                                     parameters=parameters,
                                     update_equation=optimizer)

        def event_handler(event):
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 100 == 0:
                    logger.info("Pass %d, Batch %d, Cost %f, %s" %
                                (event.pass_id, event.batch_id, event.cost,
                                 event.metrics))
            if isinstance(event, paddle.event.EndPass):
                with gzip.open(
                        os.path.join(save_dir,
                                     "params_pass_%d.tar" % event.pass_id),
                        "w") as f:
                    trainer.save_parameter_to_tar(f)

        trainer.train(reader=paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
                                          batch_size=128),
                      event_handler=event_handler,
                      num_passes=5)
    elif task == "dump_config":
        predict = network(is_infer=True)
        dump_v2_config(predict, "trainer_config.bin", True)
    else:
        raise RuntimeError(("Error value for parameter task. "
                            "Available options are: train and dump_config."))
Example #25
def evaluate(net_file, model_file):
    """ main
    """
    #1, build model
    net_path = os.path.dirname(net_file)
    if net_path not in sys.path:
        sys.path.insert(0, net_path)

    from lenet import LeNet as MyNet

    #1, define network topology
    images = fluid.layers.data(name='image',
                               shape=[1, 28, 28],
                               dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net = MyNet({'data': images})
    prediction = net.layers['prob']
    acc = fluid.layers.accuracy(input=prediction, label=label)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    #2, load weights
    if model_file.find('.npy') > 0:
        net.load(data_path=model_file, exe=exe, place=place)
    else:
        net.load(data_path=model_file, exe=exe)

    #3, test this model
    test_program = fluid.default_main_program().clone()
    test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

    feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
    fetch_list = [acc, prediction]

    print('go to test model using test set')
    acc_val = test_model(exe, test_program, \
            fetch_list, test_reader, feeder)

    print('test accuracy is [%.4f], expected value[0.919]' % (acc_val))
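A hedged usage sketch for the evaluator above; the paths are placeholders for a converted LeNet definition and its weights, not values taken from this page:

if __name__ == '__main__':
    # hypothetical paths: a lenet.py network definition and .npy weights;
    # the .npy branch above loads them with both exe and place
    evaluate('./lenet.py', './lenet.npy')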
Example #26
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f" % (
                    event.pass_id, event.batch_id, event.cost)

        if isinstance(event, paddle.event.EndPass):
            # save the parameters
            #if event.pass_id % 10 == 0:
            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)
            result = trainer.test(
                reader=paddle.batch(uci_housing.test(), batch_size=2),
                feeding=feeding)
            print "Test %d, Cost %f" % (event.pass_id, result.cost)
            #print result.metrics
            # record the training loss for each pass
            lists.append((event.pass_id, result.cost,
                          #result.metrics['classification_error_evaluator']))
                          result.metrics))
Example #27
def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            print "\n pass %d, Batch: %d cost: %f, %s" % (
                event.pass_id, event.batch_id, event.cost, event.metrics)
        else:
            sys.stdout.write('.')
            sys.stdout.flush()
    if isinstance(event, paddle.event.EndPass):
        # save parameters
        feeding = {'image': 0, 'label': 1}
        with gzip.open('output/params_pass_%d.tar.gz' % event.pass_id,
                       'w') as f:
            parameters.to_tar(f)
        filepath = '/Users/vic/Dev/DeepLearning/Paddle/VGG-CIFAR/Images/cifar-10-batches-py/test_batch'
        result = trainer.test(reader=paddle.batch(data_provider.data_reader(
            filepath, 0),
                                                  batch_size=128),
                              feeding=feeding)
        print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
Example #28
        def event_handler(event):
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 100 == 0:
                    print "\nPass %d, Batch %d, Cost %f, %s" % (
                        event.pass_id, event.batch_id, event.cost, event.metrics)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

            # at the end of each training pass
            if isinstance(event, paddle.event.EndPass):
                # save the trained parameters
                with open(save_parameters_name, 'w') as f:
                    trainer.save_parameter_to_tar(f)

                # test accuracy
                result = trainer.test(reader=paddle.batch(reader=test_reader,
                                                          batch_size=128),
                                      feeding=feeding)
                print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
Example #29
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # save parameters
            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)

            result = trainer.test(reader=paddle.batch(
                paddle.dataset.cifar.test10(), batch_size=128),
                                  feeding={
                                      'image': 0,
                                      'label': 1
                                  })
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
Example #30
 def event_handler(event):
     lists = []
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 100 == 0:
             print "\nPass %d, Batch %d, Cost %f, %s" % (
                 event.pass_id, event.batch_id, event.cost, event.metrics)
         else:
             sys.stdout.write('.')
             sys.stdout.flush()
     if isinstance(event, paddle.event.EndPass):
         # save the trained parameters
         model_path = '../model'
         if not os.path.exists(model_path):
             os.makedirs(model_path)
         with open(model_path + "/model.tar", 'w') as f:
             trainer.save_parameter_to_tar(f=f)
         # evaluate on the test data
         result = trainer.test(reader=paddle.batch(paddle.dataset.mnist.test(), batch_size=128))
         print "\nTest with Pass %d, Cost %f, %s\n" % (event.pass_id, result.cost, result.metrics)
         lists.append((event.pass_id, result.cost, result.metrics['classification_error_evaluator']))
Example #31
    def start_trainer(self, trainer, num_passes, save_parameters_name, trainer_reader, test_reader):
        reader = paddle.batch(reader=paddle.reader.shuffle(reader=trainer_reader,
                                                           buf_size=50000),
                              batch_size=128)
        father_path = save_parameters_name[:save_parameters_name.rfind("/")]
        if not os.path.exists(father_path):
            os.makedirs(father_path)

        # map each data column to the corresponding paddle.layer.data
        feeding = {"image": 0, "label": 1}

        # define the training event handler
        step = 0
        def event_handler(event):
            if isinstance(event, paddle.event.EndIteration):
                print "look event",event
                if event.batch_id % 1 == 0:
                    print "\nPass %d, Batch %d, Cost %f, Error %s" % (
                        event.pass_id, event.batch_id, event.cost, event.metrics['classification_error_evaluator'])
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

            # at the end of each training pass
            if isinstance(event, paddle.event.EndPass):
                # save the trained parameters
                with open(save_parameters_name, 'w') as f:
                    trainer.save_parameter_to_tar(f)

                # test accuracy
                result = trainer.test(reader=paddle.batch(reader=test_reader, batch_size=64),
                                      feeding=feeding)
                print "\nTest with Pass %d, Classification_Error %s" % (
                    event.pass_id, result.metrics['classification_error_evaluator'])

                # loss_scalar.add_record(step, event.cost)
                # step += 1

        trainer.train(reader=reader,
                      num_passes=num_passes,
                      event_handler=event_handler,
                      feeding=feeding)
Example #32
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % conf.log_period == 0:
                print("Pass %d, batch %d, Samples %d, Cost %f, Eval %s" %
                      (event.pass_id, event.batch_id, event.batch_id *
                       conf.batch_size, event.cost, event.metrics))

        if isinstance(event, paddle.event.EndPass):
            # Here, because the training and testing data share the same format,
            # we still use reader.train_reader to read the testing data.
            result = trainer.test(reader=paddle.batch(
                data_generator.train_reader(test_file_list),
                batch_size=conf.batch_size),
                                  feeding=feeding)
            print("Test %d, Cost %f, Eval %s" %
                  (event.pass_id, result.cost, result.metrics))
            with gzip.open(
                    os.path.join(model_save_dir,
                                 "params_pass_%05d.tar.gz" % event.pass_id),
                    "w") as f:
                trainer.save_parameter_to_tar(f)
Example #33
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.batch(
                    reader.test(data_path, feature_dim + 1,
                                args.model_type.is_classification()),
                    batch_size=batch_size),
                feeding=feeding)
            print "Test %d, Cost %f, %s" % (event.pass_id, result.cost,
                                            result.metrics)
            
            model_desc = "{type}".format(
                    type=str(args.model_type))
            with open("%sdnn_%s_pass_%05d.tar" %
                          (args.model_output_prefix, model_desc,
                           event.pass_id), "w") as f:
                parameters.to_tar(f)
Example #34
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("Pass %d, batch %d, Samples %d, Cost %f" %
                      (event.pass_id, event.batch_id, event.batch_id *
                       BATCH_SIZE, event.cost))

        if isinstance(event, paddle.event.EndPass):
            # Here, because the training and testing data share the same format,
            # we still use my_reader.train_reader to read the testing data.
            test_reader = paddle.batch(
                my_reader.train_reader(test_file_list),
                batch_size=BATCH_SIZE)
            result = trainer.test(reader=test_reader, feeding=feeding)
            print("Test %d, Cost %f" % (event.pass_id, result.cost))
            # create the model save directory if it does not exist
            if not os.path.exists(model_save_dir):
                os.mkdir(model_save_dir)
            with gzip.open(
                    os.path.join(model_save_dir, "params_pass.tar.gz"), "w") as f:
                trainer.save_parameter_to_tar(f)
Example #35
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 5 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # save parameters
            if event.pass_id > 190:
                with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                    parameters.to_tar(f)

            result = trainer.test(
                reader=paddle.batch(
                    paddle.reader.shuffle(
                        test(), buf_size=50000),
                    batch_size=128),
                feeding={'image': 0,
                         'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)
Example #36
def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            print "\nPass: %d Batch: %d [Cost: %f ][%s]\n" % (event.pass_id, event.batch_id, event.cost, event.metrics)
        else:
            sys.stdout.write('.')
            sys.stdout.flush()
    if isinstance(event, paddle.event.EndPass):

        #save parameters

        with gzip.open('output/params.tar.gz', 'w') as f:
            parameters.to_tar(f)

        # test
        feeding = {'image': 0,
                   'label': 1}
        filepath = ""
        result = trainer.test(reader=paddle.batch(reader=data_provider.data_reader(filepath, 0), batch_size=128),
                              feeding=feeding)
        print "\nTest Result: [Cost: %f] [%s] " % (result.cost, result.metrics)
Example #37
def train(dict_size, batch_size, num_passes, beam_size, schedule_type, decay_a,
          decay_b, model_dir):
    optimizer = paddle.optimizer.Adam(
        learning_rate=1e-4,
        regularization=paddle.optimizer.L2Regularization(rate=1e-5))

    cost = seqToseq_net(dict_size, dict_size, beam_size)

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=optimizer)

    wmt14_reader = reader.gen_schedule_data(
        paddle.reader.shuffle(paddle.dataset.wmt14.train(dict_size),
                              buf_size=8192), schedule_type, decay_a, decay_b)

    # define event_handler callback
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 10 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # save parameters
            with gzip.open(
                    os.path.join(model_dir,
                                 'params_pass_%d.tar.gz' % event.pass_id),
                    'w') as f:
                trainer.save_parameter_to_tar(f)

    # start to train
    trainer.train(reader=paddle.batch(wmt14_reader, batch_size=batch_size),
                  event_handler=event_handler,
                  feeding=reader.feeding,
                  num_passes=num_passes)
Example #38
def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    print "load word dict successfully"
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(name="words",
                             shape=[1],
                             dtype="int64",
                             lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out = stacked_lstm_net(data,
                                               label,
                                               input_dim=dict_dim,
                                               class_dim=class_dim)

    train_data = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.imdb.train(word_dict), buf_size=1000),
                              batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    exe.run(fluid.default_startup_program())

    for pass_id in xrange(PASS_NUM):
        accuracy.reset(exe)
        for data in train_data():
            cost_val, acc_val = exe.run(fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[cost, acc_out])
            pass_acc = accuracy.eval(exe)
            print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                  " pass_acc=" + str(pass_acc))
            if cost_val < 1.0 and acc_val > 0.8:
                exit(0)
    exit(1)
Example #39
def main():
    rnn_out = encoder_decoder()
    label = layers.data(name="target_language_next_word",
                        shape=[1],
                        dtype='int64',
                        lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    train_data = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.wmt14.train(dict_size), buf_size=1000),
                              batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    batch_id = 0
    for pass_id in xrange(2):
        for data in train_data():
            word_data = to_lodtensor(map(lambda x: x[0], data), place)
            trg_word = to_lodtensor(map(lambda x: x[1], data), place)
            trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
            outs = exe.run(framework.default_main_program(),
                           feed={
                               'src_word_id': word_data,
                               'target_language_word': trg_word,
                               'target_language_next_word': trg_word_next
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if batch_id > 3:
                exit(0)
            batch_id += 1
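Both seq2seq snippets on this page (this one and its near-duplicate, Example #44 below) call a to_lodtensor helper that is not shown. A sketch of the usual implementation from the Fluid test suite, included here as an assumption: it flattens a batch of variable-length id sequences into one LoDTensor plus a cumulative offset list.

def to_lodtensor(data, place):
    # data: list of per-sample id sequences; lod holds cumulative lengths
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res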
Example #40
    def __event_handler__(event):
        if isinstance(event, paddle.event.EndIteration):
            num_samples = event.batch_id * args.batch_size
            if event.batch_id % 100 == 0:
                logger.warning("Pass %d, Batch %d, Samples %d, Cost %f, %s" %
                               (event.pass_id, event.batch_id, num_samples,
                                event.cost, event.metrics))

            if event.batch_id % 10000 == 0:
                if args.test_data_path:
                    result = trainer.test(reader=paddle.batch(
                        dataset.test(args.test_data_path),
                        batch_size=args.batch_size),
                                          feeding=reader.feeding)
                    logger.warning("Test %d-%d, Cost %f, %s" %
                                   (event.pass_id, event.batch_id, result.cost,
                                    result.metrics))

                path = "{}/model-pass-{}-batch-{}.tar.gz".format(
                    args.model_output_dir, event.pass_id, event.batch_id)
                with gzip.open(path, 'w') as f:
                    trainer.save_parameter_to_tar(f)
Example #41
def test(use_cuda, BATCH_SIZE=4, model_dir='./model'):
    # data definitions
    image_shape = [3, 1024, 1024]
    label_shape = [1, 1024, 1024]
    # input data
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    # label, i.e. the density map
    label = fluid.layers.data(name='label', shape=label_shape, dtype='float32')

    # build the test reader
    test_reader = paddle.batch(
        train_read(TEST_IMAGE_PATH, TEST_DENSITY_PATH), batch_size=BATCH_SIZE)

    # whether to use the GPU
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    # create the executor
    exe = fluid.Executor(place)
    # map image and label to the network inputs
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    # initialize the executor
    exe.run(fluid.default_startup_program())

    inference_scope = fluid.core.Scope()
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names, fetch_targets] = (fluid.io.load_inference_model(model_dir, exe))
        # test_reader = paddle.batch(test_reader, batch_size=BATCH_SIZE)
        error_rate = []
        for test_data in test_reader():
            test_feat = np.array([data[0] for data in test_data]).astype(np.float32)
            test_label = np.array([data[1] for data in test_data]).astype(np.float32)
            results = exe.run(inference_program,
                              feed={feed_target_names[0]: np.array(test_feat)},
                              fetch_list=fetch_targets)
            predict = np.sum(results[0])
            truth = np.sum(test_label)
            print "infer_results: ", predict, ", ground_truths: ", truth
            error_rate.append(abs(predict - truth) / truth)
        print np.mean(error_rate)
Example #42
    def infer(self, data_path):
        dataset = reader.Dataset(
            train_path=data_path,
            test_path=None,
            source_dic_path=args.source_dic_path,
            target_dic_path=args.target_dic_path,
            model_type=args.model_type,
        )
        infer_reader = paddle.batch(dataset.infer, batch_size=1000)
        logger.warning("Write predictions to %s." %
                       args.prediction_output_path)

        output_f = open(args.prediction_output_path, "w")

        for id, batch in enumerate(infer_reader()):
            res = self.inferer.infer(input=batch)
            predictions = [" ".join(map(str, x)) for x in res]
            assert len(batch) == len(predictions), (
                "Error! %d inputs are given, "
                "but only %d predictions are returned.") % (len(batch),
                                                            len(predictions))
            output_f.write("\n".join(map(str, predictions)) + "\n")
Example #43
 def event_handler(event):
     if isinstance(event, paddle.event.EndIteration):
         if event.batch_id % 5 == 0:
             class_error_rate = event.metrics[
                 'classification_error_evaluator']
             print("\npass %d, Batch: %d cost: %f error: %s" %
                   (event.pass_id, event.batch_id, event.cost,
                    class_error_rate))
         else:
             sys.stdout.write('.')
             sys.stdout.flush()
     if isinstance(event, paddle.event.EndPass):
         # save parameters
         with gzip.open('output/params_pass_%d.tar.gz' % event.pass_id,
                        'w') as f:
             parameters.to_tar(f)
         result = trainer.test(reader=paddle.batch(test_data_reader,
                                                   batch_size=32),
                               feeding=feeding)
         class_error_rate = result.metrics['classification_error_evaluator']
         print("\nTest with Pass %d, cost: %s error: %f" %
               (event.pass_id, result.cost, class_error_rate))
Example #44
def main():
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    batch_id = 0
    for pass_id in xrange(2):
        for data in train_data():
            word_data = to_lodtensor(map(lambda x: x[0], data), place)
            trg_word = to_lodtensor(map(lambda x: x[1], data), place)
            trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
            outs = exe.run(framework.default_main_program(),
                           feed={
                               'src_word_id': word_data,
                               'target_language_word': trg_word,
                               'target_language_next_word': trg_word_next
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            if batch_id > 3:
                exit(0)
            batch_id += 1
Example #45
def infer():
    args = parse_args()

    paddle.init(use_gpu=False, trainer_count=1)

    model = DeepFM(args.factor_size, infer=True)

    parameters = paddle.parameters.Parameters.from_tar(
        gzip.open(args.model_gz_path, 'r'))

    inferer = paddle.inference.Inference(
        output_layer=model, parameters=parameters)

    dataset = reader.Dataset()

    infer_reader = paddle.batch(dataset.infer(args.data_path), batch_size=1000)

    with open(args.prediction_output_path, 'w') as out:
        for id, batch in enumerate(infer_reader()):
            res = inferer.infer(input=batch)
            predictions = [x for x in itertools.chain.from_iterable(res)]
            out.write('\n'.join(map(str, predictions)) + '\n')
Example #46
def eval(eval_file_list, batch_size, data_args, model_path):
    cost, detect_out = vgg_ssd_net.net_conf(mode='eval')

    assert os.path.isfile(model_path), 'Invalid model.'
    parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))

    optimizer = paddle.optimizer.Momentum()

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 extra_layers=[detect_out],
                                 update_equation=optimizer)

    feeding = {'image': 0, 'bbox': 1}

    reader = paddle.batch(data_provider.test(data_args, eval_file_list),
                          batch_size=batch_size)

    result = trainer.test(reader=reader, feeding=feeding)

    print "TestCost: %f, Detection mAP=%g" % \
            (result.cost, result.metrics['detection_evaluator'])
Example #47
    def infer(self, data_path):
        logger.info("infer data...")
        dataset = reader.Dataset(
            train_path=data_path,
            test_path=None,
            source_dic_path=args.source_dic_path,
            target_dic_path=args.target_dic_path,
            model_type=args.model_type,
        )
        infer_reader = paddle.batch(dataset.infer, batch_size=1000)
        logger.warning('write predictions to %s' % args.prediction_output_path)

        with open(args.prediction_output_path, 'w') as output_f:
            for batch in infer_reader():
                res = self.inferer.infer(input=batch)
                predictions = [' '.join(map(str, x)) for x in res]
                assert len(batch) == len(predictions), (
                    "predict error, %d inputs, but %d predictions" %
                    (len(batch), len(predictions)))
                output_f.write('\n'.join(predictions) + '\n')
Example #48
def train(model_save_dir):
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    paddle.init(use_gpu=False, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)

    cost = ngram_lm(hidden_size=128, emb_size=512, dict_size=dict_size)
    parameters = paddle.parameters.create(cost)
    trainer = paddle.trainer.SGD(cost, parameters, optimizer)

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id and not event.batch_id % 10:
                logger.info("Pass %d, Batch %d, Cost %f" %
                            (event.pass_id, event.batch_id, event.cost))

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                paddle.batch(paddle.dataset.imikolov.test(word_dict, 5), 64))
            logger.info("Test Pass %d, Cost %f" % (event.pass_id, result.cost))

            save_path = os.path.join(model_save_dir,
                                     "model_pass_%05d.tar.gz" % event.pass_id)
            logger.info("Save model into %s ..." % save_path)
            with gzip.open(save_path, "w") as f:
                parameters.to_tar(f)

    trainer.train(
        paddle.batch(
            paddle.reader.shuffle(
                lambda: paddle.dataset.imikolov.train(word_dict, 5)(),
                buf_size=1000), 64),
        num_passes=1000,
        event_handler=event_handler)
Example #49
def train():

    params = paddle.parameters.create(model.train_cost)
    optimizer = paddle.optimizer.AdaGrad()

    trainer = paddle.trainer.SGD(cost=model.train_cost,
                                 parameters=params,
                                 update_equation=optimizer)

    dataset = AvazuDataset(args.train_data_path,
                           n_records_as_test=args.test_set_size)

    def __event_handler__(event):
        if isinstance(event, paddle.event.EndIteration):
            num_samples = event.batch_id * args.batch_size
            if event.batch_id % 100 == 0:
                logging.warning("Pass %d, Samples %d, Cost %f" %
                                (event.pass_id, num_samples, event.cost))

            if event.batch_id % 1000 == 0:
                result = trainer.test(reader=paddle.batch(
                    dataset.test, batch_size=args.batch_size),
                                      feeding=field_index)
                logging.warning("Test %d-%d, Cost %f" %
                                (event.pass_id, event.batch_id, result.cost))

                path = "{}-pass-{}-batch-{}-test-{}.tar.gz".format(
                    args.model_output_prefix, event.pass_id, event.batch_id,
                    result.cost)
                with gzip.open(path, 'w') as f:
                    params.to_tar(f)

    trainer.train(reader=paddle.batch(paddle.reader.shuffle(dataset.train,
                                                            buf_size=500),
                                      batch_size=args.batch_size),
                  feeding=field_index,
                  event_handler=__event_handler__,
                  num_passes=args.num_passes)
Example #50
    def start_trainer(self):
        # get the trainer
        trainer = self.get_trainer()

        # define the training event handler
        def event_handler(event):
            lists = []
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 100 == 0:
                    print "\nPass %d, Batch %d, Cost %f, %s" % (
                        event.pass_id, event.batch_id, event.cost, event.metrics)
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()
            if isinstance(event, paddle.event.EndPass):
                # save the trained parameters
                model_path = '../model'
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                with open(model_path + "/model.tar", 'w') as f:
                    trainer.save_parameter_to_tar(f=f)
                # evaluate on the test set
                result = trainer.test(reader=paddle.batch(paddle.dataset.mnist.test(), batch_size=128))
                print "\nTest with Pass %d, Cost %f, %s\n" % (event.pass_id, result.cost, result.metrics)
                lists.append((event.pass_id, result.cost, result.metrics['classification_error_evaluator']))

        # get the training data
        reader = paddle.batch(paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=20000),
                              batch_size=128)
        '''
        start training:
        reader         the training data
        num_passes     the number of passes to train for
        event_handler  callback for training events, e.g. work to do during training
        '''
        trainer.train(reader=reader,
                      num_passes=100,
                      event_handler=event_handler)
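get_trainer() is defined elsewhere in the original class. A minimal sketch that would fit the MNIST usage above; the softmax-regression topology and the optimizer settings are assumptions:

    def get_trainer(self):
        # a plain softmax classifier over the flattened 28x28 MNIST images
        image = paddle.layer.data(
            name='pixel', type=paddle.data_type.dense_vector(784))
        predict = paddle.layer.fc(
            input=image, size=10, act=paddle.activation.Softmax())
        label = paddle.layer.data(
            name='label', type=paddle.data_type.integer_value(10))
        cost = paddle.layer.classification_cost(input=predict, label=label)

        parameters = paddle.parameters.create(cost)
        optimizer = paddle.optimizer.Momentum(learning_rate=0.1 / 128.0,
                                              momentum=0.9)
        return paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,
                                  update_equation=optimizer)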
def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    print "load word dict successfully"
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, accuracy, acc_out = stacked_lstm_net(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    exe.run(fluid.default_startup_program())

    for pass_id in xrange(PASS_NUM):
        accuracy.reset(exe)
        for data in train_data():
            cost_val, acc_val = exe.run(fluid.default_main_program(),
                                        feed=feeder.feed(data),
                                        fetch_list=[cost, acc_out])
            pass_acc = accuracy.eval(exe)
            print("cost=" + str(cost_val) + " acc=" + str(acc_val) +
                  " pass_acc=" + str(pass_acc))
            if cost_val < 1.0 and acc_val > 0.8:
                exit(0)
    exit(1)
def main():
    BATCH_SIZE = 100
    PASS_NUM = 5

    word_dict = paddle.dataset.imdb.word_dict()
    print "load word dict successfully"
    dict_dim = len(word_dict)
    class_dim = 2

    cost, acc = lstm_net(dict_dim=dict_dim, class_dim=class_dim)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=BATCH_SIZE * 10),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    for pass_id in xrange(PASS_NUM):
        for data in train_data():
            chopped_data = chop_data(data)
            tensor_words, tensor_label = prepare_feed_data(chopped_data, place)

            outs = exe.run(fluid.default_main_program(),
                           feed={"words": tensor_words,
                                 "label": tensor_label},
                           fetch_list=[cost, acc])
            cost_val = np.array(outs[0])
            acc_val = np.array(outs[1])

            print("cost=" + str(cost_val) + " acc=" + str(acc_val))
            if acc_val > 0.7:
                exit(0)
    exit(1)
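chop_data() and prepare_feed_data() are not defined in this snippet. A plausible sketch: chop every review to a fixed length so the batch is rectangular, then pack words and labels into tensors; the chop length, batch size, and use of fluid.core.LoDTensor are assumptions:

def chop_data(data, chop_len=80, batch_size=50):
    # keep reviews with at least chop_len words, truncated to chop_len
    data = [(x[0][:chop_len], x[1]) for x in data if len(x[0]) >= chop_len]
    return data[:batch_size]

def prepare_feed_data(data, place):
    # pack word ids into a LoDTensor and labels into a dense [N, 1] tensor
    words = [x[0] for x in data]
    lod = [0]
    for seq in words:
        lod.append(lod[-1] + len(seq))
    flat_words = np.concatenate(words, axis=0).astype("int64")
    flat_words = flat_words.reshape([len(flat_words), 1])
    tensor_words = fluid.core.LoDTensor()
    tensor_words.set(flat_words, place)
    tensor_words.set_lod([lod])

    labels = np.array([x[1] for x in data]).astype("int64")
    tensor_label = fluid.core.LoDTensor()
    tensor_label.set(labels.reshape([len(data), 1]), place)
    return tensor_words, tensor_label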
def main():
    # get arguments from env

    # for local training
    TRUTH = ["true", "True", "TRUE", "1", "yes", "Yes", "YES"]
    cluster_train = os.getenv('PADDLE_CLUSTER_TRAIN', "False") in TRUTH
    use_gpu = os.getenv('PADDLE_INIT_USE_GPU', "False") in TRUTH

    if not cluster_train:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")))
    else:
        paddle.init(
            use_gpu=use_gpu,
            trainer_count=int(os.getenv("PADDLE_INIT_TRAINER_COUNT", "1")),
            port=int(os.getenv("PADDLE_INIT_PORT", "7164")),
            ports_num=int(os.getenv("PADDLE_INIT_PORTS_NUM", "1")),
            ports_num_for_sparse=int(
                os.getenv("PADDLE_INIT_PORTS_NUM_FOR_SPARSE", "1")),
            num_gradient_servers=int(
                os.getenv("PADDLE_INIT_NUM_GRADIENT_SERVERS", "1")),
            trainer_id=int(os.getenv("PADDLE_INIT_TRAINER_ID", "0")),
            pservers=os.getenv("PADDLE_INIT_PSERVERS", "127.0.0.1"))
    with open("thirdparty/wuyi_train_thdpty/word_dict.pickle", "r") as fn:
        word_dict = pickle.load(fn)
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                result = trainer.test(
                    paddle.batch(
                        cluster_reader_cluster(cluster_test_file, node_id), 32))
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
    trainer.train(
        paddle.batch(cluster_reader_cluster(cluster_train_file, node_id), 32),
        num_passes=30,
        event_handler=event_handler)
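wordemb() is referenced but not defined here. A minimal sketch following the N-gram/word2vec example this code derives from; the shared parameter name and attribute values are assumptions:

def wordemb(inlayer):
    # all word slots share one embedding table named "_proj";
    # sparse_update pairs with the sparse pserver ports configured above
    return paddle.layer.embedding(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
            l2_rate=0,
            sparse_update=True))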
avg_cost = fluid.layers.mean(x=cost)

optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

inference_program = fluid.default_main_program().clone()
test_accuracy = fluid.evaluator.Accuracy(
    input=predict, label=label, main_program=inference_program)
test_target = [avg_cost] + test_accuracy.metrics + test_accuracy.states
inference_program = fluid.io.get_inference_program(
    test_target, main_program=inference_program)

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

PASS_NUM = 100
for pass_id in range(PASS_NUM):
    accuracy.reset(exe)
    for data in train_reader():
        out, acc = exe.run(fluid.default_main_program(),
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost] + accuracy.metrics)
        pass_acc = accuracy.eval(exe)
        print("pass=" + str(pass_id) + " acc=" + str(acc) +
              " pass_acc=" + str(pass_acc))
def main():
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=with_gpu, trainer_count=7)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. simple CNN
    net = simple_cnn(image)

    out = paddle.layer.fc(
        input=net, size=classdim, act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp')

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # save parameters
            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                parameters.to_tar(f)

            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                feeding={'image': 0,
                         'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=momentum_optimizer)

    # Save the inference topology to protobuf.
    inference_topology = paddle.topology.Topology(layers=out)
    with open("inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=200,
        event_handler=event_handler,
        feeding={'image': 0,
                 'label': 1})

    # inference
    from PIL import Image
    import numpy as np
    import os

    def load_image(file):
        im = Image.open(file)
        im = im.resize((32, 32), Image.ANTIALIAS)
        im = np.array(im).astype(np.float32)
        # The storage order of the loaded image is W (width),
        # H (height), C (channel). PaddlePaddle requires
        # the CHW order, so transpose it.
        im = im.transpose((2, 0, 1))  # CHW
        # In the training phase, the channel order of CIFAR
        # images is B (blue), G (green), R (red), but PIL opens
        # images in RGB mode, so the channel order must be swapped.
        im = im[(2, 1, 0), :, :]  # BGR
        im = im.flatten()
        im = im / 255.0
        return im

    test_data = []
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    test_data.append((load_image(cur_dir + '/image/dog.png'), ))

    # users can uncomment the lines below and change the model file name
    # with open('params_pass_50.tar', 'r') as f:
    #    parameters = paddle.parameters.Parameters.from_tar(f)

    probs = paddle.infer(
        output_layer=out, parameters=parameters, input=test_data)
    lab = np.argsort(-probs)  # probs and lab are the results of one batch data
    print "Label of image/dog.png is: %d" % lab[0][0]
Example #56
def run_benchmark(args, data_format='channels_last', device='/cpu:0'):
    """Our model_fn for ResNet to be used with our Estimator."""

    class_dim = 1000
    dshape = (None, 224, 224, 3)

    pdshape = (3, 224, 224)
    if args.data == 'flowers102':
        class_dim = 102
        dshape = (None, 224, 224, 3)
        pdshape = (3, 224, 224)
    elif args.data == 'cifar10':
        class_dim = 10
        dshape = (None, 32, 32, 3)
        pdshape = (3, 32, 32)

    with tf.device(device):
        images = tf.placeholder(DTYPE, shape=dshape)
        labels = tf.placeholder(tf.int64, shape=(None, ))
        is_training = tf.placeholder('bool')
        onehot_labels = tf.one_hot(labels, depth=class_dim)

        network = resnet_cifar10(
            32, class_dim,
            data_format) if args.data == 'cifar10' else resnet_imagenet(
                50, class_dim, data_format)

        logits = network(inputs=images, is_training=is_training)

        cross_entropy = tf.losses.softmax_cross_entropy(
            logits=logits, onehot_labels=onehot_labels)
        avg_cost = tf.reduce_mean(cross_entropy)

        correct = tf.equal(tf.argmax(logits, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        lr = 0.1 if args.data == 'cifar10' else 0.01
        optimizer = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=100)

    def test():
        test_accs = []
        for batch_id, data in enumerate(test_reader()):
            test_images = np.array(
                map(lambda x: np.transpose(x[0].reshape(pdshape),
                axes=[1, 2, 0]), data)).astype("float32")
            test_labels = np.array(map(lambda x: x[1], data)).astype('int64')
            test_accs.append(
                accuracy.eval(feed_dict={
                    images: test_images,
                    labels: test_labels,
                    is_training: False
                }))
        print("Pass = %d, Train performance = %f imgs/s, Test accuracy = %f\n" %
              (pass_id, num_samples / train_elapsed, np.mean(test_accs)))

    config = tf.ConfigProto(
        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        init_g = tf.global_variables_initializer()
        init_l = tf.local_variables_initializer()
        sess.run(init_g)
        sess.run(init_l)

        if args.use_fake_data:
            data = train_reader().next()
            images_data = np.array(
                    map(lambda x: np.transpose(x[0].reshape(pdshape),
                    axes=[1, 2, 0]), data)).astype("float32")
            labels_data = np.array(map(lambda x: x[1], data)).astype('int64')
        iters, num_samples, start_time = 0, 0, 0.0
        for pass_id in range(args.pass_num):
            if iters == args.iterations:
                break
            train_accs = []
            train_losses = []
            for batch_id, data in enumerate(train_reader()):
                if iters == args.skip_batch_num:
                    start_time = time.time()
                    num_samples = 0
                if iters == args.iterations:
                    break
                if not args.use_fake_data:
                    images_data = np.array(
                        map(lambda x: np.transpose(x[0].reshape(pdshape),
                        axes=[1, 2, 0]), data)).astype("float32")
                    labels_data = np.array(map(lambda x: x[1], data)).astype(
                        'int64')
                _, loss, acc = sess.run([train_op, avg_cost, accuracy],
                                        feed_dict={
                                            images: images_data,
                                            labels: labels_data,
                                            is_training: True
                                        })
                iters += 1
                train_accs.append(acc)
                train_losses.append(loss)
                num_samples += len(data)
                print("Pass=%d, Iter=%d, Loss=%f, Accuray=%f\n" %
                      (pass_id, iters, loss, acc))

            train_elapsed = time.time() - start_time
            print("Pass=%d, Loss=%f, Accuray=%f\n" %
                  (pass_id, np.mean(train_losses), np.mean(train_accs)))

            # evaluation
            if args.with_test:
                test()

        if not args.with_test:
            duration = time.time() - start_time
            examples_per_sec = num_samples / duration
            sec_per_batch = duration / (iters - args.skip_batch_num)

            print('Total examples: %d, total time: %.5f' %
                  (num_samples, duration))
            print('%.5f examples/sec, %.5f sec/batch' %
                  (examples_per_sec, sec_per_batch))
Example #57
def main():
    paddle.init(use_gpu=False)
    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)

    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)

    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(
            len(paddle.dataset.movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)

    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
        ) + 1))

    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)

    usr_combined_features = paddle.layer.fc(
        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
        size=200,
        act=paddle.activation.Tanh())

    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)

    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(paddle.dataset.movielens.movie_categories())))

    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)

    mov_combined_features = paddle.layer.fc(
        input=[mov_emb, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())

    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.mse_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 update_equation=paddle.optimizer.Adam(
                                     learning_rate=1e-4))
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    user_id = 234
    movie_id = 345

    user = paddle.dataset.movielens.user_info()[user_id]
    movie = paddle.dataset.movielens.movie_info()[movie_id]

    feature = user.value() + movie.value()

    def reader():
        yield feature

    infer_dict = copy.copy(feeding)
    del infer_dict['score']

    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=paddle.batch(
            reader, batch_size=32),
        feeding=infer_dict)
    print (prediction + 5) / 2  # cos_sim with scale=5 lies in [-5, 5]; map it to a 0-5 score
Example #58
x = fluid.layers.data(name='x', shape=[13], dtype='float32')

y_predict = fluid.layers.fc(input=x, size=1, act=None)

y = fluid.layers.data(name='y', shape=[1], dtype='float32')

cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 20

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)

exe.run(fluid.default_startup_program())

PASS_NUM = 100
for pass_id in range(PASS_NUM):
    fluid.io.save_persistables(exe, "./fit_a_line.model/")
    fluid.io.load_persistables(exe, "./fit_a_line.model/")
    for data in train_reader():
        avg_loss_value, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
Example #59
    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost


if __name__ == '__main__':
    # init
    paddle.init(use_gpu=False)

    # data
    print 'loading dictionary...'
    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            lambda: paddle.dataset.imdb.train(word_dict)(), buf_size=1000),
        batch_size=100)
    test_reader = paddle.batch(
        lambda: paddle.dataset.imdb.test(word_dict)(), batch_size=100)

    feeding = {'word': 0, 'label': 1}

    # network config
    # Please choose the way to build the network
    # by uncommenting the corresponding line.
    cost = convolution_net(dict_dim, class_dim=class_dim)
    # cost = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)

    # create parameters
    parameters = paddle.parameters.create(cost)
# NOTE: the opening of this snippet was truncated; the definitions below are
# a plausible reconstruction following the standard fluid MNIST conv example.
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
    input=images,
    filter_size=5,
    num_filters=20,
    pool_size=2,
    pool_stride=2,
    act="relu")
conv_pool_2 = fluid.nets.simple_img_conv_pool(
    input=conv_pool_1,
    filter_size=5,
    num_filters=50,
    pool_size=2,
    pool_stride=2,
    act="relu")

predict = fluid.layers.fc(input=conv_pool_2, size=10, act="softmax")
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
optimizer = fluid.optimizer.Adam(learning_rate=0.01)
optimizer.minimize(avg_cost)

accuracy = fluid.evaluator.Accuracy(input=predict, label=label)

BATCH_SIZE = 50
PASS_NUM = 3
train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program())

for pass_id in range(PASS_NUM):
    accuracy.reset(exe)
    for data in train_reader():
        loss, acc = exe.run(fluid.default_main_program(),
                            feed=feeder.feed(data),
                            fetch_list=[avg_cost] + accuracy.metrics)
        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc=" +