Ejemplo n.º 1
0
def train():
    """Train the CTR model, periodically evaluating and checkpointing it.

    Parses the command-line arguments, initialises Paddle on CPU with a
    single trainer, builds ``CTRmodel`` from the input dimensions stored in
    ``args.data_meta_file`` and runs ``paddle.trainer.SGD`` with an AdaGrad
    optimizer for ``args.num_passes`` passes.

    During training, every 100 batches the cost and metrics are logged.
    Every 1000 batches the model is evaluated on ``args.test_data_path``
    (when one is given) and the parameters are saved to a gzip-compressed
    tar file whose name starts with ``args.model_output_prefix``.
    """
    args = parse_args()
    # --model_type is passed as 0 or 1 (classification / regression per the
    # original author's note) and converted to a ModelType here.
    args.model_type = ModelType(args.model_type)

    # Use the CPU only, with a single trainer thread.
    paddle.init(use_gpu=False, trainer_count=1)
    # Values noted by the original author for their data set:
    #   dnn_input_dim: 61
    #   lr_input_dim: 10040001
    dnn_input_dim, lr_input_dim = reader.load_data_meta(args.data_meta_file)

    # Create the CTR model in training mode.
    model = CTRmodel(dnn_layer_dims,
                     dnn_input_dim,
                     lr_input_dim,
                     model_type=args.model_type,
                     is_infer=False)

    params = paddle.parameters.create(model.train_cost)
    # AdaGrad with its default settings.
    optimizer = paddle.optimizer.AdaGrad()

    trainer = paddle.trainer.SGD(cost=model.train_cost,
                                 parameters=params,
                                 update_equation=optimizer)

    dataset = reader.Dataset()

    def __event_handler__(event):
        """Log progress, run periodic tests and save checkpoints."""
        if not isinstance(event, paddle.event.EndIteration):
            return

        # Batch index times batch size, reported as the sample count.
        num_samples = event.batch_id * args.batch_size
        if event.batch_id % 100 == 0:
            logger.warning(
                "Pass %d, Samples %d, Cost %f, %s" %
                (event.pass_id, num_samples, event.cost, event.metrics))

        if event.batch_id % 1000 != 0:
            return

        if args.test_data_path:
            result = trainer.test(
                reader=paddle.batch(dataset.test(args.test_data_path),
                                    batch_size=args.batch_size),
                feeding=reader.feeding_index)
            logger.warning("Test %d-%d, Cost %f, %s" %
                           (event.pass_id, event.batch_id, result.cost,
                            result.metrics))
            path = "{}-pass-{}-batch-{}-test-{}.tar.gz".format(
                args.model_output_prefix, event.pass_id, event.batch_id,
                result.cost)
        else:
            # Without a test set there is no test cost to embed in the file
            # name; previously this case crashed on the unbound `result`.
            path = "{}-pass-{}-batch-{}.tar.gz".format(
                args.model_output_prefix, event.pass_id, event.batch_id)

        with gzip.open(path, 'w') as f:
            trainer.save_parameter_to_tar(f)

    train_reader = paddle.batch(
        paddle.reader.shuffle(dataset.train(args.train_data_path),
                              buf_size=500),
        batch_size=args.batch_size)
    trainer.train(reader=train_reader,
                  feeding=reader.feeding_index,
                  event_handler=__event_handler__,
                  num_passes=args.num_passes)
Ejemplo n.º 2
0
 def __init__(self, param_path):
     """Build the CTR inference network and load its trained parameters.

     Args:
         param_path: Path to a gzip-compressed tar file of model
             parameters, read with ``Parameters.from_tar``.
     """
     logger.info("create CTR model")
     dnn_input_dim, lr_input_dim = reader.load_data_meta(args.data_meta_path)
     # Create the model in inference mode.
     self.ctr_model = network_conf.CTRmodel(
         dnn_layer_dims,
         dnn_input_dim,
         lr_input_dim,
         model_type=ModelType(args.model_type),
         is_infer=True)
     # Load the parameters. The context manager closes the gzip handle once
     # the archive has been read instead of leaking it.
     logger.info("load model parameters from %s" % param_path)
     with gzip.open(param_path, 'r') as param_file:
         self.parameters = paddle.parameters.Parameters.from_tar(param_file)
     self.inferer = paddle.inference.Inference(
         output_layer=self.ctr_model.model,
         parameters=self.parameters)