def train():
    """Train the CTR model on the CPU, with periodic testing and checkpoints.

    Command-line arguments come from ``parse_args()``. The model is built
    from the input dimensions stored in ``args.data_meta_file`` and is
    optimized with AdaGrad for ``args.num_passes`` passes over
    ``args.train_data_path``.

    Progress is logged every 100 batches. Every 1000 batches, when
    ``args.test_data_path`` is set, the model is evaluated on the test data
    and its parameters are written to a gzip-compressed tar archive.
    """
    args = parse_args()
    # --model_type=0,1: classification or regression.
    args.model_type = ModelType(args.model_type)

    # Use the CPU only, with a single trainer thread.
    paddle.init(use_gpu=False, trainer_count=1)

    # Example sizes noted by the original author:
    #   dnn_input_dim: 61, lr_input_dim: 10040001
    dnn_input_dim, lr_input_dim = reader.load_data_meta(args.data_meta_file)

    # Build the CTR network in training mode.
    ctr_model = CTRmodel(
        dnn_layer_dims,
        dnn_input_dim,
        lr_input_dim,
        model_type=args.model_type,
        is_infer=False)

    parameters = paddle.parameters.create(ctr_model.train_cost)
    # Learning-rate optimization.
    adagrad = paddle.optimizer.AdaGrad()
    trainer = paddle.trainer.SGD(
        cost=ctr_model.train_cost,
        parameters=parameters,
        update_equation=adagrad)

    dataset = reader.Dataset()

    def _on_event(event):
        """Log progress, and periodically test and checkpoint the model."""
        if not isinstance(event, paddle.event.EndIteration):
            return

        samples_seen = event.batch_id * args.batch_size
        if event.batch_id % 100 == 0:
            progress = "Pass %d, Samples %d, Cost %f, %s" % (
                event.pass_id, samples_seen, event.cost, event.metrics)
            logger.warning(progress)

        # The checkpoint name embeds the test cost, so testing and saving
        # happen together and only when test data is configured.
        if event.batch_id % 1000 != 0 or not args.test_data_path:
            return

        test_reader = paddle.batch(
            dataset.test(args.test_data_path), batch_size=args.batch_size)
        result = trainer.test(
            reader=test_reader, feeding=reader.feeding_index)
        logger.warning("Test %d-%d, Cost %f, %s" % (
            event.pass_id, event.batch_id, result.cost, result.metrics))

        checkpoint_path = "{}-pass-{}-batch-{}-test-{}.tar.gz".format(
            args.model_output_prefix, event.pass_id, event.batch_id,
            result.cost)
        with gzip.open(checkpoint_path, 'w') as archive:
            trainer.save_parameter_to_tar(archive)

    # Shuffle the training samples through a 500-sample buffer, then batch.
    shuffled_samples = paddle.reader.shuffle(
        dataset.train(args.train_data_path), buf_size=500)
    train_reader = paddle.batch(shuffled_samples, batch_size=args.batch_size)

    trainer.train(
        reader=train_reader,
        feeding=reader.feeding_index,
        event_handler=_on_event,
        num_passes=args.num_passes)
def __init__(self, param_path):
    """Build the CTR network in inference mode and load trained parameters.

    The input dimensions are read from ``args.data_meta_path`` and the
    model type from ``args.model_type``; both come from the module-level
    ``args`` object.

    Args:
        param_path: Path to a gzip-compressed tar archive of model
            parameters.
    """
    logger.info("create CTR model")
    dnn_input_dim, lr_input_dim = reader.load_data_meta(args.data_meta_path)

    # Create the model.
    self.ctr_model = network_conf.CTRmodel(
        dnn_layer_dims,
        dnn_input_dim,
        lr_input_dim,
        model_type=ModelType(args.model_type),
        is_infer=True)

    # Load the parameters. The archive is opened in a context manager so
    # the file handle is closed once loading finishes, instead of being
    # left open.
    # NOTE(review): assumes from_tar reads the whole archive before it
    # returns -- confirm against the installed Paddle version.
    logger.info("load model parameters from %s", param_path)
    with gzip.open(param_path, 'r') as param_file:
        self.parameters = paddle.parameters.Parameters.from_tar(param_file)

    self.inferer = paddle.inference.Inference(
        output_layer=self.ctr_model.model,
        parameters=self.parameters,
    )