def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
                   batch_acc, args, train_prog, startup_prog, nccl_id_var,
                   num_trainers, trainer_id):
    """Run multi-device training with fluid.ParallelExecutor.

    Builds a DataFeeder (unless a reader op feeds data inside the graph),
    optionally fills the feed variables with constant fake data, runs the
    startup program, then trains for ``args.pass_num`` passes, printing
    per-batch loss and per-pass throughput. Runs a test pass at the end of
    each training pass when a test metric is available.

    Fixes vs. original:
      * ``.itervalues()`` (py2-only) -> ``.values()``;
      * ``data == None`` -> ``data is None``;
      * ``args.batch_size / args.gpus`` -> ``//`` so the shape stays int
        under py3.
    """
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    if not args.use_reader_op:
        # .values() works on both py2 and py3; .itervalues() does not.
        feed_var_list = [
            var for var in train_prog.global_block().vars.values()
            if var.is_data
        ]
        feeder = fluid.DataFeeder(feed_var_list, place)

    # generate fake:
    if args.use_fake_data:
        for var in feed_var_list:
            v = startup_prog.global_block()._clone_variable(var)
            var.persistable = True
            v.persistable = True

            real_shape = list(var.shape)
            # floor division: the per-device batch dimension must be an int
            real_shape[0] = args.batch_size // args.gpus
            startup_prog.global_block().append_op(
                outputs={"Out": v},
                type="fill_constant",
                attrs={
                    "shape": real_shape,
                    "value": 1.0,
                    "dtype": var.dtype
                })

    if nccl_id_var and trainer_id == 0:
        # FIXME(wuyi): wait other trainer to start listening
        time.sleep(30)

    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1
    strategy.allow_op_delay = False
    exe = fluid.ParallelExecutor(True,
                                 avg_loss.name,
                                 exec_strategy=strategy,
                                 num_trainers=num_trainers,
                                 trainer_id=trainer_id)

    for pass_id in range(args.pass_num):
        num_samples = 0
        iters = 0
        start_time = time.time()
        if not args.use_reader_op:
            reader_generator = train_reader()
        batch_id = 0
        data = None
        while True:
            if not args.use_reader_op:
                data = next(reader_generator, None)
                if data is None:
                    break
            if iters == args.iterations:
                break
            if args.profile and pass_id == 0 and batch_id == 5:
                profiler.start_profiler("All")
            elif args.profile and pass_id == 0 and batch_id == 10:
                profiler.stop_profiler("total", "/tmp/profile_%d" % trainer_id)

            # restart the timing window after the warm-up batches
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if args.use_fake_data or args.use_reader_op:
                try:
                    loss, = exe.run([avg_loss.name])
                except fluid.core.EnforceNotMet as ex:
                    # reader op raises when the data source is exhausted
                    break
            else:
                loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
            if args.use_reader_op:
                num_samples += args.batch_size * args.gpus
            else:
                num_samples += len(data)
            iters += 1
            if batch_id % 1 == 0:
                print("Pass %d, batch %d, loss %s" %
                      (pass_id, batch_id, np.array(loss)))
            batch_id += 1

        print_train_time(start_time, time.time(), num_samples)
        print("current activate thread num: ", threading.active_count())
        if not args.no_test and batch_acc and not args.use_reader_op:
            # we have not implement record io for test
            # skip test when use args.use_reader_op
            test_acc = test(startup_exe, infer_prog, test_reader, feeder,
                            batch_acc)
            print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))
            print_test_acc(pass_id, test_acc)
def eval():
    """Evaluate a trained YOLOv3 model on a COCO validation split.

    Loads weights, runs the detector over the test reader, writes detections
    to ``yolov3_result.json`` and scores them with the COCO evaluation API.

    Fixes vs. original:
      * raise early when ``cfg.dataset`` names neither val split (otherwise
        ``test_list`` is unbound and fails later with a NameError);
      * average batch time over an explicit batch count instead of the last
        batch *index* (off-by-one, and ZeroDivisionError with one batch).
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if '2014' in cfg.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in cfg.dataset:
        test_list = 'annotations/instances_val2017.json'
    else:
        # fail fast instead of a NameError at the COCO() call below
        raise ValueError("unsupported dataset: {}".format(cfg.dataset))

    if cfg.debug:
        if not os.path.exists('output'):
            os.mkdir('output')

    model = YOLOv3(is_train=False)
    model.build_model()
    outputs = model.get_pred()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # yapf: disable
    if cfg.weights:
        def if_exist(var):
            # only load variables that have a matching file on disk
            return os.path.exists(os.path.join(cfg.weights, var.name))
        fluid.io.load_vars(exe, cfg.weights, predicate=if_exist)
    # yapf: enable

    # you can save inference model by following code
    # fluid.io.save_inference_model("./output/yolov3",
    #                               feeded_var_names=['image', 'im_shape'],
    #                               target_vars=outputs,
    #                               executor=exe)

    input_size = cfg.input_size
    test_reader = reader.test(input_size, 1)
    label_names, label_ids = reader.get_label_infos()
    if cfg.debug:
        print("Load in labels {} with ids {}".format(label_names, label_ids))
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def get_pred_result(boxes, scores, labels, im_id):
        """Convert per-image boxes to COCO-format detection dicts."""
        result = []
        for box, score, label in zip(boxes, scores, labels):
            x1, y1, x2, y2 = box
            # COCO bbox format is [x, y, width, height]
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            bbox = [x1, y1, w, h]
            res = {
                'image_id': im_id,
                'category_id': label_ids[int(label)],
                'bbox': list(map(float, bbox)),
                'score': float(score)
            }
            result.append(res)
        return result

    dts_res = []
    fetch_list = [outputs]
    total_time = 0
    num_batches = 0  # explicit batch count for the timing average
    for batch_id, batch_data in enumerate(test_reader()):
        start_time = time.time()
        batch_outputs = exe.run(fetch_list=[v.name for v in fetch_list],
                                feed=feeder.feed(batch_data),
                                return_numpy=False,
                                use_program_cache=True)
        lod = batch_outputs[0].lod()[0]
        nmsed_boxes = np.array(batch_outputs[0])
        if nmsed_boxes.shape[1] != 6:
            # no valid [label, score, x1, y1, x2, y2] rows for this batch
            continue
        for i in range(len(lod) - 1):
            im_id = batch_data[i][1]
            start = lod[i]
            end = lod[i + 1]
            if start == end:
                continue
            nmsed_box = nmsed_boxes[start:end, :]
            labels = nmsed_box[:, 0]
            scores = nmsed_box[:, 1]
            boxes = nmsed_box[:, 2:6]
            dts_res += get_pred_result(boxes, scores, labels, im_id)
        end_time = time.time()
        print("batch id: {}, time: {}".format(batch_id,
                                              end_time - start_time))
        total_time += end_time - start_time
        num_batches += 1

    with io.open("yolov3_result.json", 'w') as outfile:
        encode_func = unicode if six.PY2 else str
        outfile.write(encode_func(json.dumps(dts_res)))
    print("start evaluate detection result with coco api")
    coco = COCO(os.path.join(cfg.data_dir, test_list))
    cocoDt = coco.loadRes("yolov3_result.json")
    cocoEval = COCOeval(coco, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    print("evaluate done.")
    # original divided by the last batch index, undercounting by one and
    # crashing on a single-batch run
    if num_batches > 0:
        print("Time per batch: {}".format(total_time / num_batches))
def train_parallel_do(args,
                      learning_rate,
                      batch_size,
                      num_passes,
                      init_model=None,
                      pretrained_model=None,
                      model_save_dir='model',
                      parallel=True,
                      use_nccl=True,
                      lr_strategy=None,
                      layers=50):
    """Train SE-ResNeXt or MobileNet for ImageNet with fluid.ParallelDo.

    Builds the model (optionally replicated across devices via ParallelDo),
    sets up a Momentum optimizer with an optional LR decay strategy, trains
    for ``num_passes`` passes while evaluating on the test set each pass,
    and checkpoints persistables after every pass.

    Fixes vs. original:
      * ``args.model is 'se_resnext'`` compared string identity, which is
        unreliable; replaced with ``==`` (two occurrences);
      * the test loop reused the stale ``batch_id`` left over from the
        train loop for its print condition; it now enumerates its own
        batches.
    """
    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape,
                              dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if parallel:
        places = fluid.layers.device.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            if args.model == 'se_resnext':
                out = SE_ResNeXt(input=image_, class_dim=class_dim,
                                 layers=layers)
            else:
                out = mobile_net(img=image_, class_dim=class_dim)

            cost = fluid.layers.cross_entropy(input=out, label=label_)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        # gather per-device outputs and reduce them to scalars
        avg_cost, acc_top1, acc_top5 = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        if args.model == 'se_resnext':
            out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
        else:
            out = mobile_net(img=image, class_dim=class_dim)

        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    inference_program = fluid.default_main_program().clone(for_test=True)

    # learning-rate schedule: piecewise decay, cosine decay, or constant
    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                       values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(learning_rate=learning_rate,
                                       step_each_epoch=step_each_epoch,
                                       epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    opts = optimizer.minimize(avg_cost)
    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    for pass_id in range(num_passes):
        train_info = [[], [], []]  # [losses, top1 accs, top5 accs]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            train_info[0].append(loss[0])
            train_info[1].append(acc1[0])
            train_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, "
                      "acc1 {3}, acc5 {4} time {5}".format(
                          pass_id, batch_id, loss[0], acc1[0], acc5[0],
                          "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()

        # enumerate test batches so the print condition uses the test
        # batch index, not the leftover train-loop batch_id
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                inference_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, "
                      "acc1 {3},acc5 {4},time {5}".format(
                          pass_id, batch_id, loss[0], acc1[0], acc5[0],
                          "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, train_loss, train_acc1, train_acc5, test_loss,
                  test_acc1, test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir + '/' + args.model,
                                  str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)
def main(train_data_file, test_data_file, vocab_file, target_file, emb_file,
         model_save_dir, num_passes, use_gpu, parallel):
    """Train a CRF-based NER model and record KPI metrics.

    Builds the network via ``ner_net``, initializes the embedding table from
    a pre-trained word-vector file, trains for ``num_passes`` passes with a
    chunk (IOB) evaluator, and evaluates on the test set after each pass.
    KPI records/persists are routed by ``args.gpu_card_num``.

    Fix vs. original: ``xrange`` is py2-only; ``range`` behaves the same
    here and works on both interpreters.
    """
    args = parse_args()
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    BATCH_SIZE = 200
    word_dict = load_dict(vocab_file)
    label_dict = load_dict(target_file)
    word_vector_values = get_embedding(emb_file)

    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)

    avg_cost, feature_out, word, mark, target = ner_net(
        word_dict_len, label_dict_len, parallel)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
    sgd_optimizer.minimize(avg_cost)

    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    # IOB tags pair up as B-/I- per chunk type, hence the (n - 1) / 2
    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        test_target = chunk_evaluator.metrics + chunk_evaluator.states
        inference_program = fluid.io.get_inference_program(test_target)

    train_reader = paddle.batch(
        reader.data_reader(train_data_file, word_dict, label_dict),
        batch_size=BATCH_SIZE,
        drop_last=False)
    test_reader = paddle.batch(
        reader.data_reader(test_data_file, word_dict, label_dict),
        batch_size=BATCH_SIZE,
        drop_last=False)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    # overwrite the randomly-initialized embedding with pre-trained vectors
    embedding_name = 'emb'
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(word_vector_values, place)

    batch_id = 0
    total_time = 0.0
    for pass_id in range(num_passes):
        chunk_evaluator.reset(exe)
        start_time = time.time()
        for data in train_reader():
            cost, batch_precision, batch_recall, batch_f1_score = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost] + chunk_evaluator.metrics)
            batch_id = batch_id + 1
        t1 = time.time()
        total_time += t1 - start_time
        pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe)
        if pass_id == num_passes - 1:
            # record KPIs only once, on the final pass
            if args.gpu_card_num == 1:
                train_acc_kpi.add_record(pass_precision)
                pass_duration_kpi.add_record(total_time / num_passes)
            else:
                train_acc_kpi_card4.add_record(pass_precision)
                pass_duration_kpi_card4.add_record(total_time / num_passes)
        if pass_id % 100 == 0:
            print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" +
                  str(pass_precision) + " pass_recall:" + str(pass_recall) +
                  " pass_f1_score:" + str(pass_f1_score))

        pass_precision, pass_recall, pass_f1_score = test(
            exe, chunk_evaluator, inference_program, test_reader, place)
        if pass_id % 100 == 0:
            print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" +
                  str(pass_precision) + " pass_recall:" + str(pass_recall) +
                  " pass_f1_score:" + str(pass_f1_score))

        #save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id)
        #fluid.io.save_inference_model(
        #    save_dirname, ['word', 'mark', 'target'], [crf_decode], exe)

    if args.gpu_card_num == 1:
        train_acc_kpi.persist()
        pass_duration_kpi.persist()
    else:
        train_acc_kpi_card4.persist()
        pass_duration_kpi_card4.persist()
def train(logger, args):
    """Train the reading-comprehension (rc_model) network.

    Loads the vocabulary and BRC dataset, builds the training and inference
    programs, restores or initializes parameters (including a pre-trained
    ELMo snapshot), then trains with a ParallelExecutor for
    ``args.pass_num`` epochs with periodic dev-set validation and
    checkpointing.

    :param logger: a configured ``logging.Logger``.
    :param args: parsed command-line namespace; fields read here include
        vocab_dir, trainset/devset, max_p_num/max_p_len/max_q_len, use_gpu,
        enable_ce, random_seed, hidden_size, optim, learning_rate,
        weight_decay, load_dir, batch_size, log_interval, dev_interval,
        pass_num, save_interval, save_dir.
    """
    logger.info('Load data_set and vocab...')
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        # py3 needs an explicit encoding to unpickle py2-written vocab data
        if six.PY2:
            vocab = pickle.load(fin)
        else:
            vocab = pickle.load(fin, encoding='bytes')
    logger.info('vocab size is {} and embed dim is {}'.format(
        vocab.size(), vocab.embed_dim))
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          args.trainset, args.devset)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Initialize the model...')
    if not args.use_gpu:
        place = fluid.CPUPlace()
        dev_count = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()

    # build model
    main_program = fluid.Program()
    startup_prog = fluid.Program()
    # NOTE(review): memory_optimize is applied to the *startup* program
    # here; it is usually applied to the main program — confirm intent.
    fluid.memory_optimize(startup_prog)
    if args.enable_ce:
        # fixed seeds make continuous-evaluation runs reproducible
        main_program.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed
    with fluid.program_guard(main_program, startup_prog):
        with fluid.unique_name.guard():
            avg_cost, s_probs, e_probs, match, feed_order = \
                rc_model.rc_model(args.hidden_size, vocab, args)
            # clone from default main program and use it as the validation
            # program
            inference_program = main_program.clone(for_test=True)

            # build optimizer
            if args.optim == 'sgd':
                optimizer = fluid.optimizer.SGD(
                    learning_rate=args.learning_rate)
            elif args.optim == 'adam':
                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.learning_rate)
            elif args.optim == 'rprop':
                optimizer = fluid.optimizer.RMSPropOptimizer(
                    learning_rate=args.learning_rate)
            else:
                logger.error('Unsupported optimizer: {}'.format(args.optim))
                exit(-1)
            if args.weight_decay > 0.0:
                # add an explicit L2 penalty to the training objective
                obj_func = avg_cost + args.weight_decay * l2_loss(
                    main_program)
                #ipdb.set_trace()
                optimizer.minimize(obj_func)
            else:
                obj_func = avg_cost
                optimizer.minimize(obj_func)

            # initialize parameters
            place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
            exe = Executor(place)
            if args.load_dir:
                logger.info('load from {}'.format(args.load_dir))
                fluid.io.load_persistables(exe,
                                           args.load_dir,
                                           main_program=main_program)
            else:
                exe.run(startup_prog)
                embedding_para = fluid.global_scope().find_var(
                    'embedding_para_1').get_tensor()
                embedding_para.set(vocab.embeddings.astype(np.float32),
                                   place)
            # load elmo data
            # NOTE(review): '490001' is a hard-coded checkpoint directory,
            # and `if_exist` is defined elsewhere in this file — verify both
            # exist in the deployment environment.
            src_pretrain_model_path = '490001'
            fluid.io.load_vars(executor=exe,
                               dirname=src_pretrain_model_path,
                               predicate=if_exist,
                               main_program=main_program)

            # prepare data
            feed_list = [
                main_program.global_block().var(var_name)
                for var_name in feed_order
            ]
            #ipdb.set_trace()
            feeder = fluid.DataFeeder(feed_list, place)
            logger.info('Training the model...')
            parallel_executor = fluid.ParallelExecutor(
                main_program=main_program,
                use_cuda=bool(args.use_gpu),
                loss_name=avg_cost.name)
            print_para(main_program, parallel_executor, logger, args)

            for pass_id in range(1, args.pass_num + 1):
                pass_start_time = time.time()
                pad_id = vocab.get_id(vocab.pad_token)
                # CE runs disable shuffling for determinism
                if args.enable_ce:
                    train_reader = lambda: brc_data.gen_mini_batches(
                        'train', args.batch_size, pad_id, shuffle=False)
                else:
                    train_reader = lambda: brc_data.gen_mini_batches(
                        'train', args.batch_size, pad_id, shuffle=True)
                train_reader = read_multiple(train_reader, dev_count)
                log_every_n_batch, n_batch_loss = args.log_interval, 0
                total_num, total_loss = 0, 0
                for batch_id, batch_list in enumerate(train_reader(), 1):
                    feed_data = batch_reader(batch_list, args)
                    #ipdb.set_trace()
                    fetch_outs = parallel_executor.run(
                        feed=list(feeder.feed_parallel(feed_data, dev_count)),
                        fetch_list=[obj_func.name],
                        return_numpy=False)
                    cost_train = np.array(fetch_outs[0]).mean()
                    total_num += args.batch_size * dev_count
                    n_batch_loss += cost_train
                    total_loss += cost_train * args.batch_size * dev_count
                    if args.enable_ce and batch_id >= 100:
                        # CE only needs a short, fixed run
                        break
                    if log_every_n_batch > 0 and \
                            batch_id % log_every_n_batch == 0:
                        print_para(main_program, parallel_executor, logger,
                                   args)
                        logger.info(
                            'Average loss from batch {} to {} is {}'.format(
                                batch_id - log_every_n_batch + 1, batch_id,
                                "%.10f" % (n_batch_loss / log_every_n_batch)))
                        n_batch_loss = 0
                    if args.dev_interval > 0 and \
                            batch_id % args.dev_interval == 0:
                        if brc_data.dev_set is not None:
                            eval_loss, bleu_rouge = validation(
                                inference_program, avg_cost, s_probs,
                                e_probs, match, feed_order, place, dev_count,
                                vocab, brc_data, logger, args)
                            logger.info(
                                'Dev eval loss {}'.format(eval_loss))
                            logger.info(
                                'Dev eval result: {}'.format(bleu_rouge))
                pass_end_time = time.time()
                time_consumed = pass_end_time - pass_start_time
                logger.info('epoch: {0}, epoch_time_cost: {1:.2f}'.format(
                    pass_id, time_consumed))
                logger.info(
                    'Evaluating the model after epoch {}'.format(pass_id))
                if brc_data.dev_set is not None:
                    eval_loss, bleu_rouge = validation(
                        inference_program, avg_cost, s_probs, e_probs, match,
                        feed_order, place, dev_count, vocab, brc_data,
                        logger, args)
                    logger.info('Dev eval loss {}'.format(eval_loss))
                    logger.info('Dev eval result: {}'.format(bleu_rouge))
                else:
                    logger.warning(
                        'No dev set is loaded for evaluation in the dataset!')
                logger.info('Average train loss for epoch {} is {}'.format(
                    pass_id, "%.10f" % (1.0 * total_loss / total_num)))

                if pass_id % args.save_interval == 0:
                    model_path = os.path.join(args.save_dir, str(pass_id))
                    if not os.path.isdir(model_path):
                        os.makedirs(model_path)
                    fluid.io.save_persistables(executor=exe,
                                               dirname=model_path,
                                               main_program=main_program)
                if args.enable_ce:  # For CE
                    print("kpis\ttrain_cost_card%d\t%f" %
                          (dev_count, total_loss / total_num))
                    if brc_data.dev_set is not None:
                        print("kpis\ttest_cost_card%d\t%f" %
                              (dev_count, eval_loss))
                    print("kpis\ttrain_duration_card%d\t%f" %
                          (dev_count, time_consumed))
# Notebook-style script fragment: `use_cuda`, `x`, `y`, and `plt` are
# defined earlier in the (not shown) part of this file.
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)  # create an Executor instance `exe`
exe.run(fluid.default_startup_program())  # run startup_program() to initialize the parameters

# **(2) Define the input-data dimensions**
#
# DataFeeder converts the data returned by the data providers
# (train_reader, test_reader) into a structure that can be fed into the
# Executor.
#
# feed_list sets the variables (or variable names) to feed into the model.

# In[9]:

# define the input-data dimensions
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])  # feed_list: variables (or names) fed to the model

# **(3) Define draw_train_process to plot how the training loss evolves**

# In[10]:

iter = 0
iters = []
train_costs = []


def draw_train_process(iters, train_costs):
    # NOTE(review): this definition appears truncated by file chunking —
    # the plotting body continues beyond what is visible here.
    title = "training cost"
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=14)
def create_network(self, is_infer=False):
    """Create data layers and model network.

    :param is_infer: Whether to create the inference-mode network
                     (no text labels and no CTC loss) instead of the
                     training network.
    :type is_infer: bool
    :return reader: Reader for input — a ``fluid.io.DataLoader`` when
                    training, a ``fluid.DataFeeder`` when inferring.
    :rtype reader: DataLoader or DataFeeder
    :return log_probs: An output unnormalized log probability layer.
    :rtype log_probs: Variable
    :return loss: A ctc loss layer (``None``-label path when inferring).
    :rtype loss: Variable
    """
    if not is_infer:
        input_fields = {
            'names': ['audio_data', 'text_data', 'seq_len_data', 'masks'],
            'shapes': [[None, 161, None], [None, 1], [None, 1],
                       [None, 32, 81, None]],
            'dtypes': ['float32', 'int32', 'int64', 'float32'],
            'lod_levels': [0, 1, 0, 0]
        }

        inputs = [
            fluid.data(name=input_fields['names'][i],
                       shape=input_fields['shapes'][i],
                       dtype=input_fields['dtypes'][i],
                       lod_level=input_fields['lod_levels'][i])
            for i in range(len(input_fields['names']))
        ]

        # batched, double-buffered loader over the four input tensors
        reader = fluid.io.DataLoader.from_generator(
            feed_list=inputs,
            capacity=64,
            iterable=False,
            use_double_buffer=True)

        (audio_data, text_data, seq_len_data, masks) = inputs
    else:
        audio_data = fluid.data(name='audio_data',
                                shape=[None, 161, None],
                                dtype='float32',
                                lod_level=0)
        seq_len_data = fluid.data(name='seq_len_data',
                                  shape=[None, 1],
                                  dtype='int64',
                                  lod_level=0)
        masks = fluid.data(name='masks',
                           shape=[None, 32, 81, None],
                           dtype='float32',
                           lod_level=0)
        # no labels at inference time
        text_data = None
        reader = fluid.DataFeeder([audio_data, seq_len_data, masks],
                                  self._place)

    log_probs, loss = deep_speech_v2_network(
        audio_data=audio_data,
        text_data=text_data,
        seq_len_data=seq_len_data,
        masks=masks,
        dict_size=self._vocab_size,
        num_conv_layers=self._num_conv_layers,
        num_rnn_layers=self._num_rnn_layers,
        rnn_size=self._rnn_layer_size,
        use_gru=self._use_gru,
        share_rnn_weights=self._share_rnn_weights)
    return reader, log_probs, loss
def run_trainer(self, args):
    """Run one trainer process for the distributed-training test harness.

    Builds the model, optionally transpiles it for distributed execution,
    trains for RUN_STEP batches with a ParallelExecutor, and serializes the
    collected losses to stdout via pickle for the parent process to read.
    """
    test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
        self.get_model(batch_size=args.batch_size)

    if args.mem_opt:
        fluid.memory_optimize(fluid.default_main_program(), skip_grads=True)

    if args.is_dist:
        transpiler = self.get_transpiler(args.trainer_id,
                                         fluid.default_main_program(),
                                         args.endpoints, args.trainers,
                                         args.sync_mode, args.dc_asgd)
        trainer_prog = transpiler.get_trainer_program()
    else:
        trainer_prog = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    startup_exe = fluid.Executor(place)
    startup_exe.run(fluid.default_startup_program())

    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1
    strategy.allow_op_delay = False

    build_stra = fluid.BuildStrategy()

    if args.batch_merge_repeat > 1:
        # splice the batch-merge pass in just before the final passes
        pass_builder = build_stra._create_passes_from_strategy()
        merge_pass = pass_builder.insert_pass(
            len(pass_builder.all_passes()) - 2, "multi_batch_merge_pass")
        merge_pass.set_int("num_repeats", args.batch_merge_repeat)

    reduce_enum = fluid.BuildStrategy.ReduceStrategy
    build_stra.reduce_strategy = (
        reduce_enum.Reduce if args.use_reduce else reduce_enum.AllReduce)

    exe = fluid.ParallelExecutor(args.use_cuda,
                                 loss_name=avg_cost.name,
                                 exec_strategy=strategy,
                                 build_strategy=build_stra)

    feed_var_list = [
        v for v in trainer_prog.global_block().vars.values() if v.is_data
    ]
    feeder = fluid.DataFeeder(feed_var_list, place)
    reader_generator = train_reader()

    def get_data():
        # In distributed reader-alloc mode each trainer keeps only its own
        # interleaved half of the batch.
        origin_batch = next(reader_generator)
        if args.is_dist and args.use_reader_alloc:
            return [
                item for offset, item in enumerate(origin_batch)
                if offset % 2 == args.trainer_id
            ]
        return origin_batch

    out_losses = []
    for _ in six.moves.xrange(RUN_STEP):
        loss, = exe.run(fetch_list=[avg_cost.name],
                        feed=feeder.feed(get_data()))
        out_losses.append(loss[0])

    # hand the losses back to the parent process over stdout
    if six.PY2:
        print(pickle.dumps(out_losses))
    else:
        sys.stdout.buffer.write(pickle.dumps(out_losses))
def compress(args):
    """Quantization-aware training (QAT) for an image-classification model.

    Builds the model for ``args.data`` (mnist or imagenet), applies
    PaddleSlim ``quant_aware`` transforms to the train and eval programs,
    fine-tunes from ``args.pretrained_model`` (optionally resuming from a
    checkpoint), keeps the best top-1 checkpoint, then ``convert``s the
    trained graph and saves float and int8 inference models.

    Fixes vs. original:
      * the existence assert ran before checking that ``pretrained_model``
        was set at all — ``os.path.exists(None)`` raises TypeError instead
        of a clean AssertionError; the check now guards against falsy
        values first;
      * dropped the unused duplicate ``feeder`` aliases of the two
        DataFeeder objects.
    """
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc.
        # default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image',
                              shape=image_shape,
                              dtype='float32')
    if args.use_pact:
        # PACT learns a clipping threshold, so the input needs gradients
        image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # 2. quantization transform programs (training aware)
    # Make some quantization transforms in the graph before training and
    # testing. According to the weight and activation quantization type,
    # the graph will be added some fake quantize operators and fake
    # dequantize operators.
    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              act_preprocess_func=act_preprocess_func,
                              optimizer_func=optimizer_func,
                              executor=executor,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    # guard against a falsy value before probing the filesystem:
    # os.path.exists(None) would raise TypeError
    assert args.pretrained_model and os.path.exists(
        args.pretrained_model), "pretrained_model doesn't exist"

    def if_exist(var):
        return os.path.exists(
            os.path.join(args.pretrained_model, var.name))

    fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader,
                                       batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(train_reader,
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label],
                                  place,
                                  program=val_program)

    def test(epoch, program):
        """Evaluate `program` on the validation set; return mean top-1."""
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        """Run one training epoch over train_reader."""
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))

            if args.use_pact and batch_id % 1000 == 0:
                # dump the learned PACT clipping thresholds for inspection
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(fluid.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                print(threshold)
            batch_id += 1

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0

    start_epoch = 0
    if args.checkpoint_dir is not None:
        ckpt_path = args.checkpoint_dir
        assert args.checkpoint_epoch is not None, \
            "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        fluid.io.load_persistables(exe,
                                   dirname=args.checkpoint_dir,
                                   main_program=val_program)
        start_step = start_epoch * int(
            math.ceil(float(args.total_images) / args.batch_size))
        v = fluid.global_scope().find_var('@LR_DECAY_COUNTER@').get_tensor()
        # NOTE(review): the step counter is written as float32 here —
        # confirm this matches the dtype the LR scheduler expects.
        v.set(np.array([start_step]).astype(np.float32), place)

    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(exe,
                                   dirname=os.path.join(
                                       args.output_dir, str(i)),
                                   main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(exe,
                                       dirname=os.path.join(
                                           args.output_dir, 'best_model'),
                                       main_program=val_program)

    if os.path.exists(os.path.join(args.output_dir, 'best_model')):
        fluid.io.load_persistables(exe,
                                   dirname=os.path.join(
                                       args.output_dir, 'best_model'),
                                   main_program=val_program)

    # 3. Freeze the graph after training by adjusting the quantize
    # operators' order for the inference.
    # The dtype of float_program's weights is float32, but in int8 range.
    float_program, int8_program = convert(val_program, place, quant_config,
                                          scope=None,
                                          save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)

    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(dirname=float_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=float_program,
                                  model_filename=float_path + '/model',
                                  params_filename=float_path + '/params')
    fluid.io.save_inference_model(dirname=int8_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=int8_program,
                                  model_filename=int8_path + '/model',
                                  params_filename=int8_path + '/params')
def run_gpu_fleet_api_trainer(self, args):
    """Run one trainer process of a distributed test job through the fleet API.

    Builds a DistributedStrategy from ``args``, initializes fleet in
    collective (nccl2) mode, trains the model from ``self.get_model`` for
    ``RUN_STEP`` steps, pickles the per-step losses to stdout (so the parent
    test process can read them), and optionally saves persistables and an
    inference model via both the fluid and fleet save paths.

    Args:
        args: parsed command-line namespace; must have ``update_method ==
            "nccl2"``. Fields read here include lr, batch_size,
            use_local_sgd, ut4grad_allreduce, sync_batch_norm, trainer_id,
            use_reader_alloc and save_model.
    """
    assert args.update_method == "nccl2"
    self.lr = args.lr
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    dist_strategy = DistributedStrategy()
    dist_strategy.exec_strategy = exec_strategy
    dist_strategy.fuse_memory_size = 1  # MB
    # NOTE(review): "laryer" looks misspelled, but it must match the attribute
    # name actually exposed by DistributedStrategy — confirm before renaming.
    dist_strategy.fuse_laryer_size = 1
    if args.use_local_sgd:
        dist_strategy.use_local_sgd = True
    if args.ut4grad_allreduce:
        dist_strategy._ut4grad_allreduce = True
    if args.sync_batch_norm:
        dist_strategy.sync_batch_norm = True
    role = role_maker.PaddleCloudRoleMaker(is_collective=True)
    fleet.init(role)
    print_to_err("gpu_fleet", "fleet.node_num:")
    # "fleet.node_id:", fleet.node_id(),
    # "fleet.trainer_num:", fleet.worker_num())
    test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
        self.get_model(batch_size=args.batch_size, dist_strategy=dist_strategy)
    # fleet rewrites the user program; keep both the original (for feeding
    # and saving) and the distributed program (for running).
    trainer_prog = fleet._origin_program
    dist_prog = fleet.main_program
    device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
    place = fluid.CUDAPlace(device_id)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    eprint(type(self).__name__, "run worker startup program done.")
    feed_var_list = [
        var for var in trainer_prog.global_block().vars.values()
        if var.is_data
    ]
    eprint("feed_var_list:", feed_var_list)
    # tmp add this code to pass python35 gcc8 CI
    # Fixme(gongweibao, wangxi), need fix fleet api program order
    if feed_var_list[0].name == 'label':
        feed_var_list = feed_var_list[::-1]
    feeder = fluid.DataFeeder(feed_var_list, place)
    reader_generator = train_reader()

    def get_data():
        # In distributed runs with use_reader_alloc, each trainer takes every
        # other sample (offset parity == trainer_id) so trainers see disjoint
        # halves of the batch.
        origin_batch = next(reader_generator)
        if args.update_method != "local" and args.use_reader_alloc:
            new_batch = []
            for offset, item in enumerate(origin_batch):
                if offset % 2 == args.trainer_id:
                    new_batch.append(item)
            return new_batch
        else:
            return origin_batch

    print_to_err(type(self).__name__, "begin to train on trainer")
    out_losses = []
    for i in six.moves.xrange(RUN_STEP):
        loss, = exe.run(dist_prog,
                        fetch_list=[avg_cost.name],
                        feed=feeder.feed(get_data()))
        out_losses.append(loss[0])
        print_to_err(type(self).__name__, "run step %d finished" % i)
    print_to_err(type(self).__name__, "trainer run finished")
    # Emit losses on stdout as a pickle so the launching process can parse
    # them; py3 must write bytes through sys.stdout.buffer.
    if six.PY2:
        print(pickle.dumps(out_losses))
    else:
        sys.stdout.buffer.write(pickle.dumps(out_losses))
    if args.save_model:
        model_save_dir = "/tmp"
        # Worker 0 and the other workers save to distinct directories so the
        # test can compare their outputs.
        if fleet.worker_index() == 0:
            model_save_dir_fluid = os.path.join(model_save_dir,
                                                "fluid_persistables")
            model_save_dir_fleet = os.path.join(model_save_dir,
                                                "fleet_persistables")
            infer_save_dir_fluid = os.path.join(model_save_dir, "fluid_infer")
            infer_save_dir_fleet = os.path.join(model_save_dir, "fleet_infer")
        else:
            model_save_dir_fluid = os.path.join(model_save_dir,
                                                "fluid_persistables_2")
            model_save_dir_fleet = os.path.join(model_save_dir,
                                                "fleet_persistables_2")
            infer_save_dir_fluid = os.path.join(model_save_dir,
                                                "fluid_infer_2")
            infer_save_dir_fleet = os.path.join(model_save_dir,
                                                "fleet_infer_2")
        fluid.io.save_persistables(exe, model_save_dir_fluid,
                                   fleet._origin_program)
        fleet.save_persistables(executor=exe, dirname=model_save_dir_fleet)
        feeded_var_names = [var.name for var in feed_var_list]
        fluid.io.save_inference_model(infer_save_dir_fluid, feeded_var_names,
                                      [avg_cost], exe, fleet._origin_program)
        fleet.save_inference_model(exe, infer_save_dir_fleet,
                                   feeded_var_names, [avg_cost])
def run_trainer(self, args):
    """Run one trainer process of a distributed test job (transpiler path).

    Depending on ``args.update_method`` the default main program is either
    transpiled for parameter-server mode, transpiled for nccl2 collective
    mode, or used as-is (local). The program is then compiled with data
    parallelism and run for ``RUN_STEP`` steps; per-step losses are reported
    through ``print_to_out`` so the parent test process can collect them.

    Args:
        args: parsed command-line namespace (trainer_id, endpoints,
            update_method, use_cuda, build-strategy switches, etc.).
    """
    self.lr = args.lr
    # Three model-construction variants; all return the same 6-tuple.
    if args.nccl2_reduce_layer_local_run:
        test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
            self.get_model(batch_size=args.batch_size, single_device=True)
    elif args.use_dgc:
        test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
            self.get_model(batch_size=args.batch_size, use_dgc=args.use_dgc)
    else:
        test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
            self.get_model(batch_size=args.batch_size)
    if args.update_method == "pserver":
        print_to_err(
            type(self).__name__,
            "begin to run transpile on trainer with pserver mode")
        t = self.get_transpiler(trainer_id=args.trainer_id,
                                main_program=fluid.default_main_program(),
                                pserver_endpoints=args.endpoints,
                                trainers=args.trainers,
                                sync_mode=args.sync_mode,
                                dc_asgd=args.dc_asgd,
                                hogwild_mode=args.hogwild)
        trainer_prog = t.get_trainer_program()
        print_to_err(
            type(self).__name__,
            "get trainer program done with pserver mode.")
    elif args.update_method == "nccl2" or args.update_method == "nccl2_reduce_layer":
        # transpile for nccl2
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        config.nccl_comm_num = args.nccl_comm_num
        if args.use_hallreduce:
            config.use_hierarchical_allreduce = True
            config.hierarchical_allreduce_inter_nranks = args.hallreduce_inter_nranks
        print_to_err(
            type(self).__name__,
            "begin to run transpile on trainer with nccl2 mode")
        nccl2_t = fluid.DistributeTranspiler(config=config)
        nccl2_t.transpile(args.trainer_id,
                          program=fluid.default_main_program(),
                          startup_program=fluid.default_startup_program(),
                          trainers=args.endpoints,
                          current_endpoint=args.current_endpoint)
        print_to_err(
            type(self).__name__,
            "get trainer program done. with nccl2 mode")
        trainer_prog = fluid.default_main_program()
    else:
        print_to_err(
            type(self).__name__,
            "do nothing about main program, just use it")
        trainer_prog = fluid.default_main_program()
        print_to_err(type(self).__name__, "use main program done.")
    # FIXME(gongwb):wait pserver initialization.
    time.sleep(1)
    if args.use_cuda:
        device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
        place = fluid.CUDAPlace(device_id)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    print_to_err(type(self).__name__, "run worker startup program done.")
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    build_stra = fluid.BuildStrategy()
    # FIXME force disable enable_inplace and memory_optimize
    build_stra.enable_inplace = False
    build_stra.memory_optimize = False
    if args.hogwild:
        build_stra.async_mode = True
    if args.enable_backward_deps:
        build_stra.enable_backward_optimizer_op_deps = True
    if args.use_reduce:
        build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
    else:
        build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
    pass_builder = None
    if args.batch_merge_repeat > 1:
        # Inject the batch-merge pass at the front of the pass pipeline.
        pass_builder = build_stra._finalize_strategy_and_create_passes()
        mypass = pass_builder.insert_pass(0, "multi_batch_merge_pass")
        mypass.set("num_repeats", args.batch_merge_repeat)
    if args.update_method == "nccl2" or args.update_method == "nccl2_reduce_layer":
        build_stra.num_trainers = len(args.endpoints.split(","))
        build_stra.trainer_id = args.trainer_id
    else:
        # case args.update_method == "nccl2_reduce_layer":
        build_stra.num_trainers = 1
        build_stra.trainer_id = 0
    print_to_err(type(self).__name__, "begin to compile with data parallel")
    binary = compiler.CompiledProgram(trainer_prog).with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_stra,
        exec_strategy=exec_strategy)
    print_to_err(type(self).__name__, "program compiled with data parallel")
    feed_var_list = [
        var for var in trainer_prog.global_block().vars.values()
        if var.is_data
    ]
    feeder = fluid.DataFeeder(feed_var_list, place)
    reader_generator = train_reader()

    def get_data():
        # In distributed runs with use_reader_alloc, each trainer takes every
        # other sample (offset parity == trainer_id) so trainers see disjoint
        # halves of the batch.
        origin_batch = next(reader_generator)
        if args.update_method != "local" and args.use_reader_alloc:
            new_batch = []
            for offset, item in enumerate(origin_batch):
                if offset % 2 == args.trainer_id:
                    new_batch.append(item)
            return new_batch
        else:
            return origin_batch

    print_to_err(type(self).__name__, "begin to train on trainer")
    out_losses = []
    for i in six.moves.xrange(RUN_STEP):
        loss, = exe.run(binary,
                        fetch_list=[avg_cost.name],
                        feed=feeder.feed(get_data()))
        out_losses.append(loss[0])
        print_to_err(type(self).__name__, "run step %d finished" % i)
    print_to_err(type(self).__name__, "trainer run finished")
    print_to_out(out_losses)
def train_one_user(arg_dict, trainer_config):
    """Train the (federated) model on a single user's data.

    Deserializes the main/startup programs from ``trainer_config``, pulls the
    user's samples from the data service, seeds the scope with the current
    global parameters, trains for ``trainer_config["epoch"]`` epochs, and
    finally dumps every global parameter tensor (plus a ``_info`` pickle with
    the uid and trained-sample count) under
    ``arg_dict["write_global_param_file"]``.

    Args:
        arg_dict: per-job dict with uid, date, data_endpoints, global_params,
            global_param_names and write_global_param_file.
        trainer_config: serialized programs plus training hyper-parameters
            (batch_size, epoch, shuffle, metrics, num_layers, n_hidden, ...).

    Side effects: writes parameter files and ``_info`` to disk, and pushes
    (currently empty) per-user params back through the data client.
    """
    show_metric = trainer_config["show_metric"]
    shuffle = trainer_config["shuffle"]
    # NOTE(review): max_training_steps is read but never enforced below;
    # max_steps_in_epoch is what actually caps the loop — confirm intent.
    max_training_steps = trainer_config["max_training_steps"]
    batch_size = trainer_config["batch_size"]
    # logging.info("training one user...")
    main_program = fluid.Program.parse_from_string(
        trainer_config["main_program_desc"])
    startup_program = fluid.Program.parse_from_string(
        trainer_config["startup_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    if startup_program is None:
        logging.error("startup_program is None")
        exit()
    exe.run(startup_program)
    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
                              place=place,
                              program=main_program)
    data_server_endpoints = arg_dict["data_endpoints"]
    # create data clients
    data_client = DataClient()
    data_client.set_data_server_endpoints(data_server_endpoints)
    uid = arg_dict["uid"]
    date = arg_dict["date"]
    global_param_dict = arg_dict["global_params"]
    user_data = data_client.get_data_by_uid(uid, date)
    train_reader = reader.train_reader(user_data)
    if shuffle:  # was `shuffle == True`; plain truthiness is the idiom
        train_reader = paddle.reader.shuffle(train_reader, buf_size=10000)
    train_reader = paddle.batch(train_reader, batch_size=batch_size)
    # get user param
    # logging.debug("do not need to get user params")
    set_global_param_dict(arg_dict["global_param_names"],
                          arg_dict["global_params"], scope)
    if main_program is None:
        logging.error("main_program is None")
        exit()
    epoch = trainer_config["epoch"]
    max_steps_in_epoch = trainer_config.get("max_steps_in_epoch", -1)
    metrics = trainer_config["metrics"]
    fetch_list = list(trainer_config["target_names"])
    for ei in range(epoch):
        fetch_res_list = []
        trained_sample_num = 0
        step = 0
        num_layers = trainer_config["num_layers"]
        hidden_size = trainer_config["n_hidden"]
        tot_loss, tot_correct = 0, 0
        tot_samples = 0
        init_hidden, init_cell = generate_init_data(batch_size, num_layers,
                                                    hidden_size)
        for data in train_reader():
            feed_data, input_lengths = prepare_input(batch_size, data,
                                                     init_hidden, init_cell)
            fetch_res = exe.run(main_program,
                                feed=feeder.feed(feed_data),
                                fetch_list=fetch_list)
            loss, last_hidden, last_cell, correct = fetch_res
            # Carry the RNN state across batches within the epoch.
            init_hidden = np.array(last_hidden)
            init_cell = np.array(last_cell)
            tot_loss += np.array(loss)
            tot_correct += np.array(correct)
            tot_samples += np.sum(input_lengths)
            step += 1
            trained_sample_num += len(data)
            fetch_res_list.append([np.array(loss), np.array(correct)])
            if max_steps_in_epoch != -1 and step >= max_steps_in_epoch:
                break
        if show_metric and trained_sample_num > 0:
            loss = tot_loss / step
            acc = float(tot_correct) / tot_samples
            print("loss: {}, acc: {}".format(loss, acc))
    local_updated_param_dict = {}
    # update user param
    # logging.debug("do not need to update user params")
    data_client.set_param_by_uid(uid, local_updated_param_dict)
    # global_updated_param_dict = {}
    write_global_param_file = arg_dict["write_global_param_file"]
    #os.makedirs("%s/params" % write_global_param_file)
    for var_name in arg_dict["global_param_names"]:
        var = scope.var(var_name).get_tensor().__array__().astype(np.float32)
        filename = os.path.join(write_global_param_file, "params", var_name)
        #logging.info("filename: {}".format(filename))
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        # FIX: np.save writes raw bytes — the file must be opened in binary
        # mode ("wb"); text mode breaks on Python 3.
        with open(filename, "wb") as f:
            np.save(f, var)
    # FIX: pickle also requires a binary-mode file on Python 3.
    with open("%s/_info" % write_global_param_file, "wb") as f:
        pickle.dump([uid, trained_sample_num], f)
def infer_one_user(arg_dict, trainer_config):
    """Run inference for one user with the current global parameters.

    Deserializes the startup/infer programs, seeds the scope with the global
    params, streams the user's data through the infer program while carrying
    the RNN hidden/cell state across batches, and writes the resulting
    accuracy to ``<infer_result_dir>/res``.

    input:
        global_param
        user_params
        infer_program
        user_data
    output:
        [sample_cout, top1]
    """
    # run startup program, set params
    uid = arg_dict["uid"]
    batch_size = trainer_config["batch_size"]
    startup_program = fluid.Program.parse_from_string(
        trainer_config["startup_program_desc"])
    infer_program = fluid.Program.parse_from_string(
        trainer_config["infer_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    if (startup_program is None):
        logging.error("startup_program is None")
        exit()
    if (infer_program is None):
        logging.error("infer_program is None")
        exit()
    exe.run(startup_program)
    data_client = DataClient()
    data_client.set_data_server_endpoints(arg_dict["data_endpoints"])
    # get user param
    # logging.debug("do not need to get user params")
    set_global_param_dict(arg_dict["global_param_names"],
                          arg_dict["global_params"], scope)
    # reader
    date = arg_dict["date"]
    global_param_dict = arg_dict["global_params"]
    user_data = data_client.get_data_by_uid(uid, date)
    infer_reader = reader.infer_reader(user_data)
    infer_reader = paddle.batch(infer_reader, batch_size=batch_size)
    # run infer program
    # NOTE(review): os.mkdir raises OSError if the directory already exists
    # (or if its parent is missing) — confirm callers guarantee a fresh dir.
    os.mkdir(arg_dict["infer_result_dir"])
    #pred_file = open(arg_dict["infer_result_dir"] + '/' + "pred_file", "w")
    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
                              place=place,
                              program=infer_program)
    fetch_list = trainer_config["target_names"]
    #logging.info("fetch_list: {}".format(fetch_list))
    fetch_res = []
    sample_count = 0
    num_layers = trainer_config["num_layers"]
    hidden_size = trainer_config["n_hidden"]
    tot_correct, tot_loss = 0, 0
    tot_samples, tot_batches = 0, 0
    init_hidden, init_cell = generate_init_data(batch_size, num_layers,
                                                hidden_size)
    for data in infer_reader():
        feed_data, input_lengths = prepare_input(batch_size, data, init_hidden,
                                                 init_cell)
        fetch_res = exe.run(infer_program,
                            feed=feeder.feed(feed_data),
                            fetch_list=fetch_list)
        loss, last_hidden, last_cell, correct = fetch_res
        cost_eval = np.array(loss)
        # Carry the RNN state into the next batch.
        init_hidden = np.array(last_hidden)
        init_cell = np.array(last_cell)
        correct_val = np.array(correct)
        tot_loss += cost_eval
        tot_correct += correct_val
        tot_samples += np.sum(input_lengths)
        tot_batches += 1
    # NOTE(review): ZeroDivisionError if the reader yields no batches/samples.
    loss = tot_loss / tot_batches
    acc = float(tot_correct) / tot_samples
    logging.info("infer acc: {}".format(acc))
    with open(arg_dict["infer_result_dir"] + "/res", "w") as f:
        f.write("%d\t%f\n" % (1, acc))
def check_network_convergence(self, is_sparse, build_strategy=None):
    """Build the CRF (semantic role labeling) network and run 10 batches.

    Constructs the db_lstm model inside a fresh program pair, trains it with
    SGD + exponential LR decay on conll05 test data through a
    ParallelExecutor, and prints the fetched average cost each step.

    Args:
        is_sparse: forwarded to ``db_lstm`` via ``**locals()`` (sparse
            embedding switch).
        build_strategy: optional fluid.BuildStrategy for the
            ParallelExecutor.
    """
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        # All inputs are lod_level=1 int64 sequences (word/context/mark ids).
        word = fluid.layers.data(name='word_data',
                                 shape=[1],
                                 dtype='int64',
                                 lod_level=1)
        predicate = fluid.layers.data(name='verb_data',
                                      shape=[1],
                                      dtype='int64',
                                      lod_level=1)
        ctx_n2 = fluid.layers.data(name='ctx_n2_data',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        ctx_n1 = fluid.layers.data(name='ctx_n1_data',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        ctx_0 = fluid.layers.data(name='ctx_0_data',
                                  shape=[1],
                                  dtype='int64',
                                  lod_level=1)
        ctx_p1 = fluid.layers.data(name='ctx_p1_data',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        ctx_p2 = fluid.layers.data(name='ctx_p2_data',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        mark = fluid.layers.data(name='mark_data',
                                 shape=[1],
                                 dtype='int64',
                                 lod_level=1)
        # NOTE: **locals() forwards every local defined so far (including
        # is_sparse and the data layers) to db_lstm.
        feature_out = db_lstm(**locals())
        target = fluid.layers.data(name='target',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        crf_cost = fluid.layers.linear_chain_crf(
            input=feature_out,
            label=target,
            param_attr=fluid.ParamAttr(name='crfw', learning_rate=1e-1))
        avg_cost = fluid.layers.mean(crf_cost)
        sgd_optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.01,
                decay_steps=100000,
                decay_rate=0.5,
                staircase=True))
        sgd_optimizer.minimize(avg_cost)
        train_data = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
                                  batch_size=16)
        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup)
        pe = fluid.ParallelExecutor(use_cuda=True,
                                    loss_name=avg_cost.name,
                                    build_strategy=build_strategy)
        # Data is fed from CPU; the executor moves it to the CUDA place.
        feeder = fluid.DataFeeder(feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark,
            target
        ],
                                  place=fluid.CPUPlace())
        data = train_data()
        # Python 2 code (xrange / print statement).
        for i in xrange(10):
            cur_batch = next(data)
            print map(
                np.array,
                pe.run(feed=feeder.feed(cur_batch),
                       fetch_list=[avg_cost.name]))[0]
# create loss learning_rate = fluid.layers.piecewise_decay(BOUNDARIES, LR_STEPS) # case1, Tensor optimizer = fluid.optimizer.Adam(learning_rate=learning_rate, regularization=fluid.regularizer.L2Decay( regularization_coeff=REGULARIZATION_COEFF)) optimizer.minimize(loss) # feed data train_reader = reader(DATA_CSV, is_none_pre=NONE_PRE, train_rate=TRAIN_DATA_RATE) val_reader = reader(DATA_CSV, is_none_pre=NONE_PRE, is_val=True, train_rate=TRAIN_DATA_RATE) train_reader = fluid.io.batch(fluid.io.shuffle(train_reader, buf_size=1024), batch_size=BATCH_SIZE) val_reader = fluid.io.batch(val_reader, batch_size=BATCH_SIZE) feed_list = ["ori_input_ids", "ori_position_ids", "ori_segment_ids", "ori_input_mask", "input_ids", "position_ids", "segment_ids", "input_mask", "scores"] train_feeder = fluid.DataFeeder(feed_list=feed_list, place=place, program=train_program) val_feeder = fluid.DataFeeder(feed_list=feed_list, place=place, program=train_program) # define train def controller_process(program, data_reader, feeder): global FIRST_FLAG, DATA_NUM infos = {"loss": [], "out": [], "label": []} for i, data in enumerate(data_reader()): info = controller.run(program=program, feed=feeder.feed(data), fetch_list=[loss, net, scores_label]) try:
def main(dict_path):
    """Train an IMDB sentiment conv-net, locally or distributed.

    Builds the network and SGD optimizer, clones an inference program for
    evaluation, then either trains locally (``args.local``) or transpiles the
    program for parameter-server mode using the PADDLE_INIT_* / TRAINING_ROLE
    environment variables and runs as pserver or trainer accordingly.

    Args:
        dict_path: path to the vocabulary file loaded by ``load_vocab``.

    Side effects: reads env vars, prints progress, and in distributed mode
    writes the transpiled programs to /tmp for debugging.
    """
    word_dict = load_vocab(dict_path)
    word_dict["<unk>"] = len(word_dict)
    dict_dim = len(word_dict)
    print("The dictionary size is : %d" % dict_dim)
    data, label, prediction, avg_cost = conv_net(dict_dim)
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=conf.learning_rate)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
    batch_size_var = fluid.layers.create_tensor(dtype='int64')
    batch_acc_var = fluid.layers.accuracy(input=prediction,
                                          label=label,
                                          total=batch_size_var)
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc_var, batch_size_var])
    # The training data set.
    train_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.imdb.train(word_dict), buf_size=51200),
                                batch_size=conf.batch_size)
    # The testing data set.
    test_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.imdb.test(word_dict), buf_size=51200),
                               batch_size=conf.batch_size)
    if conf.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
    # exe.run(fluid.default_startup_program())
    train_pass_acc_evaluator = fluid.average.WeightedAverage()
    test_pass_acc_evaluator = fluid.average.WeightedAverage()

    def test(exe):
        # Evaluate the whole test set; returns size-weighted mean accuracy.
        test_pass_acc_evaluator.reset()
        for batch_id, data in enumerate(test_reader()):
            input_seq = to_lodtensor(map(lambda x: x[0], data), place)
            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
            y_data = y_data.reshape([-1, 1])
            b_acc, b_size = exe.run(inference_program,
                                    feed={
                                        "words": input_seq,
                                        "label": y_data
                                    },
                                    fetch_list=[batch_acc_var, batch_size_var])
            test_pass_acc_evaluator.add(value=b_acc, weight=b_size)
        test_acc = test_pass_acc_evaluator.eval()
        return test_acc

    def train_loop(exe, train_program, trainer_id):
        # Run conf.num_passes epochs, logging throughput and running a full
        # test pass after each epoch.
        total_time = 0.
        for pass_id in xrange(conf.num_passes):
            train_pass_acc_evaluator.reset()
            start_time = time.time()
            total_samples = 0
            #with profiler.profiler("CPU", 'total', profile_path='./profile_res_%d' % trainer_id) as prof:
            for batch_id, data in enumerate(train_reader()):
                batch_start = time.time()
                cost_val, acc_val, size_val = exe.run(
                    train_program,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost, batch_acc_var, batch_size_var])
                train_pass_acc_evaluator.add(value=acc_val, weight=size_val)
                total_samples += float(size_val)
                if batch_id and batch_id % conf.log_period == 0:
                    print(
                        "Pass id: %d, batch id: %d, cost: %f, pass_acc: %f, speed: %f, time: %f"
                        % (pass_id, batch_id, cost_val,
                           train_pass_acc_evaluator.eval(),
                           float(size_val) / (time.time() - batch_start),
                           time.time() - batch_start))
            end_time = time.time()
            total_time += (end_time - start_time)
            pass_test_acc = test(exe)
            print("Pass id: %d, test_acc: %f, speed: %f" %
                  (pass_id, pass_test_acc,
                   total_samples / (end_time - start_time)))
        print("Total train time: %f" % (total_time))

    if args.local:
        print("run as local mode")
        exe.run(fluid.default_startup_program())
        train_loop(exe, fluid.default_main_program(), 0)
    else:
        pserver_ips = os.getenv(
            "PADDLE_INIT_PSERVERS")  # all pserver endpoints
        eplist = []
        port = os.getenv("PADDLE_INIT_PORT")
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)
        print("pserver endpoints: ", pserver_endpoints)
        trainers = int(os.getenv("TRAINERS"))  # total trainer count
        print("trainers total: ", trainers)
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID", "0"))
        current_endpoint = os.getenv(
            "POD_IP") + ":" + port  # current pserver endpoint
        training_role = os.getenv(
            "TRAINING_ROLE",
            "TRAINER")  # get the training role: trainer/pserver
        t = fluid.DistributeTranspiler()
        t.transpile(optimize_ops,
                    params_grads,
                    trainer_id,
                    pservers=pserver_endpoints,
                    trainers=trainers)
        if training_role == "PSERVER":
            if not current_endpoint:
                print("need env SERVER_ENDPOINT")
                exit(1)
            pserver_prog = t.get_pserver_program(current_endpoint)
            with open("/tmp/pserver_prog", "w") as f:
                f.write(pserver_prog.__str__())
            print("######## pserver prog in /tmp/pserver_prog #############")
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            print("starting server side startup")
            exe.run(pserver_startup)
            print("starting parameter server...")
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            trainer_prog = t.get_trainer_program()
            with open("/tmp/trainer_prog", "w") as f:
                f.write(trainer_prog.__str__())
            print("######## trainer prog in /tmp/trainer_prog #############")
            # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
            exe.run(fluid.default_startup_program())
            train_loop(exe, trainer_prog, trainer_id)
        else:
            # FIX: the message previously named the wrong variable
            # ("TRAINER_ROLE") and had a typo ("os" for "or"); the code above
            # reads TRAINING_ROLE.
            print("environment var TRAINING_ROLE should be TRAINER or PSERVER")
crf_cost = fluid.layers.linear_chain_crf( input=score, label=tags, param_attr=fluid.ParamAttr(name="crfw") ) avg_cost = fluid.layers.mean(crf_cost) crf_decode = fluid.layers.crf_decoding( input=score, param_attr=fluid.ParamAttr(name="crfw") ) sgd_optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01) sgd_optimizer.minimize(avg_cost) feeder = fluid.DataFeeder(place=place, feed_list=[words, tags]) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) save_dirname = "test.inference.model" main_program = fluid.default_main_program() PASS_NUM = 20 for pass_id in range(PASS_NUM): print(">>> pass_id: {}".format(pass_id)) for data in train_reader(): feed = feeder.feed(data) avg_loss_value, = exe.run( main_program, feed=feed, fetch_list=[avg_cost], return_numpy=True
def eval(args):
    """Evaluate an image-classification model and print top-1/top-5 accuracy.

    Builds the network named by ``args.model``, optionally loads pretrained
    weights, runs the test reader through the inference program, then
    compares predicted top-5 labels against the ground truth taken from the
    last column of ``args.img_list``.

    Args:
        args: namespace with class_dim, model, pretrained_model, image_shape
            (comma-separated "C,H,W"), use_gpu, batch_size and img_list.

    Side effects: prints per-batch progress and the final accuracy line.
    """
    # parameters from arguments
    class_dim = args.class_dim
    model_name = args.model
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]
    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    # model definition
    model = models.__dict__[model_name]()
    # FIX: compare strings with ==, not identity (`is` only worked by
    # accident of interning).
    if model_name == "GoogleNet":
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)
    test_program = fluid.default_main_program().clone(for_test=True)
    fetch_list = [out.name]
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if pretrained_model:

        def if_exist(var):
            # Only load vars that exist as files in the pretrained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
    test_batch_size = args.batch_size
    img_size = image_shape[1]
    test_reader = paddle.batch(reader.test(args, img_size),
                               batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])
    # Ground-truth labels: last whitespace-separated field of each line.
    targets = []
    with open(args.img_list, 'r') as f:
        for line in f.readlines():
            targets.append(line.strip().split()[-1])
    # FIX: np.int is a deprecated (and now removed) alias; use an explicit
    # integer dtype.
    targets = np.array(targets, dtype=np.int64)
    preds = []
    TOPK = 5
    for batch_id, data in enumerate(test_reader()):
        all_result = exe.run(test_program,
                             fetch_list=fetch_list,
                             feed=feeder.feed(data))
        # FIX: use TOPK instead of a duplicated literal 5.
        pred_label = np.argsort(-all_result[0], 1)[:, :TOPK]
        print("Test-{0}".format(batch_id))
        preds.append(pred_label)
    preds = np.vstack(preds)
    top1, top5 = accuracy(targets, preds)
    print("top1:{:.4f} top5:{:.4f}".format(top1, top5))
def main(args):
    """Train, periodically validate, checkpoint, and finally predict for a
    text-matching task.

    Builds train/test programs for the task named by ``args.task_name``,
    trains with Adam + noam LR warmup on a ParallelExecutor, logs a
    classification report every ``args.skip_steps`` batches, runs a dev pass
    and saves persistables every ``args.validation_steps`` batches, and ends
    with prediction passes over the test and dev sets.

    Args:
        args: namespace of training hyper-parameters and paths (data_dir,
            vocab_path, batch_size, epoch, learning_rate, warmup_proportion,
            use_cuda, skip_steps, validation_steps, checkpoints, ...).
    """
    task_name = args.task_name.lower()
    processor = reader.MatchProcessor(data_dir=args.data_dir,
                                      task_name=task_name,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case)
    args.voc_size = len(open(args.vocab_path, 'r').readlines())
    num_labels = len(processor.get_labels())
    train_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='train',
                                                    epoch=args.epoch,
                                                    shuffle=True)
    num_train_examples = processor.get_num_examples(phase='train')
    dev_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                  phase='dev',
                                                  epoch=1,
                                                  shuffle=False)
    num_dev_examples = processor.get_num_examples(phase='dev')
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    max_train_steps = args.epoch * num_train_examples // args.batch_size
    warmup_steps = int(max_train_steps * args.warmup_proportion)
    train_program = fluid.Program()
    train_startup = fluid.Program()
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            feed_order, loss, predict, accuracy, num_seqs, labels, softmax, prob, indexs1,logits = \
                create_model(args, num_labels, \
                             is_prediction=False)
            # noam warmup schedule scaled by the configured base LR.
            lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                256, warmup_steps)
            with fluid.default_main_program()._lr_schedule_guard():
                learning_rate = lr_decay * args.learning_rate
                optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
                optimizer.minimize(loss)
    test_program = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            feed_order, loss, predict, accuracy, num_seqs, labels, softmax, prob, indexs1,logits = \
                create_model(args, num_labels, \
                             is_prediction=True)
    test_program = test_program.clone(for_test=True)
    exe = Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count
    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program)
    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)
    feed_list = [
        train_program.global_block().var(var_name) for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)
    time_begin = time.time()
    total_cost, total_acc, total_num_seqs = [], [], []
    # tmp11/tmp22 accumulate predicted / true labels between log points.
    tmp11 = []
    tmp22 = []
    for batch_id, data in enumerate(train_data_generator()):
        fetch_outs = train_exe.run(feed=feeder.feed(data),
                                   fetch_list=[
                                       loss.name, accuracy.name,
                                       num_seqs.name, predict.name,
                                       labels.name, softmax.name, logits.name
                                   ])
        avg_loss = fetch_outs[0]
        avg_acc = fetch_outs[1]
        cur_num_seqs = fetch_outs[2]
        # Weight the running loss/acc by sequence counts.
        total_cost.extend(avg_loss * cur_num_seqs)
        total_acc.extend(avg_acc * cur_num_seqs)
        total_num_seqs.extend(cur_num_seqs)
        results1 = fetch_outs[3]
        act1 = fetch_outs[4]
        # Binary argmax over the two prediction columns.
        for index in range(len(results1)):
            if results1[index][0] > results1[index][1]:
                tmp11.append(0)
            else:
                tmp11.append(1)
            tmp22.append(act1[index])
        if batch_id % args.skip_steps == 0:
            print(fetch_outs[5][0:3])
            #print(fetch_outs[6])
            print(fetch_outs[6][0:3])
            print(classification_report(tmp22, tmp11))
            tmp11 = []
            tmp22 = []
            time_end = time.time()
            used_time = time_end - time_begin
            current_example, current_epoch = processor.get_train_progress()
            print("epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                  "ave acc: %f, speed: %f steps/s" %
                  (current_epoch, current_example, num_train_examples,
                   batch_id, np.sum(total_cost) / np.sum(total_num_seqs),
                   np.sum(total_acc) / np.sum(total_num_seqs),
                   args.skip_steps / used_time))
            time_begin = time.time()
            total_cost, total_acc, total_num_seqs = [], [], []
        if batch_id % args.validation_steps == 0:
            # Full dev-set pass; same argmax bookkeeping as above.
            total_dev_cost, total_dev_acc, total_dev_num_seqs = [], [], []
            tmp1 = []
            tmp2 = []
            for dev_id, dev_data in enumerate(dev_data_generator()):
                fetch_outs = test_exe.run(feed=feeder.feed(dev_data),
                                          fetch_list=[
                                              loss.name, accuracy.name,
                                              num_seqs.name, predict.name,
                                              labels.name
                                          ])
                avg_dev_loss = fetch_outs[0]
                avg_dev_acc = fetch_outs[1]
                cur_dev_num_seqs = fetch_outs[2]
                results = fetch_outs[3]
                act = fetch_outs[4]
                for index in range(len(results)):
                    if results[index][0] > results[index][1]:
                        tmp1.append(0)
                    else:
                        tmp1.append(1)
                    tmp2.append(act[index])
                total_dev_cost.extend(avg_dev_loss * cur_dev_num_seqs)
                total_dev_acc.extend(avg_dev_acc * cur_dev_num_seqs)
                total_dev_num_seqs.extend(cur_dev_num_seqs)
            print(classification_report(tmp2, tmp1))
            print("valid eval: ave loss: %f, ave acc: %f" %
                  (np.sum(total_dev_cost) / np.sum(total_dev_num_seqs),
                   np.sum(total_dev_acc) / np.sum(total_dev_num_seqs)))
            total_dev_cost, total_dev_acc, total_dev_num_seqs = [], [], []
            # if batch_id % args.save_steps == 0:
            # NOTE(review): the save_steps gate is commented out, so a
            # checkpoint is written on every validation step — confirm
            # intended.
            model_path = os.path.join(args.checkpoints, str(batch_id))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(executor=exe,
                                       dirname=model_path,
                                       main_program=train_program)
    # predict
    print("=================for predict===================")
    infer_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='test',
                                                    epoch=1,
                                                    shuffle=False)
    for batch_id, data in enumerate(infer_data_generator()):
        results = test_exe.run(fetch_list=[predict.name],
                               feed=feeder.feed(data),
                               return_numpy=True)
        for elem in results[0]:
            print(elem[1])
    print("=================for dev===================")
    infer_data_generator2 = processor.data_generator(
        batch_size=args.batch_size, phase='dev', epoch=1, shuffle=False)
    for batch_id, data in enumerate(infer_data_generator2()):
        results = test_exe.run(fetch_list=[predict.name],
                               feed=feeder.feed(data),
                               return_numpy=True)
        for elem in results[0]:
            print(elem[1])
def train():
    """Train an RCNN detector end-to-end according to the global `cfg`.

    Builds the model and optimizer on the default fluid programs, optionally
    loads pretrained weights, then runs either a py_reader-driven or a
    feeder-driven training loop, snapshotting persistables every
    ``cfg.TRAIN.snapshot_iter`` iterations and once more at the end.

    Fixes vs. previous revision:
    * ``train_loop`` returned ``np.mean(every_pass_loss)`` but
      ``every_pass_loss`` was never defined (NameError); the per-iteration
      mean loss is now accumulated.
    * ``total_time`` in ``train_loop`` used ``start_time`` (reset every
      iteration) even though ``start`` was captured at loop entry for exactly
      this purpose; it now measures the whole loop.
    """
    update_lr(cfg)
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.enable_ce:
        # Deterministic run for continuous-evaluation (CE) benchmarking.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    if cfg.enable_ce:
        use_random = False
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    # Piecewise LR schedule with linear warmup: lr * gamma^i per boundary.
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
    lr = exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    fetch_list = fetch_list + [lr]

    # Fetched vars must be persistable so ParallelExecutor keeps them.
    for var in fetch_list:
        var.persistable = True
    #fluid.memory_optimize(fluid.default_main_program(), skip_opt_set=set(fetch_list))

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:
        # Only load vars for which a checkpoint file actually exists.
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        if cfg.use_gpu:
            dist_utils.prepare_for_multi_process(
                exe, build_strategy, fluid.default_main_program(),
                fluid.default_startup_program())
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        exec_strategy.num_iteration_per_drop_scope = 10
        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu),
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    if cfg.enable_ce:
        shuffle = False
    if cfg.use_pyreader:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=shuffle)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(
            batch_size=total_batch_size, shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        # Overwrite any previous snapshot with the same postfix.
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        # Training driven by the in-graph py_reader; stops on max_iter or EOF.
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            prev_start_time = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                # outs[:-1] align with `keys`; outs[-1] is the current lr.
                stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
                train_stats.update(stats)
                logs = train_stats.log()
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id,
                    np.mean(outs[-1]), logs, start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            # NOTE(review): start_time is reset every iteration, so this is
            # really the duration of the last iteration only.
            total_time = end_time - start_time
            last_loss = np.array(outs[0]).mean()
            if cfg.enable_ce:
                gpu_num = devices_num
                epoch_idx = iter_id + 1
                loss = last_loss
                print("kpis\teach_pass_duration_card%s\t%s" %
                      (gpu_num, total_time / epoch_idx))
                print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))
        except (StopIteration, fluid.core.EOFException):
            py_reader.reset()

    def train_loop():
        # Training driven by a Python reader plus DataFeeder.
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        every_pass_loss = []  # per-iteration mean loss (was missing: NameError)
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(
                fetch_list=[v.name for v in fetch_list],
                feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            every_pass_loss.append(stats[keys[0]])  # keys[0] is the total loss
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        # BUGFIX: measure the whole loop (`start`), not the last iteration.
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()
        # only for ce
        if cfg.enable_ce:
            gpu_num = devices_num
            epoch_idx = iter_id + 1
            loss = last_loss
            print("kpis\teach_pass_duration_card%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))
        return np.mean(every_pass_loss)

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
trainer_id = int(sys.argv[1]) # trainer id for each guest job_path = "fl_job_config" job = FLRunTimeJob() job.load_trainer_job(job_path, trainer_id) job._scheduler_ep = "127.0.0.1:9091" # Inform the scheduler IP to trainer print(job._target_names) trainer = FLTrainerFactory().create_fl_trainer(job) trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id) place = fluid.CPUPlace() trainer.start(place) print(trainer._step) test_program = trainer._main_program.clone(for_test=True) img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace()) def train_test(train_test_program, train_test_feed, train_test_reader): acc_set = [] for test_data in train_test_reader(): acc_np = trainer.exe.run(program=train_test_program, feed=train_test_feed.feed(test_data), fetch_list=["accuracy_0.tmp_0"]) acc_set.append(float(acc_np[0])) acc_val_mean = numpy.array(acc_set).mean() return acc_val_mean epoch_id = 0 step = 0
def eval(args):
    """Evaluate a saved image-classification model on the validation split.

    NOTE: the name shadows the builtin ``eval``; kept because it is this
    module's public entry point.

    Args:
        args: parsed CLI namespace; uses ``data`` ("mnist" or "imagenet"),
            ``model``, ``model_path``, ``use_gpu``, ``batch_size`` and
            ``log_period``.

    Raises:
        ValueError: if ``args.data`` is neither "mnist" nor "imagenet".

    Side effects: builds the network in the default main program, loads
    weights from ``args.model_path`` and logs per-batch and final top-1 /
    top-5 accuracies via ``_logger``. Returns nothing.
    """
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    # "C,H,W" string -> [C, H, W] ints.
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone before any optimizer is added: inference-only program.
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    val_feeder = feeder = fluid.DataFeeder(
        [image, label], place, program=val_program)
    load_model(exe, val_program, args.model_path)
    batch_id = 0
    acc_top1_ns = []
    acc_top5_ns = []
    for data in val_reader():
        start_time = time.time()
        acc_top1_n, acc_top5_n = exe.run(
            val_program,
            feed=val_feeder.feed(data),
            fetch_list=[acc_top1.name, acc_top5.name])
        end_time = time.time()
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".format(
                    batch_id,
                    np.mean(acc_top1_n),
                    np.mean(acc_top5_n), end_time - start_time))
        acc_top1_ns.append(np.mean(acc_top1_n))
        acc_top5_ns.append(np.mean(acc_top5_n))
        batch_id += 1
    # Final figures are means of per-batch means (equal-weighted batches).
    _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
        np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
def validation(inference_program, avg_cost, s_probs, e_probs, match, feed_order,
               place, dev_count, vocab, brc_data, logger, args):
    """Run one evaluation pass over the 'dev' split of a reading-comprehension
    dataset and (optionally) dump predicted answers to disk.

    Returns:
        (ave_loss, bleu_rouge): mean dev loss, and the bleu/rouge metric dict
        when reference answers are present in the data, else ``None``.
    """
    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.memory_optimize = False
    parallel_executor = fluid.ParallelExecutor(
        main_program=inference_program,
        use_cuda=bool(args.use_gpu),
        loss_name=avg_cost.name,
        build_strategy=build_strategy)
    print_para(inference_program, parallel_executor, logger, args)

    # Use test set as validation each pass
    total_loss = 0.0
    count = 0
    n_batch_cnt = 0
    n_batch_loss = 0.0
    pred_answers, ref_answers = [], []
    val_feed_list = [
        inference_program.global_block().var(var_name)
        for var_name in feed_order
    ]
    val_feeder = fluid.DataFeeder(val_feed_list, place)
    pad_id = vocab.get_id(vocab.pad_token)
    dev_reader = lambda: brc_data.gen_mini_batches(
        'dev', args.batch_size, pad_id, shuffle=False)
    # Group `dev_count` mini-batches together, one per device.
    dev_reader = read_multiple(dev_reader, dev_count)

    for batch_id, batch_list in enumerate(dev_reader(), 1):
        feed_data = batch_reader(batch_list, args)
        val_fetch_outs = parallel_executor.run(
            feed=list(val_feeder.feed_parallel(feed_data, dev_count)),
            fetch_list=[avg_cost.name, s_probs.name, e_probs.name, match.name],
            return_numpy=False)
        total_loss += np.array(val_fetch_outs[0]).sum()
        start_probs_m = LodTensor_Array(val_fetch_outs[1])
        end_probs_m = LodTensor_Array(val_fetch_outs[2])
        match_lod = val_fetch_outs[3].lod()
        count += len(np.array(val_fetch_outs[0]))
        n_batch_cnt += len(np.array(val_fetch_outs[0]))
        n_batch_loss += np.array(val_fetch_outs[0]).sum()
        log_every_n_batch = args.log_interval
        if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
            logger.info('Average dev loss from batch {} to {} is {}'.format(
                batch_id - log_every_n_batch + 1, batch_id,
                "%.10f" % (n_batch_loss / n_batch_cnt)))
            n_batch_loss = 0.0
            n_batch_cnt = 0
        # Walk the level-0 LoD to recover per-batch / per-instance spans.
        batch_offset = 0
        for idx, batch in enumerate(batch_list):
            # one batch
            batch_size = len(batch['raw_data'])
            batch_range = match_lod[0][batch_offset:batch_offset
                                       + batch_size + 1]
            batch_lod = [[batch_range[x], batch_range[x + 1]]
                         for x in range(len(batch_range[:-1]))]
            start_prob_batch = start_probs_m[batch_offset:batch_offset
                                             + batch_size + 1]
            end_prob_batch = end_probs_m[batch_offset:batch_offset
                                         + batch_size + 1]
            for sample, start_prob_inst, end_prob_inst, inst_range in zip(
                    batch['raw_data'], start_prob_batch, end_prob_batch,
                    batch_lod):
                # one instance
                inst_lod = match_lod[1][inst_range[0]:inst_range[1] + 1]
                best_answer, best_span = find_best_answer_for_inst(
                    sample, start_prob_inst, end_prob_inst, inst_lod)
                pred = {
                    'question_id': sample['question_id'],
                    'question_type': sample['question_type'],
                    'answers': [best_answer],
                    'entity_answers': [[]],
                    'yesno_answers': [best_span]
                }
                pred_answers.append(pred)
                if 'answers' in sample:
                    ref = {
                        'question_id': sample['question_id'],
                        'question_type': sample['question_type'],
                        'answers': sample['answers'],
                        'entity_answers': [[]],
                        'yesno_answers': []
                    }
                    ref_answers.append(ref)
            batch_offset = batch_offset + batch_size

    result_dir = args.result_dir
    result_prefix = args.result_name
    if result_dir is not None and result_prefix is not None:
        if not os.path.exists(args.result_dir):
            os.makedirs(args.result_dir)
        # NOTE(review): produces e.g. "<prefix>json" — looks like a missing
        # '.' before the extension; confirm intended filename before fixing.
        result_file = os.path.join(result_dir, result_prefix + 'json')
        with open(result_file, 'w') as fout:
            for pred_answer in pred_answers:
                fout.write(json.dumps(pred_answer, ensure_ascii=False) + '\n')
        logger.info('Saving {} results to {}'.format(result_prefix,
                                                     result_file))

    ave_loss = 1.0 * total_loss / count
    # compute the bleu and rouge scores if reference answers is provided
    if len(ref_answers) > 0:
        pred_dict, ref_dict = {}, {}
        for pred, ref in zip(pred_answers, ref_answers):
            question_id = ref['question_id']
            if len(ref['answers']) > 0:
                pred_dict[question_id] = normalize(pred['answers'])
                ref_dict[question_id] = normalize(ref['answers'])
        bleu_rouge = compute_bleu_rouge(pred_dict, ref_dict)
    else:
        bleu_rouge = None
    return ave_loss, bleu_rouge
def predict_infer(conf_dict, data_reader, predict_data_path,
                  predict_result_path, model_path):
    """
    Predict with trained models.

    Loads an inference model from ``model_path``, tags every sentence in
    ``predict_data_path``, refines the tag sequences into (subject,
    predicate, object) triples and writes them via ``output`` to
    ``predict_result_path`` (or stdout when that path is empty).

    Returns early (doing nothing) when ``model_path`` is empty.
    """
    if len(predict_result_path) > 0:
        # NOTE(review): this handle is never explicitly closed; relies on
        # interpreter exit. Consider a context manager if refactoring.
        result_writer = open(predict_result_path, 'w')
    else:
        result_writer = sys.stdout
    np.set_printoptions(precision=3)
    if len(model_path) == 0:
        return
    place = fluid.CPUPlace()
    # Feed vars mirror the training inputs: word ids, POS ids, predicate ids.
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    postag = fluid.layers.data(
        name='token_pos', shape=[1], dtype='int64', lod_level=1)
    p_word = fluid.layers.data(
        name='p_word', shape=[1], dtype='int64', lod_level=1)
    feeder = fluid.DataFeeder(feed_list=[word, postag, p_word], place=place)
    exe = fluid.Executor(place)
    test_batch_reader = paddle.batch(
        paddle.reader.buffered(
            data_reader.get_predict_reader(
                predict_data_path, need_input=True, need_label=False),
            size=8192),
        batch_size=conf_dict['batch_size'])
    inference_scope = fluid.core.Scope()
    text_spo_dic = {}  # final triples
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names, fetch_targets] = \
            fluid.io.load_inference_model(
                model_path, exe, params_filename='params')
        # batch
        batch_id = 0
        for data in test_batch_reader():
            # item[0] is the raw input line; item[1:] are the feed tensors.
            feeder_data = []
            input_data = []
            for item in data:
                feeder_data.append(item[1:])
                input_data.append(item[0])
            results = exe.run(inference_program,
                              feed=feeder.feed(feeder_data),
                              fetch_list=fetch_targets,
                              return_numpy=False)
            # Level-0 LoD marks sentence boundaries in the flat score tensor.
            tag_split_idx = results[0].lod()[0]
            label_tag_scores = np.array(results[0])
            # sentence
            #print('batch_id=', batch_id)
            for sent_idx, tag_idx in enumerate(tag_split_idx[:-1]):
                input_sent = input_data[sent_idx].split('\t')[0]
                input_p = input_data[sent_idx].split('\t')[1]
                tag_scores = label_tag_scores[tag_idx:tag_split_idx[sent_idx +
                                                                    1]]
                # token
                tag_list = []
                for token_idx, token_tags in enumerate(tag_scores):
                    tag = data_reader.get_label_output(token_tags)
                    tag_list.append(tag)
                predicted_s_list, predicted_o_list = refine_predict_seq(
                    input_sent, tag_list)
                # NOTE(review): tag_list_str is computed but never used.
                tag_list_str = json.dumps(tag_list, ensure_ascii=False)
                if len(predicted_s_list) == 0 or len(predicted_o_list) == 0:
                    continue
                else:
                    text = json.loads(input_sent)["text"]
                    # Deduplicate subjects/objects before forming pairs.
                    predicted_s_list = list(set(predicted_s_list))
                    predicted_o_list = list(set(predicted_o_list))
                    for predicted_s in predicted_s_list:
                        for predicted_o in predicted_o_list:
                            if text not in text_spo_dic:
                                text_spo_dic[text] = set()
                            text_spo_dic[text].add(
                                (predicted_s, input_p, predicted_o))
            batch_id += 1
    output(text_spo_dic, result_writer)
regularization=l2) opts = optimizer.minimize(avg_cost) # 获取CIFAR数据 train_reader = paddle.batch(cifar.train10(), batch_size=32) test_reader = paddle.batch(cifar.test10(), batch_size=32) # 定义一个使用GPU的执行器 place = fluid.CUDAPlace(0) # place = fluid.CPUPlace() exe = fluid.Executor(place) # 进行参数初始化 exe.run(fluid.default_startup_program()) # 定义输入数据维度 feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) # 定义日志的开始位置和获取参数名称 train_step = 0 test_step = 0 params_name = fluid.default_startup_program().global_block().all_parameters( )[0].name # 训练10次 for pass_id in range(10): # 进行训练 for batch_id, data in enumerate(train_reader()): train_cost, train_acc, params = exe.run( program=fluid.default_main_program(), feed=feeder.feed(data), fetch_list=[avg_cost, acc, params_name])
def compress(args):
    """Sensitivity-based channel pruning of an image classifier.

    Builds the model, optionally pre-trains/loads weights, then alternates
    ``pruner.prune`` (10% ratio on ``*_sep_weights`` params) with fine-tune
    ``train`` and ``test`` rounds, checkpointing after each iteration and
    reporting FLOPs before/after.
    """
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone for inference BEFORE the optimizer mutates the main program.
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if args.pretrained_model:
        def if_exist(var):
            # Load only vars that have a matching file in the checkpoint dir.
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)
    train_feeder = feeder = fluid.DataFeeder([image, label], place)
    # NOTE(review): val_feeder is defined but test() below feeds through
    # train_feeder; both feed the same [image, label] vars.
    val_feeder = feeder = fluid.DataFeeder(
        [image, label], place, program=val_program)

    def test(epoch, program):
        """One accuracy pass over val_reader; returns mean top-1."""
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}"
                    .format(epoch, batch_id,
                            np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}".format(
                epoch,
                np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, program):
        """One fine-tuning epoch over train_reader on a compiled program."""
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(
                loss_name=avg_cost.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {:.3f}; acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    # Only depthwise-separable weights are candidates for pruning.
    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)

    def eval_func(program):
        # Sensitivity probe used by the pruner.
        return test(0, program)

    if args.data == "mnist":
        # Quick warm-up training pass for MNIST before pruning.
        train(0, fluid.default_main_program())
    pruner = SensitivePruner(place, eval_func, checkpoints=args.checkpoints)
    # Resume from the last saved pruning checkpoint if one exists.
    pruned_program, pruned_val_program, iter = pruner.restore()
    if pruned_program is None:
        pruned_program = fluid.default_main_program()
    if pruned_val_program is None:
        pruned_val_program = val_program
    start = iter
    end = 6
    # NOTE(review): `iter` shadows the builtin; kept to avoid behavior risk.
    for iter in range(start, end):
        pruned_program, pruned_val_program = pruner.prune(
            pruned_program, pruned_val_program, params, 0.1)
        train(iter, pruned_program)
        test(iter, pruned_val_program)
        pruner.save_checkpoint(pruned_program, pruned_val_program)
    print("before flops: {}".format(flops(fluid.default_main_program())))
    print("after flops: {}".format(flops(pruned_val_program)))
def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       pretrained_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    """Train SE-ResNeXt / MobileNet on the flowers dataset with a
    ParallelExecutor and record CE (continuous evaluation) KPIs.

    Fixes vs. previous revision:
    * ``args.model is 'se_resnext'`` compared identity with a string literal
      (unreliable; a SyntaxWarning on modern CPython) — now ``==``.
    * ``fluid.default_startup_program.random_seed = 1000`` set an attribute
      on the *function object* instead of the program — now calls it.
    * ``lr_strategy`` defaults to ``None`` but was used with ``in`` — now
      normalized to an empty dict (same branch taken: the plain-Momentum
      fallback).
    * The test loop printed the train loop's stale ``batch_id`` — now
      enumerates its own batches.
    * ``mean_pass_speed`` averaged the scalar ``pass_speed`` — now averages
      the collected ``train_speed`` list, matching its name.
    """
    class_dim = 1000
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    if args.model == 'se_resnext':
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    else:
        out = mobile_net(img=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)
    # Inference-only clone, taken before the optimizer mutates the program.
    test_program = fluid.default_main_program().clone(for_test=True)

    if lr_strategy is None:
        lr_strategy = {}
    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(
                learning_rate=learning_rate,
                step_each_epoch=step_each_epoch,
                epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    opts = optimizer.minimize(avg_cost)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    # Deterministic init for CE runs (was missing the call parentheses).
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:
        # Load only vars that exist in the pretrained checkpoint dir.
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(flowers.train(), batch_size=batch_size)
    test_reader = paddle.batch(flowers.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
    test_exe = fluid.ParallelExecutor(
        use_cuda=True, main_program=test_program, share_vars_from=train_exe)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
    train_speed = []
    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        pass_time = 0
        pass_num = 0
        pass_speed = 0.0
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list, feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            pass_time += period
            pass_num += len(data)
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, \
                    acc1 {3}, acc5 {4} time {5}"
                      .format(pass_id, batch_id, loss, acc1, acc5,
                              "%2.2f sec" % period))
                sys.stdout.flush()
        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        pass_speed = pass_num / pass_time
        train_speed.append(pass_speed)
        if pass_id == num_passes - 1:
            # Record CE KPIs on the last pass only.
            train_acc_top1_kpi.add_record(train_acc1)
            train_acc_top5_kpi.add_record(train_acc5)
            train_cost_kpi.add_record(train_loss)
            # Average over all passes (was averaging a single scalar).
            mean_pass_speed = np.array(train_speed).mean()
            train_speed_kpi.add_record(mean_pass_speed)
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = test_exe.run(fetch_list, feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            test_info[0].append(loss)
            test_info[1].append(acc1)
            test_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                    acc1 {3},acc5 {4},time {5}"
                      .format(pass_id, batch_id, loss, acc1, acc5,
                              "%2.2f sec" % period))
                sys.stdout.flush()
        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()
        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
            test_loss {4}, test_acc1 {5}, test_acc5 {6}, pass_time {7}, train_speed {8}"
              .format(pass_id, train_loss, train_acc1, train_acc5, test_loss,
                      test_acc1, test_acc5, pass_time, pass_num / pass_time))
        sys.stdout.flush()
    # Persist CE KPIs once training is done.
    train_acc_top1_kpi.persist()
    train_acc_top5_kpi.persist()
    train_cost_kpi.persist()
    train_speed_kpi.persist()
def test_converter(): img = fluid.layers.data(name='image', shape=[1, 28, 28]) label = fluid.layers.data(name='label', shape=[1], dtype='int64') feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) result = feeder.feed([[[0] * 784, [9]], [[1] * 784, [1]]]) print(result)
fluid.backward.append_backward(loss=avg_cost_clip, callbacks=[fluid.clip.error_clip_callback]) hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD") hidden1_grad_clip = prog_clip.block(0).var(hidden1.name + "@GRAD") hidden2_grad = prog.block(0).var(hidden2.name + "@GRAD") hidden2_grad_clip = prog_clip.block(0).var(hidden2.name + "@GRAD") train_reader = paddle.batch(paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=8192), batch_size=BATCH_SIZE) place = fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[image, label], place=place) exe.run(fluid.default_startup_program()) count = 0 for data in train_reader(): count += 1 if count > 5: break out1, out2 = exe.run(prog, feed=feeder.feed(data), fetch_list=[hidden1_grad, hidden2_grad]) out1_clip, out2_clip = exe.run( prog_clip, feed=feeder.feed(data), fetch_list=[hidden1_grad_clip, hidden2_grad_clip]) if not ((out1.clip(min=CLIP_MIN, max=CLIP_MAX) == out1_clip).all() and
def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
          args, train_prog, startup_prog):
    """Single-process benchmark trainer (also serves as a pserver entry).

    When TRAINING_ROLE=PSERVER, runs startup_prog and then train_prog (which
    blocks serving parameters) and returns. Otherwise trains for
    ``args.pass_num`` passes, optionally evaluates after each pass, and exits
    the process when done.

    Fixes vs. previous revision:
    * ``data == None`` -> ``data is None`` (identity check; also safe if a
      reader ever yields array-like batches).
    * ``vars.itervalues()`` (Python-2-only) -> ``vars.values()`` (works on
      both Python 2 and 3).
    * Removed the stray trailing comma after a ``print(...)`` call, a
      Python-2 leftover that built and discarded a 1-tuple.
    """
    if os.getenv("TRAINING_ROLE") == "PSERVER":
        place = core.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)
        exe.run(train_prog)
        return
    if args.use_fake_data:
        raise Exception(
            "fake data is not supported in single GPU test for now.")
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Use inference_transpiler to speedup
    if not args.use_reader_op:
        feed_var_list = [
            var for var in train_prog.global_block().vars.values()
            if var.is_data
        ]
        feeder = fluid.DataFeeder(feed_var_list, place)

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        train_losses = []
        if not args.use_reader_op:
            reader_generator = train_reader()
        batch_id = 0
        data = None
        while True:
            if not args.use_reader_op:
                data = next(reader_generator, None)
                if data is None:
                    break
            if iters == args.iterations:
                break
            if iters == args.skip_batch_num:
                # Restart timing after the warm-up batches.
                start_time = time.time()
                num_samples = 0
            if args.use_reader_op:
                try:
                    loss = exe.run(train_prog, fetch_list=[avg_loss])
                except fluid.core.EnforceNotMet as ex:
                    # Reader exhausted: end this pass.
                    break
            else:
                loss = exe.run(train_prog,
                               feed=feeder.feed(data),
                               fetch_list=[avg_loss])
            iters += 1
            batch_id += 1
            # FIXME(wuyi): For use_reader_op, if the current
            # pass is not the last, the last batch of this pass
            # is also equal to args.batch_size.
            if args.use_reader_op:
                num_samples += args.batch_size * args.gpus
            else:
                num_samples += len(data)
            train_losses.append(loss)
            print("Pass: %d, Iter: %d, Loss: %f\n" %
                  (pass_id, iters, np.mean(train_losses)))
        print_train_time(start_time, time.time(), num_samples)
        print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses)))
        # evaluation
        if not args.no_test and batch_acc and not args.use_reader_op:
            if args.use_inference_transpiler:
                t = fluid.InferenceTranspiler()
                t.transpile(infer_prog, place)
            pass_test_acc = test(exe, infer_prog, test_reader, feeder,
                                 batch_acc)
            print(", Test Accuracy: %f" % pass_test_acc)
        print("\n")
    # TODO(wuyi): add warmup passes to get better perf data.
    exit(0)