def test_prune(self): main_program = fluid.Program() startup_program = fluid.Program() # X X O X O # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6 # | ^ | ^ # |____________| |____________________| # # X: prune output channels # O: prune input channels with fluid.unique_name.guard(): with fluid.program_guard(main_program, startup_program): input = fluid.data(name="image", shape=[None, 3, 16, 16]) label = fluid.data(name='label', shape=[None, 1], dtype='int64') conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu') conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu') sum1 = conv1 + conv2 conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6') conv4 = conv_bn_layer(conv3, 8, 3, "conv4") sum2 = conv4 + sum1 conv5 = conv_bn_layer(sum2, 8, 3, "conv5") flag = fluid.layers.fill_constant([1], value=1, dtype='int32') rand_flag = paddle.randint(2, dtype='int32') cond = fluid.layers.less_than(x=flag, y=rand_flag) cond_output = fluid.layers.create_global_var( shape=[1], value=0.0, dtype='float32', persistable=False, name='cond_output') def cond_block1(): cond_conv = conv_bn_layer(conv5, 8, 3, "conv_cond1_1") fluid.layers.assign(input=cond_conv, output=cond_output) def cond_block2(): cond_conv1 = conv_bn_layer(conv5, 8, 3, "conv_cond2_1") cond_conv2 = conv_bn_layer(cond_conv1, 8, 3, "conv_cond2_2") fluid.layers.assign(input=cond_conv2, output=cond_output) fluid.layers.cond(cond, cond_block1, cond_block2) sum3 = fluid.layers.sum([sum2, cond_output]) conv6 = conv_bn_layer(sum3, 8, 3, "conv6") sub1 = conv6 - sum3 mult = sub1 * sub1 conv7 = conv_bn_layer(mult, 8, 3, "Depthwise_Conv7", groups=8, use_cudnn=False) floored = fluid.layers.floor(conv7) scaled = fluid.layers.scale(floored) concated = fluid.layers.concat([scaled, mult], axis=1) conv8 = conv_bn_layer(concated, 8, 3, "conv8") predict = fluid.layers.fc(input=conv8, size=10, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) adam_optimizer = fluid.optimizer.AdamOptimizer(0.01) avg_cost = fluid.layers.mean(cost) adam_optimizer.minimize(avg_cost) params = [] for param in main_program.all_parameters(): if 'conv' in param.name: params.append(param.name) #TODO: To support pruning convolution before fc layer. params.remove('conv8_weights') place = fluid.CUDAPlace(0) exe = fluid.Executor(place) exe.run(startup_program) x = np.random.random(size=(10, 3, 16, 16)).astype('float32') label = np.random.random(size=(10, 1)).astype('int64') loss_data, = exe.run(main_program, feed={ "image": x, "label": label }, fetch_list=[cost.name]) pruner = Pruner() main_program, _, _ = pruner.prune(main_program, fluid.global_scope(), params=params, ratios=[0.5] * len(params), place=place, lazy=False, only_graph=False, param_backup=None, param_shape_backup=None) loss_data, = exe.run(main_program, feed={ "image": x, "label": label }, fetch_list=[cost.name])
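# --- Hedged sketch (not part of the test above): one way to inspect the pruned
# --- parameter shapes afterwards; the helper name is an illustrative assumption and
# --- it only relies on fluid.global_scope(), which the test already populates.
import numpy as np
import paddle.fluid as fluid

def check_pruned_shapes(param_names, scope=None):
    scope = scope or fluid.global_scope()
    for name in param_names:
        var = scope.find_var(name)
        if var is None:
            continue  # the pruner may rename or drop a parameter
        # with ratios=[0.5], the output-channel dimension of each conv weight
        # is expected to shrink to roughly half of its original size
        print(name, np.array(var.get_tensor()).shape)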
def fast_infer(args): """ Inference by beam search decoder based solely on Fluid operators. """ out_ids, out_scores, pyreader = fast_decoder( ModelHyperParams.src_vocab_size, ModelHyperParams.trg_vocab_size, ModelHyperParams.phone_vocab_size, ModelHyperParams.max_length + 1, ModelHyperParams.n_layer, ModelHyperParams.n_head, ModelHyperParams.d_key, ModelHyperParams.d_value, ModelHyperParams.d_model, ModelHyperParams.d_inner_hid, ModelHyperParams.prepostprocess_dropout, ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout, ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd, ModelHyperParams.weight_sharing, InferTaskConfig.beam_size, InferTaskConfig.max_out_len, ModelHyperParams.bos_idx, ModelHyperParams.eos_idx, beta=ModelHyperParams.beta, use_py_reader=args.use_py_reader) # This is used here to set dropout to the test mode. infer_program = fluid.default_main_program().clone(for_test=True) if InferTaskConfig.use_gpu: place = fluid.CUDAPlace(0) dev_count = fluid.core.get_cuda_device_count() else: place = fluid.CPUPlace() dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) fluid.io.load_vars(exe, InferTaskConfig.model_path, vars=[ var for var in infer_program.list_vars() if isinstance(var, fluid.framework.Parameter) ]) exec_strategy = fluid.ExecutionStrategy() # For faster executor exec_strategy.use_experimental_executor = True exec_strategy.num_threads = 1 build_strategy = fluid.BuildStrategy() infer_exe = fluid.ParallelExecutor(use_cuda=TrainTaskConfig.use_gpu, main_program=infer_program, build_strategy=build_strategy, exec_strategy=exec_strategy) # data reader settings for inference args.train_file_pattern = args.test_file_pattern args.use_token_batch = False args.sort_type = reader.SortType.NONE args.shuffle = False args.shuffle_batch = False test_data = prepare_data_generator( args, is_test=False, count=dev_count, pyreader=pyreader, py_reader_provider_wrapper=py_reader_provider_wrapper, place=place) if args.use_py_reader: pyreader.start() data_generator = None else: data_generator = test_data() trg_idx2word = reader.DataReader.load_dict(dict_path=args.trg_vocab_fpath, reverse=True) while True: try: feed_dict_list = prepare_feed_dict_list(data_generator, dev_count, place) if args.use_parallel_exe: seq_ids, seq_scores = infer_exe.run( fetch_list=[out_ids.name, out_scores.name], feed=feed_dict_list, return_numpy=False) else: seq_ids, seq_scores = exe.run( program=infer_program, fetch_list=[out_ids.name, out_scores.name], feed=feed_dict_list[0] if feed_dict_list is not None else None, return_numpy=False, use_program_cache=True) seq_ids_list, seq_scores_list = [ seq_ids ], [seq_scores] if isinstance( seq_ids, paddle.fluid.LoDTensor) else (seq_ids, seq_scores) for seq_ids, seq_scores in zip(seq_ids_list, seq_scores_list): # How to parse the results: # Suppose the lod of seq_ids is: # [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]] # then from lod[0]: # there are 2 source sentences, beam width is 3. 
# from lod[1]: # the first source sentence has 3 hyps; the lengths are 12, 12, 16 # the second source sentence has 3 hyps; the lengths are 14, 13, 15 hyps = [[] for i in range(len(seq_ids.lod()[0]) - 1)] scores = [[] for i in range(len(seq_scores.lod()[0]) - 1)] for i in range(len(seq_ids.lod()[0]) - 1): # for each source sentence start = seq_ids.lod()[0][i] end = seq_ids.lod()[0][i + 1] for j in range(end - start): # for each candidate sub_start = seq_ids.lod()[1][start + j] sub_end = seq_ids.lod()[1][start + j + 1] hyps[i].append(" ".join([ trg_idx2word[idx] for idx in post_process_seq( np.array(seq_ids)[sub_start:sub_end]) ])) scores[i].append(np.array(seq_scores)[sub_end - 1]) print(hyps[i][-1]) if len(hyps[i]) >= InferTaskConfig.n_best: break except (StopIteration, fluid.core.EOFException): # The data pass is over. if args.use_py_reader: pyreader.reset() break
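# --- Hedged illustration (not in the original): the 2-level LoD walk used above,
# --- replayed with the example values from the comment; at run time seq_ids.lod()
# --- returns this nested list.
lod = [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
for i in range(len(lod[0]) - 1):              # each source sentence
    start, end = lod[0][i], lod[0][i + 1]     # its beam hypotheses index into lod[1]
    for j in range(end - start):              # each hypothesis of this sentence
        sub_start, sub_end = lod[1][start + j], lod[1][start + j + 1]
        print("sentence %d, hyp %d: tokens [%d:%d), length %d"
              % (i, j, sub_start, sub_end, sub_end - sub_start))
# Prints lengths 12, 12, 16 for the first sentence and 14, 13, 15 for the second,
# matching the comment above.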
def do_train(args): train_prog = fluid.default_main_program() startup_prog = fluid.default_startup_program() with fluid.program_guard(train_prog, startup_prog): train_prog.random_seed = args.random_seed startup_prog.random_seed = args.random_seed with fluid.unique_name.guard(): # define input and reader input_slots = [{ "name": "src_ids", "shape": (-1, args.max_seq_len, 1), "dtype": "int64" }, { "name": "pos_ids", "shape": (-1, args.max_seq_len, 1), "dtype": "int64" }, { "name": "sent_ids", "shape": (-1, args.max_seq_len, 1), "dtype": "int64" }, { "name": "input_mask", "shape": (-1, args.max_seq_len, 1), "dtype": "float32" }, { "name": "input_span_mask", "shape": (-1, args.max_seq_len), "dtype": "float32" }, { "name": "start_positions", "shape": (-1, 1), "dtype": "int64" }, { "name": "end_positions", "shape": (-1, 1), "dtype": "int64" }, { "name": "is_null_answer", "shape": (-1, 1), "dtype": "int64" }] input_field = InputField(input_slots) input_field.build(build_pyreader=True) # define the network loss = create_net(is_training=True, model_input=input_field, args=args) loss.persistable = True # define the optimizer if args.use_cuda: dev_count = fluid.core.get_cuda_device_count() else: dev_count = int( os.environ.get('CPU_NUM', multiprocessing.cpu_count())) # as we need to get the max training steps for warmup training, # we define the data processer in advance # usually, we can declare data processor later, outsides the program_gurad scope processor = DataProcessor(vocab_path=args.vocab_path, do_lower_case=args.do_lower_case, max_seq_length=args.max_seq_len, in_tokens=args.in_tokens, doc_stride=args.doc_stride, do_stride=args.do_stride, max_query_length=args.max_query_len) ## define the data generator batch_generator = processor.data_generator( data_path=args.training_file, batch_size=args.batch_size, phase="train", shuffle=True, dev_count=dev_count, epoch=args.epoch) num_train_examples = processor.get_num_examples(phase='train') max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size warmup_steps = int(max_train_steps * args.warmup_proportion) print(max_train_steps, warmup_steps, num_train_examples) optimizor = optimization(loss=loss, warmup_steps=warmup_steps, num_train_steps=max_train_steps, learning_rate=args.learning_rate, train_program=train_prog, startup_prog=startup_prog, weight_decay=args.weight_decay, scheduler=args.lr_scheduler, use_fp16=args.use_fp16, loss_scaling=args.loss_scaling) # prepare training ## decorate the pyreader with batch_generator input_field.reader.decorate_batch_generator(batch_generator) ## define the executor and program for training if args.use_cuda: place = fluid.CUDAPlace(0) else: place = fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) assert (args.init_from_checkpoint == "") or (args.init_from_pretrain_model == "") ## init from some checkpoint, to resume the previous training if args.init_from_checkpoint: init_from_checkpoint(args, exe, train_prog) ## init from some pretrain models, to better solve the current task if args.init_from_pretrain_model: init_from_pretrain_model(args, exe, train_prog) build_strategy = fluid.compiler.BuildStrategy() build_strategy.enable_inplace = True compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy) # start training step = 0 for epoch_step in range(args.epoch): input_field.reader.start() while True: try: # this is for minimizing the fetching op, saving the training speed. 
if step % args.print_step == 0: fetch_list = [loss.name] else: fetch_list = [] output = exe.run(compiled_train_prog, fetch_list=fetch_list) if step % args.print_step == 0: print("step: %d, loss: %.4f" % (step, np.sum(output[0]))) if step % args.save_step == 0 and step != 0: if args.save_checkpoint: save_checkpoint(args, exe, train_prog, "step_" + str(step)) if args.save_param: save_param(args, exe, train_prog, "step_" + str(step)) step += 1 except fluid.core.EOFException: input_field.reader.reset() break if args.save_checkpoint: save_checkpoint(args, exe, train_prog, "step_final") if args.save_param: save_param(args, exe, train_prog, "step_final")
def fit():
    role = role_maker.UserDefinedRoleMaker(
        current_id=current_id,
        role=role_maker.Role.WORKER if bool(1 == int(roles)) else role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011"])
    fleet.init(role)
    BATCH_SIZE = 128
    type_size = createDataList(model_file_path, model_file_path + '.data' + "/")
    # Data providers for training and testing
    train_reader = paddle.batch(
        reader=paddle.reader.shuffle(reader=dataReader(in_file_path + ".data/trainer.list"),
                                     buf_size=BATCH_SIZE * 100),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        reader=paddle.reader.shuffle(reader=dataReader(in_file_path + ".data/test.list"),
                                     buf_size=BATCH_SIZE * 100),
        batch_size=BATCH_SIZE)
    data_shape = [3, 32, 32]
    images = fluid.layers.data(name='images', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # Build the classifier
    predict = networkConfiguration(images, type_size)
    # Define the loss function and accuracy
    cost = fluid.layers.cross_entropy(input=predict, label=label)  # cross entropy
    avg_cost = fluid.layers.mean(cost)                             # mean over all elements of cost
    acc = fluid.layers.accuracy(input=predict, label=label)        # accuracy from predictions and labels
    # Define the optimizer
    test_program = fluid.default_main_program().clone(for_test=True)  # get the test program
    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = True
    optimizer = fleet.distributed_optimizer(optimizer, strategy)  # wrap with the distributed optimizer
    optimizer.minimize(avg_cost)

    if fleet.is_server():
        print("starting server")
        fleet.init_server()
        fleet.run_server()
    elif fleet.is_worker():
        print("starting worker")
        ########## model training & evaluation ##########
        # Create the Executor; choose CPU or GPU, use_cuda=False runs on CPU
        use_cuda = False
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        print("cpu")
        # Define the data feeder
        feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
        print("data feeder ready")
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        fleet.init_worker()
        print(fleet.worker_endpoints())
        for pass_id in range(EPOCH_NUM):
            print(pass_id)
            # start training
            for batch_id, data in enumerate(train_reader()):  # iterate over train_reader
                train_cost, train_acc = exe.run(program=fluid.default_main_program(),  # run the main program
                                                feed=feeder.feed(data),                # feed one batch of data
                                                fetch_list=[avg_cost, acc])            # fetch the cost and accuracy
                # print training metrics every 20 batches
                if batch_id % 20 == 0:
                    print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %
                          (pass_id, batch_id, train_cost[0], train_acc[0]))
            # start evaluation (once per pass)
            test_costs = []  # test losses
            test_accs = []   # test accuracies
            for batch_id, data in enumerate(test_reader()):
                test_cost, test_acc = exe.run(program=test_program,        # run the test program
                                              feed=feeder.feed(data),      # feed data
                                              fetch_list=[avg_cost, acc])  # fetch cost and accuracy
                test_costs.append(test_cost[0])  # record each batch's loss
                test_accs.append(test_acc[0])    # record each batch's accuracy
            test_cost = (sum(test_costs) / len(test_costs))  # average loss
            test_acc = (sum(test_accs) / len(test_accs))     # average accuracy
            print('Test:%d, Cost:%0.5f, ACC:%0.5f' % (pass_id, test_cost, test_acc))
        save(predict, model_file_path, exe)
        fleet.stop_worker()
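# --- Hedged sketch (not in the original): fit() reads module-level `current_id` and
# --- `roles`, which are not defined in this snippet; one illustrative way to supply
# --- them from the command line (argument names are assumptions).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--current_id", type=int, default=0, help="index of this node")
parser.add_argument("--roles", type=int, default=1, help="1 = worker, 0 = server")
cli_args = parser.parse_args()
current_id, roles = cli_args.current_id, cli_args.roles

# The same script would then be launched once as the parameter server and once per
# worker, for example:
#   python train.py --roles 0 --current_id 0   # parameter server
#   python train.py --roles 1 --current_id 0   # worker 0
#   python train.py --roles 1 --current_id 1   # worker 1
fit()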
def eval(): args = parse_args() print_arguments(args) # check whether the installed paddle is compiled with GPU # PointRCNN model can only run on GPU check_gpu(True) load_config(args.cfg) if args.set_cfgs is not None: set_config_from_list(args.set_cfgs) if not os.path.isdir(args.output_dir): os.makedirs(args.output_dir) if args.eval_mode == 'rpn': cfg.RPN.ENABLED = True cfg.RCNN.ENABLED = False elif args.eval_mode == 'rcnn': cfg.RCNN.ENABLED = True cfg.RPN.ENABLED = cfg.RPN.FIXED = True assert args.batch_size, "batch size must be 1 in rcnn evaluation" elif args.eval_mode == 'rcnn_offline': cfg.RCNN.ENABLED = True cfg.RPN.ENABLED = False assert args.batch_size, "batch size must be 1 in rcnn_offline evaluation" else: raise NotImplementedError("unkown eval mode: {}".format( args.eval_mode)) place = fluid.CUDAPlace(0) exe = fluid.Executor(place) # build model startup = fluid.Program() eval_prog = fluid.Program() with fluid.program_guard(eval_prog, startup): with fluid.unique_name.guard(): eval_model = PointRCNN(cfg, args.batch_size, True, 'TEST') eval_model.build() eval_loader = eval_model.get_loader() eval_feeds = eval_model.get_feeds() eval_outputs = eval_model.get_outputs() eval_prog = eval_prog.clone(True) extra_keys = [] if args.eval_mode == 'rpn': extra_keys.extend(['sample_id', 'rpn_cls_label', 'gt_boxes3d']) if args.save_rpn_feature: extra_keys.extend([ 'pts_rect', 'pts_features', 'pts_input', ]) eval_keys, eval_values = parse_outputs(eval_outputs, prog=eval_prog, extra_keys=extra_keys) eval_compile_prog = fluid.compiler.CompiledProgram( eval_prog).with_data_parallel() exe.run(startup) # load weights if not os.path.isdir(args.weights): assert os.path.exists("{}.pdparams".format(args.weights)), \ "Given resume weight {}.pdparams not exist.".format(args.weights) fluid.load(eval_prog, args.weights, exe) kitti_feature_dir = os.path.join(args.output_dir, 'features') kitti_output_dir = os.path.join(args.output_dir, 'detections', 'data') seg_output_dir = os.path.join(args.output_dir, 'seg_result') if args.save_rpn_feature: if os.path.exists(kitti_feature_dir): shutil.rmtree(kitti_feature_dir) os.makedirs(kitti_feature_dir) if os.path.exists(kitti_output_dir): shutil.rmtree(kitti_output_dir) os.makedirs(kitti_output_dir) if os.path.exists(seg_output_dir): shutil.rmtree(seg_output_dir) os.makedirs(seg_output_dir) # must make sure these dirs existing roi_output_dir = os.path.join('./result_dir', 'roi_result', 'data') refine_output_dir = os.path.join('./result_dir', 'refine_result', 'data') final_output_dir = os.path.join("./result_dir", 'final_result', 'data') if not os.path.exists(final_output_dir): os.makedirs(final_output_dir) if args.save_result: if not os.path.exists(roi_output_dir): os.makedirs(roi_output_dir) if not os.path.exists(refine_output_dir): os.makedirs(refine_output_dir) # get reader kitti_rcnn_reader = KittiRCNNReader( data_dir=args.data_dir, npoints=cfg.RPN.NUM_POINTS, split=cfg.TEST.SPLIT, mode='EVAL', classes=cfg.CLASSES, rcnn_eval_roi_dir=args.rcnn_eval_roi_dir, rcnn_eval_feature_dir=args.rcnn_eval_feature_dir) eval_reader = kitti_rcnn_reader.get_multiprocess_reader( args.batch_size, eval_feeds) eval_loader.set_sample_list_generator(eval_reader, place) thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9] queue = multiprocessing.Queue(128) mgr = multiprocessing.Manager() lock = multiprocessing.Lock() mdict = mgr.dict() if cfg.RPN.ENABLED: mdict['exit_proc'] = 0 mdict['total_gt_bbox'] = 0 mdict['total_cnt'] = 0 mdict['total_rpn_iou'] = 0 for i in range(len(thresh_list)): 
mdict['total_recalled_bbox_list_{}'.format(i)] = 0 p_list = [] for i in range(METRIC_PROC_NUM): p_list.append( multiprocessing.Process(target=rpn_metric, args=(queue, mdict, lock, thresh_list, args.save_rpn_feature, kitti_feature_dir, seg_output_dir, kitti_output_dir, kitti_rcnn_reader, cfg.CLASSES))) p_list[-1].start() if cfg.RCNN.ENABLED: for i in range(len(thresh_list)): mdict['total_recalled_bbox_list_{}'.format(i)] = 0 mdict['total_roi_recalled_bbox_list_{}'.format(i)] = 0 mdict['exit_proc'] = 0 mdict['total_cls_acc'] = 0 mdict['total_cls_acc_refined'] = 0 mdict['total_det_num'] = 0 mdict['total_gt_bbox'] = 0 p_list = [] for i in range(METRIC_PROC_NUM): p_list.append( multiprocessing.Process( target=rcnn_metric, args=(queue, mdict, lock, thresh_list, kitti_rcnn_reader, roi_output_dir, refine_output_dir, final_output_dir, args.save_result))) p_list[-1].start() try: eval_loader.start() eval_iter = 0 start_time = time.time() cur_time = time.time() while True: eval_outs = exe.run(eval_compile_prog, fetch_list=eval_values, return_numpy=False) rets_dict = { k: (np.array(v), v.recursive_sequence_lengths()) for k, v in zip(eval_keys, eval_outs) } run_time = time.time() - cur_time cur_time = time.time() queue.put(rets_dict) eval_iter += 1 logger.info("[EVAL] iter {}, time: {:.2f}".format( eval_iter, run_time)) except fluid.core.EOFException: # terminate metric process for i in range(METRIC_PROC_NUM): queue.put(None) while mdict['exit_proc'] < METRIC_PROC_NUM: time.sleep(1) for p in p_list: if p.is_alive(): p.join() end_time = time.time() logger.info( "[EVAL] total {} iter finished, average time: {:.2f}".format( eval_iter, (end_time - start_time) / float(eval_iter))) if cfg.RPN.ENABLED: avg_rpn_iou = mdict['total_rpn_iou'] / max(len(kitti_rcnn_reader), 1.) logger.info("average rpn iou: {:.3f}".format(avg_rpn_iou)) total_gt_bbox = float(max(mdict['total_gt_bbox'], 1.0)) for idx, thresh in enumerate(thresh_list): recall = mdict['total_recalled_bbox_list_{}'.format( idx)] / total_gt_bbox logger.info( "total bbox recall(thresh={:.3f}): {} / {} = {:.3f}". format(thresh, mdict['total_recalled_bbox_list_{}'.format(idx)], mdict['total_gt_bbox'], recall)) if cfg.RCNN.ENABLED: cnt = float(max(eval_iter, 1.0)) avg_cls_acc = mdict['total_cls_acc'] / cnt avg_cls_acc_refined = mdict['total_cls_acc_refined'] / cnt avg_det_num = mdict['total_det_num'] / cnt logger.info("avg_cls_acc: {}".format(avg_cls_acc)) logger.info("avg_cls_acc_refined: {}".format(avg_cls_acc_refined)) logger.info("avg_det_num: {}".format(avg_det_num)) total_gt_bbox = float(max(mdict['total_gt_bbox'], 1.0)) for idx, thresh in enumerate(thresh_list): cur_roi_recall = mdict['total_roi_recalled_bbox_list_{}'. 
format(idx)] / total_gt_bbox logger.info( 'total roi bbox recall(thresh=%.3f): %d / %d = %f' % (thresh, mdict['total_roi_recalled_bbox_list_{}'.format(idx)], total_gt_bbox, cur_roi_recall)) for idx, thresh in enumerate(thresh_list): cur_recall = mdict['total_recalled_bbox_list_{}'.format( idx)] / total_gt_bbox logger.info( 'total bbox recall(thresh=%.2f) %d / %.2f = %.4f' % (thresh, mdict['total_recalled_bbox_list_{}'.format(idx)], total_gt_bbox, cur_recall)) split_file = os.path.join('./data/KITTI', 'ImageSets', 'val.txt') image_idx_list = [x.strip() for x in open(split_file).readlines()] for k in range(image_idx_list.__len__()): cur_file = os.path.join(final_output_dir, '%s.txt' % image_idx_list[k]) if not os.path.exists(cur_file): with open(cur_file, 'w') as temp_f: pass if float(sys.version[:3]) >= 3.6: label_dir = os.path.join('./data/KITTI/object/training', 'label_2') split_file = os.path.join('./data/KITTI', 'ImageSets', 'val.txt') final_output_dir = os.path.join("./result_dir", 'final_result', 'data') name_to_class = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2} from tools.kitti_object_eval_python.evaluate import evaluate as kitti_evaluate ap_result_str, ap_dict = kitti_evaluate( label_dir, final_output_dir, label_split_file=split_file, current_class=name_to_class["Car"]) logger.info("KITTI evaluate: {}, {}".format( ap_result_str, ap_dict)) else: logger.info( "KITTI mAP only support python version >= 3.6, users can " "run 'python3 tools/kitti_eval.py' to evaluate KITTI mAP.") finally: eval_loader.reset()
def run_trainer(self, args): self.lr = args.lr if args.nccl2_reduce_layer_local_run: test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \ self.get_model(batch_size=args.batch_size, single_device=True) elif args.use_dgc: test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \ self.get_model(batch_size=args.batch_size, use_dgc=args.use_dgc) else: test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \ self.get_model(batch_size=args.batch_size) if args.update_method == "pserver": print_to_err( type(self).__name__, "begin to run transpile on trainer with pserver mode") t = self.get_transpiler( trainer_id=args.trainer_id, main_program=fluid.default_main_program(), pserver_endpoints=args.endpoints, trainers=args.trainers, sync_mode=args.sync_mode, dc_asgd=args.dc_asgd, hogwild_mode=args.hogwild) trainer_prog = t.get_trainer_program() print_to_err( type(self).__name__, "get trainer program done with pserver mode.") elif args.update_method == "nccl2" or args.update_method == "nccl2_reduce_layer": # transpile for nccl2 config = fluid.DistributeTranspilerConfig() config.mode = "nccl2" config.nccl_comm_num = args.nccl_comm_num if args.use_hallreduce: config.use_hierarchical_allreduce = True config.hierarchical_allreduce_inter_nranks = args.hallreduce_inter_nranks print_to_err( type(self).__name__, "begin to run transpile on trainer with nccl2 mode") nccl2_t = fluid.DistributeTranspiler(config=config) nccl2_t.transpile( args.trainer_id, program=fluid.default_main_program(), startup_program=fluid.default_startup_program(), trainers=args.endpoints, current_endpoint=args.current_endpoint) print_to_err( type(self).__name__, "get trainer program done. with nccl2 mode") trainer_prog = fluid.default_main_program() else: print_to_err( type(self).__name__, "do nothing about main program, just use it") trainer_prog = fluid.default_main_program() print_to_err(type(self).__name__, "use main program done.") if args.use_cuda: device_id = int(os.getenv("FLAGS_selected_gpus", "0")) place = fluid.CUDAPlace(device_id) else: place = fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) print_to_err(type(self).__name__, "run worker startup program done.") exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_threads = 1 build_stra = fluid.BuildStrategy() # FIXME force disable enable_inplace and memory_optimize build_stra.enable_inplace = False build_stra.memory_optimize = False if args.hogwild: build_stra.async_mode = True if args.enable_backward_deps: build_stra.enable_backward_optimizer_op_deps = True if args.use_reduce: build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce else: build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce pass_builder = None if args.batch_merge_repeat > 1: pass_builder = build_stra._finalize_strategy_and_create_passes() mypass = pass_builder.insert_pass(0, "multi_batch_merge_pass") mypass.set("num_repeats", args.batch_merge_repeat) if args.update_method == "nccl2" or args.update_method == "nccl2_reduce_layer": build_stra.num_trainers = len(args.endpoints.split(",")) build_stra.trainer_id = args.trainer_id else: # case args.update_method == "nccl2_reduce_layer": build_stra.num_trainers = 1 build_stra.trainer_id = 0 print_to_err(type(self).__name__, "begin to compile with data parallel") binary = compiler.CompiledProgram(trainer_prog).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_stra, exec_strategy=exec_strategy) 
print_to_err(type(self).__name__, "program compiled with data parallel") feed_var_list = [ var for var in trainer_prog.global_block().vars.values() if var.is_data ] feeder = fluid.DataFeeder(feed_var_list, place) reader_generator = train_reader() def get_data(): origin_batch = next(reader_generator) if args.update_method != "local" and args.use_reader_alloc: new_batch = [] for offset, item in enumerate(origin_batch): if offset % 2 == args.trainer_id: new_batch.append(item) return new_batch else: return origin_batch print_to_err(type(self).__name__, "begin to train on trainer") out_losses = [] for i in six.moves.xrange(RUN_STEP): loss, = exe.run(binary, fetch_list=[avg_cost.name], feed=feeder.feed(get_data())) out_losses.append(loss[0]) print_to_err(type(self).__name__, "run step %d finished" % i) print_to_err(type(self).__name__, "trainer run finished") print_to_out(out_losses)
def compress(args): ############################################################################################################ # 1. quantization configs ############################################################################################################ quant_config = { # weight quantize type, default is 'channel_wise_abs_max' 'weight_quantize_type': 'channel_wise_abs_max', # activation quantize type, default is 'moving_average_abs_max' 'activation_quantize_type': 'moving_average_abs_max', # weight quantize bit num, default is 8 'weight_bits': 8, # activation quantize bit num, default is 8 'activation_bits': 8, # ops of name_scope in not_quant_pattern list, will not be quantized 'not_quant_pattern': ['skip_quant'], # ops of type in quantize_op_types, will be quantized 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' 'dtype': 'int8', # window size for 'range_abs_max' quantization. defaulf is 10000 'window_size': 10000, # The decay coefficient of moving average, default is 0.9 'moving_rate': 0.9, } train_reader = None test_reader = None if args.data == "mnist": import paddle.dataset.mnist as reader train_reader = reader.train() val_reader = reader.test() class_dim = 10 image_shape = "1,28,28" elif args.data == "imagenet": import imagenet_reader as reader train_reader = reader.train() val_reader = reader.val() class_dim = 1000 image_shape = "3,224,224" else: raise ValueError("{} is not supported.".format(args.data)) image_shape = [int(m) for m in image_shape.split(",")] assert args.model in model_list, "{} is not in lists: {}".format( args.model, model_list) image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # model definition model = models.__dict__[args.model]() out = model.net(input=image, class_dim=class_dim) cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) train_prog = fluid.default_main_program() val_program = fluid.default_main_program().clone(for_test=True) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() ############################################################################################################ # 2. quantization transform programs (training aware) # Make some quantization transforms in the graph before training and testing. # According to the weight and activation quantization type, the graph will be added # some fake quantize operators and fake dequantize operators. 
############################################################################################################ val_program = quant_aware(val_program, place, quant_config, scope=None, for_test=True) compiled_train_prog = quant_aware(train_prog, place, quant_config, scope=None, for_test=False) opt = create_optimizer(args) opt.minimize(avg_cost) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) if args.pretrained_model: def if_exist(var): return os.path.exists(os.path.join(args.pretrained_model, var.name)) fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) val_reader = paddle.batch(val_reader, batch_size=args.batch_size) train_reader = paddle.batch(train_reader, batch_size=args.batch_size, drop_last=True) train_feeder = feeder = fluid.DataFeeder([image, label], place) val_feeder = feeder = fluid.DataFeeder([image, label], place, program=val_program) def test(epoch, program): batch_id = 0 acc_top1_ns = [] acc_top5_ns = [] for data in val_reader(): start_time = time.time() acc_top1_n, acc_top5_n = exe.run( program, feed=train_feeder.feed(data), fetch_list=[acc_top1.name, acc_top5.name]) end_time = time.time() if batch_id % args.log_period == 0: _logger.info( "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n), end_time - start_time)) acc_top1_ns.append(np.mean(acc_top1_n)) acc_top5_ns.append(np.mean(acc_top5_n)) batch_id += 1 _logger.info( "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format( epoch, np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns)))) return np.mean(np.array(acc_top1_ns)) def train(epoch, compiled_train_prog): batch_id = 0 for data in train_reader(): start_time = time.time() loss_n, acc_top1_n, acc_top5_n = exe.run( compiled_train_prog, feed=train_feeder.feed(data), fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name]) end_time = time.time() loss_n = np.mean(loss_n) acc_top1_n = np.mean(acc_top1_n) acc_top5_n = np.mean(acc_top5_n) if batch_id % args.log_period == 0: _logger.info( "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}" .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, end_time - start_time)) batch_id += 1 build_strategy = fluid.BuildStrategy() build_strategy.memory_optimize = False build_strategy.enable_inplace = False build_strategy.fuse_all_reduce_ops = False build_strategy.sync_batch_norm = False exec_strategy = fluid.ExecutionStrategy() compiled_train_prog = compiled_train_prog.with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy) ############################################################################################################ # train loop ############################################################################################################ best_acc1 = 0.0 best_epoch = 0 for i in range(args.num_epochs): train(i, compiled_train_prog) acc1 = test(i, val_program) fluid.io.save_persistables(exe, dirname=os.path.join( args.checkpoint_dir, str(i)), main_program=val_program) if acc1 > best_acc1: best_acc1 = acc1 best_epoch = i fluid.io.save_persistables(exe, dirname=os.path.join( args.checkpoint_dir, 'best_model'), main_program=val_program) fluid.io.load_persistables(exe, dirname=os.path.join(args.checkpoint_dir, 'best_model'), main_program=val_program) ############################################################################################################ # 3. 
Freeze the graph after training by adjusting the quantize # operators' order for the inference. # The dtype of float_program's weights is float32, but in int8 range. ############################################################################################################ float_program, int8_program = convert(val_program, place, quant_config, \ scope=None, \ save_int8=True) print("eval best_model after convert") final_acc1 = test(best_epoch, float_program) ############################################################################################################ # 4. Save inference model ############################################################################################################ model_path = os.path.join( quantization_model_save_dir, args.model, 'act_' + quant_config['activation_quantize_type'] + '_w_' + quant_config['weight_quantize_type']) float_path = os.path.join(model_path, 'float') int8_path = os.path.join(model_path, 'int8') if not os.path.isdir(model_path): os.makedirs(model_path) fluid.io.save_inference_model(dirname=float_path, feeded_var_names=[image.name], target_vars=[out], executor=exe, main_program=float_program, model_filename=float_path + '/model', params_filename=float_path + '/params') fluid.io.save_inference_model(dirname=int8_path, feeded_var_names=[image.name], target_vars=[out], executor=exe, main_program=int8_program, model_filename=int8_path + '/model', params_filename=int8_path + '/params')
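# --- Hedged sketch (not in the original): reloading the float inference model saved
# --- above and running one dummy batch through it; `float_path`, `place`, and
# --- `image_shape` are the variables defined in compress(), the batch size of 1 is
# --- an assumption.
import numpy as np
import paddle.fluid as fluid

infer_exe = fluid.Executor(place)
infer_scope = fluid.core.Scope()
with fluid.scope_guard(infer_scope):
    [infer_prog, feed_names, fetch_targets] = fluid.io.load_inference_model(
        dirname=float_path,
        executor=infer_exe,
        model_filename='model',
        params_filename='params')
    dummy = np.random.random([1] + image_shape).astype('float32')
    outs = infer_exe.run(infer_prog,
                         feed={feed_names[0]: dummy},
                         fetch_list=fetch_targets)
    print("output shape:", np.array(outs[0]).shape)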
def setUp(self):
    self.places = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda():
        self.places.append(fluid.CUDAPlace(0))
def main(args): hidden_size = args.hidden_size neg_num = args.neg_num epoch = args.epoch p = args.p q = args.q save_path = args.save_path batch_size = args.batch_size walk_len = args.walk_len win_size = args.win_size if not os.path.isdir(save_path): os.makedirs(save_path) dataset = load(args.dataset) if args.offline_learning: log.info("Start random walk on disk...") walk_save_path = os.path.join(save_path, "walks") if not os.path.isdir(walk_save_path): os.makedirs(walk_save_path) pool = Pool(args.processes) args_list = [(x, dataset.graph, walk_save_path, 1, batch_size, walk_len, p, q, np.random.randint(2**32)) for x in range(epoch)] pool.map(process, args_list) filelist = glob.glob(os.path.join(walk_save_path, "*")) log.info("Random walk on disk Done.") else: filelist = None train_steps = int(dataset.graph.num_nodes / batch_size) * epoch place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() node2vec_prog = fluid.Program() startup_prog = fluid.Program() with fluid.program_guard(node2vec_prog, startup_prog): with fluid.unique_name.guard(): node2vec_pyreader, node2vec_loss = node2vec_model( dataset.graph, hidden_size=hidden_size, neg_num=neg_num) lr = l.polynomial_decay(0.025, train_steps, 0.0001) adam = fluid.optimizer.Adam(lr) adam.minimize(node2vec_loss) node2vec_pyreader.decorate_tensor_provider( node2vec_generator(dataset.graph, batch_size=batch_size, walk_len=walk_len, win_size=win_size, epoch=epoch, neg_num=neg_num, p=p, q=q, filelist=filelist)) node2vec_pyreader.start() exe = fluid.Executor(place) exe.run(startup_prog) prev_time = time.time() step = 0 while 1: try: node2vec_loss_val = exe.run(node2vec_prog, fetch_list=[node2vec_loss], return_numpy=True)[0] cur_time = time.time() use_time = cur_time - prev_time prev_time = cur_time step += 1 log.info("Step %d " % step + "Node2vec Loss: %f " % node2vec_loss_val + " %f s/step." % use_time) except fluid.core.EOFException: node2vec_pyreader.reset() break fluid.io.save_persistables(exe, os.path.join(save_path, "paddle_model"), node2vec_prog)
def eval(vocab, infer_progs, dev_count, logger, args): infer_prog, infer_startup_prog, infer_model = infer_progs feed_order = infer_model.feed_order loss = infer_model.loss # prepare device place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace() exe = Executor(place) if not args.use_gpu: place = fluid.CPUPlace() import multiprocessing dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) else: place = fluid.CUDAPlace(0) dev_count = fluid.core.get_cuda_device_count() if args.para_print: with open("infer_program.desc", 'w') as f: print(str(infer_prog), file=f) # todo infer_startup_prog is not used, implicitly training scope will be used # Use test set as validation each pass total_loss = 0.0 total_cnt = 0 n_batch_cnt = 0 n_batch_loss = 0.0 val_feed_list = [ infer_prog.global_block().var(var_name) for var_name in feed_order ] val_feeder = fluid.DataFeeder(val_feed_list, place) dev_data = data.BidirectionalLMDataset( args.test_path, vocab, test=True, shuffle_on_load=False) dev_data_iter = lambda: dev_data.iter_batches(args.batch_size * dev_count, args.num_steps) dev_reader = read_multiple(dev_data_iter, args.batch_size, dev_count) last_hidden_values = np.zeros( (dev_count, args.num_layers * 2 * args.batch_size * args.embed_size), dtype='float32') last_cell_values = np.zeros( (dev_count, args.num_layers * 2 * args.batch_size * args.hidden_size), dtype='float32') for batch_id, batch_list in enumerate(dev_reader(), 1): feed_data = batch_reader(batch_list, args) feed = list(val_feeder.feed_parallel(feed_data, dev_count)) for i in range(dev_count): init_hidden_tensor = fluid.core.LoDTensor() if args.use_gpu: placex = fluid.CUDAPlace(i) else: placex = fluid.CPUPlace() init_hidden_tensor.set(last_hidden_values[i], placex) init_cell_tensor = fluid.core.LoDTensor() init_cell_tensor.set(last_cell_values[i], placex) feed[i]['init_hiddens'] = init_hidden_tensor feed[i]['init_cells'] = init_cell_tensor #todo test pe has bug in r1.3 #import pdb; pdb.set_trace() last_hidden_values = [] last_cell_values = [] for i in range(dev_count): val_fetch_outs = exe.run( program=infer_prog, feed=feed[i], fetch_list=[ infer_model.loss.name, infer_model.last_hidden.name, infer_model.last_cell.name ], # + [x[0] for x in names] + [x[0] for x in grad_names], return_numpy=False) last_hidden_values.append(np.array(val_fetch_outs[1])) last_cell_values.append(np.array(val_fetch_outs[2])) total_loss += np.array(val_fetch_outs[0]).sum() n_batch_cnt += len(np.array(val_fetch_outs[0])) total_cnt += len(np.array(val_fetch_outs[0])) n_batch_loss += np.array(val_fetch_outs[0]).sum() last_hidden_values = np.array(last_hidden_values).reshape(( dev_count, args.num_layers * 2 * args.batch_size * args.embed_size)) last_cell_values = np.array(last_cell_values).reshape( (dev_count, args.num_layers * 2 * args.batch_size * args.hidden_size)) log_every_n_batch = args.log_interval if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0: logger.info('Average dev loss from batch {} to {} is {}'.format( batch_id - log_every_n_batch + 1, batch_id, "%.10f" % ( n_batch_loss / n_batch_cnt))) n_batch_loss = 0.0 n_batch_cnt = 0 batch_offset = 0 ppl = np.exp(total_loss / total_cnt) return ppl
def train_loop(args, logger, vocab, train_progs, infer_progs, optimizer, nccl2_num_trainers=1, nccl2_trainer_id=0, worker_endpoints=None): train_prog, train_startup_prog, train_model = train_progs infer_prog, infer_startup_prog, infer_model = infer_progs # prepare device place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace() exe = Executor(place) if not args.use_gpu: place = fluid.CPUPlace() import multiprocessing dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) else: place = fluid.CUDAPlace(0) dev_count = fluid.core.get_cuda_device_count() if args.load_dir: logger.info('load pretrained checkpoints from {}'.format(args.load_dir)) # Todo: why not need to run train_startup_prog before load_persistables fluid.io.load_persistables(exe, args.load_dir, main_program=train_prog) elif args.load_pretraning_params: logger.info('load pretrained params from {}'.format(args.load_pretraning_params)) exe.run(train_startup_prog) init_pretraining_params(exe, args.load_pretraning_params, main_program=train_prog) else: exe.run(train_startup_prog) # prepare data feed_list = [ train_prog.global_block().var(var_name) for var_name in train_model.feed_order ] feeder = fluid.DataFeeder(feed_list, place) logger.info('Training the model...') exe_strategy = fluid.parallel_executor.ExecutionStrategy() if args.para_print: exe_strategy.num_threads = 1 debug_init(train_prog, train_model.grad_vars, train_model.grad_vars_name) with open("program.desc", 'w') as f: print(str(train_prog), file=f) parallel_executor = fluid.ParallelExecutor( loss_name=train_model.loss.name, main_program=train_prog, use_cuda=bool(args.use_gpu), exec_strategy=exe_strategy, num_trainers=nccl2_num_trainers, trainer_id=nccl2_trainer_id) load_params(train_prog, parallel_executor, place, logger, args) print_para(train_prog, parallel_executor, logger, optimizer, args) logger.info("begin to load data") train_data = data.BidirectionalLMDataset( args.train_path, vocab, test=(not args.shuffle), shuffle_on_load=args.shuffle) logger.info("finished load vocab") # get train epoch size log_interval = args.log_interval total_time = 0.0 batch_size = args.batch_size hidden_size = args.hidden_size custom_samples_array = np.zeros( (batch_size, args.num_steps, args.n_negative_samples_batch + 1), dtype='int64') custom_probabilities_array = np.zeros( (batch_size, args.num_steps, args.n_negative_samples_batch + 1), dtype='float32') for i in range(batch_size): for j in range(0, args.num_steps): for k in range(0, args.n_negative_samples_batch + 1): custom_samples_array[i][j][k] = k custom_probabilities_array[i][j][k] = 1.0 for epoch_id in range(args.max_epoch): start_time = time.time() logger.info("epoch id {}".format(epoch_id)) train_data_iter = lambda: train_data.iter_batches(batch_size * dev_count, args.num_steps) train_reader = read_multiple(train_data_iter, batch_size, dev_count) total_num = 0 n_batch_loss = 0.0 n_batch_cnt = 0 last_hidden_values = np.zeros( (dev_count, args.num_layers * 2 * batch_size * args.embed_size), dtype='float32') last_cell_values = np.zeros( (dev_count, args.num_layers * 2 * batch_size * hidden_size), dtype='float32') begin_time = time.time() for batch_id, batch_list in enumerate(train_reader(), 1): feed_data = batch_reader(batch_list, args) feed = list(feeder.feed_parallel(feed_data, dev_count)) for i in range(dev_count): init_hidden_tensor = fluid.core.LoDTensor() if args.use_gpu: placex = fluid.CUDAPlace(i) else: placex = fluid.CPUPlace() init_hidden_tensor.set(last_hidden_values[i], placex) init_cell_tensor 
= fluid.core.LoDTensor() init_cell_tensor.set(last_cell_values[i], placex) feed[i]['init_hiddens'] = init_hidden_tensor feed[i]['init_cells'] = init_cell_tensor fetch_outs = parallel_executor.run( feed=feed, fetch_list=[ train_model.loss.name, train_model.last_hidden.name, train_model.last_cell.name ], # + [x[0] for x in names] + [x[0] for x in grad_names], return_numpy=False) cost_train = np.array(fetch_outs[0]).mean() #import pdb; pdb.set_trace() last_hidden_values = np.array(fetch_outs[1]) last_hidden_values = last_hidden_values.reshape( (dev_count, args.num_layers * 2 * batch_size * args.embed_size)) last_cell_values = np.array(fetch_outs[2]) last_cell_values = last_cell_values.reshape(( dev_count, args.num_layers * 2 * batch_size * args.hidden_size)) #vars = fetch_outs[2:2+len(names)] #grad_vars = fetch_outs[2+len(names):] total_num += args.batch_size * dev_count n_batch_loss += np.array(fetch_outs[0]).sum() #logger.info("n_batch_loss from {} to {} is {}, {} ".format( # batch_id - log_interval, batch_id, n_batch_loss, # np.array(fetch_outs[0]).sum())) n_batch_cnt += len(np.array(fetch_outs[0])) if batch_id > 0 and batch_id % log_interval == 0: #vars_print(logger, args, vars=(vars, names), grad_vars=(grad_vars, grad_names)) print_para(train_prog, parallel_executor, logger, optimizer, args) smoothed_ppl = np.exp(n_batch_loss / n_batch_cnt) ppl = np.exp( np.array(fetch_outs[0]).sum() / len(np.array(fetch_outs[0]))) used_time = time.time() - begin_time speed = log_interval / used_time logger.info( "[train] epoch:{}, step:{}, loss:{:.3f}, ppl:{:.3f}, smoothed_ppl:{:.3f}, speed:{:.3f}". format(epoch_id, batch_id, n_batch_loss / n_batch_cnt, ppl, smoothed_ppl, speed)) n_batch_loss = 0.0 n_batch_cnt = 0 begin_time = time.time() if batch_id > 0 and batch_id % args.dev_interval == 0: valid_ppl = eval(vocab, infer_progs, dev_count, logger, args) logger.info("valid ppl {}".format(valid_ppl)) if batch_id > 0 and batch_id % args.save_interval == 0: model_path = os.path.join(args.para_save_dir, str(batch_id + epoch_id)) if not os.path.isdir(model_path): os.makedirs(model_path) fluid.io.save_persistables( executor=exe, dirname=model_path, main_program=train_prog) if args.detail and batch_id > 100: exit() end_time = time.time() total_time += end_time - start_time logger.info("train ppl {}".format(ppl)) if epoch_id == args.max_epoch - 1 and args.enable_ce: logger.info("lstm_language_model_duration\t%s" % (total_time / args.max_epoch)) logger.info("lstm_language_model_loss\t%s" % ppl[0]) model_path = os.path.join(args.para_save_dir, str(epoch_id)) if not os.path.isdir(model_path): os.makedirs(model_path) fluid.io.save_persistables( executor=exe, dirname=model_path, main_program=train_prog) valid_ppl = eval(vocab, infer_progs, dev_count, logger, args) logger.info("valid ppl {}".format(valid_ppl)) test_ppl = eval(vocab, infer_progs, dev_count, place, logger, args) logger.info("test ppl {}".format(test_ppl))
# Use the cross-entropy function to measure the error between the true labels and the predicted probabilities
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(cost)
# Compute the classification accuracy
acc = fluid.layers.accuracy(input=predict, label=label)

# Define the optimizer
# Optimize with Adamax (an Adam variant), learning rate set to 0.001
optimizer = fluid.optimizer.AdamaxOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)

# Model training
# Create the Executor for training
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
test_program = fluid.default_main_program().clone(for_test=True)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Tell the network that the input data has two parts: image and label
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Containers for plotting the training curves
all_train_iter = 0
all_train_iters = []
all_train_costs = []
all_train_accs = []

def draw_train_process(title, iters, costs, accs, label_cost, label_acc):
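    # --- Hedged body (the snippet above is truncated at the signature): an
    # --- illustrative matplotlib implementation; the original styling is unknown.
    import matplotlib.pyplot as plt
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel("cost/acc", fontsize=20)
    plt.plot(iters, costs, color='red', label=label_cost)
    plt.plot(iters, accs, color='green', label=label_acc)
    plt.legend()
    plt.grid()
    plt.show()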
def train(): learning_rate = cfg.learning_rate image_shape = [3, 512, 512] if cfg.enable_ce: fluid.default_startup_program().random_seed = 1000 fluid.default_main_program().random_seed = 1000 import random random.seed(0) np.random.seed(0) devices_num = get_device_num() total_batch_size = devices_num * cfg.TRAIN.im_per_batch use_random = True startup_prog = fluid.Program() train_prog = fluid.Program() with fluid.program_guard(train_prog, startup_prog): with fluid.unique_name.guard(): model = model_builder.EAST( add_conv_body_func=resnet.ResNet(), use_random=use_random) model.build_model(image_shape) losses, keys = model.loss() loss = losses[0] fetch_list = losses boundaries = cfg.lr_steps gamma = cfg.lr_gamma step_num = len(cfg.lr_steps) values = [learning_rate * (gamma**i) for i in range(step_num + 1)] lr = exponential_with_warmup_decay( learning_rate=learning_rate, boundaries=boundaries, values=values, warmup_iter=cfg.warm_up_iter, warmup_factor=cfg.warm_up_factor) optimizer = fluid.optimizer.AdamOptimizer(learning_rate=lr, regularization=fluid.regularizer.L2Decay(cfg.weight_decay)) optimizer.minimize(loss) fetch_list = fetch_list + [lr] for var in fetch_list: var.persistable = True gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) build_strategy = fluid.BuildStrategy() build_strategy.fuse_all_optimizer_ops = False build_strategy.fuse_elewise_add_act_ops = True build_strategy.sync_batch_norm=True exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_iteration_per_drop_scope = 1 exe.run(startup_prog) if cfg.pretrained_model: def if_exist(var): return os.path.exists(os.path.join(cfg.pretrained_model, var.name)) fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist) compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel( loss_name=loss.name, build_strategy=build_strategy, exec_strategy=exec_strategy) dataset = icdar.ICDAR2015Dataset() data_generator = dataset.get_batch(num_workers=24, input_size=512, batch_size=14) def train_loop(): start_time = time.time() prev_start_time = start_time start = start_time train_stats = TrainingStats(cfg.log_window, keys) #for iter_id, data in enumerate(next(data_generator)): for iter_id in range(100000): data = next(data_generator) #for data in data_list: prev_start_time = start_time start_time = time.time() outs = exe.run(compiled_train_prog, fetch_list=[v.name for v in fetch_list], feed={"input_images": data[0], "input_score_maps": data[2], "input_geo_maps": data[3], "input_training_masks": data[4]}) stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} train_stats.update(stats) logs = train_stats.log() strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format( now_time(), iter_id, np.mean(outs[-1]), logs, start_time - prev_start_time) if iter_id % 10 == 0: print(strs) sys.stdout.flush() if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0: save_model(exe, "model_iter{}".format(iter_id), train_prog) if (iter_id + 1) == cfg.max_iter: break end_time = time.time() total_time = end_time - start_time last_loss = np.array(outs[0]).mean() train_loop()
def test_case(self):
    self._test_case(fluid.CPUPlace())
    if fluid.is_compiled_with_cuda():
        self._test_case(fluid.CUDAPlace(0))
'models/lm/zh_giga.no_cna_cmn.prune01244.klm', "Filepath for language model.") add_arg('decoding_method', str, 'ctc_beam_search', # 'ctc_greedy', "Decoding method. Options: ctc_beam_search, ctc_greedy", choices=['ctc_beam_search', 'ctc_greedy']) add_arg('specgram_type', str, 'linear', "Audio feature type. Options: linear, mfcc.", choices=['linear', 'mfcc']) args = parser.parse_args() if args.use_gpu: place = fluid.CUDAPlace(0) else: place = fluid.CPUPlace() data_generator = DataGenerator( vocab_filepath=args.vocab_path, mean_std_filepath=args.mean_std_path, augmentation_config='{}', specgram_type=args.specgram_type, keep_transcription_text=True, place=place, is_training=False) # prepare ASR model ds2_model = DeepSpeech2Model( vocab_size=data_generator.vocab_size, num_conv_layers=args.num_conv_layers, num_rnn_layers=args.num_rnn_layers,
def main(): batch_size = 20 if args.enable_ce: train_reader = paddle.batch( paddle.dataset.uci_housing.train(), batch_size=batch_size) test_reader = paddle.batch( paddle.dataset.uci_housing.test(), batch_size=batch_size) else: train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.uci_housing.train(), buf_size=500), batch_size=batch_size) test_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.uci_housing.test(), buf_size=500), batch_size=batch_size) # feature vector of length 13 x = fluid.data(name='x', shape=[None, 13], dtype='float32') y = fluid.data(name='y', shape=[None, 1], dtype='float32') main_program = fluid.default_main_program() startup_program = fluid.default_startup_program() if args.enable_ce: main_program.random_seed = 90 startup_program.random_seed = 90 y_predict = fluid.layers.fc(input=x, size=1, act=None) cost = fluid.layers.square_error_cost(input=y_predict, label=y) avg_loss = fluid.layers.mean(cost) test_program = main_program.clone(for_test=True) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) sgd_optimizer.minimize(avg_loss) # can use CPU or GPU use_cuda = args.use_gpu place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) # Specify the directory to save the parameters params_dirname = "fit_a_line.inference.model" num_epochs = args.num_epochs # main train loop. feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) exe.run(startup_program) train_prompt = "Train cost" test_prompt = "Test cost" step = 0 exe_test = fluid.Executor(place) for pass_id in range(num_epochs): for data_train in train_reader(): avg_loss_value, = exe.run( main_program, feed=feeder.feed(data_train), fetch_list=[avg_loss]) if step % 10 == 0: # record a train cost every 10 batches print("%s, Step %d, Cost %f" % (train_prompt, step, avg_loss_value[0])) if step % 100 == 0: # record a test cost every 100 batches test_metics = train_test( executor=exe_test, program=test_program, reader=test_reader, fetch_list=[avg_loss], feeder=feeder) print("%s, Step %d, Cost %f" % (test_prompt, step, test_metics[0])) # If the accuracy is good enough, we can stop the training. if test_metics[0] < 10.0: break step += 1 if math.isnan(float(avg_loss_value[0])): sys.exit("got NaN loss, training failed.") if params_dirname is not None: # We can save the trained parameters for the inferences later fluid.io.save_inference_model(params_dirname, ['x'], [y_predict], exe) if args.enable_ce and pass_id == args.num_epochs - 1: print("kpis\ttrain_cost\t%f" % avg_loss_value[0]) print("kpis\ttest_cost\t%f" % test_metics[0]) infer_exe = fluid.Executor(place) inference_scope = fluid.core.Scope() # infer with fluid.scope_guard(inference_scope): [inference_program, feed_target_names, fetch_targets ] = fluid.io.load_inference_model(params_dirname, infer_exe) batch_size = 10 infer_reader = paddle.batch( paddle.dataset.uci_housing.test(), batch_size=batch_size) infer_data = next(infer_reader()) infer_feat = numpy.array( [data[0] for data in infer_data]).astype("float32") infer_label = numpy.array( [data[1] for data in infer_data]).astype("float32") assert feed_target_names[0] == 'x' results = infer_exe.run( inference_program, feed={feed_target_names[0]: numpy.array(infer_feat)}, fetch_list=fetch_targets) print("infer results: (House Price)") for idx, val in enumerate(results[0]): print("%d: %.2f" % (idx, val)) print("\nground truth:") for idx, val in enumerate(infer_label): print("%d: %.2f" % (idx, val)) save_result(results[0], infer_label)
def run_gpu_fleet_api_trainer(self, args): assert args.update_method == "nccl2" self.lr = args.lr exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_threads = 1 dist_strategy = DistributedStrategy() dist_strategy.exec_strategy = exec_strategy dist_strategy.fuse_memory_size = 1 # MB dist_strategy.fuse_laryer_size = 1 if args.use_local_sgd: dist_strategy.use_local_sgd = True if args.ut4grad_allreduce: dist_strategy._ut4grad_allreduce = True role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) print_to_err("gpu_fleet", "fleet.node_num:") # "fleet.node_id:", fleet.node_id(), # "fleet.trainer_num:", fleet.worker_num()) test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \ self.get_model(batch_size=args.batch_size, dist_strategy=dist_strategy) trainer_prog = fleet._origin_program dist_prog = fleet.main_program device_id = int(os.getenv("FLAGS_selected_gpus", "0")) place = fluid.CUDAPlace(device_id) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) eprint(type(self).__name__, "run worker startup program done.") feed_var_list = [ var for var in trainer_prog.global_block().vars.values() if var.is_data ] feeder = fluid.DataFeeder(feed_var_list, place) reader_generator = train_reader() def get_data(): origin_batch = next(reader_generator) if args.update_method != "local" and args.use_reader_alloc: new_batch = [] for offset, item in enumerate(origin_batch): if offset % 2 == args.trainer_id: new_batch.append(item) return new_batch else: return origin_batch print_to_err(type(self).__name__, "begin to train on trainer") out_losses = [] for i in six.moves.xrange(RUN_STEP): loss, = exe.run(dist_prog, fetch_list=[avg_cost.name], feed=feeder.feed(get_data())) out_losses.append(loss[0]) print_to_err(type(self).__name__, "run step %d finished" % i) print_to_err(type(self).__name__, "trainer run finished") if six.PY2: print(pickle.dumps(out_losses)) else: sys.stdout.buffer.write(pickle.dumps(out_losses))
def get_places():
    places = [fluid.CPUPlace()]
    if fluid.core.is_compiled_with_cuda():
        places.append(fluid.CUDAPlace(0))
    return places
def run_trainer(self, args): seed = 90 device_id = int(os.getenv("FLAGS_selected_gpus", "0")) place = fluid.CUDAPlace(device_id) def _get_data(batch): if args.update_method != "local": new_batch = [] for offset, item in enumerate(batch): if offset % 2 == args.trainer_id: new_batch.append(item) return new_batch else: return batch with fluid.dygraph.guard(place): fluid.default_startup_program().random_seed = seed fluid.default_main_program().random_seed = seed np.random.seed(seed) import random random.seed = seed model, train_reader, opt = self.get_model() nranks = len(args.endpoints.split(",")) if args.endpoints else 1 if args.update_method == "nccl2": strategy = dygraph.parallel.ParallelStrategy() strategy.nranks = nranks strategy.local_rank = args.trainer_id strategy.trainer_endpoints = args.endpoints.split(",") strategy.current_endpoint = args.current_endpoint print_to_err( type(self).__name__, "begin to prepare context in dygraph with nccl2") dygraph.parallel.prepare_context(strategy) model = dygraph.parallel.DataParallel(model, strategy) print_to_err(type(self).__name__, "model built in dygraph") out_losses = [] print_to_err(type(self).__name__, "begin to run dygraph training") for step_id, data in enumerate(train_reader()): data = _get_data(data) if step_id == RUN_STEP: break loss = self.run_one_loop(model, opt, data) if step_id % 10 == 0: print_to_err( type(self).__name__, "loss at step %d: %f" % (step_id, loss.numpy())) out_losses.append(loss.numpy()) # FIXME(Yancey1989): scale the loss inplace if args.update_method == "nccl2": loss = model.scale_loss(loss) loss.backward() if args.update_method == "nccl2": model.apply_collective_grads() opt.minimize(loss) model.clear_gradients() print_to_out(out_losses)
def __next__(self): if self._first_batch is not None: batch = self._first_batch self._first_batch = None return batch if self._counter >= self._size and self._size > 0: if self._auto_reset: self.reset() raise StopIteration # Gather outputs outputs = [] for p in self._pipes: with p._check_api_type_scope(types.PipelineAPIType.ITERATOR): outputs.append(p.share_outputs()) for i in range(self._num_gpus): dev_id = self._pipes[i].device_id # Initialize dict for all output categories category_outputs = dict() # Segregate outputs into categories for j, out in enumerate(outputs[i]): category_outputs[self.output_map[j]] = out pd_gpu_place = fluid.CUDAPlace(dev_id) pd_cpu_place = fluid.CPUPlace() category_pd_type = dict() category_place = dict() category_tensors = dict() category_shapes = dict() category_lengths = dict() for cat, out in category_outputs.items(): lod = self.normalized_map[cat] assert out.is_dense_tensor() or lod > 0, \ "non-dense tensor lists must have LoD > 0" if lod > 0: # +1 for batch dim seq_len = recursive_length(out, lod + 1)[1:] shape = out.at(0).shape if callable(shape): shape = shape() shape = [sum(seq_len[-1])] + list(shape[lod:]) category_shapes[cat] = shape category_lengths[cat] = seq_len else: out = out.as_tensor() category_shapes[cat] = out.shape() category_lengths[cat] = [] category_tensors[cat] = out category_pd_type[cat] = to_paddle_type(out) if isinstance(out, (TensorGPU, TensorListGPU)): category_place[cat] = pd_gpu_place else: category_place[cat] = pd_cpu_place if self._data_batches[i] is None: pd_tensors = {} for cat, lod in self.normalized_map.items(): lod_tensor = fluid.core.LoDTensor() lod_tensor._set_dims(category_shapes[cat]) pd_tensors[cat] = lod_tensor self._data_batches[i] = pd_tensors else: pd_tensors = self._data_batches[i] # Copy data from DALI Tensors to LoDTensors for cat, tensor in category_tensors.items(): if hasattr(tensor, 'shape'): # could be tensor list assert self._dynamic_shape or \ tensor.shape() == pd_tensors[cat].shape(), \ ("Shapes do not match: DALI tensor has size {0}, " "but LoDTensor has size {1}".format( tensor.shape(), pd_tensors[cat].shape())) lod_tensor = pd_tensors[cat] lod_tensor._set_dims(category_shapes[cat]) seq_len = category_lengths[cat] lod_tensor.set_recursive_sequence_lengths(seq_len) ptr = lod_tensor._mutable_data(category_place[cat], category_pd_type[cat]) feed_ndarray(tensor, ptr) for p in self._pipes: with p._check_api_type_scope(types.PipelineAPIType.ITERATOR): p.release_outputs() p.schedule_run() self._counter += self._num_gpus * self.batch_size if (not self._fill_last_batch) and (self._counter > self._size) and self._size > 0: # First calculate how much data is required to # return exactly self._size entries. diff = self._num_gpus * self.batch_size - (self._counter - self._size) # Figure out how many GPUs to grab from. num_gpus_to_grab = int(math.ceil(diff / self.batch_size)) # Figure out how many results to grab from the last GPU # (as a fractional GPU batch may be required to bring us # right up to self._size). mod_diff = diff % self.batch_size data_from_last_gpu = mod_diff if mod_diff else self.batch_size # Grab the relevant data. # 1) Grab everything from the relevant GPUs. # 2) Grab the right data from the last GPU. # 3) Append data together correctly and return. output = self._data_batches[0:num_gpus_to_grab] output[-1] = output[-1].copy() for cat in self.output_map: lod_tensor = output[-1][cat] output[-1][cat] = lod_tensor_clip(lod_tensor, data_from_last_gpu) return output return self._data_batches
import os import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear import numpy as np import Data_Load_Module import Network # Call the data-loading function train_loader = Data_Load_Module.load_data('train') # On a machine with a GPU, set the use_gpu variable to True use_gpu = True place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace() with fluid.dygraph.guard(place): model = Network.MNIST() model.train() # Four optimizer configurations; try each one and compare the results #optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.01, parameter_list=model.parameters()) #optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.01, momentum=0.9, parameter_list=model.parameters()) #optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.01, parameter_list=model.parameters()) optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01, parameter_list=model.parameters()) EPOCH_NUM = 5 for epoch_id in range(EPOCH_NUM): for batch_id, data in enumerate(train_loader()): # Prepare the data
def main(): batch_size = 32 num_epochs = args.num_epochs net_type = args.net # Paths for the training checkpoint and the inference model param_base_dir = os.path.join(code_base_dir, 'params') param_base_dir = os.path.join(param_base_dir, net_type) infer_param_path = os.path.join(param_base_dir, "inf") ckpt_param_path = os.path.join(param_base_dir, "ckpt") print(infer_param_path) print(ckpt_param_path) train_reader = paddle.batch( paddle.reader.shuffle(data_reader(), int(batch_size * 1.5)), batch_size) test_reader = paddle.batch( paddle.reader.shuffle(data_reader(8, 10), int(batch_size * 1.5)), batch_size) train_program = fluid.Program() train_init = fluid.Program() with fluid.program_guard(train_program, train_init): image = fluid.layers.data(name='image', shape=[3, 512, 512], dtype='float32') label = fluid.layers.data(name='label', shape=[1, 512, 512], dtype='int32') train_loader = fluid.io.DataLoader.from_generator( feed_list=[image, label], capacity=batch_size) test_loader = fluid.io.DataLoader.from_generator( feed_list=[image, label], capacity=batch_size) if net_type == "unet_simple": prediction = unet_simple(image, 2, [512, 512]) elif net_type == "unet_base": prediction = unet_base(image, 2, [512, 512]) elif net_type == "deeplabv3": prediction = deeplabv3p(image, 2) else: print("Invalid network type") sys.exit(0) avg_loss = create_loss(prediction, label, 2) miou = mean_iou(prediction, label, 2) # decay=paddle.fluid.regularizer.L2Decay(0.1) # optimizer = fluid.optimizer.SGD(learning_rate=0.0005,regularization=decay) # optimizer = fluid.optimizer.DecayedAdagradOptimizer(learning_rate=0.02,regularization=decay) # optimizer = fluid.optimizer.RMSProp(learning_rate=0.1,momentum=0.8,centered=True, regularization=decay) optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.003) # optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.006, momentum=0.8,regularization=decay) optimizer.minimize(avg_loss) place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(train_init) # fluid.io.load_persistables(exe, ckpt_param_path, train_init) exe_test = fluid.Executor(place) test_program = train_program.clone(for_test=True) # train_program=fluid.CompiledProgram(train_program).with_data_parallel(loss_name=avg_loss.name) test_program = fluid.CompiledProgram(test_program).with_data_parallel( loss_name=avg_loss.name) train_loader.set_sample_list_generator(train_reader, places=place) test_loader.set_sample_list_generator(test_reader, places=place) feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) step = 1 best_miou = 0 train_prompt = "Train_miou" test_prompt = "Test_miou" plot_prompt = Ploter(train_prompt, test_prompt) for pass_id in range(num_epochs): for data_train in train_loader(): avg_loss_value, miou_value = exe.run(train_program, feed=data_train, fetch_list=[avg_loss, miou]) if step % 10 == 0: print("\t\tTrain pass %d, Step %d, Cost %f, Miou %f" % (pass_id, step, avg_loss_value[0], miou_value[0])) # if step % 10 ==0: # plot_prompt.append(train_prompt, step, miou_value[0]) # plot_prompt.plot() eval_miou = 0 if step % 100 == 0: auc_metric = fluid.metrics.Auc("AUC") test_losses = [] test_mious = [] for _, test_data in enumerate(test_loader()): # print(test_data) # input("pause") _, test_loss, test_miou = exe_test.run( test_program, feed=test_data, fetch_list=[prediction, avg_loss, miou]) test_losses.append(test_loss[0]) test_mious.append(test_miou[0]) eval_miou = np.average(np.array(test_mious)) # plot_prompt.append(test_prompt, step, eval_miou) # plot_prompt.plot() print("Test loss: %f, miou: %f" % (np.average(np.array(test_losses)), eval_miou)) if math.isnan(float(avg_loss_value[0])): sys.exit("got NaN loss, training failed.") if step % 100 == 0 and param_base_dir is not None and eval_miou > best_miou: best_miou = eval_miou print("Saving params of step: %d" % step) fluid.io.save_inference_model(infer_param_path, feeded_var_names=['image'], target_vars=[prediction], executor=exe, main_program=train_program) fluid.io.save_persistables(exe, ckpt_param_path, train_program) step += 1 print(best_miou)
def get_all_places(self): p = [fluid.CPUPlace()] if fluid.is_compiled_with_cuda(): p.append(fluid.CUDAPlace(0)) return p
def main(use_cuda): """ Advbox demo which demonstrates how to use advbox. """ TOTAL_NUM = 500 IMG_NAME = 'img' LABEL_NAME = 'label' img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32') # gradient should flow img.stop_gradient = False label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') logits = mnist_cnn_model(img) cost = fluid.layers.cross_entropy(input=logits, label=label) avg_cost = fluid.layers.mean(x=cost) # Choose CPU or GPU resources according to the configuration place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) BATCH_SIZE = 1 test_reader = paddle.batch(paddle.reader.shuffle( paddle.dataset.mnist.test(), buf_size=128 * 10), batch_size=BATCH_SIZE) fluid.io.load_params(exe, "./mnist/", main_program=fluid.default_main_program()) # advbox demo m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME, logits.name, avg_cost.name, (-1, 1), channel_axis=1) # Use static FGSM; epsilon stays fixed attack = FGSM_static(m) attack_config = {"epsilon": 0.01} # use test data to generate adversarial examples total_count = 0 fooling_count = 0 for data in test_reader(): total_count += 1 adversary = Adversary(data[0][0], data[0][1]) # FGSM non-targeted attack adversary = attack(adversary, **attack_config) if adversary.is_successful(): fooling_count += 1 #print( # 'attack success, original_label=%d, adversarial_label=%d, count=%d' # % (data[0][1], adversary.adversarial_label, total_count)) else: logger.info('attack failed, original_label=%d, count=%d' % (data[0][1], total_count)) if total_count >= TOTAL_NUM: print( "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" % (fooling_count, total_count, float(fooling_count) / total_count)) break print("fgsm attack done without any defence") # Use LabelSmoothingDefence # advbox LabelSmoothingDefence demo n = PaddleLabelSmoothingDefenceModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME, logits.name, avg_cost.name, (-1, 1), channel_axis=1, preprocess=None, smoothing=0.1) attack_new = FGSM_static(n) attack_config = {"epsilon": 0.01} total_count = 0 fooling_count = 0 for data in test_reader(): total_count += 1 # If y is not set, it is obtained automatically adversary = Adversary(data[0][0], None) # FGSM non-targeted attack adversary = attack_new(adversary, **attack_config) if adversary.is_successful(): fooling_count += 1 logger.info( 'attack success, original_label=%d, adversarial_label=%d, count=%d' % (data[0][1], adversary.adversarial_label, total_count)) else: logger.info('attack failed, original_label=%d, count=%d' % (data[0][1], total_count)) if total_count >= TOTAL_NUM: print( "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f" % (fooling_count, total_count, float(fooling_count) / total_count)) break print("fgsm attack done with LabelSmoothingDefence")
def train(args): config = parse_config(args.config) train_config = merge_configs(config, 'train', vars(args)) valid_config = merge_configs(config, 'valid', vars(args)) print_configs(train_config, 'Train') use_data_parallel = False trainer_count = fluid.dygraph.parallel.Env().nranks place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \ if use_data_parallel else fluid.CUDAPlace(0) with fluid.dygraph.guard(place): if use_data_parallel: strategy = fluid.dygraph.parallel.prepare_context() video_model = TSM_ResNet("TSM", train_config) optimizer = create_optimizer(train_config.TRAIN, video_model.parameters()) if use_data_parallel: video_model = fluid.dygraph.parallel.DataParallel( video_model, strategy) bs_denominator = 1 if args.use_gpu: # check number of GPUs gpus = os.getenv("CUDA_VISIBLE_DEVICES", "") if gpus == "": pass else: gpus = gpus.split(",") num_gpus = len(gpus) assert num_gpus == train_config.TRAIN.num_gpus, \ "num_gpus({}) set by CUDA_VISIBLE_DEVICES " \ "should be the same as that " \ "set in {}({})".format( num_gpus, args.config, train_config.TRAIN.num_gpus) bs_denominator = train_config.TRAIN.num_gpus train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size / bs_denominator) train_reader = KineticsReader(mode="train", cfg=train_config) train_reader = train_reader.create_reader() if use_data_parallel: train_reader = fluid.contrib.reader.distributed_batch_reader( train_reader) for epoch in range(train_config.TRAIN.epoch): video_model.train() total_loss = 0.0 total_acc1 = 0.0 total_acc5 = 0.0 total_sample = 0 for batch_id, data in enumerate(train_reader()): x_data = np.array([item[0] for item in data]) y_data = np.array([item[1] for item in data]).reshape([-1, 1]) imgs = to_variable(x_data) labels = to_variable(y_data) labels.stop_gradient = True outputs = video_model(imgs) loss = fluid.layers.cross_entropy(input=outputs, label=labels, ignore_index=-1) avg_loss = fluid.layers.mean(loss) acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1) acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5) if use_data_parallel: avg_loss = video_model.scale_loss(avg_loss) avg_loss.backward() video_model.apply_collective_grads() else: avg_loss.backward() optimizer.minimize(avg_loss) video_model.clear_gradients() total_loss += avg_loss.numpy()[0] total_acc1 += acc_top1.numpy()[0] total_acc5 += acc_top5.numpy()[0] total_sample += 1 print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'. format(epoch, batch_id, avg_loss.numpy()[0], acc_top1.numpy()[0], acc_top5.numpy()[0])) print( 'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}' .format(epoch, total_loss / total_sample, total_acc1 / total_sample, total_acc5 / total_sample)) video_model.eval() val(epoch, video_model, valid_config, args) if fluid.dygraph.parallel.Env().local_rank == 0: fluid.dygraph.save_dygraph(video_model.state_dict(), "final") logger.info('[TRAIN] training finished')
def test_grad(self): places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): places.append(fluid.CUDAPlace(0)) for p in places: self.func(p)
def main(args): ernie_config = ErnieConfig(args.ernie_config_path) ernie_config.print_config() reader = ClassifyReader(vocab_path=args.vocab_path, label_map_config=args.label_map_config, max_seq_len=args.max_seq_len, do_lower_case=args.do_lower_case, in_tokens=False, is_inference=True) predict_prog = fluid.Program() predict_startup = fluid.Program() with fluid.program_guard(predict_prog, predict_startup): with fluid.unique_name.guard(): predict_pyreader, probs, feed_target_names = create_model( args, pyreader_name='predict_reader', ernie_config=ernie_config, is_classify=True, is_prediction=True, ernie_version=args.ernie_version) predict_prog = predict_prog.clone(for_test=True) if args.use_cuda: place = fluid.CUDAPlace(0) dev_count = fluid.core.get_cuda_device_count() else: place = fluid.CPUPlace() dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count())) place = fluid.CUDAPlace(0) if args.use_cuda == True else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(predict_startup) if args.init_checkpoint: init_pretraining_params(exe, args.init_checkpoint, predict_prog) else: raise ValueError( "args 'init_checkpoint' should be set for prediction!") # Save the inference model assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction" _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/')) dir_name = ckpt_dir + '_inference_model' model_path = os.path.join(args.save_inference_model_path, dir_name) print("save inference model to %s" % model_path) fluid.io.save_inference_model(model_path, feed_target_names, [probs], exe, main_program=predict_prog) # Load the inference model print("load inference model from %s" % model_path) infer_program, feed_target_names, probs = fluid.io.load_inference_model( model_path, exe) src_ids = feed_target_names[0] sent_ids = feed_target_names[1] pos_ids = feed_target_names[2] input_mask = feed_target_names[3] if args.ernie_version == "2.0": task_ids = feed_target_names[4] # Compute similarity predict_data_generator = reader.data_generator(input_file=args.predict_set, batch_size=args.batch_size, epoch=1, shuffle=False) print("-------------- prediction results --------------") np.set_printoptions(precision=4, suppress=True) index = 0 for sample in predict_data_generator(): src_ids_data = sample[0] sent_ids_data = sample[1] pos_ids_data = sample[2] task_ids_data = sample[3] input_mask_data = sample[4] if args.ernie_version == "1.0": output = exe.run(infer_program, feed={ src_ids: src_ids_data, sent_ids: sent_ids_data, pos_ids: pos_ids_data, input_mask: input_mask_data }, fetch_list=probs) elif args.ernie_version == "2.0": output = exe.run(infer_program, feed={ src_ids: src_ids_data, sent_ids: sent_ids_data, pos_ids: pos_ids_data, task_ids: task_ids_data, input_mask: input_mask_data }, fetch_list=probs) else: raise ValueError("ernie_version must be 1.0 or 2.0") for single_result in output[0]: print("example_index:{}\t{}".format(index, single_result)) index += 1
def train(use_cuda): classdim = 102 data_shape = [3, 224, 224] images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') net = resnet_cifar10(images, 50) predict = fluid.layers.fc(input=net, size=classdim, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(cost) acc = fluid.layers.accuracy(input=predict, label=label) # Test program test_program = fluid.default_main_program().clone(for_test=True) optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer.minimize(avg_cost) BATCH_SIZE = 20 PASS_NUM = 50 train_reader = paddle.batch(paddle.reader.shuffle( paddle.dataset.flowers.train(), buf_size=128 * 10), batch_size=BATCH_SIZE) test_reader = paddle.batch(paddle.dataset.flowers.test(), batch_size=BATCH_SIZE) num_trainers = 1 trainer_id = 0 # ========================= for nccl2 dist train ================================= if os.getenv("PADDLE_INIT_TRAINER_ID", None) != None: # append gen_nccl_id at the end of startup program trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) print("using trainer_id: ", trainer_id) port = os.getenv("PADDLE_INIT_PORT") worker_ips = os.getenv("PADDLE_WORKERS") worker_endpoints = [] for ip in worker_ips.split(","): print(ip) worker_endpoints.append(':'.join([ip, port])) num_trainers = len(worker_endpoints) current_endpoint = os.getenv("POD_IP") + ":" + port worker_endpoints.remove(current_endpoint) nccl_id_var = fluid.default_startup_program().global_block( ).create_var(name="NCCLID", persistable=True, type=fluid.core.VarDesc.VarType.RAW) fluid.default_startup_program().global_block().append_op( type="gen_nccl_id", inputs={}, outputs={"NCCLID": nccl_id_var}, attrs={ "endpoint": current_endpoint, "endpoint_list": worker_endpoints, "trainer_id": trainer_id }) # ========================= for nccl2 dist train ================================= place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) def test(pass_id, exe): acc_list = [] avg_loss_list = [] for tid, test_data in enumerate(test_reader()): loss_t, acc_t = exe.run(program=test_program, feed=feeder.feed(test_data), fetch_list=[avg_cost, acc]) if math.isnan(float(loss_t)): sys.exit("got NaN loss, training failed.") acc_list.append(float(acc_t)) avg_loss_list.append(float(loss_t)) acc_value = numpy.array(acc_list).mean() avg_loss_value = numpy.array(avg_loss_list).mean() print('PassID {0:1}, Test Loss {1:2.2}, Acc {2:2.2}'.format( pass_id, float(avg_loss_value), float(acc_value))) def train_loop(main_program): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) for pass_id in range(PASS_NUM): pass_start = time.time() num_samples = 0 for batch_id, data in enumerate(train_reader()): exe.run(main_program, feed=feeder.feed(data)) num_samples += len(data) pass_spent = time.time() - pass_start print("Pass id %d, train speed %f" % (pass_id, num_samples / pass_spent)) test(pass_id, exe) def train_loop_parallel(main_program): startup_exe = fluid.Executor(place) startup_exe.run(fluid.default_startup_program()) exe = fluid.ParallelExecutor(True, avg_cost.name, num_threads=1, allow_op_delay=False, num_trainers=num_trainers, trainer_id=trainer_id) feeder = fluid.DataFeeder(place=place, feed_list=[images, label]) for pass_id in range(PASS_NUM): num_samples = 0 start_time = time.time() for batch_id, data in enumerate(train_reader()): loss, = exe.run([avg_cost.name], feed=feeder.feed(data)) 
num_samples += len(data) if batch_id % 1 == 0: print("Pass %d, batch %d, loss %s" % (pass_id, batch_id, np.array(loss))) pass_elapsed = time.time() - start_time print("Pass = %d, Training performance = %f imgs/s\n" % (pass_id, num_samples / pass_elapsed)) test(pass_id, startup_exe) train_loop_parallel(fluid.default_main_program())
def setUp(self): self.input = numpy.ones(5).astype("int32") self.place = fluid.CUDAPlace( 0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace() self.init_test_func()
def main(use_cuda): """ Advbox demo which demonstrates how to use advbox. """ class_dim = 1000 IMG_NAME = 'img' LABEL_NAME = 'label' # Model path pretrained_model = "models/resnet_50/115" with_memory_optimization = 1 image_shape = [3,224,224] image = fluid.layers.data(name=IMG_NAME, shape=image_shape, dtype='float32') # gradient should flow image.stop_gradient = False label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64') # model definition model = resnet.ResNet50() out = model.net(input=image, class_dim=class_dim) #test_program = fluid.default_main_program().clone(for_test=True) if with_memory_optimization: fluid.memory_optimize(fluid.default_main_program()) # Choose CPU or GPU resources according to the configuration place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) # Load model parameters if pretrained_model: def if_exist(var): return os.path.exists(os.path.join(pretrained_model, var.name)) fluid.io.load_vars(exe, pretrained_model, predicate=if_exist) cost = fluid.layers.cross_entropy(input=out, label=label) avg_cost = fluid.layers.mean(x=cost) # advbox demo m = PaddleModel( fluid.default_main_program(), IMG_NAME, LABEL_NAME, out.name, avg_cost.name, (-1, 1), channel_axis=3) attack = FGSM(m) attack_config = {"epsilons": 0.3} test_data = get_image("cat.jpg") # The label corresponding to "cat" test_label = 285 adversary = Adversary(test_data, test_label) # FGSM non-targeted attack adversary = attack(adversary, **attack_config) if adversary.is_successful(): print( 'attack success, original_label=%d, adversarial_label=%d' % (test_label, adversary.adversarial_label)) else: print('attack failed, original_label=%d, ' % (test_label)) print("fgsm attack done")