def main(cfg):
    """Train the multi-task ReID model described by ``cfg``.

    Builds an identity-sampling reader and the training program, runs the
    loop for ``cfg.max_iter`` iterations (data-parallel over all visible
    CUDA devices), logs per-iteration losses, and snapshots persistable
    variables every ``cfg.snapshot_iter`` iterations plus a final save.

    Args:
        cfg: config namespace; fields read here include data_dir, use_crop,
            batch_size, num_instances, max_iter, start_iter, snapshot_iter,
            model_save_dir, model_arch and pretrain.
    """
    ReidDataset = Dataset(root=cfg.data_dir)
    # Two alternative trainval id lists: one built from cropped images.
    if cfg.use_crop:
        ReidDataset.load_trainval('all_trainval_pids_crop.txt')
    else:
        ReidDataset.load_trainval('all_trainval_pids.txt')
    reader_config = {
        'dataset': ReidDataset.train,
        'img_dir': './dataset/aicity20_all/',
        'batch_size': cfg.batch_size,
        'num_instances': cfg.num_instances,
        'sample_type': 'Identity',
        'shuffle': True,
        'drop_last': True,
        'worker_num': 8,
        'use_process': True,
        'bufsize': 32,
        'cfg': cfg,
        'input_fields': ['image', 'pid', 'colorid', 'typeid'],
    }
    devices_num = fluid.core.get_cuda_device_count()
    print("Found {} CUDA devices.".format(devices_num))

    new_reader, num_classes, num_batch_pids, num_iters_per_epoch = create_readerMT(
        reader_config, max_iter=cfg.max_iter * devices_num)
    # Identity sampling draws whole groups of num_instances images per pid,
    # so the batch must hold an integral number of groups.
    assert cfg.batch_size % cfg.num_instances == 0

    num_iters_per_epoch = int(num_iters_per_epoch / devices_num)
    print('per epoch contain iterations:', num_iters_per_epoch)
    max_epoch = int(cfg.max_iter / num_iters_per_epoch)

    cfg.train_class_num = num_classes
    print("num_pid: ", cfg.train_class_num)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    train_reader, avg_cost, pid_cost, color_cost, type_cost, lr_node = build_train_program(
        main_prog=train_prog, startup_prog=startup_prog, cfg=cfg)
    # Mark the fetched loss vars persistable so scope cleanup between
    # iterations does not drop them.
    avg_cost.persistable = True
    pid_cost.persistable = True
    color_cost.persistable = True
    type_cost.persistable = True

    train_fetch_vars = [avg_cost, pid_cost, color_cost, type_cost, lr_node]

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    def save_model(exe, postfix, prog):
        """Save persistables of ``prog`` under model_save_dir/model_arch/postfix."""
        model_path = os.path.join(cfg.model_save_dir, cfg.model_arch, postfix)
        # Fix: always end up with an existing, empty directory.  The original
        # removed an existing directory without recreating it and only created
        # it when it was missing, relying on the saver to re-create it.
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path, main_program=prog)

    if cfg.pretrain:
        print(cfg.pretrain)

        def if_exist(var):
            # Load only variables that have a matching file in the pretrain dir.
            if os.path.exists(os.path.join(cfg.pretrain, var.name)):
                print(var.name)
                return True
            else:
                return False

        fluid.io.load_vars(exe, cfg.pretrain,
                           main_program=train_prog, predicate=if_exist)

    compile_program = fluid.compiler.CompiledProgram(
        train_prog).with_data_parallel(loss_name=avg_cost.name)

    if devices_num == 1:
        places = fluid.cuda_places(0)
    else:
        places = fluid.cuda_places()
    train_reader.set_sample_list_generator(new_reader, places=places)
    train_reader.start()
    try:
        start_time = time.time()
        snapshot_loss = 0
        snapshot_time = 0
        for cur_iter in range(cfg.start_iter, cfg.max_iter):
            cur_epoch = int(cur_iter / num_iters_per_epoch)  # typo fix: was cur_peoch
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in train_fetch_vars])
            cur_loss = np.mean(np.array(losses[0]))
            cur_pid_loss = np.mean(np.array(losses[1]))
            cur_color_loss = np.mean(np.array(losses[2]))
            cur_type_loss = np.mean(np.array(losses[3]))
            cur_lr = np.mean(np.array(losses[4]))
            snapshot_loss += cur_loss

            # Wall-clock time of this iteration; accumulated for the snapshot log.
            cur_time = time.time() - start_time
            start_time = time.time()
            snapshot_time += cur_time

            output_str = '{}/{}epoch, {}/{}iter, lr:{:.6f}, loss:{:.4f}, pid:{:.4f}, color:{:.4f}, type:{:.4f}, time:{} '.format(
                cur_epoch, max_epoch, cur_iter, cfg.max_iter, cur_lr, cur_loss,
                cur_pid_loss, cur_color_loss, cur_type_loss, cur_time)
            print(output_str)

            if (cur_iter + 1) % cfg.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(cur_iter), train_prog)
                print("Snapshot {} saved, average loss: {}, \
average time: {}".format(
                    cur_iter + 1, snapshot_loss / float(cfg.snapshot_iter),
                    snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        # Reader exhausted before max_iter; reset so it can be restarted.
        train_reader.reset()

    save_model(exe, 'model_final', train_prog)
    print('Done!')
def test_out_scale_acc(self):
    """Check quant-aware training output-scale equivalence.

    Trains the same LeNet twice — once imperatively (dygraph) through
    ImperativeQuantAware, once in static graph mode through the
    QuantizationTransformPass / OutScale passes — with identical seeds and
    parameter initialization, then asserts that (a) per-batch losses match
    and (b) the saved inference programs carry identical ``out_threshold``
    attributes on corresponding ops.
    """

    def _build_static_lenet(main, startup, is_test=False, seed=1000):
        # Build a static-graph LeNet in (main, startup); returns the image
        # and label placeholders plus the loss (train) or logits (test).
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                main.random_seed = seed
                startup.random_seed = seed
                img = fluid.layers.data(name='image',
                                        shape=[1, 28, 28],
                                        dtype='float32')
                label = fluid.layers.data(name='label',
                                          shape=[1],
                                          dtype='int64')
                prediction = StaticLenet(img)
                if not is_test:
                    loss = fluid.layers.cross_entropy(input=prediction,
                                                      label=label)
                    avg_loss = fluid.layers.mean(loss)
                else:
                    avg_loss = prediction
        return img, label, avg_loss

    reader = paddle.batch(paddle.dataset.mnist.test(),
                          batch_size=32,
                          drop_last=True)
    weight_quantize_type = 'abs_max'
    activation_quantize_type = 'moving_average_abs_max'
    param_init_map = {}  # dygraph param name -> fixed init value, reused for static net
    seed = 1000
    lr = 0.001
    dynamic_out_scale_list = []
    static_out_scale_list = []

    # ---- imperative (dygraph) quant-aware training ----
    _logger.info(
        "--------------------------dynamic graph qat--------------------------"
    )
    imperative_out_scale = ImperativeQuantAware(
        weight_quantize_type=weight_quantize_type,
        activation_quantize_type=activation_quantize_type)

    with fluid.dygraph.guard():
        np.random.seed(seed)
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed
        lenet = ImperativeLenet()
        fixed_state = {}
        # Replace the default init with deterministic values so the static
        # run below can start from exactly the same parameters.
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(loc=0.0,
                                         scale=0.01,
                                         size=np.product(p_shape)).reshape(
                                             p_shape).astype('float32')
            fixed_state[name] = value
            param_init_map[param.name] = value
        lenet.set_dict(fixed_state)
        imperative_out_scale.quantize(lenet)
        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        for batch_id, data in enumerate(reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data
                               ]).astype('int64').reshape(-1, 1)
            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)
            out = lenet(img)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            lenet.clear_gradients()
            dynamic_loss_rec.append(avg_loss.numpy()[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', avg_loss.numpy()))
        lenet.eval()

    param_save_path = "test_save_quantized_model/lenet.pdparams"
    save_dict = lenet.state_dict()
    paddle.save(save_dict, param_save_path)

    path = "./dynamic_outscale_infer_model/lenet"
    dynamic_save_dir = "./dynamic_outscale_infer_model"
    imperative_out_scale.save_quantized_model(
        layer=lenet,
        path=path,
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                    dtype='float32')
        ])

    # ---- static graph quant-aware training with the same init ----
    _logger.info(
        "--------------------------static graph qat--------------------------"
    )
    static_loss_rec = []
    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()
    exe = fluid.Executor(place)

    main = fluid.Program()
    infer = fluid.Program()
    startup = fluid.Program()
    static_img, static_label, static_loss = _build_static_lenet(
        main, startup, False, seed)
    infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, seed)
    with fluid.unique_name.guard():
        with fluid.program_guard(main, startup):
            opt = AdamOptimizer(learning_rate=lr)
            opt.minimize(static_loss)

    scope = core.Scope()
    with fluid.scope_guard(scope):
        exe.run(startup)
    # Copy the dygraph init values into the static parameters; dygraph and
    # static layers use slightly different auto-generated name stems, hence
    # the name remapping below.
    for param in main.all_parameters():
        if "batch_norm" in param.name:
            param_name = param.name.replace("norm", "norm2d")
        elif 'prelu' in param.name:
            param_name = param.name.replace("prelu", 'p_re_lu')
        else:
            param_name = param.name
        param_tensor = scope.var(param.name).get_tensor()
        param_tensor.set(param_init_map[param_name], place)

    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quantize_type,
        weight_quantize_type=weight_quantize_type,
        quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
    transform_pass.apply(main_graph)
    transform_pass.apply(infer_graph)
    outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
    outscale_pass.apply(main_graph)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=static_loss.name, build_strategy=build_strategy)

    feeder = fluid.DataFeeder(feed_list=[static_img, static_label],
                              place=place)
    with fluid.scope_guard(scope):
        for batch_id, data in enumerate(reader()):
            loss_v, = exe.run(binary,
                              feed=feeder.feed(data),
                              fetch_list=[static_loss])
            static_loss_rec.append(loss_v[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', loss_v))
    scale_inference_pass = OutScaleForInferencePass(scope=scope)
    scale_inference_pass.apply(infer_graph)

    save_program = infer_graph.to_program()
    static_save_dir = "./static_outscale_infer_model"
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            dirname=static_save_dir,
            feeded_var_names=[infer_img.name],
            target_vars=[infer_pre],
            executor=exe,
            main_program=save_program,
            model_filename="lenet" + INFER_MODEL_SUFFIX,
            params_filename="lenet" + INFER_PARAMS_SUFFIX)

    # ---- compare per-batch losses (log the first divergence, then assert) ----
    rtol = 1e-05
    atol = 1e-08
    for i, (loss_d,
            loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
        diff = np.abs(loss_d - loss_s)
        if diff > (atol + rtol * np.abs(loss_s)):
            _logger.info(
                "diff({}) at {}, dynamic loss = {}, static loss = {}".format(
                    diff, i, loss_d, loss_s))
            break

    self.assertTrue(np.allclose(np.array(dynamic_loss_rec),
                                np.array(static_loss_rec),
                                rtol=rtol,
                                atol=atol,
                                equal_nan=True),
                    msg='Failed to do the imperative qat.')

    # ---- compare out_threshold attributes of the two saved inference models ----
    # load dynamic model
    [dynamic_inference_program, feed_target_names,
     fetch_targets] = (fluid.io.load_inference_model(
         dirname=dynamic_save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX))
    # load static model
    [static_inference_program, feed_target_names,
     fetch_targets] = (fluid.io.load_inference_model(
         dirname=static_save_dir,
         executor=exe,
         model_filename="lenet" + INFER_MODEL_SUFFIX,
         params_filename="lenet" + INFER_PARAMS_SUFFIX))

    dynamic_ops = dynamic_inference_program.global_block().ops
    static_ops = static_inference_program.global_block().ops

    # Drop ops that only one side emits (flatten2 / fake_* quant helpers)
    # so the remaining op lists can be compared index by index.
    for op in dynamic_ops[:]:
        if op.type == "flatten2" or 'fake' in op.type:
            dynamic_ops.remove(op)
    for op in static_ops[:]:
        if 'fake' in op.type:
            static_ops.remove(op)

    op_count = 0
    for i in range(len(dynamic_ops)):
        if dynamic_ops[i].has_attr("out_threshold"):
            op_count += 1
            self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
            if dynamic_ops[i].attr("out_threshold") != static_ops[i].attr(
                    "out_threshold"):
                _logger.info(dynamic_ops[i].attr("out_threshold"))
                _logger.info(static_ops[i].attr("out_threshold"))
            self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                            static_ops[i].attr("out_threshold"))

    _logger.info("op_cout: {}".format(op_count))
    # 14 ops are expected to carry an out_threshold for this LeNet topology.
    self.assertTrue(op_count == 14)
def test_prune(self):
    """Build a multi-branch conv network (residual adds, cond blocks,
    elementwise ops, depthwise conv, concat) and run the Pruner over all
    conv weights at ratio 0.5, executing the program before and after
    pruning to make sure the pruned graph still runs.
    """
    main_program = fluid.Program()
    startup_program = fluid.Program()
    #   X       X              O       X              O
    # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
    #           |       ^      |               ^
    #           |_______|      |_______________|
    #
    # X: prune output channels
    # O: prune input channels
    with fluid.unique_name.guard():
        with fluid.program_guard(main_program, startup_program):
            input = fluid.data(name="image", shape=[None, 3, 16, 16])
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu')
            conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu')
            sum1 = conv1 + conv2
            conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6')
            conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
            sum2 = conv4 + sum1
            conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
            # A data-dependent cond so the pruner must handle control flow:
            # which branch runs is decided at run time by rand_flag.
            flag = fluid.layers.fill_constant([1], value=1, dtype='int32')
            rand_flag = paddle.randint(2, dtype='int32')
            cond = fluid.layers.less_than(x=flag, y=rand_flag)
            cond_output = fluid.layers.create_global_var(
                shape=[1],
                value=0.0,
                dtype='float32',
                persistable=False,
                name='cond_output')

            def cond_block1():
                # True branch: single conv feeding the shared cond_output var.
                cond_conv = conv_bn_layer(conv5, 8, 3, "conv_cond1_1")
                fluid.layers.assign(input=cond_conv, output=cond_output)

            def cond_block2():
                # False branch: two stacked convs into the same output var.
                cond_conv1 = conv_bn_layer(conv5, 8, 3, "conv_cond2_1")
                cond_conv2 = conv_bn_layer(cond_conv1, 8, 3, "conv_cond2_2")
                fluid.layers.assign(input=cond_conv2, output=cond_output)

            fluid.layers.cond(cond, cond_block1, cond_block2)
            sum3 = fluid.layers.sum([sum2, cond_output])
            conv6 = conv_bn_layer(sum3, 8, 3, "conv6")
            sub1 = conv6 - sum3
            mult = sub1 * sub1
            conv7 = conv_bn_layer(mult,
                                  8,
                                  3,
                                  "Depthwise_Conv7",
                                  groups=8,
                                  use_cudnn=False)
            floored = fluid.layers.floor(conv7)
            scaled = fluid.layers.scale(floored)
            concated = fluid.layers.concat([scaled, mult], axis=1)
            conv8 = conv_bn_layer(concated, 8, 3, "conv8")
            predict = fluid.layers.fc(input=conv8, size=10, act='softmax')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
            avg_cost = fluid.layers.mean(cost)
            adam_optimizer.minimize(avg_cost)

    params = []
    for param in main_program.all_parameters():
        if 'conv' in param.name:
            params.append(param.name)
    #TODO: To support pruning convolution before fc layer.
    params.remove('conv8_weights')

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    x = np.random.random(size=(10, 3, 16, 16)).astype('float32')
    label = np.random.random(size=(10, 1)).astype('int64')
    # Sanity run before pruning.
    loss_data, = exe.run(main_program,
                         feed={
                             "image": x,
                             "label": label
                         },
                         fetch_list=[cost.name])
    pruner = Pruner()
    main_program, _, _ = pruner.prune(main_program,
                                      fluid.global_scope(),
                                      params=params,
                                      ratios=[0.5] * len(params),
                                      place=place,
                                      lazy=False,
                                      only_graph=False,
                                      param_backup=None,
                                      param_shape_backup=None)
    # The pruned program must still execute end to end.
    loss_data, = exe.run(main_program,
                         feed={
                             "image": x,
                             "label": label
                         },
                         fetch_list=[cost.name])
def test_dataset_fleet2(self):
    """
    Testcase for InMemoryDataset from create to run.
    """
    # Write two small multi-slot sample files used as the dataset filelist.
    with open("test_in_memory_dataset2_run2_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_in_memory_dataset2_run2_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
    with fluid.program_guard(train_program, startup_program):
        slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
        slots_vars = []
        for slot in slots:
            var = fluid.layers.data(\
                name=slot, shape=[1], dtype="float32", lod_level=1)
            slots_vars.append(var)
        # A trivial cost so the optimizer has something to minimize.
        fake_cost = \
            fluid.layers.elementwise_sub(slots_vars[0], slots_vars[-1])
        fake_cost = fluid.layers.mean(fake_cost)
    with fluid.scope_guard(scope):
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        # fleet.init / distributed_optimizer need MPI; in environments
        # without it the test just logs a warning and continues.
        try:
            fleet.init()
        except ImportError as e:
            print("warning: no mpi4py")
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        try:
            adam = fleet.distributed_optimizer(adam,
                                               strategy={
                                                   "fs_uri": "fs_uri_xxx",
                                                   "fs_user": "******",
                                                   "fs_passwd": "fs_passwd_xxx",
                                                   "fs_hadoop_bin":
                                                   "fs_hadoop_bin_xxx"
                                               })
            adam.minimize([fake_cost], [scope])
        except AttributeError as e:
            print("warning: no mpi")
        except ImportError as e:
            print("warning: no mpi4py")
        exe.run(startup_program)
        dataset = paddle.distributed.InMemoryDataset()
        dataset.init(batch_size=32,
                     thread_num=3,
                     pipe_command="cat",
                     use_var=slots_vars)
        dataset.set_filelist([
            "test_in_memory_dataset2_run2_a.txt",
            "test_in_memory_dataset2_run2_b.txt"
        ])
        dataset.load_into_memory()
        # The broad excepts below are intentional: each call is expected to
        # fail in a single-machine / no-fleet environment and the test only
        # checks the API surface does not crash the process.
        try:
            dataset.global_shuffle(fleet)
        except:
            print("warning: catch expected error")
        fleet._opt_info = None
        fleet._fleet_ptr = None
        dataset = paddle.distributed.InMemoryDataset()
        dataset.init(fs_name="", fs_ugi="")
        d = paddle.distributed.fleet.DatasetBase()
        try:
            dataset._set_feed_type("MultiSlotInMemoryDataFeed")
        except:
            print("warning: catch expected error")
        # thread_num = 0 is invalid on purpose: _prepare_to_run must reject it.
        dataset.thread_num = 0
        try:
            dataset._prepare_to_run()
        except:
            print("warning: catch expected error")
        try:
            dataset.preprocess_instance()
        except:
            print("warning: catch expected error")
        try:
            dataset.set_current_phase(1)
        except:
            print("warning: catch expected error")
        try:
            dataset.postprocess_instance()
        except:
            print("warning: catch expected error")
        dataset._set_fleet_send_batch_size(1024)
        try:
            dataset.global_shuffle()
        except:
            print("warning: catch expected error")
        #dataset.get_pv_data_size()
        dataset.get_memory_data_size()
        dataset.get_shuffle_data_size()
        # QueueDataset / FileInstantDataset do not support shuffling at all;
        # both calls must raise.
        dataset = paddle.distributed.QueueDataset()
        try:
            dataset.local_shuffle()
        except:
            print("warning: catch expected error")
        try:
            dataset.global_shuffle()
        except:
            print("warning: catch expected error")
        dataset = paddle.distributed.fleet.FileInstantDataset()
        try:
            dataset.local_shuffle()
        except:
            print("warning: catch expected error")
        try:
            dataset.global_shuffle()
        except:
            print("warning: catch expected error")

    os.remove("./test_in_memory_dataset2_run2_a.txt")
    os.remove("./test_in_memory_dataset2_run2_b.txt")
def train(cfg):
    """Run segmentation training as configured by ``cfg``.

    Builds the dataset reader and training program, optionally resumes from
    a checkpoint or loads shape-matched pretrained parameters, then trains
    for cfg.SOLVER.NUM_EPOCHS epochs with periodic logging, checkpointing,
    optional evaluation and TensorBoard visualization.

    NOTE(review): relies on a module-level ``args`` namespace (use_mpio,
    use_gpu, debug, use_tb, log_steps, do_eval, tb_log_dir) — confirm it is
    initialized before calling.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Yield samples one by one, but only once a full batch of
        # cfg.BATCH_SIZE samples has been collected (see sync-BN note below).
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == cfg.BATCH_SIZE:
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of samples
        # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    place = places[0]

    # Get number of GPU
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print("batch_size_per_dev: {}".format(batch_size_per_dev))

    py_reader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    py_reader.decorate_sample_generator(data_generator,
                                        batch_size=batch_size_per_dev,
                                        drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()
    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print("Sync BatchNorm strategy will not be effective if GPU device"
                  " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print('Pretrained model dir:', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whehter persitable variable shape is match with current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        # Only load parameters whose on-disk shape matches the network.
        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        if cfg.MODEL.FP16:
            # If open FP16 training mode, load FP16 var separate
            load_fp16_vars(exe, cfg.TRAIN.PRETRAINED_MODEL_DIR, train_prog)
        else:
            fluid.io.load_vars(exe,
                               dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                               vars=load_vars)
        for var in load_vars:
            print("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print('Pretrained model dir {} not exists, training from scratch...'.
              format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print("Please specify the log directory by --tb_log_dir.")
            exit(1)
        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    # Total step count across all remaining epochs, used for the ETA display.
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print("Use multiprocess reader")
    else:
        print("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        py_reader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print("Category IoU:", category_iou)
                        print("Category Acc:", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                # Epoch finished: reset the reader and move to the next epoch.
                py_reader.reset()
                break
            except Exception as e:
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    save_checkpoint(exe, train_prog, 'final')
def train_async(args):
    """Train a metric-learning model and periodically evaluate recall@1.

    Builds separate train/test programs, optionally restores a checkpoint or
    pretrained weights, then loops up to ``args.total_iter_num`` iterations
    with periodic display logging, recall evaluation and model saving.

    Args:
        args: parsed CLI namespace (model, checkpoint, pretrained_model,
            model_save_dir, use_gpu, batch sizes, iteration-step settings,
            enable_ce, ...).
    """
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    # Fix: os.mkdir failed when model_save_dir was a nested path whose parent
    # did not exist; makedirs with exist_ok also avoids a race with other jobs.
    os.makedirs(model_save_dir, exist_ok=True)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    if args.enable_ce:
        # Continuous-evaluation mode pins every random seed for reproducibility.
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000

    train_loader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_loader, test_feas = build_program(is_train=False,
                                           main_prog=tmp_prog,
                                           startup_prog=startup_prog,
                                           args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Single-trainer GPU jobs feed all visible devices; distributed or CPU
    # jobs feed only the local place.
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers <= 1 and args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = place

    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)

    if pretrained_model:
        load_params(exe, train_prog, pretrained_model)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    # The global batch is split evenly across devices.
    assert (args.train_batch_size % devicenum) == 0
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_loader.set_sample_generator(reader.train(args),
                                      batch_size=train_batch_size,
                                      drop_last=True,
                                      places=places)
    test_loader.set_sample_generator(reader.test(args),
                                     batch_size=test_batch_size,
                                     drop_last=False,
                                     places=place)

    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    iter_no = 0
    train_info = [0, 0, 0, 0]  # running sums: loss, acc1, acc5, batch count
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            t1 = time.time()
            lr, loss, acc1, acc5 = train_exe.run(feed=train_batch,
                                                 fetch_list=train_fetch_list)
            t2 = time.time()
            period = t2 - t1
            lr = np.mean(np.array(lr))
            train_info[0] += np.mean(np.array(loss))
            train_info[1] += np.mean(np.array(acc1))
            train_info[2] += np.mean(np.array(acc5))
            train_info[3] += 1
            if iter_no % args.display_iter_step == 0:
                avgruntime = totalruntime / args.display_iter_step
                avg_loss = train_info[0] / train_info[3]
                avg_acc1 = train_info[1] / train_info[3]
                avg_acc5 = train_info[2] / train_info[3]
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                      "acc1 %.4f, acc5 %.4f, time %2.2f sec" % \
                      (fmt_time(), iter_no, lr, avg_loss, avg_acc1,
                       avg_acc5, avgruntime))
                sys.stdout.flush()
                totalruntime = 0
            if iter_no % 1000 == 0:
                # Reset the running averages every 1000 iterations.
                train_info = [0, 0, 0, 0]

            totalruntime += period

            if iter_no % args.test_iter_step == 0 and iter_no != 0:
                # Periodic retrieval evaluation: collect features and labels
                # for the whole test set, then compute recall@1.
                f, l = [], []
                for batch_id, test_batch in enumerate(test_loader()):
                    t1 = time.time()
                    [feas] = exe.run(test_prog,
                                     feed=test_batch,
                                     fetch_list=test_fetch_list)
                    label = np.asarray(test_batch[0]['label'])
                    label = np.squeeze(label)
                    f.append(feas)
                    l.append(label)
                    t2 = time.time()
                    period = t2 - t1
                    if batch_id % 20 == 0:
                        print("[%s] testbatch %d, time %2.2f sec" % \
                              (fmt_time(), batch_id, period))

                f = np.vstack(f)
                l = np.hstack(l)
                recall = recall_topk(f, l, k=1)
                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                      (fmt_time(), len(f), iter_no, recall))
                sys.stdout.flush()

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir, model_name,
                                          str(iter_no))
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1

    # This is for continuous evaluation only
    if args.enable_ce:
        # Use the mean cost/acc for training.
        # NOTE(review): avg_loss/recall are only bound once the display and
        # test branches above have run at least once — holds for any CE run
        # long enough to pass iter 0 and one test_iter_step; confirm for
        # very short configurations.
        print("kpis\ttrain_cost\t{}".format(avg_loss))
        print("kpis\ttest_recall\t{}".format(recall))
def test_in_memory_dataset_masterpatch1(self):
    """
    Testcase for InMemoryDataset from create to run.
    """
    path_a = "test_in_memory_dataset_masterpatch1_a.txt"
    path_b = "test_in_memory_dataset_masterpatch1_b.txt"
    # Two sample files; the second token of each line is an instance id,
    # which load_into_memory parses because parse_ins_id is enabled below.
    lines_a = [
        "1 id1 1 1 2 3 3 4 5 5 5 5 1 1\n",
        "1 id1 1 2 2 3 4 4 6 6 6 6 1 2\n",
        "1 id2 1 1 1 1 1 0 1 0\n",
        "1 id3 1 0 1 0 1 1 1 1\n",
        "1 id3 1 1 1 1 1 0 1 0\n",
        "1 id4 1 0 1 0 1 1 1 1\n",
        "1 id4 1 0 1 0 1 1 1 1\n",
        "1 id5 1 1 1 1 1 0 1 0\n",
        "1 id5 1 1 1 1 1 0 1 0\n",
    ]
    lines_b = [
        "1 id6 1 4 2 3 3 4 5 5 5 5 1 4\n",
        "1 id6 1 1 2 3 4 4 6 6 6 6 1 5\n",
        "1 id6 1 6 2 3 5 4 7 7 7 7 1 6\n",
        "1 id6 1 7 2 3 6 4 8 8 8 8 1 7\n",
    ]
    with open(path_a, "w") as f:
        f.write("".join(lines_a))
    with open(path_b, "w") as f:
        f.write("".join(lines_b))

    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        slot_specs = [("slot1", "int64"), ("slot2", "int64"),
                      ("slot3", "float32"), ("slot4", "float32")]
        slots_vars = [
            fluid.layers.data(name=name, shape=[1], dtype=dtype, lod_level=0)
            for name, dtype in slot_specs
        ]

    dataset = paddle.distributed.InMemoryDataset()
    dataset.init(batch_size=32,
                 thread_num=1,
                 pipe_command="cat",
                 use_var=slots_vars)
    dataset._init_distributed_settings(parse_ins_id=True)
    dataset.set_filelist([path_a, path_b])
    dataset.load_into_memory()
    dataset.local_shuffle()

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)

    # Two training passes over the dataset; an ImportError (missing optional
    # dependency) is tolerated, any other exception fails the test.
    for _ in range(2):
        try:
            exe.train_from_dataset(train_program, dataset)
        except ImportError:
            pass
        except Exception:
            self.assertTrue(False)

    # Exercise the merge-by-instance-id path on the underlying C++ dataset.
    dataset._set_merge_by_lineid(2)
    dataset.dataset.merge_by_lineid()

    os.remove("./" + path_a)
    os.remove("./" + path_b)
def _test_slice(self, place):
    """Check Variable.__getitem__ slicing, both statically (shape checks on
    an uninitialized var) and by executing slices of a concrete tensor."""
    # Static checks: slicing a block variable only manipulates metadata.
    b = default_main_program().current_block()
    w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)
    for i in range(3):
        nw = w[i]
        # Single-index keeps a leading dim of size 1.
        self.assertEqual((1, 100, 100), nw.shape)

    nw = w[:]
    self.assertEqual((784, 100, 100), nw.shape)

    nw = w[:, :, ...]
    self.assertEqual((784, 100, 100), nw.shape)

    nw = w[::2, ::2, :]
    self.assertEqual((392, 50, 100), nw.shape)

    nw = w[::-2, ::-2, :]
    self.assertEqual((392, 50, 100), nw.shape)

    self.assertEqual(0, nw.lod_level)

    # Runtime checks: build a program that slices a known 3x3x3 tensor and
    # compare each fetched slice against numpy's slicing of the same array.
    main = fluid.Program()
    with fluid.program_guard(main):
        exe = fluid.Executor(place)
        tensor_array = np.array(
            [[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
             [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
             [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32')
        var = fluid.layers.assign(tensor_array)
        var1 = var[0, 1, 1]
        var2 = var[1:]
        var3 = var[0:1]
        var4 = var[..., ]
        var5 = var[2::-2]
        var6 = var[1, 1:, 1:]
        var7 = var[1, ..., 1:]
        var8 = var[1, ...]
        var_reshape = fluid.layers.reshape(var, [3, -1, 3])
        var9 = var_reshape[1, ..., 2]
        var10 = var_reshape[:, :, -1]
        # Also slice the output of a layer (fc), not just an assigned tensor.
        x = fluid.layers.data(name='x', shape=[13], dtype='float32')
        y = fluid.layers.fc(input=x, size=1, act=None)
        var11 = y[:, 0]
        feeder = fluid.DataFeeder(place=place, feed_list=[x])
        data = []
        data.append((np.random.randint(10, size=[13]).astype('float32')))
        exe.run(fluid.default_startup_program())

        local_out = exe.run(main,
                            feed=feeder.feed([data]),
                            fetch_list=[
                                var, var1, var2, var3, var4, var5, var6,
                                var7, var8, var9, var10, var11
                            ])

        self.assertTrue(
            (np.array(local_out[1]) == np.array(tensor_array[0, 1, 1])).all())
        self.assertTrue(
            (np.array(local_out[2]) == np.array(tensor_array[1:])).all())
        self.assertTrue(
            (np.array(local_out[3]) == np.array(tensor_array[0:1])).all())
        self.assertTrue((np.array(local_out[4]) == np.array(
            tensor_array[..., ])).all())
        self.assertTrue((np.array(local_out[5]) == np.array(
            tensor_array[2::-2])).all())
        self.assertTrue(
            (np.array(local_out[6]) == np.array(tensor_array[1, 1:, 1:])).all())
        self.assertTrue(
            (np.array(local_out[7]) == np.array(tensor_array[1, ..., 1:])).all())
        self.assertTrue(
            (np.array(local_out[8]) == np.array(tensor_array[1, ...])).all())
        # For the reshape/fc slices only the shapes are asserted.
        self.assertEqual(local_out[9].shape, (1, 3, 1))
        self.assertEqual(local_out[10].shape, (3, 3, 1))
        self.assertEqual(local_out[11].shape, (1, 1))
def context(self, trainable=False, max_seq_len=128, num_slots=1):
    """
    Get the input, output and program of the pretrained senta_lstm.

    Args:
        trainable(bool): Whether to fine-tune the pretrained parameters of senta_lstm or not.
        max_seq_len (int): It will limit the total sequence returned so that it has a maximum length.
        num_slots(int): Number of data inputs to the model, selected from the following options:
            - 1(default): There's only one data input to the model, e.g. the module is used for a text classification task.
            - 2: There are two data inputs to the model, e.g. the module is used for a text matching task (point-wise).
            - 3: There are three data inputs to the model, e.g. the module is used for a text matching task (pair-wise).

    Returns:
        inputs(dict): the input variables of senta_lstm (words)
        outputs(dict): the output variables of the input words (word embeddings and label probabilities); the sentence embedding and sequence length of the first input text.
        main_program(Program): the main_program of Senta with pretrained parameters
    """
    assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        text_1 = fluid.layers.data(name="text", shape=[-1, max_seq_len, 1], dtype="int64", lod_level=0)
        seq_len = fluid.layers.data(name="seq_len", shape=[1], dtype='int64', lod_level=0)
        seq_len_used = fluid.layers.squeeze(seq_len, axes=[1])

        # Add embedding layer.
        # All slots share one embedding table ("embedding_0.w_0");
        # the last row (dict_dim - 1) is the padding index.
        w_param_attrs = fluid.ParamAttr(
            name="embedding_0.w_0",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02),
            trainable=trainable)
        dict_dim = 1256607
        emb_1 = fluid.layers.embedding(input=text_1,
                                       size=[dict_dim, 128],
                                       padding_idx=dict_dim - 1,
                                       dtype='float32',
                                       param_attr=w_param_attrs)
        emb_1_name = emb_1.name
        data_list = [text_1]
        emb_name_list = [emb_1_name]

        # Add lstm layer.
        pred, fc = lstm_net(emb_1, seq_len_used)
        pred_name = pred.name
        fc_name = fc.name

        # Optional extra text slots for matching tasks; only their embeddings
        # are exposed (the LSTM head runs on the first slot only).
        if num_slots > 1:
            text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
            emb_2 = fluid.embedding(input=text_2,
                                    size=[dict_dim, 128],
                                    padding_idx=dict_dim - 1,
                                    dtype='float32',
                                    param_attr=w_param_attrs)
            emb_2_name = emb_2.name
            data_list.append(text_2)
            emb_name_list.append(emb_2_name)
        if num_slots > 2:
            text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0)
            emb_3 = fluid.embedding(input=text_3,
                                    size=[dict_dim, 128],
                                    padding_idx=dict_dim - 1,
                                    dtype='float32',
                                    param_attr=w_param_attrs)
            emb_3_name = emb_3.name
            data_list.append(text_3)
            emb_name_list.append(emb_3_name)

        # Prefix every internal variable (but not the feed placeholders) with
        # a module-unique tag so several hub modules can share one program.
        variable_names = filter(
            lambda v: v not in ['text', 'text_2', 'text_3', "seq_len"],
            list(main_program.global_block().vars.keys()))
        prefix_name = "@HUB_{}@".format(self.name)
        add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names)

        for param in main_program.global_block().iter_parameters():
            param.trainable = trainable

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)

        # Load the senta_lstm pretrained model.
        def if_exist(var):
            return os.path.exists(
                os.path.join(self.pretrained_model_path, var.name))

        fluid.io.load_vars(exe, self.pretrained_model_path, predicate=if_exist)

        inputs = {'seq_len': seq_len}
        outputs = {
            "class_probs": main_program.global_block().vars[prefix_name + pred_name],
            "sentence_feature": main_program.global_block().vars[prefix_name + fc_name]
        }
        # Map each text slot and its embedding into the returned dicts
        # ('text'/'emb' for the first slot, 'text_2'/'emb_2' etc. after).
        for index, data in enumerate(data_list):
            if index == 0:
                inputs['text'] = data
                outputs['emb'] = main_program.global_block().vars[
                    prefix_name + emb_name_list[0]]
            else:
                inputs['text_%s' % (index + 1)] = data
                outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[
                    prefix_name + emb_name_list[index]]
        return inputs, outputs, main_program
def main(args):
    """Export an ERNIE classification model as an inference model, then run
    prediction over args.predict_set with the native AnalysisPredictor."""
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(vocab_path=args.vocab_path,
                            label_map_config=args.label_map_config,
                            max_seq_len=args.max_seq_len,
                            do_lower_case=args.do_lower_case,
                            in_tokens=False,
                            is_inference=True)

    # Build the prediction graph in its own program pair.
    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    # NOTE(review): this recomputes place, making the branch above relevant
    # only for dev_count.
    place = fluid.CUDAPlace(0) if args.use_cuda == True else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")

    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    # Save the inference model under <save_path>/<ckpt_dir>_inference_model.
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    log.info("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)

    # Set config
    #config = AnalysisConfig(args.model_dir)
    #config = AnalysisConfig(os.path.join(model_path, "__model__"), os.path.join(model_path, ""))
    config = AnalysisConfig(model_path)
    if not args.use_cuda:
        log.info("disable gpu")
        config.disable_gpu()
    else:
        log.info("using gpu")
        config.enable_use_gpu(1024)

    # Create PaddlePredictor
    predictor = create_paddle_predictor(config)

    predict_data_generator = reader.data_generator(input_file=args.predict_set,
                                                   batch_size=args.batch_size,
                                                   epoch=1,
                                                   shuffle=False)

    log.info("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    total_time = 0
    for sample in predict_data_generator():
        src_ids = sample[0]
        sent_ids = sample[1]
        pos_ids = sample[2]
        task_ids = sample[3]
        input_mask = sample[4]
        # NOTE(review): task_ids is extracted but not fed — presumably the
        # exported model takes only these four inputs; verify against create_model.
        inputs = [
            array2tensor(ndarray)
            for ndarray in [src_ids, sent_ids, pos_ids, input_mask]
        ]
        begin_time = time.time()
        outputs = predictor.run(inputs)
        end_time = time.time()
        total_time += end_time - begin_time

        # parse outputs
        output = outputs[0]
        output_data = output.data.float_data()
        batch_result = np.array(output_data).reshape(output.shape)
        for single_example_probs in batch_result:
            print('\t'.join(map(str, single_example_probs.tolist())))
            index += 1
    log.info("qps:{}\ttotal_time:{}\ttotal_example:{}\tbatch_size:{}".format(
        index / total_time, total_time, index, args.batch_size))
def test_sensitivity(self):
    """Exercise paddleslim's sensitivity/merge_sensitive/get_ratios_by_loss:
    partial sensitivity runs merged together must equal one full run."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        # Small conv net with two residual additions, ending in a 10-way fc.
        input = fluid.data(name="image", shape=[None, 1, 28, 28])
        label = fluid.data(name="label", shape=[None, 1], dtype="int64")
        conv1 = conv_bn_layer(input, 8, 3, "conv1")
        conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
        sum1 = conv1 + conv2
        conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
        conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
        sum2 = conv4 + sum1
        conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
        conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
        out = fluid.layers.fc(conv6, size=10, act='softmax')
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    eval_program = main_program.clone(for_test=True)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    val_reader = paddle.fluid.io.batch(
        paddle.dataset.mnist.test(), batch_size=128)

    def eval_func(program):
        # Mean top-1 accuracy of `program` over the MNIST test set.
        feeder = fluid.DataFeeder(
            feed_list=['image', 'label'], place=place, program=program)
        acc_set = []
        for data in val_reader():
            acc_np = exe.run(program=program,
                             feed=feeder.feed(data),
                             fetch_list=[acc_top1])
            acc_set.append(float(acc_np[0]))
        acc_val_mean = numpy.array(acc_set).mean()
        print("acc_val_mean: {}".format(acc_val_mean))
        return acc_val_mean

    def eval_func_for_args(args):
        # Same as eval_func, but taking the program via an eval_args list.
        program = args[0]
        feeder = fluid.DataFeeder(
            feed_list=['image', 'label'], place=place, program=program)
        acc_set = []
        for data in val_reader():
            acc_np = exe.run(program=program,
                             feed=feeder.feed(data),
                             fetch_list=[acc_top1])
            acc_set.append(float(acc_np[0]))
        acc_val_mean = numpy.array(acc_set).mean()
        print("acc_val_mean: {}".format(acc_val_mean))
        return acc_val_mean

    # Two partial runs over disjoint pruning ratios...
    sensitivity(
        eval_program,
        place, ["conv4_weights"],
        eval_func,
        sensitivities_file="./sensitivities_file_0",
        pruned_ratios=[0.1, 0.2])

    sensitivity(
        eval_program,
        place, ["conv4_weights"],
        eval_func,
        sensitivities_file="./sensitivities_file_1",
        pruned_ratios=[0.3, 0.4])

    # ...one full run using the eval_args calling convention...
    params_sens = sensitivity(
        eval_program,
        place, ["conv4_weights"],
        eval_func_for_args,
        eval_args=[eval_program],
        sensitivities_file="./sensitivites_file_params",
        pruned_ratios=[0.1, 0.2, 0.3, 0.4])

    sens_0 = load_sensitivities('./sensitivities_file_0')
    sens_1 = load_sensitivities('./sensitivities_file_1')
    sens = merge_sensitive([sens_0, sens_1])

    # ...and one full reference run; all three must agree.
    origin_sens = sensitivity(
        eval_program,
        place, ["conv4_weights"],
        eval_func,
        sensitivities_file="./sensitivities_file_2",
        pruned_ratios=[0.1, 0.2, 0.3, 0.4])

    self.assertTrue(params_sens == origin_sens)
    self.assertTrue(sens == origin_sens)

    # get_ratios_by_loss should cover every parameter at both loss extremes.
    loss = 0.0
    ratios = get_ratios_by_loss(sens, loss)
    self.assertTrue(len(ratios) == len(sens))

    loss = min(list(sens.get('conv4_weights').values())) - 0.01
    ratios = get_ratios_by_loss(sens, loss)
    self.assertTrue(len(ratios) == len(sens))
def main():
    """Run text-recognition inference over the images in infer_img, decode the
    predictions per loss type (ctc / attention / srn), then export the
    evaluation graph as an inference model."""
    config = program.load_config(FLAGS.config)
    program.merge_config(FLAGS.opt)
    logger.info(config)
    char_ops = CharacterOps(config['Global'])
    loss_type = config['Global']['loss_type']
    config['Global']['char_ops'] = char_ops

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
    # check_gpu(use_gpu)
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    rec_model = create_module(config['Architecture']['function'])(params=config)
    startup_prog = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup_prog):
        with fluid.unique_name.guard():
            _, outputs = rec_model(mode="test")
            fetch_name_list = list(outputs.keys())
            fetch_varname_list = [outputs[v].name for v in fetch_name_list]
    eval_prog = eval_prog.clone(for_test=True)
    exe.run(startup_prog)
    init_model(config, eval_prog, exe)

    blobs = reader_main(config, 'test')()
    infer_img = config['Global']['infer_img']
    infer_list = get_image_file_list(infer_img)
    max_img_num = len(infer_list)
    if len(infer_list) == 0:
        logger.info("Can not find img in infer_img dir.")
    for i in range(max_img_num):
        logger.info("infer_img:%s" % infer_list[i])
        img = next(blobs)
        if loss_type != "srn":
            predict = exe.run(program=eval_prog,
                              feed={"image": img},
                              fetch_list=fetch_varname_list,
                              return_numpy=False)
        else:
            # SRN needs extra position/attention-bias inputs; the reader
            # yields them as img[1..4] alongside the image in img[0].
            encoder_word_pos_list = []
            gsrm_word_pos_list = []
            gsrm_slf_attn_bias1_list = []
            gsrm_slf_attn_bias2_list = []
            encoder_word_pos_list.append(img[1])
            gsrm_word_pos_list.append(img[2])
            gsrm_slf_attn_bias1_list.append(img[3])
            gsrm_slf_attn_bias2_list.append(img[4])
            encoder_word_pos_list = np.concatenate(encoder_word_pos_list, axis=0).astype(np.int64)
            gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list, axis=0).astype(np.int64)
            gsrm_slf_attn_bias1_list = np.concatenate(gsrm_slf_attn_bias1_list, axis=0).astype(np.float32)
            gsrm_slf_attn_bias2_list = np.concatenate(gsrm_slf_attn_bias2_list, axis=0).astype(np.float32)
            predict = exe.run(program=eval_prog, \
                feed={'image': img[0], 'encoder_word_pos': encoder_word_pos_list, 'gsrm_word_pos': gsrm_word_pos_list, 'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1_list, 'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2_list}, \
                fetch_list=fetch_varname_list, \
                return_numpy=False)
        if loss_type == "ctc":
            preds = np.array(predict[0])
            preds = preds.reshape(-1)
            preds_lod = predict[0].lod()[0]
            preds_text = char_ops.decode(preds)
            probs = np.array(predict[1])
            ind = np.argmax(probs, axis=1)
            # CTC: last class index is the blank token; keep non-blank steps.
            blank = probs.shape[1]
            valid_ind = np.where(ind != (blank - 1))[0]
            if len(valid_ind) == 0:
                continue
            score = np.mean(probs[valid_ind, ind[valid_ind]])
        elif loss_type == "attention":
            preds = np.array(predict[0])
            probs = np.array(predict[1])
            # Attention: token id 1 is the end marker; cut at the second one.
            end_pos = np.where(preds[0, :] == 1)[0]
            if len(end_pos) <= 1:
                preds = preds[0, 1:]
                score = np.mean(probs[0, 1:])
            else:
                preds = preds[0, 1:end_pos[1]]
                score = np.mean(probs[0, 1:end_pos[1]])
            preds = preds.reshape(-1)
            preds_text = char_ops.decode(preds)
        elif loss_type == "srn":
            char_num = char_ops.get_char_num()
            preds = np.array(predict[0])
            preds = preds.reshape(-1)
            probs = np.array(predict[1])
            ind = np.argmax(probs, axis=1)
            # SRN: last char index is the padding token.
            valid_ind = np.where(preds != int(char_num - 1))[0]
            if len(valid_ind) == 0:
                continue
            score = np.mean(probs[valid_ind, ind[valid_ind]])
            preds = preds[:valid_ind[-1] + 1]
            preds_text = char_ops.decode(preds)

        logger.info("\t index: {}".format(preds))
        logger.info("\t word : {}".format(preds_text))
        logger.info("\t score: {}".format(score))

    # save for inference model
    target_var = []
    for key, values in outputs.items():
        target_var.append(values)

    fluid.io.save_inference_model("./output/",
                                  feeded_var_names=['image'],
                                  target_vars=target_var,
                                  executor=exe,
                                  main_program=eval_prog,
                                  model_filename="model",
                                  params_filename="params")
def train():
    """
    train
    :return:
    """
    logger.info("start train YOLOv3, train params:%s", str(train_parameters))

    logger.info("create place, use gpu:" + str(train_parameters['use_gpu']))
    # place = fluid.CUDAPlace(0) if train_parameters['use_gpu'] else fluid.CPUPlace()
    # NOTE(review): `place` and `exe` are commented out here but used below —
    # they must be defined at module scope; confirm.

    logger.info("build network and program")
    train_program = fluid.Program()
    start_program = fluid.Program()
    eval_program = fluid.Program()
    test_program = fluid.Program()
    feeder, reader, loss, outputs = build_program_with_feeder(train_program, start_program, place)
    pred = build_program_with_feeder(test_program, start_program, istrain=False)
    cur_map, accum_map, eval_feeder = build_eval_program_with_feeder(eval_program, start_program)
    test_program = test_program.clone(for_test=True)
    eval_program = eval_program.clone(for_test=True)

    logger.info("build executor and init params")
    # exe = fluid.Executor(place)
    exe.run(start_program)
    train_fetch_list = [loss[0].name]
    load_pretrained_params(exe, train_program)
    fluid.contrib.model_stat.summary(train_program)  # Print out the model input / output size, total params, and FLOPS

    # Early-stop bookkeeping: stop when loss is low enough or has risen
    # rise_limit times in a row.
    stop_strategy = train_parameters['early_stop']
    rise_limit = stop_strategy['rise_limit']
    # sample_freq = stop_strategy['sample_frequency']
    # min_curr_map = stop_strategy['min_curr_map']
    min_loss = stop_strategy['min_loss']
    # stop_train = False
    rise_count = 0
    total_batch_count = 0
    train_temp_loss = 0
    current_best_pass_ = 0
    current_best_map = 0

    for pass_id in range(train_parameters["num_epochs"]):
        logger.info("current pass: {}, start read image".format(pass_id))
        batch_id = 0
        total_loss = 0.0
        for batch_id, data in enumerate(reader()):
            t1 = time.time()
            loss = exe.run(train_program, feed=feeder.feed(data), fetch_list=train_fetch_list)
            period = time.time() - t1
            loss = np.mean(np.array(loss))
            total_loss += loss
            batch_id += 1
            total_batch_count += 1
            if batch_id % 200 == 0:
                logger.info("pass {}, trainbatch {}, loss {}, time {}".format(pass_id, batch_id, loss, "%2.2f sec" % period))
        # NOTE(review): raises ZeroDivisionError if the reader yields nothing.
        pass_mean_loss = total_loss / batch_id
        logger.info("pass {0} train result, current pass mean loss: {1}".format(pass_id, pass_mean_loss))
        # Evaluate every other pass (and every pass from epoch 90 on).
        if pass_id >= 90 or pass_id % 2 == 0:
            cur_map_, accum_map_ = eval(test_program, [pred.name], eval_program, [cur_map.name, accum_map.name], eval_feeder)
            logger.info("{} epoch current pass map is {}, accum_map is {}".format(pass_id, cur_map_, accum_map_))
            if cur_map_ > current_best_map:
                # New best mAP: persist both the checkpoint and an inference model.
                current_best_map = cur_map_
                current_best_pass_ = pass_id
                logger.info("model save {} epcho train result, current best pass MAP {}".format(pass_id, current_best_map))
                fluid.io.save_persistables(dirname=train_parameters['save_model_dir'], main_program=train_program, executor=exe)
                fluid.io.save_inference_model(dirname=train_parameters['pretrained_model_dir'], feeded_var_names=['img'], target_vars=outputs[0], executor=exe, main_program=train_program)
            logger.info("best pass {} current best pass MAP is {}".format(current_best_pass_, current_best_map))
        if pass_mean_loss < min_loss:
            logger.info("Has reached the set optimum value, the training is over")
            break
        if rise_count > rise_limit:
            logger.info("rise_count > rise_limit, so early stop")
            break
        else:
            if pass_mean_loss > train_temp_loss:
                rise_count += 1
                train_temp_loss = pass_mean_loss
            else:
                rise_count = 0
                train_temp_loss = pass_mean_loss

    logger.info("end training")
def context(self, trainable=True, pretrained=True, override_params=None, phase='train'):
    """context for transfer learning.

    Args:
        trainable (bool): Set parameters in program to be trainable.
        pretrained (bool) : Whether to load pretrained model.
        override_params (dict|None): Optional overrides passed to EfficientNetB4.
        phase (str): One of 'train', 'dev', 'test', 'eval', 'predict'; selects
            training vs. inference behavior of the network (BN / dropout).

    Returns:
        inputs (dict): key is 'image', corresponding value is image tensor.
        outputs (dict): key is :
            'classification', corresponding value is the result of classification.
            'feature_map', corresponding value is the result of the layer before the fully connected layer.
        context_prog (fluid.Program): program for transfer learning.

    Raises:
        ValueError: If `phase` is not one of the accepted values.
    """
    # BUGFIX: the is_test values were inverted — inference phases must build
    # the graph in test mode (is_test=True) and 'train' in training mode.
    if phase in ["dev", "test", "predict", "eval"]:
        is_test = True
    elif phase in ["train"]:
        is_test = False
    else:
        raise ValueError(
            "Phase %s is error, which must be one of train, dev, test, eval and predict."
            % phase)

    context_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(context_prog, startup_prog):
        with fluid.unique_name.guard():
            image = fluid.layers.data(name="image", shape=[3, 224, 224], dtype="float32")
            efficientnet_b4 = EfficientNetB4(override_params=override_params)
            output, feature_map = efficientnet_b4.net(
                input=image, class_dim=len(self.label_list), is_test=is_test)

            # Prefix all variables with a module-unique tag so this program can
            # coexist with other hub modules, then resolve the prefixed names
            # back to variables for the returned dicts.
            name_prefix = '@HUB_{}@'.format(self.name)
            inputs = {'image': name_prefix + image.name}
            outputs = {
                'classification': name_prefix + output.name,
                'feature_map': name_prefix + feature_map.name
            }
            add_vars_prefix(context_prog, name_prefix)
            add_vars_prefix(startup_prog, name_prefix)
            global_vars = context_prog.global_block().vars
            inputs = {key: global_vars[value] for key, value in inputs.items()}
            outputs = {key: global_vars[value] for key, value in outputs.items()}

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            # pretrained: load saved parameters when present, otherwise run the
            # startup program to get a random initialization.
            if pretrained:
                def _if_exist(var):
                    b = os.path.exists(
                        os.path.join(self.default_pretrained_model_path, var.name))
                    return b

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_if_exist)
            else:
                exe.run(startup_prog)
            # trainable: apply the requested trainability to every parameter.
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
    return inputs, outputs, context_prog
def test_compression(self):
    """Build a student/teacher MobileNet pair on MNIST and run the paddleslim
    distillation Compressor driven by ./distillation/compress.yaml."""
    # Distillation config requires CUDA; skip silently otherwise.
    if not fluid.core.is_compiled_with_cuda():
        return
    class_dim = 10
    image_shape = [1, 28, 28]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    # Gradients must flow to the input for the distillation losses.
    image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    out = MobileNet(name="student").net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=False)

    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(
            boundaries=[5, 10], values=[0.01, 0.001, 0.0001]),
        regularization=fluid.regularizer.L2Decay(4e-5))

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)

    val_feed_list = [('img', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5', acc_top5.name)]

    train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=128)
    train_feed_list = [('img', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    # define teacher program
    teacher_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(teacher_program, startup_program):
        # Share the student's input variable so both nets see the same feed.
        img = teacher_program.global_block()._clone_variable(
            image, force_persistable=False)
        predict = MobileNet(name="teacher").net(input=img, class_dim=class_dim)

    exe.run(startup_program)

    com_pass = Compressor(
        place,
        fluid.global_scope(),
        fluid.default_main_program(),
        train_reader=train_reader,
        train_feed_list=train_feed_list,
        train_fetch_list=train_fetch_list,
        eval_program=val_program,
        eval_reader=val_reader,
        eval_feed_list=val_feed_list,
        eval_fetch_list=val_fetch_list,
        teacher_programs=[teacher_program.clone(for_test=True)],
        train_optimizer=optimizer,
        distiller_optimizer=optimizer)
    com_pass.config('./distillation/compress.yaml')
    eval_graph = com_pass.run()
def train(args):
    """Train a conditional GAN on MNIST: one discriminator program (d_program)
    and one generator+discriminator program (dg_program); the discriminator is
    updated on real and fake batches, then the generator NUM_TRAIN_TIMES_OF_DG
    times per batch."""
    # Fixed seeds for continuous-evaluation (CE) runs.
    if args.run_ce:
        np.random.seed(10)
        fluid.default_startup_program().random_seed = 90
    d_program = fluid.Program()
    dg_program = fluid.Program()

    # Discriminator graph: D(img | conditions) vs. real/fake label.
    with fluid.program_guard(d_program):
        conditions = fluid.layers.data(name='conditions', shape=[1], dtype='float32')
        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='float32')
        d_logit = D_cond(img, conditions)
        d_loss = loss(d_logit, label)

    # Generator graph: G(noise | conditions) pushed through D, trained to
    # make D output 1. g_program/g_program_test are clones taken before D is
    # appended, so they only generate images.
    with fluid.program_guard(dg_program):
        conditions = fluid.layers.data(name='conditions', shape=[1], dtype='float32')
        noise = fluid.layers.data(name='noise', shape=[NOISE_SIZE], dtype='float32')
        g_img = G_cond(z=noise, y=conditions)

        g_program = dg_program.clone()
        g_program_test = dg_program.clone(for_test=True)

        dg_logit = D_cond(g_img, conditions)
        dg_loss = loss(
            dg_logit,
            fluid.layers.fill_constant_batch_size_like(input=noise,
                                                       dtype='float32',
                                                       shape=[-1, 1],
                                                       value=1.0))

    opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE)
    opt.minimize(loss=d_loss)
    # Restrict the generator update to generator parameters only.
    parameters = [p.name for p in g_program.global_block().all_parameters()]
    opt.minimize(loss=dg_loss, parameter_list=parameters)

    exe = fluid.Executor(fluid.CPUPlace())
    if args.use_gpu:
        exe = fluid.Executor(fluid.CUDAPlace(0))
    exe.run(fluid.default_startup_program())

    if args.run_ce:
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=args.batch_size)
    else:
        train_reader = paddle.batch(
            paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=60000),
            batch_size=args.batch_size)

    NUM_TRAIN_TIMES_OF_DG = 2
    # Fixed noise so the periodically-saved sample grids are comparable.
    const_n = np.random.uniform(
        low=-1.0, high=1.0, size=[args.batch_size, NOISE_SIZE]).astype('float32')
    t_time = 0
    losses = [[], []]
    for pass_id in range(args.epoch):
        for batch_id, data in enumerate(train_reader()):
            # Skip ragged final batches; the graphs assume full batch_size.
            if len(data) != args.batch_size:
                continue
            noise_data = np.random.uniform(
                low=-1.0, high=1.0, size=[args.batch_size, NOISE_SIZE]).astype('float32')
            real_image = np.array(list(map(lambda x: x[0], data))).reshape(
                -1, 784).astype('float32')
            conditions_data = np.array([x[1] for x in data]).reshape([-1, 1]).astype("float32")
            real_labels = np.ones(shape=[real_image.shape[0], 1], dtype='float32')
            fake_labels = np.zeros(shape=[real_image.shape[0], 1], dtype='float32')
            total_label = np.concatenate([real_labels, fake_labels])
            s_time = time.time()

            generated_image = exe.run(g_program,
                                      feed={
                                          'noise': noise_data,
                                          'conditions': conditions_data
                                      },
                                      fetch_list=[g_img])[0]

            total_images = np.concatenate([real_image, generated_image])

            # Discriminator step: fake batch then real batch.
            d_loss_1 = exe.run(d_program,
                               feed={
                                   'img': generated_image,
                                   'label': fake_labels,
                                   'conditions': conditions_data
                               },
                               fetch_list=[d_loss])[0][0]

            d_loss_2 = exe.run(d_program,
                               feed={
                                   'img': real_image,
                                   'label': real_labels,
                                   'conditions': conditions_data
                               },
                               fetch_list=[d_loss])[0][0]

            d_loss_n = d_loss_1 + d_loss_2
            losses[0].append(d_loss_n)

            # Generator steps (trained more often than D).
            for _ in six.moves.xrange(NUM_TRAIN_TIMES_OF_DG):
                noise_data = np.random.uniform(
                    low=-1.0, high=1.0,
                    size=[args.batch_size, NOISE_SIZE]).astype('float32')
                dg_loss_n = exe.run(dg_program,
                                    feed={
                                        'noise': noise_data,
                                        'conditions': conditions_data
                                    },
                                    fetch_list=[dg_loss])[0][0]
                losses[1].append(dg_loss_n)

            batch_time = time.time() - s_time
            t_time += batch_time

            if batch_id % 10 == 0 and not args.run_ce:
                if not os.path.exists(args.output):
                    os.makedirs(args.output)
                # generate image each batch
                generated_images = exe.run(g_program_test,
                                           feed={
                                               'noise': const_n,
                                               'conditions': conditions_data
                                           },
                                           fetch_list=[g_img])[0]
                total_images = np.concatenate([real_image, generated_images])
                fig = plot(total_images)
                msg = "Epoch ID={0}\n Batch ID={1}\n D-Loss={2}\n DG-Loss={3}\n gen={4}\n " \
                      "Batch_time_cost={5:.2f}".format(
                          pass_id, batch_id, d_loss_n, dg_loss_n, check(generated_images), batch_time)
                print(msg)
                plt.title(msg)
                plt.savefig('{}/{:04d}_{:04d}.png'.format(
                    args.output, pass_id, batch_id), bbox_inches='tight')
                plt.close(fig)
    if args.run_ce:
        print("kpis,cgan_d_train_cost,{}".format(np.mean(losses[0])))
        print("kpis,cgan_g_train_cost,{}".format(np.mean(losses[1])))
        print("kpis,cgan_duration,{}".format(t_time / args.epoch))
def main():
    """Quantization-aware detection inference: build the test graph, apply
    quant_aware/convert, run over the configured images, and save visualized
    detection results."""
    cfg = load_config(FLAGS.config)

    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    dataset = cfg.TestReader['dataset']

    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    dataset.set_images(test_images)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    model = create(main_arch)

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            inputs_def = cfg['TestReader']['inputs_def']
            feed_vars, loader = model.build_inputs(**inputs_def)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)

    reader = create_reader(cfg.TestReader)
    # When iterable mode, set set_sample_list_generator(reader, place)
    loader.set_sample_list_generator(reader)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }
    # Insert fake-quant/dequant ops, load weights, then fold to the final
    # quantized inference program.
    infer_prog = quant_aware(infer_prog, place, config, for_test=True)

    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)
    infer_prog = convert(infer_prog, place, config, save_int8=False)

    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
        "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)

    # parse dataset category
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info

    anno_file = dataset.get_anno()
    with_background = dataset.with_background
    use_default_label = dataset.use_default_label

    clsid2catid, catid2name = get_category_info(anno_file, with_background, use_default_label)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    imid2path = dataset.get_imid2path()

    iter_id = 0
    try:
        loader.start()
        while True:
            outs = exe.run(infer_prog, fetch_list=values, return_numpy=False)
            res = {
                k: (np.array(v), v.recursive_sequence_lengths())
                for k, v in zip(keys, outs)
            }
            logger.info('Infer iter {}'.format(iter_id))
            iter_id += 1

            bbox_results = None
            mask_results = None
            if 'bbox' in res:
                bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
            if 'mask' in res:
                mask_results = mask2out([res], clsid2catid, model.mask_head.resolution)

            # visualize result
            im_ids = res['im_id'][0]
            for im_id in im_ids:
                image_path = imid2path[int(im_id)]
                image = Image.open(image_path).convert('RGB')
                image = visualize_results(image, int(im_id), catid2name,
                                          FLAGS.draw_threshold, bbox_results,
                                          mask_results)
                save_name = get_save_image_name(FLAGS.output_dir, image_path)
                logger.info("Detection bbox results save in {}".format(save_name))
                image.save(save_name, quality=95)
    except (StopIteration, fluid.core.EOFException):
        # Loader exhausted — normal end of inference.
        loader.reset()
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import paddle import paddle.fluid as fluid import paddle.dataset.flowers as flowers batch_size = 12 data_shape = [3, 224, 224] with fluid.program_guard(fluid.Program(), fluid.Program()): reader = paddle.batch(flowers.train(), batch_size=batch_size) feeder = fluid.DataFeeder( feed_list=[ # order is image and label fluid.layers.data(name='image', shape=data_shape, dtype='float32'), fluid.layers.data(name='label', shape=[1], dtype='int64'), ], place=fluid.CPUPlace()) fluid.recordio_writer.convert_reader_to_recordio_file( './flowers_bs_12_3_224_224.recordio', reader, feeder)
import threading import numpy as np import os from paddle.fluid.param_attr import ParamAttr from paddle.fluid.regularizer import L2Decay from layers.fc import FC from layers.bn import BatchNorm from layers.loss import MSELoss if __name__ == '__main__': use_gpu = False lr = 0.001 startup_prog = fluid.Program() train_prog = fluid.Program() with fluid.program_guard(train_prog, startup_prog): with fluid.unique_name.guard(): inputs = P.data(name='input_1', shape=[-1, 3], append_batch_size=False, dtype='float32') fc01_out_tensor = fluid.layers.fc( input=inputs, size=8, param_attr=ParamAttr(name="fc01_weights"), bias_attr=ParamAttr(name="fc01_bias")) fc02_out_tensor = fluid.layers.fc( input=fc01_out_tensor, size=8,
def parallel_exe(self, train_inputs, test_inputs, seed):
    """Train SE-ResNeXt with a ParallelExecutor and return diagnostics.

    Args:
        train_inputs: iterable of training batches (image, label pairs).
        test_inputs: iterable of test batches, run through the single-card
            test program clone.
        seed: random seed applied to the startup program.

    Returns:
        (losses, grads, test_losses): per-batch mean training losses, the
        first-64-filter slice of 'conv2d_0.w_0@GRAD' per batch, and the
        per-batch test losses.
    """
    import collections

    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = seed
    with fluid.program_guard(main, startup):
        data = fluid.layers.data(
            name='image', shape=[3, 224, 224], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        out = SE_ResNeXt(input=data, class_dim=102)
        loss = fluid.layers.cross_entropy(input=out, label=label)
        loss = fluid.layers.mean(loss)
        # Clone before adding the optimizer ops so the test program
        # contains only the forward graph.
        test_program = main.clone(for_test=True)
        opt = fluid.optimizer.SGD(learning_rate=0.1)
        opt.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup)

    grad_var = fluid.framework.get_var('conv2d_0.w_0@GRAD')
    fetch_list = [loss.name, grad_var.name]
    feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
    pexe = fluid.ParallelExecutor(
        use_cuda=True, loss_name=loss.name, main_program=main)

    # The set of persistable parameters does not change between batches,
    # so build it once instead of once per iteration.
    # FIX: dict.iteritems() is Python-2-only; .items() works on both.
    all_vars = main.global_block().vars
    all_parameters = collections.OrderedDict()
    for k, v in all_vars.items():
        if v.persistable and 'velocity' not in k:
            all_parameters[k] = v

    losses = []
    grads = []
    test_losses = []
    # NOTE: loop variable renamed from `data` so it no longer shadows the
    # fluid.layers.data Variable defined above.
    for batch in train_inputs:
        # Debug dump: L1 norm of every persistable parameter per batch.
        print('Total vars: %d\n' % (len(all_parameters)))
        for k, v in all_parameters.items():
            var = fluid.global_scope().find_var(k).get_tensor()
            print('!!%s: %f\n' % (k, np.sum(np.abs(np.array(var)))))
        ret = pexe.run(fetch_list, feed=feeder.feed(batch))
        loss_v = np.array(ret[0])
        losses.append(np.mean(loss_v))
        # Keep only the first 64 output filters of the conv gradient.
        grads.append(np.array(ret[1])[0:64, :, :, :])

    for test_data in test_inputs:
        test_loss = exe.run(test_program,
                            feed=feeder.feed(test_data),
                            fetch_list=[loss])
        test_losses.append(test_loss[0][0])
    return losses, grads, test_losses
def test_dataset_fleet(self):
    """
    Testcase for InMemoryDataset from create to run.
    """
    # Entire body is currently skipped; kept for when pslib UTs land.
    self.skipTest("parameter server will add pslib UT later")
    # Write two small slot-format sample files used as the dataset input.
    with open("test_in_memory_dataset2_run_a.txt", "w") as f:
        data = "1 1 2 3 3 4 5 5 5 5 1 1\n"
        data += "1 2 2 3 4 4 6 6 6 6 1 2\n"
        data += "1 3 2 3 5 4 7 7 7 7 1 3\n"
        f.write(data)
    with open("test_in_memory_dataset2_run_b.txt", "w") as f:
        data = "1 4 2 3 3 4 5 5 5 5 1 4\n"
        data += "1 5 2 3 4 4 6 6 6 6 1 5\n"
        data += "1 6 2 3 5 4 7 7 7 7 1 6\n"
        data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
        f.write(data)

    train_program = fluid.Program()
    startup_program = fluid.Program()
    scope = fluid.Scope()
    # Imported locally because the fleet parameter-server module may not
    # be importable in all environments (needs MPI support).
    from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
    with fluid.program_guard(train_program, startup_program):
        # One lod-level-1 float input per slot.
        slots = ["slot1_ff", "slot2_ff", "slot3_ff", "slot4_ff"]
        slots_vars = []
        for slot in slots:
            var = fluid.layers.data(
                name=slot, shape=[1], dtype="float32", lod_level=1)
            slots_vars.append(var)
        # A trivial cost (first slot minus last slot) just to give the
        # optimizer something to minimize.
        fake_cost = fluid.layers.elementwise_sub(slots_vars[0],
                                                 slots_vars[-1])
        fake_cost = fluid.layers.mean(fake_cost)
    with fluid.scope_guard(scope):
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        # fleet.init()/distributed_optimizer need mpi4py; degrade to a
        # warning so the test can still exercise the dataset path.
        try:
            fleet.init()
        except ImportError as e:
            print("warning: no mpi4py")
        adam = fluid.optimizer.Adam(learning_rate=0.000005)
        try:
            adam = fleet.distributed_optimizer(adam)
            adam.minimize([fake_cost], [scope])
        except AttributeError as e:
            print("warning: no mpi")
        except ImportError as e:
            print("warning: no mpi4py")
        exe.run(startup_program)
        # Build the in-memory dataset over the two sample files and load
        # them into memory (pipe_command="cat" passes lines through).
        dataset = paddle.distributed.InMemoryDataset()
        dataset.init(batch_size=32,
                     thread_num=3,
                     pipe_command="cat",
                     use_var=slots_vars)
        dataset.set_filelist([
            "test_in_memory_dataset2_run_a.txt",
            "test_in_memory_dataset2_run_b.txt"
        ])
        dataset.load_into_memory()
        # Reset fleet internals so later tests start from a clean state.
        fleet._opt_info = None
        fleet._fleet_ptr = None
    # Clean up the temporary sample files.
    os.remove("./test_in_memory_dataset2_run_a.txt")
    os.remove("./test_in_memory_dataset2_run_b.txt")
def test_gnn_float32(self):
    """Check that a GCN trained one step in static graph mode matches two
    independent dygraph runs (same seed): losses equal, weights allclose.
    """
    seed = 90
    startup = fluid.Program()
    startup.random_seed = seed
    main = fluid.Program()
    main.random_seed = seed
    scope = fluid.core.Scope()
    # --- static-graph reference run -----------------------------------
    with new_program_scope(main=main, startup=startup, scope=scope):
        features = fluid.layers.data(name='features',
                                     shape=[1, 100, 50],
                                     dtype='float32',
                                     append_batch_size=False)
        # Use selected rows when it's supported.
        adj = fluid.layers.data(name='adj',
                                shape=[1, 100, 100],
                                dtype='float32',
                                append_batch_size=False)
        labels = fluid.layers.data(name='labels',
                                   shape=[100, 1],
                                   dtype='int64',
                                   append_batch_size=False)

        model = GCN('test_gcn', 50)
        logits = model(features, adj)
        # Drop the leading batch dim so logits align with labels [100, 1].
        logits = fluid.layers.reshape(logits, logits.shape[1:])
        # In other example, it's nll with log_softmax. However, paddle's
        # log_loss only supports binary classification now.
        loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
        loss = fluid.layers.reduce_sum(loss)

        adam = AdamOptimizer(learning_rate=1e-3)
        adam.minimize(loss)
        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        exe.run(startup)
        # All-ones inputs keep the comparison deterministic.
        static_loss = exe.run(feed={
            'features': np.ones([1, 100, 50], dtype=np.float32),
            'adj': np.ones([1, 100, 100], dtype=np.float32),
            'labels': np.ones([100, 1], dtype=np.int64)
        },
                              fetch_list=[loss])[0]
        # Snapshot the trained GC weight for comparison below.
        static_weight = np.array(
            scope.find_var(model.gc.weight.name).get_tensor())

    # --- first dygraph run (must reproduce the static result) ----------
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        features = np.ones([1, 100, 50], dtype=np.float32)
        # Use selected rows when it's supported.
        adj = np.ones([1, 100, 100], dtype=np.float32)
        labels = np.ones([100, 1], dtype=np.int64)

        model = GCN('test_gcn', 50)
        logits = model(to_variable(features), to_variable(adj))
        logits = fluid.layers.reshape(logits, logits.shape[1:])
        # In other example, it's nll with log_softmax. However, paddle's
        # log_loss only supports binary classification now.
        loss = fluid.layers.softmax_with_cross_entropy(
            logits, to_variable(labels))
        loss = fluid.layers.reduce_sum(loss)
        loss.backward()
        adam = AdamOptimizer(learning_rate=1e-3,
                             parameter_list=model.parameters())

        adam.minimize(loss)
        model.clear_gradients()
        loss_value = loss.numpy()
        model_gc_weight_value = model.gc.weight.numpy()

    # --- second dygraph run (checks dygraph is itself deterministic) ---
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        features2 = np.ones([1, 100, 50], dtype=np.float32)
        # Use selected rows when it's supported.
        adj2 = np.ones([1, 100, 100], dtype=np.float32)
        labels2 = np.ones([100, 1], dtype=np.int64)

        model2 = GCN('test_gcn', 50)
        logits2 = model2(to_variable(features2), to_variable(adj2))
        logits2 = fluid.layers.reshape(logits2, logits2.shape[1:])
        # In other example, it's nll with log_softmax. However, paddle's
        # log_loss only supports binary classification now.
        loss2 = fluid.layers.softmax_with_cross_entropy(
            logits2, to_variable(labels2))
        loss2 = fluid.layers.reduce_sum(loss2)
        loss2.backward()
        adam2 = AdamOptimizer(learning_rate=1e-3,
                              parameter_list=model2.parameters())
        adam2.minimize(loss2)
        model2.clear_gradients()
        loss2_value = loss2.numpy()
        model2_gc_weight_value = model2.gc.weight.numpy()

    # Static and both dygraph runs must agree on loss and weights.
    self.assertEqual(static_loss, loss_value)
    self.assertTrue(np.allclose(static_weight, model_gc_weight_value))
    self.assertEqual(static_loss, loss2_value)
    self.assertTrue(np.allclose(static_weight, model2_gc_weight_value))
    sys.stderr.write('%s %s\n' % (static_loss, loss_value))
def main():
    """Train a 2-class segmentation model and periodically evaluate it.

    Builds the network and optimizer from the global ``cfg``, trains for
    ``cfg.TRAIN.EPOCHS`` epochs, logs loss/mIoU to VisualDL, snapshots
    persistables every ``SNAPSHOT_BATCH`` steps (when ``DO_EVAL`` is on),
    and exports an inference model whenever the evaluated mIoU improves.

    Raises:
        Exception: if ``cfg.TRAIN.OPTIMIZER`` names an unknown optimizer.
    """
    train_program = fluid.Program()
    train_init = fluid.Program()
    with fluid.program_guard(train_program, train_init):
        image = fluid.layers.data(name="image",
                                  shape=[cfg.TRAIN.THICKNESS, 512, 512],
                                  dtype="float32")
        label = fluid.layers.data(name="label",
                                  shape=[1, 512, 512],
                                  dtype="int32")
        train_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=cfg.TRAIN.BATCH_SIZE * 2,
            iterable=True,
            use_double_buffer=True,
        )
        prediction = create_model(image, 2)
        avg_loss = loss.create_loss(prediction, label, 2)
        miou = loss.mean_iou(prediction, label, 2)

        # Weight regularization (L1/L2 or none).
        if cfg.TRAIN.REG_TYPE == "L1":
            decay = paddle.fluid.regularizer.L1Decay(cfg.TRAIN.REG_COEFF)
        elif cfg.TRAIN.REG_TYPE == "L2":
            decay = paddle.fluid.regularizer.L2Decay(cfg.TRAIN.REG_COEFF)
        else:
            decay = None

        # Optimizer selection with piecewise-decayed learning rate.
        lr = fluid.layers.piecewise_decay(boundaries=cfg.TRAIN.BOUNDARIES,
                                          values=cfg.TRAIN.LR)
        if cfg.TRAIN.OPTIMIZER == "adam":
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=decay,
            )
        elif cfg.TRAIN.OPTIMIZER == "sgd":
            optimizer = fluid.optimizer.SGDOptimizer(learning_rate=lr,
                                                     regularization=decay)
        # FIX: was `cfg.TRAIN.OPTIMIZE` (typo), which made the momentum
        # branch unreachable and raised the "unknown optimizer" error.
        elif cfg.TRAIN.OPTIMIZER == "momentum":
            optimizer = fluid.optimizer.Momentum(
                momentum=0.9,
                learning_rate=lr,
                regularization=decay,
            )
        else:
            raise Exception("错误的优化器类型: {}".format(cfg.TRAIN.OPTIMIZER))
        optimizer.minimize(avg_loss)

    places = fluid.CUDAPlace(0) if cfg.TRAIN.USE_GPU else fluid.CPUPlace()
    exe = fluid.Executor(places)
    exe.run(train_init)
    exe_test = fluid.Executor(places)

    test_program = train_program.clone(for_test=True)
    compiled_train_program = fluid.CompiledProgram(
        train_program).with_data_parallel(loss_name=avg_loss.name)

    if cfg.TRAIN.PRETRAINED_WEIGHT != "":
        # FIX: corrected "paramaters" typo in the log message.
        print("Loading parameters")
        fluid.io.load_persistables(exe, cfg.TRAIN.PRETRAINED_WEIGHT,
                                   train_program)

    train_reader = data_reader(0, 8)
    train_loader.set_sample_generator(train_reader,
                                      batch_size=cfg.TRAIN.BATCH_SIZE,
                                      places=places)
    test_reader = paddle.batch(data_reader(8, 10), cfg.INFER.BATCH_SIZE)
    test_feeder = fluid.DataFeeder(place=places, feed_list=[image, label])
    writer = LogWriter(logdir="/home/aistudio/log/{}".format(datetime.now()))

    step = 0
    best_miou = 0
    # FIX: initialize eval_miou so the "save best" check below cannot hit a
    # NameError when cfg.TRAIN.DO_EVAL is False.
    eval_miou = 0
    for pass_id in range(cfg.TRAIN.EPOCHS):
        for train_data in train_loader():
            step += 1
            avg_loss_value, miou_value = exe.run(compiled_train_program,
                                                 feed=train_data,
                                                 fetch_list=[avg_loss, miou])
            writer.add_scalar(tag="train_loss",
                              step=step,
                              value=avg_loss_value[0])
            writer.add_scalar(tag="train_miou",
                              step=step,
                              value=miou_value[0])
            if step % cfg.TRAIN.DISP_BATCH == 0:
                print("\tTrain pass {}, Step {}, Cost {}, Miou {}".format(
                    pass_id, step, avg_loss_value[0], miou_value[0]))
            if math.isnan(float(avg_loss_value[0])):
                sys.exit("Got NaN loss, training failed.")

            # Periodic evaluation + checkpoint.
            if step % cfg.TRAIN.SNAPSHOT_BATCH == 0 and cfg.TRAIN.DO_EVAL:
                test_step = 0
                eval_miou = 0
                test_losses = []
                test_mious = []
                for test_data in test_reader():
                    test_step += 1
                    preds, test_loss, test_miou = exe_test.run(
                        test_program,
                        feed=test_feeder.feed(test_data),
                        fetch_list=[prediction, avg_loss, miou],
                    )
                    test_losses.append(test_loss[0])
                    test_mious.append(test_miou[0])
                    if test_step % cfg.TRAIN.DISP_BATCH == 0:
                        print("\t\tTest Loss: {} , Miou: {}".format(
                            test_loss[0], test_miou[0]))
                eval_miou = np.average(np.array(test_mious))
                writer.add_scalar(
                    tag="test_miou",
                    step=step,
                    value=eval_miou,
                )
                print("Test loss: {} ,miou: {}".format(
                    np.average(np.array(test_losses)), eval_miou))
                ckpt_dir = os.path.join(cfg.TRAIN.CKPT_MODEL_PATH,
                                        str(step) + "_" + str(eval_miou))
                fluid.io.save_persistables(exe, ckpt_dir, train_program)
                print("此前最高的测试MIOU是: ", best_miou)

            # Export an inference model when the evaluated mIoU improves.
            if step % cfg.TRAIN.SNAPSHOT_BATCH == 0 and eval_miou > best_miou:
                best_miou = eval_miou
                print("正在保存第 {} step的权重".format(step))
                fluid.io.save_inference_model(
                    cfg.TRAIN.INF_MODEL_PATH,
                    feeded_var_names=["image"],
                    target_vars=[prediction],
                    executor=exe,
                    main_program=train_program,
                )
def train():
    """Train YOLOv3 and snapshot the model whenever an epoch's mean loss
    improves on the best seen so far (plus a final save at the end).

    All settings come from the module-level ``train_parameters`` dict; the
    program/feeder/loss come from ``build_program_with_feeder``.
    """
    config_init_utils.init_train_parameters()
    logger.info("start train YOLOv3, train params:%s", str(train_parameters))

    logger.info("create place, use gpu:" + str(train_parameters['use_gpu']))
    place = fluid.CUDAPlace(
        0) if train_parameters['use_gpu'] else fluid.CPUPlace()

    logger.info("build network and program")
    train_program = fluid.Program()
    startup_program = fluid.Program()
    feeder, reader, loss = build_program_with_feeder(train_program,
                                                     startup_program, place)

    logger.info("build executor and init params")
    exe = fluid.Executor(place)
    exe.run(startup_program)
    # Capture the fetch name once; only the name is needed in the loop.
    train_fetch_list = [loss.name]
    load_pretrained_params(exe, train_program)

    # Early-stop knobs (read here; no early-stop logic is wired up below).
    stop_strategy = train_parameters['early_stop']
    successive_limit = stop_strategy['successive_limit']
    sample_freq = stop_strategy['sample_frequency']
    min_curr_map = stop_strategy['min_curr_map']
    min_loss = stop_strategy['min_loss']
    stop_train = False
    successive_count = 0
    total_batch_count = 0
    valid_thresh = train_parameters['valid_thresh']
    nms_thresh = train_parameters['nms_thresh']
    current_best_loss = 10000000000.0

    for pass_id in range(train_parameters["num_epochs"]):
        logger.info("current pass: {}, start read image".format(pass_id))
        batch_id = 0
        total_loss = 0.0
        # enumerate(..., 1) gives a 1-based batch counter directly,
        # replacing the original's fragile manual `batch_id += 1` that
        # fought with enumerate's own rebinding.
        for batch_id, data in enumerate(reader(), 1):
            t1 = time.time()
            # FIX: fetch into a fresh name instead of shadowing the fluid
            # Variable `loss` (the original rebound `loss` each iteration).
            fetched = exe.run(train_program,
                              feed=feeder.feed(data),
                              fetch_list=train_fetch_list)
            period = time.time() - t1
            batch_loss = np.mean(np.array(fetched))
            total_loss += batch_loss
            total_batch_count += 1

            if batch_id % 10 == 0:  # throttle log output frequency
                logger.info("pass {}, trainbatch {}, loss {} time {}".format(
                    pass_id, batch_id, batch_loss, "%2.2f sec" % period))

        if batch_id == 0:
            # FIX: an empty reader used to raise ZeroDivisionError here.
            logger.info("pass {} produced no batches, skipping".format(
                pass_id))
            continue
        pass_mean_loss = total_loss / batch_id
        logger.info(
            "pass {0} train result, current pass mean loss: {1}".format(
                pass_id, pass_mean_loss))
        # Save after every epoch that improves on the best mean loss.
        if pass_mean_loss < current_best_loss:
            logger.info(
                "temp save {} epcho train result, current best pass loss {}".
                format(pass_id, pass_mean_loss))
            fluid.io.save_persistables(
                dirname=train_parameters['save_model_dir'],
                main_program=train_program,
                executor=exe)
            current_best_loss = pass_mean_loss

    logger.info("training till last epcho, end training")
    fluid.io.save_persistables(dirname=train_parameters['save_model_dir'],
                               main_program=train_program,
                               executor=exe)
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Run LaneNet inference over a file list and save visualizations.

    For every image: runs the test program, post-processes the binary and
    instance segmentation outputs, shows debug figures, and writes the
    binary mask, lane overlay, and instance mask PNGs under
    ``<vis_dir>/visual_results``.

    Args:
        cfg: global config object (DATASET/TEST sections are read).
        vis_file_list: file list to visualize; defaults to
            ``cfg.DATASET.TEST_FILE_LIST``.
        use_gpu: run on CUDA device 0 when True.
        vis_dir: root directory for the output images.
        also_save_raw_results: additionally create ``<vis_dir>/raw_results``.
        ckpt_dir: checkpoint directory; defaults to ``cfg.TEST.TEST_MODEL``.
        log_writer, local_test, **kwargs: accepted for interface
            compatibility; unused here.
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = LaneNetDataset(file_list=vis_file_list,
                             mode=ModelPhase.VISUAL,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        # Prefer the unified fluid.load format; fall back to the legacy
        # per-variable layout.
        # FIX: bare `except:` narrowed to `except Exception` so it no
        # longer swallows KeyboardInterrupt/SystemExit.
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)

    postprocessor = lanenet_postprocess.LaneNetPostProcessor()
    for imgs, grts, grts_instance, img_names, valid_shapes, org_imgs in test_reader:
        segLogits, emLogits = exe.run(program=test_prog,
                                      feed={'image': imgs},
                                      fetch_list=fetch_list,
                                      return_numpy=True)
        num_imgs = segLogits.shape[0]

        for i in range(num_imgs):
            gt_image = org_imgs[i]
            binary_seg_image, instance_seg_image = segLogits[i].squeeze(
                -1), emLogits[i].transpose((1, 2, 0))

            postprocess_result = postprocessor.postprocess(
                binary_seg_result=binary_seg_image,
                instance_seg_result=instance_seg_image,
                source_image=gt_image)
            pred_binary_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_binary'))
            pred_lane_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_lane'))
            pred_instance_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_instance'))
            dirname = os.path.dirname(pred_binary_fn)
            makedirs(dirname)

            mask_image = postprocess_result['mask_image']
            # FIX: channel loop variable renamed from `i`, which shadowed
            # the outer image index.
            for ch in range(4):
                instance_seg_image[:, :, ch] = minmax_scale(
                    instance_seg_image[:, :, ch])
            embedding_image = np.array(instance_seg_image).astype(np.uint8)

            # Debug figures; plt.show() blocks until the windows close.
            plt.figure('mask_image')
            plt.imshow(mask_image[:, :, (2, 1, 0)])
            plt.figure('src_image')
            plt.imshow(gt_image[:, :, (2, 1, 0)])
            plt.figure('instance_image')
            plt.imshow(embedding_image[:, :, (2, 1, 0)])
            plt.figure('binary_image')
            plt.imshow(binary_seg_image * 255, cmap='gray')
            plt.show()

            cv2.imwrite(pred_binary_fn,
                        np.array(binary_seg_image * 255).astype(np.uint8))
            cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
            cv2.imwrite(pred_instance_fn, mask_image)
            print(pred_lane_fn, 'saved!')
def main(args):
    """Train the infection-forecast GNN, validating every 10 steps and
    writing a submission whenever the validation RMSLE improves.

    Args:
        args: parsed CLI namespace (seed, batch_size, use_cuda, opt, lr,
            epochs, n_his, n_pred, ...).
    """
    np.random.seed(args.seed)
    random.seed(args.seed)

    dataset = InfectDataset(args)
    log.info("num examples: %s" % len(dataset))
    train_dataset, valid_dataset, test_dataset = data_split(dataset, args)
    train_loader = DataLoader(train_dataset, args.batch_size, shuffle=False)
    # batch_size=-1: evaluate the whole split in a single batch.
    test_loader = DataLoader(test_dataset, batch_size=-1, shuffle=False)
    log.info("Train examples: %s" % len(train_dataset))
    log.info("Test examples: %s" % len(test_dataset))
    if valid_dataset is not None:
        valid_loader = DataLoader(valid_dataset, batch_size=-1, shuffle=False)
        log.info("Valid examples: %s" % len(valid_dataset))
    else:
        valid_loader = None

    gf = GraphFactory(args)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    train_program.random_seed = args.seed
    startup_program.random_seed = args.seed

    # Forward graph: a PGL graph wrapper feeding the model.
    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            "gw",
            node_feat=[('norm', [None, 1], "float32")],
            edge_feat=[('weights', [None, 1], "float32")])
        model = Model(args, gw)
        model.forward()

    # Clone for inference before optimizer ops are appended.
    infer_program = train_program.clone(for_test=True)
    infer_program.random_seed = args.seed

    # Append optimizer ops to the training program only.
    with fluid.program_guard(train_program, startup_program):
        if args.opt == 'RMSProp':
            train_op = fluid.optimizer.RMSPropOptimizer(args.lr).minimize(
                model.loss)
        elif args.opt == 'ADAM':
            train_op = fluid.optimizer.Adam(args.lr).minimize(model.loss)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    global_step = 0
    best = 100  # best (lowest) validation RMSLE seen so far
    print_loss = []  # losses accumulated between log lines
    for epoch in range(1, args.epochs + 1):
        for idx, x_batch in enumerate(train_loader):
            global_step += 1
            # History window [0, n_his) builds the graph; the feed input
            # additionally includes the next step ([0, n_his]).
            x = np.array(x_batch[:, 0:args.n_his, :, :], dtype=np.float32)
            graph = gf.build_graph(x)
            feed = gw.to_feed(graph)
            feed['input'] = np.array(
                x_batch[:, 0:args.n_his + 1, :, :], dtype=np.float32)
            b_loss = exe.run(train_program,
                             feed=feed,
                             fetch_list=[model.loss])

            print_loss.append(b_loss[0])
            if global_step % 5 == 0:
                log.info("epoch %d | step %d | loss %.6f" %
                         (epoch, global_step, np.mean(print_loss)))
                print_loss = []

            # Validate every 10 steps; on a new best RMSLE, run test
            # inference and write a submission file.
            if global_step % 10 == 0 and valid_loader is not None:
                predicts = inference(exe,
                                     infer_program,
                                     model,
                                     valid_loader,
                                     gf,
                                     gw,
                                     args,
                                     future_days=args.n_pred)
                result = evaluate(valid_loader, predicts)
                message = "valid result: "
                for key, value in result.items():
                    message += "| %s %s " % (key, value)
                log.info(message)

                if result['rmsle'] < best:
                    predicts = inference(exe,
                                         infer_program,
                                         model,
                                         test_loader,
                                         gf,
                                         gw,
                                         args,
                                         future_days=30)
                    save_to_submit(predicts, args)
                    best = result['rmsle']
                    log.info("best valid result: %s" % best)
def setUp(self):
    """Prepare the test environment: give each case a fresh fluid Program."""
    self.test_prog = fluid.Program()
def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
    """Evaluate word2vec analogy accuracy for a range of saved epochs.

    For each epoch checkpoint, loads parameters into a clone of the infer
    program, optionally quantizes the embedding, and scores top-1 analogy
    accuracy over ``test_reader``.

    NOTE(review): relies on module-level globals not visible here —
    ``start_index``, ``last_index``, ``model_dir`` — confirm they are set
    by the caller/CLI before this runs.

    Args:
        args: CLI namespace (emb_size, batch_size, emb_quant).
        vocab_size: vocabulary size; also the candidate set for scoring.
        test_reader: yields (word_a, word_b, word_c, label, input_words)
            analogy tuples.
        use_cuda: run on CUDA device 0 when truthy.
        i2w: id-to-word mapping (unused in this function).
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    emb_size = args.emb_size
    batch_size = args.batch_size
    with fluid.scope_guard(fluid.Scope()):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            values, pred = net.infer_network(vocab_size, emb_size)
            for epoch in range(start_index, last_index + 1):
                # Clone so per-epoch parameter loads don't accumulate.
                copy_program = main_program.clone()
                model_path = model_dir + "/pass-" + str(epoch)
                fluid.io.load_params(executor=exe,
                                     dirname=model_path,
                                     main_program=copy_program)
                if args.emb_quant:
                    # abs_max-quantize the embedding lookup table and save
                    # the quantized model alongside.
                    config = {
                        'quantize_op_types': 'lookup_table',
                        'lookup_table': {
                            'quantize_type': 'abs_max'
                        },
                    }
                    copy_program = quant_embedding(copy_program, place,
                                                   config)
                    fluid.io.save_persistables(exe,
                                               './output_quant/pass-' +
                                               str(epoch),
                                               main_program=copy_program)

                accum_num = 0        # correct analogy predictions
                accum_num_sum = 0.0  # total analogy queries
                t0 = time.time()
                step_id = 0
                for data in test_reader():
                    step_id += 1
                    b_size = len([dat[0] for dat in data])
                    wa = np.array([dat[0] for dat in data
                                   ]).astype("int64").reshape(b_size, 1)
                    wb = np.array([dat[1] for dat in data
                                   ]).astype("int64").reshape(b_size, 1)
                    wc = np.array([dat[2] for dat in data
                                   ]).astype("int64").reshape(b_size, 1)

                    label = [dat[3] for dat in data]
                    input_word = [dat[4] for dat in data]
                    # Score a+b-c against the whole vocabulary.
                    para = exe.run(copy_program,
                                   feed={
                                       "analogy_a": wa,
                                       "analogy_b": wb,
                                       "analogy_c": wc,
                                       "all_label":
                                       np.arange(vocab_size).reshape(
                                           vocab_size, 1).astype("int64"),
                                   },
                                   fetch_list=[pred.name, values],
                                   return_numpy=False)
                    pre = np.array(para[0])
                    val = np.array(para[1])
                    for ii in range(len(label)):
                        top4 = pre[ii]
                        accum_num_sum += 1
                        # Count a hit only if the first candidate that is
                        # not one of the query words equals the label.
                        for idx in top4:
                            if int(idx) in input_word[ii]:
                                continue
                            if int(idx) == int(label[ii][0]):
                                accum_num += 1
                            break
                    if step_id % 1 == 0:
                        print("step:%d %d " % (step_id, accum_num))
                print("epoch:%d \t acc:%.3f " %
                      (epoch, 1.0 * accum_num / accum_num_sum))
def train():
    """Train the EAST text detector: build the model + Adam optimizer with
    warmup/step-decayed LR, then run the iteration loop over the ICDAR2015
    batch generator, logging stats and snapshotting periodically.
    """
    learning_rate = cfg.learning_rate
    image_shape = [3, 512, 512]

    if cfg.enable_ce:
        # Continuous-evaluation mode: pin all seeds for reproducibility.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.EAST(
                add_conv_body_func=resnet.ResNet(),
                use_random=use_random)
            model.build_model(image_shape)
            losses, keys = model.loss()
            # losses[0] is the total loss used for optimization; all loss
            # components are fetched for logging.
            loss = losses[0]
            fetch_list = losses

            # Step-decay LR schedule with linear warmup.
            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
            lr = exponential_with_warmup_decay(
                learning_rate=learning_rate,
                boundaries=boundaries,
                values=values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay))
            optimizer.minimize(loss)
            # Also fetch the (scheduled) LR; mark everything persistable so
            # the values survive scope cleanup between iterations.
            fetch_list = fetch_list + [lr]

            for var in fetch_list:
                var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.sync_batch_norm = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    if cfg.pretrained_model:
        # Load only variables that exist in the pretrained directory.
        def if_exist(var):
            return os.path.exists(
                os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    dataset = icdar.ICDAR2015Dataset()
    data_generator = dataset.get_batch(num_workers=24,
                                       input_size=512,
                                       batch_size=14)

    def train_loop():
        # Main iteration loop; stops after cfg.max_iter iterations.
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        #for iter_id, data in enumerate(next(data_generator)):
        for iter_id in range(100000):
            data = next(data_generator)
            #for data in data_list:
            prev_start_time = start_time
            start_time = time.time()
            # data layout: [images, _, score_maps, geo_maps, training_masks]
            outs = exe.run(compiled_train_prog,
                           fetch_list=[v.name for v in fetch_list],
                           feed={"input_images": data[0],
                                 "input_score_maps": data[2],
                                 "input_geo_maps": data[3],
                                 "input_training_masks": data[4]})
            # Last fetch is the LR; the rest are the loss components.
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            if iter_id % 10 == 0:
                print(strs)
                sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(iter_id), train_prog)
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start_time
        last_loss = np.array(outs[0]).mean()

    train_loop()
def main(args):
    """Extract ERNIE [CLS] and top-layer token embeddings for a dataset.

    Builds the inference program, loads pretrained parameters, streams the
    dataset through a pyreader until EOF, then saves the concatenated
    embeddings as ``cls_emb.npy`` and ``top_layer_emb.npy`` in
    ``args.output_dir``.

    Raises:
        ValueError: if ``args.init_pretraining_params`` is not given.
    """
    # NOTE: re-parses the CLI, overwriting the passed-in namespace; kept
    # for compatibility with existing callers.
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ExtractEmbeddingReader(
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)

    startup_prog = fluid.Program()

    # Single pass over the data, in order (epoch=1, no shuffle).
    data_generator = reader.data_generator(
        input_file=args.data_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    total_examples = reader.get_num_examples(args.data_set)

    print("Device count: %d" % dev_count)
    print("Total num examples: %d" % total_examples)

    infer_program = fluid.Program()

    with fluid.program_guard(infer_program, startup_prog):
        with fluid.unique_name.guard():
            pyreader, graph_vars = create_model(
                args, pyreader_name='reader', ernie_config=ernie_config)

            fluid.memory_optimize(input_program=infer_program)

    infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    if args.init_pretraining_params:
        init_pretraining_params(
            exe, args.init_pretraining_params, main_program=startup_prog)
    else:
        raise ValueError(
            "WARNING: args 'init_pretraining_params' must be specified")

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count

    pyreader.decorate_tensor_provider(data_generator)
    pyreader.start()

    total_cls_emb = []
    total_top_layer_emb = []
    # Drain the pyreader until it signals end-of-data.
    while True:
        try:
            cls_emb, unpad_top_layer_emb = exe.run(
                program=infer_program,
                fetch_list=[
                    graph_vars["cls_embeddings"].name,
                    graph_vars["top_layer_embeddings"].name
                ],
                return_numpy=False)
            # batch_size * embedding_size
            total_cls_emb.append(np.array(cls_emb))
            total_top_layer_emb.append(np.array(unpad_top_layer_emb))
        except fluid.core.EOFException:
            break

    total_cls_emb = np.concatenate(total_cls_emb)
    total_top_layer_emb = np.concatenate(total_top_layer_emb)

    # FIX: np.save writes binary data — the files must be opened in "wb";
    # text mode ("w") raises TypeError on Python 3.
    with open(os.path.join(args.output_dir, "cls_emb.npy"),
              "wb") as cls_emb_file:
        np.save(cls_emb_file, total_cls_emb)
    with open(os.path.join(args.output_dir, "top_layer_emb.npy"),
              "wb") as top_layer_emb_file:
        np.save(top_layer_emb_file, total_top_layer_emb)