def _run_tests():
    """Run the self-test of every monad module and report success."""
    # Imports are local so the test modules are only loaded when tests run.
    import identity
    import maybe
    import error
    import list
    import writer
    import reader

    identity.test()
    maybe.test()
    error.test()
    list.test()
    writer.test()
    reader.test()
    print("all tests passed")
def infer(infer_program, image, logits, place, exe):
    """Run one-sample-at-a-time inference over the test list.

    Feeds each image through ``infer_program``, prints the top score,
    and returns a dict mapping filename -> predicted class index.
    """
    print("--------------------inference-------------------")
    batch_size = 1  # one sample per run so filenames stay aligned with outputs
    test_reader = paddle.batch(
        reader.test(TEST_LIST, DATA_PATH), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])
    fetch_list = [logits.name]

    label_res = {}
    for batch_id, data in enumerate(test_reader()):
        data_img, filename = data[0][0], data[0][1]
        outputs = exe.run(infer_program,
                          fetch_list=fetch_list,
                          feed=feeder.feed([data_img]))
        scores = outputs[0][0]
        pred_label = np.argmax(scores)
        print("Test-{0}-score: {1}, class {2}, name={3}".format(
            batch_id, scores[pred_label], pred_label, filename))
        label_res[filename] = pred_label
        sys.stdout.flush()
    return label_res
def infer(self, data_path, output_path,
          model_type=ModelType(ModelType.CLASSIFICATION_MODE),
          feature_dim=800,
          batch_size=100):
    """Run batched inference over ``data_path`` and write predictions to ``output_path``.

    Each output line is one prediction vector, values space-separated.

    :param data_path: path of the dataset to infer on.
    :param output_path: file the textual predictions are written to.
    :param model_type: classification or regression mode selector.
    :param feature_dim: feature dimension of the input (reader consumes feature_dim + 1 columns).
    :param batch_size: number of samples per inference batch.
    """
    logger.info("infer data...")
    infer_batch = paddle.batch(
        reader.test(data_path, feature_dim + 1, model_type.is_classification()),
        batch_size=batch_size)
    logger.warning('write predictions to %s' % output_path)
    # FIX: the file was opened without ever being closed; a context manager
    # guarantees the handle is released even if inference raises.
    with open(output_path, 'w') as output_f:
        # FIX: loop variable renamed from `id` (shadowed the builtin).
        for batch_id, batch in enumerate(infer_batch()):
            res = self.inferer.infer(input=batch)
            predictions = [' '.join(map(str, x)) for x in res]
            assert len(batch) == len(
                predictions), "predict error, %d inputs, but %d predictions" % (
                    len(batch), len(predictions))
            output_f.write('\n'.join(map(str, predictions)) + '\n')
def infer():
    """Build a hard-coded ResNet50 classifier, load pretrained weights, and
    return the top-1 predicted label indices of the last test batch.

    Returns None when the test reader yields no data.
    """
    # parameters from arguments
    use_gpu = False
    class_dim = 5
    model_name = "ResNet50"
    pretrained_model = "./output_indoor/ResNet50/61"
    with_memory_optimization = True
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = mo.__dict__[model_name]()
    # FIX: original used `is "GoogleNet"` — identity comparison on a string
    # literal relies on interning and is a SyntaxWarning on Python 3.8+.
    if model_name == "GoogleNet":
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)
    test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        def if_exist(var):
            # Only load variables that actually exist in the checkpoint dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    test_batch_size = 1
    test_reader = paddle.batch(reader.test(), batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])
    fetch_list = [out.name]

    TOPK = 1
    result = None  # FIX: avoid NameError when the reader is empty
    for batch_id, data in enumerate(test_reader()):
        result = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        result = result[0][0]
        pred_label = np.argsort(result)[::-1][:TOPK]
        result = pred_label
        sys.stdout.flush()
    return result
def infer(args):
    """Build the requested model, load persistables, then either export an
    inference model (``args.save_inference``) or print top-1 predictions
    for every sample of the test reader."""
    # parameters from arguments
    class_dim = args.class_dim
    model_name = args.model
    save_inference = args.save_inference
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]

    model_list = [m for m in dir(models) if "__" not in m]
    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = models.__dict__[model_name]()
    if model_name == "GoogleNet":
        # GoogleNet returns auxiliary heads as well; only the main head is used.
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)
    test_program = fluid.default_main_program().clone(for_test=True)
    fetch_list = [out.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    fluid.io.load_persistables(exe, pretrained_model)
    if save_inference:
        fluid.io.save_inference_model(dirname=model_name,
                                      feeded_var_names=['image'],
                                      main_program=test_program,
                                      target_vars=out,
                                      executor=exe,
                                      model_filename='model',
                                      params_filename='params')
        print("model: ", model_name, " is already saved")
        exit(0)

    test_batch_size = 1
    img_size = image_shape[1]
    test_reader = paddle.batch(reader.test(args, img_size),
                               batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        fetched = exe.run(test_program,
                          fetch_list=fetch_list,
                          feed=feeder.feed(data))
        scores = fetched[0][0]
        pred_label = np.argsort(scores)[::-1][:TOPK]
        print("Test-{0}-score: {1}, class {2}".format(batch_id,
                                                      scores[pred_label],
                                                      pred_label))
        sys.stdout.flush()
def eval(args):
    """Compute embedding recall@1 of a metric-learning model over the test set."""
    # parameters from arguments
    model_name = args.model
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(args.model,
                                                                     model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # model definition
    model = models.__dict__[model_name]()
    out = model.net(input=image, embedding_size=args.embedding_size)
    test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        def if_exist(var):
            # Restore only variables present in the checkpoint directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    test_reader = paddle.batch(reader.test(args),
                               batch_size=args.batch_size,
                               drop_last=False)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    fetch_list = [out.name]

    # Accumulate features and labels over the whole test set, then score once.
    f, l = [], []
    for batch_id, data in enumerate(test_reader()):
        t1 = time.time()
        [feas] = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        label = np.asarray([x[1] for x in data])
        f.append(feas)
        l.append(label)
        t2 = time.time()
        period = t2 - t1
        if batch_id % 20 == 0:
            print("[%s] testbatch %d, time %2.2f sec" % \
                  (fmt_time(), batch_id, period))

    f = np.vstack(f)
    l = np.hstack(l)
    recall = recall_topk(f, l, k=1)
    print("[%s] End test %d, test_recall %.5f" % (fmt_time(), len(f), recall))
    sys.stdout.flush()
def eval(args, data_args, configs, val_file_list):
    """Evaluate a quantized detection model (mAP) and export it for inference."""
    init_model = args.init_model
    use_gpu = args.use_gpu
    act_quant_type = args.act_quant_type
    model_save_dir = args.model_save_dir

    batch_size = configs['batch_size']
    batch_size_per_device = batch_size

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    test_py_reader, map_eval, nmsed_out, image = build_program(
        main_prog=test_prog,
        startup_prog=startup_prog,
        train_params=configs,
        is_train=False)
    test_prog = test_prog.clone(for_test=True)

    # Insert fake-quant/dequant ops so the eval graph matches training-time
    # quantization before weights are loaded.
    transpiler = fluid.contrib.QuantizeTranspiler(
        weight_bits=8,
        activation_bits=8,
        activation_quantize_type=act_quant_type,
        weight_quantize_type='abs_max')
    transpiler.training_transpile(test_prog, startup_prog)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    def if_exist(var):
        # Restore only variables present in the checkpoint directory.
        return os.path.exists(os.path.join(init_model, var.name))

    fluid.io.load_vars(exe, init_model, main_program=test_prog,
                       predicate=if_exist)

    # freeze after load parameters
    transpiler.freeze_program(test_prog, place)

    test_reader = reader.test(data_args, val_file_list, batch_size)
    test_py_reader.decorate_paddle_reader(test_reader)

    test_map = test(exe, test_prog, map_eval, test_py_reader)
    print("Test model {0}, map {1}".format(init_model, test_map))
    # convert model to 8-bit before saving, but now Paddle can't load
    # the 8-bit model to do inference.
    # transpiler.convert_to_int8(test_prog, place)
    fluid.io.save_inference_model(model_save_dir, [image.name], [nmsed_out],
                                  exe, test_prog)
def infer(args):
    """Extract embeddings for every test sample and print the first 5 values.

    :param args: namespace with model, pretrained_model, image_shape,
        embedding_size, use_gpu, batch_size.
    """
    # parameters from arguments
    model_name = args.model
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.data(name='image', shape=[None] + image_shape,
                       dtype='float32')
    infer_loader = fluid.io.DataLoader.from_generator(feed_list=[image],
                                                      capacity=64,
                                                      use_double_buffer=True,
                                                      iterable=True)

    # model definition
    model = models.__dict__[model_name]()
    out = model.net(input=image, embedding_size=args.embedding_size)
    test_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        # FIX: removed the dead `if_exist` helper — it was never passed to any
        # loader; fluid.load restores the whole program state directly.
        fluid.load(model_path=pretrained_model,
                   program=test_program,
                   executor=exe)

    infer_loader.set_sample_generator(reader.test(args),
                                      batch_size=args.batch_size,
                                      drop_last=False,
                                      places=place)

    fetch_list = [out.name]
    for batch_id, data in enumerate(infer_loader()):
        result = exe.run(test_program, fetch_list=fetch_list, feed=data)
        result = result[0][0].reshape(-1)
        print("Test-{0}-feature: {1}".format(batch_id, result[:5]))
        sys.stdout.flush()
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir,
                   img_dim, min_scale, rect_val, args):
    """Attach this epoch's training reader to the py_reader and return a
    batched validation reader.

    The shuffle seed varies with ``epoch_id`` so each epoch sees a
    different sample order.
    """
    train_reader = reader.train(
        traindir="%s/%strain" % (args.data_dir, trn_dir),
        sz=img_dim,
        min_scale=min_scale,
        shuffle_seed=epoch_id + 1)
    train_py_reader.decorate_paddle_reader(
        paddle.batch(train_reader, batch_size=train_bs))

    val_reader = reader.test(
        valdir="%s/%svalidation" % (args.data_dir, trn_dir),
        bs=val_bs * DEVICE_NUM,
        sz=img_dim,
        rect_val=rect_val)
    return paddle.batch(val_reader, batch_size=val_bs * DEVICE_NUM)
def eval():
    """Run RRPN detection over the test set and score it with the ICDAR metric."""
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    class_nums = cfg.class_num
    model = model_builder.RRPN(add_conv_body_func=resnet.ResNet(),
                               add_roi_box_head_func=resnet.ResNetC5(),
                               use_pyreader=False,
                               mode='val')

    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            model.build_model()
            pred_boxes = model.eval_bbox_out()
    infer_prog = infer_prog.clone(True)
    exe.run(startup_prog)
    fluid.load(infer_prog, cfg.pretrained_model, exe)

    test_reader = reader.test(1)
    data_loader = model.data_loader
    data_loader.set_sample_list_generator(test_reader, places=place)
    fetch_list = [pred_boxes]

    res_list = []
    # NOTE(review): 'is_crowed' looks like a typo for 'is_crowd', but it is a
    # runtime key consumed downstream — kept as-is.
    keys = [
        'bbox', 'gt_box', 'gt_class', 'is_crowed', 'im_info', 'im_id',
        'is_difficult'
    ]
    for i, data in enumerate(data_loader()):
        result = exe.run(infer_prog,
                         fetch_list=[v.name for v in fetch_list],
                         feed=data,
                         return_numpy=False)
        nmsed_out = result[0]
        outs = np.array(nmsed_out)
        res_list.append(get_key_dict(outs, data[0], keys))
        if i % 50 == 0:
            logger.info('test_iter {}'.format(i))
    icdar_eval(res_list)
def infer(args, config):
    """Face detection inference.

    With ``args.infer`` set, detect on a single image and draw the boxes;
    otherwise run multi-scale evaluation over ``args.file_list`` and save
    WIDER FACE format results.
    """
    model_dir = args.model_dir
    pred_dir = args.pred_dir
    if not os.path.exists(model_dir):
        raise ValueError("The model path [%s] does not exist." % (model_dir))

    if args.infer:
        # Single-image mode.
        image_path = args.image_path
        image = Image.open(image_path)
        if image.mode == 'L':
            image = image.convert('RGB')
        shrink, max_shrink = get_shrink(image.size[1], image.size[0])

        det0 = detect_face(image, shrink)
        if args.use_gpu:
            # Full test-time augmentation: flip + multi-scale + pyramid, voted.
            det1 = flip_test(image, shrink)
            [det2, det3] = multi_scale_test(image, max_shrink)
            det4 = multi_scale_test_pyramid(image, max_shrink)
            det = np.row_stack((det0, det1, det2, det3, det4))
            dets = bbox_vote(det)
        else:
            # when infer on cpu, use a simple case
            dets = det0

        keep_index = np.where(dets[:, 4] >= args.confs_threshold)[0]
        dets = dets[keep_index, :]
        draw_bboxes(image_path, dets[:, 0:4])
    else:
        # Dataset evaluation mode.
        test_reader = reader.test(config, args.file_list)
        for image, image_path in test_reader():
            shrink, max_shrink = get_shrink(image.size[1], image.size[0])

            det0 = detect_face(image, shrink)
            det1 = flip_test(image, shrink)
            [det2, det3] = multi_scale_test(image, max_shrink)
            det4 = multi_scale_test_pyramid(image, max_shrink)
            det = np.row_stack((det0, det1, det2, det3, det4))
            dets = bbox_vote(det)

            save_widerface_bboxes(image_path, dets, pred_dir)
        print("Finish evaluation.")
def event_handler(event):
    """Paddle v2 trainer callback.

    Logs the cost every 100 batches and, at the end of each pass, runs the
    test reader and snapshots the parameters to a .tar file.
    """
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            # FIX: converted Python-2 `print` statements to print() calls —
            # the rest of the file is Python 3 and these were syntax errors.
            print("Pass %d, Batch %d, Cost %f, %s" % (
                event.pass_id, event.batch_id, event.cost, event.metrics))
    if isinstance(event, paddle.event.EndPass):
        result = trainer.test(
            reader=paddle.batch(reader.test(data_path, feature_dim + 1,
                                            args.model_type.is_classification()),
                                batch_size=batch_size),
            feeding=feeding)
        print("Test %d, Cost %f, %s" % (event.pass_id, result.cost,
                                        result.metrics))

        model_desc = "{type}".format(type=str(args.model_type))
        with open("%sdnn_%s_pass_%05d.tar" %
                  (args.model_output_prefix, model_desc, event.pass_id),
                  "w") as f:
            parameters.to_tar(f)
def eval(args, data_args, test_list, batch_size, model_dir=None):
    """Evaluate a detection model's accumulated mAP over ``test_list``."""
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    test_py_reader, map_eval = build_program(main_prog=test_prog,
                                             startup_prog=startup_prog,
                                             args=args,
                                             data_args=data_args)
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    def if_exist(var):
        # Restore only variables present in the checkpoint directory.
        return os.path.exists(os.path.join(model_dir, var.name))

    fluid.io.load_vars(exe, model_dir, main_program=test_prog,
                       predicate=if_exist)

    test_reader = reader.test(data_args, test_list, batch_size=batch_size)
    test_py_reader.decorate_paddle_reader(test_reader)

    _, accum_map = map_eval.get_map_var()
    map_eval.reset(exe)
    test_py_reader.start()

    # Drain the reader until EOF; the accumulated mAP updates every batch.
    batch_id = 0
    try:
        while True:
            test_map, = exe.run(test_prog, fetch_list=[accum_map])
            if batch_id % 10 == 0:
                print("Batch {0}, map {1}".format(batch_id, test_map))
            batch_id += 1
    except (fluid.core.EOFException, StopIteration):
        test_py_reader.reset()
    print("Test model {0}, map {1}".format(model_dir, test_map))
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir,
                   img_dim, min_scale, rect_val, args=None):
    """Distributed-aware variant: wire the epoch's sharded train reader into
    the py_reader and return a batched validation reader.

    In 'local' mode the shard degenerates to a single trainer (rank 0 of 1).
    """
    if args.update_method != 'local':
        num_trainers = args.dist_env["num_trainers"]
        trainer_id = args.dist_env["trainer_id"]
    else:
        num_trainers = 1
        trainer_id = 0

    train_reader = reader.train(
        traindir="%s/%strain" % (args.data_dir, trn_dir),
        sz=img_dim,
        min_scale=min_scale,
        shuffle_seed=epoch_id + 1,
        rank_id=trainer_id,
        size=num_trainers)
    train_py_reader.decorate_paddle_reader(
        paddle.batch(train_reader, batch_size=train_bs))

    test_reader = reader.test(
        valdir="%s/%svalidation" % (args.data_dir, trn_dir),
        bs=val_bs * DEVICE_NUM,
        sz=img_dim,
        rect_val=rect_val)
    return paddle.batch(test_reader, batch_size=val_bs * DEVICE_NUM)
def compute_score(model_dir,
                  data_dir,
                  test_list='annotations/instances_val2017.json',
                  batch_size=32,
                  height=300,
                  width=300,
                  num_classes=81,
                  mean_value=None):
    """
    compute score, mAP, flops of a model

    Args:
        model_dir (string): directory of model
        data_dir (string): directory of coco dataset, like
            '/your/path/to/coco', '/work/datasets/coco'
        mean_value (list, optional): per-channel mean; defaults to
            [127.5, 127.5, 127.5].

    Returns:
        tuple: score, mAP, flops.
    """
    # FIX: mean_value was a mutable default argument ([127.5, ...]); use a
    # None sentinel so no list object is shared across calls.
    if mean_value is None:
        mean_value = [127.5, 127.5, 127.5]

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    [infer_program, feeded_var_names,
     target_var] = fluid.io.load_inference_model(dirname=model_dir,
                                                 executor=exe)

    image_shape = [3, height, width]
    data_args = reader.Settings(
        dataset='coco2017',
        data_dir=data_dir,
        resize_h=height,
        resize_w=width,
        mean_value=mean_value,
        apply_distort=False,
        apply_expand=False,
        ap_version='cocoMAP')

    image = fluid.layers.data(name='image', shape=image_shape,
                              dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    gt_iscrowd = fluid.layers.data(
        name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1)
    gt_image_info = fluid.layers.data(
        name='gt_image_id', shape=[3], dtype='int32')

    test_reader = reader.test(data_args, test_list, batch_size)
    feeder = fluid.DataFeeder(
        place=place,
        feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info])

    mAP = use_coco_api_compute_mAP(data_args, test_list, num_classes,
                                   test_reader, exe, infer_program,
                                   feeded_var_names, feeder, target_var,
                                   batch_size)

    total_flops_params, is_quantize = summary(infer_program)
    MAdds = np.sum(total_flops_params['flops']) / 2000000.0
    if is_quantize:
        # Quantized models count half the multiply-adds.
        MAdds /= 2.0

    print('mAP:', mAP)
    print('MAdds:', MAdds)

    # Piecewise scoring rule: clamp small models, reject huge ones.
    if MAdds < 160.0:
        MAdds = 160.0
    if MAdds > 1300.0:
        score = 0.0
    else:
        score = mAP * 100 - (5.1249 * np.log(MAdds) - 14.499)
    print('score:', score)
    return score, mAP, MAdds
def train():
    """Dygraph RetinaNet training loop with optional data parallelism and
    continuous-evaluation (CE) debug dumps.

    NOTE(review): reconstructed from a whitespace-mangled source; block
    boundaries of the CE debug section and the placement of
    optimizer.minimize() follow the standard Paddle dygraph pattern —
    confirm against the original formatting.
    """
    learning_rate = cfg.learning_rate
    if cfg.enable_ce:
        # Fixed seeds for reproducible CE runs.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    if cfg.enable_ce:
        use_random = False

    # Piecewise learning-rate schedule.
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    if cfg.use_data_parallel:
        strategy = fluid.dygraph.parallel.prepare_context()
    retinanet = RetinaNet("retinanet")
    optimizer = optimizer_setting(learning_rate, boundaries, values,
                                  devices_num)
    if cfg.use_data_parallel:
        retinanet = fluid.dygraph.parallel.DataParallel(retinanet, strategy)

    if cfg.pretrained_model:
        def if_exist(var):
            return os.path.exists(
                os.path.join(cfg.pretrained_model, var.name))
        pretrained, optimizer_load = fluid.dygraph.load_persistables(
            cfg.pretrained_model)
        retinanet.load_dict(pretrained)

    shuffle = True
    if cfg.enable_ce:
        shuffle = False
    if cfg.use_data_parallel:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    shuffle=shuffle)
        train_reader = fluid.contrib.reader.distributed_batch_reader(
            train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
    test_reader = reader.test(total_batch_size)

    def save_model(model_state, postfix, optimizer=None):
        # Snapshot model (and optionally optimizer) state.
        model_path = os.path.join(cfg.model_save_dir, postfix)
        fluid.dygraph.save_persistables(model_state, model_path, optimizer)

    def train_loop():
        keys = ['loss', 'loss_cls', 'loss_bbox']
        train_stats = TrainingStats(cfg.log_window, keys)
        retinanet.train()
        for iter_id, data in enumerate(train_reader()):
            start_time = time.time()

            # Pad every sample's ground truth to the batch maximum.
            gt_max_num = 0
            batch_size = len(data)
            for x in data:
                if x[1].shape[0] > gt_max_num:
                    gt_max_num = x[1].shape[0]

            image_data = np.array([x[0] for x in data]).astype('float32')
            if cfg.enable_ce:
                print('image: {} {}'.format(
                    abs(image_data).sum(), image_data.shape))

            gt_box_data = np.zeros([batch_size, gt_max_num, 4])
            gt_label_data = np.zeros([batch_size, gt_max_num])
            is_crowd_data = np.ones([batch_size, gt_max_num])
            for batch_id, x in enumerate(data):
                gt_num = x[1].shape[0]
                gt_box_data[batch_id, 0:gt_num, :] = x[1]
                gt_label_data[batch_id, 0:gt_num] = x[2]
                is_crowd_data[batch_id, 0:gt_num] = x[3]
            gt_box_data = gt_box_data.astype('float32')
            gt_label_data = gt_label_data.astype('int32')
            is_crowd_data = is_crowd_data.astype('int32')
            im_info_data = np.array([x[4] for x in data]).astype('float32')
            im_id_data = np.array([x[5] for x in data]).astype('int32')

            outputs = retinanet('train', image_data, im_info_data, \
                gt_box_data, gt_label_data, is_crowd_data)
            loss_cls = outputs['loss_cls']
            loss_bbox = outputs['loss_bbox']
            loss = outputs['loss']
            score_pred = outputs['score_pred']
            loc_pred = outputs['loc_pred']
            cls_pred_list = outputs['cls_score_list']
            bbox_pred_list = outputs['bbox_pred_list']
            cls_score = outputs['cls_score']
            bbox_pred = outputs['bbox_pred']
            loss_cls_data = loss_cls.numpy()
            loss_bbox_data = loss_bbox.numpy()
            loss_data = loss.numpy()

            if cfg.use_data_parallel:
                # Scale the loss and all-reduce gradients across devices.
                loss = retinanet.scale_loss(loss)
                loss.backward()
                retinanet.apply_collective_grads()
            else:
                loss.backward()
            optimizer.minimize(loss)

            if cfg.enable_ce:
                # Gradient dumps for continuous-evaluation debugging.
                print('score_pred grad: {} {}'.format(
                    abs(score_pred.gradient()).sum(),
                    score_pred.gradient().shape))
                print('loc_pred grad: {} {}'.format(
                    abs(loc_pred.gradient()).sum(),
                    loc_pred.gradient().shape))
                for var in cls_pred_list:
                    print('cls grad reshape: {} {}'.format(
                        abs(var.gradient()).sum(), var.gradient().shape))
                for var in bbox_pred_list:
                    print('bbox grad reshape: {} {}'.format(
                        abs(var.gradient()).sum(), var.gradient().shape))
                for var in cls_score:
                    print('cls grad original: {} {}'.format(
                        abs(var.gradient()).sum(), var.gradient().shape))
                for var in bbox_pred:
                    print('bbox grad original: {} {}'.format(
                        abs(var.gradient()).sum(), var.gradient().shape))
                dy_grad_value = {}
                for param in retinanet.parameters():
                    if param.name in (
                            'retnet_cls_conv_n3_fpn3/Conv2D_0.retnet_cls_conv_n3_fpn3_w',
                            'retnet_cls_conv_n2_fpn3/Conv2D_0.retnet_cls_conv_n2_fpn3_w',
                            'retnet_cls_conv_n1_fpn3/Conv2D_0.retnet_cls_conv_n1_fpn3_w',
                            'retnet_cls_conv_n0_fpn3/Conv2D_0.retnet_cls_conv_n0_fpn3_w',
                            'retnet_cls_pred_fpn3/Conv2D_0.retnet_cls_pred_fpn3_w',
                            'conv1/Conv2D_0.conv1_weights'):
                        np_array = np.array(
                            param._ivar._grad_ivar().value().get_tensor())
                        dy_grad_value[param.name + core.grad_var_suffix()] = [
                            abs(np_array).sum(), np_array.shape
                        ]
                        np_array = np.array(param._ivar.value().get_tensor())
                        dy_grad_value[param.name] = [
                            abs(np_array).sum(), np_array.shape
                        ]
                for key, value in dy_grad_value.items():
                    print('{key}: {value}'.format(key=key, value=value))

            retinanet.clear_gradients()

            outs = [loss_data, loss_cls_data, loss_bbox_data]
            stats = {k: v.mean() for k, v in zip(keys, outs)}
            train_stats.update(stats)
            logs = train_stats.log()
            lr = optimizer._global_learning_rate().numpy()
            end_time = time.time()
            strs = '{}, iter: {}, lr: {} {}, time: {:.3f}'.format(
                now_time(), iter_id, lr, logs, end_time - start_time)
            print(strs)
            sys.stdout.flush()

            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(retinanet.state_dict(),
                           "model_iter{}".format(iter_id), optimizer)
            if (iter_id + 1) == cfg.max_iter:
                break

    train_loop()
    save_model(retinanet.state_dict(), 'model_final', optimizer)
def train_async(args):
    """Metric-learning training loop (DataLoader variant).

    Trains with a ParallelExecutor, periodically evaluates recall@1 on the
    test loader, and snapshots the program every ``save_iter_step`` iters.

    :param args: namespace with model/checkpoint/pretrained paths, batch
        sizes, iteration counts, and device flags.
    """
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    train_loader, train_cost, global_lr, train_feas, train_label = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_loader, test_feas = build_program(is_train=False,
                                           main_prog=tmp_prog,
                                           startup_prog=startup_prog,
                                           args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_feas.name, train_label.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers <= 1 and args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = place

    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)
    if pretrained_model:
        load_params(exe, train_prog, pretrained_model)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    # FIX: was `/` (float division on Python 3) — batch_size must be an int;
    # the sibling train_async in this file already uses `//`.
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_loader.set_sample_generator(reader.train(args),
                                      batch_size=train_batch_size,
                                      drop_last=True,
                                      places=places)
    test_loader.set_sample_generator(reader.test(args),
                                     batch_size=test_batch_size,
                                     drop_last=False,
                                     places=place)

    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    iter_no = 0
    train_info = [0, 0, 0]
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            t1 = time.time()
            lr, loss, feas, label = train_exe.run(feed=train_batch,
                                                  fetch_list=train_fetch_list)
            t2 = time.time()
            period = t2 - t1
            lr = np.mean(np.array(lr))
            train_info[0] += np.mean(np.array(loss))
            train_info[1] += recall_topk(feas, label, k=1)
            train_info[2] += 1

            if iter_no % args.display_iter_step == 0:
                avgruntime = totalruntime / args.display_iter_step
                avg_loss = train_info[0] / train_info[2]
                avg_recall = train_info[1] / train_info[2]
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                      "recall %.4f, time %2.2f sec" % \
                      (fmt_time(), iter_no, lr, avg_loss, avg_recall, avgruntime))
                sys.stdout.flush()
                totalruntime = 0
            if iter_no % 1000 == 0:
                # Reset the running averages every 1000 iterations.
                train_info = [0, 0, 0]

            totalruntime += period

            if iter_no % args.test_iter_step == 0 and iter_no != 0:
                # Full pass over the test loader, then recall@1 on all features.
                f, l = [], []
                for batch_id, test_batch in enumerate(test_loader()):
                    t1 = time.time()
                    [feas] = exe.run(test_prog,
                                     feed=test_batch,
                                     fetch_list=test_fetch_list)
                    label = np.asarray(test_batch[0]['label'])
                    label = np.squeeze(label)
                    f.append(feas)
                    l.append(label)
                    t2 = time.time()
                    period = t2 - t1
                    if batch_id % 20 == 0:
                        print("[%s] testbatch %d, time %2.2f sec" % \
                              (fmt_time(), batch_id, period))
                f = np.vstack(f)
                l = np.hstack(l)
                recall = recall_topk(f, l, k=1)
                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                      (fmt_time(), len(f), iter_no, recall))
                sys.stdout.flush()

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir, model_name,
                                          str(iter_no))
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1
def train_async(args):
    """Metric-learning training loop (py_reader variant).

    Trains with a ParallelExecutor fed by a py_reader, periodically
    evaluates recall@1, snapshots persistables, and emits CE kpi lines
    when ``args.enable_ce`` is set.
    """
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    if args.enable_ce:
        # CE runs are only defined for this fixed configuration.
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000

    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = build_program(is_train=False,
                                            main_prog=tmp_prog,
                                            startup_prog=startup_prog,
                                            args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    logging.debug('after run startup program')

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)
    if pretrained_model:
        def if_exist(var):
            # Restore only variables present in the checkpoint directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_reader = paddle.batch(reader.train(args),
                                batch_size=train_batch_size,
                                drop_last=True)
    test_reader = paddle.batch(reader.test(args),
                               batch_size=test_batch_size,
                               drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    train_py_reader.start()
    iter_no = 0
    train_info = [0, 0, 0, 0]
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        lr = np.mean(np.array(lr))
        train_info[0] += np.mean(np.array(loss))
        train_info[1] += np.mean(np.array(acc1))
        train_info[2] += np.mean(np.array(acc5))
        train_info[3] += 1

        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            avg_loss = train_info[0] / train_info[3]
            avg_acc1 = train_info[1] / train_info[3]
            avg_acc5 = train_info[2] / train_info[3]
            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                  "acc1 %.4f, acc5 %.4f, time %2.2f sec" % \
                  (fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5, avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        if iter_no % 1000 == 0:
            # Reset the running averages every 1000 iterations.
            train_info = [0, 0, 0, 0]

        totalruntime += period

        if iter_no % args.test_iter_step == 0 and iter_no != 0:
            # Full pass over the test reader, then recall@1 on all features.
            f, l = [], []
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                [feas] = exe.run(test_prog,
                                 fetch_list=test_fetch_list,
                                 feed=test_feeder.feed(data))
                label = np.asarray([x[1] for x in data])
                f.append(feas)
                l.append(label)
                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" % \
                          (fmt_time(), batch_id, period))
            f = np.vstack(f)
            l = np.hstack(l)
            recall = recall_topk(f, l, k=1)
            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                  (fmt_time(), len(f), iter_no, recall))
            sys.stdout.flush()

        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(exe, model_path,
                                       main_program=train_prog)

        iter_no += 1

    # This is for continuous evaluation only
    if args.enable_ce:
        # Use the mean cost/acc for training
        print("kpis\ttrain_cost\t{}".format(avg_loss))
        print("kpis\ttest_recall\t{}".format(recall))
def eval():
    """Evaluate a dygraph YOLOv3 model on COCO val and report COCO-API bbox mAP.

    Runs inference batch-by-batch (test batch size must be 1), converts model
    NMS output to COCO detection-result dicts, dumps them to
    ``yolov3_result.json`` and scores the file with pycocotools.

    Side effects: writes ``yolov3_result.json`` (and ``output/`` when
    ``cfg.debug``); prints per-batch and summary timings.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if '2014' in cfg.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in cfg.dataset:
        test_list = 'annotations/instances_val2017.json'

    if cfg.debug:
        if not os.path.exists('output'):
            os.mkdir('output')

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = YOLOv3(3, is_train=False)
        # yapf: disable
        if cfg.weights:
            restore, _ = fluid.load_dygraph(cfg.weights)
            model.set_dict(restore)
        model.eval()

        input_size = cfg.input_size
        # batch_size for test must be 1
        test_reader = reader.test(input_size, 1)
        label_names, label_ids = reader.get_label_infos()
        if cfg.debug:
            print("Load in labels {} with ids {}".format(label_names,
                                                         label_ids))

        def get_pred_result(boxes, scores, labels, im_id):
            # Convert one image's detections into COCO result dicts
            # (bbox is [x, y, w, h] with the +1 pixel-inclusive convention).
            result = []
            for box, score, label in zip(boxes, scores, labels):
                x1, y1, x2, y2 = box
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                bbox = [x1, y1, w, h]
                res = {
                    'image_id': int(im_id),
                    'category_id': label_ids[int(label)],
                    'bbox': list(map(float, bbox)),
                    'score': float(score)
                }
                result.append(res)
            return result

        dts_res = []
        total_time = 0
        for iter_id, data in enumerate(test_reader()):
            start_time = time.time()
            img_data = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img_data)
            im_id_data = np.array([x[1] for x in data]).astype('int32')
            im_id = to_variable(im_id_data)
            im_shape_data = np.array([x[2] for x in data]).astype('int32')
            im_shape = to_variable(im_shape_data)
            batch_outputs = model(img, None, None, None, im_id, im_shape)
            nmsed_boxes = batch_outputs.numpy()
            # A second dim != 6 means NMS kept no valid boxes for this image.
            if nmsed_boxes.shape[1] != 6:
                continue
            im_id = data[0][1]
            nmsed_box = nmsed_boxes
            # Each row is [label, score, x1, y1, x2, y2].
            labels = nmsed_box[:, 0]
            scores = nmsed_box[:, 1]
            boxes = nmsed_box[:, 2:6]
            dts_res += get_pred_result(boxes, scores, labels, im_id)
            end_time = time.time()
            print("batch id: {}, time: {}".format(iter_id,
                                                  end_time - start_time))
            total_time += end_time - start_time

        with open("yolov3_result.json", 'w') as outfile:
            json.dump(dts_res, outfile)
        print("start evaluate detection result with coco api")
        coco = COCO(os.path.join(cfg.data_dir, test_list))
        cocoDt = coco.loadRes("yolov3_result.json")
        cocoEval = COCOeval(coco, cocoDt, 'bbox')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        print("evaluate done.")
        # FIX: iter_id is the 0-based index of the last batch, so the batch
        # count is iter_id + 1 (the old `total_time / iter_id` overstated the
        # average and raised ZeroDivisionError for a single batch). Matches
        # the `(batch_id + 1)` convention used by the other eval() here.
        print("Time per batch: {}".format(total_time / (iter_id + 1)))
def train(args,
          train_file_list,
          val_file_list,
          data_args,
          learning_rate,
          batch_size,
          num_passes,
          model_save_dir,
          pretrained_model=None):
    """Train a 2-class MobileNet-SSD detector with RMSProp + piecewise decay.

    Evaluates detection mAP after each pass, keeps the best model under
    ``best_model``, and snapshots every 10th pass. When ``args.for_model_ce``
    is set, also writes speed/cost factors to text files for CE tracking.

    Args:
        args: CLI namespace (nms_threshold, ap_version, use_gpu, parallel,
            for_model_ce, iterations, skip_batch_num).
        train_file_list/val_file_list: dataset file lists passed to reader.
        data_args: reader settings (resize_h/resize_w used for input shape).
        learning_rate: base LR for the piecewise schedule.
        batch_size: global batch size.
        num_passes: number of training passes (epochs).
        model_save_dir: root directory for saved models.
        pretrained_model: optional directory of warm-start weights.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    num_classes = 2

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(locs,
                                              confs,
                                              box,
                                              box_var,
                                              nms_threshold=args.nms_threshold)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    # Clone before the optimizer is added so the test program has no
    # backward/optimize ops.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(nmsed_out,
                                                gt_label,
                                                gt_box,
                                                difficult,
                                                num_classes,
                                                overlap_threshold=0.5,
                                                evaluate_difficult=False,
                                                ap_version=args.ap_version)

    # LR decays at passes 40/60/80/100, expressed in iterations.
    # NOTE(review): under Python 3 `4800 / batch_size` is a float, so the
    # boundaries are floats — confirm piecewise_decay accepts that here.
    epocs = 4800 / batch_size
    boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
    values = [
        learning_rate, learning_rate * 0.5, learning_rate * 0.25,
        learning_rate * 0.1, learning_rate * 0.01
    ]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005), )
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Warm-start: load only variables whose file exists under the directory.
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=args.use_gpu,
                                           loss_name=loss.name)

    train_reader = paddle.batch(reader.train(data_args, train_file_list),
                                batch_size=batch_size)
    test_reader = paddle.batch(reader.test(data_args, val_file_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def save_model(postfix):
        # Overwrite any existing snapshot with the same postfix.
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.

    def test(pass_id, best_map):
        # Run the whole validation set through the accumulating mAP evaluator;
        # returns the (possibly updated) best mAP seen so far.
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        for batch_id, data in enumerate(test_reader()):
            test_map, = exe.run(test_program,
                                feed=feeder.feed(data),
                                fetch_list=[accum_map])
            if batch_id % 20 == 0:
                print("Batch {0}, map {1}".format(batch_id, test_map))
        if test_map[0] > best_map:
            best_map = test_map[0]
            save_model('best_model')
        print("Pass {0}, test map {1}".format(pass_id, test_map))
        return best_map

    train_num = 0
    total_train_time = 0.0
    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        # end_time = 0
        every_pass_loss = []
        iter = 0
        pass_duration = 0.0
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            if args.for_model_ce and iter == args.iterations:
                break
            # Skip too-small batches that cannot be split across devices.
            if len(data) < (devices_num * 2):
                print("There are too few data to train on all devices.")
                continue
            if args.parallel:
                loss_v, = train_exe.run(fetch_list=[loss.name],
                                        feed=feeder.feed(data))
            else:
                loss_v, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
            # end_time = time.time()
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v, start_time - prev_start_time))
            # Timing stats exclude warm-up batches of the first pass.
            if args.for_model_ce and iter >= args.skip_batch_num or pass_id != 0:
                batch_duration = time.time() - start_time
                pass_duration += batch_duration
                train_num += len(data)
            every_pass_loss.append(loss_v)
            iter += 1
        total_train_time += pass_duration

        if args.for_model_ce and pass_id == num_passes - 1:
            examples_per_sec = train_num / total_train_time
            cost = np.mean(every_pass_loss)
            with open("train_speed_factor.txt", 'w') as f:
                f.write('{:f}\n'.format(examples_per_sec))
            with open("train_cost_factor.txt", 'a+') as f:
                f.write('{:f}\n'.format(cost))

        best_map = test(pass_id, best_map)
        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            save_model(str(pass_id))
    print("Best test map {0}".format(best_map))
def parallel_do(args,
                train_file_list,
                val_file_list,
                data_args,
                learning_rate,
                batch_size,
                num_passes,
                model_save_dir,
                pretrained_model=None):
    """Train MobileNet-SSD using the legacy ParallelDo multi-device path.

    Builds the SSD graph (replicated per device when ``args.parallel``),
    trains with RMSProp + piecewise LR decay, evaluates mAP after every pass,
    and saves persistables every 10th pass and on the final pass.

    Args:
        args: CLI namespace (parallel, use_nccl, use_gpu, ap_version).
        train_file_list/val_file_list: dataset file lists for the readers.
        data_args: reader settings; dataset name selects the class count.
        learning_rate: base LR for the piecewise schedule.
        batch_size: global batch size.
        num_passes: number of training passes.
        model_save_dir: root directory for saved snapshots.
        pretrained_model: optional directory of warm-start weights.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    if data_args.dataset == 'coco':
        num_classes = 81
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    if args.parallel:
        # Replicate the network on every place; ParallelDo scatters the
        # inputs and gathers the per-device loss / NMS output.
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=args.use_nccl)
        with pd.do():
            image_ = pd.read_input(image)
            gt_box_ = pd.read_input(gt_box)
            gt_label_ = pd.read_input(gt_label)
            difficult_ = pd.read_input(difficult)
            locs, confs, box, box_var = mobile_net(num_classes, image_,
                                                   image_shape)
            loss = fluid.layers.ssd_loss(locs, confs, gt_box_, gt_label_, box,
                                         box_var)
            nmsed_out = fluid.layers.detection_output(locs,
                                                      confs,
                                                      box,
                                                      box_var,
                                                      nms_threshold=0.45)
            loss = fluid.layers.reduce_sum(loss)
            pd.write_output(loss)
            pd.write_output(nmsed_out)
        loss, nmsed_out = pd()
        loss = fluid.layers.mean(loss)
    else:
        locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
        nmsed_out = fluid.layers.detection_output(locs,
                                                  confs,
                                                  box,
                                                  box_var,
                                                  nms_threshold=0.45)
        loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
                                     box_var)
        loss = fluid.layers.reduce_sum(loss)

    # Clone before adding the optimizer so the test program stays forward-only.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(nmsed_out,
                                                gt_label,
                                                gt_box,
                                                difficult,
                                                num_classes,
                                                overlap_threshold=0.5,
                                                evaluate_difficult=False,
                                                ap_version=args.ap_version)

    if data_args.dataset == 'coco':
        # learning rate decay in 12, 19 pass, respectively
        # FIX: use // so the boundaries stay integers on Python 3 (plain /
        # returned floats there, while Python 2 int division gave ints).
        if '2014' in train_file_list:
            boundaries = [82783 // batch_size * 12, 82783 // batch_size * 19]
        elif '2017' in train_file_list:
            boundaries = [118287 // batch_size * 12, 118287 // batch_size * 19]
    elif data_args.dataset == 'pascalvoc':
        boundaries = [40000, 60000]
    values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005), )
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Warm-start: load only variables whose file exists under the directory.
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(reader.train(data_args, train_file_list),
                                batch_size=batch_size)
    test_reader = paddle.batch(reader.test(data_args, val_file_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def test(pass_id):
        # Accumulate mAP over the whole validation set, then report the final
        # value (test_map holds the last fetched accumulated mAP).
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
        for data in test_reader():
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        end_time = 0
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            loss_v = exe.run(fluid.default_main_program(),
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            end_time = time.time()
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v[0],
                    start_time - prev_start_time))
        test(pass_id)

        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            model_path = os.path.join(model_save_dir, str(pass_id))
            # FIX: was a Python-2 `print` statement (SyntaxError on Python 3);
            # the parenthesized form behaves identically on both versions and
            # matches the print() calls used throughout the rest of the file.
            print('save models to %s' % (model_path))
            fluid.io.save_persistables(exe, model_path)
def train_async(args):
    """Iteration-driven training loop supporting classify and metric losses.

    Chooses fetch targets and accumulators by ``args.loss_name`` (softmax /
    arcmargin => classification path, otherwise metric path), feeds data via a
    py_reader thread, evaluates retrieval accuracy every ``test_iter_step``
    iterations, and checkpoints every ``save_iter_step`` iterations.
    """
    # parameters from arguments
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    # For CE testing: pin the model/loss and fix all random seeds.
    if args.enable_ce:
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000

    trainclassify = args.loss_name in ["softmax", "arcmargin"]
    train_py_reader, outputvars = build_program(
        is_train=True,
        net_config=net_config_classify,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    if trainclassify:
        train_cost, train_acc1, train_acc5, global_lr = outputvars
        train_fetch_list = [
            global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
        ]
        evaltrain = EvalTrain_Classify()
    else:
        train_cost, train_feas, train_label, global_lr = outputvars
        train_fetch_list = [
            global_lr.name, train_cost.name, train_feas.name, train_label.name
        ]
        evaltrain = EvalTrain_Metric()

    _, outputvars = build_program(
        is_train=False,
        net_config=net_config_test,
        main_prog=tmp_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = outputvars
    test_prog = tmp_prog.clone(for_test=True)
    test_fetch_list = [test_feas.name]

    # Enable memory optimization to save device memory. Fetched variables
    # must be listed in skip_opt_set or their buffers may be overwritten.
    if args.with_mem_opt:
        fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Initialize all variables.
    exe.run(startup_prog)
    logging.debug('after run startup program')

    # Resume from a training checkpoint.
    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    # Load pretrained weights into the network. When warm-starting, the final
    # fc layer should be renamed (or its weight files removed from the
    # pretrained directory) if the class count differs.
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    # Determine the number of devices on this machine.
    if args.use_gpu:
        devicenum = get_gpu_num()
        assert (args.train_batch_size % devicenum) == 0
    else:
        devicenum = get_cpu_num()
        assert (args.train_batch_size % devicenum) == 0
    # NOTE: with py_reader the configured batch size is per card, hence the
    # division by the device count.
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size
    logging.debug('device number is %d, batch on each card:%d', devicenum,
                  train_batch_size)

    # Wrap the sample readers into batch readers and hand the train reader to
    # the py_reader, whose background thread pulls data off the main thread.
    train_reader = paddle.batch(reader.train(args),
                                batch_size=train_batch_size,
                                drop_last=True)
    test_reader = paddle.batch(reader.test(args),
                               batch_size=test_batch_size,
                               drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    # Multi-card training via ParallelExecutor.
    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    # Start the py_reader feeding thread.
    train_py_reader.start()
    iter_no = 0
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        # With asynchronous py_reader input, no feed argument is needed.
        outputlist = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        evaltrain.pushdata(outputlist)

        # Report accuracy averaged over the batches since the last reset.
        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            train_accuracy = evaltrain.getaccuracy()
            print("[%s] trainbatch %d, "\
                "accuracy[%s], time %2.2f sec" % \
                (fmt_time(), iter_no, train_accuracy, avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        if iter_no % 1000 == 0:
            evaltrain.reset()

        totalruntime += period

        if iter_no % args.test_iter_step == 0 and (pretrained_model or
                                                   checkpoint or
                                                   iter_no != 0):
            # Collect features and labels over up to max_test_count batches.
            evaltest = EvalTest()
            max_test_count = 100
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                test_outputlist = exe.run(test_prog,
                                          fetch_list=test_fetch_list,
                                          feed=test_feeder.feed(data))
                label = np.asarray([x[1] for x in data])
                evaltest.pushdata((test_outputlist[0], label))
                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" % \
                        (fmt_time(), batch_id, period))
                if batch_id > max_test_count:
                    break
            # Retrieval accuracy: a query counts as correct when its result
            # shares the query's class. (Test classes do not overlap train
            # classes, so the classifier output itself is meaningless here.)
            test_recall = evaltest.getaccuracy()
            print("[%s] test_img_num %d, trainbatch %d, testaccarcy %s" % \
                (fmt_time(), max_test_count * args.test_batch_size, iter_no,
                 test_recall))
            sys.stdout.flush()

        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            # Save persistables; usable for checkpoint resume.
            fluid.io.save_persistables(exe, model_path, main_program=train_prog)
        iter_no += 1

    # This is for continuous evaluation only
    if args.enable_ce:
        # Use the mean cost/acc for training
        # NOTE(review): `avg_loss` and `recall` are never assigned in this
        # function (the loop tracks `train_accuracy` / `test_recall`), so
        # these prints raise NameError when enable_ce is set — confirm the
        # intended metrics and fix.
        print("kpis train_cost %s" % (avg_loss))
        print("kpis test_recall %s" % (recall))
def eval():
    """Evaluate an RCNN (ResNet50-C4) model on COCO val with the COCO API.

    Runs batched inference over the validation reader, converts LoD-tensor
    outputs into COCO bbox (and, when ``cfg.MASK_ON``, segm) results, writes
    them as JSON and scores with pycocotools' COCOeval.

    Side effects: writes ``detection_bbox_result.json`` (and
    ``detection_segms_result.json``); prints timings and COCO summaries.
    """
    data_path = DatasetPath('val')
    test_list = data_path.get_file_list()

    image_shape = [3, cfg.TEST.max_size, cfg.TEST.max_size]
    class_nums = cfg.class_num
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    cocoGt = COCO(test_list)
    # COCO category ids are sparse; map contiguous 1-based ids back to them.
    num_id_to_cat_id_map = {i + 1: v for i, v in enumerate(cocoGt.getCatIds())}
    category_ids = cocoGt.getCatIds()
    label_list = {
        item['id']: item['name']
        for item in cocoGt.loadCats(category_ids)
    }
    # NOTE(review): value is a one-element list while the other entries are
    # plain name strings — confirm this inconsistency is intentional.
    label_list[0] = ['background']

    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=False,
        mode='val')
    model.build_model(image_shape)
    pred_boxes = model.eval_bbox_out()
    if cfg.MASK_ON:
        masks = model.eval_mask_out()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # yapf: disable
    # Load only weights whose file exists under the pretrained directory.
    if cfg.pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    # yapf: enable
    test_reader = reader.test(total_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    dts_res = []
    segms_res = []
    if cfg.MASK_ON:
        fetch_list = [pred_boxes, masks]
    else:
        fetch_list = [pred_boxes]
    eval_start = time.time()
    for batch_id, batch_data in enumerate(test_reader()):
        start = time.time()
        # Per-image im_info is the second element of each sample.
        im_info = []
        for data in batch_data:
            im_info.append(data[1])
        # return_numpy=False keeps LoD information on the fetched tensors.
        results = exe.run(fetch_list=[v.name for v in fetch_list],
                          feed=feeder.feed(batch_data),
                          return_numpy=False)
        pred_boxes_v = results[0]
        if cfg.MASK_ON:
            masks_v = results[1]
        new_lod = pred_boxes_v.lod()
        nmsed_out = pred_boxes_v
        dts_res += get_dt_res(total_batch_size, new_lod[0], nmsed_out,
                              batch_data, num_id_to_cat_id_map)
        # A (1, 1)-shaped mask tensor signals no valid masks for this batch.
        if cfg.MASK_ON and np.array(masks_v).shape != (1, 1):
            segms_out = segm_results(nmsed_out, masks_v, im_info)
            segms_res += get_segms_res(total_batch_size, new_lod[0], segms_out,
                                       batch_data, num_id_to_cat_id_map)
        end = time.time()
        print('batch id: {}, time: {}'.format(batch_id, end - start))
    eval_end = time.time()
    total_time = eval_end - eval_start
    print('average time of eval is: {}'.format(total_time / (batch_id + 1)))
    assert len(dts_res) > 0, "The number of valid bbox detected is zero.\n \
        Please use reasonable model and check input data."

    if cfg.MASK_ON:
        assert len(
            segms_res) > 0, "The number of valid mask detected is zero.\n \
            Please use reasonable model and check input data."

    # unicode/six.PY2 shim keeps the JSON writes working on both Pythons.
    with io.open("detection_bbox_result.json", 'w') as outfile:
        encode_func = unicode if six.PY2 else str
        outfile.write(encode_func(json.dumps(dts_res)))
    print("start evaluate bbox using coco api")
    cocoDt = cocoGt.loadRes("detection_bbox_result.json")
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    if cfg.MASK_ON:
        with io.open("detection_segms_result.json", 'w') as outfile:
            encode_func = unicode if six.PY2 else str
            outfile.write(encode_func(json.dumps(segms_res)))
        print("start evaluate mask using coco api")
        cocoDt = cocoGt.loadRes("detection_segms_result.json")
        cocoEval = COCOeval(cocoGt, cocoDt, 'segm')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
def main(use_cuda):
    """
    Advbox example which demonstrate how to use advbox.

    Runs inference over the test set, then generates FGSM adversarial
    examples for up to TOTAL_NUM images, reporting the fooling rate and
    saving original/adversarial/difference images for successful attacks.

    Args:
        use_cuda: when true (and args.use_gpu), run on CUDAPlace(0).
    """
    # base marco
    TOTAL_NUM = 100
    IMG_NAME = 'image'
    LABEL_NAME = 'label'

    # parse args
    args = parser.parse_args()
    print_arguments(args)

    # parameters from arguments
    class_dim = args.class_dim
    model_name = args.model
    target_class = args.target
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]

    if args.log_debug:
        logging.getLogger().setLevel(logging.INFO)

    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    # model definition
    model = models.__dict__[model_name]()
    # declare vars
    image = fluid.layers.data(name=IMG_NAME,
                              shape=image_shape,
                              dtype='float32')
    logits = model.net(input=image, class_dim=class_dim)
    # clone program and graph for inference
    infer_program = fluid.default_main_program().clone(for_test=True)
    # Gradients w.r.t. the input image are required to craft the attack.
    image.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    BATCH_SIZE = 1
    test_reader = paddle.batch(reader.test(TEST_LIST, DATA_PATH),
                               batch_size=BATCH_SIZE)

    # advbox demo
    m = PaddleModel(fluid.default_main_program(),
                    IMG_NAME,
                    LABEL_NAME,
                    logits.name,
                    avg_cost.name, (0, 1),
                    channel_axis=3)
    # Adversarial method: FGSM
    attack = FGSM(m)
    attack_config = {"epsilons": 0.03}
    enable_gpu = use_cuda and args.use_gpu
    place = fluid.CUDAPlace(0) if enable_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # reload model vars
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    # inference: map filename -> predicted label for the clean images
    pred_label = infer(infer_program, image, logits, place, exe)
    # if only inference ,and exit
    if args.inference:
        exit(0)

    print("--------------------adversary-------------------")
    # use test data to generate adversarial examples
    total_count = 0
    fooling_count = 0
    for data in test_reader():
        total_count += 1
        data_img = [data[0][0]]
        filename = data[0][1]
        org_data = data_img[0][0]
        adversary = Adversary(org_data, pred_label[filename])

        #target attack
        if target_class != -1:
            tlabel = target_class
            adversary.set_target(is_targeted_attack=True, target_label=tlabel)

        adversary = attack(adversary, **attack_config)

        if adversary.is_successful():
            fooling_count += 1
            print(
                'attack success, original_label=%d, adversarial_label=%d, count=%d'
                % (pred_label[filename], adversary.adversarial_label,
                   total_count))
            #output original image, adversarial image and difference image
            generation_image(total_count, org_data, pred_label[filename],
                             adversary.adversarial_example,
                             adversary.adversarial_label, "FGSM")
        else:
            print('attack failed, original_label=%d, count=%d' %
                  (pred_label[filename], total_count))

        if total_count >= TOTAL_NUM:
            print(
                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                % (fooling_count, total_count,
                   float(fooling_count) / total_count))
            break
    # inference — re-run prediction after the attack pass (prints per-image
    # results as a side effect; the return value is not used further here).
    pred_label2 = infer(infer_program, image, logits, place, exe)
    print("fgsm attack done")
def train(train_file_list,
          data_args,
          init_model_path,
          save_dir,
          dev_file_list=None):
    """Train VGG-SSD with the legacy paddle v2 trainer API.

    Uses Momentum + L2 regularization with the decay schedule from ``cfg``,
    logs cost/mAP every iteration, saves a gzipped parameter tar every 20
    passes, and (when ``dev_file_list`` is given) evaluates after each save.

    Args:
        train_file_list: training file list for the reader.
        data_args: reader settings.
        init_model_path: optional .tar.gz of parameters to warm-start from.
        save_dir: directory receiving params_pass_NNNNN.tar.gz snapshots.
        dev_file_list: optional validation file list.
    """
    optimizer = paddle.optimizer.Momentum(
        momentum=cfg.TRAIN.MOMENTUM,
        learning_rate=cfg.TRAIN.LEARNING_RATE,
        regularization=paddle.optimizer.L2Regularization(
            rate=cfg.TRAIN.L2REGULARIZATION),
        learning_rate_decay_a=cfg.TRAIN.LEARNING_RATE_DECAY_A,
        learning_rate_decay_b=cfg.TRAIN.LEARNING_RATE_DECAY_B,
        learning_rate_schedule=cfg.TRAIN.LEARNING_RATE_SCHEDULE)

    cost, detect_out = vgg_ssd_net.net_conf("train")

    parameters = paddle.parameters.create(cost)
    if init_model_path is not None:
        assert os.path.isfile(init_model_path), "Invalid model."
        parameters.init_from_tar(gzip.open(init_model_path))

    # detect_out is kept as an extra layer so the detection evaluator metric
    # is computed during training/testing.
    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 extra_layers=[detect_out],
                                 update_equation=optimizer)

    feeding = {"image": 0, "bbox": 1}

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list),
        batch_size=cfg.TRAIN.BATCH_SIZE)  # generate a batch image each time

    if dev_file_list is not None:
        dev_reader = paddle.batch(reader.test(data_args, dev_file_list),
                                  batch_size=cfg.TRAIN.BATCH_SIZE)

    def event_handler(event):
        # Trainer callback: log each iteration; snapshot + evaluate every
        # 20th pass.
        if isinstance(event, paddle.event.EndIteration):
            if (event.batch_id + 1) % 1 == 0:
                print("Pass %d, Batch %d, TrainCost %f, Detection mAP=%f" %
                      (event.pass_id, event.batch_id + 1, event.cost,
                       event.metrics["detection_evaluator"]))
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            if not (event.pass_id + 1) % 20:
                with gzip.open(
                        os.path.join(save_dir,
                                     "params_pass_%05d.tar.gz" %
                                     event.pass_id), "w") as f:
                    trainer.save_parameter_to_tar(f)
            if dev_file_list is not None:
                result = trainer.test(reader=dev_reader, feeding=feeding)
                print("Test with Pass %d, TestCost: %f, Detection mAP=%g" %
                      (event.pass_id, result.cost,
                       result.metrics["detection_evaluator"]))

    trainer.train(reader=train_reader,
                  event_handler=event_handler,
                  num_passes=cfg.TRAIN.NUM_PASS,
                  feeding=feeding)
def train(args, data_args, train_params, val_file_list=None, *more):
    """Placeholder signature guard — see real definition below."""
def eval(args, data_args, test_list, batch_size, model_dir=None):
    """Evaluate a 2-class MobileNet-SSD on a file list and print accumulated mAP.

    Builds the SSD graph, loads weights from ``model_dir``, then runs the
    whole test reader through a test-mode clone of the program, printing the
    accumulated DetectionMAP every 20 batches and a final summary.

    Args:
        args: CLI namespace (nms_threshold, use_gpu, ap_version).
        data_args: reader settings (resize_h/resize_w give the input shape).
        test_list: evaluation file list for the reader.
        batch_size: evaluation batch size.
        model_dir: directory containing the trained weights to load.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    num_classes = 2

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(locs,
                                              confs,
                                              box,
                                              box_var,
                                              nms_threshold=args.nms_threshold)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # NOTE(review): unlike the sibling eval() below, the startup program is
    # never run here, so any variable missing from model_dir stays
    # uninitialized — confirm this is intentional.
    # yapf: disable
    if model_dir:
        def if_exist(var):
            return os.path.exists(os.path.join(model_dir, var.name))
        fluid.io.load_vars(exe, model_dir, predicate=if_exist)
    # yapf: enable
    test_reader = paddle.batch(reader.test(data_args, test_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def test():
        # switch network to test mode (i.e. batch norm test mode)
        test_program = fluid.default_main_program().clone(for_test=True)
        with fluid.program_guard(test_program):
            map_eval = fluid.evaluator.DetectionMAP(
                nmsed_out,
                gt_label,
                gt_box,
                difficult,
                num_classes,
                overlap_threshold=0.5,
                evaluate_difficult=False,
                ap_version=args.ap_version)
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        for batch_id, data in enumerate(test_reader()):
            test_map, = exe.run(test_program,
                                feed=feeder.feed(data),
                                fetch_list=[accum_map])
            if batch_id % 20 == 0:
                print("Batch {0}, map {1}".format(batch_id, test_map))
        # test_map holds the mAP accumulated over the full set after the loop.
        print("Test model {0}, map {1}".format(model_dir, test_map))

    test()
def eval(args):
    """Evaluate a metric-learning model: extract embeddings and report recall@1.

    Feeds the test set through a DataLoader, stacks per-batch features and
    labels, and prints recall_topk(k=1) over the whole set.

    Args:
        args: CLI namespace (model, pretrained_model, image_shape,
            embedding_size, use_gpu, batch_size).
    """
    # parameters from arguments
    model_name = args.model
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.data(name='image',
                       shape=[None] + image_shape,
                       dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')

    test_loader = fluid.io.DataLoader.from_generator(feed_list=[image, label],
                                                     capacity=64,
                                                     use_double_buffer=True,
                                                     iterable=True)

    # model definition
    model = models.__dict__[model_name]()
    out = model.net(input=image, embedding_size=args.embedding_size)

    test_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        # NOTE(review): if_exist is defined but unused — fluid.load below
        # takes no predicate; presumably left over from a load_vars version.
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.load(program=test_program,
                   model_path=pretrained_model,
                   executor=exe)

    test_loader.set_sample_generator(reader.test(args),
                                     batch_size=args.batch_size,
                                     drop_last=False,
                                     places=place)

    fetch_list = [out.name]

    # f collects embedding batches; l collects the matching label batches.
    f, l = [], []
    for batch_id, data in enumerate(test_loader()):
        t1 = time.time()
        [feas] = exe.run(test_program, fetch_list=fetch_list, feed=data)
        label = np.asarray(data[0]['label'])
        label = np.squeeze(label)
        f.append(feas)
        l.append(label)
        t2 = time.time()
        period = t2 - t1
        if batch_id % 20 == 0:
            print("[%s] testbatch %d, time %2.2f sec" % \
                (fmt_time(), batch_id, period))

    f = np.vstack(f)
    l = np.hstack(l)
    recall = recall_topk(f, l, k=1)
    print("[%s] End test %d, test_recall %.5f" % (fmt_time(), len(f), recall))
    sys.stdout.flush()
def train(args, data_args, train_params, train_file_list, val_file_list):
    """Epoch-driven SSD training with py_reader feeding and periodic mAP tests.

    Builds separate train/test programs, optionally runs multi-card via
    ParallelExecutor, evaluates accumulated DetectionMAP every 10th epoch
    (tracking the best model), saves a snapshot each epoch, and emits CE kpi
    lines when ``args.enable_ce`` is set.

    Args:
        args: CLI namespace (model_save_dir, pretrained_model, use_gpu,
            parallel, enable_ce).
        data_args: reader settings.
        train_params: dict with 'batch_size' and 'epoc_num'.
        train_file_list/val_file_list: dataset file lists for the readers.
    """
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model
    use_gpu = args.use_gpu
    parallel = args.parallel
    enable_ce = args.enable_ce
    is_shuffle = True

    if not use_gpu:
        devices_num = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        devices_num = fluid.core.get_cuda_device_count()

    batch_size = train_params['batch_size']
    epoc_num = train_params['epoc_num']
    # py_reader consumes a per-device batch.
    batch_size_per_device = batch_size // devices_num
    num_workers = 8

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    # Continuous-evaluation mode: deterministic data order and fixed seeds.
    if enable_ce:
        import random
        random.seed(0)
        np.random.seed(0)
        is_shuffle = False
        startup_prog.random_seed = 111
        train_prog.random_seed = 111
        test_prog.random_seed = 111

    train_py_reader, loss = build_program(
        main_prog=train_prog,
        startup_prog=startup_prog,
        train_params=train_params,
        is_train=True)
    test_py_reader, map_eval, _, _ = build_program(
        main_prog=test_prog,
        startup_prog=startup_prog,
        train_params=train_params,
        is_train=False)

    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Warm-start: load only variables whose file exists under the directory.
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    if parallel:
        loss.persistable = True
        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = True
        build_strategy.memory_optimize = True
        train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                           use_cuda=use_gpu,
                                           loss_name=loss.name,
                                           build_strategy=build_strategy)

    train_reader = reader.train(data_args,
                                train_file_list,
                                batch_size_per_device,
                                shuffle=is_shuffle,
                                num_workers=num_workers,
                                enable_ce=enable_ce)
    test_reader = reader.test(data_args, val_file_list, batch_size)
    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    def save_model(postfix, main_prog):
        # Overwrite any existing snapshot with the same postfix.
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path, main_program=main_prog)

    best_map = 0.

    def test(epoc_id, best_map):
        # Drain the test py_reader, accumulating mAP; EOFException marks the
        # end of the dataset.
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        every_epoc_map = []  # for CE
        test_py_reader.start()
        try:
            batch_id = 0
            while True:
                test_map, = exe.run(test_prog, fetch_list=[accum_map])
                if batch_id % 10 == 0:
                    every_epoc_map.append(test_map)
                    print("Batch {0}, map {1}".format(batch_id, test_map))
                batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()
        mean_map = np.mean(every_epoc_map)
        print("Epoc {0}, test map {1}".format(epoc_id, test_map[0]))
        if test_map[0] > best_map:
            best_map = test_map[0]
            save_model('best_model', test_prog)
        return best_map, mean_map

    total_time = 0.0
    for epoc_id in range(epoc_num):
        # Re-create and re-attach the reader each epoch.
        train_reader = reader.train(data_args,
                                    train_file_list,
                                    batch_size_per_device,
                                    shuffle=is_shuffle,
                                    num_workers=num_workers,
                                    enable_ce=enable_ce)
        train_py_reader.decorate_paddle_reader(train_reader)
        epoch_idx = epoc_id + 1
        start_time = time.time()
        prev_start_time = start_time
        every_epoc_loss = []
        batch_id = 0
        train_py_reader.start()
        while True:
            try:
                prev_start_time = start_time
                start_time = time.time()
                if parallel:
                    loss_v, = train_exe.run(fetch_list=[loss.name])
                else:
                    loss_v, = exe.run(train_prog, fetch_list=[loss])
                loss_v = np.mean(np.array(loss_v))
                every_epoc_loss.append(loss_v)
                if batch_id % 10 == 0:
                    print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".
                          format(epoc_id, batch_id, loss_v,
                                 start_time - prev_start_time))
                batch_id += 1
            except (fluid.core.EOFException, StopIteration):
                # NOTE(review): train_reader() creates a *new* generator and
                # closes that, not the one the py_reader consumed — confirm
                # whether train_reader.close()/no-op was intended.
                train_reader().close()
                train_py_reader.reset()
                break
        end_time = time.time()
        total_time += end_time - start_time

        if epoc_id % 10 == 0 or epoc_id == epoc_num - 1:
            best_map, mean_map = test(epoc_id, best_map)
            print("Best test map {0}".format(best_map))
        # save model
        save_model(str(epoc_id), train_prog)

    if enable_ce:
        train_avg_loss = np.mean(every_epoc_loss)
        if devices_num == 1:
            print("kpis train_cost %s" % train_avg_loss)
            print("kpis test_acc %s" % mean_map)
            print("kpis train_speed %s" % (total_time / epoch_idx))
        else:
            print("kpis train_cost_card%s %s" % (devices_num, train_avg_loss))
            print("kpis test_acc_card%s %s" % (devices_num, mean_map))
            print("kpis train_speed_card%s %f" %
                  (devices_num, total_time / epoch_idx))
def eval(args, data_args, test_list, batch_size, model_dir=None):
    """Evaluate a MobileNet-SSD model on a COCO-style test list.

    Builds the SSD inference graph, optionally restores weights from
    ``model_dir``, runs detection over ``test_list``, dumps the detections to
    ``detection_result.json`` and scores them with the COCO API.

    Args:
        args: parsed CLI namespace; reads ``nms_threshold`` and ``use_gpu``.
        data_args: reader settings; reads ``resize_h``/``resize_w``/``data_dir``.
        test_list: annotation file name under ``data_args.data_dir``.
        batch_size: evaluation batch size.
        model_dir: optional directory of persisted variables to restore.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    num_classes = 91  # 90 COCO categories + background

    image = fluid.layers.data(
        name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    gt_iscrowd = fluid.layers.data(
        name='gt_iscrowd', shape=[1], dtype='int32', lod_level=1)
    # (image_id, width, height) per sample; used to rescale boxes below.
    gt_image_info = fluid.layers.data(
        name='gt_image_id', shape=[3], dtype='int32')

    locs, confs, box, box_var = build_mobilenet_ssd(image, num_classes,
                                                    image_shape)
    nmsed_out = fluid.layers.detection_output(
        locs, confs, box, box_var, nms_threshold=args.nms_threshold)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # yapf: disable
    if model_dir:
        # Restore only variables that actually have a file on disk.
        def if_exist(var):
            return os.path.exists(os.path.join(model_dir, var.name))
        fluid.io.load_vars(exe, model_dir, predicate=if_exist)
    # yapf: enable

    test_reader = reader.test(data_args, test_list, batch_size)
    feeder = fluid.DataFeeder(
        place=place,
        feed_list=[image, gt_box, gt_label, gt_iscrowd, gt_image_info])

    def get_dt_res(nmsed_out_v, data):
        """Convert one fetched NMS LoD tensor into COCO-format detections."""
        dts_res = []
        lod = nmsed_out_v[0].lod()[0]
        nmsed_out_v = np.array(nmsed_out_v[0])
        # The last batch may be short; the LoD must have one offset per image
        # plus the leading zero.
        real_batch_size = min(batch_size, len(data))
        # BUG FIX: the message previously reported `batch_size`, which is
        # misleading for a short final batch; report the checked value.
        assert (len(lod) == real_batch_size + 1), \
            "Error Lod Tensor offset dimension. Lod({}) vs. batch_size({})".format(
                len(lod), real_batch_size)
        k = 0
        for i in range(real_batch_size):
            dt_num_this_img = lod[i + 1] - lod[i]
            # data[i][4] is the gt_image_id field: (id, width, height).
            image_id = int(data[i][4][0])
            image_width = int(data[i][4][1])
            image_height = int(data[i][4][2])
            for j in range(dt_num_this_img):
                dt = nmsed_out_v[k]
                k = k + 1
                category_id, score, xmin, ymin, xmax, ymax = dt.tolist()
                # Boxes are normalized to [0, 1]; clip then scale to pixels.
                xmin = max(min(xmin, 1.0), 0.0) * image_width
                ymin = max(min(ymin, 1.0), 0.0) * image_height
                xmax = max(min(xmax, 1.0), 0.0) * image_width
                ymax = max(min(ymax, 1.0), 0.0) * image_height
                w = xmax - xmin
                h = ymax - ymin
                bbox = [xmin, ymin, w, h]  # COCO uses (x, y, w, h)
                dt_res = {
                    'image_id': image_id,
                    'category_id': category_id,
                    'bbox': bbox,
                    'score': score
                }
                dts_res.append(dt_res)
        return dts_res

    def test():
        """Run detection over the whole reader and score with pycocotools."""
        dts_res = []
        for batch_id, data in enumerate(test_reader()):
            nmsed_out_v = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[nmsed_out],
                                  return_numpy=False)
            if batch_id % 20 == 0:
                print("Batch {0}".format(batch_id))
            dts_res += get_dt_res(nmsed_out_v, data)

        with io.open("detection_result.json", 'w') as outfile:
            # json.dumps returns str on py3 and bytes-compatible str on py2;
            # io.open('w') needs text, hence the unicode/str bridge.
            encode_func = unicode if six.PY2 else str
            outfile.write(encode_func(json.dumps(dts_res)))
        print("start evaluate using coco api")
        cocoGt = COCO(os.path.join(data_args.data_dir, test_list))
        cocoDt = cocoGt.loadRes("detection_result.json")
        cocoEval = COCOeval(cocoGt, cocoDt, "bbox")
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

    test()
def parallel_exe(args,
                 train_file_list,
                 val_file_list,
                 data_args,
                 learning_rate,
                 batch_size,
                 num_passes,
                 model_save_dir='model',
                 pretrained_model=None):
    """Train MobileNet-SSD, optionally with ParallelExecutor, and report KPIs.

    Args:
        args: parsed CLI namespace; reads ``ap_version``, ``use_gpu``,
            ``parallel``, ``iterations``, ``skip_batch_num``, ``gpu_card_num``.
        train_file_list / val_file_list: dataset file lists.
        data_args: reader settings (``dataset``, ``resize_h``, ``resize_w``).
        learning_rate: base LR for the piecewise-decay schedule.
        batch_size: global batch size.
        num_passes: number of training passes (epochs).
        model_save_dir: directory for persisted snapshots.
        pretrained_model: optional directory of weights to warm-start from.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    if data_args.dataset == 'coco':
        num_classes = 81
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    image = fluid.layers.data(
        name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    difficult = fluid.layers.data(
        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(
        locs, confs, box, box_var, nms_threshold=0.45)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    # Clone before adding the optimizer so the test program stays inference-only.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(
            nmsed_out,
            gt_label,
            gt_box,
            difficult,
            num_classes,
            overlap_threshold=0.5,
            evaluate_difficult=False,
            ap_version=args.ap_version)

    if data_args.dataset == 'coco':
        # learning rate decay in 12, 19 pass, respectively
        # `//` keeps the boundaries integral (same result as py2 int `/`).
        if '2014' in train_file_list:
            epocs = 82783 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        elif '2017' in train_file_list:
            epocs = 118287 // batch_size
            # BUG FIX: was `epcos * 12` — a NameError on this branch.
            boundaries = [epocs * 12, epocs * 19]
    elif data_args.dataset == 'pascalvoc':
        epocs = 19200 // batch_size
        boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
    # NOTE(review): piecewise_decay expects len(values) == len(boundaries) + 1,
    # which only holds for the pascalvoc schedule — confirm the coco LR values.
    values = [
        learning_rate, learning_rate * 0.5, learning_rate * 0.25,
        learning_rate * 0.1, learning_rate * 0.01
    ]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005), )
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # BUG FIX: default_startup_program is a function; the original assigned
    # `random_seed` onto the function object, so the program was never seeded.
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        # Restore only variables that actually have a file on disk.
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_gpu, loss_name=loss.name)

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list), batch_size=batch_size)
    test_reader = paddle.batch(
        reader.test(data_args, val_file_list), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        place=place, feed_list=[image, gt_box, gt_label, difficult])

    def save_model(postfix):
        """Persist all variables under model_save_dir/<postfix>, replacing any
        previous snapshot with the same name."""
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        # BUG FIX: was a Python-2-only `print` statement (syntax error on py3).
        print('save models to %s' % model_path)
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.

    def test(pass_id, best_map):
        """Compute accumulated mAP over the validation set; snapshot on best."""
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
        for data in test_reader():
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        if test_map[0] > best_map:
            best_map = test_map[0]
            save_model('best_model')
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    train_num = 0
    total_train_time = 0.0
    total_iters = 0
    for pass_id in range(num_passes):
        every_pass_loss = []
        iter = 0
        pass_duration = 0.0
        for batch_id, data in enumerate(train_reader()):
            batch_start = time.time()
            if iter == args.iterations:
                break
            # Skip ragged final batches that cannot be split across devices.
            if len(data) < devices_num:
                continue
            if args.parallel:
                loss_v, = train_exe.run(fetch_list=[loss.name],
                                        feed=feeder.feed(data))
            else:
                loss_v, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v, time.time() - batch_start))
            # Exclude the first pass's warm-up batches from throughput stats.
            if iter >= args.skip_batch_num or pass_id != 0:
                batch_duration = time.time() - batch_start
                pass_duration += batch_duration
                train_num += len(data)
            every_pass_loss.append(loss_v)
            iter += 1
            total_iters += 1
        #test(pass_id, best_map)
        total_train_time += pass_duration
        print("Pass:%d, Loss:%f, Handle Images Duration: %f\n" %
              (pass_id, np.mean(every_pass_loss), pass_duration))
        if pass_id == num_passes - 1:
            examples_per_sec = train_num / total_train_time
            train_cost_kpi.add_record(np.mean(every_pass_loss))
            train_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float'))
            four_card_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float'))
            if args.gpu_card_num == 1:
                train_cost_kpi.persist()
                train_speed_kpi.persist()
            else:
                four_card_speed_kpi.persist()
    print("Best test map {0}".format(best_map))