def infer():
    """Run static-graph YOLOv3 inference and draw the detections on images.

    Builds the inference network, optionally restores the variables found
    under ``cfg.weights``, then processes either the single image named by
    ``cfg.image_name`` or every ``.jpg``/``.png`` file in ``cfg.image_path``.
    Each fetched detection row is read as ``[label, score, box...]`` and
    passed to ``box_utils.draw_boxes_on_image`` together with
    ``cfg.draw_thresh``.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if not os.path.exists('output'):
        os.mkdir('output')

    model = YOLOv3(is_train=False)
    model.build_model()
    outputs = model.get_pred()
    input_size = cfg.input_size
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # yapf: disable
    if cfg.weights:
        def if_exist(var):
            # Only load variables that actually have a file in cfg.weights.
            return os.path.exists(os.path.join(cfg.weights, var.name))
        fluid.io.load_vars(exe, cfg.weights, predicate=if_exist)
    # yapf: enable

    # you can save inference model by following code
    # fluid.io.save_inference_model("./output/yolov3",
    #                               feeded_var_names=['image', 'im_shape'],
    #                               target_vars=outputs,
    #                               executor=exe)

    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
    # The fetch target never changes, so resolve its name once.
    fetch_names = [outputs.name]

    if cfg.image_name is not None:
        image_names = [cfg.image_name]
    else:
        image_names = [
            name for name in os.listdir(cfg.image_path)
            if name.split('.')[-1] in ['jpg', 'png']
        ]

    # Label names do not depend on the image; look them up once instead of
    # once per image.
    label_names, _ = reader.get_label_infos()

    for image_name in image_names:
        image_path = os.path.join(cfg.image_path, image_name)
        infer_reader = reader.infer(input_size, image_path)
        data = next(infer_reader())

        results = exe.run(fetch_list=fetch_names,
                          feed=feeder.feed(data),
                          return_numpy=False,
                          use_program_cache=True)
        bboxes = np.array(results[0])
        # Anything other than 6 columns is treated as "no detections".
        if bboxes.shape[1] != 6:
            print("No object found in {}".format(image_name))
            continue

        labels = bboxes[:, 0].astype('int32')
        scores = bboxes[:, 1].astype('float32')
        boxes = bboxes[:, 2:].astype('float32')

        box_utils.draw_boxes_on_image(image_path, boxes, scores, labels,
                                      label_names, cfg.draw_thresh)
def infer():
    """Run dygraph YOLOv3 inference and draw the detections on images.

    Creates the model inside a ``fluid.dygraph.guard``, optionally restores
    the state dict stored at ``cfg.weights``, switches the model to
    evaluation mode, then processes either ``cfg.image_name`` or every
    ``.jpg``/``.png`` file in ``cfg.image_path``. Each output row is read as
    ``[label, score, box...]`` and passed to
    ``box_utils.draw_boxes_on_image`` together with ``cfg.draw_thresh``.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if not os.path.exists('output'):
        os.mkdir('output')

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = YOLOv3(3, is_train=False)
        input_size = cfg.input_size
        # yapf: disable
        if cfg.weights:
            restore, _ = fluid.load_dygraph(cfg.weights)
            model.set_dict(restore)
        # yapf: enable
        # Match the dygraph eval() entry point, which puts the model in
        # evaluation mode before running any forward pass.
        model.eval()

        if cfg.image_name is not None:
            image_names = [cfg.image_name]
        else:
            image_names = [
                name for name in os.listdir(cfg.image_path)
                if name.split('.')[-1] in ['jpg', 'png']
            ]

        # Label names do not depend on the image; look them up once instead
        # of once per image.
        label_names, _ = reader.get_label_infos()

        for image_name in image_names:
            image_path = os.path.join(cfg.image_path, image_name)
            infer_reader = reader.infer(input_size, image_path)
            data = next(infer_reader())

            # Each sample is indexed as x[0] -> image, x[2] -> image shape.
            img_data = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img_data)
            im_shape_data = np.array([x[2] for x in data]).astype('int32')
            im_shape = to_variable(im_shape_data)

            outputs = model(img, None, None, None, None, im_shape)
            bboxes = outputs.numpy()
            # Anything other than 6 columns is treated as "no detections".
            if bboxes.shape[1] != 6:
                print("No object found in {}".format(image_name))
                continue

            labels = bboxes[:, 0].astype('int32')
            scores = bboxes[:, 1].astype('float32')
            boxes = bboxes[:, 2:].astype('float32')

            box_utils.draw_boxes_on_image(image_path, boxes, scores, labels,
                                          label_names, cfg.draw_thresh)
def main():
    """Parse the command line and launch training, optionally profiled.

    When ``args.profile`` is set, training runs inside the CUDA profiler
    (GPU) or the generic profiler (CPU); otherwise it runs directly.
    """
    cli_args = parser.parse_args()
    print_arguments(cli_args)
    check_gpu(cli_args.use_gpu)

    # Fast path: no profiling requested.
    if not cli_args.profile:
        train(cli_args)
        return

    if cli_args.use_gpu:
        profiling_scope = profiler.cuda_profiler("cuda_profiler.txt", 'csv')
    else:
        profiling_scope = profiler.profiler("CPU", sorted_key='total')

    with profiling_scope:
        train(cli_args)
outs = train_exe.run(fetch_list=[v.name for v in fetch_list], feed=feeder.feed(data)) stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])} train_stats.update(stats) logs = train_stats.log() stats = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format( now_time(), iter_id, np.mean(outs[-1]), logs, start_time - prev_start_time) print(stats) sys.stdout.flush() if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0: save_model("model_iter{}".format(iter_id)) if (iter_id + 1) == cfg.max_iter: break end_time = time.time() total_time = end_time - start_time last_loss = np.array(outs[0]).mean() if cfg.use_pyreader: train_loop_pyreader() else: train_loop() save_model('model_final') if __name__ == '__main__': args = parse_args() print_arguments(args) check_gpu(args.use_gpu) train()
force_cpu=False).astype('float32') logit = fluid.layers.transpose(logit, [0, 2, 3, 1]) logit = fluid.layers.reshape(logit, [-1, num_classes]) label = fluid.layers.reshape(label, [-1, 1]) label = fluid.layers.cast(label, 'int64') label_nignore = fluid.layers.reshape(label_nignore, [-1, 1]) logit = fluid.layers.softmax(logit, use_cudnn=False) loss = fluid.layers.cross_entropy(logit, label, ignore_index=255) label_nignore.stop_gradient = True label.stop_gradient = True return loss, label_nignore args = parser.parse_args() utility.print_arguments(args) utility.check_gpu(args.use_gpu) models.clean() models.bn_momentum = 0.9997 models.dropout_keep_prop = 0.9 models.label_number = args.num_classes models.default_norm_type = args.norm_type deeplabv3p = models.deeplabv3p sp = fluid.Program() tp = fluid.Program() # only for ce if args.enable_ce: SEED = 102 sp.random_seed = SEED
def main():
    """Parse the command line, echo the arguments, and run evaluation."""
    parsed = parser.parse_args()
    print_arguments(parsed)

    # Validate the GPU flag before handing over to the evaluation routine.
    check_gpu(parsed.use_gpu)
    evaluate(parsed)
def eval():
    """Evaluate the dygraph YOLOv3 model with the COCO API.

    Runs the model over ``reader.test`` with batch size 1, converts every
    detection row ``[label, score, x1, y1, x2, y2]`` into a COCO result
    dict, writes them to ``yolov3_result.json`` and runs ``COCOeval`` on
    the ``bbox`` metric.

    Raises:
        ValueError: if ``cfg.dataset`` contains neither ``'2014'`` nor
            ``'2017'``, so no annotation file can be selected.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if '2014' in cfg.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in cfg.dataset:
        test_list = 'annotations/instances_val2017.json'
    else:
        # Fail before inference instead of hitting an unbound name after it.
        raise ValueError(
            "Unsupported dataset {!r}: expected a name containing "
            "'2014' or '2017'".format(cfg.dataset))

    if cfg.debug:
        if not os.path.exists('output'):
            os.mkdir('output')

    dts_res = []
    total_time = 0
    timed_batches = 0

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        model = YOLOv3(3, is_train=False)
        # yapf: disable
        if cfg.weights:
            restore, _ = fluid.load_dygraph(cfg.weights)
            model.set_dict(restore)
        # yapf: enable
        # Evaluation mode is required whether or not weights were restored.
        model.eval()

        input_size = cfg.input_size
        # batch_size for test must be 1
        test_reader = reader.test(input_size, 1)
        label_names, label_ids = reader.get_label_infos()
        if cfg.debug:
            print("Load in labels {} with ids {}".format(label_names,
                                                         label_ids))

        def get_pred_result(boxes, scores, labels, im_id):
            """Convert one image's detections into COCO result dicts."""
            result = []
            for box, score, label in zip(boxes, scores, labels):
                x1, y1, x2, y2 = box
                # Corner format -> [x, y, width, height], inclusive pixels.
                w = x2 - x1 + 1
                h = y2 - y1 + 1
                bbox = [x1, y1, w, h]

                result.append({
                    'image_id': int(im_id),
                    'category_id': label_ids[int(label)],
                    'bbox': list(map(float, bbox)),
                    'score': float(score)
                })
            return result

        for iter_id, data in enumerate(test_reader()):
            start_time = time.time()

            # Each sample is indexed as x[0] -> image, x[1] -> image id,
            # x[2] -> image shape.
            img_data = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img_data)

            im_id_data = np.array([x[1] for x in data]).astype('int32')
            im_id_var = to_variable(im_id_data)

            im_shape_data = np.array([x[2] for x in data]).astype('int32')
            im_shape = to_variable(im_shape_data)

            batch_outputs = model(img, None, None, None, im_id_var, im_shape)
            nmsed_boxes = batch_outputs.numpy()
            # Anything other than 6 columns is treated as "no detections".
            if nmsed_boxes.shape[1] != 6:
                continue

            # Batch size is 1, so the only sample carries the image id.
            image_id = data[0][1]
            labels = nmsed_boxes[:, 0]
            scores = nmsed_boxes[:, 1]
            boxes = nmsed_boxes[:, 2:6]
            dts_res += get_pred_result(boxes, scores, labels, image_id)

            end_time = time.time()
            print("batch id: {}, time: {}".format(iter_id,
                                                  end_time - start_time))
            total_time += end_time - start_time
            timed_batches += 1

    with open("yolov3_result.json", 'w') as outfile:
        json.dump(dts_res, outfile)
    print("start evaluate detection result with coco api")
    coco = COCO(os.path.join(cfg.data_dir, test_list))
    cocoDt = coco.loadRes("yolov3_result.json")
    cocoEval = COCOeval(coco, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    print("evaluate done.")

    # Average over the batches that were actually timed; the last loop index
    # is off by one and is zero (or undefined) for tiny or empty readers.
    if timed_batches:
        print("Time per batch: {}".format(total_time / timed_batches))
def train():
    """Train YOLOv3 with the static-graph executor.

    Builds the network and a Momentum optimizer with a warm-up plus
    piecewise-decay learning rate, optionally loads pretrained variables
    from ``cfg.pretrain``, compiles the main program for data-parallel
    execution and iterates from ``cfg.start_iter`` to ``cfg.max_iter``.
    Persistables are saved every ``cfg.snapshot_iter`` iterations and once
    more as ``model_final`` when the loop ends. When ``args.is_profiler``
    is set, the function profiles iterations 5-10 and returns early.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices_num = get_device_num() if cfg.use_gpu else 1
    print("Found {} CUDA/CPU devices.".format(devices_num))

    # Piecewise schedule: the rate is multiplied by gamma at each boundary.
    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            # Only load variables that actually have a file in cfg.pretrain.
            return os.path.exists(os.path.join(cfg.pretrain, var.name))

        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False  #gc and memory optimize may conflict
    syncbn = cfg.syncbn
    if (syncbn and devices_num <= 1) or num_trainers > 1:
        print("Disable syncbn in single device")
        syncbn = False
    build_strategy.sync_batch_norm = syncbn

    exec_strategy = fluid.ExecutionStrategy()
    if cfg.use_gpu and num_trainers > 1:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        exec_strategy.num_threads = 1

    compile_program = fluid.compiler.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter

    shuffle = True
    if args.enable_ce:
        shuffle = False
    shuffle_seed = None
    # NOTE: yolov3 is a special model, if num_trainers > 1, each process
    # trains the complete dataset.
    # if num_trainers > 1: shuffle_seed = 1
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=shuffle,
        shuffle_seed=shuffle_seed,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocess_reader=cfg.use_multiprocess_reader,
        num_workers=cfg.worker_num)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    def save_model(postfix):
        """Save all persistables under model_save_dir/postfix, replacing it."""
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            # The reported time is the gap between consecutive iteration
            # starts, i.e. the duration of the previous iteration.
            snapshot_time += start_time - prev_start_time
            lr = np.array(
                fluid.global_scope().find_var('learning_rate').get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0],
                smoothed_loss.get_mean_value(), start_time - prev_start_time))
            sys.stdout.flush()

            #add profiler tools
            if args.is_profiler and iter_id == 5:
                profiler.start_profiler("All")
            elif args.is_profiler and iter_id == 10:
                profiler.stop_profiler("total", args.profiler_path)
                return

            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                # Adjacent literals instead of a backslash continuation, so
                # no stray characters or indentation end up in the message.
                print("Snapshot {} saved, average loss: {}, "
                      "average time: {}".format(
                          iter_id + 1,
                          snapshot_loss / float(cfg.snapshot_iter),
                          snapshot_time / float(cfg.snapshot_iter)))
                if args.enable_ce and iter_id == cfg.max_iter - 1:
                    if devices_num == 1:
                        print("kpis\ttrain_cost_1card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_1card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                    else:
                        print("kpis\ttrain_cost_8card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_8card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))

                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        py_reader.reset()

    save_model('model_final')
def train():
    """Train YOLOv3 in dygraph mode, optionally with data parallelism.

    Creates the model inside a ``fluid.dygraph.guard``, optionally restores
    ``cfg.pretrain`` into ``model.block`` and/or ``cfg.finetune`` into the
    whole model, builds a Momentum optimizer with piecewise decay plus
    linear warm-up, and iterates over ``reader.train``. A checkpoint is
    written with ``fluid.save_dygraph`` every ``cfg.snapshot_iter``
    iterations; in data-parallel runs only local rank 0 saves.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    devices_num = get_device_num() if cfg.use_gpu else 1
    print("Found {} CUDA/CPU devices.".format(devices_num))

    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    # Honour cfg.use_gpu for single-process runs instead of always picking
    # CUDAPlace(0); the data-parallel branch is unchanged.
    if cfg.use_data_parallel:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    elif cfg.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        model = YOLOv3(3, is_train=True)

        if cfg.pretrain:
            restore, _ = fluid.load_dygraph(cfg.pretrain)
            model.block.set_dict(restore)

        if cfg.finetune:
            restore, _ = fluid.load_dygraph(cfg.finetune)
            model.set_dict(restore, use_structured_name=True)

        if args.use_data_parallel:
            model = fluid.dygraph.parallel.DataParallel(model, strategy)

        # Piecewise schedule: the rate is multiplied by gamma at each
        # boundary, wrapped in a linear warm-up from 0 to the base rate.
        boundaries = cfg.lr_steps
        gamma = cfg.lr_gamma
        step_num = len(cfg.lr_steps)
        learning_rate = cfg.learning_rate
        values = [learning_rate * (gamma ** i) for i in range(step_num + 1)]

        lr = fluid.dygraph.PiecewiseDecay(
            boundaries=boundaries,
            values=values,
            begin=args.start_iter)

        lr = fluid.layers.linear_lr_warmup(
            learning_rate=lr,
            warmup_steps=cfg.warm_up_iter,
            start_lr=0.0,
            end_lr=cfg.learning_rate,
        )

        optimizer = fluid.optimizer.Momentum(
            learning_rate=lr,
            regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
            momentum=cfg.momentum,
            parameter_list=model.parameters()
        )

        start_time = time.time()

        input_size = cfg.input_size
        shuffle = True
        shuffle_seed = None
        total_iter = cfg.max_iter - cfg.start_iter
        mixup_iter = total_iter - cfg.no_mixup_iter

        random_sizes = [cfg.input_size]
        if cfg.random_shape:
            random_sizes = [32 * i for i in range(10, 20)]

        train_reader = reader.train(
            input_size,
            batch_size=cfg.batch_size,
            shuffle=shuffle,
            shuffle_seed=shuffle_seed,
            total_iter=total_iter * devices_num,
            mixup_iter=mixup_iter * devices_num,
            random_sizes=random_sizes,
            use_multiprocess_reader=cfg.use_multiprocess_reader,
            num_workers=cfg.worker_num)

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        smoothed_loss = SmoothedValue()

        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()

            # Each sample is indexed as x[0] -> image, x[1] -> gt boxes,
            # x[2] -> gt labels, x[3] -> gt scores.
            img = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img)

            gt_box = np.array([x[1] for x in data]).astype('float32')
            gt_box = to_variable(gt_box)

            gt_label = np.array([x[2] for x in data]).astype('int32')
            gt_label = to_variable(gt_label)

            gt_score = np.array([x[3] for x in data]).astype('float32')
            gt_score = to_variable(gt_score)

            loss = model(img, gt_box, gt_label, gt_score, None, None)
            smoothed_loss.add_value(np.mean(loss.numpy()))

            # The reported time is the gap between consecutive iteration
            # starts, i.e. the duration of the previous iteration.
            print("Iter {:d}, loss {:.6f}, time {:.5f}".format(
                iter_id,
                smoothed_loss.get_mean_value(),
                start_time - prev_start_time))

            # Run backward exactly once per step: the data-parallel path
            # scales the loss and all-reduces gradients, the plain path
            # just back-propagates.
            if args.use_data_parallel:
                loss = model.scale_loss(loss)
                loss.backward()
                model.apply_collective_grads()
            else:
                loss.backward()

            optimizer.minimize(loss)
            model.clear_gradients()

            # In data-parallel runs only local rank 0 writes checkpoints.
            save_parameters = (not args.use_data_parallel) or (
                fluid.dygraph.parallel.Env().local_rank == 0)
            if (save_parameters and iter_id > 1
                    and iter_id % cfg.snapshot_iter == 0):
                fluid.save_dygraph(
                    model.state_dict(),
                    os.path.join(args.model_save_dir,
                                 "yolov3_{}".format(iter_id)))
def eval():
    """Evaluate the static-graph YOLOv3 model with the COCO API.

    Builds the inference network, optionally restores the variables found
    under ``cfg.weights``, runs ``reader.test`` with batch size 1, converts
    every detection row ``[label, score, x1, y1, x2, y2]`` into a COCO
    result dict, writes them to ``yolov3_result.json`` and runs
    ``COCOeval`` on the ``bbox`` metric.

    Raises:
        ValueError: if ``cfg.dataset`` contains neither ``'2014'`` nor
            ``'2017'``, so no annotation file can be selected.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if '2014' in cfg.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in cfg.dataset:
        test_list = 'annotations/instances_val2017.json'
    else:
        # Fail before inference instead of hitting an unbound name after it.
        raise ValueError(
            "Unsupported dataset {!r}: expected a name containing "
            "'2014' or '2017'".format(cfg.dataset))

    if cfg.debug:
        if not os.path.exists('output'):
            os.mkdir('output')

    model = YOLOv3(is_train=False)
    model.build_model()
    outputs = model.get_pred()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # yapf: disable
    if cfg.weights:
        def if_exist(var):
            # Only load variables that actually have a file in cfg.weights.
            return os.path.exists(os.path.join(cfg.weights, var.name))
        fluid.io.load_vars(exe, cfg.weights, predicate=if_exist)
    # yapf: enable

    # you can save inference model by following code
    # fluid.io.save_inference_model("./output/yolov3",
    #                               feeded_var_names=['image', 'im_shape'],
    #                               target_vars=outputs,
    #                               executor=exe)

    input_size = cfg.input_size
    test_reader = reader.test(input_size, 1)
    label_names, label_ids = reader.get_label_infos()
    if cfg.debug:
        print("Load in labels {} with ids {}".format(label_names, label_ids))
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def get_pred_result(boxes, scores, labels, im_id):
        """Convert one image's detections into COCO result dicts."""
        result = []
        for box, score, label in zip(boxes, scores, labels):
            x1, y1, x2, y2 = box
            # Corner format -> [x, y, width, height], inclusive pixels.
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            bbox = [x1, y1, w, h]

            result.append({
                'image_id': im_id,
                'category_id': label_ids[int(label)],
                'bbox': list(map(float, bbox)),
                'score': float(score)
            })
        return result

    dts_res = []
    # The fetch target never changes, so resolve its name once.
    fetch_names = [outputs.name]
    total_time = 0
    timed_batches = 0
    for batch_id, batch_data in enumerate(test_reader()):
        start_time = time.time()
        batch_outputs = exe.run(fetch_list=fetch_names,
                                feed=feeder.feed(batch_data),
                                return_numpy=False,
                                use_program_cache=True)
        # Level-0 LoD offsets delimit the rows that belong to each image.
        lod = batch_outputs[0].lod()[0]
        nmsed_boxes = np.array(batch_outputs[0])
        # Anything other than 6 columns is treated as "no detections".
        if nmsed_boxes.shape[1] != 6:
            continue

        for i, (start, end) in enumerate(zip(lod[:-1], lod[1:])):
            im_id = batch_data[i][1]
            if start == end:
                continue
            nmsed_box = nmsed_boxes[start:end, :]
            labels = nmsed_box[:, 0]
            scores = nmsed_box[:, 1]
            boxes = nmsed_box[:, 2:6]
            dts_res += get_pred_result(boxes, scores, labels, im_id)

        end_time = time.time()
        print("batch id: {}, time: {}".format(batch_id,
                                              end_time - start_time))
        total_time += end_time - start_time
        timed_batches += 1

    with open("yolov3_result.json", 'w') as outfile:
        json.dump(dts_res, outfile)
    print("start evaluate detection result with coco api")
    coco = COCO(os.path.join(cfg.data_dir, test_list))
    cocoDt = coco.loadRes("yolov3_result.json")
    cocoEval = COCOeval(coco, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    print("evaluate done.")

    # Average over the batches that were actually timed; the last loop index
    # is off by one and is zero (or undefined) for tiny or empty readers.
    if timed_batches:
        print("Time per batch: {}".format(total_time / timed_batches))