def yolov3_predict(instance, strategy):
    """Run YOLOv3 detection on one serialized image and return the annotated image.

    Args:
        instance: Mapping with a ``'data'`` entry (JSON-encoded nested list of
            pixel values) and a ``'dtype'`` entry (NumPy dtype used to decode it).
        strategy: Not used by this function.

    Returns:
        ``{"status": 1, "err_msg": ...}`` when the checkpoint file is missing;
        otherwise ``{"status": 0, "instance": {...}}`` where the inner dict holds
        the ``shape``, ``dtype`` and JSON-encoded ``data`` of the image returned
        by ``draw_boxes_in_image``.
    """
    network = YOLOV3DarkNet53(is_training=False)

    pretrained_ckpt = '/dataset/ckpt-files/shanshui_full/yolov3.ckpt'
    if not os.path.exists(pretrained_ckpt):
        return {"status": 1, "err_msg": "The yolov3.ckpt file does not exist!"}

    # Drop optimizer moments and strip the 'yolo_network.' wrapper prefix so the
    # remaining names can be loaded into the bare network.
    wrapper_prefix = 'yolo_network.'
    weights = {}
    for name, value in load_checkpoint(pretrained_ckpt).items():
        if name.startswith('moments.'):
            continue
        if name.startswith(wrapper_prefix):
            name = name[len(wrapper_prefix):]
        weights[name] = value
    load_param_into_net(network, weights)

    config = ConfigYOLOV3DarkNet53()

    # init detection engine
    args = edict()
    args.ignore_threshold = 0.01
    args.nms_thresh = 0.5
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    print('Start inference....')
    network.set_train(False)

    ori_image = np.array(json.loads(instance['data']), dtype=instance['dtype'])
    image, image_shape = data_preprocess(ori_image, config)

    batch = Tensor(image.reshape(1, 3, 416, 416), ms.float32)
    output_big, output_me, output_small = network(batch, input_shape)
    output_big = output_big.asnumpy()
    output_me = output_me.asnumpy()
    output_small = output_small.asnumpy()

    per_batch_size = 1
    detection.detect([output_small, output_me, output_big], per_batch_size, image_shape, config)
    detection.do_nms_for_results()
    out_img = detection.draw_boxes_in_image(ori_image)

    for box in detection.det_boxes:
        bbox = box['bbox']
        print("x: ", bbox[0])
        print("y: ", bbox[1])
        print("h: ", bbox[2])
        print("w: ", bbox[3])
        print("score: ", round(box['score'], 3))
        print("category: ", box['category_id'])

    annotated = {
        "shape": list(out_img.shape),
        "dtype": out_img.dtype.name,
        "data": json.dumps(out_img.tolist()),
    }
    return {"status": 0, "instance": annotated}
def create_network(name, *args, **kwargs):
    """Build the network registered under ``name``.

    Only ``"yolov3_darknet53_quant"`` is supported. The network is built in
    inference mode and passed through the quantization-aware converter when the
    config enables ``quantization_aware``. Extra positional and keyword
    arguments are accepted but not used.

    Raises:
        NotImplementedError: if ``name`` is any other value.
    """
    if name != "yolov3_darknet53_quant":
        raise NotImplementedError(f"{name} is not implemented in the repo")

    net = YOLOV3DarkNet53(is_training=False)
    cfg = ConfigYOLOV3DarkNet53()
    # convert fusion network to quantization aware network
    if cfg.quantization_aware:
        converter = QuantizationAwareTraining(bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])
        net = converter.quantize(net)
    return net
type=str, default="yolov3_darknet53_quant", help="output file name.") parser.add_argument('--file_format', type=str, choices=["AIR", "MINDIR"], default='MINDIR', help='file format') args = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args.device_id) if __name__ == "__main__": network = YOLOV3DarkNet53(is_training=False) config = ConfigYOLOV3DarkNet53() if config.quantization_aware: quantizer = QuantizationAwareTraining(bn_fold=True, per_channel=[True, False], symmetric=[True, False]) network = quantizer.quantize(network) param_dict = load_checkpoint(args.ckpt_file) load_param_into_net(network, param_dict) network.set_train(False) shape = [args.batch_size, 3] + config.test_img_shape input_data = Tensor(np.zeros(shape), ms.float32)
def test():
    """The function of eval.

    Builds the inference network, loads the checkpoint at ``args.pretrained``,
    runs every batch of the evaluation dataset through the network, and logs
    the evaluation result returned by the detection engine.

    Raises:
        FileNotFoundError: if ``args.pretrained`` is not an existing file.
    """
    start_time = time.time()
    args = parse_args()

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    # RANK_ID may be unset on a single-device run; fall back to rank 0 instead
    # of failing on int(None).
    rank_id = int(os.environ.get('RANK_ID') or 0)
    args.logger = get_logger(args.outputs_dir, rank_id)

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    config = ConfigYOLOV3DarkNet53()
    if args.testing_shape:
        config.test_img_shape = conver_testing_shape(args)

    # convert fusion network to quantization aware network
    if config.quantization_aware:
        network = quant.convert_quant_network(network,
                                              bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])

    args.logger.info(args.pretrained)
    if not os.path.isfile(args.pretrained):
        # The original `assert FileNotFoundError(...)` never fired because an
        # exception instance is always truthy; raise it instead.
        err_msg = '{} not exists or not a pre-trained file'.format(args.pretrained)
        args.logger.info(err_msg)
        raise FileNotFoundError(err_msg)

    param_dict = load_checkpoint(args.pretrained)
    param_dict_new = {}
    for key, values in param_dict.items():
        if key.startswith('moments.'):
            # optimizer state, not a network weight
            continue
        if key.startswith('yolo_network.'):
            # strip the 13-character 'yolo_network.' wrapper prefix
            param_dict_new[key[13:]] = values
        else:
            param_dict_new[key] = values
    load_param_into_net(network, param_dict_new)
    args.logger.info('load_model {} success'.format(args.pretrained))

    data_root = args.data_root
    ann_file = args.annFile

    ds, data_size = create_yolo_dataset(data_root, ann_file, is_training=False,
                                        batch_size=args.per_batch_size, max_epoch=1,
                                        device_num=1, rank=rank_id, shuffle=False,
                                        config=config)

    args.logger.info('testing shape : {}'.format(config.test_img_shape))
    args.logger.info('totol {} images to eval'.format(data_size))

    network.set_train(False)

    # init detection engine
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    for i, data in enumerate(ds.create_dict_iterator()):
        image = data["image"]

        image_shape = data["image_shape"]
        image_id = data["img_id"]

        prediction = network(image, input_shape)
        output_big, output_me, output_small = prediction
        output_big = output_big.asnumpy()
        output_me = output_me.asnumpy()
        output_small = output_small.asnumpy()
        image_id = image_id.asnumpy()
        image_shape = image_shape.asnumpy()

        detection.detect([output_small, output_me, output_big], args.per_batch_size, image_shape, image_id)
        if i % 1000 == 0:
            args.logger.info('Processing... {:.2f}% '.format(
                i * args.per_batch_size / data_size * 100))

    args.logger.info('Calculating mAP...')
    detection.do_nms_for_results()
    result_file_path = detection.write_result()
    args.logger.info('result file path: {}'.format(result_file_path))
    eval_result = detection.get_eval_result()

    cost_time = time.time() - start_time
    args.logger.info('\n=============coco eval reulst=========\n' + eval_result)
    args.logger.info('testing cost time {:.2f}h'.format(cost_time / 3600.))
def test():
    """The function of eval.

    Loads the checkpoint at ``args.pretrained`` into the wrapped network, then
    generates DeepFool adversarial examples for the first ``args.samples_num``
    dataset batches and saves them to ``adv_example.npy``.

    Raises:
        FileNotFoundError: if ``args.pretrained`` is not an existing file.
    """
    args = parse_args()

    # An unset or empty DEVICE_ID selects device 0.
    devid = int(os.getenv('DEVICE_ID') or 0)
    context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', save_graphs=True, device_id=devid)

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    rank_id = int(os.environ.get('RANK_ID') or 0)
    args.logger = get_logger(args.outputs_dir, rank_id)

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=1)

    args.logger.info('Creating Network....')
    network = SolveOutput(YOLOV3DarkNet53(is_training=False))

    data_root = args.data_root
    ann_file = args.annFile

    args.logger.info(args.pretrained)
    if not os.path.isfile(args.pretrained):
        # The original `assert FileNotFoundError(...)` never fired because an
        # exception instance is always truthy; raise it instead.
        err_msg = '{} not exists or not a pre-trained file'.format(args.pretrained)
        args.logger.info(err_msg)
        raise FileNotFoundError(err_msg)

    param_dict = load_checkpoint(args.pretrained)
    param_dict_new = {}
    for key, values in param_dict.items():
        if key.startswith('moments.'):
            # optimizer state, not a network weight
            continue
        if key.startswith('yolo_network.'):
            # strip the 13-character 'yolo_network.' wrapper prefix
            param_dict_new[key[13:]] = values
        else:
            param_dict_new[key] = values
    load_param_into_net(network, param_dict_new)
    args.logger.info('load_model {} success'.format(args.pretrained))

    config = ConfigYOLOV3DarkNet53()
    if args.testing_shape:
        config.test_img_shape = conver_testing_shape(args)

    ds, data_size = create_yolo_dataset(data_root, ann_file, is_training=False,
                                        batch_size=1, max_epoch=1, device_num=1,
                                        rank=rank_id, shuffle=False, config=config)

    args.logger.info('testing shape : {}'.format(config.test_img_shape))
    args.logger.info('totol {} images to eval'.format(data_size))

    network.set_train(False)

    # build attacker
    attack = DeepFool(network, num_classes=80, model_type='detection', reserve_ratio=0.9, bounds=(0, 1))
    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)

    args.logger.info('Start inference....')
    batch_num = args.samples_num
    adv_example = []
    for i, data in enumerate(ds.create_dict_iterator(num_epochs=1)):
        if i >= batch_num:
            break
        image = data["image"]
        image_shape = data["image_shape"]

        # The network's own predictions serve as the labels passed to the attack.
        gt_boxes, gt_logits = network(image, input_shape)
        gt_boxes, gt_logits = gt_boxes.asnumpy(), gt_logits.asnumpy()
        gt_labels = np.argmax(gt_logits, axis=2)

        adv_img = attack.generate((image.asnumpy(), image_shape.asnumpy()), (gt_boxes, gt_labels))
        adv_example.append(adv_img)
    np.save('adv_example.npy', adv_example)
def predict():
    """The function of predict.

    Loads the checkpoint at ``args.pretrained``, runs the network on the image
    at ``args.image_path``, and writes the image with drawn boxes to
    ``<args.output_dir>/output.jpg``.

    Raises:
        FileNotFoundError: if ``args.pretrained`` is not an existing file.
    """
    args = parse_args()

    # An unset or empty DEVICE_ID selects device 0.
    devid = int(os.getenv('DEVICE_ID') or 0)
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target,
                        save_graphs=False, device_id=devid)

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    rank_id = int(os.environ.get('RANK_ID') or 0)
    args.logger = get_logger(args.outputs_dir, rank_id)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    if not os.path.isfile(args.pretrained):
        # The original `assert FileNotFoundError(...)` never fired because an
        # exception instance is always truthy; raise it instead.
        err_msg = '{} not exists or not a pre-trained file'.format(args.pretrained)
        args.logger.info(err_msg)
        raise FileNotFoundError(err_msg)

    param_dict = load_checkpoint(args.pretrained)
    param_dict_new = {}
    for key, values in param_dict.items():
        if key.startswith('moments.'):
            # optimizer state, not a network weight
            continue
        if key.startswith('yolo_network.'):
            # strip the 13-character 'yolo_network.' wrapper prefix
            param_dict_new[key[13:]] = values
        else:
            param_dict_new[key] = values
    load_param_into_net(network, param_dict_new)
    args.logger.info('load_model {} success'.format(args.pretrained))

    config = ConfigYOLOV3DarkNet53()
    args.logger.info('testing shape: {}'.format(config.test_img_shape))

    # data preprocess operation
    image, image_shape = data_preprocess(args.image_path, config)

    # init detection engine
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    network.set_train(False)
    prediction = network(Tensor(image.reshape(1, 3, 416, 416), ms.float32), input_shape)
    output_big, output_me, output_small = prediction
    output_big = output_big.asnumpy()
    output_me = output_me.asnumpy()
    output_small = output_small.asnumpy()

    detection.detect([output_small, output_me, output_big], args.per_batch_size, image_shape, config)
    detection.do_nms_for_results()
    img = detection.draw_boxes_in_image(args.image_path)

    # Make sure the target directory exists before writing; an empty
    # output_dir means the current directory and needs no creation.
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)
    cv2.imwrite(os.path.join(args.output_dir, 'output.jpg'), img)
def train():
    """Train function.

    Configures the MindSpore context, builds the YOLOv3 loss network and
    optimizer, then runs a manual step loop over the dataset iterator with
    periodic throughput logging, optional checkpoint callbacks, and an optional
    profiler run that stops after iteration 10.
    """
    args = parse_args()

    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=True,
                        device_id=device_id)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root,
                                        anno_path=args.annFile,
                                        is_training=True,
                                        batch_size=args.per_batch_size,
                                        max_epoch=args.max_epoch,
                                        device_num=args.group_size,
                                        rank=args.rank,
                                        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)
    if not args.ckpt_interval:
        # default to one checkpoint per epoch
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    if context.get_context("device_target") == "GPU":
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value, drop_overflow_update=False)
        network = amp.build_train_network(network, optimizer=opt, loss_scale_manager=loss_scale,
                                          level="O2", keep_batchnorm_fp32=True)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    label_columns = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')

    for i, data in enumerate(ds.create_dict_iterator(output_numpy=True)):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor.from_numpy(images)
        labels = [Tensor.from_numpy(data[column]) for column in label_columns]
        # the network receives the two spatial dims in reversed order
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)

        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        if args.need_profiler and i == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
def test_yolov3_darknet53():
    """Train YOLOv3-DarkNet53 briefly on Ascend and check loss and epoch time.

    Trains on 256 samples for 3 epochs with batch size 32, then asserts that
    the number parsed from the loss meter is below 3210.0 and that the last
    measured per-epoch time is below 20.0 seconds.
    """
    devid = int(os.getenv('DEVICE_ID') or 0)
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target="Ascend", device_id=devid)

    rank = 0
    device_num = 1
    lr_init = 0.001
    epoch_size = 3
    batch_size = 32
    loss_scale = 1024
    mindrecord_dir = DATA_DIR

    # It will generate mindrecord file in args_opt.mindrecord_dir,
    # and the file name is yolo.mindrecord0, 1, ... file_num.
    if not os.path.isdir(mindrecord_dir):
        raise KeyError("mindrecord path is not exist.")

    data_root = os.path.join(mindrecord_dir, 'train2014')
    annFile = os.path.join(mindrecord_dir, 'annotations/instances_train2014.json')
    # print("yolov3 mindrecord is ", mindrecord_file)
    if not os.path.exists(annFile):
        print("instances_train2014 file is not exist.")
        assert False

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.STAND_ALONE,
                                      gradients_mean=True, device_num=1)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    network = YoloWithLossCell(network)
    print('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = 0
    config.label_smooth_factor = 0.1

    # When create MindDataset, using the first mindrecord file, such as yolo.mindrecord0.
    print("Create dataset begin!")
    config.multi_scale = [[416, 416]]
    num_samples = 256
    ds, data_size = create_yolo_dataset(image_dir=data_root, anno_path=annFile, is_training=True,
                                        batch_size=batch_size, max_epoch=epoch_size,
                                        device_num=device_num, rank=rank, config=config,
                                        num_samples=num_samples)
    print("Create dataset done!")

    per_batch_size = batch_size
    group_size = 1
    print("data_size:", data_size)
    steps_per_epoch = int(data_size / per_batch_size / group_size)
    print("steps_per_epoch:", steps_per_epoch)

    warmup_epochs = 0.
    max_epoch = epoch_size
    T_max = 1
    eta_min = 0
    lr = warmup_cosine_annealing_lr(lr_init, steps_per_epoch, warmup_epochs, max_epoch, T_max, eta_min)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=0.9,
                   weight_decay=0.0005,
                   loss_scale=loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True)
    train_starttime = time.time()
    time_used_per_epoch = 0
    print("time:", time.time())

    label_columns = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')
    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        print('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor.from_numpy(images)
        labels = [Tensor.from_numpy(data[column]) for column in label_columns]
        # the network receives the two spatial dims in reversed order
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)

        loss = network(images, *labels, input_shape)
        loss_meter.update(loss.asnumpy())

        if (i + 1) % steps_per_epoch != 0:
            continue

        # End of an epoch: report throughput and restart the timers.
        time_used = time.time() - t_end
        epoch = int(i / steps_per_epoch)
        fps = per_batch_size * (i - old_progress) * group_size / time_used
        if rank == 0:
            print(
                'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}, time_used:{}'
                .format(epoch, i, loss_meter, fps, lr[i], time_used))
        t_end = time.time()
        loss_meter.reset()
        old_progress = i
        time_used_per_epoch = time_used

    train_endtime = time.time() - train_starttime
    print('train_time_used:{}'.format(train_endtime))

    expect_loss_value = 3210.0
    # take the first number in the meter's string form as the loss value
    loss_value = re.findall(r"\d+\.?\d*", str(loss_meter))
    print('loss_value:{}'.format(loss_value[0]))
    assert float(loss_value[0]) < expect_loss_value

    export_time_used = 20.0
    print('time_used_per_epoch:{}'.format(time_used_per_epoch))
    assert time_used_per_epoch < export_time_used
    print('==========test case passed===========')
def predict():
    """The function of predict.

    Copies the checkpoint from ``args.checkpoint_path`` and the input data from
    ``args.data_url`` into ``/cache``, runs the network on the first file
    listed in the data directory, writes the image with drawn boxes to
    ``/cache/output/output.jpg``, and copies that directory to
    ``args.train_url``.

    Raises:
        FileNotFoundError: if the copied checkpoint is not a file, or if the
            copied data directory contains no entries.
    """
    args = parse_args()

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    # RANK_ID may be unset on a single-device run; fall back to rank 0 instead
    # of failing on int(None).
    rank_id = int(os.environ.get('RANK_ID') or 0)
    args.logger = get_logger(args.outputs_dir, rank_id)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    # the last '/'-separated component is the checkpoint file name
    ckpt_file = args.checkpoint_path.split('/')[-1]
    local_ckpt_path = '/cache/' + ckpt_file

    # download checkpoint
    mox.file.copy_parallel(src_url=args.checkpoint_path, dst_url=local_ckpt_path)

    args.logger.info(local_ckpt_path)
    if not os.path.isfile(local_ckpt_path):
        # The original `assert FileNotFoundError(...)` never fired because an
        # exception instance is always truthy; raise it instead.
        err_msg = '{} not exists or not a pre-trained file'.format(local_ckpt_path)
        args.logger.info(err_msg)
        raise FileNotFoundError(err_msg)

    param_dict = load_checkpoint(local_ckpt_path)
    param_dict_new = {}
    for key, values in param_dict.items():
        if key.startswith('moments.'):
            # optimizer state, not a network weight
            continue
        if key.startswith('yolo_network.'):
            # strip the 13-character 'yolo_network.' wrapper prefix
            param_dict_new[key[13:]] = values
        else:
            param_dict_new[key] = values
    load_param_into_net(network, param_dict_new)
    args.logger.info('load_model {} success'.format(local_ckpt_path))

    config = ConfigYOLOV3DarkNet53()

    local_data_path = '/cache/data'
    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    # init detection engine
    detection = DetectionEngine(args)

    # preprocess the image
    images = os.listdir(local_data_path)
    if not images:
        # Fail with a clear message rather than an IndexError on images[0].
        raise FileNotFoundError('no input image found in {}'.format(local_data_path))
    # NOTE(review): os.listdir order is arbitrary, so with several files the
    # chosen image is not deterministic — confirm a single file is expected.
    image_path = os.path.join(local_data_path, images[0])
    image, image_shape = data_preprocess(image_path, config)

    args.logger.info('testing shape: {}'.format(config.test_img_shape))

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    network.set_train(False)
    prediction = network(Tensor(image.reshape(1, 3, 416, 416), ms.float32), input_shape)
    output_big, output_me, output_small = prediction
    output_big = output_big.asnumpy()
    output_me = output_me.asnumpy()
    output_small = output_small.asnumpy()

    detection.detect([output_small, output_me, output_big], args.per_batch_size, image_shape, config)
    detection.do_nms_for_results()
    img = detection.draw_boxes_in_image(image_path)

    local_output_dir = '/cache/output'
    os.makedirs(local_output_dir, exist_ok=True)
    cv2.imwrite(os.path.join(local_output_dir, 'output.jpg'), img)

    # upload output image files
    print('Upload output image.')
    mox.file.copy_parallel(src_url=local_output_dir, dst_url=args.train_url)
def train():
    """Train function.

    Sets up distributed state and logging, builds the YOLOv3 network
    (optionally resuming from ``args.resume_yolov3`` with checkpoint parameter
    names remapped, and optionally converted to a quantization-aware network),
    then runs a manual step loop with periodic throughput logging, optional
    checkpoint callbacks, and an optional profiler run that stops after
    iteration 10.
    """
    args = parse_args()

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

    # select for master rank save ckpt or all rank save, compatible for model parallel
    save_here = (not args.is_save_on_master) or args.rank == 0
    args.rank_save_ckpt_flag = 1 if save_here else 0

    # logger
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')
    args.outputs_dir = os.path.join(args.ckpt_path, timestamp)
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)

    if args.resume_yolov3:
        skipped_prefixes = ('moments.', 'global_', 'learning_rate', 'momentum')
        bn_fields = ('beta', 'gamma', 'moving_mean', 'moving_variance')

        param_dict_new = {}
        for key, values in load_checkpoint(args.resume_yolov3).items():
            args.logger.info('ckpt param name = {}'.format(key))
            if key.startswith(skipped_prefixes):
                continue

            if not key.startswith('yolo_network.'):
                param_dict_new[key] = values
                args.logger.info('in resume {}'.format(key))
                continue

            # Strip the 'yolo_network.' prefix, then rename '1.<bn field>' to
            # 'batchnorm.<bn field>' and route weight/bias names through 'conv'.
            key_new = key[13:]
            for field in bn_fields:
                if key_new.endswith('1.' + field):
                    key_new = key_new.replace('1.' + field, 'batchnorm.' + field)
            if key_new.endswith('.weight'):
                if key_new.endswith('0.weight'):
                    key_new = key_new.replace('0.weight', 'conv.weight')
                else:
                    key_new = key_new.replace('.weight', '.conv.weight')
            if key_new.endswith('.bias'):
                key_new = key_new.replace('.bias', '.conv.bias')

            param_dict_new[key_new] = values
            args.logger.info('in resume {}'.format(key_new))

        args.logger.info('resume finished')
        # log every network parameter and flag those the checkpoint does not cover
        for _, param in network.parameters_and_names():
            args.logger.info('network param name = {}'.format(param.name))
            if param.name not in param_dict_new:
                args.logger.info('not match param name = {}'.format(param.name))
        load_param_into_net(network, param_dict_new)
        args.logger.info('load_model {} success'.format(args.resume_yolov3))

    config = ConfigYOLOV3DarkNet53()
    # convert fusion network to quantization aware network
    if config.quantization_aware:
        network = quant.convert_quant_network(network,
                                              bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root,
                                        anno_path=args.annFile,
                                        is_training=True,
                                        batch_size=args.per_batch_size,
                                        max_epoch=args.max_epoch,
                                        device_num=args.group_size,
                                        rank=args.rank,
                                        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)
    if not args.ckpt_interval:
        # default to one checkpoint per epoch
        args.ckpt_interval = args.steps_per_epoch

    # lr scheduler
    cosine_schedulers = {
        'cosine_annealing': warmup_cosine_annealing_lr,
        'cosine_annealing_V2': warmup_cosine_annealing_lr_V2,
        'cosine_annealing_sample': warmup_cosine_annealing_lr_sample,
    }
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(args.lr,
                            args.lr_epochs,
                            args.steps_per_epoch,
                            args.warmup_epochs,
                            args.max_epoch,
                            gamma=args.lr_gamma)
    elif args.lr_scheduler in cosine_schedulers:
        lr = cosine_schedulers[args.lr_scheduler](args.lr,
                                                  args.steps_per_epoch,
                                                  args.warmup_epochs,
                                                  args.max_epoch,
                                                  args.T_max,
                                                  args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator()

    shape_record = ShapeRecord()
    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))
        shape_record.set(input_shape)

        images = Tensor(images)
        annos = data["annotation"]
        if args.group_size == 1:
            build_targets = batch_preprocess_true_box
        else:
            build_targets = batch_preprocess_true_box_single
        y_true_0, y_true_1, y_true_2, gt_box0, gt_box1, gt_box2 = \
            build_targets(annos, config, input_shape)

        targets = [Tensor(item) for item in (y_true_0, y_true_1, y_true_2, gt_box0, gt_box1, gt_box2)]
        # the network receives the two spatial dims in reversed order
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)

        loss = network(images, *targets, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        if args.need_profiler and i == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
def train():
    """Train function.

    Builds the YOLOv3 network, loads parameters through
    ``load_yolov3_quant_params``, optionally converts it to a
    quantization-aware network, then runs a manual step loop with periodic
    throughput logging, optional checkpoint callbacks, and an optional profiler
    run that stops after iteration 10.
    """
    args = parse_args()
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_quant_params(args, network)

    config = ConfigYOLOV3DarkNet53()
    # convert fusion network to quantization aware network
    if config.quantization_aware:
        network = quant.convert_quant_network(network,
                                              bn_fold=True,
                                              per_channel=[True, False],
                                              symmetric=[True, False])

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root,
                                        anno_path=args.annFile,
                                        is_training=True,
                                        batch_size=args.per_batch_size,
                                        max_epoch=args.max_epoch,
                                        device_num=args.group_size,
                                        rank=args.rank,
                                        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)
    if not args.ckpt_interval:
        # default to one checkpoint per epoch
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    shape_record = ShapeRecord()
    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))
        shape_record.set(input_shape)

        images = Tensor.from_numpy(images)
        annos = data["annotation"]
        if args.group_size == 1:
            build_targets = batch_preprocess_true_box
        else:
            build_targets = batch_preprocess_true_box_single
        y_true_0, y_true_1, y_true_2, gt_box0, gt_box1, gt_box2 = \
            build_targets(annos, config, input_shape)

        targets = [Tensor.from_numpy(item)
                   for item in (y_true_0, y_true_1, y_true_2, gt_box0, gt_box1, gt_box2)]
        # the network receives the two spatial dims in reversed order
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)

        loss = network(images, *targets, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        if args.need_profiler and i == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
def train():
    """Run YOLOv3-DarkNet53 training driven by command-line arguments.

    The function configures the MindSpore context (plus communication when
    ``args.is_distributed`` is set), builds the network with optional
    backbone / resume checkpoints, creates the dataset, the learning-rate
    schedule and a Momentum optimizer, and then runs a manual step loop that
    logs throughput and drives the checkpoint callback.

    Raises:
        NotImplementedError: if ``args.lr_scheduler`` is not one of
            ``exponential``, ``cosine_annealing``, ``cosine_annealing_V2``
            or ``cosine_annealing_sample``.
    """
    args = parse_args()

    # DEVICE_ID is optional; fall back to device 0 when unset or empty.
    device_id_env = os.getenv('DEVICE_ID')
    devid = int(device_id_env) if device_id_env else 0
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=True,
                        device_id=devid)

    # Communication init: no-argument init() on Ascend, "nccl" otherwise.
    if args.is_distributed:
        if args.device_target == "Ascend":
            init()
        else:
            init("nccl")
        args.rank = get_rank()
        args.group_size = get_group_size()

    # Either only rank 0 saves checkpoints (is_save_on_master) or every rank
    # does.
    args.rank_save_ckpt_flag = (
        1 if (not args.is_save_on_master or args.rank == 0) else 0
    )

    # Per-run output directory named after the start time.
    run_stamp = datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S')
    args.outputs_dir = os.path.join(args.ckpt_path, run_stamp)
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir,
                            is_detail=True,
                            is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode, degree = ParallelMode.DATA_PARALLEL, get_group_size()
    else:
        parallel_mode, degree = ParallelMode.STAND_ALONE, 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                      mirror_mean=True,
                                      device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)

    if args.pretrained_backbone:
        network = load_backbone(network, args.pretrained_backbone, args)
        args.logger.info('load pre-trained backbone {} into network'.format(args.pretrained_backbone))
    else:
        args.logger.info('Not load pre-trained backbone, please be careful')

    if args.resume_yolov3:
        loaded_params = load_checkpoint(args.resume_yolov3)
        renamed_params = {}
        for key, values in loaded_params.items():
            # Entries starting with 'moments.' are not restored.
            if key.startswith('moments.'):
                continue
            # Strip the 13-character 'yolo_network.' prefix when present.
            target_key = key[13:] if key.startswith('yolo_network.') else key
            renamed_params[target_key] = values
            args.logger.info('in resume {}'.format(key))
        args.logger.info('resume finished')
        load_param_into_net(network, renamed_params)
        args.logger.info('load_model {} success'.format(args.resume_yolov3))

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    dataset, data_size = create_yolo_dataset(image_dir=args.data_root,
                                             anno_path=args.annFile,
                                             is_training=True,
                                             batch_size=args.per_batch_size,
                                             max_epoch=args.max_epoch,
                                             device_num=args.group_size,
                                             rank=args.rank,
                                             config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    # lr scheduler: the three cosine variants share one call signature.
    cosine_schedulers = {
        'cosine_annealing': warmup_cosine_annealing_lr,
        'cosine_annealing_V2': warmup_cosine_annealing_lr_V2,
        'cosine_annealing_sample': warmup_cosine_annealing_lr_sample,
    }
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(args.lr,
                            args.lr_epochs,
                            args.steps_per_epoch,
                            args.warmup_epochs,
                            args.max_epoch,
                            gamma=args.lr_gamma)
    elif args.lr_scheduler in cosine_schedulers:
        build_lr = cosine_schedulers[args.lr_scheduler]
        lr = build_lr(args.lr,
                      args.steps_per_epoch,
                      args.warmup_epochs,
                      args.max_epoch,
                      args.T_max,
                      args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    # On GPU the train network is built through amp at level "O2"; other
    # targets use TrainingWrapper.
    if context.get_context("device_target") == "GPU":
        loss_scale = FixedLossScaleManager(1.0, drop_overflow_update=False)
        network = amp.build_train_network(network,
                                          optimizer=opt,
                                          loss_scale_manager=loss_scale,
                                          level="O2",
                                          keep_batchnorm_fp32=True)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    # Dataset columns fed to the loss network after the image, in call order.
    label_keys = ('bbox1', 'bbox2', 'bbox3', 'gt_box1', 'gt_box2', 'gt_box3')

    last_logged_step = -1
    tick = time.time()
    for step, batch in enumerate(dataset.create_dict_iterator()):
        images = batch["image"]
        spatial_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(step, spatial_shape[0]))

        images = Tensor(images)
        label_tensors = [Tensor(batch[name]) for name in label_keys]
        # The two spatial dims are passed to the network in reversed order.
        input_shape = Tensor(tuple(spatial_shape[::-1]), ms.float32)
        loss = network(images, *label_tensors, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = step + 1  # current step number
            cb_params.batch_num = step + 2
            ckpt_cb.step_end(run_context)

        if step % args.log_interval == 0:
            time_used = time.time() - tick
            epoch = int(step / args.steps_per_epoch)
            # Images processed across all ranks since the previous log line.
            fps = args.per_batch_size * (step - last_logged_step) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, step, loss_meter, fps, lr[step]))
            tick = time.time()
            loss_meter.reset()
            last_logged_step = step

        if args.rank_save_ckpt_flag and (step + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

        # When profiling, stop right after step 10 has been analysed.
        if args.need_profiler and step == 10:
            profiler.analyse()
            break

    args.logger.info('==========end training===============')
def create_network(name, *args, **kwargs):
    """Return the inference network registered under ``name``.

    Only ``"yolov3_darknet53"`` is recognised; it yields a
    ``YOLOV3DarkNet53`` built with ``is_training=False``. Extra positional
    and keyword arguments are accepted but not used.

    Raises:
        NotImplementedError: for any other ``name``.
    """
    # Guard clause: reject unknown names before constructing anything.
    if name != "yolov3_darknet53":
        raise NotImplementedError(f"{name} is not implemented in the repo")
    return YOLOV3DarkNet53(is_training=False)
def build_network():
    """Build a training-mode YOLOv3 and return its backbone parameters.

    Returns:
        list: the parameters from ``network.get_parameters()`` whose
        ``name`` contains the substring ``'backbone'``.
    """
    yolo = YOLOV3DarkNet53(is_training=True)
    backbone_params = []
    for param in yolo.get_parameters():
        if 'backbone' in param.name:
            backbone_params.append(param)
    return backbone_params
def train():
    """Train YOLOv3-DarkNet53 with data and checkpoints staged under ``/cache``.

    Workflow, as implemented below:

    1. Parse arguments and set up the logger.
    2. Build the network; when ``args.pretrained_backbone`` is set, copy that
       checkpoint to ``/cache`` with ``mox.file.copy_parallel`` and load it.
    3. Optionally restore a full checkpoint from ``args.resume_yolov3``.
    4. Copy the dataset from ``args.data_url`` to ``/cache/data`` and build
       the dataset, learning-rate schedule and Momentum optimizer.
    5. Run the step loop, saving checkpoints to ``/cache/ckpt_file``.
    6. Copy the checkpoint directory to ``args.train_url``.

    Raises:
        NotImplementedError: if ``args.lr_scheduler`` is not one of the
            supported scheduler names.
    """
    args = parse_args()

    # logger
    args.outputs_dir = os.path.join(
        args.ckpt_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    loss_meter = AverageMeter('loss')

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recursive_init(network)

    if args.pretrained_backbone:
        # Only touch the backbone URL when one was actually supplied; the
        # download used to run unconditionally, which failed before the
        # "no backbone" branch below could ever be reached.
        backbone_ckpt_file = args.pretrained_backbone.split('/')[-1]
        local_backbone_ckpt_path = '/cache/' + backbone_ckpt_file
        # download backbone checkpoint
        mox.file.copy_parallel(src_url=args.pretrained_backbone,
                               dst_url=local_backbone_ckpt_path)
        network = load_backbone(network, local_backbone_ckpt_path, args)
        args.logger.info('load pre-trained backbone {} into network'.format(
            args.pretrained_backbone))
    else:
        args.logger.info('Not load pre-trained backbone, please be careful')

    if args.resume_yolov3:
        param_dict = load_checkpoint(args.resume_yolov3)
        param_dict_new = {}
        for key, values in param_dict.items():
            # Entries starting with 'moments.' are not restored.
            if key.startswith('moments.'):
                continue
            # Strip the 13-character 'yolo_network.' prefix when present.
            new_key = key[13:] if key.startswith('yolo_network.') else key
            param_dict_new[new_key] = values
            args.logger.info('in resume {}'.format(key))
        args.logger.info('resume finished')
        load_param_into_net(network, param_dict_new)
        args.logger.info('load_model {} success'.format(args.resume_yolov3))

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()
    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor
    if args.training_shape:
        config.multi_scale = [convert_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    # data download
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    ds, data_size = create_yolo_dataset(
        image_dir=os.path.join(local_data_path, 'images'),
        anno_path=os.path.join(local_data_path, 'annotation.json'),
        is_training=True,
        batch_size=args.per_batch_size,
        max_epoch=args.epoch_size,
        device_num=args.group_size,
        rank=args.rank,
        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)
    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch * 10

    # lr scheduler
    # NOTE(review): the exponential branch and the dataset use
    # args.epoch_size while the cosine branches use args.max_epoch; confirm
    # against parse_args that both attributes exist and are meant to differ.
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(
            args.lr,
            args.lr_epochs,
            args.steps_per_epoch,
            args.warmup_epochs,
            args.epoch_size,
            gamma=args.lr_gamma,
        )
    elif args.lr_scheduler == 'cosine_annealing':
        lr = warmup_cosine_annealing_lr(args.lr, args.steps_per_epoch,
                                        args.warmup_epochs, args.max_epoch,
                                        args.T_max, args.eta_min)
    elif args.lr_scheduler == 'cosine_annealing_V2':
        lr = warmup_cosine_annealing_lr_V2(args.lr, args.steps_per_epoch,
                                           args.warmup_epochs, args.max_epoch,
                                           args.T_max, args.eta_min)
    elif args.lr_scheduler == 'cosine_annealing_sample':
        lr = warmup_cosine_annealing_lr_sample(args.lr, args.steps_per_epoch,
                                               args.warmup_epochs,
                                               args.max_epoch, args.T_max,
                                               args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    network = TrainingWrapper(network, opt)
    network.set_train()

    # checkpoint save
    ckpt_max_num = 10
    ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                   keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=ckpt_config,
                              directory=local_ckpt_path,
                              prefix='yolov3')
    cb_params = _InternalCallbackParam()
    cb_params.train_network = network
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator()
    shape_record = ShapeRecord()

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        shape_record.set(input_shape)
        images = Tensor(images)

        # Ground-truth targets are built on the host for each batch; the
        # helper differs between single-device and multi-device runs.
        annos = data["annotation"]
        if args.group_size == 1:
            preprocess = batch_preprocess_true_box
        else:
            preprocess = batch_preprocess_true_box_single
        batch_y_true_0, batch_y_true_1, batch_y_true_2, \
            batch_gt_box0, batch_gt_box1, batch_gt_box2 = \
            preprocess(annos, config, input_shape)

        batch_y_true_0 = Tensor(batch_y_true_0)
        batch_y_true_1 = Tensor(batch_y_true_1)
        batch_y_true_2 = Tensor(batch_y_true_2)
        batch_gt_box0 = Tensor(batch_gt_box0)
        batch_gt_box1 = Tensor(batch_gt_box1)
        batch_gt_box2 = Tensor(batch_gt_box2)

        # The two spatial dims are passed to the network in reversed order.
        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2,
                       batch_gt_box0, batch_gt_box1, batch_gt_box2,
                       input_shape)
        loss_meter.update(loss.asnumpy())

        # ckpt progress
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            # Images processed across all ranks since the previous log line.
            fps = args.per_batch_size * (
                i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

    args.logger.info('==========end training===============')

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args.train_url)
def test():
    """Evaluate a YOLOv3-DarkNet53 checkpoint and log the result.

    The checkpoint at ``args.checkpoint_path`` and the dataset at
    ``args.data_url`` are copied under ``/cache`` with
    ``mox.file.copy_parallel``. Every batch of the evaluation dataset is run
    through the network, detections are collected by ``DetectionEngine``,
    and after NMS the result file path and the evaluation summary are
    logged.

    Raises:
        FileNotFoundError: if the downloaded checkpoint is not a regular
            file at the expected local path.
    """
    start_time = time.time()
    args = parse_args()

    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args.device_target,
                        save_graphs=False)

    # logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    # RANK_ID may be absent; default to rank 0 instead of failing in int(None).
    rank_id_env = os.environ.get('RANK_ID')
    rank_id = int(rank_id_env) if rank_id_env else 0
    args.logger = get_logger(args.outputs_dir, rank_id)

    args.logger.info('Creating Network....')
    network = YOLOV3DarkNet53(is_training=False)

    ckpt_file = args.checkpoint_path.split('/')[-1]
    local_ckpt_path = '/cache/' + ckpt_file
    # download checkpoint
    mox.file.copy_parallel(src_url=args.checkpoint_path,
                           dst_url=local_ckpt_path)
    args.logger.info(local_ckpt_path)

    if not os.path.isfile(local_ckpt_path):
        message = '{} not exists or not a pre-trained file'.format(local_ckpt_path)
        args.logger.info(message)
        # The previous `assert FileNotFoundError(...)` never fired because an
        # exception instance is always truthy; raise it instead.
        raise FileNotFoundError(message)

    param_dict = load_checkpoint(local_ckpt_path)
    param_dict_new = {}
    for key, values in param_dict.items():
        # Entries starting with 'moments.' are not restored.
        if key.startswith('moments.'):
            continue
        # Strip the 13-character 'yolo_network.' prefix when present.
        new_key = key[13:] if key.startswith('yolo_network.') else key
        param_dict_new[new_key] = values
    load_param_into_net(network, param_dict_new)
    args.logger.info('load_model {} success'.format(local_ckpt_path))

    config = ConfigYOLOV3DarkNet53()
    if args.testing_shape:
        config.test_img_shape = convert_testing_shape(args)

    local_data_path = '/cache/data'
    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    args.data_root = os.path.join(local_data_path, 'images')
    args.ann_file = os.path.join(local_data_path, 'eval_annotation.json')
    ds, data_size = create_yolo_dataset(args.data_root, args.ann_file,
                                        is_training=False,
                                        batch_size=args.per_batch_size,
                                        max_epoch=1,
                                        device_num=1,
                                        rank=rank_id,
                                        shuffle=False,
                                        config=config)

    args.logger.info('testing shape: {}'.format(config.test_img_shape))
    args.logger.info('total {} images to eval'.format(data_size))

    network.set_train(False)

    # init detection engine
    detection = DetectionEngine(args)

    input_shape = Tensor(tuple(config.test_img_shape), ms.float32)
    args.logger.info('Start inference....')
    for i, data in enumerate(ds.create_dict_iterator()):
        image = data["image"]
        image_shape = data["image_shape"]
        image_id = data["img_id"]

        prediction = network(image, input_shape)
        output_big, output_me, output_small = prediction
        output_big = output_big.asnumpy()
        output_me = output_me.asnumpy()
        output_small = output_small.asnumpy()
        image_id = image_id.asnumpy()
        image_shape = image_shape.asnumpy()

        # Outputs are handed to the engine in small, medium, big order.
        detection.detect([output_small, output_me, output_big],
                         args.per_batch_size, image_shape, image_id,
                         config=config)
        if i % 1000 == 0:
            args.logger.info('Processing... {:.2f}% '.format(
                i * args.per_batch_size / data_size * 100))

    args.logger.info('Calculating mAP...')
    detection.do_nms_for_results()
    result_file_path = detection.write_result()
    args.logger.info('result file path: {}'.format(result_file_path))
    eval_result = detection.get_eval_result()

    cost_time = time.time() - start_time
    args.logger.info('\n=============coco eval result=========\n' + eval_result)
    args.logger.info('testing cost time {:.2f}h'.format(cost_time / 3600.))