def run(mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        load_name="512_512_ADAM_PEFF_0",
        load_period=10,
        GPU_COUNT=0,
        test_weight_path="weights",
        test_dataset_path="Dataset/test",
        test_save_path="result",
        test_graph_path="test_Graph",
        foreground_iou_thresh=0.5,
        background_iou_thresh=0.4,
        num_workers=4,
        show_flag=True,
        save_flag=True,
        decode_number=5000,
        multiperclass=True,
        nms_thresh=0.5,
        nms_topk=500,
        except_class_thresh=0.05,
        plot_class_thresh=0.5):
    """Evaluate an exported symbol model on the test dataset.

    Loads ``<load_name>-symbol.json`` / ``<load_name>-<load_period>.params``
    from ``test_weight_path/load_name``, runs the network over every batch of
    the test dataloader, draws ground-truth and predicted boxes, accumulates
    the confidence (focal) and localization (Huber) losses, and finally logs
    per-class AP / mAP and writes the precision-recall curve.

    Args:
        mean, std: Normalization statistics forwarded to ``testdataloader``.
        load_name: Model name; its first two ``_``-separated fields are parsed
            as the network input height and width.
        load_period: Epoch number used to build the ``.params`` file name.
        GPU_COUNT: ``> 0`` runs on ``mx.gpu(0)``, otherwise on ``mx.cpu(0)``.
        test_weight_path: Directory that contains the ``load_name`` folder.
        test_dataset_path: Dataset location forwarded to ``testdataloader``.
        test_save_path: Forwarded to ``plot_bbox`` as ``image_save_path``.
        test_graph_path: Forwarded to ``get_PR_curve`` as ``folder_name``.
        foreground_iou_thresh, background_iou_thresh: Forwarded to
            ``TargetGenerator``.
        num_workers: Forwarded to ``testdataloader``.
        show_flag, save_flag: Forwarded to ``plot_bbox`` as ``image_show`` /
            ``image_save``.
        decode_number, multiperclass, nms_thresh, nms_topk,
        except_class_thresh: Forwarded to ``Prediction``.
        plot_class_thresh: Score threshold forwarded to ``plot_bbox``.

    Raises:
        ValueError: If ``load_name`` does not start with ``<int>_<int>``.

    Note:
        Unrecoverable setup problems (missing dataset, unloadable weights,
        empty dataloader) are logged and terminate the process via
        ``exit(0)``, as in the original script.
    """
    ctx = mx.gpu(0) if GPU_COUNT > 0 else mx.cpu(0)

    # Report the operating system (every platform is logged the same way).
    logging.info(f"{platform.system()} OS")

    if GPU_COUNT > 0:
        free_memory, total_memory = mx.context.gpu_memory_info(0)
        free_memory = round(free_memory / (1024 * 1024 * 1024), 2)
        total_memory = round(total_memory / (1024 * 1024 * 1024), 2)
        logging.info(
            f'Running on {ctx} / free memory : {free_memory}GB / total memory {total_memory}GB'
        )
    else:
        logging.info(f'Running on {ctx}')

    logging.info(f"test {load_name}")

    # int() either returns an int or raises, so validate by catching the
    # failure; a name without two fields also surfaces as ValueError here.
    try:
        netheight, netwidth = (int(field) for field in load_name.split("_")[:2])
    except ValueError as err:
        logging.error(
            f"load_name must start with '<height>_<width>' : {load_name}")
        raise ValueError(
            f"cannot parse network input size from load_name={load_name!r}"
        ) from err
    logging.info(f"network input size : {(netheight, netwidth)}")

    try:
        test_dataloader, test_dataset = testdataloader(
            path=test_dataset_path,
            input_size=(netheight, netwidth),
            num_workers=num_workers,
            mean=mean,
            std=std)
    except Exception:
        # Keep the original message but also record the real cause.
        logging.exception("The dataset does not exist")
        exit(0)

    weight_path = os.path.join(test_weight_path, load_name)
    sym = os.path.join(weight_path, f'{load_name}-symbol.json')
    params = os.path.join(weight_path, f'{load_name}-{load_period:04d}.params')

    test_update_number_per_epoch = len(test_dataloader)
    if test_update_number_per_epoch < 1:
        logging.warning(" test batch size가 데이터 수보다 큼 ")
        exit(0)

    num_classes = test_dataset.num_class  # number of classes
    name_classes = test_dataset.classes

    logging.info("symbol model test")
    try:
        net = gluon.SymbolBlock.imports(sym, ['data'], params, ctx=ctx)
    except Exception:
        logging.exception("loading symbol weights 실패")
        exit(0)
    else:
        logging.info("loading symbol weights 성공")

    net.hybridize(active=True, static_alloc=True, static_shape=True)

    confidence_loss = FocalLoss(
        alpha=0.25,  # value reported as best in the paper
        gamma=1.5,  # value reported as best in the paper
        sparse_label=True,
        from_sigmoid=False,
        batch_axis=None,
        num_class=num_classes,
        reduction="sum",
        exclude=False)

    localization_loss = HuberLoss(rho=1,
                                  batch_axis=None,
                                  reduction="sum",
                                  exclude=False)

    targetgenerator = TargetGenerator(
        foreground_iou_thresh=foreground_iou_thresh,
        background_iou_thresh=background_iou_thresh)

    prediction = Prediction(from_sigmoid=False,
                            num_classes=num_classes,
                            decode_number=decode_number,
                            nms_thresh=nms_thresh,
                            nms_topk=nms_topk,
                            except_class_thresh=except_class_thresh,
                            multiperclass=multiperclass)

    precision_recall = Voc_2007_AP(iou_thresh=0.5, class_names=name_classes)

    # Every ground-truth class is drawn with the same color.
    ground_truth_colors = {
        class_index: (0, 0, 1) for class_index in range(num_classes)
    }

    conf_loss_sum = 0
    loc_loss_sum = 0

    for image, label, name, origin_image, origin_box in tqdm(test_dataloader):
        _, height, width, _ = origin_image.shape
        logging.info(f"real input size : {(height, width)}")
        origin_image = origin_image.asnumpy()[0]
        origin_box = origin_box.asnumpy()[0]

        image = image.as_in_context(ctx)
        label = label.as_in_context(ctx)
        gt_boxes = label[:, :, :4]
        gt_ids = label[:, :, 4:5]

        cls_preds, box_preds, anchors = net(image)
        ids, scores, bboxes = prediction(cls_preds, box_preds, anchors)

        precision_recall.update(pred_bboxes=bboxes,
                                pred_labels=ids,
                                pred_scores=scores,
                                gt_boxes=gt_boxes,
                                gt_labels=gt_ids)

        # Map predicted boxes from network resolution to the original image.
        bbox = box_resize(bboxes[0], (netwidth, netheight), (width, height))

        ground_truth = plot_bbox(origin_image,
                                 origin_box[:, :4],
                                 scores=None,
                                 labels=origin_box[:, 4:5],
                                 thresh=None,
                                 reverse_rgb=True,
                                 class_names=test_dataset.classes,
                                 absolute_coordinates=True,
                                 colors=ground_truth_colors)
        plot_bbox(ground_truth,
                  bbox,
                  scores=scores[0],
                  labels=ids[0],
                  thresh=plot_class_thresh,
                  reverse_rgb=False,
                  class_names=test_dataset.classes,
                  absolute_coordinates=True,
                  image_show=show_flag,
                  image_save=save_flag,
                  image_save_path=test_save_path,
                  image_name=name[0])

        cls_targets, box_targets = targetgenerator(anchors, gt_boxes, gt_ids)
        except_ignore_samples = cls_targets > -1
        positive_samples = cls_targets > 0
        positive_numbers = positive_samples.sum()

        conf_loss = confidence_loss(
            cls_preds, cls_targets,
            except_ignore_samples.expand_dims(axis=-1))
        # +1 keeps the division defined when a batch has no positive sample.
        conf_loss = mx.nd.divide(conf_loss, positive_numbers + 1)
        conf_loss_sum += conf_loss.asscalar()

        # NOTE(review): unlike conf_loss, loc_loss is accumulated without
        # dividing by the positive count - confirm this is intended.
        loc_loss = localization_loss(box_preds, box_targets,
                                     positive_samples.expand_dims(axis=-1))
        loc_loss_sum += loc_loss.asscalar()

    # Mean loss over all test batches.
    test_conf_loss_mean = np.divide(conf_loss_sum,
                                    test_update_number_per_epoch)
    test_loc_loss_mean = np.divide(loc_loss_sum, test_update_number_per_epoch)
    test_total_loss_mean = test_conf_loss_mean + test_loc_loss_mean

    logging.info(
        f"test confidence loss : {test_conf_loss_mean} / test localization loss : {test_loc_loss_mean} / test total loss : {test_total_loss_mean}"
    )

    AP_appender = []
    round_position = 2
    class_name, precision, recall, true_positive, false_positive, threshold = precision_recall.get_PR_list(
    )
    for j, (c, p, r) in enumerate(zip(class_name, precision, recall)):
        name, AP = precision_recall.get_AP(c, p, r)
        logging.info(
            f"class {j}'s {name} AP : {round(AP * 100, round_position)}%")
        AP_appender.append(AP)

    # Same NaN handling as the training script: a NaN AP must not turn the
    # whole mAP into NaN.
    AP_appender = np.nan_to_num(AP_appender)
    mAP_result = np.mean(AP_appender)

    logging.info(f"mAP : {round(mAP_result * 100, round_position)}%")
    precision_recall.get_PR_curve(name=class_name,
                                  precision=precision,
                                  recall=recall,
                                  threshold=threshold,
                                  AP=AP_appender,
                                  mAP=mAP_result,
                                  folder_name=test_graph_path)
def run(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        offset_alloc_size=(64, 64),
        anchors={
            "shallow": [(10, 13), (16, 30), (33, 23)],
            "middle": [(30, 61), (62, 45), (59, 119)],
            "deep": [(116, 90), (156, 198), (373, 326)]
        },
        graphviz=False,
        epoch=100,
        input_size=[416, 416],
        batch_log=100,
        batch_size=16,
        batch_interval=10,
        subdivision=4,
        train_dataset_path="Dataset/train",
        valid_dataset_path="Dataset/valid",
        multiscale=False,
        factor_scale=[13, 5],
        ignore_threshold=0.5,
        dynamic=False,
        data_augmentation=True,
        num_workers=4,
        optimizer="ADAM",
        weight_decay=0.000001,
        save_period=5,
        load_period=10,
        learning_rate=0.001,
        decay_lr=0.999,
        decay_step=10,
        GPU_COUNT=0,
        Darknetlayer=53,
        pretrained_base=True,
        pretrained_path="modelparam",
        AMP=True,
        valid_size=8,
        eval_period=5,
        tensorboard=True,
        valid_graph_path="valid_Graph",
        valid_html_auto_open=True,
        using_mlflow=True,
        multiperclass=True,
        nms_thresh=0.5,
        nms_topk=500,
        iou_thresh=0.5,
        except_class_thresh=0.05,
        plot_class_thresh=0.5):
    """Train the Gaussian YOLOv3 detector and periodically export/evaluate it.

    Workflow: choose the device context, build the train/valid dataloaders,
    re-import a previously exported network (when the symbol and params files
    for ``load_period`` exist) or build a fresh ``Yolov3``, create the trainer,
    then run the epoch loop. Every ``save_period`` epochs the optimizer state
    and the model are exported; every ``eval_period`` epochs (when validation
    data exists) validation losses and AP/mAP are computed and, if
    ``tensorboard`` is set, written to the summary writer.

    Args:
        mean, std: Normalization statistics forwarded to the dataloaders and
            used to de-normalize images for the summary writer.
        offset_alloc_size: Forwarded to ``Yolov3`` as ``alloc_size``.
        anchors: Anchor configuration forwarded to ``Yolov3``.
        graphviz: When true, plot the network with ``gluoncv``.
        epoch: Last epoch index to train (inclusive).
        input_size: ``[height, width]``; both must be multiples of 32.
        batch_log: Log training statistics every ``batch_log`` batches.
        batch_size, batch_interval: Forwarded to ``traindataloader``.
        subdivision: Number of chunks each batch is split into; gradients are
            accumulated over the chunks before one trainer step.
        train_dataset_path, valid_dataset_path: Dataset locations.
        multiscale, factor_scale, data_augmentation: Forwarded to
            ``traindataloader``; ``multiscale`` also disables ``static_shape``.
        ignore_threshold, dynamic: Forwarded to ``TargetGenerator``.
        num_workers: Forwarded to the dataloaders.
        optimizer: One of ``"ADAM"``, ``"RMSPROP"``, ``"SGD"`` (any case).
        weight_decay, learning_rate: Optimizer hyper-parameters.
        save_period: Export interval in epochs.
        load_period: Epoch of the checkpoint to resume from.
        decay_lr, decay_step: Factor and step (in epochs) of the LR scheduler.
        GPU_COUNT: ``<= 0`` uses the CPU (and disables AMP), ``1`` uses
            ``gpu(0)``, larger values use ``gpu(0..GPU_COUNT-1)``.
        Darknetlayer, pretrained_base, pretrained_path: Forwarded to
            ``Yolov3``; the first two are also part of the model name.
        AMP: Enable automatic mixed precision.
        valid_size: Validation batch size.
        eval_period: Evaluation interval in epochs.
        tensorboard: Write graph, images, scalars and histograms to mxboard.
        valid_graph_path, valid_html_auto_open: Forwarded to ``get_PR_curve``.
        using_mlflow: Log the total learning time to mlflow at the end.
        multiperclass, nms_thresh, nms_topk, except_class_thresh: Forwarded to
            ``Prediction``.
        iou_thresh: Forwarded to ``Voc_2007_AP``.
        plot_class_thresh: Score threshold forwarded to ``plot_bbox``.

    Note:
        Invalid configurations (bad input size, batch size smaller than the
        GPU count, empty dataloaders, unknown optimizer, already trained
        model) are logged and terminate the process via ``exit(0)``, as in
        the original script.
    """
    # Non-positive counts fall back to the CPU, matching the test scripts;
    # a negative count would otherwise produce an empty context list.
    if GPU_COUNT <= 0:
        ctx = mx.cpu(0)
        AMP = False
    elif GPU_COUNT == 1:
        ctx = mx.gpu(0)
    else:
        ctx = [mx.gpu(i) for i in range(GPU_COUNT)]
    ctx_list = ctx if isinstance(ctx, (list, tuple)) else [ctx]

    # Report the operating system (every platform is logged the same way).
    logging.info(f"{platform.system()} OS")

    if GPU_COUNT > 0:
        for gpu_index, device in enumerate(ctx_list):
            free_memory, total_memory = mx.context.gpu_memory_info(gpu_index)
            free_memory = round(free_memory / (1024 * 1024 * 1024), 2)
            total_memory = round(total_memory / (1024 * 1024 * 1024), 2)
            logging.info(
                f'Running on {device} / free memory : {free_memory}GB / total memory {total_memory}GB'
            )
    else:
        logging.info(f'Running on {ctx}')

    # Force the input size to a multiple of 32 so the strides are not
    # distorted. Either side being off must be rejected, hence `or`.
    if input_size[0] % 32 != 0 or input_size[1] % 32 != 0:
        logging.info("The input size must be a multiple of 32")
        exit(0)

    if GPU_COUNT > 0 and batch_size < GPU_COUNT:
        logging.info("batch size must be greater than gpu number")
        exit(0)

    if AMP:
        amp.init()

    if multiscale:
        logging.info("Using MultiScale")

    if data_augmentation:
        logging.info("Using Data Augmentation")

    logging.info("training Gaussian YoloV3 Detector")
    input_shape = (1, 3) + tuple(input_size)

    train_dataloader, train_dataset = traindataloader(
        multiscale=multiscale,
        factor_scale=factor_scale,
        augmentation=data_augmentation,
        path=train_dataset_path,
        input_size=input_size,
        batch_size=batch_size,
        batch_interval=batch_interval,
        num_workers=num_workers,
        shuffle=True,
        mean=mean,
        std=std)

    train_update_number_per_epoch = len(train_dataloader)
    if train_update_number_per_epoch < 1:
        logging.warning("train batch size가 데이터 수보다 큼")
        exit(0)

    valid_list = glob.glob(os.path.join(valid_dataset_path, "*"))
    if valid_list:
        valid_dataloader, valid_dataset = validdataloader(
            path=valid_dataset_path,
            input_size=input_size,
            batch_size=valid_size,
            num_workers=num_workers,
            shuffle=True,
            mean=mean,
            std=std)
        valid_update_number_per_epoch = len(valid_dataloader)
        if valid_update_number_per_epoch < 1:
            logging.warning("valid batch size가 데이터 수보다 큼")
            exit(0)

    num_classes = train_dataset.num_class  # number of classes
    name_classes = train_dataset.classes

    optimizer = optimizer.upper()
    # "P" in the model name marks a pretrained backbone.
    backbone_tag = "PDark" if pretrained_base else "Dark"
    model = f"{input_size[0]}_{input_size[1]}_{optimizer}_{backbone_tag}_{Darknetlayer}"

    weight_path = os.path.join("weights", model)
    sym_path = os.path.join(weight_path, f'{model}-symbol.json')
    param_path = os.path.join(weight_path, f'{model}-{load_period:04d}.params')
    optimizer_path = os.path.join(weight_path,
                                  f'{model}-{load_period:04d}.opt')

    if os.path.exists(param_path) and os.path.exists(sym_path):
        start_epoch = load_period
        logging.info(f"loading {os.path.basename(param_path)}\n")
        net = gluon.SymbolBlock.imports(sym_path, ['data'],
                                        param_path,
                                        ctx=ctx)
    else:
        start_epoch = 0
        # Strategy for accepting arbitrary input images in MXNet C++:
        # alloc_size (tuple of int) generates offset maps that are large
        # enough and stored with the parameters; at inference the matching
        # area of the anchor map is cropped, which allows exporting to a
        # symbol that can run in C++, Scala, etc.
        net = Yolov3(
            Darknetlayer=Darknetlayer,
            input_size=input_size,
            anchors=anchors,
            num_classes=num_classes,  # foreground classes only
            pretrained=pretrained_base,
            pretrained_path=pretrained_path,
            alloc_size=offset_alloc_size,
            ctx=ctx)

        net.summary(mx.nd.ones(shape=input_shape, ctx=ctx_list[0]))

        # hybridize arguments:
        #   active       - turn hybrid mode on or off.
        #   static_alloc - statically allocate memory (faster, more memory).
        #   static_shape - optimize for invariant input shapes; requires
        #                  static_alloc. Multiscale training changes the
        #                  input shape, so it is disabled in that case.
        # NOTE(review): indentation was lost in this file; hybridize is
        # assumed to belong to the fresh-network branch - confirm.
        net.hybridize(active=True,
                      static_alloc=True,
                      static_shape=not multiscale)

    if start_epoch >= epoch:
        logging.info("this model has already been optimized")
        exit(0)

    if tensorboard:
        summary = SummaryWriter(logdir=os.path.join("mxboard", model),
                                max_queue=10,
                                flush_secs=10,
                                verbose=False)
        # One forward pass so the graph exists before it is recorded.
        net.forward(mx.nd.ones(shape=input_shape, ctx=ctx_list[0]))
        summary.add_graph(net)

    if graphviz:
        gluoncv.utils.viz.plot_network(net,
                                       shape=input_shape,
                                       save_prefix=model)

    # optimizer
    unit = max(1, len(train_dataset) // batch_size)
    step = unit * decay_step
    lr_sch = mx.lr_scheduler.FactorScheduler(step=step,
                                             factor=decay_lr,
                                             stop_factor_lr=1e-12,
                                             base_lr=learning_rate)

    # Gradients are accumulated over the subdivision chunks, so every
    # trainable parameter must add into its gradient buffer.
    for p in net.collect_params().values():
        if p.grad_req != "null":
            p.grad_req = 'add'

    # Optimizer-specific hyper-parameters; the common ones are merged below.
    optimizer_specific_params = {
        "ADAM": {"beta1": 0.9, "beta2": 0.999},
        "RMSPROP": {"gamma1": 0.9, "gamma2": 0.999},
        "SGD": {"momentum": 0.9},
    }
    if optimizer not in optimizer_specific_params:
        logging.error("optimizer not selected")
        exit(0)

    optimizer_params = {
        "learning_rate": learning_rate,
        "lr_scheduler": lr_sch,
        "wd": weight_decay,
    }
    optimizer_params.update(optimizer_specific_params[optimizer])
    optimizer_params['multi_precision'] = False

    # update_on_kvstore: None lets the trainer choose; it must be False when
    # AMP is used (dynamic loss scaling).
    trainer = gluon.Trainer(net.collect_params(),
                            optimizer,
                            optimizer_params=optimizer_params,
                            update_on_kvstore=False if AMP else None)

    if AMP:
        amp.init_trainer(trainer)

    # Restore the optimizer state when a matching checkpoint exists.
    if os.path.exists(optimizer_path):
        try:
            trainer.load_states(optimizer_path)
        except Exception as E:
            logging.info(E)
        else:
            logging.info(f"loading {os.path.basename(optimizer_path)}\n")

    targetgenerator = TargetGenerator(ignore_threshold=ignore_threshold,
                                      dynamic=dynamic,
                                      from_sigmoid=False)

    loss = GaussianYolov3Loss(sparse_label=True,
                              from_sigmoid=False,
                              batch_axis=None,
                              num_classes=num_classes,
                              reduction="sum",
                              exclude=False,
                              epsilon=1e-9)

    prediction = Prediction(from_sigmoid=False,
                            num_classes=num_classes,
                            nms_thresh=nms_thresh,
                            nms_topk=nms_topk,
                            except_class_thresh=except_class_thresh,
                            multiperclass=multiperclass)

    precision_recall = Voc_2007_AP(iou_thresh=iou_thresh,
                                   class_names=name_classes)

    start_time = time.time()
    for i in tqdm(range(start_epoch + 1, epoch + 1, 1),
                  initial=start_epoch + 1,
                  total=epoch):

        xcyc_loss_sum = 0
        wh_loss_sum = 0
        object_loss_sum = 0
        class_loss_sum = 0
        time_stamp = time.time()

        for batch_count, (image, label, _) in enumerate(train_dataloader,
                                                        start=1):
            td_batch_size, _, height, width = image.shape

            image = mx.nd.split(data=image, num_outputs=subdivision, axis=0)
            gt_boxes = mx.nd.split(data=label[:, :, :4],
                                   num_outputs=subdivision,
                                   axis=0)
            gt_ids = mx.nd.split(data=label[:, :, 4:5],
                                 num_outputs=subdivision,
                                 axis=0)

            # With a single output the split results are wrapped so the
            # loop below can always iterate over chunks.
            if subdivision == 1:
                image = [image]
                gt_boxes = [gt_boxes]
                gt_ids = [gt_ids]

            # autograd reference:
            # https://mxnet.apache.org/api/python/docs/tutorials/getting-started/crash-course/3-autograd.html
            with autograd.record(train_mode=True):

                xcyc_all_losses = []
                wh_all_losses = []
                object_all_losses = []
                class_all_losses = []

                for image_split, gt_boxes_split, gt_ids_split in zip(
                        image, gt_boxes, gt_ids):

                    image_split = gluon.utils.split_and_load(image_split,
                                                             ctx_list,
                                                             even_split=False)
                    gt_boxes_split = gluon.utils.split_and_load(
                        gt_boxes_split, ctx_list, even_split=False)
                    gt_ids_split = gluon.utils.split_and_load(gt_ids_split,
                                                              ctx_list,
                                                              even_split=False)

                    xcyc_losses = []
                    wh_losses = []
                    object_losses = []
                    class_losses = []
                    total_loss = []

                    # Data parallelism: one slice per device.
                    for img, gt_box, gt_id in zip(image_split, gt_boxes_split,
                                                  gt_ids_split):
                        output1, output2, output3, anchor1, anchor2, anchor3, offset1, offset2, offset3, stride1, stride2, stride3 = net(
                            img)
                        xcyc_target, wh_target, objectness, class_target, weights = targetgenerator(
                            [output1, output2, output3],
                            [anchor1, anchor2, anchor3], gt_box, gt_id,
                            (height, width))
                        xcyc_loss, wh_loss, object_loss, class_loss = loss(
                            output1, output2, output3, xcyc_target, wh_target,
                            objectness, class_target, weights)

                        xcyc_losses.append(xcyc_loss.asscalar())
                        wh_losses.append(wh_loss.asscalar())
                        object_losses.append(object_loss.asscalar())
                        class_losses.append(class_loss.asscalar())
                        total_loss.append(xcyc_loss + wh_loss + object_loss +
                                          class_loss)

                    if AMP:
                        with amp.scale_loss(total_loss,
                                            trainer) as scaled_loss:
                            autograd.backward(scaled_loss)
                    else:
                        autograd.backward(total_loss)

                    xcyc_all_losses.append(sum(xcyc_losses))
                    wh_all_losses.append(sum(wh_losses))
                    object_all_losses.append(sum(object_losses))
                    class_all_losses.append(sum(class_losses))

            trainer.step(batch_size=td_batch_size, ignore_stale_grad=False)
            # grad_req is 'add', so gradients must be cleared manually.
            for p in net.collect_params().values():
                p.zero_grad()

            xcyc_loss_sum += sum(xcyc_all_losses) / td_batch_size
            wh_loss_sum += sum(wh_all_losses) / td_batch_size
            object_loss_sum += sum(object_all_losses) / td_batch_size
            class_loss_sum += sum(class_all_losses) / td_batch_size

            if batch_count % batch_log == 0:
                logging.info(
                    f'[Epoch {i}][Batch {batch_count}/{train_update_number_per_epoch}],'
                    f'[Speed {td_batch_size / (time.time() - time_stamp):.3f} samples/sec],'
                    f'[Lr = {trainer.learning_rate}]'
                    f'[xcyc loss = {sum(xcyc_all_losses) / td_batch_size:.3f}]'
                    f'[wh loss = {sum(wh_all_losses) / td_batch_size:.3f}]'
                    f'[obj loss = {sum(object_all_losses) / td_batch_size:.3f}]'
                    f'[class loss = {sum(class_all_losses) / td_batch_size:.3f}]'
                )
            # Reset every batch so the logged speed covers a single batch.
            time_stamp = time.time()

        train_xcyc_loss_mean = np.divide(xcyc_loss_sum,
                                         train_update_number_per_epoch)
        train_wh_loss_mean = np.divide(wh_loss_sum,
                                       train_update_number_per_epoch)
        train_object_loss_mean = np.divide(object_loss_sum,
                                           train_update_number_per_epoch)
        train_class_loss_mean = np.divide(class_loss_sum,
                                          train_update_number_per_epoch)
        train_total_loss_mean = train_xcyc_loss_mean + train_wh_loss_mean + train_object_loss_mean + train_class_loss_mean

        logging.info(f"train xcyc loss : {train_xcyc_loss_mean} / "
                     f"train wh loss : {train_wh_loss_mean} / "
                     f"train object loss : {train_object_loss_mean} / "
                     f"train class loss : {train_class_loss_mean} / "
                     f"train total loss : {train_total_loss_mean}")

        if i % save_period == 0:

            weight_epoch_path = os.path.join(weight_path, str(i))
            os.makedirs(weight_epoch_path, exist_ok=True)

            # Save the optimizer state.
            try:
                trainer.save_states(
                    os.path.join(weight_path, f'{model}-{i:04d}.opt'))
            except Exception as E:
                logging.error(f"optimizer weight export 예외 발생 : {E}")
            else:
                logging.info("optimizer weight export 성공")

            # Hybrid models are serialized to JSON with export(); the result
            # can be loaded by SymbolBlock.imports, mxnet.mod.Module or the
            # C++ interface. A single input is named "data"; several inputs
            # are named data0, data1, ...
            context = mx.gpu(0) if GPU_COUNT >= 1 else mx.cpu(0)

            # With MXNet 1.6.0 + AMP, reusing the `prediction` block declared
            # above caused problems for YOLOv3 / Gaussian YOLOv3 (1.5.x was
            # fine). Blocks are passed by reference, so hybridizing the block
            # inside export_block_for_cplusplus would also hybridize the one
            # used for validation; a fresh Prediction block avoids that.
            auxnet = Prediction(from_sigmoid=False,
                                num_classes=num_classes,
                                nms_thresh=nms_thresh,
                                nms_topk=nms_topk,
                                except_class_thresh=except_class_thresh,
                                multiperclass=multiperclass)
            postnet = PostNet(net=net, auxnet=auxnet)

            try:
                # for onnx
                net.export(os.path.join(weight_path, f"{model}"),
                           epoch=i,
                           remove_amp_cast=True)
                # for onnx extraction
                net.save_parameters(os.path.join(weight_path, f"{i}.params"))

                # Network inference, decoding and NMS in one graph - handy
                # for MXNet C++, but it cannot be exported to onnx.
                export_block_for_cplusplus(
                    path=os.path.join(weight_epoch_path, f"{model}_prepost"),
                    block=postnet,
                    data_shape=tuple(input_size) + tuple((3, )),
                    epoch=i,
                    # In C++ an image read by OpenCV can be fed as is.
                    preprocess=True,
                    layout='HWC',
                    ctx=context,
                    remove_amp_cast=True)
            except Exception as E:
                logging.error(f"json, param model export 예외 발생 : {E}")
            else:
                logging.info("json, param model export 성공")

            # Always move the parameters back to the training context, even
            # when the export failed half-way.
            net.collect_params().reset_ctx(ctx)

        if i % eval_period == 0 and valid_list:

            xcyc_loss_sum = 0
            wh_loss_sum = 0
            object_loss_sum = 0
            class_loss_sum = 0

            # Validation losses and precision/recall statistics.
            for image, label, _ in valid_dataloader:
                vd_batch_size, _, height, width = image.shape

                image = gluon.utils.split_and_load(image,
                                                   ctx_list,
                                                   even_split=False)
                gt_boxes = gluon.utils.split_and_load(label[:, :, :4],
                                                      ctx_list,
                                                      even_split=False)
                gt_ids = gluon.utils.split_and_load(label[:, :, 4:5],
                                                    ctx_list,
                                                    even_split=False)

                xcyc_losses = []
                wh_losses = []
                object_losses = []
                class_losses = []

                # Data parallelism: one slice per device.
                for img, gt_box, gt_id in zip(image, gt_boxes, gt_ids):
                    output1, output2, output3, anchor1, anchor2, anchor3, offset1, offset2, offset3, stride1, stride2, stride3 = net(
                        img)
                    xcyc_target, wh_target, objectness, class_target, weights = targetgenerator(
                        [output1, output2, output3],
                        [anchor1, anchor2, anchor3], gt_box, gt_id,
                        (height, width))
                    pred_id, pred_score, pred_bbox = prediction(
                        output1, output2, output3, anchor1, anchor2, anchor3,
                        offset1, offset2, offset3, stride1, stride2, stride3)

                    precision_recall.update(pred_bboxes=pred_bbox,
                                            pred_labels=pred_id,
                                            pred_scores=pred_score,
                                            gt_boxes=gt_box,
                                            gt_labels=gt_id)

                    xcyc_loss, wh_loss, object_loss, class_loss = loss(
                        output1, output2, output3, xcyc_target, wh_target,
                        objectness, class_target, weights)

                    xcyc_losses.append(xcyc_loss.asscalar())
                    wh_losses.append(wh_loss.asscalar())
                    object_losses.append(object_loss.asscalar())
                    class_losses.append(class_loss.asscalar())

                xcyc_loss_sum += sum(xcyc_losses) / vd_batch_size
                wh_loss_sum += sum(wh_losses) / vd_batch_size
                object_loss_sum += sum(object_losses) / vd_batch_size
                class_loss_sum += sum(class_losses) / vd_batch_size

            valid_xcyc_loss_mean = np.divide(xcyc_loss_sum,
                                             valid_update_number_per_epoch)
            valid_wh_loss_mean = np.divide(wh_loss_sum,
                                           valid_update_number_per_epoch)
            valid_object_loss_mean = np.divide(object_loss_sum,
                                               valid_update_number_per_epoch)
            valid_class_loss_mean = np.divide(class_loss_sum,
                                              valid_update_number_per_epoch)
            valid_total_loss_mean = valid_xcyc_loss_mean + valid_wh_loss_mean + valid_object_loss_mean + valid_class_loss_mean

            logging.info(f"valid xcyc loss : {valid_xcyc_loss_mean} / "
                         f"valid wh loss : {valid_wh_loss_mean} / "
                         f"valid object loss : {valid_object_loss_mean} / "
                         f"valid class loss : {valid_class_loss_mean} / "
                         f"valid total loss : {valid_total_loss_mean}")

            AP_appender = []
            round_position = 2
            class_name, precision, recall, true_positive, false_positive, threshold = precision_recall.get_PR_list(
            )
            for j, (c, p, r) in enumerate(zip(class_name, precision, recall)):
                name, AP = precision_recall.get_AP(c, p, r)
                logging.info(
                    f"class {j}'s {name} AP : {round(AP * 100, round_position)}%"
                )
                AP_appender.append(AP)

            # A NaN AP must not turn the whole mAP into NaN.
            AP_appender = np.nan_to_num(AP_appender)
            mAP_result = np.mean(AP_appender)

            logging.info(f"mAP : {round(mAP_result * 100, round_position)}%")
            precision_recall.get_PR_curve(name=class_name,
                                          precision=precision,
                                          recall=recall,
                                          threshold=threshold,
                                          AP=AP_appender,
                                          mAP=mAP_result,
                                          folder_name=valid_graph_path,
                                          epoch=i,
                                          auto_open=valid_html_auto_open)
            precision_recall.reset()

            if tensorboard:
                # Draw one validation batch (split across the devices).
                dataloader_iter = iter(valid_dataloader)
                image, label, _ = next(dataloader_iter)

                image = gluon.utils.split_and_load(image,
                                                   ctx_list,
                                                   even_split=False)
                label = gluon.utils.split_and_load(label,
                                                   ctx_list,
                                                   even_split=False)

                ground_truth_colors = {
                    class_index: (0, 1, 0)
                    for class_index in range(num_classes)
                }

                batch_image = []
                for img, lb in zip(image, label):
                    gt_boxes = lb[:, :, :4]
                    gt_ids = lb[:, :, 4:5]
                    output1, output2, output3, anchor1, anchor2, anchor3, offset1, offset2, offset3, stride1, stride2, stride3 = net(
                        img)
                    ids, scores, bboxes = prediction(
                        output1, output2, output3, anchor1, anchor2, anchor3,
                        offset1, offset2, offset3, stride1, stride2, stride3)

                    for ig, gt_id, gt_box, pred_id, pred_score, pred_bbox in zip(
                            img, gt_ids, gt_boxes, ids, scores, bboxes):
                        # Undo the normalization and convert to uint8 HWC.
                        ig = ig.transpose((1, 2, 0)) * mx.nd.array(
                            std, ctx=ig.context) + mx.nd.array(
                                mean, ctx=ig.context)
                        ig = (ig * 255).clip(0, 255)
                        ig = ig.astype(np.uint8)

                        # Draw the ground-truth boxes.
                        ground_truth = plot_bbox(
                            ig,
                            gt_box,
                            scores=None,
                            labels=gt_id,
                            thresh=None,
                            reverse_rgb=False,
                            class_names=valid_dataset.classes,
                            absolute_coordinates=True,
                            colors=ground_truth_colors)
                        # Draw the predicted boxes on top.
                        prediction_box = plot_bbox(
                            ground_truth,
                            pred_bbox,
                            scores=pred_score,
                            labels=pred_id,
                            thresh=plot_class_thresh,
                            reverse_rgb=False,
                            class_names=valid_dataset.classes,
                            absolute_coordinates=True)

                        # The summary writer expects
                        # (channel, height, width) instead of
                        # (height, width, channel).
                        prediction_box = np.transpose(prediction_box,
                                                      axes=(2, 0, 1))
                        # Collected as (batch, channel, height, width).
                        batch_image.append(prediction_box)

                summary.add_image(tag="valid_result",
                                  image=np.array(batch_image),
                                  global_step=i)
                summary.add_scalar(tag="xy_loss",
                                   value={
                                       "train_xcyc_loss": train_xcyc_loss_mean,
                                       "valid_xcyc_loss": valid_xcyc_loss_mean
                                   },
                                   global_step=i)
                summary.add_scalar(tag="wh_loss",
                                   value={
                                       "train_wh_loss": train_wh_loss_mean,
                                       "valid_wh_loss": valid_wh_loss_mean
                                   },
                                   global_step=i)
                summary.add_scalar(tag="object_loss",
                                   value={
                                       "train_object_loss":
                                       train_object_loss_mean,
                                       "valid_object_loss":
                                       valid_object_loss_mean
                                   },
                                   global_step=i)
                summary.add_scalar(tag="class_loss",
                                   value={
                                       "train_class_loss":
                                       train_class_loss_mean,
                                       "valid_class_loss":
                                       valid_class_loss_mean
                                   },
                                   global_step=i)
                summary.add_scalar(tag="total_loss",
                                   value={
                                       "train_total_loss":
                                       train_total_loss_mean,
                                       "valid_total_loss":
                                       valid_total_loss_mean
                                   },
                                   global_step=i)

                for p in net.collect_params().values():
                    summary.add_histogram(tag=p.name,
                                          values=p.data(ctx=ctx_list[0]),
                                          global_step=i,
                                          bins='default')

    end_time = time.time()
    learning_time = end_time - start_time
    logging.info(f"learning time : 약, {learning_time / 3600:0.2f}H")
    logging.info("optimization completed")

    if tensorboard:
        # Flush pending events and release the event file.
        summary.close()

    if using_mlflow:
        ml.log_metric("learning time", round(learning_time / 3600, 2))
def run(mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        load_name="608_608_ADAM_PDark_53",
        load_period=10,
        GPU_COUNT=0,
        test_weight_path="weights",
        test_dataset_path="Dataset/test",
        test_save_path="result",
        test_graph_path="test_Graph",
        num_workers=4,
        show_flag=True,
        save_flag=True,
        ignore_threshold=0.5,
        dynamic=False,
        multiperclass=True,
        nms_thresh=0.5,
        nms_topk=500,
        iou_thresh=0.5,
        except_class_thresh=0.05,
        plot_class_thresh=0.5):
    """Evaluate an exported YOLOv3 symbol model on the test dataset.

    Loads ``<load_name>-symbol.json`` / ``<load_name>-<load_period>.params``
    from ``test_weight_path/load_name``, runs the network over every batch of
    the test dataloader, draws ground-truth and predicted boxes, accumulates
    the four YOLOv3 loss terms, and finally logs per-class AP / mAP and writes
    the precision-recall curve.

    Args:
        mean, std: Normalization statistics forwarded to ``testdataloader``.
        load_name: Model name; its first two ``_``-separated fields are parsed
            as the network input height and width.
        load_period: Epoch number used to build the ``.params`` file name.
        GPU_COUNT: ``> 0`` runs on ``mx.gpu(0)``, otherwise on ``mx.cpu(0)``.
        test_weight_path: Directory that contains the ``load_name`` folder.
        test_dataset_path: Dataset location forwarded to ``testdataloader``.
        test_save_path: Forwarded to ``plot_bbox`` as ``image_save_path``.
        test_graph_path: Forwarded to ``get_PR_curve`` as ``folder_name``.
        num_workers: Forwarded to ``testdataloader``.
        show_flag, save_flag: Forwarded to ``plot_bbox`` as ``image_show`` /
            ``image_save``.
        ignore_threshold, dynamic: Forwarded to ``TargetGenerator``.
        multiperclass, nms_thresh, nms_topk, except_class_thresh: Forwarded to
            ``Prediction``.
        iou_thresh: Forwarded to ``Voc_2007_AP``.
        plot_class_thresh: Score threshold forwarded to ``plot_bbox``.

    Raises:
        ValueError: If ``load_name`` does not start with ``<int>_<int>``.

    Note:
        Unrecoverable setup problems (missing dataset, unloadable weights,
        empty dataloader) are logged and terminate the process via
        ``exit(0)``, as in the original script.
    """
    ctx = mx.gpu(0) if GPU_COUNT > 0 else mx.cpu(0)

    # Report the operating system (every platform is logged the same way).
    logging.info(f"{platform.system()} OS")

    if GPU_COUNT > 0:
        free_memory, total_memory = mx.context.gpu_memory_info(0)
        free_memory = round(free_memory / (1024 * 1024 * 1024), 2)
        total_memory = round(total_memory / (1024 * 1024 * 1024), 2)
        logging.info(
            f'Running on {ctx} / free memory : {free_memory}GB / total memory {total_memory}GB'
        )
    else:
        logging.info(f'Running on {ctx}')

    logging.info(f"test {load_name}")

    # int() either returns an int or raises, so validate by catching the
    # failure; a name without two fields also surfaces as ValueError here.
    try:
        netheight, netwidth = (int(field) for field in load_name.split("_")[:2])
    except ValueError as err:
        logging.error(
            f"load_name must start with '<height>_<width>' : {load_name}")
        raise ValueError(
            f"cannot parse network input size from load_name={load_name!r}"
        ) from err
    logging.info(f"network input size : {(netheight, netwidth)}")

    try:
        test_dataloader, test_dataset = testdataloader(
            path=test_dataset_path,
            input_size=(netheight, netwidth),
            num_workers=num_workers,
            mean=mean,
            std=std)
    except Exception:
        # Keep the original message but also record the real cause.
        logging.exception("The dataset does not exist")
        exit(0)

    weight_path = os.path.join(test_weight_path, load_name)
    sym = os.path.join(weight_path, f'{load_name}-symbol.json')
    params = os.path.join(weight_path, f'{load_name}-{load_period:04d}.params')

    test_update_number_per_epoch = len(test_dataloader)
    if test_update_number_per_epoch < 1:
        logging.warning(" test batch size가 데이터 수보다 큼 ")
        exit(0)

    num_classes = test_dataset.num_class  # number of classes
    name_classes = test_dataset.classes

    logging.info("symbol model test")
    try:
        net = gluon.SymbolBlock.imports(sym, ['data'], params, ctx=ctx)
    except Exception:
        logging.exception("loading symbol weights 실패")
        exit(0)
    else:
        logging.info("loading symbol weights 성공")

    net.hybridize(active=True, static_alloc=True, static_shape=True)

    targetgenerator = TargetGenerator(ignore_threshold=ignore_threshold,
                                      dynamic=dynamic,
                                      from_sigmoid=False)

    loss = Yolov3Loss(sparse_label=True,
                      from_sigmoid=False,
                      batch_axis=None,
                      num_classes=num_classes,
                      reduction="sum",
                      exclude=False)

    prediction = Prediction(from_sigmoid=False,
                            num_classes=num_classes,
                            nms_thresh=nms_thresh,
                            nms_topk=nms_topk,
                            except_class_thresh=except_class_thresh,
                            multiperclass=multiperclass)

    precision_recall = Voc_2007_AP(iou_thresh=iou_thresh,
                                   class_names=name_classes)

    # Every ground-truth class is drawn with the same color.
    ground_truth_colors = {
        class_index: (0, 0, 1) for class_index in range(num_classes)
    }

    object_loss_sum = 0
    xcyc_loss_sum = 0
    wh_loss_sum = 0
    class_loss_sum = 0

    for image, label, name, origin_image, origin_box in tqdm(test_dataloader):
        _, height, width, _ = origin_image.shape
        logging.info(f"real input size : {(height, width)}")
        origin_image = origin_image.asnumpy()[0]
        origin_box = origin_box.asnumpy()[0]

        image = image.as_in_context(ctx)
        label = label.as_in_context(ctx)
        gt_boxes = label[:, :, :4]
        gt_ids = label[:, :, 4:5]

        output1, output2, output3, \
        anchor1, anchor2, anchor3, \
        offset1, offset2, offset3, \
        stride1, stride2, stride3 = net(image)

        ids, scores, bboxes = prediction(output1, output2, output3, anchor1,
                                         anchor2, anchor3, offset1, offset2,
                                         offset3, stride1, stride2, stride3)

        precision_recall.update(pred_bboxes=bboxes,
                                pred_labels=ids,
                                pred_scores=scores,
                                gt_boxes=gt_boxes,
                                gt_labels=gt_ids)

        # Map predicted boxes from network resolution to the original image.
        bbox = box_resize(bboxes[0], (netwidth, netheight), (width, height))

        ground_truth = plot_bbox(origin_image,
                                 origin_box[:, :4],
                                 scores=None,
                                 labels=origin_box[:, 4:5],
                                 thresh=None,
                                 reverse_rgb=True,
                                 class_names=test_dataset.classes,
                                 absolute_coordinates=True,
                                 colors=ground_truth_colors)
        plot_bbox(ground_truth,
                  bbox,
                  scores=scores[0],
                  labels=ids[0],
                  thresh=plot_class_thresh,
                  reverse_rgb=False,
                  class_names=test_dataset.classes,
                  absolute_coordinates=True,
                  image_show=show_flag,
                  image_save=save_flag,
                  image_save_path=test_save_path,
                  image_name=name[0])

        xcyc_target, wh_target, objectness, class_target, weights = targetgenerator(
            [output1, output2, output3], [anchor1, anchor2, anchor3],
            gt_boxes, gt_ids, (netheight, netwidth))
        xcyc_loss, wh_loss, object_loss, class_loss = loss(
            output1, output2, output3, xcyc_target, wh_target, objectness,
            class_target, weights)

        xcyc_loss_sum += xcyc_loss.asscalar()
        wh_loss_sum += wh_loss.asscalar()
        object_loss_sum += object_loss.asscalar()
        class_loss_sum += class_loss.asscalar()

    # Mean loss over all test batches. These are test metrics, so they are
    # named and logged as such (the original labelled them "train").
    test_xcyc_loss_mean = np.divide(xcyc_loss_sum,
                                    test_update_number_per_epoch)
    test_wh_loss_mean = np.divide(wh_loss_sum, test_update_number_per_epoch)
    test_object_loss_mean = np.divide(object_loss_sum,
                                      test_update_number_per_epoch)
    test_class_loss_mean = np.divide(class_loss_sum,
                                     test_update_number_per_epoch)
    test_total_loss_mean = test_xcyc_loss_mean + test_wh_loss_mean + test_object_loss_mean + test_class_loss_mean

    logging.info(f"test xcyc loss : {test_xcyc_loss_mean} / "
                 f"test wh loss : {test_wh_loss_mean} / "
                 f"test object loss : {test_object_loss_mean} / "
                 f"test class loss : {test_class_loss_mean} / "
                 f"test total loss : {test_total_loss_mean}")

    AP_appender = []
    round_position = 2
    class_name, precision, recall, true_positive, false_positive, threshold = precision_recall.get_PR_list(
    )
    for j, (c, p, r) in enumerate(zip(class_name, precision, recall)):
        name, AP = precision_recall.get_AP(c, p, r)
        logging.info(
            f"class {j}'s {name} AP : {round(AP * 100, round_position)}%")
        AP_appender.append(AP)

    # Same NaN handling as the training script: a NaN AP must not turn the
    # whole mAP into NaN.
    AP_appender = np.nan_to_num(AP_appender)
    mAP_result = np.mean(AP_appender)

    logging.info(f"mAP : {round(mAP_result * 100, round_position)}%")
    precision_recall.get_PR_curve(name=class_name,
                                  precision=precision,
                                  recall=recall,
                                  threshold=threshold,
                                  AP=AP_appender,
                                  mAP=mAP_result,
                                  folder_name=test_graph_path)
def run(mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        load_name="480_640_ADAM_PCENTER_RES18",
        load_period=10,
        GPU_COUNT=0,
        test_weight_path="weights",
        test_dataset_path="Dataset/test",
        test_save_path="result",
        test_graph_path="test_Graph",
        test_html_auto_open=False,
        lambda_off=1,
        lambda_size=0.1,
        num_workers=4,
        show_flag=True,
        save_flag=True,
        topk=100,
        iou_thresh=0.5,
        nms=False,
        except_class_thresh=0.01,
        nms_thresh=0.5,
        plot_class_thresh=0.5):
    """Evaluate an exported heatmap/offset/size detection model on a test set.

    The function imports ``<load_name>-symbol.json`` and
    ``<load_name>-<load_period:04d>.params`` from
    ``<test_weight_path>/<load_name>/``, runs the network over every batch of
    the test dataloader, draws ground truth and predictions with
    ``plot_bbox``, accumulates the heatmap / offset / wh losses, logs their
    means, and finally logs per-class AP and mAP and writes the PR curve.

    Args:
        mean: Forwarded to ``testdataloader`` as ``mean``.
        std: Forwarded to ``testdataloader`` as ``std``.
        load_name: Weight folder name and file prefix. Its first two
            underscore-separated fields are parsed as the network input
            height and width.
        load_period: Number used to build the ``.params`` file name; also
            passed to ``get_PR_curve`` as ``epoch``.
        GPU_COUNT: ``mx.gpu(0)`` is used when this is greater than 0,
            otherwise ``mx.cpu(0)``.
        test_weight_path: Directory that contains the ``load_name`` folder.
        test_dataset_path: Forwarded to ``testdataloader`` as ``path``.
        test_save_path: Forwarded to ``plot_bbox`` as ``image_save_path``.
        test_graph_path: Forwarded to ``get_PR_curve`` as ``folder_name``.
        test_html_auto_open: Forwarded to ``get_PR_curve`` as ``auto_open``.
        lambda_off: Multiplier applied to the offset loss.
        lambda_size: Multiplier applied to the wh loss.
        num_workers: Forwarded to ``testdataloader``.
        show_flag: Forwarded to ``plot_bbox`` as ``image_show``.
        save_flag: Forwarded to ``plot_bbox`` as ``image_save``.
        topk: Forwarded to ``Prediction``.
        iou_thresh: Forwarded to ``Voc_2007_AP``.
        nms: Forwarded to ``Prediction``.
        except_class_thresh: Forwarded to ``Prediction``.
        nms_thresh: Forwarded to ``Prediction``.
        plot_class_thresh: ``thresh`` used when plotting predicted boxes.

    Raises:
        ValueError: If the height/width fields of ``load_name`` are not
            integers.
        IndexError: If ``load_name`` has fewer than two underscore-separated
            fields.
        SystemExit: With status 0 when the dataset cannot be loaded, the
            dataloader is empty, or the weights cannot be imported.
    """
    if GPU_COUNT > 0:
        ctx = mx.gpu(0)
    else:
        ctx = mx.cpu(0)

    # Report the operating system (the message is the same on every OS).
    logging.info(f"{platform.system()} OS")

    if GPU_COUNT > 0:
        free_memory, total_memory = mx.context.gpu_memory_info(0)
        # bytes -> GB
        free_memory = round(free_memory / (1024 * 1024 * 1024), 2)
        total_memory = round(total_memory / (1024 * 1024 * 1024), 2)
        logging.info(
            f'Running on {ctx} / free memory : {free_memory}GB / total memory {total_memory}GB'
        )
    else:
        logging.info(f'Running on {ctx}')

    logging.info(f"test {load_name}")
    scale_factor = 4  # fixed
    logging.info(f"scale factor {scale_factor}")

    # The network input size is encoded in the weight name as
    # "<height>_<width>_...". int() itself is the validation: it either
    # returns an int or raises, so no isinstance check is needed afterwards.
    name_fields = load_name.split("_")
    try:
        netheight = int(name_fields[0])
        netwidth = int(name_fields[1])
    except (ValueError, IndexError):
        logging.error(
            f"load_name must start with '<height>_<width>_' : {load_name}")
        raise
    logging.info(f"network input size : {(netheight, netwidth)}")

    try:
        test_dataloader, test_dataset = testdataloader(
            path=test_dataset_path,
            input_size=(netheight, netwidth),
            num_workers=num_workers,
            mean=mean,
            std=std,
            scale_factor=scale_factor)
    except Exception:
        # Keep the traceback so the real cause is not lost.
        logging.exception("The dataset does not exist")
        exit(0)

    weight_path = os.path.join(test_weight_path, load_name)
    sym = os.path.join(weight_path, f'{load_name}-symbol.json')
    params = os.path.join(weight_path, f'{load_name}-{load_period:04d}.params')

    test_update_number_per_epoch = len(test_dataloader)
    if test_update_number_per_epoch < 1:
        logging.warning(" test batch size가 데이터 수보다 큼 ")
        exit(0)

    num_classes = test_dataset.num_class  # number of classes
    name_classes = test_dataset.classes

    logging.info("symbol model test")
    try:
        net = gluon.SymbolBlock.imports(sym, ['data'], params, ctx=ctx)
    except Exception:
        # Keep the traceback so the real cause is not lost.
        logging.exception("loading symbol weights 실패")
        exit(0)
    else:
        logging.info("loading symbol weights 성공")

    net.hybridize(active=True, static_alloc=True, static_shape=True)

    heatmapfocalloss = HeatmapFocalLoss(from_sigmoid=True, alpha=2, beta=4)
    normedl1loss = NormedL1Loss()
    targetgenerator = TargetGenerator(num_classes=num_classes)
    prediction = Prediction(topk=topk,
                            scale=scale_factor,
                            nms=nms,
                            except_class_thresh=except_class_thresh,
                            nms_thresh=nms_thresh)
    precision_recall = Voc_2007_AP(iou_thresh=iou_thresh,
                                   class_names=name_classes)

    # Every ground-truth class is drawn with the same colour.
    ground_truth_colors = {
        class_index: (0, 0, 1) for class_index in range(num_classes)
    }

    heatmap_loss_sum = 0
    offset_loss_sum = 0
    wh_loss_sum = 0

    for image, label, name, origin_image, origin_box in tqdm(test_dataloader):
        _, height, width, _ = origin_image.shape
        logging.info(f"real input size : {(height, width)}")
        origin_image = origin_image.asnumpy()[0]
        origin_box = origin_box.asnumpy()[0]

        image = image.as_in_context(ctx)
        label = label.as_in_context(ctx)
        gt_boxes = label[:, :, :4]
        gt_ids = label[:, :, 4:5]

        heatmap_pred, offset_pred, wh_pred = net(image)
        ids, scores, bboxes = prediction(heatmap_pred, offset_pred, wh_pred)

        # NOTE(review): gt_boxes are multiplied by scale_factor here,
        # presumably because the loader returns them at heatmap resolution
        # while predictions are at network-input resolution - confirm.
        precision_recall.update(pred_bboxes=bboxes,
                                pred_labels=ids,
                                pred_scores=scores,
                                gt_boxes=gt_boxes * scale_factor,
                                gt_labels=gt_ids)

        # Build a colour heatmap image from the first sample's prediction.
        heatmap = mx.nd.multiply(heatmap_pred[0], 255.0)  # scale to 0 ~ 255
        heatmap = mx.nd.max(heatmap, axis=0,
                            keepdims=True)  # max over the channel axis
        heatmap = mx.nd.transpose(heatmap,
                                  axes=(1, 2, 0))  # (height, width, channel=1)
        heatmap = mx.nd.repeat(heatmap, repeats=3,
                               axis=-1)  # (height, width, channel=3)
        heatmap = heatmap.asnumpy()  # mxnet.ndarray -> numpy.ndarray
        heatmap = cv2.resize(heatmap,
                             dsize=(width, height))  # back to original size
        heatmap = heatmap.astype("uint8")
        heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

        # The heatmap is handed to plot_bbox below together with the image.
        bbox = box_resize(bboxes[0], (netwidth, netheight), (width, height))
        ground_truth = plot_bbox(origin_image,
                                 origin_box[:, :4],
                                 scores=None,
                                 labels=origin_box[:, 4:5],
                                 thresh=None,
                                 reverse_rgb=True,
                                 class_names=test_dataset.classes,
                                 absolute_coordinates=True,
                                 colors=ground_truth_colors)
        plot_bbox(ground_truth,
                  bbox,
                  scores=scores[0],
                  labels=ids[0],
                  thresh=plot_class_thresh,
                  reverse_rgb=False,
                  class_names=test_dataset.classes,
                  absolute_coordinates=True,
                  image_show=show_flag,
                  image_save=save_flag,
                  image_save_path=test_save_path,
                  image_name=name[0],
                  heatmap=heatmap)

        heatmap_target, offset_target, wh_target, mask_target = targetgenerator(
            gt_boxes, gt_ids, netwidth // scale_factor,
            netheight // scale_factor, image.context)
        heatmap_loss = heatmapfocalloss(heatmap_pred, heatmap_target)
        offset_loss = normedl1loss(offset_pred, offset_target,
                                   mask_target) * lambda_off
        wh_loss = normedl1loss(wh_pred, wh_target, mask_target) * lambda_size

        heatmap_loss_sum += heatmap_loss.asscalar()
        offset_loss_sum += offset_loss.asscalar()
        wh_loss_sum += wh_loss.asscalar()

    # Mean loss over the whole test pass.
    test_heatmap_loss_mean = np.divide(heatmap_loss_sum,
                                       test_update_number_per_epoch)
    test_offset_loss_mean = np.divide(offset_loss_sum,
                                      test_update_number_per_epoch)
    test_wh_loss_mean = np.divide(wh_loss_sum, test_update_number_per_epoch)
    test_total_loss_mean = (test_heatmap_loss_mean + test_offset_loss_mean +
                            test_wh_loss_mean)

    logging.info(f"test heatmap loss : {test_heatmap_loss_mean} / "
                 f"test offset loss : {test_offset_loss_mean} / "
                 f"test wh loss : {test_wh_loss_mean} / "
                 f"test total loss : {test_total_loss_mean}")

    AP_appender = []
    round_position = 2
    class_name, precision, recall, _, _, threshold = precision_recall.get_PR_list()
    for j, (c, p, r) in enumerate(zip(class_name, precision, recall)):
        ap_name, AP = precision_recall.get_AP(c, p, r)
        logging.info(
            f"class {j}'s {ap_name} AP : {round(AP * 100, round_position)}%")
        AP_appender.append(AP)

    # NaN APs are turned into 0 before averaging.
    AP_appender = np.nan_to_num(AP_appender)
    mAP_result = np.mean(AP_appender)

    logging.info(f"mAP : {round(mAP_result * 100, round_position)}%")
    precision_recall.get_PR_curve(name=class_name,
                                  precision=precision,
                                  recall=recall,
                                  threshold=threshold,
                                  AP=AP_appender,
                                  mAP=mAP_result,
                                  folder_name=test_graph_path,
                                  epoch=load_period,
                                  auto_open=test_html_auto_open)