def init_model(model_path, num_class, model_name):
    """Build a detector, load its checkpoint and return it ready for inference.

    Args:
        model_path: Path to a checkpoint readable by ``torch.load`` that stores
            the weights under the ``'state_dict'`` key.
        num_class: Number of classes handed to the network constructor.
        model_name: Backbone selector, matched by substring in this order:
            ``'lite'``, ``'pva'``, ``'resnet'`` (built with ``num_layers=101``).

    Returns:
        The model after ``load_state_dict``, moved with ``.cuda()`` and put in
        ``eval()`` mode.

    Raises:
        ValueError: If ``model_name`` contains none of the known backbone names.
    """
    if 'lite' in model_name:
        model = lite_faster_rcnn(num_class)
    elif 'pva' in model_name:
        model = pva_net(num_class)
    elif 'resnet' in model_name:
        model = resnet(num_class, num_layers=101)
    else:
        # Without this branch an unknown name only failed further down with an
        # unrelated "referenced before assignment" error on `model`.
        raise ValueError(
            "Unknown model name %r: expected it to contain 'lite', 'pva' or "
            "'resnet'" % (model_name,))

    checkpoint = torch.load(model_path)
    model.create_architecture()
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()
    model.eval()
    return model
def main() -> None:
    """Rewrite a trained checkpoint with BatchNorm parameters merged away.

    Reads ``args.model_path``, walks the checkpoint's ``'state_dict'`` in
    order, passes convolution and BatchNorm tensors through ``merge`` (defined
    elsewhere in this file), and saves the resulting ``OrderedDict`` next to
    ``args.save_dir`` with the ``.ckpt`` suffix replaced by ``_merged.model``.

    The exact folding arithmetic lives in ``merge`` and is not visible here.
    """
    args = parse_args()
    model_path = args.model_path
    save_path = args.save_dir
    # NOTE(review): the class count (6) is hard-coded; confirm it matches the
    # checkpoint being converted.
    model = lite_faster_rcnn(6)
    model.create_architecture()
    # `pytorch_net` is only referenced by the commented-out load below.
    pytorch_net = model.eval()
    # load weights
    print('Finding trained model weights...')
    print('Loading weights from %s ...' % model_path)
    checkpoint = torch.load(model_path)
    trained_weights = checkpoint['state_dict']
    # pytorch_net.load_state_dict(trained_weights)
    print('Weights load success')
    # go through pytorch net
    print('Going through pytorch net weights...')
    new_weights = OrderedDict()
    inner_product_flag = False
    for name, params in trained_weights.items():
        if len(params.size()) == 4:
            # 4-D tensor: treated as a convolution weight. The return value is
            # discarded; presumably merge() caches it for the BatchNorm that
            # follows -- TODO confirm against merge().
            _, _ = merge(params, name, 'Convolution')
            prev_layer = name
        elif len(params.size()) == 1 and not inner_product_flag:
            # 1-D tensor before any inner-product layer: treated as BatchNorm.
            w, b = merge(params, name, 'BatchNorm')
            if w is not None:
                # Store the merged result under the preceding conv's key.
                # NOTE(review): `prev_layer` is unbound if a 1-D tensor appears
                # before the first 4-D tensor.
                new_weights[prev_layer] = w
                new_weights[prev_layer.replace('weight', 'bias')] = b
        else:
            # inner product layer
            # Once an inner product layer has been seen, its bias (also 1-D)
            # would otherwise be misclassified as a BatchNorm tensor, so every
            # later 1-D tensor is copied through unchanged.
            new_weights[name] = params
            inner_product_flag = True
    # Align names in new_weights with the checkpoint's key order: after the
    # BatchNorm entries are dropped, the remaining names no longer line up.
    print('Aligning weight names...')
    pytorch_net_key_list = list(trained_weights.keys())
    new_weights_key_list = list(new_weights.keys())
    print(len(pytorch_net_key_list))
    print(len(new_weights_key_list))
    #assert len(pytorch_net_key_list) == len(new_weights_key_list)
    # NOTE(review): this indexes new_weights_key_list with indices taken from
    # the (possibly longer) checkpoint key list, so it raises IndexError when
    # the two lengths differ -- verify which key list was intended.
    for index in range(len(pytorch_net_key_list)):
        new_weights[pytorch_net_key_list[index]] = new_weights.pop(
            new_weights_key_list[index])
    name = os.path.basename(model_path)
    torch.save(new_weights, save_path + '/' +
               name.replace('.ckpt', '_merged.model'))
def _build_train_datasets(cfg, args):
    """Create the (train, val) datasets selected by ``cfg.TRAIN.dataset``.

    ``val`` is ``None`` when no validation source is configured.

    Raises:
        ValueError: If a required path is missing or the dataset type is not
            one of ``'coco'``, ``'csv'`` or ``'xml'``.
    """
    kind = cfg.TRAIN.dataset
    if kind == 'coco':
        if args.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(
            args.coco_path, set_name='train2017',
            transform=transforms.Compose([Augmenter()]))
        dataset_val = CocoDataset(
            args.coco_path, set_name='val2017',
            transform=transforms.Compose([Augmenter()]))
        return dataset_train, dataset_val

    if kind == 'csv':
        # These messages used to say "COCO", which pointed at the wrong option.
        if args.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')
        if args.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')
        dataset_train = CSVDataset(
            train_file=args.csv_train, class_list=args.csv_classes,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer()]))
        if args.csv_val is None:
            print('No validation annotations provided.')
            return dataset_train, None
        dataset_val = CSVDataset(
            train_file=args.csv_val, class_list=args.csv_classes,
            transform=transforms.Compose([Normalizer(), Resizer()]))
        return dataset_train, dataset_val

    if kind == 'xml':
        if cfg.TRAIN.train_path is None:
            raise ValueError(
                'Must provide TRAIN.train_path in the config when training '
                'on PASCAL VOC,')
        dataset_train = XML_VOCDataset(
            cfg, img_path=cfg.TRAIN.train_path, xml_path=cfg.TRAIN.train_path,
            class_list=cfg.class_list,
            transform=transforms.Compose([Augmenter()]))
        if cfg.TRAIN.val_path is None:
            print('No validation annotations provided.')
            return dataset_train, None
        dataset_val = XML_VOCDataset(
            cfg, img_path=cfg.TRAIN.val_path, xml_path=cfg.TRAIN.val_path,
            class_list=cfg.class_list, transform=transforms.Compose([]))
        return dataset_train, dataset_val

    raise ValueError(
        'Dataset type not understood (must be coco, csv or xml), exiting.')


def _build_train_model(cfg, classes):
    """Instantiate the network named by ``cfg.MODEL.BACKBONE``.

    ``'pva'`` takes precedence over ``'lite'`` when both substrings occur,
    matching the result of the two independent ``if`` statements used before.

    Raises:
        ValueError: If the backbone name contains neither ``'lite'`` nor
            ``'pva'`` (previously this surfaced as an unbound ``model``).
    """
    backbone = cfg.MODEL.BACKBONE
    if 'pva' in backbone:
        return pva_net(classes, pretrained=False)
    if 'lite' in backbone:
        return lite_faster_rcnn(cfg, classes, pretrained=False)
    raise ValueError(
        "Unsupported cfg.MODEL.BACKBONE %r: expected it to contain 'lite' "
        "or 'pva'" % (backbone,))


def main():
    """Train a detector as described by the base and detection config files.

    Steps: merge the two configs, restrict visible GPUs to ``cfg.TRAIN.gpus``,
    build the training dataset/loader, build (and optionally resume) the
    model, then run the epoch loop with Adam. Every
    ``cfg.save_model_interval`` epochs the learning rate is divided by 10, the
    optimizer is recreated, and a checkpoint containing ``epoch``,
    ``save_dir`` and ``state_dict`` is written to ``cfg.TRAIN.save_dir``.
    """
    args = parse_args()
    base_cfg = Config.fromfile(args.base_config)
    det_cfg = Config.fromfile(args.config)
    cfg = merge_config(base_cfg.model_cfg, det_cfg.model_cfg)

    gpus = ",".join(str(gpu_id) for gpu_id in cfg.TRAIN.gpus)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    gpu_nums = len(gpus.split(','))

    # The validation dataset is still built (it validates the configured
    # paths) but is not consumed: per-epoch evaluation is currently disabled.
    dataset_train, _dataset_val = _build_train_datasets(cfg, args)

    batch_size = cfg.TRAIN.batch_size_per_gpu * gpu_nums
    sampler = AspectRatioBasedSampler(
        dataset_train, batch_size=batch_size, drop_last=False)
    dataloader_train = DataLoader(
        dataset_train, num_workers=cfg.TRAIN.num_works, collate_fn=collater,
        batch_sampler=sampler, pin_memory=True)

    print('Num training images: {}'.format(len(dataset_train)))
    print("Training models...")

    classes = len(cfg.class_list)
    model = _build_train_model(cfg, classes)
    model.create_architecture(cfg)

    start_epoch = 1
    if cfg.TRAIN.resume_model is not None:
        # Load onto the CPU first so a checkpoint saved on another device can
        # be restored; the model is moved to its device afterwards.
        checkpoint = torch.load(cfg.TRAIN.resume_model, map_location='cpu')
        model_dict = model.state_dict()
        model_dict.update(checkpoint['state_dict'])
        model.load_state_dict(model_dict)
        start_epoch = int(checkpoint['epoch']) + 1
        print("loading pretrained model: ", cfg.TRAIN.resume_model)

    if gpu_nums > 1:
        model = nn.DataParallel(model)

    # One device for both the model and the batches; previously the model move
    # was guarded by is_available() while the batches called .cuda() blindly.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.train()

    learning_rate_base = cfg.TRAIN.LEARNING_RATE
    optimizer = optim.Adam(model.parameters(), lr=learning_rate_base)

    loss_hist = collections.deque(maxlen=1024)
    min_avg_loss_hist = 110000.0
    iters_per_epoch = len(dataset_train) // batch_size

    for epoch_num in range(start_epoch, cfg.TRAIN.epochs):
        print("\n=> learning_rate: ", learning_rate_base,
              " min_avg_loss_hist: ", min_avg_loss_hist)
        loss_hist.clear()

        for iter_num, data in enumerate(dataloader_train):
            # Best effort by design: a failing batch is reported and skipped
            # so one bad sample does not abort a long training run.
            try:
                optimizer.zero_grad()
                _, _, _, rpn_loss_cls, rpn_loss_bbox, loss_cls, loss_bbox = model(
                    data['img'].to(device),
                    data['im_info'].to(device),
                    data['annot'].to(device))

                # .mean() collapses the per-replica losses that DataParallel
                # returns and is a no-op for a single-element loss.
                rpn_loss_cls = rpn_loss_cls.mean()
                rpn_loss_bbox = rpn_loss_bbox.mean()
                loss_cls = loss_cls.mean()
                loss_bbox = loss_bbox.mean()
                loss = (rpn_loss_cls + 1.0 * rpn_loss_bbox
                        + loss_cls + 1.0 * loss_bbox)

                if bool(loss == 0):
                    continue

                loss.backward()
                optimizer.step()

                loss_value = float(loss)
                loss_hist.append(loss_value)

                if iter_num % 20 == 0:
                    print(
                        'Epoch: {} | Iteration: {}/{} | loss: {:1.5f} | rpn bbox loss: {:1.5f} | rpn cls loss: {:1.5f} | bbox loss: {:1.5f} | cls loss: {:1.5f} | Running loss: {:1.5f}'.format(
                            epoch_num, iter_num, iters_per_epoch,
                            loss_value, float(rpn_loss_bbox),
                            float(rpn_loss_cls), float(loss_bbox),
                            float(loss_cls), np.mean(loss_hist)))
            except Exception as e:
                print('Epoch: {} | Iteration: {}/{} | Exception: {}'.format(
                    epoch_num, iter_num, iters_per_epoch, e))
                continue

        if epoch_num % cfg.save_model_interval == 0 and epoch_num > 0:
            if gpu_nums > 1:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            # Step decay tied to the save interval. Recreating Adam also
            # resets its moment estimates; kept as-is to preserve the
            # existing training schedule.
            learning_rate_base /= 10
            optimizer = optim.Adam(model.parameters(), lr=learning_rate_base)

            # torch.save does not create missing directories.
            os.makedirs(cfg.TRAIN.save_dir, exist_ok=True)
            save_model_path = os.path.join(
                cfg.TRAIN.save_dir,
                cfg.TRAIN.model_name + '%04d.ckpt' % epoch_num)
            print("save model: ", save_model_path)
            torch.save({
                'epoch': epoch_num,
                'save_dir': cfg.TRAIN.save_dir,
                'state_dict': state_dict}, save_model_path)
def test(args, model_epoch=None):
    """Run the detector over the test set and dump mAP-tool input files.

    For every ``*.xml`` annotation in ``cfg.TEST.img_path`` whose sibling
    ``.jpg`` can be read, one ground-truth file is written to
    ``mAP/input/ground-truth/`` and one detection file to
    ``mAP/input/detection-results/`` (same base name, ``.txt`` suffix).
    Ground-truth lines are ``<class> x0 y0-ish...`` built from the first four
    box values; detection lines additionally carry the box's last value (the
    score slot) right after the class name.

    Args:
        args: Parsed CLI arguments providing ``base_config`` and ``config``.
        model_epoch: Optional checkpoint suffix. When given, ``cfg.model`` is
            replaced by ``<save_dir>/<model_name><model_epoch>``.

    Returns:
        ``(0, 0, 0, 0, 0)`` when ``model_epoch`` is given but that checkpoint
        does not exist; otherwise ``None``.

    Raises:
        ValueError: If ``cfg.MODEL.BACKBONE`` names no known backbone.
    """
    import time

    base_cfg = Config.fromfile(args.base_config)
    det_cfg = Config.fromfile(args.config)
    cfg = merge_config(base_cfg.model_cfg, det_cfg.model_cfg)

    if model_epoch is not None:
        cfg.model = os.path.join(cfg.TRAIN.save_dir,
                                 cfg.TRAIN.model_name + model_epoch)
        print(cfg.model)
        if not os.path.exists(cfg.model):
            return 0, 0, 0, 0, 0

    gpus = ",".join(str(gpu_id) for gpu_id in cfg.TEST.gpus)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    thresh = cfg.TEST.thresh
    nms_thresh = cfg.TEST.nms_thresh
    classes = len(cfg.class_list)

    if 'lite' in cfg.MODEL.BACKBONE:
        model = lite_faster_rcnn(cfg, classes)
    elif 'pva' in cfg.MODEL.BACKBONE:
        model = pva_net(classes)
    elif 'resnet' in cfg.MODEL.BACKBONE:
        model = resnet(classes, num_layers=101)
    else:
        raise ValueError(
            "Unsupported cfg.MODEL.BACKBONE %r: expected it to contain "
            "'lite', 'pva' or 'resnet'" % (cfg.MODEL.BACKBONE,))

    model.create_architecture(cfg)
    checkpoint = torch.load(cfg.model)
    model.load_state_dict(checkpoint['state_dict'])

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        model = model.cuda()
        print("using gpu...")
    model.eval()

    xml_files = glob(os.path.join(cfg.TEST.img_path, "*.xml"))
    batch_size = 1

    # Keep the normalisation tensors on the same device as the model.
    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)).to(device)
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)).to(device)

    gt_dir = "mAP/input/ground-truth/"
    det_dir = "mAP/input/detection-results/"
    os.makedirs(gt_dir, exist_ok=True)
    os.makedirs(det_dir, exist_ok=True)

    total_image_nums = len(xml_files)
    for ix, xml_name in enumerate(xml_files):
        # Swap only the suffix; str.replace would also rewrite a ".xml" that
        # happens to appear in a directory name.
        img_name = os.path.splitext(xml_name)[0] + ".jpg"
        im = cv2.imread(img_name)
        if im is None:
            continue

        data = prepareTestData(cfg.TEST.TEST_SCALE, im,
                               cfg.TEST.SCALE_MULTIPLE_OF, cfg.TEST.MAX_SIZE)
        start_t = time.time()
        results = im_detect(data, model, batch_size, std, mean, classes,
                            thresh, nms_thresh)
        end_t = time.time()
        print(ix, "/", total_image_nums, img_name,
              ' time consume is: ', end_t - start_t)

        xml_dicts = xml2dict(cfg.rename_class_list, xml_name, cfg.class_list)
        det_dicts = det_results2dict(results, cfg.class_list)
        txt_name = os.path.splitext(os.path.basename(xml_name))[0] + '.txt'

        # Ground truth: "<class> b0 b1 b2 b3"
        with open(gt_dir + txt_name, 'w') as f:
            for key in cfg.class_list:
                if key == '__background__':
                    continue
                if key == 'FC':
                    # Classes from 'FC' onwards are deliberately not exported.
                    break
                for gt_box in xml_dicts[key]:
                    fields = (key, gt_box[0], gt_box[1], gt_box[2], gt_box[3])
                    f.write(" ".join(str(v) for v in fields) + "\n")

        # Detections: "<class> <last box value> b0 b1 b2 b3"
        with open(det_dir + txt_name, 'w') as f:
            for key in cfg.class_list:
                if key == '__background__':
                    continue
                if key == 'FC':
                    break
                for det_box in det_dicts[key]:
                    fields = (key, det_box[-1], det_box[0], det_box[1],
                              det_box[2], det_box[3])
                    f.write(" ".join(str(v) for v in fields) + "\n")
def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a float, or 0.0 if the denominator is 0."""
    if denominator == 0:
        return 0.0
    return float(numerator) / float(denominator)


def main(args, model_epoch=None):
    """Evaluate a checkpoint on the test set and report recall/precision/F-score.

    Detections and ground-truth boxes are matched per class, one-to-one: each
    detection takes the still-unmatched ground-truth box with the highest IoU,
    provided that IoU exceeds ``cfg.TEST.iou_thresh``. Boxes whose width or
    height is below ``cfg.TEST.small_object_size`` are ignored on both sides.

    Side effects: writes ``undetected_sample_size.txt`` listing every missed
    ground-truth box, and for classes in ``cfg.defect_class_list`` copies the
    corresponding ``.jpg``/``.bmp`` found under ``cfg.TEST.save_dir`` into its
    ``undetected_sample/`` sub-directory.

    Args:
        args: Parsed CLI arguments providing ``base_config`` and ``config``.
        model_epoch: Optional checkpoint suffix. When given, ``cfg.model`` is
            replaced by ``<save_dir>/<model_name><model_epoch>``.

    Returns:
        ``(avg_recall, avg_precision, avg_fscore, undetected_ratio, 1)``, or
        ``(0, 0, 0, 0, 0)`` when ``model_epoch`` is given but that checkpoint
        does not exist. Averages run over ``cfg.defect_class_list``.

    Raises:
        ValueError: If ``cfg.MODEL.BACKBONE`` names no known backbone.
    """
    base_cfg = Config.fromfile(args.base_config)
    det_cfg = Config.fromfile(args.config)
    cfg = merge_config(base_cfg.model_cfg, det_cfg.model_cfg)

    if model_epoch is not None:
        cfg.model = os.path.join(cfg.TRAIN.save_dir,
                                 cfg.TRAIN.model_name + model_epoch)
        print(cfg.model)
        if not os.path.exists(cfg.model):
            return 0, 0, 0, 0, 0

    gpus = ",".join(str(gpu_id) for gpu_id in cfg.TEST.gpus)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    nms_thresh = cfg.TEST.nms_thresh
    iou_thresh = cfg.TEST.iou_thresh
    small_object_size = cfg.TEST.small_object_size
    classes = len(cfg.class_list)

    if 'lite' in cfg.MODEL.BACKBONE:
        model = lite_faster_rcnn(cfg, classes)
    elif 'pva' in cfg.MODEL.BACKBONE:
        model = pva_net(classes)
    elif 'resnet' in cfg.MODEL.BACKBONE:
        model = resnet(classes, num_layers=101)
    else:
        raise ValueError(
            "Unsupported cfg.MODEL.BACKBONE %r: expected it to contain "
            "'lite', 'pva' or 'resnet'" % (cfg.MODEL.BACKBONE,))

    model.create_architecture(cfg)
    checkpoint = torch.load(cfg.model)
    model.load_state_dict(checkpoint['state_dict'])

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        model = model.cuda()
        print("using gpu...")
    model.eval()

    imgs = glob(os.path.join(cfg.TEST.img_path, "*.xml"))
    batch_size = 1

    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)).to(device)
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)).to(device)

    gt_box_num_datasets = dict.fromkeys(cfg.class_list, 0)
    det_box_num_datasets = dict.fromkeys(cfg.class_list, 0)
    undetected_sample = []
    undetected_sample_size = defaultdict(list)
    TP = det_init_dict(cfg.class_list, 0)
    FP = det_init_dict(cfg.class_list, 0)
    FN = det_init_dict(cfg.class_list, 0)

    def is_small(box):
        # Height first, then width, as (y2 - y1) and (x2 - x1).
        return (box[3] - box[1] < small_object_size
                or box[2] - box[0] < small_object_size)

    for xml_name in tqdm(imgs):
        # Swap only the suffix; str.replace would also rewrite a ".xml" that
        # happens to appear in a directory name.
        img_name = os.path.splitext(xml_name)[0] + ".jpg"
        im = cv2.imread(img_name)
        if im is None:
            continue

        data = prepareTestData(cfg.TEST.TEST_SCALE, im,
                               cfg.TEST.SCALE_MULTIPLE_OF, cfg.TEST.MAX_SIZE)
        results = im_detect(data, model, batch_size, std, mean, cfg,
                            nms_thresh)

        xml_dicts = xml2dict(cfg.rename_class_list, xml_name, cfg.class_list)
        det_dicts = det_results2dict(results, cfg.class_list)
        image_has_miss = False

        for key in cfg.class_list:
            if key == '__background__':
                continue

            gt_boxes = xml_dicts[key]
            det_boxes = det_dicts[key]
            # Per-box state: 0 = unmatched, 1 = matched, -1 = ignored (small).
            is_match_gt = np.zeros(len(gt_boxes))
            is_match_det = np.zeros(len(det_boxes))

            for gt_id, gt_box in enumerate(gt_boxes):
                if is_small(gt_box):
                    is_match_gt[gt_id] = -1.0
            gt_box_num = int(np.sum(is_match_gt != -1))
            gt_box_num_datasets[key] += gt_box_num

            for det_id, det_box in enumerate(det_boxes):
                if is_small(det_box):
                    is_match_det[det_id] = -1.0
                    continue

                best_iou, best_gt_id = 0, None
                for gt_id, gt_box in enumerate(gt_boxes):
                    if is_match_gt[gt_id] != 0:
                        continue  # already matched, or ignored
                    iou = calc_iou(det_box[0:-1], gt_box[0:-1])
                    if iou > best_iou:
                        best_iou = iou
                        best_gt_id = gt_id

                # Decide once the scan is complete. The decision used to sit
                # inside the loop behind "is this the last GT?", so it was
                # skipped whenever the last GT was already matched or ignored.
                if best_gt_id is not None and best_iou > iou_thresh:
                    is_match_gt[best_gt_id] = 1.0
                    is_match_det[det_id] = 1.0

            det_box_num_datasets[key] += int(np.sum(is_match_det != -1))

            for object_id in np.flatnonzero(is_match_gt == 0):
                gt_box = gt_boxes[object_id]
                undetected_sample_size[key].append(
                    (os.path.basename(xml_name), int(object_id),
                     gt_box[3] - gt_box[1], gt_box[2] - gt_box[0]))

            # Count states explicitly. Summing the arrays let the -1 "ignored"
            # markers subtract from TP and count twice in FP/FN.
            matched = float(np.sum(is_match_det == 1))
            TP[key][0] += matched
            FP[key][0] += float(np.sum(is_match_det == 0))
            FN[key][0] += float(np.sum(is_match_gt == 0))

            if gt_box_num > 0 and np.sum(is_match_gt == 1) < gt_box_num:
                image_has_miss = True

        if image_has_miss:
            undetected_sample.append(xml_name)

    avg_precision = 0.0
    avg_recall = 0.0
    avg_fscore = 0.0
    cls_num = 0.0
    for key in cfg.defect_class_list:
        tp, fp, fn = TP[key][0], FP[key][0], FN[key][0]
        recall_per_cls = _safe_ratio(tp, tp + fn)
        precision_per_cls = _safe_ratio(tp, tp + fp)
        fscore_per_cls = _safe_ratio(
            2 * recall_per_cls * precision_per_cls,
            recall_per_cls + precision_per_cls)
        cls_num = cls_num + 1
        if gt_box_num_datasets[key] > 0:
            print("class_name: ", key,
                  "recall_per_cls: ", recall_per_cls,
                  "precision_per_cls: ", precision_per_cls,
                  "fscore_per_cls: ", fscore_per_cls,
                  "gt_box_num_datasets: ", gt_box_num_datasets[key],
                  "det_box_num_datasets: ", det_box_num_datasets[key])
        # A class without countable ground truth has TP == 0, so it adds 0 to
        # each sum while still counting towards cls_num.
        avg_recall += recall_per_cls
        avg_precision += precision_per_cls
        avg_fscore += fscore_per_cls

    avg_recall = _safe_ratio(avg_recall, cls_num)
    avg_precision = _safe_ratio(avg_precision, cls_num)
    avg_fscore = _safe_ratio(avg_fscore, cls_num)
    undetected_ratio = _safe_ratio(len(undetected_sample), len(imgs))

    print("avg_recall: ", avg_recall)
    print("avg_precision: ", avg_precision)
    print("avg_fscore: ", avg_fscore)
    print("undetected_ratio: ", undetected_ratio)

    # NOTE(review): paths below are built by plain concatenation, so
    # cfg.TEST.save_dir is presumably expected to end with a path separator.
    save_dir = cfg.TEST.save_dir
    undetected_dir = save_dir + 'undetected_sample/'
    with open("undetected_sample_size.txt", 'w') as f:
        for key, missed_boxes in undetected_sample_size.items():
            for undetected_gt_size in missed_boxes:
                f.write("key: " + key + " " + str(undetected_gt_size) + "\n")
                if key not in cfg.defect_class_list:
                    continue
                name = undetected_gt_size[0]
                for suffix in (".jpg", ".bmp"):
                    image_name = name.replace(".xml", suffix)
                    src_file = save_dir + image_name
                    dst_file = undetected_dir + image_name
                    if not os.path.exists(dst_file) and os.path.exists(
                            src_file):
                        # copyfile does not create the target directory.
                        os.makedirs(undetected_dir, exist_ok=True)
                        shutil.copyfile(src_file, dst_file)

    return avg_recall, avg_precision, avg_fscore, undetected_ratio, 1
def _save_crop_if_absent(out_dir, file_name, roi):
    """Write ``roi`` to ``out_dir + file_name`` unless that file already exists."""
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    dst_file = out_dir + file_name
    # A degenerate box yields an empty crop, which cv2.imwrite rejects.
    if roi.size > 0 and not os.path.exists(dst_file):
        cv2.imwrite(dst_file, roi, [int(cv2.IMWRITE_JPEG_QUALITY), 100])


def main():
    """Run detection on annotated images and save visualisations of the misses.

    For each ``*.xml`` in ``cfg.TEST.img_path`` the sibling image (suffix
    ``cfg.image_postfix``) is detected and matched per class against its
    ground truth (greedy one-to-one, IoU above ``cfg.TEST.iou_thresh``). Then:

    * every unmatched ground-truth box is cropped to
      ``<save_dir>/undetected_sample/<class>/`` and drawn on an overlay;
    * every unmatched detection is cropped to
      ``<save_dir>/detected_sample_is_error/<class>/``;
    * if any ground truth was missed, the detection overlay, the ground-truth
      overlay and the "undetected" overlay are written to ``save_dir`` (the
      latter also into ``FN_defect/`` or ``FN_device/``).

    ``save_flag`` encodes the image verdict: 0 = nothing missed, 2 = a class
    from ``cfg.defect_class_list`` was missed, 3 = only other classes missed.

    Returns:
        The ``undetected_sample`` list, which is never appended to and is
        therefore always empty.

    Raises:
        ValueError: If ``cfg.MODEL.BACKBONE`` names no known backbone.
    """
    import time

    args = parse_args()
    base_cfg = Config.fromfile(args.base_config)
    det_cfg = Config.fromfile(args.config)
    cfg = merge_config(base_cfg.model_cfg, det_cfg.model_cfg)

    gpus = ",".join(str(gpu_id) for gpu_id in cfg.TEST.gpus)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    nms_thresh = cfg.TEST.nms_thresh
    save_dir = cfg.TEST.save_dir
    classes = len(cfg.class_list)
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    if 'lite' in cfg.MODEL.BACKBONE:
        model = lite_faster_rcnn(cfg, classes)
    elif 'pva' in cfg.MODEL.BACKBONE:
        model = pva_net(classes)
    elif 'resnet' in cfg.MODEL.BACKBONE:
        model = resnet(classes, num_layers=101)
    else:
        raise ValueError(
            "Unsupported cfg.MODEL.BACKBONE %r: expected it to contain "
            "'lite', 'pva' or 'resnet'" % (cfg.MODEL.BACKBONE,))

    model.create_architecture(cfg)
    checkpoint = torch.load(cfg.model)
    model.load_state_dict(checkpoint['state_dict'])

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    if use_cuda:
        model = model.cuda()
        print("using gpu...")
    model.eval()

    imgs = glob(os.path.join(cfg.TEST.img_path, "*.xml"))
    print(cfg.TEST.img_path)
    batch_size = 1

    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)).to(device)
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)).to(device)

    # One random colour per class, reused for every drawing of that class.
    color_list = defaultdict(list)
    for key in cfg.class_list:
        bgr = (np.random.randint(10, 255), np.random.randint(10, 255),
               np.random.randint(10, 255))
        color_list[key].append(bgr)

    total_image_nums = len(imgs)
    undetected_sample = []
    font = cv2.FONT_HERSHEY_COMPLEX

    for ix, xml_name in enumerate(imgs):
        save_flag = 0
        img_name = os.path.splitext(xml_name)[0] + cfg.image_postfix
        im = cv2.imread(img_name)
        # Must be checked before any im.copy(): imread returns None for a
        # missing or unreadable file.
        if im is None:
            continue
        image_cpy = im.copy()
        xml_image = im.copy()
        gt_image = im.copy()

        data = prepareTestData(cfg.TEST.TEST_SCALE, im,
                               cfg.TEST.SCALE_MULTIPLE_OF, cfg.TEST.MAX_SIZE)
        start_t = time.time()
        results = im_detect(data, model, batch_size, std, mean, cfg,
                            nms_thresh)
        end_t = time.time()
        print(ix, "/", total_image_nums, img_name,
              ' time consume is: ', end_t - start_t, len(results))

        base_name = os.path.basename(xml_name).split(".xml")[0]

        if os.path.exists(xml_name):
            xml_dicts = xml2dict(cfg.rename_class_list, xml_name,
                                 cfg.class_list)
            det_dicts = det_results2dict(results, cfg.class_list)

            for key in cfg.class_list:
                if key == '__background__':
                    continue

                gt_boxes = xml_dicts[key]
                det_boxes = det_dicts[key]
                is_match_gt = np.zeros(len(gt_boxes))
                is_match_det = np.zeros(len(det_boxes))

                for det_id, det_box in enumerate(det_boxes):
                    max_iou, max_iou_id = 0, 0
                    for gt_id, gt_box in enumerate(gt_boxes):
                        if abs(is_match_gt[gt_id]) > 0:
                            continue
                        iou = calc_iou(det_box[0:-1], gt_box[0:-1])
                        if iou > max_iou:
                            max_iou = iou
                            max_iou_id = gt_id
                    if max_iou > cfg.TEST.iou_thresh:
                        is_match_gt[max_iou_id] = 1.0
                        is_match_det[det_id] = 1.0

                # Missed ground truth: save the crop and draw it on xml_image.
                for object_id in range(len(is_match_gt)):
                    if is_match_gt[object_id] != 0:
                        continue
                    gt_box = gt_boxes[object_id]
                    x1, x2 = gt_box[0], gt_box[2]
                    y1, y2 = gt_box[1], gt_box[3]
                    _save_crop_if_absent(
                        save_dir + '/undetected_sample/' + key + "/",
                        base_name + "_" + str(object_id) + ".bmp",
                        image_cpy[y1:y2, x1:x2])

                    g, b, r = color_list[key][0]
                    w = x2 - x1
                    h = y2 - y1
                    cv2.rectangle(xml_image, (x1, y1), (x2, y2), (g, b, r), 1)
                    cv2.putText(xml_image, key, (x1 + w // 2, y1 + h // 2),
                                font, 1, (g, b, r), 1)

                    # A missed defect class (2) outranks a missed device
                    # class (3) and is never downgraded afterwards.
                    if save_flag == 0 or save_flag == 3:
                        if key in cfg.defect_class_list:
                            save_flag = 2
                        else:
                            save_flag = 3

                # Unmatched detections: save the crop only.
                for object_id in range(len(is_match_det)):
                    if is_match_det[object_id] != 0:
                        continue
                    det_box = det_boxes[object_id]
                    _save_crop_if_absent(
                        save_dir + '/detected_sample_is_error/' + key + "/",
                        base_name + "_" + str(object_id) + ".bmp",
                        image_cpy[int(det_box[1]):int(det_box[3]),
                                  int(det_box[0]):int(det_box[2])])

        draw_img = im.copy()

        # Developer toggle: export detections as a Pascal VOC xml file.
        save_xml = False
        if save_xml:
            imgFolderPath = os.path.dirname(img_name)
            imgFolderName = os.path.split(imgFolderPath)[-1]
            imgFileName = os.path.basename(img_name)
            image = cv2.imread(img_name)
            imageShape = [image.shape[0], image.shape[1], image.shape[2]]
            writer = PascalVocWriter(imgFolderName, imgFileName, imageShape,
                                     localImgPath=img_name)
            writer.verified = False

        if save_flag >= 2 and os.path.exists(xml_name):
            xml_image_name = os.path.basename(img_name).replace(
                cfg.image_postfix, "_gt.bmp")
            save_image_gt(gt_image, xml_image_name, color_list, save_dir,
                          cfg.rename_class_list, xml_name, cfg.class_list)

        for boxes in results:
            for box in boxes:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                score = float(box[4])
                label = int(box[5])
                w = x2 - x1
                h = y2 - y1
                label_name = cfg.class_list[label]

                g, b, r = color_list[label_name][0]
                cv2.rectangle(draw_img, (x1, y1), (x2, y2), (g, b, r), 1)
                cv2.putText(draw_img, label_name,
                            (x1 + w // 2, y1 + h // 2), font, 1, (g, b, r), 1)
                cv2.putText(draw_img, str(round(score, 3)),
                            (x1 + w // 2 + 10, y1 + h // 2 + 10),
                            font, 1, (g, b, r), 1)

        if save_xml:
            # Derive the name from this image; the previous code relied on a
            # `name` variable that is only assigned further down.
            writer.save(targetFile='./outputs/' + os.path.basename(
                img_name).replace(cfg.image_postfix, '.xml'))

        if len(results) > 0 and save_flag >= 2:
            name = os.path.basename(img_name)
            cv2.imwrite(save_dir + '/' + name, draw_img)

            name = os.path.basename(img_name).replace(
                cfg.image_postfix, "_undetected.bmp")
            if save_flag == 2:
                if not os.path.exists(save_dir + '/FN_defect/'):
                    os.makedirs(save_dir + '/FN_defect/')
                cv2.imwrite(save_dir + '/FN_defect/' + name, xml_image)
                cv2.imwrite(save_dir + '/' + name, xml_image)
            elif save_flag == 3:
                if not os.path.exists(save_dir + '/FN_device/'):
                    os.makedirs(save_dir + '/FN_device/')
                cv2.imwrite(save_dir + '/FN_device/' + name, xml_image)
                cv2.imwrite(save_dir + '/' + name, xml_image)
        else:
            print('no object in this picture')

    return undetected_sample
def main():
    """Detect objects in every image of a folder and log the boxes to a CSV.

    Reads all files matching ``<args.img_path>/*.*`` (unreadable ones are
    skipped), runs the network selected by ``args.network`` with the weights
    in ``args.model``, draws each box on a copy of the image saved to
    ``args.save_dir``, and appends one line per box to
    ``./outputs/resluts.csv`` as ``name,class,x1,x2,y1,y2``.

    Raises:
        ValueError: If ``args.network`` names no known backbone.
    """
    import time

    args = parse_args()
    save_dir = args.save_dir
    os.makedirs(save_dir, exist_ok=True)

    if 'lite' in args.network:
        model = lite_faster_rcnn(args.classes)
    elif 'pva' in args.network:
        model = pva_net(args.classes)
    elif 'resnet' in args.network:
        model = resnet(args.classes, num_layers=101)
    else:
        raise ValueError(
            "Unsupported network %r: expected it to contain 'lite', 'pva' "
            "or 'resnet'" % (args.network,))

    checkpoint = torch.load(args.model)
    model.create_architecture()
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    model.eval()

    imgs = glob(os.path.join(args.img_path, "*.*"))
    print(args.img_path)
    batch_size = 1

    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)).cuda()
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)).cuda()

    # open() does not create the directory; the context manager guarantees the
    # file is flushed and closed even if detection fails midway.
    os.makedirs("./outputs", exist_ok=True)
    with open("./outputs/resluts.csv", "a+") as results_file:
        for ix, img_name in enumerate(imgs):
            im = cv2.imread(img_name)
            if im is None:
                continue

            data = prepareTestData(864, im, 32, 1440)
            # time.clock() was removed in Python 3.8.
            start_t = time.perf_counter()
            results = im_detect(data, model, batch_size, std, mean,
                                args.classes)
            end_t = time.perf_counter()
            print(ix, img_name, ' time consume is: ', end_t - start_t)

            draw_img = im.copy()
            name = os.path.basename(img_name)
            for boxes in results:
                for box in boxes:
                    x1 = int(box[0])
                    y1 = int(box[1])
                    x2 = int(box[2])
                    y2 = int(box[3])
                    label = int(box[5])
                    cv2.rectangle(draw_img, (x1, y1), (x2, y2),
                                  (0, label * 6, 255), 1)
                    # Column order is x1, x2, y1, y2 (not x1, y1, x2, y2).
                    row = (name, class_list[label], x1, x2, y1, y2)
                    results_file.write(
                        ",".join(str(v) for v in row) + "\n")
            cv2.imwrite(save_dir + '/' + name, draw_img)
def main():
    """Run the detector over ``*.jpg`` files and save images containing 'HH'.

    ``args.classes`` is overwritten with ``len(class_list)``. For every
    readable image (optionally gamma-adjusted when ``args.gama`` is set) the
    boxes whose class name is ``'HH'`` are drawn together with their name and
    score; the annotated image is written to ``args.save_dir`` only when at
    least one such box was found.

    Raises:
        ValueError: if ``args.network`` names none of the supported backbones
            ('lite', 'pva', 'resnet').
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    import time

    args = parse_args()
    save_dir = args.save_dir
    args.classes = len(class_list)
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    if 'lite' in args.network:
        model = lite_faster_rcnn(args.classes)
    elif 'pva' in args.network:
        model = pva_net(args.classes)
    elif 'resnet' in args.network:
        model = resnet(args.classes, num_layers=101)
    else:
        # Previously this fell through and failed later on an unbound `model`.
        raise ValueError('unsupported network: %s' % args.network)

    checkpoint = torch.load(args.model)
    model.create_architecture()
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    model.eval()

    imgs = glob(os.path.join(args.img_path, "*.jpg"))
    print(args.img_path)
    batch_size = 1

    # Box-regression normalisation constants, moved to the GPU for im_detect.
    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)).cuda()
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)).cuda()

    # Developer toggle: when True, a Pascal VOC XML is also written per image.
    # NOTE: no bounding boxes are currently added to the writer, so the XML
    # would contain image metadata only.
    save_xml = False

    for ix, img_name in enumerate(imgs):
        im = cv2.imread(img_name)
        name = os.path.basename(img_name)
        if im is None:
            continue
        if args.gama:
            im = adjust_gamma(im, 2.0)

        # NOTE(review): 640 / 32 / 1440 are presumably the test scale, size
        # multiple and max size -- confirm against prepareTestData.
        data = prepareTestData(640, im, 32, 1440)
        start_t = time.perf_counter()
        results = im_detect(data, model, batch_size, std, mean, args.classes)
        end_t = time.perf_counter()
        print(ix, img_name, ' time consume is: ', end_t - start_t)

        draw_img = im.copy()

        if save_xml:
            imgFolderPath = os.path.dirname(img_name)
            imgFolderName = os.path.split(imgFolderPath)[-1]
            imgFileName = os.path.basename(img_name)
            image = cv2.imread(img_name)
            imageShape = [image.shape[0], image.shape[1], image.shape[2]]
            writer = PascalVocWriter(imgFolderName, imgFileName, imageShape,
                                     localImgPath=img_name)
            writer.verified = False

        save_flag = 0
        for boxes in results:
            for box in boxes:
                label = int(box[5])
                label_name = class_list[label]
                if label_name != 'HH':
                    # Only 'HH' detections are visualised and trigger a save.
                    continue
                save_flag = 1

                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                score = float(box[4])
                w = x2 - x1
                h = y2 - y1
                color = (label * 50, label * 100, label * 100)

                cv2.rectangle(draw_img, (x1, y1), (x2, y2), color, 1)
                # Class name at the box centre, score slightly offset from it.
                cv2.putText(draw_img, label_name,
                            (x1 + w // 2, y1 + h // 2),
                            cv2.FONT_HERSHEY_COMPLEX, 1, color, 1)
                cv2.putText(draw_img, str(round(score, 3)),
                            (x1 + w // 2 + 10, y1 + h // 2 + 10),
                            cv2.FONT_HERSHEY_COMPLEX, 1, color, 1)

        if save_xml:
            writer.save(targetFile='./outputs/' + name.replace('.jpg', '.xml'))
        if len(results) > 0 and save_flag:
            cv2.imwrite(save_dir + '/' + name, draw_img)
def detections(cfg, model_epoch):
    """Run the detector on every annotated image and collect results.

    The images are discovered through their ``*.xml`` annotation files in
    ``cfg.TEST.img_path``; the image path is the annotation path with its
    extension swapped for ``cfg.image_postfix``. Unreadable images are
    skipped.

    Args:
        cfg: configuration object; reads ``TRAIN``, ``TEST``, ``MODEL``,
            ``class_list``, ``rename_class_list``, ``image_postfix`` and
            ``model`` (which is overwritten when ``model_epoch`` is given).
        model_epoch: suffix appended to ``cfg.TRAIN.model_name`` to build the
            checkpoint path inside ``cfg.TRAIN.save_dir``, or ``None`` to use
            ``cfg.model`` as it is.

    Returns:
        Tuple ``(xml_dicts, det_dicts, xml_names)``: the accumulators returned
        by ``xml2dict`` and ``det_results2dict`` and the list of annotation
        paths that were actually processed.

    Raises:
        RuntimeError: if the checkpoint built from ``model_epoch`` is missing.
        ValueError: if ``cfg.MODEL.BACKBONE`` names no supported backbone.
    """
    if model_epoch is not None:
        cfg.model = os.path.join(cfg.TRAIN.save_dir,
                                 cfg.TRAIN.model_name + model_epoch)
        print(cfg.model)
        if not os.path.exists(cfg.model):
            raise RuntimeError("model is not exist!!!")

    # Restrict visible GPUs before CUDA is queried below.
    gpus = ",".join('%s' % gpu_id for gpu_id in cfg.TEST.gpus)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    nms_thresh = cfg.TEST.nms_thresh
    classes = len(cfg.class_list)

    if 'lite' in cfg.MODEL.BACKBONE:
        model = lite_faster_rcnn(cfg, classes)
    elif 'pva' in cfg.MODEL.BACKBONE:
        model = pva_net(classes)
    elif 'resnet' in cfg.MODEL.BACKBONE:
        model = resnet(classes, num_layers=101)
    else:
        # Previously this fell through and failed later on an unbound `model`.
        raise ValueError('unsupported backbone: %s' % cfg.MODEL.BACKBONE)
    model.create_architecture(cfg)

    use_cuda = torch.cuda.is_available()
    # Without CUDA, remap GPU-saved tensors to the CPU while loading.
    checkpoint = torch.load(cfg.model,
                            map_location=None if use_cuda else 'cpu')
    model.load_state_dict(checkpoint['state_dict'])
    if use_cuda:
        model = model.cuda()
        print("using gpu...")
    model.eval()

    xml_paths = glob(os.path.join(cfg.TEST.img_path, "*.xml"))
    batch_size = 1

    # Box-regression normalisation constants; kept on the same device as the
    # model (they used to be moved to CUDA unconditionally).
    # NOTE(review): im_detect / prepareTestData may still assume CUDA
    # internally -- confirm before relying on the CPU path.
    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32))
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32))
    if use_cuda:
        std = std.cuda()
        mean = mean.cuda()

    xml_names = []
    xml_dicts = defaultdict(list)
    det_dicts = defaultdict(list)

    for xml_name in tqdm(xml_paths):
        # Swap only the extension; a str.replace round trip would also rewrite
        # any ".xml" / postfix occurrence inside directory names.
        img_name = os.path.splitext(xml_name)[0] + cfg.image_postfix
        im = cv2.imread(img_name)
        if im is None:
            continue

        data = prepareTestData(cfg.TEST.TEST_SCALE, im,
                               cfg.TEST.SCALE_MULTIPLE_OF, cfg.TEST.MAX_SIZE)
        results = im_detect(data, model, batch_size, std, mean, cfg,
                            nms_thresh)

        xml_names.append(xml_name)
        xml_dicts = xml2dict(cfg.rename_class_list, xml_name, cfg.class_list,
                             xml_dicts)
        det_dicts = det_results2dict(xml_name, results, cfg.class_list,
                                     det_dicts)

    return xml_dicts, det_dicts, xml_names
def main():
    """Detect on every ``*.jpg`` in ``args.img_path`` and print per-class stats.

    Detections of all readable images are grouped per class with
    ``classed_detections``; annotations loaded from ``args.xml_path`` are
    grouped with ``classed_annots``. For each class index from 1 to
    ``args.classes - 1`` every annotation is matched against the class
    predictions via ``compute_overlap`` and the resulting recall and
    precision are printed.

    Raises:
        ValueError: if ``args.network`` names none of the supported backbones
            ('lite', 'pva', 'resnet').
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    import time

    args = parse_args()
    save_dir = args.save_dir
    if not os.path.isdir(save_dir):
        os.mkdir(save_dir)

    if 'lite' in args.network:
        model = lite_faster_rcnn(args.classes)
    elif 'pva' in args.network:
        model = pva_net(args.classes)
    elif 'resnet' in args.network:
        model = resnet(args.classes, num_layers=101)
    else:
        # Previously this fell through and failed later on an unbound `model`.
        raise ValueError('unsupported network: %s' % args.network)

    checkpoint = torch.load(args.model)
    model.create_architecture()
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()
    model = torch.nn.DataParallel(model).cuda()
    model.eval()

    imgs = glob(os.path.join(args.img_path, "*.jpg"))
    batch_size = 1

    # Box-regression normalisation constants, moved to the GPU for im_detect.
    std = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)).cuda()
    mean = torch.from_numpy(
        np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)).cuda()

    all_detections = []
    for ix, img_name in enumerate(imgs):
        im = cv2.imread(img_name)
        if im is None:
            continue

        # NOTE(review): 640 / 32 / 1440 are presumably the test scale, size
        # multiple and max size -- confirm against prepareTestData.
        data = prepareTestData(640, im, 32, 1440)
        start_t = time.perf_counter()
        results = im_detect(data, model, batch_size, std, mean, args.classes)
        end_t = time.perf_counter()
        print(ix, img_name, ' time consume is: ', end_t - start_t)
        all_detections.append(results)

    dict_ = classed_detections(all_detections, args.classes)
    annots = get_all_annots(args.xml_path)
    annots_ = classed_annots(annots, args.classes)

    # Class index 0 is skipped (NOTE(review): presumably background).
    for i in range(1, args.classes):
        predictions = dict_[str(i)]
        annot_ = annots_[str(i)]
        # NOTE(review): `false_positives` counts annotations that found no
        # overlapping prediction (i.e. misses); kept as-is so the printed
        # numbers stay comparable with earlier runs.
        false_positives = 0
        true_positives = 0
        num_annotations = annot_.shape[0]

        for annot in annot_:
            # Check for empty predictions before computing overlaps so that
            # compute_overlap is never handed an empty array.
            if predictions.shape[0] == 0:
                false_positives += 1
                continue
            overlaps = compute_overlap(annot, predictions)
            if overlaps.shape[0] == 0:
                false_positives += 1
                continue

            assigned_annotation = np.argmax(overlaps)
            max_overlap = overlaps[assigned_annotation]
            if max_overlap >= 0.01:
                true_positives += 1
            else:
                false_positives += 1

        # A class without any annotation used to raise ZeroDivisionError.
        recall = true_positives / num_annotations if num_annotations else 0.0
        precision = true_positives / np.maximum(
            true_positives + false_positives, np.finfo(np.float64).eps)
        print('label ', str(i), ' ap is: ', recall, precision)