def do_evaluate_unlabeled(pred_config, output_file, reuse=True):
    """Evaluate unlabeled data for every dataset listed in ``cfg.DATA.VAL``.

    For each dataset the raw results live in ``output_file + '-' + dataset``.
    If that file already exists and ``reuse`` is true, the stored results are
    loaded and only passed to ``eval_inference_results2`` with
    ``metric_only=True``; otherwise inference is run on
    ``max(cfg.TRAIN.NUM_GPUS, 1)`` towers first. In both cases the returned
    metrics are dumped as JSON to ``<output>_cocometric.json``.

    Args:
        pred_config: Configuration handed to ``MultiTowerOfflinePredictor``.
        output_file: Path prefix for the per-dataset result files.
        reuse: When true, existing result files are reused instead of
            re-running inference.
    """
    # The predictors are built lazily, the first time any dataset actually
    # needs inference. Building them only for the first dataset (index 0)
    # breaks when that dataset's results are reused but a later one is not.
    num_tower = None
    graph_funcs = None

    for dataset in cfg.DATA.VAL:
        output = output_file + '-' + dataset
        if not os.path.isfile(output) or not reuse:
            if graph_funcs is None:
                num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
                graph_funcs = MultiTowerOfflinePredictor(
                    pred_config, list(range(num_tower))).get_predictors()
            logger.info('Evaluating {} ...'.format(dataset))
            # One dataflow shard per tower.
            dataflows = [
                get_eval_dataflow(dataset, shard=k, num_shards=num_tower)
                for k in range(num_tower)
            ]
            all_results = multithread_predict_dataflow(dataflows, graph_funcs)
            eval_metrics = DatasetRegistry.get(dataset).eval_inference_results2(
                all_results, output, threshold=cfg.TRAIN.CONFIDENCE)
        else:
            # Close the results file deterministically instead of leaking
            # the handle.
            with open(output, 'r') as f:
                all_results = json.load(f)
            eval_metrics = DatasetRegistry.get(dataset).eval_inference_results2(
                all_results,
                output,
                threshold=cfg.TRAIN.CONFIDENCE,
                metric_only=True)

        with open(output + '_cocometric.json', 'w') as f:
            json.dump(eval_metrics, f)
def get_eval_unlabeled_dataflow(name, shard=0, num_shards=1, return_size=False):
    """
    Return an inference dataflow over the roidbs of one or more datasets.

    The records come from ``DatasetRegistry.get(...).training_roidbs()``.
    Each datapoint is built from the ``file_name`` and ``image_id`` fields of
    a record, with the first component replaced by the image that
    ``cv2.imread`` loads from that file name.

    Args:
        name: A dataset name, or a list/tuple of dataset names. With more
            than one name the roidbs are concatenated in the given order.
        shard: Index of the shard to return.
        num_shards: Total number of shards. Every shard gets
            ``num_imgs // num_shards`` images and the last shard also takes
            the remainder.
        return_size: When true, also return the total number of images
            (before sharding).

    Returns:
        The dataflow, or ``(dataflow, num_imgs)`` when ``return_size`` is set.
    """
    if isinstance(name, (list, tuple)) and len(name) > 1:
        if "VOC" not in name[0]:
            assert "VOC" not in name[
                1], "VOC has to be put before coco in cfg.DATA.TRAIN"
        roidbs = []
        for x in name:
            _roidbs = DatasetRegistry.get(x).training_roidbs()
            print_class_histogram(_roidbs)
            roidbs.extend(_roidbs)
        logger.info("Merged roidbs from {}".format(name))
        print_class_histogram(roidbs)
    else:
        if isinstance(name, (list, tuple)):
            name = name[0]
        roidbs = DatasetRegistry.get(name).training_roidbs()
        print_class_histogram(roidbs)

    num_imgs = len(roidbs)
    img_per_shard = num_imgs // num_shards
    # Half-open range [start, end); the last shard absorbs the remainder.
    img_range = (shard * img_per_shard,
                 (shard + 1) * img_per_shard
                 if shard + 1 < num_shards else num_imgs)
    # The slice below is half-open, so the count is simply end - start.
    logger.info("Found {} images for inference.".format(
        img_range[1] - img_range[0]))

    # No filtering here: every record in the shard is kept.
    ds = DataFromListOfDict(roidbs[img_range[0]:img_range[1]],
                            ["file_name", "image_id"])

    def f(fname):
        im = cv2.imread(fname, cv2.IMREAD_COLOR)
        assert im is not None, fname
        return im

    ds = MapDataComponent(ds, f, 0)
    # Evaluation itself may be multi-threaded, therefore don't add prefetch
    # here.

    if return_size:
        return ds, num_imgs
    return ds
def get_train_dataflow():
    """
    Build the dataflow used for supervised training.

    The roidbs of every dataset in ``cfg.DATA.TRAIN`` are concatenated,
    records without any non-crowd box are dropped, and the remaining records
    are shuffled and mapped through ``TrainingDataPreprocessorAug``. What a
    datapoint contains is decided by that preprocessor.

    Returns:
        The mapped dataflow. With ``cfg.DATA.NUM_WORKERS > 0`` the mapping is
        threaded under the horovod trainer and multi-process otherwise;
        without workers a plain ``MapData`` is used.
    """
    all_roidbs = []
    for dataset_name in cfg.DATA.TRAIN:
        all_roidbs.extend(DatasetRegistry.get(dataset_name).training_roidbs())
    print_class_histogram(all_roidbs)

    # Drop images that have no non-crowd gt box. This filter must not be
    # applied for testing. The model can train on empty images, but that is
    # not useful for COCO.
    num_before = len(all_roidbs)
    kept = [
        img for img in all_roidbs
        if len(img["boxes"][img["is_crowd"] == 0]) > 0
    ]
    logger.info(
        "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
        .format(num_before - len(kept), len(kept)))

    ds = DataFromList(kept, shuffle=True)
    preprocess = TrainingDataPreprocessorAug(cfg)

    num_workers = cfg.DATA.NUM_WORKERS
    if not num_workers > 0:
        return MapData(ds, preprocess)

    if cfg.TRAINER == "horovod":
        # Threads rather than processes because MPI does not like fork().
        # There is one dataflow per process, so a smaller buffer is enough.
        return MultiThreadMapData(
            ds, num_workers, preprocess, buffer_size=num_workers * 10)

    return MultiProcessMapData(
        ds, num_workers, preprocess, buffer_size=num_workers * 20)
def visualize_dataflow2(cfg, unlabled2017_used=True, VISPATH="./", maxvis=50):
    """Visualize the dataflow with labeled and unlabeled strong augmentation.

    Labeled and unlabeled roidbs are paired with ``zip`` and run through
    ``TrainingDataPreprocessorSSlAug``. For each pair with a non-empty
    ``gt_boxes_strong`` the two annotated images are placed side by side and
    written to ``VISPATH/result_<index>.jpeg``.

    Args:
        cfg: Config object; ``cfg.DATA.TRAIN`` and ``cfg.TRAIN.CONFIDENCE``
            are read here.
        unlabled2017_used: When true the unlabeled set is
            ``coco_unlabeled2017``; otherwise it is ``<train name>-unlabeled``.
        VISPATH: Directory the images are written to. It is created if
            missing.
        maxvis: The loop stops right after writing the first image whose
            pair index is greater than ``maxvis``. Skipped pairs still
            advance the index.

    Raises:
        KeyError: If the ``PSEUDO_PATH`` environment variable is not set.
        AssertionError: If an image could not be written.
    """

    def prase_name(x):
        if not unlabled2017_used:
            return x + "-unlabeled"
        else:  # return coco2017 unlabeled data
            return "coco_unlabeled2017"

    def remove_no_box_data(_roidbs, filter_fn):
        num = len(_roidbs)
        _roidbs = filter_fn(_roidbs)
        logger.info(
            "Filtered {} images which contain no non-crowd groudtruth boxes. Total #images for training: {}"
            .format(num - len(_roidbs), len(_roidbs)))
        return _roidbs

    pseudo_path = os.path.join(os.environ["PSEUDO_PATH"], "pseudo_data.npy")
    pseudo_targets = dd.io.load(pseudo_path)

    roidbs = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(x).training_roidbs() for x in cfg.DATA.TRAIN))
    roidbs_u = list(
        itertools.chain.from_iterable(
            DatasetRegistry.get(prase_name(x)).training_roidbs()
            for x in cfg.DATA.TRAIN))
    # Labeled: keep images with at least one non-crowd box.
    roidbs = remove_no_box_data(
        roidbs, lambda x: list(
            filter(lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0,
                   x)))
    # Unlabeled: keep images that have at least one pseudo box.
    roidbs_u = remove_no_box_data(
        roidbs_u, lambda x: list(
            filter(
                lambda img: len(pseudo_targets[img["image_id"]]["boxes"]) > 0,
                x)))

    print_class_histogram(roidbs)
    print_class_histogram(roidbs_u)

    preprocess = TrainingDataPreprocessorSSlAug(
        cfg, confidence=cfg.TRAIN.CONFIDENCE, pseudo_targets=pseudo_targets)

    for jj, (rob, robu) in tqdm(enumerate(zip(roidbs, roidbs_u))):
        data = preprocess((rob, robu))
        nn = len(pseudo_targets[robu["image_id"]]["boxes"])
        if data is None or len(data["gt_boxes_strong"]) == 0:
            print("empty annotation, {} (original {})".format(jj, nn))
            continue

        ims = viz.draw_boxes(data["image"], data["gt_boxes"],
                             [str(a) for a in data["gt_labels"]])
        num_strong = len(data["gt_boxes_strong"])
        ims_t = viz.draw_boxes(
            data["image_strong"], data["gt_boxes_strong"],
            [str(a) for a in data["gt_labels_strong"][:num_strong]])
        # Resize the first image to the second one's size so they can be
        # concatenated horizontally.
        ims = cv2.resize(ims, (ims_t.shape[1], ims_t.shape[0]))
        vis = np.concatenate((ims, ims_t), axis=1)

        out_path = os.path.join(VISPATH, "result_{}.jpeg".format(jj))
        out_dir = os.path.dirname(out_path)
        if out_dir:  # an empty dirname means the current directory
            os.makedirs(out_dir, exist_ok=True)
        # Write outside the assert: assert expressions are removed under
        # `python -O`, which would silently skip the write.
        written = cv2.imwrite(out_path, vis)
        assert written, "failed to write {}".format(out_path)

        if jj > maxvis:
            break
def get_train_dataflow_w_unlabeled(load_path):
    """
    Build the training dataflow that pairs labeled and unlabeled records.

    Labeled roidbs come from ``cfg.DATA.TRAIN``. Unlabeled roidbs come from
    ``cfg.DATA.UNLABEL`` when the first training dataset name contains
    "VOC". Otherwise they come from ``coco_unlabeled2017`` if any training
    name lacks "@", and from ``<train name>-unlabeled`` if all have it.
    Pseudo targets are loaded from ``load_path`` and handed, together with
    ``cfg.TRAIN.CONFIDENCE``, to ``TrainingDataPreprocessorSSlAug``, which
    decides what each datapoint contains.

    Args:
        load_path: Path of the pseudo-target file; it must exist.

    Returns:
        The mapped dataflow over ``DataFrom2List(labeled, unlabeled)``.
    """
    assert os.path.isfile(load_path), "{} does not find".format(load_path)

    labeled = []
    for dataset_name in cfg.DATA.TRAIN:
        labeled.extend(DatasetRegistry.get(dataset_name).training_roidbs())
    print_class_histogram(labeled)

    unlabeled = []
    if "VOC" in cfg.DATA.TRAIN[0]:
        for dataset_name in cfg.DATA.UNLABEL:
            unlabeled.extend(
                DatasetRegistry.get(dataset_name).training_roidbs())
        unlabled2017_used = False
    else:
        unlabled2017_used = np.any(["@" not in x for x in cfg.DATA.TRAIN])
        for dataset_name in cfg.DATA.TRAIN:
            if unlabled2017_used:
                # use the coco2017 unlabeled data
                unlabeled_name = "coco_unlabeled2017"
            else:
                assert "@" in load_path, (
                    "{}: Did you use wrong pseudo_data.py for "
                    "this model?").format(load_path)
                unlabeled_name = dataset_name + "-unlabeled"
            unlabeled.extend(
                DatasetRegistry.get(unlabeled_name).training_roidbs())
    print_class_histogram(unlabeled)

    def keep_and_report(records, keep, dset):
        # Keep the records accepted by `keep` and log how many were dropped.
        survivors = [img for img in records if keep(img)]
        logger.info(
            "Filtered {} images which contain no non-crowd groudtruth boxes. "
            "Total {} #images for training: {}".format(
                len(records) - len(survivors), dset, len(survivors)))
        return survivors

    # Drop images that have no gt boxes. This filter must not be applied for
    # testing. The model can train on empty images, but that is not useful
    # for COCO.
    labeled = keep_and_report(
        labeled,
        lambda img: len(img["boxes"][img["is_crowd"] == 0]) > 0,
        "labeled")

    # Load the pseudo targets for the unlabeled data.
    if unlabled2017_used:
        assert "@" not in load_path, "Did you use the wrong pseudo path"
    pseudo_targets = dd.io.load(load_path)
    logger.info("Loaded {} pseudo targets from {}".format(
        len(pseudo_targets), load_path))

    unlabeled = keep_and_report(
        unlabeled,
        lambda img: len(pseudo_targets[img["image_id"]]["boxes"]) > 0,
        "unlabeled")

    preprocess = TrainingDataPreprocessorSSlAug(
        cfg, confidence=cfg.TRAIN.CONFIDENCE, pseudo_targets=pseudo_targets)
    ds = DataFrom2List(labeled, unlabeled, shuffle=True)

    num_workers = cfg.DATA.NUM_WORKERS
    if not num_workers > 0:
        return MapData(ds, preprocess)

    if cfg.TRAINER == "horovod":
        return MultiThreadMapData(
            ds, num_workers, preprocess, buffer_size=num_workers * 10)

    return MultiProcessMapData(
        ds, num_workers, preprocess, buffer_size=num_workers * 20)