def load_dacon_test_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        dacon_api = DaconAPI(json_file)
    anns = dacon_api.features
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    meta = MetadataCatalog.get(dataset_name)
    meta.thing_classes = dacon_api.thing_classes

    logger.info("Loaded {} images in dacon format from {}".format(len(anns), json_file))

    dataset_dicts = []
    for ann in anns:
        record = {}
        record["file_name"] = os.path.join(image_root, ann["image_id"])
        dataset_dicts.append(record)
    return dataset_dicts

def benchmark_data(cfg: AttrDict, split: str = "train"):
    split = split.upper()
    total_images = MAX_ITERS * cfg["DATA"][split]["BATCHSIZE_PER_REPLICA"]
    timer = Timer()
    dataset = build_dataset(cfg, split)
    try:
        device = torch.device("cuda" if cfg.MACHINE.DEVICE == "gpu" else "cpu")
    except AttributeError:
        device = torch.device("cuda")

    # Give the sampler the same seed for the entire distributed group,
    # as per the PyTorch documentation.
    sampler_seed = cfg.SEED_VALUE
    dataloader = get_loader(
        dataset=dataset,
        dataset_config=cfg["DATA"][split],
        num_dataloader_workers=cfg.DATA.NUM_DATALOADER_WORKERS,
        pin_memory=False,
        multi_processing_method=cfg.MULTI_PROCESSING_METHOD,
        device=device,
        sampler_seed=sampler_seed,
    )

    # The Fairstore data sampler requires setting the start iter before it can start.
    if hasattr(dataloader.sampler, "set_start_iter"):
        dataloader.sampler.set_start_iter(0)

    # Initial warmup, measured as warmup time: the total number of seconds
    # since the start/reset of the timer.
    timer.reset()
    data_iterator = iter(dataloader)
    for _ in range(WARMUP_ITERS):  # warmup
        next(data_iterator)
    warmup_time = timer.seconds()
    logging.info(f"Warmup time {WARMUP_ITERS} batches: {warmup_time} seconds")

    # Measure the number of images per second over MAX_ITERS iterations.
    timer = Timer()
    for _ in tqdm.trange(MAX_ITERS):
        next(data_iterator)
    time_elapsed = timer.seconds()
    logging.info(
        f"iters: {MAX_ITERS}; images: {total_images}; time: {time_elapsed} seconds; "
        f"images/sec: {round(float(total_images / time_elapsed), 4)}; "
        f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} ")

    # Run the benchmark for a few more rounds to catch fluctuations.
    for round_idx in range(BENCHMARK_ROUNDS):
        timer = Timer()
        for _ in tqdm.trange(MAX_ITERS):
            next(data_iterator)
        time_elapsed = timer.seconds()
        logging.info(
            f"round: {round_idx}: iters: {MAX_ITERS}; images: {total_images}; "
            f"time: {time_elapsed} seconds; "
            f"images/sec: {round(float(total_images / time_elapsed), 4)}; "
            f"ms/img: {round(float(1000 * time_elapsed / total_images), 4)} ")

    del data_iterator
    del dataloader

def benchmark_data(args): cfg = setup(args) logger.info("After spawning " + RAM_msg()) timer = Timer() dataloader = build_detection_train_loader(cfg) logger.info("Initialize loader using {} seconds.".format(timer.seconds())) timer.reset() itr = iter(dataloader) for i in range(10): # warmup next(itr) if i == 0: startup_time = timer.seconds() logger.info("Startup time: {} seconds".format(startup_time)) timer = Timer() max_iter = 1000 for _ in tqdm.trange(max_iter): next(itr) logger.info("{} iters ({} images) in {} seconds.".format( max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds())) # test for a few more rounds for k in range(10): logger.info(f"Iteration {k} " + RAM_msg()) timer = Timer() max_iter = 1000 for _ in tqdm.trange(max_iter): next(itr) logger.info("{} iters ({} images) in {} seconds.".format( max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()))
def iter_benchmark(
    iterator, num_iter: int, warmup: int = 5, max_time_seconds: float = 60
) -> Tuple[float, List[float]]:
    """
    Benchmark an iterator/iterable for `num_iter` iterations with an extra
    `warmup` iterations of warmup. End early if `max_time_seconds` time is
    spent on iterations.

    Returns:
        float: average time (seconds) per iteration
        list[float]: time spent on each iteration. Sometimes useful for further analysis.
    """
    num_iter, warmup = int(num_iter), int(warmup)

    iterator = iter(iterator)
    for _ in range(warmup):
        next(iterator)
    timer = Timer()
    all_times = []
    for curr_iter in tqdm.trange(num_iter):
        start = timer.seconds()
        if start > max_time_seconds:
            num_iter = curr_iter
            break
        next(iterator)
        all_times.append(timer.seconds() - start)
    avg = timer.seconds() / num_iter
    return avg, all_times

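# A minimal usage sketch for `iter_benchmark`, assuming a detectron2-style
# `build_detection_train_loader` and a valid `cfg` as used elsewhere in this
# section; the choice of loader is an assumption, not part of `iter_benchmark`.
def example_iter_benchmark(cfg):
    dataloader = build_detection_train_loader(cfg)  # any iterable works
    avg, all_times = iter_benchmark(dataloader, num_iter=200, warmup=5)
    # The per-iteration times expose stragglers that the average hides.
    print(f"avg: {avg:.4f} s/iter, worst: {max(all_times):.4f} s/iter")
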
def benchmark_data(args):
    cfg = setup(args)

    dataloader = build_detection_train_loader(cfg)

    timer = Timer()
    itr = iter(dataloader)
    for i in range(10):  # warmup
        next(itr)
        if i == 0:
            startup_time = timer.seconds()
    timer = Timer()
    max_iter = 1000
    for _ in tqdm.trange(max_iter):
        next(itr)
    logger.info(
        "{} iters ({} images) in {} seconds.".format(
            max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()
        )
    )
    logger.info("Startup time: {} seconds".format(startup_time))

    # psutil.virtual_memory() reports system RAM, not GPU memory.
    ram = psutil.virtual_memory()
    logger.info(
        "RAM Usage: {:.2f}/{:.2f} GB".format(
            (ram.total - ram.available) / 1024 ** 3, ram.total / 1024 ** 3
        )
    )

def load_coco_unlabel_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    imgs = coco_api.loadImgs(img_ids)

    logger.info("Loaded {} images in COCO format from {}".format(len(imgs), json_file))

    dataset_dicts = []
    for img_dict in imgs:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["image_id"] = img_dict["id"]
        dataset_dicts.append(record)
    return dataset_dicts

def load_lvis_json(json_file, image_root, dataset_name=None):
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)
    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    if dataset_name is not None:
        meta = get_lvis_instances_meta(dataset_name)
        MetadataCatalog.get(dataset_name).set(**meta)

    # sort indices for reproducible results
    img_ids = sorted(lvis_api.imgs.keys())
    imgs = lvis_api.load_imgs(img_ids)
    anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]

    # Sanity check that each annotation has a unique id.
    ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
    assert len(set(ann_ids)) == len(ann_ids), \
        f"Annotation ids in '{json_file}' are not unique"

    imgs_anns = list(zip(imgs, anns))
    logger.info(f"Loaded {len(imgs_anns)} images in the LVIS format from {json_file}")

    dataset_dicts = []

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        file_name = img_dict["file_name"]
        if img_dict["file_name"].startswith("COCO"):
            # LVIS v0.5 file names carry a "COCO_val2014_"-style prefix;
            # keep only the trailing "000000XXXXXX.jpg" part.
            file_name = file_name[-16:]
        record["file_name"] = os.path.join(image_root, file_name)
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", [])
        record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        for anno in anno_dict_list:
            assert anno["image_id"] == image_id
            obj = {"bbox": anno["bbox"], "bbox_mode": BoxMode.XYWH_ABS}
            # LVIS category ids start from 1; convert to 0-based contiguous ids.
            obj["category_id"] = anno["category_id"] - 1
            segm = anno["segmentation"]
            # filter out invalid polygons (< 3 points)
            valid_segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
            assert len(segm) == len(valid_segm), \
                "Annotation contains an invalid polygon with < 3 points"
            assert len(segm) > 0
            obj["segmentation"] = segm
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

def __call__(self):
    timer = Timer()
    json_file = PathManager.get_local_path(self.json_file)
    with open(json_file, 'r') as file:
        # Each line is a standalone JSON record, so read lines
        # instead of json.load().
        imgs_anns = file.readlines()
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
    logger.info("Loaded {} images in CrowdHuman format from {}".format(
        len(imgs_anns), json_file))

    dataset_dicts = []
    for idx, ann in enumerate(imgs_anns):
        v = json.loads(ann)
        record = {}
        filename = v["filename"]
        # NOTE when filename starts with '/', it is an absolute filename,
        # thus os.path.join doesn't work.
        if filename.startswith('/'):
            filename = os.path.normpath(self.image_root + filename)
        else:
            filename = os.path.join(self.image_root, filename)
        height, width = v["image_height"], v["image_width"]

        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width

        objs = []
        for anno in v.get('instances', []):
            x1, y1, x2, y2 = anno['bbox']
            w = x2 - x1
            h = y2 - y1
            obj = {
                "category_id": anno['label'],
                "bbox": anno['bbox'],
                "vbbox": anno['vbbox'],
                "is_ignored": anno.get('is_ignored', False),
                'area': w * h,
                # 'bbox_mode': BoxMode.XYXY_ABS,
            }
            objs.append(obj)
        record["annotations"] = objs
        # Encoding the record saves up to 2x memory when serializing the data.
        dataset_dicts.append(ImageMeta.encode(record))
    return dataset_dicts

def update_meta(json_file, dataset_name=None):
    from pyherbtools.herb import HERB

    if dataset_name is not None and "test" not in dataset_name:
        logger.info("Updating metadata of {} dataset".format(dataset_name))
        timer = Timer()
        json_file = PathManager.get_local_path(json_file)
        with contextlib.redirect_stdout(io.StringIO()):
            herb_api = HERB(json_file)
        if timer.seconds() > 1:
            logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(herb_api.getCatIds())
        cats = herb_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        meta.thing_classes = thing_classes

        logger.info("Creating hierarchy target from given annotation")
        order_family_hierarchy = torch.zeros(len(meta.family_map), len(meta.order_map))
        family_species_hierarchy = torch.zeros(len(meta.species_map), len(meta.family_map))
        for cat in cats:
            order_id = meta.order_map[cat["order"]]
            family_id = meta.family_map[cat["family"]]
            species_id = meta.species_map[cat["name"]]
            order_family_hierarchy[family_id][order_id] = 1
            family_species_hierarchy[species_id][family_id] = 1

        from torch import nn
        order_family_hierarchy = nn.Softmax(dim=1)(order_family_hierarchy)
        family_species_hierarchy = nn.Softmax(dim=1)(family_species_hierarchy)

        meta.hierarchy_prior = {
            "order|family": order_family_hierarchy,
            "family|species": family_species_hierarchy,
        }
        meta.cats = cats
        meta.num_classes = {
            "family": len(meta.family_map),
            "order": len(meta.order_map),
            "species": len(meta.species_map),
        }

class IterationTimer(HookBase):
    def __init__(self, warmup_iter=3):
        self._warmup_iter = warmup_iter
        self._step_timer = Timer()
        self._start_time = time.perf_counter()
        self._total_timer = Timer()

    def before_train(self):
        self._start_time = time.perf_counter()
        self._total_timer.reset()
        self._total_timer.pause()

    def after_train(self):
        logger = logging.getLogger(__name__)
        total_time = time.perf_counter() - self._start_time
        total_time_minus_hooks = self._total_timer.seconds()
        hook_time = total_time - total_time_minus_hooks

        num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter
        if num_iter > 0 and total_time_minus_hooks > 0:
            logger.info(
                "Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
                    num_iter,
                    str(datetime.timedelta(seconds=int(total_time_minus_hooks))),
                    total_time_minus_hooks / num_iter,
                )
            )
        logger.info(
            "Total training time: {} ({} on hooks)".format(
                str(datetime.timedelta(seconds=int(total_time))),
                str(datetime.timedelta(seconds=int(hook_time))),
            )
        )

    def before_step(self):
        self._step_timer.reset()
        self._total_timer.resume()

    def after_step(self):
        iter_done = self.trainer.iter - self.trainer.start_iter + 1
        if iter_done >= self._warmup_iter:
            sec = self._step_timer.seconds()
            self.trainer.storage.put_scalars(time=sec)
        else:
            self._start_time = time.perf_counter()
            self._total_timer.reset()
            self._total_timer.pause()

def benchmark_distributed(self, num_iter, warmup=10):
    """
    Benchmark the dataloader in each distributed worker, and log results of
    all workers. This helps understand the final performance as well as the
    variances among workers.

    It also prints startup time (first iter) of the dataloader.
    """
    gpu = comm.get_world_size()
    dataset = MapDataset(self.dataset, self.mapper)
    n = self.num_workers
    loader = build_batch_data_loader(
        dataset, self.sampler, self.total_batch_size, num_workers=n)

    timer = Timer()
    loader = iter(loader)
    next(loader)
    startup_time = timer.seconds()
    logger.info("Dataloader startup time: {:.2f} seconds".format(startup_time))

    comm.synchronize()

    avg, all_times = self._benchmark(loader, num_iter * max(n, 1), warmup * max(n, 1))
    del loader
    self._log_time(
        f"DataLoader ({gpu} GPUs x {n} workers, total bs={self.total_batch_size})",
        avg,
        all_times,
        True,
    )

def test_timer(self):
    timer = Timer()
    time.sleep(0.5)
    self.assertTrue(0.99 > timer.seconds() >= 0.5)

    timer.pause()
    time.sleep(0.5)
    self.assertTrue(0.99 > timer.seconds() >= 0.5)

    timer.resume()
    time.sleep(0.5)
    self.assertTrue(1.49 > timer.seconds() >= 1.0)

    timer.reset()
    self.assertTrue(0.49 > timer.seconds() >= 0)

def benchmark_eval(args):
    cfg = setup(args)
    model = build_model(cfg)
    model.eval()
    logger.info("Model:\n{}".format(model))
    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 0
    data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    dummy_data = list(itertools.islice(data_loader, 100))

    def f():
        while True:
            yield from DatasetFromList(dummy_data, copy=False)

    for _ in range(5):  # warmup
        model(dummy_data[0])

    max_iter = 400
    timer = Timer()
    with tqdm.tqdm(total=max_iter) as pbar:
        for idx, d in enumerate(f()):
            if idx == max_iter:
                break
            model(d)
            pbar.update()
    logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))

class TestMeter(object):
    def __init__(self, cfg):
        self.cfg = cfg
        self.forward_timer = Timer()
        self.total_time = 0
        self.cnt = 0
        self.score = dict()
        self.output_dir = Join(cfg.TEST.OUTPUT_DIR, cfg.TEST.DATASET)
        self.save_img = cfg.TEST.SAVE_IMG
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)
        self.score_csv = open(Join(self.output_dir, "score.csv"), 'w')
        self.score_csv.write("vid, image_id, psnr, ssim\n")

    def forward_tic(self):
        """
        Start recording time.
        """
        self.forward_timer.reset()

    def forward_toc(self):
        """
        Stop recording time.
        """
        self.forward_timer.pause()
        self.total_time += self.forward_timer.seconds()
        self.cnt += 1

    def log_img_result(self, img_out, vid, img_id, psnr, ssim):
        if vid not in self.score.keys():
            self.score[vid] = {}
        # log score
        self.score[vid][img_id] = (psnr, ssim)
        self.score_csv.write("{},{},{},{}\n".format(vid, img_id, psnr, ssim))
        # save img
        if self.save_img:
            img_out = cv2.cvtColor(img_out, cv2.COLOR_RGB2BGR)
            cv2.imwrite(Join(self.output_dir, img_id), img_out)

    def log_average_score(self):
        score_per_vid = {}
        for vid in self.score.keys():
            psnrs = [x[0] for x in self.score[vid].values()]
            ssims = [x[1] for x in self.score[vid].values()]
            score_per_vid[vid] = (np.mean(psnrs), np.mean(ssims))
        with open(Join(self.output_dir, 'videos_scores.csv'), 'w') as f:
            f.write('video_id, psnr, ssim\n')
            for vid in self.score.keys():
                f.write("{},{},{}\n".format(
                    vid, score_per_vid[vid][0], score_per_vid[vid][1]))
        return score_per_vid

    def speed(self):
        return self.total_time, self.total_time / self.cnt

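# A minimal sketch of timing model forwards with the meter above; `model` and
# `batches` are hypothetical stand-ins for the caller's network and data.
def example_forward_timing(test_meter, model, batches):
    for frames in batches:
        test_meter.forward_tic()   # reset the forward timer
        _ = model(frames)          # hypothetical forward pass
        test_meter.forward_toc()   # accumulate the elapsed time and count
    total, per_forward = test_meter.speed()
    print(f"total: {total:.2f}s, {per_forward:.4f}s per forward")
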
def test_timer(self) -> None:
    """
    Test basic timer functions (pause, resume, and reset).
    """
    timer = Timer()
    time.sleep(0.5)
    self.assertTrue(0.99 > timer.seconds() >= 0.5)

    timer.pause()
    time.sleep(0.5)
    self.assertTrue(0.99 > timer.seconds() >= 0.5)

    timer.resume()
    time.sleep(0.5)
    self.assertTrue(1.49 > timer.seconds() >= 1.0)

    timer.reset()
    self.assertTrue(0.49 > timer.seconds() >= 0)

def test_model(epoch):
    """ Evaluate the model on the test set """
    model.eval()
    test_metrics = {"loss": [], "acc": []}
    timer = Timer()
    for batch_i, (X, y) in enumerate(test_dataloader):
        batch_i += 1
        image_sequences = Variable(X.to(device), requires_grad=False)
        labels = Variable(y, requires_grad=False).to(device)

        with torch.no_grad():
            # Reset LSTM hidden state
            model.lstm.reset_hidden_state()
            # Get sequence predictions
            predictions = model(image_sequences)

        # Compute metrics
        loss = criterion(predictions, labels)
        acc = (predictions.detach().argmax(1) == labels).cpu().numpy().mean()

        # Keep track of loss and accuracy
        test_metrics["loss"].append(loss.item())
        test_metrics["acc"].append(acc)

        # Determine approximate time left
        batches_done = batch_i - 1
        batches_left = len(test_dataloader) - batches_done
        time_left = datetime.timedelta(seconds=batches_left * timer.seconds())
        time_iter = round(timer.seconds(), 3)
        timer.reset()

        # Log test performance
        logger.info(
            f'Testing - [Epoch: {epoch}/{cfg.train.num_epochs}] '
            f'[Batch: {batch_i}/{len(test_dataloader)}] '
            f'[Loss: {np.mean(test_metrics["loss"]):.3f}] '
            f'[Acc: {np.mean(test_metrics["acc"]):.3f}] '
            f'[ETA: {time_left}] [Iter time: {time_iter}s/it]'
        )

    writer.add_scalar("test/loss", np.mean(test_metrics["loss"]), epoch)
    writer.add_scalar("test/acc", np.mean(test_metrics["acc"]), epoch)
    model.train()

def _load_coco_annotations(json_file: str):
    """
    Load COCO annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `pycocotools.coco.COCO` that provides access to annotations data
    """
    from pycocotools.coco import COCO

    logger = logging.getLogger(__name__)
    timer = Timer()
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
    return coco_api

def _load_lvis_annotations(json_file: str):
    """
    Load LVIS annotations from a JSON file

    Args:
        json_file: str
            Path to the file to load annotations from
    Returns:
        Instance of `lvis.LVIS` that provides access to annotations data
    """
    from lvis import LVIS

    json_file = PathManager.get_local_path(json_file)
    logger = logging.getLogger(__name__)
    timer = Timer()
    lvis_api = LVIS(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
    return lvis_api

class EpochTimer:
    """
    A timer which computes the epoch time.
    """

    def __init__(self) -> None:
        self.timer = Timer()
        self.timer.reset()
        self.epoch_times = []

    def reset(self) -> None:
        """
        Reset the epoch timer.
        """
        self.timer.reset()
        self.epoch_times = []

    def epoch_tic(self):
        """
        Start recording time.
        """
        self.timer.reset()

    def epoch_toc(self):
        """
        Stop recording time.
        """
        self.timer.pause()
        self.epoch_times.append(self.timer.seconds())

    def last_epoch_time(self):
        """
        Get the time of the last epoch.
        """
        assert len(self.epoch_times) > 0, "No epoch time has been recorded!"
        return self.epoch_times[-1]

    def avg_epoch_time(self):
        """
        Calculate the average epoch time among the recorded epochs.
        """
        assert len(self.epoch_times) > 0, "No epoch time has been recorded!"
        return np.mean(self.epoch_times)

    def median_epoch_time(self):
        """
        Calculate the median epoch time among the recorded epochs.
        """
        assert len(self.epoch_times) > 0, "No epoch time has been recorded!"
        return np.median(self.epoch_times)

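# A minimal usage sketch for EpochTimer; `train_one_epoch` is a hypothetical
# stand-in for the caller's per-epoch training function.
def example_epoch_timer(num_epochs):
    epoch_timer = EpochTimer()
    for cur_epoch in range(num_epochs):
        epoch_timer.epoch_tic()
        train_one_epoch(cur_epoch)  # hypothetical
        epoch_timer.epoch_toc()
        print(
            f"epoch {cur_epoch}: {epoch_timer.last_epoch_time():.2f}s "
            f"(median so far: {epoch_timer.median_epoch_time():.2f}s)"
        )
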
def benchmark_data(args):
    cfg = setup(args)

    dataloader = build_detection_train_loader(cfg)
    itr = iter(dataloader)
    for _ in range(10):  # warmup
        next(itr)

    timer = Timer()
    max_iter = 1000
    for _ in tqdm.trange(max_iter):
        next(itr)
    logger.info("{} iters ({} images) in {} seconds.".format(
        max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds()))

def load_cub_json(ann_files, image_root, dataset_name=None):
    images_txt, classes_txt, image_class_txt, train_test_split_txt = ann_files
    split = 0
    if 'test' in dataset_name:
        split = 1

    images_txt = open(images_txt, 'r').readlines()
    image_class_txt = open(image_class_txt, 'r').readlines()
    classes_txt = open(classes_txt, 'r').readlines()
    train_test_split_txt = open(train_test_split_txt, 'r').readlines()

    imgs = []
    anns = []
    classes = {}
    for i in range(len(train_test_split_txt)):
        image_id, curr_split = train_test_split_txt[i].split()
        if int(curr_split) == split:
            _, image_path = images_txt[i].split()
            _, class_id = image_class_txt[i].split()
            _, class_name = classes_txt[int(class_id) - 1].split()
            curr_image = {"id": int(image_id), "file_name": image_path}
            curr_ann = {"id": i, "category_id": int(class_id) - 1, "image_id": int(image_id)}
            imgs.append(curr_image)
            anns.append(curr_ann)
            classes[int(class_id) - 1] = class_name

    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in CUB format from CUB-200".format(len(imgs_anns)))

    dataset_dicts = []
    ann_keys = ["category_id"]

    logger.info("Converting CUB format into herbarium format")
    timer = Timer()
    meta = MetadataCatalog.get(dataset_name)
    dataset_dicts = [
        process_per_record(img_ann, image_root, ann_keys, meta) for img_ann in imgs_anns
    ]
    logger.info("Processing records takes {:.2f} seconds.".format(timer.seconds()))
    return dataset_dicts

def benchmark_eval(args):
    cfg = setup(args)
    if args.config_file.endswith(".yaml"):
        model = build_model(cfg)
        DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

        cfg.defrost()
        cfg.DATALOADER.NUM_WORKERS = 0
        data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    else:
        model = instantiate(cfg.model)
        model.to(cfg.train.device)
        DetectionCheckpointer(model).load(cfg.train.init_checkpoint)

        cfg.dataloader.num_workers = 0
        data_loader = instantiate(cfg.dataloader.test)

    model.eval()
    logger.info("Model:\n{}".format(model))
    dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)), copy=False)

    def f():
        while True:
            yield from dummy_data

    for k in range(5):  # warmup
        model(dummy_data[k])

    max_iter = 300
    timer = Timer()
    with tqdm.tqdm(total=max_iter) as pbar:
        for idx, d in enumerate(f()):
            if idx == max_iter:
                break
            model(d)
            pbar.update()
    logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))

def load_hico_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with HOI's instances annotation.

    Args:
        json_file (str): full path to the json file in HOI instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., `hico-det_train`). If provided,
            this function will also put "thing_classes" into the metadata associated
            with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "category_id"). The
            values for these keys will be returned as-is. For example, the densepose
            annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format.
        (See `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO

    timer = Timer()
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        meta = MetadataCatalog.get(dataset_name)
        cat_ids = sorted(coco_api.getCatIds())
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        # meta.thing_classes = thing_classes

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).
        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):
            if "coco" not in dataset_name:
                logger.warning(
                    """
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
"""
                )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        # meta.thing_dataset_id_to_contiguous_id = id_map

        # Get metadata "person_cls_id" and "action_classes".
        person_cls_id = meta.person_cls_id
        action_classes = meta.action_classes

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986,
    #   'isactive': 1,
    #   'isknown': 1,
    #   'hoi_triplets': [{person_id: 42984, object_id: 42986, action_id: 4}, ...],
    #  },
    #  ...]
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    imgs_anns = list(zip(imgs, anns))

    logger.info("Loaded {} images in HOI format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "category_id"] + (extra_annotation_keys or [])

    num_instances_without_hoi_annotations = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        record = {}
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        num_instances = len(anno_dict_list)
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.
            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contain bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            obj = {key: anno[key] for key in ann_keys if key in anno}

            # "hoi_triplets" in the annotation is a list[dict], where each dict is an
            # annotation record for an interaction. Example of anno["hoi_triplets"][0]:
            # [{
            #    person_id: 42984,
            #    object_id: 42986,
            #    action_id: 4
            #  },
            #  ...]
            # Here "person_id" ("object_id") is the *anno id* of the person (object) instance.
            # For each instance, we record its interactions with other instances in the given
            # image in a binary matrix named `actions` with shape (N, K), where N is the number
            # of instances and K is the number of actions. If this instance is interacting with
            # the j-th instance via the k-th action, then the (j, k) entry of `actions` is 1.
            actions = np.zeros((num_instances, len(action_classes)))
            hoi_triplets = anno["hoi_triplets"]
            if len(hoi_triplets) > 0:
                # Map the *anno id* of instances to contiguous indices within this image.
                map_to_contiguous_id_within_image(hoi_triplets, anno_dict_list)
                for triplet in hoi_triplets:
                    action_id = triplet["action_id"]
                    is_person = (anno["category_id"] == person_cls_id)
                    target_id = triplet["object_id"] if is_person else triplet["person_id"]
                    actions[target_id, action_id] = 1
            else:
                num_instances_without_hoi_annotations += 1

            obj["actions"] = actions
            obj["isactive"] = 1 if len(hoi_triplets) > 0 else 0
            obj["bbox_mode"] = BoxMode.XYWH_ABS
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_hoi_annotations > 0:
        logger.warning("There are {} instances without hoi annotation.".format(
            num_instances_without_hoi_annotations))

    return dataset_dicts

class TrainMeter(object):
    """
    Measure training stats.
    """

    def __init__(self, epoch_iters, cfg):
        """
        Args:
            epoch_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.epoch_iters = epoch_iters
        self.MAX_EPOCH = cfg.SOLVER.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.loss_total = 0.0
        self.lr = None
        # Current minibatch errors (smoothed over a window).
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Number of misclassified examples.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.output_dir = cfg.OUTPUT_DIR

    def reset(self):
        """
        Reset the Meter.
        """
        self.loss.reset()
        self.loss_total = 0.0
        self.lr = None
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        """
        Start recording time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop recording time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        self.data_timer.pause()
        self.net_timer.reset()

    def update_stats(self, top1_err, top5_err, loss, lr, mb_size):
        """
        Update the current stats.
        Args:
            top1_err (float): top1 error rate.
            top5_err (float): top5 error rate.
            loss (float): loss value.
            lr (float): learning rate.
            mb_size (int): mini batch size.
        """
        self.loss.add_value(loss)
        self.lr = lr
        self.loss_total += loss * mb_size
        self.num_samples += mb_size

        if not self._cfg.DATA.MULTI_LABEL:
            # Current minibatch stats
            self.mb_top1_err.add_value(top1_err)
            self.mb_top5_err.add_value(top5_err)
            # Aggregate stats
            self.num_top1_mis += top1_err * mb_size
            self.num_top5_mis += top5_err * mb_size

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats of the current iteration.
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch * self.epoch_iters + cur_iter + 1))
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "train_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "dt": self.iter_timer.seconds(),
            "dt_data": self.data_timer.seconds(),
            "dt_net": self.net_timer.seconds(),
            "eta": eta,
            "loss": self.loss.get_win_median(),
            "lr": self.lr,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
        }
        if not self._cfg.DATA.MULTI_LABEL:
            stats["top1_err"] = self.mb_top1_err.get_win_median()
            stats["top5_err"] = self.mb_top5_err.get_win_median()
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        """
        eta_sec = self.iter_timer.seconds() * (
            self.MAX_EPOCH - (cur_epoch + 1) * self.epoch_iters)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "train_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "dt": self.iter_timer.seconds(),
            "dt_data": self.data_timer.seconds(),
            "dt_net": self.net_timer.seconds(),
            "eta": eta,
            "lr": self.lr,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
            "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
        }
        if not self._cfg.DATA.MULTI_LABEL:
            top1_err = self.num_top1_mis / self.num_samples
            top5_err = self.num_top5_mis / self.num_samples
            avg_loss = self.loss_total / self.num_samples
            stats["top1_err"] = top1_err
            stats["top5_err"] = top5_err
            stats["loss"] = avg_loss
        logging.log_json_stats(stats)

class ValMeter(object):
    """
    Measures validation stats.
    """

    def __init__(self, max_iter, cfg):
        """
        Args:
            max_iter (int): the max number of iteration of the current epoch.
            cfg (CfgNode): configs.
        """
        self._cfg = cfg
        self.max_iter = max_iter
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        # Current minibatch errors (smoothed over a window).
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Min errors (over the full val set).
        self.min_top1_err = 100.0
        self.min_top5_err = 100.0
        # Number of misclassified examples.
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.all_preds = []
        self.all_labels = []
        self.output_dir = cfg.OUTPUT_DIR

    def reset(self):
        """
        Reset the Meter.
        """
        self.iter_timer.reset()
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_samples = 0
        self.all_preds = []
        self.all_labels = []

    def iter_tic(self):
        """
        Start recording time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop recording time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        self.data_timer.pause()
        self.net_timer.reset()

    def update_stats(self, top1_err, top5_err, mb_size):
        """
        Update the current stats.
        Args:
            top1_err (float): top1 error rate.
            top5_err (float): top5 error rate.
            mb_size (int): mini batch size.
        """
        self.mb_top1_err.add_value(top1_err)
        self.mb_top5_err.add_value(top5_err)
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def update_predictions(self, preds, labels):
        """
        Update predictions and labels.
        Args:
            preds (tensor): model output predictions.
            labels (tensor): labels.
        """
        # TODO: merge update_prediction with update_stats.
        self.all_preds.append(preds)
        self.all_labels.append(labels)

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats of the current iteration.
        Args:
            cur_epoch (int): the number of current epoch.
            cur_iter (int): the number of current iteration.
        """
        if (cur_iter + 1) % self._cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (self.max_iter - cur_iter - 1)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        stats = {
            "_type": "val_iter",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_diff": self.iter_timer.seconds(),
            "eta": eta,
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
        }
        if not self._cfg.DATA.MULTI_LABEL:
            stats["top1_err"] = self.mb_top1_err.get_win_median()
            stats["top5_err"] = self.mb_top5_err.get_win_median()
        logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        """
        stats = {
            "_type": "val_epoch",
            "epoch": "{}/{}".format(cur_epoch + 1, self._cfg.SOLVER.MAX_EPOCH),
            "time_diff": self.iter_timer.seconds(),
            "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
            "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
        }
        if self._cfg.DATA.MULTI_LABEL:
            stats["map"] = get_map(
                torch.cat(self.all_preds).cpu().numpy(),
                torch.cat(self.all_labels).cpu().numpy(),
            )
        else:
            top1_err = self.num_top1_mis / self.num_samples
            top5_err = self.num_top5_mis / self.num_samples
            self.min_top1_err = min(self.min_top1_err, top1_err)
            self.min_top5_err = min(self.min_top5_err, top5_err)
            stats["top1_err"] = top1_err
            stats["top5_err"] = top5_err
            stats["min_top1_err"] = self.min_top1_err
            stats["min_top5_err"] = self.min_top5_err
        logging.log_json_stats(stats)

class AVAMeter(object):
    """
    Measure the AVA train, val, and test stats.
    """

    def __init__(self, overall_iters, cfg, mode):
        """
        Args:
            overall_iters (int): the overall number of iterations of one epoch.
            cfg (CfgNode): configs.
            mode (str): `train`, `val`, or `test` mode.
        """
        self.cfg = cfg
        self.lr = None
        self.loss = ScalarMeter(cfg.LOG_PERIOD)
        self.full_ava_test = cfg.AVA.FULL_TEST_ON_VAL
        self.mode = mode
        self.iter_timer = Timer()
        self.data_timer = Timer()
        self.net_timer = Timer()
        self.all_preds_train = []
        self.all_ori_boxes_train = []
        self.all_metadata_train = []
        self.all_preds = []
        self.all_ori_boxes = []
        self.all_metadata = []
        self.overall_iters = overall_iters
        self.categories, self.class_whitelist = read_labelmap(
            os.path.join(cfg.AVA.ANNOTATION_DIR, cfg.AVA.LABEL_MAP_FILE))
        gt_filename = os.path.join(cfg.AVA.ANNOTATION_DIR, cfg.AVA.GROUNDTRUTH_FILE)
        self.full_groundtruth = read_csv(gt_filename, self.class_whitelist)
        self.mini_groundtruth = get_ava_mini_groundtruth(self.full_groundtruth)
        _, self.video_idx_to_name = ava_helper.load_image_lists(cfg, mode == "train")
        self.output_dir = cfg.OUTPUT_DIR

    def log_iter_stats(self, cur_epoch, cur_iter):
        """
        Log the stats.
        Args:
            cur_epoch (int): the current epoch.
            cur_iter (int): the current iteration.
        """
        if (cur_iter + 1) % self.cfg.LOG_PERIOD != 0:
            return
        eta_sec = self.iter_timer.seconds() * (self.overall_iters - cur_iter)
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        if self.mode == "train":
            stats = {
                "_type": "{}_iter".format(self.mode),
                "cur_epoch": "{}".format(cur_epoch + 1),
                "cur_iter": "{}".format(cur_iter + 1),
                "eta": eta,
                "dt": self.iter_timer.seconds(),
                "dt_data": self.data_timer.seconds(),
                "dt_net": self.net_timer.seconds(),
                "mode": self.mode,
                "loss": self.loss.get_win_median(),
                "lr": self.lr,
            }
        elif self.mode == "val":
            stats = {
                "_type": "{}_iter".format(self.mode),
                "cur_epoch": "{}".format(cur_epoch + 1),
                "cur_iter": "{}".format(cur_iter + 1),
                "eta": eta,
                "dt": self.iter_timer.seconds(),
                "dt_data": self.data_timer.seconds(),
                "dt_net": self.net_timer.seconds(),
                "mode": self.mode,
            }
        elif self.mode == "test":
            stats = {
                "_type": "{}_iter".format(self.mode),
                "cur_iter": "{}".format(cur_iter + 1),
                "eta": eta,
                "dt": self.iter_timer.seconds(),
                "dt_data": self.data_timer.seconds(),
                "dt_net": self.net_timer.seconds(),
                "mode": self.mode,
            }
        else:
            raise NotImplementedError("Unknown mode: {}".format(self.mode))
        logging.log_json_stats(stats)

    def iter_tic(self):
        """
        Start recording time.
        """
        self.iter_timer.reset()
        self.data_timer.reset()

    def iter_toc(self):
        """
        Stop recording time.
        """
        self.iter_timer.pause()
        self.net_timer.pause()

    def data_toc(self):
        self.data_timer.pause()
        self.net_timer.reset()

    def reset(self):
        """
        Reset the Meter.
        """
        self.loss.reset()
        self.all_preds = []
        self.all_ori_boxes = []
        self.all_metadata = []

    def update_stats(self, preds, ori_boxes, metadata, loss=None, lr=None):
        """
        Update the current stats.
        Args:
            preds (tensor): prediction embedding.
            ori_boxes (tensor): original boxes (x1, y1, x2, y2).
            metadata (tensor): metadata of the AVA data.
            loss (float): loss value.
            lr (float): learning rate.
        """
        if self.mode in ["val", "test"]:
            self.all_preds.append(preds)
            self.all_ori_boxes.append(ori_boxes)
            self.all_metadata.append(metadata)
        if self.mode in ["train"]:
            self.all_preds_train.append(preds)
            self.all_ori_boxes_train.append(ori_boxes)
            self.all_metadata_train.append(metadata)
        if loss is not None:
            self.loss.add_value(loss)
        if lr is not None:
            self.lr = lr

    def finalize_metrics(self, log=True):
        """
        Calculate and log the final AVA metrics.
        """
        all_preds = torch.cat(self.all_preds, dim=0)
        all_ori_boxes = torch.cat(self.all_ori_boxes, dim=0)
        all_metadata = torch.cat(self.all_metadata, dim=0)

        if self.mode == "test" or (self.full_ava_test and self.mode == "val"):
            groundtruth = self.full_groundtruth
        else:
            groundtruth = self.mini_groundtruth

        self.full_map = evaluate_ava(
            all_preds,
            all_ori_boxes,
            all_metadata.tolist(),
            self.class_whitelist,
            self.categories,
            groundtruth=groundtruth,
            video_idx_to_name=self.video_idx_to_name,
        )
        if log:
            stats = {"mode": self.mode, "map": self.full_map}
            logging.log_json_stats(stats)

    def log_epoch_stats(self, cur_epoch):
        """
        Log the stats of the current epoch.
        Args:
            cur_epoch (int): the number of current epoch.
        """
        if self.mode in ["val", "test"]:
            self.finalize_metrics(log=False)
            stats = {
                "_type": "{}_epoch".format(self.mode),
                "cur_epoch": "{}".format(cur_epoch + 1),
                "mode": self.mode,
                "map": self.full_map,
                "gpu_mem": "{:.2f}G".format(misc.gpu_mem_usage()),
                "RAM": "{:.2f}/{:.2f}G".format(*misc.cpu_mem_usage()),
            }
            logging.log_json_stats(stats)

class TestMeter(object): """ Perform the multi-view ensemble for testing: each video with an unique index will be sampled with multiple clips, and the predictions of the clips will be aggregated to produce the final prediction for the video. The accuracy is calculated with the given ground truth labels. """ def __init__( self, num_videos, num_clips, num_cls, overall_iters, multi_label=False, ensemble_method="sum", ): """ Construct tensors to store the predictions and labels. Expect to get num_clips predictions from each video, and calculate the metrics on num_videos videos. Args: num_videos (int): number of videos to test. num_clips (int): number of clips sampled from each video for aggregating the final prediction for the video. num_cls (int): number of classes for each prediction. overall_iters (int): overall iterations for testing. multi_label (bool): if True, use map as the metric. ensemble_method (str): method to perform the ensemble, options include "sum", and "max". """ self.iter_timer = Timer() self.data_timer = Timer() self.net_timer = Timer() self.num_clips = num_clips self.overall_iters = overall_iters self.multi_label = multi_label self.ensemble_method = ensemble_method # Initialize tensors. self.video_preds = torch.zeros((num_videos, num_cls)) if multi_label: self.video_preds -= 1e10 self.video_labels = (torch.zeros( (num_videos, num_cls)) if multi_label else torch.zeros( (num_videos)).long()) self.clip_count = torch.zeros((num_videos)).long() self.topk_accs = [] self.stats = {} # Reset metric. self.reset() def reset(self): """ Reset the metric. """ self.clip_count.zero_() self.video_preds.zero_() if self.multi_label: self.video_preds -= 1e10 self.video_labels.zero_() def update_stats(self, preds, labels, clip_ids): """ Collect the predictions from the current batch and perform on-the-flight summation as ensemble. Args: preds (tensor): predictions from the current batch. Dimension is N x C where N is the batch size and C is the channel size (num_cls). labels (tensor): the corresponding labels of the current batch. Dimension is N. clip_ids (tensor): clip indexes of the current batch, dimension is N. """ for ind in range(preds.shape[0]): vid_id = int(clip_ids[ind]) // self.num_clips if self.video_labels[vid_id].sum() > 0: assert torch.equal( self.video_labels[vid_id].type(torch.FloatTensor), labels[ind].type(torch.FloatTensor), ) self.video_labels[vid_id] = labels[ind] if self.ensemble_method == "sum": self.video_preds[vid_id] += preds[ind] elif self.ensemble_method == "max": self.video_preds[vid_id] = torch.max(self.video_preds[vid_id], preds[ind]) else: raise NotImplementedError( "Ensemble Method {} is not supported".format( self.ensemble_method)) self.clip_count[vid_id] += 1 def log_iter_stats(self, cur_iter): """ Log the stats. Args: cur_iter (int): the current iteration of testing. """ eta_sec = self.iter_timer.seconds() * (self.overall_iters - cur_iter) eta = str(datetime.timedelta(seconds=int(eta_sec))) stats = { "split": "test_iter", "cur_iter": "{}".format(cur_iter + 1), "eta": eta, "time_diff": self.iter_timer.seconds(), } logging.log_json_stats(stats) def iter_tic(self): """ Start to record time. """ self.iter_timer.reset() self.data_timer.reset() def iter_toc(self): """ Stop to record time. """ self.iter_timer.pause() self.net_timer.pause() def data_toc(self): self.data_timer.pause() self.net_timer.reset() def finalize_metrics(self, ks=(1, 5)): """ Calculate and log the final ensembled metrics. ks (tuple): list of top-k values for topk_accuracies. 
For example, ks = (1, 5) correspods to top-1 and top-5 accuracy. """ if not all(self.clip_count == self.num_clips): logger.warning("clip count {} ~= num clips {}".format( ", ".join([ "{}: {}".format(i, k) for i, k in enumerate(self.clip_count.tolist()) ]), self.num_clips, )) self.stats = {"split": "test_final"} if self.multi_label: map = get_map(self.video_preds.cpu().numpy(), self.video_labels.cpu().numpy()) self.stats["map"] = map else: num_topks_correct = metrics.topks_correct(self.video_preds, self.video_labels, ks) topks = [(x / self.video_preds.size(0)) * 100.0 for x in num_topks_correct] assert len({len(ks), len(topks)}) == 1 for k, topk in zip(ks, topks): self.stats["top{}_acc".format(k)] = "{:.{prec}f}".format( topk, prec=2) logging.log_json_stats(self.stats)
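# A minimal sketch of driving the multi-view TestMeter above; `model` and
# `test_loader` are hypothetical. Each batch yields clip-level predictions plus
# the clip indices that map each clip back to its video.
def example_multiview_test(test_meter, test_loader, model):
    test_meter.iter_tic()
    for cur_iter, (inputs, labels, clip_ids) in enumerate(test_loader):
        test_meter.data_toc()
        preds = model(inputs)  # (N, num_cls) clip-level predictions
        test_meter.iter_toc()
        test_meter.update_stats(preds.cpu(), labels.cpu(), clip_ids.cpu())
        test_meter.log_iter_stats(cur_iter)
        test_meter.iter_tic()
    test_meter.finalize_metrics()  # ensembles clips per video, logs top-k accuracy
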
class IterationTimer(HookBase): """ Track the time spent for each iteration (each run_step call in the trainer). Print a summary in the end of training. This hook uses the time between the call to its :meth:`before_step` and :meth:`after_step` methods. Under the convention that :meth:`before_step` of all hooks should only take negligible amount of time, the :class:`IterationTimer` hook should be placed at the beginning of the list of hooks to obtain accurate timing. """ def __init__(self, warmup_iter=3): """ Args: warmup_iter (int): the number of iterations at the beginning to exclude from timing. """ self._warmup_iter = warmup_iter self._step_timer = Timer() self._start_time = time.perf_counter() self._total_timer = Timer() def before_train(self): self._start_time = time.perf_counter() self._total_timer.reset() self._total_timer.pause() def after_train(self): logger = logging.getLogger(__name__) total_time = time.perf_counter() - self._start_time total_time_minus_hooks = self._total_timer.seconds() hook_time = total_time - total_time_minus_hooks num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter if num_iter > 0 and total_time_minus_hooks > 0: # Speed is meaningful only after warmup # NOTE this format is parsed by grep in some scripts logger.info( "Overall training speed: {} iterations in {} ({:.4f} s / it)".format( num_iter, str(datetime.timedelta(seconds=int(total_time_minus_hooks))), total_time_minus_hooks / num_iter, ) ) logger.info( "Total training time: {} ({} on hooks)".format( str(datetime.timedelta(seconds=int(total_time))), str(datetime.timedelta(seconds=int(hook_time))), ) ) def before_step(self): self._step_timer.reset() self._total_timer.resume() def after_step(self): # +1 because we're in after_step iter_done = self.trainer.iter - self.trainer.start_iter + 1 if iter_done >= self._warmup_iter: sec = self._step_timer.seconds() self.trainer.storage.put_scalars(time=sec) else: self._start_time = time.perf_counter() self._total_timer.reset() self._total_timer.pause()
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None): """ Load a json file with COCO's instances annotation format. Currently supports instance detection, instance segmentation, and person keypoints annotations. Args: json_file (str): full path to the json file in COCO instances annotation format. image_root (str or path-like): the directory where the images in this json file exists. dataset_name (str or None): the name of the dataset (e.g., coco_2017_train). When provided, this function will also do the following: * Put "thing_classes" into the metadata associated with this dataset. * Map the category ids into a contiguous range (needed by standard dataset format), and add "thing_dataset_id_to_contiguous_id" to the metadata associated with this dataset. This option should usually be provided, unless users need to load the original json content and apply more processing manually. extra_annotation_keys (list[str]): list of per-annotation keys that should also be loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints", "category_id", "segmentation"). The values for these keys will be returned as-is. For example, the densepose annotations are loaded in this way. Returns: list[dict]: a list of dicts in Detectron2 standard dataset dicts format (See `Using Custom Datasets </tutorials/datasets.html>`_ ) when `dataset_name` is not None. If `dataset_name` is None, the returned `category_ids` may be incontiguous and may not conform to the Detectron2 standard format. Notes: 1. This function does not read the image files. The results do not have the "image" field. """ from pycocotools.coco import COCO timer = Timer() json_file = PathManager.get_local_path(json_file) with contextlib.redirect_stdout(io.StringIO()): coco_api = COCO(json_file) if timer.seconds() > 1: logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds())) id_map = None if dataset_name is not None: meta = MetadataCatalog.get(dataset_name) cat_ids = sorted(coco_api.getCatIds()) cats = coco_api.loadCats(cat_ids) # The categories in a custom json file may not be sorted. thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])] meta.thing_classes = thing_classes # major_change # In COCO, certain category ids are artificially removed, # and by convention they are always ignored. # We deal with COCO's id issue and translate # the category ids to contiguous ids in [0, 80). # It works by looking at the "categories" field in the json, therefore # if users' own json also have incontiguous ids, we'll # apply this mapping as well but print a warning. if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)): if "coco" not in dataset_name: logger.warning( """ Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you. """ ) id_map = {v: i for i, v in enumerate(cat_ids)} meta.thing_dataset_id_to_contiguous_id = id_map # sort indices for reproducible results img_ids = sorted(coco_api.imgs.keys()) # imgs is a list of dicts, each looks something like: # {'license': 4, # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg', # 'file_name': 'COCO_val2014_000000001268.jpg', # 'height': 427, # 'width': 640, # 'date_captured': '2013-11-17 05:57:24', # 'id': 1268} imgs = coco_api.loadImgs(img_ids) # anns is a list[list[dict]], where each dict is an annotation # record for an object. The inner list enumerates the objects in an image # and the outer list enumerates over images. 
Example of anns[0]: # [{'segmentation': [[192.81, # 247.09, # ... # 219.03, # 249.06]], # 'area': 1035.749, # 'iscrowd': 0, # 'image_id': 1268, # 'bbox': [192.81, 224.8, 74.73, 33.43], # 'category_id': 16, # 'id': 42986}, # ...] anns = [coco_api.imgToAnns[img_id] for img_id in img_ids] total_num_valid_anns = sum([len(x) for x in anns]) total_num_anns = len(coco_api.anns) if total_num_valid_anns < total_num_anns: logger.warning( f"{json_file} contains {total_num_anns} annotations, but only " f"{total_num_valid_anns} of them match to images in the file." ) if "minival" not in json_file: # The popular valminusminival & minival annotations for COCO2014 contain this bug. # However the ratio of buggy annotations there is tiny and does not affect accuracy. # Therefore we explicitly white-list them. ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image] assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format( json_file ) imgs_anns = list(zip(imgs, anns)) logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file)) dataset_dicts = [] ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or []) num_instances_without_valid_segmentation = 0 for (img_dict, anno_dict_list) in imgs_anns: record = {} record["file_name"] = os.path.join(image_root, img_dict["file_name"]) record["height"] = img_dict["height"] record["width"] = img_dict["width"] image_id = record["image_id"] = img_dict["id"] objs = [] for anno in anno_dict_list: # Check that the image_id in this annotation is the same as # the image_id we're looking at. # This fails only when the data parsing logic or the annotation file is buggy. # The original COCO valminusminival2014 & minival2014 annotation files # actually contains bugs that, together with certain ways of using COCO API, # can trigger this assertion. assert anno["image_id"] == image_id assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.' obj = {key: anno[key] for key in ann_keys if key in anno} segm = anno.get("segmentation", None) if segm: # either list[list[float]] or dict(RLE) if isinstance(segm, dict): if isinstance(segm["counts"], list): # convert to compressed RLE segm = mask_util.frPyObjects(segm, *segm["size"]) else: # filter out invalid polygons (< 3 points) segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6] if len(segm) == 0: num_instances_without_valid_segmentation += 1 continue # ignore this instance obj["segmentation"] = segm keypts = anno.get("keypoints", None) if keypts: # list[int] for idx, v in enumerate(keypts): if idx % 3 != 2: # COCO's segmentation coordinates are floating points in [0, H or W], # but keypoint coordinates are integers in [0, H-1 or W-1] # Therefore we assume the coordinates are "pixel indices" and # add 0.5 to convert to floating point coordinates. keypts[idx] = v + 0.5 obj["keypoints"] = keypts obj["bbox_mode"] = BoxMode.XYWH_ABS if id_map: annotation_category_id = obj["category_id"] try: obj["category_id"] = id_map[annotation_category_id] except KeyError as e: raise KeyError( f"Encountered category_id={annotation_category_id} " "but this id does not exist in 'categories' of the json file." ) from e objs.append(obj) record["annotations"] = objs dataset_dicts.append(record) if num_instances_without_valid_segmentation > 0: logger.warning( "Filtered out {} instances without valid segmentation. 
".format( num_instances_without_valid_segmentation ) + "There might be issues in your dataset generation process. Please " "check https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html carefully" ) return dataset_dicts
def load_coco_json(json_file, image_root, dataset_name=None, extra_annotation_keys=None):
    """
    Load a json file with COCO's instances annotation format.
    Currently supports instance detection, instance segmentation,
    and person keypoints annotations.

    Args:
        json_file (str): full path to the json file in COCO instances annotation format.
        image_root (str or path-like): the directory where the images in this json file exist.
        dataset_name (str): the name of the dataset (e.g., coco_2017_train).
            If provided, this function will also put "thing_classes" into
            the metadata associated with this dataset.
        extra_annotation_keys (list[str]): list of per-annotation keys that should also be
            loaded into the dataset dict (besides "iscrowd", "bbox", "keypoints",
            "category_id", "segmentation"). The values for these keys will be returned as-is.
            For example, the densepose annotations are loaded in this way.

    Returns:
        list[dict]: a list of dicts in Detectron2 standard dataset dicts format.
        (See `Using Custom Datasets </tutorials/datasets.html>`_ )

    Notes:
        1. This function does not read the image files.
           The results do not have the "image" field.
    """
    from pycocotools.coco import COCO  # the Python API class for COCO

    timer = Timer()  # time the loading
    json_file = PathManager.get_local_path(json_file)
    with contextlib.redirect_stdout(io.StringIO()):
        coco_api = COCO(json_file)  # initialize from the annotation json file
    if timer.seconds() > 1:
        logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))

    id_map = None
    if dataset_name is not None:
        # create a new metadata entry
        meta = MetadataCatalog.get(dataset_name)
        # getCatIds() returns the ids of all categories
        cat_ids = sorted(coco_api.getCatIds())
        # loadCats() returns the full record of each category id; each record
        # is a dict, e.g. {"supercategory": "person", "id": 1, "name": "person"}
        cats = coco_api.loadCats(cat_ids)
        # The categories in a custom json file may not be sorted.
        # Collect the category names, ordered by id.
        thing_classes = [c["name"] for c in sorted(cats, key=lambda x: x["id"])]
        meta.thing_classes = thing_classes  # store the classes in the metadata

        # In COCO, certain category ids are artificially removed,
        # and by convention they are always ignored.
        # We deal with COCO's id issue and translate
        # the category ids to contiguous ids in [0, 80).
        # It works by looking at the "categories" field in the json, therefore
        # if users' own json also have incontiguous ids, we'll
        # apply this mapping as well but print a warning.
        if not (min(cat_ids) == 1 and max(cat_ids) == len(cat_ids)):  # ids not contiguous
            if "coco" not in dataset_name:
                logger.warning(
                    """
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.
"""
                )
        id_map = {v: i for i, v in enumerate(cat_ids)}
        meta.thing_dataset_id_to_contiguous_id = id_map  # remap to contiguous ids
        # Hasn't this mapping already been applied once in
        # data.datasets.builtin_meta._get_builtin_metadata?

    # sort indices for reproducible results
    img_ids = sorted(coco_api.imgs.keys())  # the ids of all images
    # imgs is a list of dicts, each looks something like:
    # {'license': 4,
    #  'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
    #  'file_name': 'COCO_val2014_000000001268.jpg',
    #  'height': 427,
    #  'width': 640,
    #  'date_captured': '2013-11-17 05:57:24',
    #  'id': 1268}
    # Load the info of all images; each element looks like the example above.
    imgs = coco_api.loadImgs(img_ids)
    # anns is a list[list[dict]], where each dict is an annotation
    # record for an object. The inner list enumerates the objects in an image
    # and the outer list enumerates over images. Example of anns[0]:
    # [{'segmentation': [[192.81,
    #     247.09,
    #     ...
    #     219.03,
    #     249.06]],
    #   'area': 1035.749,
    #   'iscrowd': 0,
    #   'image_id': 1268,
    #   'bbox': [192.81, 224.8, 74.73, 33.43],
    #   'category_id': 16,
    #   'id': 42986},
    #  ...]
    # The inner list[dict] holds all annotations of one image;
    # imgToAnns is indexed by img_id.
    anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
    # number of annotations that match an image in the file
    total_num_valid_anns = sum([len(x) for x in anns])
    # total number of annotations
    total_num_anns = len(coco_api.anns)
    if total_num_valid_anns < total_num_anns:
        logger.warning(
            f"{json_file} contains {total_num_anns} annotations, but only "
            f"{total_num_valid_anns} of them match to images in the file.")

    if "minival" not in json_file:
        # The popular valminusminival & minival annotations for COCO2014 contain this bug.
        # However the ratio of buggy annotations there is tiny and does not affect accuracy.
        # Therefore we explicitly white-list them.
        # [In the nested comprehension, the first for-clause runs before the second.]
        ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
        # annotation ids may not be unique
        assert len(set(ann_ids)) == len(ann_ids), \
            "Annotation ids in '{}' are not unique!".format(json_file)

    # pair(img, anno)
    imgs_anns = list(zip(imgs, anns))
    logger.info("Loaded {} images in COCO format from {}".format(len(imgs_anns), json_file))

    dataset_dicts = []

    # iscrowd=0 marks a single object; iscrowd=1 marks a group of objects.
    # This distinction matters mainly for segmentation:
    # the segmentation format depends on whether the instance is a single
    # object (iscrowd=0, polygons format) or a group of objects (iscrowd=1,
    # RLE format). Note that a single object (iscrowd=0) may need multiple
    # polygons to represent it, e.g. when the object is partially occluded in
    # the image. With iscrowd=1 (annotating a group of objects, e.g. a crowd
    # of people), the segmentation uses the RLE format.
    ann_keys = ["iscrowd", "bbox", "keypoints", "category_id"] + (extra_annotation_keys or [])

    num_instances_without_valid_segmentation = 0

    for (img_dict, anno_dict_list) in imgs_anns:
        # gather the info of one image into a single record
        record = {}
        # change to full path
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        objs = []
        # anno_dict_list: all annotations of one image
        for anno in anno_dict_list:
            # Check that the image_id in this annotation is the same as
            # the image_id we're looking at.
            # This fails only when the data parsing logic or the annotation file is buggy.

            # The original COCO valminusminival2014 & minival2014 annotation files
            # actually contains bugs that, together with certain ways of using COCO API,
            # can trigger this assertion.
            assert anno["image_id"] == image_id

            assert anno.get("ignore", 0) == 0, '"ignore" in COCO json file is not supported.'

            # pick the needed fields according to
            # ann_keys: ["iscrowd", "bbox", "keypoints", "category_id"]
            obj = {key: anno[key] for key in ann_keys if key in anno}

            segm = anno.get("segmentation", None)
            if segm:  # segmentation info exists:
                # either list[list[float]] or dict(RLE)
                if isinstance(segm, dict):
                    if isinstance(segm["counts"], list):
                        # convert to compressed RLE
                        segm = mask_util.frPyObjects(segm, *segm["size"])
                else:
                    # filter out invalid polygons (< 3 points)
                    segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
                    if len(segm) == 0:
                        num_instances_without_valid_segmentation += 1
                        continue  # ignore this instance
                obj["segmentation"] = segm

            keypts = anno.get("keypoints", None)
            if keypts:  # list[int]
                for idx, v in enumerate(keypts):
                    if idx % 3 != 2:
                        # COCO's segmentation coordinates are floating points in [0, H or W],
                        # but keypoint coordinates are integers in [0, H-1 or W-1]
                        # Therefore we assume the coordinates are "pixel indices" and
                        # add 0.5 to convert to floating point coordinates.
                        keypts[idx] = v + 0.5
                obj["keypoints"] = keypts

            obj["bbox_mode"] = BoxMode.XYWH_ABS  # 1
            # remap the category ids
            if id_map:
                obj["category_id"] = id_map[obj["category_id"]]
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)

    if num_instances_without_valid_segmentation > 0:
        logger.warning(
            "Filtered out {} instances without valid segmentation. ".format(
                num_instances_without_valid_segmentation)
            + "There might be issues in your dataset generation process. "
            "A valid polygon should be a list[float] with even length >= 6."
        )
    return dataset_dicts