def finalize_use_feature(self):
    pair_df_per_gpu = self.pair_df
    all_pair_df = all_gather(pair_df_per_gpu)
    if not is_main_process():
        return
    pd.concat(all_pair_df).to_pickle(self.output_folder + "/all_pair_df.pickle")

def test_model(cfg, model, distributed, iters_per_epoch, dllogger, args):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    results = []
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            dllogger=dllogger,
            args=args,
        )
        synchronize()
        results.append(result)
    if is_main_process():
        map_results, raw_results = results[0]
        bbox_map = map_results.results["bbox"]['AP']
        if cfg.MODEL.MASK_ON:
            segm_map = map_results.results["segm"]['AP']
        else:
            segm_map = 0.0
        dllogger.log(
            step=(cfg.SOLVER.MAX_ITER, cfg.SOLVER.MAX_ITER / iters_per_epoch),
            data={"BBOX_mAP": bbox_map, "MASK_mAP": segm_map},
        )
        dllogger.log(step=tuple(), data={"BBOX_mAP": bbox_map, "MASK_mAP": segm_map})
        args.writer.add_scalar('BBOX_mAP', bbox_map, cfg.SOLVER.MAX_ITER / iters_per_epoch + 1)
        args.writer.add_scalar('MASK_mAP', segm_map, cfg.SOLVER.MAX_ITER / iters_per_epoch + 1)

def __init__(self, ann_file, root, remove_images_without_annotations, transforms=None):
    # ann_f = ann_file
    ann_f = ann_file + ('0' if is_main_process() else '')
    super(COCODataset, self).__init__(root, ann_f)
    synchronize()
    # sort indices for reproducible results
    self.root_json = os.path.dirname(root)
    self.ids = sorted(self.ids)

    # filter images without detection annotations
    # if remove_images_without_annotations:
    #     self.ids = [
    #         img_id
    #         for img_id in self.ids
    #         if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
    #     ]

    self.json_category_id_to_contiguous_id = {
        v: i + 1 for i, v in enumerate(self.coco.getCatIds())
    }
    self.contiguous_category_id_to_json_id = {
        v: k for k, v in self.json_category_id_to_contiguous_id.items()
    }
    self.id_to_img_map = {k: v for k, v in enumerate(self.ids)}
    self.transforms = transforms

def preprocess_coco(self, ann_file, split):
    if split == 5 and not self.is_train:
        return ann_file
    ann_file_new = ann_file + "_" + str(split)
    if not os.path.isfile(ann_file_new) and is_main_process():
        with open(ann_file) as f_in:
            anns = json.load(f_in)
        if split == 5:
            # voc / non-voc
            voc_inds = (0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62)
            split_cat = [
                a["id"]
                for i, a in enumerate(anns["categories"])
                if i not in voc_inds
            ]
        else:
            if not self.is_train:
                split_cat = [
                    a["id"]
                    for i, a in enumerate(anns["categories"])
                    if i % 4 == (split - 1)
                ]
            else:
                split_cat = [
                    a["id"]
                    for i, a in enumerate(anns["categories"])
                    if i % 4 != (split - 1)
                ]
        anns["annotations"] = [
            v for v in anns['annotations'] if v["category_id"] in split_cat
        ]
        with open(ann_file_new, "w") as f_out:
            json.dump(anns, f_out)
    return ann_file_new

def mlperf_test_early_exit(iteration, iters_per_epoch, tester, model, distributed,
                           min_bbox_map, min_segm_map, args):
    if iteration > 0 and iteration % iters_per_epoch == 0:
        epoch = iteration // iters_per_epoch
        dllogger.log(step="PARAMETER", data={"eval_start": True})
        bbox_map, segm_map = test_and_exchange_map(tester, model, distributed, args)

        # necessary for correctness, this is for resuming the training
        model.train()
        dllogger.log(
            step=(iteration, epoch),
            data={"BBOX_mAP": bbox_map, "MASK_mAP": segm_map},
        )
        if is_main_process():
            args.writer.add_scalar('BBOX_mAP', bbox_map, epoch)
            args.writer.add_scalar('MASK_mAP', segm_map, epoch)

        # terminating condition
        if bbox_map >= min_bbox_map and segm_map >= min_segm_map:
            dllogger.log(step="PARAMETER", data={"target_accuracy_reached": True})
            return False  # True

    # let's continue the training
    return False

def inference(
    model,
    data_loader,
    postprocessor,
    semi_loss,
    dataset_name,
    iou_types=("bbox",),
    box_only=False,
    device="cuda",
    expected_results=(),
    expected_results_sigma_tol=4,
    output_folder=None,
    anchor_strides=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(dataset_name, len(dataset)))
    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(
        model, data_loader, postprocessor, semi_loss, anchor_strides, device, inference_timer
    )
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset), num_devices
        )
    )
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices,
        )
    )

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        box_only=box_only,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )
    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=output_folder,
                    **extra_args)

def run_test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    # evaluate object detection
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result_obj = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            eval_attributes=False,
        )
        synchronize()

    # evaluate attribute detection
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result_attr = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            eval_attributes=True,
        )
        synchronize()

    if is_main_process():
        results = {**result_obj, **result_attr}
        print(results)

def _get_tensorboard_writer(log_dir, window_size=20):
    if is_main_process() and log_dir is not None:
        timestamp = datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H:%M')
        return TensorboardXWriter(os.path.join(log_dir, timestamp), window_size)
    else:
        return None

def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    all_predictions = all_gather(predictions_per_gpu)
    if not is_main_process():
        return
    # merge the list of dicts
    predictions = {}
    for p in all_predictions:
        predictions.update(p)
    return predictions

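# Illustrative sketch (not part of the original sources): the same
# "gather per-rank dicts, merge on rank 0" pattern used above, expressed with
# plain torch.distributed calls instead of the maskrcnn_benchmark comm helpers.
# It assumes an already-initialized default process group and PyTorch >= 1.8
# (for all_gather_object); the function name is hypothetical.
import torch.distributed as dist

def gather_and_merge(predictions_per_gpu):
    world_size = dist.get_world_size()
    gathered = [None] * world_size
    # every rank contributes its {image_id: prediction} dict
    dist.all_gather_object(gathered, predictions_per_gpu)
    if dist.get_rank() != 0:
        # non-main ranks return nothing, mirroring the helper above
        return None
    merged = {}
    for partial in gathered:
        merged.update(partial)
    return merged
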
def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.

    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming convention
    ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
    digits of the SHA256 hash of the contents of the file. The hash is used to
    ensure unique names and to verify the contents of the file.

    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.

    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar to stderr

    Example:
        >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    if not os.path.exists(model_dir) and is_main_process():
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround as pre-trained Caffe2 models from Detectron have all the same filename
        # so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
            # which matches the hash PyTorch uses. So we skip the hash matching
            # if the hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file

def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument('--output_dir', default='runs',
                        help='where to write models and stats')
    parser.add_argument('--resume',
                        help='filename of a model to resume training with')
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=2)
    parser.add_argument("--save_every", type=int, default=500)
    parser.add_argument("--image_every", type=int, default=100)
    parser.add_argument("--print_every", type=int, default=20)
    parser.add_argument("--lr", type=float, default=0.001)
    parser.add_argument("--single-block", action='store_true', default=False)
    parser.add_argument("--fine-tune", "-ft", action='store_true', default=False,
                        help="do not restore optimizer and scheduler state")
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    if is_main_process():
        logging.basicConfig(level=logging.INFO)
        output_dir = experiment_dir(base_dir=args.output_dir)
        logger = setup_logger("fcos", output_dir, get_rank())
        logger.info("Using {} GPUs".format(num_gpus))
        logger.info(args)
    else:
        output_dir = None

    train(
        output_dir,
        args.local_rank,
        args.distributed,
        resume=args.resume,
        base_lr=args.lr,
        batch_size=args.batch_size,
        single_block=args.single_block,
        fine_tune=args.fine_tune,
        save_every=args.save_every,
        image_every=args.image_every,
        print_every=args.print_every,
    )

def mlperf_test_early_exit(iteration, iters_per_epoch, tester, model, distributed,
                           min_bbox_map, min_segm_map):
    # Note: let iters / epoch == 10k, at iter 9999 we've finished epoch 0 and need to test
    if iteration > 0 and (iteration + 1) % iters_per_epoch == 0:
        synchronize()
        epoch = iteration // iters_per_epoch + 1

        log_end(key=constants.EPOCH_STOP, metadata={"epoch_num": epoch})
        log_end(key=constants.BLOCK_STOP, metadata={"first_epoch_num": epoch})
        log_start(key=constants.EVAL_START, metadata={"epoch_num": epoch})

        # set the async evaluator's tag correctly
        set_epoch_tag(epoch)

        # Note: No longer returns anything, underlying future is in another castle
        tester(model=model, distributed=distributed)

        # necessary for correctness
        model.train()
    else:
        # Otherwise, check for finished async results
        results = check_completed_tags()

        # on master process, check each result for terminating condition
        # sentinel for run finishing
        finished = 0
        if is_main_process():
            for result_epoch, (bbox_map, segm_map) in results.items():
                logger = logging.getLogger('maskrcnn_benchmark.trainer')
                logger.info('bbox mAP: {}, segm mAP: {}'.format(bbox_map, segm_map))

                log_event(key=constants.EVAL_ACCURACY,
                          value={"BBOX": bbox_map, "SEGM": segm_map},
                          metadata={"epoch_num": result_epoch})
                log_end(key=constants.EVAL_STOP, metadata={"epoch_num": result_epoch})

                # terminating condition
                if bbox_map >= min_bbox_map and segm_map >= min_segm_map:
                    logger.info("Target mAP reached, exiting...")
                    finished = 1
                    # return True

        # We now know on rank 0 whether or not we should terminate
        # Bcast this flag on multi-GPU
        if get_world_size() > 1:
            with torch.no_grad():
                finish_tensor = torch.tensor([finished], dtype=torch.int32,
                                             device=torch.device('cuda'))
                torch.distributed.broadcast(finish_tensor, 0)

                # If notified, end.
                if finish_tensor.item() == 1:
                    return True
        else:
            # Single GPU, don't need to create tensor to bcast, just use value directly
            if finished == 1:
                return True

    # Otherwise, default case, continue
    return False

def run_test(cfg, model, distributed, iteration_name):
    global best_val_map, is_best_val_map, cur_val_map, writer
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name + '_' + iteration_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        results = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        if not is_main_process():
            synchronize()
            return
        if iteration_name != 'final':
            for k, v in results.results.items():
                for ki, vi in v.items():
                    if ki == 'AP':
                        cur_val_map = vi
                        if vi > best_val_map:
                            best_val_map = vi
                            is_best_val_map = True
                        else:
                            is_best_val_map = False
                    writer.add_scalar(dataset_name + '_' + k + '_' + ki, vi, int(iteration_name))
                    # print(dataset_name + '_' + k + '_' + ki, vi)
        synchronize()

def _get_tensorboard_writer(log_dir):
    try:
        from tensorboardX import SummaryWriter
    except ImportError:
        raise ImportError('To use tensorboard please install tensorboardX '
                          '[ pip install tensorflow tensorboardX ].')

    if is_main_process():
        timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H:%M')
        tb_logger = SummaryWriter('{}-{}'.format(log_dir, timestamp))
        return tb_logger
    else:
        return None

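# Usage sketch (illustrative, not from the original sources): because the helper
# above returns a SummaryWriter only on the main process and None everywhere
# else, every logging call must be guarded. The loop and loss values below are
# hypothetical; only _get_tensorboard_writer and the rank-0 convention come from
# the code above.
tb_logger = _get_tensorboard_writer("runs/maskrcnn")
for iteration, loss in enumerate([0.9, 0.7, 0.5]):  # stand-in for real training losses
    if tb_logger is not None:  # true only on the main process
        tb_logger.add_scalar("train/loss", loss, iteration)
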
def inference(
    model,
    rngs,
    data_loader,
    iou_types=("bbox",),
    box_only=False,
    device="cuda",
    expected_results=(),
    expected_results_sigma_tol=4,
    output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = (torch.distributed.get_world_size()
                   if torch.distributed.is_initialized() else 1)
    dataset = data_loader.dataset
    predictions = compute_on_dataset(model, rngs, data_loader, device)
    # wait for all processes to complete before measuring the time
    synchronize()

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    coco_results = {}
    if "bbox" in iou_types:
        coco_results["bbox"] = prepare_for_coco_detection(predictions, dataset)

    results = COCOResults(*iou_types)
    uuid1 = str(uuid.uuid1())
    for iou_type in iou_types:
        with tempfile.NamedTemporaryFile() as f:
            file_path = f.name
            if output_folder:
                file_path = os.path.join(output_folder, uuid1 + iou_type + ".json")
            res = evaluate_predictions_on_coco(dataset.coco, coco_results[iou_type],
                                               file_path, iou_type)
            results.update(res)
            if os.path.isfile(file_path):
                os.remove(file_path)
    return results

def evaluator(cfg, args, model, device, iteration):
    meters_val = MetricLogger(delimiter="  ")
    data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=False)[0]
    with torch.no_grad():
        # Should be one image for each GPU:
        print('Calculating evaluation loss.')
        for iteration_val, batch in enumerate(data_loader_val):
            # if is_main_process():
            #     print(iteration_val)
            if args.debug and iteration_val > 10:
                break
            images_val, targets_val, _ = batch

            # skip batches that contain an image with no ground-truth boxes
            skip_batch = False
            nbox = []
            for t in targets_val:
                nbox.append(len(t))
                if len(t) < 1:
                    skip_batch = True
                    break
            if skip_batch:
                continue

            try:
                print(iteration_val, nbox)
                images_val = images_val.to(device)
                targets_val = [target.to(device) for target in targets_val]
                loss_dict = model(images_val, targets_val)
                losses = sum(loss for loss in loss_dict.values())
                loss_dict_reduced = reduce_loss_dict(loss_dict)
                losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            except Exception:
                print('Warning: ground truth error.')

    # synchronize()
    if is_main_process():
        print('Save evaluation loss to tensorboard.')
        for name, meter in meters_val.meters.items():
            print(name, meter.global_avg)
            args.writer.add_scalar('EvalMetrics/' + name, meter.global_avg,
                                   iteration / args.iters_per_epoch)
    print('Pass')

def check_completed_tags():
    # Evaluator is only valid on master rank - all others will have nothing.
    # So, assemble lists of completed runs on master
    if is_main_process():
        evaluator = get_evaluator()

        # loop over all epoch, result pairs that have finished
        all_results = {}
        for t, r in evaluator.finished_tasks().items():
            # Note: one indirection due to possibility of multiple test datasets
            # we only care about the first
            map_results = r  # [0]
            bbox_map = map_results.results["bbox"]['AP']
            segm_map = map_results.results["segm"]['AP']
            all_results.update({t: (bbox_map, segm_map)})
        return all_results

    return {}

def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.

    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming convention
    ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
    digits of the SHA256 hash of the contents of the file. The hash is used to
    ensure unique names and to verify the contents of the file.

    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.

    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar to stderr

    Example:
        >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround as pre-trained Caffe2 models from Detectron have all the same filename
        # so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
            # which matches the hash PyTorch uses. So we skip the hash matching
            # if the hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file

def inference(
    model,
    data_loader,
    dataset_name,
    device="cuda",
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(dataset_name, len(dataset)))
    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(model, data_loader, device, inference_timer)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset), num_devices
        )
    )
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices,
        )
    )

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not is_main_process():
        return

    results = do_voc_evaluation(dataset=dataset, predictions=predictions, logger=logger)
    return results

def inference(
    model,
    data_loader,
    iou_types=("bbox",),
    box_only=False,
    device="cuda",
    expected_results=(),
    expected_results_sigma_tol=4,
    without_nms=False,
    output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = (
        torch.distributed.deprecated.get_world_size()
        if torch.distributed.deprecated.is_initialized()
        else 1
    )
    logger = logging.getLogger("maskrcnn_benchmark.eval_IR")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} images".format(len(dataset)))
    start_time = time.time()
    with_overlaps, without_overlaps = compute_on_dataset(model, data_loader, device)
    # wait for all processes to complete before measuring the time
    synchronize()
    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=total_time))
    logger.info(
        "Total inference time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset), num_devices
        )
    )

    with_overlaps = _accumulate_predictions_from_multiple_gpus(with_overlaps)
    without_overlaps = _accumulate_predictions_from_multiple_gpus(without_overlaps)
    if not is_main_process():
        return

    logger.info("Evaluating IoU average Recall (IR)")
    results = evaluate_iou_average_recall(with_overlaps, without_overlaps, output_folder)
    logger.info(results)

def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu):
    all_predictions = all_gather(predictions_per_gpu)
    if not is_main_process():
        return
    # merge the list of dicts
    predictions = {}
    for p in all_predictions:
        predictions.update(p)
    # convert a dict where the key is the index in a list
    image_ids = list(sorted(predictions.keys()))
    if len(image_ids) != image_ids[-1] + 1:
        logger = logging.getLogger("maskrcnn_benchmark.inference")
        logger.warning(
            "Number of images that were gathered from multiple processes is not "
            "a contiguous set. Some images might be missing from the evaluation"
        )

    # convert to a list
    predictions = [predictions[i] for i in image_ids]
    return predictions

def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu, return_dict=False, only_gather=False):
    if _dict_to_list is None:
        return
    if get_world_size() == 1:
        return predictions_per_gpu
    all_predictions = all_gather(predictions_per_gpu)
    if only_gather:
        return all_predictions
    if not is_main_process():
        return
    # merge the list of dicts
    predictions = {}
    for p in all_predictions:
        predictions.update(p)
    if return_dict:
        return predictions
    return _dict_to_list(predictions)

def test_and_exchange_map(tester, model, distributed):
    results = tester(model=model, distributed=distributed)

    # main process only
    if is_main_process():
        # Note: one indirection due to possibility of multiple test datasets, we only care about the first
        # tester returns (parsed results, raw results). In our case, don't care about the latter
        map_results, raw_results = results[0]
        bbox_map = map_results.results["bbox"]['AP']
        segm_map = map_results.results["segm"]['AP']
    else:
        bbox_map = 0.
        segm_map = 0.

    if distributed:
        map_tensor = torch.tensor([bbox_map, segm_map], dtype=torch.float32,
                                  device=torch.device("cuda"))
        torch.distributed.broadcast(map_tensor, 0)
        bbox_map = map_tensor[0].item()
        segm_map = map_tensor[1].item()

    return bbox_map, segm_map

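# Minimal, self-contained sketch of the broadcast idiom used above (illustrative
# only, not from the original sources): rank 0 holds the real metrics and every
# other rank receives them in place via torch.distributed.broadcast. Assumes an
# initialized default process group and a CUDA device; the function name is
# hypothetical.
import torch
import torch.distributed as dist

def share_metrics_from_rank0(bbox_map, segm_map):
    metrics = torch.tensor([bbox_map, segm_map], dtype=torch.float32, device="cuda")
    # broadcast overwrites the tensor in place on every rank except the source
    dist.broadcast(metrics, src=0)
    return metrics[0].item(), metrics[1].item()
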
def preprocess_lvis(self, ann_file, extract_feature):
    suffix = "_freq" if self.is_train else "_common_rare"
    ann_file_new = ann_file + suffix
    if not os.path.isfile(ann_file_new) and is_main_process():
        with open(ann_file) as f_in:
            anns = json.load(f_in)
        cids_lvis_f = [
            a["id"] for a in anns["categories"] if a["frequency"] == "f"
        ]
        if self.is_train:
            anns["annotations"] = [
                v for v in anns["annotations"] if v["category_id"] in cids_lvis_f
            ]
        else:
            anns["annotations"] = [
                v for v in anns["annotations"] if v["category_id"] not in cids_lvis_f
            ]

        # classes not exhaustively annotated should not be support examples of an image
        if not extract_feature:
            img_ne = {
                d["id"]: d["not_exhaustive_category_ids"] for d in anns["images"]
            }
            anns["annotations"] = [
                v for v in anns["annotations"]
                if v["category_id"] not in img_ne[v["image_id"]]
            ]

        # the file_name field in the lvis val dataset is broken
        replace_file_name = lambda x: (x["file_name"].split("_")[2])
        if "_" in anns["images"][0]["file_name"]:
            for img in anns["images"]:
                img.update({"file_name": replace_file_name(img)})

        with open(ann_file_new, "w") as f_out:
            json.dump(anns, f_out)
    return ann_file_new

def main():
    run = Run.get_context()
    workspace = run.experiment.workspace
    # First thing to do is try to set up from environment
    configure_nccl_settings_from_env()

    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=os.getenv("LOCAL_RANK", 0))
    parser.add_argument(
        "--max_steps",
        type=int,
        default=0,
        help="Override number of training steps in the config",
    )
    parser.add_argument("--dataset", type=str, required=True)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument("--fp16", help="Mixed precision training", action="store_true")
    parser.add_argument("--amp", help="Mixed precision training", action="store_true")
    parser.add_argument(
        "--skip_checkpoint",
        default=False,
        action="store_true",
        help="Whether to save checkpoints",
    )
    parser.add_argument(
        "--json-summary",
        help="Out file for DLLogger",
        default="dllogger.out",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    args.fp16 = args.fp16 or args.amp

    num_gpus = get_global_size()
    args.distributed = num_gpus > 1
    args.local_rank = get_local_rank()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Redundant option - Override config parameter with command line input
    if args.max_steps > 0:
        cfg.SOLVER.MAX_ITER = args.max_steps
    if args.skip_checkpoint:
        cfg.SAVE_CHECKPOINT = False
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    if is_main_process():
        dllogger.init(
            backends=[
                dllogger.JSONStreamBackend(
                    verbosity=dllogger.Verbosity.VERBOSE, filename=args.json_summary
                ),
                dllogger.StdOutBackend(
                    verbosity=dllogger.Verbosity.VERBOSE, step_format=format_step
                ),
            ]
        )
    else:
        dllogger.init(backends=[])
    dllogger.log(step="PARAMETER", data={"gpu_count": num_gpus})
    # dllogger.log(step="PARAMETER", data={"environment_info": collect_env_info()})
    dllogger.log(step="PARAMETER", data={"config_file": args.config_file})
    dllogger.log(step="PARAMETER", data={"config": cfg})

    if args.fp16:
        fp16 = True
    else:
        fp16 = False

    if args.local_rank == 0:
        dllogger.log(step="WEIGHT DOWNLOAD", data={"complete": False})
        download_weights(cfg.MODEL.WEIGHT, cfg.PATHS_CATALOG)
        dllogger.log(step="WEIGHT DOWNLOAD", data={"complete": True})

        dllogger.log(
            step="DATASET MOUNT", data={"complete": False, "dataset": args.dataset}
        )
        coco2017 = Dataset.get_by_name(workspace, args.dataset)
        cc2017mount = coco2017.mount("/data")
        cc2017mount.start()
        dllogger.log(
            step="DATASET MOUNT", data={"complete": True, "dataset": args.dataset}
        )

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    model, iters_per_epoch = train(
        cfg, args.local_rank, args.distributed, fp16, dllogger
    )

def finalize_extract_feature(self):
    features_df_per_gpu = self.features_df
    all_features_df = all_gather(features_df_per_gpu)
    if not is_main_process():
        return
    pd.concat(all_features_df).to_pickle(self.df_file)

def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    tb_logger,
    cfg,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        if cfg.SOLVER.USE_ADAM:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == (max_iter - 1):
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
            # tensorboard scalars are only written by the main process
            if is_main_process():
                for tag, value in loss_dict_reduced.items():
                    tb_logger.scalar_summary(tag, value.item(), iteration)

        if iteration % checkpoint_period == 0 and iteration > 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)

    checkpointer.save("model_{:07d}".format(iteration), **arguments)
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )

def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=os.getenv('LOCAL_RANK', 0))
    parser.add_argument("--max_steps", type=int, default=0,
                        help="Override number of training steps in the config")
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument("--fp16", help="Mixed precision training", action="store_true")
    parser.add_argument("--amp", help="Mixed precision training", action="store_true")
    parser.add_argument('--skip_checkpoint', default=False, action='store_true',
                        help="Whether to save checkpoints")
    parser.add_argument(
        "--json-summary",
        help="Out file for DLLogger",
        default="dllogger.out",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    args.fp16 = args.fp16 or args.amp

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # Redundant option - Override config parameter with command line input
    if args.max_steps > 0:
        cfg.SOLVER.MAX_ITER = args.max_steps
    if args.skip_checkpoint:
        cfg.SAVE_CHECKPOINT = False
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())

    if is_main_process():
        dllogger.init(backends=[
            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                       filename=args.json_summary),
            dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                   step_format=format_step)
        ])
    else:
        dllogger.init(backends=[])

    dllogger.log(step="PARAMETER", data={"gpu_count": num_gpus})
    # dllogger.log(step="PARAMETER", data={"environment_info": collect_env_info()})
    dllogger.log(step="PARAMETER", data={"config_file": args.config_file})
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
    dllogger.log(step="PARAMETER", data={"config": cfg})

    if args.fp16:
        fp16 = True
    else:
        fp16 = False

    model, iters_per_epoch = train(cfg, args.local_rank, args.distributed, fp16, dllogger)

    if not args.skip_test:
        if not cfg.PER_EPOCH_EVAL:
            test_model(cfg, model, args.distributed, iters_per_epoch, dllogger)

def main():
    mlperf_log.ROOT_DIR_MASKRCNN = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if is_main_process():
        # Setting logging file parameters for compliance logging
        os.environ["COMPLIANCE_FILE"] = './MASKRCNN_complVv0.5.0_' + str(datetime.datetime.now())
        mlperf_log.LOG_FILE = os.getenv("COMPLIANCE_FILE")
        mlperf_log._FILE_HANDLER = logging.FileHandler(mlperf_log.LOG_FILE)
        mlperf_log._FILE_HANDLER.setLevel(logging.DEBUG)
        mlperf_log.LOGGER.addHandler(mlperf_log._FILE_HANDLER)

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

        print_mlperf(key=mlperf_log.RUN_START)

        # setting seeds - needs to be timed, so after RUN_START
        if is_main_process():
            master_seed = random.SystemRandom().randint(0, 2**32 - 1)
            seed_tensor = torch.tensor(master_seed, dtype=torch.float32,
                                       device=torch.device("cuda"))
        else:
            seed_tensor = torch.tensor(0, dtype=torch.float32,
                                       device=torch.device("cuda"))

        torch.distributed.broadcast(seed_tensor, 0)
        master_seed = int(seed_tensor.item())
    else:
        print_mlperf(key=mlperf_log.RUN_START)
        # random master seed, random.SystemRandom() uses /dev/urandom on Unix
        master_seed = random.SystemRandom().randint(0, 2**32 - 1)

    # actually use the random seed
    args.seed = master_seed
    # random number generator with seed set to master_seed
    random_number_generator = random.Random(master_seed)
    print_mlperf(key=mlperf_log.RUN_SET_RANDOM_SEED, value=master_seed)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # generate worker seeds, one seed for every distributed worker
    worker_seeds = generate_seeds(
        random_number_generator,
        torch.distributed.get_world_size()
        if torch.distributed.is_initialized() else 1)  # todo sharath what if CPU

    # broadcast seeds from rank=0 to other workers
    worker_seeds = broadcast_seeds(worker_seeds, device='cuda')

    # Setting worker seeds
    logger.info("Worker {}: Setting seed {}".format(
        args.local_rank, worker_seeds[args.local_rank]))
    torch.manual_seed(worker_seeds[args.local_rank])

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    print_mlperf(key=mlperf_log.RUN_FINAL)

def do_coco_evaluation(
    dataset,
    predictions,
    box_only,
    output_folder,
    iou_types,
    expected_results,
    expected_results_sigma_tol,
):
    logger = logging.getLogger("maskrcnn_benchmark.inference")

    # Different path here, fast parallel method not available, fall back to
    # effectively the old path.
    if box_only:
        predictions = _accumulate_predictions_from_multiple_gpus(predictions)
        if not is_main_process():
            return

        logger.info("Evaluating bbox proposals")
        areas = {"all": "", "small": "s", "medium": "m", "large": "l"}
        res = COCOResults("box_proposal")
        for limit in [100, 1000]:
            for area, suffix in areas.items():
                stats = evaluate_box_proposals(predictions, dataset, area=area, limit=limit)
                key = "AR{}@{:d}".format(suffix, limit)
                res.results["box_proposal"][key] = stats["ar"].item()
        logger.info(res)
        check_expected_results(res, expected_results, expected_results_sigma_tol)
        if output_folder:
            torch.save(res, os.path.join(output_folder, "box_proposals.pth"))
        return

    logger.info("Preparing results for COCO format")
    coco_results = {}
    if "bbox" in iou_types:
        logger.info("Preparing bbox results")
        coco_results["bbox"] = prepare_for_coco_detection(predictions, dataset)
    if "segm" in iou_types:
        logger.info("Preparing segm results")
        coco_results["segm"] = prepare_for_coco_segmentation(predictions, dataset)
    if 'keypoints' in iou_types:
        logger.info('Preparing keypoints results')
        coco_results['keypoints'] = prepare_for_coco_keypoint(predictions, dataset)

    # Gather all prepared predictions of each type from all ranks
    if "bbox" in iou_types:
        temp_bbox_list = all_gather(coco_results["bbox"])
    if "segm" in iou_types:
        temp_segm_list = all_gather(coco_results["segm"])
    if "keypoints" in iou_types:
        temp_keypoints_list = all_gather(coco_results["keypoints"])

    # Only main process will call COCO
    if not is_main_process():
        return

    # Unpack the gathered results into a single List[Entry]
    if "bbox" in iou_types:
        coco_results["bbox"] = [i for j in temp_bbox_list for i in j]
    if "segm" in iou_types:
        coco_results["segm"] = [i for j in temp_segm_list for i in j]
    if "keypoints" in iou_types:
        coco_results["keypoints"] = [i for j in temp_keypoints_list for i in j]

    results = evaluate_coco(dataset, coco_results, iou_types, output_folder)

    # Submit to async evaluator
    # get_evaluator().submit_task(get_tag(),
    #                             evaluate_coco,
    #                             dataset,
    #                             coco_results,
    #                             iou_types,
    #                             output_folder)
    # Note: None of these are possible now
    # logger.info(results)
    check_expected_results(results, expected_results, expected_results_sigma_tol)
    # if output_folder:
    #     torch.save(results, os.path.join(output_folder, "coco_results.pth"))
    # Note: results is now empty, the relevant future is held in the hidden
    # AsyncEvaluator object
    return results, coco_results

def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    # replace the output directory based on the "task" kind.
    cfg.OUTPUT_DIR = os.path.join(
        os.path.dirname(cfg.OUTPUT_DIR), cfg.TASK.KIND.lower(), cfg.NAME,
        os.path.basename(cfg.OUTPUT_DIR))
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    running_lock = None
    if is_main_process():
        # save the config to the output directory.
        save_config_path = os.path.join(output_dir, "config.yaml")
        with open(save_config_path, "w") as cf:
            cf.write(cfg.dump())
        print("wrote (merged) config to {0}".format(save_config_path))

        running_lock = RunningLockFile(output_dir).start()

        save_code = SaveCodeChanges([os.path.dirname(maskrcnn_benchmark.__path__[0])])
        save_code(output_dir)
        print("saved code changes (against HEAD) to {0}".format(output_dir))

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    chunk_predictions = cfg.TEST.CHUNK_PREDICTIONS

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    if cfg.TASK.KIND.lower() == "panoptic":
        # at some point we should run all bbox/segm, etc.
        if cfg.MODEL.PANOPTIC.COMPUTE_CC_RESULTS and not chunk_predictions:
            iou_types = ("ccpan", "pan")
        else:
            iou_types = ("pan",)

    if cfg.TEST.ORDER_ONLY:
        iou_types = ("order",)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    # allow the user to override with existing .pth files.
    # unsure how this will work in the distributed setting.
    if not distributed:
        existing_predictions = cfg.TEST.PREDICTION_PATHS
    else:
        print("distributed... ignoring existing predictions if given")
        existing_predictions = []

    chunk_predictions = cfg.TEST.CHUNK_PREDICTIONS
    if not distributed:
        # only makes sense for multiple GPUS when doing actual prediction.
        chunk_predictions = chunk_predictions and existing_predictions

    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    dataset_index = 0
    try:
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            predictions = None
            if existing_predictions is not None and dataset_index < len(existing_predictions):
                # todo, check for "chunk" predictions and delay the load.
                predictions_path = existing_predictions[dataset_index]
                if not chunk_predictions:
                    predictions = torch.load(predictions_path)
                else:
                    # this should be a list of "chunks".
                    predictions = predictions_path
                    print("using chunks: {0}".format(predictions))
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                predictions=predictions,
                working_directory=cfg.TEMPORARY_DIR,
                chunk_predictions=chunk_predictions,
                compute_pre_results=cfg.MODEL.PANOPTIC.COMPUTE_PRE_RESULTS,
                panoptic_confidence_thresh=cfg.MODEL.FUSION.CONFIDENCE_THRESHOLD,
                panoptic_overlap_thresh=cfg.MODEL.FUSION.OVERLAP_THRESHOLD,
                panoptic_stuff_min_area=cfg.MODEL.FUSION.STUFF_MINIMUM_AREA)
            synchronize()
            dataset_index += 1
    finally:
        if running_lock:
            running_lock.end()