def build_train_loader(cls, cfg):
    """
    Returns:
        iterable

    It now calls :func:`fastreid.data.build_reid_train_loader`.
    Overwrite it if you'd like a different data loader.
    """
    logger = logging.getLogger("fastreid.clas_dataset")
    logger.info("Prepare training set")

    train_items = list()
    for d in cfg.DATASETS.NAMES:
        data = DATASET_REGISTRY.get(d)(root=_root)
        if comm.is_main_process():
            data.show_train()
        train_items.extend(data.train)

    transforms = build_transforms(cfg, is_train=True)
    train_set = ClasDataset(train_items, transforms)
    data_loader = build_reid_train_loader(cfg, train_set=train_set)

    # Save index-to-class dictionary
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        path = os.path.join(output_dir, "idx2class.json")
        with PathManager.open(path, "w") as f:
            json.dump(train_set.idx_to_class, f)

    return data_loader
def build_reid_test_loader(cfg, dataset_name):
    cfg = cfg.clone()
    cfg.defrost()

    dataset = DATASET_REGISTRY.get(dataset_name)(root=_root, dataset_name=cfg.SPECIFIC_DATASET)
    if comm.is_main_process():
        dataset.show_test()
    test_items = dataset.query + dataset.gallery

    test_transforms = build_transforms(cfg, is_train=False)
    test_set = CommDataset(test_items, test_transforms, relabel=False)

    mini_batch_size = cfg.TEST.IMS_PER_BATCH // comm.get_world_size()
    data_sampler = samplers.InferenceSampler(len(test_set))
    batch_sampler = torch.utils.data.BatchSampler(data_sampler, mini_batch_size, False)
    test_loader = DataLoader(
        test_set,
        batch_sampler=batch_sampler,
        num_workers=0,  # save some memory
        collate_fn=fast_batch_collator,
        pin_memory=True,
    )
    return test_loader, len(dataset.query)
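# Hedged usage sketch for build_reid_test_loader above (illustrative only): the
# dataset name "Market1501" is an assumed registry key and may not exist in a
# given setup; the helper name below is hypothetical.
def _example_eval_loader(cfg):
    test_loader, num_query = build_reid_test_loader(cfg, dataset_name="Market1501")
    # Batches come from fast_batch_collator; because test_items = query + gallery
    # and InferenceSampler iterates sequentially, the first `num_query` samples
    # form the query set and the rest form the gallery.
    return test_loader, num_query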
def do_test(cfg, model):
    results = OrderedDict()
    for idx, dataset_name in enumerate(cfg.DATASETS.TESTS):
        logger.info("Prepare testing set")
        try:
            data_loader, evaluator = get_evaluator(cfg, dataset_name)
        except NotImplementedError:
            logger.warning("No evaluator found. Implement its `build_evaluator` method.")
            results[dataset_name] = {}
            continue
        results_i = inference_on_dataset(model, data_loader, evaluator, flip_test=cfg.TEST.FLIP_ENABLED)
        results[dataset_name] = results_i

    if comm.is_main_process():
        assert isinstance(
            results, dict
        ), "Evaluator must return a dict on the main process. Got {} instead.".format(results)
        print_csv_format(results)

    if len(results) == 1:
        results = list(results.values())[0]
    return results
def build_cls_test_loader(cfg, dataset_name, mapper=None, **kwargs):
    cfg = cfg.clone()

    dataset = DATASET_REGISTRY.get(dataset_name)(root=_root, **kwargs)
    if comm.is_main_process():
        dataset.show_test()
    test_items = dataset.query

    if mapper is not None:
        transforms = mapper
    else:
        transforms = build_transforms(cfg, is_train=False)

    test_set = CommDataset(test_items, transforms, relabel=False)

    mini_batch_size = cfg.TEST.IMS_PER_BATCH // comm.get_world_size()
    data_sampler = samplers.InferenceSampler(len(test_set))
    batch_sampler = torch.utils.data.BatchSampler(data_sampler, mini_batch_size, False)
    test_loader = DataLoader(
        test_set,
        batch_sampler=batch_sampler,
        num_workers=4,  # a few workers to speed up data loading
        collate_fn=fast_batch_collator,
        pin_memory=True,
    )
    return test_loader
def build_attr_test_loader(cfg, dataset_name):
    cfg = cfg.clone()
    cfg.defrost()

    dataset = DATASET_REGISTRY.get(dataset_name)(root=_root, combineall=cfg.DATASETS.COMBINEALL)
    if comm.is_main_process():
        dataset.show_test()
    test_items = dataset.test

    test_transforms = build_transforms(cfg, is_train=False)
    test_set = AttrDataset(test_items, dataset.attr_dict, test_transforms)

    mini_batch_size = cfg.TEST.IMS_PER_BATCH // comm.get_world_size()
    data_sampler = samplers.InferenceSampler(len(test_set))
    batch_sampler = torch.utils.data.BatchSampler(data_sampler, mini_batch_size, False)
    test_loader = DataLoader(
        test_set,
        batch_sampler=batch_sampler,
        num_workers=4,  # a few workers to speed up data loading
        collate_fn=fast_batch_collator,
        pin_memory=True,
    )
    return test_loader
def evaluate(self):
    if comm.get_world_size() > 1:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    pred_logits = []
    labels = []
    for prediction in predictions:
        pred_logits.append(prediction['logits'])
        labels.append(prediction['labels'])

    pred_logits = torch.cat(pred_logits, dim=0)
    labels = torch.cat(labels, dim=0)

    # measure top-1 accuracy
    acc1, = accuracy(pred_logits, labels, topk=(1,))

    self._results = OrderedDict()
    self._results["Acc@1"] = acc1
    self._results["metric"] = acc1

    return copy.deepcopy(self._results)
def evaluate(self):
    if comm.get_world_size() > 1:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    features = []
    pids = []
    # camids = []
    for prediction in predictions:
        features.append(prediction['feats'])
        pids.append(prediction['pids'])
        # camids.append(prediction['camids'])

    features = torch.cat(features, dim=0)
    pids = torch.cat(pids, dim=0).numpy()

    rerank_dist = compute_jaccard_distance(
        features,
        k1=self.cfg.CLUSTER.JACCARD.K1,
        k2=self.cfg.CLUSTER.JACCARD.K2,
    )
    pseudo_labels = self.cluster.fit_predict(rerank_dist)

    contingency_matrix = metrics.cluster.contingency_matrix(pids, pseudo_labels)
    purity = np.sum(np.amax(contingency_matrix, axis=0)) / np.sum(contingency_matrix)
    return purity
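# Hedged worked example of the purity computation above (the toy labels are made
# up for illustration): with true pids [0, 0, 1, 1] and pseudo labels
# [0, 1, 1, 1], the contingency matrix is [[1, 1], [0, 2]], the per-cluster
# (column) maxima are [1, 2], and purity = (1 + 2) / 4 = 0.75.
import numpy as np
from sklearn import metrics

def _purity_example():
    pids = np.array([0, 0, 1, 1])
    pseudo_labels = np.array([0, 1, 1, 1])
    cm = metrics.cluster.contingency_matrix(pids, pseudo_labels)
    purity = np.sum(np.amax(cm, axis=0)) / np.sum(cm)
    return purity  # 0.75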
def evaluate(self):
    if comm.get_world_size() > 1:
        comm.synchronize()
        predictions = comm.gather(self._predictions, dst=0)
        predictions = list(itertools.chain(*predictions))

        if not comm.is_main_process():
            return {}
    else:
        predictions = self._predictions

    features = []
    pids = []
    # camids = []
    for prediction in predictions:
        features.append(prediction['feats'])
        pids.append(prediction['pids'])
        # camids.append(prediction['camids'])

    features = torch.cat(features, dim=0)
    pids = torch.cat(pids, dim=0).numpy()

    rerank_dist = compute_jaccard_distance(
        features,
        k1=self.cfg.CLUSTER.JACCARD.K1,
        k2=self.cfg.CLUSTER.JACCARD.K2,
    )
    pseudo_labels = self.cluster.fit_predict(rerank_dist)

    ARI_score = metrics.adjusted_rand_score(pids, pseudo_labels)
    return ARI_score
def build_attr_train_loader(cfg):
    train_items = list()
    attr_dict = None
    for d in cfg.DATASETS.NAMES:
        dataset = DATASET_REGISTRY.get(d)(root=_root, combineall=cfg.DATASETS.COMBINEALL)
        if comm.is_main_process():
            dataset.show_train()
        if attr_dict is not None:
            assert attr_dict == dataset.attr_dict, f"attr_dict in {d} does not match with previous ones"
        else:
            attr_dict = dataset.attr_dict
        train_items.extend(dataset.train)

    train_transforms = build_transforms(cfg, is_train=True)
    train_set = AttrDataset(train_items, train_transforms, attr_dict)

    num_workers = cfg.DATALOADER.NUM_WORKERS
    mini_batch_size = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()
    data_sampler = samplers.TrainingSampler(len(train_set))
    batch_sampler = torch.utils.data.sampler.BatchSampler(data_sampler, mini_batch_size, True)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=fast_batch_collator,
        pin_memory=True,
    )
    return train_loader
def evaluate(self):
    if comm.get_world_size() > 1:
        comm.synchronize()
        pred_logits = comm.gather(self.pred_logits)
        pred_logits = sum(pred_logits, [])

        labels = comm.gather(self.labels)
        labels = sum(labels, [])

        # fmt: off
        if not comm.is_main_process(): return {}
        # fmt: on
    else:
        pred_logits = self.pred_logits
        labels = self.labels

    pred_logits = torch.cat(pred_logits, dim=0)
    labels = torch.stack(labels)

    # measure top-1 accuracy
    acc1, = accuracy(pred_logits, labels, topk=(1,))

    self._results = OrderedDict()
    self._results["Acc@1"] = acc1
    self._results["metric"] = acc1

    return copy.deepcopy(self._results)
def _test_loader_from_config(cfg, *, dataset_name=None, test_set=None, num_query=0, transforms=None, **kwargs):
    if transforms is None:
        transforms = build_transforms(cfg, is_train=False)

    if test_set is None:
        assert dataset_name is not None, "dataset_name must be explicitly passed in when test_set is not provided"
        data = DATASET_REGISTRY.get(dataset_name)(root=_root, **kwargs)
        if comm.is_main_process():
            data.show_test()
        test_items = data.query + data.gallery
        test_set = CommDataset(test_items, transforms, relabel=False)

        # Update query number
        num_query = len(data.query)

    return {
        "test_set": test_set,
        "test_batch_size": cfg.TEST.IMS_PER_BATCH,
        "num_query": num_query,
    }
def test(cls, cfg, model):
    """
    Args:
        cfg (CfgNode):
        model (nn.Module):

    Returns:
        dict: a dict of result metrics
    """
    logger = logging.getLogger(__name__)

    results = OrderedDict()
    for idx, dataset_name in enumerate(cfg.DATASETS.TESTS):
        logger.info("Prepare testing set")
        try:
            data_loader, evaluator = cls.build_evaluator(cfg, dataset_name)
        except NotImplementedError:
            logger.warning("No evaluator found. Implement its `build_evaluator` method.")
            results[dataset_name] = {}
            continue
        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i

    if comm.is_main_process():
        assert isinstance(
            results, dict
        ), "Evaluator must return a dict on the main process. Got {} instead.".format(results)
        print_csv_format(results)

    if len(results) == 1:
        results = list(results.values())[0]
    return results
def _write_metrics(self, metrics_dict: dict):
    """
    Args:
        metrics_dict (dict): dict of scalar metrics
    """
    metrics_dict = {
        k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
        for k, v in metrics_dict.items()
    }

    # gather metrics among all workers for logging
    # This assumes we do DDP-style training, which is currently the only
    # supported method in fastreid.
    all_metrics_dict = comm.gather(metrics_dict)

    if comm.is_main_process():
        if "data_time" in all_metrics_dict[0]:
            # data_time among workers can have high variance. The actual latency
            # caused by data_time is the maximum among workers.
            data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
            self.storage.put_scalar("data_time", data_time)

        # average the rest of the metrics
        metrics_dict = {
            k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys()
        }
        total_losses_reduced = sum(loss for loss in metrics_dict.values())

        self.storage.put_scalar("total_loss", total_losses_reduced)
        if len(metrics_dict) > 1:
            self.storage.put_scalars(**metrics_dict)
def evaluate(self):
    if comm.get_world_size() > 1:
        comm.synchronize()
        features = comm.gather(self.features)
        features = sum(features, [])

        # fmt: off
        if not comm.is_main_process(): return {}
        # fmt: on
    else:
        features = self.features

    features = torch.cat(features, dim=0)
    features = F.normalize(features, p=2, dim=1).numpy()

    self._results = OrderedDict()

    tpr, fpr, accuracy, best_thresholds = evaluate(features, self.labels)
    self._results["Accuracy"] = accuracy.mean() * 100
    self._results["Threshold"] = best_thresholds.mean()
    self._results["metric"] = accuracy.mean() * 100

    buf = gen_plot(fpr, tpr)
    roc_curve = Image.open(buf)
    PathManager.mkdirs(self._output_dir)
    roc_curve.save(os.path.join(self._output_dir, self.dataset_name + "_roc.png"))

    return copy.deepcopy(self._results)
def test_and_save_results():
    if comm.is_main_process():
        self._last_eval_results = self.test(self.cfg, self.model)
        torch.cuda.empty_cache()
        return self._last_eval_results
    else:
        return None
def auto_scale_hyperparams(cfg, num_classes):
    r"""
    This is used to automatically compute the actual number of training iterations,
    because some hyper-parameters, such as MAX_ITER, are specified in training epochs
    rather than iterations, so they need to be converted to iterations.
    """
    cfg = cfg.clone()
    frozen = cfg.is_frozen()
    cfg.defrost()

    # If the number of classes is not hard-coded, compute it automatically
    if cfg.MODEL.HEADS.NUM_CLASSES == 0:
        output_dir = cfg.OUTPUT_DIR
        cfg.MODEL.HEADS.NUM_CLASSES = num_classes
        logger = logging.getLogger(__name__)
        logger.info(f"Auto-scaling the num_classes={cfg.MODEL.HEADS.NUM_CLASSES}")

        # Update the saved config file to make the number of classes valid
        if comm.is_main_process() and output_dir:
            # Note: some of our scripts may expect the existence of
            # config.yaml in output directory
            path = os.path.join(output_dir, "config.yaml")
            with PathManager.open(path, "w") as f:
                f.write(cfg.dump())

    if frozen:
        cfg.freeze()

    return cfg
def build_hooks(self):
    """
    Build a list of default hooks, including timing, evaluation,
    checkpointing, lr scheduling, precise BN, writing events.

    Returns:
        list[HookBase]:
    """
    logger = logging.getLogger(__name__)
    cfg = self.cfg.clone()
    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 0  # save some memory and time for PreciseBN
    cfg.DATASETS.NAMES = tuple([cfg.TEST.PRECISE_BN.DATASET])  # set dataset name for PreciseBN

    ret = [
        hooks.IterationTimer(),
        hooks.LRScheduler(self.optimizer, self.scheduler),
    ]

    if cfg.TEST.PRECISE_BN.ENABLED and hooks.get_bn_modules(self.model):
        logger.info("Prepare precise BN dataset")
        ret.append(
            hooks.PreciseBN(
                # Run at the same freq as (but before) evaluation.
                self.model,
                # Build a new data loader to not affect training
                self.build_train_loader(cfg),
                cfg.TEST.PRECISE_BN.NUM_ITER,
            )
        )

    if len(cfg.MODEL.FREEZE_LAYERS) > 0 and cfg.SOLVER.FREEZE_ITERS > 0:
        ret.append(
            hooks.LayerFreeze(
                self.model,
                cfg.MODEL.FREEZE_LAYERS,
                cfg.SOLVER.FREEZE_ITERS,
            )
        )

    # Do PreciseBN before checkpointer, because it updates the model and needs to
    # be saved by checkpointer.
    # This is not always the best: if checkpointing has a different frequency,
    # some checkpoints may have more precise statistics than others.

    def test_and_save_results():
        self._last_eval_results = self.test(self.cfg, self.model)
        return self._last_eval_results

    # Do evaluation before checkpointer, because then if it fails,
    # we can use the saved checkpoint to debug.
    ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))

    if comm.is_main_process():
        ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD))

        # run writers in the end, so that evaluation metrics are written
        ret.append(hooks.PeriodicWriter(self.build_writers(), 200))

    return ret
def __init__(self, cfg):
    """
    Args:
        cfg (CfgNode):
    """
    super().__init__()
    logger = logging.getLogger("fastreid")
    if not logger.isEnabledFor(logging.INFO):  # setup_logger is not called for fastreid
        setup_logger()

    # Assume these objects must be constructed in this order.
    data_loader = self.build_train_loader(cfg)
    cfg = self.auto_scale_hyperparams(cfg, data_loader.dataset.num_classes)
    model = self.build_model(cfg)
    optimizer = self.build_optimizer(cfg, model)

    optimizer_ckpt = dict(optimizer=optimizer)
    if cfg.SOLVER.FP16_ENABLED:
        model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
        optimizer_ckpt.update(dict(amp=amp))

    # For training, wrap with DDP. But don't need this for inference.
    if comm.get_world_size() > 1:
        # ref to https://github.com/pytorch/pytorch/issues/22049 to set `find_unused_parameters=True`
        # for part of the parameters is not updated.
        # model = DistributedDataParallel(
        #     model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
        # )
        model = DistributedDataParallel(model, delay_allreduce=True)

    self._trainer = (AMPTrainer if cfg.SOLVER.FP16_ENABLED else SimpleTrainer)(
        model, data_loader, optimizer
    )

    self.iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
    self.scheduler = self.build_lr_scheduler(cfg, optimizer, self.iters_per_epoch)

    # Assume no other objects need to be checkpointed.
    # We can later make it checkpoint the stateful hooks
    self.checkpointer = Checkpointer(
        # Assume you want to save checkpoints together with logs/statistics
        model,
        cfg.OUTPUT_DIR,
        save_to_disk=comm.is_main_process(),
        **optimizer_ckpt,
        **self.scheduler,
    )

    self.start_epoch = 0
    self.max_epoch = cfg.SOLVER.MAX_EPOCH
    self.max_iter = self.max_epoch * self.iters_per_epoch
    self.warmup_iters = cfg.SOLVER.WARMUP_ITERS
    self.delay_epochs = cfg.SOLVER.DELAY_EPOCHS
    self.cfg = cfg

    self.register_hooks(self.build_hooks())
def default_setup(cfg, args):
    """
    Perform some basic common setups at the beginning of a job, including:

    1. Set up the detectron2 logger
    2. Log basic information about environment, cmdline arguments, and config
    3. Backup the config to the output directory

    Args:
        cfg (CfgNode): the full config to be used
        args (argparse.NameSpace): the command line arguments to be logged
    """
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        PathManager.mkdirs(output_dir)

    rank = comm.get_rank()
    setup_logger(output_dir, distributed_rank=rank, name="fvcore")
    logger = setup_logger(output_dir, distributed_rank=rank)

    logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size()))
    logger.info("Environment info:\n" + collect_env_info())

    logger.info("Command line arguments: " + str(args))
    if hasattr(args, "config_file") and args.config_file != "":
        logger.info(
            "Contents of args.config_file={}:\n{}".format(
                args.config_file, PathManager.open(args.config_file, "r").read()
            )
        )

    logger.info("Running with full config:\n{}".format(cfg))
    if comm.is_main_process() and output_dir:
        # Note: some of our scripts may expect the existence of
        # config.yaml in output directory
        path = os.path.join(output_dir, "config.yaml")
        with PathManager.open(path, "w") as f:
            f.write(cfg.dump())
        logger.info("Full config saved to {}".format(os.path.abspath(path)))

    # make sure each worker has a different, yet deterministic seed if specified
    seed_all_rng()

    # cudnn benchmark has large overhead. It shouldn't be used considering the small size of
    # typical validation set.
    if not (hasattr(args, "eval_only") and args.eval_only):
        torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK
def load_dataset(cls, name):
    logger = logging.getLogger(__name__)
    logger.info(f"Preparing {name}")

    _root = os.getenv("FASTREID_DATASETS", "/root/datasets")
    data = DATASET_REGISTRY.get(name)(root=_root)
    if comm.is_main_process():
        data.show_train()
    return data
def train(self):
    """
    Run training.

    Returns:
        OrderedDict of results, if evaluation is enabled. Otherwise None.
    """
    super().train(self.start_epoch, self.max_epoch, self.iters_per_epoch)
    if comm.is_main_process():
        assert hasattr(self, "_last_eval_results"), "No evaluation results obtained during training!"
        return self._last_eval_results
def auto_scale_hyperparams(cfg, data_loader):
    r"""
    This is used to automatically compute the actual number of training iterations,
    because some hyper-parameters, such as MAX_ITER, are specified in training epochs
    rather than iterations, so they need to be converted to iterations.
    """
    cfg = cfg.clone()
    frozen = cfg.is_frozen()
    cfg.defrost()

    # If the number of classes is not hard-coded, compute it automatically
    if cfg.MODEL.HEADS.NUM_CLASSES == 0:
        output_dir = cfg.OUTPUT_DIR
        cfg.MODEL.HEADS.NUM_CLASSES = data_loader.dataset.num_classes
        # Update the saved config file to make the number of classes valid
        if comm.is_main_process() and output_dir:
            # Note: some of our scripts may expect the existence of
            # config.yaml in output directory
            path = os.path.join(output_dir, "config.yaml")
            with PathManager.open(path, "w") as f:
                f.write(cfg.dump())

    if cfg.MODEL.LOSSES.USE_CLOTHES:
        cfg.MODEL.HEADS.NUM_CLO_CLASSES = data_loader.dataset.num_clothes

    iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
    cfg.SOLVER.MAX_ITER *= iters_per_epoch
    cfg.SOLVER.WARMUP_ITERS *= iters_per_epoch
    cfg.SOLVER.FREEZE_ITERS *= iters_per_epoch
    cfg.SOLVER.DELAY_ITERS *= iters_per_epoch
    for i in range(len(cfg.SOLVER.STEPS)):
        cfg.SOLVER.STEPS[i] *= iters_per_epoch
    cfg.SOLVER.SWA.ITER *= iters_per_epoch
    cfg.SOLVER.SWA.PERIOD *= iters_per_epoch

    ckpt_multiple = cfg.SOLVER.CHECKPOINT_PERIOD / cfg.TEST.EVAL_PERIOD
    # Evaluation period must be divisible by 200 for writing into tensorboard.
    eval_num_mod = (200 - cfg.TEST.EVAL_PERIOD * iters_per_epoch) % 200
    cfg.TEST.EVAL_PERIOD = cfg.TEST.EVAL_PERIOD * iters_per_epoch + eval_num_mod
    # Change checkpoint saving period to be consistent with evaluation period.
    cfg.SOLVER.CHECKPOINT_PERIOD = int(cfg.TEST.EVAL_PERIOD * ckpt_multiple)

    logger = logging.getLogger(__name__)
    logger.info(
        f"Auto-scaling the config to num_classes={cfg.MODEL.HEADS.NUM_CLASSES}, "
        f"max_Iter={cfg.SOLVER.MAX_ITER}, warmup_Iter={cfg.SOLVER.WARMUP_ITERS}, "
        f"freeze_Iter={cfg.SOLVER.FREEZE_ITERS}, delay_Iter={cfg.SOLVER.DELAY_ITERS}, "
        f"step_Iter={cfg.SOLVER.STEPS}, ckpt_Iter={cfg.SOLVER.CHECKPOINT_PERIOD}, "
        f"eval_Iter={cfg.TEST.EVAL_PERIOD}."
    )

    if frozen:
        cfg.freeze()

    return cfg
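# Hedged worked example of the epoch-to-iteration scaling above; the dataset
# size, batch size and epoch counts below are made-up illustrative numbers.
def _scaling_example():
    num_images = 12936           # assumed training-set size
    ims_per_batch = 64           # assumed SOLVER.IMS_PER_BATCH
    iters_per_epoch = num_images // ims_per_batch     # 202

    max_iter_epochs = 60         # MAX_ITER specified in epochs
    max_iter = max_iter_epochs * iters_per_epoch      # 12120 iterations

    eval_period_epochs = 30      # TEST.EVAL_PERIOD specified in epochs
    eval_iter = eval_period_epochs * iters_per_epoch  # 6060
    eval_num_mod = (200 - eval_iter) % 200            # 140
    eval_iter += eval_num_mod    # 6200, a multiple of the 200-iteration writer period
    return max_iter, eval_iter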
def auto_scale_hyperparams(cfg, num_classes):
    cfg = DefaultTrainer.auto_scale_hyperparams(cfg, num_classes)

    # Save index-to-class dictionary
    output_dir = cfg.OUTPUT_DIR
    if comm.is_main_process() and output_dir:
        path = os.path.join(output_dir, "idx2class.json")
        with PathManager.open(path, "w") as f:
            json.dump(ClasTrainer.idx2class, f)

    return cfg
def build_test_loader(cls, cfg, dataset_name):
    dataset = DATASET_REGISTRY.get(dataset_name)(root=_root)
    attr_dict = dataset.attr_dict
    if comm.is_main_process():
        dataset.show_test()
    test_items = dataset.test

    test_transforms = build_transforms(cfg, is_train=False)
    test_set = AttrDataset(test_items, test_transforms, attr_dict)
    data_loader, _ = build_reid_test_loader(cfg, test_set=test_set)
    return data_loader
def _train_loader_from_config(cfg, *, train_set=None, transforms=None, sampler=None, **kwargs):
    if transforms is None:
        transforms = build_transforms(cfg, is_train=True)

    if train_set is None:
        train_items = list()
        for d in cfg.DATASETS.NAMES:
            data = DATASET_REGISTRY.get(d)(root=_root, **kwargs)
            if comm.is_main_process():
                data.show_train()
            train_items.extend(data.train)

        train_set = CommDataset(train_items, transforms, relabel=True)

    if sampler is None:
        sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
        num_instance = cfg.DATALOADER.NUM_INSTANCE
        mini_batch_size = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()

        logger = logging.getLogger(__name__)
        logger.info("Using training sampler {}".format(sampler_name))
        if sampler_name == "TrainingSampler":
            sampler = samplers.TrainingSampler(len(train_set))
        elif sampler_name == "NaiveIdentitySampler":
            sampler = samplers.NaiveIdentitySampler(train_set.img_items, mini_batch_size, num_instance)
        elif sampler_name == "BalancedIdentitySampler":
            sampler = samplers.BalancedIdentitySampler(train_set.img_items, mini_batch_size, num_instance)
        elif sampler_name == "SetReWeightSampler":
            set_weight = cfg.DATALOADER.SET_WEIGHT
            sampler = samplers.SetReWeightSampler(train_set.img_items, mini_batch_size, num_instance, set_weight)
        elif sampler_name == "ImbalancedDatasetSampler":
            sampler = samplers.ImbalancedDatasetSampler(train_set.img_items)
        else:
            raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return {
        "train_set": train_set,
        "sampler": sampler,
        "total_batch_size": cfg.SOLVER.IMS_PER_BATCH,
        "num_workers": cfg.DATALOADER.NUM_WORKERS,
    }
def build_reid_train_loader(cfg, mapper=None, **kwargs):
    """
    Build a reid train loader.

    Args:
        cfg (CfgNode): the config used to build datasets, transforms and the sampler.
        mapper (callable, optional): a transform applied to the training images;
            if None, transforms are built from ``cfg``.

    Returns:
        torch.utils.data.DataLoader: a dataloader.
    """
    cfg = cfg.clone()

    train_items = list()
    for d in cfg.DATASETS.NAMES:
        dataset = DATASET_REGISTRY.get(d)(root=_root, combineall=cfg.DATASETS.COMBINEALL, **kwargs)
        if comm.is_main_process():
            dataset.show_train()
        train_items.extend(dataset.train)

    if mapper is not None:
        transforms = mapper
    else:
        transforms = build_transforms(cfg, is_train=True)

    train_set = CommDataset(train_items, transforms, relabel=True)

    num_workers = cfg.DATALOADER.NUM_WORKERS
    num_instance = cfg.DATALOADER.NUM_INSTANCE
    mini_batch_size = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()

    if cfg.DATALOADER.PK_SAMPLER:
        if cfg.DATALOADER.NAIVE_WAY:
            data_sampler = samplers.NaiveIdentitySampler(train_set.img_items, mini_batch_size, num_instance)
        else:
            data_sampler = samplers.BalancedIdentitySampler(train_set.img_items, mini_batch_size, num_instance)
    else:
        data_sampler = samplers.TrainingSampler(len(train_set))

    batch_sampler = torch.utils.data.sampler.BatchSampler(data_sampler, mini_batch_size, True)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=fast_batch_collator,
        pin_memory=True,
    )
    return train_loader
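# Hedged usage sketch for the `mapper` argument above (illustrative only): any
# callable that maps a PIL image to a tensor can replace the transforms built
# from cfg. The torchvision pipeline below is an assumption, not the project's
# default augmentation; the helper name is hypothetical.
import torchvision.transforms as T

def _example_train_loader_with_mapper(cfg):
    mapper = T.Compose([
        T.Resize((256, 128)),
        T.ToTensor(),
    ])
    return build_reid_train_loader(cfg, mapper=mapper)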
def test(cls, cfg, model, evaluators=None):
    """
    Args:
        cfg (CfgNode):
        model (nn.Module):
        evaluators (list[DatasetEvaluator] or None): if None, will call
            :meth:`build_evaluator`. Otherwise, must have the same length as
            `cfg.DATASETS.TEST`.

    Returns:
        dict: a dict of result metrics
    """
    logger = logging.getLogger(__name__)

    if isinstance(evaluators, DatasetEvaluator):
        evaluators = [evaluators]

    if evaluators is not None:
        assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
            len(cfg.DATASETS.TEST), len(evaluators)
        )

    results = OrderedDict()
    for idx, dataset_name in enumerate(cfg.DATASETS.TESTS):
        logger.info("Prepare testing set")
        data_loader, num_query = cls.build_test_loader(cfg, dataset_name)
        # When evaluators are passed in as arguments,
        # implicitly assume that evaluators can be created before data_loader.
        if evaluators is not None:
            evaluator = evaluators[idx]
        else:
            try:
                evaluator = cls.build_evaluator(cfg, num_query)
            except NotImplementedError:
                logger.warning(
                    "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
                    "or implement its `build_evaluator` method."
                )
                results[dataset_name] = {}
                continue
        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i

    if comm.is_main_process():
        assert isinstance(
            results, dict
        ), "Evaluator must return a dict on the main process. Got {} instead.".format(results)
        print_csv_format(results)

    if len(results) == 1:
        results = list(results.values())[0]
    return results
def build_reid_train_loader(cfg):
    cfg = cfg.clone()
    cfg.defrost()

    train_items = list()
    for d in cfg.DATASETS.NAMES:
        dataset = DATASET_REGISTRY.get(d)(root=_root, combineall=cfg.DATASETS.COMBINEALL)
        if comm.is_main_process():
            dataset.show_train()
        train_items.extend(dataset.train)

    iters_per_epoch = len(train_items) // cfg.SOLVER.IMS_PER_BATCH
    cfg.SOLVER.MAX_ITER *= iters_per_epoch
    train_transforms = build_transforms(cfg, is_train=True)

    if not cfg.DATALOADER.IS_CLO_CHANGES:
        train_set = CommDataset(train_items, train_transforms, relabel=True)
    else:
        # For clothes-changing datasets
        train_set = CCDatasets(train_items, train_transforms, relabel=True)

    num_workers = cfg.DATALOADER.NUM_WORKERS
    num_instance = cfg.DATALOADER.NUM_INSTANCE
    mini_batch_size = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()

    if cfg.DATALOADER.PK_SAMPLER:
        if cfg.DATALOADER.NAIVE_WAY:
            data_sampler = samplers.NaiveIdentitySampler(
                train_set.img_items, cfg.SOLVER.IMS_PER_BATCH, num_instance, None, True)
        else:
            data_sampler = samplers.BalancedIdentitySampler(
                train_set.img_items, cfg.SOLVER.IMS_PER_BATCH, num_instance)
    else:
        data_sampler = samplers.TrainingSampler(len(train_set))

    batch_sampler = torch.utils.data.sampler.BatchSampler(data_sampler, mini_batch_size, True)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=fast_batch_collator,
        pin_memory=True,
    )
    return train_loader
def init_pretrained_weights(key):
    """Initializes model with pretrained weights.

    Layers that don't match with pretrained layers in name or size are kept unchanged.
    """
    import os
    import errno
    import gdown

    def _get_torch_home():
        ENV_TORCH_HOME = 'TORCH_HOME'
        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'
        DEFAULT_CACHE_DIR = '~/.cache'
        torch_home = os.path.expanduser(
            os.getenv(
                ENV_TORCH_HOME,
                os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch')
            )
        )
        return torch_home

    torch_home = _get_torch_home()
    model_dir = os.path.join(torch_home, 'checkpoints')
    try:
        os.makedirs(model_dir)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # Directory already exists, ignore.
            pass
        else:
            # Unexpected OSError, re-raise.
            raise

    filename = model_urls[key].split('/')[-1]
    cached_file = os.path.join(model_dir, filename)

    if not os.path.exists(cached_file):
        if comm.is_main_process():
            gdown.download(model_urls[key], cached_file, quiet=False)

    comm.synchronize()

    logger.info(f"Loading pretrained model from {cached_file}")
    state_dict = torch.load(cached_file, map_location=torch.device('cpu'))

    return state_dict
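# Hedged usage sketch for init_pretrained_weights above: the key must exist in
# the module-level `model_urls` dict; "osnet_x1_0" is only an example key and
# may not be defined in every build. The helper name is hypothetical.
def _example_load_pretrained(model):
    state_dict = init_pretrained_weights("osnet_x1_0")
    # strict=False reports missing/unexpected keys instead of raising on them
    incompatible = model.load_state_dict(state_dict, strict=False)
    return incompatible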
def build_test_loader(cls, cfg, dataset_name):
    """
    Returns:
        iterable

    It now calls :func:`fastreid.data.build_reid_test_loader`.
    Overwrite it if you'd like a different data loader.
    """
    data = DATASET_REGISTRY.get(dataset_name)(root=_root)
    if comm.is_main_process():
        data.show_test()

    transforms = build_transforms(cfg, is_train=False)
    test_set = ClasDataset(data.query, transforms)
    data_loader, _ = build_reid_test_loader(cfg, test_set=test_set)
    return data_loader