Python DatasetMapper Examples, detectron2.data.dataset_mapper.DatasetMapper Python Examples

Example #1

0

Show file

def build_detection_val_loader(cfg, dataset_name: str, mapper=None):
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]
        ]
        if cfg.MODEL.LOAD_PROPOSALS
        else None,
    )
    # dataset_dicts = get_detection_dataset_dicts(
    #     [dataset_name],
    #     filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
    #     min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
    #     if cfg.MODEL.KEYPOINT_ON
    #     else 0,
    #     proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    # )
    dataset = DatasetFromList(dataset_dicts)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)
    sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader

Example #2

0

Show file

File: build.py Project: wangerxiao001/unbiased-teacher

def build_detection_test_loader(cfg, dataset_name, mapper=None):
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          1,
                                                          drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader

Example #3

0

Show file

File: train_net.py Project: stepp1/compVision-DCC

def do_test(cfg, model):
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        mapper = None if cfg.INPUT.TEST_INPUT_TYPE == 'default' else \
            DatasetMapper(
                cfg, False, augmentations=build_custom_augmentation(cfg, False))
        data_loader = build_detection_test_loader(cfg,
                                                  dataset_name,
                                                  mapper=mapper)
        output_folder = os.path.join(cfg.OUTPUT_DIR,
                                     "inference_{}".format(dataset_name))
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type

        if evaluator_type == "lvis":
            evaluator = LVISEvaluator(dataset_name, cfg, True, output_folder)
        elif evaluator_type == 'coco':
            evaluator = COCOEvaluator(dataset_name, cfg, True, output_folder)
        else:
            assert 0, evaluator_type

        results[dataset_name] = inference_on_dataset(model, data_loader,
                                                     evaluator)
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            print_csv_format(results[dataset_name])
    if len(results) == 1:
        results = list(results.values())[0]
    return results

Example #4

0

Show file

File: train_net.py Project: GaetanLepage/compound-figure-separator

    def build_hooks(self) -> List[HookBase]:
        """
        This method overwrites the default one from DefaultTrainer.
        It adds (if necessary) the `LossEvalHook` that allows evaluating the loss on the
        validation set.

        Returns:
            List[HookBase]: The augmented list of hooks.
        """
        # Build a list of default hooks, including timing, evaluation,
        # checkpointing, lr scheduling, precise BN, writing events.
        hooks = super().build_hooks()

        # We add our custom validation hook
        if self.cfg.DATASETS.VALIDATION != "":
            data_set_mapper: DatasetMapper = DatasetMapper.from_config(
                cfg=self.cfg, is_train=True)

            data_loader: DataLoader = build_detection_test_loader(
                cfg=self.cfg,
                dataset_name=self.cfg.DATASETS.VALIDATION,
                mapper=data_set_mapper)

            loss_eval_hook: LossEvalHook = LossEvalHook(
                eval_period=self.cfg.VALIDATION.VALIDATION_PERIOD,
                model=self.model,
                data_loader=data_loader)

            hooks.insert(index=-1, obj=loss_eval_hook)

        return hooks

Example #5

0

Show file

    def build_test_loader(cls, cfg, dataset_name):
        """
        Custom dataloader to provide model with ground truth bounding boxes
        """
        # returns a list of dicts. Every entry in the list corresponds to one sample, represented by a dict.
        dataset_dicts = detectron2.data.get_detection_dataset_dicts(
            dataset_name)

        # add proposal boxes
        for i, s in enumerate(dataset_dicts):
            s["proposal_boxes"] = np.array(
                [ann["bbox"] for ann in dataset_dicts[i]["annotations"]]
            )  # np.array([[xmin, ymin, xmax, ymax],[xmin, ymin, xmax, ...], ...]) # kx4 matrix for k proposed bounding boxes
            s["proposal_objectness_logits"] = np.full(
                (s["proposal_boxes"].shape[0], ),
                10)  # logit of 10 is 99.999...%
            s["proposal_bbox_mode"] = detectron2.structures.BoxMode.XYWH_ABS  # 1 # (x0, y0, w, h) in absolute floating points coordinates

        print("Proposal boxes for test data added.")

        return build_detection_test_loader(dataset_dicts,
                                           mapper=DatasetMapper(
                                               is_train=False,
                                               augmentations=[],
                                               image_format=cfg.INPUT.FORMAT,
                                               precomputed_proposal_topk=500))

Example #6

0

Show file

File: build.py Project: e96031413/BalancedMetaSoftmax-InstanceSeg

def build_detection_meta_loader(cfg, mapper=None):
    """
    build the meta set from training data with Class Balanced Sampling
    """
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    logger = logging.getLogger(__name__)
    logger.info("Using training sampler Class Balanced Sampler")
    repeat_factors = ClassBalancedTrainingSampler.repeat_factors_by_inverse_category_frequency(
        dataset_dicts)
    sampler = ClassBalancedTrainingSampler(repeat_factors)
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

Example #7

0

Show file

    def build_train_loader(cls, cfg):
        """
        custom dataloader to provide model with ground truth bounding boxes
        """

        # returns a list of dicts. Every entry in the list corresponds to one sample, represented by a dict.
        dataset_dicts = detectron2.data.get_detection_dataset_dicts(
            cfg.DATASETS.TRAIN[0])

        # add proposal boxes
        for i, s in enumerate(dataset_dicts):
            s["proposal_boxes"] = np.array(
                [ann["bbox"] for ann in dataset_dicts[i]["annotations"]]
            )  # np.array([[xmin, ymin, xmax, ymax],[xmin, ymin, xmax, ...], ...]) # kx4 matrix for k proposed bounding boxes
            s["proposal_objectness_logits"] = np.full(
                (s["proposal_boxes"].shape[0], ),
                10)  # logit of 10 is 99.999...%
            s["proposal_bbox_mode"] = detectron2.structures.BoxMode.XYWH_ABS  # 1 # (x0, y0, w, h) in absolute floating points coordinates

        print("Proposal boxes added.")

        return build_detection_train_loader(
            dataset_dicts,
            mapper=DatasetMapper(is_train=True,
                                 augmentations=[],
                                 image_format=cfg.INPUT.FORMAT,
                                 precomputed_proposal_topk=500),
            total_batch_size=cfg.SOLVER.IMS_PER_BATCH,
            aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
            num_workers=cfg.DATALOADER.NUM_WORKERS)

Example #8

0

Show file

def build_detection_val_loader(cfg,
                               dataset_name,
                               total_batch_size,
                               mapper=None):
    """
    Similar to `build_detection_train_loader`.
    But this function uses the given `dataset_name` argument (instead of the names in cfg),
    and uses batch size 1.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
           and returns the format to be consumed by the model.
           By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """

    world_size = comm.get_world_size()
    assert (
        total_batch_size > 0 and total_batch_size % world_size == 0
    ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format(
        total_batch_size, world_size)

    batch_size = total_batch_size // world_size
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    # sampler = InferenceSampler(len(dataset))
    sampler = DistributedSampler(dataset, shuffle=False)

    # logger.info("Start Computing Validation Loss on {} images".format(len(dataset)))
    # drop_last so the batch always have the same size
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          batch_size,
                                                          drop_last=False)
    # batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader

Example #9

0

Show file

File: train_net.py Project: dmitryshendryk/wesmart_lpr

 def build_train_loader(cls, cfg):
     """
     Returns:
         iterable
     It calls :func:`detectron2.data.build_detection_train_loader` with a customized
     DatasetMapper, which adds categorical labels as a semantic mask.
     """
     mapper = DatasetMapper(cfg, True)
     return build_detection_train_loader(cfg, mapper)

Example #10

0

Show file

 def build_train_loader(cls, cfg):
     """
     Returns:
         iterable
     It now calls :func:`detectron2.data.build_detection_train_loader`.
     Overwrite it if you'd like a different data loader.
     """
     return build_detection_train_loader(cfg,
                                         mapper=DatasetMapper(cfg, True))

Example #11

0

Show file

File: build.py Project: ubc-vision/UniT

def build_detection_test_query_loader(cfg, dataset_name, mapper=None):
    """
    Similar to `build_detection_train_loader`.
    But this function uses the given `dataset_name` argument (instead of the names in cfg),
    and uses batch size 1.
    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
           and returns the format to be consumed by the model.
           By default it will be `DatasetMapper(cfg, False)`.
    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = samplers.InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          images_per_worker,
                                                          drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader

Example #12

0

Show file

def build_detection_train_loader(cfg, mapper=None):
    """
    A data loader is created by the following steps:

    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
    2. Coordinate a random shuffle order shared among all processes (all GPUs)
    3. Each process spawn another few workers to process the dicts. Each worker will:
       * Map each metadata dict into another format to be consumed by the model.
       * Batch them by simply putting dicts into a list.

    The batched ``list[mapped_dict]`` is what this dataloader will yield.

    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            By default it will be ``DatasetMapper(cfg, True)``.

    Returns:
        an infinite iterator of training data
    """
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON
        else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)
    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD
        )
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))
    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

Example #13

0

Show file

    def build_hooks(self):
        hooks = super().build_hooks()
        hooks.insert(-1,LossEvalHook(
            self.cfg.TEST.EVAL_PERIOD,
            self.model,
            build_detection_val_loader(self.cfg ,
                                       self.cfg.DATASETS.TEST[0],
                                       DatasetMapper(self.cfg,True) # label 필요
                                       )
        ))
        return hooks

#--------------------

Example #14

0

Show file

File: D2_rdd2020.py Project: zhangbo1227/rdd2020

    def build_mapper(cls, cfg, is_train=True):
        augs = detection_utils.build_augmentation(cfg, is_train)
        #if cfg.INPUT.CROP.ENABLED and is_train:
        #    augs.insert(0, T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE))

        # Define a sequence of augmentations: TODO
        augs.append(T.RandomBrightness(0.9, 1.1))
        augs.append(MyColorAugmentation())
        # augs.append(T.RandomRotation([5,10,15,20,25,30], expand=True, center=None))
        # T.RandomCrop("absolute", (640, 640)),
        # MyCustomResize()
        # type: T.Augmentation
        return DatasetMapper(cfg, is_train=is_train, augmentations=augs)

Example #15

0

Show file

File: build.py Project: wangerxiao001/unbiased-teacher

def build_detection_semisup_train_loader(cfg, mapper=None):

    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    # Divide into labeled and unlabeled sets according to supervision percentage
    label_dicts, unlabel_dicts = divide_label_unlabel(
        dataset_dicts,
        cfg.DATALOADER.SUP_PERCENT,
        cfg.DATALOADER.RANDOM_DATA_SEED,
        cfg.DATALOADER.RANDOM_DATA_SEED_PATH,
    )

    dataset = DatasetFromList(label_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))

    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = (
            RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
                label_dicts, cfg.DATALOADER.REPEAT_THRESHOLD))
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    # list num of labeled and unlabeled
    logger.info("Number of training samples " + str(len(dataset)))
    logger.info("Supervision percentage " + str(cfg.DATALOADER.SUP_PERCENT))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

Example #16

0

Show file

    def build_train_loader(cls, cfg):
        """
        Returns:
            iterable

        It calls :func:`detectron2.data.build_detection_train_loader` with a customized
        DatasetMapper, which adds categorical labels as a semantic mask.
        """

        register_coco_instances("fruits_nuts", {},
                                "./fruit_nuts/trainval.json",
                                "./fruit_nuts/images")
        fruits_nuts_metadata = MetadataCatalog.get("fruits_nuts")
        mapper = DatasetMapper(cfg, True)
        return build_detection_train_loader(cfg, mapper)

Example #17

0

Show file

File: build.py Project: facebookresearch/d2go

def build_weighted_detection_train_loader(cfg: CfgNode, mapper=None):
    dataset_repeat_factors = get_train_datasets_repeat_factors(cfg)
    # OrderedDict to guarantee order of values() consistent with repeat factors
    dataset_name_to_dicts = OrderedDict(
        {
            name: get_detection_dataset_dicts(
                [name],
                filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
                min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                if cfg.MODEL.KEYPOINT_ON
                else 0,
                proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
                if cfg.MODEL.LOAD_PROPOSALS
                else None,
            )
            for name in cfg.DATASETS.TRAIN
        }
    )
    # Repeat factor for every sample in the dataset
    repeat_factors = [
        [dataset_repeat_factors[dsname]] * len(dataset_name_to_dicts[dsname])
        for dsname in cfg.DATASETS.TRAIN
    ]
    repeat_factors = list(itertools.chain.from_iterable(repeat_factors))

    dataset_dicts = dataset_name_to_dicts.values()
    dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
    dataset = DatasetFromList(dataset_dicts, copy=False)
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    logger.info(
        "Using WeightedTrainingSampler with repeat_factors={}".format(
            cfg.DATASETS.TRAIN_REPEAT_FACTOR
        )
    )
    sampler = RepeatFactorTrainingSampler(torch.tensor(repeat_factors))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

Example #18

0

Show file

File: build.py Project: vlfom/CSD-detectron2

def build_detection_train_loader(cfg):
    """Builds a data loader for the baseline trainer with support of training on the subset of labeled data only.

    Most of code comes from `d2.data.build.build_detection_train_loader()`, see it for more details.
    """

    # CSD: check config is supported
    assert cfg.DATALOADER.SAMPLER_TRAIN == "TrainingSampler", "Unsupported training sampler: {}".format(
        cfg.DATALOADER.SAMPLER_TRAIN)

    # Original code
    dataset = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    # CSD: subsample the dataset if needed
    dataset = check_subsample_dataset(dataset, cfg)

    if comm.is_main_process():  # Log counts
        logger = setup_logger(name=__name__)
        logger.debug("Number of images in the dataset: {}".format(
            len(dataset)))
        _log_api_usage("dataset." + cfg.DATASETS.TRAIN[0])

    # Original code
    mapper = DatasetMapper(cfg, True)

    sampler = TrainingSampler(len(dataset))

    dataset = DatasetFromList(dataset, copy=False)
    dataset = MapDataset(dataset, mapper)
    sampler = TrainingSampler(len(dataset))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

Example #19

0

Show file

File: custom_dataset_dataloader.py Project: xychen9459/UniDet

def build_custom_train_loader(cfg, mapper=None):
    """
    Modified from detectron2.data.build.build_custom_train_loader, but supports
    different samplers
    """
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    # TODO avoid if-else?
    if sampler_name == "TrainingSampler":
        sampler = TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
        sampler = RepeatFactorTrainingSampler(repeat_factors)
    elif sampler_name == "ClassAwareSampler":
        sampler = ClassAwareSampler(dataset_dicts)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )

Example #20

0

Show file

def build_classification_test_loader(cfg, dataset_name, mapper=None):
    """
    Similar to `build_detection_train_loader`.
    But this function uses the given `dataset_name` argument (instead of the names in cfg),
    and uses batch size 1.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the DatasetCatalog
        mapper (callable): a callable which takes a sample (dict) from dataset
           and returns the format to be consumed by the model.
           By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """
    dataset_dicts = get_classification_dataset_dicts([dataset_name])

    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = samplers.InferenceSampler(len(dataset))
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          1,
                                                          drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
    return data_loader

Example #21

0

Show file

File: DefaultTrain.py Project: josepharneil/sharks

def my_build_detection_train_loader(cfg,
                                    mapper=None,
                                    isShuffleData=True,
                                    curriculum_fraction=0):
    """
    A data loader is created by the following steps:

    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
    2. Start workers to work on the dicts. Each worker will:

       * Map each metadata dict into another format to be consumed by the model.
       * Batch them by simply putting dicts into a list.

    The batched ``list[mapped_dict]`` is what this dataloader will return.

    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.

    Returns:
        an infinite iterator of training data
    """
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        # If the fraction is the default 0, use the whole dataset
        if (curriculum_fraction == 0):
            sampler = samplers.TrainingSampler(len(dataset),
                                               shuffle=isShuffleData)
        # If the fraction is not 0, then take that fraction of the dataset as a subset
        else:
            new_len = int(round(len(dataset) * curriculum_fraction))
            sampler = samplers.TrainingSampler(new_len, shuffle=isShuffleData)
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader

Example #22

0

Show file

File: data_lvis.py Project: ZZWENG/SimCLR

def get_lvis_train_dataloader(cfg, h, w):
    default_mapper = DatasetMapper(cfg, is_train=True)
    mapper = partial(wrapper, default_m=default_mapper, h=h, w=w)
    dl = build_detection_train_loader(cfg, mapper=mapper)
    return dl

Example #23

0

Show file

File: trainer.py Project: wangerxiao001/unbiased-teacher

    def build_hooks(self):
        cfg = self.cfg.clone()
        cfg.defrost()
        cfg.DATALOADER.NUM_WORKERS = 0  # save some memory and time for PreciseBN

        ret = [
            hooks.IterationTimer(),
            hooks.LRScheduler(self.optimizer, self.scheduler),
            hooks.PreciseBN(
                # Run at the same freq as (but before) evaluation.
                cfg.TEST.EVAL_PERIOD,
                self.model,
                # Build a new data loader to not affect training
                self.build_train_loader(cfg),
                cfg.TEST.PRECISE_BN.NUM_ITER,
            ) if cfg.TEST.PRECISE_BN.ENABLED and get_bn_modules(self.model)
            else None,
        ]

        # Do PreciseBN before checkpointer, because it updates the model and need to
        # be saved by checkpointer.
        # This is not always the best: if checkpointing has a different frequency,
        # some checkpoints may have more precise statistics than others.
        if comm.is_main_process():
            ret.append(
                hooks.PeriodicCheckpointer(self.checkpointer,
                                           cfg.SOLVER.CHECKPOINT_PERIOD))

        def test_and_save_results_student():
            self._last_eval_results_student = self.test(self.cfg, self.model)
            _last_eval_results_student = {
                k + "_student": self._last_eval_results_student[k]
                for k in self._last_eval_results_student.keys()
            }
            return _last_eval_results_student

        def test_and_save_results_teacher():
            self._last_eval_results_teacher = self.test(
                self.cfg, self.model_teacher)
            return self._last_eval_results_teacher

        ret.append(
            hooks.EvalHook(cfg.TEST.EVAL_PERIOD,
                           test_and_save_results_student))
        ret.append(
            hooks.EvalHook(cfg.TEST.EVAL_PERIOD,
                           test_and_save_results_teacher))

        if cfg.TEST.VAL_LOSS:  # default is True # save training time if not applied
            ret.append(
                LossEvalHook(
                    cfg.TEST.EVAL_PERIOD,
                    self.model,
                    build_detection_test_loader(
                        self.cfg,
                        self.cfg.DATASETS.TEST[0],
                        DatasetMapper(self.cfg, True),
                    ),
                    model_output="loss_proposal",
                    model_name="student",
                ))

            ret.append(
                LossEvalHook(
                    cfg.TEST.EVAL_PERIOD,
                    self.model_teacher,
                    build_detection_test_loader(
                        self.cfg,
                        self.cfg.DATASETS.TEST[0],
                        DatasetMapper(self.cfg, True),
                    ),
                    model_output="loss_proposal",
                    model_name="",
                ))

        if comm.is_main_process():
            # run writers in the end, so that evaluation metrics are written
            ret.append(hooks.PeriodicWriter(self.build_writers(), period=20))
        return ret

Example #24

0

Show file

File: data_lvis.py Project: ZZWENG/SimCLR

def get_lvis_test_dataloader(cfg, h, w):
    default_mapper = DatasetMapper(cfg, is_train=False)
    mapper = partial(wrapper, default_m=default_mapper, h=h, w=w)
    dl = build_detection_test_loader(cfg, 'lvis_v0.5_val', mapper=mapper)
    return dl

Example #25

0

Show file

File: build.py Project: ubc-vision/UniT

def build_classification_train_loader(cfg, mapper=None, multiplier=1):
    """
    A data loader is created by the following steps:
    1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
    2. Start workers to work on the dicts. Each worker will:
       * Map each metadata dict into another format to be consumed by the model.
       * Batch them by simply putting dicts into a list.
    The batched ``list[mapped_dict]`` is what this dataloader will return.
    Args:
        cfg (CfgNode): the config
        mapper (callable): a callable which takes a sample (dict) from dataset and
            returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, True)`.
    Returns:
        an infinite iterator of training data
    """
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    sample_num = cfg.DATASETS.WEAK_CLASSIFIER_SAMPLE_NUM
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers
    images_per_worker = int(images_per_worker * multiplier)
    if sample_num > 0:
        np.random.seed(cfg.DATASETS.SAMPLE_SEED)
        print("Setting sampling seed:", cfg.DATASETS.SAMPLE_SEED)
        dataset_names = cfg.DATASETS.CLASSIFIER_TRAIN
        if isinstance(dataset_names, str):
            dataset_names = [dataset_names]
        dataset_dicts = [
            DatasetCatalog.get(dataset_name) for dataset_name in dataset_names
        ]
        dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts))
        label_to_annotation_dict = {
            e: []
            for e in range(cfg.MODEL.ROI_HEADS.NUM_CLASSES)
        }
        for e in dataset_dicts:
            per_label_record = {}
            for ann in e['annotations']:
                if ann['category_id'] in per_label_record:
                    per_label_record[ann['category_id']]['annotations'].append(
                        ann)
                else:
                    record = copy.deepcopy(e)
                    # filter annotations
                    annotations_filtered = [ann]
                    record['annotations'] = annotations_filtered
                    per_label_record[ann['category_id']] = record
            for key in per_label_record.keys():
                label_to_annotation_dict[key].append(per_label_record[key])

        label_to_annotation_dict_sampled = {}
        for id_class, ann_list in label_to_annotation_dict.items():
            if id_class in cfg.DATASETS.FEWSHOT.BASE_CLASSES_ID:
                if not cfg.DATASETS.OVER_SAMPLE:
                    if cfg.DATASETS.BASE_MULTIPLIER > 0:
                        try:
                            ann_list_sampled = np.random.choice(
                                ann_list,
                                size=int(sample_num *
                                         cfg.DATASETS.BASE_MULTIPLIER),
                                replace=False)
                        except:
                            ann_list_sampled = np.random.choice(
                                ann_list,
                                size=int(sample_num *
                                         cfg.DATASETS.BASE_MULTIPLIER),
                                replace=True)
                    else:
                        ann_list_sampled = ann_list
                else:
                    print("BASE OVER SAMPLING")
                    ann_list_sampled = ann_list
                label_to_annotation_dict_sampled[id_class] = ann_list_sampled
            else:
                if not cfg.DATASETS.OVER_SAMPLE:
                    if cfg.DATASETS.BASE_MULTIPLIER > 0:
                        try:
                            ann_list_sampled = np.random.choice(
                                ann_list, size=sample_num, replace=False)
                        except:
                            ann_list_sampled = np.random.choice(
                                ann_list, size=sample_num, replace=True)
                        if cfg.DATASETS.NOVEL_MULTIPLER > 0:
                            ann_list_sampled = np.repeat(
                                ann_list_sampled, cfg.DATASETS.NOVEL_MULTIPLER)
                    else:
                        ann_list_sampled = []

                else:
                    try:
                        ann_list_sampled_temp = np.random.choice(
                            ann_list, size=sample_num, replace=False)
                        if not cfg.DATASETS.SAMPLE_WITH_REPLACEMENT:
                            print("OVER SAMPLING")
                            ann_list_sampled = np.random.choice(
                                ann_list_sampled_temp,
                                size=len(ann_list),
                                replace=True)
                        else:
                            ann_list_sampled_temp = np.random.choice(
                                ann_list, size=sample_num, replace=False)
                            num_repeat = len(ann_list) // len(
                                ann_list_sampled_temp)
                            num_remainder = len(ann_list) % len(
                                ann_list_sampled_temp)
                            ann_list_sampled = np.repeat(
                                ann_list_sampled_temp, num_repeat)
                            if num_remainder > 0:
                                ann_list_sampled = np.hstack(
                                    (ann_list_sampled,
                                     np.random.choice(ann_list_sampled_temp,
                                                      size=num_remainder,
                                                      replace=True)))
                            print("OVER SAMPLING FIXED NEW",
                                  len(ann_list_sampled_temp),
                                  len(ann_list_sampled))
                    except:
                        ann_list_sampled = ann_list
                label_to_annotation_dict_sampled[id_class] = ann_list_sampled
        dataset_dicts = []
        for k, v in label_to_annotation_dict_sampled.items():
            dataset_dicts.extend(v)

        DatasetCatalog.register("classifier_train_sampled",
                                lambda: dataset_dicts)
        MetadataCatalog.get("classifier_train_sampled").set(
            thing_classes=MetadataCatalog.get(dataset_names[0]).thing_classes,
            evaluator_type='pascal_voc')
        dataset_name = ('classifier_train_sampled', )
        # print([(x['image_id'], len(x['annotations'])) for x in dataset_dicts[:50]])
        # print_instances_class_histogram_1(dataset_dicts, MetadataCatalog.get(dataset_names[0]).thing_classes)
    else:
        dataset_name = cfg.DATASETS.CLASSIFIER_TRAIN

    dataset_dicts = get_detection_dataset_dicts(
        dataset_name,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_CLASSIFIER_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None)

    dataset = DatasetFromList(dataset_dicts, copy=False)
    # # filtering
    # dataset_filtered = []
    # for sample in dataset:
    #     e_class_ids = set([e['category_id'] for e in sample['annotations']])
    #     for e_class_ids_ in e_class_ids:
    #         if e_class_ids_ in cfg.DATASETS.FEWSHOT.NOVEL_CLASSES_ID:
    #             dataset_filtered.append(sample)
    #             break
    # dataset = dataset_filtered
    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader

Example #26

0

Show file

File: build.py Project: ubc-vision/UniT

def build_detection_query_loader(cfg,
                                 dataset_names_tuple,
                                 dataset_proposal_files_tuple,
                                 mapper=None,
                                 is_train=True):
    """
    - Modified from detectron2.data.build_detection_train_loader
    - `dataset_names_tuple`: since we need to provide dataset names using
      different variables (meta-setup) and cfg could not be modified
      (CfgNode is immutable)
    - `is_train`: will create duplicated entries according to annotations
      So if an image contains five annotations, it will appear in the dataset
      five times with different annotations
    """

    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_detection_dataset_dicts(
        dataset_names_tuple,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=dataset_proposal_files_tuple
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )
    logger = logging.getLogger(__name__)

    # Train: split annotations class-wise
    if is_train:
        print(
            "Query dataset num instances before annotation-wise duplication: {}"
            .format(len(dataset_dicts)))
        dataset_dicts = duplicate_data_acc_to_annotation_categories(
            dataset_dicts)
        print(
            "Query dataset num instances after annotation-wise duplication: {}"
            .format(len(dataset_dicts)))

    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, is_train=is_train)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader

Example #27

0

Show file

def build_detection_train_loader_drop_ids(cfg, drop_image_ids, mapper=None):
    """
    A rewrite for the detectron2.data.build.build_detection_train_loader
    function, as it supports drop images of certian_ids specified by
    drop_image_ids.

    Returns:
        an infinite iterator of training data
    """
    num_workers = get_world_size()
    images_per_batch = cfg.SOLVER.IMS_PER_BATCH
    assert (
        images_per_batch % num_workers == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        images_per_batch, num_workers)
    assert (
        images_per_batch >= num_workers
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        images_per_batch, num_workers)
    images_per_worker = images_per_batch // num_workers

    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
        if cfg.MODEL.KEYPOINT_ON else 0,
        proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN
        if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    dataset = DatasetFromList(
        [dd for dd in dataset_dicts if dd['image_id'] not in drop_image_ids],
        copy=False)

    if mapper is None:
        mapper = DatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        data_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=sampler,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=None,
            collate_fn=operator.itemgetter(
                0),  # don't batch, but yield individual elements
            worker_init_fn=worker_init_reset_seed,
        )  # yield individual mapped dict
        data_loader = AspectRatioGroupedDataset(data_loader, images_per_worker)
    else:
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        # drop_last so the batch always have the same size
        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    return data_loader

Example #28

0

Show file

def test_recognition(model_yaml, checkpoint, dataset, annotations, imagedir, outdir=None, use_rpn=False, record_individual_scores=False):
    """
    Computes detections and uses them to compute recognition accuracies.

    Arguments:
        model_yaml: Path to model config in yaml format.
        dataset: Dataset name, used to name output files.
        annotations: Path to ground truth annotations (json file with COCO-style annotations).
        imagedir: Path to image directory.
        outdir: Directory where output files are stored. When None is passed, the output directory specified in the model's config file is used.
        use_rpn: If True, the region proposal network of the model is used instead of bounding boxes from the ground truth.
    """

    # Register testset
    register_coco_instances(dataset, {}, annotations, imagedir)

    # Load model
    cfg = get_cfg()
    with open(model_yaml) as f:
        cfg = cfg.load_cfg(f)

    print("Numer of classes: {}".format(cfg.MODEL.ROI_HEADS.NUM_CLASSES))

    cfg.DATASETS.TEST = (dataset,)
    model = build_model(cfg)

    # Create outdir
    if outdir == None:
        outdir = cfg.OUTPUT_DIR

    pathlib.Path(outdir).mkdir(exist_ok=True)

    print("Evaluation output directory: " + outdir)

    # Create data loader
    if not use_rpn:
        # returns a list of dicts. Every entry in the list corresponds to one sample, represented by a dict.
        dataset_dicts = detectron2.data.get_detection_dataset_dicts(dataset)

        # add proposal boxes
        for i, s in enumerate(dataset_dicts):
            s["proposal_boxes"] = np.array([ ann["bbox"] for ann in dataset_dicts[i]["annotations"] ]) # np.array([[xmin, ymin, xmax, ymax],[xmin, ymin, xmax, ...], ...]) # kx4 matrix for k proposed bounding boxes
            s["proposal_objectness_logits"] = np.full((s["proposal_boxes"].shape[0],), 10) # logit of 10 is 99.999...%
            s["proposal_bbox_mode"] = detectron2.structures.BoxMode.XYWH_ABS # 1 # (x0, y0, w, h) in absolute floating points coordinates 
        
        print("Proposal boxes added.")

        model.proposal_generator = None # deactivate such that precomputed proposals are used
        print("Region proposal deactivated, ground truth bounding boxes are used.")

        val_loader = build_detection_test_loader(dataset_dicts, mapper=DatasetMapper(is_train=False, augmentations=[], image_format= cfg.INPUT.FORMAT, precomputed_proposal_topk=500))
    else:
        val_loader = build_detection_test_loader(cfg, dataset)

    # load model state (weights) from checkpoint
    DetectionCheckpointer(model).load(checkpoint)

    # evaluate detections
    evaluator = COCOEvaluator(dataset, ("bbox",), False, output_dir=outdir)
    result = inference_on_dataset(model, val_loader, evaluator)
    print_csv_format(result)

    with open(os.path.join(outdir, "evaluation_" + dataset + ".json"), "w") as outfile:
            json.dump(result, outfile)

    # compute accuracies
    detection2accuracy(detections=os.path.join(outdir,"coco_instances_results.json"), groundtruth=annotations, outdir=outdir, record_individual_scores=record_individual_scores)

Example #29

0

Show file

File: train_net.py Project: DeqiangWang/MyD2Ext

 def build_train_loader(cls, cfg):
     return build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True, augmentations=build_train_aug(cfg)))

Example #30

0

Show file

File: train_net.py Project: DeqiangWang/MyD2Ext

 def build_test_loader(cls, cfg, dataset_name):
     return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))