def get_coco_train(batch_size, opts, cfg_file):
    """Build a shuffled COCO training dataloader from a detectron2 config.

    Args:
        batch_size: number of samples per batch.
        opts: config overrides forwarded to ``setup``.
        cfg_file: config file path forwarded to ``setup``.

    Returns:
        torch.utils.data.DataLoader yielding lists of mapped dataset dicts.
    """
    cfg = setup(opts, cfg_file)

    min_kpts = (cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                if cfg.MODEL.KEYPOINT_ON else 0)
    proposals = (cfg.DATASETS.PROPOSAL_FILES_TRAIN
                 if cfg.MODEL.LOAD_PROPOSALS else None)
    dataset_dicts = get_detection_dataset_dicts(
        cfg.DATASETS.TRAIN,
        filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
        min_keypoints=min_kpts,
        proposal_files=proposals,
    )

    dataset = MapDataset(DatasetFromList(dataset_dicts, copy=False),
                         DatasetMapper(cfg, True))

    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.TrainingSampler(len(dataset)),
        batch_size,
        drop_last=True,  # drop_last so the batch always has the same size
    )
    return torch.utils.data.DataLoader(
        dataset,
        num_workers=4,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
        worker_init_fn=worker_init_reset_seed,
    )
def get_coco_test(batch_size, opts, cfg_file):
    """Build a sequential dataloader over the COCO 2017 panoptic-separated
    validation split.

    Args:
        batch_size: number of samples per batch (the last batch may be
            smaller, since nothing is dropped).
        opts: config overrides forwarded to ``setup``.
        cfg_file: config file path forwarded to ``setup``.

    Returns:
        torch.utils.data.DataLoader yielding lists of mapped dataset dicts.
    """
    cfg = setup(opts, cfg_file)
    dataset_name = 'coco_2017_val_panoptic_separated'
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=None,
    )

    dataset = MapDataset(DatasetFromList(dataset_dicts),
                         DatasetMapper(cfg, False))

    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(dataset)),
        batch_size,
        drop_last=False,
    )
    return torch.utils.data.DataLoader(
        dataset,
        num_workers=4,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
def build_detection_test_loader(cfg, dataset_name, batch_size, mapper=None):
    """Build a sequential test loader for one registered detection dataset.

    Args:
        cfg: a detectron2 CfgNode.
        dataset_name (str): name of a dataset registered in DatasetCatalog.
        batch_size (int): number of images per batch.
        mapper (callable or None): maps a dataset dict to model input;
            defaults to ``DatasetMapper(cfg, False)``.

    Returns:
        torch.utils.data.DataLoader over the whole dataset, in order.
    """
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=[
            cfg.DATASETS.PROPOSAL_FILES_TEST[list(
                cfg.DATASETS.TEST).index(dataset_name)]
        ] if cfg.MODEL.LOAD_PROPOSALS else None,
    )

    dataset = DatasetFromList(dataset_dicts)
    if mapper is None:
        mapper = DatasetMapper(cfg, False)
    dataset = MapDataset(dataset, mapper)

    sampler = samplers.InferenceSampler(len(dataset))
    batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                          batch_size,
                                                          drop_last=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        # BUGFIX: was `_trivial_batch_collator`, which is not defined at
        # module scope (it exists only as a local inside
        # build_sequence_loader); use the module-level
        # `trivial_batch_collator` like every other loader in this file.
        collate_fn=trivial_batch_collator,
    )
    return data_loader
# Example #4
def build_sequence_loader(cfg,
                          dataset_name,
                          mapper,
                          total_samples,
                          batch_size=1):
    """
    Similar to `build_detection_test_loader` in the way that its sampler
    visits dataset_dicts in order and loops only once; an interval stride
    limits the pass to roughly ``total_samples`` items.
    """
    dataset = MapDataset(DatasetFromList(DatasetCatalog.get(dataset_name)),
                         mapper)

    # Stride through the indices so about ``total_samples`` items are seen.
    stride = max(1, int(len(dataset) / total_samples))
    batch_sampler = BatchSampler(IntervalSampler(len(dataset), stride),
                                 batch_size,
                                 drop_last=False)

    def _trivial_batch_collator(batch):
        # A "batch" is just the list of mapped dicts; no collation needed.
        return batch

    return torch.utils.data.DataLoader(
        dataset,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=_trivial_batch_collator,
    )
# Example #5
def do_test(cfg, model):
    """Evaluate ``model`` on every dataset in ``cfg.DATASETS.TEST``.

    For each dataset a fixed, seeded 10-image random subset is used (so the
    subset is identical on every call), loaded one image per batch. On the
    main process the per-task metrics are printed as a pandas DataFrame.

    Returns:
        OrderedDict: dataset name -> evaluation results.
    """
    results = OrderedDict()
    for dataset_name in cfg.DATASETS.TEST:
        # data_loader = build_detection_test_loader(cfg, dataset_name)
        # NOTE: the string-literal conditions below are always truthy; they
        # act only as inline section labels for the inlined loader code.
        if 'build_detection_test_loader':
            if 'get_detection_dataset_dicts':
                descs_valid: List[Dict] = DatasetCatalog.get(dataset_name)
            # validation dataset is too large.
            random.seed(2020)  # fixed seed -> reproducible subset
            descs_valid = random.sample(descs_valid, k=10)
            dataset = DatasetFromList(descs_valid)
            if 'DatasetMapper':
                mapper = make_mapper(dataset_name,
                                     is_train=False,
                                     augmentations=None)
            dataset = MapDataset(dataset, mapper)

            sampler = InferenceSampler(len(dataset))
            # Always use 1 image per worker during inference since this is the
            # standard when reporting inference time in papers.
            batch_sampler = torch.utils.data.sampler.BatchSampler(
                sampler, 1, drop_last=False)

            data_loader = torch.utils.data.DataLoader(
                dataset,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
                batch_sampler=batch_sampler,
                collate_fn=trivial_batch_collator,
            )

        evaluator = get_evaluator2(
            cfg, dataset_name,
            os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name))
        # Stash the evaluator in a module-level global (presumably for
        # interactive inspection/debugging — TODO confirm).
        global EVALUATOR
        EVALUATOR = evaluator

        results_i = inference_on_dataset(model, data_loader, evaluator)
        results[dataset_name] = results_i
        # TODO: Multiprocessing?
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(
                dataset_name))
            if 'print_csv_format(results_i)':
                # Render each task's metrics as a one-column DataFrame,
                # dropping NaN rows and the leading path component of keys.
                for tsk, res in results_i.items():
                    global RES
                    RES = res
                    res_df = pd.DataFrame(pd.Series(res, name='value'))
                    res_df = res_df[res_df['value'].notna()]
                    # res_df = res_df[res_df['value'] > 0]
                    res_df.index = res_df.index.map(
                        lambda x: '/'.join(x.split('/')[1:]))
                    pd.set_option('display.max_rows', None)
                    print(res_df)
                    pd.reset_option('display.max_rows')
    # BUGFIX: ``results`` was populated but never returned, making the
    # OrderedDict dead code; return it like detectron2's reference do_test.
    return results
# Example #6
    def test_iter_style(self):
        """Wrapping an IterableDataset keeps it iterable-style and maps
        every yielded element."""
        class _Src(torch.utils.data.IterableDataset):
            def __iter__(self):
                return iter([1, 2, 3])

        mapped = MapDataset(_Src(), TestMapDataset.map_func)
        self.assertIsInstance(mapped, torch.utils.data.IterableDataset)
        self.assertEqual(list(iter(mapped)), [2, 6])
# Example #7
def build_detection_train_loader(
    dataset,
    *,
    mapper,
    sampler=None,
    total_batch_size,
    aspect_ratio_grouping=True,
    num_workers=0,
):
    """
    Build a training dataloader for object detection with sensible defaults.
    This interface is experimental.

    Args:
        dataset (list or torch.utils.data.Dataset): dataset dicts (e.g. from
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`)
            or an already-constructed map-style pytorch dataset.
        mapper (callable): converts one sample (dict) into the format the
            model consumes; with a cfg the default choice is
            ``DatasetMapper(cfg, is_train=True)``. ``None`` skips mapping.
        sampler (torch.utils.data.sampler.Sampler or None): produces indices
            into ``dataset``; defaults to :class:`TrainingSampler`, which
            coordinates a random shuffle sequence across all workers.
        total_batch_size (int): total batch size across all workers.
            Batching simply puts data into a list.
        aspect_ratio_grouping (bool): group images with similar aspect ratio
            for efficiency; requires "width" and "height" keys in each dict.
        num_workers (int): number of parallel data loading workers.

    Returns:
        torch.utils.data.DataLoader: yields ``list[mapped_element]`` of
            length ``total_batch_size / num_workers``, where
            ``mapped_element`` is produced by the ``mapper``.
    """
    if isinstance(dataset, list):
        dataset = DatasetFromList(dataset, copy=False)
    if mapper is not None:
        dataset = MapDataset(dataset, mapper)
    sampler = TrainingSampler(len(dataset)) if sampler is None else sampler
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        dataset,
        sampler,
        total_batch_size,
        num_workers=num_workers,
        aspect_ratio_grouping=aspect_ratio_grouping,
    )
# Example #8
def do_test(cfg, model):
    """Evaluate ``model`` on each dataset in ``cfg.DATASETS.TEST`` and, on
    the main process, print per-task metrics as a pandas DataFrame.

    COCO val is loaded in full; any other dataset (Open Images) is
    subsampled to a seeded random ``N_IMAGES_PER_TEST`` images. Inference
    runs one image per batch. Nothing is returned.
    """
    for dataset_name in cfg.DATASETS.TEST:
        # data_loader = build_detection_test_loader(cfg, dataset_name)
        # NOTE: the string-literal conditions below are always truthy; they
        # serve only as inline section labels for the inlined loader code.
        if 'build_detection_test_loader':
            if dataset_name == 'coco_2017_val':
                dicts_valid: List[Dict] = DatasetCatalog.get(dataset_name)
                if "filter_empty and has_instances":
                    ...
                ds_valid = DatasetFromList(dicts_valid, copy=False)
                mapper = DatasetMapper(cfg, is_train=False)
            else:  # Open-Image-Dataset
                if 'get_detection_dataset_dicts':
                    descs_get: List[Dict] = DatasetCatalog.get(dataset_name)
                # validation dataset is too large.
                random.seed(2020)  # fixed seed -> reproducible subset
                # NOTE: random.choices samples WITH replacement, so the
                # subset may contain duplicate images.
                descs_valid = random.choices(descs_get, k=N_IMAGES_PER_TEST)
                # TODO: clear cache.
                ds_valid = DatasetFromList(descs_valid)
                if 'DatasetMapper':
                    mapper = make_mapper(dataset_name, is_train=False, augmentations=None)

            ds_valid = MapDataset(ds_valid, mapper)

            sampler = InferenceSampler(len(ds_valid))
            # Always use 1 image per worker during inference since this is the
            # standard when reporting inference time in papers.
            batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, 1, drop_last=False)

            data_loader = torch.utils.data.DataLoader(
                ds_valid,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
                batch_sampler=batch_sampler,
                collate_fn=trivial_batch_collator,
            )

        evaluator = get_evaluator2(
            cfg, dataset_name, os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
        )

        results_i = inference_on_dataset(model, data_loader, evaluator)
        if comm.is_main_process():
            logger.info("Evaluation results for {} in csv format:".format(dataset_name))
            # print_csv_format(results_i)
            # Render each task's metrics as a one-column DataFrame, dropping
            # NaN rows and the leading path component of the metric keys.
            for tsk, res in results_i.items():
                res_df = pd.DataFrame(pd.Series(res, name='value'))
                res_df = res_df[res_df['value'].notna()]
                res_df.index = res_df.index.map(lambda x: '/'.join(x.split('/')[1:]))
                pd.set_option('display.max_rows', None)
                print(res_df)
                pd.reset_option('display.max_rows')
# Example #9
def build_hand_train_loader(cfg):
    """Build the hand-project training dataloader.

    Returns:
        (DataLoader, int): the batched training loader and the number of
        samples per epoch as reported by ``load_hand``.
    """
    dataset_dicts, num_per_epoch = load_hand(cfg.HAND_PROJECT.DATA.MODE, cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN, cfg.HAND_PROJECT.DATA.BASE_PATH, selects=cfg.HAND_PROJECT.DATA.SELECTS)

    # pdb.set_trace()
    dataset = MapDataset(DatasetFromList(dataset_dicts, copy=False),
                         DatasetMapper(cfg, True))

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logging.getLogger(__name__).info(
        "Using training sampler {}".format(sampler_name))

    loader = build_batch_data_loader(dataset,
                                     TrainingSampler(len(dataset)),
                                     cfg.SOLVER.IMS_PER_BATCH,
                                     num_workers=cfg.DATALOADER.NUM_WORKERS)
    return loader, num_per_epoch
# Example #10
def build_simple_dataloader(dataset_name: list, batch_size):
    """Build a deterministic (non-shuffled, fixed-seed) training-style
    dataloader — intended for debugging."""
    dicts = get_detection_dataset_dicts(dataset_name)

    cfg = get_cfg()
    cfg["aug_kwargs"] = {}

    mapped = MapDataset(DatasetFromList(dicts, copy=False),
                        AlbumentationsMapper(cfg, False))

    # set the shuffle to False in debugging mode
    sampler = TrainingSampler(len(mapped), shuffle=False, seed=42)
    return build_batch_data_loader(dataset=mapped, sampler=sampler,
                                   total_batch_size=batch_size)
# Example #11
def build_detection_train_loader_with_train_sampler(cfg, mapper, seed=42, shuffle=True):
    """Build a train loader over ``cfg.DATASETS.TRAIN`` with an explicit
    TrainingSampler.

    Args:
        cfg: a detectron2 CfgNode.
        mapper (callable): maps a dataset dict to model input.
        seed (int): seed for the sampler's shuffling.
        shuffle (bool): whether the sampler shuffles indices.

    Returns:
        a batched dataloader from ``build_batch_data_loader``.
    """
    dataset_dicts = get_detection_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)
    dataset = MapDataset(dataset, mapper)

    logger = logging.getLogger(__name__)
    # BUGFIX: the message was hard-coded to "shuffle=False" even though the
    # sampler honors the `shuffle` argument (default True); log the actual
    # value instead of a misleading constant.
    logger.info("Using training sampler TrainingSampler with shuffle=%s", shuffle)
    sampler = TrainingSampler(len(dataset), shuffle=shuffle, seed=seed)

    return build_batch_data_loader(
        dataset,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
# Example #12
def build_x_train_loader(
    dataset, *, mapper, sampler=None, total_batch_size, aspect_ratio_grouping=True, num_workers=0
):
    """Assemble a batched training loader: wrap a list into a dataset, apply
    the mapper, default to an infinite shuffled TrainingSampler, then batch
    via ``build_batch_data_loader``."""
    ds = DatasetFromList(dataset, copy=False) if isinstance(dataset, list) else dataset
    if mapper is not None:
        ds = MapDataset(ds, mapper)
    if sampler is None:
        sampler = TrainingSampler(len(ds))
    assert isinstance(sampler, torch.utils.data.sampler.Sampler)
    return build_batch_data_loader(
        ds,
        sampler,
        total_batch_size,
        num_workers=num_workers,
        aspect_ratio_grouping=aspect_ratio_grouping,
    )
# Example #13
def build_hand_test_loader(cfg):
    """Build a sequential, single-image-per-batch loader for the hand
    dataset's "test" split."""
    dataset_dicts, num_per_epoch = load_hand("test", cfg.HAND_PROJECT.DATA.ANNOT_SUBSET_TRAIN, cfg.HAND_PROJECT.DATA.BASE_PATH, selects=[1])

    dataset = MapDataset(DatasetFromList(dataset_dicts), DatasetMapper(cfg, False))

    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        InferenceSampler(len(dataset)), 1, drop_last=False)

    # num_workers=0 deliberately: load in the main process.
    return torch.utils.data.DataLoader(dataset, num_workers=0, batch_sampler=batch_sampler, collate_fn=trivial_batch_collator)
def build_detection_test_loader(cfg, dataset_name, batch_size, mapper=None):
    """
    Similar to `build_detection_train_loader`, but loads only the single
    dataset named by ``dataset_name`` (rather than the names in cfg) and
    iterates it once, in order.

    Args:
        cfg: a detectron2 CfgNode
        dataset_name (str): a name of the dataset that's available in the
            DatasetCatalog
        batch_size (int): number of images per batch
        mapper (callable): a callable which takes a sample (dict) from the
            dataset and returns the format to be consumed by the model.
            By default it will be `DatasetMapper(cfg, False)`.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.
    """
    proposal_files = None
    if cfg.MODEL.LOAD_PROPOSALS:
        position = list(cfg.DATASETS.TEST).index(dataset_name)
        proposal_files = [cfg.DATASETS.PROPOSAL_FILES_TEST[position]]
    dataset_dicts = get_detection_dataset_dicts(
        [dataset_name],
        filter_empty=False,
        proposal_files=proposal_files,
    )

    mapped = MapDataset(
        DatasetFromList(dataset_dicts),
        mapper if mapper is not None else DatasetMapper(cfg, False))

    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(mapped)), batch_size, drop_last=False)

    return torch.utils.data.DataLoader(
        mapped,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
# Example #15
def derender_dataset(cfg, dataset_names, attributes,for_inference=False):
    """Load and concatenate the named datasets, flatten them to one entry
    per annotation (keeping only entries with the required fields), and wrap
    them in a MapDataset with a DerenderMapper."""
    print("reading datasets {}".format(dataset_names))
    start = time.time()
    per_name = (DatasetCatalog.get(name) for name in dataset_names)
    dataset_dicts = list(itertools.chain.from_iterable(per_name))

    if cfg.DATASETS.USE_PREDICTED_BOXES:
        required_fields = ["pred_box"]
    else:
        required_fields = ["bbox"]
    if not for_inference:
        # Ground-truth attributes are only needed for training/eval.
        required_fields.append("attributes")
    _, dataset_dicts = image_based_to_annotation_based(dataset_dicts, required_fields)

    mapper = DerenderMapper(cfg.DATASETS.USE_PREDICTED_BOXES,
                            attributes,
                            for_inference,
                            use_depth=cfg.DATASETS.USE_DEPTH)
    dataset = MapDataset(DatasetFromList(dataset_dicts, copy=False), mapper)
    print("done after {}".format(time.time()-start))
    return dataset
# Example #16
def build_detection_test_loader(dataset, *, mapper, num_workers=0):
    """
    Similar to `build_detection_train_loader`, but uses a batch size of 1.
    This interface is experimental.

    Args:
        dataset (list or torch.utils.data.Dataset): dataset dicts (from
            :func:`DatasetCatalog.get` or :func:`get_detection_dataset_dicts`)
            or an already-built map-style pytorch dataset.
        mapper (callable): converts one sample (dict) into the format the
            model consumes; with a cfg the default choice is
            ``DatasetMapper(cfg, is_train=False)``. ``None`` skips mapping.
        num_workers (int): number of parallel data loading workers.

    Returns:
        DataLoader: a torch DataLoader, that loads the given detection
        dataset, with test-time transformation and batching.

    Examples:
    ::
        data_loader = build_detection_test_loader(
            DatasetRegistry.get("my_test"),
            mapper=DatasetMapper(...))

        # or, instantiate with a CfgNode:
        data_loader = build_detection_test_loader(cfg, "my_test")
    """
    ds = DatasetFromList(dataset, copy=False) if isinstance(dataset, list) else dataset
    if mapper is not None:
        ds = MapDataset(ds, mapper)
    # Always use 1 image per worker during inference since this is the
    # standard when reporting inference time in papers.
    batch_sampler = torch.utils.data.sampler.BatchSampler(
        InferenceSampler(len(ds)), 1, drop_last=False)
    return torch.utils.data.DataLoader(
        ds,
        num_workers=num_workers,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
# Example #17
def build_classification_test_loader(cfg, dataset_name, mapper=None):
    """Build a sequential, batch-size-1 classification test loader.

    Note: the datasets are taken from ``cfg.DATASETS.TEST``; the
    ``dataset_name`` parameter is accepted for interface parity only.
    """
    dicts = get_classification_dataset_dicts(cfg.DATASETS.TEST)
    ds = DatasetFromList(dicts, copy=False)

    if mapper is None:
        mapper = ClsDatasetMapper(cfg, False)  # False means Not is_training
    ds = MapDataset(ds, mapper)

    batch_sampler = torch.utils.data.sampler.BatchSampler(
        samplers.InferenceSampler(len(ds)), 1, drop_last=False)
    return torch.utils.data.DataLoader(
        ds,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=batch_sampler,
        collate_fn=trivial_batch_collator,
    )
# Example #18
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    """Build the training dataloader and stash it in the module-level
    ``DATA_LOADER`` global before returning it.

    Two paths: the COCO path uses the standard DatasetMapper; the
    Open-Image path rebalances the dataset via ``sample_image_ids`` and
    adds photometric jitter on top of the configured augmentations.
    """
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open-Image-Dataset
        # NOTE: the string-literal conditions below are always truthy; they
        # act only as inline section labels.
        if 'get_detection_dataset_dicts':
            all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")
        if 'rebalancing':
            # Map image_id -> index so the sampled ids can be resolved back
            # to their dataset dicts.
            image_id_vs_idx = {}
            for idx, desc in enumerate(all_descs_train):
                image_id_vs_idx[desc['image_id']] = idx
            descs_train = list(map(lambda img_id: all_descs_train[image_id_vs_idx[img_id]], sample_image_ids()))
            print('_' * 50 + f'train dataset len: {len(descs_train)}')

        ds_train = DatasetFromList(descs_train, copy=False)

        if 'DatasetMapper':
            # Mild color jitter before the configured train augmentations.
            augs = [RandomContrast(0.8, 1.2),
                    RandomBrightness(0.8, 1.2),
                    RandomSaturation(0.8, 1.2)]
            augs.extend(build_augmentation(cfg, is_train=True))
            mapper = make_mapper('oid_train', is_train=True, augmentations=T.AugmentationList(augs))
    ds_train = MapDataset(ds_train, mapper)

    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    # Expose the loader globally (presumably for interactive debugging —
    # TODO confirm).
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
# Example #19
            if "filter_empty and has_instances":
                ...
            dataset = DatasetFromList(dicts_valid, copy=False)
            mapper = DatasetMapper(cfg, is_train=False)
        else:  # Open-Image-Dataset
            if 'get_detection_dataset_dicts':
                descs_valid: List[Dict] = DatasetCatalog.get(dataset_name)
            # # validation dataset is too large.
            # descs_valid = random.choices(descs_valid, k=200)
            dataset = DatasetFromList(descs_valid)
            if 'DatasetMapper':
                mapper = make_mapper(dataset_name,
                                     is_train=False,
                                     augmentations=None)

        dataset = MapDataset(dataset, mapper)

        sampler = RandomSampler(dataset)
        # Always use 1 image per worker during inference since this is the
        # standard when reporting inference time in papers.
        batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                              1,
                                                              drop_last=False)

        data_loader = torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
        )
def write_with_inferred_attributes(cfg, split, attributes_key):
    """Run the trained derender predictor over one dataset split and write
    the predicted attributes back into the split's standard-format json.

    Every annotation with the required box field gets the predictor's
    per-attribute outputs stored under ``attributes_key``; the updated
    dataset is then serialized to disk.

    Args:
        cfg: experiment config; supplies TRAINED_DERENDER paths/flags,
            BASE_NAME and DEBUG.
        split: dataset split identifier, forwarded to
            ``get_dataset_name_and_json``.
        attributes_key: key under which inferred attributes are stored in
            each annotation.
    """
    timer = CodeTimer(
        "adding inferred attributes split:{}, attributes_key:{}".format(
            split, attributes_key))
    # Load the derender module's own config and point it at the weights
    # registered for this attributes_key.
    module_cfg = os.path.join(cfg.TRAINED_DERENDER.EXP_DIR, "cfg.yaml")
    module_cfg = load_cfg_from_file(module_cfg)
    module_cfg.MODEL.WEIGHTS = cfg.TRAINED_DERENDER.ATTRIBUTES_WEIGHTS_MAP[
        attributes_key]

    # Per-dataset batch-size/worker tuning (values appear empirical,
    # presumably memory-bound — TODO confirm).
    module_cfg.DATALOADER.OBJECTS_PER_BATCH = 1000 if cfg.BASE_NAME == "intphys" else 450
    module_cfg.DATALOADER.NUM_WORKERS = 8 if cfg.BASE_NAME == "adept" else module_cfg.DATALOADER.NUM_WORKERS

    if cfg.DEBUG:
        # Keep everything in-process and small while debugging.
        module_cfg.DATALOADER.NUM_WORKERS = 0
        module_cfg.DEBUG = True
        module_cfg.DATALOADER.OBJECTS_PER_BATCH = 50

    predictor = DerenderPredictor(module_cfg)

    # if not cfg.DEBUG:
    #     gpu_ids = [_ for _ in range(torch.cuda.device_count())]
    #     predictor.derenderer = torch.nn.parallel.DataParallel(predictor.derenderer, gpu_ids)

    dataset_name, standard_format_json_file = get_dataset_name_and_json(
        cfg, split)
    dataset = DatasetCatalog.get(dataset_name)
    required_fields = [
        "pred_box"
    ] if cfg.TRAINED_DERENDER.USE_INFERRED_BOXES else ["bbox"]
    # Flatten image-based dicts to one entry per annotation; filtered_idx
    # records each kept annotation's (image_idx, annotation_idx) pair.
    filtered_idx, \
    mapped_dataset = image_based_to_annotation_based(dataset, required_fields)
    mapped_dataset = DatasetFromList(mapped_dataset, copy=False)
    mapper = DerenderMapper(cfg.TRAINED_DERENDER.USE_INFERRED_BOXES,
                            predictor.attributes,
                            for_inference=True,
                            use_depth=cfg.TRAINED_DERENDER.USE_DEPTH)
    mapped_dataset = MapDataset(mapped_dataset, mapper)

    # shuffle=False keeps loader order aligned with filtered_idx below.
    data_loader = DataLoader(
        dataset=mapped_dataset,
        batch_size=module_cfg.DATALOADER.OBJECTS_PER_BATCH,
        num_workers=module_cfg.DATALOADER.NUM_WORKERS,
        shuffle=False)

    # Walk the loader in order, pairing each predicted object with its
    # (image_idx, annotation_idx) via a running pointer into filtered_idx.
    fil_pointer = 0
    with torch.no_grad():
        for inputs in data_loader:
            inputs = to_cuda(inputs)
            outputs = predictor(inputs)
            batch_size = list(outputs.values())[0].shape[0]
            for oix, (img_idx, an_idx) in zip(
                    range(batch_size),
                    filtered_idx[fil_pointer:fil_pointer + batch_size]):

                dataset[img_idx]["annotations"][an_idx][attributes_key] = \
                    {k: v[oix].item() for k, v in outputs.items()}
                # {k: v[oix].item() if v[oix].size == 1
                #                   else [float(el) for el in v[oix]]
                # for k,v in outputs.items()}

            fil_pointer = fil_pointer + batch_size

    dataset = [fix_for_serialization(d) for d in dataset]

    with open(standard_format_json_file, "w") as f:
        json.dump(dataset, f, indent=4)

    timer.done()
# Example #21
 def test_pickleability(self):
     """A MapDataset (even with a lambda map_func) must survive a pickle
     round-trip and still map items afterwards."""
     mapped = MapDataset(DatasetFromList([1, 2, 3]), lambda x: x * 2)
     restored = pickle.loads(pickle.dumps(mapped))
     self.assertEqual(restored[0], 2)
# Example #22
    #     model.to(torch.device(cfg.MODEL.DEVICE))

    if 'do-train':
        ...
        if 'build_detection_train_loader':
            # all dataset_dicts w.r.t cfg.DATASETS.TRAIN will be flattened.
            dataset_dicts = get_detection_dataset_dicts(
                cfg.DATASETS.TRAIN,
                filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS,
                min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
                if cfg.MODEL.KEYPOINT_ON
                else 0,
                proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
            )
            dataset = DatasetFromList(dataset_dicts, copy=False)
            dataset = MapDataset(dataset, DatasetMapper(cfg, True))

            sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
            logger = logging.getLogger(__name__)
            logger.info("Using training sampler {}".format(sampler_name))
            # TODO avoid if-else?
            if sampler_name == "TrainingSampler":
                sampler = TrainingSampler(len(dataset))
            else:
                raise ValueError("Unknown training sampler: {}".format(sampler_name))
            data_loader = build_batch_data_loader(
                dataset,
                sampler,
                cfg.SOLVER.IMS_PER_BATCH,
                aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
                num_workers=cfg.DATALOADER.NUM_WORKERS,
# Example #23
 def test_map_style(self):
     """A map-style dataset keeps index access after wrapping; each item is
     transformed by map_func."""
     mapped = MapDataset(DatasetFromList([1, 2, 3]), TestMapDataset.map_func)
     self.assertEqual(mapped[0], 2)
     self.assertEqual(mapped[2], 6)
     self.assertIn(mapped[1], [2, 6])
# Example #24
def build_classification_train_loader(cfg, mapper=None):
    """
    Build a classification data loader from cfg.

    Returns:
        list[dict]: Each dict contains,
        * image: Tensor, image in (C, H, W) format.
        * label (optional): int, groundtruth class
    """
    world_size = get_world_size()
    ims_per_batch = cfg.SOLVER.IMS_PER_BATCH
    # The global batch must split evenly across the distributed workers.
    assert (
        ims_per_batch % world_size == 0
    ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
        ims_per_batch, world_size)
    assert (
        ims_per_batch >= world_size
    ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
        ims_per_batch, world_size)
    images_per_worker = ims_per_batch // world_size

    dataset_dicts = get_classification_dataset_dicts(cfg.DATASETS.TRAIN)
    dataset = DatasetFromList(dataset_dicts, copy=False)

    if mapper is None:
        mapper = ClsDatasetMapper(cfg, True)
    dataset = MapDataset(dataset, mapper)

    sampler_name = cfg.DATALOADER.SAMPLER_TRAIN
    logger = logging.getLogger(__name__)
    logger.info("Using training sampler {}".format(sampler_name))
    if sampler_name == "TrainingSampler":
        sampler = samplers.TrainingSampler(len(dataset))
    elif sampler_name == "RepeatFactorTrainingSampler":
        sampler = samplers.RepeatFactorTrainingSampler(
            dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD)
    else:
        raise ValueError("Unknown training sampler: {}".format(sampler_name))

    if not cfg.DATALOADER.ASPECT_RATIO_GROUPING:
        # drop_last so the batch always have the same size
        batch_sampler = torch.utils.data.sampler.BatchSampler(
            sampler, images_per_worker, drop_last=True)
        return torch.utils.data.DataLoader(
            dataset,
            num_workers=cfg.DATALOADER.NUM_WORKERS,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
            worker_init_fn=worker_init_reset_seed,
        )

    # Aspect-ratio grouping: yield individual mapped dicts and let
    # AspectRatioGroupedDataset assemble batches of similar shapes.
    per_item_loader = torch.utils.data.DataLoader(
        dataset,
        sampler=sampler,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
        batch_sampler=None,
        collate_fn=operator.itemgetter(
            0),  # don't batch, but yield individual elements
        worker_init_fn=worker_init_reset_seed,
    )  # yield individual mapped dict
    return AspectRatioGroupedDataset(per_item_loader, images_per_worker)
# Example #25
    if 'setup(args)':
        cfg = get_cfg()
        cfg.merge_from_file(args.config_file)
        cfg.merge_from_list(args.opts)
        cfg.freeze()
        default_setup(
            cfg, args
        )  # if you don't like any of the default setup, write your own setup code

    if 'build dataloader':
        file_paths = glob.glob(DS_DIR + 'test/*.jpg')
        # import numpy as np
        # file_paths = np.random.choice(file_paths, size=200, replace=False)
        ds = DatasetFromList(file_paths)
        ds = MapDataset(ds, map_func=kaggle_mapper)

        sampler = InferenceSampler(len(ds))
        batch_sampler = torch.utils.data.sampler.BatchSampler(sampler,
                                                              BATCH_SIZE,
                                                              drop_last=False)

        data_loader = torch.utils.data.DataLoader(
            ds,
            batch_sampler=batch_sampler,
            collate_fn=trivial_batch_collator,
        )

    if 'create model and load weights':
        meta_arch = cfg.MODEL.META_ARCHITECTURE
        model = META_ARCH_REGISTRY.get(meta_arch)(cfg)