    def __init__(self,
                 size: int,
                 shuffle: bool = True,
                 seed: Optional[int] = None):
        """
        Args:
            size (int): the total number of samples in the underlying dataset to sample from
            shuffle (bool): whether to shuffle the indices or not
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        if not isinstance(size, int):
            raise TypeError(
                f"TrainingSampler(size=) expects an int. Got type {type(size)}."
            )
        if size <= 0:
            raise ValueError(
                f"TrainingSampler(size=) expects a positive int. Got {size}.")
        self._size = size
        self._shuffle = shuffle
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()
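For context, the constructor above only stores state; samplers like this usually pair it with an infinite, rank-sharded iterator. The sketch below mirrors detectron2's TrainingSampler (the __iter__ is not part of the snippet above): each rank takes every world_size-th index from the same seeded shuffle stream, so all workers see disjoint slices of one permutation.

import itertools
import torch

def _infinite_indices(self):
    g = torch.Generator()
    g.manual_seed(self._seed)  # same seed on every worker -> same stream
    while True:
        if self._shuffle:
            yield from torch.randperm(self._size, generator=g).tolist()
        else:
            yield from torch.arange(self._size).tolist()

def __iter__(self):
    # Shard the shared stream: rank r takes indices r, r + world_size, ...
    yield from itertools.islice(
        self._infinite_indices(), self._rank, None, self._world_size
    )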
Example #2
    def __init__(self, cfg, sizes, dataset_dicts, seed: Optional[int] = None):
        """
        Args:
            cfg: config parameters; reads DATALOADER.DATASET_RATIO and SOLVER.IMS_PER_BATCH
            sizes (list[int]): the number of samples in each underlying dataset
            dataset_dicts (list[dict]): annotations in Detectron2 dataset format;
                each record carries a 'dataset_source' key
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        self.sizes = sizes
        dataset_ratio = cfg.DATALOADER.DATASET_RATIO
        self._batch_size = cfg.SOLVER.IMS_PER_BATCH
        assert len(dataset_ratio) == len(sizes), \
            'length of dataset ratio {} should be equal to number of datasets {}'.format(
                len(dataset_ratio), len(sizes)
            )
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)
        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()

        self._ims_per_gpu = self._batch_size // self._world_size
        self.dataset_ids = torch.tensor(
            [d['dataset_source'] for d in dataset_dicts], dtype=torch.long)

        dataset_weight = [
            torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio)
            for r, s in zip(dataset_ratio, sizes)
        ]
        dataset_weight = torch.cat(dataset_weight)
        self.weights = dataset_weight
        self.sample_epoch_size = len(self.weights)
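The per-sample weights computed above balance datasets of different sizes. A small worked example (hypothetical numbers, standalone):

import torch

# Two datasets with 100 and 400 images, sampled at a 1:1 ratio.
sizes = [100, 400]
dataset_ratio = [1.0, 1.0]

dataset_weight = torch.cat([
    torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio)
    for r, s in zip(dataset_ratio, sizes)
])

# Samples of the small dataset get weight 2.0, samples of the large one 0.5,
# so each dataset contributes equal total mass (100 * 2.0 == 400 * 0.5) and
# is drawn equally often in expectation despite the 1:4 size imbalance.
assert dataset_weight[0].item() == 2.0 and dataset_weight[-1].item() == 0.5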
Example #3
    def __init__(self, dataset_dicts, seed: Optional[int] = None):
        """
        """
        self._size = len(dataset_dicts)
        assert self._size > 0
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()
        self.weights = self._get_class_balance_factor(dataset_dicts)
Example #4
    def __init__(self, dataset_dicts, seed: Optional[int] = None):
        """
        Args:
            dataset_dicts (list[dict]): annotations in Detectron2 dataset format
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        self._size = len(dataset_dicts)
        assert self._size > 0
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()
        self.weights = self._get_class_balance_factor(dataset_dicts)
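The helper _get_class_balance_factor is not included in these snippets. As a rough sketch of what such a factor commonly computes (an assumed inverse-frequency weighting; the function name, exponent parameter, and details here are illustrative, not the original implementation):

from collections import defaultdict

import torch

def get_class_balance_factor(dataset_dicts, exponent=1.0):
    # Count, for each category, how many images contain it.
    category_freq = defaultdict(int)
    for d in dataset_dicts:
        for cat_id in {ann["category_id"] for ann in d["annotations"]}:
            category_freq[cat_id] += 1
    # Weight each image by the summed inverse frequency of its categories,
    # so images containing rare classes are drawn more often.
    weights = []
    for d in dataset_dicts:
        cat_ids = {ann["category_id"] for ann in d["annotations"]}
        weights.append(sum((1.0 / category_freq[c]) ** exponent for c in cat_ids))
    return torch.tensor(weights, dtype=torch.float)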
Example #5
    def __init__(self, size: int, shuffle: bool = True, seed: Optional[int] = None):
        """
        Args:
            size (int): the total number of samples in the underlying dataset to sample from
            shuffle (bool): whether to shuffle the indices or not
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        assert size > 0
        self._size = size
        self._shuffle = shuffle
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()
Example #6
    def __init__(self, repeat_factors, *, shuffle=True, seed=None):
        """
        Args:
            repeat_factors (Tensor): a float vector, the repeat factor for each index. When it is
                full of ones, it is equivalent to ``TrainingSampler(len(repeat_factors), ...)``.
            shuffle (bool): whether to shuffle the indices or not
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        self._shuffle = shuffle
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()

        # Split into whole number (_int_part) and fractional (_frac_part) parts.
        self._int_part = torch.trunc(repeat_factors)
        self._frac_part = repeat_factors - self._int_part
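The int/frac split is consumed when indices are drawn for an epoch. The sketch below follows detectron2's RepeatFactorTrainingSampler._get_epoch_indices (written here as a free function for clarity): the fractional part is realized stochastically, so a repeat factor of 2.3 produces 2 copies of that index with probability 0.7 and 3 copies with probability 0.3.

import torch

def get_epoch_indices(int_part, frac_part, generator):
    # Bernoulli rounding: the expected repeat count equals the real-valued factor.
    rands = torch.rand(len(frac_part), generator=generator)
    rep_factors = int_part + (rands < frac_part).float()
    indices = []
    for idx, rep in enumerate(rep_factors):
        indices.extend([idx] * int(rep.item()))
    return torch.tensor(indices, dtype=torch.int64)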
Example #7
    def __init__(
        self, cfg, dataset_dicts, batch_size, shuffle=True, seed=None
    ):
        """
        Args:
            cfg: config parameters
            dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
            batch_size (int): Size of mini-batch.
            shuffle (bool): whether to shuffle the indices or not
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        self._offset = cfg.DATALOADER.PAIR_OFFSET_RANGE

        self._shuffle = shuffle
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)
        # only sample the previous frame during eval

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()

        self._total_size = len(dataset_dicts)
        total_batch_size = batch_size * self._world_size
        self._size = (
            len(dataset_dicts) // total_batch_size
        ) * total_batch_size
        self._batch_size = batch_size
        self.num_per_worker = self._size // self._world_size

        self._dataset_dicts = dataset_dicts
        self._data_by_video = {}
        for i, data in enumerate(dataset_dicts):
            data["total_idx"] = i
            frames = self._data_by_video.setdefault(data["video_id"], {})
            frames[data["index"]] = data
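To illustrate the lookup structure this builds (made-up records):

dataset_dicts = [
    {"video_id": "v1", "index": 0},
    {"video_id": "v1", "index": 1},
    {"video_id": "v2", "index": 0},
]
# After the loop, every record also holds its flat position ("total_idx"),
# and _data_by_video == {"v1": {0: ..., 1: ...}, "v2": {0: ...}}. Keying by
# video and frame index lets the sampler pair a sampled frame with a nearby
# frame (within PAIR_OFFSET_RANGE) from the same video.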
Example #8
    def __init__(
        self,
        size: int,
        subset_ratio: float,
        shuffle: bool = True,
        seed_shuffle: Optional[int] = None,
        seed_subset: Optional[int] = None,
    ):
        """
        Args:
            size (int): the total number of samples in the underlying dataset to sample from
            subset_ratio (float): the ratio of subset data to sample from the underlying dataset
            shuffle (bool): whether to shuffle the indices or not
            seed_shuffle (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
            seed_subset (int): the seed used to randomize the subset to be sampled.
                Must be the same across all workers. If None, a random seed shared
                among all workers will be used (this requires synchronization across workers).
        """
        super().__init__(size=size, shuffle=shuffle, seed=seed_shuffle)

        assert 0.0 < subset_ratio <= 1.0
        self._size_subset = int(size * subset_ratio)
        assert self._size_subset > 0
        if seed_subset is None:
            seed_subset = comm.shared_random_seed()
        self._seed_subset = int(seed_subset)

        # randomly generate the subset indexes to be sampled from
        g = torch.Generator()
        g.manual_seed(self._seed_subset)
        indexes_randperm = torch.randperm(self._size, generator=g)
        self._indexes_subset = indexes_randperm[:self._size_subset]

        logger.info("Using RandomSubsetTrainingSampler......")
        logger.info(
            f"Randomly sample {self._size_subset} data from the original {self._size} data"
        )
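Note that the two seeds play different roles: seed_subset fixes once, at construction time, which indices are ever eligible, while seed_shuffle (handled by the parent class) only controls the order they are visited in. A standalone illustration with hypothetical sizes:

import torch

g = torch.Generator()
g.manual_seed(7)  # plays the role of seed_subset
subset = torch.randperm(1000, generator=g)[:100]
# `subset` stays fixed for the whole run; per-epoch shuffling permutes only
# these 100 indices and never touches the other 900.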
Example #9
    def __init__(self, dataset_dicts, repeat_thresh, shuffle=True, seed=None):
        """
        Args:
            dataset_dicts (list[dict]): annotations in Detectron2 dataset format.
            repeat_thresh (float): frequency threshold below which data is repeated.
            shuffle (bool): whether to shuffle the indices or not
            seed (int): the initial seed of the shuffle. Must be the same
                across all workers. If None, a random seed shared among all
                workers will be used (this requires synchronization across workers).
        """
        self._shuffle = shuffle
        if seed is None:
            seed = comm.shared_random_seed()
        self._seed = int(seed)

        self._rank = comm.get_rank()
        self._world_size = comm.get_world_size()

        # Get fractional repeat factors and split into whole number (_int_part)
        # and fractional (_frac_part) parts.
        rep_factors = self._get_repeat_factors(dataset_dicts, repeat_thresh)
        self._int_part = torch.trunc(rep_factors)
        self._frac_part = rep_factors - self._int_part
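The helper _get_repeat_factors is not shown in this snippet. In detectron2 it implements the LVIS resampling rule: the per-category factor is r(c) = max(1, sqrt(t / f(c))), where f(c) is the fraction of images containing category c and t is repeat_thresh, and the per-image factor is the max over categories present in that image. A free-function sketch along those lines:

import math
from collections import defaultdict

import torch

def get_repeat_factors(dataset_dicts, repeat_thresh):
    # 1. Image frequency per category: f(c) = #images containing c / #images.
    category_freq = defaultdict(int)
    for d in dataset_dicts:
        for cat_id in {ann["category_id"] for ann in d["annotations"]}:
            category_freq[cat_id] += 1
    num_images = len(dataset_dicts)
    # 2. Category-level repeat factor: r(c) = max(1, sqrt(t / f(c))).
    category_rep = {
        cat_id: max(1.0, math.sqrt(repeat_thresh * num_images / count))
        for cat_id, count in category_freq.items()
    }
    # 3. Image-level repeat factor: r(I) = max over categories present in I.
    rep_factors = [
        max((category_rep[ann["category_id"]] for ann in d["annotations"]),
            default=1.0)
        for d in dataset_dicts
    ]
    return torch.tensor(rep_factors, dtype=torch.float32)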
Example #10
def shared_random_seed():
    """Return a random seed that is shared by all workers."""
    return comm.shared_random_seed()
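For reference, detectron2's comm.shared_random_seed works roughly as sketched below: every process draws its own random integer, an all-gather makes all draws visible to everyone, and rank 0's draw is returned, so all workers end up with the same seed. This is the synchronization the docstrings above warn about.

import numpy as np
from detectron2.utils import comm

def shared_random_seed_sketch():
    ints = np.random.randint(2 ** 31)
    return comm.all_gather(ints)[0]  # all workers adopt rank 0's draw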