def __init__(self, dataset, batch_size=1, world_size=None, rank=None, seed=0, shuffle=True): _rank, _world_size = get_dist_info() if world_size is None: world_size = _world_size if rank is None: rank = _rank self.rank = rank self.world_size = world_size self.dataset = dataset self.batch_size = batch_size # In distributed sampling, different ranks should sample # non-overlapped data in the dataset. Therefore, this function # is used to make sure that each rank shuffles the data indices # in the same order based on the same seed. Then different ranks # could use different indices to select non-overlapped data from the # same data list. self.seed = sync_random_seed(seed) self.shuffle = shuffle assert hasattr(self.dataset, 'flag') self.flag = self.dataset.flag self.group_sizes = np.bincount(self.flag) # buffer used to save indices of each group self.buffer_per_group = {k: [] for k in range(len(self.group_sizes))} self.size = len(dataset) self.indices = self._indices_of_rank()
def __init__(self, dataset, batch_size=1, world_size=None, rank=None, seed=0, shuffle=True): _rank, _world_size = get_dist_info() if world_size is None: world_size = _world_size if rank is None: rank = _rank self.rank = rank self.world_size = world_size self.dataset = dataset self.batch_size = batch_size # In distributed sampling, different ranks should sample # non-overlapped data in the dataset. Therefore, this function # is used to make sure that each rank shuffles the data indices # in the same order based on the same seed. Then different ranks # could use different indices to select non-overlapped data from the # same data list. self.seed = sync_random_seed(seed) self.shuffle = shuffle self.size = len(dataset) self.indices = self._indices_of_rank()
def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, seed=0): super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) # In distributed sampling, different ranks should sample # non-overlapped data in the dataset. Therefore, this function # is used to make sure that each rank shuffles the data indices # in the same order based on the same seed. Then different ranks # could use different indices to select non-overlapped data from the # same data list. self.seed = sync_random_seed(seed)
def __init__(self, dataset, samples_per_gpu=1, num_replicas=None, rank=None, seed=0, num_sample_class=1): _rank, _num_replicas = get_dist_info() if num_replicas is None: num_replicas = _num_replicas if rank is None: rank = _rank self.dataset = dataset self.num_replicas = num_replicas self.samples_per_gpu = samples_per_gpu self.rank = rank self.epoch = 0 # Must be the same across all workers. If None, will use a # random seed shared among workers # (require synchronization among all workers) self.seed = sync_random_seed(seed) # The number of samples taken from each per-label list assert num_sample_class > 0 and isinstance(num_sample_class, int) self.num_sample_class = num_sample_class # Get per-label image list from dataset assert hasattr(dataset, 'get_cat2imgs'), \ 'dataset must have `get_cat2imgs` function' self.cat_dict = dataset.get_cat2imgs() self.num_samples = int( math.ceil( len(self.dataset) * 1.0 / self.num_replicas / self.samples_per_gpu)) * self.samples_per_gpu self.total_size = self.num_samples * self.num_replicas # get number of images containing each category self.num_cat_imgs = [len(x) for x in self.cat_dict.values()] # filter labels without images self.valid_cat_inds = [ i for i, length in enumerate(self.num_cat_imgs) if length != 0 ] self.num_classes = len(self.valid_cat_inds)