import math
import warnings
from typing import List, Optional

import torch
import torch.distributed as dist
from torch.utils.data import Dataset
from transformers import BatchEncoding


def __init__(self, dataset, num_replicas=None, rank=None, batch_size=None):
    # SequentialDistributedSampler.__init__ (deprecated): splits the dataset evenly across
    # replicas, optionally padding so every rank draws a whole number of batches.
    warnings.warn(
        "SequentialDistributedSampler is deprecated and will be removed in v5 of Transformers.",
        FutureWarning,
    )
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()
    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    num_samples = len(self.dataset)
    # Add extra samples to make num_samples a multiple of batch_size if passed
    if batch_size is not None:
        self.num_samples = int(math.ceil(num_samples / (batch_size * num_replicas))) * batch_size
    else:
        self.num_samples = int(math.ceil(num_samples / num_replicas))
    self.total_size = self.num_samples * self.num_replicas
    self.batch_size = batch_size
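
# Sketch (not part of the original example): the padding arithmetic above, worked
# through with hypothetical numbers. With 103 samples, 4 replicas and batch_size=8,
# each rank is assigned a whole number of batches and the remainder is padded.
per_rank = int(math.ceil(103 / (8 * 4))) * 8  # ceil(3.22) * 8 = 32 samples per rank
total_size = per_rank * 4                     # 128, so 25 indices are repeated as padding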
Example 2
def __init__(
    self,
    batch_size: int,
    dataset: Optional[Dataset] = None,
    num_replicas: Optional[int] = None,
    rank: Optional[int] = None,
    seed: int = 0,
    drop_last: bool = False,
    lengths: Optional[List[int]] = None,
    model_input_name: Optional[str] = None,
):
    # Distributed, length-grouped sampler __init__: infers per-example lengths from the
    # model input (by default "input_ids") when they are not supplied explicitly.
    if dataset is None and lengths is None:
        raise ValueError("One of dataset and lengths must be provided.")
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()

    self.batch_size = batch_size
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0
    self.drop_last = drop_last

    if lengths is None:
        model_input_name = model_input_name if model_input_name is not None else "input_ids"
        if not isinstance(dataset[0], (dict, BatchEncoding)) or model_input_name not in dataset[0]:
            raise ValueError(
                "Can only automatically infer lengths for datasets whose items are dictionaries with an "
                f"'{model_input_name}' key."
            )
        lengths = [len(feature[model_input_name]) for feature in dataset]
    self.lengths = lengths

    # If the dataset length is evenly divisible by # of replicas, then there
    # is no need to drop any data, since the dataset will be split equally.
    if self.drop_last and len(self.lengths) % self.num_replicas != 0:
        # Split to the nearest length that is evenly divisible, so that each rank
        # receives the same amount of data when using this sampler.
        self.num_samples = math.ceil((len(self.lengths) - self.num_replicas) / self.num_replicas)
    else:
        self.num_samples = math.ceil(len(self.lengths) / self.num_replicas)
    self.total_size = self.num_samples * self.num_replicas
    self.seed = seed
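
# Sketch (not from the original source): how the automatic length inference above
# behaves on a hypothetical list-of-dicts dataset when `lengths` is not supplied;
# "input_ids" is the default model_input_name.
toy_dataset = [{"input_ids": [0] * n} for n in (5, 12, 7, 9)]
inferred_lengths = [len(feature["input_ids"]) for feature in toy_dataset]  # [5, 12, 7, 9]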
Example 3
def __init__(self, dataset, num_replicas=None, rank=None):
    # Basic distributed sampler __init__: each rank gets ceil(len(dataset) / num_replicas)
    # samples, so total_size may exceed the dataset size by a few padded indices.
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()
    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
    self.total_size = self.num_samples * self.num_replicas
def all_reduce_item(value, op='sum'):
    """
    All-reduces a single scalar value if distributed is in use.
    """
    if dist.is_available() and dist.is_initialized():
        if op in ('sum', 'mean'):
            dop = dist.ReduceOp.SUM
        elif op == 'min':
            dop = dist.ReduceOp.MIN
        elif op == 'max':
            dop = dist.ReduceOp.MAX
        elif op == 'product':
            dop = dist.ReduceOp.PRODUCT
        else:
            raise RuntimeError('Unsupported reduce op')

        # backend = dist.get_backend()
        # if backend == dist.Backend.NCCL:
        #     device = torch.device('cuda')
        # elif backend == dist.Backend.GLOO:
        #     device = torch.device('cpu')
        # else:
        #     raise RuntimeError('Unsupported distributed backend')

        # The tensor is placed on CUDA unconditionally, so this path assumes an
        # NCCL (GPU) backend; the commented-out block above shows the backend-aware
        # alternative.
        device = torch.device('cuda')
        tensor = torch.tensor(value, device=device)
        dist.all_reduce(tensor, dop)
        if op == 'mean':
            # Divide out of place so integer inputs yield a float mean (in-place
            # true division on an integer tensor is not allowed).
            tensor = tensor / get_world_size()
        ret = tensor.item()
    else:
        ret = value
    return ret
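
# Usage sketch (illustrative, not from the original source): averaging a hypothetical
# per-rank scalar. When torch.distributed is not initialized, the call returns the
# input unchanged, so the same code path also works in single-process runs.
local_loss = 0.123
global_loss = all_reduce_item(local_loss, op='mean')  # 0.123 in a single process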
def get_rank():
    """
    Gets distributed rank or returns zero if distributed is not initialized.
    """
    if dist.is_available() and dist.is_initialized():
        rank = dist.get_rank()
    else:
        rank = 0
    return rank
def get_world_size():
    """
    Gets total number of distributed workers or returns one if distributed is
    not initialized.
    """
    if dist.is_available() and dist.is_initialized():
        world_size = dist.get_world_size()
    else:
        world_size = 1
    return world_size
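
# Usage sketch (illustrative): these helpers let rank-dependent code run unchanged
# outside of distributed mode, where they fall back to rank 0 and world size 1.
if get_rank() == 0:
    print(f"running with {get_world_size()} worker(s)")  # only the main process logs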
def __init__(self, dataset, num_replicas=None, rank=None, batch_size=None):
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()
    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    num_samples = len(self.dataset)
    # Add extra samples to make num_samples a multiple of batch_size if passed
    if batch_size is not None:
        self.num_samples = int(math.ceil(num_samples / (batch_size * num_replicas))) * batch_size
    else:
        self.num_samples = int(math.ceil(num_samples / num_replicas))
    self.total_size = self.num_samples * self.num_replicas
    self.batch_size = batch_size
def barrier():
    """
    Calls dist.barrier() if distributed is in use.
    """
    if dist.is_available() and dist.is_initialized():
        dist.barrier()
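
# Usage sketch (illustrative): a common pattern is to let rank 0 write an artifact
# and make every other rank wait at the barrier before reading it; outside of
# distributed mode the barrier is a no-op.
#
#   if get_rank() == 0:
#       save_checkpoint()  # hypothetical helper, not defined in this example
#   barrier()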