def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
    """Record the dataset and the per-replica sampling geometry.

    Args:
        dataset: Object to sample from; only ``len(dataset)`` is used here.
        num_replicas: Number of replicas the dataset is divided across.
            When ``None`` it is taken from ``dist.get_world_size()``.
        rank: Index of this replica. When ``None`` it is taken from
            ``dist.get_rank()``.
        shuffle: Stored as ``self.shuffle``. How it is consumed is decided
            by the rest of the class, which is not visible here.

    Raises:
        RuntimeError: If ``num_replicas`` or ``rank`` is ``None`` and
            ``dist.is_available()`` returns a falsy value.
    """
    # NOTE(review): `dist` is presumably `torch.distributed` -- confirm
    # against the file's imports.
    if num_replicas is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        num_replicas = dist.get_world_size()
    if rank is None:
        if not dist.is_available():
            raise RuntimeError("Requires distributed package to be available")
        rank = dist.get_rank()

    self.dataset = dataset
    self.num_replicas = num_replicas
    self.rank = rank
    self.epoch = 0

    # Round up so every replica is assigned the same number of samples;
    # total_size can therefore exceed len(dataset).
    self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
    self.total_size = self.num_samples * self.num_replicas

    # Honour the caller's choice. The previous code hard-coded True here,
    # which silently ignored shuffle=False.
    self.shuffle = shuffle
INIT_METHOD = os.getenv("INIT_METHOD", "env://") MASTER_PORT = "29500" DEFAULT_TIMEOUT = 300 CUSTOMIZED_TIMEOUT = {"test_DistributedDataParallel": 500} def get_timeout(test_id): test_name = test_id.split(".")[-1] if test_name in CUSTOMIZED_TIMEOUT: return CUSTOMIZED_TIMEOUT[test_name] else: return DEFAULT_TIMEOUT if not dist.is_available(): print("Distributed not available, skipping tests") sys.exit(0) SKIP_IF_NO_CUDA_EXIT_CODE = 75 SKIP_IF_NO_GPU_EXIT_CODE = 76 SKIP_IF_SMALL_WORLDSIZE_EXIT_CODE = 77 SKIP_IF_BACKEND_UNAVAILABLE = 78 def skip_if_no_cuda_distributed(func): func.skip_if_no_cuda_distributed = True @wraps(func) def wrapper(*args, **kwargs): if not torch.cuda.is_available():