Example #1
# NOTE: the imports and class wrapper below are added so the example runs on
# its own; the original excerpt shows only the __init__ body of a
# DistributedSampler-style class (cf. torch.utils.data.distributed.DistributedSampler).
import math

import torch
import torch.distributed as dist
from torch.utils.data import Sampler


class DistributedSampler(Sampler):
    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        if num_replicas is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            num_replicas = dist.get_world_size()
        if rank is None:
            if not dist.is_available():
                raise RuntimeError("Requires distributed package to be available")
            rank = dist.get_rank()
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank
        self.epoch = 0
        # Each replica draws ceil(len(dataset) / num_replicas) samples so that
        # every rank iterates over the same number of batches.
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas
        self.shuffle = shuffle  # bug fix: was hard-coded to True, ignoring the argument
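
# For context, a minimal sketch of the companion methods such a sampler
# usually defines; the bodies below are an assumption modeled on
# torch.utils.data.distributed.DistributedSampler and are not part of the
# original example. They show how num_samples, total_size, epoch, and
# shuffle are consumed.

    def set_epoch(self, epoch):
        # Called once per epoch so each epoch gets a different shuffle order.
        self.epoch = epoch

    def __iter__(self):
        if self.shuffle:
            # Shuffle deterministically from the epoch so every replica
            # agrees on the same permutation.
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = list(range(len(self.dataset)))

        # Pad the index list so it divides evenly across replicas.
        indices += indices[: (self.total_size - len(indices))]
        assert len(indices) == self.total_size

        # Each rank takes every num_replicas-th index, starting at its rank.
        indices = indices[self.rank : self.total_size : self.num_replicas]
        assert len(indices) == self.num_samples
        return iter(indices)

    def __len__(self):
        return self.num_samples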

# Imports assumed for the test-harness snippet below (they are not shown in
# the original excerpt): os/sys for environment lookups and exit codes,
# functools.wraps for the skip decorators, and torch / torch.distributed for
# the availability checks.
import os
import sys
from functools import wraps

import torch
import torch.distributed as dist

INIT_METHOD = os.getenv("INIT_METHOD", "env://")
MASTER_PORT = "29500"
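
# As a hedged illustration of how INIT_METHOD and MASTER_PORT are typically
# consumed (the helper, backend choice, and MASTER_ADDR default below are
# assumptions, not part of the original snippet):
def example_init_process_group(rank, world_size, backend="gloo"):
    # Wire the port into the environment (required for the "env://" init
    # method) and create the default process group.
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
    os.environ["MASTER_PORT"] = MASTER_PORT
    dist.init_process_group(
        backend=backend,
        init_method=INIT_METHOD,
        rank=rank,
        world_size=world_size,
    )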

# Per-test timeouts, in seconds; tests not listed in CUSTOMIZED_TIMEOUT fall
# back to DEFAULT_TIMEOUT.
DEFAULT_TIMEOUT = 300
CUSTOMIZED_TIMEOUT = {"test_DistributedDataParallel": 500}


def get_timeout(test_id):
    test_name = test_id.split(".")[-1]
    if test_name in CUSTOMIZED_TIMEOUT:
        return CUSTOMIZED_TIMEOUT[test_name]
    else:
        return DEFAULT_TIMEOUT
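
# Example lookups; the test ids below are hypothetical, and only the final
# dot-separated component matters for the lookup.
assert get_timeout("TestDistBackend.test_DistributedDataParallel") == 500
assert get_timeout("TestDistBackend.test_broadcast") == DEFAULT_TIMEOUT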


if not dist.is_available():
    print("Distributed not available, skipping tests")
    sys.exit(0)

# Sentinel exit codes: a test worker process exits with one of these to signal
# that the test should be skipped (the skip_if_* decorators below use them
# when a precondition is not met).
SKIP_IF_NO_CUDA_EXIT_CODE = 75
SKIP_IF_NO_GPU_EXIT_CODE = 76
SKIP_IF_SMALL_WORLDSIZE_EXIT_CODE = 77
SKIP_IF_BACKEND_UNAVAILABLE = 78
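
# A hedged sketch of how the parent test process can translate these sentinel
# codes back into unittest skips once a worker exits; the helper below is
# hypothetical and not part of the original harness excerpt.
import unittest

_SKIP_REASONS = {
    SKIP_IF_NO_CUDA_EXIT_CODE: "CUDA is not available",
    SKIP_IF_NO_GPU_EXIT_CODE: "not enough GPUs on this machine",
    SKIP_IF_SMALL_WORLDSIZE_EXIT_CODE: "world size is too small for this test",
    SKIP_IF_BACKEND_UNAVAILABLE: "distributed backend is unavailable",
}


def check_worker_exit_code(exit_code):
    # Raise unittest.SkipTest for a recognized skip code, otherwise require a
    # clean exit.
    if exit_code in _SKIP_REASONS:
        raise unittest.SkipTest(_SKIP_REASONS[exit_code])
    assert exit_code == 0, "worker failed with exit code %d" % exit_code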


def skip_if_no_cuda_distributed(func):
    func.skip_if_no_cuda_distributed = True

    @wraps(func)
    def wrapper(*args, **kwargs):
        if not torch.cuda.is_available():