def test_equal_gpu_allocation(num_workers, num_gpus_per_worker):
    """Check that GPU slots are split evenly across barrier tasks.

    For each requested slot count, verifies the number of Spark tasks the
    runner plans to launch and the per-task GPU count reported from inside
    the barrier job.
    """

    def train_fn():
        # Runs on Spark executors: imports must live inside the closure.
        import os
        from pyspark import BarrierTaskContext

        ctx = BarrierTaskContext.get()
        devices = os.environ['CUDA_VISIBLE_DEVICES']
        gpu_count = len(devices.split(',')) if devices else 0
        # allGather returns every task's value as strings, in task order.
        return [int(v) for v in ctx.allGather(str(gpu_count))]

    # (num_slots, expected task count, expected GPUs per task)
    expectations = [
        (2, 1, [2]),
        (4, 1, [4]),
        (6, 2, [3, 3]),
        (8, 2, [4, 4]),
    ]
    for num_slots, expected_tasks, expected_gpus in expectations:
        runner = MirroredStrategyRunner(num_slots=num_slots)
        assert runner.get_num_tasks() == expected_tasks
        assert runner.run(train_fn) == expected_gpus
def test_spark_task_cuda_devices_env_support(num_workers, num_gpus_per_worker):
    """Check that each task sees exactly `num_slots` distinct CUDA devices,
    all drawn from the expected physical device ids (10-13)."""

    def train_fn():
        # Runs on Spark executors: imports must live inside the closure.
        import os
        return os.environ['CUDA_VISIBLE_DEVICES']

    for slots in (2, 3, 4):
        runner = MirroredStrategyRunner(num_slots=slots)
        cuda_env = runner.run(train_fn)
        visible = {int(dev) for dev in cuda_env.split(',')}
        assert len(visible) == slots
        for dev_id in visible:
            assert dev_id in [10, 11, 12, 13]
def test_cpu_training_with_gpus(num_workers, num_gpus_per_worker):
    """Check that with use_gpu=False no GPUs are assigned to any task,
    even on a cluster that has GPUs available."""

    def train_fn():
        # FIX: `import os` was missing here. train_fn is serialized and
        # executed on Spark executors, so the closure must carry its own
        # imports (every sibling train_fn in this file does this).
        import os
        from pyspark import BarrierTaskContext
        context = BarrierTaskContext.get()
        cuda_state = os.environ['CUDA_VISIBLE_DEVICES']
        if cuda_state:
            num_gpus = len(os.environ['CUDA_VISIBLE_DEVICES'].split(','))
        else:
            num_gpus = 0
        # Gather every task's GPU count; result is ordered by task index.
        return [int(e) for e in context.allGather(str(num_gpus))]

    runner = MirroredStrategyRunner(num_slots=2, use_gpu=False)
    # CPU mode: one task per slot.
    assert runner.get_num_tasks() == 2
    gpus_used_by_each_task = runner.run(train_fn)
    assert gpus_used_by_each_task == [0, 0]
def test_equal_gpu_allocation(num_workers, num_gpus_per_worker):
    """Check GPU allocation against the cluster's configured
    spark.task.resource.gpu.amount: task count is ceil(slots / per-task GPUs)
    and any remainder is spread one-per-task over the leading tasks."""

    def train_fn():
        # Runs on Spark executors: imports must live inside the closure.
        import os
        from pyspark import BarrierTaskContext

        context = BarrierTaskContext.get()
        visible = os.environ['CUDA_VISIBLE_DEVICES']
        count = len(visible.split(',')) if visible else 0
        return [int(x) for x in context.allGather(str(count))]

    for num_slots in (2, 4, 6, 8):
        runner = MirroredStrategyRunner(num_slots=num_slots)
        per_task = int(
            runner.sc.getConf().get('spark.task.resource.gpu.amount'))
        num_tasks = math.ceil(num_slots / per_task)
        assert runner.get_num_tasks() == num_tasks
        # Even split: first (num_slots % num_tasks) tasks get one extra GPU.
        base, extra = divmod(num_slots, num_tasks)
        expected = [base + (1 if i < extra else 0) for i in range(num_tasks)]
        assert runner.run(train_fn) == expected