def testGpuInvalidConfig(self):
    """Check that invalid memory-growth / virtual-GPU combinations are rejected.

    The test enables memory growth on every physical GPU, then expects:
      * a ValueError matching 'memory limit' when virtual devices are requested
        on the last GPU without a memory limit, leaving that GPU without a
        virtual device configuration;
      * `allow_growth` to be False in the context config once virtual devices
        with a memory limit have been configured;
      * a ValueError matching 'virtual devices' when memory growth is changed
        on the GPU that now has virtual devices.
    """
    gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(gpus), 0)

    for gpu in gpus:
        config.set_memory_growth(gpu, True)

    c = context.context().config
    self.assertTrue(c.gpu_options.allow_growth)

    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(ValueError, 'memory limit'):
        config.set_virtual_device_configuration(gpus[-1], [
            context.VirtualDeviceConfiguration(),
            context.VirtualDeviceConfiguration()
        ])

    # The rejected call must not have stored a configuration.
    self.assertIsNone(config.get_virtual_device_configuration(gpus[-1]))
    config.set_virtual_device_configuration(gpus[-1], [
        context.VirtualDeviceConfiguration(memory_limit=10),
        context.VirtualDeviceConfiguration(memory_limit=10)
    ])

    c = context.context().config
    self.assertFalse(c.gpu_options.allow_growth)

    with self.assertRaisesRegex(ValueError, 'virtual devices'):
        config.set_memory_growth(gpus[-1], False)
def testCpuMultiple(self):
    """Split the single physical CPU into two virtual CPUs and place ops on them.

    Expects exactly one physical CPU, two logical CPUs after configuration,
    successful op placement on CPU:0 and CPU:1, and a RuntimeError matching
    'unknown device' for CPU:2.
    """
    cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(cpus), 1)

    config.set_virtual_device_configuration(cpus[0], [
        context.VirtualDeviceConfiguration(),
        context.VirtualDeviceConfiguration()
    ])

    context.ensure_initialized()

    cpus = config.list_logical_devices('CPU')
    self.assertEqual(len(cpus), 2)

    with ops.device('/device:CPU:0'):
        self.evaluate(constant_op.constant(1.0))
    with ops.device('/device:CPU:1'):
        self.evaluate(constant_op.constant(1.0))
    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(RuntimeError, 'unknown device'):
        with ops.device('/device:CPU:2'):
            self.evaluate(constant_op.constant(1.0))

    # Ensure we can place ops on each of the device names
    for cpu in cpus:
        with ops.device(cpu.name):
            self.evaluate(constant_op.constant(1.0))
def testGpuNone(self):
    """Hide every GPU via the visible-device list and check GPU ops fail.

    Requires at least one physical GPU and exactly one physical CPU. After
    restricting the visible devices to the CPU, no GPU is reported as visible
    and placing an op on GPU:0 is expected to raise a RuntimeError matching
    'unknown device'.
    """
    gpus = config.list_physical_devices('GPU')
    self.assertGreater(len(gpus), 0)

    cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(cpus), 1)

    self.assertEqual(len(config.get_visible_devices('CPU')), 1)
    self.assertGreater(len(config.get_visible_devices('GPU')), 0)

    config.set_visible_devices(cpus[0])
    self.assertEqual(len(config.get_visible_devices('CPU')), 1)
    self.assertEqual(len(config.get_visible_devices('GPU')), 0)

    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(RuntimeError, 'unknown device'):
        with ops.device('/device:GPU:0'):
            a = constant_op.constant(1.0)
            self.evaluate(a)
def testV1CompatibilityDummyInivisibleDeviceList(self):
    """Check that `visible_device_list` survives untouched without GPUs.

    Skipped when any physical GPU is present. Installs a ConfigProto whose GPU
    options name device '0' and expects the derived context config to report
    the same visible device list.
    """
    if config.list_physical_devices('GPU'):
        self.skipTest('Test requires no GPUs')

    # Ensure GPU options left untouched on CPU only environments
    # NOTE(review): resetting `_physical_devices` presumably forces the device
    # list to be rebuilt for the new config -- confirm against `context`.
    context.context()._physical_devices = None
    gpu_options = config_pb2.GPUOptions(visible_device_list='0')
    context.context()._config = config_pb2.ConfigProto(gpu_options=gpu_options)

    derived_config = context.context().config
    self.assertEqual(derived_config.gpu_options.visible_device_list, '0')
def testV1Compatibility(self):
    """Check how a V1 ConfigProto is reflected in the derived context config.

    Covers the default CPU count, an explicit CPU split, an empty
    `visible_device_list` (expected to expand to every physical GPU index),
    an out-of-range index (expected to raise ValueError), and a single valid
    index.
    """
    # Ensure we set 1 CPU by default
    context.context()._config = config_pb2.ConfigProto()
    new_config = context.context().config
    self.assertEqual(new_config.device_count['CPU'], 1)
    # NOTE(review): resetting `_physical_devices` presumably forces the device
    # list to be rebuilt for the next config -- confirm against `context`.
    context.context()._physical_devices = None

    # Ensure CPU is split
    context.context()._config = config_pb2.ConfigProto(device_count={'CPU': 2})
    new_config = context.context().config
    self.assertEqual(new_config.device_count['CPU'], 2)
    context.context()._physical_devices = None

    # Handle empty visible device list
    context.context()._config = config_pb2.ConfigProto(
        gpu_options=config_pb2.GPUOptions(visible_device_list=''))
    gpus = config.list_physical_devices('GPU')
    gpu_count = len(gpus)
    new_config = context.context().config
    self.assertEqual(new_config.gpu_options.visible_device_list,
                     ','.join(str(i) for i in range(gpu_count)))
    context.context()._physical_devices = None

    # Handle invalid visible device list: `gpu_count` is one past the last
    # valid index.
    context.context()._config = config_pb2.ConfigProto(
        gpu_options=config_pb2.GPUOptions(visible_device_list=str(gpu_count)))
    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(ValueError, 'Invalid visible device index'):
        # Results are irrelevant here; only the raised error matters.
        config.list_physical_devices('GPU')
        _ = context.context().config
    context.context()._physical_devices = None

    # Handle single visible device list
    last_gpu_index = str(gpu_count - 1)
    context.context()._config = config_pb2.ConfigProto(
        gpu_options=config_pb2.GPUOptions(visible_device_list=last_gpu_index))
    config.list_physical_devices('GPU')
    new_config = context.context().config
    self.assertEqual(new_config.gpu_options.visible_device_list,
                     last_gpu_index)
    context.context()._physical_devices = None
def testGpuNone(self):
    """Hide every GPU and check that the visible set can no longer change.

    Requires at least one physical GPU and exactly one physical CPU. After
    restricting the visible devices to the CPU and running an op, placing an
    op on GPU:0 is expected to raise a RuntimeError matching 'unknown device',
    changing the visible devices is expected to raise a RuntimeError matching
    'cannot be modified', and re-applying the same visible devices is accepted.
    """
    gpus = config.list_physical_devices('GPU')
    self.assertGreater(len(gpus), 0)

    cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(cpus), 1)

    self.assertEqual(len(config.get_visible_devices('CPU')), 1)
    self.assertGreater(len(config.get_visible_devices('GPU')), 0)

    config.set_visible_devices(cpus[0])
    self.assertEqual(len(config.get_visible_devices('CPU')), 1)
    self.assertEqual(len(config.get_visible_devices('GPU')), 0)

    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(RuntimeError, 'unknown device'):
        with ops.device('/device:GPU:0'):
            a = constant_op.constant(1.0)
            self.evaluate(a)

    # Modifying the visible devices is not supported
    with self.assertRaisesRegex(RuntimeError, 'cannot be modified'):
        config.set_visible_devices(gpus)

    # Setting the same visible devices is fine
    config.set_visible_devices(cpus[0])
def should_execute_combination(self, kwargs):
    """Decide whether a test combination can run with the available GPUs.

    Args:
      kwargs: The combination's keyword arguments. Reads `required_gpus`,
        `required_physical_gpus` and the GPU requirements of any
        `NamedDistribution` values.

    Returns:
      A `(should_execute, reason)` tuple. `reason` is None when the
      combination should run, otherwise a message explaining the skip.

    Raises:
      ValueError: If `required_gpus` is combined with a `NamedDistribution`
        argument, or if both `required_gpus` and `required_physical_gpus`
        are set.
    """
    distributions = [
        v for v in kwargs.values() if isinstance(v, NamedDistribution)
    ]
    required_gpus = kwargs.get("required_gpus", 0)
    required_physical_gpus = kwargs.get("required_physical_gpus", 0)

    if distributions and required_gpus:
        raise ValueError(
            "Do not use `required_gpus` and arguments of type "
            "NamedDistribution together.")

    # The strictest requirement across explicit arguments and distributions.
    number_of_required_gpus = max(
        [required_gpus] + [required_physical_gpus] +
        [d.required_physical_gpus or 0 for d in distributions] +
        [d.required_gpus or 0 for d in distributions])

    if required_physical_gpus and required_gpus:
        raise ValueError(
            "Only one of `required_physical_gpus`(number of physical"
            " GPUs required) and `required_gpus`(total number of "
            "GPUs required) should be set. ")

    if not number_of_required_gpus and GPUCombination.GPU_TEST:
        return (False, "Test that doesn't require GPUs.")

    # Only query the runtime when GPUs are actually required, as before.
    if number_of_required_gpus > 0:
        available_gpus = context.num_gpus()
        if available_gpus < number_of_required_gpus:
            return (False,
                    "Only {} of {} required GPUs are available.".format(
                        available_gpus, number_of_required_gpus))

    physical_gpu_count = len(config.list_physical_devices("GPU"))
    if required_physical_gpus > physical_gpu_count:
        # Report the count; the message previously interpolated the device
        # list itself.
        return (False,
                "Only {} of {} required physical GPUs are available.".format(
                    physical_gpu_count, required_physical_gpus))

    return (True, None)
def configureDevicesForMultiDeviceTest(self, num_devices):
    """Sets up logical devices and returns their names for multi-device tests.

    The first physical CPU is split into `num_devices` logical CPUs. The
    returned list holds `num_devices` names: logical CPUs for all but the last
    entry, and for the last entry GPU:0 when a physical GPU is present or the
    final logical CPU otherwise.

    Args:
      num_devices: The number of devices to configure.

    Returns:
      A list of device names to use for a multi-device test.
    """
    physical_cpus = config.list_physical_devices("CPU")
    physical_gpus = config.list_physical_devices("GPU")

    first_cpu = physical_cpus[0]
    logical_cpu_configs = [
        context.LogicalDeviceConfiguration() for _ in range(num_devices)
    ]
    config.set_logical_device_configuration(first_cpu, logical_cpu_configs)

    last_index = num_devices - 1
    device_names = [
        "/device:CPU:" + str(index) for index in range(last_index)
    ]
    if physical_gpus:
        final_device = "/device:GPU:0"
    else:
        final_device = "/device:CPU:" + str(last_index)
    device_names.append(final_device)
    return device_names
def testGpuMultiple(self):
    """Place an op on every physical GPU and reject an out-of-range GPU index.

    Skipped with fewer than two physical GPUs. Placing an op on the index one
    past the last GPU is expected to raise a RuntimeError matching
    'unknown device'.
    """
    gpus = config.list_physical_devices('GPU')
    if len(gpus) < 2:
        self.skipTest('Need at least 2 GPUs')

    context.ensure_initialized()

    for i in range(len(gpus)):
        with ops.device('/device:GPU:' + str(i)):
            a = constant_op.constant(1.0)
            self.evaluate(a)

    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(RuntimeError, 'unknown device'):
        with ops.device('/device:GPU:' + str(len(gpus))):
            a = constant_op.constant(1.0)
            self.evaluate(a)
def testCollectiveReduceMinMax(self):
    """Run 'Max' and 'Min' all-reduce across two virtual GPUs.

    Skipped unless exactly one physical GPU is found; that GPU is split into
    two virtual devices. Each merge op is run over two five-element inputs and
    every returned tensor is compared against the element-wise expectation.
    """
    gpus = config.list_physical_devices('GPU')
    if len(gpus) != 1:
        self.skipTest('Expected 1 GPU but found {} GPUs'.format(len(gpus)))

    virtual_gpus = [
        context.VirtualDeviceConfiguration(1024),
        context.VirtualDeviceConfiguration(1024),
    ]
    config.set_virtual_device_configuration(gpus[0], virtual_gpus)
    context.ensure_initialized()

    @def_function.function
    def run_all_reduce(group_key, instance_key, merge_op):
        group_size = 2
        per_device_inputs = (
            ('/GPU:0', [1., 20., 3., 40., 5.]),
            ('/GPU:1', [10., 2., 30., 4., 50.]),
        )
        os.environ['NCCL_DEBUG'] = 'INFO'
        os.environ['NCCL_LAUNCH_MODE'] = 'PARALLEL'

        reduced = []
        for device_name, values in per_device_inputs:
            with ops.device(device_name):
                tensor = constant_op.constant(values)
                reduced.append(
                    collective_ops.all_reduce(
                        tensor, group_size, group_key, instance_key, merge_op,
                        final_op='Id', communication_hint='nccl'))
        return tuple(reduced)

    cases = [
        ('Max', [10., 20., 30., 40., 50.]),
        ('Min', [1., 2., 3., 4., 5.]),
    ]
    for merge_op, expected in cases:
        results = run_all_reduce(
            group_key=10, instance_key=20, merge_op=merge_op)
        for result in results:
            self.assertAllClose(result, expected, rtol=1e-5, atol=1e-5)
def testBackward(self, data_format):
    """Check that fused batch-norm gradients are reproducible.

    For each batch size, training mode and backprop target (x, scale, offset),
    the gradient is computed several times from one persistent tape and the
    results must be equal. When a physical GPU is present and
    `is_training` is False, an UnimplementedError is expected instead.

    Args:
      data_format: Data format forwarded to `_genParams` and
        `fused_batch_norm`.
    """
    with self.cached_session():
        for large_batch in (False, True):
            # Only run with float32, as float16 is very slow on CPUs
            x, scale, offset, mean, variance, upstream_gradients = (
                self._genParams(data_format, dtypes.float32, large_batch))
            for is_training in (False, True):
                for backprop_to in (x, scale, offset):
                    with backprop.GradientTape(persistent=True) as tape:
                        tape.watch(backprop_to)
                        op_output = nn_impl.fused_batch_norm(
                            x,
                            scale,
                            offset,
                            mean,
                            variance,
                            data_format=data_format,
                            is_training=is_training,
                            exponential_avg_factor=0.99)
                        gradient_injector_output = (
                            op_output[0] * upstream_gradients)

                    expect_unimplemented = (
                        bool(config.list_physical_devices('GPU'))
                        and not is_training)
                    if expect_unimplemented:
                        # Only backprop to offset is nondeterministic (on GPU,
                        # when is_training=False), but backprop to the other
                        # parameters is calculated using the same kernel.
                        with self.assertRaisesRegex(
                            errors_impl.UnimplementedError,
                            ('A deterministic GPU implementation of fused batch-norm'
                             ' backprop, when training is disabled, is not currently'
                             ' available.')):
                            grad = tape.gradient(
                                gradient_injector_output, backprop_to)
                            self.evaluate(grad)
                        continue

                    reference = self.evaluate(
                        tape.gradient(gradient_injector_output, backprop_to))
                    for _ in range(3):
                        repeat = self.evaluate(
                            tape.gradient(
                                gradient_injector_output, backprop_to))
                        self.assertAllEqual(reference, repeat)
def SetVirtualCpus(num_virtual_cpus):
    """Ensure the first physical CPU is split into enough virtual CPUs.

    If the first physical CPU has no virtual device configuration yet, it is
    configured with `num_virtual_cpus` virtual devices. An existing
    configuration is left as is, provided it is large enough.

    Args:
      num_virtual_cpus: Number of virtual CPUs required; must be at least 1.

    Raises:
      ValueError: If `num_virtual_cpus` is less than 1.
      RuntimeError: If no physical CPU is found, or the existing configuration
        has fewer than `num_virtual_cpus` virtual CPUs.
    """
    if num_virtual_cpus < 1:
        raise ValueError('`num_virtual_cpus` must be at least 1 not %r' %
                         (num_virtual_cpus,))

    cpu_devices = device_config.list_physical_devices('CPU')
    if not cpu_devices:
        raise RuntimeError('No CPUs found')

    existing = device_config.get_virtual_device_configuration(cpu_devices[0])
    if existing is not None:
        # Already configured: it can only be validated, not extended.
        if len(existing) < num_virtual_cpus:
            raise RuntimeError('Already configured with %d < %d virtual CPUs' %
                               (len(existing), num_virtual_cpus))
        return

    requested = [
        context.VirtualDeviceConfiguration() for _ in range(num_virtual_cpus)
    ]
    device_config.set_virtual_device_configuration(cpu_devices[0], requested)
def testTrtGraphConverter_DevicePlacement(self, device_id):
    """Check that the converted TRT engine op lands on the requested device.

    Skipped with fewer than two physical GPUs. The model is saved, converted
    inside an `ops.device(device_id)` scope, and the device recorded on the
    unique TRT engine op must contain `device_id` (or "GPU:0" when
    `device_id` is None), compared case-insensitively.

    Args:
      device_id: Device string to convert under, or None.
    """
    gpus = config.list_physical_devices("GPU")
    gpu_count = len(gpus)
    if gpu_count < 2:
        self.skipTest(
            "Expected at least 2 GPUs but found {} GPUs".format(gpu_count))

    # These tensors are not read again in this test; they are still created
    # exactly as before.
    ones = np.ones([4, 1, 1]).astype(np.float32)
    np_input1 = ops.convert_to_tensor(ones)
    np_input2 = ops.convert_to_tensor(np.ones([4, 1, 1]).astype(np.float32))

    # Create a model and save it.
    input_saved_model_dir = self.mkdtemp()
    root = self._GetModelForV2()
    signatures = {_SAVED_MODEL_SIGNATURE_KEY: root.run}
    save.save(root, input_saved_model_dir, signatures)

    converter = self._CreateConverterV2(
        input_saved_model_dir,
        precision_mode=trt_convert.TrtPrecisionMode.FP32)

    # Specify device on which converted model should be placed
    converted_model = None
    with ops.device(device_id):
        converted_model = converter.convert()

    # Verify that TRT engine op has the correct device.
    self._CheckTrtOps(converter._converted_func)
    engine_op = self._GetUniqueTRTEngineOp(converter._converted_graph_def)
    actual_device_id = engine_op.device

    expected_device_id = device_id if device_id is not None else "GPU:0"
    self.assertTrue(expected_device_id.lower() in actual_device_id.lower())

    del converter
    gc.collect()  # Force GC to destroy the TRT engine cache.
def testParamResolutionAfterTimeoutV2(self):
    """Check both members of a two-device group time out when launched apart.

    The single physical CPU is split into two logical CPUs. The all-reduce on
    CPU:0 is expected to fail with a DeadlineExceededError about waiting for
    other workers; the all-reduce launched afterwards on CPU:1 is expected to
    fail with a DeadlineExceededError about timing out during execution.
    """
    context._reset_context()
    timeout = 1.5

    cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(cpus), 1)
    logical_cpus = [
        context.LogicalDeviceConfiguration(),
        context.LogicalDeviceConfiguration(),
    ]
    config.set_logical_device_configuration(cpus[0], logical_cpus)
    context.ensure_initialized()

    group_key = 20
    instance_key = 30
    input_data = constant_op.constant([1, 2, 3, 4])

    def all_reduce_on(device_name):
        # Same collective on every device; only the placement differs.
        with ops.device(device_name):
            collective_ops.all_reduce(
                input_data,
                group_size=2,
                group_key=group_key,
                instance_key=instance_key,
                merge_op='Add',
                final_op='Id',
                timeout=timeout)

    # This timeout comes from param resolution.
    with self.assertRaisesRegex(
        errors.DeadlineExceededError,
        'Collective has timed out waiting for other workers'):
        all_reduce_on('CPU:0')

    # The second device is launched only after the first one has timed out,
    # simulating slow peers combined with a short timeout. CPU:0 timed out
    # during param resolution, so CPU:1 should time out too, but in the
    # execute phase.
    with self.assertRaisesRegex(
        errors.DeadlineExceededError,
        'Collective has timed out during execution'):
        all_reduce_on('CPU:1')
def test_save_profile(self):
    """Profile a small multiplication and check the files written to logdir.

    Expects two entries in the log directory (a 'plugins' directory and a
    file ending in '.profile-empty'), the per-host overview, input pipeline,
    TensorFlow stats and trace files under the first profile run, and a
    trace containing the host CPU device, GPU:0 when a physical GPU exists,
    and the 'three_times_five' and 'Mul:Mul' events.
    """
    logdir = self.get_temp_dir()
    profiler.start(logdir)
    with traceme.TraceMe('three_times_five'):
        three = constant_op.constant(3)
        five = constant_op.constant(5)
        product = three * five
    self.assertAllEqual(15, product)
    profiler.stop()

    file_list = gfile.ListDirectory(logdir)
    self.assertEqual(len(file_list), 2)
    for entry in gfile.ListDirectory(logdir):
        entry_path = os.path.join(logdir, entry)
        if gfile.IsDirectory(entry_path):
            self.assertEqual(entry, 'plugins')
        else:
            self.assertTrue(entry.endswith('.profile-empty'))

    profile_dir = os.path.join(logdir, 'plugins', 'profile')
    run = gfile.ListDirectory(profile_dir)[0]
    hostname = socket.gethostname()

    # Each of these per-host protobuf outputs must have been written.
    for suffix in ('.overview_page.pb', '.input_pipeline.pb',
                   '.tensorflow_stats.pb'):
        output_path = os.path.join(profile_dir, run, hostname + suffix)
        self.assertTrue(gfile.Exists(output_path))

    trace_file = os.path.join(profile_dir, run, hostname + '.trace')
    self.assertTrue(gfile.Exists(trace_file))
    with gfile.Open(trace_file, 'rb') as f:
        profile_pb = trace_events_pb2.Trace()
        profile_pb.ParseFromString(f.read())

    device_names = frozenset(
        device.name for device in profile_pb.devices.values())
    self.assertIn('/host:CPU', device_names)
    if config.list_physical_devices('GPU'):
        self.assertIn('/device:GPU:0', device_names)

    event_names = frozenset(event.name for event in profile_pb.trace_events)
    self.assertIn('three_times_five', event_names)
    self.assertIn('Mul:Mul', event_names)
def testKeepLogicalDevice(self):
    """Check the strategy keeps a logical device configuration set beforehand.

    The last physical GPU is split into two logical GPUs before a
    CollectiveAllReduceStrategy is built; afterwards the number of logical
    GPUs must be one more than the number of physical GPUs.
    """
    # Cannot change logical device after the context initialization.
    context._reset_context()  # pylint: disable=protected-access

    cluster_spec = multi_worker_test_base.create_cluster_spec(
        has_chief=False, num_workers=1)
    normalized_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    resolver = cluster_resolver_lib.SimpleClusterResolver(
        cluster_spec=normalized_spec, task_type='worker', task_id=0)

    gpus = tf_config.list_physical_devices('GPU')
    split_last_gpu = [
        context.LogicalDeviceConfiguration(64),
        context.LogicalDeviceConfiguration(64),
    ]
    tf_config.set_logical_device_configuration(gpus[-1], split_last_gpu)

    collective_all_reduce_strategy.CollectiveAllReduceStrategy(
        cluster_resolver=resolver)

    # Since we create two logical GPUs out of the last GPU, there should be
    # one more logical GPUs than physical GPUs.
    expected_logical_gpus = len(gpus) + 1
    self.assertLen(tf_config.list_logical_devices('GPU'),
                   expected_logical_gpus)

    context._reset_context()  # pylint: disable=protected-access
def testCollectiveTensorsHaveNoDeviceSpecified(self):
    """Check function inputs carry no device and the all-reduce still works.

    The single physical CPU is split into two logical CPUs. Inside the traced
    function both inputs must report an empty device string; an 'Add'/'Div'
    all-reduce of 1 and 3 across CPU:0 and CPU:1 must yield [2, 2].
    """
    context._reset_context()

    cpus = config.list_physical_devices('CPU')
    self.assertEqual(len(cpus), 1)
    logical_cpus = [
        context.LogicalDeviceConfiguration(),
        context.LogicalDeviceConfiguration(),
    ]
    config.set_logical_device_configuration(cpus[0], logical_cpus)
    context.ensure_initialized()

    group_size = 2
    group_key = 1
    instance_key = 1

    @def_function.function
    def fn(all_args):
        # The inputs have no devices set. This is expected to be a trace-time
        # check only.
        for arg in all_args:
            self.assertEqual(arg.device, '')

        results = []
        for device_name, arg in zip(('/CPU:0', '/CPU:1'), all_args):
            with ops.device(device_name):
                results.append(
                    collective_ops.all_reduce(arg, group_size, group_key,
                                              instance_key, 'Add', 'Div'))
        return results

    with ops.device('/CPU:0'):
        in0 = constant_op.constant(1)
    with ops.device('/CPU:1'):
        in1 = constant_op.constant(3)

    result = fn([in0, in1])
    self.assertAllClose(result, [2, 2])
def test_profile(self):
    """Profile a small multiplication and inspect the returned trace.

    Also checks that starting the profiler twice raises
    ProfilerAlreadyRunningError and that stopping it when it is not running
    raises ProfilerNotRunningError.
    """
    profiler.start()
    product = constant_op.constant(3) * constant_op.constant(5)
    self.assertAllEqual(15, product)

    with self.assertRaises(profiler.ProfilerAlreadyRunningError):
        profiler.start()

    serialized_trace = profiler.stop()
    trace = trace_events_pb2.Trace()
    trace.ParseFromString(serialized_trace)

    device_names = frozenset(dev.name for dev in trace.devices.values())
    self.assertIn('/host:CPU', device_names)
    if config.list_physical_devices('GPU'):
        self.assertIn('/device:GPU:0', device_names)

    event_names = frozenset(evt.name for evt in trace.trace_events)
    self.assertIn('Mul:Mul', event_names)

    with self.assertRaises(profiler.ProfilerNotRunningError):
        profiler.stop()
def testV1Compatibility(self):
    """Check how a V1 ConfigProto is reflected in the derived context config.

    Covers the default CPU count, an explicit CPU split, and an empty
    `visible_device_list`, which is expected to expand to the index of every
    physical GPU.
    """
    # Ensure we set 1 CPU by default
    context.context()._config = config_pb2.ConfigProto()
    default_config = context.context().config
    self.assertEqual(default_config.device_count['CPU'], 1)
    # NOTE(review): resetting `_physical_devices` presumably forces the device
    # list to be rebuilt for the next config -- confirm against `context`.
    context.context()._physical_devices = None

    # Ensure CPU is split
    split_proto = config_pb2.ConfigProto(device_count={'CPU': 2})
    context.context()._config = split_proto
    split_config = context.context().config
    self.assertEqual(split_config.device_count['CPU'], 2)
    context.context()._physical_devices = None

    # Ensure an empty visible device list is parsed
    empty_list_options = config_pb2.GPUOptions(visible_device_list='')
    context.context()._config = config_pb2.ConfigProto(
        gpu_options=empty_list_options)
    gpus = config.list_physical_devices('GPU')
    parsed_config = context.context().config
    every_gpu_index = ','.join(map(str, range(len(gpus))))
    self.assertEqual(parsed_config.gpu_options.visible_device_list,
                     every_gpu_index)
    context.context()._physical_devices = None
def testV1Compatibility(self):
    """Check how a V1 ConfigProto is reflected in the derived context config.

    Covers the default CPU count, an explicit CPU split, and an empty
    `visible_device_list`, which is expected to expand to the index of every
    physical GPU.
    """
    # Ensure we set 1 CPU by default
    context.context()._config = config_pb2.ConfigProto()
    default_config = context.context().config
    self.assertEqual(default_config.device_count['CPU'], 1)
    # NOTE(review): resetting `_physical_devices` presumably forces the device
    # list to be rebuilt for the next config -- confirm against `context`.
    context.context()._physical_devices = None

    # Ensure CPU is split
    split_proto = config_pb2.ConfigProto(device_count={'CPU': 2})
    context.context()._config = split_proto
    split_config = context.context().config
    self.assertEqual(split_config.device_count['CPU'], 2)
    context.context()._physical_devices = None

    # Ensure an empty visible device list is parsed
    empty_list_options = config_pb2.GPUOptions(visible_device_list='')
    context.context()._config = config_pb2.ConfigProto(
        gpu_options=empty_list_options)
    gpus = config.list_physical_devices('GPU')
    parsed_config = context.context().config
    every_gpu_index = ','.join(map(str, range(len(gpus))))
    self.assertEqual(parsed_config.gpu_options.visible_device_list,
                     every_gpu_index)
    context.context()._physical_devices = None
def testReadVariableInsideFunction(self, distribution, run_functions_eagerly):
    """Check a function reads the variable component of the current device.

    Each component of a distributed variable is assigned its replica index;
    reading the variable through a `def_function.function` inside each
    device scope must then return that index.

    Args:
      distribution: Strategy under test.
      run_functions_eagerly: Forwarded to `def_function.run_functions_eagerly`.
    """
    uses_tfrt_tpu = (not run_functions_eagerly
                     and config.list_physical_devices("TPU")
                     and FLAGS.tpu_use_tfrt)
    if uses_tfrt_tpu:
        self.skipTest(
            "TFRT does not support XlaLocalLaunch, see b/194517185")

    def_function.run_functions_eagerly(run_functions_eagerly)

    # Get devices on which variables will be placed. Default strategy does
    # not define this, so assume cpu:0 in that case.
    try:
        devices = distribution.extended.parameter_devices
    except RuntimeError:
        devices = ["cpu:0"]

    with distribution.scope():
        v = variables.Variable(0.)
        if isinstance(v, values.DistributedVariable):
            for replica_index, _ in enumerate(devices):
                # NOTE: Components are assigned one by one so that each
                # device can hold a different value. Calling .assign on the
                # mirrored variable itself would synchronize the replicas
                # and make distinct per-device values impossible to test.
                component = v._values[replica_index]
                component.assign(
                    math_ops.cast(replica_index, dtypes.float32))

    @def_function.function
    def read():
        return v.read_value()

    # Verify that the value from each device is read, when in that device
    # scope. Doing this inside strategy scope is needed to force function
    # retracing on each device, otherwise `read()` will only be traced once
    # on the first device and following variable read will always read the
    # value on the first replica.
    with distribution.scope():
        for replica_index, device_name in enumerate(devices):
            with ops.device(device_name):
                expected = math_ops.cast(replica_index, dtypes.float32)
                self.assertEqual(expected, read())
def testRemoteFunctionCancellation(self):
    """Check that cancelling a pending remote call raises CancelledError.

    The first physical CPU is split into two logical CPUs. A function running
    two chained collectives is called remotely on cpu:1 through an async
    executor; after a single matching collective is run on cpu:0, the
    cancellation manager is triggered and waiting on the executor must raise
    CancelledError.
    """
    context._reset_context()

    logical_devices = [
        context.LogicalDeviceConfiguration(),
        context.LogicalDeviceConfiguration(),
    ]
    first_cpu = framework_config.list_physical_devices("CPU")[0]
    framework_config.set_logical_device_configuration(
        first_cpu, logical_devices)

    @function.Defun(dtypes.float32)
    def _remote_fn(v):
        # We run two collectives here to make sure we cancel in the middle
        # of the RemoteCall. The second one should never finish.
        first_reduce = collective_ops.all_reduce_v2(
            v, group_size=2, group_key=1, instance_key=1)
        with ops.control_dependencies([first_reduce]):
            return collective_ops.all_reduce_v2(
                v, group_size=2, group_key=1, instance_key=2)

    @eager_def_function.function
    def run():
        with ops.device("/cpu:0"):
            outputs = functional_ops.remote_call(
                args=[constant_op.constant([1.])],
                Tout=[dtypes.float32],
                f=_remote_fn,
                target="/cpu:1")
            return outputs[0]

    async_executor = executor.new_executor(enable_async=True)
    cancel_mgr = cancellation.CancellationManager()

    with context.executor_scope(async_executor):
        # This should never finish.
        cancelable_run = cancel_mgr.get_cancelable_function(
            run.get_concrete_function())
        cancelable_run()

    with ops.device("/cpu:0"):
        collective_ops.all_reduce_v2(
            [1.], group_size=2, group_key=1, instance_key=1)

    cancel_mgr.start_cancel()
    with self.assertRaises(errors.CancelledError):
        async_executor.wait()
def testVirtualGpu(self):
    """Split the last physical GPU into two virtual GPUs and place ops on them.

    Requires at least one physical GPU whose last entry has no virtual device
    configuration yet. After the split there must be exactly one more logical
    GPU than physical GPUs, every logical GPU index must accept an op, and the
    index one past the last logical GPU is expected to raise a RuntimeError
    matching 'unknown device'.
    """
    gpus = config.list_physical_devices('GPU')
    self.assertNotEqual(len(gpus), 0)

    self.assertIsNone(config.get_virtual_device_configuration(gpus[-1]))
    config.set_virtual_device_configuration(gpus[-1], [
        context.VirtualDeviceConfiguration(memory_limit=10),
        context.VirtualDeviceConfiguration(memory_limit=10)
    ])
    self.assertEqual(
        len(config.get_virtual_device_configuration(gpus[-1])), 2)

    logical_gpus = config.list_logical_devices('GPU')
    # One physical GPU became two logical ones. This used `assertTrue`, which
    # treats its second argument as the failure message and therefore passed
    # for any non-empty list.
    self.assertEqual(len(logical_gpus), len(gpus) + 1)

    for i in range(len(logical_gpus)):
        with ops.device('/device:GPU:' + str(i)):
            a = constant_op.constant(1.0)
            self.evaluate(a)

    # `assertRaisesRegexp` is a deprecated alias that was removed in
    # Python 3.12; `assertRaisesRegex` is the supported spelling.
    with self.assertRaisesRegex(RuntimeError, 'unknown device'):
        with ops.device('/device:GPU:' + str(len(logical_gpus))):
            a = constant_op.constant(1.0)
            self.evaluate(a)
def testPhysicalDevices(self):
    """Check at least one physical CPU is listed, and a GPU when available."""
    physical_cpus = config.list_physical_devices('CPU')
    self.assertGreater(len(physical_cpus), 0)

    if not test_util.is_gpu_available():
        return

    physical_gpus = config.list_physical_devices('GPU')
    self.assertGreater(len(physical_gpus), 0)