# Imports assumed by the snippets below (TensorFlow internal grappler/test
# modules; the snippets appear to come from several files, so not every
# import is used by every snippet).
from tensorflow.core.protobuf import device_properties_pb2
from tensorflow.python.framework import meta_graph
from tensorflow.python.framework import ops
from tensorflow.python.framework import test_util
from tensorflow.python.grappler import cluster
from tensorflow.python.grappler import cluster as gcluster
from tensorflow.python.grappler import item
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test


def _get_cluster():
  named_device = device_properties_pb2.NamedDevice()
  named_device.name = '/GPU:0'
  named_device.properties.type = 'GPU'
  named_device.properties.environment['architecture'] = '4'
  cluster = gcluster.Cluster(devices=[named_device])
  return cluster
def testVirtualCluster(self):
  with ops.Graph().as_default() as g:
    with ops.device('/device:GPU:0'):
      a = random_ops.random_uniform(shape=[1024, 1024])
      b = random_ops.random_uniform(shape=[1024, 1024])
      c = a + b
    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    train_op.append(c)
    mg = meta_graph.create_meta_graph_def(graph=g)
    grappler_item = item.Item(mg)

    device_properties = device_properties_pb2.DeviceProperties(
        type='GPU',
        frequency=1000,
        num_cores=60,
        environment={'architecture': '7'})
    named_device = device_properties_pb2.NamedDevice(
        properties=device_properties, name='/device:GPU:0')
    grappler_cluster = cluster.Cluster(
        disable_detailed_stats=False,
        disable_timeline=False,
        devices=[named_device])
    op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item)
    self.assertEqual(run_time, 0.000545)
    self.assertEqual(len(op_perfs), 15)

    estimated_perf = grappler_cluster.EstimatePerformance(named_device)
    self.assertEqual(7680.0, estimated_perf)
def testSupportDevices(self):
  gpu_type = test_util.gpu_device_type()
  gpu_name = test_util.gpu_device_name()
  with ops.Graph().as_default() as g:
    a = random_ops.random_uniform(shape=(2, 3))
    b = random_ops.random_uniform(shape=(2, 3))
    c = a + b
    dims = math_ops.range(0, array_ops.rank(c), 1)
    d = math_ops.reduce_sum(a, axis=dims)
    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    train_op.append(d)
    mg = meta_graph.create_meta_graph_def(graph=g)
    grappler_item = item.Item(mg)

    device_properties = device_properties_pb2.DeviceProperties(
        type=gpu_type, frequency=1000, num_cores=60)
    named_gpu = device_properties_pb2.NamedDevice(
        properties=device_properties, name=gpu_name)
    device_properties = device_properties_pb2.DeviceProperties(
        type='CPU', frequency=3000, num_cores=6)
    named_cpu = device_properties_pb2.NamedDevice(
        properties=device_properties, name='/CPU:0')
    virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
    supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
    self.assertEqual(supported_dev['add'], ['/CPU:0', gpu_name])
    self.assertEqual(supported_dev['Sum'], ['/CPU:0', gpu_name])
    self.assertEqual(supported_dev['range'], ['/CPU:0', gpu_name])

    real_cluster = cluster.Cluster()
    supported_dev = real_cluster.GetSupportedDevices(grappler_item)
    if test.is_gpu_available():
      self.assertEqual(supported_dev['add'], [
          '/job:localhost/replica:0/task:0/device:CPU:0',
          '/job:localhost/replica:0/task:0' + gpu_name
      ])
      self.assertEqual(supported_dev['Sum'], [
          '/job:localhost/replica:0/task:0/device:CPU:0',
          '/job:localhost/replica:0/task:0' + gpu_name
      ])
      # The axis tensor must reside on the host.
      self.assertEqual(supported_dev['range'],
                       ['/job:localhost/replica:0/task:0/device:CPU:0'])
    else:
      self.assertEqual(supported_dev['add'],
                       ['/job:localhost/replica:0/task:0/device:CPU:0'])
def get_cluster():
  """Returns a virtual Grappler cluster with a single GPU device."""
  named_device = device_properties_pb2.NamedDevice()
  named_device.name = '/GPU:0'
  named_device.properties.type = 'GPU'
  named_device.properties.environment['architecture'] = '4'
  cluster = gcluster.Cluster(devices=[named_device])
  return cluster
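# Usage sketch (not part of the original snippets): a virtual cluster like
# the one returned by get_cluster() is typically handed to Grappler via
# tf_optimizer.OptimizeGraph, so rewrites are costed against the declared
# device rather than the local hardware. The helper name and config below
# are illustrative assumptions.
from tensorflow.core.protobuf import config_pb2
from tensorflow.python.grappler import tf_optimizer


def optimize_with_virtual_gpu(metagraph):
  """Runs Grappler on `metagraph` against the virtual GPU cluster."""
  config = config_pb2.ConfigProto()
  # Force the rewriter to run even on very small graphs.
  config.graph_options.rewrite_options.min_graph_nodes = -1
  return tf_optimizer.OptimizeGraph(config, metagraph, cluster=get_cluster())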
def buildCluster(num_cpus=1, num_gpus=2):
  devices = []
  if num_gpus > 0:
    device_properties = device_properties_pb2.DeviceProperties(
        type='GPU',
        vendor='NVidia',
        model='GeForce GTX TITAN X',
        frequency=1076,
        num_cores=24,
        environment={
            'architecture': '5.2',
            'cuda': '8000',
            'cudnn': '6021'
        },
        num_registers=65536,
        l1_cache_size=24576,
        l2_cache_size=3145728,
        shared_memory_size_per_multiprocessor=98304,
        memory_size=12783648768,
        bandwidth=336480000)
    for i in range(num_gpus):
      devices.append(
          device_properties_pb2.NamedDevice(
              properties=device_properties, name='/GPU:' + str(i)))

  assert num_cpus > 0
  device_properties = device_properties_pb2.DeviceProperties(
      type='CPU',
      frequency=1900,
      num_cores=2,
      l1_cache_size=32768,
      l2_cache_size=262144,
      l3_cache_size=3145728)
  for i in range(num_cpus):
    devices.append(
        device_properties_pb2.NamedDevice(
            properties=device_properties, name='/CPU:' + str(i)))
  return cluster.Cluster(devices=devices)
def _buildCluster(num_cpus=1, num_gpus=1):
  devices = []
  if num_gpus > 0:
    device_properties = device_properties_pb2.DeviceProperties(
        type='GPU',
        vendor='NVidia',
        model='Tesla K40m',
        frequency=745,  # 745 MHz
        num_cores=2888,  # CUDA cores
        environment={
            'architecture': '5.2',
            'cuda': '10000',
            'cudnn': '7031'
        },
        num_registers=65536,
        l1_cache_size=65536,  # 64 KB
        l2_cache_size=1572864,  # 1.5 MB
        shared_memory_size_per_multiprocessor=49152,  # 48 KB
        memory_size=12884901888,  # 12 GB
        bandwidth=288000000)  # 288 GB/s
    for i in range(num_gpus):
      devices.append(
          device_properties_pb2.NamedDevice(
              properties=device_properties, name='/GPU:' + str(i)))

  assert num_cpus > 0
  device_properties = device_properties_pb2.DeviceProperties(
      type='CPU',
      frequency=2399,
      num_cores=32,
      l1_cache_size=32768,
      l2_cache_size=262144,
      l3_cache_size=20971520)
  for i in range(num_cpus):
    devices.append(
        device_properties_pb2.NamedDevice(
            properties=device_properties, name='/CPU:' + str(i)))
  return cluster.Cluster(devices=devices)
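# Usage sketch (assumed, not from the original snippets): measure the
# simulated runtime of a small graph on the virtual single-GPU cluster
# built by _buildCluster(). The graph contents and names are illustrative.
def measure_matmul_cost():
  with ops.Graph().as_default() as g:
    x = random_ops.random_uniform(shape=(128, 128))
    y = math_ops.matmul(x, x)
    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    train_op.append(y)
    mg = meta_graph.create_meta_graph_def(graph=g)
  grappler_item = item.Item(mg)
  grappler_cluster = _buildCluster(num_cpus=1, num_gpus=1)
  # MeasureCosts returns per-op cost estimates, the simulated run time,
  # and step stats (unused here).
  op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item)
  return op_perfs, run_time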
def build_cluster():
  devices = []
  device_properties = device_properties_pb2.DeviceProperties(
      type='CPU',
      frequency=2000,
      num_cores=12,
      l1_cache_size=32768,
      l2_cache_size=262144,
      l3_cache_size=30720 * 1024)  # 30 MB
  for i in range(2):
    devices.append(
        device_properties_pb2.NamedDevice(
            properties=device_properties, name='/CPU:' + str(i)))
  return cluster.Cluster(devices=devices)
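# Sketch, assuming cluster.Cluster exposes ListDevices() as in
# tensorflow/python/grappler/cluster.py: enumerate the devices the
# CPU-only virtual cluster above was provisioned with.
def print_cluster_devices():
  for device in build_cluster().ListDevices():
    print(device.name, device.properties.type)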
def testVirtualCluster(self):
  with ops.Graph().as_default() as g:
    a = random_ops.random_uniform(shape=())
    b = random_ops.random_uniform(shape=())
    c = a + b
    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    train_op.append(c)
    mg = meta_graph.create_meta_graph_def(graph=g)
    grappler_item = item.Item(mg)

    device_properties = device_properties_pb2.DeviceProperties(
        type='GPU', environment={'architecture': '7'})
    named_device = device_properties_pb2.NamedDevice(
        properties=device_properties, name='/GPU:0')
    grappler_cluster = cluster.Cluster(devices=[named_device])
    op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item)
    self.assertGreater(run_time, 0)
    self.assertEqual(len(op_perfs), 15)
def testSupportDevices(self):
  with ops.Graph().as_default() as g:
    a = random_ops.random_uniform(shape=(2, 3))
    b = random_ops.random_uniform(shape=(2, 3))
    c = a + b
    dims = math_ops.range(0, array_ops.rank(c), 1)
    d = math_ops.reduce_sum(a, axis=dims)
    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
    train_op.append(d)
    mg = meta_graph.create_meta_graph_def(graph=g)
    grappler_item = item.Item(mg)

    device_properties = device_properties_pb2.DeviceProperties(
        type='GPU', frequency=1000, num_cores=60)
    named_gpu = device_properties_pb2.NamedDevice(
        properties=device_properties, name='/GPU:0')
    device_properties = device_properties_pb2.DeviceProperties(
        type='CPU', frequency=3000, num_cores=6)
    named_cpu = device_properties_pb2.NamedDevice(
        properties=device_properties, name='/CPU:0')
    virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
    supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
    self.assertEqual(supported_dev['add'], ['/CPU:0', '/GPU:0'])
    self.assertEqual(supported_dev['Sum'], ['/CPU:0', '/GPU:0'])
    self.assertEqual(supported_dev['range'], ['/CPU:0', '/GPU:0'])

    real_cluster = cluster.Cluster()
    supported_dev = real_cluster.GetSupportedDevices(grappler_item)
    # NCL 18.04 -- hack to account for possible XLA devices.
    if test.is_gpu_available():
      add_devices = [
          d for d in supported_dev['add']
          if not d.split(':')[-2].startswith('XLA')
      ]
      self.assertEqual(add_devices, [
          '/job:localhost/replica:0/task:0/device:CPU:0',
          '/job:localhost/replica:0/task:0/device:GPU:0'
      ])
      sum_devices = [
          d for d in supported_dev['Sum']
          if not d.split(':')[-2].startswith('XLA')
      ]
      self.assertEqual(sum_devices, [
          '/job:localhost/replica:0/task:0/device:CPU:0',
          '/job:localhost/replica:0/task:0/device:GPU:0'
      ])
      # The axis tensor must reside on the host.
      range_devices = [
          d for d in supported_dev['range']
          if not d.split(':')[-2].startswith('XLA')
      ]
      self.assertEqual(range_devices,
                       ['/job:localhost/replica:0/task:0/device:CPU:0'])
    else:
      add_devices = [
          d for d in supported_dev['add']
          if not d.split(':')[-2].startswith('XLA')
      ]
      self.assertEqual(add_devices,
                       ['/job:localhost/replica:0/task:0/device:CPU:0'])