예제 #1
0
def _get_cluster():
  """Builds a Grappler cluster that contains one described GPU device.

  The device is named '/GPU:0', has type 'GPU', and carries a single
  environment entry mapping 'architecture' to '4'.

  Returns:
    The `gcluster.Cluster` constructed from that one device.
  """
  gpu_properties = device_properties_pb2.DeviceProperties(
      type='GPU', environment={'architecture': '4'})
  gpu = device_properties_pb2.NamedDevice(
      name='/GPU:0', properties=gpu_properties)
  return gcluster.Cluster(devices=[gpu])
  def testVirtualCluster(self):
    """Checks cost measurement and performance estimation on a described GPU.

    Builds a graph that adds two 1024x1024 random-uniform tensors under the
    '/device:GPU:0' device scope, registers the sum as the train op, and
    measures it on a cluster made of one explicitly described GPU. The test
    pins the reported run time, the number of op performance entries, and the
    estimated performance of the device.
    """
    with ops.Graph().as_default() as graph:
      # Operands and their sum are all created inside the GPU device scope.
      with ops.device('/device:GPU:0'):
        lhs = random_ops.random_uniform(shape=[1024, 1024])
        rhs = random_ops.random_uniform(shape=[1024, 1024])
        total = lhs + rhs
      ops.get_collection_ref(ops.GraphKeys.TRAIN_OP).append(total)
      grappler_item = item.Item(meta_graph.create_meta_graph_def(graph=graph))

      gpu = device_properties_pb2.NamedDevice(
          name='/device:GPU:0',
          properties=device_properties_pb2.DeviceProperties(
              type='GPU',
              frequency=1000,
              num_cores=60,
              environment={'architecture': '7'}))
      grappler_cluster = cluster.Cluster(
          devices=[gpu],
          disable_detailed_stats=False,
          disable_timeline=False)

      # MeasureCosts yields three values; the last one is not inspected here.
      op_perfs, run_time, _ignored = grappler_cluster.MeasureCosts(
          grappler_item)
      self.assertEqual(run_time, 0.000545)
      self.assertEqual(len(op_perfs), 15)

      self.assertEqual(7680.0, grappler_cluster.EstimatePerformance(gpu))
예제 #3
0
    def testSupportDevices(self):
        """Checks the devices reported as supporting each op of a small graph.

        The graph adds two (2, 3) random-uniform tensors, builds a range over
        the rank of the sum, and reduce-sums the first operand over that
        range. Supported devices are queried twice: on a cluster built from
        one described CPU plus one described GPU, and on a default-constructed
        cluster.
        """
        gpu_type = test_util.gpu_device_type()
        gpu_name = test_util.gpu_device_name()

        # Fully qualified names expected from the default-constructed cluster.
        local_cpu = '/job:localhost/replica:0/task:0/device:CPU:0'
        local_gpu = '/job:localhost/replica:0/task:0' + gpu_name

        with ops.Graph().as_default() as graph:
            lhs = random_ops.random_uniform(shape=(2, 3))
            rhs = random_ops.random_uniform(shape=(2, 3))
            total = lhs + rhs
            axes = math_ops.range(0, array_ops.rank(total), 1)
            reduced = math_ops.reduce_sum(lhs, axis=axes)
            ops.get_collection_ref(ops.GraphKeys.TRAIN_OP).append(reduced)
            grappler_item = item.Item(
                meta_graph.create_meta_graph_def(graph=graph))

            named_gpu = device_properties_pb2.NamedDevice(
                name=gpu_name,
                properties=device_properties_pb2.DeviceProperties(
                    type=gpu_type, frequency=1000, num_cores=60))
            named_cpu = device_properties_pb2.NamedDevice(
                name='/CPU:0',
                properties=device_properties_pb2.DeviceProperties(
                    type='CPU', frequency=3000, num_cores=6))

            # With explicitly described devices, every op is expected on both.
            described = cluster.Cluster(devices=[named_cpu, named_gpu])
            supported_dev = described.GetSupportedDevices(grappler_item)
            for op_name in ('add', 'Sum', 'range'):
                self.assertEqual(supported_dev[op_name],
                                 ['/CPU:0', gpu_name])

            supported_dev = cluster.Cluster().GetSupportedDevices(
                grappler_item)
            if not test.is_gpu_available():
                self.assertEqual(supported_dev['add'], [local_cpu])
            else:
                for op_name in ('add', 'Sum'):
                    self.assertEqual(supported_dev[op_name],
                                     [local_cpu, local_gpu])
                # The axis tensor must reside on the host
                self.assertEqual(supported_dev['range'], [local_cpu])
예제 #4
0
def get_cluster():
    """Create a Grappler cluster holding one described GPU device.

    The device is named '/GPU:0', has type 'GPU', and its environment maps
    'architecture' to '4'.

    Returns:
        The `gcluster.Cluster` built from that one device.
    """
    gpu = device_properties_pb2.NamedDevice(
        name='/GPU:0',
        properties=device_properties_pb2.DeviceProperties(
            type='GPU', environment={'architecture': '4'}))
    return gcluster.Cluster(devices=[gpu])
예제 #5
0
def buildCluster(num_cpus=1, num_gpus=2):
    """Build a cluster from described GPU and CPU devices.

    GPU devices ('/GPU:0', '/GPU:1', ...) are listed first and all share one
    property set; CPU devices ('/CPU:0', ...) follow and share another.

    Args:
        num_cpus: Number of CPU devices to describe; asserted to be positive.
        num_gpus: Number of GPU devices to describe; none are added when this
            is not positive.

    Returns:
        A `cluster.Cluster` built from the described devices.

    Raises:
        AssertionError: If `num_cpus` is not greater than zero (only while
            assertions are enabled).
    """
    devices = []

    if num_gpus > 0:
        gpu_properties = device_properties_pb2.DeviceProperties(
            type='GPU',
            vendor='NVidia',
            model='GeForce GTX TITAN X',
            frequency=1076,
            num_cores=24,
            environment={
                'architecture': '5.2',
                'cuda': '8000',
                'cudnn': '6021'
            },
            num_registers=65536,
            l1_cache_size=24576,
            l2_cache_size=3145728,
            shared_memory_size_per_multiprocessor=98304,
            memory_size=12783648768,
            bandwidth=336480000)
        devices.extend(
            device_properties_pb2.NamedDevice(
                name='/GPU:%d' % index, properties=gpu_properties)
            for index in range(num_gpus))

    assert num_cpus > 0
    cpu_properties = device_properties_pb2.DeviceProperties(
        type='CPU',
        frequency=1900,
        num_cores=2,
        l1_cache_size=32768,
        l2_cache_size=262144,
        l3_cache_size=3145728)
    devices.extend(
        device_properties_pb2.NamedDevice(
            name='/CPU:%d' % index, properties=cpu_properties)
        for index in range(num_cpus))

    return cluster.Cluster(devices=devices)
예제 #6
0
    def _buildCluster(num_cpus=1, num_gpus=1):
        """Build a cluster from described GPU and CPU devices.

        GPU devices ('/GPU:0', ...) are listed first and share one property
        set; CPU devices ('/CPU:0', ...) follow and share another.

        Args:
            num_cpus: Number of CPU devices to describe; asserted to be
                positive.
            num_gpus: Number of GPU devices to describe; none are added when
                this is not positive.

        Returns:
            A `cluster.Cluster` built from the described devices.

        Raises:
            AssertionError: If `num_cpus` is not greater than zero (only
                while assertions are enabled).
        """
        devices = []

        if num_gpus > 0:
            # NOTE(review): the unit annotations below restate the original
            # author's comments; verify the values against the device spec.
            gpu_properties = device_properties_pb2.DeviceProperties(
                type='GPU',
                vendor='NVidia',
                model='Tesla K40m',
                frequency=745,  # 745 MHz
                num_cores=2888,  # CUDA cores
                environment={
                    'architecture': '5.2',
                    'cuda': '10000',
                    'cudnn': '7031'
                },
                num_registers=65536,
                l1_cache_size=65536,  # 64 KB
                l2_cache_size=1572864,  # 1.5 MB
                shared_memory_size_per_multiprocessor=49152,  # 49152 bytes
                memory_size=12884901888,  # 12 GB
                bandwidth=288000000)  # 288 GBps
            devices.extend(
                device_properties_pb2.NamedDevice(
                    name='/GPU:%d' % index, properties=gpu_properties)
                for index in range(num_gpus))

        assert num_cpus > 0
        cpu_properties = device_properties_pb2.DeviceProperties(
            type='CPU',
            frequency=2399,
            num_cores=32,
            l1_cache_size=32768,
            l2_cache_size=262144,
            l3_cache_size=20971520)
        devices.extend(
            device_properties_pb2.NamedDevice(
                name='/CPU:%d' % index, properties=cpu_properties)
            for index in range(num_cpus))

        return cluster.Cluster(devices=devices)
예제 #7
0
def build_cluster():
    """Build a cluster of two described CPU devices.

    Both devices ('/CPU:0' and '/CPU:1') share a single property set.

    Returns:
        A `cluster.Cluster` built from the two CPU devices.
    """
    cpu_properties = device_properties_pb2.DeviceProperties(
        type='CPU',
        frequency=2000,
        num_cores=12,
        l1_cache_size=32768,
        l2_cache_size=262144,
        l3_cache_size=30720 * 1024)
    cpus = [
        device_properties_pb2.NamedDevice(
            name='/CPU:%d' % index, properties=cpu_properties)
        for index in range(2)
    ]
    return cluster.Cluster(devices=cpus)
예제 #8
0
 def testVirtualCluster(self):
   """Checks that costs can be measured on a cluster with one described GPU.

   Builds a graph that adds two scalar random-uniform tensors, registers the
   sum as the train op, and measures it on a cluster whose only device is a
   GPU named '/GPU:0' with 'architecture' set to '7'. The run time must be
   positive and exactly 15 op performance entries are expected.
   """
   with ops.Graph().as_default() as graph:
     lhs = random_ops.random_uniform(shape=())
     rhs = random_ops.random_uniform(shape=())
     total = lhs + rhs
     ops.get_collection_ref(ops.GraphKeys.TRAIN_OP).append(total)
     grappler_item = item.Item(meta_graph.create_meta_graph_def(graph=graph))

     gpu = device_properties_pb2.NamedDevice(
         name='/GPU:0',
         properties=device_properties_pb2.DeviceProperties(
             type='GPU', environment={'architecture': '7'}))
     grappler_cluster = cluster.Cluster(devices=[gpu])

     # MeasureCosts yields three values; the last one is not inspected here.
     op_perfs, run_time, _ignored = grappler_cluster.MeasureCosts(
         grappler_item)
     self.assertGreater(run_time, 0)
     self.assertEqual(len(op_perfs), 15)
예제 #9
0
    def testSupportDevices(self):
        """Checks the devices reported as supporting each op of a small graph.

        The graph adds two (2, 3) random-uniform tensors, builds a range over
        the rank of the sum, and reduce-sums the first operand over that
        range. Supported devices are queried on a cluster built from one
        described CPU plus one described GPU, and on a default-constructed
        cluster. For the latter, device names whose type component starts
        with 'XLA' are dropped before comparing.
        """
        local_cpu = '/job:localhost/replica:0/task:0/device:CPU:0'
        local_gpu = '/job:localhost/replica:0/task:0/device:GPU:0'

        def without_xla(device_names):
            # NCL 18.04 -- Hack to account for possible XLA devices: the
            # second-to-last ':'-separated field is tested for an 'XLA'
            # prefix.
            return [
                name for name in device_names
                if not name.split(':')[-2].startswith('XLA')
            ]

        with ops.Graph().as_default() as graph:
            lhs = random_ops.random_uniform(shape=(2, 3))
            rhs = random_ops.random_uniform(shape=(2, 3))
            total = lhs + rhs
            axes = math_ops.range(0, array_ops.rank(total), 1)
            reduced = math_ops.reduce_sum(lhs, axis=axes)
            ops.get_collection_ref(ops.GraphKeys.TRAIN_OP).append(reduced)
            grappler_item = item.Item(
                meta_graph.create_meta_graph_def(graph=graph))

            named_gpu = device_properties_pb2.NamedDevice(
                name='/GPU:0',
                properties=device_properties_pb2.DeviceProperties(
                    type='GPU', frequency=1000, num_cores=60))
            named_cpu = device_properties_pb2.NamedDevice(
                name='/CPU:0',
                properties=device_properties_pb2.DeviceProperties(
                    type='CPU', frequency=3000, num_cores=6))

            # With explicitly described devices, every op is expected on both.
            described = cluster.Cluster(devices=[named_cpu, named_gpu])
            supported_dev = described.GetSupportedDevices(grappler_item)
            for op_name in ('add', 'Sum', 'range'):
                self.assertEqual(supported_dev[op_name],
                                 ['/CPU:0', '/GPU:0'])

            supported_dev = cluster.Cluster().GetSupportedDevices(
                grappler_item)
            if not test.is_gpu_available():
                self.assertEqual(
                    without_xla(supported_dev['add']), [local_cpu])
            else:
                for op_name in ('add', 'Sum'):
                    self.assertEqual(
                        without_xla(supported_dev[op_name]),
                        [local_cpu, local_gpu])
                # The axis tensor must reside on the host
                self.assertEqual(
                    without_xla(supported_dev['range']), [local_cpu])