def testVirtualCluster(self):
    with ops.Graph().as_default() as g:
      with ops.device('/device:GPU:0'):
        a = random_ops.random_uniform(shape=[1024, 1024])
        b = random_ops.random_uniform(shape=[1024, 1024])
        c = a + b
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      train_op.append(c)
      mg = meta_graph.create_meta_graph_def(graph=g)
      grappler_item = item.Item(mg)
      device_properties = device_properties_pb2.DeviceProperties(
          type='GPU',
          frequency=1000,
          num_cores=60,
          environment={'architecture': '7'})
      named_device = device_properties_pb2.NamedDevice(
          properties=device_properties, name='/device:GPU:0')
      grappler_cluster = cluster.Cluster(
          disable_detailed_stats=False,
          disable_timeline=False,
          devices=[named_device])
      op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item)
      self.assertEqual(run_time, 0.000545)
      self.assertEqual(len(op_perfs), 15)

      estimated_perf = grappler_cluster.EstimatePerformance(named_device)
      self.assertEqual(7680.0, estimated_perf)
def GenerateCostReport(metagraph,
                       per_node_report=False,
                       verbose=False,
                       cluster=None):
  """Analyze the cost of each TensorFlow op and node in the provided metagraph.

  Args:
    metagraph: A TensorFlow MetaGraphDef.
    per_node_report: by default the report contains stats aggregated on a
      per-op-type basis; setting per_node_report to True adds results for
      each individual node to the report.
    verbose: print the entire operation proto instead of a summary table.
    cluster: analyze the costs using the specified cluster, or the local
      machine if no cluster is specified.

  Returns:
    A string containing the cost report.
  """
  if cluster is None:
    cluster = gcluster.Cluster(disable_detailed_stats=False)

  with errors.raise_exception_on_not_ok_status():
    ret_from_swig = tf_wrap.GenerateCostReport(metagraph.SerializeToString(),
                                               per_node_report, verbose,
                                               cluster.tf_cluster)
  return ret_from_swig
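
A minimal usage sketch for the function above (the toy graph and names are hypothetical; with cluster=None the costs are measured on the local machine):

# Usage sketch: build a toy graph, export its MetaGraphDef, and print the
# per-op-type cost report. The graph and tensor names are illustrative only.
import tensorflow as tf

with tf.Graph().as_default() as g:
    x = tf.random.uniform(shape=[128, 128])
    y = tf.linalg.matmul(x, x)
    # Mark a fetch node so Grappler knows what to measure.
    tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.TRAIN_OP, y)
    mg = tf.compat.v1.train.export_meta_graph(graph=g)

print(GenerateCostReport(mg, per_node_report=True))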
Example #3
def _get_cluster():
  named_device = device_properties_pb2.NamedDevice()
  named_device.name = '/GPU:0'
  named_device.properties.type = 'GPU'
  named_device.properties.environment['architecture'] = '4'
  cluster = gcluster.Cluster(devices=[named_device])
  return cluster
Example #4
def OptimizeGraph(config_proto,
                  metagraph,
                  verbose=True,
                  graph_id=b'graph_to_optimize',
                  cluster=None,
                  strip_default_attributes=False):
    """Optimize the provided metagraph.

  For best results, the signature_def field in `metagraph` should be populated
  with information about input (feed) and output (fetch) tensors.

  Args:
    config_proto: a ConfigProto protobuf.
    metagraph: a MetagraphDef protobuf.
    verbose: whether to log optimization results.
    graph_id: a string identifying this graph.
    cluster: a grappler cluster object representing hardware resources
        available to run this graph.
    strip_default_attributes: whether graph node attributes having default
        values should be removed after all the optimization passes. This
        option is useful if the resulting graph will be executed by an older
        process that might not know some of the recently added attributes.
  """
    if not isinstance(config_proto, config_pb2.ConfigProto):
        raise TypeError(
            'Expected config_proto to be a ConfigProto, saw type %s' %
            type(config_proto))
    if cluster is None:
        cluster = gcluster.Cluster()
    out_graph = tf_opt.TF_OptimizeGraph(cluster.tf_cluster,
                                        config_proto.SerializeToString(),
                                        metagraph.SerializeToString(), verbose,
                                        graph_id, strip_default_attributes)
    return graph_pb2.GraphDef().FromString(out_graph)
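
The docstring recommends populating signature_def; a hedged sketch of how a caller might do that (the tensor names x/y are hypothetical):

# Usage sketch: export a MetaGraphDef, declare the feed and fetch tensors via
# signature_def, then run the default optimization pipeline.
import tensorflow as tf
from tensorflow.core.protobuf import config_pb2

with tf.Graph().as_default() as g:
    x = tf.compat.v1.placeholder(tf.float32, shape=[1, 4], name='x')
    y = tf.identity(2.0 * x + 1.0, name='y')
mg = tf.compat.v1.train.export_meta_graph(graph=g)

sig = mg.signature_def['serving_default']
sig.inputs['x'].name = 'x:0'   # feed
sig.outputs['y'].name = 'y:0'  # fetch
optimized_graph_def = OptimizeGraph(config_pb2.ConfigProto(), mg)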
def GenerateMemoryReport(metagraph, detailed_report=True, cluster=None):
  """Analyze the peak memory usage for the provided metagraph.

  Args:
    metagraph: A TensorFlow MetaGraphDef.
    detailed_report: print the live tensors in addition to the peak memory
      usage.
    cluster: Analyze the memory using the specified cluster, or the local
      machine if no cluster was specified.

  Returns:
    A string with the formatted memory usage.
  """
  if cluster is None:
    cluster = gcluster.Cluster(
        disable_detailed_stats=True, disable_timeline=True)

  item = gitem.Item(metagraph)
  peak_usage = cluster.DeterminePeakMemoryUsage(item)
  report = ""
  for device, snapshot in peak_usage.items():
    device_peak = snapshot[0]
    report += "Peak usage for device " + device + ": " + str(
        device_peak) + " bytes\n"
    if detailed_report:
      live_tensors = snapshot[1]
      for tensor in live_tensors:
        op_name, output_id, mem_used = tensor[0], tensor[1], tensor[2]
        report += "  " + str(op_name) + ":" + str(output_id) + " uses " + str(
            mem_used) + " bytes\n"

  return report
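
A short usage sketch (hypothetical toy graph pinned to the local CPU):

# Usage sketch: print the peak-memory report, including live tensors.
import tensorflow as tf

with tf.Graph().as_default() as g:
    with tf.device('/CPU:0'):
        a = tf.random.uniform(shape=[256, 256])
        b = a + a
    tf.compat.v1.add_to_collection(tf.compat.v1.GraphKeys.TRAIN_OP, b)
    mg = tf.compat.v1.train.export_meta_graph(graph=g)

print(GenerateMemoryReport(mg, detailed_report=True))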
Example #6
    def testSupportDevices(self):
        gpu_type = test_util.gpu_device_type()
        gpu_name = test_util.gpu_device_name()
        with ops.Graph().as_default() as g:
            a = random_ops.random_uniform(shape=(2, 3))
            b = random_ops.random_uniform(shape=(2, 3))
            c = a + b
            dims = math_ops.range(0, array_ops.rank(c), 1)
            d = math_ops.reduce_sum(a, axis=dims)
            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            train_op.append(d)
            mg = meta_graph.create_meta_graph_def(graph=g)
            grappler_item = item.Item(mg)

            device_properties = device_properties_pb2.DeviceProperties(
                type=gpu_type, frequency=1000, num_cores=60)
            named_gpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name=gpu_name)
            device_properties = device_properties_pb2.DeviceProperties(
                type='CPU', frequency=3000, num_cores=6)
            named_cpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name='/CPU:0')
            virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
            supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
            self.assertEqual(supported_dev['add'], ['/CPU:0', gpu_name])
            self.assertEqual(supported_dev['Sum'], ['/CPU:0', gpu_name])
            self.assertEqual(supported_dev['range'], ['/CPU:0', gpu_name])

            real_cluster = cluster.Cluster()
            supported_dev = real_cluster.GetSupportedDevices(grappler_item)
            if test.is_gpu_available():
                self.assertEqual(supported_dev['add'], [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0' + gpu_name
                ])
                self.assertEqual(supported_dev['Sum'], [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0' + gpu_name
                ])
                # The axis tensor must reside on the host
                self.assertEqual(
                    supported_dev['range'],
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
            else:
                self.assertEqual(
                    supported_dev['add'],
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
Example #7
def run_grappler(target_op, allotted_time, logdir, sess_config):
    """Runs Grappler placement."""
    tf.logging.set_verbosity(tf.logging.INFO)

    # Create a session here first so that the GPU memory-fraction option in
    # sess_config takes effect; otherwise the session created internally by
    # the cluster would configure GPU memory first.
    with tf.Session(config=sess_config):
        pass

    graph = tf.get_default_graph()

    cluster = gcluster.Cluster()
    metagraph = tf.train.export_meta_graph(graph=graph,
                                           clear_extraneous_savers=True)

    _LOGGER.info('Grappler allotted time: %d', allotted_time)

    placed_metagraph_list = grappler_graph_placer.PlaceGraph(
        metagraph,
        cluster=cluster,
        allotted_time=allotted_time,
        verbose=True,
        sess_config=sess_config,
        gpu_only=True)

    _LOGGER.info('Number of placed metagraphs found: %d',
                 len(placed_metagraph_list))

    if len(placed_metagraph_list) == 0:
        _LOGGER.info('No feasible placement is found.')
        return

    if logdir:
        metagraph_dir = os.path.join(logdir, 'metagraph')
        os.makedirs(metagraph_dir, exist_ok=True)
        for i, metagraph in enumerate(placed_metagraph_list):
            metagraph_path = os.path.join(metagraph_dir,
                                          'metagraph-%d.pbtxt' % i)
            # pylint: disable=invalid-name
            with open(metagraph_path, 'wb') as f:
                f.write(metagraph.SerializeToString())

    # Use the last element: it is the best placement found.
    placed_metagraph = placed_metagraph_list[-1]

    # assign device placement
    for node in placed_metagraph.graph_def.node:
        tf_op = graph.get_operation_by_name(node.name)
        # pylint: disable=protected-access
        tf_op._set_device(node.device)

    step_time = run_op(target_op,
                       warmup_count=10,
                       num_measurement=21,
                       profile_every_n_steps=21,
                       logdir=logdir,
                       config=sess_config)[0]

    _LOGGER.info('Average runtime: {}'.format(step_time))
Example #8
def get_cluster():
    """Grappler optimization configuration for GPU."""
    named_device = device_properties_pb2.NamedDevice()
    named_device.name = '/GPU:0'
    named_device.properties.type = 'GPU'
    named_device.properties.environment['architecture'] = '4'
    cluster = gcluster.Cluster(devices=[named_device])
    return cluster
Example #9
def build_cluster():
    devices = []
    device_properties = device_properties_pb2.DeviceProperties(
        type='CPU',
        frequency=2000,
        num_cores=12,
        l1_cache_size=32768,
        l2_cache_size=262144,
        l3_cache_size=30720*1024)
    for i in range(2):
        devices.append(
            device_properties_pb2.NamedDevice(
                properties=device_properties, name='/CPU:' + str(i)))
    return cluster.Cluster(devices=devices)
Example #10
def OptimizeGraph(config_proto,
                  metagraph,
                  verbose=True,
                  graph_id=b'graph_to_optimize',
                  cluster=None,
                  strip_default_attributes=False):
    """Optimize the provided metagraph.

  For best results, the signature_def field in `metagraph` should be populated
  with information about input (feed) and output (fetch) tensors.

  Args:
    config_proto: a ConfigProto protobuf.
    metagraph: a MetagraphDef protobuf.
    verbose: whether to log optimization results.
    graph_id: a string identifying this graph.
    cluster: a grappler cluster object representing hardware resources
        available to run this graph.
    strip_default_attributes: whether graph node attributes having default
        values should be removed after all the optimization passes. This
        option is useful if the resulting graph will be executed by an older
        process that might not know some of the recently added attributes.
  """
    if not isinstance(config_proto, config_pb2.ConfigProto):
        raise TypeError(
            'Expected config_proto to be a ConfigProto, saw type %s' %
            type(config_proto))
    if cluster is not None:
        out_graph = tf_opt.TF_OptimizeGraph(cluster.tf_cluster,
                                            config_proto.SerializeToString(),
                                            metagraph.SerializeToString(),
                                            verbose, graph_id,
                                            strip_default_attributes)
    else:
        # Grappler currently assumes that at most one session is alive
        # globally (see the comments on SingleMachine::Provision()), so we
        # take the following lock to prevent concurrent access to this code.
        with _OPTIMIZE_GRAPH_CLUSTER_LOCK:
            cluster = gcluster.Cluster()
            try:
                out_graph = tf_opt.TF_OptimizeGraph(
                    cluster.tf_cluster, config_proto.SerializeToString(),
                    metagraph.SerializeToString(), verbose, graph_id,
                    strip_default_attributes)
            finally:
                # Force the cleanup instead of waiting on python GC to cleanup the
                # temporary cluster we've created. Otherwise subsequent calls might
                # not have a clean slate because GC may not have run yet.
                cluster.Shutdown()
    return graph_pb2.GraphDef().FromString(out_graph)
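
Since the cluster-less path serializes on the global lock and provisions a fresh single-machine cluster per call, callers optimizing many graphs may prefer to pass one long-lived cluster explicitly. A minimal sketch (the metagraphs iterable is hypothetical):

# Sketch: reuse a single cluster across several OptimizeGraph calls, then
# shut it down explicitly rather than waiting on the garbage collector.
shared_cluster = gcluster.Cluster()
try:
    for mg in metagraphs:  # hypothetical iterable of MetaGraphDef protos
        optimized = OptimizeGraph(config_pb2.ConfigProto(), mg,
                                  cluster=shared_cluster)
finally:
    shared_cluster.Shutdown()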
Example #11
def get_local_devices(cluster=None):
    """Returns a list of available local devices."""
    if cluster:
        devices = cluster.ListDevices()
    else:
        cluster = gcluster.Cluster()
        devices = cluster.ListDevices()
        cluster.Shutdown()

    return [{
        "name": named_device.name,
        "memory_size": named_device.properties.memory_size,
        "type": named_device.properties.type
    } for named_device in devices]
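
A quick enumeration sketch (the output depends on the local machine):

# Usage sketch: each entry is a plain dict built from a NamedDevice proto.
for dev in get_local_devices():
    print('%s: type=%s, memory=%d bytes' %
          (dev['name'], dev['type'], dev['memory_size']))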
Example #12
def OptimizeGraph(rewriter_config,
                  metagraph,
                  verbose=True,
                  graph_id=b'graph_to_optimize',
                  cluster=None):
    """Optimize the provided metagraph."""
    with errors.raise_exception_on_not_ok_status() as status:
        if cluster is None:
            cluster = gcluster.Cluster()
        ret_from_swig = tf_opt.TF_OptimizeGraph(
            cluster.tf_cluster, rewriter_config.SerializeToString(),
            metagraph.SerializeToString(), verbose, graph_id, status)
    if ret_from_swig is None:
        return None
    out_graph = graph_pb2.GraphDef().FromString(ret_from_swig)
    return out_graph
Example #13
    def testNoDetailedStats(self):
        with ops.Graph().as_default() as g:
            a = random_ops.random_uniform(shape=())
            b = random_ops.random_uniform(shape=())
            c = a + b
            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            train_op.append(c)
            mg = meta_graph.create_meta_graph_def(graph=g)
            grappler_item = item.Item(mg)
            grappler_cluster = cluster.Cluster(disable_detailed_stats=True)

            op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts(
                grappler_item)
            self.assertTrue(run_time > 0)
            self.assertEqual(len(op_perfs), 0)
            self.assertEqual(len(step_stats.dev_stats), 0)
Example #14
def optimize(g, inputs, outputs):
    sd = SignatureDef()
    for name in inputs:
        input_t = g.get_operation_by_name(name).outputs[0]
        sd.inputs[name].name = name
        sd.inputs[name].dtype = input_t.dtype.as_datatype_enum
        sd.inputs[name].tensor_shape.CopyFrom(input_t.shape.as_proto())
    for name in outputs:
        output_t = g.get_operation_by_name(name).outputs[0]
        sd.outputs[name].name = name
        sd.outputs[name].dtype = output_t.dtype.as_datatype_enum
        sd.outputs[name].tensor_shape.CopyFrom(output_t.shape.as_proto())

    tf.compat.v1.enable_resource_variables()
    cl = cluster.Cluster(disable_detailed_stats=True)

    # Run the optimization twice. The first pass removes the split/pad/
    # transpose nodes but leaves behind their constant parameters (axis,
    # perm, ...), because those constants are named in the whitelist; the
    # second, prune-only pass then eliminates them.
    for i in range(2):
        if i == 0:
            graph = g
            c = get_default_config()
        else:
            graph = get_graph_from(optimized_graph_def)
            c = get_only_prune_config()

        white_list = get_white_list(graph)
        for name in white_list:
            graph.add_to_collection(
                GraphKeys.TRAIN_OP, graph.get_operation_by_name(name)
            )

        meta_graph = tf.compat.v1.train.export_meta_graph(
            graph_def=graph.as_graph_def(), graph=graph
        )
        meta_graph.signature_def["not_used_key"].CopyFrom(sd)

        optimized_graph_def = tf_optimizer.OptimizeGraph(
            config_proto=c, metagraph=meta_graph, cluster=cl
        )
    # Switch back to creating VariableV2 ops for variables created from here
    # on, instead of VarHandleOp/ReadVariableOp/VarIsInitializedOp.
    tf.compat.v1.disable_resource_variables()
    return optimized_graph_def
Example #15
  def testVirtualCluster(self):
    with ops.Graph().as_default() as g:
      a = random_ops.random_uniform(shape=())
      b = random_ops.random_uniform(shape=())
      c = a + b
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      train_op.append(c)
      mg = meta_graph.create_meta_graph_def(graph=g)
      grappler_item = item.Item(mg)
      device_properties = device_properties_pb2.DeviceProperties(
          type='GPU', environment={
              'architecture': '7'
          })
      named_device = device_properties_pb2.NamedDevice(
          properties=device_properties, name='/GPU:0')
      grappler_cluster = cluster.Cluster(devices=[named_device])
      op_perfs, run_time, _ = grappler_cluster.MeasureCosts(grappler_item)
      self.assertGreater(run_time, 0)
      self.assertEqual(len(op_perfs), 15)
Example #16
def OptimizeGraph(config_proto,
                  metagraph,
                  verbose=True,
                  graph_id=b'graph_to_optimize',
                  cluster=None):
  """Optimize the provided metagraph."""
  if not isinstance(config_proto, config_pb2.ConfigProto):
    raise TypeError('Expected config_proto to be a ConfigProto, saw type %s' %
                    type(config_proto))
  if cluster is None:
    cluster = gcluster.Cluster()
  ret_from_swig = tf_opt.TF_OptimizeGraph(cluster.tf_cluster,
                                          config_proto.SerializeToString(),
                                          metagraph.SerializeToString(),
                                          verbose, graph_id)
  if ret_from_swig is None:
    return None
  out_graph = graph_pb2.GraphDef().FromString(ret_from_swig)
  return out_graph
  def testMemoryEstimates(self):
    with ops.Graph().as_default() as g:
      with ops.device('/job:localhost/replica:0/task:0/device:CPU:0'):
        a = random_ops.random_uniform(shape=())
        b = random_ops.random_uniform(shape=())
        c = a + b
        train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
        train_op.append(c)
        mg = meta_graph.create_meta_graph_def(graph=g)
        grappler_item = item.Item(mg)
        grappler_cluster = cluster.Cluster(
            disable_detailed_stats=True, disable_timeline=True)
        peak_mem = grappler_cluster.DeterminePeakMemoryUsage(grappler_item)
        self.assertLessEqual(1, len(peak_mem))
        snapshot = peak_mem['/job:localhost/replica:0/task:0/device:CPU:0']
        peak_usage = snapshot[0]
        self.assertEqual(52, peak_usage)
        live_tensors = snapshot[1]
        self.assertEqual(15, len(live_tensors))
Example #18
    def _buildCluster(num_cpus=1, num_gpus=1):
        devices = []
        if num_gpus > 0:
            device_properties = device_properties_pb2.DeviceProperties(
                type='GPU',
                vendor='NVidia',
                model='Tesla K40m',
                frequency=745,  # 745 MHz
                num_cores=2880,  # CUDA cores
                environment={
                    'architecture': '3.5',
                    'cuda': '10000',
                    'cudnn': '7031'
                },
                num_registers=65536,
                l1_cache_size=65536,  # 64 KB
                l2_cache_size=1572864,  # 1.5 MB
                shared_memory_size_per_multiprocessor=49152,  # 48 KB
                memory_size=12884901888,  # 12 GB
                bandwidth=288000000)  # 288 GB/s
            for i in range(num_gpus):
                devices.append(
                    device_properties_pb2.NamedDevice(
                        properties=device_properties, name='/GPU:' + str(i)))

        assert num_cpus > 0
        device_properties = device_properties_pb2.DeviceProperties(
            type='CPU',
            frequency=2399,
            num_cores=32,
            l1_cache_size=32768,
            l2_cache_size=262144,
            l3_cache_size=20971520)
        for i in range(num_cpus):
            devices.append(
                device_properties_pb2.NamedDevice(properties=device_properties,
                                                  name='/CPU:' + str(i)))

        return cluster.Cluster(devices=devices)
Example #19
def buildCluster(num_cpus=1, num_gpus=2):
    devices = []
    if num_gpus > 0:
        device_properties = device_properties_pb2.DeviceProperties(
            type='GPU',
            vendor='NVidia',
            model='GeForce GTX TITAN X',
            frequency=1076,
            num_cores=24,
            environment={
                'architecture': '5.2',
                'cuda': '8000',
                'cudnn': '6021'
            },
            num_registers=65536,
            l1_cache_size=24576,
            l2_cache_size=3145728,
            shared_memory_size_per_multiprocessor=98304,
            memory_size=12783648768,
            bandwidth=336480000)
        for i in range(num_gpus):
            devices.append(
                device_properties_pb2.NamedDevice(properties=device_properties,
                                                  name='/GPU:' + str(i)))

    assert num_cpus > 0
    device_properties = device_properties_pb2.DeviceProperties(
        type='CPU',
        frequency=1900,
        num_cores=2,
        l1_cache_size=32768,
        l2_cache_size=262144,
        l3_cache_size=3145728)
    for i in range(num_cpus):
        devices.append(
            device_properties_pb2.NamedDevice(properties=device_properties,
                                              name='/CPU:' + str(i)))

    return cluster.Cluster(devices=devices)
Example #20
def OptimizeGraph(config_proto,
                  metagraph,
                  verbose=True,
                  graph_id=b'graph_to_optimize',
                  cluster=None):
    """Optimize the provided metagraph.

  For best results, the signature_def field in `metagraph` should be populated
  with information about input (feed) and output (fetch) tensors.
  """
    if not isinstance(config_proto, config_pb2.ConfigProto):
        raise TypeError(
            'Expected config_proto to be a ConfigProto, saw type %s' %
            type(config_proto))
    if cluster is None:
        cluster = gcluster.Cluster()
    ret_from_swig = tf_opt.TF_OptimizeGraph(cluster.tf_cluster,
                                            config_proto.SerializeToString(),
                                            metagraph.SerializeToString(),
                                            verbose, graph_id)
    if ret_from_swig is None:
        return None
    out_graph = graph_pb2.GraphDef().FromString(ret_from_swig)
    return out_graph
Example #21
def PlaceGraph(metagraph,
               cluster=None,
               allotted_time=3600,
               hparams=None,
               verbose=False):
    """Place the provided metagraph.

  Args:
    metagraph: the metagraph to place.
    cluster: an optional set of hardware resources to optimize the placement
      for. If none is specified, the placement is optimized for the hardware
      available on the local machine.
    allotted_time: the maximum amount of time, in seconds, to spend optimizing
      the placement.
    hparams: hyperparameters used to fine-tune the placer.
    verbose: prints debug information if True.

  Returns:
    The placed metagraph.
  """
    if cluster is None:
        cluster = gcluster.Cluster()

    # Optimize the metagraph to speed up the placement.
    rewriter_config = rewriter_config_pb2.RewriterConfig()
    rewriter_config.optimizers.append("pruning")
    rewriter_config.optimizers.append("constfold")
    rewriter_config.optimizers.append("arithmetic")
    rewriter_config.optimizers.append("dependency")
    rewriter_config.optimizers.append("pruning")
    optimized_graph = tf_optimizer.OptimizeGraph(rewriter_config,
                                                 metagraph,
                                                 verbose=verbose,
                                                 cluster=cluster)
    optimized_metagraph = meta_graph_pb2.MetaGraphDef()
    optimized_metagraph.CopyFrom(metagraph)
    optimized_metagraph.graph_def.CopyFrom(optimized_graph)

    item = gitem.Item(optimized_metagraph)

    if hparams is None:
        hparams = hierarchical_controller.hierarchical_controller_hparams()
    # We run with a single child.
    hparams.num_children = 1

    # Measure the runtime achievable with the original placement. The hparams
    # defaults are resolved first, because the failure path below reads
    # hparams.failing_signal.
    try:
        _, original_run_time, _ = cluster.MeasureCosts(item)
        if verbose:
            print("Runtime for original placement: " + str(original_run_time))
    except errors.OpError as e:
        if verbose:
            print("Original placement isn't feasible: " + str(e))
        original_run_time = hparams.failing_signal

    with tf_ops.Graph().as_default():
        # Place all the controller nodes on the CPU so they don't compete
        # with the model being optimized for accelerator memory.
        with tf_ops.device("/device:CPU:0"):
            model = hierarchical_controller.HierarchicalController(
                hparams, item, cluster)
            ops = model.build_controller()
            session_creator = training.ChiefSessionCreator()
            with training.MonitoredSession(
                    session_creator=session_creator) as sess:
                start_time = time.time()
                current_time = start_time
                while current_time - start_time < allotted_time:
                    grouping_actions = model.generate_grouping(sess)
                    input_to_seq2seq = model.create_group_embeddings(
                        grouping_actions, verbose=verbose)
                    model.generate_placement(input_to_seq2seq, sess)
                    try:
                        run_time = model.eval_placement(sess, verbose=verbose)
                    except errors.OpError as e:
                        if verbose:
                            print("Failed to run graph:" + str(e))
                        run_time = hparams.failing_signal
                    updated = model.update_reward(sess,
                                                  run_time,
                                                  verbose=verbose)
                    if updated and run_time < original_run_time:
                        if verbose:
                            print("Found better placement, with runtime " +
                                  str(run_time))
                        model.export_placement(metagraph)

                    model.process_reward(sess)

                    current_time = time.time()

    return metagraph
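
A hedged usage sketch (assumes the model's graph is the current default graph; buildCluster is the virtual-cluster helper from Example #19 above):

# Sketch: search for a better placement on a virtual 1-CPU/2-GPU cluster with
# a one-minute budget; PlaceGraph returns the metagraph with devices assigned.
import tensorflow as tf

mg = tf.compat.v1.train.export_meta_graph()  # metagraph of the default graph
placed_mg = PlaceGraph(mg,
                       cluster=buildCluster(num_cpus=1, num_gpus=2),
                       allotted_time=60,
                       verbose=True)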
Example #22
import tensorflow as tf

from tensorflow.core.protobuf import config_pb2

from tensorflow.python.framework import ops
from tensorflow.python.framework import importer
from tensorflow.python.framework import meta_graph

from tensorflow.python.grappler import cluster
from tensorflow.python.grappler import tf_optimizer


try:  # pragma: no cover
    gcluster = cluster.Cluster()
except tf.errors.UnavailableError:  # pragma: no cover
    pass

config = config_pb2.ConfigProto()


def normalize_tf_graph(graph_output, new_graph=True, verbose=False):
    """Use grappler to normalize a graph.

    Arguments
    ---------
    graph_output: Tensor
      A tensor we want to consider as "output" of a `FuncGraph`.

    Returns
    -------
    The simplified graph.
Example #23
    def testSupportDevices(self):
        with ops.Graph().as_default() as g:
            a = random_ops.random_uniform(shape=(2, 3))
            b = random_ops.random_uniform(shape=(2, 3))
            c = a + b
            dims = math_ops.range(0, array_ops.rank(c), 1)
            d = math_ops.reduce_sum(a, axis=dims)
            train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
            train_op.append(d)
            mg = meta_graph.create_meta_graph_def(graph=g)
            grappler_item = item.Item(mg)

            device_properties = device_properties_pb2.DeviceProperties(
                type='GPU', frequency=1000, num_cores=60)
            named_gpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name='/GPU:0')
            device_properties = device_properties_pb2.DeviceProperties(
                type='CPU', frequency=3000, num_cores=6)
            named_cpu = device_properties_pb2.NamedDevice(
                properties=device_properties, name='/CPU:0')
            virtual_cluster = cluster.Cluster(devices=[named_cpu, named_gpu])
            supported_dev = virtual_cluster.GetSupportedDevices(grappler_item)
            self.assertEqual(supported_dev['add'], ['/CPU:0', '/GPU:0'])
            self.assertEqual(supported_dev['Sum'], ['/CPU:0', '/GPU:0'])
            self.assertEqual(supported_dev['range'], ['/CPU:0', '/GPU:0'])

            real_cluster = cluster.Cluster()
            supported_dev = real_cluster.GetSupportedDevices(grappler_item)
            # NCL 18.04 -- hack to account for possible XLA devices
            if test.is_gpu_available():
                add_devices = [
                    d for d in supported_dev['add']
                    if not d.split(':')[-2].startswith('XLA')
                ]
                self.assertEqual(add_devices, [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0/device:GPU:0'
                ])
                Sum_devices = [
                    d for d in supported_dev['Sum']
                    if not d.split(':')[-2].startswith('XLA')
                ]
                self.assertEqual(Sum_devices, [
                    '/job:localhost/replica:0/task:0/device:CPU:0',
                    '/job:localhost/replica:0/task:0/device:GPU:0'
                ])
                # The axis tensor must reside on the host
                range_devices = [
                    d for d in supported_dev['range']
                    if not d.split(':')[-2].startswith('XLA')
                ]
                self.assertEqual(
                    range_devices,
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])
            else:
                add_devices = [
                    d for d in supported_dev['add']
                    if not d.split(':')[-2].startswith('XLA')
                ]
                self.assertEqual(
                    add_devices,
                    ['/job:localhost/replica:0/task:0/device:CPU:0'])