Example #1
 def initialize(self):
     if context.executing_eagerly():
         # TODO(priyag): Add appropriate call here when eager is supported for TPUs.
         raise NotImplementedError(
             'Eager mode not supported in TPUStrategy.')
     else:
         return [tpu.initialize_system()]
Example #2
def setup_tpu_session(master):
  """Initializes and returns a Keras/TF session connected the TPU `master`."""
  session = tf_session.Session(
      target=master, config=config_pb2.ConfigProto(isolate_session_state=True))
  K.set_session(session)
  K.get_session().run(tpu.initialize_system())
  return session
Example #3
  def testTrainNetworkWithBatchNorm(self, distribution, optimizer_fn, momentum,
                                    renorm, is_tpu,
                                    update_ops_in_cross_tower_mode):
    """Verifies that moving mean updates are reduced across towers."""
    with distribution.scope():
      num_towers = len(distribution.worker_devices)
      model_fn, dataset_fn, batchnorm = batchnorm_example(
          optimizer_fn,
          batch_per_epoch=num_towers,
          momentum=momentum,
          renorm=renorm,
          update_ops_in_tower_mode=not update_ops_in_cross_tower_mode)

      # Make sure prefetching is disabled, since prefetching makes the specific
      # input on each device non-deterministic, and this test relies on a
      # specific input being on each device.
      if isinstance(distribution, mirrored_strategy.MirroredStrategy):
        self.assertFalse(distribution._prefetch_on_device)
      iterator = distribution.distribute_dataset(
          dataset_fn).make_one_shot_iterator()

      def run_step():
        fetches = distribution.unwrap(
            distribution.call_for_each_tower(
                model_fn, iterator.get_next(),
                run_concurrently=batchnorm.built))
        if update_ops_in_cross_tower_mode:
          fetches += ops.get_collection(ops.GraphKeys.UPDATE_OPS)
        return control_flow_ops.group(fetches)

      if not context.executing_eagerly():
        with self.test_session() as sess:
          if is_tpu:
            sess.run(tpu.initialize_system())
          run_step = sess.make_callable(run_step())
        self.evaluate(variables_lib.global_variables_initializer())

      expected_moving_means = [0.] * 8

      def averaged_batch_mean(i):
        # Each batch has shape [16, 8] where the ith element in the jth row is
        # (8 * j + i + tower_id * 100), so the batch mean on each tower is
        # (60 + i + tower_id * 100). Averaged over all towers, the batch mean is:
        return 60. + i + (num_towers - 1.) / 2. * 100.

      for _ in range(10):
        run_step()
        moving_means = self.evaluate(distribution.fetch(batchnorm.moving_mean))

        # We make sure that the moving_mean is updated as if the sample mean is
        # calculated over all towers.
        for i, expected_moving_mean in enumerate(expected_moving_means):
          expected_moving_means[i] -= ((
              expected_moving_mean - averaged_batch_mean(i)) * (1.0 - momentum))
          self.assertNear(expected_moving_means[i], moving_means[i], 0.0001)

      if is_tpu:
        with self.test_session() as sess:
          sess.run(tpu.shutdown_system())
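
As a side note, the 60 + i + (num_towers - 1) / 2 * 100 value used by averaged_batch_mean above can be re-derived with plain NumPy. The sketch below only illustrates that arithmetic (two towers assumed for illustration); it is not part of the test.

import numpy as np

num_towers = 2
# Batch on tower t has shape [16, 8]; element [j, i] is 8 * j + i + t * 100.
batches = [
    np.array([[8 * j + i + t * 100 for i in range(8)] for j in range(16)])
    for t in range(num_towers)
]
per_tower_mean = np.stack([b.mean(axis=0) for b in batches])  # row t: 60 + i + t * 100
all_tower_mean = per_tower_mean.mean(axis=0)  # 60 + i + (num_towers - 1) / 2 * 100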
Example #4
def setup_tpu_session(master):
  """Initializes and returns a Keras/TF session connected the TPU `master`."""
  session = tf_session.Session(
      target=master, config=config_pb2.ConfigProto(isolate_session_state=True))
  K.set_session(session)
  K.get_session().run(tpu.initialize_system())
  return session
Example #5
 def _run_tpu_initialization(self):
     """Test TPU system initialization."""
     with tf.Session('grpc://{0}:8470'.format(self.tpu_ip)) as sess:
         sess.run(tpu.initialize_system())
         sess.run(tpu.shutdown_system())
         logging.info('Successfully initialized and shut down the TPU')
     self.tpu_initialization = 'Passed'
Example #6
  def __init__(self, cpu_model, tpu_name_or_address, strategy):
    super(models.Model, self).__init__(  # pylint: disable=bad-super-call
        inputs=cpu_model.inputs,
        outputs=cpu_model.outputs,
        name=cpu_model.name,
    )

    # Create a mapping from numpy arrays to infeed managers.
    # Note: uses a list of tuples instead of a map because numpy arrays are
    # not hashable.
    self._numpy_to_infeed_manager_list = []

    self.predict_function = None
    self.test_function = None
    self.train_function = None
    self._strategy = strategy

    self._tpu_name_or_address = tpu_name_or_address
    self._cpu_model = cpu_model
    self._tpu_model = None
    self._tpu_weights_initialized = False
    self._graph = ops.Graph()

    self._cluster_resolver = tpu_cluster_resolver.TPUClusterResolver(
        tpu_name_or_address)
    master = self._cluster_resolver.master()
    cluster_spec = self._cluster_resolver.cluster_spec()
    self._session = tf_session.Session(
        graph=self._graph,
        target=master,
        config=config_pb2.ConfigProto(isolate_session_state=True))

    # TODO(saeta): Confirm the lines below work in ClusterSpec propagation env.
    if cluster_spec:
      self._session.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    with self._graph.as_default():
      self._session.run(tpu.initialize_system())

    # If the input CPU model has already been compiled, compile our TPU model
    # immediately.
    if self._cpu_model.optimizer:
      self.compile(
          self._cpu_model.optimizer,
          self._cpu_model.loss,
          self._cpu_model.metrics,
          self._cpu_model.loss_weights,
          self._cpu_model.sample_weight_mode,
          self._cpu_model.weighted_metrics,
          self._cpu_model.target_tensors,
      )
Example #7
    def __init__(self, cpu_model, tpu_name_or_address, strategy):
        super(models.Model, self).__init__(  # pylint: disable=bad-super-call
            inputs=cpu_model.inputs,
            outputs=cpu_model.outputs,
            name=cpu_model.name,
        )

        # Create a mapping from numpy arrays to infeed managers.
        # Note: uses a list of tuples instead of a map because numpy arrays are
        # not hashable.
        self._numpy_to_infeed_manager_list = []

        self.predict_function = None
        self.test_function = None
        self.train_function = None
        self._strategy = strategy

        self._tpu_name_or_address = tpu_name_or_address
        self._cpu_model = cpu_model
        self._tpu_model = None
        self._tpu_weights_initialized = False
        self._graph = ops.Graph()

        self._cluster_resolver = tpu_cluster_resolver.TPUClusterResolver(
            tpu_name_or_address)
        master = self._cluster_resolver.master()
        cluster_spec = self._cluster_resolver.cluster_spec()
        self._session = tf_session.Session(
            graph=self._graph,
            target=master,
            config=config_pb2.ConfigProto(isolate_session_state=True))

        # TODO(saeta): Confirm the lines below work in ClusterSpec propagation env.
        if cluster_spec:
            self._session.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

        with self._graph.as_default():
            self._session.run(tpu.initialize_system())

        # If the input CPU model has already been compiled, compile our TPU model
        # immediately.
        if self._cpu_model.optimizer:
            self.compile(
                self._cpu_model.optimizer,
                self._cpu_model.loss,
                self._cpu_model.metrics,
                self._cpu_model.loss_weights,
                self._cpu_model.sample_weight_mode,
                self._cpu_model.weighted_metrics,
                self._cpu_model.target_tensors,
            )
Example #8
 def initialize(self):
   if context.executing_eagerly():
     # TODO(priyag): Add appropriate call here when eager is supported for TPUs.
     raise NotImplementedError('Eager mode not supported in TPUStrategy.')
   else:
     # TODO(jhseu): We need this hack because DistributionStrategies must be
     # pickleable for copy.deepcopy(). Remove when initialize_system goes away.
     graph = ops.get_default_graph()
     tpu_init = graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION)
     if tpu_init:
       return tpu_init
     graph.add_to_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION,
                             tpu.initialize_system())
     return graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION)
Example #9
 def initialize(self):
   if context.executing_eagerly():
     # TODO(priyag): Add appropriate call here when eager is supported for TPUs.
     raise NotImplementedError("Eager mode not supported in TPUStrategy.")
   else:
     # TODO(jhseu): We need this hack because DistributionStrategies must be
     # pickleable for copy.deepcopy(). Remove when initialize_system goes away.
     graph = ops.get_default_graph()
     tpu_init = graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION)
     if tpu_init:
       return tpu_init
     graph.add_to_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION,
                             tpu.initialize_system())
     return graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION)
Example #10
    def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss,
                         is_tpu):
        with distribution.scope():
            model_fn, dataset, layer = minimize_loss_example(
                optimizer_fn,
                use_bias=True,
                use_callable_loss=use_callable_loss)

            # TODO(isaprykin):  Eliminate `is_tpu`. Probably add a
            # `DistributionStrategy.create_monitor` so that each DistributionStrategy
            # could influence its training loop. That method would return an instance
            # of Monitor.  TPUMonitor would execute tpu.initialize_system() and
            # tpu.shutdown_system().
            if is_tpu:
                dataset = dataset.batch(2)

            iterator = distribution.distribute_dataset(dataset)

            def run_step():
                # TODO(isaprykin): Make iterator get_next() return a list of sub-
                # batches for each iteration. Pass iterator.get_next() and not iterator
                # to call_for_each_tower.
                return distribution.group(
                    distribution.call_for_each_tower(
                        model_fn,
                        iterator.get_next() if not is_tpu else iterator,
                        run_concurrently=layer.built))

            if not context.executing_eagerly():
                with self.test_session() as sess:
                    if is_tpu:
                        sess.run(tpu.initialize_system())
                    run_step = sess.make_callable(run_step())
                self.evaluate(variables_lib.global_variables_initializer())

            weights, biases = [], []
            for _ in range(10):
                run_step()

                weights.append(self.evaluate(distribution.fetch(layer.kernel)))
                biases.append(self.evaluate(distribution.fetch(layer.bias)))

            if is_tpu:
                with self.test_session() as sess:
                    sess.run(tpu.shutdown_system())

            error = abs(
                numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
            is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
            self.assertTrue(is_not_increasing)
Example #11
def _obtain_topology(master_address, run_config):
    try:
        logging.info(
            'Initializing TPU system (master: %s) to fetch topology '
            'for model parallelism. This might take a while.', master_address)
        with ops.Graph().as_default():
            session_config = _get_session_config_with_timeout(
                _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS, run_config)
            with session_lib.Session(master_address,
                                     config=session_config) as sess:
                topology = sess.run(tpu.initialize_system())
                return topology
    except errors.DeadlineExceededError:
        raise ValueError('Failed to initialize the TPU system with master (%s). '
                         'Please double-check that the TPU system is functional.' %
                         (master_address))
Example #12
def _obtain_topology(master_address, run_config):
  try:
    logging.info('Initializing TPU system (master: %s) to fetch topology '
                 'for model parallelism. This might take a while.',
                 master_address)
    with ops.Graph().as_default():
      session_config = get_session_config_with_timeout(
          _INITIAL_TPU_SYSTEM_TIMEOUT_IN_MS, run_config)
      with session_lib.Session(
          master_address, config=session_config) as sess:
        topology = sess.run(tpu.initialize_system())
        return topology
  except errors.DeadlineExceededError:
    raise ValueError(
        'Failed to initialize the TPU system with master (%s). '
        'Please double-check that the TPU system is functional.' % (
            master_address))
Example #13
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices in a separate session and graph.

  Args:
    cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.
  Returns:
    The tf.contrib.tpu.Topology object for the topology of the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")

  if context.executing_eagerly():
    # This function is structured this way for the following non-intuitive
    # reason: tpu.initialize_system creates a dummy op whose sole purpose is to
    # trigger DistributedTPURewritePass, and it is that pass which adds the real
    # ops that initialize the TPU system. Thus, we can't simply run
    # tpu.initialize_system eagerly; we need to wrap it in a defun and trigger
    # the rewrite passes on it.
    # The easiest way to trigger a rewrite is to run the function with
    # TPUPartitionedCallOp.
    @function.defun
    def _tpu_init_fn():
      return tpu.initialize_system()

    # We can't call _tpu_init_fn normally (because it contains just a dummy op,
    # see above) but need to define it to get it added to eager context
    # and get its assigned name.
    # pylint: disable=protected-access
    graph_func = _tpu_init_fn._get_concrete_function_internal()
    func_name = compat.as_str(graph_func._inference_function.name)
    # pylint: enable=protected-access

    output = tpu_functional_ops.TPUPartitionedCall(
        args=[], device_ordinal=0, Tout=[dtypes.string], f=func_name)
    serialized_topology = output[0].numpy()
  else:
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())

  logging.info("Finished initializing TPU system.")
  return topology.Topology(serialized=serialized_topology)
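
A hypothetical usage sketch for the initialize_tpu_system helper above: the TPU name "my-tpu" is a placeholder, and the Topology attributes read at the end are assumptions based on the tf.contrib.tpu.Topology API rather than part of the original example.

# Hypothetical usage; assumes a reachable Cloud TPU and the imports used above.
resolver = TPUClusterResolver(tpu="my-tpu")
topology = initialize_tpu_system(resolver)
print("TPU tasks:", topology.num_tasks,
      "- cores per task:", topology.num_tpus_per_task)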
Example #14
def tpu_session(cluster_resolver):
  """Construct or return a `tf.Session` connected to the given cluster."""
  global _SESSIONS
  master = cluster_resolver.master()
  if master not in _SESSIONS:
    cluster_spec = cluster_resolver.cluster_spec()
    config = config_pb2.ConfigProto(isolate_session_state=True)
    if cluster_spec:
      config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    graph = ops.Graph()
    session = tf_session.Session(graph=graph, target=master, config=config)

    with graph.as_default():
      session.run(tpu.initialize_system())

    _SESSIONS[master] = session
  return _SESSIONS[master]
Example #15
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices in a separate session and graph.

  Args:
    cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.
  """
  if cluster_resolver is None:
    cluster_resolver = resolver_lib.TPUClusterResolver("")
  master = cluster_resolver.master()

  logging.info("Initializing the TPU system.")
  session_config = config_pb2.ConfigProto(allow_soft_placement=True)

  with ops.Graph().as_default():
    with session_lib.Session(config=session_config, target=master) as sess:
      sess.run([tpu.initialize_system()])
  logging.info("Finished initializing TPU system.")
Example #16
def tpu_session(cluster_resolver):
    """Construct or return a `tf.Session` connected to the given cluster."""
    global _SESSIONS
    master = cluster_resolver.master()
    if master not in _SESSIONS:
        cluster_spec = cluster_resolver.cluster_spec()
        config = config_pb2.ConfigProto(isolate_session_state=True)
        if cluster_spec:
            config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

        graph = ops.Graph()
        session = tf_session.Session(graph=graph, target=master, config=config)

        with graph.as_default():
            session.run(tpu.initialize_system())

        _SESSIONS[master] = session
    return _SESSIONS[master]
Example #17
def initialize_tpu_system(cluster_resolver=None):
    """Initialize the TPU devices in a separate session and graph.

  Args:
    cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver,
        which provides information about the TPU cluster.
  """
    if cluster_resolver is None:
        cluster_resolver = resolver_lib.TPUClusterResolver("")
    master = cluster_resolver.master()

    logging.info("Initializing the TPU system.")
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)

    with ops.Graph().as_default():
        with session_lib.Session(config=session_config, target=master) as sess:
            sess.run([tpu.initialize_system()])
    logging.info("Finished initializing TPU system.")
Example #18
  def __init__(self, cpu_model, tpu_name_or_address, strategy):
    super(models.Model, self).__init__(  # pylint: disable=bad-super-call
        inputs=cpu_model.inputs,
        outputs=cpu_model.outputs,
        name=cpu_model.name,
    )

    self.predict_function = None
    self.test_function = None
    self.train_function = None
    self._strategy = strategy

    self._tpu_name_or_address = tpu_name_or_address
    self._cpu_model = cpu_model
    self._tpu_model = None
    self._tpu_weights_initialized = False
    self._graph = ops.Graph()

    cluster_resolver = tpu_cluster_resolver.TPUClusterResolver(
        tpu_name_or_address)
    cluster_spec = cluster_resolver.cluster_spec()
    self._session = tf_session.Session(
        graph=self._graph,
        target=cluster_resolver.master(),
        config=config_pb2.ConfigProto(isolate_session_state=True))

    if cluster_spec:
      self._session.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    with self._graph.as_default():
      self._session.run(tpu.initialize_system())

    # If the input CPU model has already been compiled, compile our TPU model
    # immediately.
    if self._cpu_model.optimizer:
      self.compile(
          self._cpu_model.optimizer,
          self._cpu_model.loss,
          self._cpu_model.metrics,
          self._cpu_model.loss_weights,
          self._cpu_model.sample_weight_mode,
          self._cpu_model.weighted_metrics,
          self._cpu_model.target_tensors,
      )
Example #19
    def __init__(self, cpu_model, tpu_name_or_address, strategy):
        super(models.Model, self).__init__(  # pylint: disable=bad-super-call
            inputs=cpu_model.inputs,
            outputs=cpu_model.outputs,
            name=cpu_model.name,
        )

        self.predict_function = None
        self.test_function = None
        self.train_function = None
        self._strategy = strategy

        self._tpu_name_or_address = tpu_name_or_address
        self._cpu_model = cpu_model
        self._tpu_model = None
        self._tpu_weights_initialized = False
        self._graph = ops.Graph()

        cluster_resolver = tpu_cluster_resolver.TPUClusterResolver(
            tpu_name_or_address)
        cluster_spec = cluster_resolver.cluster_spec()
        self._session = tf_session.Session(
            graph=self._graph,
            target=cluster_resolver.master(),
            config=config_pb2.ConfigProto(isolate_session_state=True))

        if cluster_spec:
            self._session.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

        with self._graph.as_default():
            self._session.run(tpu.initialize_system())

        # If the input CPU model has already been compiled, compile our TPU model
        # immediately.
        if self._cpu_model.optimizer:
            self.compile(
                self._cpu_model.optimizer,
                self._cpu_model.loss,
                self._cpu_model.metrics,
                self._cpu_model.loss_weights,
                self._cpu_model.sample_weight_mode,
                self._cpu_model.weighted_metrics,
                self._cpu_model.target_tensors,
            )
Example #20
  def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss,
                       is_tpu):
    # TODO(priyag): Remove this once the step TPU Strategy is stable.
    if is_tpu:
      self.skipTest("TPU tests are WIP.")

    with distribution.scope():
      model_fn, dataset_fn, layer = minimize_loss_example(
          optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)

      # TODO(isaprykin):  Eliminate `is_tpu`. Probably add a
      # `DistributionStrategy.create_monitor` so that each DistributionStrategy
      # could influence its training loop. That method would return an instance
      # of Monitor.  TPUMonitor would execute tpu.initialize_system() and
      # tpu.shutdown_system().
      iterator = distribution.distribute_dataset(
          dataset_fn).make_one_shot_iterator()

      def run_step():
        return distribution.group(
            distribution.call_for_each_tower(
                model_fn, iterator.get_next(), run_concurrently=layer.built))

      if not context.executing_eagerly():
        with self.test_session() as sess:
          if is_tpu:
            sess.run(tpu.initialize_system())
          run_step = sess.make_callable(run_step())
        self.evaluate(variables_lib.global_variables_initializer())

      weights, biases = [], []
      for _ in range(10):
        run_step()

        weights.append(self.evaluate(layer.kernel))
        biases.append(self.evaluate(layer.bias))

      if is_tpu:
        with self.test_session() as sess:
          sess.run(tpu.shutdown_system())

      error = abs(numpy.add(numpy.squeeze(weights), numpy.squeeze(biases)) - 1)
      is_not_increasing = all(y <= x for x, y in zip(error, error[1:]))
      self.assertTrue(is_not_increasing)
Example #21
  def run_on_device(self, model_fn, model_inputs, device):
    """Runs `model_fn` on the given device.

    Raises an exception if no such device is available.  `model_fn` should
    return one or more tensors as a list or tuple.

    Args:
      model_fn: Function returning one or more tensors.
      model_inputs: An iterable of Numpy arrays or scalars.
                    These will be passed as arguments to `model_fn`.
      device: Device to run on.  One of ("tpu", "gpu", "cpu").

    Returns:
      Output from the model function.
    """

    def _make_placeholders():
      return dict([(gen_array_ops.placeholder_with_default(v, v.shape), v)
                   for v in model_inputs])

    if device == "tpu":
      with self.test_session(graph=ops.Graph()) as sess:
        placeholders = _make_placeholders()
        tpu_computation = tpu.rewrite(model_fn, placeholders.keys())
        sess.run(tpu.initialize_system())
        sess.run(variables.global_variables_initializer())
        result = sess.run(tpu_computation, placeholders)
        sess.run(tpu.shutdown_system())
        # TODO(b/36891278): support non-flat return lists in tpu.rewrite().
        if len(result) == 1:
          return result[0]
        return result
    elif device == "gpu":
      with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
        placeholders = _make_placeholders()
        sess.run(variables.global_variables_initializer())
        return sess.run(model_fn(placeholders.keys()), placeholders)
    elif device == "cpu":
      # TODO(power) -- will this interact poorly with cached GPU sessions?
      with self.test_session(graph=ops.Graph(), use_gpu=False) as sess:
        placeholders = _make_placeholders()
        sess.run(variables.global_variables_initializer())
        return sess.run(model_fn(placeholders.keys()), placeholders)
Example #22
  def run_on_device(self, model_fn, model_inputs, device):
    """Runs `model_fn` on the given device.

    Raises an exception if no such device is available.  `model_fn` should
    return one or more tensors as a list or tuple.

    Args:
      model_fn: Function returning one or more tensors.
      model_inputs: An iterable of Numpy arrays or scalars.
                    These will be passed as arguments to `model_fn`.
      device: Device to run on.  One of ("tpu", "gpu", "cpu").

    Returns:
      Output from the model function.
    """
    def _make_placeholders():
      return dict(
          [(gen_array_ops.placeholder_with_default(v, v.shape), v)
           for v in model_inputs])

    if device == "tpu":
      with self.test_session(graph=ops.Graph()) as sess:
        placeholders = _make_placeholders()
        tpu_computation = tpu.rewrite(model_fn, placeholders.keys())
        sess.run(tpu.initialize_system())
        sess.run(variables.global_variables_initializer())
        result = sess.run(tpu_computation, placeholders)
        sess.run(tpu.shutdown_system())
        # TODO(b/36891278): support non-flat return lists in tpu.rewrite().
        if len(result) == 1:
          return result[0]
        return result
    elif device == "gpu":
      with self.test_session(graph=ops.Graph(), use_gpu=True) as sess:
        placeholders = _make_placeholders()
        sess.run(variables.global_variables_initializer())
        return sess.run(model_fn(placeholders.keys()), placeholders)
    elif device == "cpu":
      # TODO(power) -- will this interact poorly with cached GPU sessions?
      with self.test_session(graph=ops.Graph(), use_gpu=False) as sess:
        placeholders = _make_placeholders()
        sess.run(variables.global_variables_initializer())
        return sess.run(model_fn(placeholders.keys()), placeholders)
Example #23
  def _initialize_tpu(self):
    """Initialize the TPU devices in a separate session and graph.

    We keep track of all the TPU devices that we've initialized, as TPU
    initialization should only run once for the entire process.
    """
    master = self._tpu_cluster_resolver.master()
    # Verify TPU has not already been initialized in this process.
    if master in TPUExtended._initialized_devices:
      logging.info("TPU master %s has already been initialized." % master)
      return

    logging.info("Initializing the TPU system.")
    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    self._configure(session_config)
    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        sess.run([tpu.initialize_system()])
    logging.info("Finized initializing TPU system.")

    # Update Strategy state to make sure we can track device initialization.
    TPUExtended._initialized_devices.append(master)
Example #24
def setup_tpu_session(tpu_name_or_address):
    """Initializes and returns a Keras/TF session connected the TPU `master`.

  Args:
    tpu_name_or_address: A string that is either the name of the Cloud TPU,
      the grpc address of the Cloud TPU, or (Googlers only) the BNS name of the
      Cloud TPU. If tpu_name_or_address is None, the TPUClusterResolver will
      examine the environment to determine a potential Cloud TPU to use.

  Returns:
    A `tf.Session`.
  """
    cluster_resolver = tpu_cluster_resolver.TPUClusterResolver(
        tpu_name_or_address)
    cluster_spec = cluster_resolver.cluster_spec()
    session = tf_session.Session(
        target=cluster_resolver.master(),
        config=config_pb2.ConfigProto(isolate_session_state=True))
    if cluster_spec:
        session.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
    K.set_session(session)
    K.get_session().run(tpu.initialize_system())
    return session
Example #25
def setup_tpu_session(tpu_name_or_address):
  """Initializes and returns a Keras/TF session connected the TPU `master`.

  Args:
    tpu_name_or_address: A string that is either the name of the Cloud TPU,
      the grpc address of the Cloud TPU, or (Googlers only) the BNS name of the
      Cloud TPU. If tpu_name_or_address is None, the TPUClusterResolver will
      examine the environment to determine a potential Cloud TPU to use.

  Returns:
    A `tf.Session`.
  """
  cluster_resolver = tpu_cluster_resolver.TPUClusterResolver(
      tpu_name_or_address)
  cluster_spec = cluster_resolver.cluster_spec()
  session = tf_session.Session(
      target=cluster_resolver.master(),
      config=config_pb2.ConfigProto(
          isolate_session_state=True))
  if cluster_spec:
    session.cluster_def.CopyFrom(cluster_spec.as_cluster_def())
  K.set_session(session)
  K.get_session().run(tpu.initialize_system())
  return session
Example #26
    def load(self,
             model_path,
             model_output_dir,
             image_list_inmemory,
             params,
             batch_size=128,
             master="local",
             scenario="Offline",
             batch_timeout_micros=20 * 1000):
        if params["use_fused_bn"]:
            model_path = convert_checkpoint.convert_checkpoint(
                model_path, model_output_dir)
        tpu_graph = tf.Graph()
        tpu_config = tf.ConfigProto(
            operation_timeout_in_ms=600 * 1000,
            allow_soft_placement=True,
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True)),
            isolate_session_state=True)
        self.sess = tf.Session(master, graph=tpu_graph, config=tpu_config)
        self.params = params

        with tpu_graph.as_default():
            image_list = tf.constant(image_list_inmemory, dtype=tf.int32)
            if scenario == "Offline":
                self.indices = tf.placeholder(shape=(batch_size[-1]),
                                              dtype=tf.int32)
                self.source_id = tf.placeholder(shape=(batch_size[-1]),
                                                dtype=tf.int32)
                self.raw_shape = tf.placeholder(shape=(batch_size[-1], 3),
                                                dtype=tf.int32)
                image = tf.gather(image_list, self.indices, axis=0)
                if not params["conv0_space_to_depth"]:
                    # Transpose from [N, C, H, W] to [H, W, C, N]
                    image = tf.transpose(image, [2, 3, 1, 0])
                self.predict_op = self.offline_op(
                    (image, self.source_id, self.raw_shape))
            else:
                self.indices = tf.placeholder(dtype=tf.int32)
                self.source_id = tf.placeholder(dtype=tf.int32)
                self.raw_shape = tf.placeholder(dtype=tf.int32,
                                                shape=[None, 3])
                image = tf.gather(image_list, self.indices, axis=0)
                self.predict_op = self.server_op(
                    [image, self.source_id, self.raw_shape],
                    num_batch_threads=16,
                    max_batch_size=batch_size[-1],
                    batch_timeout_micros=batch_timeout_micros,
                    allowed_batch_sizes=batch_size,
                    max_enqueued_batches=10000)

            self.sess.run(tpu.initialize_system())
            for param in tf.trainable_variables():
                tf.logging.info(
                    "  %s, %s, %s" %
                    (param.name, str(param.get_shape()), param.op.device))

            # Checkpoint's variable name: https://internal/6714143388205056
            tf.compat.v1.train.init_from_checkpoint(model_path, {
                "ssd1200/": "ssd1200/",
            })
            self.sess.run(tf.initializers.global_variables())

        return self
Example #27
 def begin(self):
     self._enqueue_ops = self._enqueue_fn()
     logging.info('TPU job name %s', self._tpu_job)
     self._init_op = [tpu.initialize_system(job=self._tpu_job)]
     self._finalize_op = [tpu.shutdown_system(job=self._tpu_job)]
Example #28
 def _tpu_init_fn():
   return tpu.initialize_system()
Example #29
 def initialize(self):
   if context.executing_eagerly():
     # TODO(priyag): Add appropriate call here when eager is supported for TPUs.
     raise NotImplementedError('Eager mode not supported in TPUStrategy.')
   else:
     return [tpu.initialize_system()]
Example #30
    def load(self,
             ckpt_path,
             hparams,
             master='local',
             batch_timeout_micros=80 * 1000,
             buckets=None):
        self.hparams = hparams
        self.buckets = buckets
        self.tpu_graph = tf.Graph()
        tpu_config = tf.ConfigProto(
            operation_timeout_in_ms=600 * 1000,
            allow_soft_placement=True,
            graph_options=tf.GraphOptions(
                rewrite_options=rewriter_config_pb2.RewriterConfig(
                    disable_meta_optimizer=True)),
            isolate_session_state=True)
        # Find tpu master.
        print('master value set to:', master)
        tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
            master, zone=None, project=None)
        master = tpu_cluster_resolver.get_master()
        self.sess = tf.Session(master, graph=self.tpu_graph, config=tpu_config)
        with self.tpu_graph.as_default():
            self.vocab_table = tf.contrib.lookup.index_to_string_table_from_file(
                self.vocab_prefix, default_value=vocab_utils.UNK)

        if self.scenario == 'Offline':
            with self.tpu_graph.as_default():
                self.source = tf.placeholder(shape=(hparams.infer_batch_size,
                                                    hparams.src_max_len_infer),
                                             dtype=tf.int32)
                self.source_sequence_length = tf.placeholder(
                    shape=(hparams.infer_batch_size), dtype=tf.int32)

                inputs = [[self.source, self.source_sequence_length]]
                self.predict_ops.append(self.offline_op(inputs))
        else:
            with self.tpu_graph.as_default():
                self.source = tf.placeholder(
                    shape=[None, hparams.src_max_len_infer], dtype=tf.int32)
                self.source_sequence_length = tf.placeholder(shape=[None],
                                                             dtype=tf.int32)
                inputs = [self.source, self.source_sequence_length]
                for _ in buckets:
                    self.predict_ops.append(
                        self.server_op(
                            inputs,
                            num_batch_threads=16,
                            max_batch_size=hparams.infer_batch_size,
                            batch_timeout_micros=batch_timeout_micros,
                            allowed_batch_sizes=[hparams.infer_batch_size],
                            max_enqueued_batches=10000))
                # Add longest sequence predict op.
                self.predict_ops.append(
                    self.server_op(
                        inputs,
                        num_batch_threads=16,
                        max_batch_size=hparams.infer_batch_size,
                        batch_timeout_micros=5000 * 1000,
                        allowed_batch_sizes=[hparams.infer_batch_size],
                        max_enqueued_batches=10000))

        with self.tpu_graph.as_default():
            vs = tf.global_variables()

            assign_ops = []
            var_map = {}
            with tf.variable_scope('f32', dtype=tf.float32):
                for i in vs:
                    if 'output_projection' in i.name:
                        new_var = tf.get_variable(
                            i.name[:-2], [i.shape[0], hparams.tgt_vocab_size])
                        assign_ops.append(
                            tf.assign(
                                i,
                                tf.pad(
                                    tf.cast(new_var, i.dtype),
                                    [[0, 0],
                                     [
                                         0, 128 *
                                         (hparams.tgt_vocab_size // 128 + 1) -
                                         hparams.tgt_vocab_size
                                     ]])))
                    else:
                        new_var = tf.get_variable(i.name[:-2], i.shape)
                        assign_ops.append(
                            tf.assign(i, tf.cast(new_var, i.dtype)))
                    var_map[i.name[:-2]] = new_var.name[:-2]

            self.sess.run(tpu.initialize_system())
            tf.train.init_from_checkpoint(ckpt_path, var_map)
            self.sess.run(tf.initializers.global_variables())
            self.sess.run(tf.tables_initializer())
            self.sess.run(assign_ops)

        return self
Example #31
  def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction,
                    use_callable_loss, is_tpu):
    with distribution.scope():
      all_vars = []

      def model_fn(x, y):

        def loss_fn():
          # Use fixed initialization to make the steps deterministic.
          w = variable_scope.get_variable("w", initializer=[[2.]])
          all_vars.append(w)
          predict = math_ops.matmul(x, w)
          return losses_impl.mean_squared_error(
              y, predict, reduction=loss_reduction)

        optimizer = optimizer_fn()  # GradientDescent with 0.2 learning rate

        if use_callable_loss:
          return optimizer.minimize(loss_fn)
        else:
          return optimizer.minimize(loss_fn())

      def dataset_fn():
        features = dataset_ops.Dataset.from_tensors([[2.], [7.]])
        labels = dataset_ops.Dataset.from_tensors([[6.], [21.]])
        return dataset_ops.Dataset.zip((features, labels)).repeat()

      iterator = distribution.distribute_dataset(
          dataset_fn).make_one_shot_iterator()

      def run_step():
        return distribution.group(
            distribution.call_for_each_tower(
                model_fn, *iterator.get_next(), run_concurrently=False))

      if not context.executing_eagerly():
        with self.test_session() as sess:
          if is_tpu:
            sess.run(tpu.initialize_system())
          run_step = sess.make_callable(run_step())
        self.evaluate(variables_lib.global_variables_initializer())

      run_step()

      v = all_vars[0]
      self.assertTrue(all([v is vi for vi in all_vars[1:]]))
      weight = numpy.squeeze(self.evaluate(distribution.fetch(v)))
      # Our model is:
      #   predict = x * w
      #   loss = (predict - y)^2
      #   dloss/dpredict = 2*(predict - y)
      #   dloss/dw = 2 * x^T @ (predict - y)
      # For our batch size of 2, assuming sum loss reduction:
      #   x = [2, 7]
      #   y = [6, 21]
      #   w_initial = 2
      #   predict = [4, 14]
      #   predict - y = [-2, -7]
      #   dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106
      # So, without replication, the update to w with lr=0.2 is -0.2 * -106 = 21.2
      # with sum loss reduction, or 10.6 with mean.
      if loss_reduction == losses_impl.Reduction.SUM:
        # Note that the "distribution.num_towers" factor will go away once
        # we split the input across towers, instead of pulling a complete
        # batch of input per tower.
        self.assertNear(weight, 2 + 21.2 * distribution.num_towers, 0.0001)
      else:
        # One of the mean loss reductions.
        self.assertNear(weight, 2 + 10.6, 0.0001)

      if is_tpu:
        with self.test_session() as sess:
          sess.run(tpu.shutdown_system())
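
The gradient arithmetic in the comment above can be checked independently of TensorFlow with a few lines of NumPy. This is only a standalone verification of the sum-reduction case, not part of the test.

import numpy as np

x = np.array([[2.], [7.]])
y = np.array([[6.], [21.]])
w = np.array([[2.]])

predict = x @ w                       # [[4.], [14.]]
dloss_dw = 2.0 * x.T @ (predict - y)  # 2 * (2*-2 + 7*-7) = [[-106.]]
update = -0.2 * dloss_dw              # [[21.2]] with lr = 0.2 and sum reduction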
Example #32
 def _check():
   with session.Session() as sess:
     sess.run(tpu.initialize_system())
     sess.run(tpu.shutdown_system())
Example #33
 def _check():
   with tf_session.Session() as sess:
     sess.run(tpu.initialize_system())
     sess.run(tpu.shutdown_system())
Example #34
  def testOptimizerInsideModelFn(self, distribution, optimizer_fn, is_tpu):
    created_variables = []
    trainable_variables = []

    def appending_creator(next_creator, *args, **kwargs):
      v = next_creator(*args, **kwargs)
      created_variables.append(v.name)
      if "trainable" in kwargs and kwargs["trainable"]:
        trainable_variables.append(v.name)
      return v

    # Creator scope needs to be set before it's used inside
    # `distribution.scope`.
    with variable_scope.variable_creator_scope(
        appending_creator), distribution.scope():
      model_fn, dataset_fn, layer = minimize_loss_example(
          optimizer_fn,
          use_bias=True,
          use_callable_loss=True,
          create_optimizer_inside_model_fn=True)

      iterator = distribution.distribute_dataset(
          dataset_fn).make_one_shot_iterator()

      def run_step():
        return distribution.group(
            distribution.call_for_each_tower(
                model_fn, iterator.get_next(), run_concurrently=layer.built))

      if not context.executing_eagerly():
        with self.test_session() as sess:
          if is_tpu:
            sess.run(tpu.initialize_system())
          run_step = sess.make_callable(run_step())
        self.evaluate(variables_lib.global_variables_initializer())

      run_step()

      if is_tpu:
        with self.test_session() as sess:
          sess.run(tpu.shutdown_system())

      def get_expected_variables(optimizer_fn, num_parameter_devices):
        variables_map = {
            "GradientDescent": ["dense/kernel", "dense/bias"],
            "Adam": [
                "dense/kernel", "dense/bias", "beta1_power", "beta2_power",
                "dense/kernel/Adam", "dense/kernel/Adam_1", "dense/bias/Adam",
                "dense/bias/Adam_1"
            ]
        }
        variables = variables_map[optimizer_fn().get_name()]
        variables.extend([
            v + "/replica_{}".format(replica)
            for v in variables
            for replica in range(1, num_parameter_devices)
        ])
        return set([v + ":0" for v in variables])

      self.assertEqual(
          get_expected_variables(optimizer_fn,
                                 len(distribution.parameter_devices)),
          set(created_variables))
Example #35
 def begin(self):
   self._enqueue_ops = self._enqueue_fn()
   logging.info('TPU job name %s', self._tpu_job)
   self._init_op = [tpu.initialize_system(job=self._tpu_job)]
   self._finalize_op = [tpu.shutdown_system(job=self._tpu_job)]
Example #36
    def testMeanVsSum(self, distribution, optimizer_fn, loss_reduction,
                      use_callable_loss, is_tpu):
        with distribution.scope():
            all_vars = []

            def model_fn(x, y):
                def loss_fn():
                    # Use fixed initialization to make the steps deterministic.
                    w = variable_scope.get_variable("w", initializer=[[2.]])
                    all_vars.append(w)
                    predict = math_ops.matmul(x, w)
                    return losses_impl.mean_squared_error(
                        y, predict, reduction=loss_reduction)

                optimizer = optimizer_fn(
                )  # GradientDescent with 0.2 learning rate

                if use_callable_loss:
                    return optimizer.minimize(loss_fn)
                else:
                    return optimizer.minimize(loss_fn())

            def dataset_fn():
                features = dataset_ops.Dataset.from_tensors([[2.], [7.]])
                labels = dataset_ops.Dataset.from_tensors([[6.], [21.]])
                return dataset_ops.Dataset.zip((features, labels)).repeat()

            iterator = distribution.distribute_dataset(
                dataset_fn).make_one_shot_iterator()

            def run_step():
                return distribution.group(
                    distribution.call_for_each_tower(model_fn,
                                                     *iterator.get_next(),
                                                     run_concurrently=False))

            if not context.executing_eagerly():
                with self.test_session() as sess:
                    if is_tpu:
                        sess.run(tpu.initialize_system())
                    run_step = sess.make_callable(run_step())
                self.evaluate(variables_lib.global_variables_initializer())

            run_step()

            v = all_vars[0]
            self.assertTrue(all([v is vi for vi in all_vars[1:]]))
            weight = numpy.squeeze(self.evaluate(distribution.fetch(v)))
            # Our model is:
            #   predict = x * w
            #   loss = (predict - y)^2
            #   dloss/dpredict = 2*(predict - y)
            #   dloss/dw = 2 * x^T @ (predict - y)
            # For our batch size of 2, assuming sum loss reduction:
            #   x = [2, 7]
            #   y = [6, 21]
            #   w_initial = 2
            #   predict = [4, 14]
            #   predict - y = [-2, -7]
            #   dloss/dw = 2 <[2, 7], [-2, -7]> = - 2(4 + 49) = -106
            # So, without replication, the update to w with lr=0.2 is -0.2 * -106 = 21.2
            # with sum loss reduction, or 10.6 with mean.
            if loss_reduction == losses_impl.Reduction.SUM:
                # Note that the "distribution.num_towers" factor will go away once
                # we split the input across towers, instead of pulling a complete
                # batch of input per tower.
                self.assertNear(weight, 2 + 21.2 * distribution.num_towers,
                                0.0001)
            else:
                # One of the mean loss reductions.
                self.assertNear(weight, 2 + 10.6, 0.0001)

            if is_tpu:
                with self.test_session() as sess:
                    sess.run(tpu.shutdown_system())
Example #37
    def testOptimizerInsideModelFn(self, distribution, optimizer_fn, is_tpu):
        created_variables = []
        trainable_variables = []

        def appending_creator(next_creator, *args, **kwargs):
            v = next_creator(*args, **kwargs)
            created_variables.append(v.name)
            if "trainable" in kwargs and kwargs["trainable"]:
                trainable_variables.append(v.name)
            return v

        # Creator scope needs to be set before it's used inside
        # `distribution.scope`.
        with variable_scope.variable_creator_scope(
                appending_creator), distribution.scope():
            model_fn, dataset_fn, layer = minimize_loss_example(
                optimizer_fn,
                use_bias=True,
                use_callable_loss=True,
                create_optimizer_inside_model_fn=True)

            iterator = distribution.distribute_dataset(
                dataset_fn).make_one_shot_iterator()

            def run_step():
                return distribution.group(
                    distribution.call_for_each_tower(
                        model_fn,
                        iterator.get_next(),
                        run_concurrently=layer.built))

            if not context.executing_eagerly():
                with self.test_session() as sess:
                    if is_tpu:
                        sess.run(tpu.initialize_system())
                    run_step = sess.make_callable(run_step())
                self.evaluate(variables_lib.global_variables_initializer())

            run_step()

            if is_tpu:
                with self.test_session() as sess:
                    sess.run(tpu.shutdown_system())

            def get_expected_variables(optimizer_fn, num_parameter_devices):
                variables_map = {
                    "GradientDescent": ["dense/kernel", "dense/bias"],
                    "Adam": [
                        "dense/kernel", "dense/bias", "beta1_power",
                        "beta2_power", "dense/kernel/Adam",
                        "dense/kernel/Adam_1", "dense/bias/Adam",
                        "dense/bias/Adam_1"
                    ]
                }
                variables = variables_map[optimizer_fn().get_name()]
                variables.extend([
                    v + "/replica_{}".format(replica) for v in variables
                    for replica in range(1, num_parameter_devices)
                ])
                return set([v + ":0" for v in variables])

            self.assertEqual(
                get_expected_variables(optimizer_fn,
                                       len(distribution.parameter_devices)),
                set(created_variables))
Example #38
 def get_initialization_ops(self):
     return [tpu.initialize_system()]
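
Across these examples, the common graph-mode pattern is to run tpu.initialize_system() once in a fresh graph and session before any TPU computation, and tpu.shutdown_system() when finished. Below is a minimal, self-contained sketch of that pattern; it assumes TensorFlow 1.x with tf.contrib available and a reachable TPU, and the run_with_tpu helper name is purely illustrative.

import tensorflow as tf
from tensorflow.contrib import tpu
from tensorflow.contrib.cluster_resolver import TPUClusterResolver


def run_with_tpu(tpu_name_or_address, build_and_run_fn):
  """Initializes the TPU system, calls `build_and_run_fn(sess)`, then shuts down."""
  resolver = TPUClusterResolver(tpu_name_or_address)
  config = tf.ConfigProto(allow_soft_placement=True, isolate_session_state=True)
  with tf.Graph().as_default():
    with tf.Session(target=resolver.master(), config=config) as sess:
      sess.run(tpu.initialize_system())  # must run before any TPU computation
      try:
        return build_and_run_fn(sess)
      finally:
        sess.run(tpu.shutdown_system())  # releases the TPU for other clients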