def _testMoments(self, dt):
    try:
      from scipy import stats  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      tf_logging.warn("Cannot test moments: %s" % e)
      return

    # The moments test is a z-value test.  This is the largest z-value
    # we want to tolerate. Since the z-test approximates a unit normal
    # distribution, it should almost definitely never exceed 6.
    z_limit = 6.0

    for stride in 0, 1, 4, 17:
      alphas = [0.2, 1.0, 3.0]
      if dt == dtypes.float64:
        alphas = [0.01] + alphas
      for alpha in alphas:
        for scale in 9, 17:
          # Gamma moments only defined for values less than the scale param.
          max_moment = min(6, scale // 2)
          sampler = self._Sampler(
              20000, alpha, 1 / scale, dt, use_gpu=False, seed=12345)
          z_scores = util.test_moment_matching(
              sampler(),
              max_moment,
              stats.gamma(alpha, scale=scale),
              stride=stride,
          )
          self.assertAllLess(z_scores, z_limit)
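A note on the check above: the moment-matching z-test can be sketched with NumPy and SciPy alone. The helper below is a simplified, illustrative stand-in for `util.test_moment_matching`, not the actual TensorFlow test util:

import numpy as np
from scipy import stats

def moment_z_scores(samples, max_moment, dist):
  # Z-scores of the first `max_moment` sample moments against `dist`.
  n = samples.size
  z_scores = []
  for i in range(1, max_moment + 1):
    sample_moment = np.mean(samples ** i)
    true_moment = dist.moment(i)
    # The sample mean of X**i has variance Var(X**i) / n, where
    # Var(X**i) = E[X**(2i)] - E[X**i]**2.
    moment_var = (dist.moment(2 * i) - true_moment ** 2) / n
    z_scores.append(abs(sample_moment - true_moment) / np.sqrt(moment_var))
  return np.array(z_scores)

g = stats.gamma(3.0, scale=9.0)
z = moment_z_scores(g.rvs(size=20000, random_state=12345), 4, g)
assert (z < 6.0).all()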
Example #2
def surrogate_loss(sample_losses,
                   stochastic_tensors=None,
                   name="SurrogateLoss"):
  """Surrogate loss for stochastic graphs.

  This function will call `loss_fn` on each `StochasticTensor`
  upstream of `sample_losses`, passing the losses that it influenced.

  Note that currently `surrogate_loss` does not work with `StochasticTensor`s
  instantiated in `while_loop`s or other control structures.

  Args:
    sample_losses: a list or tuple of final losses. Each loss should be per
      example in the batch (and possibly per sample); that is, it should have
      dimensionality of 1 or greater. All losses should have the same shape.
    stochastic_tensors: a list of `StochasticTensor`s to add loss terms for.
      If None, defaults to all `StochasticTensor`s in the graph upstream of
      the `Tensor`s in `sample_losses`.
    name: the name with which to prepend created ops.

  Returns:
    `Tensor` loss, which is the sum of `sample_losses` and the
    `loss_fn`s returned by the `StochasticTensor`s.

  Raises:
    TypeError: if `sample_losses` is not a list or tuple, or if its elements
      are not `Tensor`s.
    ValueError: if any loss in `sample_losses` does not have dimensionality 1
      or greater.
  """
  with ops.op_scope(sample_losses, name):
    fixed_losses = []
    if not isinstance(sample_losses, (list, tuple)):
      raise TypeError("sample_losses must be a list or tuple")
    for loss in sample_losses:
      if not isinstance(loss, ops.Tensor):
        raise TypeError("loss is not a Tensor: %s" % loss)
      ndims = loss.get_shape().ndims
      if not (ndims is not None and ndims >= 1):
        raise ValueError("loss must have dimensionality 1 or greater: %s" %
                         loss)
      fixed_losses.append(array_ops.stop_gradient(loss))

    stoch_dependencies_map = _stochastic_dependencies_map(
        fixed_losses, stochastic_tensors=stochastic_tensors)
    if not stoch_dependencies_map:
      logging.warn(
          "No collection of Stochastic Tensors found for current graph.")
      return math_ops.add_n(sample_losses)

    # Iterate through all of the stochastic dependencies, adding
    # surrogate terms where necessary.
    sample_losses = [ops.convert_to_tensor(loss) for loss in sample_losses]
    loss_terms = sample_losses
    for (stoch_node, dependent_losses) in stoch_dependencies_map.items():
      loss_term = stoch_node.loss(list(dependent_losses))
      if loss_term is not None:
        loss_terms.append(loss_term)

    return math_ops.add_n(loss_terms)
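The surrogate term each `StochasticTensor` contributes is a score-function (REINFORCE) term of the form `stop_gradient(loss) * log_prob(sample)`. A minimal NumPy illustration of the underlying identity, independent of the deprecated `tf.contrib.bayesflow` API:

import numpy as np

# For x ~ N(mu, 1) and loss(x) = x**2, d/dmu E[loss(x)] = 2 * mu.
# The score-function estimator averages loss(x) * d/dmu log p(x | mu).
mu = 1.5
rng = np.random.RandomState(0)
x = rng.normal(mu, 1.0, size=200000)
score = x - mu  # d/dmu log N(x; mu, 1)
grad_estimate = np.mean(x ** 2 * score)
assert abs(grad_estimate - 2 * mu) < 0.05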
Example #3
  def validateKolmogorovSmirnov(self,
                                shape,
                                mean,
                                stddev,
                                minval,
                                maxval,
                                seed=1618):
    try:
      import scipy.stats  # pylint: disable=g-import-not-at-top
      random_seed.set_random_seed(seed)
      with self.test_session(use_gpu=True):
        samples = random_ops.parameterized_truncated_normal(shape, mean, stddev,
                                                            minval,
                                                            maxval).eval()
      assert (~np.isnan(samples)).all()
      minval = max(mean - stddev * 10, minval)
      maxval = min(mean + stddev * 10, maxval)
      dist = scipy.stats.norm(loc=mean, scale=stddev)
      cdf_min = dist.cdf(minval)
      cdf_max = dist.cdf(maxval)

      def truncated_cdf(x):
        return np.clip((dist.cdf(x) - cdf_min) / (cdf_max - cdf_min), 0.0, 1.0)

      pvalue = scipy.stats.kstest(samples, truncated_cdf)[1]
      self.assertGreater(pvalue, 1e-10)
    except ImportError as e:
      tf_logging.warn("Cannot test truncated normal op: %s" % str(e))
Example #4
    def _write_dict_to_summary(output_dir,
                               dictionary,
                               current_global_step):
        """Writes a `dict` into summary file in given output directory.

          Args:
            output_dir: `str`, directory to write the summary file in.
            dictionary: the `dict` to be written to summary file.
            current_global_step: `int`, the current global step.
          """
        logging.info('Saving dict for global step %d: %s', current_global_step,
                     dict_to_str(dictionary))
        summary_writer = summary_io.SummaryWriterCache.get(output_dir)
        summary_proto = summary_pb2.Summary()
        for key in dictionary:
            if dictionary[key] is None or key == tf.GraphKeys.GLOBAL_STEP:
                continue
            value = summary_proto.value.add()
            value.tag = key
            if isinstance(dictionary[key], (np.float32, float)):
                value.simple_value = float(dictionary[key])
            elif isinstance(dictionary[key], (int, np.int64, np.int32)):
                value.simple_value = int(dictionary[key])
            else:
                logging.warn('Skipping summary for %s, must be a '
                             'float, np.float32, int, np.int32, or np.int64.',
                             key)
        summary_writer.add_summary(summary_proto, current_global_step)
        summary_writer.flush()
Example #5
def _write_dict_to_summary(output_dir,
                           dictionary,
                           current_global_step):
  """Writes a `dict` into summary file in given output directory.

  Args:
    output_dir: `str`, directory to write the summary file in.
    dictionary: the `dict` to be written to summary file.
    current_global_step: `int`, the current global step.
  """
  logging.info('Saving dict for global step %d: %s', current_global_step,
               _dict_to_str(dictionary))
  summary_writer = writer_cache.FileWriterCache.get(output_dir)
  summary_proto = summary_pb2.Summary()
  for key in dictionary:
    if dictionary[key] is None:
      continue
    if key == 'global_step':
      continue
    value = summary_proto.value.add()
    value.tag = key
    if (isinstance(dictionary[key], np.float32) or
        isinstance(dictionary[key], float)):
      value.simple_value = float(dictionary[key])
    elif (isinstance(dictionary[key], np.int64) or
          isinstance(dictionary[key], np.int32) or
          isinstance(dictionary[key], int)):
      value.simple_value = int(dictionary[key])
    else:
      logging.warn(
          'Skipping summary for %s, must be a float, np.float32, np.int64, '
          'np.int32 or int.',
          key)
  summary_writer.add_summary(summary_proto, current_global_step)
  summary_writer.flush()
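A typical call site might look like this (the directory and values are hypothetical). `None` values and the 'global_step' key are skipped silently; non-numeric values only trigger the warning above:

_write_dict_to_summary(
    output_dir='/tmp/eval',
    dictionary={'loss': 0.25, 'accuracy': np.float32(0.9),
                'global_step': 100, 'checkpoint_path': '/tmp/model'},
    current_global_step=100)
# Writes simple_value summaries for 'loss' and 'accuracy' only.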
Example #6
def _log_signature_report(signature_def_map, excluded_signatures):
  """Log a report of which signatures were produced."""
  sig_names_by_method_name = collections.defaultdict(list)

  # We'll collect whatever method_names are present, but also we want to make
  # sure to output a line for each of the three standard methods even if they
  # have no signatures.
  for method_name in _FRIENDLY_METHOD_NAMES:
    sig_names_by_method_name[method_name] = []

  for signature_name, sig in signature_def_map.items():
    sig_names_by_method_name[sig.method_name].append(signature_name)

  # TODO(b/67733540): consider printing the full signatures, not just names
  for method_name, sig_names in sig_names_by_method_name.items():
    if method_name in _FRIENDLY_METHOD_NAMES:
      method_name = _FRIENDLY_METHOD_NAMES[method_name]
    logging.info('Signatures INCLUDED in export for {}: {}'.format(
        method_name, sig_names if sig_names else 'None'))

  if excluded_signatures:
    logging.info('Signatures EXCLUDED from export because they cannot be '
                 'served via TensorFlow Serving APIs:')
    for signature_name, message in excluded_signatures.items():
      logging.info('\'{}\' : {}'.format(signature_name, message))

  if not signature_def_map:
    logging.warn('Export includes no signatures!')
  elif (signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY
        not in signature_def_map):
    logging.warn('Export includes no default signature!')
Example #7
  def __init__(self, num_units, forget_bias=1.0,
               state_is_tuple=True, activation=None, reuse=None, name=None):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
        Must set to `0.0` manually when restoring from CudnnLSTM-trained
        checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: String, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.

      When restoring from CudnnLSTM-trained checkpoints, must use
      `CudnnCompatibleLSTMCell` instead.
    """
    super(BasicLSTMCell, self).__init__(_reuse=reuse, name=name)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh
Example #8
def get_timestamped_dir(dir_base):
  """Builds a path to a new subdirectory within the base directory.

  The subdirectory will be named using the current time.
  This guarantees monotonically increasing directory numbers even across
  multiple runs of the pipeline.
  The timestamp used is the number of seconds since epoch UTC.

  Args:
    dir_base: A string containing a directory to create the subdirectory under.

  Returns:
    The full path of the new subdirectory (which is not actually created yet).

  Raises:
    RuntimeError: if repeated attempts fail to obtain a unique timestamped
      directory name.
  """
  attempts = 0
  while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS:
    timestamp = int(time.time())

    result_dir = os.path.join(
        compat.as_bytes(dir_base), compat.as_bytes(str(timestamp)))
    if not gfile.Exists(result_dir):
      # Collisions are still possible (though extremely unlikely): this
      # directory is not actually created yet, but it will be almost
      # instantly on return from this function.
      return result_dir
    time.sleep(1)
    attempts += 1
    logging.warn('Directory {} already exists; retrying (attempt {}/{})'.format(
        result_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS))
  raise RuntimeError('Failed to obtain a unique export directory name after '
                     '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS))
Example #9
  def _determinant(self):
    logging.warn(
        "Using (possibly slow) default implementation of determinant."
        "  Requires conversion to a dense matrix and O(N^3) operations.")
    if self._can_use_cholesky():
      return math_ops.exp(self.log_abs_determinant())
    return linalg_ops.matrix_determinant(self._matrix)
Example #10
  def testDoubleBasic(self):
    x = np.linspace(-5, 20, 15).reshape(1, 3, 5).astype(np.float64)
    y = np.linspace(20, -5, 15).reshape(1, 3, 5).astype(np.float64)
    self._compareBoth(x, y, np.add, math_ops.add)
    self._compareBoth(x, y, np.subtract, math_ops.subtract)
    self._compareBoth(x, y, np.multiply, math_ops.multiply)
    self._compareBoth(x, y + 0.1, np.true_divide, math_ops.truediv)
    self._compareBoth(x, y + 0.1, np.floor_divide, math_ops.floordiv)
    self._compareBoth(x, y, np.add, _ADD)
    self._compareBoth(x, y, np.subtract, _SUB)
    self._compareBoth(x, y, np.multiply, _MUL)
    self._compareBoth(x, y + 0.1, np.true_divide, _TRUEDIV)
    self._compareBoth(x, y + 0.1, np.floor_divide, _FLOORDIV)
    self._compareBoth(x, y, np.arctan2, math_ops.atan2)
    x1 = np.random.randn(7, 4).astype(np.float64)
    x2 = np.random.randn(7, 4).astype(np.float64)
    # Remove tiny values--atan2 gradients are flaky near the origin.
    x1[np.abs(x1) < 0.5] = 0.5 * np.sign(x1[np.abs(x1) < 0.5])
    x2[np.abs(x2) < 0.5] = 0.5 * np.sign(x2[np.abs(x2) < 0.5])
    self._compareBoth(x1, x2, np.arctan2, math_ops.atan2)
    try:
      from scipy import special  # pylint: disable=g-import-not-at-top
      a_pos_small = np.linspace(0.1, 2, 15).reshape(1, 3, 5).astype(np.float32)
      x_pos_small = np.linspace(0.1, 10, 15).reshape(1, 3, 5).astype(np.float32)
      self._compareBoth(a_pos_small, x_pos_small, special.gammainc,
                        math_ops.igamma)
      self._compareBoth(a_pos_small, x_pos_small, special.gammaincc,
                        math_ops.igammac)
    except ImportError as e:
      tf_logging.warn("Cannot test special functions: %s" % str(e))
Example #11
  def __init__(self, num_units, forget_bias=1.0,
               input_size=None, activation=math_ops.tanh,
               layer_norm=True, norm_gain=1.0, norm_shift=0.0,
               dropout_keep_prob=1.0, dropout_prob_seed=None):
    """Initializes the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      activation: Activation function of the inner states.
      layer_norm: If `True`, layer normalization will be applied.
      norm_gain: float, The layer normalization gain initial value. If
        `layer_norm` has been set to `False`, this argument will be ignored.
      norm_shift: float, The layer normalization shift initial value. If
        `layer_norm` has been set to `False`, this argument will be ignored.
      dropout_keep_prob: unit Tensor or float between 0 and 1 representing the
        recurrent dropout probability value. If float and 1.0, no dropout will
        be applied.
      dropout_prob_seed: (optional) integer, the randomness seed.
    """

    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)

    self._num_units = num_units
    self._activation = activation
    self._forget_bias = forget_bias
    self._keep_prob = dropout_keep_prob
    self._seed = dropout_prob_seed
    self._layer_norm = layer_norm
    self._g = norm_gain
    self._b = norm_shift
Example #12
  def __init__(self, num_units, input_size=None,
               use_peepholes=False, cell_clip=None,
               initializer=None, num_proj=None, proj_clip=None,
               num_unit_shards=1, num_proj_shards=1,
               forget_bias=1.0, state_is_tuple=False,
               activation=tanh):

#    if not state_is_tuple:
#      logging.warn(
#          "%s: Using a concatenated state is slower and will soon be "
#          "deprecated.  Use state_is_tuple=True." % self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated." % self)

    #self._use_peepholes = use_peepholes
    #self._cell_clip = cell_clip
    #self._initializer = initializer
    #self._num_proj = num_proj
    #self._num_unit_shards = num_unit_shards
    #self._num_proj_shards = num_proj_shards

    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
Example #13
  def after_create_session(self, training_session, coord):  # pylint: disable=unused-argument
    # N.B. We have to pull the global step here to avoid it being unavailable
    # at checkpoint time; the graph has been frozen at that point.
    if training_util.get_global_step() is None and self.saver() is not None:
      raise ValueError(
          'Saver defined but no global step.  Run `get_or_create_global_step()`'
          ' in your model definition to allow checkpointing.')

    with self._graph.as_default():
      logging.info('Installing graceful shutdown hook.')
      self._session = _clone_session(training_session, self._graph)
      self._workers = WorkerHeartbeatManager.from_devices(
          self._session, all_worker_devices(self._session))
      self._heartbeat_supported = self._workers.num_workers() > 0
      if self._heartbeat_supported:
        try:
          self._workers.configure(
              event_pb2.WorkerHeartbeatRequest(
                  shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR))
        except errors.InvalidArgumentError:
          logging.warn(
              'TPU device does not support heartbeats. Failure '
              'handling will be disabled.')
          self._heartbeat_supported = False
      else:
        logging.warn(
            'No workers support heartbeats. Failure handling will be disabled.')
Example #14
def _get_timestamped_export_dir(export_dir_base):
  # When we create a timestamped directory, there is a small chance that the
  # directory already exists because another worker is also writing exports.
  # In this case we just wait one second to get a new timestamp and try again.
  # If this fails several times in a row, then something is seriously wrong.
  max_directory_creation_attempts = 10

  attempts = 0
  while attempts < max_directory_creation_attempts:
    export_timestamp = int(time.time())

    export_dir = os.path.join(
        compat.as_bytes(export_dir_base), compat.as_bytes(
            str(export_timestamp)))
    if not gfile.Exists(export_dir):
      # Collisions are still possible (though extremely unlikely): this
      # directory is not actually created yet, but it will be almost
      # instantly on return from this function.
      return export_dir
    time.sleep(1)
    attempts += 1
    logging.warn(
        "Export directory {} already exists; retrying (attempt {}/{})".format(
            export_dir, attempts, max_directory_creation_attempts))
  raise RuntimeError("Failed to obtain a unique export directory name after "
                     "{} attempts.".format(max_directory_creation_attempts))
Example #15
  def train(self, delay_secs=None):
    """Fit the estimator using the training data.

    Train the estimator for `self._train_steps` steps, after waiting for
    `delay_secs` seconds. If `self._train_steps` is `None`, train forever.

    Args:
      delay_secs: Start training after this many seconds.

    Returns:
      The trained estimator.
    """
    start = time.time()

    # Start the server, if needed. It's important to start the server before
    # we (optionally) sleep for the case where no device_filters are set.
    # Otherwise, the servers will wait to connect to each other before starting
    # to train. We might as well start as soon as we can.
    config = self._estimator.config
    if isinstance(config, run_config.RunConfig):
      if (config.cluster_spec and config.master and
          config.environment == run_config.Environment.LOCAL):
        logging.warn("ClusterSpec and master are provided, but environment is "
                     "set to 'local'. Set environment to 'cloud' if you intend "
                     "to use the distributed runtime.")
      if (config.environment != run_config.Environment.LOCAL and
          config.environment != run_config.Environment.GOOGLE and
          config.cluster_spec and config.master):
        self._start_server()
    elif config.cluster_spec and config.master:
      raise ValueError(
          "For distributed runtime, Experiment class only works with "
          "tf.contrib.learn.RunConfig for now, but provided {}".format(
              type(config)))

    extra_hooks = []
    if delay_secs is None:
      task_id = self._estimator.config.task_id or 0
      if self._delay_workers_by_global_step:
        # Wait 5500 global steps for the second worker. Each worker waits
        # more than the previous one, but with a diminishing number of steps.
        extra_hooks.append(
            basic_session_run_hooks.GlobalStepWaiterHook(
                int(8000.0 * math.log(task_id + 1))))
        delay_secs = 0
      else:
        # Wait 5 secs more for each new worker up to 60 secs.
        delay_secs = min(60, task_id * 5)

    if delay_secs > 0:
      elapsed_secs = time.time() - start
      remaining = delay_secs - elapsed_secs
      logging.info("Waiting %d secs before starting training.", remaining)
      time.sleep(delay_secs)

    return self._call_train(
        input_fn=self._train_input_fn,
        max_steps=self._train_steps,
        hooks=self._train_monitors + extra_hooks,
        saving_listeners=self._saving_listeners)
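For concreteness, the two delay schedules above work out as follows for the first few workers (a standalone arithmetic check, not part of the Experiment API):

import math

for task_id in range(4):
  step_delay = int(8000.0 * math.log(task_id + 1))  # GlobalStepWaiterHook arg
  secs_delay = min(60, task_id * 5)                 # wall-clock fallback
  print(task_id, step_delay, secs_delay)
# task 0: 0 steps / 0 s; task 1: 5545 steps / 5 s;
# task 2: 8788 steps / 10 s; task 3: 11090 steps / 15 s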
Example #16
  def _garbage_collect_exports(self, export_dir_base):
    """Deletes older exports, retaining only a given number of the most recent.

    Export subdirectories are assumed to be named with monotonically increasing
    integers; the most recent are taken to be those with the largest values.

    Args:
      export_dir_base: the base directory under which each export is in a
        versioned subdirectory.
    """
    if self._exports_to_keep is None:
      return

    def _export_version_parser(path):
      # create a simple parser that pulls the export_version from the directory.
      filename = os.path.basename(path.path)
      if not (len(filename) == 10 and filename.isdigit()):
        return None
      return path._replace(export_version=int(filename))

    # pylint: disable=protected-access
    keep_filter = gc._largest_export_versions(self._exports_to_keep)
    delete_filter = gc._negation(keep_filter)
    for p in delete_filter(
        gc._get_paths(export_dir_base, parser=_export_version_parser)):
      try:
        gfile.DeleteRecursively(p.path)
      except errors_impl.NotFoundError as e:
        tf_logging.warn('Can not delete %s recursively: %s', p.path, e)
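Stripped of the `gc` helpers, the retention policy is simply "keep the N largest 10-digit directory names". A standard-library sketch of the same idea (a hypothetical helper, not the Estimator API):

import os
import shutil

def garbage_collect_exports(export_dir_base, exports_to_keep):
  # Timestamped export dirs have 10-digit names; larger means more recent.
  versions = sorted(
      int(name) for name in os.listdir(export_dir_base)
      if len(name) == 10 and name.isdigit())
  to_delete = versions[:-exports_to_keep] if exports_to_keep else versions
  for version in to_delete:
    shutil.rmtree(os.path.join(export_dir_base, str(version)))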
Example #17
def additional_score_function_losses(sample_losses, name=None):
  with ops.op_scope(sample_losses, name, "SampleLosses"):
    fixed_losses = []
    if not isinstance(sample_losses, (list, tuple)):
      raise TypeError("sample_losses must be a list or tuple")
    for loss in sample_losses:
      if not isinstance(loss, ops.Tensor):
        raise TypeError("loss is not a Tensor: %s" % loss)
      ndims = loss.get_shape().ndims
      if not (ndims is not None and ndims <= 1):
        raise ValueError(
            "loss must be a scalar or batch-length vector loss: %s" % loss)
      fixed_losses.append(array_ops.stop_gradient(loss))

    stoch_dependencies_map = _stochastic_dependencies_map(fixed_losses)
    if not stoch_dependencies_map:
      logging.warn(
          "No collection of Stochastic Tensors found for current graph.")
      return []

    score_function_losses = []

    # Iterate through all of the stochastic dependencies, adding
    # surrogate terms where necessary.
    for (stoch_node, dependent_losses) in stoch_dependencies_map.items():
      score_function = stoch_node.score_function(list(dependent_losses))
      if score_function is not None:
        with ops.name_scope("ScoreFunction_%s" % stoch_node.name):
          score_function_losses.append(array_ops.identity(score_function))

    return score_function_losses
Example #18
  def _testZeroDensity(self, alpha):
    """Zero isn't in the support of the gamma distribution.

    But quantized floating point math has its limits.
    TODO(bjp): Implement log-gamma sampler for small-shape distributions.

    Args:
      alpha: float shape value to test
    """
    try:
      from scipy import stats  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      tf_logging.warn("Cannot test zero density proportions: %s" % e)
      return
    allowable_zeros = {
        dtypes.float16: stats.gamma(alpha).cdf(np.finfo(np.float16).tiny),
        dtypes.float32: stats.gamma(alpha).cdf(np.finfo(np.float32).tiny),
        dtypes.float64: stats.gamma(alpha).cdf(np.finfo(np.float64).tiny)
    }
    failures = []
    for use_gpu in [False, True]:
      for dt in dtypes.float16, dtypes.float32, dtypes.float64:
        sampler = self._Sampler(
            10000, alpha, 1.0, dt, use_gpu=use_gpu, seed=12345)
        x = sampler()
        allowable = allowable_zeros[dt] * x.size
        allowable = allowable * 2 if allowable < 10 else allowable * 1.05
        if np.sum(x <= 0) > allowable:
          failures += [(use_gpu, dt)]
    self.assertEqual([], failures)
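The allowable zero counts come straight from the gamma CDF evaluated at the smallest positive normal of each dtype, e.g.:

import numpy as np
from scipy import stats

alpha = 0.2
p_zero = stats.gamma(alpha).cdf(np.finfo(np.float16).tiny)  # P(X <= tiny)
print(10000 * p_zero)  # expected count of draws that quantize to <= 0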
Example #19
  def __init__(self, num_units, input_size=None,
               use_peepholes=False, cell_clip=None,
               initializer=None, num_proj=None, proj_clip=None,
               num_unit_shards=1, num_proj_shards=1,
               forget_bias=1.0, state_is_tuple=True,
               activation=tanh):
    """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell
      input_size: Deprecated and unused.
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: How to split the weight matrix.  If >1, the weight
        matrix is stored across num_unit_shards.
      num_proj_shards: How to split the projection matrix.  If >1, the
        projection matrix is stored across num_proj_shards.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  This latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation

    if num_proj:
      self._state_size = (
          LSTMStateTuple(num_units, num_proj)
          if state_is_tuple else num_units + num_proj)
      self._output_size = num_proj
    else:
      self._state_size = (
          LSTMStateTuple(num_units, num_units)
          if state_is_tuple else 2 * num_units)
      self._output_size = num_units
Example #20
def _changing_default_center_bias():
  logging.warn(
      "Change warning: default value of `enable_centered_bias` will change"
      " after 2016-10-09. It will be disabled by default."
      "Instructions for keeping existing behaviour:\n"
      "Explicitly set `enable_centered_bias` to 'True' if you want to keep "
      "existing behaviour.")
Example #21
  def __init__(self, num_units, forget_bias=1.0, input_size=None,
               state_is_tuple=True, activation=tanh, reuse=None):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
    self._reuse = reuse
Example #22
  def __init__(self, num_units, recurrence_depth=1, transfer_bias=-2.0,
               input_size=None, state_is_tuple=False, activation=tanh):
    """Initialize the basic RHN cell.

    Args:
      num_units: int, The number of units per recurrence depth in the RHN cell.
      recurrence_depth: int, Number of recurrent layers in the RHN network.
      transfer_bias: float, The bias added to the transfer gate.
      input_size: Deprecated and unused.
      state_is_tuple: If True, accepted and returned states are 1-tuples of
        the `m_state`.  Defaults to False; this default behavior will soon
        be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
      logging.warn(
          "%s: Using a concatenated state is slower and will soon be "
          "deprecated.  Use state_is_tuple=True." % self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated." % self)
    self._num_units = num_units
    self._recurrence_depth = recurrence_depth
    self._transfer_bias = transfer_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation
Example #23
  def __init__(self,
               cell,
               num_proj,
               activation=None,
               input_size=None,
               reuse=None):
    """Create a cell with input projection.

    Args:
      cell: an RNNCell, a projection of inputs is added before it.
      num_proj: Python integer.  The dimension to project to.
      activation: (optional) an optional activation function.
      input_size: Deprecated and unused.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.

    Raises:
      TypeError: if cell is not an RNNCell.
    """
    super(InputProjectionWrapper, self).__init__(_reuse=reuse)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    if not _like_rnncell(cell):
      raise TypeError("The parameter cell is not RNNCell.")
    self._cell = cell
    self._num_proj = num_proj
    self._activation = activation
    self._linear = None
Example #24
  def _testCompareToExplicitDerivative(self, dtype):
    """Compare to the explicit reparameterization derivative.

    Verifies that the computed derivative satisfies
    dsample / dalpha = d igammainv(alpha, u) / dalpha,
    where u = igamma(alpha, sample).

    Args:
      dtype: TensorFlow dtype to perform the computations in.
    """
    delta = 1e-3
    np_dtype = dtype.as_numpy_dtype
    try:
      from scipy import misc  # pylint: disable=g-import-not-at-top
      from scipy import special  # pylint: disable=g-import-not-at-top

      alpha_val = np.logspace(-2, 3, dtype=np_dtype)
      alpha = constant_op.constant(alpha_val)
      sample = random_ops.random_gamma([], alpha, np_dtype(1.0), dtype=dtype)
      actual = gradients_impl.gradients(sample, alpha)[0]

      (sample_val, actual_val) = self.evaluate((sample, actual))

      u = special.gammainc(alpha_val, sample_val)
      expected_val = misc.derivative(
          lambda alpha_prime: special.gammaincinv(alpha_prime, u),
          alpha_val, dx=delta * alpha_val)

      self.assertAllClose(actual_val, expected_val, rtol=1e-3, atol=1e-3)
    except ImportError as e:
      tf_logging.warn("Cannot use special functions in a test: %s" % str(e))
Example #25
  def raise_errors(self, timeout_sec=0):
    """Wait for up to `timeout` seconds for all error sources to finish.

    Preferentially raise "interesting" errors (errors not in the
    _UNINTERESTING_ERRORS) set.

    Args:
      timeout_sec: Seconds to wait for other error sources.
    """
    for _ in range(timeout_sec):
      if len(self._errors) == self._num_sources:
        break
      time.sleep(1)

    kept_errors = [(k, v) for (k, v) in self._errors.items() if v is not None]

    # First check for any interesting errors, then fall back on the session
    # cancelled errors etc.
    for k, (typ, value, traceback) in kept_errors:
      if isinstance(value, _UNINTERESTING_ERRORS):
        continue
      else:
        logging.warn('Reraising captured error')
        six.reraise(typ, value, traceback)

    for k, (typ, value, traceback) in kept_errors:
      logging.warn('Reraising captured error')
      six.reraise(typ, value, traceback)
Example #26
def _check_trt_version_compatibility():
  """Check compatibility of TensorRT version.

  Raises:
    RuntimeError: if the TensorRT library version is incompatible.
  """
  compiled_version = get_linked_tensorrt_version()
  loaded_version = get_loaded_tensorrt_version()
  tf_logging.info("Linked TensorRT version: %s" % str(compiled_version))
  tf_logging.info("Loaded TensorRT version: %s" % str(loaded_version))
  version_mismatch = False
  if loaded_version[0] < compiled_version[0]:
    tf_logging.error(
        "TensorRT version mismatch. Tensorflow was compiled against " +
        "TensorRT %s but library loaded from environment is TensorRT %s" %
        (".".join([str(x) for x in compiled_version]),
         ".".join([str(x) for x in loaded_version])) +
        ". Please make sure that correct version of TensorRT " +
        "is available in the system and added to ldconfig or LD_LIBRARY_PATH")
    raise RuntimeError("Incompatible TensorRT library version")
  for i in zip(loaded_version, compiled_version):
    if i[0] != i[1]:
      tf_logging.warn("TensorRT mismatch. Compiled against version " +
                      "%s, but loaded %s. Things may not work" %
                      (".".join([str(x) for x in compiled_version]),
                       ".".join([str(x) for x in loaded_version])))
      version_mismatch = True
      break
  if not version_mismatch:
    tf_logging.info("Running against TensorRT version %s" %
                    ".".join([str(x) for x in loaded_version]))
Example #27
  def __init__(self, num_units, forget_bias=1.0,
               state_is_tuple=True, activation=None, reuse=None):
    """Initialize the basic LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      forget_bias: float, The bias added to forget gates (see above).
        Must set to `0.0` manually when restoring from CudnnLSTM-trained
        checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
    """
    super(BasicLSTMCell, self).__init__(_reuse=reuse)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh
Example #28
  def __init__(self, num_units, input_size=None, activation=tanh,
               dt_tau=0.1, stddev=0.0, O=None):
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated." % self)
    self._num_units = num_units
    self._activation = activation
    self._dt_tau = dt_tau
    self._sigma = stddev * math.sqrt(2 / dt_tau - 1)
    self._O = O
Example #29
def _export_estimator(estimator,
                      export_dir,
                      signature_fn,
                      input_fn,
                      default_batch_size,
                      exports_to_keep):
  input_fn = input_fn or _default_input_fn
  checkpoint_path = tf_saver.latest_checkpoint(estimator._model_dir)
  with ops.Graph().as_default() as g:
    contrib_variables.create_global_step(g)
    examples = array_ops.placeholder(dtype=dtypes.string,
                                     shape=[default_batch_size],
                                     name='input_example_tensor')
    features = input_fn(estimator, examples)
    predictions = estimator._get_predict_ops(features)

    # Explicit signature_fn takes priority
    if signature_fn:
      default_signature, named_graph_signatures = signature_fn(examples,
                                                               features,
                                                               predictions)
    else:
      try:
        # Some estimators provide a target_column of known type
        target_column = estimator._get_target_column()
        problem_type = target_column.problem_type

        if problem_type == layers.ProblemType.CLASSIFICATION:
          signature_fn = classification_signature_fn
        elif problem_type == layers.ProblemType.LINEAR_REGRESSION:
          signature_fn = regression_signature_fn
        elif problem_type == layers.ProblemType.LOGISTIC_REGRESSION:
          signature_fn = logistic_regression_signature_fn
        else:
          raise ValueError(
              'signature_fn must be provided because the TargetColumn is a %s, '
              'which does not have a standard problem type and so cannot use a '
              'standard export signature.' % type(target_column).__name__)

        default_signature, named_graph_signatures = (
            signature_fn(examples, features, predictions))
      except AttributeError:
        logging.warn(
            'Change warning: `signature_fn` will be required after '
            '2016-08-01.\n'
            'Using generic signatures for now.  To maintain this behavior, '
            'pass:\n'
            '  signature_fn=export.generic_signature_fn\n'
            'Also consider passing a regression or classification signature; '
            'see cl/126430915 for an example.')
        default_signature, named_graph_signatures = generic_signature_fn(
            examples, features, predictions)
    if exports_to_keep is not None:
      exports_to_keep = gc.largest_export_versions(exports_to_keep)
    _export_graph(g, _get_saver(), checkpoint_path, export_dir,
                  default_graph_signature=default_signature,
                  named_graph_signatures=named_graph_signatures,
                  exports_to_keep=exports_to_keep)
Example #30
  def _log_abs_determinant(self):
    logging.warn(
        "Using (possibly slow) default implementation of determinant."
        "  Requires conversion to a dense matrix and O(N^3) operations.")
    if self._can_use_cholesky():
      diag = array_ops.matrix_diag_part(self._get_cached_chol())
      return 2 * math_ops.reduce_sum(math_ops.log(diag), reduction_indices=[-1])
    abs_det = math_ops.abs(self.determinant())
    return math_ops.log(abs_det)
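The Cholesky fast path relies on the identity log|det A| = 2 * sum(log(diag(L))) for A = L L^T. A quick NumPy check of that identity:

import numpy as np

rng = np.random.RandomState(0)
m = rng.randn(4, 4)
a = m.dot(m.T) + 4 * np.eye(4)  # symmetric positive-definite
chol = np.linalg.cholesky(a)
np.testing.assert_allclose(
    np.log(abs(np.linalg.det(a))),
    2 * np.sum(np.log(np.diag(chol))))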
Example #31
    def __init__(self,
                 cell,
                 attn_inputs,
                 attn_size,
                 attn_vec_size,
                 output_size=None,
                 input_size=None,
                 state_is_tuple=True,
                 attn_masks=None,
                 merge_output_attn='linear',
                 reuse=None):
        """Create a cell with attention.

    Args:
      cell: an RNNCell, an attention is added to it.
      attn_inputs: a Tensor.
      attn_size: integer, the size of an attention vector. Equal to
        cell.output_size by default.
      attn_vec_size: integer, the number of convolutional features calculated on
        attention state and a size of the hidden layer built from base cell
        state. Equal to attn_size by default.
      output_size: integer, the size of the cell output. Equal to
        cell.output_size by default.
      input_size: integer, the size of a hidden linear layer, built from inputs
        and attention. Derived from the input tensor by default.
      state_is_tuple: If True, accepted and returned states are n-tuples, where
        `n = len(cells)`.  If False, the states are all concatenated along the
        column axis.
      attn_masks: mask that should be applied to attention. If None, no masks
        will be applied.
      reuse: (optional) Python boolean describing whether to reuse variables in
        an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.

    Raises:
      TypeError: if cell is not an RNNCell.
      ValueError: if cell returns a state tuple but the flag
          `state_is_tuple` is `False`, or if `attn_size` is zero or less.
    """
        if not isinstance(cell, rnn.RNNCell):
            raise TypeError('The parameter cell is not RNNCell.')
        if nest.is_sequence(cell.state_size) and not state_is_tuple:
            raise ValueError('Cell returns tuple of states, but the flag '
                             'state_is_tuple is not set. State size is: %s' %
                             str(cell.state_size))
        if not state_is_tuple:
            logging.warn(
                '%s: Using a concatenated state is slower and will soon be '
                'deprecated.  Use state_is_tuple=True.', self)

        self._state_is_tuple = state_is_tuple

        if not state_is_tuple:
            raise NotImplementedError

        self._cell = cell
        self._input_size = input_size
        self._output_size = output_size
        if output_size is None:
            self._output_size = cell.output_size
        self._attn_size = attn_size
        self._reuse = reuse
        self._attn_inputs = attn_inputs
        self._attn_vec_size = attn_vec_size
        self.attn_masks = attn_masks
        self.merge_output_attn = merge_output_attn
Example #32
    def _testMoments(self, dt):
        try:
            from scipy import stats  # pylint: disable=g-import-not-at-top
        except ImportError as e:
            tf_logging.warn("Cannot test moments: %s" % e)
            return

        # Check that the given array of samples matches the given theoretical
        # moment function at different orders. The test passes if the z-tests
        # of all statistical moments are below z_limit.
        # Parameters:
        #   max_moment: the largest moment of the distribution to be tested
        #   stride: the distance between samples when checking for statistical
        #       properties; 0 means the n-th moment of each sample, and any
        #       other stride tests for spatial correlation between samples

        # The moments test is a z-value test.  This is the largest z-value
        # we want to tolerate. Since the z-test approximates a unit normal
        # distribution, it should almost definitely never exceed 6.
        z_limit = 6.0

        for stride in 0, 1, 4, 17:
            alphas = [0.2, 1.0, 3.0]
            if dt == dtypes.float64:
                alphas = [0.01] + alphas
            for alpha in alphas:
                for scale in 9, 17:
                    # Gamma moments only defined for values less than the scale param.
                    max_moment = min(6, scale // 2)
                    sampler = self._Sampler(20000,
                                            alpha,
                                            1 / scale,
                                            dt,
                                            use_gpu=False,
                                            seed=12345)
                    moments = [0] * (max_moment + 1)
                    moments_sample_count = [0] * (max_moment + 1)
                    x = np.array(sampler().flat)  # sampler does 10x samples
                    for k in range(len(x)):
                        moment = 1.
                        for i in range(max_moment + 1):
                            index = k + i * stride
                            if index >= len(x):
                                break
                            moments[i] += moment
                            moments_sample_count[i] += 1
                            moment *= x[index]
                    for i in range(max_moment + 1):
                        moments[i] /= moments_sample_count[i]
                    for i in range(1, max_moment + 1):
                        g = stats.gamma(alpha, scale=scale)
                        if stride == 0:
                            moments_i_mean = g.moment(i)
                            moments_i_squared = g.moment(2 * i)
                        else:
                            moments_i_mean = pow(g.moment(1), i)
                            moments_i_squared = pow(g.moment(2), i)
                        # Calculate moment variance safely:
                        # This is just
                        #  (moments_i_squared - moments_i_mean**2) / moments_sample_count[i]
                        normalized_moments_i_var = (
                            moments_i_mean / moments_sample_count[i] *
                            (moments_i_squared / moments_i_mean -
                             moments_i_mean))
                        # Assume every operation has a small numerical error.
                        # It takes i multiplications to calculate one i-th moment.
                        error_per_moment = i * np.finfo(dt.as_numpy_dtype).eps
                        total_variance = (normalized_moments_i_var +
                                          error_per_moment)
                        tiny = np.finfo(dt.as_numpy_dtype).tiny
                        self.assertGreaterEqual(total_variance, 0)
                        if total_variance < tiny:
                            total_variance = tiny
                        # z_test is approximately a unit normal distribution.
                        z_test = abs((moments[i] - moments_i_mean) /
                                     math.sqrt(total_variance))
                        self.assertLess(z_test, z_limit)
Example #33
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 cell_clip=None,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=None,
                 num_proj_shards=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None,
                 reuse=None):
        """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      num_proj_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training. Must set it manually to `0.0` when restoring from
        CudnnLSTM trained checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  This latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.

      When restoring from CudnnLSTM-trained checkpoints, must use
      CudnnCompatibleLSTMCell instead.
    """
        super(LSTMCell, self).__init__(_reuse=reuse)
        if not state_is_tuple:
            logging.warn(
                "%s: Using a concatenated state is slower and will soon be "
                "deprecated.  Use state_is_tuple=True.", self)
        if num_unit_shards is not None or num_proj_shards is not None:
            logging.warn(
                "%s: The num_unit_shards and num_proj_shards parameters are "
                "deprecated and will be removed in Jan 2017.  "
                "Use a variable scope with a partitioner instead.", self)

        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._cell_clip = cell_clip
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation or math_ops.tanh

        if num_proj:
            self._state_size = (LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
Example #34
    def train(self, delay_secs=None):
        """Fit the estimator using the training data.

    Train the estimator for `self._train_steps` steps, after waiting for
    `delay_secs` seconds. If `self._train_steps` is `None`, train forever.

    Args:
      delay_secs: Start training after this many seconds.

    Returns:
      The trained estimator.
    """
        start = time.time()

        # Start the server, if needed. It's important to start the server before
        # we (optionally) sleep for the case where no device_filters are set.
        # Otherwise, the servers will wait to connect to each other before starting
        # to train. We might as well start as soon as we can.
        config = self._estimator.config
        if isinstance(config, run_config.RunConfig):
            if (config.cluster_spec and config.master
                    and config.environment == run_config.Environment.LOCAL):
                logging.warn(
                    "ClusterSpec and master are provided, but environment is "
                    "set to 'local'. Set environment to 'cloud' if you intend "
                    "to use the distributed runtime.")
            if (config.environment != run_config.Environment.LOCAL
                    and config.environment != run_config.Environment.GOOGLE
                    and config.cluster_spec and config.master):
                self._start_server()
        elif config.cluster_spec and config.master:
            raise ValueError(
                "For distributed runtime, Experiment class only works with"
                "tf.contrib.learn.RunConfig for now, but provided {}".format(
                    type(config)))

        extra_hooks = []
        if delay_secs is None:
            task_id = self._estimator.config.task_id or 0
            if self._delay_workers_by_global_step:
                # Wait 5500 global steps for the second worker. Each worker
                # waits more than the previous one, but with a diminishing
                # number of steps.
                extra_hooks.append(
                    basic_session_run_hooks.GlobalStepWaiterHook(
                        int(8000.0 * math.log(task_id + 1))))
                delay_secs = 0
            else:
                # Wait 5 secs more for each new worker up to 60 secs.
                delay_secs = min(60, task_id * 5)

        if delay_secs > 0:
            elapsed_secs = time.time() - start
            remaining = delay_secs - elapsed_secs
            logging.info("Waiting %d secs before starting training.",
                         remaining)
            time.sleep(delay_secs)

        return self._call_train(input_fn=self._train_input_fn,
                                max_steps=self._train_steps,
                                hooks=self._train_monitors + extra_hooks,
                                saving_listeners=self._saving_listeners)
Example #35
def _export_estimator(estimator,
                      export_dir,
                      signature_fn,
                      input_fn,
                      default_batch_size,
                      exports_to_keep,
                      input_feature_key=None,
                      use_deprecated_input_fn=True,
                      prediction_key=None,
                      checkpoint_path=None):
    if use_deprecated_input_fn:
        input_fn = input_fn or _default_input_fn
    elif input_fn is None:
        raise ValueError('input_fn must be defined.')

    # If checkpoint_path is specified, use the specified checkpoint path.
    checkpoint_path = (checkpoint_path
                       or tf_saver.latest_checkpoint(estimator._model_dir))
    with ops.Graph().as_default() as g:
        training_util.create_global_step(g)

        if use_deprecated_input_fn:
            examples = array_ops.placeholder(dtype=dtypes.string,
                                             shape=[default_batch_size],
                                             name='input_example_tensor')
            features = input_fn(estimator, examples)
        else:
            features, _ = input_fn()
            examples = None
            if input_feature_key is not None:
                examples = features.pop(input_feature_key)

        if (not features) and (examples is None):
            raise ValueError('Either features or examples must be defined.')

        predictions = estimator._get_predict_ops(features).predictions

        if prediction_key is not None:
            predictions = predictions[prediction_key]

        # Explicit signature_fn takes priority
        if signature_fn:
            default_signature, named_graph_signatures = signature_fn(
                examples, features, predictions)
        else:
            try:
                # Some estimators provide a signature function.
                # TODO(zakaria): check if the estimator has this function,
                #   raise helpful error if not
                signature_fn = estimator._create_signature_fn()

                default_signature, named_graph_signatures = (signature_fn(
                    examples, features, predictions))
            except AttributeError:
                logging.warn(
                    'Change warning: `signature_fn` will be required after '
                    '2016-08-01.\n'
                    'Using generic signatures for now.  To maintain this behavior, '
                    'pass:\n'
                    '  signature_fn=export.generic_signature_fn\n'
                    'Also consider passing a regression or classification signature; '
                    'see cl/126430915 for an example.')
                default_signature, named_graph_signatures = generic_signature_fn(
                    examples, features, predictions)
        if exports_to_keep is not None:
            exports_to_keep = gc.largest_export_versions(exports_to_keep)
        return _export_graph(g,
                             _get_saver(),
                             checkpoint_path,
                             export_dir,
                             default_graph_signature=default_signature,
                             named_graph_signatures=named_graph_signatures,
                             exports_to_keep=exports_to_keep)
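
For reference, a minimal sketch of the `signature_fn` contract assumed by `_export_estimator` above: it receives `(examples, features, predictions)` and must return a `(default_signature, named_graph_signatures)` pair. `my_signature_fn` is a hypothetical name; the body simply delegates to the `generic_signature_fn` fallback used above.

def my_signature_fn(examples, features, predictions):
    # Hypothetical sketch: reuse the library's generic signature builder,
    # which already returns the expected
    # (default_signature, named_graph_signatures) pair.
    return generic_signature_fn(examples, features, predictions)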
Example #36
  def __init__(self, num_units, input_size=None,
               use_peepholes=False, cell_clip=None,
               initializer=None, num_proj=None, proj_clip=None,
               num_unit_shards=None, num_proj_shards=None,
               forget_bias=1.0, state_is_tuple=True,
               activation=tanh):
    """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell
      input_size: Deprecated and unused.
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      num_proj_shards: Deprecated, will be removed by Jan. 2017.
        Use a variable_scope partitioner instead.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  This latter behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)
    if input_size is not None:
      logging.warn("%s: The input_size parameter is deprecated.", self)
    if num_unit_shards is not None or num_proj_shards is not None:
      logging.warn(
          "%s: The num_unit_shards and proj_unit_shards parameters are "
          "deprecated and will be removed in Jan 2017.  "
          "Use a variable scope with a partitioner instead.", self)

    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation

    if num_proj:
      self._state_size = (
          LSTMStateTuple(num_units, num_proj)
          if state_is_tuple else num_units + num_proj)
      self._output_size = num_proj
    else:
      self._state_size = (
          LSTMStateTuple(num_units, num_units)
          if state_is_tuple else 2 * num_units)
      self._output_size = num_units
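
A worked check of the sizing logic above, with hypothetical parameters:

num_units, num_proj = 128, 64                 # hypothetical sizes
state_size_with_proj = (num_units, num_proj)  # tuple state: c=128, m=64
output_size_with_proj = num_proj              # 64
state_size_no_proj_concat = 2 * num_units     # concatenated [c; m]: 256
output_size_no_proj = num_units               # 128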
Example #37
def _pfor_impl(loop_fn,
               iters,
               fallback_to_while_loop,
               parallel_iterations=None,
               pfor_config=None):
  """Implementation of pfor."""
  assert not context.executing_eagerly()
  loop_fn_has_config = _loop_fn_has_config(loop_fn)
  existing_ops = set(ops.get_default_graph().get_operations())
  # Run the loop body
  with ops.name_scope("loop_body"):
    loop_var = array_ops.placeholder_with_default(0, shape=[])
    if loop_fn_has_config:
      if pfor_config is None:
        pfor_config = PForConfig()
        pfor_config._set_iters(iters)  # pylint: disable=protected-access
      loop_fn_outputs = loop_fn(loop_var, **{PFOR_CONFIG_ARG: pfor_config})
    else:
      assert pfor_config is None
      loop_fn_outputs = loop_fn(loop_var)

  # Convert outputs to Tensor if needed.
  rewrap_as_ndarray = False
  tmp_loop_fn_outputs = []
  for loop_fn_output in nest.flatten(loop_fn_outputs):
    if (loop_fn_output is not None and not isinstance(
        loop_fn_output,
        (ops.Operation, ops.Tensor, sparse_tensor.SparseTensor))):
      if isinstance(loop_fn_output, indexed_slices.IndexedSlices):
        logging.warn("Converting %s to a dense representation may make it slow."
                     " Alternatively, output the indices and values of the"
                     " IndexedSlices separately, and handle the vectorized"
                     " outputs directly." % loop_fn_output)
        loop_fn_output = ops.convert_to_tensor(loop_fn_output)
      elif isinstance(loop_fn_output, np_arrays.ndarray):
        loop_fn_output = loop_fn_output.data
        rewrap_as_ndarray = True
      else:
        loop_fn_output = ops.convert_to_tensor(loop_fn_output)
    tmp_loop_fn_outputs.append(loop_fn_output)
  loop_fn_outputs = nest.pack_sequence_as(loop_fn_outputs, tmp_loop_fn_outputs)

  new_ops = set(ops.get_default_graph().get_operations()) - existing_ops
  iters = ops.convert_to_tensor(iters)
  if parallel_iterations is not None:
    if parallel_iterations < 1:
      raise ValueError("parallel_iterations must be None or a positive integer")
    if parallel_iterations == 1:
      raise ValueError("Found parallel_iterations == 1. Use for_loop instead.")
    iters_value = tensor_util.constant_value(iters)
    if iters_value is not None and iters_value < parallel_iterations:
      parallel_iterations = None
  if parallel_iterations is None:
    with ops.name_scope("pfor"):
      converter = PFor(loop_var, iters, new_ops,
                       fallback_to_while_loop=fallback_to_while_loop,
                       pfor_config=pfor_config)
      outputs = []
      for loop_fn_output in nest.flatten(loop_fn_outputs):
        output = converter.convert(loop_fn_output)
        if rewrap_as_ndarray:
          output = np_arrays.tensor_to_ndarray(output)
        outputs.append(output)
      return nest.pack_sequence_as(loop_fn_outputs, outputs)
  else:
    if pfor_config is not None and pfor_config._has_reductions():  # pylint: disable=protected-access
      raise ValueError("Setting parallel_iterations currently unsupported if"
                       " reductions across iterations are performed.")
    num_tiled_iterations = iters // parallel_iterations
    num_remaining_iterations = iters % parallel_iterations
    # TODO(agarwal): Avoid calling loop_fn twice. Generate the loop body inside
    # a tf.function and extract the graph from there to vectorize it.
    with ops.name_scope("pfor_untiled"):
      converter = PFor(loop_var, num_remaining_iterations, new_ops,
                       fallback_to_while_loop=fallback_to_while_loop,
                       pfor_config=pfor_config)
      remaining_outputs = []
      flattened_loop_fn_outputs = nest.flatten(loop_fn_outputs)
      for loop_fn_output in flattened_loop_fn_outputs:
        output = converter.convert(loop_fn_output)
        if rewrap_as_ndarray:
          output = np_arrays.tensor_to_ndarray(output)
        remaining_outputs.append(output)

    with ops.name_scope("pfor_tiled"):
      loop_fn_dtypes = [ops.convert_to_tensor(x).dtype
                        for x in flattened_loop_fn_outputs]

      def tiled_loop_body(j):
        offset = j * parallel_iterations + num_remaining_iterations

        def tiled_loop_fn(i, pfor_config=None):
          if loop_fn_has_config:
            return nest.flatten(loop_fn(i + offset, pfor_config=pfor_config))
          else:
            return nest.flatten(loop_fn(i + offset))

        return _pfor_impl(
            tiled_loop_fn,
            parallel_iterations,
            fallback_to_while_loop=fallback_to_while_loop,
            pfor_config=pfor_config)

      tiled_outputs = for_loop(tiled_loop_body, loop_fn_dtypes,
                               num_tiled_iterations, parallel_iterations=1)
      tiled_outputs = [_flatten_first_two_dims(y) for y in tiled_outputs]

    with ops.name_scope("pfor"):
      iters_value = tensor_util.constant_value(iters)
      if iters_value is None or iters_value % parallel_iterations:
        outputs = control_flow_ops.cond(
            math_ops.equal(num_remaining_iterations, 0),
            lambda: tiled_outputs,
            lambda: [array_ops.concat([x, y], axis=0)
                     for x, y in zip(remaining_outputs, tiled_outputs)])
      else:
        outputs = tiled_outputs
      flattened_outputs = nest.flatten(outputs)
      if rewrap_as_ndarray:
        flattened_outputs = [
            np_arrays.tensor_to_ndarray(x) for x in flattened_outputs]
      return nest.pack_sequence_as(loop_fn_outputs, flattened_outputs)
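
`_pfor_impl` is the machinery behind the public `pfor`/`tf.vectorized_map` entry points; a minimal usage sketch, assuming TF 2.x:

import tensorflow as tf

x = tf.random.normal([8, 3])
# One vectorized pass instead of a Python loop; equivalent to stacking
# fn(x[i]) for i in range(8).
y = tf.vectorized_map(lambda row: tf.tensordot(row, row, axes=1), x)
print(y.shape)  # (8,)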
Example #38
def _einsum_v1(equation, *inputs, **kwargs):
    """Legacy implementation of einsum without using EinsumOp."""
    name = kwargs.pop('name', None)
    if kwargs:
        raise TypeError(
            'invalid keyword arguments for this function: ' +
            ', '.join([format(key) for key in sorted(list(kwargs.keys()))]))
    with ops.name_scope(name, 'einsum', [equation, inputs]) as name:
        inputs = list(inputs)
        input_shapes = [x.shape for x in inputs]
        input_axis_labels, output_axis_labels = (
            _einsum_v1_parse_and_resolve_equation(equation, input_shapes))

        axis_labels = set(''.join(input_axis_labels) + output_axis_labels)

        for a in axis_labels:
            for input_labels in input_axis_labels:
                if (len(input_axis_labels) == 1 and input_labels.count(a) == 2
                        and input_labels == input_labels[::-1]
                        and '->' not in equation):
                    return math_ops.trace(inputs[0])
                if input_labels.count(a) > 1:
                    raise ValueError(
                        'Subscript not supported: an axis appears more than once: %s'
                        % input_labels)
        for a in axis_labels:
            input_count = sum(1 for s in input_axis_labels if a in s)
            if input_count > 2 and a not in output_axis_labels:
                logging.warn(
                    'Falling back to exponential-space implementation of einsum()'
                    ' because index "%s" is summed over more than two inputs.',
                    a)
                return _exponential_space_einsum_v1(equation, *inputs)

        # Use xla_einsum if executing on TPU and if the operation is a 2 input
        # einsum supported by XlaEinsumOp.
        if _enclosing_tpu_context() is not None and len(inputs) == 2:
            return gen_xla_ops.xla_einsum(
                inputs[0], inputs[1], input_axis_labels[0] + ',' +
                input_axis_labels[1] + '->' + output_axis_labels)
        temp = inputs[0]
        temp_axis_labels = input_axis_labels[0]
        for i in range(len(inputs) - 1):
            axes_to_sum = (
                set(temp_axis_labels)
                & set(input_axis_labels[i + 1]) - set(output_axis_labels))
            temp, temp_axis_labels = _einsum_v1_reduction(
                temp, temp_axis_labels, inputs[i + 1],
                input_axis_labels[i + 1], axes_to_sum)

        missing_indices = set(temp_axis_labels) - set(output_axis_labels)
        if missing_indices:
            axis = [
                i for i, a in enumerate(temp_axis_labels)
                if a not in output_axis_labels
            ]
            temp = math_ops.reduce_sum(temp, axis=axis)
            temp_axis_labels = ''.join(a for a in temp_axis_labels
                                       if a in output_axis_labels)
        if sorted(temp_axis_labels) != sorted(output_axis_labels):
            raise ValueError('Invalid equation: %s' % equation)

        perm = [temp_axis_labels.index(a) for a in output_axis_labels]
        return _transpose_if_necessary(temp, perm)
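
A short sanity check of the equation handling above, assuming the public `tf.einsum` (which this legacy path once implemented):

import tensorflow as tf

a = tf.constant([[1., 2.], [3., 4.]])
b = tf.constant([[5., 6.], [7., 8.]])
c = tf.einsum('ij,jk->ik', a, b)  # plain matrix product
t = tf.einsum('ii', a)            # the trace fast path above: 1 + 4 = 5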
Example #39
def _pfor_impl(loop_fn,
               iters,
               fallback_to_while_loop,
               parallel_iterations=None,
               pfor_config=None):
    """Implementation of pfor."""
    assert not context.executing_eagerly()
    loop_fn_has_config = _loop_fn_has_config(loop_fn)
    existing_ops = set(ops.get_default_graph().get_operations())
    iters_value = tensor_util.constant_value(iters)
    # Run the loop body
    with ops.name_scope("loop_body"):
        loop_var = array_ops.placeholder_with_default(0, shape=[])
        if loop_fn_has_config:
            if pfor_config is None:
                pfor_config = PForConfig()
                pfor_config._set_iters(iters)  # pylint: disable=protected-access
            loop_fn_outputs = loop_fn(loop_var,
                                      **{PFOR_CONFIG_ARG: pfor_config})
        else:
            assert pfor_config is None
            f = autograph.tf_convert(loop_fn,
                                     autograph_ctx.control_status_ctx())
            loop_fn_outputs = f(loop_var)
        loop_fn_output_tensors = nest.map_structure(_composite_to_tensors,
                                                    loop_fn_outputs)

    # Convert outputs to Tensor if needed.
    tmp_loop_fn_outputs = []
    for loop_fn_output in nest.flatten(loop_fn_output_tensors):
        if (loop_fn_output is not None and not isinstance(
                loop_fn_output,
            (ops.Operation, ops.Tensor, sparse_tensor.SparseTensor))):
            if isinstance(loop_fn_output, indexed_slices.IndexedSlices):
                logging.warn(
                    "Converting %s to a dense representation may make it slow."
                    " Alternatively, output the indices and values of the"
                    " IndexedSlices separately, and handle the vectorized"
                    " outputs directly." % loop_fn_output)
                loop_fn_output = ops.convert_to_tensor(loop_fn_output)
            else:
                loop_fn_output = ops.convert_to_tensor(loop_fn_output)
        tmp_loop_fn_outputs.append(loop_fn_output)
    loop_fn_output_tensors = nest.pack_sequence_as(loop_fn_output_tensors,
                                                   tmp_loop_fn_outputs)

    new_ops = set(ops.get_default_graph().get_operations()) - existing_ops
    iters = ops.convert_to_tensor(iters)
    if parallel_iterations is not None:
        if parallel_iterations < 1:
            raise ValueError(
                "Argument `parallel_iterations` must be None or a positive integer. "
                f"Received: {parallel_iterations}.")
        if parallel_iterations == 1:
            raise ValueError(
                "Found `parallel_iterations == 1`. Use `for_loop` instead.")
        if iters_value is not None and iters_value < parallel_iterations:
            parallel_iterations = None
    if parallel_iterations is None:
        with ops.name_scope("pfor"):
            converter = PFor(loop_var,
                             iters,
                             new_ops,
                             fallback_to_while_loop=fallback_to_while_loop,
                             pfor_config=pfor_config)
            flattened_output_tensors = []
            for loop_fn_output in nest.flatten(loop_fn_output_tensors):
                output = converter.convert(loop_fn_output)
                flattened_output_tensors.append(output)
    else:
        if pfor_config is not None and pfor_config._has_reductions():  # pylint: disable=protected-access
            raise ValueError(
                "Setting `parallel_iterations` currently unsupported if "
                "reductions across iterations are performed.")
        num_tiled_iterations = iters // parallel_iterations
        num_remaining_iterations = iters % parallel_iterations
        # TODO(agarwal): Avoid calling loop_fn twice. Generate the loop body inside
        # a tf.function and extract the graph from there to vectorize it.
        with ops.name_scope("pfor_untiled"):
            converter = PFor(loop_var,
                             num_remaining_iterations,
                             new_ops,
                             fallback_to_while_loop=fallback_to_while_loop,
                             pfor_config=pfor_config)
            remaining_output_tensors = []
            flattened_output_tensors = nest.flatten(loop_fn_output_tensors)
            for loop_fn_output in flattened_output_tensors:
                output = converter.convert(loop_fn_output)
                remaining_output_tensors.append(output)

        with ops.name_scope("pfor_tiled"):
            loop_fn_dtypes = [
                ops.convert_to_tensor(x).dtype
                for x in flattened_output_tensors
            ]

            def tiled_loop_body(j):
                offset = j * parallel_iterations + num_remaining_iterations

                def tiled_loop_fn(i, pfor_config=None):
                    if loop_fn_has_config:
                        loop_fn_outputs = loop_fn(i + offset,
                                                  pfor_config=pfor_config)
                    else:
                        loop_fn_outputs = loop_fn(i + offset)
                    return nest.flatten(
                        # Stacking across iterations requires explicit Tensors.
                        nest.map_structure(_composite_to_tensors,
                                           loop_fn_outputs))

                return _pfor_impl(
                    tiled_loop_fn,
                    parallel_iterations,
                    fallback_to_while_loop=fallback_to_while_loop,
                    pfor_config=pfor_config)

            tiled_output_tensors = for_loop(tiled_loop_body,
                                            loop_fn_dtypes,
                                            num_tiled_iterations,
                                            parallel_iterations=1)
            tiled_output_tensors = [
                _flatten_first_two_dims(y) for y in tiled_output_tensors
            ]

        with ops.name_scope("pfor"):
            if iters_value is None or iters_value % parallel_iterations:
                output_tensors = control_flow_ops.cond(
                    math_ops.equal(num_remaining_iterations, 0),
                    lambda: tiled_output_tensors,
                    lambda: [
                        array_ops.concat([x, y], axis=0)  # pylint: disable=g-long-lambda
                        for x, y in zip(remaining_output_tensors,
                                        tiled_output_tensors)
                    ])
            else:
                output_tensors = tiled_output_tensors
            flattened_output_tensors = nest.flatten(output_tensors)

            for output, original_output in zip(
                    flattened_output_tensors,
                    nest.flatten(loop_fn_output_tensors)):
                # Restore any shape information lost from tiling.
                # TODO(b/174254748): this may not be correct for stacked `variant`s.
                output.set_shape(
                    tensor_shape.TensorShape([iters_value]).concatenate(
                        original_output.shape))

    return nest.map_structure_up_to(
        loop_fn_outputs,
        functools.partial(_composite_from_tensors, batch_size=iters_value),
        nest.pack_sequence_as(loop_fn_output_tensors,
                              flattened_output_tensors), loop_fn_outputs)
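
The tiling split above in concrete numbers (hypothetical sizes):

iters, parallel_iterations = 10, 4
num_tiled_iterations = iters // parallel_iterations     # 2 tiled passes of 4
num_remaining_iterations = iters % parallel_iterations  # 2 run untiled first
# Tiled pass j then starts at offset
# j * parallel_iterations + num_remaining_iterations, matching tiled_loop_body.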
Example #40
def evaluate(graph,
             output_dir,
             checkpoint_path,
             eval_dict,
             update_op=None,
             global_step_tensor=None,
             supervisor_master='',
             log_every_steps=10,
             feed_fn=None,
             max_steps=None):
  """Evaluate a model loaded from a checkpoint.

  Given `graph`, a directory to write summaries to (`output_dir`), a checkpoint
  to restore variables from, and a `dict` of `Tensor`s to evaluate, run an eval
  loop for `max_steps` steps, or until an exception (generally, an
  end-of-input signal from a reader operation) is raised from running
  `eval_dict`.

  In each step of evaluation, all tensors in the `eval_dict` are evaluated, and
  every `log_every_steps` steps, they are logged. At the very end of evaluation,
  a summary is evaluated (finding the summary ops using `Supervisor`'s logic)
  and written to `output_dir`.

  Args:
    graph: A `Graph` to train. It is expected that this graph is not in use
      elsewhere.
    output_dir: A string containing the directory to write a summary to.
    checkpoint_path: A string containing the path to a checkpoint to restore.
      Can be `None` if the graph doesn't require loading any variables.
    eval_dict: A `dict` mapping string names to tensors to evaluate. It is
      evaluated in every logging step. The result of the final evaluation is
      returned. If `update_op` is None, then it's evaluated in every step. If
      `max_steps` is `None`, this should depend on a reader that will raise an
      end-of-input exception when the inputs are exhausted.
    update_op: A `Tensor` which is run in every step.
    global_step_tensor: A `Variable` containing the global step. If `None`,
      one is extracted from the graph using the same logic as in `Supervisor`.
      Used to place eval summaries on training curves.
    supervisor_master: The master string to use when preparing the session.
    log_every_steps: Integer. Output logs every `log_every_steps` evaluation
      steps. The logs contain the `eval_dict` and timing information.
    feed_fn: A function that is called every iteration to produce a `feed_dict`
      passed to `session.run` calls. Optional.
    max_steps: Integer. Evaluate `eval_dict` this many times.

  Returns:
    A tuple `(eval_results, global_step)`:
    eval_results: A `dict` mapping `string` to numeric values (`int`, `float`)
      that are the result of running eval_dict in the last step. `None` if no
      eval steps were run.
    global_step: The global step this evaluation corresponds to.

  Raises:
    ValueError: if `output_dir` is empty.
  """
  if not output_dir:
    raise ValueError('Output directory should be non-empty: %s.' % output_dir)
  with graph.as_default():
    global_step_tensor = contrib_variables.assert_or_get_global_step(
        graph, global_step_tensor)

    # Create or get summary op, global_step and saver.
    saver = _get_saver()
    local_init_op = _get_local_init_op()
    ready_op = _get_ready_op()

    session_manager = session_manager_lib.SessionManager(
        local_init_op=local_init_op,
        ready_op=ready_op)
    session, initialized = session_manager.recover_session(
        master=supervisor_master,
        saver=saver,
        checkpoint_dir=checkpoint_path)

    # Start queue runners.
    coord = coordinator.Coordinator()
    threads = queue_runner.start_queue_runners(session, coord)

  with session:
    if not initialized:
      logging.warning('Failed to initialize from %s.', checkpoint_path)
      # TODO(ipolosukhin): This should be failing, but old code relies on that.
      session.run(variables.initialize_all_variables())
      if checkpoint_path:
        _restore_from_checkpoint(session, graph, checkpoint_path, saver)

    current_global_step = session.run(global_step_tensor)
    eval_results = None
    # TODO(amodei): Fix this to run through the eval set exactly once.
    step = 0
    eval_step = None
    feed_dict = None
    logging.info('Eval steps [%d,%s) for training step %d.', step,
                 'inf' if max_steps is None
                 else str(max_steps), current_global_step)
    try:
      try:
        while (max_steps is None) or (step < max_steps):
          step += 1
          start_time = time.time()
          feed_dict = feed_fn() if feed_fn is not None else None
          if update_op is not None:
            session.run(update_op, feed_dict=feed_dict)
          else:
            eval_results = session.run(eval_dict, feed_dict=feed_dict)
            eval_step = step

          # TODO(wicke): We should assert that the global step hasn't changed.
          if step % log_every_steps == 0:
            if eval_step is None or step != eval_step:
              eval_results = session.run(eval_dict, feed_dict=feed_dict)
              eval_step = step
            duration = time.time() - start_time
            logging.info('Results after %d steps (%.3f sec/batch): %s.',
                         step, float(duration),
                         _eval_results_to_str(eval_results))
      finally:
        if eval_results is None or step != eval_step:
          eval_results = session.run(eval_dict, feed_dict=feed_dict)
          eval_step = step
        # Stop session first, before queue runners.
        session.close()

        # Stop queue runners.
        try:
          coord.request_stop()
          coord.join(threads, stop_grace_period_secs=120)
        except (RuntimeError, errors.CancelledError) as e:
          logging.warning('Coordinator didn\'t stop cleanly: %s', e)

    # catch OutOfRangeError which is thrown when queue is out of data (and for
    # other reasons as well).
    except errors.OutOfRangeError as e:
      if max_steps is None:
        logging.info('Input queue is exhausted.')
      else:
        logging.warn('Input queue is exhausted: %s.', e)
    # catch StopIteration which is thrown when the DataReader is out of data.
    except StopIteration as e:
      if max_steps is None:
        logging.info('Input iterator is exhausted.')
      else:
        logging.warn('Input iterator is exhausted: %s.', e)

  # Save summaries for this evaluation.
  _write_summary_results(output_dir, eval_results, current_global_step)

  return eval_results, current_global_step
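
A hypothetical usage sketch of `evaluate` (toy graph, made-up output path; a real call would restore a trained checkpoint):

import tensorflow.compat.v1 as tf

g = tf.Graph()
with g.as_default():
    tf.train.create_global_step()
    loss = tf.constant(1.5)       # stand-in for a real eval tensor
results, step = evaluate(
    graph=g,
    output_dir='/tmp/eval',       # hypothetical directory
    checkpoint_path=None,         # nothing to restore in this toy graph
    eval_dict={'loss': loss},
    max_steps=1)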
Example #41
def _train_internal(graph,
                    output_dir,
                    train_op,
                    loss_op,
                    global_step_tensor,
                    init_op,
                    init_feed_dict,
                    init_fn,
                    log_every_steps,
                    supervisor_is_chief,
                    supervisor_master,
                    supervisor_save_model_secs,
                    keep_checkpoint_max,
                    supervisor_save_summaries_steps,
                    feed_fn,
                    steps,
                    fail_on_nan_loss,
                    monitors,
                    max_steps):
  """See train."""
  if (steps is not None) and (max_steps is not None):
    raise ValueError('Can not provide both steps and max_steps.')
  if not output_dir:
    raise ValueError('Output directory should be non-empty: %s.' % output_dir)
  if train_op is None:
    raise ValueError('Missing train_op.')
  if loss_op is None:
    raise ValueError('Missing loss_op.')

  with graph.as_default():
    global_step_tensor = contrib_variables.assert_or_get_global_step(
        graph, global_step_tensor)
    if global_step_tensor is None:
      raise ValueError('No "global_step" was provided or found in the graph.')

    # Get current step.
    try:
      start_step = checkpoints.load_variable(
          output_dir, global_step_tensor.name)
    except (errors.NotFoundError, ValueError):
      start_step = 0

    summary_writer = (get_summary_writer(output_dir)
                      if supervisor_is_chief else None)

    # Add default chief monitors if none were provided.
    if not monitors:
      monitors = monitors_lib.get_default_monitors(
          loss_op=loss_op,
          summary_op=logging_ops.get_summary_op(),
          save_summary_steps=supervisor_save_summaries_steps,
          summary_writer=summary_writer) if supervisor_is_chief else []

    # TODO(ipolosukhin): Replace all functionality of Supervisor
    # with Chief-Exclusive Monitors.
    if not supervisor_is_chief:
      # Prune the list of monitors to the ones runnable on all workers.
      monitors = [monitor for monitor in monitors if monitor.run_on_all_workers]

    if max_steps is None:
      max_steps = (start_step + steps) if steps else None
    # Start monitors, can create graph parts.
    for monitor in monitors:
      monitor.begin(max_steps=max_steps)

  supervisor = tf_supervisor.Supervisor(
      graph,
      init_op=init_op or tf_supervisor.Supervisor.USE_DEFAULT,
      init_feed_dict=init_feed_dict,
      is_chief=supervisor_is_chief,
      logdir=output_dir,
      saver=_make_saver(graph, keep_checkpoint_max),
      global_step=global_step_tensor,
      summary_op=None,
      summary_writer=summary_writer,
      save_model_secs=supervisor_save_model_secs,
      init_fn=init_fn)
  session = supervisor.PrepareSession(master=supervisor_master,
                                      start_standard_services=True)
  supervisor.StartQueueRunners(session)

  with session:
    get_current_step = lambda: session.run(global_step_tensor)

    start_step = get_current_step()
    last_step = start_step
    last_log_step = start_step
    loss_value = None
    logging.info('Training steps [%d,%s)', last_step, 'inf'
                 if max_steps is None else str(max_steps))

    excinfo = None
    try:
      while not supervisor.ShouldStop() and (
          (max_steps is None) or (last_step < max_steps)):
        start_time = time.time()
        feed_dict = feed_fn() if feed_fn is not None else None

        outputs, should_stop = _run_with_monitors(
            session, last_step + 1, [train_op, loss_op], feed_dict, monitors)

        loss_value = outputs[loss_op.name]
        if np.isnan(loss_value):
          failure_message = 'Model diverged with loss = NaN.'
          if fail_on_nan_loss:
            logging.error(failure_message)
            raise NanLossDuringTrainingError()
          else:
            logging.warning(failure_message)

        if should_stop:
          break

        this_step = get_current_step()

        if this_step <= last_step:
          logging.error(
              'Global step was not incremented by train op at step %s'
              ': new step %d', last_step, this_step)

        last_step = this_step
        is_last_step = (max_steps is not None) and (last_step >= max_steps)
        if is_last_step or (last_step - last_log_step >= log_every_steps):
          logging.info(
              'training step %d, loss = %.5f (%.3f sec/batch).',
              last_step, loss_value, float(time.time() - start_time))
          last_log_step = last_step
    except errors.OutOfRangeError as e:
      logging.warn('Got exception during tf.learn training loop, possibly '
                   'due to an exhausted input queue: %s.', e)
    except StopIteration:
      logging.info('Exhausted input iterator.')
    except BaseException as e:  # pylint: disable=broad-except
      # Hold on to any other exceptions while we try recording a final
      # checkpoint and summary.
      excinfo = sys.exc_info()
    finally:
      try:
        # Call supervisor.Stop() from within a try block because it re-raises
        # exceptions thrown by the supervised threads.
        supervisor.Stop(close_summary_writer=False)

        # Save one last checkpoint and summaries
        # TODO(wicke): This should be handled by Supervisor

        # In case we encountered an exception in the try block before we updated
        # last_step, update it here (again).
        last_step = get_current_step()
        if supervisor_is_chief:
          ckpt_path = supervisor.save_path
          logging.info('Saving checkpoint for step %d to checkpoint: %s.',
                       last_step, ckpt_path)
          supervisor.saver.save(session, ckpt_path, global_step=last_step)

          # Finish monitors.
          for monitor in monitors:
            monitor.end()

      # catch OutOfRangeError which is thrown when queue is out of data (and for
      # other reasons as well).
      except errors.OutOfRangeError as e:
        logging.warn('OutOfRangeError in tf.learn final checkpoint possibly '
                     'due to exhausted input queue. Note: summary_op is not '
                     'expected to trigger dequeues. %s.', e)
      except BaseException as e:  # pylint: disable=broad-except
        # If we don't already have an exception to re-raise, raise this one.
        if not excinfo:
          raise
        # Otherwise, log this one and raise the other in the finally block.
        logging.error('Got exception during tf.learn final checkpoint %s.', e)
      finally:
        if excinfo:
          reraise(*excinfo)
    return loss_value
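
The start-step recovery above reads the global step straight from the latest checkpoint; a standalone sketch using the public loader (hypothetical directory; raises NotFoundError when no checkpoint exists, which the code above maps to start_step = 0):

import tensorflow as tf

start_step = tf.train.load_variable('/tmp/model_dir', 'global_step')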
Example #42
def embedding_lookup_sparse(params,
                            sp_ids,
                            sp_weights,
                            partition_strategy="mod",
                            name=None,
                            combiner=None,
                            max_norm=None):
  """Computes embeddings for the given ids and weights.

  This op assumes that there is at least one id for each row in the dense tensor
  represented by sp_ids (i.e. there are no rows with empty features), and that
  all the indices of sp_ids are in canonical row-major order.

  It also assumes that all id values lie in the range [0, p0), where p0
  is the sum of the size of params along dimension 0.

  Args:
    params: A single tensor representing the complete embedding tensor,
      or a list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported.
      "sum" computes the weighted sum of the embedding results for each row.
      "mean" is the weighted sum divided by the total weight.
      "sqrtn" is the weighted sum divided by the square root of the sum of the
      squares of the weights.
    max_norm: If provided, each embedding is normalized to have l2 norm equal
      to max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

    In other words, if

      shape(combined params) = [p0, p1, ..., pm]

    and

      shape(sp_ids) = shape(sp_weights) = [d0, d1, ..., dn]

    then

      shape(output) = [d0, d1, ..., dn-1, p1, ..., pm].

    For instance, if params is a 10x20 matrix, and sp_ids / sp_weights are

      [0, 0]: id 1, weight 2.0
      [0, 1]: id 3, weight 0.5
      [1, 0]: id 0, weight 1.0
      [2, 3]: id 1, weight 3.0

    with `combiner`="mean", then the output will be a 3x20 matrix where

      output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5)
      output[1, :] = params[0, :] * 1.0
      output[2, :] = params[1, :] * 3.0

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if combiner not in ("mean", "sqrtn", "sum"):
    raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sp_ids, sparse_tensor.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")
  ignore_weights = sp_weights is None
  if not ignore_weights:
    if not isinstance(sp_weights, sparse_tensor.SparseTensor):
      raise TypeError("sp_weights must be either None or SparseTensor")
    sp_ids.values.get_shape().assert_is_compatible_with(
        sp_weights.values.get_shape())
    sp_ids.indices.get_shape().assert_is_compatible_with(
        sp_weights.indices.get_shape())
    sp_ids.dense_shape.get_shape().assert_is_compatible_with(
        sp_weights.dense_shape.get_shape())
    # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
    # sp_weights have equal indices and shapes.

  with ops.name_scope(name, "embedding_lookup_sparse",
                      params + [sp_ids]) as name:
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    ids = sp_ids.values
    if ignore_weights:
      ids, idx = array_ops.unique(ids)
    else:
      idx = None

    embeddings = embedding_lookup(
        params, ids, partition_strategy=partition_strategy, max_norm=max_norm)
    if not ignore_weights:
      weights = sp_weights.values
      if weights.dtype != embeddings.dtype:
        weights = math_ops.cast(weights, embeddings.dtype)

      # Reshape weights to allow broadcast
      ones = array_ops.fill(
          array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
      bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones],
                                             0)

      orig_weights_shape = weights.get_shape()
      weights = array_ops.reshape(weights, bcast_weights_shape)

      # Set the weight shape, since after reshaping to bcast_weights_shape,
      # the shape becomes None.
      if embeddings.get_shape().ndims is not None:
        weights.set_shape(
            orig_weights_shape.concatenate(
                [1 for _ in range(embeddings.get_shape().ndims - 1)]))

      embeddings *= weights

      if combiner == "sum":
        embeddings = math_ops.segment_sum(embeddings, segment_ids, name=name)
      elif combiner == "mean":
        embeddings = math_ops.segment_sum(embeddings, segment_ids)
        weight_sum = math_ops.segment_sum(weights, segment_ids)
        embeddings = math_ops.div(embeddings, weight_sum, name=name)
      elif combiner == "sqrtn":
        embeddings = math_ops.segment_sum(embeddings, segment_ids)
        weights_squared = math_ops.pow(weights, 2)
        weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
        weight_sum_sqrt = math_ops.sqrt(weight_sum)
        embeddings = math_ops.div(embeddings, weight_sum_sqrt, name=name)
      else:
        assert False, "Unrecognized combiner"
    else:
      assert idx is not None
      if combiner == "sum":
        embeddings = math_ops.sparse_segment_sum(
            embeddings, idx, segment_ids, name=name)
      elif combiner == "mean":
        embeddings = math_ops.sparse_segment_mean(
            embeddings, idx, segment_ids, name=name)
      elif combiner == "sqrtn":
        embeddings = math_ops.sparse_segment_sqrt_n(
            embeddings, idx, segment_ids, name=name)
      else:
        assert False, "Unrecognized combiner"

    return embeddings
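
A usage sketch matching the docstring example above (10x20 params; ids and weights laid out exactly as documented):

import tensorflow.compat.v1 as tf

params = tf.random.normal([10, 20])
indices = [[0, 0], [0, 1], [1, 0], [2, 3]]
sp_ids = tf.SparseTensor(indices,
                         tf.constant([1, 3, 0, 1], dtype=tf.int64),
                         dense_shape=[3, 4])
sp_weights = tf.SparseTensor(indices, [2.0, 0.5, 1.0, 3.0], dense_shape=[3, 4])
# Row 0 of the [3, 20] result is the (2.0, 0.5)-weighted mean of params[1]
# and params[3], as in the docstring.
out = tf.nn.embedding_lookup_sparse(params, sp_ids, sp_weights, combiner="mean")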
Example #43
 def __init__(self, num_units, input_size=None, activation=tanh):
   if input_size is not None:
     logging.warn("%s: The input_size parameter is deprecated.", self)
   self._num_units = num_units
   self._activation = activation
Example #44
def inf_nan_callback(op_type,
                     op_name,
                     attrs,
                     inputs,
                     outputs,
                     check_inf=True,
                     check_nan=True,
                     action=_DEFAULT_CALLBACK_ACTION):
    """An execution callback that checks for `inf`s and `nan`s in output tensors.

  This callback can be used with `tfe.add_execute_callback` to check for invalid
  numeric values. E.g.,
  ```python
  tfe.add_execute_callback(tfe.inf_nan_callback)
  ```

  Args:
    op_type: Name of the TFE operation type (e.g., `MatMul`).
    op_name: Name of the TFE operation. This name is set by client and can be
      `None` if it is unset.
    attrs: Attributes of the TFE operation, as a tuple of alternating attribute
      names and attribute values.
    inputs: The `list` of input tensors to the operation, currently unused by
      this callback.
    outputs: The `list` of output tensors from the operation, checked by this
      callback for `inf` and `nan` values.
    check_inf: (`bool`) Whether this callback should check for `inf` values in
      the output tensor values.
    check_nan: (`bool`) Whether this callback should check for `nan` values in
      the output tensor values.
    action: (`str`) Action to be taken by the callback when `inf` or `nan`
      values are detected. Possible values {"raise", "warn", "print"}
      `"raise"`: Raise a `InfOrNanError`.
      `"warn"`: Log a warning using `tf.logging.warn`.
      `"print"`: Print a message to `sys.stdout`.

  Raises:
    InfOrNanError: iff `inf` or `nan` values are seen in any of `outputs` and
      `action` is `"raise"`.
    ValueError: iff the value of `action` is invalid.
  """
    del attrs, inputs  # Not used.

    ctx = context.get_default_context()

    for index, output in enumerate(outputs):
        if not output.dtype.is_numpy_compatible:
            continue

        numpy_dtype = output.dtype.as_numpy_dtype
        if (np.issubdtype(numpy_dtype, np.floating)
                or np.issubdtype(numpy_dtype, np.complexfloating)
                or np.issubdtype(numpy_dtype, np.integer)):
            try:
                check_numerics_op_attrs = ("message",
                                           "Eager-mode inf/nan check", "T",
                                           outputs[0].dtype.as_datatype_enum)
                # TODO(cais): Consider moving this into execute.py.
                # pylint: disable=protected-access
                pywrap_tensorflow.TFE_Py_Execute(ctx._handle, output.device,
                                                 "CheckNumerics", [output],
                                                 check_numerics_op_attrs, 1)
                # pylint: enable=protected-access
            except core._NotOkStatusException:  # pylint: disable=protected-access
                value = output.numpy()
                inf_detected = np.any(np.isinf(value)) and check_inf
                nan_detected = np.any(np.isnan(value)) and check_nan
                if not inf_detected and not nan_detected:
                    continue

                error = InfOrNanError(op_type, op_name, index, len(outputs),
                                      value)
                if action == "print":
                    print("Warning: %s" % str(error))
                elif action == "warn":
                    logging.warn(str(error))
                elif action == "raise":
                    raise error
                else:
                    raise ValueError(
                        "Invalid action for inf_nan_callback: %s. Valid actions are: "
                        "{print | warn | raise}" % action)
Example #45
    def build(self, input_shape):
        """Create variables of the Cudnn RNN.

    It can be called manually before `__call__()` or automatically through
    `__call__()`. In the former case, subsequent `__call__()`s will skip
    creating variables.
    Args:
      input_shape: network input tensor shape, a python list or a TensorShape
        object with 3 dimensions.

    Raises:
      ValueError: if input_shape has wrong dimension or unknown 3rd dimension.
    """
        if self.built:
            return

        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape.ndims != 3:
            raise ValueError("Expecting input_shape with 3 dims, got %d" %
                             input_shape.ndims)
        if input_shape[-1].value is None:
            raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                             "should be defined. Found `None`.")
        self._input_size = input_shape[-1].value
        self.input_spec = input_spec.InputSpec(ndim=3,
                                               axes={-1: self._input_size})

        self._set_scope(None)

        # Not using the base class `add_variable()` since it calls
        # `tf.compat.v1.get_variable()` with a callable initializer, whereas
        # here a tensor is passed. The difference is needed to support
        # forward-compatibility with Cudnn.
        with vs.variable_scope(self._scope,
                               reuse=self.built,
                               custom_getter=self._update_trainable_weights):
            if self._kernel_initializer is None:
                self._kernel_initializer = init_ops.glorot_uniform_initializer(
                    seed=self._seed, dtype=self._plain_dtype)
            if self._bias_initializer is None:
                self._bias_initializer = init_ops.constant_initializer(
                    0.0, dtype=self._plain_dtype)

            weights = [
                self._kernel_initializer(sp, dtype=self._plain_dtype)
                for sp in self.canonical_weight_shapes
            ]
            biases = [
                self._bias_initializer(sp, dtype=self._plain_dtype)
                for sp in self.canonical_bias_shapes
            ]
            opaque_params_t = self._canonical_to_opaque(weights, biases)

            if vs.get_variable_scope().partitioner is not None:
                logging.warn(
                    "Partitioner is not supported for Cudnn RNN layer variables, using "
                    "it will create forward-compatibility issues with future "
                    "CUDA/CuDNN generations.")
            # Initialize opaque params with a tensor of unknown shape, so
            # self.add_variable(name, shape, initializer, ...) cannot be used.
            self.kernel = vs.get_variable("opaque_kernel",
                                          dtype=self._plain_dtype,
                                          initializer=opaque_params_t,
                                          validate_shape=False)
        # Create saveable in the outer scope of the cudnn subgraph, such that
        # alternative subgraph with platform-independent rnn cells can load the
        # checkpoints directly.
        if not (self.built or vs.get_variable_scope().reuse is True):
            self._create_saveable()
        self.built = True
Example #46
def _graph_mode_decorator(f, args, kwargs):
    """Implement custom gradient decorator for graph mode."""
    # TODO(rsepassi): Add support for kwargs
    if kwargs:
        raise ValueError(
            "The custom_gradient decorator currently supports keywords "
            "arguments only when eager execution is enabled.")
    name = generate_name()
    args = nest.map_structure(ops.convert_to_tensor, args)

    # Checking global and local variables attempts to ensure that no non-resource
    # Variables are added to the graph.
    current_var_scope = variable_scope.get_variable_scope()
    before_vars = set([
        v.ref() for v in current_var_scope.global_variables() +
        current_var_scope.local_variables()
    ])
    with tape_lib.VariableWatcher() as variable_watcher:
        result, grad_fn = f(*args)

    args = nest.flatten(args)
    flat_result = nest.flatten(result)
    flat_result_len = len(flat_result)

    after_vars = set([
        v.ref() for v in current_var_scope.global_variables() +
        current_var_scope.local_variables()
    ])
    new_vars = after_vars - before_vars
    new_vars_list = [v.deref() for v in new_vars]
    for v in new_vars_list:
        if not resource_variable_ops.is_resource_variable(v):
            raise TypeError(
                "All variables used by a function wrapped with @custom_gradient must "
                "be `ResourceVariable`s. Ensure that no `variable_scope` is created "
                "with `use_resource=False`.")

    # The variables that grad_fn needs to return gradients for are the set of
    # variables used that are *not* part of the inputs.
    variables_in_tape = frozenset(
        [v.ref() for v in variable_watcher.watched_variables()])

    graphs = {getattr(o, "graph", None) for o in flat_result}
    # Not all results may be tensors. However, we want to ensure that all
    # tensor outputs come from the same graph, and to get a list of captured
    # inputs for the variable search.
    graphs.discard(None)  # Discard non-graph outputs
    if graphs:
        if len(graphs) > 1:
            raise ValueError(
                "All custom_gradient outputs should be from the same graph")
        output_graph = graphs.pop()
        filtered_input_tensors = []
        for i in args:
            if i.graph == output_graph:
                filtered_input_tensors.append(i)
    else:
        filtered_input_tensors = args

    variables_in_subgraph = frozenset([
        v.ref()
        for v in _get_dependent_variables(input_ops=filtered_input_tensors,
                                          output_ops=flat_result)
    ])
    variables = sorted(
        [v.deref() for v in variables_in_subgraph.union(variables_in_tape)],
        key=lambda v: v.name)

    grad_argspec = tf_inspect.getfullargspec(grad_fn)
    variables_in_signature = ("variables" in grad_argspec.args
                              or "variables" in grad_argspec.kwonlyargs
                              or grad_argspec.varkw)
    if variables and not variables_in_signature:
        raise TypeError(
            "@tf.custom_gradient grad_fn must accept keyword argument 'variables', "
            "since function uses variables: {}".format(variables))
    if variables_in_signature and not variables:
        # User seems to intend to use variables but none were captured.
        logging.warn(
            "@custom_gradient grad_fn has 'variables' in signature, but "
            "no ResourceVariables were used on the forward pass.")

    all_tensors = flat_result + args + variables

    def tape_grad_fn(*result_grads):
        """Custom grad fn wrapper."""
        result_grads = result_grads[:flat_result_len]
        if variables:
            input_grads, variable_grads = grad_fn(*result_grads,
                                                  variables=variables)
            if len(variable_grads) != len(variables):
                raise ValueError("Must return gradient for each variable from "
                                 "@custom_gradient grad_fn.")
        else:
            input_grads = grad_fn(*result_grads)
            variable_grads = []

        # Need to return one value per input to the IdentityN, so pad the
        # gradients of the inputs of the custom_gradient function with the
        # gradients of the outputs as well.
        input_grads = nest.flatten(input_grads)
        return ([None] * flat_result_len) + input_grads + variable_grads

    @ops.RegisterGradient(name)
    def internal_grad_fn(unused_op, *result_grads):  # pylint: disable=unused-variable
        """Custom grad fn wrapper."""
        return tape_grad_fn(*result_grads)

    original_tensors = all_tensors
    with ops.get_default_graph().gradient_override_map({"IdentityN": name}):
        all_tensors = array_ops.identity_n(all_tensors)

    original_tensors = [ops.convert_to_tensor(x) for x in original_tensors]

    # Propagate handle data for happier shape inference for resource variables.
    for i, t in enumerate(original_tensors):
        if t.dtype == dtypes.resource and hasattr(t, "_handle_data"):
            all_tensors[i]._handle_data = t._handle_data  # pylint: disable=protected-access
    tape_lib.record_operation(f.__name__, all_tensors, original_tensors,
                              tape_grad_fn)
    for ot, t in zip(original_tensors, all_tensors):
        handle_data_util.copy_handle_data(ot, t)
    return nest.pack_sequence_as(structure=result,
                                 flat_sequence=all_tensors[:flat_result_len])
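
This decorator is the graph-mode half of `tf.custom_gradient`; the canonical usage shape, a numerically stable log(1 + e^x) with a hand-written gradient routed through `tape_grad_fn` above:

import tensorflow as tf

@tf.custom_gradient
def log1pexp(x):
  e = tf.exp(x)
  def grad(dy):
    # d/dx log(1 + e^x) = sigmoid(x) = 1 - 1 / (1 + e^x).
    return dy * (1 - 1 / (1 + e))
  return tf.math.log(1 + e), grad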
Example #47
def create_partitioned_variables(shape,
                                 slicing,
                                 initializer,
                                 dtype=dtypes.float32,
                                 trainable=True,
                                 collections=None,
                                 name=None,
                                 reuse=None):
    """Create a list of partitioned variables according to the given `slicing`.

  Currently only one dimension of the full variable can be sliced, and the
  full variable can be reconstructed by the concatenation of the returned
  list along that dimension.

  Args:
    shape: List of integers.  The shape of the full variable.
    slicing: List of integers.  How to partition the variable.
      Must be of the same length as `shape`.  Each value
      indicates how many slices to create in the corresponding
      dimension.  Presently only one of the values can be more than 1;
      that is, the variable can only be sliced along one dimension.

      For convenience, the requested number of partitions does not have to
      divide the corresponding dimension evenly.  If it does not, the
      shapes of the partitions are incremented by 1 starting from partition
      0 until all slack is absorbed.  The adjustment rules may change in the
      future, but as you can save/restore these variables with different
      slicing specifications this should not be a problem.
    initializer: A `Tensor` of shape `shape` or a variable initializer
      function.  If a function, it will be called once for each slice,
      passing the shape and data type of the slice as parameters.  The
      function must return a tensor with the same shape as the slice.
    dtype: Type of the variables. Ignored if `initializer` is a `Tensor`.
    trainable: If True also add all the variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES`.
    collections: List of graph collections keys to add the variables to.
      Defaults to `[GraphKeys.VARIABLES]`.
    name: Optional name for the full variable.  Defaults to
      `"PartitionedVariable"` and gets uniquified automatically.
    reuse: Boolean or `None`; if `True` and `name` is set, previously created
      variables are reused. If `False`, new variables are created. If `None`,
      the parent scope's reuse setting is inherited.

  Returns:
    A list of Variables corresponding to the slicing.

  Raises:
    ValueError: If any of the arguments is malformed.
  """
    logging.warn("create_partitioned_variables is deprecated.  Use "
                 "tf.get_variable with a partitioner set, or "
                 "tf.get_partitioned_variable_list, instead.")

    if len(shape) != len(slicing):
        raise ValueError("The 'shape' and 'slicing' of a partitioned Variable "
                         "must have the length: shape: %s, slicing: %s" %
                         (shape, slicing))
    if len(shape) < 1:
        raise ValueError("A partitioned Variable must have rank at least 1: "
                         "shape: %s" % shape)

    # Legacy: we are provided the slicing directly, so just pass it to
    # the partitioner.
    partitioner = lambda **unused_kwargs: slicing

    with variable_scope.variable_op_scope([],
                                          name,
                                          "PartitionedVariable",
                                          reuse=reuse) as scope:

        # pylint: disable=protected-access
        vs, _ = variable_scope._get_partitioned_variable_list(
            name="part",
            shape=shape,
            dtype=dtype,
            initializer=initializer,
            trainable=trainable,
            partitioner=partitioner,
            collections=collections)

        for var in vs:
            var._save_slice_info.full_name = scope.name
        # pylint: enable=protected-access

    return vs
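The slack-absorption rule in the docstring above is easy to check concretely. Here is a minimal pure-Python sketch (not TensorFlow code; `partition_sizes` is a hypothetical helper for illustration):

```python
# Sketch of the slack-absorption rule: when num_parts does not divide
# dim evenly, partitions 0..r-1 each receive one extra element until
# the remainder r is absorbed.
def partition_sizes(dim, num_parts):
    base, remainder = divmod(dim, num_parts)
    return [base + 1 if i < remainder else base for i in range(num_parts)]

# A dimension of size 10 split into 4 partitions:
assert partition_sizes(10, 4) == [3, 3, 2, 2]
```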
Example #48
def einsum(equation, *inputs):
  """
  A generalized contraction between tensors of arbitrary dimension.

  This function returns a tensor whose elements are defined by `equation`,
  which is written in a shorthand form inspired by the Einstein summation
  convention.  As an example, consider multiplying two matrices
  A and B to form a matrix C.  The elements of C are given by:

  ```
    C[i,k] = sum_j A[i,j] * B[j,k]
  ```

  The corresponding `equation` is:

  ```
    ij,jk->ik
  ```

  In general, the `equation` is obtained from the more familiar element-wise
  equation by
    1. removing variable names, brackets, and commas,
    2. replacing "*" with ",",
    3. dropping summation signs, and
    4. moving the output to the right, and replacing "=" with "->".

  Many common operations can be expressed in this way.  For example:

  ```python
  # Matrix multiplication
  >>> einsum('ij,jk->ik', m0, m1)  # output[i,k] = sum_j m0[i,j] * m1[j, k]

  # Dot product
  >>> einsum('i,i->', u, v)  # output = sum_i u[i]*v[i]

  # Outer product
  >>> einsum('i,j->ij', u, v)  # output[i,j] = u[i]*v[j]

  # Transpose
  >>> einsum('ij->ji', m)  # output[j,i] = m[i,j]

  # Batch matrix multiplication
  >>> einsum('aij,ajk->aik', s, t)  # out[a,i,k] = sum_j s[a,i,j] * t[a, j, k]
  ```

  This function behaves like `numpy.einsum`, but does not support:
  * Ellipses (subscripts like `ij...,jk...->ik...`)
  * Subscripts where an axis appears more than once for a single input
    (e.g. `ijj,k->ik`).
  * Subscripts that are summed across multiple inputs (e.g., `ij,ij,jk->ik`).

  Args:
    equation: a `str` describing the contraction, in the same format as
      `numpy.einsum`.
    inputs: the inputs to contract (each one a `Tensor`), whose shapes should
      be consistent with `equation`.

  Returns:
    The contracted `Tensor`, with shape determined by `equation`.

  Raises:
    ValueError: If
      - the format of `equation` is incorrect,
      - the number of inputs implied by `equation` does not match `len(inputs)`,
      - an axis appears in the output subscripts but not in any of the inputs,
      - the number of dimensions of an input differs from the number of
        indices in its subscript, or
      - the input shapes are inconsistent along a particular axis.
  """
  if '...' in equation:
    raise ValueError("Subscripts with ellipses are not yet supported.")

  match = re.match('([a-z,]+)(->[a-z]*)?', equation)
  if not match:
    raise ValueError(
        'Indices have incorrect format: %s' % equation
    )

  inputs = list(inputs)
  input_axis_labels = match.group(1).split(',')

  if len(inputs) != len(input_axis_labels):
    raise ValueError('Got %d arguments for equation "%s", expecting %d' % (
        len(inputs), equation, len(input_axis_labels)))

  axis_labels = set(''.join(input_axis_labels))
  if match.group(2):
    output_axis_labels = match.group(2)[2:]
  else:
    # infer the output subscripts if not given, assume alphabetical order
    indices = ''.join(sorted(axis_labels))
    counts = {ax: 0 for ax in indices}
    for axes_ in input_axis_labels:
      for ax in axes_:
        counts[ax] += 1

    output_axis_labels = ''.join(
        sorted(ax for ax in indices if counts[ax] == 1))

  for a in axis_labels:
    input_count = sum(1 for s in input_axis_labels if a in s)
    if input_count > 2 and a not in output_axis_labels:
      logging.warn(
          'Falling back to exponential-space implementation of einsum() because'
          ' index "%s" is summed over more than two inputs.', a)
      return _exponential_space_einsum(equation, *inputs)

  temp = inputs[0]
  temp_axis_labels = input_axis_labels[0]
  for i in xrange(len(inputs)-1):
    axes_to_sum = (set(temp_axis_labels) & set(input_axis_labels[i+1])
                   - set(output_axis_labels))
    temp, temp_axis_labels = _einsum_reduction(temp,
                                               temp_axis_labels,
                                               inputs[i+1],
                                               input_axis_labels[i+1],
                                               axes_to_sum)

  missing_indices = set(temp_axis_labels) - set(output_axis_labels)
  if missing_indices:
    reduction_indices = [i for i, a in enumerate(temp_axis_labels)
                         if a not in output_axis_labels]
    temp = math_ops.reduce_sum(temp, reduction_indices=reduction_indices)
    temp_axis_labels = ''.join(a for a in temp_axis_labels
                               if a in output_axis_labels)

  if sorted(temp_axis_labels) != sorted(output_axis_labels):
    raise ValueError('Invalid equation: %s' % equation)

  perm = [temp_axis_labels.index(a) for a in output_axis_labels]
  return _transpose_if_necessary(temp, perm)
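Because this internal einsum is awkward to run standalone, here is a hedged cross-check of the same equation format against `numpy.einsum`, which the docstring cites as the reference behavior (NumPy assumed available):

```python
import numpy as np

# The matrix-multiplication equation from the docstring; the string
# 'ij,jk->ik' has the same meaning under numpy.einsum.
m0 = np.arange(6.0).reshape(2, 3)
m1 = np.arange(12.0).reshape(3, 4)
out = np.einsum('ij,jk->ik', m0, m1)
assert np.allclose(out, m0 @ m1)
```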
Example #49
def main(unused_argv=None):
    logging.set_verbosity(logging.INFO)
    tf_version = versions.__version__
    print('TensorFlow version %s detected' % tf_version)
    print('Welcome to the Cloud TPU Profiler v%s' %
          profiler_version.__version__)

    if LooseVersion(tf_version) < LooseVersion('2.2.0'):
        sys.exit('You must install tensorflow >= 2.2.0 to use this plugin.')

    if not FLAGS.service_addr and not FLAGS.tpu:
        sys.exit('You must specify either --service_addr or --tpu.')

    tpu_cluster_resolver = None
    if FLAGS.service_addr:
        if FLAGS.tpu:
            logging.warn('Both --service_addr and --tpu are set. Ignoring '
                         '--tpu and using --service_addr.')
        service_addr = FLAGS.service_addr
    else:
        try:
            tpu_cluster_resolver = (resolver.TPUClusterResolver(
                [FLAGS.tpu], zone=FLAGS.tpu_zone, project=FLAGS.gcp_project))
            service_addr = tpu_cluster_resolver.get_master()
        except (ValueError, TypeError):
            sys.exit(
                'Failed to find TPU %s in zone %s project %s. You may use '
                '--tpu_zone and --gcp_project to specify the zone and project of'
                ' your TPU.' % (FLAGS.tpu, FLAGS.tpu_zone, FLAGS.gcp_project))
    service_addr = service_addr.replace('grpc://',
                                        '').replace(':8470', ':8466')

    workers_list = ''
    if FLAGS.workers_list is not None:
        workers_list = FLAGS.workers_list
    elif tpu_cluster_resolver is not None:
        workers_list = get_workers_list(tpu_cluster_resolver)

    # If the profiling duration was not set by the user, or was set to a
    # non-positive value, default it to 1000 ms.
    duration_ms = FLAGS.duration_ms if FLAGS.duration_ms > 0 else 1000

    if FLAGS.monitoring_level > 0:
        print('Since monitoring level is provided, profile', service_addr,
              'for', duration_ms, 'ms and show metrics for',
              FLAGS.num_queries, 'time(s).')
        monitoring_helper(service_addr, duration_ms, FLAGS.monitoring_level,
                          FLAGS.num_queries)
    else:
        if not FLAGS.logdir:
            sys.exit('You must specify either --logdir or --monitoring_level.')

        if not gfile.Exists(FLAGS.logdir):
            gfile.MakeDirs(FLAGS.logdir)

        try:
            if LooseVersion(tf_version) < LooseVersion('2.3.0'):
                profiler_client.trace(service_addr,
                                      os.path.expanduser(FLAGS.logdir),
                                      duration_ms, workers_list,
                                      FLAGS.num_tracing_attempts)
            else:
                options = profiler.ProfilerOptions(
                    host_tracer_level=FLAGS.host_tracer_level)
                profiler_client.trace(service_addr,
                                      os.path.expanduser(FLAGS.logdir),
                                      duration_ms, workers_list,
                                      FLAGS.num_tracing_attempts, options)
        except errors.UnavailableError:
            sys.exit(0)
Example #50
    def __init__(self,
                 input_saved_model_dir=None,
                 input_saved_model_tags=None,
                 input_saved_model_signature_key=None,
                 input_graph_def=None,
                 nodes_blacklist=None,
                 session_config=None,
                 max_batch_size=1,
                 max_workspace_size_bytes=DEFAULT_TRT_MAX_WORKSPACE_SIZE_BYTES,
                 precision_mode=TrtPrecisionMode.FP32,
                 minimum_segment_size=3,
                 is_dynamic_op=False,
                 maximum_cached_engines=1,
                 use_calibration=True):
        """Initialize the converter.

    Args:
      input_saved_model_dir: the directory to load the SavedModel which contains
        the input graph to transform. Used only when input_graph_def is None.
      input_saved_model_tags: list of tags to load the SavedModel.
      input_saved_model_signature_key: the key of the signature to optimize the
        graph for.
      input_graph_def: a GraphDef object containing a model to be transformed.
        If set to None, the graph will be read from the SavedModel loaded from
        input_saved_model_dir.
      nodes_blacklist: list of node names to prevent the converter from
        touching.
      session_config: the ConfigProto used to create a Session. It's also used
        as a template to create a TRT-enabled ConfigProto for conversion. If not
        specified, a default ConfigProto will be used.
      max_batch_size: max size for the input batch.
      max_workspace_size_bytes: the maximum GPU temporary memory which the TRT
        engine can use at execution time. This corresponds to the
        'workspaceSize' parameter of nvinfer1::IBuilder::setMaxWorkspaceSize().
      precision_mode: one of TrtPrecisionMode.supported_precision_modes().
      minimum_segment_size: the minimum number of nodes required for a subgraph
        to be replaced by TRTEngineOp.
      is_dynamic_op: whether to generate dynamic TRT ops which will build the
        TRT network and engine at run time.
      maximum_cached_engines: max number of cached TRT engines in dynamic TRT
        ops. If the number of cached engines is already at max but none of them
        can serve the input, the TRTEngineOp will fall back to run the TF
        function based on which the TRTEngineOp is created.
      use_calibration: this argument is ignored if precision_mode is not INT8.
        If set to True, a calibration graph will be created to calibrate the
        missing ranges. The calibration graph must be converted to an inference
        graph by running calibration with calibrate(). If set to False,
        quantization nodes will be expected for every tensor in the graph
        (excluding those which will be fused). If a range is missing, an error
        will occur. Please note that accuracy may be negatively affected if
        there is a mismatch between which tensors TRT quantizes and which
        tensors were trained with fake quantization.

    Raises:
      ValueError: if the combination of the parameters is invalid.
      RuntimeError: if this class is used in TF 2.0.
    """
        if context.executing_eagerly():
            raise RuntimeError("Please use TrtGraphConverterV2 in TF 2.0.")

        if input_graph_def and input_saved_model_dir:
            raise ValueError(
                "Can only specify one of input_graph_def and input_saved_model_dir"
            )
        if not input_graph_def and not input_saved_model_dir:
            raise ValueError("Must specify one of input_graph_def and "
                             "input_saved_model_dir")
        _check_trt_version_compatibility()

        self._input_graph_def = input_graph_def
        self._nodes_blacklist = nodes_blacklist

        self._input_saved_model_dir = input_saved_model_dir
        self._converted = False
        self._grappler_meta_graph_def = None

        self._input_saved_model_tags = (input_saved_model_tags
                                        or [tag_constants.SERVING])
        self._input_saved_model_signature_key = (
            input_saved_model_signature_key
            or signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
        self._session_config = session_config or config_pb2.ConfigProto()

        # For calibration usage.
        self._calibration_graph = None
        self._calibration_data_collected = False
        self._need_calibration = (precision_mode == TrtPrecisionMode.INT8
                                  and use_calibration)
        if self._need_calibration and not is_dynamic_op:
            tf_logging.warn(
                "INT8 precision mode with calibration is supported with "
                "dynamic TRT ops only. Disregarding is_dynamic_op parameter.")
            is_dynamic_op = True

        # TODO(laigd):
        # - Verify in int8 mode that maximum_cached_engines is set properly.
        # - If it fails to build the int8 engine it should return an error.
        rewriter_config_template = None
        if (session_config and session_config.HasField("graph_options")
                and session_config.graph_options.HasField("rewrite_options")):
            rewriter_config_template = session_config.graph_options.rewrite_options

        self._conversion_params = TrtConversionParams(
            rewriter_config_template=rewriter_config_template,
            max_workspace_size_bytes=max_workspace_size_bytes,
            precision_mode=precision_mode,
            minimum_segment_size=minimum_segment_size,
            is_dynamic_op=is_dynamic_op,
            maximum_cached_engines=maximum_cached_engines,
            use_calibration=use_calibration,
            max_batch_size=max_batch_size)
        _check_conversion_params(self._conversion_params)
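A hedged usage sketch, assuming this `__init__` belongs to `TrtGraphConverter` in `tensorflow.python.compiler.tensorrt` (TF 1.x); the SavedModel paths are placeholders:

```python
from tensorflow.python.compiler.tensorrt import trt_convert as trt

converter = trt.TrtGraphConverter(
    input_saved_model_dir='/tmp/saved_model',   # placeholder path
    precision_mode=trt.TrtPrecisionMode.FP16,
    is_dynamic_op=True,
    maximum_cached_engines=4)
converter.convert()                     # returns the converted GraphDef
converter.save('/tmp/trt_saved_model')      # placeholder path
```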
Example #51
def _check_dtype(dtype):
    if dtypes.as_dtype(dtype) == dtypes.float64:
        logging.warn(
            'float64 is not supported by many models, consider casting to float32.'
        )
    return dtype
def scattered_embedding_lookup_sparse(params,
                                      sparse_values,
                                      dimension,
                                      combiner=None,
                                      default_value=None,
                                      name=None,
                                      hash_key=None):
  """Looks up embeddings of a sparse feature using parameter hashing.

  See `tf.contrib.layers.scattered_embedding_lookup` for embedding with hashing.

  Args:
    params: A `Tensor`, `list` of `Tensors`, or `PartitionedVariable`. Each
      tensor must be of rank 1 with fully-defined shape.
    sparse_values: A 2-D `SparseTensor` containing the values to be embedded.
      Some rows may be empty.
    dimension: Embedding dimension.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the
      default.
    default_value: The value to use for an entry with no features.
    name: An optional name for this op.
    hash_key: Specify the hash_key that will be used by the `FingerprintCat64`
      function to combine the crosses fingerprints on SparseFeatureCrossOp
      (optional).

  Returns:
     Dense tensor with shape [N, dimension] with N the number of rows in
       sparse_values.

  Raises:
    TypeError: If sparse_values is not a SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sparse_values, sparse_tensor.SparseTensor):
    raise TypeError("sparse_values must be SparseTensor")

  with ops.name_scope(name, "scattered_embedding_lookup_sparse",
                      params + [sparse_values]) as scope:
    # Fill in the empty rows.
    if default_value is None:
      # Random default values to reduce the risk of collision.
      if sparse_values.dtype == dtypes.string:
        default_value = "6ZxWzWOHxZ"
      else:
        default_value = 1288896567
    sparse_values, _ = sparse_ops.sparse_fill_empty_rows(
        sparse_values, default_value)

    segment_ids = sparse_values.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    values = sparse_values.values
    values, idx = array_ops.unique(values)

    embeddings = scattered_embedding_lookup(
        params, values, dimension, hash_key=hash_key)

    if combiner == "sum":
      embeddings = math_ops.sparse_segment_sum(
          embeddings, idx, segment_ids, name=scope)
    elif combiner == "mean":
      embeddings = math_ops.sparse_segment_mean(
          embeddings, idx, segment_ids, name=scope)
    elif combiner == "sqrtn":
      embeddings = math_ops.sparse_segment_sqrt_n(
          embeddings, idx, segment_ids, name=scope)
    else:
      raise ValueError("Combiner must be one of 'mean', 'sqrtn' or 'sum'.")

    return embeddings
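The three combiners differ only in how the per-segment sum is normalized. A small NumPy sketch of the arithmetic (NumPy assumed; this stands in for the `sparse_segment_*` ops used above):

```python
import numpy as np

# Per-segment "sum", "mean", and "sqrtn" over embedding rows.
embeddings = np.array([[1., 1.], [3., 3.], [5., 5.]])
segment_starts = [0, 2]          # rows 0-1 -> segment 0, row 2 -> segment 1
sums = np.add.reduceat(embeddings, segment_starts)   # "sum"
counts = np.array([2., 1.])[:, None]
mean = sums / counts                                 # "mean"
sqrtn = sums / np.sqrt(counts)                       # "sqrtn"
```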
Example #53
  def __init__(self,
               num_units,
               use_peepholes=False,
               cell_clip=None,
               initializer=None,
               num_proj=None,
               proj_clip=None,
               num_unit_shards=None,
               num_proj_shards=None,
               forget_bias=1.0,
               state_is_tuple=True,
               activation=None,
               reuse=None,
               name=None,
               dtype=None):
    """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell.
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      cell_clip: (optional) A float value, if provided the cell state is clipped
        by this value prior to the cell output activation.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a
        variable_scope partitioner instead.
      num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a
        variable_scope partitioner instead.
      forget_bias: Biases of the forget gate are initialized by default to 1 in
        order to reduce the scale of forgetting at the beginning of the
        training. Must set it manually to `0.0` when restoring from CudnnLSTM
        trained checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of the
        `c_state` and `m_state`.  If False, they are concatenated along the
        column axis.  This latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables in
        an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: String, the name of the layer. Layers with the same name will share
        weights, but to avoid mistakes we require reuse=True in such cases.
      dtype: Default dtype of the layer (default of `None` means use the type of
        the first input). Required when `build` is called before `call`.  When
        restoring from CudnnLSTM-trained checkpoints, use
        `CudnnCompatibleLSTMCell` instead.
    """
    super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
    # TODO(raziel): decide if we want to just support tuples (yes please!).
    if not state_is_tuple:
      logging.warn(
          "%s: Using a concatenated state is slower and will soon be "
          "deprecated.  Use state_is_tuple=True.", self)
    if num_unit_shards is not None or num_proj_shards is not None:
      logging.warn(
          "%s: The num_unit_shards and proj_unit_shards parameters are "
          "deprecated and will be removed in Jan 2017.  "
          "Use a variable scope with a partitioner instead.", self)

    # Inputs must be 2-dimensional.
    # TODO(raziel): layers stuff -- chop if un-layerizing Op.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._tflite_wrapper = op_hint.OpHint("UnidirectionalSequenceLstm")

    self._num_units = num_units
    self._use_peepholes = use_peepholes
    self._cell_clip = cell_clip
    self._initializer = initializer
    self._num_proj = num_proj
    self._proj_clip = proj_clip
    self._num_unit_shards = num_unit_shards
    self._num_proj_shards = num_proj_shards
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

    self._output_size = num_proj if num_proj else num_units
    self._state_size = (
        rnn_cell_impl.LSTMStateTuple(num_units, self._output_size)
        if state_is_tuple else num_units + self._output_size)
Example #54
    def __init__(self,
                 estimator,
                 train_input_fn,
                 eval_input_fn,
                 eval_metrics=None,
                 train_steps=None,
                 eval_steps=100,
                 train_monitors=None,
                 eval_hooks=None,
                 local_eval_frequency=None,
                 eval_delay_secs=120,
                 continuous_eval_throttle_secs=60,
                 min_eval_frequency=None,
                 delay_workers_by_global_step=False,
                 export_strategies=None,
                 train_steps_per_iteration=None,
                 checkpoint_and_export=False,
                 saving_listeners=None,
                 check_interval_secs=5):
        """Constructor for `Experiment`.

    Creates an Experiment instance. None of the functions passed to this
    constructor are executed at construction time. They are stored and used
    when a method that requires them is executed.

    Args:
      estimator: Object implementing Estimator interface, which could be a
        combination of @{tf.contrib.learn.Trainable} and
        @{tf.contrib.learn.Evaluable} (deprecated), or
        @{tf.estimator.Estimator}.
      train_input_fn: function, returns features and labels for training.
      eval_input_fn: function, returns features and labels for evaluation. If
        `eval_steps` is `None`, this should be configured to produce only a
        finite number of batches (generally, 1 epoch over the evaluation data).
      eval_metrics: `dict` of string, metric function. If `None`, default set
        is used. This should be `None` if the `estimator` is
        @{tf.estimator.Estimator}. If metrics are provided they will be
        *appended* to the default set.
      train_steps: Perform this many steps of training. `None`, the default,
        means train forever.
      eval_steps: `evaluate` runs until input is exhausted (or another exception
        is raised), or for `eval_steps` steps, if specified.
      train_monitors: A list of monitors to pass to the `Estimator`'s `fit`
        function.
      eval_hooks: A list of `SessionRunHook` hooks to pass to the
        `Estimator`'s `evaluate` function.
      local_eval_frequency: (applies only to local_run) Frequency of running
        eval in steps. If `None`, runs evaluation only at the end of training.
      eval_delay_secs: Start evaluating after waiting for this many seconds.
      continuous_eval_throttle_secs: Do not re-evaluate unless the last
        evaluation was started at least this many seconds ago for
        continuous_eval().
      min_eval_frequency: (applies only to train_and_evaluate). the minimum
        number of steps between evaluations. Of course, evaluation does not
        occur if no new snapshot is available, hence, this is the minimum.
        If 0, the evaluation will only happen after training.
        If None, defaults to 1. To avoid checking for new checkpoints too
        frequently, the interval is further limited to be at least
        check_interval_secs between checks.
      delay_workers_by_global_step: if `True` delays training workers
        based on global step instead of time.
      export_strategies: Iterable of `ExportStrategy`s, or a single one, or
        `None`.
      train_steps_per_iteration: (applies only to continuous_train_and_eval).
        Perform this many (integer) number of train steps for each
        training-evaluation iteration. With a small value, the model will be
        evaluated more frequently with more checkpoints saved. If `None`, will
        use a default value (which is smaller than `train_steps` if provided).
      checkpoint_and_export: (applies only to train_and_evaluate). If `True`,
        performs intermediate model checkpoints and exports during the training
        process, rather than only once model training is complete. This
        parameter is experimental and may be changed or removed in the future.
        Setting this parameter leads to the following: the value of
        `min_eval_frequency` will be ignored, and the number of steps between
        evaluations and exports will instead be determined by the Estimator
        configuration parameters `save_checkpoints_secs` and
        `save_checkpoints_steps`. Also, this parameter leads to the creation of
        a default `CheckpointSaverHook` instead of a `ValidationMonitor`, so the
        provided `train_monitors` will need to be adjusted accordingly.
      saving_listeners: list of `CheckpointSaverListener` objects. Used by
        tf.estimator.Estimator for callbacks that run immediately before or
        after checkpoint savings.
      check_interval_secs:
        Minimum time between subsequent checks for a new checkpoint. This
        mostly applies if both min_eval_frequency and the time spent per
        training step are low.
    Raises:
      ValueError: if `estimator` does not implement Estimator interface,
        or if export_strategies has the wrong type.
    """
        if isinstance(estimator, core_estimator.Estimator):
            self._core_estimator_used = True
            if eval_metrics is not None:
                raise ValueError(
                    "`eval_metrics` must be `None` with `tf.estimator.Estimator`. "
                    "Use `eval_metric_ops` in `tf.estimator.EstimatorSpec` instead."
                )
        else:
            self._core_estimator_used = False
            if not isinstance(estimator, evaluable.Evaluable):
                raise ValueError(
                    "`estimator` must implement `tf.contrib.learn.Evaluable` "
                    "or `tf.estimator.Estimator`.")
            if not isinstance(estimator, trainable.Trainable):
                raise ValueError(
                    "`estimator` must implement `tf.contrib.learn.Trainable`"
                    "or `tf.estimator.`Estimator`.")
            if saving_listeners is not None:
                raise ValueError("`saving_listeners` must be `None` with "
                                 "`tf.contrib.learn.Estimator`.")

        if isinstance(estimator, tpu_estimator.TPUEstimator):
            logging.warn(
                "`Experiment` class cannot work with `tf.contrib.tpu.TPUEstimator`. "
                "Please call `TPUEstimator` train/evaluate directly. \n"
                "Details: `Experiment` class is designed for between-graph "
                "distributed training, while `TPUEstimator` is working in in-graph "
                "distributed mode. Use with care.")

        super(Experiment, self).__init__()
        # Immutable fields.
        self._estimator = estimator
        self._train_input_fn = train_input_fn
        self._eval_input_fn = eval_input_fn
        self._eval_metrics = eval_metrics
        self._train_steps = train_steps
        self._eval_steps = eval_steps
        self._local_eval_frequency = local_eval_frequency
        self._eval_delay_secs = eval_delay_secs
        self._continuous_eval_throttle_secs = continuous_eval_throttle_secs
        self._checkpoint_and_export = checkpoint_and_export
        self._saving_listeners = saving_listeners
        self._min_eval_frequency = min_eval_frequency if (min_eval_frequency
                                                          is not None) else 1
        self._check_interval_secs = check_interval_secs
        self._delay_workers_by_global_step = delay_workers_by_global_step
        self._train_monitors = train_monitors[:] if train_monitors else []
        self._eval_hooks = eval_hooks[:] if eval_hooks else []
        self._set_export_strategies(export_strategies)

        self._train_steps_per_iteration = train_steps_per_iteration
        if (self._train_steps_per_iteration is not None
                and not isinstance(self._train_steps_per_iteration, int)):
            raise ValueError("`train_steps_per_iteration` must be an integer.")
Example #55
    def _add_children_recreated_from_config(self, obj, proto, node_id):
        """Recursively records objects recreated from config."""
        # pylint: disable=protected-access
        if node_id in self._traversed_nodes_from_config:
            return
        self._traversed_nodes_from_config.append(node_id)
        obj._maybe_initialize_trackable()
        if isinstance(obj, base_layer.Layer) and not obj.built:
            metadata = json_utils.decode(proto.user_object.metadata)
            self._try_build_layer(obj, node_id,
                                  metadata.get('build_input_shape'))

        # Create list of all possible children
        children = []
        # Look for direct children
        for reference in proto.children:
            obj_child = obj._lookup_dependency(reference.local_name)
            children.append((obj_child, reference.node_id))

        # Add metrics that may have been added to the layer._metrics list.
        # This is stored in the SavedModel as layer.keras_api.layer_metrics in
        # SavedModels created after TF 2.2.
        metric_list_node_id = self._search_for_child_node(
            node_id, [constants.KERAS_ATTR, 'layer_metrics'],
            raise_error=False)
        if metric_list_node_id is not None and hasattr(obj, '_metrics'):
            obj_metrics = {m.name: m for m in obj._metrics}
            for reference in self._proto.nodes[metric_list_node_id].children:
                metric = obj_metrics.get(reference.local_name)
                if metric is not None:
                    children.append((metric, reference.node_id))

        for (obj_child, child_id) in children:
            child_proto = self._proto.nodes[child_id]

            if not isinstance(obj_child, trackable.Trackable):
                continue
            if (child_proto.user_object.identifier
                    in revived_types.registered_identifiers()):
                setter = revived_types.get_setter(child_proto.user_object)
            elif obj_child._object_identifier in KERAS_OBJECT_IDENTIFIERS:
                setter = _revive_setter
            else:
                setter = setattr
                # pylint: enable=protected-access

            if (child_id in self._nodes_recreated_from_config
                    and self._nodes_recreated_from_config[child_id][0]
                    is not obj_child):
                # This means that the same trackable object is referenced by two
                # different objects that were recreated from the config.
                logging.warn(
                    'Looks like there is an object (perhaps variable or layer)'
                    ' that is shared between different layers/models. This '
                    'may cause issues when restoring the variable values. '
                    'Object: {}'.format(obj_child))
            self._nodes_recreated_from_config[child_id] = (
                obj_child, self._config_node_setter(setter))
            self._all_nodes_recreated_from_config.add(obj_child)
            self._add_children_recreated_from_config(obj_child, child_proto,
                                                     child_id)
Example #56
def _graph_mode_decorator(f, args, kwargs):
    """Implement custom gradient decorator for graph mode."""
    # TODO(rsepassi): Add support for kwargs
    if kwargs:
        raise ValueError(
            "The custom_gradient decorator currently supports keywords "
            "arguments only when eager execution is enabled.")
    name = "CustomGradient-%s" % ops.uid()
    args = [ops.convert_to_tensor(x) for x in args]

    # Check global and local variables to try to ensure that no non-resource
    # Variables are added to the graph.
    current_var_scope = variable_scope.get_variable_scope()
    before_vars = set([
        v.experimental_ref() for v in current_var_scope.global_variables() +
        current_var_scope.local_variables()
    ])
    with backprop.GradientTape() as tape:
        result, grad_fn = f(*args)
    after_vars = set([
        v.experimental_ref() for v in current_var_scope.global_variables() +
        current_var_scope.local_variables()
    ])
    new_vars = after_vars - before_vars
    new_vars_list = [v.deref() for v in new_vars]
    for v in new_vars_list:
        if not resource_variable_ops.is_resource_variable(v):
            raise TypeError(
                "All variables used by a function wrapped with @custom_gradient must "
                "be `ResourceVariable`s. Ensure that no `variable_scope` is created "
                "with `use_resource=False`.")
    # The variables that grad_fn needs to return gradients for are the set of
    # variables used that are *not* part of the inputs.
    inputs = args
    variables_in_tape = frozenset(
        [v.experimental_ref()
         for v in tape.watched_variables()]) - frozenset(v.experimental_ref()
                                                         for v in inputs)
    variables_in_subgraph = frozenset([
        v.experimental_ref()
        for v in get_dependent_variables(input_ops=inputs, output_ops=result)
    ])
    variables = list(
        [v.deref() for v in variables_in_subgraph.union(variables_in_tape)])

    grad_argspec = tf_inspect.getfullargspec(grad_fn)
    variables_in_signature = ("variables" in grad_argspec.args
                              or grad_argspec.varkw)
    if variables and not variables_in_signature:
        raise TypeError("If using @custom_gradient with a function that "
                        "uses variables, then grad_fn must accept a keyword "
                        "argument 'variables'.")
    if variables_in_signature and not variables:
        # User seems to intend to use variables but none were captured.
        if not variable_scope.get_variable_scope().use_resource:
            raise TypeError(
                "If using @custom_gradient with a function that "
                "uses variables, the enclosing variable scope must "
                "have use_resource=True.")
        else:
            logging.warn(
                "@custom_gradient grad_fn has 'variables' in signature, but "
                "no ResourceVariables were used on the forward pass.")
    flat_result = nest.flatten(result)
    flat_result_len = len(flat_result)

    all_tensors = flat_result + args + variables

    def tape_grad_fn(*result_grads):
        """Custom grad fn wrapper."""
        result_grads = result_grads[:flat_result_len]
        if variables:
            input_grads, variable_grads = grad_fn(*result_grads,
                                                  variables=variables)
            if len(variable_grads) != len(variables):
                raise ValueError("Must return gradient for each variable from "
                                 "@custom_gradient grad_fn.")
        else:
            input_grads = grad_fn(*result_grads)
            variable_grads = []

        # Need to return one value per input to the IdentityN, so pad the
        # gradients of the inputs of the custom_gradient function with the
        # gradients of the outputs as well.
        input_grads = nest.flatten(input_grads)
        return ([None] * flat_result_len) + input_grads + variable_grads

    @ops.RegisterGradient(name)
    def internal_grad_fn(unused_op, *result_grads):  # pylint: disable=unused-variable
        """Custom grad fn wrapper."""
        return tape_grad_fn(*result_grads)

    original_tensors = all_tensors
    with ops.get_default_graph().gradient_override_map({"IdentityN": name}):
        all_tensors = array_ops.identity_n(all_tensors)

    original_tensors = [ops.convert_to_tensor(x) for x in original_tensors]

    # Propagate handle data for happier shape inference for resource variables.
    for i, t in enumerate(original_tensors):
        if t.dtype == dtypes.resource and hasattr(t, "_handle_data"):
            all_tensors[i]._handle_data = t._handle_data  # pylint: disable=protected-access
    tape_lib.record_operation(f.__name__, all_tensors, original_tensors,
                              tape_grad_fn)
    for ot, t in zip(original_tensors, all_tensors):
        copy_handle_data(ot, t)
    return nest.pack_sequence_as(structure=result,
                                 flat_sequence=all_tensors[:flat_result_len])
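A minimal sketch of the contract enforced above: if the forward function captures a `ResourceVariable`, `grad_fn` must accept a `variables` keyword and return one gradient per captured variable. TF 1.x graph mode and `tf.compat.v1` names are assumed:

```python
import tensorflow.compat.v1 as tf

tf.disable_eager_execution()
v = tf.get_variable('v', shape=[], use_resource=True,
                    initializer=tf.ones_initializer())

@tf.custom_gradient
def scale(x):
    y = v * x
    # The `variables` keyword is required because `v` is captured above.
    def grad(dy, variables=None):
        return dy * v, [dy * x]  # (input gradients, variable gradients)
    return y, grad

out = scale(tf.constant(2.0))
```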
def embedding_lookup_sparse_with_distributed_aggregation(
    params,
    sp_ids,
    sp_weights,
    partition_strategy="mod",
    name=None,
    combiner=None,
    max_norm=None):
  """Computes embeddings for the given ids and weights.

  Embeddings belonging to same param are aggregated on that device first. This
  op is intended to decrease data transmission and improve parallelism. See
  `tf.nn.embedding_lookup_sparse` for the functionality and example of this op.

  Args:
    params: A single tensor representing the complete embedding tensor, or a
      list of P tensors all of same shape except for the first dimension,
      representing sharded embedding tensors.  Alternatively, a
      `PartitionedVariable`, created by partitioning along dimension 0. Each
      element must be appropriately sized for the given `partition_strategy`.
    sp_ids: N x M SparseTensor of int64 ids (typically from FeatureValueToId),
      where N is typically batch size and M is arbitrary.
    sp_weights: either a SparseTensor of float / double weights, or None to
      indicate all weights should be taken to be 1. If specified, sp_weights
      must have exactly the same shape and indices as sp_ids.
    partition_strategy: A string specifying the partitioning strategy, relevant
      if `len(params) > 1`. Currently `"div"` and `"mod"` are supported. Default
      is `"mod"`. See `tf.nn.embedding_lookup` for more details.
    name: Optional name for the op.
    combiner: A string specifying the reduction op. Currently "mean", "sqrtn"
      and "sum" are supported. "sum" computes the weighted sum of the embedding
      results for each row. "mean" is the weighted sum divided by the total
      weight. "sqrtn" is the weighted sum divided by the square root of the sum
      of the squares of the weights.
    max_norm: If not None, each embedding is normalized to have l2 norm equal to
      max_norm before combining.

  Returns:
    A dense tensor representing the combined embeddings for the
    sparse ids. For each row in the dense tensor represented by sp_ids, the op
    looks up the embeddings for all ids in that row, multiplies them by the
    corresponding weight, and combines these embeddings as specified.

  Raises:
    TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither
      None nor SparseTensor.
    ValueError: If combiner is not one of {"mean", "sqrtn", "sum"}.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if combiner not in ("mean", "sqrtn", "sum"):
    raise ValueError("combiner must be one of 'mean', 'sqrtn' or 'sum'")
  if isinstance(params, variables.PartitionedVariable):
    params = list(params)  # Iterate to get the underlying Variables.
  if not isinstance(params, list):
    params = [params]
  if not isinstance(sp_ids, sparse_tensor.SparseTensor):
    raise TypeError("sp_ids must be SparseTensor")
  ignore_weights = sp_weights is None
  if not ignore_weights:
    if not isinstance(sp_weights, sparse_tensor.SparseTensor):
      raise TypeError("sp_weights must be either None or SparseTensor")
    sp_ids.values.get_shape().assert_is_compatible_with(
        sp_weights.values.get_shape())
    sp_ids.indices.get_shape().assert_is_compatible_with(
        sp_weights.indices.get_shape())
    sp_ids.dense_shape.get_shape().assert_is_compatible_with(
        sp_weights.dense_shape.get_shape())
    # TODO(yleon): Add enhanced node assertions to verify that sp_ids and
    # sp_weights have equal indices and shapes.

  with ops.name_scope(name, "embedding_lookup_sparse",
                      params + [sp_ids]) as name:
    segment_ids = sp_ids.indices[:, 0]
    if segment_ids.dtype != dtypes.int32:
      segment_ids = math_ops.cast(segment_ids, dtypes.int32)

    ids = sp_ids.values
    if ignore_weights:
      ids, idx = array_ops.unique(ids)
    else:
      idx = None

    weights = None if ignore_weights else sp_weights.values
    embeddings = _embedding_lookup_with_distributed_aggregation(
        params,
        ids,
        partition_strategy=partition_strategy,
        max_norm=max_norm,
        weights=weights,
        idx=idx,
        segment_ids=segment_ids)
    # Set weights to all ones if ignoring weights.
    if ignore_weights:
      weights = array_ops.fill([array_ops.shape(segment_ids)[0]], 1)
    if weights.dtype != embeddings.dtype:
      weights = math_ops.cast(weights, embeddings.dtype)
    # Reshape weights.
    ones = array_ops.fill(
        array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
    bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones], 0)
    orig_weights_shape = weights.get_shape()
    weights = array_ops.reshape(weights, bcast_weights_shape)
    if embeddings.get_shape().ndims is not None:
      weights.set_shape(
          orig_weights_shape.concatenate(
              [1 for _ in range(embeddings.get_shape().ndims - 1)]))

    if combiner == "mean":
      weight_sum = math_ops.segment_sum(weights, segment_ids)
      embeddings = math_ops.div(embeddings, weight_sum)
    elif combiner == "sqrtn":
      weights_squared = math_ops.pow(weights, 2)
      weight_sum = math_ops.segment_sum(weights_squared, segment_ids)
      weight_sum_sqrt = math_ops.sqrt(weight_sum)
      embeddings = math_ops.div(embeddings, weight_sum_sqrt)
    elif combiner != "sum":
      assert False, "Unrecognized combiner"
    return embeddings
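The weight-reshaping step above simply appends singleton axes so the per-row weights broadcast against rank-k embeddings. A NumPy sketch of that shape arithmetic (NumPy assumed):

```python
import numpy as np

# Append (rank - 1) singleton axes to `weights` so it broadcasts
# against the embeddings, mirroring the concat/reshape above.
weights = np.array([2., 3.])          # one weight per looked-up row
embeddings = np.ones((2, 4))          # [rows, embedding_dim]
bcast_shape = list(weights.shape) + [1] * (embeddings.ndim - 1)
weights = weights.reshape(bcast_shape)   # shape (2, 1)
assert (weights * embeddings).shape == (2, 4)
```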
Example #58
    def testRecursiveCriticalSectionAccessWithinLoopIsProtected(self):
        cs = critical_section_ops.CriticalSection(shared_name="cs")

        def body_implicit_capture(i, j):
            # This would have caused a deadlock if not for logic in execute
            # that inserts additional control dependencies onto the lock op:
            #   * Loop body argument j is captured by fn()
            #   * i is running in parallel to move forward the execution
            #   * j is not being checked by the predicate function
            #   * output of cs.execute() is returned as next j.
            fn = lambda: j + 1
            return (i + 1, cs.execute(fn))

        (i_n, j_n) = control_flow_ops.while_loop(lambda i, _: i < 1000,
                                                 body_implicit_capture, [0, 0],
                                                 parallel_iterations=25)
        # For consistency between eager and graph mode.
        i_n = array_ops.identity(i_n)
        logging.warn(
            "\n==============\nRunning "
            "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
            "body_implicit_capture'\n"
            "==============\n")
        self.assertEqual((1000, 1000), self.evaluate((i_n, j_n)))
        logging.warn(
            "\n==============\nSuccessfully finished running "
            "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
            "body_implicit_capture'\n"
            "==============\n")

        def body_implicit_capture_protected(i, j):
            # This version is ok because we manually add a control
            # dependency on j, which is an argument to the while_loop body
            # and captured by fn.
            fn = lambda: j + 1
            with ops.control_dependencies([j]):
                return (i + 1, cs.execute(fn))

        (i_n,
         j_n) = control_flow_ops.while_loop(lambda i, _: i < 1000,
                                            body_implicit_capture_protected,
                                            [0, 0],
                                            parallel_iterations=25)
        # For consistency between eager and graph mode.
        i_n = array_ops.identity(i_n)
        logging.warn(
            "\n==============\nRunning "
            "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
            "body_implicit_capture_protected'\n"
            "==============\n")
        self.assertEqual((1000, 1000), self.evaluate((i_n, j_n)))
        logging.warn(
            "\n==============\nSuccessfully finished running "
            "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
            "body_implicit_capture_protected'\n"
            "==============\n")

        def body_args_capture(i, j):
            # This version is ok because j is an argument to fn and we can
            # ensure there's a control dependency on j.
            fn = lambda x: x + 1
            return (i + 1, cs.execute(lambda: fn(j)))

        (i_n, j_n) = control_flow_ops.while_loop(lambda i, _: i < 1000,
                                                 body_args_capture, [0, 0],
                                                 parallel_iterations=25)
        # For consistency between eager and graph mode.
        i_n = array_ops.identity(i_n)
        logging.warn(
            "\n==============\nRunning "
            "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
            "body_args_capture'\n"
            "==============\n")
        self.assertEqual((1000, 1000), self.evaluate((i_n, j_n)))
        logging.warn(
            "\n==============\nSuccessfully finished running "
            "'testRecursiveCriticalSectionAccessWithinLoopDoesNotDeadlock "
            "body_args_capture'\n"
            "==============\n")
def safe_embedding_lookup_sparse(embedding_weights,
                                 sparse_ids,
                                 sparse_weights=None,
                                 combiner=None,
                                 default_id=None,
                                 name=None,
                                 partition_strategy="div",
                                 max_norm=None):
  """Lookup embedding results, accounting for invalid IDs and empty features.

  The partitioned embedding in `embedding_weights` must all be the same shape
  except for the first dimension. The first dimension is allowed to vary as the
  vocabulary size is not necessarily a multiple of `P`.  `embedding_weights`
  may be a `PartitionedVariable` as returned by using
  `tf.compat.v1.get_variable()` with a
  partitioner.

  Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs
  with non-positive weight. For an entry with no features, the embedding vector
  for `default_id` is returned, or the 0-vector if `default_id` is not supplied.

  The ids and weights may be multi-dimensional. Embeddings are always aggregated
  along the last dimension.

  Args:
    embedding_weights:  A list of `P` float tensors or values representing
      partitioned embedding tensors.  Alternatively, a `PartitionedVariable`,
      created by partitioning along dimension 0.  The total unpartitioned shape
      should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the vocab size
      and `e_1, ..., e_m` are the embedding dimensions.
    sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the
      ids. `d_0` is typically batch size.
    sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing
      float weights corresponding to `sparse_ids`, or `None` if all weights are
      assumed to be 1.0.
    combiner: A string specifying how to combine embedding results for each
      entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" the
      default.
    default_id: The id to use for an entry with no features.
    name: A name for this operation (optional).
    partition_strategy: A string specifying the partitioning strategy. Currently
      `"div"` and `"mod"` are supported. Default is `"div"`.
    max_norm: If not None, all embeddings are l2-normalized to max_norm before
      combining.

  Returns:
    Dense tensor of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`.

  Raises:
    ValueError: if `embedding_weights` is empty.
  """
  if combiner is None:
    logging.warn("The default value of combiner will change from \"mean\" "
                 "to \"sqrtn\" after 2016/11/01.")
    combiner = "mean"
  if embedding_weights is None:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)
  if isinstance(embedding_weights, variables.PartitionedVariable):
    embedding_weights = list(embedding_weights)  # get underlying Variables.
  if not isinstance(embedding_weights, list):
    embedding_weights = [embedding_weights]
  if len(embedding_weights) < 1:
    raise ValueError("Missing embedding_weights %s." % embedding_weights)

  dtype = sparse_weights.dtype if sparse_weights is not None else None
  embedding_weights = [
      ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights
  ]

  contrib_tensor_util.assert_same_float_dtype(embedding_weights +
                                              [sparse_weights])

  with ops.name_scope(name, "embedding_lookup", embedding_weights +
                      [sparse_ids, sparse_weights]) as scope:
    # Reshape higher-rank sparse ids and weights to linear segment ids.
    original_shape = sparse_ids.dense_shape
    original_rank_dim = tensor_shape.Dimension(
        tensor_shape.dimension_value(sparse_ids.dense_shape.get_shape()[0]))
    original_rank = (
        array_ops.size(original_shape)
        if original_rank_dim.value is None else original_rank_dim.value)
    sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [
        math_ops.reduce_prod(
            array_ops.slice(original_shape, [0], [original_rank - 1])),
        array_ops.gather(original_shape, original_rank - 1)
    ])
    if sparse_weights is not None:
      sparse_weights = sparse_tensor.SparseTensor(sparse_ids.indices,
                                                  sparse_weights.values,
                                                  sparse_ids.dense_shape)

    # Prune invalid ids and weights.
    sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights)
    if combiner != "sum":
      sparse_ids, sparse_weights = _prune_invalid_weights(
          sparse_ids, sparse_weights)

    # Fill in dummy values for empty features, if necessary.
    sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(
        sparse_ids, default_id or 0)
    if sparse_weights is not None:
      sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0)

    result = embedding_ops.embedding_lookup_sparse(
        embedding_weights,
        sparse_ids,
        sparse_weights,
        combiner=combiner,
        partition_strategy=partition_strategy,
        name=None if default_id is None else scope,
        max_norm=max_norm)

    if default_id is None:
      # Broadcast is_row_empty to the same shape as embedding_lookup_result,
      # for use in Select.
      is_row_empty = array_ops.tile(
          array_ops.reshape(is_row_empty, [-1, 1]),
          array_ops.stack([1, array_ops.shape(result)[1]]))

      result = array_ops.where(
          is_row_empty, array_ops.zeros_like(result), result, name=scope)

    # Reshape from linear ids back into the higher-dimensional dense result.
    final_result = array_ops.reshape(
        result,
        array_ops.concat([
            array_ops.slice(
                math_ops.cast(original_shape, dtypes.int32), [0],
                [original_rank - 1]),
            array_ops.slice(array_ops.shape(result), [1], [-1])
        ], 0))
    final_result.set_shape(
        tensor_shape.unknown_shape(
            (original_rank_dim - 1).value).concatenate(result.get_shape()[1:]))
    return final_result
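A hedged usage sketch, assuming this function matches the one exposed as `tf.nn.safe_embedding_lookup_sparse` (tf.compat.v1 names assumed):

```python
import tensorflow.compat.v1 as tf

weights = [tf.constant([[.1, .2], [.3, .4], [.5, .6]])]   # vocab 3, dim 2
ids = tf.SparseTensor(
    indices=[[0, 0], [1, 0], [2, 0]],
    values=tf.constant([2, -1, 0], dtype=tf.int64),  # -1 is invalid, pruned
    dense_shape=[3, 2])
emb = tf.nn.safe_embedding_lookup_sparse(weights, ids, combiner='mean')
# Row 0 -> embedding of id 2; row 1 -> the 0-vector (its only id was
# invalid and default_id is None); row 2 -> embedding of id 0.
```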
Example #60
def einsum(equation, *inputs, **kwargs):
    """A generalized contraction between tensors of arbitrary dimension.

  This function returns a tensor whose elements are defined by `equation`,
  which is written in a shorthand form inspired by the Einstein summation
  convention.  As an example, consider multiplying two matrices
  A and B to form a matrix C.  The elements of C are given by:

  ```
    C[i,k] = sum_j A[i,j] * B[j,k]
  ```

  The corresponding `equation` is:

  ```
    ij,jk->ik
  ```

  In general, the `equation` is obtained from the more familiar element-wise
  equation by
    1. removing variable names, brackets, and commas,
    2. replacing "*" with ",",
    3. dropping summation signs, and
    4. moving the output to the right, and replacing "=" with "->".

  Many common operations can be expressed in this way.  For example:

  ```python
  # Matrix multiplication
  >>> einsum('ij,jk->ik', m0, m1)  # output[i,k] = sum_j m0[i,j] * m1[j, k]

  # Dot product
  >>> einsum('i,i->', u, v)  # output = sum_i u[i]*v[i]

  # Outer product
  >>> einsum('i,j->ij', u, v)  # output[i,j] = u[i]*v[j]

  # Transpose
  >>> einsum('ij->ji', m)  # output[j,i] = m[i,j]

  # Trace
  >>> einsum('ii', m)  # output = trace(m) = sum_i m[i, i]

  # Batch matrix multiplication
  >>> einsum('aij,ajk->aik', s, t)  # out[a,i,k] = sum_j s[a,i,j] * t[a, j, k]
  ```

  To enable and control broadcasting, use an ellipsis.  For example, to do
  batch matrix multiplication, you could use:

  ```python
  >>> einsum('...ij,...jk->...ik', u, v)
  ```

  This function behaves like `numpy.einsum`, but does not support:

  * Subscripts where an axis appears more than once for a single input
    (e.g. `ijj,k->ik`) unless it is a trace (e.g. `ijji`).

  Args:
    equation: a `str` describing the contraction, in the same format as
      `numpy.einsum`.
    *inputs: the inputs to contract (each one a `Tensor`), whose shapes should
      be consistent with `equation`.
    name: A name for the operation (optional).

  Returns:
    The contracted `Tensor`, with shape determined by `equation`.

  Raises:
    ValueError: If
      - the format of `equation` is incorrect,
      - the number of inputs implied by `equation` does not match `len(inputs)`,
      - an axis appears in the output subscripts but not in any of the inputs,
      - the number of dimensions of an input differs from the number of
        indices in its subscript, or
      - the input shapes are inconsistent along a particular axis.
  """
    name = kwargs.pop('name', None)
    if kwargs:
        raise TypeError(
            'invalid keyword arguments for this function: ' +
            ', '.join([format(key) for key in sorted(list(kwargs.keys()))]))
    with ops.name_scope(name, 'einsum', [equation, inputs]) as name:
        inputs = list(inputs)
        input_shapes = [x.get_shape() for x in inputs]
        input_axis_labels, output_axis_labels = _einsum_parse_and_resolve_equation(
            equation, input_shapes)

        axis_labels = set(''.join(input_axis_labels) + output_axis_labels)

        for a in axis_labels:
            for input_labels in input_axis_labels:
                if (len(input_axis_labels) == 1 and input_labels.count(a) == 2
                        and input_labels == input_labels[::-1]
                        and '->' not in equation):
                    return math_ops.trace(inputs[0])
                if input_labels.count(a) > 1:
                    raise ValueError(
                        'Subscript not supported: an axis appears more than once: %s'
                        % input_labels)
        for a in axis_labels:
            input_count = sum(1 for s in input_axis_labels if a in s)
            if input_count > 2 and a not in output_axis_labels:
                logging.warn(
                    'Falling back to exponential-space implementation of einsum()'
                    ' because index "%s" is summed over more than two inputs.',
                    a)
                return _exponential_space_einsum(equation, *inputs)

        # Use xla_einsum if executing on TPU and the operation is a
        # 2-input einsum supported by XlaEinsumOp.
        if _enclosing_tpu_context() is not None and len(inputs) == 2:
            return gen_xla_ops.xla_einsum(
                inputs[0], inputs[1], input_axis_labels[0] + ',' +
                input_axis_labels[1] + '->' + output_axis_labels)
        temp = inputs[0]
        temp_axis_labels = input_axis_labels[0]
        for i in xrange(len(inputs) - 1):
            axes_to_sum = (
                set(temp_axis_labels)
                & set(input_axis_labels[i + 1]) - set(output_axis_labels))
            temp, temp_axis_labels = _einsum_reduction(
                temp, temp_axis_labels, inputs[i + 1],
                input_axis_labels[i + 1], axes_to_sum)

        missing_indices = set(temp_axis_labels) - set(output_axis_labels)
        if missing_indices:
            axis = [
                i for i, a in enumerate(temp_axis_labels)
                if a not in output_axis_labels
            ]
            temp = math_ops.reduce_sum(temp, axis=axis)
            temp_axis_labels = ''.join(a for a in temp_axis_labels
                                       if a in output_axis_labels)
        if sorted(temp_axis_labels) != sorted(output_axis_labels):
            raise ValueError('Invalid equation: %s' % equation)

        perm = [temp_axis_labels.index(a) for a in output_axis_labels]
        return _transpose_if_necessary(temp, perm)
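The ellipsis form described in the docstring maps directly onto `numpy.einsum`'s broadcasting; a quick NumPy cross-check (NumPy assumed):

```python
import numpy as np

# Batch matrix multiplication with a batch axis handled by '...',
# matching the '...ij,...jk->...ik' example in the docstring.
u = np.random.rand(5, 2, 3)
v = np.random.rand(5, 3, 4)
out = np.einsum('...ij,...jk->...ik', u, v)
assert out.shape == (5, 2, 4)
assert np.allclose(out, u @ v)
```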