  def testGroup_MultiDevice(self):
    with ops.Graph().as_default() as g:
      with g.device("/task:0"):
        a = constant_op.constant(0, name="a")
        b = constant_op.constant(0, name="b")
      with g.device("/task:1"):
        c = constant_op.constant(0, name="c")
        d = constant_op.constant(0, name="d")
      with g.device("/task:2"):
        control_flow_ops.group(a.op, b.op, c.op, d.op, name="root")
    gd = g.as_graph_def()
    self.assertProtoEquals("""
      node { name: "a" op: "Const" device: "/task:0"}
      node { name: "b" op: "Const" device: "/task:0"}
      node { name: "c" op: "Const" device: "/task:1"}
      node { name: "d" op: "Const" device: "/task:1"}
      node { name: "root/NoOp" op: "NoOp" input: "^a" input: "^b"
             device: "/task:0" }
      node { name: "root/NoOp_1" op: "NoOp" input: "^c" input: "^d"
             device: "/task:1" }
      node { name: "root" op: "NoOp" input: "^root/NoOp" input: "^root/NoOp_1"
             device: "/task:2" }
    """, self._StripGraph(gd))
  def benchmarkMatrixInverseOp(self):
    for adjoint in False, True:
      for shape in self.shapes:
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix = self._GenerateMatrix(shape)
          inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(inv),
              min_iters=25,
              name="matrix_inverse_cpu_{shape}_adjoint_{adjoint}".format(
                  shape=shape, adjoint=adjoint))

        if test.is_gpu_available(True):
          with ops.Graph().as_default(), \
              session.Session(config=benchmark.benchmark_config()) as sess, \
              ops.device("/gpu:0"):
            matrix = self._GenerateMatrix(shape)
            inv = linalg_ops.matrix_inverse(matrix, adjoint=adjoint)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(inv),
                min_iters=25,
                name="matrix_inverse_gpu_{shape}_adjoint_{adjoint}".format(
                    shape=shape, adjoint=adjoint))
  def benchmarkMatrixSolveLsOp(self):
    run_gpu_test = test_lib.is_gpu_available(True)
    regularizer = 1.0
    for matrix_shape in self.matrix_shapes:
      for num_rhs in 1, 2, matrix_shape[-1]:

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/cpu:0"):
          matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
          x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x),
              min_iters=25,
              store_memory_usage=False,
              name=("matrix_solve_ls_cpu_shape_{matrix_shape}_num_rhs_{num_rhs}"
                   ).format(matrix_shape=matrix_shape, num_rhs=num_rhs))

        if run_gpu_test and (len(matrix_shape) < 3 or matrix_shape[0] < 513):
          with ops.Graph().as_default(), \
                session.Session(config=benchmark.benchmark_config()) as sess, \
                ops.device("/gpu:0"):
            matrix, rhs = _GenerateTestData(matrix_shape, num_rhs)
            x = linalg_ops.matrix_solve_ls(matrix, rhs, regularizer)
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(x),
                min_iters=25,
                store_memory_usage=False,
                name=("matrix_solve_ls_gpu_shape_{matrix_shape}_num_rhs_"
                      "{num_rhs}").format(
                          matrix_shape=matrix_shape, num_rhs=num_rhs))
  def benchmarkMatrixBandPartOp(self):
    for shape_ in self.shapes:
      for limits in (-1, -1), (-1, 0), (0, -1), (2, 2):
        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device("/cpu:0"):
          matrix = variables.Variable(array_ops.ones(shape_))
          band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(band),
              min_iters=10,
              name="matrix_band_part_cpu_{shape}_{limits}".format(
                  shape=shape_, limits=limits))

        if test_lib.is_gpu_available(True):
          with ops.Graph().as_default(), \
              session.Session() as sess, \
              ops.device("/gpu:0"):
            matrix = variables.Variable(array_ops.ones(shape_))
            band = array_ops.matrix_band_part(matrix, limits[0], limits[1])
            variables.global_variables_initializer().run()
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(band),
                min_iters=10,
                name="matrix_band_part_gpu_{shape}_{limits}".format(
                    shape=shape_, limits=limits))
  def benchmarkQROp(self):
    for shape_ in self.shapes:
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix_value = np.random.uniform(
            low=-1.0, high=1.0, size=shape_).astype(np.float32)
        matrix = variables.Variable(matrix_value)
        q, r = linalg_ops.qr(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(q, r),
            min_iters=25,
            name="QR_cpu_{shape}".format(shape=shape_))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device("/device:GPU:0"):
          matrix_value = np.random.uniform(
              low=-1.0, high=1.0, size=shape_).astype(np.float32)
          matrix = variables.Variable(matrix_value)
          q, r = linalg_ops.qr(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(q, r),
              min_iters=25,
              name="QR_gpu_{shape}".format(shape=shape_))
  def benchmarkCholeskyOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), \
          session.Session() as sess, \
          ops.device("/cpu:0"):
        matrix = variables.Variable(self._GenerateMatrix(shape))
        l = linalg_ops.cholesky(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(l),
            min_iters=25,
            name="cholesky_cpu_{shape}".format(shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device("/device:GPU:0"):
          matrix = variables.Variable(self._GenerateMatrix(shape))
          l = linalg_ops.cholesky(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(l),
              min_iters=25,
              name="cholesky_gpu_{shape}".format(shape=shape))
  def benchmarkMatrixExponentialOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), \
          session.Session() as sess, \
          ops.device("/cpu:0"):
        matrix = self._GenerateMatrix(shape)
        expm = linalg_impl.matrix_exponential(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(expm),
            min_iters=25,
            name="matrix_exponential_cpu_{shape}".format(
                shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device("/gpu:0"):
          matrix = self._GenerateMatrix(shape)
          expm = linalg_impl.matrix_exponential(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(expm),
              min_iters=25,
              name="matrix_exponential_gpu_{shape}".format(
                  shape=shape))
    def benchmarkTridiagonalMulOp(self):
      devices = [('/cpu:0', 'cpu')]
      if test.is_gpu_available(cuda_only=True):
        devices += [('/gpu:0', 'gpu')]

      for device_option, size_option in itertools.product(devices, self.sizes):
        device_id, device_name = device_option
        m, batch_size, n = size_option

        with ops.Graph().as_default(), \
            session.Session(config=benchmark.benchmark_config()) as sess, \
            ops.device(device_id):
          upper, diag, lower, vec = self._generateData(batch_size, m, n)
          x1 = self.baseline(upper, diag, lower, vec)
          x2 = linalg_impl.tridiagonal_matmul((upper, diag, lower),
                                              vec,
                                              diagonals_format='sequence')

          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x1),
              min_iters=10,
              store_memory_usage=False,
              name=('tridiagonal_matmul_baseline_%s'
                    '_batch_size_%d_m_%d_n_%d' %
                    (device_name, batch_size, m, n)))

          self.run_op_benchmark(
              sess,
              control_flow_ops.group(x2),
              min_iters=10,
              store_memory_usage=False,
              name=('tridiagonal_matmul_%s_batch_size_%d_m_%d_n_%d' %
                    (device_name, batch_size, m, n)))
  def benchmarkMatrixInverseOp(self):
    for adjoint in False, True:
      for size in self.sizes:
        data = self._GenerateData(size)

        with ops.Graph().as_default(), \
            session.Session() as sess, \
            ops.device("/cpu:0"):
          inv = linalg_ops.matrix_inverse(data, adjoint=adjoint)
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(inv),
              min_iters=25,
              name="matrix_inverse_cpu_{size}_{adjoint}".format(
                  size=size, adjoint="adjoint" if adjoint else "noadjoint"))

        if test.is_gpu_available(True):
          with ops.Graph().as_default(), \
              session.Session() as sess, \
              ops.device("/gpu:0"):
            inv = linalg_ops.matrix_inverse(data, adjoint=adjoint)
            self.run_op_benchmark(
                sess,
                control_flow_ops.group(inv),
                min_iters=25,
                name="matrix_inverse_gpu_{size}_{adjoint}".format(
                    size=size, adjoint="adjoint" if adjoint else "noadjoint"))
  def benchmarkMatrixDeterminantOp(self):
    for shape in self.shapes:
      with ops.Graph().as_default(), session.Session(
          config=benchmark.benchmark_config()) as sess, ops.device("/cpu:0"):
        matrix = self._GenerateMatrix(shape)
        d = linalg_ops.matrix_determinant(matrix)
        variables.global_variables_initializer().run()
        self.run_op_benchmark(
            sess,
            control_flow_ops.group(d),
            min_iters=25,
            name="matrix_determinant_cpu_{shape}".format(shape=shape))

      if test.is_gpu_available(True):
        with ops.Graph().as_default(), session.Session(
            config=benchmark.benchmark_config()) as sess, ops.device("/gpu:0"):
          matrix = self._GenerateMatrix(shape)
          d = linalg_ops.matrix_determinant(matrix)
          variables.global_variables_initializer().run()
          self.run_op_benchmark(
              sess,
              control_flow_ops.group(d),
              min_iters=25,
              name="matrix_determinant_gpu_{shape}".format(shape=shape))
  def make_ops_and_vars_round_robin(self, scope=None, cov_devices=None,
                                    inv_devices=None):
    """Make ops and vars with a round-robin device placement strategy.

    For each factor, all of that factor's cov variables and their associated
    update ops will be placed on a particular device.  A new device is chosen
    for each factor by cycling through the list of devices in the cov_devices
    argument. If cov_devices is None then no explicit device placement occurs.

    An analogous strategy is followed for inverse update ops, with the list of
    devices being given by the inv_devices argument.

    Inverse variables, on the other hand, are not placed on any specific device
    (they will just use the current device placement context, whatever that
    happens to be).  The idea is that the inverse variables belong where they
    will be accessed most often, which is the device that actually applies the
    preconditioner to the gradient. The user is responsible for setting the
    device context for this.

    Args:
      scope: A string or None.  If None it will be set to the name of this
        estimator (given by the name property). All variables will be created,
        and all ops will execute, inside of a variable scope of the given
        name. (Default: None)
      cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance
        computations will be placed on these devices in a round-robin fashion.
        Can be None, which means that no devices are specified.
      inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion
        computations will be placed on these devices in a round-robin fashion.
        Can be None, which means that no devices are specified.

    Returns:
      cov_update_ops: List of ops that compute the cov updates. Corresponds
        one-to-one with the list of factors given by the "factors" property.
      cov_update_op: cov_update_ops grouped into a single op.
      inv_update_ops: List of ops that compute the inv updates. Corresponds
        one-to-one with the list of factors given by the "factors" property.
      inv_update_op: inv_update_ops grouped into a single op.
      cov_update_thunks: Thunks that make the ops in cov_update_ops.
      inv_update_thunks: Thunks that make the ops in inv_update_ops.
    """
    (cov_update_thunks,
     inv_update_thunks) = self.make_vars_and_create_op_thunks_round_robin(
         scope=scope,
         cov_devices=cov_devices,
         inv_devices=inv_devices)
    cov_update_ops = [thunk() for thunk in cov_update_thunks]
    inv_update_ops = [thunk() for thunk in inv_update_thunks]

    scope = self.name if scope is None else scope
    with variable_scope.variable_scope(scope):
      cov_update_op = control_flow_ops.group(cov_update_ops,
                                             name="cov_update_op")
      inv_update_op = control_flow_ops.group(inv_update_ops,
                                             name="inv_update_op")

    return (cov_update_ops, cov_update_op, inv_update_ops, inv_update_op,
            cov_update_thunks, inv_update_thunks)
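
# A minimal sketch of the round-robin device placement described in the
# docstring above, assuming TF 1.x graph mode. `factor_update_fns` and the
# device list are hypothetical stand-ins, not part of the KFAC estimator API.
import itertools

import tensorflow.compat.v1 as tf


def round_robin_update_ops(factor_update_fns, devices=None, name="update_op"):
  """Builds each factor's update op on the next device in a cyclic order."""
  device_cycle = itertools.cycle(devices) if devices else None
  update_ops = []
  for make_update_op in factor_update_fns:
    if device_cycle is None:
      update_ops.append(make_update_op())
    else:
      with tf.device(next(device_cycle)):
        update_ops.append(make_update_op())
  return update_ops, tf.group(*update_ops, name=name)
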
  def _create_transient_vars(self):
    """Creates local cache of factors, weights and gramian for rows and columns.

    Note that currently the caching strategy is as follows:
    When initiating a row (resp. column) update:
      - The column (resp. row) Gramian is computed.
      - Optionally, if use_gramian_cache is True, the column (resp. row) Gramian
        is cached, while the row (resp. column) Gramian is reset.
      - Optionally, if use_factors_weights_cache is True, the column (resp. row)
        factors and weights are cached, while the row (resp. column) factors and
        weights are reset.
    """

    (self._row_factors_cache, row_factors_cache_init,
     row_factors_cache_reset) = self._cached_copy(
         self._row_factors,
         "row_factors_cache",
         pass_through=not self._use_factors_weights_cache)
    (self._col_factors_cache, col_factors_cache_init,
     col_factors_cache_reset) = self._cached_copy(
         self._col_factors,
         "col_factors_cache",
         pass_through=not self._use_factors_weights_cache)
    (self._row_wt_cache, row_wt_cache_init, _) = self._cached_copy(
        self._row_weights,
        "row_wt_cache",
        pass_through=not self._use_factors_weights_cache)
    (self._col_wt_cache, col_wt_cache_init, _) = self._cached_copy(
        self._col_weights,
        "col_wt_cache",
        pass_through=not self._use_factors_weights_cache)
    (self._row_gramian_cache, row_gramian_cache_init,
     row_gramian_cache_reset) = self._cached_copy(
         self._row_gramian,
         "row_gramian_cache",
         pass_through=not self._use_gramian_cache)
    (self._col_gramian_cache, col_gramian_cache_init,
     col_gramian_cache_reset) = self._cached_copy(
         self._col_gramian,
         "col_gramian_cache",
         pass_through=not self._use_gramian_cache)

    self._row_updates_init = control_flow_ops.group(col_factors_cache_init,
                                                    row_factors_cache_reset,
                                                    col_gramian_cache_init,
                                                    row_gramian_cache_reset)
    self._col_updates_init = control_flow_ops.group(row_factors_cache_init,
                                                    col_factors_cache_reset,
                                                    row_gramian_cache_init,
                                                    col_gramian_cache_reset)

    if self._row_wt_cache is not None:
      assert self._col_wt_cache is not None
      self._worker_init = control_flow_ops.group(
          row_wt_cache_init, col_wt_cache_init, name="worker_init")
    else:
      self._worker_init = control_flow_ops.no_op(name="worker_init")
  def head_ops(self, features, target, mode, train_op_fn, logits=None,
               logits_input=None):
    """Returns ops for a model_fn.

    Args:
      features: input dict.
      target: target dict or tensor.
      mode: estimator's ModeKeys
      train_op_fn: function that takes a scalar loss and returns an op to
          optimize with the loss.
      logits: logits to be used for the head.
      logits_input: tensor to build logits from.

    Returns:
      `estimator.ModelFnOps`

    Raises:
      ValueError: if mode is not recognized.
    """
    _check_logits_input_not_supported(logits, logits_input)
    if mode == estimator.ModeKeys.TRAIN:
      loss, additional_train_op = self._training_loss(features, target,
                                                      logits, logits_input)

      train_op = train_op_fn(loss)

      if additional_train_op:
        if train_op:
          train_op = control_flow_ops.group(train_op, *additional_train_op)
        else:
          train_op = control_flow_ops.group(*additional_train_op)

      return estimator.ModelFnOps(
          mode=estimator.ModeKeys.TRAIN,
          loss=loss,
          training_op=train_op,
          default_metrics=self._default_metric(),
          signature_fn=self._create_signature_fn())

    if mode == estimator.ModeKeys.INFER:
      return estimator.ModelFnOps(
          mode=estimator.ModeKeys.INFER,
          predictions=self._infer_op(logits, logits_input),
          default_metrics=self._default_metric(),
          signature_fn=self._create_signature_fn())

    if mode == estimator.ModeKeys.EVAL:
      predictions, loss = self._eval_op(features, target, logits, logits_input)
      return estimator.ModelFnOps(
          mode=estimator.ModeKeys.EVAL,
          predictions=predictions,
          loss=loss,
          default_metrics=self._default_metric(),
          signature_fn=self._create_signature_fn())

    raise ValueError("mode=%s unrecognized." % str(mode))
 def _resource_apply_sparse(self, grad, var, indices):
   var_dtype = var.dtype.base_dtype
   lr_t = self._decayed_lr(var_dtype)
   rms = self.get_slot(var, "rms")
   rho = self._get_hyper("rho", var_dtype)
   momentum = self._get_hyper("momentum", var_dtype)
   epsilon = self._get_hyper("epsilon", var_dtype)
   if self._momentum:
     mom = self.get_slot(var, "momentum")
     if self.centered:
       mg = self.get_slot(var, "mg")
       return training_ops.resource_sparse_apply_centered_rms_prop(
           var.handle,
           mg.handle,
           rms.handle,
           mom.handle,
           lr_t,
           rho,
           momentum,
           epsilon,
           grad,
           indices,
           use_locking=self._use_locking)
     else:
       return training_ops.resource_sparse_apply_rms_prop(
           var.handle,
           rms.handle,
           mom.handle,
           lr_t,
           rho,
           momentum,
           epsilon,
           grad,
           indices,
           use_locking=self._use_locking)
   else:
     rms_scaled_g_values = (grad * grad) * (1. - rho)
     rms_t = state_ops.assign(rms, rms * rho, use_locking=self._use_locking)
     with ops.control_dependencies([rms_t]):
       rms_t = self._resource_scatter_add(rms, indices, rms_scaled_g_values)
       rms_slice = array_ops.gather(rms_t, indices)
     denom_slice = rms_slice
     if self.centered:
       mg = self.get_slot(var, "mg")
       mg_scaled_g_values = grad * (1. - rho)
       mg_t = state_ops.assign(mg, mg * rho, use_locking=self._use_locking)
       with ops.control_dependencies([mg_t]):
         mg_t = self._resource_scatter_add(mg, indices, mg_scaled_g_values)
         mg_slice = array_ops.gather(mg_t, indices)
         denom_slice = rms_slice - math_ops.square(mg_slice)
     var_update = self._resource_scatter_add(
         var, indices, -lr_t * grad / (math_ops.sqrt(denom_slice) + epsilon))
     if self.centered:
       return control_flow_ops.group(*[var_update, rms_t, mg_t])
     return control_flow_ops.group(*[var_update, rms_t])
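
# A hedged numpy sketch of the dense-equivalent update that the no-momentum
# fallback path above implements (rms/mg decay, centered denominator, epsilon
# added outside the square root). Illustrative only, not the optimizer's API.
import numpy as np


def dense_rmsprop_step(var, rms, mg, g, lr, rho, eps, centered=False):
  rms = rho * rms + (1.0 - rho) * g * g
  if centered:
    mg = rho * mg + (1.0 - rho) * g
    denom = rms - mg * mg
  else:
    denom = rms
  var = var - lr * g / (np.sqrt(denom) + eps)
  return var, rms, mg
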
 def testPassingList(self):
   with ops.Graph().as_default() as g:
     a = constant_op.constant(0, name="a")
     b = constant_op.constant(0, name="b")
     control_flow_ops.group([a.op, b.op], name="root")
   gd = g.as_graph_def()
   self.assertProtoEquals("""
     node { name: "a" op: "Const"}
     node { name: "b" op: "Const"}
     node { name: "root" op: "NoOp" input: "^a" input: "^b" }
   """, self._StripGraph(gd))
 def testGroup_OneDevice(self):
   with ops.Graph().as_default() as g:
     with g.device("/task:0"):
       a = constant_op.constant(0, name="a")
       b = constant_op.constant(0, name="b")
     control_flow_ops.group(a.op, b.op, name="root")
   gd = g.as_graph_def()
   self.assertProtoEquals("""
     node { name: "a" op: "Const" device: "/task:0" }
     node { name: "b" op: "Const" device: "/task:0" }
     node { name: "root" op: "NoOp" input: "^a" input: "^b" device: "/task:0" }
   """, self._StripGraph(gd))
 def testGroup_NoDevices(self):
   with ops.Graph().as_default() as g:
     a = constant_op.constant(0, name="a")
     b = constant_op.constant(0, name="b")
     c = constant_op.constant(0, name="c")
     control_flow_ops.group(a.op, b.op, c.op, name="root")
   gd = g.as_graph_def()
   self.assertProtoEquals("""
     node { name: "a" op: "Const"}
     node { name: "b" op: "Const"}
     node { name: "c" op: "Const"}
     node { name: "root" op: "NoOp" input: "^a" input: "^b" input: "^c" }
   """, self._StripGraph(gd))
def randn_sampler_switchover(shape, num_iters, use_gpu=False):
  # Benchmark by constructing samplers at the threshold where the randn
  # rejection sampler takes over, and check that this threshold is set
  # correctly by benchmarking with bounds just above and below it.
  # The uniform and randn samplers should have about the same performance
  # at this point.

  stddev_inside_bounds_before_using_randn = (
      _get_stddev_inside_bounds_before_using_randn(use_gpu))

  epsilon = 0.001

  np.random.seed(1618)  # Make it reproducible.

  # No CSE/CF.
  optimizer_options = config_pb2.OptimizerOptions(
      opt_level=config_pb2.OptimizerOptions.L0)
  config = config_pb2.ConfigProto(
      graph_options=config_pb2.GraphOptions(
          optimizer_options=optimizer_options))

  with session.Session(config=config) as sess:
    with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
      uniform_sampler_op = control_flow_ops.group(
          random_ops.parameterized_truncated_normal(
              shape,
              means=0.,
              stddevs=1.0,
              minvals=-stddev_inside_bounds_before_using_randn + epsilon,
              maxvals=0.01))
      randn_sampler_op = control_flow_ops.group(
          random_ops.parameterized_truncated_normal(
              shape,
              means=0.,
              stddevs=1.0,
              minvals=-stddev_inside_bounds_before_using_randn - epsilon,
              maxvals=0.01))

    # Burn-in to avoid session setup costs in the timing.
    sess.run(uniform_sampler_op)
    sess.run(uniform_sampler_op)
    uniform_dt = timeit.timeit(
        lambda: sess.run(uniform_sampler_op), number=num_iters)

    sess.run(randn_sampler_op)
    sess.run(randn_sampler_op)
    randn_dt = timeit.timeit(
        lambda: sess.run(randn_sampler_op), number=num_iters)

    return randn_dt, uniform_dt
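
# A hedged usage sketch for the helper above: time both samplers at the
# switchover threshold and report the ratio. The shape and iteration count are
# arbitrary illustrative choices, not values from the original benchmark.
def report_randn_switchover(use_gpu=False):
  randn_dt, uniform_dt = randn_sampler_switchover(
      shape=[1000000], num_iters=20, use_gpu=use_gpu)
  print("randn: %.4fs  uniform: %.4fs  ratio: %.2f" %
        (randn_dt, uniform_dt, randn_dt / uniform_dt))
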
def build_graph(device, dtype, data_format, input_shape, filter_shape, strides,
                padding, num_iters, warmup_iters):
  """builds a graph containing a sequence of conv2d operations.

  Args:
    device: String, the device to run on.
    dtype: Data type for the convolution.
    data_format: A string from: "NHWC" or "NCHW". Data format for input and
                 output data.
    input_shape: Shape of the input tensor.
    filter_shape: Shape of the filter tensor.
    strides: A list of ints. 1-D of length 4. The stride of sliding
             window for each dimension of input.
    padding: A string from: "SAME", "VALID". The type of padding
             algorithm to use.
    num_iters: number of iterations to run conv2d.
    warmup_iters: number of iterations for warmup runs.

  Returns:
    A tuple of two grouped ops: one running the warmup conv2d sequence and one
    running the timed conv2d sequence.
  """
  with ops.device("/%s:0" % device):
    inp = variables.Variable(
        random_ops.truncated_normal(input_shape, dtype=dtype))
    filt = variables.Variable(
        random_ops.truncated_normal(filter_shape, dtype=dtype))

    outputs = []
    conv2d_op = nn_ops.conv2d(
        inp, filt, strides, padding, data_format=data_format)
    outputs.append(conv2d_op)
    for _ in range(1, num_iters):
      with ops.control_dependencies([conv2d_op]):
        conv2d_op = nn_ops.conv2d(
            inp, filt, strides, padding, data_format=data_format)
        outputs.append(conv2d_op)

    warmup_groups = []
    warmup_conv2d_op = nn_ops.conv2d(
        inp, filt, strides, padding, data_format=data_format)
    warmup_groups.append(warmup_conv2d_op)
    for _ in range(1, warmup_iters):
      with ops.control_dependencies([warmup_conv2d_op]):
        warmup_conv2d_op = nn_ops.conv2d(
            inp, filt, strides, padding, data_format=data_format)
        warmup_groups.append(warmup_conv2d_op)
    return control_flow_ops.group(*warmup_groups), control_flow_ops.group(
        *outputs)
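
# A hedged sketch of how the two grouped ops returned by the conv2d build_graph
# above might be driven: run the warmup group once, then time the benchmark
# group. The Session setup and timing scheme are assumptions, not the original
# benchmark harness.
import time

import tensorflow.compat.v1 as tf


def time_conv2d_sequence(device, dtype, data_format, input_shape, filter_shape,
                         strides, padding, num_iters, warmup_iters):
  with tf.Graph().as_default():
    warmup_op, benchmark_op = build_graph(device, dtype, data_format,
                                          input_shape, filter_shape, strides,
                                          padding, num_iters, warmup_iters)
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(warmup_op)  # Not timed; amortizes startup and autotuning costs.
      start = time.time()
      sess.run(benchmark_op)
      # Approximate per-iteration time for the chained conv2d ops.
      return (time.time() - start) / num_iters
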
 def restore(self, restored_tensors, restored_shapes):
   if (self._cudnn_rnn.direction == CUDNN_RNN_UNIDIRECTION and
       self._cudnn_rnn.rnn_mode == CUDNN_LSTM):
     if len(restored_tensors) % 4 != 0:
       raise ValueError(
           "Invalid count of restored_tensors, expecting a multiple of 4.")
     weights = restored_tensors[:len(restored_tensors) // 4]
     biases = restored_tensors[len(restored_tensors) // 4:]
   elif (self._cudnn_rnn.direction == CUDNN_RNN_UNIDIRECTION and
         self._cudnn_rnn.rnn_mode == CUDNN_GRU):
     if len(restored_tensors) % 8 != 0:
       raise ValueError(
           "Invalid count of restored_tensors, expecting a multiple of 8.")
     weights = restored_tensors[:len(restored_tensors) // 8 * 3]
     biases = restored_tensors[len(restored_tensors) // 8 * 3:]
   else:
     weights = restored_tensors[:len(restored_tensors) // 2]
     biases = restored_tensors[len(restored_tensors) // 2:]
   weights, biases = self._untransform_canonical(weights, biases)
   params = self._canonical_to_params(weights, biases)
   if not isinstance(params, tuple):
     params = (params,)
   assign_ops = [
       state_ops.assign(variable, param, validate_shape=False)
       for variable, param in zip(self._variables, params)
   ]
   return control_flow_ops.group(*assign_ops)
 def _apply_sparse_shared(self, grad, var, indices, scatter_add):
   beta1_power = math_ops.cast(self._beta1_power, var.dtype.base_dtype)
   beta2_power = math_ops.cast(self._beta2_power, var.dtype.base_dtype)
   lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
   beta1_t = math_ops.cast(self._beta1_t, var.dtype.base_dtype)
   beta2_t = math_ops.cast(self._beta2_t, var.dtype.base_dtype)
   epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
   lr = (lr_t * math_ops.sqrt(1 - beta2_power) / (1 - beta1_power))
   # m_t = beta1 * m + (1 - beta1) * g_t
   m = self.get_slot(var, "m")
   m_scaled_g_values = grad * (1 - beta1_t)
   m_t = state_ops.assign(m, m * beta1_t,
                          use_locking=self._use_locking)
   with ops.control_dependencies([m_t]):
     m_t = scatter_add(m, indices, m_scaled_g_values)
   # v_t = beta2 * v + (1 - beta2) * (g_t * g_t)
   v = self.get_slot(var, "v")
   v_scaled_g_values = (grad * grad) * (1 - beta2_t)
   v_t = state_ops.assign(v, v * beta2_t, use_locking=self._use_locking)
   with ops.control_dependencies([v_t]):
     v_t = scatter_add(v, indices, v_scaled_g_values)
   v_sqrt = math_ops.sqrt(v_t)
   var_update = state_ops.assign_sub(var,
                                     lr * m_t / (v_sqrt + epsilon_t),
                                     use_locking=self._use_locking)
   return control_flow_ops.group(*[var_update, m_t, v_t])
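
# A hedged numpy sketch of the dense-equivalent Adam step that the sparse code
# above implements (bias-corrected learning rate, epsilon added outside the
# square root). Illustrative only, not the optimizer's API.
import numpy as np


def dense_adam_step(var, m, v, g, t, lr=0.001, beta1=0.9, beta2=0.999,
                    eps=1e-8):
  m = beta1 * m + (1.0 - beta1) * g
  v = beta2 * v + (1.0 - beta2) * g * g
  lr_t = lr * np.sqrt(1.0 - beta2**t) / (1.0 - beta1**t)
  var = var - lr_t * m / (np.sqrt(v) + eps)
  return var, m, v
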
def _auc_hist_accumulate(hist_true, hist_false, nbins, collections):
  """Accumulate histograms in new variables."""
  with variable_scope.variable_op_scope(
      [hist_true, hist_false], None, 'hist_accumulate'):
    # Holds running total histogram of scores for records labeled True.
    hist_true_acc = variable_scope.get_variable(
        'hist_true_acc',
        initializer=array_ops.zeros_initializer(
            [nbins],
            dtype=hist_true.dtype),
        collections=collections,
        trainable=False)
    # Holds running total histogram of scores for records labeled False.
    hist_false_acc = variable_scope.get_variable(
        'hist_false_acc',
        initializer=array_ops.zeros_initializer(
            [nbins],
            dtype=hist_false.dtype),
        collections=collections,
        trainable=False)

    update_op = control_flow_ops.group(
        hist_true_acc.assign_add(hist_true),
        hist_false_acc.assign_add(hist_false),
        name='update_op')

    return hist_true_acc, hist_false_acc, update_op
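
# A hedged usage sketch for the accumulator above: bucket a batch of scores into
# fixed-width histograms split by a boolean `labels` tensor and hand them to
# `_auc_hist_accumulate`. The argument names, value range and bin count are
# illustrative assumptions.
import tensorflow.compat.v1 as tf


def make_auc_histogram_updates(scores, labels, nbins=100):
  hist_true = tf.histogram_fixed_width(
      tf.boolean_mask(scores, labels), value_range=[0.0, 1.0], nbins=nbins)
  hist_false = tf.histogram_fixed_width(
      tf.boolean_mask(scores, tf.logical_not(labels)),
      value_range=[0.0, 1.0], nbins=nbins)
  return _auc_hist_accumulate(hist_true, hist_false, nbins,
                              collections=[tf.GraphKeys.LOCAL_VARIABLES])
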
def build_graph(device, input_shape, perm, datatype, num_iters):
  """builds a graph containing a sequence of conv2d operations.

  Args:
    device: String, the device to run on.
    input_shape: Shape of the input tensor.
    perm: A list of ints with the same length as input tensor's dimension.
    datatype: numpy data type of the input tensor.
    num_iters: number of iterations to run transpose.

  Returns:
    A grouped op that runs the sequence of transpose operations.
  """
  with ops.device("/%s:0" % device):
    total_size = np.prod(input_shape)
    inp = np.arange(1, total_size + 1, dtype=datatype).reshape(input_shape)
    t = constant_op.constant(inp, shape=input_shape)

    outputs = []
    transpose_op = array_ops.transpose(t, perm)
    outputs.append(transpose_op)
    for _ in range(1, num_iters):
      with ops.control_dependencies([transpose_op]):
        transpose_op = array_ops.transpose(t, perm)
        outputs.append(transpose_op)
    return control_flow_ops.group(*outputs)
  def _between_graph_with_monitored_session(self, strategy):
    context = distribute_coordinator_context.get_current_worker_context()
    self.assertIsNotNone(context)
    with ops.device("/job:ps/task:0"):
      # TODO(yuefengz): investigate why not using resource variable will make
      # the test flaky.
      x = variable_scope.get_variable("xx", initializer=10.0, use_resource=True)
    with ops.device("/job:ps/task:1"):
      y = variable_scope.get_variable("yy", initializer=20.0, use_resource=True)

    x_add = x.assign_add(2.0)
    y_sub = y.assign_sub(2.0)
    train_op = control_flow_ops.group([x_add, y_sub])

    # The monitored session will run init or ready ops.
    with monitored_session.MonitoredSession() as sess:
      sess.run(train_op)

      # Synchronize workers after one step to make sure they all have finished
      # training.
      if context.has_barrier:
        context.wait_for_other_workers()
      else:
        self._barrier.wait()

      x_val, y_val = sess.run([x, y])

    self.assertEqual(x_val, 16.0)
    self.assertEqual(y_val, 14.0)
    if x_val == 16.0 and y_val == 14.0:
      with self._lock:
        self._result_correct += 1
    def _get_train_ops(self, features, targets):
        """See base class."""
        global_step = contrib_variables.get_global_step()
        assert global_step
        logits = self._logits(features, is_training=True)
        if self._enable_centered_bias:
            centered_bias_step = [self._centered_bias_step(targets, features)]
        else:
            centered_bias_step = []
        with ops.control_dependencies(centered_bias_step):
            loss = self._loss(logits, targets, features)
        logging_ops.scalar_summary("loss", loss)

        linear_vars = self._get_linear_vars()
        dnn_vars = self._get_dnn_vars()
        grads = gradients.gradients(loss, dnn_vars + linear_vars)
        if self._gradient_clip_norm:
            grads, _ = clip_ops.clip_by_global_norm(grads, self._gradient_clip_norm)

        dnn_grads = grads[0 : len(dnn_vars)]
        linear_grads = grads[len(dnn_vars) :]

        train_ops = self._get_linear_training_ops(linear_grads, linear_vars) + self._get_dnn_training_ops(
            dnn_grads, dnn_vars
        )

        train_step = control_flow_ops.group(*train_ops, name="combined_training_op")
        with ops.control_dependencies([train_step]):
            with ops.get_default_graph().colocate_with(global_step):
                return state_ops.assign_add(global_step, 1).op, loss
 def loop_body(j):
   ns1 = tf.scatter_update(select1, j, 10.0)
   ns2 = tf.scatter_update(select2, j, 10.0)
   nj = tf.add(j, 1)
   op = control_flow_ops.group(ns1, ns2)
   nj = control_flow_ops.with_dependencies([op], nj)
   return [nj]
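
# A hedged sketch of the surrounding graph this loop body appears to assume: two
# variables named `select1`/`select2` and a `tf.while_loop` that calls the body.
# Shapes and the iteration bound are illustrative assumptions.
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

select1 = tf.Variable(tf.zeros([5]))
select2 = tf.Variable(tf.zeros([5]))

loop = tf.while_loop(lambda j: j < 5, loop_body, [tf.constant(0)])

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  sess.run(loop)
  print(sess.run([select1, select2]))  # Both end up as [10., 10., 10., 10., 10.]
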
  def _AddShardedRestoreOps(self, filename_tensor, per_device,
                            restore_sequentially, reshape):
    """Add Ops to save variables from multiple devices.

    Args:
      filename_tensor: Tensor for the path of the file to load.
      per_device: A list of (device, _VarToSave) pairs, as
        returned by _GroupByDevices().
      restore_sequentially: True if we want to restore variables sequentially
        within a shard.
      reshape: True if we want to reshape loaded tensors to the shape of
        the corresponding variable.

    Returns:
      An Operation that restores the variables.
    """
    sharded_restores = []
    for shard, (device, vars_to_save) in enumerate(per_device):
      with ops.device(device):
        sharded_restores.append(self._AddRestoreOps(
            filename_tensor,
            vars_to_save,
            restore_sequentially,
            reshape,
            preferred_shard=shard,
            name="restore_shard"))
    return control_flow_ops.group(*sharded_restores, name="restore_all")
    def apply(self, var_list=None):
        # TODO(touts): op_scope
        if var_list is None:
            var_list = variables.trainable_variables()
        for var in var_list:
            if var.dtype.base_dtype not in [dtypes.float32, dtypes.float64]:
                raise TypeError(
                    "The variables must be float or double: %s" % var)
            if var in self._averages:
                raise ValueError(
                    "Moving average already computed for: %s" % var)

            # For variables: to lower communication bandwidth across devices we keep
            # the moving averages on the same device as the variables. For other
            # tensors, we rely on the existing device allocation mechanism.
            if isinstance(var, variables.Variable):
                avg = slot_creator.create_slot(
                    var, var.initialized_value(), self._name,
                    colocate_with_primary=True)
            else:
                avg = slot_creator.create_zeros_slot(
                    var, self._name, colocate_with_primary=(var.op.type == "Variable"))
            self._averages[var] = avg

        with ops.name_scope(self._name) as scope:
            decay = self._num_updates / (self._num_updates + 1)
            updates = []
            updates.append(self._num_updates_op)
            for var in var_list:
                updates.append(assign_moving_average(
                    self._averages[var], var, decay))
            return control_flow_ops.group(*updates, name=scope)
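
# A hedged sketch of the exponential moving-average update that a helper like
# `assign_moving_average` applies to each variable grouped above:
#   avg <- avg - (1 - decay) * (avg - var)  ==  decay * avg + (1 - decay) * var
import tensorflow.compat.v1 as tf


def simple_assign_moving_average(avg, var, decay):
  return tf.assign_sub(avg, (1.0 - decay) * (avg - var))
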
def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
  """Build a graph containing a sequence of matmul operations.

  Args:
    device: String, the device to run on.
    n: tensor A's first dimension size.
    m: tensor A's second dimension size.
    k: tensor B's second dimension size.
    transpose_a: Boolean indicating whether tensor A is transposed.
    transpose_b: Boolean indicating whether tensor B is transposed.
    dtype: numpy data type of the input tensor.

  Returns:
    A matmul operation to run()
  """
  with ops.device('%s' % device):
    if not transpose_a:
      x = variables.VariableV1(random_ops.random_uniform([n, m], dtype=dtype),
                               use_resource=False)
    else:
      x = variables.VariableV1(random_ops.random_uniform([m, n], dtype=dtype),
                               use_resource=False)
    if not transpose_b:
      y = variables.VariableV1(random_ops.random_uniform([m, k], dtype=dtype),
                               use_resource=False)
    else:
      y = variables.VariableV1(random_ops.random_uniform([k, m], dtype=dtype),
                               use_resource=False)

    z = math_ops.matmul(x, y, transpose_a=transpose_a, transpose_b=transpose_b)
    return control_flow_ops.group(z)
  def _get_train_ops(self, features, targets):
    """Method that builds model graph and returns trainer ops.

    Args:
      features: `Tensor` or `dict` of `Tensor` objects.
      targets: `Tensor` or `dict` of `Tensor` objects.

    Returns:
      Tuple of train `Operation` and loss `Tensor`.
    """
    features, spec = data_ops.ParseDataTensorOrDict(features)
    labels = data_ops.ParseLabelTensorOrDict(targets)

    graph_builder = self.graph_builder_class(
        self.params, device_assigner=self.device_assigner,
        **self.construction_args)

    epoch = None
    if self.data_feeder:
      epoch = self.data_feeder.make_epoch_variable()

    train = control_flow_ops.group(
        graph_builder.training_graph(
            features, labels, data_spec=spec, epoch=epoch,
            **self.training_args),
        state_ops.assign_add(contrib_framework.get_global_step(), 1))

    self.training_loss = graph_builder.training_loss()

    return train, self.training_loss
def evaluation_loop(master,
                    checkpoint_dir,
                    logdir,
                    num_evals=1,
                    eval_op=None,
                    eval_op_feed_dict=None,
                    final_op=None,
                    final_op_feed_dict=None,
                    summary_op=_USE_DEFAULT,
                    summary_op_feed_dict=None,
                    variables_to_restore=None,
                    eval_interval_secs=60,
                    max_number_of_evaluations=None,
                    session_config=None):
    """Runs TF-Slim's Evaluation Loop.

  Args:
    master: The BNS address of the TensorFlow master.
    checkpoint_dir: The directory where checkpoints are stored.
    logdir: The directory where the TensorFlow summaries are written to.
    num_evals: The number of times to run `eval_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions. The
      value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slims metric ops. By
      default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    eval_interval_secs: The minimum number of seconds between evaluations.
    max_number_of_evaluations: The maximum number of evaluation iterations to
      run. If the value is left as `None`, the evaluation continues
      indefinitely.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    global_step = variables.get_or_create_global_step()

    init_op = control_flow_ops.group(tf_variables.initialize_all_variables(),
                                     tf_variables.initialize_local_variables(),
                                     data_flow_ops.initialize_all_tables())

    saver = tf_saver.Saver(variables_to_restore
                           or variables.get_variables_to_restore())

    summary_writer = summary_io.SummaryWriter(logdir)

    sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                               logdir=logdir,
                               init_op=init_op,
                               summary_op=None,
                               summary_writer=None,
                               global_step=None,
                               saver=saver)

    last_checkpoint = None
    number_of_evaluations = 0
    while True:
        last_checkpoint = wait_for_new_checkpoint(checkpoint_dir,
                                                  last_checkpoint)
        start = time.time()
        logging.info('Starting evaluation at ' +
                     time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

        with sv.managed_session(master,
                                start_standard_services=False,
                                config=session_config) as sess:
            sv.saver.restore(sess, last_checkpoint)
            sv.start_queue_runners(sess)
            final_op_value = evaluation(
                sess,
                num_evals=num_evals,
                eval_op=eval_op,
                eval_op_feed_dict=eval_op_feed_dict,
                final_op=final_op,
                final_op_feed_dict=final_op_feed_dict,
                summary_op=summary_op,
                summary_op_feed_dict=summary_op_feed_dict,
                summary_writer=summary_writer,
                global_step=global_step)

        logging.info('Finished evaluation at ' +
                     time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
        number_of_evaluations += 1
        if (max_number_of_evaluations
                and number_of_evaluations >= max_number_of_evaluations):
            logging.info('Reached max_number_of_evaluations=%s. Exit',
                         max_number_of_evaluations)
            break

        time_to_next_eval = start + eval_interval_secs - time.time()
        if time_to_next_eval > 0:
            time.sleep(time_to_next_eval)

    return final_op_value
    def minimize(self, global_step=None, name=None):
        """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
        # Technically, the op depends on a lot more than the variables,
        # but we'll keep the list short.
        with op_scope([], name, 'sdca/minimize'):
            sparse_features_indices = []
            sparse_features_values = []
            for sf in self._examples['sparse_features']:
                sparse_features_indices.append(convert_to_tensor(sf.indices))
                sparse_features_values.append(convert_to_tensor(sf.values))

            example_ids_hashed = _sdca_ops.sdca_fprint(
                convert_to_tensor(self._examples['example_ids']))
            example_state_data = self._hashtable.lookup(example_ids_hashed)

            example_state_data_updated = _sdca_ops.sdca_solver(
                sparse_features_indices,
                sparse_features_values,
                self._convert_n_to_tensor(self._examples['dense_features']),
                convert_to_tensor(self._examples['example_weights']),
                convert_to_tensor(self._examples['example_labels']),
                self._convert_n_to_tensor(
                    self._slots['unshrinked_sparse_features_weights'],
                    as_ref=True),
                self._convert_n_to_tensor(
                    self._slots['unshrinked_dense_features_weights'],
                    as_ref=True),
                example_state_data,
                l1=self._options['symmetric_l1_regularization'],
                l2=self._symmetric_l2_regularization(),
                # TODO(sibyl-Aix6ihai): Provide empirical evidence for this. It is better
                # to run more than one iteration on single mini-batch as we want to
                # spend more time in compute. SDCA works better with larger
                # mini-batches and there is also recent work that shows its better to
                # reuse old samples than train on new samples.
                # See: http://arxiv.org/abs/1602.02136.
                num_inner_iterations=2,
                loss_type=self._options['loss_type'])
            with ops.control_dependencies([example_state_data_updated]):
                insert_op = self._hashtable.insert(example_ids_hashed,
                                                   example_state_data_updated)
                update_ops = [insert_op]
                for name in [
                        'sparse_features_weights', 'dense_features_weights'
                ]:
                    for var, slot_var in zip(self._variables[name],
                                             self._slots['unshrinked_' +
                                                         name]):
                        update_ops.append(var.assign(slot_var))
                update_group = control_flow_ops.group(*update_ops)
                with ops.control_dependencies([update_group]):
                    shrink_l1 = _sdca_ops.sdca_shrink_l1(
                        self._convert_n_to_tensor(
                            self._variables['sparse_features_weights'],
                            as_ref=True),
                        self._convert_n_to_tensor(
                            self._variables['dense_features_weights'],
                            as_ref=True),
                        l1=self._options['symmetric_l1_regularization'],
                        l2=self._symmetric_l2_regularization())
            if not global_step:
                return shrink_l1
            with ops.control_dependencies([shrink_l1]):
                return state_ops.assign_add(global_step, 1, name=name).op
 def _default_local_init_op():
   return control_flow_ops.group(variables.local_variables_initializer(),
                                 data_flow_ops.initialize_all_tables())
    def _model_fn(features, labels, mode):
        """Function that returns predictions, training loss, and training op."""
        if (isinstance(features, ops.Tensor)
                or isinstance(features, sparse_tensor.SparseTensor)):
            features = {'features': features}
        weights = None
        if weights_name and weights_name in features:
            weights = features.pop(weights_name)

        keys = None
        if keys_name and keys_name in features:
            keys = features.pop(keys_name)

        # If we're doing eval, optionally ignore device_assigner.
        # Also ignore device assigner if we're exporting (mode == INFER)
        dev_assn = device_assigner
        if (mode == model_fn_lib.ModeKeys.INFER
                or (local_eval and mode == model_fn_lib.ModeKeys.EVAL)):
            dev_assn = None

        graph_builder = graph_builder_class(params, device_assigner=dev_assn)

        logits = graph_builder.inference_graph(features)
        # For binary classification problems, convert probabilities to logits.
        # Includes hack to get around the fact that a probability might be 0 or 1.
        if not params.regression and params.num_classes == 2:
            class_1_probs = array_ops.slice(logits, [0, 1], [-1, 1])
            logits = math_ops.log(
                math_ops.maximum(
                    class_1_probs /
                    math_ops.maximum(1.0 - class_1_probs, EPSILON), EPSILON))

        # labels might be None if we're doing prediction (which brings up the
        # question of why we force everything to adhere to a single model_fn).
        training_graph = None
        training_hooks = []
        if labels is not None and mode == model_fn_lib.ModeKeys.TRAIN:
            with ops.control_dependencies([logits.op]):
                training_graph = control_flow_ops.group(
                    graph_builder.training_graph(features,
                                                 labels,
                                                 input_weights=weights,
                                                 num_trainers=num_trainers,
                                                 trainer_id=trainer_id),
                    state_ops.assign_add(contrib_framework.get_global_step(),
                                         1))

        # Put weights back in
        if weights is not None:
            features[weights_name] = weights

        # TensorForest's training graph isn't calculated directly from the loss
        # like many other models.
        def _train_fn(unused_loss):
            return training_graph

        model_ops = model_head.create_model_fn_ops(features=features,
                                                   labels=labels,
                                                   mode=mode,
                                                   train_op_fn=_train_fn,
                                                   logits=logits,
                                                   scope=head_scope)

        if report_feature_importances:
            training_hooks.append(
                TensorForestRunOpAtEndHook({
                    'feature_importances':
                    graph_builder.feature_importances()
                }))

        if early_stopping_rounds:
            training_hooks.append(
                TensorForestLossHook(
                    early_stopping_rounds,
                    early_stopping_loss_threshold=early_stopping_loss_threshold,
                    loss_op=model_ops.loss))

        model_ops.training_hooks.extend(training_hooks)

        if keys is not None:
            model_ops.predictions[keys_name] = keys

        return model_ops
    def _experimental_run_steps_on_iterator(self,
                                            fn,
                                            iterator,
                                            iterations,
                                            initial_loop_values=None):
        if initial_loop_values is None:
            initial_loop_values = {}
        initial_loop_values = nest.flatten(initial_loop_values)

        ctx = values.MultiStepContext()

        def body(i, *args):
            """A wrapper around `fn` to create the while loop body."""
            del args
            fn_inputs = iterator.get_next()
            if not isinstance(fn_inputs, tuple):
                fn_inputs = (fn_inputs, )
            fn_result = fn(ctx, fn_inputs)
            for (name, output) in ctx.last_step_outputs.items():
                # Convert all outputs to tensors, potentially from `DistributedValues`.
                ctx.last_step_outputs[name] = self._unwrap(output)
            flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
            with ops.control_dependencies([fn_result]):
                return [i + 1] + flat_last_step_outputs

        # We capture the control_flow_context at this point, before we run `fn`
        # inside a while_loop. This is useful in cases where we might need to exit
        # these contexts and get back to the outer context to do some things, for
        # e.g. create an op which should be evaluated only once at the end of the
        # loop on the host. One such usage is in creating metrics' value op.
        self._outer_control_flow_context = (
            ops.get_default_graph()._get_control_flow_context())  # pylint: disable=protected-access

        cond = lambda i, *args: i < iterations
        i = constant_op.constant(0)
        loop_result = control_flow_ops.while_loop(cond,
                                                  body,
                                                  [i] + initial_loop_values,
                                                  name="",
                                                  parallel_iterations=1,
                                                  back_prop=False,
                                                  swap_memory=False,
                                                  return_same_structure=True)
        del self._outer_control_flow_context

        ctx.run_op = control_flow_ops.group(loop_result)

        # Convert the last_step_outputs from a list to the original dict structure
        # of last_step_outputs.
        last_step_tensor_outputs = loop_result[1:]
        last_step_tensor_outputs_dict = nest.pack_sequence_as(
            ctx.last_step_outputs, last_step_tensor_outputs)

        for name, reduce_op in ctx._last_step_outputs_reduce_ops.items():  # pylint: disable=protected-access
            output = last_step_tensor_outputs_dict[name]
            # For outputs that have already been reduced, wrap them in a Mirrored
            # container, else in a PerReplica container.
            if reduce_op is None:
                last_step_tensor_outputs_dict[name] = values.regroup(
                    {d: t
                     for d, t in zip(self._devices, output)},
                    values.PerReplica)
            else:
                assert len(output) == 1
                last_step_tensor_outputs_dict[name] = output[0]

        ctx._set_last_step_outputs(last_step_tensor_outputs_dict)  # pylint: disable=protected-access
        return ctx
    def training_graph(self,
                       input_data,
                       input_labels,
                       num_trainers=1,
                       trainer_id=0,
                       **tree_kwargs):
        """Constructs a TF graph for training a random forest.

    Args:
      input_data: A tensor or dict of string->Tensor for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      num_trainers: Number of parallel trainers to split trees among.
      trainer_id: Which trainer this instance is.
      **tree_kwargs: Keyword arguments passed to each tree's training_graph.

    Returns:
      The last op in the random forest training graph.

    Raises:
      NotImplementedError: If trying to use bagging with sparse features.
    """
        processed_dense_features, processed_sparse_features, data_spec = (
            data_ops.ParseDataTensorOrDict(input_data))

        if input_labels is not None:
            labels = data_ops.ParseLabelTensorOrDict(input_labels)

        data_spec = data_spec or self.get_default_data_spec(input_data)

        tree_graphs = []
        trees_per_trainer = self.params.num_trees / num_trainers
        tree_start = int(trainer_id * trees_per_trainer)
        tree_end = int((trainer_id + 1) * trees_per_trainer)
        for i in range(tree_start, tree_end):
            with ops.device(self.variables.device_dummies[i].device):
                seed = self.params.base_random_seed
                if seed != 0:
                    seed += i
                # If using bagging, randomly select some of the input.
                tree_data = processed_dense_features
                tree_labels = labels
                if self.params.bagging_fraction < 1.0:
                    # TODO(gilberth): Support bagging for sparse features.
                    if processed_sparse_features is not None:
                        raise NotImplementedError(
                            'Bagging not supported with sparse features.')
                    # TODO(thomaswc): This does sampling without replacement.  Consider
                    # also allowing sampling with replacement as an option.
                    batch_size = array_ops.strided_slice(
                        array_ops.shape(processed_dense_features), [0], [1])
                    r = random_ops.random_uniform(batch_size, seed=seed)
                    mask = math_ops.less(
                        r,
                        array_ops.ones_like(r) * self.params.bagging_fraction)
                    gather_indices = array_ops.squeeze(array_ops.where(mask),
                                                       squeeze_dims=[1])
                    # TODO(thomaswc): Calculate out-of-bag data and labels, and store
                    # them for use in calculating statistics later.
                    tree_data = array_ops.gather(processed_dense_features,
                                                 gather_indices)
                    tree_labels = array_ops.gather(labels, gather_indices)
                if self.params.bagged_features:
                    if processed_sparse_features is not None:
                        raise NotImplementedError(
                            'Feature bagging not supported with sparse features.'
                        )
                    tree_data = self._bag_features(i, tree_data)

                tree_graphs.append(self.trees[i].training_graph(
                    tree_data,
                    tree_labels,
                    seed,
                    data_spec=data_spec,
                    sparse_features=processed_sparse_features,
                    **tree_kwargs))

        return control_flow_ops.group(*tree_graphs, name='train')
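# --- Hedged sketch (not from the original source): the bagging step above keeps
# each input row with probability `bagging_fraction` by thresholding a uniform
# draw and gathering the surviving rows. The same pattern as a standalone helper
# using the public TF 1.x API:
import tensorflow as tf

def bag_rows(data, labels, bagging_fraction, seed=None):
  batch_size = tf.shape(data)[0:1]
  r = tf.random_uniform(batch_size, seed=seed)
  mask = tf.less(r, tf.ones_like(r) * bagging_fraction)
  keep = tf.squeeze(tf.where(mask), axis=[1])
  return tf.gather(data, keep), tf.gather(labels, keep)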
Example #37
 def initializer(self):
     # return grouped ops of all the var initializations of component values of
     # the mirrored variable
     return control_flow_ops.group(
         [v.initializer for v in self._index.values()])
Example #38
  def _call_for_each_tower(self, fn, *args, **kwargs):
    kwargs.pop('run_concurrently', None)

    inputs = {'args': args, 'kwargs': kwargs}
    flat_inputs = nest.flatten(inputs)

    feed_mask = [isinstance(f, values.PerIteration) for f in flat_inputs]

    feeds = lambda: itertools.compress(flat_inputs, feed_mask)
    shapes = [f.get_shape() for f in feeds()]
    if any([not s.is_fully_defined() for s in shapes]):
      raise ValueError(
          'TPU currently requires fully defined shapes. Either use '
          'set_shape() on the input tensors or use '
          'dataset.apply(map_and_batch(..., drop_remainder=True)).')
    types = [f.get_dtype() for f in feeds()]

    def infeed_input(i):
      """Get input, split it and then enqueue."""
      iteration_inputs = [f.get(i) for f in feeds()]

      infeed_inputs = [[inputs_per_core[core_id]
                        for inputs_per_core in iteration_inputs]
                       for core_id in range(self._num_cores_per_host)]

      infeed_ops = []
      for core_id, infeed_input in enumerate(infeed_inputs):
        infeed_ops.append(
            tpu_ops.infeed_enqueue_tuple(
                inputs=infeed_input, shapes=shapes, device_ordinal=core_id))

      with ops.control_dependencies(infeed_ops):
        return i + 1

    with ops.device('/task:0/device:CPU:0'):
      enqueue_ops = control_flow_ops.while_loop(
          lambda i: i < self._iterations_per_step,
          infeed_input, [constant_op.constant(0)],
          parallel_iterations=1)

    def dequeueing_fn(*args, **kwargs):
      """Dequeue input arguments and supply them to `fn`."""
      del args, kwargs
      dequeued = tpu.infeed_dequeue_tuple(dtypes=types, shapes=shapes)
      dequeued = iter(dequeued)

      fn_inputs = []
      for inp, is_feed in zip(flat_inputs, feed_mask):
        if is_feed:
          fn_inputs.append(next(dequeued))
        else:
          fn_inputs.append(inp)

      fn_inputs = nest.pack_sequence_as(inputs, fn_inputs)
      return fn(*fn_inputs['args'], **fn_inputs['kwargs'])

    def iterate_on_tpu():
      return tpu.repeat(self._iterations_per_step, dequeueing_fn, [])

    with one_device_strategy._OneDeviceTowerContext(self):  # pylint: disable=protected-access
      tpu_result = tpu.batch_parallel(
          iterate_on_tpu, [], num_shards=self._num_cores_per_host)

    return control_flow_ops.group(tpu_result, enqueue_ops)
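# --- Hedged sketch (not from the original source): `dequeueing_fn` above rebuilds
# the original {'args': ..., 'kwargs': ...} structure by flattening the inputs,
# swapping in dequeued tensors where the feed mask is True, and packing the flat
# list back. A plain demonstration of that splice, assuming `tf.nest` is
# available (TF >= 1.15):
import tensorflow as tf

inputs = {'args': ('a', 'b'), 'kwargs': {'k': 'c'}}
flat_inputs = tf.nest.flatten(inputs)       # ['a', 'b', 'c']
feed_mask = [True, False, True]
dequeued = iter(['A', 'C'])                 # stand-ins for dequeued tensors

spliced = [next(dequeued) if is_feed else inp
           for inp, is_feed in zip(flat_inputs, feed_mask)]
print(tf.nest.pack_sequence_as(inputs, spliced))
# {'args': ('A', 'b'), 'kwargs': {'k': 'C'}}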
Example #39
 def initializer(self):
     return control_flow_ops.group(
         [iterator.initializer for iterator in self._iterators.values()])
def initialize_iterator(iterator, distribution_strategy):
  with distribution_strategy.scope():
    init_op = control_flow_ops.group(iterator.initialize())
    if not context.executing_eagerly():
      K.get_session((init_op,)).run(init_op)
Example #41
def update_confusion_matrix_variables(variables_to_update,
                                      y_true,
                                      y_pred,
                                      thresholds,
                                      top_k=None,
                                      class_id=None,
                                      sample_weight=None,
                                      multi_label=False,
                                      label_weights=None):
    """Returns op to update the given confusion matrix variables.

  For every pair of values in y_true and y_pred:

  true_positives: y_true == True and y_pred > thresholds
  false_negatives: y_true == True and y_pred <= thresholds
  true_negatives: y_true == False and y_pred <= thresholds
  false_positives: y_true == False and y_pred > thresholds

  The results will be weighted and added together. When multiple thresholds are
  provided, we will repeat the same for every threshold.

  For estimation of these metrics over a stream of data, the function creates an
  `update_op` operation that updates the given variables.

  If `sample_weight` is `None`, weights default to 1.
  Use weights of 0 to mask values.

  Args:
    variables_to_update: Dictionary with 'tp', 'fn', 'tn', 'fp' as valid keys
      and corresponding variables to update as values.
    y_true: A `Tensor` whose shape matches `y_pred`. Will be cast to `bool`.
    y_pred: A floating point `Tensor` of arbitrary shape and whose values are in
      the range `[0, 1]`.
    thresholds: A float value, float tensor, python list, or tuple of float
      thresholds in `[0, 1]`, or NEG_INF (used when top_k is set).
    top_k: Optional int, indicates that the positive labels should be limited to
      the top k predictions.
    class_id: Optional int, limits the prediction and labels to the class
      specified by this argument.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank as
      `y_true`, and must be broadcastable to `y_true` (i.e., all dimensions must
      be either `1`, or the same as the corresponding `y_true` dimension).
    multi_label: Optional boolean indicating whether multidimensional
      prediction/labels should be treated as multilabel responses, or flattened
      into a single label. When True, the values of `variables_to_update` must
      have a second dimension equal to the number of labels in y_true and
      y_pred, and those tensors must not be RaggedTensors.
    label_weights: (optional) tensor of non-negative weights for multilabel
      data. The weights are applied when calculating TP, FP, FN, and TN without
      explicit multilabel handling (i.e. when the data is to be flattened).

  Returns:
    Update op.

  Raises:
    ValueError: If `y_pred` and `y_true` have mismatched shapes, or if
      `sample_weight` is not `None` and its shape doesn't match `y_pred`, or if
      `variables_to_update` contains invalid keys.
  """
    if multi_label and label_weights is not None:
        raise ValueError(
            '`label_weights` for multilabel data should be handled '
            'outside of `update_confusion_matrix_variables` when '
            '`multi_label` is True.')
    if variables_to_update is None:
        return
    if not any(key
               for key in variables_to_update if key in list(ConfusionMatrix)):
        raise ValueError(
            'Please provide at least one valid confusion matrix '
            'variable to update. Valid variable key options are: "{}". '
            'Received: "{}"'.format(list(ConfusionMatrix),
                                    variables_to_update.keys()))

    variable_dtype = list(variables_to_update.values())[0].dtype

    y_true = math_ops.cast(y_true, dtype=variable_dtype)
    y_pred = math_ops.cast(y_pred, dtype=variable_dtype)
    thresholds = ops.convert_to_tensor_v2(thresholds, dtype=variable_dtype)
    num_thresholds = thresholds.shape[0]
    if multi_label:
        one_thresh = math_ops.equal(math_ops.cast(1, dtype=dtypes.int32),
                                    array_ops.rank(thresholds),
                                    name='one_set_of_thresholds_cond')
    else:
        [y_pred, y_true
         ], _ = ragged_assert_compatible_and_get_flat_values([y_pred, y_true],
                                                             sample_weight)
        one_thresh = math_ops.cast(True, dtype=dtypes.bool)

    invalid_keys = [
        key for key in variables_to_update if key not in list(ConfusionMatrix)
    ]
    if invalid_keys:
        raise ValueError(
            'Invalid keys: {}. Valid variable key options are: "{}"'.format(
                invalid_keys, list(ConfusionMatrix)))

    with ops.control_dependencies([
            check_ops.assert_greater_equal(y_pred,
                                           math_ops.cast(0.0,
                                                         dtype=y_pred.dtype),
                                           message='predictions must be >= 0'),
            check_ops.assert_less_equal(y_pred,
                                        math_ops.cast(1.0, dtype=y_pred.dtype),
                                        message='predictions must be <= 1')
    ]):
        if sample_weight is None:
            y_pred, y_true = tf_losses_utils.squeeze_or_expand_dimensions(
                y_pred, y_true)
        else:
            y_pred, y_true, sample_weight = (
                tf_losses_utils.squeeze_or_expand_dimensions(
                    y_pred, y_true, sample_weight=sample_weight))
    y_pred.shape.assert_is_compatible_with(y_true.shape)

    if top_k is not None:
        y_pred = _filter_top_k(y_pred, top_k)
    if class_id is not None:
        y_true = y_true[..., class_id]
        y_pred = y_pred[..., class_id]

    pred_shape = array_ops.shape(y_pred)
    num_predictions = pred_shape[0]
    if y_pred.shape.ndims == 1:
        num_labels = 1
    else:
        num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
    thresh_label_tile = control_flow_ops.cond(
        one_thresh, lambda: num_labels,
        lambda: math_ops.cast(1, dtype=dtypes.int32))

    # Reshape predictions and labels, adding a dim for thresholding.
    if multi_label:
        predictions_extra_dim = array_ops.expand_dims(y_pred, 0)
        labels_extra_dim = array_ops.expand_dims(
            math_ops.cast(y_true, dtype=dtypes.bool), 0)
    else:
        # Flatten predictions and labels when not multilabel.
        predictions_extra_dim = array_ops.reshape(y_pred, [1, -1])
        labels_extra_dim = array_ops.reshape(
            math_ops.cast(y_true, dtype=dtypes.bool), [1, -1])

    # Tile the thresholds for every prediction.
    if multi_label:
        thresh_pretile_shape = [num_thresholds, 1, -1]
        thresh_tiles = [1, num_predictions, thresh_label_tile]
        data_tiles = [num_thresholds, 1, 1]
    else:
        thresh_pretile_shape = [num_thresholds, -1]
        thresh_tiles = [1, num_predictions * num_labels]
        data_tiles = [num_thresholds, 1]

    thresh_tiled = array_ops.tile(
        array_ops.reshape(thresholds, thresh_pretile_shape),
        array_ops.stack(thresh_tiles))

    # Tile the predictions for every threshold.
    preds_tiled = array_ops.tile(predictions_extra_dim, data_tiles)

    # Compare predictions and threshold.
    pred_is_pos = math_ops.greater(preds_tiled, thresh_tiled)

    # Tile labels by number of thresholds
    label_is_pos = array_ops.tile(labels_extra_dim, data_tiles)

    if sample_weight is not None:
        sample_weight = weights_broadcast_ops.broadcast_weights(
            math_ops.cast(sample_weight, dtype=variable_dtype), y_pred)
        weights_tiled = array_ops.tile(
            array_ops.reshape(sample_weight, thresh_tiles), data_tiles)
    else:
        weights_tiled = None

    if label_weights is not None and not multi_label:
        label_weights = array_ops.expand_dims(label_weights, 0)
        label_weights = weights_broadcast_ops.broadcast_weights(
            label_weights, y_pred)
        label_weights_tiled = array_ops.tile(
            array_ops.reshape(label_weights, thresh_tiles), data_tiles)
        if weights_tiled is None:
            weights_tiled = label_weights_tiled
        else:
            weights_tiled = math_ops.multiply(weights_tiled,
                                              label_weights_tiled)

    update_ops = []

    def weighted_assign_add(label, pred, weights, var):
        label_and_pred = math_ops.cast(math_ops.logical_and(label, pred),
                                       dtype=var.dtype)
        if weights is not None:
            label_and_pred *= math_ops.cast(weights, dtype=var.dtype)
        return var.assign_add(math_ops.reduce_sum(label_and_pred, 1))

    loop_vars = {
        ConfusionMatrix.TRUE_POSITIVES: (label_is_pos, pred_is_pos),
    }
    update_tn = ConfusionMatrix.TRUE_NEGATIVES in variables_to_update
    update_fp = ConfusionMatrix.FALSE_POSITIVES in variables_to_update
    update_fn = ConfusionMatrix.FALSE_NEGATIVES in variables_to_update

    if update_fn or update_tn:
        pred_is_neg = math_ops.logical_not(pred_is_pos)
        loop_vars[ConfusionMatrix.FALSE_NEGATIVES] = (label_is_pos,
                                                      pred_is_neg)

    if update_fp or update_tn:
        label_is_neg = math_ops.logical_not(label_is_pos)
        loop_vars[ConfusionMatrix.FALSE_POSITIVES] = (label_is_neg,
                                                      pred_is_pos)
        if update_tn:
            loop_vars[ConfusionMatrix.TRUE_NEGATIVES] = (label_is_neg,
                                                         pred_is_neg)

    for matrix_cond, (label, pred) in loop_vars.items():

        if matrix_cond in variables_to_update:
            update_ops.append(
                weighted_assign_add(label, pred, weights_tiled,
                                    variables_to_update[matrix_cond]))

    return control_flow_ops.group(update_ops)
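# --- Hedged sketch (not from the original source): the core of the update above
# is a broadcasted comparison of every (flattened) prediction against every
# threshold, followed by a per-threshold reduction. The same tiling in NumPy for
# the non-multilabel case:
import numpy as np

y_true = np.array([1, 0, 1, 0], dtype=bool)
y_pred = np.array([0.9, 0.6, 0.4, 0.2])
thresholds = np.array([0.3, 0.5, 0.7])

pred_is_pos = y_pred[None, :] > thresholds[:, None]   # [num_thresholds, num_preds]
label_is_pos = np.tile(y_true[None, :], (len(thresholds), 1))

true_positives = np.sum(label_is_pos & pred_is_pos, axis=1)
false_positives = np.sum(~label_is_pos & pred_is_pos, axis=1)
print(true_positives)   # [2 1 1]
print(false_positives)  # [1 1 0]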
def assign_from_checkpoint_fusion(model_path_1, var_list_1, model_path_2,
                                  var_list_2):
  """Creates an operation to assign specific variables from two checkpoints.

  Args:
    model_path_1: The full path to the first model checkpoint. To get the
      latest checkpoint use
      `model_path = tf.train.latest_checkpoint(checkpoint_dir)`.
    var_list_1: A list of `Variable` objects or a dictionary mapping names in
      the first checkpoint to the corresponding variables to initialize.
    model_path_2: The full path to the second model checkpoint.
    var_list_2: A list of `Variable` objects or a dictionary mapping names in
      the second checkpoint to the corresponding variables to initialize.

  Returns:
    The restore_op and the feed_dict that need to be run to restore the
    variables in `var_list_1` and `var_list_2`.

  Raises:
    ValueError: If one of the checkpoints is missing one of the requested
      variables, or if a variable's shape does not match the checkpoint value.
  """
  reader_1 = pywrap_tensorflow.NewCheckpointReader(model_path_1)
  reader_2 = pywrap_tensorflow.NewCheckpointReader(model_path_2)

  if isinstance(var_list_1, (tuple, list)):
    var_list_1 = {var.op.name: var for var in var_list_1}

  if isinstance(var_list_2, (tuple, list)):
    var_list_2 = {var.op.name: var for var in var_list_2}

  feed_dict = {}
  assign_ops = []

  for checkpoint_var_name_1 in var_list_1:
    var = var_list_1[checkpoint_var_name_1]
    # Checkpoint variable names may be bytes (Python 2) or str (Python 3).
    if isinstance(checkpoint_var_name_1, bytes):
      checkpoint_var_name_1 = checkpoint_var_name_1.decode("utf-8")
    checkpoint_var_name_1_r = checkpoint_var_name_1.replace(
        u"vgg_19_face", "vgg_19")
    if not reader_1.has_tensor(checkpoint_var_name_1_r):
      raise ValueError(
          'Checkpoint is missing variable [%s]' % checkpoint_var_name_1_r)

    var_value = reader_1.get_tensor(checkpoint_var_name_1_r)
    placeholder_name = 'placeholder/' + var.op.name
    placeholder_value = array_ops.placeholder(
        dtype=var.dtype.base_dtype,
        shape=var.get_shape(),
        name=placeholder_name)
    assign_ops.append(var.assign(placeholder_value))

    if var.get_shape() != var_value.shape:
      raise ValueError(
          'Total size of new array must be unchanged for %s '
          'lh_shape: [%s], rh_shape: [%s]'
          % (checkpoint_var_name_1_r, str(var_value.shape),
             str(var.get_shape())))

    feed_dict[placeholder_value] = var_value.reshape(var.get_shape())

  for checkpoint_var_name_2 in var_list_2:
    var = var_list_2[checkpoint_var_name_2]
    if isinstance(checkpoint_var_name_2, bytes):
      checkpoint_var_name_2 = checkpoint_var_name_2.decode("utf-8")
    checkpoint_var_name_2_r = checkpoint_var_name_2.replace(
        u"vgg_19_iris", "vgg_19")
    if not reader_2.has_tensor(checkpoint_var_name_2_r):
      raise ValueError(
          'Checkpoint is missing variable [%s]' % checkpoint_var_name_2_r)

    var_value = reader_2.get_tensor(checkpoint_var_name_2_r)
    placeholder_name = 'placeholder/' + var.op.name
    placeholder_value = array_ops.placeholder(
        dtype=var.dtype.base_dtype,
        shape=var.get_shape(),
        name=placeholder_name)
    assign_ops.append(var.assign(placeholder_value))

    if var.get_shape() != var_value.shape:
      raise ValueError(
          'Total size of new array must be unchanged for %s '
          'lh_shape: [%s], rh_shape: [%s]'
          % (checkpoint_var_name_2_r, str(var_value.shape),
             str(var.get_shape())))

    feed_dict[placeholder_value] = var_value.reshape(var.get_shape())

  assign_op = control_flow_ops.group(*assign_ops)
  return assign_op, feed_dict
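# --- Hedged usage sketch (hypothetical paths and variable lists): the helper
# above returns a grouped assign op plus a placeholder feed_dict, and both must
# be run together in a session, e.g.:
#
#   assign_op, feed_dict = assign_from_checkpoint_fusion(
#       '/path/to/face.ckpt', face_vars, '/path/to/iris.ckpt', iris_vars)
#   with tf.Session() as sess:
#     sess.run(assign_op, feed_dict=feed_dict)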
 def change_weights_op(self, v_new, w_new):
     return control_flow_ops.group(
         [self.v.assign(v_new),
          self.w.assign(w_new)])
 def default_init_op():
   return control_flow_ops.group(
       variables.global_variables_initializer(),
       resources.initialize_resources(resources.shared_resources()))
Example #45
    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        rms = self.get_slot(var, "rms")
        if self._momentum:
            mom = self.get_slot(var, "momentum")
            if self.centered:
                mg = self.get_slot(var, "mg")
                return training_ops.resource_sparse_apply_centered_rms_prop(
                    var.handle,
                    mg.handle,
                    rms.handle,
                    mom.handle,
                    coefficients["lr_t"],
                    coefficients["rho"],
                    coefficients["momentum"],
                    coefficients["epsilon"],
                    grad,
                    indices,
                    use_locking=self._use_locking)
            else:
                return training_ops.resource_sparse_apply_rms_prop(
                    var.handle,
                    rms.handle,
                    mom.handle,
                    coefficients["lr_t"],
                    coefficients["rho"],
                    coefficients["momentum"],
                    coefficients["epsilon"],
                    grad,
                    indices,
                    use_locking=self._use_locking)
        else:
            rms_scaled_g_values = (grad * grad) * coefficients["one_minus_rho"]
            rms_t = state_ops.assign(rms,
                                     rms * coefficients["rho"],
                                     use_locking=self._use_locking)
            with ops.control_dependencies([rms_t]):
                rms_t = self._resource_scatter_add(rms, indices,
                                                   rms_scaled_g_values)
                rms_slice = array_ops.gather(rms_t, indices)
            denom_slice = rms_slice
            if self.centered:
                mg = self.get_slot(var, "mg")
                mg_scaled_g_values = grad * coefficients["one_minus_rho"]
                mg_t = state_ops.assign(mg,
                                        mg * coefficients["rho"],
                                        use_locking=self._use_locking)
                with ops.control_dependencies([mg_t]):
                    mg_t = self._resource_scatter_add(mg, indices,
                                                      mg_scaled_g_values)
                    mg_slice = array_ops.gather(mg_t, indices)
                    denom_slice = rms_slice - math_ops.square(mg_slice)
            var_update = self._resource_scatter_add(
                var, indices, coefficients["neg_lr_t"] * grad /
                (math_ops.sqrt(denom_slice) + coefficients["epsilon"]))
            if self.centered:
                return control_flow_ops.group(*[var_update, rms_t, mg_t])
            return control_flow_ops.group(*[var_update, rms_t])
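# --- Hedged sketch (not from the original source): the sparse branch above
# applies the standard (optionally centered) RMSprop update only at the gathered
# indices. The dense math it mirrors, in NumPy:
import numpy as np

def rmsprop_step(var, rms, mg, grad, lr=0.001, rho=0.9, eps=1e-7, centered=True):
  rms = rho * rms + (1.0 - rho) * grad ** 2
  if centered:
    mg = rho * mg + (1.0 - rho) * grad
    denom = rms - mg ** 2
  else:
    denom = rms
  var = var - lr * grad / (np.sqrt(denom) + eps)
  return var, rms, mg

var, rms, mg = rmsprop_step(np.ones(3), np.zeros(3), np.zeros(3),
                            grad=np.array([0.1, -0.2, 0.3]))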
 def just_update_steps():
   # When bad_steps is incremented, good_step is reset.
   return control_flow_ops.group(
       state_ops.assign_add(self._num_bad_steps, 1),
       state_ops.assign(self._num_good_steps, 0))
 def _reset_stats(self):
   return control_flow_ops.group(
       state_ops.assign(self._num_good_steps, 0),
       state_ops.assign(self._num_bad_steps, 0))
Example #48
def _wals_factorization_model_function(features, labels, mode, params):
    """Model function for the WALSFactorization estimator.

  Args:
    features: Dictionary of features. See WALSMatrixFactorization.
    labels: Must be None.
    mode: A model_fn.ModeKeys object.
    params: Dictionary of parameters containing arguments passed to the
      WALSMatrixFactorization constructor.

  Returns:
    A ModelFnOps object.

  Raises:
    ValueError: If `mode` is not recognized.
  """
    assert labels is None
    use_factors_weights_cache = (
        params["use_factors_weights_cache_for_training"]
        and mode == model_fn.ModeKeys.TRAIN)
    use_gramian_cache = (params["use_gramian_cache_for_training"]
                         and mode == model_fn.ModeKeys.TRAIN)
    max_sweeps = params["max_sweeps"]
    model = factorization_ops.WALSModel(
        params["num_rows"],
        params["num_cols"],
        params["embedding_dimension"],
        unobserved_weight=params["unobserved_weight"],
        regularization=params["regularization_coeff"],
        row_init=params["row_init"],
        col_init=params["col_init"],
        num_row_shards=params["num_row_shards"],
        num_col_shards=params["num_col_shards"],
        row_weights=params["row_weights"],
        col_weights=params["col_weights"],
        use_factors_weights_cache=use_factors_weights_cache,
        use_gramian_cache=use_gramian_cache)

    # Get input rows and cols. We either update rows or columns depending on
    # the value of row_sweep, which is maintained using a session hook.
    input_rows = features[WALSMatrixFactorization.INPUT_ROWS]
    input_cols = features[WALSMatrixFactorization.INPUT_COLS]

    # TRAIN mode:
    if mode == model_fn.ModeKeys.TRAIN:
        # Training consists of the following ops (controlled using a SweepHook).
        # Before a row sweep:
        #   row_update_prep_gramian_op
        #   initialize_row_update_op
        # During a row sweep:
        #   update_row_factors_op
        # Before a col sweep:
        #   col_update_prep_gramian_op
        #   initialize_col_update_op
        # During a col sweep:
        #   update_col_factors_op

        is_row_sweep_var = variable_scope.variable(
            True,
            trainable=False,
            name="is_row_sweep",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        is_sweep_done_var = variable_scope.variable(
            False,
            trainable=False,
            name="is_sweep_done",
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        completed_sweeps_var = variable_scope.variable(
            0,
            trainable=False,
            name=WALSMatrixFactorization.COMPLETED_SWEEPS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        loss_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.LOSS,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])
        # The root weighted squared error =
        #   \sqrt( \sum_{i,j} w_ij * (a_ij - r_ij)^2 / \sum_{i,j} w_ij )
        rwse_var = variable_scope.variable(
            0.,
            trainable=False,
            name=WALSMatrixFactorization.RWSE,
            collections=[ops.GraphKeys.GLOBAL_VARIABLES])

        summary.scalar("loss", loss_var)
        summary.scalar("root_weighted_squared_error", rwse_var)
        summary.scalar("completed_sweeps", completed_sweeps_var)

        def create_axis_ops(sp_input, num_items, update_fn, axis_name):
            """Creates book-keeping and training ops for a given axis.

      Args:
        sp_input: A SparseTensor corresponding to the row or column batch.
        num_items: An integer, the total number of items of this axis.
        update_fn: A function that takes one argument (`sp_input`), and that
        returns a tuple of
          * new_factors: A float Tensor of the factor values after update.
          * update_op: a TensorFlow op which updates the factors.
          * loss: A float Tensor, the unregularized loss.
          * reg_loss: A float Tensor, the regularization loss.
          * sum_weights: A float Tensor, the sum of factor weights.
        axis_name: A string that specifies the name of the axis.

      Returns:
        A tuple consisting of:
          * reset_processed_items_op: A TensorFlow op, to be run before the
            beginning of any sweep. It marks all items as not-processed.
          * axis_train_op: A Tensorflow op, to be run during this axis' sweeps.
      """
            processed_items_init = array_ops.fill(dims=[num_items],
                                                  value=False)
            with ops.colocate_with(processed_items_init):
                processed_items = variable_scope.variable(
                    processed_items_init,
                    collections=[ops.GraphKeys.GLOBAL_VARIABLES],
                    trainable=False,
                    name="processed_" + axis_name)
            _, update_op, loss, reg, sum_weights = update_fn(sp_input)
            input_indices = sp_input.indices[:, 0]
            with ops.control_dependencies([
                    update_op,
                    state_ops.assign(loss_var, loss + reg),
                    state_ops.assign(rwse_var,
                                     math_ops.sqrt(loss / sum_weights))
            ]):
                with ops.colocate_with(processed_items):
                    update_processed_items = state_ops.scatter_update(
                        processed_items,
                        input_indices,
                        array_ops.ones_like(input_indices, dtype=dtypes.bool),
                        name="update_processed_{}_indices".format(axis_name))
                with ops.control_dependencies([update_processed_items]):
                    is_sweep_done = math_ops.reduce_all(processed_items)
                    axis_train_op = control_flow_ops.group(
                        state_ops.assign(is_sweep_done_var, is_sweep_done),
                        state_ops.assign_add(
                            completed_sweeps_var,
                            math_ops.cast(is_sweep_done, dtypes.int32)),
                        name="{}_sweep_train_op".format(axis_name))
            return processed_items.initializer, axis_train_op

        reset_processed_rows_op, row_train_op = create_axis_ops(
            input_rows, params["num_rows"],
            lambda x: model.update_row_factors(sp_input=x,
                                               transpose_input=False), "rows")
        reset_processed_cols_op, col_train_op = create_axis_ops(
            input_cols, params["num_cols"],
            lambda x: model.update_col_factors(sp_input=x,
                                               transpose_input=True), "cols")
        switch_op = control_flow_ops.group(state_ops.assign(
            is_row_sweep_var, math_ops.logical_not(is_row_sweep_var)),
                                           reset_processed_rows_op,
                                           reset_processed_cols_op,
                                           name="sweep_switch_op")
        row_prep_ops = [
            model.row_update_prep_gramian_op, model.initialize_row_update_op
        ]
        col_prep_ops = [
            model.col_update_prep_gramian_op, model.initialize_col_update_op
        ]
        init_op = model.worker_init
        sweep_hook = _SweepHook(is_row_sweep_var, is_sweep_done_var, init_op,
                                row_prep_ops, col_prep_ops, row_train_op,
                                col_train_op, switch_op)
        global_step_hook = _IncrementGlobalStepHook()
        training_hooks = [sweep_hook, global_step_hook]
        if max_sweeps is not None:
            training_hooks.append(_StopAtSweepHook(max_sweeps))

        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.TRAIN,
                                   predictions={},
                                   loss=loss_var,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=training_hooks)

    # INFER mode
    elif mode == model_fn.ModeKeys.INFER:
        projection_weights = features.get(
            WALSMatrixFactorization.PROJECTION_WEIGHTS)

        def get_row_projection():
            return model.project_row_factors(
                sp_input=input_rows,
                projection_weights=projection_weights,
                transpose_input=False)

        def get_col_projection():
            return model.project_col_factors(
                sp_input=input_cols,
                projection_weights=projection_weights,
                transpose_input=True)

        predictions = {
            WALSMatrixFactorization.PROJECTION_RESULT:
            control_flow_ops.cond(
                features[WALSMatrixFactorization.PROJECT_ROW],
                get_row_projection, get_col_projection)
        }

        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.INFER,
                                   predictions=predictions,
                                   loss=None,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    # EVAL mode
    elif mode == model_fn.ModeKeys.EVAL:

        def get_row_loss():
            _, _, loss, reg, _ = model.update_row_factors(
                sp_input=input_rows, transpose_input=False)
            return loss + reg

        def get_col_loss():
            _, _, loss, reg, _ = model.update_col_factors(sp_input=input_cols,
                                                          transpose_input=True)
            return loss + reg

        loss = control_flow_ops.cond(
            features[WALSMatrixFactorization.PROJECT_ROW], get_row_loss,
            get_col_loss)
        return model_fn.ModelFnOps(mode=model_fn.ModeKeys.EVAL,
                                   predictions={},
                                   loss=loss,
                                   eval_metric_ops={},
                                   train_op=control_flow_ops.no_op(),
                                   training_hooks=[])

    else:
        raise ValueError("mode=%s is not recognized." % str(mode))
Example #49
 def training_ops(self):
     """Returns the training operation."""
     return control_flow_ops.group(*self._train_ops)
 def decr_loss_scale():
   update_op = state_ops.assign(
       self._loss_scale,
       gen_math_ops.maximum(1., self._loss_scale * self._decr_ratio))
   # When loss_scale is updated, both good and bad steps are reset.
   return control_flow_ops.group(update_op, self._reset_stats())
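# --- Hedged sketch (loose, not from the original source): the fragments above
# belong to a dynamic loss-scaling scheme: an overflow bumps the bad-step
# counter and resets good steps, and enough bad steps shrink the loss scale
# (never below 1). The decrement interval and the good-step update below are
# assumptions for illustration only.
class LossScaler(object):

  def __init__(self, scale=2.0 ** 15, decr_ratio=0.5, decr_every_n_bad=2):
    self.scale = scale
    self.decr_ratio = decr_ratio
    self.decr_every_n_bad = decr_every_n_bad  # assumed policy parameter
    self.good_steps = 0
    self.bad_steps = 0

  def update(self, grads_are_finite):
    if grads_are_finite:
      self.good_steps += 1
    else:
      # Mirrors just_update_steps(): bad steps up, good steps reset.
      self.bad_steps += 1
      self.good_steps = 0
      if self.bad_steps >= self.decr_every_n_bad:
        # Mirrors decr_loss_scale(): shrink the scale, then reset the counters.
        self.scale = max(1.0, self.scale * self.decr_ratio)
        self.good_steps = 0
        self.bad_steps = 0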
Example #51
    def __init__(self,
                 dataset,
                 devices,
                 max_buffer_size=1,
                 prefetch_buffer_size=1,
                 source_device="/cpu:0"):
        """Constructs a MultiDeviceIterator.

    Args:
      dataset: The input dataset to be iterated over.
      devices: The list of devices to fetch data to.
      max_buffer_size: Maximum size of the host side per device buffer to keep.
      prefetch_buffer_size: if > 1, then we set up a buffer on each device
        to prefetch into.
      source_device: The host device to place the `dataset` on.

      In order to prevent deadlocks, if the prefetch_buffer_size is greater
      than the max_buffer_size, we set the max_buffer_size to
      prefetch_buffer_size.

    Raises:
      RuntimeError: If run in Eager mode.
    """
        self._dataset = dataset._apply_options()  # pylint: disable=protected-access
        self._devices = devices
        self._source_device = source_device
        self._source_device_tensor = ops.convert_to_tensor(source_device)
        self._max_buffer_size = max_buffer_size
        self._prefetch_buffer_size = prefetch_buffer_size

        if self._prefetch_buffer_size > self._max_buffer_size:
            self._max_buffer_size = self._prefetch_buffer_size

        # Create the MultiDeviceIterator.
        with ops.device(self._source_device):
            # TODO(b/121378567): Get rid of this shared_name hack.
            shared_name = ""
            if context.executing_eagerly():
                shared_name = context.shared_name()
            self._multi_device_iterator_resource = (
                gen_dataset_ops.multi_device_iterator(
                    devices=self._devices,
                    shared_name=shared_name,
                    container="",
                    **dataset_ops.flat_structure(self._dataset)))
            if context.executing_eagerly():
                # Delete the resource when this object is deleted
                self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
                    handle=self._multi_device_iterator_resource,
                    handle_device=self._source_device)

            # The incarnation ID is used to ensure consistency between the per-device
            # iterators and the multi-device iterator.
            self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
                self._dataset._variant_tensor,  # pylint: disable=protected-access
                self._multi_device_iterator_resource,
                max_buffer_size=self._max_buffer_size)

        self._prototype_device_datasets = []
        for i, device in enumerate(self._devices):
            with ops.device(device):
                ds = _PerDeviceGenerator(i,
                                         self._multi_device_iterator_resource,
                                         self._incarnation_id,
                                         self._source_device_tensor,
                                         self._dataset._element_structure)  # pylint: disable=protected-access
                self._prototype_device_datasets.append(ds)

        # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
        # initialize the device side of the pipeline. This would allow the
        # MultiDeviceIterator to choose, for example, to move some transformations
        # into the device side from its input. It might be useful in rewriting.
        # Create the per device iterators.
        self._device_iterators = []
        for i, device in enumerate(self._devices):
            with ops.device(device):
                ds = self._create_device_dataset(i)
                if context.executing_eagerly():
                    self._device_iterators.append(
                        dataset_ops.make_one_shot_iterator(ds))
                else:
                    self._device_iterators.append(
                        dataset_ops.make_initializable_iterator(ds))

        if not context.executing_eagerly():
            device_iterator_initializers = [
                iterator.initializer for iterator in self._device_iterators
            ]
            self._initializer = control_flow_ops.group(
                *device_iterator_initializers)
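# --- Hedged sketch (not from the original source): in graph mode the per-device
# iterators above are initialized through a single grouped op, the same way any
# set of initializable iterators can be. Assumes TensorFlow 1.x graph mode.
import tensorflow as tf

datasets = [tf.data.Dataset.range(10), tf.data.Dataset.range(20)]
iterators = [ds.make_initializable_iterator() for ds in datasets]
init_op = tf.group(*[it.initializer for it in iterators])

with tf.Session() as sess:
  sess.run(init_op)
  print(sess.run([it.get_next() for it in iterators]))  # [0, 0]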
Example #52
    def minimize(self, global_step=None, name=None):
        """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
        # Technically, the op depends on a lot more than the variables,
        # but we'll keep the list short.
        with name_scope(name, 'sdca/minimize'):
            sparse_example_indices = []
            sparse_feature_indices = []
            sparse_features_values = []
            for sf in self._examples['sparse_features']:
                sparse_example_indices.append(sf.example_indices)
                sparse_feature_indices.append(sf.feature_indices)
                # If feature values are missing, sdca assumes a value of 1.0f.
                if sf.feature_values is not None:
                    sparse_features_values.append(sf.feature_values)

            # pylint: disable=protected-access
            example_ids_hashed = gen_sdca_ops.sdca_fprint(
                internal_convert_to_tensor(self._examples['example_ids']))
            # pylint: enable=protected-access
            example_state_data = self._hashtable.lookup(example_ids_hashed)
            # Solver returns example_state_update, new delta sparse_feature_weights
            # and delta dense_feature_weights.

            sparse_weights = []
            sparse_indices = []
            # If we have partitioned variables, keep a few dictionaries of Tensors
            # around that we need for the assign_add after the op call to
            # gen_sdca_ops.sdca_optimizer().  These are keyed because we may have a
            # mix of partitioned and un-partitioned variables.
            num_partitions_by_var = {}
            p_assignments_by_var = {}
            gather_ids_by_var = {}
            for v_num, (w, i) in enumerate(
                    zip(self._slots['unshrinked_sparse_features_weights'],
                        sparse_feature_indices)):
                # Append the sparse_indices (in full-variable space).
                sparse_idx = math_ops.cast(
                    array_ops.unique(math_ops.cast(i, dtypes.int32))[0],
                    dtypes.int64)
                sparse_indices.append(sparse_idx)
                if isinstance(w, list) or isinstance(
                        w, var_ops.PartitionedVariable):
                    num_partitions = len(w)
                    flat_ids = array_ops.reshape(sparse_idx, [-1])
                    # We use div partitioning, which is easiest to support downstream.
                    # Compute num_total_ids as the sum of dim-0 of w, then assign
                    # to partitions based on a constant number of ids per partition.
                    # Optimize if we already know the full shape statically.
                    dim_0_size = self._get_first_dimension_size_statically(
                        w, num_partitions)

                    if dim_0_size.value:
                        num_total_ids = constant_op.constant(
                            dim_0_size.value, flat_ids.dtype)
                    else:
                        dim_0_sizes = []
                        for p in range(num_partitions):
                            if w[p].get_shape()[0].value is not None:
                                dim_0_sizes.append(w[p].get_shape()[0].value)
                            else:
                                with ops.colocate_with(w[p]):
                                    dim_0_sizes.append(
                                        array_ops.shape(w[p])[0])
                        num_total_ids = math_ops.reduce_sum(
                            math_ops.cast(array_ops.stack(dim_0_sizes),
                                          flat_ids.dtype))
                    ids_per_partition = num_total_ids // num_partitions
                    extras = num_total_ids % num_partitions

                    p_assignments = math_ops.maximum(
                        flat_ids // (ids_per_partition + 1),
                        (flat_ids - extras) // ids_per_partition)

                    # Emulate a conditional using a boolean indicator tensor
                    new_ids = array_ops.where(
                        p_assignments < extras,
                        flat_ids % (ids_per_partition + 1),
                        (flat_ids - extras) % ids_per_partition)

                    # Cast partition assignments to int32 for use in dynamic_partition.
                    # There really should not be more than 2^32 partitions.
                    p_assignments = math_ops.cast(p_assignments, dtypes.int32)
                    # Partition list of ids based on assignments into num_partitions
                    # separate lists.
                    gather_ids = data_flow_ops.dynamic_partition(
                        new_ids, p_assignments, num_partitions)
                    # Add these into the dictionaries for use in the later update.
                    num_partitions_by_var[v_num] = num_partitions
                    p_assignments_by_var[v_num] = p_assignments
                    gather_ids_by_var[v_num] = gather_ids

                    # Gather the weights from each partition.
                    partition_gathered_weights = []
                    for p in range(num_partitions):
                        with ops.colocate_with(w[p]):
                            partition_gathered_weights.append(
                                array_ops.gather(w[p], gather_ids[p]))

                    # Stitch the weights back together in the same order they were before
                    # we dynamic_partitioned them.
                    condition_indices = data_flow_ops.dynamic_partition(
                        math_ops.range(array_ops.shape(new_ids)[0]),
                        p_assignments, num_partitions)
                    batch_gathered_weights = data_flow_ops.dynamic_stitch(
                        condition_indices, partition_gathered_weights)
                else:
                    w_as_tensor = internal_convert_to_tensor(w)
                    with ops.device(w_as_tensor.device):
                        batch_gathered_weights = array_ops.gather(
                            w_as_tensor, sparse_idx)
                sparse_weights.append(batch_gathered_weights)

            # pylint: disable=protected-access
            if compat.forward_compatible(year=2018, month=10, day=30):
                esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2(
                    sparse_example_indices,
                    sparse_feature_indices,
                    sparse_features_values,
                    self._convert_n_to_tensor(
                        self._examples['dense_features']),
                    internal_convert_to_tensor(
                        self._examples['example_weights']),
                    internal_convert_to_tensor(
                        self._examples['example_labels']),
                    sparse_indices,
                    sparse_weights,
                    self._convert_n_to_tensor(
                        self._slots['unshrinked_dense_features_weights']),
                    example_state_data,
                    loss_type=self._options['loss_type'],
                    l1=self._options['symmetric_l1_regularization'],
                    l2=self._symmetric_l2_regularization(),
                    num_loss_partitions=self._num_loss_partitions(),
                    num_inner_iterations=1,
                    adaptive=self._adaptive())
            else:
                esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
                    sparse_example_indices,
                    sparse_feature_indices,
                    sparse_features_values,
                    self._convert_n_to_tensor(
                        self._examples['dense_features']),
                    internal_convert_to_tensor(
                        self._examples['example_weights']),
                    internal_convert_to_tensor(
                        self._examples['example_labels']),
                    sparse_indices,
                    sparse_weights,
                    self._convert_n_to_tensor(
                        self._slots['unshrinked_dense_features_weights']),
                    example_state_data,
                    loss_type=self._options['loss_type'],
                    l1=self._options['symmetric_l1_regularization'],
                    l2=self._symmetric_l2_regularization(),
                    num_loss_partitions=self._num_loss_partitions(),
                    num_inner_iterations=1,
                    adaptative=self._adaptive())
            # pylint: enable=protected-access

            with ops.control_dependencies([esu]):
                update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
                # Update the weights before the proximal step.
                for v_num, (w, i, u) in enumerate(
                        zip(self._slots['unshrinked_sparse_features_weights'],
                            sparse_indices, sfw)):
                    if (isinstance(w, var_ops.PartitionedVariable)
                            or isinstance(w, list)):
                        update_ops += self._get_partitioned_update_ops(
                            v_num, num_partitions_by_var, p_assignments_by_var,
                            gather_ids_by_var, w, u, p_assignments,
                            num_partitions)
                    else:
                        update_ops.append(state_ops.scatter_add(w, i, u))
                for w, u in zip(
                        self._slots['unshrinked_dense_features_weights'], dfw):
                    if (isinstance(w, var_ops.PartitionedVariable)
                            or isinstance(w, list)):
                        split_updates = array_ops.split(
                            u,
                            num_or_size_splits=[
                                v.shape.as_list()[0] for v in w
                            ])
                        for v, split_update in zip(w, split_updates):
                            update_ops.append(
                                state_ops.assign_add(v, split_update))
                    else:
                        update_ops.append(state_ops.assign_add(w, u))
            if not global_step:
                return control_flow_ops.group(*update_ops)
            with ops.control_dependencies(update_ops):
                return state_ops.assign_add(global_step, 1, name=name).op
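# --- Hedged sketch (not from the original source): the div-partitioning
# arithmetic above maps each full-variable id to a partition index plus an
# offset inside that partition (the first `extras` partitions hold one extra
# id each). A NumPy rendering of the same formulas:
import numpy as np

def div_partition(flat_ids, num_total_ids, num_partitions):
  ids_per_partition = num_total_ids // num_partitions
  extras = num_total_ids % num_partitions
  p_assignments = np.maximum(flat_ids // (ids_per_partition + 1),
                             (flat_ids - extras) // ids_per_partition)
  new_ids = np.where(p_assignments < extras,
                     flat_ids % (ids_per_partition + 1),
                     (flat_ids - extras) % ids_per_partition)
  return p_assignments, new_ids

# 10 ids over 3 partitions -> partition sizes 4, 3, 3.
print(div_partition(np.arange(10), 10, 3))
# (array([0, 0, 0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 2, 3, 0, 1, 2, 0, 1, 2]))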
Example #53
    def apply(self, var_list=None):
        """Maintains moving averages of variables.

    `var_list` must be a list of `Variable` or `Tensor` objects.  This method
    creates shadow variables for all elements of `var_list`.  Shadow variables
    for `Variable` objects are initialized to the variable's initial value.
    They will be added to the `GraphKeys.MOVING_AVERAGE_VARIABLES` collection.
    For `Tensor` objects, the shadow variables are initialized to 0 and zero
    debiased (see docstring in `assign_moving_average` for more details).

    Shadow variables are created with `trainable=False` and added to the
    `GraphKeys.ALL_VARIABLES` collection.  They will be returned by calls to
    `tf.global_variables()`.

    Returns an op that updates all shadow variables as described above.

    Note that `apply()` can be called multiple times with different lists of
    variables.

    Args:
      var_list: A list of Variable or Tensor objects. The variables
        and Tensors must be of types float16, float32, or float64.

    Returns:
      An Operation that updates the moving averages.

    Raises:
      TypeError: If the arguments are not all float16, float32, or float64.
      ValueError: If the moving average of one of the variables is already
        being computed.
    """
        # TODO(touts): op_scope
        if var_list is None:
            var_list = variables.trainable_variables()
        zero_debias_true = set()  # set of vars to set `zero_debias=True`
        for var in var_list:
            if var.dtype.base_dtype not in [
                    dtypes.float16, dtypes.float32, dtypes.float64
            ]:
                raise TypeError(
                    "The variables must be half, float, or double: %s" %
                    var.name)
            if var in self._averages:
                raise ValueError("Moving average already computed for: %s" %
                                 var.name)

            # For variables: to lower communication bandwidth across devices we keep
            # the moving averages on the same device as the variables. For other
            # tensors, we rely on the existing device allocation mechanism.
            with ops.control_dependencies(None):
                if isinstance(var, variables.Variable):
                    avg = slot_creator.create_slot(var,
                                                   var.initialized_value(),
                                                   self._name,
                                                   colocate_with_primary=True)
                    # NOTE(mrry): We only add `tf.Variable` objects to the
                    # `MOVING_AVERAGE_VARIABLES` collection.
                    ops.add_to_collection(
                        ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
                else:
                    avg = slot_creator.create_zeros_slot(
                        var,
                        self._name,
                        colocate_with_primary=(var.op.type
                                               in ["Variable", "VariableV2"]))
                    if self._zero_debias:
                        zero_debias_true.add(avg)
            self._averages[var] = avg

        with ops.name_scope(self._name) as scope:
            decay = ops.convert_to_tensor(self._decay, name="decay")
            if self._num_updates is not None:
                num_updates = math_ops.cast(self._num_updates,
                                            dtypes.float32,
                                            name="num_updates")
                decay = math_ops.minimum(decay, (1.0 + num_updates) /
                                         (10.0 + num_updates))
            updates = []
            for var in var_list:
                zero_debias = self._averages[var] in zero_debias_true
                updates.append(
                    assign_moving_average(self._averages[var],
                                          var,
                                          decay,
                                          zero_debias=zero_debias))
            return control_flow_ops.group(*updates, name=scope)
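# --- Hedged sketch (not from the original source): with `num_updates` set, the
# effective decay is capped early in training, and each update moves the shadow
# variable toward the current value by (1 - decay). In NumPy (zero-debiasing
# ignored for brevity):
import numpy as np

def ema_update(shadow, value, decay, num_updates=None):
  if num_updates is not None:
    decay = min(decay, (1.0 + num_updates) / (10.0 + num_updates))
  return shadow - (1.0 - decay) * (shadow - value)

shadow = 0.0
for step, value in enumerate([1.0, 1.0, 1.0]):
  shadow = ema_update(shadow, value, decay=0.999, num_updates=step)
print(round(shadow, 4))  # 0.9955 -- moves quickly at first because the cap wins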
  def _mini_batch_training_op(self, inputs, cluster_idx_list, cluster_centers,
                              total_counts):
    """Creates an op for training for mini batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.
      total_counts: Tensor Ref of cluster counts.

    Returns:
      An op for doing an update of mini-batch k-means.
    """
    update_ops = []
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
      with ops.colocate_with(inp, ignore_existing=True):
        assert total_counts is not None
        cluster_idx = array_ops.reshape(cluster_idx, [-1])
        # Dedupe the unique ids of cluster_centers being updated so that updates
        # can be locally aggregated.
        unique_ids, unique_idx = array_ops.unique(cluster_idx)
        num_unique_cluster_idx = array_ops.size(unique_ids)
        # Fetch the old values of counts and cluster_centers.
        with ops.colocate_with(total_counts, ignore_existing=True):
          old_counts = array_ops.gather(total_counts, unique_ids)
        # TODO(agarwal): This colocation seems to run into problems. Fix it.
        with ops.colocate_with(cluster_centers, ignore_existing=True):
          old_cluster_centers = array_ops.gather(cluster_centers, unique_ids)
        # Locally aggregate the increment to counts.
        count_updates = math_ops.unsorted_segment_sum(
            array_ops.ones_like(unique_idx, dtype=total_counts.dtype),
            unique_idx, num_unique_cluster_idx)
        # Locally compute the sum of inputs mapped to each id.
        # For a cluster with old cluster value x, old count n, and with data
        # d_1,...d_k newly assigned to it, we recompute the new value as
        # \\(x += (sum_i(d_i) - k * x) / (n + k)\\).
        # Compute \\(sum_i(d_i)\\), see comment above.
        cluster_center_updates = math_ops.unsorted_segment_sum(
            inp, unique_idx, num_unique_cluster_idx)
        # Shape to enable broadcasting count_updates and learning_rate to inp.
        # It extends the shape with 1's to match the rank of inp.
        broadcast_shape = array_ops.concat([
            array_ops.reshape(num_unique_cluster_idx, [1]),
            array_ops.ones(
                array_ops.reshape(array_ops.rank(inp) - 1, [1]),
                dtype=dtypes.int32)
        ], 0)
        # Subtract k * x, see comment above.
        cluster_center_updates -= math_ops.cast(
            array_ops.reshape(count_updates, broadcast_shape),
            inp.dtype) * old_cluster_centers
        learning_rate = math_ops.reciprocal(
            math_ops.cast(old_counts + count_updates, inp.dtype))
        learning_rate = array_ops.reshape(learning_rate, broadcast_shape)
        # scale by 1 / (n + k), see comment above.
        cluster_center_updates *= learning_rate
        # Apply the updates.
      update_counts = state_ops.scatter_add(total_counts, unique_ids,
                                            count_updates)
      update_cluster_centers = state_ops.scatter_add(
          cluster_centers, unique_ids, cluster_center_updates)
      update_ops.extend([update_counts, update_cluster_centers])
    return control_flow_ops.group(*update_ops)
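# --- Hedged sketch (not from the original source): the per-cluster update above
# is x += (sum_i d_i - k * x) / (n + k), i.e. the running mean over all points
# ever assigned to the cluster. A NumPy check on a single cluster:
import numpy as np

x, n = np.array([1.0, 1.0]), 3.0            # old center, old count
batch = np.array([[2.0, 0.0], [4.0, 2.0]])  # k = 2 new points for this cluster
k = batch.shape[0]

x_new = x + (batch.sum(axis=0) - k * x) / (n + k)
print(x_new)  # [1.8 1. ] == (3*[1, 1] + [2, 0] + [4, 2]) / 5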
Example #55
    def minimize(self, global_step=None, name=None):
        """Add operations to train a linear model by minimizing the loss function.

    Args:
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.

    Returns:
      An Operation that updates the variables passed in the constructor.
    """
        # Technically, the op depends on a lot more than the variables,
        # but we'll keep the list short.
        with name_scope(name, 'sdca/minimize'):
            sparse_example_indices = []
            sparse_feature_indices = []
            sparse_features_values = []
            for sf in self._examples['sparse_features']:
                sparse_example_indices.append(sf.example_indices)
                sparse_feature_indices.append(sf.feature_indices)
                # If feature values are missing, sdca assumes a value of 1.0f.
                if sf.feature_values is not None:
                    sparse_features_values.append(sf.feature_values)

            # pylint: disable=protected-access
            example_ids_hashed = gen_sdca_ops.sdca_fprint(
                internal_convert_to_tensor(self._examples['example_ids']))
            # pylint: enable=protected-access
            example_state_data = self._hashtable.lookup(example_ids_hashed)
            # Solver returns example_state_update, new delta sparse_feature_weights
            # and delta dense_feature_weights.

            weights_tensor = self._convert_n_to_tensor(
                self._slots['unshrinked_sparse_features_weights'])
            sparse_weights = []
            sparse_indices = []
            for w, i in zip(weights_tensor, sparse_feature_indices):
                # Find the feature ids to lookup in the variables.
                with ops.device(w.device):
                    sparse_indices.append(
                        math_ops.cast(
                            array_ops.unique(math_ops.cast(i,
                                                           dtypes.int32))[0],
                            dtypes.int64))
                    sparse_weights.append(
                        array_ops.gather(w, sparse_indices[-1]))

            # pylint: disable=protected-access
            esu, sfw, dfw = gen_sdca_ops.sdca_optimizer(
                sparse_example_indices,
                sparse_feature_indices,
                sparse_features_values,
                self._convert_n_to_tensor(self._examples['dense_features']),
                internal_convert_to_tensor(self._examples['example_weights']),
                internal_convert_to_tensor(self._examples['example_labels']),
                sparse_indices,
                sparse_weights,
                self._convert_n_to_tensor(
                    self._slots['unshrinked_dense_features_weights']),
                example_state_data,
                loss_type=self._options['loss_type'],
                l1=self._options['symmetric_l1_regularization'],
                l2=self._symmetric_l2_regularization(),
                num_loss_partitions=self._num_loss_partitions(),
                num_inner_iterations=1)
            # pylint: enable=protected-access

            with ops.control_dependencies([esu]):
                update_ops = [self._hashtable.insert(example_ids_hashed, esu)]
                # Update the weights before the proximal step.
                for w, i, u in zip(
                        self._slots['unshrinked_sparse_features_weights'],
                        sparse_indices, sfw):
                    update_ops.append(state_ops.scatter_add(w, i, u))
                for w, u in zip(
                        self._slots['unshrinked_dense_features_weights'], dfw):
                    update_ops.append(w.assign_add(u))

            if not global_step:
                return control_flow_ops.group(*update_ops)
            with ops.control_dependencies(update_ops):
                return state_ops.assign_add(global_step, 1, name=name).op
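The tail of minimize() is the usual TF1 train-op shape: either return the grouped weight updates directly, or gate a global-step increment on them so the step only advances after the updates have run. A small standalone sketch of that pattern with the public tf.compat.v1 API (the variables here are invented for illustration, not taken from the SDCA code):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

w = tf.Variable(tf.zeros([3]))
b = tf.Variable(0.0)
global_step = tf.Variable(0, trainable=False)

update_ops = [w.assign_add(tf.ones([3])), b.assign_add(1.0)]

# Without a global step: bundle the updates into a single op.
train_op_no_step = tf.group(*update_ops)

# With a global step: run the updates first, then bump the step.
with tf.control_dependencies(update_ops):
    train_op = tf.assign_add(global_step, 1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)
    print(sess.run(global_step))   # 1; the weight updates are guaranteed to have run first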
Example #56
    def _experimental_run_steps_on_iterator(self,
                                            fn,
                                            multi_worker_iterator,
                                            iterations,
                                            initial_loop_values=None):
        # Wrap `fn` for repeat.
        if initial_loop_values is None:
            initial_loop_values = {}
        initial_loop_values = nest.flatten(initial_loop_values)
        ctx = input_lib.MultiStepContext()

        def run_fn(inputs):
            """Single step on the TPU device."""
            fn_result = fn(ctx, inputs)
            flat_last_step_outputs = nest.flatten(ctx.last_step_outputs)
            if flat_last_step_outputs:
                with ops.control_dependencies([fn_result]):
                    return [
                        array_ops.identity(f) for f in flat_last_step_outputs
                    ]
            else:
                return fn_result

        # We capture the control_flow_context at this point, before we run `fn`
        # inside a while_loop and TPU replicate context. This is useful in cases
        # where we might need to exit these contexts and get back to the outer
        # context to do some things, e.g. to create an op which should be
        # evaluated only once at the end of the loop on the host. One such usage
        # is in creating metrics' value op.
        self._outer_control_flow_context = (
            ops.get_default_graph()._get_control_flow_context())  # pylint: disable=protected-access

        def rewrite_fn(*args):
            """The rewritten step fn running on TPU."""
            del args

            per_replica_inputs = multi_worker_iterator.get_next()
            replicate_inputs = []
            for replica_id in range(self._num_replicas_in_sync):
                select_replica = lambda x: values.select_replica(replica_id, x)  # pylint: disable=cell-var-from-loop
                replicate_inputs.append(
                    (nest.map_structure(select_replica, per_replica_inputs), ))

            replicate_outputs = tpu.replicate(
                run_fn,
                replicate_inputs,
                device_assignment=self._device_assignment)

            # If run_fn has tensor outputs, tpu.replicate returns a list of lists. We
            # will flatten it in this case. If run_fn has no tensor outputs,
            # tpu.replicate returns a list of no_ops, we will keep the output as it
            # is.
            if isinstance(replicate_outputs[0], list):
                replicate_outputs = nest.flatten(replicate_outputs)

            return replicate_outputs

        # TODO(sourabhbajaj): The input to while loop should be based on the
        # output type of the step_fn
        assert isinstance(initial_loop_values, list)
        initial_loop_values = initial_loop_values * self._num_replicas_in_sync

        # Put the while loop op on TPU host 0.
        with ops.device(self._host_device):
            if self.steps_per_run == 1:
                replicate_outputs = rewrite_fn()
            else:
                replicate_outputs = training_loop.repeat(
                    iterations, rewrite_fn, initial_loop_values)

        del self._outer_control_flow_context
        ctx.run_op = control_flow_ops.group(replicate_outputs)

        if isinstance(replicate_outputs, list):
            # Filter out any ops from the outputs, typically this would be the case
            # when there were no tensor outputs.
            last_step_tensor_outputs = [
                x for x in replicate_outputs
                if not isinstance(x, ops.Operation)
            ]

            # Outputs are currently of the structure (flattened)
            # [output0_device0, output1_device0, output2_device0,
            #  output0_device1, output1_device1, output2_device1,
            #  ...]
            # Convert this to the following structure instead: (grouped by output)
            # [[output0_device0, output0_device1],
            #  [output1_device0, output1_device1],
            #  [output2_device0, output2_device1]]
            output_num = len(
                last_step_tensor_outputs) // self._num_replicas_in_sync
            last_step_tensor_outputs = [
                last_step_tensor_outputs[i::output_num]
                for i in range(output_num)
            ]
        else:
            # no tensors returned.
            last_step_tensor_outputs = []

        _set_last_step_outputs(ctx, last_step_tensor_outputs)
        return ctx
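The regrouping above depends only on the fixed layout of the flattened per-replica outputs, so it can be checked with plain Python lists. A tiny sketch (illustration only; the strings are placeholders for output tensors):

# Flattened as [output0_replica0, output1_replica0, output2_replica0,
#               output0_replica1, output1_replica1, output2_replica1].
flat = ["o0_r0", "o1_r0", "o2_r0", "o0_r1", "o1_r1", "o2_r1"]
num_replicas_in_sync = 2
output_num = len(flat) // num_replicas_in_sync      # 3 outputs per replica

grouped = [flat[i::output_num] for i in range(output_num)]
assert grouped == [["o0_r0", "o0_r1"],
                   ["o1_r0", "o1_r1"],
                   ["o2_r0", "o2_r1"]]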
Example #57
 def init_ops(self):
     """Returns the initialization operation."""
     return control_flow_ops.group(*self._init_ops)
Example #58
  def training_graph(self,
                     input_data,
                     input_labels,
                     random_seed,
                     data_spec,
                     input_weights=None):

    """Constructs a TF graph for training a random tree.

    Args:
      input_data: A tensor or SparseTensor or placeholder for input data.
      input_labels: A tensor or placeholder for labels associated with
        input_data.
      random_seed: The random number generator seed to use for this tree.  0
        means use the current time as the seed.
      data_spec: A list of tf.dtype values specifying the original types of
        each column.
      input_weights: A float tensor or placeholder holding per-input weights,
        or None if all inputs are to be weighted equally.

    Returns:
      The last op in the random tree training graph.
    """
    epoch = math_ops.to_int32(get_epoch_variable())

    if input_weights is None:
      input_weights = []

    sparse_indices = []
    sparse_values = []
    sparse_shape = []
    if isinstance(input_data, sparse_tensor.SparseTensor):
      sparse_indices = input_data.indices
      sparse_values = input_data.values
      sparse_shape = input_data.dense_shape
      input_data = []

    # Count extremely random stats.
    (node_sums, node_squares, splits_indices, splits_sums, splits_squares,
     totals_indices, totals_sums, totals_squares,
     input_leaves) = (tensor_forest_ops.count_extremely_random_stats(
         input_data,
         sparse_indices,
         sparse_values,
         sparse_shape,
         data_spec,
         input_labels,
         input_weights,
         self.variables.tree,
         self.variables.tree_thresholds,
         self.variables.node_to_accumulator_map,
         self.variables.candidate_split_features,
         self.variables.candidate_split_thresholds,
         self.variables.start_epoch,
         epoch,
         num_classes=self.params.num_output_columns,
         regression=self.params.regression))
    node_update_ops = []
    node_update_ops.append(
        state_ops.assign_add(self.variables.node_sums, node_sums))

    splits_update_ops = []
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.candidate_split_sums,
                                           splits_indices, splits_sums))
    splits_update_ops.append(
        tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_sums,
                                           totals_indices, totals_sums))

    if self.params.regression:
      node_update_ops.append(state_ops.assign_add(self.variables.node_squares,
                                                  node_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(
              self.variables.candidate_split_squares, splits_indices,
              splits_squares))
      splits_update_ops.append(
          tensor_forest_ops.scatter_add_ndim(self.variables.accumulator_squares,
                                             totals_indices, totals_squares))

    # Sample inputs.
    update_indices, feature_updates, threshold_updates = (
        tensor_forest_ops.sample_inputs(
            input_data,
            sparse_indices,
            sparse_values,
            sparse_shape,
            input_weights,
            self.variables.node_to_accumulator_map,
            input_leaves,
            self.variables.candidate_split_features,
            self.variables.candidate_split_thresholds,
            split_initializations_per_input=(
                self.params.split_initializations_per_input),
            split_sampling_random_seed=random_seed))
    update_features_op = state_ops.scatter_update(
        self.variables.candidate_split_features, update_indices,
        feature_updates)
    update_thresholds_op = state_ops.scatter_update(
        self.variables.candidate_split_thresholds, update_indices,
        threshold_updates)

    # Calculate finished nodes.
    with ops.control_dependencies(splits_update_ops):
      # Passing input_leaves to finished nodes here means that nodes that
      # have become stale won't be deallocated until an input reaches them,
      # because we're trying to avoid considering every fertile node for
      # performance reasons.
      finished, stale = tensor_forest_ops.finished_nodes(
          input_leaves,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          self.variables.start_epoch,
          epoch,
          num_split_after_samples=self.params.split_after_samples,
          min_split_samples=self.params.min_split_samples,
          dominate_method=self.params.dominate_method,
          dominate_fraction=self.params.dominate_fraction)

    # Update leaf scores.
    # TODO(thomaswc): Store the leaf scores in a TopN and only update the
    # scores of the leaves that were touched by this batch of input.
    children = array_ops.squeeze(
        array_ops.slice(self.variables.tree, [0, 0], [-1, 1]), squeeze_dims=[1])
    is_leaf = math_ops.equal(constants.LEAF_NODE, children)
    leaves = math_ops.to_int32(
        array_ops.squeeze(
            array_ops.where(is_leaf), squeeze_dims=[1]))
    non_fertile_leaves = array_ops.boolean_mask(
        leaves, math_ops.less(array_ops.gather(
            self.variables.node_to_accumulator_map, leaves), 0))

    # TODO(gilberth): It should be possible to limit the number of non
    # fertile leaves we calculate scores for, especially since we can only take
    # at most array_ops.shape(finished)[0] of them.
    with ops.control_dependencies(node_update_ops):
      sums = array_ops.gather(self.variables.node_sums, non_fertile_leaves)
      if self.params.regression:
        squares = array_ops.gather(self.variables.node_squares,
                                   non_fertile_leaves)
        non_fertile_leaf_scores = self._variance(sums, squares)
      else:
        non_fertile_leaf_scores = self._weighted_gini(sums)

    # Calculate best splits.
    with ops.control_dependencies(splits_update_ops):
      split_indices = tensor_forest_ops.best_splits(
          finished,
          self.variables.node_to_accumulator_map,
          self.variables.candidate_split_sums,
          self.variables.candidate_split_squares,
          self.variables.accumulator_sums,
          self.variables.accumulator_squares,
          regression=self.params.regression)

    # Grow tree.
    with ops.control_dependencies([update_features_op, update_thresholds_op]):
      (tree_update_indices, tree_children_updates, tree_threshold_updates,
       new_eot) = (tensor_forest_ops.grow_tree(
           self.variables.end_of_tree, self.variables.node_to_accumulator_map,
           finished, split_indices, self.variables.candidate_split_features,
           self.variables.candidate_split_thresholds))
      tree_update_op = state_ops.scatter_update(
          self.variables.tree, tree_update_indices, tree_children_updates)
      thresholds_update_op = state_ops.scatter_update(
          self.variables.tree_thresholds, tree_update_indices,
          tree_threshold_updates)
      # TODO(thomaswc): Only update the epoch on the new leaves.
      new_epoch_updates = epoch * array_ops.ones_like(tree_threshold_updates,
                                                      dtype=dtypes.int32)
      epoch_update_op = state_ops.scatter_update(
          self.variables.start_epoch, tree_update_indices,
          new_epoch_updates)

    # Update fertile slots.
    with ops.control_dependencies([tree_update_op]):
      (n2a_map_updates, a2n_map_updates, accumulators_cleared,
       accumulators_allocated) = (tensor_forest_ops.update_fertile_slots(
           finished,
           non_fertile_leaves,
           non_fertile_leaf_scores,
           self.variables.end_of_tree,
           self.variables.accumulator_sums,
           self.variables.node_to_accumulator_map,
           stale,
           self.variables.node_sums,
           regression=self.params.regression))

    # Ensure end_of_tree doesn't get updated until UpdateFertileSlots has
    # used it to calculate new leaves.
    gated_new_eot, = control_flow_ops.tuple(
        [new_eot], control_inputs=[n2a_map_updates])
    eot_update_op = state_ops.assign(self.variables.end_of_tree, gated_new_eot)

    updates = []
    updates.append(eot_update_op)
    updates.append(tree_update_op)
    updates.append(thresholds_update_op)
    updates.append(epoch_update_op)

    updates.append(
        state_ops.scatter_update(self.variables.node_to_accumulator_map,
                                 n2a_map_updates[0], n2a_map_updates[1]))

    updates.append(
        state_ops.scatter_update(self.variables.accumulator_to_node_map,
                                 a2n_map_updates[0], a2n_map_updates[1]))

    cleared_and_allocated_accumulators = array_ops.concat_v2(
        [accumulators_cleared, accumulators_allocated], 0)

    # Calculate values to put into scatter update for candidate counts.
    # Candidate split counts are always reset back to 0 for both cleared
    # and allocated accumulators. This means some accumulators might be doubly
    # reset to 0 if they were released and not allocated, then later allocated.
    split_values = array_ops.tile(
        array_ops.expand_dims(array_ops.expand_dims(
            array_ops.zeros_like(cleared_and_allocated_accumulators,
                                 dtype=dtypes.float32), 1), 2),
        [1, self.params.num_splits_to_consider, self.params.num_output_columns])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_sums,
        cleared_and_allocated_accumulators, split_values))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.candidate_split_squares,
          cleared_and_allocated_accumulators, split_values))

    # Calculate values to put into scatter update for total counts.
    total_cleared = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(accumulators_cleared,
                                             dtype=dtypes.float32)), 1),
        [1, self.params.num_output_columns])
    total_reset = array_ops.tile(
        array_ops.expand_dims(
            array_ops.zeros_like(accumulators_allocated,
                                 dtype=dtypes.float32), 1),
        [1, self.params.num_output_columns])
    accumulator_updates = array_ops.concat_v2([total_cleared, total_reset], 0)
    updates.append(state_ops.scatter_update(
        self.variables.accumulator_sums,
        cleared_and_allocated_accumulators, accumulator_updates))
    if self.params.regression:
      updates.append(state_ops.scatter_update(
          self.variables.accumulator_squares,
          cleared_and_allocated_accumulators, accumulator_updates))

    # Calculate values to put into scatter update for candidate splits.
    split_features_updates = array_ops.tile(
        array_ops.expand_dims(
            math_ops.neg(array_ops.ones_like(
                cleared_and_allocated_accumulators)), 1),
        [1, self.params.num_splits_to_consider])
    updates.append(state_ops.scatter_update(
        self.variables.candidate_split_features,
        cleared_and_allocated_accumulators, split_features_updates))

    updates += self.finish_iteration()

    return control_flow_ops.group(*updates)
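control_flow_ops.tuple is used above so that end_of_tree is not overwritten until update_fertile_slots has produced its outputs, i.e. until the old value has been consumed. A minimal standalone sketch of that gating pattern with the public tf.compat.v1 API (the variables are invented for the example):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

v = tf.Variable(10)
reader = tf.add(v.read_value(), 0)        # an op that must see the old value of v
new_value = tf.constant(99)

# Gate new_value on `reader`: the assign below cannot run until reader has.
gated_new_value, = tf.tuple([new_value], control_inputs=[reader])
overwrite = tf.assign(v, gated_new_value)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    old, _ = sess.run([reader, overwrite])
    print(old, sess.run(v))               # 10 99: read first, then overwritten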
Example #59
 def body(i):
   new_u = state_ops.assign_add(u, v)
   new_i = math_ops.add(i, 1)
   op = control_flow_ops.group(new_u)
   new_i = control_flow_ops.with_dependencies([op], new_i)
   return [new_i]
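A body like this is meant to drive a while_loop: gating the new loop counter on the grouped assign forces the variable update to run on every iteration. A small self-contained sketch of that usage with the public tf.compat.v1 API (tf.control_dependencies plus tf.identity stands in for control_flow_ops.with_dependencies; the loop bound and values are made up):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

u = tf.Variable(0.0)
v = tf.constant(2.0)

def body(i):
    new_u = tf.assign_add(u, v)
    new_i = tf.add(i, 1)
    op = tf.group(new_u)
    with tf.control_dependencies([op]):   # counter advances only after the update
        new_i = tf.identity(new_i)
    return [new_i]

i_final = tf.while_loop(lambda i: i < 5, body, [tf.constant(0)])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(i_final)
    print(sess.run(u))                    # 10.0: the assign ran in all 5 iterations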
Example #60
def evaluate_once(checkpoint_path,
                  logdir,
                  master='',
                  num_evals=1,
                  eval_op=None,
                  eval_op_feed_dict=None,
                  final_op=None,
                  final_op_feed_dict=None,
                  summary_op=_USE_DEFAULT,
                  summary_op_feed_dict=None,
                  variables_to_restore=None,
                  session_config=None):
    """Evaluates the model at the given checkpoint path.

  Args:
    checkpoint_path: The path to a checkpoint to use for evaluation.
    logdir: The directory where the TensorFlow summaries are written to.
    master: The BNS address of the TensorFlow master.
    num_evals: The number of times to run `eval_op`.
    eval_op: An operation run `num_evals` times.
    eval_op_feed_dict: The feed dictionary to use when executing the `eval_op`.
    final_op: An operation to execute after all of the `eval_op` executions. The
      value of `final_op` is returned.
    final_op_feed_dict: A feed dictionary to use when executing `final_op`.
    summary_op: The summary_op to evaluate after running TF-Slim's metric ops. By
      default the summary_op is set to tf.merge_all_summaries().
    summary_op_feed_dict: An optional feed dictionary to use when running the
      `summary_op`.
    variables_to_restore: A list of TensorFlow variables to restore during
      evaluation. If the argument is left as `None` then
      slim.variables.GetVariablesToRestore() is used.
    session_config: An instance of `tf.ConfigProto` that will be used to
      configure the `Session`. If left as `None`, the default will be used.

  Returns:
    The value of `final_op` or `None` if `final_op` is `None`.
  """
    if summary_op == _USE_DEFAULT:
        summary_op = logging_ops.merge_all_summaries()

    global_step = variables.get_or_create_global_step()

    init_op = control_flow_ops.group(tf_variables.initialize_all_variables(),
                                     tf_variables.initialize_local_variables(),
                                     data_flow_ops.initialize_all_tables())

    saver = tf_saver.Saver(variables_to_restore
                           or variables.get_variables_to_restore())

    summary_writer = summary_io.SummaryWriter(logdir)

    sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                               logdir=logdir,
                               init_op=init_op,
                               summary_op=None,
                               summary_writer=None,
                               global_step=None,
                               saver=None)

    logging.info('Starting evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
    with sv.managed_session(master,
                            start_standard_services=False,
                            config=session_config) as sess:
        saver.restore(sess, checkpoint_path)
        sv.start_queue_runners(sess)
        final_op_value = evaluation(sess,
                                    num_evals=num_evals,
                                    eval_op=eval_op,
                                    eval_op_feed_dict=eval_op_feed_dict,
                                    final_op=final_op,
                                    final_op_feed_dict=final_op_feed_dict,
                                    summary_op=summary_op,
                                    summary_op_feed_dict=summary_op_feed_dict,
                                    summary_writer=summary_writer,
                                    global_step=global_step)

    logging.info('Finished evaluation at ' +
                 time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

    return final_op_value
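The init_op above just bundles the three standard initializers into a single op that the Supervisor runs once before evaluation starts. A minimal sketch of the same bundling with the current public tf.compat.v1 names (initialize_all_variables, initialize_local_variables and initialize_all_tables in the example are the older aliases of these):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

_ = tf.Variable(1.0)                      # something for the initializers to touch
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer(),
                   tf.tables_initializer())

with tf.Session() as sess:
    sess.run(init_op)                     # one run initializes everything at startup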