Example #1
0
  def testAddWeight(self):
    """Checks the variable bookkeeping performed by `Layer.add_variable`."""
    layer = base_layers.Layer(name='my_layer')

    # Test basic variable creation.
    variable = layer.add_variable(
        'my_var', [2, 2], initializer=init_ops.zeros_initializer())
    self.assertEqual(variable.name, 'my_layer/my_var:0')
    self.assertListEqual(layer.variables, [variable])
    self.assertListEqual(layer.trainable_variables, [variable])
    self.assertListEqual(layer.non_trainable_variables, [])
    # The graph's TRAINABLE_VARIABLES collection is expected to contain
    # exactly the layer's variables at this point.
    self.assertListEqual(layer.variables,
                         ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

    # Test non-trainable variable creation.
    # layer.add_variable should work even outside `build` and `call`.
    variable_2 = layer.add_variable(
        'non_trainable_var', [2, 2],
        initializer=init_ops.zeros_initializer(),
        trainable=False)
    self.assertListEqual(layer.variables, [variable, variable_2])
    self.assertListEqual(layer.trainable_variables, [variable])
    self.assertListEqual(layer.non_trainable_variables, [variable_2])
    # The non-trainable variable must not show up in the trainable
    # collection, so its length is still 1.
    self.assertEqual(
        len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)

    if context.in_graph_mode():
      # regularizers only supported in GRAPH mode.
      regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
      variable = layer.add_variable(
          'reg_var', [2, 2],
          initializer=init_ops.zeros_initializer(),
          regularizer=regularizer)
      # The regularized variable should have registered one loss on the layer.
      self.assertEqual(len(layer.losses), 1)
Example #2
0
def weighted_moving_average(value,
                            decay,
                            weight,
                            truediv=True,
                            collections=None,
                            name=None):
  """Return an op that updates and yields the weighted moving average.

  The result is the ratio
    `moving_average(value * weight) / moving_average(weight)`,
  where each moving average follows the update rule
    `new_value = decay * old_value + (1 - decay) * update`.
  Two internal, non-trainable variables hold the running averages of
  `value * weight` and of `weight`.

  Args:
    value: A numeric `Tensor`.
    decay: A float `Tensor` or float value.  The moving average decay.
    weight: `Tensor` holding the current value of a weight.  Its shape should
      be able to multiply `value`.
    truediv: Boolean.  If `True`, the final ratio is computed with
      `math_ops.truediv`; otherwise with `math_ops.div`.
    collections: List of graph collections keys the two internal variables
      are added to.  Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
    name: Optional name of the returned operation.
      Defaults to "WeightedMovingAvg".

  Returns:
    An Operation that updates and returns the weighted moving average.
  """
  # In contrast with assign_moving_average, no user-visible variable is
  # modified: the result is the ratio of two internal moving averages, which
  # is why the signature differs from assign_moving_average.
  target_collections = (
      [ops.GraphKeys.GLOBAL_VARIABLES] if collections is None else collections)
  divide = math_ops.truediv if truediv else math_ops.div

  with variable_scope.variable_scope(name, "WeightedMovingAvg",
                                     [value, weight, decay]) as scope:
    weighted_sum_var = variable_scope.get_variable(
        "value_x_weight",
        initializer=init_ops.zeros_initializer(value.get_shape(),
                                               dtype=value.dtype),
        trainable=False,
        collections=target_collections)
    weight_sum_var = variable_scope.get_variable(
        "weight",
        initializer=init_ops.zeros_initializer(weight.get_shape(),
                                               dtype=weight.dtype),
        trainable=False,
        collections=target_collections)

    weighted_value = value * weight
    numerator = assign_moving_average(
        weighted_sum_var, weighted_value, decay, zero_debias=False)
    denominator = assign_moving_average(
        weight_sum_var, weight, decay, zero_debias=False)

    return divide(numerator, denominator, name=scope.name)
Example #3
0
 def __init__(self,
              axis=-1,
              momentum=0.99,
              epsilon=1e-3,
              center=True,
              scale=True,
              beta_initializer=init_ops.zeros_initializer(),
              gamma_initializer=init_ops.ones_initializer(),
              moving_mean_initializer=init_ops.zeros_initializer(),
              moving_variance_initializer=init_ops.ones_initializer(),
              beta_regularizer=None,
              gamma_regularizer=None,
              trainable=True,
              name=None,
              **kwargs):
   """Records the layer configuration.

   `name`, `trainable` and any extra `**kwargs` are forwarded to the parent
   constructor.  Every other argument is stored unchanged on the instance
   under an attribute of the same name.
   """
   super(BatchNormalization, self).__init__(
       name=name, trainable=trainable, **kwargs)
   # Plain configuration attributes; nothing is validated or built here.
   self.axis = axis
   self.momentum = momentum
   self.epsilon = epsilon
   self.center = center
   self.scale = scale
   self.beta_initializer = beta_initializer
   self.gamma_initializer = gamma_initializer
   self.moving_mean_initializer = moving_mean_initializer
   self.moving_variance_initializer = moving_variance_initializer
   self.beta_regularizer = beta_regularizer
   self.gamma_regularizer = gamma_regularizer
  def testKernelStateList(self):
    """Test that transition kernel works with list input to `state`.

    Two state variables are advanced through `mh.kernel` for 10000 steps.
    After dropping the first 1000 draws as burn-in, the per-chain sample mean
    and standard deviation are compared with 0 and 0.1 respectively, using an
    absolute tolerance of 0.1.
    """
    num_chains = 2
    loc_one = variable_scope.get_variable(
        "loc_one", [num_chains],
        initializer=init_ops.zeros_initializer())
    loc_two = variable_scope.get_variable(
        "loc_two", [num_chains], initializer=init_ops.zeros_initializer())

    def target_log_prob_fn(loc_one, loc_two):
      # Stack both state parts, score them under the target distribution and
      # sum the log-probabilities over the leading axis.
      loc = array_ops.stack([loc_one, loc_two])
      log_prob = mvn_tril_lib.MultivariateNormalTriL(
          loc=constant_op.constant([0., 0.]),
          scale_tril=constant_op.constant([[0.1, 0.1], [0.0, 0.1]])).log_prob(
              loc)
      return math_ops.reduce_sum(log_prob, 0)

    def proposal_fn(loc_one, loc_two):
      # Independent normal proposals (scale 0.05) for each state part; the
      # second return value is passed through as `None`.
      loc_one_proposal = mh.proposal_normal(scale=0.05)
      loc_two_proposal = mh.proposal_normal(scale=0.05)
      loc_one_sample, _ = loc_one_proposal(loc_one)
      loc_two_sample, _ = loc_two_proposal(loc_two)
      return [loc_one_sample, loc_two_sample], None

    new_state, _ = mh.kernel(
        target_log_prob_fn=target_log_prob_fn,
        proposal_fn=proposal_fn,
        current_state=[loc_one, loc_two],
        seed=12415)
    # Write the kernel output back so that every `sess.run` advances the chain.
    loc_one_update = loc_one.assign(new_state[0])
    loc_two_update = loc_two.assign(new_state[1])

    # `initialize_all_variables` is deprecated; `global_variables_initializer`
    # is its documented replacement.
    init = variables.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init)
      loc_one_samples = []
      loc_two_samples = []
      for _ in range(10000):
        loc_one_sample, loc_two_sample = sess.run(
            [loc_one_update, loc_two_update])
        loc_one_samples.append(loc_one_sample)
        loc_two_samples.append(loc_two_sample)

    loc_one_samples = np.array(loc_one_samples)
    loc_two_samples = np.array(loc_two_samples)
    loc_one_samples = loc_one_samples[1000:]  # drop samples for burn-in
    loc_two_samples = loc_two_samples[1000:]  # drop samples for burn-in

    self.assertAllClose(np.mean(loc_one_samples, 0),
                        np.array([0.] * num_chains),
                        rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.mean(loc_two_samples, 0),
                        np.array([0.] * num_chains),
                        rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.std(loc_one_samples, 0),
                        np.array([0.1] * num_chains),
                        rtol=1e-5, atol=1e-1)
    self.assertAllClose(np.std(loc_two_samples, 0),
                        np.array([0.1] * num_chains),
                        rtol=1e-5, atol=1e-1)
Example #5
0
def _auc_hist_accumulate(hist_true, hist_false, nbins, collections):
  """Create running-total histogram variables and the op that updates them.

  Args:
    hist_true: Histogram added to the `hist_true_acc` accumulator (scores of
      records labeled True).
    hist_false: Histogram added to the `hist_false_acc` accumulator (scores of
      records labeled False).
    nbins: Length of each accumulator variable.
    collections: Graph collections the accumulator variables are added to.

  Returns:
    A tuple `(hist_true_acc, hist_false_acc, update_op)`.
  """

  def _running_total(var_name, hist):
    # Non-trainable accumulator of `nbins` zeros with the dtype of `hist`.
    return variable_scope.get_variable(
        var_name,
        initializer=init_ops.zeros_initializer([nbins], dtype=hist.dtype),
        collections=collections,
        trainable=False)

  with variable_scope.variable_scope(
      None, 'hist_accumulate', [hist_true, hist_false]):
    true_total = _running_total('hist_true_acc', hist_true)
    false_total = _running_total('hist_false_acc', hist_false)

    # A single op that adds both incoming histograms to their totals.
    accumulate = control_flow_ops.group(
        true_total.assign_add(hist_true),
        false_total.assign_add(hist_false),
        name='update_op')

  return true_total, false_total, accumulate
 def linear_module(x, output_size):
   """Applies `matmul(x, w) + b` with zero-initialized `w` and `b`.

   Returns:
     A tuple `(output, w)` of the affine output and the weight variable.
   """
   input_size = x.get_shape()[1]
   weights = variable_scope.get_variable(
       "w", shape=[input_size, output_size],
       initializer=init_ops.zeros_initializer())
   bias = variable_scope.get_variable(
       "b", shape=[output_size],
       initializer=init_ops.zeros_initializer())
   projected = math_ops.matmul(x, weights)
   return projected + bias, weights
 def _templated():
   """Creates resource variables "v" and "v2"; returns `(v, v + 1., v2)`."""

   def _zero_resource_variable(var_name):
     # Shape-[1], zero-initialized resource variable.
     return variable_scope.get_variable(
         var_name, shape=[1], initializer=init_ops.zeros_initializer(),
         use_resource=True)

   first = _zero_resource_variable("v")
   second = _zero_resource_variable("v2")
   return first, first + 1., second
Example #8
0
  def __init__(self,
               axis=-1,
               momentum=0.99,
               epsilon=1e-3,
               center=True,
               scale=True,
               beta_initializer=init_ops.zeros_initializer(),
               gamma_initializer=init_ops.ones_initializer(),
               moving_mean_initializer=init_ops.zeros_initializer(),
               moving_variance_initializer=init_ops.ones_initializer(),
               beta_regularizer=None,
               gamma_regularizer=None,
               beta_constraint=None,
               gamma_constraint=None,
               renorm=False,
               renorm_clipping=None,
               renorm_momentum=0.99,
               fused=None,
               trainable=True,
               virtual_batch_size=None,
               adjustment=None,
               name=None,
               **kwargs):
    """Records the layer configuration and validates the renorm settings.

    `name`, `trainable` and `**kwargs` go to the parent constructor; the
    remaining arguments are stored on the instance.  A list `axis` is copied,
    `fused=None` is replaced by `True`, and `renorm_clipping` /
    `renorm_momentum` are only stored when `renorm` is truthy.

    Raises:
      ValueError: If `renorm` is truthy and `renorm_clipping` has keys other
        than 'rmax', 'rmin' and 'dmax'.
    """
    super(BatchNormalization, self).__init__(
        name=name, trainable=trainable, **kwargs)
    # Keep a private copy of a list-valued axis.
    self.axis = axis[:] if isinstance(axis, list) else axis
    self.momentum = momentum
    self.epsilon = epsilon
    self.center = center
    self.scale = scale
    self.beta_initializer = beta_initializer
    self.gamma_initializer = gamma_initializer
    self.moving_mean_initializer = moving_mean_initializer
    self.moving_variance_initializer = moving_variance_initializer
    self.beta_regularizer = beta_regularizer
    self.gamma_regularizer = gamma_regularizer
    self.beta_constraint = beta_constraint
    self.gamma_constraint = gamma_constraint
    self.renorm = renorm
    self.virtual_batch_size = virtual_batch_size
    self.adjustment = adjustment
    self.fused = True if fused is None else fused
    self._bessels_correction_test_only = True

    if not renorm:
      return

    clipping = renorm_clipping or {}
    allowed_keys = ['rmax', 'rmin', 'dmax']
    unexpected = set(clipping) - set(allowed_keys)
    if unexpected:
      raise ValueError('renorm_clipping %s contains keys not in %s' %
                       (clipping, allowed_keys))
    self.renorm_clipping = clipping
    self.renorm_momentum = renorm_momentum
Example #9
0
  def testAddWeight(self):
    """Checks `Layer.add_variable` bookkeeping, including inside a function."""
    layer = base_layers.Layer(name='my_layer')

    # Test basic variable creation.
    variable = layer.add_variable(
        'my_var', [2, 2], initializer=init_ops.zeros_initializer())
    self.assertEqual(variable.name, 'my_layer/my_var:0')
    self.assertEqual(layer.variables, [variable])
    self.assertEqual(layer.trainable_variables, [variable])
    self.assertEqual(layer.non_trainable_variables, [])
    # Graph collections are only inspected when not executing eagerly.
    if not context.executing_eagerly():
      self.assertEqual(
          layer.variables,
          ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

    # Test non-trainable variable creation.
    # layer.add_variable should work even outside `build` and `call`.
    variable_2 = layer.add_variable(
        'non_trainable_var', [2, 2],
        initializer=init_ops.zeros_initializer(),
        trainable=False)
    self.assertEqual(layer.variables, [variable, variable_2])
    self.assertEqual(layer.trainable_variables, [variable])
    self.assertEqual(layer.non_trainable_variables, [variable_2])

    if not context.executing_eagerly():
      # The non-trainable variable must not have been added to the trainable
      # collection.
      self.assertEqual(
          len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1)

    regularizer = lambda x: math_ops.reduce_sum(x) * 1e-3
    _ = layer.add_variable(
        'reg_var', [2, 2],
        initializer=init_ops.zeros_initializer(),
        regularizer=regularizer)
    # One regularized variable so far, hence one loss.
    self.assertEqual(len(layer.losses), 1)

    # Single-element list used as a mutable flag by `function_adds_weight`
    # below, so the variable is only added on the first invocation.
    added_variable = [False]

    # Test that sync `ON_READ` variables are defaulted to be non-trainable.
    variable_3 = layer.add_variable(
        'sync_on_read_var', [2, 2],
        initializer=init_ops.zeros_initializer(),
        synchronization=variable_scope.VariableSynchronization.ON_READ,
        aggregation=variable_scope.VariableAggregation.SUM)
    self.assertEqual(layer.non_trainable_variables, [variable_2, variable_3])

    @def_function.function
    def function_adds_weight():
      if not added_variable[0]:
        layer.add_variable(
            'reg_var_from_function', [2, 2],
            initializer=init_ops.zeros_initializer(),
            regularizer=regularizer)
        added_variable[0] = True

    function_adds_weight()
    # The regularized variable added inside the function contributes a second
    # loss.
    self.assertEqual(len(layer.losses), 2)
Example #10
0
  def testLSTMLayer(self):
    """Compares `_RunLSTMLayer` outputs with reference values.

    Covers all-zero weights, all-one weights and three random constant
    weights, each for every combination of initial `m` / `c` in {0, 1}.  The
    random weights are additionally run with the padding argument set to 1.
    """
    init_values = [0., 1.]

    def _per_step(values):
      # Expected output holding one constant value per time step.
      return [[[v]] * self._batch_size for v in values]

    def _check_unpadded(tag, initializer, w, m_init, c_init):
      # Runs three unpadded steps and compares every output with the state
      # obtained by iterating `_NextM` / `_NextC` from the initial values.
      out = self._RunLSTMLayer(tag, initializer, m_init, c_init, 0.)
      m_state, c_state = m_init, c_init
      for step in range(3):
        m_next = self._NextM(self._inputs, w, m_state, c_state)
        if step < 2:
          # The cell state after the final step is never needed.
          c_state = self._NextC(self._inputs, w, m_state, c_state)
        self.assertAllClose(out[step], m_next)
        m_state = m_next

    # Run with all-0 weights, no padding.
    for m_init in init_values:
      for c_init in init_values:
        out = self._RunLSTMLayer(
            'zeros', init_ops.zeros_initializer(), m_init, c_init, 0.)
        expected = [.25, .125, .0625] if c_init else [0., 0., 0.]
        self.assertAllClose(out, _per_step(expected))

    # Run with all-1 weights, no padding.
    for m_init in init_values:
      for c_init in init_values:
        _check_unpadded('ones', init_ops.ones_initializer(), 1., m_init,
                        c_init)

    # Run with random weights.
    for weight in np.random.rand(3):
      weight_tf = constant_op.constant(weight, dtypes.float32)

      def random_weight(shape, w=weight_tf):
        return array_ops.fill(shape, w)

      # No padding.
      for m_init in init_values:
        for c_init in init_values:
          _check_unpadded('random', random_weight, weight, m_init, c_init)

      # Set padding: the output is expected to equal the initial `m`.
      for m_init in init_values:
        for c_init in init_values:
          out = self._RunLSTMLayer('random', random_weight, m_init, c_init, 1.)
          self.assertAllClose(out, _per_step([m_init] * 3))
  def __init__(self,
               axis=-1,
               momentum=0.99,
               epsilon=1e-3,
               center=True,
               scale=True,
               beta_initializer=init_ops.zeros_initializer(),
               gamma_initializer=init_ops.ones_initializer(),
               moving_mean_initializer=init_ops.zeros_initializer(),
               moving_variance_initializer=init_ops.ones_initializer(),
               beta_regularizer=None,
               gamma_regularizer=None,
               beta_constraint=None,
               gamma_constraint=None,
               renorm=False,
               renorm_clipping=None,
               renorm_momentum=0.99,
               fused=None,
               trainable=True,
               name=None,
               **kwargs):
    """Records the layer configuration and validates the renorm settings.

    `name`, `trainable` and `**kwargs` go to the parent constructor; the
    remaining arguments are stored on the instance.  `fused=None` is replaced
    by `_FUSED_DEFAULT`, and `renorm_clipping` / `renorm_momentum` are only
    stored when `renorm` is truthy.

    Raises:
      ValueError: If `renorm` is truthy and `renorm_clipping` has keys other
        than 'rmax', 'rmin' and 'dmax'.
    """
    super(BatchNormalization, self).__init__(
        name=name, trainable=trainable, **kwargs)
    self.axis = axis
    self.momentum = momentum
    self.epsilon = epsilon
    self.center = center
    self.scale = scale
    self.beta_initializer = beta_initializer
    self.gamma_initializer = gamma_initializer
    self.moving_mean_initializer = moving_mean_initializer
    self.moving_variance_initializer = moving_variance_initializer
    self.beta_regularizer = beta_regularizer
    self.gamma_regularizer = gamma_regularizer
    self.beta_constraint = beta_constraint
    self.gamma_constraint = gamma_constraint
    self.renorm = renorm
    # Per the original note, the `_FUSED_DEFAULT` fallback only exists for
    # the testing period of fused batch norm and is meant to be removed.
    self.fused = _FUSED_DEFAULT if fused is None else fused
    self._bessels_correction_test_only = True

    if not renorm:
      return

    clipping = renorm_clipping or {}
    allowed_keys = ['rmax', 'rmin', 'dmax']
    unexpected = set(clipping) - set(allowed_keys)
    if unexpected:
      raise ValueError('renorm_clipping %s contains keys not in %s' %
                       (clipping, allowed_keys))
    self.renorm_clipping = clipping
    self.renorm_momentum = renorm_momentum
Example #12
0
 def __init__(self,
              axis=-1,
              momentum=0.99,
              epsilon=1e-3,
              center=True,
              scale=True,
              beta_initializer=init_ops.zeros_initializer(),
              gamma_initializer=init_ops.ones_initializer(),
              moving_mean_initializer=init_ops.zeros_initializer(),
              moving_variance_initializer=init_ops.ones_initializer(),
              beta_regularizer=None,
              gamma_regularizer=None,
              renorm=False,
              renorm_clipping=None,
              renorm_momentum=0.99,
              fused=False,
              trainable=True,
              name=None,
              **kwargs):
   """Records the layer configuration and rejects unsupported combinations.

   `name`, `trainable` and `**kwargs` go to the parent constructor; the
   remaining arguments are stored on the instance.  `renorm_clipping` and
   `renorm_momentum` are only stored when `renorm` is truthy.

   Raises:
     ValueError: If `fused` is combined with `renorm` or with a beta/gamma
       regularizer, or if `renorm_clipping` has keys other than 'rmax',
       'rmin' and 'dmax'.
   """
   super(BatchNormalization, self).__init__(
       name=name, trainable=trainable, **kwargs)
   self.axis = axis
   self.momentum = momentum
   self.epsilon = epsilon
   self.center = center
   self.scale = scale
   self.beta_initializer = beta_initializer
   self.gamma_initializer = gamma_initializer
   self.moving_mean_initializer = moving_mean_initializer
   self.moving_variance_initializer = moving_variance_initializer
   self.beta_regularizer = beta_regularizer
   self.gamma_regularizer = gamma_regularizer
   self.renorm = renorm
   self.fused = fused

   # Fused batch norm supports neither renorm nor regularizers.
   if self.fused:
     if renorm:
       raise ValueError(
           'Batch renorm is currently not supported with fused batch norm.')
     has_regularizer = not (beta_regularizer is None and
                            gamma_regularizer is None)
     if has_regularizer:
       raise ValueError('Regularizers are not currently '
                        'supported for fused batch norm.')

   if not renorm:
     return

   clipping = renorm_clipping or {}
   allowed_keys = ['rmax', 'rmin', 'dmax']
   unexpected = set(clipping) - set(allowed_keys)
   if unexpected:
     raise ValueError('renorm_clipping %s contains keys not in %s' %
                      (clipping, allowed_keys))
   self.renorm_clipping = clipping
   self.renorm_momentum = renorm_momentum
    def __init__(self, value, decay,
                 truediv=True,
                 collections=None,
                 reduction_indices=None,
                 name=None):
        """Builds moving-average trackers for the mean and variance of `value`.

        Three non-trainable variables are created ("mean_x_weight",
        "variance_x_weight" and "weight").  The tracked statistics are the
        first two divided by `weight + 1e-8`.

        Args:
            value: Tensor whose batch moments are tracked.
            decay: Decay passed to `assign_moving_average`.
            truediv: If truthy, ratios use `math_ops.truediv`; otherwise
                `math_ops.div`.
            collections: Collections for the internal variables.  Defaults to
                `[GraphKeys.VARIABLES]`.
            reduction_indices: Axes passed to `tf.nn.moments`.  Defaults to
                `[0]`.
            name: Optional scope name; the default is "MomentTracker".
        """
        self.value = value
        self.reduction_indices = reduction_indices or [0]

        eps = 1e-8
        div = math_ops.truediv if truediv else math_ops.div
        if collections is None:
            collections = [ops.GraphKeys.VARIABLES]

        # Shape of `value` with every reduced axis collapsed to size 1.
        stat_shape = [
            1 if axis in self.reduction_indices else size
            for axis, size in enumerate(value.get_shape().as_list())
        ]

        def _zero_variable(var_name, var_shape, var_dtype):
            # Non-trainable, zero-initialized accumulator variable.
            return variable_scope.get_variable(
                var_name, trainable=False, collections=collections,
                initializer=init_ops.zeros_initializer(var_shape,
                                                       dtype=var_dtype))

        with variable_scope.variable_op_scope(
                [value, decay], name, "MomentTracker") as scope:

            mean_x_weight_var = _zero_variable(
                "mean_x_weight", stat_shape, value.dtype)
            variance_x_weight_var = _zero_variable(
                "variance_x_weight", stat_shape, value.dtype)
            weight_var = _zero_variable("weight", [1], tf.float32)

            # Current estimates, read before this step's update.
            self.tracked_mean = div(mean_x_weight_var, weight_var + eps)
            self.tracked_variance = div(variance_x_weight_var,
                                        weight_var + eps)

            self.batch_mean, self.batch_variance = tf.nn.moments(
                self.value,
                axes=self.reduction_indices,
                shift=self.tracked_mean,
                keep_dims=True)

            mean_numerator = assign_moving_average(
                mean_x_weight_var, self.batch_mean, decay)
            variance_numerator = assign_moving_average(
                variance_x_weight_var, self.batch_variance, decay)
            denominator = assign_moving_average(weight_var, 1.0, decay)

            # Ops that apply the update and return the refreshed estimates.
            self.update_mean = div(
                mean_numerator, denominator + eps, name=scope.name)
            self.update_variance = div(
                variance_numerator, denominator + eps, name=scope.name)
Example #14
0
  def create_variables_and_ops(self, table, variable_name, num_hosts,
                               table_config, table_variables,
                               load_parameters_ops, retrieve_parameters_ops):
    """Creates the Adam `m` / `v` slot variables and their per-host ops.

    The slot variables are recorded in `self._table_to_m_variables_dict` and
    `self._table_to_v_variables_dict` under `table`.  For every host, one
    load op and one retrieve op are appended to `load_parameters_ops` and
    `retrieve_parameters_ops` respectively; nothing is returned.
    """
    optimizer_name = 'Adam'

    def _zero_slot(slot_name):
      # Zero-initialized partitioned slot sized like the embedding table.
      slot_initializer = init_ops.zeros_initializer()
      return _create_partitioned_variables(
          name=slot_name,
          num_hosts=num_hosts,
          vocabulary_size=table_config.vocabulary_size,
          embedding_dimension=table_config.dimension,
          collections=[ops.GraphKeys.GLOBAL_VARIABLES],
          initializer=slot_initializer)

    m_variables = _zero_slot('%s/%s/m' % (variable_name, optimizer_name))
    v_variables = _zero_slot('%s/%s/v' % (variable_name, optimizer_name))

    self._table_to_m_variables_dict[table] = m_variables
    self._table_to_v_variables_dict[table] = v_variables

    per_host = zip(range(num_hosts), table_variables, m_variables, v_variables)
    for host_id, table_variable, m_variable, v_variable in per_host:
      with ops.colocate_with(table_variable):
        load_op = tpu_ops.load_tpu_embedding_adam_parameters(
            parameters=table_variable,
            momenta=m_variable,
            velocities=v_variable,
            table_name=table,
            num_shards=num_hosts,
            shard_id=host_id)
        retrieved = tpu_ops.retrieve_tpu_embedding_adam_parameters(
            table_name=table,
            num_shards=num_hosts,
            shard_id=host_id)
        retrieved_table, retrieved_m, retrieved_v = retrieved
        # Copy the retrieved values back into the host-side variables.
        retrieve_op = control_flow_ops.group(
            state_ops.assign(table_variable, retrieved_table),
            state_ops.assign(m_variable, retrieved_m),
            state_ops.assign(v_variable, retrieved_v))

      load_parameters_ops.append(load_op)
      retrieve_parameters_ops.append(retrieve_op)
Example #15
0
def _get_or_create_eval_step():
  """Returns the eval step counter of the default graph, creating it if needed.

  Returns:
    The single entry of the `tf.GraphKeys.EVAL_STEP` collection when one
    exists; otherwise a newly created non-trainable int64 scalar variable
    named 'eval_step', added to the LOCAL_VARIABLES and EVAL_STEP collections.

  Raises:
    ValueError: If multiple `Tensors` have been added to the
      `tf.GraphKeys.EVAL_STEP` collection.
  """
  existing = ops.get_default_graph().get_collection(ops.GraphKeys.EVAL_STEP)
  num_existing = len(existing)

  if num_existing > 1:
    raise ValueError('Multiple tensors added to tf.GraphKeys.EVAL_STEP')
  if num_existing == 1:
    return existing[0]

  step_collections = [ops.GraphKeys.LOCAL_VARIABLES, ops.GraphKeys.EVAL_STEP]
  return variable_scope.get_variable(
      'eval_step',
      shape=[],
      dtype=dtypes.int64,
      initializer=init_ops.zeros_initializer(),
      trainable=False,
      collections=step_collections)
  def test_multiple_random_accumulating_updates_results_in_right_dist(self):
    """Accumulated histogram of uniform samples should be close to uniform."""
    # Accumulate the updates in a new variable.  Resultant
    # histogram should be uniform.  Use only 3 bins because with many bins it
    # would be unlikely that all would be close to 1/n.  If someone ever wants
    # to test that, it would be better to check that the cdf was linear.
    value_range = [1.0, 4.14159]
    with self.test_session() as sess:
      values = array_ops.placeholder(dtypes.float32, shape=[4, 4, 4])
      hist = histogram_ops.histogram_fixed_width(
          values, value_range, nbins=3, dtype=dtypes.int64)

      # Running total of the per-batch histograms; `hist_accum` is rebound to
      # the assign_add op so that each `sess.run` adds one more batch.
      hist_accum = variables.Variable(init_ops.zeros_initializer()(
          [3], dtype=dtypes.int64))
      hist_accum = hist_accum.assign_add(hist)

      variables.global_variables_initializer().run()

      for _ in range(100):
        # Map the rv: U[0, 1] --> U[value_range[0], value_range[1]].
        values_arr = (
            value_range[0] +
            (value_range[1] - value_range[0]) * self.rng.rand(4, 4, 4))

        hist_accum_arr = sess.run(hist_accum, feed_dict={values: values_arr})

    # Normalize the final counts and compare every bin with 1/3.
    pmf = hist_accum_arr / float(hist_accum_arr.sum())
    np.testing.assert_allclose(1 / 3, pmf, atol=0.02)
Example #17
0
  def testInitFromNonInitializer(self):
    """Compares partitioned variables with and without a zeros initializer."""
    with self.test_session() as sess:
      # Dtypes for which a variable created without an initializer is
      # compared against one created with an explicit zeros initializer.
      types = [
          dtypes.int8, dtypes.uint8, dtypes.int16, dtypes.uint16, dtypes.int32,
          dtypes.int64, dtypes.bool
      ]

      # Use a different variable name for each dtype.
      for (i, dtype) in enumerate(types):
        # NOTE(review): `x` is created with shape (3, 4) and `y` with shape
        # (6, 4) -- confirm that the differing shapes are intentional.
        x = variable_scope.get_variable(
            name="x%d" % i,
            shape=(3, 4),
            dtype=dtype,
            partitioner=axis0_into2_partitioner)
        y = variable_scope.get_variable(
            name="y%d" % i,
            shape=(6, 4),
            dtype=dtype,
            partitioner=axis0_into2_partitioner,
            initializer=init_ops.zeros_initializer(dtype=dtype))

        variables_lib.global_variables_initializer().run()
        # x and y would become var list after partition
        val_x = sess.run(list(x))
        val_y = sess.run(list(y))

        self.assertAllEqual(val_x, val_y)
Example #18
0
  def _create_global_step(self, graph):
    """Builds the global step variable used on TPUs.

    Args:
      graph: The graph in which to create the global step; the default graph
        is used when this is falsy.

    Returns:
      The newly created global step: a non-trainable int32 scalar resource
      variable registered in the GLOBAL_VARIABLES and GLOBAL_STEP collections.

    Raises:
      ValueError: if the global step tensor is already defined.
    """
    target_graph = graph or ops.get_default_graph()
    existing_step = training.get_global_step(target_graph)
    if existing_step is not None:
      raise ValueError('"global_step" already exists.')

    step_collections = [ops.GraphKeys.GLOBAL_VARIABLES,
                        ops.GraphKeys.GLOBAL_STEP]
    # Create in proper graph and base name_scope.
    with target_graph.as_default() as g, g.name_scope(None):
      global_step = variable_scope.get_variable(
          ops.GraphKeys.GLOBAL_STEP,
          shape=[],
          dtype=dtypes.int32,
          initializer=init_ops.zeros_initializer(),
          trainable=False,
          use_resource=True,
          collections=step_collections)
    return global_step
Example #19
0
 def model_fn(features, labels, mode):
   """Model function that ties the variable `w` to the global step.

   Returns an `EstimatorSpec` for the TRAIN and EVAL modes.  For any other
   mode no branch matches and the function implicitly returns `None`.
   """
   _ = labels
   step = training.get_global_step()
   w = variable_scope.get_variable(
       'w',
       shape=[],
       initializer=init_ops.zeros_initializer(),
       dtype=dtypes.int64)
   if estimator_lib.ModeKeys.TRAIN == mode:
     # to consume features, we have control dependency
     with ops.control_dependencies([features]):
       step_inc = state_ops.assign_add(training.get_global_step(), 1)
     # The assignment to `w` is ordered after the global step increment.
     with ops.control_dependencies([step_inc]):
       assign_w_to_step_plus_2 = w.assign(step + 2)
     return estimator_lib.EstimatorSpec(
         mode,
         loss=constant_op.constant(3.),
         train_op=assign_w_to_step_plus_2)
   if estimator_lib.ModeKeys.EVAL == mode:
     # to consume features, we have control dependency
     with ops.control_dependencies([features]):
       loss = constant_op.constant(5.)
     return estimator_lib.EstimatorSpec(
         mode,
         loss=loss,
         # w is constant in each step, so the mean.
         # w = 0 if step==0 else step+2
         eval_metric_ops={'mean_of_const': metrics_lib.mean(w)})
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Applies the gradients unless they are too stale.

    Staleness is computed as `global_step - self._local_step`.  When it is at
    most `self._staleness`, the wrapped optimizer applies the gradients and
    the stale counter grows by 0; otherwise the gradients are dropped and the
    counter grows by 1.  A summary of the counter divided by
    `global_step + 1` is also emitted.

    Args:
      grads_and_vars: Sequence of `(gradient, variable)` pairs.
      global_step: Global step used to measure staleness.  Although the
        parameter defaults to `None`, the staleness computation cannot work
        without it, so `None` is rejected up front.
      name: Optional name forwarded to the wrapped optimizer.

    Returns:
      The op that updates the stale-gradient counter; running it also runs
      the conditional gradient application.

    Raises:
      ValueError: If `global_step` is `None`.
    """
    # Fail early with a clear message, before any variable is created,
    # instead of failing later inside `colocate_with` / the subtraction.
    if global_step is None:
      raise ValueError(
          "global_step is required to measure gradient staleness.")

    gradients = []
    # Number of stale gradients.
    stale_counter = variable_scope.get_variable(
        "stale_counter", [],
        initializer=init_ops.zeros_initializer(),
        trainable=False)

    def _AcceptGradientOp():
      # Apply the gradients, then contribute 0 to the stale counter.
      with ops.control_dependencies(
          [self._opt.apply_gradients(
              grads_and_vars, global_step=global_step, name=name)]):
        return gen_array_ops.identity(0.0)

    def _DropGradientOp():
      # Skip the update and count one stale gradient.
      return gen_array_ops.identity(1.0)

    # Collect something usable as a control dependency for every gradient:
    # the tensor itself, or the `op` attribute of anything else.
    for grad_and_var in grads_and_vars:
      grad = grad_and_var[0]
      if isinstance(grad, ops.Tensor):
        gradients.append(grad)
      else:
        gradients.append(grad.op)

    with ops.control_dependencies(gradients), ops.colocate_with(global_step):
      staleness = gen_array_ops.reshape(
          global_step - self._local_step, shape=())
      conditional_update = stale_counter.assign_add(control_flow_ops.cond(
          gen_math_ops.less_equal(staleness, self._staleness),
          _AcceptGradientOp, _DropGradientOp))

    summary.scalar(
        "Gradient staleness percentage",
        stale_counter / (math_ops.cast(global_step + 1, dtypes.float32)))
    return conditional_update
Example #21
0
 def __init__(self,
              filters,
              kernel_size,
              strides=(1, 1),
              padding='valid',
              data_format='channels_last',
              dilation_rate=(1, 1),
              activation=None,
              use_bias=True,
              kernel_initializer=None,
              bias_initializer=init_ops.zeros_initializer(),
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              trainable=True,
              name=None,
              **kwargs):
   """Forwards every argument to the parent constructor with `rank=2`.

   No state is set here directly; all arguments, plus any extra `**kwargs`,
   are passed through unchanged by keyword.
   """
   super(MaskedConv2D, self).__init__(
       rank=2,
       filters=filters,
       kernel_size=kernel_size,
       strides=strides,
       padding=padding,
       data_format=data_format,
       dilation_rate=dilation_rate,
       activation=activation,
       use_bias=use_bias,
       kernel_initializer=kernel_initializer,
       bias_initializer=bias_initializer,
       kernel_regularizer=kernel_regularizer,
       bias_regularizer=bias_regularizer,
       activity_regularizer=activity_regularizer,
       trainable=trainable,
       name=name,
       **kwargs)
  def testInitialValueComesFromCheckpoint(self):
    """Checks that `init_from_checkpoint` replaces a variable's initial value.

    `my1` is created under a zeros initializer. A variable built from
    `my1.initialized_value()` before the `init_from_checkpoint` call is
    expected to hold zeros, while one built after the call is expected to hold
    the checkpointed value `v1`.
    """
    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
      v1, _, _, _ = _create_checkpoints(session, checkpoint_dir)

    # New graph and session.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        with variable_scope.variable_scope(
            "some_scope", initializer=init_ops.zeros_initializer()):
          my1 = variable_scope.get_variable("my1", [1, 10])

        # At this point, my1.initialized_value() will add ops that reference
        # the zeros initializer of my1.
        before = variables.Variable(my1.initialized_value(), name="before")

        checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1})

        # At this point, my1.initialized_value() will add ops that reference
        # the newly set initializer of my1.
        after = variables.Variable(my1.initialized_value(), name="after")

        session.run(variables.global_variables_initializer())
        self.assertAllEqual(session.run(my1), v1)
        self.assertAllEqual(session.run(my1.initialized_value()), v1)
        self.assertAllClose(session.run(before), [[0.0] * 10])
        self.assertAllClose(session.run(after), v1)
        # `before` and `after` must differ, so comparing them has to fail.
        with self.assertRaises(AssertionError):
          self.assertAllClose(session.run(before), session.run(after))
 def variable_scoped_function_no_return_value(trainable=True):
   """Creates the "dummy" variable of shape [1] and returns nothing.

   Args:
     trainable: Forwarded to `get_variable` as the `trainable` flag.
   """
   # defun cannot compile functions that return non-Tensor objects, so the
   # variable is created purely for its side effect and then discarded.
   variable_scope.get_variable(
       "dummy", shape=[1], trainable=trainable,
       initializer=init_ops.zeros_initializer())
Example #24
0
 def testZerosInitializer(self):
   """A variable built with `zeros_initializer` evaluates to all zeros."""
   dims = [2, 3]
   with self.test_session(use_gpu=True):
     zero_var = variable_scope.get_variable(
         "x", shape=dims, initializer=init_ops.zeros_initializer())
     zero_var.initializer.run()
     self.assertAllEqual(zero_var.eval(), np.zeros(dims))
Example #25
0
 def testVariableInput(self):
   """`Dense(1)` applied to a zero-initialized 1x1 variable yields [[0.0]]."""
   with self.test_session():
     zero_input = variable_scope.get_variable(
         'X', shape=(1, 1), initializer=init_ops.zeros_initializer())
     dense_output = core_layers.Dense(1)(zero_input)
     variables.global_variables_initializer().run()
     self.assertAllEqual(dense_output.eval(), [[0.0]])
  def testInitialValueComesFromCheckpoint(self):
    """Checks `initialized_value()` around an `init_from_checkpoint` call.

    Two `my1.initialized_value()` tensors are captured, one before and one
    after `init_from_checkpoint`, and each is evaluated both before and after
    the global initializer has been run.
    """
    checkpoint_dir = self.get_temp_dir()
    with self.test_session() as session:
      v1, _, _, _ = _create_checkpoints(session, checkpoint_dir)

    # New graph and session.
    with ops.Graph().as_default() as g:
      with self.test_session(graph=g) as session:
        with variable_scope.variable_scope(
            "some_scope", initializer=init_ops.zeros_initializer()):
          my1 = variable_scope.get_variable("my1", [1, 10])

        # Captured while my1 still uses the zeros initializer of its scope.
        before = my1.initialized_value()

        checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1})

        # Captured after the checkpoint mapping for my1 has been registered.
        after = my1.initialized_value()

        # The initializers have not been run yet: `before` is expected to
        # evaluate to zeros and `after` to the checkpointed value.
        self.assertAllEqual(session.run(before), [[0.0] * 10])
        self.assertAllEqual(session.run(after), v1)

        session.run(variables.global_variables_initializer())

        # Once the initializers have run, the variable and both captured
        # tensors are expected to evaluate to the checkpointed value.
        self.assertAllEqual(session.run(my1), v1)
        self.assertAllEqual(session.run(my1.initialized_value()), v1)
        self.assertAllClose(session.run(before), v1)
        self.assertAllClose(session.run(after), v1)
        # Guard against a vacuous test: v1 itself must not be all zeros.
        with self.assertRaises(AssertionError):
          self.assertAllClose(v1, [[0.0] * 10])
Example #27
0
  def build(self, inputs_shape):
    """Creates the gate and candidate kernels and biases of the cell.

    Args:
      inputs_shape: Shape of the inputs; dimension 1 must be statically known
        because it determines the kernel shapes.

    Raises:
      ValueError: If `inputs_shape[1].value` is None.
    """
    input_depth = inputs_shape[1].value
    if input_depth is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    # Gate variables: twice `_num_units` outputs.
    self._gate_kernel = self.add_variable(
        "gates/%s" % _WEIGHTS_VARIABLE_NAME,
        shape=[input_depth + self._num_units, 2 * self._num_units],
        initializer=self._kernel_initializer)
    # Without a user-supplied bias initializer the gate bias starts at 1.0.
    gate_bias_init = self._bias_initializer
    if gate_bias_init is None:
      gate_bias_init = init_ops.constant_initializer(1.0, dtype=self.dtype)
    self._gate_bias = self.add_variable(
        "gates/%s" % _BIAS_VARIABLE_NAME,
        shape=[2 * self._num_units],
        initializer=gate_bias_init)

    # Candidate variables: `_num_units` outputs.
    self._candidate_kernel = self.add_variable(
        "candidate/%s" % _WEIGHTS_VARIABLE_NAME,
        shape=[input_depth + self._num_units, self._num_units],
        initializer=self._kernel_initializer)
    # Without a user-supplied bias initializer the candidate bias starts at 0.
    candidate_bias_init = self._bias_initializer
    if candidate_bias_init is None:
      candidate_bias_init = init_ops.zeros_initializer(dtype=self.dtype)
    self._candidate_bias = self.add_variable(
        "candidate/%s" % _BIAS_VARIABLE_NAME,
        shape=[self._num_units],
        initializer=candidate_bias_init)

    self.built = True
Example #28
0
 def __init__(self, units,
              activation=None,
              use_bias=True,
              kernel_initializer=None,
              bias_initializer=init_ops.zeros_initializer(),
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              bias_constraint=None,
              trainable=True,
              name=None,
              **kwargs):
   """Initializes the layer by delegating to the parent constructor.

   Every argument, including any extra `**kwargs`, is forwarded unchanged as
   a keyword argument to the superclass `__init__`.
   """
   super(Dense, self).__init__(units=units,
                               activation=activation,
                               use_bias=use_bias,
                               kernel_initializer=kernel_initializer,
                               bias_initializer=bias_initializer,
                               kernel_regularizer=kernel_regularizer,
                               bias_regularizer=bias_regularizer,
                               activity_regularizer=activity_regularizer,
                               kernel_constraint=kernel_constraint,
                               bias_constraint=bias_constraint,
                               trainable=trainable,
                               name=name,
                               **kwargs)
Example #29
0
 def __init__(self, units,
              activation=None,
              use_bias=True,
              kernel_initializer=None,
              bias_initializer=init_ops.zeros_initializer(),
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              bias_constraint=None,
              trainable=True,
              name=None,
              **kwargs):
   """Stores the layer configuration on the instance.

   `trainable`, `name` and any extra `**kwargs` are passed to the parent
   constructor; every other argument is stored as a same-named attribute.
   """
   super(Dense, self).__init__(trainable=trainable, name=name, **kwargs)
   self.units = units
   self.activation = activation
   self.use_bias = use_bias
   self.kernel_initializer = kernel_initializer
   self.bias_initializer = bias_initializer
   self.kernel_regularizer = kernel_regularizer
   self.bias_regularizer = bias_regularizer
   self.activity_regularizer = activity_regularizer
   self.kernel_constraint = kernel_constraint
   self.bias_constraint = bias_constraint
   # Inputs are declared to need at least two dimensions.
   self.input_spec = base.InputSpec(min_ndim=2)
Example #30
0
def create_global_step(graph=None):
  """Creates the global step variable in a graph.

  The variable is a non-trainable int64 scalar initialized to zero and is
  added to the `GLOBAL_VARIABLES` and `GLOBAL_STEP` collections.

  Args:
    graph: The graph in which to create the global step. The default graph is
      used when this is `None`.

  Returns:
    The newly created global step variable.

  Raises:
    ValueError: If the graph already has a global step.
  """
  if graph is None:
    graph = ops.get_default_graph()

  if get_global_step(graph) is not None:
    raise ValueError('"global_step" already exists.')

  step_name = ops.GraphKeys.GLOBAL_STEP
  # Create in proper graph and base name_scope.
  with graph.as_default() as g, g.name_scope(None):
    return variable(
        step_name,
        shape=[],
        dtype=dtypes.int64,
        initializer=init_ops.zeros_initializer(),
        trainable=False,
        collections=[ops.GraphKeys.GLOBAL_VARIABLES, step_name])
Example #31
0
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
    """A restricted linear prediction builder based on FeatureColumns.

    As long as all feature columns are unweighted sparse columns this computes
    the prediction of a linear model which stores all weights in a single
    variable.

    Args:
      columns_to_tensors: A mapping from feature column to tensors. 'string'
        key means a base feature (not-transformed). It can have FeatureColumn
        as a key too. That means that FeatureColumn is already transformed by
        input pipeline. For example, `inflow` may have handled transformations.
      feature_columns: A set containing all the feature columns. All items in
        the set should be instances of classes derived from FeatureColumn.
      num_outputs: An integer specifying number of outputs. Required; the
        signature provides no default.
      weight_collections: List of graph collections to which weights are added.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
      A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A list of Variables storing the weights.
      * A Variable which is used for bias.

    Raises:
      ValueError: if FeatureColumn cannot be used for linear predictions.
      NotImplementedError: if a column's `_wide_embedding_lookup_arguments`
        raises `NotImplementedError`; it is re-raised with a message saying
        that real-valued columns are not supported.
    """
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='joint_weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        transformer = _Transformer(columns_to_tensors)
        embedding_lookup_arguments = []
        # Duplicates are dropped and columns are visited in `key` order.
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments.append(
                    column._wide_embedding_lookup_arguments(
                        transformed_tensor))  # pylint: disable=protected-access
            except NotImplementedError:
                raise NotImplementedError(
                    'Real-valued columns are not supported. '
                    'Use weighted_sum_from_feature_columns '
                    'instead, or bucketize these columns.')

        variable, predictions_no_bias = _create_joint_embedding_lookup(
            columns_to_tensors, embedding_lookup_arguments, num_outputs,
            trainable, weight_collections)
        # Zero-initialized bias with one entry per output.
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=_add_variable_collection(weight_collections))
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, variable, bias
Example #32
0
def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
    """AlexNet version 2.

    Described in: http://arxiv.org/pdf/1404.5997v2.pdf
    Parameters from:
    github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
    layers-imagenet-1gpu.cfg

    Note: All the fully_connected layers have been transformed to conv2d
          layers. To use in classification mode, resize input to 224x224. To
          use in fully convolutional mode, set spatial_squeeze to false.
          The LRN layers have been removed and change the initializers from
          random_normal_initializer to xavier_initializer.

    Args:
      inputs: a tensor of size [batch_size, height, width, channels].
      num_classes: number of predicted classes.
      is_training: whether or not the model is being trained.
      dropout_keep_prob: the probability that activations are kept in the
        dropout layers during training.
      spatial_squeeze: whether or not should squeeze the spatial dimensions of
        the outputs. Useful to remove unnecessary dimensions for
        classification.
      scope: Optional scope for the variables.

    Returns:
      A tuple `(net, end_points)`: `net` is the output of the final 'fc8'
      layer, with dimensions 1 and 2 squeezed out when `spatial_squeeze` is
      true, and `end_points` is the dict built from the end-points collection.
      When `spatial_squeeze` is true, the squeezed `net` is also stored in
      `end_points` under `sc.name + '/fc8'`.
    """
    with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Collect outputs for conv2d, fully_connected and max_pool2d.
        with arg_scope(
            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
                outputs_collections=[end_points_collection]):
            net = layers.conv2d(inputs,
                                64, [11, 11],
                                4,
                                padding='VALID',
                                scope='conv1')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
            net = layers.conv2d(net, 192, [5, 5], scope='conv2')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
            net = layers.conv2d(net, 384, [3, 3], scope='conv3')
            net = layers.conv2d(net, 384, [3, 3], scope='conv4')
            net = layers.conv2d(net, 256, [3, 3], scope='conv5')
            net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')

            # Use conv2d instead of fully_connected layers.
            with arg_scope(
                [layers.conv2d],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1)):
                net = layers.conv2d(net,
                                    4096, [5, 5],
                                    padding='VALID',
                                    scope='fc6')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout6')
                net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
                net = layers_lib.dropout(net,
                                         dropout_keep_prob,
                                         is_training=is_training,
                                         scope='dropout7')
                # Final layer: no activation or normalizer, and a zero bias
                # initializer instead of the 0.1 constant set by the scope.
                net = layers.conv2d(
                    net,
                    num_classes, [1, 1],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    scope='fc8')

            # Convert end_points_collection into a end_point dict.
            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            if spatial_squeeze:
                net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
                end_points[sc.name + '/fc8'] = net
            return net, end_points
Example #33
0
 def build(self, input_shape):
     """Creates the zero-initialized 2x2 variable `my_var`.

     Args:
       input_shape: Unused by this implementation.
     """
     zeros = init_ops.zeros_initializer()
     self.my_var = self.add_variable('my_var', [2, 2], initializer=zeros)
Example #34
0
def moving_mean_variance(value, decay, collections=None, name=None):
    """Compute exponentially weighted moving {mean,variance} of a streaming value.

    The exponentially-weighting moving `mean_var` and `variance_var` are
    updated by `value` according to the following recurrence:

    ```python
    variance_var = decay * (variance_var + (1-decay) * (value - mean_var)**2)
    mean_var     = decay * mean_var + (1 - decay) * value
    ```

    Note: `mean_var` is updated *after* `variance_var`, i.e., `variance_var`
    uses the lag-`1` mean.

    For derivation justification, see equation 143 of:
      T. Finch, Feb 2009. "Incremental calculation of weighted mean and
      variance".
      http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf

    Unlike `assign_moving_mean_variance`, this function handles variable
    creation: it creates the non-trainable, zero-initialized variables
    "moving_variance" and "moving_mean" with the shape and dtype of `value`.

    Args:
      value: `float`-like `Tensor`. Its shape and dtype are used for the
        internal variables.
      decay: A `float`-like `Tensor`. The moving mean decay. Typically close
        to `1.`, e.g., `0.999`. It is converted to the base dtype of `value`.
      collections: Python list of graph-collections keys to which the internal
        variables `mean_var` and `variance_var` are added.
        Default value is `[GraphKeys.GLOBAL_VARIABLES]`.
      name: Optional name of the returned operation.

    Returns:
      The result of `assign_moving_mean_variance(mean_var, variance_var,
      value, decay)` -- presumably the updated `mean_var` and `variance_var`;
      verify against that function.

    Raises:
      TypeError: if the base dtype of `value` is not a floating type.
    """

    def _zero_statistic(var_name, like):
        # Non-trainable, zero-initialized variable shaped and typed like `like`.
        return variable_scope.get_variable(
            var_name,
            shape=like.shape,
            dtype=like.dtype,
            initializer=init_ops.zeros_initializer(),
            trainable=False,
            collections=collections)

    if collections is None:
        collections = [ops.GraphKeys.GLOBAL_VARIABLES]

    with variable_scope.variable_scope(name, "moving_mean_variance",
                                       [value, decay]):
        value = ops.convert_to_tensor(value, name="value")
        value_base_dtype = value.dtype.base_dtype
        if not value_base_dtype.is_floating:
            message = "value.base_dtype({}) does not have float type `dtype`."
            raise TypeError(message.format(value_base_dtype.name))
        decay = ops.convert_to_tensor(
            decay, dtype=value_base_dtype, name="decay")

        # The variance variable is created before the mean variable.
        variance_var = _zero_statistic("moving_variance", value)
        mean_var = _zero_statistic("moving_mean", value)
        return assign_moving_mean_variance(mean_var, variance_var, value,
                                           decay)
Example #35
0
def conv2d(inputs,
           num_outputs,
           kernel_size,
           stride=1,
           padding='SAME',
           data_format=None,
           rate=1,
           activation_fn=nn.relu,
           normalizer_fn=None,
           normalizer_params=None,
           weights_initializer=initializers.xavier_initializer(),
           weights_regularizer=None,
           biases_initializer=init_ops.zeros_initializer(),
           biases_regularizer=None,
           reuse=None,
           variables_collections=None,
           outputs_collections=None,
           trainable=True,
           scope=None,
           quantizer=None,
           weight_quantizer=None):
  """Slim-style 2-D convolution built on `QConv2D` with optional quantization.

  The layer itself is created without an activation. The normalizer and the
  activation function are applied afterwards, and when `quantizer` is given
  its `quantize` method is applied to the result of each of those two steps.

  Args:
    inputs: Input tensor (or value convertible to one); must have rank 4.
    num_outputs: Number of output filters.
    kernel_size: Kernel size forwarded to the layer.
    stride: Stride forwarded to the layer as `strides`.
    padding: Padding mode forwarded to the layer.
    data_format: One of `None`, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC' or
      'NCDHW'. Values starting with 'NC' select 'channels_first'; everything
      else, including `None`, selects 'channels_last'.
    rate: Dilation rate forwarded to the layer.
    activation_fn: Activation applied after the optional normalizer; `None`
      skips it.
    normalizer_fn: Optional normalization function. When it is given, the
      layer is created without a bias.
    normalizer_params: Optional dict of keyword arguments for `normalizer_fn`.
    weights_initializer: Initializer for the kernel.
    weights_regularizer: Optional regularizer for the kernel.
    biases_initializer: Initializer for the bias; a falsy value disables the
      bias.
    biases_regularizer: Optional regularizer for the bias.
    reuse: Whether to reuse the variables of the scope.
    variables_collections: Collections to which the kernel and bias variables
      are added.
    outputs_collections: Collections to which the outputs are added.
    trainable: Whether the created variables are trainable.
    scope: Optional variable scope; defaults to 'Conv'.
    quantizer: Optional object with a `quantize` method. It is forwarded to
      the layer and also applied to the normalized and activated outputs.
    weight_quantizer: Optional weight quantizer forwarded to the layer.

  Returns:
    The result of `slim_utils.collect_named_outputs` for the final outputs.

  Raises:
    ValueError: If `data_format` is not one of the accepted values, or if the
      rank of `inputs` is not 4.
  """
  if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC', 'NCDHW']:
    raise ValueError('Invalid data_format: %r' % (data_format,))

  # Expose the layer's 'bias'/'kernel' variables under the slim names.
  layer_variable_getter = layers._build_variable_getter(
      {'bias': 'biases', 'kernel': 'weights'})

  with variable_scope.variable_scope(
      scope, 'Conv', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)
    input_rank = inputs.get_shape().ndims

    if input_rank == 4:
      layer_class = QConv2D  # Quantized replacement for convolutional.Conv2D.
    else:
      # Format the rank into the message. Passing it as a second argument
      # would make the error text render as a tuple.
      raise ValueError(
          'Convolution not supported for input with rank %s' % (input_rank,))

    df = ('channels_first' if data_format and data_format.startswith('NC')
          else 'channels_last')
    # `use_bias` is only used for its truthiness: it is falsy when a
    # normalizer is given or when no bias initializer is supplied.
    layer = layer_class(filters=num_outputs,
                        kernel_size=kernel_size,
                        strides=stride,
                        padding=padding,
                        data_format=df,
                        dilation_rate=rate,
                        activation=None,
                        use_bias=not normalizer_fn and biases_initializer,
                        kernel_initializer=weights_initializer,
                        bias_initializer=biases_initializer,
                        kernel_regularizer=weights_regularizer,
                        bias_regularizer=biases_regularizer,
                        activity_regularizer=None,
                        trainable=trainable,
                        name=sc.name,
                        dtype=inputs.dtype.base_dtype,
                        _scope=sc,
                        _reuse=reuse,
                        quantizer=quantizer,
                        weight_quantizer=weight_quantizer)
    outputs = layer.apply(inputs)

    # Add variables to collections.
    layers._add_variable_to_collections(
        layer.kernel, variables_collections, 'weights')
    if layer.use_bias:
      layers._add_variable_to_collections(
          layer.bias, variables_collections, 'biases')

    if normalizer_fn is not None:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
      if quantizer is not None:  # quantize after normalization
        outputs = quantizer.quantize(outputs)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
      if quantizer is not None:  # quantize after activation
        outputs = quantizer.quantize(outputs)
    return slim_utils.collect_named_outputs(outputs_collections,
                                            sc.original_name_scope, outputs)
Example #36
0
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
    """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

    Generally a single example in training data is described with feature
    columns. This function generates weighted sum for each num_outputs.
    Weighted sum refers to logits in classification problems. It refers to
    prediction itself for linear regression problems.

    Example:

      ```
      # Building model for training
      feature_columns = (
          real_valued_column("my_feature1"),
          ...
      )
      columns_to_tensor = tf.parse_example(...)
      logits = weighted_sum_from_feature_columns(
          columns_to_tensors=columns_to_tensor,
          feature_columns=feature_columns,
          num_outputs=1)
      loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                     logits=logits)
      ```

    Args:
      columns_to_tensors: A mapping from feature column to tensors. 'string'
        key means a base feature (not-transformed). It can have FeatureColumn
        as a key too. That means that FeatureColumn is already transformed by
        input pipeline. For example, `inflow` may have handled transformations.
      feature_columns: A set containing all the feature columns. All items in
        the set should be instances of classes derived from FeatureColumn.
      num_outputs: An integer specifying number of outputs. Required; the
        signature provides no default.
      weight_collections: List of graph collections to which weights are added.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
      A tuple containing:

        * A Tensor which represents predictions of a linear model.
        * A dictionary which maps feature_column to corresponding Variable.
        * A Variable which is used for bias.

    Raises:
      ValueError: if FeatureColumn cannot be used for linear predictions. A
        `ValueError` raised while processing a column is re-raised with the
        column name prepended to the message.
    """
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        output_tensors = []
        column_to_variable = dict()
        transformer = _Transformer(columns_to_tensors)
        # pylint: disable=protected-access
        # Duplicates are dropped and columns are visited in `key` order.
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments = column._wide_embedding_lookup_arguments(
                    transformed_tensor)
                variable, predictions = _create_embedding_lookup(
                    column, columns_to_tensors, embedding_lookup_arguments,
                    num_outputs, trainable, weight_collections)
            except NotImplementedError:
                # Fallback for columns without wide embedding lookup support:
                # use the dense tensor with a zero-initialized weight matrix.
                with variable_scope.variable_scope(
                        None,
                        default_name=column.name,
                        values=columns_to_tensors.values()):
                    tensor = column._to_dense_tensor(transformed_tensor)
                    tensor = fc._reshape_real_valued_tensor(
                        tensor, 2, column.name)
                    variable = [
                        contrib_variables.model_variable(
                            name='weight',
                            shape=[tensor.get_shape()[1], num_outputs],
                            initializer=init_ops.zeros_initializer(),
                            trainable=trainable,
                            collections=weight_collections)
                    ]
                    predictions = math_ops.matmul(tensor,
                                                  variable[0],
                                                  name='matmul')
            except ValueError as ee:
                raise ValueError(
                    'Error creating weighted sum for column: {}.\n'
                    '{}'.format(column.name, ee))
            # Every column contributes a (-1, num_outputs) tensor to the sum.
            output_tensors.append(
                array_ops.reshape(predictions, shape=(-1, num_outputs)))
            column_to_variable[column] = variable
            _log_variable(variable)
            _maybe_restore_from_checkpoint(column._checkpoint_path(), variable)
        # pylint: enable=protected-access
        predictions_no_bias = math_ops.add_n(output_tensors)
        # Zero-initialized bias with one entry per output.
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=_add_variable_collection(weight_collections))
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, column_to_variable, bias