Example #1
def sequence_softmax(inputs, noutput, scope=None, name=None, linear_name=None):
  """Run a softmax layer over all the time steps of an input sequence.

  Args:
    inputs: (length, batch_size, depth) tensor
    noutput: output depth
    scope: optional scope name
    name: optional name for output tensor
    linear_name: name for linear (pre-softmax) output

  Returns:
    A tensor of size (length, batch_size, noutput).

  """
  length, _, ninputs = _shape(inputs)
  inputs_u = array_ops.unstack(inputs)
  output_u = []
  with variable_scope.variable_scope(scope, "SequenceSoftmax", [inputs]):
    initial_w = random_ops.truncated_normal([0 + ninputs, noutput], stddev=0.1)
    initial_b = constant_op.constant(0.1, shape=[noutput])
    w = variables.model_variable("weights", initializer=initial_w)
    b = variables.model_variable("biases", initializer=initial_b)
    for i in xrange(length):
      with variable_scope.variable_scope(scope, "SequenceSoftmaxStep",
                                         [inputs_u[i]]):
        # TODO(tmb) consider using slim.fully_connected(...,
        # activation_fn=tf.nn.softmax)
        linear = nn_ops.xw_plus_b(inputs_u[i], w, b, name=linear_name)
        output = nn_ops.softmax(linear)
        output_u += [output]
    outputs = array_ops.stack(output_u, name=name)
  return outputs
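A minimal usage sketch (assuming TensorFlow 1.x and that sequence_softmax and its _shape helper above are in scope; the shapes are illustrative only):

import tensorflow as tf

# (length, batch_size, depth) input: 20 time steps, batch of 8, 64 features.
sequence = tf.placeholder(tf.float32, shape=[20, 8, 64])
probs = sequence_softmax(sequence, noutput=10)
# probs has shape (20, 8, 10): one softmax distribution per time step, with all
# steps sharing the same 'weights'/'biases' model variables.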
Example #2
 def testGetLocalVariables(self):
   with self.test_session():
     with variable_scope.variable_scope('A'):
       _ = variables_lib2.model_variable('a', [5])
     with variable_scope.variable_scope('B'):
       _ = variables_lib2.model_variable('a', [5])
     self.assertEquals([], variables_lib2.get_local_variables('A'))
     self.assertEquals([], variables_lib2.get_local_variables('B'))
Example #3
 def testGetModelVariables(self):
   with self.test_session():
     with variable_scope.variable_scope('A'):
       a = variables_lib2.model_variable('a', [5])
     with variable_scope.variable_scope('B'):
       b = variables_lib2.model_variable('a', [5])
     self.assertEquals([a], variables_lib2.get_model_variables('A'))
     self.assertEquals([b], variables_lib2.get_model_variables('B'))
Example #4
  def testVariableWithVariableDeviceChooser(self):

    with ops.Graph().as_default():
      device_fn = variables_lib2.VariableDeviceChooser()
      with arg_scope([variables_lib2.model_variable], device=device_fn):
        a = variables_lib2.model_variable('a', [5])
        b = variables_lib2.model_variable('b', [20])
        self.assertDeviceEqual(a.device, 'cpu:0')
        self.assertEqual(a.initial_value.op.colocation_groups(),
                         a.op.colocation_groups())
        self.assertDeviceEqual(b.device, 'cpu:0')
        self.assertEqual(a.initial_value.op.colocation_groups(),
                         a.op.colocation_groups())
Example #5
def _model_variable_getter(getter,
                           name,
                           shape=None,
                           dtype=None,
                           initializer=None,
                           regularizer=None,
                           trainable=True,
                           collections=None,
                           caching_device=None,
                           partitioner=None,
                           rename=None,
                           use_resource=None,
                           **_):
  """Getter that uses model_variable for compatibility with core layers."""
  short_name = name.split('/')[-1]
  if rename and short_name in rename:
    name_components = name.split('/')
    name_components[-1] = rename[short_name]
    name = '/'.join(name_components)
  return variables.model_variable(
      name,
      shape=shape,
      dtype=dtype,
      initializer=initializer,
      regularizer=regularizer,
      collections=collections,
      trainable=trainable,
      caching_device=caching_device,
      partitioner=partitioner,
      custom_getter=getter,
      use_resource=use_resource)
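A hedged sketch of how a getter like this is usually wired in (assuming TensorFlow 1.x; functools.partial binds the optional rename map before the getter is handed to variable_scope as a custom_getter):

import functools
import tensorflow as tf

getter = functools.partial(_model_variable_getter,
                           rename={'kernel': 'weights'})
with tf.variable_scope('dense1', custom_getter=getter):
  # Requested as 'kernel' but created through model_variable as
  # 'dense1/weights', so it lands in the MODEL_VARIABLES collection.
  w = tf.get_variable('kernel', shape=[128, 10])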
Example #6
 def testNotInLocalVariables(self):
   with self.test_session():
     with variable_scope.variable_scope('A'):
       a = variables_lib2.model_variable('a', [5])
       self.assertTrue(a in variables_lib.global_variables())
       self.assertTrue(a in ops.get_collection(ops.GraphKeys.MODEL_VARIABLES))
       self.assertFalse(a in variables_lib.local_variables())
Example #7
 def testNameAndShape(self):
   with self.test_session():
     with variable_scope.variable_scope('A'):
       a = variables_lib2.model_variable('a', [5])
       self.assertEquals(a.op.name, 'A/a')
       self.assertListEqual(a.get_shape().as_list(), [5])
       self.assertListEqual([a], variables_lib2.get_model_variables('A'))
Example #8
def bow_encoder(ids,
                vocab_size,
                embed_dim,
                sparse_lookup=True,
                initializer=None,
                regularizer=None,
                trainable=True,
                scope=None,
                reuse=None):
  """Maps a sequence of symbols to a vector per example by averaging embeddings.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` or `SparseTensor` of type
      `int32` or `int64` with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    sparse_lookup: `bool`, if `True`, converts ids to a `SparseTensor`
        and performs a sparse embedding lookup. This is usually faster,
        but not desirable if padding tokens should have an embedding. Empty rows
        are assigned a special embedding.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    Encoding `Tensor` `[batch_size, embed_dim]` produced by
    averaging embeddings.

  Raises:
    ValueError: If `embed_dim` or `vocab_size` are not specified.
  """
  if not vocab_size or not embed_dim:
    raise ValueError('Must specify vocab size and embedding dimension')
  with variable_scope.variable_scope(
      scope, 'bow_encoder', [ids], reuse=reuse):
    embeddings = variables.model_variable(
        'embeddings', shape=[vocab_size, embed_dim],
        initializer=initializer, regularizer=regularizer,
        trainable=trainable)
    if sparse_lookup:
      if isinstance(ids, sparse_tensor.SparseTensor):
        sparse_ids = ids
      else:
        sparse_ids = sparse_ops.dense_to_sparse_tensor(ids)
      return contrib_embedding_ops.safe_embedding_lookup_sparse(
          [embeddings], sparse_ids, combiner='mean', default_id=0)
    else:
      if isinstance(ids, sparse_tensor.SparseTensor):
        raise TypeError('ids are expected to be dense Tensor, got: %s', ids)
      return math_ops.reduce_mean(
          embedding_ops.embedding_lookup(embeddings, ids),
          reduction_indices=1)
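A minimal usage sketch (assuming TensorFlow 1.x, where this encoder is exposed as tf.contrib.layers.bow_encoder; the ids below are illustrative):

import tensorflow as tf

ids = tf.constant([[3, 7, 0, 0],
                   [12, 5, 9, 1]], dtype=tf.int64)   # [batch_size, doc_length]
doc_vectors = tf.contrib.layers.bow_encoder(
    ids, vocab_size=100, embed_dim=16, scope='bow')
# doc_vectors: [2, 16]. With the default sparse_lookup=True, ids equal to 0 are
# dropped by the dense-to-sparse conversion, matching the padding note above.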
Example #9
def l2_normalization(
        inputs,
        scaling=False,
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
    scaling: whether or not to add a post scaling operation along the dimensions
      which have been normalized.
    scale_initializer: An initializer for the weights.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables or
      a dictionary containing a different list of collection per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    """

    with variable_scope.variable_scope(
            scope, 'L2Normalization', [inputs], reuse=reuse) as sc:

        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        params_shape = inputs_shape[-1:]
        dtype = inputs.dtype.base_dtype

        # Normalize along spatial dimensions.
        norm_dim = tf.range(1, inputs_rank-1)
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            outputs = tf.multiply(outputs, scale)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
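A minimal usage sketch (assuming TensorFlow 1.x and the l2_normalization above; the SSD-style initial scale of 20 is only an illustration):

import tensorflow as tf

conv4_3 = tf.placeholder(tf.float32, [None, 38, 38, 512])
normalized = l2_normalization(conv4_3,
                              scaling=True,
                              scale_initializer=tf.constant_initializer(20.0),
                              scope='conv4_3_norm')
# 'conv4_3_norm/gamma' is created through model_variable, so it appears in
# GraphKeys.MODEL_VARIABLES and is saved/restored with the model.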
Example #10
  def testDeviceFn(self):

    class DevFn(object):

      def __init__(self):
        self.counter = -1

      def __call__(self, op):
        self.counter += 1
        return '/cpu:%d' % self.counter

    with ops.Graph().as_default():
      with arg_scope([variables_lib2.model_variable], device=DevFn()):
        a = variables_lib2.model_variable('a', [5])
        b = variables_lib2.model_variable('b', [20])
        self.assertDeviceEqual(a.device, '/cpu:0')
        self.assertEqual(a.initial_value.op.colocation_groups(),
                         a.op.colocation_groups())
        self.assertDeviceEqual(b.device, '/cpu:1')
        self.assertEqual(b.initial_value.op.colocation_groups(),
                         b.op.colocation_groups())
Example #11
def embed_sequence(ids,
                   vocab_size=None,
                   embed_dim=None,
                   unique=False,
                   initializer=None,
                   regularizer=None,
                   trainable=True,
                   scope=None,
                   reuse=None):
  """Maps a sequence of symbols to a sequence of embeddings.

  Typical use case would be reusing embeddings between an encoder and decoder.

  Args:
    ids: `[batch_size, doc_length]` `Tensor` of type `int32` or `int64`
      with symbol ids.
    vocab_size: Integer number of symbols in vocabulary.
    embed_dim: Integer number of dimensions for embedding matrix.
    unique: If `True`, will first compute the unique set of indices, and then
         lookup each embedding once, repeating them in the output as needed.
    initializer: An initializer for the embeddings, if `None` default for
        current scope is used.
    regularizer: Optional regularizer for the embeddings.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional string specifying the variable scope for the op, required
        if `reuse=True`.
    reuse: If `True`, variables inside the op will be reused.

  Returns:
    `Tensor` of `[batch_size, doc_length, embed_dim]` with embedded sequences.

  Raises:
    ValueError: if `embed_dim` or `vocab_size` are not specified when 
      `reuse` is `None` or `False`.
  """
  if not (reuse or (vocab_size and embed_dim)):
    raise ValueError('Must specify vocab size and embedding dimension when not '
                     'reusing. Got vocab_size=%s and embed_dim=%s' % (
                         vocab_size, embed_dim))
  with variable_scope.variable_scope(
      scope, 'EmbedSequence', [ids], reuse=reuse):
    shape = [vocab_size, embed_dim]
    if reuse and vocab_size is None or embed_dim is None:
      shape = None
    embeddings = variables.model_variable(
        'embeddings', shape=shape,
        initializer=initializer, regularizer=regularizer,
        trainable=trainable)
    if unique:
      return contrib_embedding_ops.embedding_lookup_unique(embeddings, ids)
    return embedding_ops.embedding_lookup(embeddings, ids)
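A minimal usage sketch (assuming TensorFlow 1.x, where this is exposed as tf.contrib.layers.embed_sequence; the second call reuses the same 'embeddings' variable, which is the encoder/decoder sharing pattern mentioned above):

import tensorflow as tf

encoder_ids = tf.constant([[1, 2, 3], [4, 5, 6]], dtype=tf.int64)
decoder_ids = tf.constant([[6, 5, 4], [3, 2, 1]], dtype=tf.int64)

enc_emb = tf.contrib.layers.embed_sequence(
    encoder_ids, vocab_size=100, embed_dim=8, scope='shared_emb')
dec_emb = tf.contrib.layers.embed_sequence(
    decoder_ids, scope='shared_emb', reuse=True)  # size args optional on reuse
# Both outputs are [2, 3, 8] and are looked up from the same embedding matrix.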
Example #12
def bias_add(inputs,
             activation_fn=None,
             initializer=init_ops.zeros_initializer,
             regularizer=None,
             reuse=None,
             variables_collections=None,
             outputs_collections=None,
             trainable=True,
             scope=None):
  """Adds a bias to the inputs.

  Can be used as a normalizer function for conv2d and fully_connected.

  Args:
    inputs: a tensor with at least rank 2 and a known value for the last
      dimension, e.g. `[batch_size, depth]`, `[None, None, None, depth]`.
    activation_fn: Optional activation function.
    initializer: An initializer for the bias, defaults to 0.
    regularizer: A regularizer like the result of
      `l1_regularizer` or `l2_regularizer`.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional collections for the variables.
    outputs_collections: collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.

  Returns:
    a tensor representing the result of adding biases to the inputs.
  """
  with variable_scope.variable_op_scope([inputs],
                                        scope, 'BiasAdd', reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    dtype = inputs.dtype.base_dtype
    num_features = utils.last_dimension(inputs.get_shape(), min_rank=2)
    biases_collections = utils.get_variable_collections(variables_collections,
                                                        'biases')
    biases = variables.model_variable('biases',
                                      shape=[num_features,],
                                      dtype=dtype,
                                      initializer=initializer,
                                      regularizer=regularizer,
                                      collections=biases_collections,
                                      trainable=trainable)
    outputs = nn.bias_add(inputs, biases)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
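A minimal usage sketch (assuming the bias_add above is in scope together with the TF release whose variable_op_scope API it imports; later 1.x releases expose the equivalent layer as tf.contrib.layers.bias_add):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])
y = bias_add(x, activation_fn=tf.nn.relu, scope='bias1')
# y = relu(x + biases), where 'biases' is a model variable of shape [128].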
Example #13
def _create_joint_embedding_lookup(columns_to_tensors,
                                   embedding_lookup_arguments,
                                   num_outputs,
                                   trainable,
                                   weight_collections):
  """Creates an embedding lookup for all columns sharing a single weight."""
  for arg in embedding_lookup_arguments:
    assert arg.weight_tensor is None, (
        'Joint sums for weighted sparse columns are not supported. '
        'Please use weighted_sum_from_feature_columns instead.')
    assert arg.combiner == 'sum', (
        'Combiners other than sum are not supported for joint sums. '
        'Please use weighted_sum_from_feature_columns instead.')
  assert len(embedding_lookup_arguments) >= 1, (
      'At least one column must be in the model.')
  prev_size = 0
  sparse_tensors = []
  for a in embedding_lookup_arguments:
    t = a.input_tensor
    values = t.values + prev_size
    prev_size += a.vocab_size
    sparse_tensors.append(
        ops.SparseTensor(t.indices,
                         values,
                         t.shape))
  sparse_tensor = sparse_ops.sparse_concat(1, sparse_tensors)
  with variable_scope.variable_scope(
      None, default_name='linear_weights', values=columns_to_tensors.values()):
    variable = contrib_variables.model_variable(
        name='weights',
        shape=[prev_size, num_outputs],
        dtype=dtypes.float32,
        initializer=init_ops.zeros_initializer,
        trainable=trainable,
        collections=weight_collections)
    if isinstance(variable, variables.Variable):
      variable = [variable]
    else:
      variable = variable._get_variable_list()  # pylint: disable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        sparse_tensor,
        sparse_weights=None,
        default_id=0,
        combiner='sum',
        name='_weights')
    return variable, predictions
Example #14
def _create_embedding_lookup(column,
                             columns_to_tensors,
                             embedding_lookup_arguments,
                             num_outputs,
                             trainable,
                             weight_collections):
  """Creates variables and returns predictions for linear weights in a model.

  Args:
   column: the column we're working on.
   columns_to_tensors: a map from column name to tensors.
   embedding_lookup_arguments: arguments for embedding lookup.
   num_outputs: how many outputs.
   trainable: whether the variable we create is trainable.
   weight_collections: weights will be placed here.

  Returns:
  variables: the created embeddings.
  predictions: the computed predictions.
  """
  with variable_scope.variable_scope(
      None, default_name=column.name, values=columns_to_tensors.values()):
    variable = contrib_variables.model_variable(
        name='weights',
        shape=[embedding_lookup_arguments.vocab_size, num_outputs],
        dtype=dtypes.float32,
        initializer=embedding_lookup_arguments.initializer,
        trainable=trainable,
        collections=weight_collections)
    if isinstance(variable, variables.Variable):
      variable = [variable]
    else:
      variable = variable._get_variable_list()  # pylint: disable=protected-access
    predictions = embedding_ops.safe_embedding_lookup_sparse(
        variable,
        embedding_lookup_arguments.input_tensor,
        sparse_weights=embedding_lookup_arguments.weight_tensor,
        default_id=0,
        combiner=embedding_lookup_arguments.combiner,
        name=column.name + '_weights')
    return variable, predictions
Example #15
    def part_fn(self, feature_part, n, reuse):
        name = self.name + '/separable_split/' + n
        with tf.variable_scope(name, reuse=reuse):
            # shape = list(map(int, (*feature_part.shape[1:], 1)))
            in_chs = int(feature_part.shape[-1])
            # depthwise_filter: [filter_height, filter_width, in_channels, channel_multiplier].
            # Contains in_channels convolutional filters of depth 1.
            depthwise_shape = [self.kernel_size, self.kernel_size, in_chs, 1]
            depthwise_filter = tf.get_variable(name + '/depthwise_filter',
                                               depthwise_shape, tf.float32,
                                               self.depthwise_initializer)
            # depthwise_filter = tf.Variable(self.kernel_initializer(depthwise_shape))
            # pointwise_filter: [1, 1, channel_multiplier * in_channels, out_channels].
            # Pointwise filter to mix channels after depthwise_filter has convolved spatially.
            pointwise_shape = [1, 1, in_chs, in_chs]
            # pointwise_filter = tf.Variable(self.pointwise_initializer(pointwise_shape))
            pointwise_filter = tf.get_variable(name + '/pointwise_filter',
                                               pointwise_shape, tf.float32,
                                               self.kernel_initializer())

            outputs = tf.nn.separable_conv2d(
                feature_part,
                depthwise_filter=depthwise_filter,
                pointwise_filter=pointwise_filter,
                strides=self.stride,
                padding=self.padding,
            )
            if self.biases_initializer is not None:
                biases = variables.model_variable(
                    'biases' + n,
                    shape=[
                        in_chs,
                    ],
                    dtype=feature_part.dtype,
                    initializer=self.biases_initializer,
                )
                outputs = nn.bias_add(outputs, biases)
            outputs = self.act_fn(outputs)
        return outputs
Example #16
 def spatial_normalization(self, inputs):
     with variable_scope.variable_scope(None,
                                        'L2Normalization', [inputs],
                                        reuse=None) as sc:
         inputs_shape = inputs.get_shape()
         inputs_rank = inputs_shape.ndims
         norm_dim = tf.range(inputs_rank - 1, inputs_rank)
         params_shape = inputs_shape[-1:]
         # Normalize along spatial dimensions.
         outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
         # Additional scaling.
         scale_collections = utils.get_variable_collections(None, 'scale')
         scale = variables.model_variable(
             'gamma',
             shape=params_shape,
             dtype=inputs.dtype.base_dtype,
             initializer=init_ops.ones_initializer(),
             collections=scale_collections,
             trainable=True)
         outputs = tf.multiply(outputs, scale)
         return utils.collect_named_outputs(None, sc.original_name_scope,
                                            outputs)
Example #17
def _create_embedding_lookup(column, columns_to_tensors,
                             embedding_lookup_arguments, num_outputs,
                             trainable, weight_collections):
    """Creates variables and returns predictions for linear weights in a model.

  Args:
   column: the column we're working on.
   columns_to_tensors: a map from column name to tensors.
   embedding_lookup_arguments: arguments for embedding lookup.
   num_outputs: how many outputs.
   trainable: whether the variable we create is trainable.
   weight_collections: weights will be placed here.

  Returns:
  variables: the created embeddings.
  predictions: the computed predictions.
  """
    with variable_scope.variable_scope(None,
                                       default_name=column.name,
                                       values=columns_to_tensors.values()):
        variable = contrib_variables.model_variable(
            name='weights',
            shape=[embedding_lookup_arguments.vocab_size, num_outputs],
            dtype=dtypes.float32,
            initializer=embedding_lookup_arguments.initializer,
            trainable=trainable,
            collections=weight_collections)
        if fc._is_variable(variable):  # pylint: disable=protected-access
            variable = [variable]
        else:
            variable = variable._get_variable_list()  # pylint: disable=protected-access
        predictions = embedding_ops.safe_embedding_lookup_sparse(
            variable,
            embedding_lookup_arguments.input_tensor,
            sparse_weights=embedding_lookup_arguments.weight_tensor,
            combiner=embedding_lookup_arguments.combiner,
            name=column.name + '_weights')
        return variable, predictions
Example #18
    def init_state(self, state_name, batch_size, dtype, learned_state=False):
        """Creates an initial state compatible with this cell.

    Args:
      state_name: name of the state tensor
      batch_size: model batch size
      dtype: dtype for the tensor values i.e. tf.float32
      learned_state: whether the initial state should be learnable. If false,
        the initial state is set to all 0's

    Returns:
      ret: the created initial state
    """
        state_size = (self.state_size_flat
                      if self._flatten_state else self.state_size)
        # list of 2 zero tensors or variable tensors, depending on whether
        # learned_state is true
        # pylint: disable=g-long-ternary,g-complex-comprehension
        ret_flat = [(contrib_variables.model_variable(
            state_name + str(i),
            shape=s,
            dtype=dtype,
            initializer=tf.truncated_normal_initializer(
                stddev=0.03)) if learned_state else tf.zeros(
                    [batch_size] + s, dtype=dtype, name=state_name))
                    for i, s in enumerate(state_size)]

        # duplicates initial state across the batch axis if it's learned
        if learned_state:
            ret_flat = [
                tf.stack([tensor for i in range(int(batch_size))])
                for tensor in ret_flat
            ]
        for s, r in zip(state_size, ret_flat):
            r = tf.reshape(r, [-1] + s)
        ret = tf.nest.pack_sequence_as(structure=[1, 1],
                                       flat_sequence=ret_flat)
        return ret
Example #19
def _conv(x,shape,stride,padding,dilation_rate=None,w_name='w',b_name='b',
          std=0.01,wd=None,dtype=tf.float32,add_bias=True,device=None):
  """
  Define a convolutional layer with an (optional) bias term.
  For documentation, see `conv_block`.

  If DILATION_RATE is specified, atrous convolution is used.
    In this case, the STRIDE parameter is ignored, as the
    stride is set to one.
  """
  w = _variable_with_weight_decay(w_name,shape=shape,stddev=std,
                                  wd=wd,dtype=dtype,device=device)
  if dilation_rate is None:
    out = tf.nn.conv2d(x,w,strides=stride,padding=padding)
  else:
    out = tf.nn.atrous_conv2d(x,w,dilation_rate,padding=padding)
  # [optional] bias:
  if add_bias:
    b = variables.model_variable(b_name,shape=shape[-1:],dtype=dtype,
                                initializer=tf.constant_initializer(0.0),
                                device=device)
    out = tf.nn.bias_add(out,b)
  return out
Example #20
def add_bias(x, n_units, biases_initializer, dtype, trainable):
    # Initializer
    biases_shape = [n_units]
    if biases_initializer is None:
        biases_initializer = tf.constant_initializer(0.0, dtype=tf.float32)
    elif isinstance(biases_initializer, np.ndarray):
        if biases_initializer.ndim != 1 or biases_initializer.shape[
                0] != biases_shape[0]:
            raise ValueError('Shape of constant initializer (' +
                             str(biases_initializer.shape) +
                             ') does not match expected shape (' +
                             str(biases_shape) + '). ')
        biases_shape = None  # Shape is inferred from initializer

    # Create variable for bias
    biases = variables.model_variable('biases',
                                      shape=biases_shape,
                                      dtype=dtype,
                                      initializer=biases_initializer,
                                      trainable=trainable)

    # Add bias
    return tf.nn.bias_add(x, biases)
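A minimal usage sketch (assuming TensorFlow 1.x and the add_bias above; the variable scope name is illustrative):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 256])
with tf.variable_scope('logits'):
    y = add_bias(x, n_units=256, biases_initializer=None,
                 dtype=tf.float32, trainable=True)
# With biases_initializer=None the bias starts at zero; a 1-D numpy array of
# length n_units would instead be used as the initial value.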
Example #21
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     trainable=True,
                     scope=None):
    """
    conv4_3 needs to be L2-normalized first, to reduce the scale mismatch
    between this layer and the later layers.
    """
    with variable_scope.variable_scope(scope,
                                       'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        norm_dim = tf.range(inputs_rank - 1, inputs_rank)
        params_shape = inputs_shape[-1:]

        # Normalize along spatial dimensions.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)

            outputs = tf.multiply(outputs, scale)

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
Example #22
  def init_state(self, state_name, batch_size, dtype, learned_state=False):
    """Creates an initial state compatible with this cell.

    Args:
      state_name: name of the state tensor
      batch_size: model batch size
      dtype: dtype for the tensor values i.e. tf.float32
      learned_state: whether the initial state should be learnable. If false,
        the initial state is set to all 0's

    Returns:
      The created initial state.
    """
    state_size = (
        self.state_size_flat if self._flattened_state else self.state_size)
    # list of 2 zero tensors or variable tensors, depending on whether
    # learned_state is true
    ret_flat = [(variables.model_variable(
        state_name + str(i),
        shape=s,
        dtype=dtype,
        initializer=tf.truncated_normal_initializer(stddev=0.03))
                 if learned_state else tf.zeros(
                     [batch_size] + s, dtype=dtype, name=state_name))
                for i, s in enumerate(state_size)]

    # duplicates initial state across the batch axis if it's learned
    if learned_state:
      ret_flat = [
          tf.stack([tensor
                    for i in range(int(batch_size))])
          for tensor in ret_flat
      ]
    for s, r in zip(state_size, ret_flat):
      r.set_shape([None] + s)
    return tf.contrib.framework.nest.pack_sequence_as(
        structure=[1, 1], flat_sequence=ret_flat)
Example #23
  def testSeparableConvWithResourceVar(self):
    graph = ops.Graph()
    with graph.as_default():
      with variable_scope.variable_scope('', use_resource=True):
        batch_size, height, width, depth = 5, 128, 128, 3
        input1 = array_ops.zeros((batch_size, height, width, depth))
        kernel_size, depth_multiplier = 3, 1
        depthwise_shape = [kernel_size, kernel_size, depth, depth_multiplier]
        depthwise_weights = variables.model_variable(
            'depthwise_weights', shape=depthwise_shape)
        strides = [1, 1, 1, 1]
        with variable_scope.variable_scope('depthwise_conv_1'):
          conv1 = nn.depthwise_conv2d(
              input1, depthwise_weights, strides, padding='SAME')
        with variable_scope.variable_scope('depthwise_conv_2'):
          conv2 = nn.depthwise_conv2d(
              conv1, depthwise_weights, strides, padding='SAME')
          math_ops.add(conv2, input1, name='add')

    quantize.Quantize(graph, True)

    # Test that the weights and activations of all convs have been quantized.
    quant_node_name = 'FakeQuantWithMinMaxVars'
    weights_quant = graph.get_operation_by_name(
        'depthwise_conv_1/weights_quant/' + quant_node_name)
    self.assertEqual(weights_quant.type, quant_node_name)
    act_quant = graph.get_operation_by_name('depthwise_conv_1/act_quant/' +
                                            quant_node_name)
    self.assertEqual(act_quant.type, quant_node_name)

    weights_quant = graph.get_operation_by_name(
        'depthwise_conv_2/weights_quant/' + quant_node_name)
    self.assertEqual(weights_quant.type, quant_node_name)
    act_quant = graph.get_operation_by_name('depthwise_conv_2/act_quant/' +
                                            quant_node_name)
    self.assertEqual(act_quant.type, quant_node_name)
Example #24
def embedding(x,
              vocab_dim,
              emb_dim,
              trainable=True,
              dtype=tf.float32,
              initializer=None,
              activation_collection=tf.GraphKeys.ACTIVATIONS,
              variable_collection=tf.GraphKeys.MODEL_VARIABLES,
              scope='lookup'):
    if initializer is None:
        init_width = 0.5 / emb_dim
        initializer = tf.random_uniform_initializer(-init_width, init_width)
    W = variables.model_variable('embedding',
                                 shape=[vocab_dim, emb_dim],
                                 dtype=dtype,
                                 initializer=initializer,
                                 trainable=trainable)
    x = tf.nn.embedding_lookup(W, x, name=scope)
    if activation_collection is not None:
        tf.add_to_collection(activation_collection, x)
    if variable_collection is not None:
        tf.add_to_collection(variable_collection, W)

    return x
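A minimal usage sketch (assuming TensorFlow 1.x and the embedding function above; vocabulary and sequence sizes are illustrative):

import tensorflow as tf

word_ids = tf.placeholder(tf.int32, [None, 50])   # [batch, seq_len]
embedded = embedding(word_ids, vocab_dim=10000, emb_dim=128)
# embedded: [batch, 50, 128]. The lookup output is added to
# GraphKeys.ACTIVATIONS and the 'embedding' matrix to GraphKeys.MODEL_VARIABLES.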
Example #25
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
    """A restricted linear prediction builder based on FeatureColumns.

  As long as all feature columns are unweighted sparse columns this computes the
  prediction of a linear model which stores all weights in a single variable.

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying number of outputs. Default value is 1.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

    * A Tensor which represents predictions of a linear model.
    * A list of Variables storing the weights.
    * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.

  """
    columns_to_tensors = columns_to_tensors.copy()
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='joint_weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        transformer = _Transformer(columns_to_tensors)
        embedding_lookup_arguments = []
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments.append(
                    column._wide_embedding_lookup_arguments(
                        transformed_tensor))  # pylint: disable=protected-access
            except NotImplementedError:
                raise NotImplementedError(
                    'Real-valued columns are not supported. '
                    'Use weighted_sum_from_feature_columns '
                    'instead, or bucketize these columns.')

        variable, predictions_no_bias = _create_joint_embedding_lookup(
            columns_to_tensors, embedding_lookup_arguments, num_outputs,
            trainable, weight_collections)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=_add_variable_collection(weight_collections))
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, variable, bias
Example #26
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               activation_fn=None,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None):
  """Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
    "Batch Normalization: Accelerating Deep Network Training by Reducing
    Internal Covariate Shift"
    Sergey Ioffe, Christian Szegedy
  Can be used as a normalizer function for conv2d and fully_connected.
  Args:
    inputs: a tensor of size `[batch_size, height, width, channels]`
      or `[batch_size, channels]`.
    decay: decay for the moving average.
    center: If True, subtract `beta`. If False, `beta` is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: small float added to variance to avoid dividing by zero.
    activation_fn: Optional activation function.
    updates_collections: collections to collect the update ops for computation.
      If None, a control dependency would be added to make sure the updates are
      computed.
    is_training: whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional collections for the variables.
    outputs_collections: collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.
  Returns:
    a tensor representing the output of the operation.
  """
  with variable_scope.variable_op_scope([inputs], scope, 'BatchNorm',
                                        reuse=reuse) as sc:
    inputs_shape = inputs.get_shape()
    dtype = inputs.dtype.base_dtype
    axis = list(range(len(inputs_shape) - 1))
    params_shape = inputs_shape[-1:]
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta = variables.model_variable(
          'beta', shape=params_shape, dtype=dtype,
          initializer=init_ops.zeros_initializer,
          collections=beta_collections, trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma = variables.model_variable(
          'gamma', shape=params_shape, dtype=dtype,
          initializer=init_ops.ones_initializer,
          collections=gamma_collections, trainable=trainable)
    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections.
    moving_mean_collections = utils.get_variable_collections(
        variables_collections, 'moving_mean')
    moving_mean = variables.model_variable(
        'moving_mean', shape=params_shape, dtype=dtype,
        initializer=init_ops.zeros_initializer, trainable=False,
        collections=moving_mean_collections)
    moving_variance_collections = utils.get_variable_collections(
        variables_collections, 'moving_variance')
    moving_variance = variables.model_variable(
        'moving_variance', shape=params_shape, dtype=dtype,
        initializer=init_ops.ones_initializer, trainable=False,
        collections=moving_variance_collections)
    if is_training:
      # Calculate the moments based on the individual batch.
      mean, variance = nn.moments(inputs, axis, shift=moving_mean)
      # Update the moving_mean and moving_variance moments.
      update_moving_mean = moving_averages.assign_moving_average(
          moving_mean, mean, decay)
      update_moving_variance = moving_averages.assign_moving_average(
          moving_variance, variance, decay)
      if updates_collections is None:
        # Make sure the updates are computed here.
        with ops.control_dependencies(
            [update_moving_mean, update_moving_variance]):
          outputs = nn.batch_normalization(
              inputs, mean, variance, beta, gamma, epsilon)
      else:
        # Collect the updates to be computed later.
        ops.add_to_collections(updates_collections, update_moving_mean)
        ops.add_to_collections(updates_collections, update_moving_variance)
        outputs = nn.batch_normalization(
            inputs, mean, variance, beta, gamma, epsilon)
    else:
      outputs = nn.batch_normalization(
          inputs, moving_mean, moving_variance, beta, gamma, epsilon)
    outputs.set_shape(inputs.get_shape())
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
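A minimal usage sketch (assuming the TF release whose variable_op_scope API this snippet imports; note that is_training here is a Python bool, so the train and eval paths share variables through reuse):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 32, 32, 64])
bn_train = batch_norm(x, is_training=True, scale=True, scope='bn1')
bn_eval = batch_norm(x, is_training=False, scale=True, reuse=True, scope='bn1')

# The moving-average updates were placed in UPDATE_OPS; group them with the
# training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)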
Example #27
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_normalizer_fn=None,
                    weights_normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer(),
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
    # Copied and modified from tensorflow-0.12.0 contrib.layers.fully_connected,
    # adding the weights_normalizer_* options.
    """Adds a fully connected layer.

    `fully_connected` creates a variable called `weights`, representing a fully
    connected weight matrix, which is multiplied by the `inputs` to produce a
    `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
    `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
    None and a `biases_initializer` is provided then a `biases` variable would be
    created and added to the hidden units. Finally, if `activation_fn` is not `None`,
    it is applied to the hidden units as well.

    Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
    prior to the initial matrix multiply by `weights`.

    Args:
      inputs: A tensor with at least rank 2 and a known value for the last dimension,
        i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
      num_outputs: Integer or long, the number of output units in the layer.
      activation_fn: activation function, set to None to skip it and maintain
        a linear activation.
      normalizer_fn: normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are not created nor added.
        default set to None for no normalizer function
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: Optional list of collections for all the variables or
        a dictionary containing a different list of collections per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for variable_scope.

    Returns:
       the tensor variable representing the result of the series of operations.

    Raises:
      ValueError: if x has rank less than 2 or if its last dimension is not set.
    """
    if not (isinstance(num_outputs, six.integer_types)):
        raise ValueError('num_outputs should be int or long, got %s.',
                         num_outputs)
    with variable_scope.variable_scope(scope,
                                       'fully_connected', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        inputs_shape = inputs.get_shape()
        num_input_units = utils.last_dimension(inputs_shape, min_rank=2)

        static_shape = inputs_shape.as_list()
        static_shape[-1] = num_outputs

        out_shape = array_ops.unpack(array_ops.shape(inputs),
                                     len(static_shape))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(weights,
                                            **weights_normalizer_params)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
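A minimal usage sketch (assuming the TF 0.12-era APIs this variant imports, e.g. array_ops.unpack/pack; the weights_normalizer_* arguments are the hook the comment above describes):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])
h = fully_connected(x, 256, activation_fn=tf.nn.relu, scope='fc1')
# The added hook: give every column of the weight matrix unit L2 norm before
# the matmul.
logits = fully_connected(h, 10, activation_fn=None,
                         weights_normalizer_fn=tf.nn.l2_normalize,
                         weights_normalizer_params={'dim': 0},
                         scope='fc2')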
Example #28
def _embeddings_from_arguments(column,
                               args,
                               weight_collections,
                               trainable,
                               output_rank=2):
  """Returns embeddings for a column based on the computed arguments.

  Args:
   column: the column name.
   args: the _DeepEmbeddingLookupArguments for this column.
   weight_collections: collections to store weights in.
   trainable: whether these embeddings should be trainable.
   output_rank: the desired rank of the returned `Tensor`. Inner dimensions will
     be combined to produce the desired rank.

  Returns:
   the embeddings.

  Raises:
   ValueError: if not possible to create.
  """
  # pylint: disable=protected-access
  input_tensor = layers._inner_flatten(args.input_tensor, output_rank)
  weight_tensor = None
  if args.weight_tensor is not None:
    weight_tensor = layers._inner_flatten(args.weight_tensor, output_rank)
  # pylint: enable=protected-access

  # This option is only enabled for scattered_embedding_column.
  if args.hash_key:
    embeddings = contrib_variables.model_variable(
        name='weights',
        shape=[args.vocab_size],
        dtype=dtypes.float32,
        initializer=args.initializer,
        trainable=trainable,
        collections=weight_collections)

    return embedding_ops.scattered_embedding_lookup_sparse(
        embeddings, input_tensor, args.dimension,
        hash_key=args.hash_key,
        combiner=args.combiner, name='lookup')

  if args.shared_embedding_name is not None:
    shared_embedding_collection_name = (
        'SHARED_EMBEDDING_COLLECTION_' + args.shared_embedding_name.upper())
    graph = ops.get_default_graph()
    shared_embedding_collection = (
        graph.get_collection_ref(shared_embedding_collection_name))
    shape = [args.vocab_size, args.dimension]
    if shared_embedding_collection:
      if len(shared_embedding_collection) > 1:
        raise ValueError('Collection %s can only contain one '
                         '(partitioned) variable.'
                         % shared_embedding_collection_name)
      else:
        embeddings = shared_embedding_collection[0]
        if embeddings.get_shape() != shape:
          raise ValueError('The embedding variable with name {} already '
                           'exists, but its shape does not match required '
                           'embedding shape  here. Please make sure to use '
                           'different shared_embedding_name for different '
                           'shared embeddings.'.format(
                               args.shared_embedding_name))
    else:
      embeddings = contrib_variables.model_variable(
          name=args.shared_embedding_name,
          shape=shape,
          dtype=dtypes.float32,
          initializer=args.initializer,
          trainable=trainable,
          collections=weight_collections)
      graph.add_to_collection(shared_embedding_collection_name, embeddings)
  else:
    embeddings = contrib_variables.model_variable(
        name='weights',
        shape=[args.vocab_size, args.dimension],
        dtype=dtypes.float32,
        initializer=args.initializer,
        trainable=trainable,
        collections=weight_collections)

  if isinstance(embeddings, variables.Variable):
    embeddings = [embeddings]
  else:
    embeddings = embeddings._get_variable_list()  # pylint: disable=protected-access
  # pylint: disable=protected-access
  _maybe_restore_from_checkpoint(
      column._checkpoint_path(), embeddings)
  return embedding_ops.safe_embedding_lookup_sparse(
      embeddings,
      input_tensor,
      sparse_weights=weight_tensor,
      combiner=args.combiner,
      name=column.name + 'weights',
      max_norm=args.max_norm)
Example #29
def convolution2d(inputs,
                  num_outputs,
                  kernel_size,
                  stride=1,
                  padding='SAME',
                  activation_fn=nn.relu,
                  normalizer_fn=None,
                  normalizer_params=None,
                  weights_initializer=initializers.xavier_initializer(),
                  weights_regularizer=None,
                  biases_initializer=init_ops.zeros_initializer,
                  biases_regularizer=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  scope=None):
  """Adds a 2D convolution followed by an optional batch_norm layer.
  `convolution2d` creates a variable called `weights`, representing the
  convolutional kernel, that is convolved with the `inputs` to produce a
  `Tensor` of activations. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would be
  created and added to the activations. Finally, if `activation_fn` is not `None`,
  it is applied to the activations as well.
  Args:
    inputs: a 4-D tensor  `[batch_size, height, width, channels]`.
    num_outputs: integer, the number of output filters.
    kernel_size: a list of length 2 `[kernel_height, kernel_width]` of
      the filters. Can be an int if both values are the same.
    stride: a list of length 2 `[stride_height, stride_width]`.
      Can be an int if both strides are the same. Note that presently
      both strides must have the same value.
    padding: one of `VALID` or `SAME`.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for `variable_op_scope`.
  Returns:
    a tensor representing the output of the operation.
  """
  with variable_scope.variable_op_scope([inputs],
                                        scope, 'Conv', reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
    stride_h, stride_w = utils.two_element_tuple(stride)
    num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)
    weights_shape = [kernel_h, kernel_w,
                     num_filters_in, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    outputs = nn.conv2d(inputs, weights, [1, stride_h, stride_w, 1],
                        padding=padding)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
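A minimal usage sketch (assuming the older TF release whose variable_op_scope API this layer uses; sizes are illustrative):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net = convolution2d(images, num_outputs=64, kernel_size=3, stride=2,
                    activation_fn=tf.nn.relu, scope='conv1')
# net: [None, 112, 112, 64]; 'conv1/weights' and 'conv1/biases' are created as
# model variables and can be regularized via the *_regularizer arguments.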
Example #30
def instance_norm(inputs,
                  center=True,
                  scale=True,
                  epsilon=1e-6,
                  activation_fn=None,
                  param_initializers=None,
                  reuse=None,
                  variables_collections=None,
                  outputs_collections=None,
                  trainable=True,
                  data_format=DATA_FORMAT_NHWC,
                  scope=None):
  """Functional interface for the instance normalization layer.

  Reference: https://arxiv.org/abs/1607.08022.

    "Instance Normalization: The Missing Ingredient for Fast Stylization"
    Dmitry Ulyanov, Andrea Vedaldi, Victor Lempitsky

  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta and gamma.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    scope: Optional scope for `variable_scope`.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If the channels dimension of `inputs` is undefined.
  """
  inputs = ops.convert_to_tensor(inputs)
  inputs_shape = inputs.shape
  inputs_rank = inputs.shape.ndims

  if inputs_rank is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  with variable_scope.variable_scope(
      scope, 'InstanceNorm', [inputs], reuse=reuse) as sc:
    if data_format == DATA_FORMAT_NCHW:
      reduction_axis = 1
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = list(
          [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)])
    else:
      reduction_axis = inputs_rank - 1
      params_shape_broadcast = None
    moments_axes = list(range(inputs_rank))
    del moments_axes[reduction_axis]
    del moments_axes[0]
    params_shape = inputs_shape[reduction_axis:reduction_axis + 1]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' % (
          inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta_initializer = param_initializers.get(
          'beta', init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
      if params_shape_broadcast:
        beta = array_ops.reshape(beta, params_shape_broadcast)
    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get(
          'gamma', init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
      if params_shape_broadcast:
        gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Calculate the moments (instance activations).
    mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute instance normalization.
    outputs = nn.batch_normalization(
        inputs, mean, variance, beta, gamma, epsilon, name='instancenorm')
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
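
A minimal usage sketch for `instance_norm` above, assuming TF 1.x with the contrib-style imports already in scope; shapes are illustrative. Each (sample, channel) slice is normalized over its spatial extent.

# Hypothetical usage (TF 1.x); `instance_norm` is the function defined above.
import numpy as np
import tensorflow as tf

feats = tf.placeholder(tf.float32, [None, 64, 64, 32])   # NHWC
normalized = instance_norm(feats, center=True, scale=True, scope='in')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.randn(2, 64, 64, 32).astype(np.float32)
    y = sess.run(normalized, {feats: x})
    # With beta=0 and gamma=1 at initialization, each (sample, channel) slice
    # is normalized over H and W:
    print(y[0, :, :, 0].mean(), y[0, :, :, 0].std())  # ~0.0, ~1.0
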
Example #31
0
def test(input, scope=None, reuse=None):
    with variable_scope.variable_op_scope([input], scope, 'test', reuse=reuse):
        return variables.model_variable('asdf', [1, 1],
                initializer=tf.constant_initializer(0.),
                trainable=True)
Example #32
0
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
  """Functional interface for the group normalization layer.

  Reference: https://arxiv.org/abs/1803.08494.

    "Group Normalization", Yuxin Wu, Kaiming He

  Args:
    inputs: A Tensor with at least 2 dimensions, one of which is channels. All
      shape dimensions must be fully defined.
    groups: Integer. Divide the channels into this number of groups over which
      normalization statistics are computed. This number must be commensurate
      with the number of channels in `inputs`.
    channels_axis: An integer. Specifies the index of the channels axis, which
      will be broken into `groups`, each of which has its statistics computed
      across. Must be mutually exclusive with `reduction_axes`. Preferred usage
      is to specify negative integers to be agnostic as to whether a batch
      dimension is included.
    reduction_axes: Tuple of integers. Specifies dimensions over which
       statistics will be accumulated. Must be mutually exclusive with
       `channels_axis`. Statistics will not be accumulated across axes not
       specified in `reduction_axes` nor `channels_axis`. Preferred usage is to
       specify negative integers to be agnostic to whether a batch dimension is
       included.

      Some sample usage cases:
        NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
        NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta and gamma.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.
    mean_close_to_zero: The mean of `input` before ReLU will be close to zero
      when batch size >= 4k for Resnet-50 on TPU. If `True`, use
      `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the
      variance. This is the same behavior as `fused` equals `True` in batch
      normalization. If `False`, use `nn.moments` to calculate the variance.
      When `mean` is close to zero, like 1e-4, using `mean` to calculate the
      variance may give poor results due to repeated roundoff error and
      denormalization in `mean`. When `mean` is large, like 1e2, sum(`input`^2)
      is so large that only the high-order digits of the elements are
      accumulated. Thus, using sum((`input` - `mean`)^2)/n to calculate the
      variance is more accurate than (sum(`input`^2)/n - `mean`^2) when `mean`
      is large.


  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If the channels dimension of `inputs` is undefined.
    ValueError: If number of groups is not commensurate with number of channels.
    ValueError: If reduction_axes or channels_axis are out of bounds.
    ValueError: If reduction_axes are not mutually exclusive with channels_axis.
  """
  # TODO(shlens): Support partially defined shapes for the inputs.
  inputs = ops.convert_to_tensor(inputs)
  original_shape = inputs.shape

  if inputs.shape.ndims is None:
    raise ValueError('Inputs %s has undefined rank.' % inputs.name)
  if channels_axis > (inputs.shape.ndims - 1):
    raise ValueError('Axis is out of bounds.')

  # Standardize the channels_axis to be positive and identify # of channels.
  if channels_axis < 0:
    channels_axis = inputs.shape.ndims + channels_axis
  channels = inputs.shape[channels_axis].value

  if channels is None:
    raise ValueError('Inputs %s has undefined channel dimension: %d.' % (
        inputs.name, channels_axis))

  # Standardize the reduction_axes to be positive.
  reduction_axes = list(reduction_axes)
  for i in range(len(reduction_axes)):
    if reduction_axes[i] < 0:
      reduction_axes[i] += inputs.shape.ndims

  for a in reduction_axes:
    if a > inputs.shape.ndims:
      raise ValueError('Axis is out of bounds.')
    if inputs.shape[a].value is None:
      raise ValueError('Inputs %s has undefined dimensions %d.' % (
          inputs.name, a))
    if channels_axis == a:
      raise ValueError('reduction_axis must be mutually exclusive '
                       'with channels_axis')
  if groups > channels:
    raise ValueError('Invalid groups %d for %d channels.' % (groups, channels))
  if channels % groups != 0:
    raise ValueError('%d channels is not commensurate with %d groups.' %
                     (channels, groups))

  # Determine axes before channels. Some examples of common image formats:
  #  'NCHW': before = [N], after = [HW]
  #  'NHWC': before = [NHW], after = []
  axes_before_channels = inputs.shape.as_list()[:channels_axis]
  axes_after_channels = inputs.shape.as_list()[channels_axis+1:]

  # Manually broadcast the parameters to conform to the number of groups.
  params_shape_broadcast = ([1] * len(axes_before_channels) +
                            [groups, channels // groups] +
                            [1] * len(axes_after_channels))

  # Reshape the input by the group within the channel dimension.
  inputs_shape = (axes_before_channels + [groups, channels // groups] +
                  axes_after_channels)
  inputs = array_ops.reshape(inputs, inputs_shape)

  # Determine the dimensions across which moments are calculated.
  moments_axes = [channels_axis + 1]
  for a in reduction_axes:
    if a > channels_axis:
      moments_axes.append(a + 1)
    else:
      moments_axes.append(a)

  with variable_scope.variable_scope(
      scope, 'GroupNorm', [inputs], reuse=reuse) as sc:
    # Note that the params_shape is the number of channels always.
    params_shape = [channels]

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    dtype = inputs.dtype.base_dtype
    if param_initializers is None:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(
          variables_collections, 'beta')
      beta_initializer = param_initializers.get(
          'beta', init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
      beta = array_ops.reshape(beta, params_shape_broadcast)

    if scale:
      gamma_collections = utils.get_variable_collections(
          variables_collections, 'gamma')
      gamma_initializer = param_initializers.get(
          'gamma', init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
      gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Calculate the moments.
    if mean_close_to_zero:
      # One pass algorithm returns better result when mean is close to zero.
      counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
          inputs, moments_axes, keep_dims=True)
      mean, variance = nn.normalize_moments(
          counts, means_ss, variance_ss, shift=None)
    else:
      mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

    # Compute normalization.
    # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor
    # appropriately so that this operation may be faster.
    gain = math_ops.rsqrt(variance + epsilon)
    offset = -mean * gain
    if gamma is not None:
      gain *= gamma
      offset *= gamma
    if beta is not None:
      offset += beta
    outputs = inputs * gain + offset

    # Collapse the groups into the channel dimension.
    outputs = array_ops.reshape(outputs, original_shape)

    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
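
A minimal usage sketch for `group_norm` above, assuming TF 1.x; the function requires a fully defined input shape, so the batch dimension is fixed here. Shapes and group count are illustrative.

# Hypothetical usage (TF 1.x); `group_norm` is the function defined above.
import numpy as np
import tensorflow as tf

feats = tf.placeholder(tf.float32, [8, 14, 14, 64])   # NHWC, fully defined shape
# 8 groups of 8 channels each; statistics are computed per (sample, group).
gn = group_norm(feats, groups=8, channels_axis=-1, reduction_axes=(-3, -2),
                scope='gn')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(gn, {feats: np.random.randn(8, 14, 14, 64).astype(np.float32)})
    print(out.shape)  # (8, 14, 14, 64)
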
Example #33
0
def conv2d_leaders(
    inputs,
    num_outputs,
    kernel_size,
    rates=[1],
    stride=1,
    padding='SAME',
    activation_fn=nn.relu,
    normalizer_fn=None,
    normalizer_params=None,
    weights_initializer=initializers.xavier_initializer(),
    weights_regularizer=None,
    biases_initializer=init_ops.zeros_initializer,
    biases_regularizer=None,
    reuse=None,
    variables_collections=None,
    outputs_collections=None,
    trainable=True,
    scope=None,
):
    """Adds a 2D convolution followed by an optional batch_norm layer.
    `conv2d_leaders` creates a variable called `weights`, representing the
    convolutional kernel, that is convolved with the `inputs` to produce a
    `Tensor` of activations. If a `normalizer_fn` is provided (such as
    `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
    None and a `biases_initializer` is provided then a `biases` variable is
    created and added to the activations. Finally, if `activation_fn` is not `None`,
    it is applied to the activations as well.
    Performs atrous convolution with input stride equal to the rate for every
    rate in `rates` greater than one.
    Args:
        inputs: a 4-D tensor  `[batch_size, height, width, channels]`.
        num_outputs: integer, the number of output filters.
        kernel_size: a list of length 2 `[kernel_height, kernel_width]` of
          the filters. Can be an int if both values are the same.
        stride: a list of length 2 `[stride_height, stride_width]`.
          Can be an int if both strides are the same. Note that presently
          both strides must have the same value.
        padding: one of `VALID` or `SAME`.
        rates: list of integers. For each rate less than or equal to 1, a
          standard convolution is used. For each rate greater than 1, an atrous
          convolution with that rate is applied instead. The responses at the
          different rates are merged with an element-wise maximum.
        activation_fn: activation function.
        normalizer_fn: normalization function to use instead of `biases`. If
          `normalizer_fn` is provided then `biases_initializer` and
          `biases_regularizer` are ignored and `biases` are neither created nor added.
        normalizer_params: normalization function parameters.
        weights_initializer: An initializer for the weights.
        weights_regularizer: Optional regularizer for the weights.
        biases_initializer: An initializer for the biases. If None skip biases.
        biases_regularizer: Optional regularizer for the biases.
        reuse: whether or not the layer and its variables should be reused. To be
          able to reuse the layer, the scope must be given.
        variables_collections: optional list of collections for all the variables or
          a dictionary containing a different list of collections per variable.
        outputs_collections: collection to add the outputs.
        trainable: If `True` also add variables to the graph collection
          `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        scope: Optional scope for `variable_op_scope`.
    Returns:
        a tensor representing the output of the operation.
    Raises:
        ValueError: if both a rate in `rates` and `stride` are larger than one.
    """
    with variable_scope.variable_scope(scope, 'Conv', [inputs],
                                       reuse=reuse) as sc:

        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        # inshape = tf.shape(inputs)

        # Leading kernel size.
        kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
        stride_h, stride_w = utils.two_element_tuple(stride)
        num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Weights variable.
        weights_shape = [kernel_h, kernel_w, num_filters_in, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        # # Bias variable.
        # biases = None
        # if biases_initializer is not None:
        #     biases_collections = utils.get_variable_collections(
        #         variables_collections, 'biases')
        #     biases = variables.model_variable('biases',
        #                                       shape=[num_outputs, ],
        #                                       dtype=dtype,
        #                                       initializer=biases_initializer,
        #                                       regularizer=biases_regularizer,
        #                                       collections=biases_collections,
        #                                       trainable=trainable)

        # Convolution at different scales.
        outputs_pool = []
        for rate in rates:
            if rate > 1:
                conv = nn.atrous_conv2d(inputs, weights, rate, padding='SAME')
            else:
                conv = nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
            outputs_pool.append(conv)
        # 'Pooling' at different scales. A bit hacky. Use of concat + max_pool?
        outputs = None
        outputs_pool.reverse()
        for node in outputs_pool:
            if outputs is None:
                outputs = node
            else:
                outputs = tf.maximum(outputs, node)
        # # Add bias?
        # if biases is not None:
        #     outputs = tf.nn.bias_add(outputs, biases)

        # Fix padding and stride. A bit hacky too and not so efficient!
        if padding == 'VALID' or stride > 1:
            # Identity depthwise filter: a single 1 at the spatial centre.
            padfilter = np.zeros(shape=(kernel_h, kernel_w, num_filters_in, 1),
                                 dtype=dtype.as_numpy_dtype)
            x = (kernel_h - 1) // 2
            y = (kernel_w - 1) // 2
            padfilter[x, y, :, 0] = 1.
            outputs = tf.nn.depthwise_conv2d(outputs,
                                             padfilter,
                                             [1, stride_h, stride_w, 1],
                                             padding=padding)

        # Batch norm / bias and activation...
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
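
A minimal usage sketch for `conv2d_leaders` above, assuming TF 1.x plus the `tf`/`np` aliases the function already relies on; shapes and rates are illustrative. With `padding='SAME'` and `stride=1` the final depthwise padding/stride fix-up is skipped.

# Hypothetical usage (TF 1.x); `conv2d_leaders` is the function defined above.
import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 64, 64, 3])
# One 3x3 kernel evaluated at dilation rates 1 and 2; the two responses are
# merged with an element-wise maximum before the bias and activation.
net = conv2d_leaders(images, num_outputs=16, kernel_size=3, rates=[1, 2],
                     stride=1, padding='SAME', scope='leaders1')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(net, {images: np.zeros((2, 64, 64, 3), np.float32)})
    print(out.shape)  # (2, 64, 64, 16)
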
Example #34
0
    def _build(self, inputs, is_training=True):
        """
        Args:
            inputs: A Tensor of shape `(batch_size, height, width, channels)`.

        Returns:
            A dict of feature maps to be consumed by an SSD network
        """
        # TODO: Is there a better way to manage scoping in these cases?
        scope = self.module_name
        if self.parent_name:
            scope = self.parent_name + '/' + scope

        base_net_endpoints = super(SSDFeatureExtractor, self)._build(
            inputs, is_training=is_training)['end_points']

        if self.vgg_16_type:
            # The original SSD paper uses a modified version of the vgg16
            # network, which we'll modify here
            vgg_network_truncation_endpoint = base_net_endpoints[
                scope + '/vgg_16/conv5/conv5_3']

            # As pointed out in the SSD and ParseNet papers, `conv4_3` has a
            # different feature scale compared to other layers; to adjust it
            # we need to add a spatial normalization before adding the
            # predictors.
            vgg_conv4_3_name = scope + '/vgg_16/conv4/conv4_3'
            vgg_conv4_3 = base_net_endpoints[vgg_conv4_3_name]
            with tf.variable_scope(vgg_conv4_3_name + '_norm'):
                inputs_shape = vgg_conv4_3.shape
                inputs_rank = inputs_shape.ndims
                dtype = vgg_conv4_3.dtype.base_dtype
                norm_dim = tf.range(inputs_rank - 1, inputs_rank)
                params_shape = inputs_shape[-1:]

                # Normalize.
                vgg_conv4_3_norm = tf.nn.l2_normalize(
                    vgg_conv4_3, norm_dim, epsilon=1e-12
                )

                # Scale.
                # TODO use tf.get_variable and initialize
                #      to 20 as described in paper
                scale = variables.model_variable(
                    'gamma', shape=params_shape, dtype=dtype,
                    initializer=init_ops.ones_initializer()
                )
                vgg_conv4_3_norm = tf.multiply(vgg_conv4_3_norm, scale)
            tf.add_to_collection('FEATURE_MAPS', vgg_conv4_3_norm)

            # Extra layers for vgg16 as detailed in paper
            self._init_vgg16_extra_layers()
            with tf.variable_scope('extra_feature_layers'):
                net = tf.nn.max_pool(
                    vgg_network_truncation_endpoint, [1, 3, 3, 1],
                    padding='SAME', strides=[1, 1, 1, 1], name='pool5'
                )
                net = self.conv6(net)
                net = self.activation_fn(net)
                net = self.conv7(net)
                net = self.activation_fn(net)
                tf.add_to_collection('FEATURE_MAPS', net)
                net = self.conv8_1(net)
                net = self.activation_fn(net)
                net = self.conv8_2(net)
                net = self.activation_fn(net)
                tf.add_to_collection('FEATURE_MAPS', net)
                net = self.conv9_1(net)
                net = self.activation_fn(net)
                net = self.conv9_2(net)
                net = self.activation_fn(net)
                tf.add_to_collection('FEATURE_MAPS', net)
                net = self.conv10_1(net)
                net = self.activation_fn(net)
                net = self.conv10_2(net)
                net = self.activation_fn(net)
                tf.add_to_collection('FEATURE_MAPS', net)
                net = self.conv11_1(net)
                net = self.activation_fn(net)
                net = self.conv11_2(net)
                net = self.activation_fn(net)
                tf.add_to_collection('FEATURE_MAPS', net)

            # This parameter determines onto which variables we try to load the
            # pretrained weights
            self.pretrained_weights_scope = scope + '/vgg_16'

        # It's actually an ordered dict
        return utils.convert_collection_to_dict('FEATURE_MAPS')
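
A standalone sketch of the `conv4_3` normalization trick used above (and of the TODO about initializing the scale to 20): L2-normalize the feature map along channels, then rescale with a learnable per-channel `gamma`. TF 1.x is assumed; the function name, shapes, and initial scale are illustrative only.

# Hypothetical standalone sketch (TF 1.x) of L2 normalization + learnable scale.
import tensorflow as tf

def l2_norm_scale(feature_map, initial_scale=20.0, scope='conv4_3_norm'):
    with tf.variable_scope(scope):
        channels = feature_map.shape[-1:]
        normalized = tf.nn.l2_normalize(feature_map, -1, epsilon=1e-12)
        gamma = tf.get_variable(
            'gamma', shape=channels,
            initializer=tf.constant_initializer(initial_scale))
        return normalized * gamma   # broadcasts over batch, height, width

features = tf.placeholder(tf.float32, [None, 38, 38, 512])
scaled = l2_norm_scale(features)    # same shape, per-channel rescaled
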
Example #35
0
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    scope=None):
    """Adds a fully connected layer.

  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable is
  created and added to the hidden units. Finally, if `activation_fn` is not
  `None`, it is applied to the hidden units as well.

  Note: if `inputs` has a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.

  Args:
    inputs: A tensor with at least rank 2 and a known value for the last
      dimension, e.g. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are neither created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    scope: Optional scope for variable_op_scope.

  Returns:
     the tensor variable representing the result of the series of operations.

  Raises:
    ValueError: if `inputs` has rank less than 2 or if its last dimension is not set.
  """
    with variable_scope.variable_op_scope([inputs],
                                          scope,
                                          'fully_connected',
                                          reuse=reuse) as sc:
        dtype = inputs.dtype.base_dtype
        num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

        static_shape = inputs.get_shape().as_list()
        static_shape[-1] = num_outputs

        out_shape = array_ops.unpack(array_ops.shape(inputs))
        out_shape[-1] = num_outputs

        weights_shape = [num_input_units, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections)
        if len(static_shape) > 2:
            # Reshape inputs
            inputs = array_ops.reshape(inputs, [-1, num_input_units])
        outputs = standard_ops.matmul(inputs, weights)
        if normalizer_fn:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections)
                outputs = nn.bias_add(outputs, biases)
        if len(static_shape) > 2:
            # Reshape back outputs
            outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
            outputs.set_shape(static_shape)
        if activation_fn:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
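
A minimal usage sketch for `fully_connected` above, assuming a TF 1.x version that still provides `array_ops.pack`/`unpack` (as the function itself does); shapes are illustrative.

# Hypothetical usage (old TF 1.x API); `fully_connected` is the function above.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 128])
logits = fully_connected(x, num_outputs=10, activation_fn=None, scope='fc1')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(logits, {x: np.zeros((4, 128), np.float32)})
    print(out.shape)  # (4, 10)
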
Example #36
0
def group_norm(inputs,
               groups=32,
               channels_axis=-1,
               reduction_axes=(-3, -2),
               center=True,
               scale=True,
               epsilon=1e-6,
               activation_fn=None,
               param_initializers=None,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None,
               mean_close_to_zero=False):
    """Functional interface for the group normalization layer.

  Reference: https://arxiv.org/abs/1803.08494.

    "Group Normalization", Yuxin Wu, Kaiming He

  Args:
    inputs: A Tensor with at least 2 dimensions, one of which is channels. All
      shape dimensions must be fully defined.
    groups: Integer. Divide the channels into this number of groups over which
      normalization statistics are computed. This number must be commensurate
      with the number of channels in `inputs`.
    channels_axis: An integer. Specifies the index of the channels axis, which
      will be broken into `groups`, each of which has its statistics computed
      across. Must be mutually exclusive with `reduction_axes`. Preferred usage
      is to specify negative integers to be agnostic as to whether a batch
      dimension is included.
    reduction_axes: Tuple of integers. Specifies dimensions over which
       statistics will be accumulated. Must be mutually exclusive with
       `channels_axis`. Statistics will not be accumulated across axes not
       specified in `reduction_axes` nor `channels_axis`. Preferred usage is to
       specify negative integers to be agnostic to whether a batch dimension is
       included.

      Some sample usage cases:
        NHWC format: channels_axis=-1, reduction_axes=[-3, -2]
        NCHW format: channels_axis=-3, reduction_axes=[-2, -1]

    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta and gamma.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer, the scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    scope: Optional scope for `variable_scope`.
    mean_close_to_zero: The mean of `input` before ReLU will be close to zero
      when batch size >= 4k for Resnet-50 on TPU. If `True`, use
      `nn.sufficient_statistics` and `nn.normalize_moments` to calculate the
      variance. This is the same behavior as `fused` equals `True` in batch
      normalization. If `False`, use `nn.moments` to calculate the variance.
      When `mean` is close to zero, like 1e-4, using `mean` to calculate the
      variance may give poor results due to repeated roundoff error and
      denormalization in `mean`. When `mean` is large, like 1e2, sum(`input`^2)
      is so large that only the high-order digits of the elements are
      accumulated. Thus, using sum((`input` - `mean`)^2)/n to calculate the
      variance is more accurate than (sum(`input`^2)/n - `mean`^2) when `mean`
      is large.


  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If the channels dimension of `inputs` is undefined.
    ValueError: If number of groups is not commensurate with number of channels.
    ValueError: If reduction_axes or channels_axis are out of bounds.
    ValueError: If reduction_axes are not mutually exclusive with channels_axis.
  """
    # TODO(shlens): Support partially defined shapes for the inputs.
    inputs = ops.convert_to_tensor(inputs)
    original_shape = inputs.shape

    if inputs.shape.ndims is None:
        raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    if channels_axis > (inputs.shape.ndims - 1):
        raise ValueError('Axis is out of bounds.')

    # Standardize the channels_axis to be positive and identify # of channels.
    if channels_axis < 0:
        channels_axis = inputs.shape.ndims + channels_axis
    channels = inputs.shape[channels_axis].value

    if channels is None:
        raise ValueError('Inputs %s has undefined channel dimension: %d.' %
                         (inputs.name, channels_axis))

    # Standardize the reduction_axes to be positive.
    reduction_axes = list(reduction_axes)
    for i in range(len(reduction_axes)):
        if reduction_axes[i] < 0:
            reduction_axes[i] += inputs.shape.ndims

    for a in reduction_axes:
        if a > inputs.shape.ndims:
            raise ValueError('Axis is out of bounds.')
        if inputs.shape[a].value is None:
            raise ValueError('Inputs %s has undefined dimensions %d.' %
                             (inputs.name, a))
        if channels_axis == a:
            raise ValueError('reduction_axis must be mutually exclusive '
                             'with channels_axis')
    if groups > channels:
        raise ValueError('Invalid groups %d for %d channels.' %
                         (groups, channels))
    if channels % groups != 0:
        raise ValueError('%d channels is not commensurate with %d groups.' %
                         (channels, groups))

    # Determine axes before channels. Some examples of common image formats:
    #  'NCHW': before = [N], after = [HW]
    #  'NHWC': before = [NHW], after = []
    axes_before_channels = inputs.shape.as_list()[:channels_axis]
    axes_after_channels = inputs.shape.as_list()[channels_axis + 1:]

    # Manually broadcast the parameters to conform to the number of groups.
    params_shape_broadcast = ([1] * len(axes_before_channels) +
                              [groups, channels // groups] +
                              [1] * len(axes_after_channels))

    # Reshape the input by the group within the channel dimension.
    inputs_shape = (axes_before_channels + [groups, channels // groups] +
                    axes_after_channels)
    inputs = array_ops.reshape(inputs, inputs_shape)

    # Determine the dimensions across which moments are calculated.
    moments_axes = [channels_axis + 1]
    for a in reduction_axes:
        if a > channels_axis:
            moments_axes.append(a + 1)
        else:
            moments_axes.append(a)

    with variable_scope.variable_scope(scope,
                                       'GroupNorm', [inputs],
                                       reuse=reuse) as sc:
        # Note that the params_shape is the number of channels always.
        params_shape = [channels]

        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        dtype = inputs.dtype.base_dtype
        if param_initializers is None:
            param_initializers = {}
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta_initializer = param_initializers.get(
                'beta', init_ops.zeros_initializer())
            beta = variables.model_variable('beta',
                                            shape=params_shape,
                                            dtype=dtype,
                                            initializer=beta_initializer,
                                            collections=beta_collections,
                                            trainable=trainable)
            beta = array_ops.reshape(beta, params_shape_broadcast)

        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma_initializer = param_initializers.get(
                'gamma', init_ops.ones_initializer())
            gamma = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=gamma_initializer,
                                             collections=gamma_collections,
                                             trainable=trainable)
            gamma = array_ops.reshape(gamma, params_shape_broadcast)

        # Calculate the moments.
        if mean_close_to_zero:
            # One pass algorithm returns better result when mean is close to zero.
            counts, means_ss, variance_ss, _ = nn.sufficient_statistics(
                inputs, moments_axes, keep_dims=True)
            mean, variance = nn.normalize_moments(counts,
                                                  means_ss,
                                                  variance_ss,
                                                  shift=None)
        else:
            mean, variance = nn.moments(inputs, moments_axes, keep_dims=True)

        # Compute normalization.
        # TODO(shlens): Fix nn.batch_normalization to handle the 5-D Tensor
        # appropriately so that this operation may be faster.
        gain = math_ops.rsqrt(variance + epsilon)
        offset = -mean * gain
        if gamma is not None:
            gain *= gamma
            offset *= gamma
        if beta is not None:
            offset += beta
        outputs = inputs * gain + offset

        # Collapse the groups into the channel dimension.
        outputs = array_ops.reshape(outputs, original_shape)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
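
A small NumPy sketch of the precision trade-off that `mean_close_to_zero` describes in the docstring above: with a large mean, the one-pass formula sum(x^2)/n - mean^2 suffers catastrophic cancellation in float32, while the two-pass formula sum((x - mean)^2)/n stays close to a float64 reference. The values below are made up for illustration.

# Illustrative NumPy sketch of one-pass vs. two-pass variance in float32.
import numpy as np

x = np.float32(1e2) + np.random.randn(10000).astype(np.float32) * np.float32(1e-3)
mean = x.mean(dtype=np.float32)

one_pass = np.mean(x * x, dtype=np.float32) - mean * mean   # cancels catastrophically
two_pass = np.mean((x - mean) ** 2, dtype=np.float32)       # subtract the mean first

print(one_pass, two_pass, np.var(x.astype(np.float64)))     # two_pass tracks the reference
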
Example #37
0
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None):
    """Code modification of tensorflow/contrib/layers/python/layers/layers.py
  """
    with variable_scope.variable_op_scope([inputs],
                                          scope,
                                          'BatchNorm',
                                          reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        if inputs_rank is None:
            raise ValueError('Inputs %s has undefined rank.' % inputs.name)
        dtype = inputs.dtype.base_dtype
        axis = list(range(inputs_rank - 1))
        params_shape = inputs_shape[-1:]
        if not params_shape.is_fully_defined():
            raise ValueError('Inputs %s has undefined last dimension %s.' %
                             (inputs.name, params_shape))
        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta = variables.model_variable(
                'beta',
                shape=params_shape,
                dtype=dtype,
                initializer=init_ops.zeros_initializer,
                collections=beta_collections,
                trainable=trainable)
        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma = variables.model_variable(
                'gamma',
                shape=params_shape,
                dtype=dtype,
                initializer=init_ops.ones_initializer,
                collections=gamma_collections,
                trainable=trainable)
        # Create moving_mean and moving_variance variables and add them to the
        # appropriate collections.
        moving_mean_collections = utils.get_variable_collections(
            variables_collections, 'moving_mean')
        moving_mean = variables.model_variable(
            'moving_mean',
            shape=params_shape,
            dtype=dtype,
            initializer=init_ops.zeros_initializer,
            trainable=False,
            collections=moving_mean_collections)
        moving_variance_collections = utils.get_variable_collections(
            variables_collections, 'moving_variance')
        moving_variance = variables.model_variable(
            'moving_variance',
            shape=params_shape,
            dtype=dtype,
            initializer=init_ops.ones_initializer,
            trainable=False,
            collections=moving_variance_collections)

        # Calculate the moments based on the individual batch.
        mean, variance = nn.moments(inputs, axis, shift=moving_mean)
        # Update the moving_mean and moving_variance moments.
        update_moving_mean = moving_averages.assign_moving_average(
            moving_mean, mean, decay)
        update_moving_variance = moving_averages.assign_moving_average(
            moving_variance, variance, decay)
        if updates_collections is None:
            # Make sure the updates are computed here.
            with ops.control_dependencies(
                [update_moving_mean, update_moving_variance]):
                outputs = nn.batch_normalization(inputs, mean, variance, beta,
                                                 gamma, epsilon)
        else:
            # Collect the updates to be computed later.
            ops.add_to_collections(updates_collections, update_moving_mean)
            ops.add_to_collections(updates_collections, update_moving_variance)
            outputs = nn.batch_normalization(inputs, mean, variance, beta,
                                             gamma, epsilon)

        test_outputs = nn.batch_normalization(inputs, moving_mean,
                                              moving_variance, beta, gamma,
                                              epsilon)

        outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs)
        outputs.set_shape(inputs_shape)

        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
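
A minimal usage sketch for the modified `batch_norm` above, assuming TF 1.x. Because `is_training` is routed through `tf.cond`, it is fed here as a boolean placeholder, and the moving-average update ops collected under `UPDATE_OPS` are run explicitly during training.

# Hypothetical usage (TF 1.x); `batch_norm` is the function defined above.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 16])
is_training = tf.placeholder(tf.bool, [], name='is_training')
bn = batch_norm(x, is_training=is_training, scope='bn1')

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)   # moving-average updates

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.randn(8, 28, 28, 16).astype(np.float32)
    sess.run([bn] + update_ops, {x: batch, is_training: True})   # training step
    sess.run(bn, {x: batch, is_training: False})                 # inference step
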
Example #38
0
def depth_conv2d(inputs,
                 kernel_size,
                 stride=1,
                 channel_multiplier=1,
                 padding='SAME',
                 data_format=DATA_FORMAT_NHWC,
                 rate=1,
                 activation_fn=nn.relu,
                 normalizer_fn=None,
                 normalizer_params=None,
                 weights_initializer=initializers.xavier_initializer(),
                 weights_regularizer=None,
                 biases_initializer=init_ops.zeros_initializer(),
                 biases_regularizer=None,
                 reuse=None,
                 variables_collections=None,
                 outputs_collections=None,
                 trainable=True,
                 scope=None):

    if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
        raise ValueError('data_format has to be either NCHW or NHWC.')
    layer_variable_getter = _build_variable_getter({
        'bias': 'biases',
        'depthwise_kernel': 'depthwise_weights'
    })

    with variable_scope.variable_scope(
            scope,
            'SeparableConv2d', [inputs],
            reuse=reuse,
            custom_getter=layer_variable_getter) as sc:
        inputs = ops.convert_to_tensor(inputs)

        df = ('channels_first' if data_format and data_format.startswith('NC')
              else 'channels_last')

        # Actually apply depthwise conv instead of separable conv.
        dtype = inputs.dtype.base_dtype
        kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
        stride_h, stride_w = utils.two_element_tuple(stride)
        num_filters_in = utils.channel_dimension(inputs.get_shape(),
                                                 df,
                                                 min_rank=4)
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')

        depthwise_shape = [
            kernel_h, kernel_w, num_filters_in, channel_multiplier
        ]
        depthwise_weights = variables.model_variable(
            'depthwise_weights',
            shape=depthwise_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            trainable=trainable,
            collections=weights_collections)
        strides = [
            1, 1, stride_h, stride_w
        ] if data_format.startswith('NC') else [1, stride_h, stride_w, 1]

        outputs = nn.depthwise_conv2d(inputs,
                                      depthwise_weights,
                                      strides,
                                      padding,
                                      rate=utils.two_element_tuple(rate),
                                      data_format=data_format)
        # Depthwise conv produces num_filters_in * channel_multiplier channels.
        num_outputs = num_filters_in * channel_multiplier

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    trainable=trainable,
                    collections=biases_collections)
                outputs = nn.bias_add(outputs, biases, data_format=data_format)

        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections, sc.name,
                                           outputs)
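
A minimal usage sketch for `depth_conv2d` above, assuming TF 1.x and that the private `_build_variable_getter` helper it calls is available in scope; shapes are illustrative. With `channel_multiplier=1` the channel count is preserved.

# Hypothetical usage (TF 1.x); `depth_conv2d` is the function defined above.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 56, 56, 32])   # NHWC
y = depth_conv2d(x, kernel_size=3, stride=1, channel_multiplier=1,
                 padding='SAME', scope='dwconv1')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(y, {x: np.zeros((2, 56, 56, 32), np.float32)})
    print(out.shape)  # (2, 56, 56, 32)
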
def layer_norm_custom(inputs,
                      center=True,
                      scale=True,
                      activation_fn=None,
                      reuse=None,
                      variables_collections=None,
                      outputs_collections=None,
                      trainable=True,
                      epsilon=1E-12,
                      scope=None):
    """Adds a Layer Normalization layer from https://arxiv.org/abs/1607.06450.
      "Layer Normalization"
      Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
    Can be used as a normalizer function for conv2d and fully_connected.
    Args:
      inputs: a tensor with 2 or more dimensions. The normalization
              occurs over all but the first dimension.
      center: If True, subtract `beta`. If False, `beta` is ignored.
      scale: If True, multiply by `gamma`. If False, `gamma` is
        not used. When the next layer is linear (also e.g. `nn.relu`), this can be
        disabled since the scaling can be done by the next layer.
      activation_fn: activation function, default set to None to skip it and
        maintain a linear activation.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer, the scope must be given.
      variables_collections: optional collections for the variables.
      outputs_collections: collections to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      epsilon: small value added to prevent NaN outputs.
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    Raises:
      ValueError: if rank or last dimension of `inputs` is undefined.
    """
    with variable_scope.variable_scope(scope,
                                       'LayerNorm', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        if inputs_rank is None:
            raise ValueError('Inputs %s has undefined rank.' % inputs.name)
        dtype = inputs.dtype.base_dtype
        params_shape = inputs_shape[-1:]
        if not params_shape.is_fully_defined():
            raise ValueError('Inputs %s has undefined last dimension %s.' %
                             (inputs.name, params_shape))
        # Allocate parameters for the beta and gamma of the normalization.
        beta, gamma = None, None
        if center:
            beta_collections = utils.get_variable_collections(
                variables_collections, 'beta')
            beta = variables.model_variable(
                'beta',
                shape=params_shape,
                dtype=dtype,
                initializer=init_ops.zeros_initializer(),
                collections=beta_collections,
                trainable=trainable)
        if scale:
            gamma_collections = utils.get_variable_collections(
                variables_collections, 'gamma')
            gamma = variables.model_variable(
                'gamma',
                shape=params_shape,
                dtype=dtype,
                initializer=init_ops.ones_initializer(),
                collections=gamma_collections,
                trainable=trainable)

        variance_epsilon = epsilon
        if epsilon <= 0:
            print("WARNING: epsilon <=0, may result in NaN outputs.")
        if center and scale:
            outputs = cMod.layer_norm_fused_custom(inputs,
                                                   gamma,
                                                   beta,
                                                   epsilon=variance_epsilon)
        elif center:
            outputs = cMod.layer_norm_bias_add_custom(inputs,
                                                      beta,
                                                      epsilon=variance_epsilon)
        elif scale:
            # dummy constant beta for layer_norm_fused_custom()
            beta = tf.zeros(params_shape, dtype=dtype, name="dummy_beta")
            outputs = cMod.layer_norm_fused_custom(inputs,
                                                   gamma,
                                                   beta,
                                                   epsilon=variance_epsilon)
        else:
            outputs = cMod.layer_norm_custom(inputs, epsilon=variance_epsilon)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
Example #40
0
def spatial_softmax(features,
                    temperature=None,
                    name=None,
                    variables_collections=None,
                    trainable=True,
                    data_format='NHWC'):
    """Computes the spatial softmax of a convolutional feature map.
  First computes the softmax over the spatial extent of each channel of a
  convolutional feature map. Then computes the expected 2D position of the
  points of maximal activation for each channel, resulting in a set of
  feature keypoints [x1, y1, ... xN, yN] for all N channels.
  Read more here:
  "Learning visual feature spaces for robotic manipulation with
  deep spatial autoencoders." Finn et al., http://arxiv.org/abs/1509.06113.
  Args:
    features: A `Tensor` of size [batch_size, H, W, num_channels]; the
      convolutional feature map.
    temperature: Softmax temperature (optional). If None, a learnable
      temperature is created.
    name: A name for this operation (optional).
    variables_collections: Collections for the temperature variable.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
  Returns:
    feature_keypoints: A `Tensor` with size [batch_size, num_channels * 2];
      the expected 2D locations of each channel's feature keypoint (normalized
      to the range (-1,1)). The inner dimension is arranged as
      [x1, y1, ... xN, yN].
  Raises:
    ValueError: If unexpected data_format specified.
    ValueError: If num_channels dimension is unspecified.
  """
    shape = array_ops.shape(features)
    static_shape = features.shape
    height, width, num_channels = shape[1], shape[2], static_shape[3]
    if num_channels.value is None:
        raise ValueError('The num_channels dimension of the inputs to '
                         '`spatial_softmax` should be defined. Found `None`.')

    with ops.name_scope(name, 'spatial_softmax', [features]) as name:
        # Create tensors for x and y coordinate values, scaled to range [-1, 1].
        pos_x, pos_y = array_ops.meshgrid(math_ops.lin_space(-1.,
                                                             1.,
                                                             num=height),
                                          math_ops.lin_space(-1.,
                                                             1.,
                                                             num=width),
                                          indexing='ij')
        pos_x = array_ops.reshape(pos_x, [height * width])
        pos_y = array_ops.reshape(pos_y, [height * width])
        if temperature is None:
            temperature_collections = utils.get_variable_collections(
                variables_collections, name + 'temperature')
            temperature = variables.model_variable(
                name + 'temperature',
                shape=(),
                dtype=dtypes.float32,
                initializer=init_ops.ones_initializer(),
                collections=temperature_collections,
                trainable=trainable)
        # Flatten each channel's spatial map into a row: transpose the NHWC
        # input to [batch, channels, height, width], then reshape so every row
        # holds one channel's height * width activations. This must happen
        # whether or not a temperature was supplied.
        features = array_ops.reshape(
            array_ops.transpose(features, [0, 3, 1, 2]),
            [-1, height * width])

        softmax_attention = nn.softmax(features / temperature)
        expected_x = math_ops.reduce_sum(pos_x * softmax_attention, [1],
                                         keep_dims=True)
        expected_y = math_ops.reduce_sum(pos_y * softmax_attention, [1],
                                         keep_dims=True)
        expected_xy = array_ops.concat([expected_x, expected_y], 1)
        feature_keypoints = array_ops.reshape(expected_xy,
                                              [-1, num_channels.value * 2])
        feature_keypoints.set_shape([None, num_channels.value * 2])
        return feature_keypoints
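A minimal usage sketch for the spatial_softmax defined above (the placeholder shape below is made up for illustration):

import tensorflow as tf

features = tf.placeholder(tf.float32, [None, 32, 32, 16])  # [batch, H, W, C], hypothetical
keypoints = spatial_softmax(features)                       # -> [batch, 16 * 2]
# keypoints[:, 0::2] are the expected x coordinates and keypoints[:, 1::2] the
# y coordinates of each channel's keypoint, normalized to (-1, 1).
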
def l2_normalization(inputs,
                     scaling=False,
                     scale_initializer=init_ops.ones_initializer(),
                     reuse=None,
                     variables_collections=None,
                     outputs_collections=None,
                     data_format='NHWC',
                     trainable=True,
                     scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the dimensions
        which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables or
        a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format:  NHWC or NCHW data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    """

    with variable_scope.variable_scope(scope,
                                       'L2Normalization', [inputs],
                                       reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank - 1, inputs_rank)
            params_shape = inputs_shape[-1:]
        elif data_format == 'NCHW':
            # norm_dim = tf.range(2, inputs_rank)
            norm_dim = tf.range(1, 2)
            params_shape = inputs_shape[1:2]

        # Normalize along spatial dimensions.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)
                # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
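An illustrative call to the l2_normalization defined above, with a learnable per-channel scale initialized to 20 (roughly as used on conv4_3 in SSD-style detectors; the input shape below is made up):

import tensorflow as tf

feature_map = tf.placeholder(tf.float32, [None, 38, 38, 512])  # hypothetical conv feature map
normalized = l2_normalization(feature_map,
                              scaling=True,
                              scale_initializer=tf.constant_initializer(20.0),
                              scope='conv4_3_norm')
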
Example #42
0
File: batch_norm.py Project: mkabra/poseTF
def batch_norm_mine_old(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               activation_fn=None,
               param_initializers=None,
               param_regularizers=None,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               batch_weights=None,
               fused=False,
               data_format=DATA_FORMAT_NHWC,
               zero_debias_moving_mean=False,
               scope=None,
               renorm=False,
               renorm_clipping=None,
               renorm_decay=0.99):
  """
  This earlier version of my modification to batch norm uses
current_mean and current_variance if is_training is True and
moving_mean and moving_variance otherwise. This was leading a large divergence between
the results depending upon whether the is_training set to True or not.

I think ideally it should always use moving_mean and moving_variance. batch_norm_mine
does this.

  Adds a Batch Normalization layer from http://arxiv.org/abs/1502.03167.
copy of tensorflow.contrib.layers
  Args:
    inputs: A tensor with 2 or more dimensions, where the first dimension has
      `batch_size`. The normalization is over all but the last dimension if
      `data_format` is `NHWC` and the second dimension if `data_format` is
      `NCHW`.
    decay: Decay for the moving average. Reasonable values for `decay` are close
      to 1.0, typically in the multiple-nines range: 0.999, 0.99, 0.9, etc.
      Lower `decay` value (recommend trying `decay`=0.9) if model experiences
      reasonably good training performance but poor validation and/or test
      performance. Try zero_debias_moving_mean=True for improved stability.
    center: If True, add offset of `beta` to normalized tensor. If False, `beta`
      is ignored.
    scale: If True, multiply by `gamma`. If False, `gamma` is
      not used. When the next layer is linear (also e.g. `nn.relu`), this can be
      disabled since the scaling can be done by the next layer.
    epsilon: Small float added to variance to avoid dividing by zero.
    activation_fn: Activation function, default set to None to skip it and
      maintain a linear activation.
    param_initializers: Optional initializers for beta, gamma, moving mean and
      moving variance.
    param_regularizers: Optional regularizer for beta and gamma.
    updates_collections: Collections to collect the update ops for computation.
      The updates_ops need to be executed with the train_op.
      If None, a control dependency would be added to make sure the updates are
      computed in place.
    is_training: Whether or not the layer is in training mode. In training mode
      it would accumulate the statistics of the moments into `moving_mean` and
      `moving_variance` using an exponential moving average with the given
      `decay`. When it is not in training mode then it would use the values of
      the `moving_mean` and the `moving_variance`.
    reuse: Whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional collections for the variables.
    outputs_collections: Collections to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
    batch_weights: An optional tensor of shape `[batch_size]`,
      containing a frequency weight for each batch item. If present,
      then the batch normalization uses weighted mean and
      variance. (This can be used to correct for bias in training
      example selection.)
    fused:  Use nn.fused_batch_norm if True, nn.batch_normalization otherwise.
    data_format: A string. `NHWC` (default) and `NCHW` are supported.
    zero_debias_moving_mean: Use zero_debias for moving_mean. It creates a new
      pair of variables 'moving_mean/biased' and 'moving_mean/local_step'.
    scope: Optional scope for `variable_scope`.
    renorm: Whether to use Batch Renormalization
      (https://arxiv.org/abs/1702.03275). This adds extra variables during
      training. The inference is the same for either value of this parameter.
    renorm_clipping: A dictionary that may map keys 'rmax', 'rmin', 'dmax' to
      scalar `Tensors` used to clip the renorm correction. The correction
      `(r, d)` is used as `corrected_value = normalized_value * r + d`, with
      `r` clipped to [rmin, rmax], and `d` to [-dmax, dmax]. Missing rmax, rmin,
      dmax are set to inf, 0, inf, respectively.
    renorm_decay: Momentum used to update the moving means and standard
      deviations with renorm. Unlike `momentum`, this affects training
      and should be neither too small (which would add noise) nor too large
      (which would give stale estimates). Note that `decay` is still applied
      to get the means and variances for inference.

  Returns:
    A `Tensor` representing the output of the operation.

  Raises:
    ValueError: If `batch_weights` is not None and `fused` is True.
    ValueError: If `param_regularizers` is not None and `fused` is True.
    ValueError: If `data_format` is neither `NHWC` nor `NCHW`.
    ValueError: If the rank of `inputs` is undefined.
    ValueError: If rank or channels dimension of `inputs` is undefined.
  """
  if fused:
    if batch_weights is not None:
      raise ValueError('Weighted mean and variance is not currently '
                       'supported for fused batch norm.')
    if param_regularizers is not None:
      raise ValueError('Regularizers are not currently '
                       'supported for fused batch norm.')
    if renorm:
      raise ValueError('Renorm is not supported for fused batch norm.')
    return _fused_batch_norm(
        inputs,
        decay=decay,
        center=center,
        scale=scale,
        epsilon=epsilon,
        activation_fn=activation_fn,
        param_initializers=param_initializers,
        updates_collections=updates_collections,
        is_training=is_training,
        reuse=reuse,
        variables_collections=variables_collections,
        outputs_collections=outputs_collections,
        trainable=trainable,
        data_format=data_format,
        zero_debias_moving_mean=zero_debias_moving_mean,
        scope=scope)

  if data_format not in (DATA_FORMAT_NCHW, DATA_FORMAT_NHWC):
    raise ValueError('data_format has to be either NCHW or NHWC.')

  layer_variable_getter = _build_variable_getter()
  with variable_scope.variable_scope(
      scope, 'BatchNorm', [inputs], reuse=reuse,
      custom_getter=layer_variable_getter) as sc:
    inputs = ops.convert_to_tensor(inputs)

    # Determine whether we can use the core layer class.
    if (batch_weights is None and
        updates_collections is ops.GraphKeys.UPDATE_OPS and
        not zero_debias_moving_mean):
      # Use the core layer class.
      axis = 1 if data_format == DATA_FORMAT_NCHW else -1
      if not param_initializers:
        param_initializers = {}
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      if not param_regularizers:
        param_regularizers = {}
      beta_regularizer = param_regularizers.get('beta')
      gamma_regularizer = param_regularizers.get('gamma')
      layer = normalization_layers.BatchNormalization(
          axis=axis,
          momentum=decay,
          epsilon=epsilon,
          center=center,
          scale=scale,
          beta_initializer=beta_initializer,
          gamma_initializer=gamma_initializer,
          moving_mean_initializer=moving_mean_initializer,
          moving_variance_initializer=moving_variance_initializer,
          beta_regularizer=beta_regularizer,
          gamma_regularizer=gamma_regularizer,
          trainable=trainable,
          renorm=renorm,
          renorm_clipping=renorm_clipping,
          renorm_momentum=renorm_decay,
          name=sc.name,
          _scope=sc,
          _reuse=reuse)
      outputs = layer.apply(inputs, training=is_training)

      # Add variables to collections.
      _add_variable_to_collections(
          layer.moving_mean, variables_collections, 'moving_mean')
      _add_variable_to_collections(
          layer.moving_variance, variables_collections, 'moving_variance')
      if layer.beta:
        _add_variable_to_collections(layer.beta, variables_collections, 'beta')
      if layer.gamma:
        _add_variable_to_collections(
            layer.gamma, variables_collections, 'gamma')

      if activation_fn is not None:
        outputs = activation_fn(outputs)
      return utils.collect_named_outputs(outputs_collections,
                                         sc.original_name_scope, outputs)

    # Not supported by layer class: batch_weights argument,
    # and custom updates_collections. In that case, use the legacy BN
    # implementation.
    # Custom updates collections are not supported because the update logic
    # is different in this case, in particular w.r.t. "forced updates" and
    # update op reuse.
    if renorm:
      raise ValueError('renorm is not supported with batch_weights, '
                       'updates_collections or zero_debias_moving_mean')
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    if batch_weights is not None:
      batch_weights = ops.convert_to_tensor(batch_weights)
      inputs_shape[0:1].assert_is_compatible_with(batch_weights.get_shape())
      # Reshape batch weight values so they broadcast across inputs.
      nshape = [-1] + [1 for _ in range(inputs_rank - 1)]
      batch_weights = array_ops.reshape(batch_weights, nshape)

    if data_format == DATA_FORMAT_NCHW:
      moments_axes = [0] + list(range(2, inputs_rank))
      params_shape = inputs_shape[1:2]
      # For NCHW format, rather than relying on implicit broadcasting, we
      # explicitly reshape the params to params_shape_broadcast when computing
      # the moments and the batch normalization.
      params_shape_broadcast = list(
          [1, inputs_shape[1].value] + [1 for _ in range(2, inputs_rank)])
    else:
      moments_axes = list(range(inputs_rank - 1))
      params_shape = inputs_shape[-1:]
      params_shape_broadcast = None
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined channels dimension %s.' % (
          inputs.name, params_shape))

    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if not param_initializers:
      param_initializers = {}
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta_initializer = param_initializers.get('beta',
                                                init_ops.zeros_initializer())
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=beta_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(variables_collections,
                                                         'gamma')
      gamma_initializer = param_initializers.get('gamma',
                                                 init_ops.ones_initializer())
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=gamma_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)

    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections. We disable variable partitioning while creating
    # them, because assign_moving_average is not yet supported for partitioned
    # variables.
    partitioner = variable_scope.get_variable_scope().partitioner
    try:
      variable_scope.get_variable_scope().set_partitioner(None)
      moving_mean_collections = utils.get_variable_collections(
          variables_collections, 'moving_mean')
      moving_mean_initializer = param_initializers.get(
          'moving_mean', init_ops.zeros_initializer())
      moving_mean = variables.model_variable(
          'moving_mean',
          shape=params_shape,
          dtype=dtype,
          initializer=moving_mean_initializer,
          trainable=False,
          collections=moving_mean_collections)
      moving_variance_collections = utils.get_variable_collections(
          variables_collections, 'moving_variance')
      moving_variance_initializer = param_initializers.get(
          'moving_variance', init_ops.ones_initializer())
      moving_variance = variables.model_variable(
          'moving_variance',
          shape=params_shape,
          dtype=dtype,
          initializer=moving_variance_initializer,
          trainable=False,
          collections=moving_variance_collections)
    finally:
      variable_scope.get_variable_scope().set_partitioner(partitioner)

    # If `is_training` doesn't have a constant value, because it is a `Tensor`,
    # a `Variable` or `Placeholder` then is_training_value will be None and
    # `needs_moments` will be true.
    is_training_value = utils.constant_value(is_training)
    need_moments = is_training_value is None or is_training_value
    if need_moments:
      # Calculate the moments based on the individual batch.
      if batch_weights is None:
        if data_format == DATA_FORMAT_NCHW:
          mean, _ = nn.moments(inputs, moments_axes, keep_dims=True)
          variance, _ = nn.moments((inputs - moving_mean)**2, moments_axes,
                                   keep_dims=True)
          mean = array_ops.reshape(mean, [-1])
          variance = array_ops.reshape(variance, [-1])
        else:
          mean, _ = nn.moments(inputs, moments_axes)
          variance, _ = nn.moments((inputs - moving_mean)**2, moments_axes)
      else:
        if data_format == DATA_FORMAT_NCHW:
          mean, _ = nn.weighted_moments(inputs, moments_axes, batch_weights,
                                        keep_dims=True)
          variance, _ = nn.weighted_moments((inputs - moving_mean)**2,
                                            moments_axes, batch_weights,
                                            keep_dims=True)
          mean = array_ops.reshape(mean, [-1])
          variance = array_ops.reshape(variance, [-1])
        else:
          mean, _ = nn.weighted_moments(inputs, moments_axes, batch_weights)
          variance, _ = nn.weighted_moments((inputs - moving_mean)**2,
                                            moments_axes, batch_weights)

      moving_vars_fn = lambda: (moving_mean, moving_variance)
      if updates_collections is None:
        def _force_updates():
          """Internal function forces updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          with ops.control_dependencies([update_moving_mean,
                                         update_moving_variance]):
            return array_ops.identity(mean), array_ops.identity(variance)
        mean, variance = utils.smart_cond(is_training,
                                          _force_updates,
                                          moving_vars_fn)
      else:
        def _delay_updates():
          """Internal function that delay updates moving_vars if is_training."""
          update_moving_mean = moving_averages.assign_moving_average(
              moving_mean, mean, decay, zero_debias=zero_debias_moving_mean)
          update_moving_variance = moving_averages.assign_moving_average(
              moving_variance, variance, decay, zero_debias=False)
          return update_moving_mean, update_moving_variance

        update_mean, update_variance = utils.smart_cond(is_training,
                                                        _delay_updates,
                                                        moving_vars_fn)
        ops.add_to_collections(updates_collections, update_mean)
        ops.add_to_collections(updates_collections, update_variance)
        # Use computed moments during training and moving_vars otherwise.
        vars_fn = lambda: (mean, variance)
        mean, variance = utils.smart_cond(is_training, vars_fn, moving_vars_fn)
    else:
      mean, variance = moving_mean, moving_variance
    if data_format == DATA_FORMAT_NCHW:
      mean = array_ops.reshape(mean, params_shape_broadcast)
      variance = array_ops.reshape(variance, params_shape_broadcast)
      beta = array_ops.reshape(beta, params_shape_broadcast)
      if gamma is not None:
        gamma = array_ops.reshape(gamma, params_shape_broadcast)

    # Compute batch_normalization.
    outputs = nn.batch_normalization(inputs, mean, variance, beta, gamma,
                                     epsilon)
    outputs.set_shape(inputs_shape)
    if activation_fn is not None:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections,
                                       sc.original_name_scope, outputs)
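Because updates_collections defaults to GraphKeys.UPDATE_OPS, the moving statistics collected by batch_norm_mine_old are only refreshed if those update ops run together with the train op. A hedged sketch (the input, loss, and optimizer below are placeholders, not part of the original code):

import tensorflow as tf

some_inputs = tf.placeholder(tf.float32, [None, 28, 28, 64])  # hypothetical input
net = batch_norm_mine_old(some_inputs, is_training=True, scale=True)
loss = tf.reduce_mean(tf.square(net))                         # placeholder loss
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)       # batch-norm update ops
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
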
Example #43
0
def nan_batch_norm(inputs, decay=0.999, center=True, scale=False, epsilon=0.001,
        is_training=True, reuse=None, variables_collections=None, outputs_collections=None,
        trainable=False, scope=None):
    with variable_scope.variable_op_scope([inputs],
                    scope, 'NanBatchNorm', reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        if inputs_rank is None:
          raise ValueError('Inputs %s has undefined rank.' % inputs.name)
        dtype = inputs.dtype.base_dtype
        axis = list(range(inputs_rank - 1))
        params_shape = inputs_shape[-1:]
        beta, gamma = None, None
        if center:
          beta_collections = utils.get_variable_collections(variables_collections,
                                                            'beta')
          beta = variables.model_variable('beta',
                                          shape=params_shape,
                                          dtype=dtype,
                                          initializer=init_ops.zeros_initializer,
                                          collections=beta_collections,
                                          trainable=False)
        if scale:
          gamma_collections = utils.get_variable_collections(variables_collections,
                                                             'gamma')
          gamma = variables.model_variable('gamma',
                                           shape=params_shape,
                                           dtype=dtype,
                                           initializer=init_ops.ones_initializer,
                                           collections=gamma_collections,
                                           trainable=trainable)
        # Create moving_mean and moving_variance variables and add them to the
        # appropriate collections.
        moving_mean_collections = utils.get_variable_collections(
            variables_collections, 'moving_mean')
        moving_mean = variables.model_variable(
            'moving_mean',
            shape=params_shape,
            dtype=dtype,
            initializer=init_ops.zeros_initializer,
            trainable=False,
            collections=moving_mean_collections)
        moving_variance_collections = utils.get_variable_collections(
            variables_collections, 'moving_variance')
        moving_variance = variables.model_variable(
            'moving_variance',
            shape=params_shape,
            dtype=dtype,
            initializer=init_ops.ones_initializer,
            trainable=False,
            collections=moving_variance_collections)
        is_training_value = utils.constant_value(is_training)
        need_moments = is_training_value is None or is_training_value
        if need_moments:
            mean = nanmean(inputs, axis=axis)
            variance = nanvar(inputs, axis=axis)
            moving_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay)
            moving_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay)
        mean, variance = moving_mean, moving_variance
        outputs = tf.nn.batch_normalization(inputs, mean, variance, beta, gamma, epsilon)
        outputs.set_shape(inputs_shape)
        return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
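nan_batch_norm relies on nanmean and nanvar helpers that are not shown in this snippet. A possible sketch, assuming they compute moments over the given axes while ignoring NaN entries:

import tensorflow as tf

def nanmean(x, axis=None):
    # Mean over `axis` that ignores NaN entries (assumed helper, not original code).
    finite = tf.logical_not(tf.is_nan(x))
    zeroed = tf.where(finite, x, tf.zeros_like(x))
    count = tf.reduce_sum(tf.cast(finite, x.dtype), axis=axis)
    return tf.reduce_sum(zeroed, axis=axis) / tf.maximum(count, 1.0)

def nanvar(x, axis=None):
    # Variance over `axis` that ignores NaN entries (assumed helper). The mean
    # broadcasts back over the reduced leading axes, as used in nan_batch_norm.
    return nanmean(tf.square(x - nanmean(x, axis=axis)), axis=axis)
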
Example #44
0
def batch_norm(inputs,
               decay=0.999,
               center=True,
               scale=False,
               epsilon=0.001,
               updates_collections=ops.GraphKeys.UPDATE_OPS,
               is_training=True,
               reuse=None,
               variables_collections=None,
               outputs_collections=None,
               trainable=True,
               scope=None):
  """Code modification of tensorflow/contrib/layers/python/layers/layers.py
  """
  with variable_scope.variable_op_scope([inputs],
                                        scope, 'BatchNorm', reuse=reuse) as sc:
    inputs = ops.convert_to_tensor(inputs)
    inputs_shape = inputs.get_shape()
    inputs_rank = inputs_shape.ndims
    if inputs_rank is None:
      raise ValueError('Inputs %s has undefined rank.' % inputs.name)
    dtype = inputs.dtype.base_dtype
    axis = list(range(inputs_rank - 1))
    params_shape = inputs_shape[-1:]
    if not params_shape.is_fully_defined():
      raise ValueError('Inputs %s has undefined last dimension %s.' % (
          inputs.name, params_shape))
    # Allocate parameters for the beta and gamma of the normalization.
    beta, gamma = None, None
    if center:
      beta_collections = utils.get_variable_collections(variables_collections,
                                                        'beta')
      beta = variables.model_variable('beta',
                                      shape=params_shape,
                                      dtype=dtype,
                                      initializer=init_ops.zeros_initializer,
                                      collections=beta_collections,
                                      trainable=trainable)
    if scale:
      gamma_collections = utils.get_variable_collections(variables_collections,
                                                         'gamma')
      gamma = variables.model_variable('gamma',
                                       shape=params_shape,
                                       dtype=dtype,
                                       initializer=init_ops.ones_initializer,
                                       collections=gamma_collections,
                                       trainable=trainable)
    # Create moving_mean and moving_variance variables and add them to the
    # appropriate collections.
    moving_mean_collections = utils.get_variable_collections(
        variables_collections, 'moving_mean')
    moving_mean = variables.model_variable(
        'moving_mean',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=False,
        collections=moving_mean_collections)
    moving_variance_collections = utils.get_variable_collections(
        variables_collections, 'moving_variance')
    moving_variance = variables.model_variable(
        'moving_variance',
        shape=params_shape,
        dtype=dtype,
        initializer=init_ops.ones_initializer,
        trainable=False,
        collections=moving_variance_collections)

    # Calculate the moments based on the individual batch.
    mean, variance = nn.moments(inputs, axis, shift=moving_mean)
    # Update the moving_mean and moving_variance moments.
    update_moving_mean = moving_averages.assign_moving_average(
        moving_mean, mean, decay)
    update_moving_variance = moving_averages.assign_moving_average(
        moving_variance, variance, decay)
    if updates_collections is None:
      # Make sure the updates are computed here.
      with ops.control_dependencies([update_moving_mean,
                                      update_moving_variance]):
        outputs = nn.batch_normalization(
            inputs, mean, variance, beta, gamma, epsilon)
    else:
      # Collect the updates to be computed later.
      ops.add_to_collections(updates_collections, update_moving_mean)
      ops.add_to_collections(updates_collections, update_moving_variance)
      outputs = nn.batch_normalization(
          inputs, mean, variance, beta, gamma, epsilon)

    test_outputs = nn.batch_normalization(
        inputs, moving_mean, moving_variance, beta, gamma, epsilon)

    outputs = tf.cond(is_training, lambda: outputs, lambda: test_outputs)
    outputs.set_shape(inputs_shape)

    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
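Since this variant selects between the batch statistics and the moving averages with tf.cond, is_training has to be a boolean Tensor rather than a Python bool (and the function above uses the older variable_op_scope API, so it targets an early TensorFlow release). An illustrative sketch with a hypothetical input shape:

import tensorflow as tf

is_training = tf.placeholder(tf.bool, shape=[], name='is_training')
inputs = tf.placeholder(tf.float32, [None, 256])               # hypothetical input
net = batch_norm(inputs, is_training=is_training, scope='bn')
# Feed {is_training: True} during training and {is_training: False} at eval time,
# and run the ops collected in tf.GraphKeys.UPDATE_OPS alongside the train op.
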
def joint_weighted_sum_from_feature_columns(columns_to_tensors,
                                            feature_columns,
                                            num_outputs,
                                            weight_collections=None,
                                            trainable=True,
                                            scope=None):
  """A restricted linear prediction builder based on FeatureColumns.

  As long as all feature columns are unweighted sparse columns this computes the
  prediction of a linear model which stores all weights in a single variable.

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying number of outputs. Default value is 1.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A list of Variables storing the weights.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.

  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='joint_weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    transformer = _Transformer(columns_to_tensors)
    embedding_lookup_arguments = []
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      try:
        embedding_lookup_arguments.append(
            column._wide_embedding_lookup_arguments(transformed_tensor))   # pylint: disable=protected-access
      except NotImplementedError:
        raise NotImplementedError('Real-valued columns are not supported. '
                                  'Use weighted_sum_from_feature_columns '
                                  'instead, or bucketize these columns.')

    variable, predictions_no_bias = _create_joint_embedding_lookup(
        columns_to_tensors,
        embedding_lookup_arguments,
        num_outputs,
        trainable,
        weight_collections)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=_add_variable_collection(weight_collections))
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, variable, bias
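A hedged usage sketch for the joint builder above: a linear model over a single sparse column. The column and tensor contents are illustrative, and the surrounding feature_column_ops helpers the function calls are assumed to be importable:

import tensorflow as tf
from tensorflow.contrib import layers as contrib_layers

occupation = contrib_layers.sparse_column_with_hash_bucket(
    column_name='occupation', hash_bucket_size=1000)
columns_to_tensors = {
    'occupation': tf.SparseTensor(indices=[[0, 0], [1, 0]],
                                  values=['engineer', 'teacher'],
                                  dense_shape=[2, 1])
}
logits, weights, bias = joint_weighted_sum_from_feature_columns(
    columns_to_tensors, feature_columns=[occupation], num_outputs=1)
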
Example #46
0
def depthwise_convolution2d(
        inputs,
        kernel_size,
        depth_multiplier=1,
        stride=1,
        padding='SAME',
        rate=1,
        activation_fn=nn.relu,
        normalizer_fn=None,
        normalizer_params=None,
        weights_initializer=initializers.xavier_initializer(),
        weights_regularizer=None,
        biases_initializer=init_ops.zeros_initializer(),
        biases_regularizer=None,
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        trainable=True,
        data_format='NHWC',
        scope=None):
    """Adds a depthwise 2D convolution with optional batch_norm layer.
    This op performs a depthwise convolution that acts separately on
    channels, creating a variable called `depthwise_weights`. Then,
    if `normalizer_fn` is None,
    it adds bias to the result, creating a variable called 'biases', otherwise,
    the `normalizer_fn` is applied. It finally applies an activation function
    to produce the end result.
    Args:
        inputs: A tensor of size [batch_size, height, width, channels].
        kernel_size: A list of length 2: [kernel_height, kernel_width] of
          the filters. Can be an int if both values are the same.
        depth_multiplier: The number of depthwise convolution output channels for
          each input channel. The total number of depthwise convolution output
          channels will be equal to `num_filters_in * depth_multiplier`.
        stride: A list of length 2: [stride_height, stride_width], specifying the
          depthwise convolution stride. Can be an int if both strides are the same.
        padding: One of 'VALID' or 'SAME'.
        rate: A list of length 2: [rate_height, rate_width], specifying the dilation
          rates for atrous convolution. Can be an int if both rates are the same.
          If any value is larger than one, then both stride values need to be one.
        activation_fn: Activation function. The default value is a ReLU function.
          Explicitly set it to None to skip it and maintain a linear activation.
        normalizer_fn: Normalization function to use instead of `biases`. If
          `normalizer_fn` is provided then `biases_initializer` and
          `biases_regularizer` are ignored and `biases` are not created nor added.
          default set to None for no normalizer function
        normalizer_params: Normalization function parameters.
        weights_initializer: An initializer for the weights.
        weights_regularizer: Optional regularizer for the weights.
        biases_initializer: An initializer for the biases. If None skip biases.
        biases_regularizer: Optional regularizer for the biases.
        reuse: Whether or not the layer and its variables should be reused. To be
          able to reuse the layer scope must be given.
        variables_collections: Optional list of collections for all the variables or
          a dictionary containing a different list of collection per variable.
        outputs_collections: Collection to add the outputs.
        trainable: Whether or not the variables should be trainable.
        data_format: A string. `NHWC` (default) and `NCHW` are supported.
        scope: Optional scope for variable_scope.
    Returns:
        A `Tensor` representing the output of the operation.
    """
    with variable_scope.variable_scope(scope,
                                       'DepthwiseConv2d', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        # Actually apply depthwise conv instead of separable conv.
        dtype = inputs.dtype.base_dtype
        kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
        stride_h, stride_w = utils.two_element_tuple(stride)
        if data_format == 'NHWC':
            num_filters_in = utils.last_dimension(inputs.get_shape(),
                                                  min_rank=4)
            strides = [1, stride_h, stride_w, 1]
        else:
            num_filters_in = inputs.get_shape().as_list()[1]
            strides = [1, 1, stride_h, stride_w]

        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')

        # Depthwise weights variable.
        depthwise_shape = [
            kernel_h, kernel_w, num_filters_in, depth_multiplier
        ]
        depthwise_weights = variables.model_variable(
            'depthwise_weights',
            shape=depthwise_shape,
            dtype=dtype,
            initializer=weights_initializer,
            regularizer=weights_regularizer,
            trainable=trainable,
            collections=weights_collections)

        outputs = nn.depthwise_conv2d(inputs,
                                      depthwise_weights,
                                      strides,
                                      padding,
                                      rate=utils.two_element_tuple(rate),
                                      data_format=data_format)
        num_outputs = depth_multiplier * num_filters_in

        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[
                        num_outputs,
                    ],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    trainable=trainable,
                    collections=biases_collections)
                outputs = nn.bias_add(outputs, biases, data_format=data_format)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
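An illustrative call to the depthwise_convolution2d defined above: a 3x3 depthwise convolution with stride 2 (the input shape is made up):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 32])  # hypothetical NHWC input
net = depthwise_convolution2d(images,
                              kernel_size=3,
                              depth_multiplier=1,
                              stride=2,
                              scope='dw_conv1')
# With SAME padding and stride 2 the output is [None, 112, 112, 32 * depth_multiplier].
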
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
  """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  Example:

    ```
    # Building model for training
    feature_columns = (
        real_valued_column("my_feature1"),
        ...
    )
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)
    ```

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying number of outputs. Default value is 1.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    output_tensors = []
    column_to_variable = dict()
    transformer = _Transformer(columns_to_tensors)
    # pylint: disable=protected-access
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      transformed_tensor = transformer.transform(column)
      try:
        embedding_lookup_arguments = column._wide_embedding_lookup_arguments(
            transformed_tensor)
        variable, predictions = _create_embedding_lookup(
            column,
            columns_to_tensors,
            embedding_lookup_arguments,
            num_outputs,
            trainable,
            weight_collections)
      except NotImplementedError:
        with variable_scope.variable_scope(
            None,
            default_name=column.name,
            values=columns_to_tensors.values()):
          tensor = column._to_dense_tensor(transformed_tensor)
          tensor = fc._reshape_real_valued_tensor(tensor, 2, column.name)
          variable = [
              contrib_variables.model_variable(
                  name='weight',
                  shape=[tensor.get_shape()[1], num_outputs],
                  initializer=init_ops.zeros_initializer(),
                  trainable=trainable,
                  collections=weight_collections)
          ]
          predictions = math_ops.matmul(tensor, variable[0], name='matmul')
      except ValueError as ee:
        raise ValueError('Error creating weighted sum for column: {}.\n'
                         '{}'.format(column.name, ee))
      output_tensors.append(predictions)
      column_to_variable[column] = variable
      _log_variable(variable)
      _maybe_restore_from_checkpoint(column._checkpoint_path(), variable)
    # pylint: enable=protected-access
    predictions_no_bias = math_ops.add_n(output_tensors)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer(),
        trainable=trainable,
        collections=_add_variable_collection(weight_collections))
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias
Example #48
0
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
    """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  An example usage of weighted_sum_from_feature_columns is as follows:

    # Building model for training
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

    where feature_columns can be defined as follows:

    occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
    occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                     combiner="sum")
    age = real_valued_column("age")
    age_buckets = bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    occupation_x_age = crossed_column(columns=[occupation, age_buckets],
                                      hash_bucket_size=10000)

    feature_columns=[occupation_emb, occupation_x_age]

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying number of outputs. Default value is 1.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:
      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        output_tensors = []
        column_to_variable = dict()
        transformer = _Transformer(columns_to_tensors)
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments = column._to_embedding_lookup_arguments(  # pylint: disable=protected-access
                    transformed_tensor)
                variable, predictions = _create_embedding_lookup(
                    column, columns_to_tensors, embedding_lookup_arguments,
                    num_outputs, trainable, weight_collections)
            except NotImplementedError:
                with variable_scope.variable_scope(
                        None,
                        default_name=column.name,
                        values=columns_to_tensors.values()):
                    tensor = column._to_dense_tensor(transformed_tensor)  # pylint: disable=protected-access
                    variable = [
                        contrib_variables.model_variable(
                            name='weight',
                            shape=[tensor.get_shape()[1], num_outputs],
                            initializer=init_ops.zeros_initializer,
                            collections=weight_collections)
                    ]
                    predictions = math_ops.matmul(tensor,
                                                  variable[0],
                                                  name='matmul')
            except ValueError as ee:
                raise ValueError(
                    'Error creating weighted sum for column: {}.\n'
                    '{}'.format(column.name, ee))
            output_tensors.append(predictions)
            column_to_variable[column] = variable
            _log_variable(variable)
            _maybe_restore_from_checkpoint(column._checkpoint_path(), variable)  # pylint: disable=protected-access

        predictions_no_bias = math_ops.add_n(output_tensors)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer,
            collections=fc._add_variable_collection(weight_collections))  # pylint: disable=protected-access
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, column_to_variable, bias
def fully_connected(inputs,
                    num_outputs,
                    activation_fn=nn.relu,
                    normalizer_fn=None,
                    normalizer_params=None,
                    weights_initializer=initializers.xavier_initializer(),
                    weights_regularizer=None,
                    biases_initializer=init_ops.zeros_initializer,
                    biases_regularizer=None,
                    reuse=None,
                    variables_collections=None,
                    outputs_collections=None,
                    trainable=True,
                    scope=None):
  """Adds a fully connected layer.
  `fully_connected` creates a variable called `weights`, representing a fully
  connected weight matrix, which is multiplied by the `inputs` to produce a
  `Tensor` of hidden units. If a `normalizer_fn` is provided (such as
  `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
  None and a `biases_initializer` is provided then a `biases` variable would be
  created and added the hidden units. Finally, if `activation_fn` is not `None`,
  it is applied to the hidden units as well.
  Note: that if `inputs` have a rank greater than 2, then `inputs` is flattened
  prior to the initial matrix multiply by `weights`.
  Args:
    inputs: A tensor of at least rank 2 and a static value for the last
      dimension, i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer, the number of output units in the layer.
    activation_fn: activation function.
    normalizer_fn: normalization function to use instead of `biases`. If
      `normalizer_fn` is provided then `biases_initializer` and
      `biases_regularizer` are ignored and `biases` are not created nor added.
    normalizer_params: normalization function parameters.
    weights_initializer: An initializer for the weights.
    weights_regularizer: Optional regularizer for the weights.
    biases_initializer: An initializer for the biases. If None skip biases.
    biases_regularizer: Optional regularizer for the biases.
    reuse: whether or not the layer and its variables should be reused. To be
      able to reuse the layer scope must be given.
    variables_collections: Optional list of collections for all the variables or
      a dictionary containing a different list of collections per variable.
    outputs_collections: collection to add the outputs.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_op_scope.
  Returns:
     the tensor variable representing the result of the series of operations.
  Raises:
    ValueError: if x has rank less than 2 or if its last dimension is not set.
  """
  if not isinstance(num_outputs, int):
    raise ValueError('num_outputs should be integer, got %s.' % num_outputs)
  with variable_scope.variable_op_scope([inputs],
                                        scope,
                                        'fully_connected',
                                        reuse=reuse) as sc:
    dtype = inputs.dtype.base_dtype
    num_input_units = utils.last_dimension(inputs.get_shape(), min_rank=2)

    static_shape = inputs.get_shape().as_list()
    static_shape[-1] = num_outputs

    out_shape = array_ops.unpack(array_ops.shape(inputs))
    out_shape[-1] = num_outputs

    weights_shape = [num_input_units, num_outputs]
    weights_collections = utils.get_variable_collections(
        variables_collections, 'weights')
    weights = variables.model_variable('weights',
                                       shape=weights_shape,
                                       dtype=dtype,
                                       initializer=weights_initializer,
                                       regularizer=weights_regularizer,
                                       collections=weights_collections,
                                       trainable=trainable)
    if len(static_shape) > 2:
      # Reshape inputs
      inputs = array_ops.reshape(inputs, [-1, num_input_units])
    outputs = standard_ops.matmul(inputs, weights)
    if normalizer_fn:
      normalizer_params = normalizer_params or {}
      outputs = normalizer_fn(outputs, **normalizer_params)
    else:
      if biases_initializer is not None:
        biases_collections = utils.get_variable_collections(
            variables_collections, 'biases')
        biases = variables.model_variable('biases',
                                          shape=[num_outputs,],
                                          dtype=dtype,
                                          initializer=biases_initializer,
                                          regularizer=biases_regularizer,
                                          collections=biases_collections,
                                          trainable=trainable)
        outputs = nn.bias_add(outputs, biases)
    if len(static_shape) > 2:
      # Reshape back outputs
      outputs = array_ops.reshape(outputs, array_ops.pack(out_shape))
      outputs.set_shape(static_shape)
    if activation_fn:
      outputs = activation_fn(outputs)
    return utils.collect_named_outputs(outputs_collections, sc.name, outputs)
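An illustrative two-layer head built with the fully_connected above. Note this version uses the pre-1.0 variable_op_scope / array_ops.pack APIs, so it only runs against a matching old TensorFlow release; the shapes below are made up:

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 128])               # hypothetical features
hidden = fully_connected(inputs, 64, scope='fc1')              # ReLU by default
logits = fully_connected(hidden, 10, activation_fn=None, scope='fc2')
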
Example #50
0
def _embeddings_from_arguments(column,
                               args,
                               weight_collections,
                               trainable,
                               output_rank=2):
  """Returns embeddings for a column based on the computed arguments.

  Args:
   column: the column name.
   args: the _DeepEmbeddingLookupArguments for this column.
   weight_collections: collections to store weights in.
   trainable: whether these embeddings should be trainable.
   output_rank: the desired rank of the returned `Tensor`. Inner dimensions will
     be combined to produce the desired rank.

  Returns:
   the embeddings.

  Raises:
   ValueError: if not possible to create.
  """
  # pylint: disable=protected-access
  input_tensor = layers._inner_flatten(args.input_tensor, output_rank)
  weight_tensor = None
  if args.weight_tensor is not None:
    weight_tensor = layers._inner_flatten(args.weight_tensor, output_rank)
  # pylint: enable=protected-access

  if args.hashed:
    embeddings = contrib_variables.model_variable(
        name='weights',
        shape=[args.vocab_size],
        dtype=dtypes.float32,
        initializer=args.initializer,
        trainable=trainable,
        collections=weight_collections)

    return embedding_ops.hashed_embedding_lookup_sparse(
        embeddings, input_tensor, args.dimension,
        combiner=args.combiner, name='lookup')

  if args.shared_embedding_name is not None:
    shared_embedding_collection_name = (
        'SHARED_EMBEDDING_COLLECTION_' + args.shared_embedding_name.upper())
    graph = ops.get_default_graph()
    shared_embedding_collection = (
        graph.get_collection_ref(shared_embedding_collection_name))
    shape = [args.vocab_size, args.dimension]
    if shared_embedding_collection:
      if len(shared_embedding_collection) > 1:
        raise ValueError('Collection %s can only contain one '
                         '(partitioned) variable.'
                         % shared_embedding_collection_name)
      else:
        embeddings = shared_embedding_collection[0]
        if embeddings.get_shape() != shape:
          raise ValueError('The embedding variable with name {} already '
                           'exists, but its shape does not match required '
                           'embedding shape here. Please make sure to use '
                           'different shared_embedding_name for different '
                           'shared embeddings.'.format(
                               args.shared_embedding_name))
    else:
      embeddings = contrib_variables.model_variable(
          name=args.shared_embedding_name,
          shape=shape,
          dtype=dtypes.float32,
          initializer=args.initializer,
          trainable=trainable,
          collections=weight_collections)
      graph.add_to_collection(shared_embedding_collection_name, embeddings)
  else:
    embeddings = contrib_variables.model_variable(
        name='weights',
        shape=[args.vocab_size, args.dimension],
        dtype=dtypes.float32,
        initializer=args.initializer,
        trainable=trainable,
        collections=weight_collections)

  if isinstance(embeddings, variables.Variable):
    embeddings = [embeddings]
  else:
    embeddings = embeddings._get_variable_list()  # pylint: disable=protected-access
  # pylint: disable=protected-access
  _maybe_restore_from_checkpoint(
      column._checkpoint_path(), embeddings)
  return embedding_ops.safe_embedding_lookup_sparse(
      embeddings,
      input_tensor,
      sparse_weights=weight_tensor,
      combiner=args.combiner,
      name=column.name + 'weights',
      max_norm=args.max_norm)
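The shared-embedding branch above keeps one (partitioned) variable per shared_embedding_name in a graph collection and re-checks its shape on reuse. A small sketch of that bookkeeping in plain Python, where EmbeddingRegistry, its method, and the sizes are illustrative assumptions rather than TensorFlow APIs:

import numpy as np

class EmbeddingRegistry(object):
    """Toy stand-in for the graph collections used for shared embeddings."""

    def __init__(self):
        self._collections = {}

    def get_or_create(self, shared_name, vocab_size, dimension, rng):
        key = 'SHARED_EMBEDDING_COLLECTION_' + shared_name.upper()
        shape = (vocab_size, dimension)
        existing = self._collections.get(key, [])
        if len(existing) > 1:
            raise ValueError('Collection %s can only contain one variable.' % key)
        if existing:
            embeddings = existing[0]
            if embeddings.shape != shape:
                raise ValueError('Shared embedding %r exists with shape %s, '
                                 'requested %s.' % (shared_name, embeddings.shape, shape))
            return embeddings
        embeddings = rng.standard_normal(shape)   # create and register once
        self._collections[key] = [embeddings]
        return embeddings

rng = np.random.default_rng(0)
registry = EmbeddingRegistry()
a = registry.get_or_create('dept', vocab_size=100, dimension=8, rng=rng)
b = registry.get_or_create('dept', vocab_size=100, dimension=8, rng=rng)
assert a is b   # the second request reuses the first matrix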
Example #51
0
def convolution(inputs,
                num_outputs,
                kernel_size,
                stride=1,
                padding='SAME',
                data_format=None,
                rate=1,
                activation_fn=nn.relu,
                normalizer_fn=None,
                normalizer_params=None,
                weights_normalizer_fn=None,
                weights_normalizer_params=None,
                weights_initializer=initializers.xavier_initializer(),
                weights_regularizer=None,
                biases_initializer=init_ops.zeros_initializer(),
                biases_regularizer=None,
                reuse=None,
                variables_collections=None,
                outputs_collections=None,
                trainable=True,
                scope=None):
    # Copied and modified from tensorflow-0.12.0 contrib.layers.convolution,
    # adding the weights_normalizer_* options.
    """Adds an N-D convolution followed by an optional batch_norm layer.

    It is required that 1 <= N <= 3.

    `convolution` creates a variable called `weights`, representing the
    convolutional kernel, that is convolved (actually cross-correlated) with the
    `inputs` to produce a `Tensor` of activations. If a `normalizer_fn` is
    provided (such as `batch_norm`), it is then applied. Otherwise, if
    `normalizer_fn` is None and a `biases_initializer` is provided then a `biases`
    variable would be created and added to the activations. Finally, if
    `activation_fn` is not `None`, it is applied to the activations as well.

    Performs a'trous convolution with input stride/dilation rate equal to `rate`
    if a value > 1 for any dimension of `rate` is specified.  In this case
    `stride` values != 1 are not supported.

    Args:
      inputs: a Tensor of rank N+2 of shape
        `[batch_size] + input_spatial_shape + [in_channels]` if data_format does
        not start with "NC" (default), or
        `[batch_size, in_channels] + input_spatial_shape` if data_format starts
        with "NC".
      num_outputs: integer, the number of output filters.
      kernel_size: a sequence of N positive integers specifying the spatial
        dimensions of the filters.  Can be a single integer to specify the same
        value for all spatial dimensions.
      stride: a sequence of N positive integers specifying the stride at which to
        compute output.  Can be a single integer to specify the same value for all
        spatial dimensions.  Specifying any `stride` value != 1 is incompatible
        with specifying any `rate` value != 1.
      padding: one of `"VALID"` or `"SAME"`.
      data_format: A string or None.  Specifies whether the channel dimension of
        the `input` and output is the last dimension (default, or if `data_format`
        does not start with "NC"), or the second dimension (if `data_format`
        starts with "NC").  For N=1, the valid values are "NWC" (default) and
        "NCW".  For N=2, the valid values are "NHWC" (default) and "NCHW".  For
        N=3, currently the only valid value is "NDHWC".
      rate: a sequence of N positive integers specifying the dilation rate to use
        for a'trous convolution.  Can be a single integer to specify the same
        value for all spatial dimensions.  Specifying any `rate` value != 1 is
        incompatible with specifying any `stride` value != 1.
      activation_fn: activation function, set to None to skip it and maintain
        a linear activation.
      normalizer_fn: normalization function to use instead of `biases`. If
        `normalizer_fn` is provided then `biases_initializer` and
        `biases_regularizer` are ignored and `biases` are not created nor added.
        Default is None, meaning no normalizer function is applied.
      normalizer_params: normalization function parameters.
      weights_normalizer_fn: weights normalization function.
      weights_normalizer_params: weights normalization function parameters.
      weights_initializer: An initializer for the weights.
      weights_regularizer: Optional regularizer for the weights.
      biases_initializer: An initializer for the biases. If None skip biases.
      biases_regularizer: Optional regularizer for the biases.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables or
        a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.

    Returns:
      a tensor representing the output of the operation.

    Raises:
      ValueError: if `data_format` is invalid.
      ValueError: if both `rate` and `stride` are not uniformly 1.
    """
    if data_format not in [None, 'NWC', 'NCW', 'NHWC', 'NCHW', 'NDHWC']:
        raise ValueError('Invalid data_format: %r' % (data_format, ))
    with variable_scope.variable_scope(scope, 'Conv', [inputs],
                                       reuse=reuse) as sc:
        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        input_rank = inputs.get_shape().ndims
        if input_rank is None:
            raise ValueError('Rank of inputs must be known')
        if input_rank < 3 or input_rank > 5:
            raise ValueError(
                'Rank of inputs is %d, which is not >= 3 and <= 5' %
                input_rank)
        conv_dims = input_rank - 2
        kernel_size = utils.n_positive_integers(conv_dims, kernel_size)
        stride = utils.n_positive_integers(conv_dims, stride)
        rate = utils.n_positive_integers(conv_dims, rate)

        if data_format is None or data_format.endswith('C'):
            num_input_channels = inputs.get_shape()[input_rank - 1].value
        elif data_format.startswith('NC'):
            num_input_channels = inputs.get_shape()[1].value
        else:
            raise ValueError('Invalid data_format')

        if num_input_channels is None:
            raise ValueError('Number of in_channels must be known.')

        weights_shape = (list(kernel_size) + [num_input_channels, num_outputs])
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        if weights_normalizer_fn is not None:
            weights_normalizer_params = weights_normalizer_params or {}
            weights = weights_normalizer_fn(weights,
                                            **weights_normalizer_params)
        outputs = nn.convolution(input=inputs,
                                 filter=weights,
                                 dilation_rate=rate,
                                 strides=stride,
                                 padding=padding,
                                 data_format=data_format)
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable(
                    'biases',
                    shape=[num_outputs],
                    dtype=dtype,
                    initializer=biases_initializer,
                    regularizer=biases_regularizer,
                    collections=biases_collections,
                    trainable=trainable)
                outputs = nn.bias_add(outputs, biases, data_format=data_format)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)
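The weights_normalizer_fn hook above is applied to the kernel just before nn.convolution. One plausible normalizer, sketched in NumPy with illustrative shapes (the original code does not prescribe any particular function), rescales each output filter to unit L2 norm:

import numpy as np

def l2_normalize_kernel(weights, eps=1e-12):
    """Normalize a conv kernel so each output filter has unit L2 norm.

    `weights` has shape spatial_dims + [in_channels, out_channels]; every axis
    except the last (output channels) is reduced.
    """
    reduce_axes = tuple(range(weights.ndim - 1))
    norms = np.sqrt(np.sum(weights ** 2, axis=reduce_axes, keepdims=True))
    return weights / (norms + eps)

kernel = np.random.randn(3, 3, 16, 32)   # [height, width, in_channels, out_channels]
normalized = l2_normalize_kernel(kernel)
per_filter_norms = np.sqrt((normalized ** 2).sum(axis=(0, 1, 2)))
assert np.allclose(per_filter_norms, 1.0)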
Example #52
0
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
  """A tf.contrib.layer style linear prediction builder based on FeatureColumns.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  An example usage of weighted_sum_from_feature_columns is as follows:

    # Building model for training
    columns_to_tensor = tf.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits, labels)

    where feature_columns can be defined as follows:

    occupation = sparse_column_with_hash_bucket(column_name="occupation",
                                              hash_bucket_size=1000)
    occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16,
                                     combiner="sum")
    age = real_valued_column("age")
    age_buckets = bucketized_column(
        source_column=age,
        boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
    occupation_x_age = crossed_column(columns=[occupation, age_buckets],
                                      hash_bucket_size=10000)

    feature_columns=[occupation_emb, occupation_x_age]

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying number of outputs. Default value is 1.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple of followings:
      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
  check_feature_columns(feature_columns)
  with variable_scope.variable_scope(
      scope,
      default_name='weighted_sum_from_feature_columns',
      values=columns_to_tensors.values()):
    output_tensors = []
    column_to_variable = dict()
    transformer = _Transformer(columns_to_tensors)
    for column in sorted(set(feature_columns), key=lambda x: x.key):
      with variable_scope.variable_scope(
          None,
          default_name=column.name,
          values=columns_to_tensors.values()):
        try:
          transformed_tensor = transformer.transform(column)
          predictions, variable = column.to_weighted_sum(transformed_tensor,
                                                         num_outputs,
                                                         weight_collections,
                                                         trainable)
        except ValueError as e:
          raise ValueError('Error creating weighted sum for column: {}.\n'
                           '{}'.format(column.name, e))
      output_tensors.append(predictions)
      column_to_variable[column] = variable
      _log_variable(variable)

    predictions_no_bias = math_ops.add_n(output_tensors)
    bias = contrib_variables.model_variable(
        'bias_weight',
        shape=[num_outputs],
        initializer=init_ops.zeros_initializer,
        collections=fc._add_variable_collection(weight_collections))  # pylint: disable=protected-access
    _log_variable(bias)
    predictions = nn_ops.bias_add(predictions_no_bias, bias)

    return predictions, column_to_variable, bias
Example #53
0
def conv2d_leaders(inputs,
                   num_outputs,
                   kernel_size,
                   rates=[1],
                   stride=1,
                   padding='SAME',
                   activation_fn=nn.relu,
                   normalizer_fn=None,
                   normalizer_params=None,
                   weights_initializer=initializers.xavier_initializer(),
                   weights_regularizer=None,
                   biases_initializer=init_ops.zeros_initializer,
                   biases_regularizer=None,
                   reuse=None,
                   variables_collections=None,
                   outputs_collections=None,
                   trainable=True,
                   scope=None,):
    """Adds a 2D convolution followed by an optional batch_norm layer.
    `convolution2d` creates a variable called `weights`, representing the
    convolutional kernel, that is convolved with the `inputs` to produce a
    `Tensor` of activations. If a `normalizer_fn` is provided (such as
    `batch_norm`), it is then applied. Otherwise, if `normalizer_fn` is
    None and a `biases_initializer` is provided then a `biases` variable would be
    created and added the activations. Finally, if `activation_fn` is not `None`,
    it is applied to the activations as well.
    Performs a'trous convolution with input stride equal to rate if rate is
    greater than one.
    Args:
        inputs: a 4-D tensor  `[batch_size, height, width, channels]`.
        num_outputs: integer, the number of output filters.
        kernel_size: a list of length 2 `[kernel_height, kernel_width]` specifying
          the spatial dimensions of the filters. Can be an int if both values
          are the same.
        stride: a list of length 2 `[stride_height, stride_width]`.
          Can be an int if both strides are the same. Note that presently
          both strides must have the same value.
        padding: one of `VALID` or `SAME`.
        rates: a list of integers. For each rate less than or equal to 1, a
          standard convolution is used. For each rate greater than 1, a'trous
          convolution is applied and `stride` must be set to 1.
        activation_fn: activation function.
        normalizer_fn: normalization function to use instead of `biases`. If
          `normalizer_fn` is provided then `biases_initializer` and
          `biases_regularizer` are ignored and `biases` are not created nor added.
        normalizer_params: normalization function parameters.
        weights_initializer: An initializer for the weights.
        weights_regularizer: Optional regularizer for the weights.
        biases_initializer: An initializer for the biases. If None skip biases.
        biases_regularizer: Optional regularizer for the biases.
        reuse: whether or not the layer and its variables should be reused. To be
          able to reuse the layer scope must be given.
        variables_collections: optional list of collections for all the variables or
          a dictionary containing a different list of collection per variable.
        outputs_collections: collection to add the outputs.
        trainable: If `True` also add variables to the graph collection
          `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
        scope: Optional scope for `variable_op_scope`.
    Returns:
        a tensor representing the output of the operation.
    Raises:
        ValueError: if both 'rate' and `stride` are larger than one.
    """
    with variable_scope.variable_scope(scope, 'Conv', [inputs],
                                       reuse=reuse) as sc:

        inputs = ops.convert_to_tensor(inputs)
        dtype = inputs.dtype.base_dtype
        # inshape = tf.shape(inputs)

        # Leading kernel size.
        kernel_h, kernel_w = utils.two_element_tuple(kernel_size)
        stride_h, stride_w = utils.two_element_tuple(stride)
        num_filters_in = utils.last_dimension(inputs.get_shape(), min_rank=4)

        # Weights variable.
        weights_shape = [kernel_h, kernel_w,
                         num_filters_in, num_outputs]
        weights_collections = utils.get_variable_collections(
            variables_collections, 'weights')
        weights = variables.model_variable('weights',
                                           shape=weights_shape,
                                           dtype=dtype,
                                           initializer=weights_initializer,
                                           regularizer=weights_regularizer,
                                           collections=weights_collections,
                                           trainable=trainable)
        # # Bias variable.
        # biases = None
        # if biases_initializer is not None:
        #     biases_collections = utils.get_variable_collections(
        #         variables_collections, 'biases')
        #     biases = variables.model_variable('biases',
        #                                       shape=[num_outputs, ],
        #                                       dtype=dtype,
        #                                       initializer=biases_initializer,
        #                                       regularizer=biases_regularizer,
        #                                       collections=biases_collections,
        #                                       trainable=trainable)

        # Convolution at different scales.
        outputs_pool = []
        for rate in rates:
            if rate > 1:
                conv = nn.atrous_conv2d(inputs, weights, rate, padding='SAME')
            else:
                conv = nn.conv2d(inputs, weights, [1, 1, 1, 1], padding='SAME')
            outputs_pool.append(conv)
        # 'Pooling' at different scales. A bit hacky. Use of concat + max_pool?
        outputs = None
        outputs_pool.reverse()
        for node in outputs_pool:
            if outputs is None:
                outputs = node
            else:
                outputs = tf.maximum(outputs, node)
        # # Add bias?
        # if biases is not None:
        #     outputs = tf.nn.bias_add(outputs, biases)

        # Fix padding and stride. A bit hacky too and not so efficient!
        if padding == 'VALID' or stride > 1:
            padfilter = np.zeros(shape=(kernel_h, kernel_w, num_filters_in, 1),
                                 dtype=dtype)
            x = (kernel_h - 1) // 2
            y = (kernel_w - 1) // 2
            padfilter[x, y, :, 0] = 1.
            outputs = tf.nn.depthwise_conv2d(outputs, padfilter,
                                             [1, stride_h, stride_w, 1],
                                             padding=padding)

        # Batch norm / bias and activation...
        if normalizer_fn is not None:
            normalizer_params = normalizer_params or {}
            outputs = normalizer_fn(outputs, **normalizer_params)
        else:
            if biases_initializer is not None:
                biases_collections = utils.get_variable_collections(
                    variables_collections, 'biases')
                biases = variables.model_variable('biases',
                                                  shape=[num_outputs, ],
                                                  dtype=dtype,
                                                  initializer=biases_initializer,
                                                  regularizer=biases_regularizer,
                                                  collections=biases_collections,
                                                  trainable=trainable)
                outputs = nn.bias_add(outputs, biases)
        if activation_fn is not None:
            outputs = activation_fn(outputs)
        return utils.collect_named_outputs(outputs_collections,
                                           sc.name, outputs)
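The 'pooling at different scales' step above merges the per-rate convolution outputs with an element-wise maximum. A small NumPy sketch of just that combination, with the convolutions replaced by precomputed feature maps of illustrative shape:

import functools
import numpy as np

def max_over_rates(responses):
    """Element-wise maximum over per-rate feature maps of identical shape."""
    return functools.reduce(np.maximum, responses)

# Stand-ins for conv outputs computed at rates 1, 2 and 4.
responses = [np.random.randn(2, 8, 8, 16) for _ in (1, 2, 4)]
merged = max_over_rates(responses)
assert merged.shape == responses[0].shape
assert np.all(merged >= responses[0])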
Example #54
0
File: train.py  Project: nii4u/imm
def main(args):
    config = load_configs(args.configs)
    train_config = config.training
    gpus = range(args.ngpus)

    # get the data and logging (checkpointing) directories:
    data_dir = train_config.datadir
    log_dir = train_config.logdir

    SUBSET = 'train'
    NUM_STEPS = 30000000
    # value at which the gradients are clipped
    GRAD_CLIP = train_config.gradclip

    if args.checkpoint is not None:
        checkpoint_fname = args.checkpoint
    else:
        print(
            colorize('No checkpoint file specified. Initializing randomly.',
                     'red',
                     bold=True))
        checkpoint_fname = osp.join(log_dir, 'INVALID')

    opts = {}
    opts['gpu_ids'] = gpus
    opts['log_dir'] = log_dir
    opts['n_summary'] = 10  # number of iterations after which to run the summary-op
    if hasattr(train_config, 'n_test'):
        opts['n_test'] = train_config.n_test
    else:
        opts['n_test'] = 500
    # number of iterations after which to save the model
    opts['n_checkpoint'] = train_config.ncheckpoint

    batch_size = train_config.batch
    graph = tf.Graph()
    with graph.as_default():
        global_step = variables.model_variable(
            'global_step',
            shape=[],
            initializer=tf.constant_initializer(args.reset_global_step),
            trainable=False)

        # common model / optimizer parameters:
        lr = args.lr_multiple * tf.train.exponential_decay(
            train_config.lr.start_val,
            global_step,
            train_config.lr.step,
            train_config.lr.decay,
            staircase=True)
        if train_config.optim.lower() == 'adam':
            optim = tf.train.AdamOptimizer(lr, name='Adam')
        elif train_config.optim.lower() == 'adadelta':
            optim = tf.train.AdadeltaOptimizer(lr,
                                               rho=0.95,
                                               epsilon=1e-06,
                                               use_locking=False,
                                               name='Adadelta')
        elif train_config.optim.lower() == 'adagrad':
            optim = tf.train.AdagradOptimizer(lr,
                                              use_locking=False,
                                              name='AdaGrad')
        else:
            raise ValueError('Optimizer = %s not supported' %
                             train_config.optim)

        factory = model_factory(IMMModel,
                                config=config.model,
                                global_step=global_step)

        opts['batch_size'] = batch_size
        tf.summary.scalar('lr', lr)  # add a summary
        print(colorize('log_dir: ' + log_dir, 'green', bold=True))
        print(colorize('BATCH-SIZE: %d' % batch_size, 'red', bold=True))

        # dynamic import of a dataset class
        dset_class = import_dataset(train_config.dset)

        # default datasets parameters
        train_dset_params = {}
        test_dset_params = {}

        train_subset = 'train'
        test_subset = 'test'
        if hasattr(train_config, 'train_dset_params'):
            train_dset_params.update(train_config.train_dset_params)
            if 'subset' in train_dset_params:
                train_subset = train_dset_params['subset']
                # delete because not positional kwarg
                del train_dset_params['subset']
        if hasattr(train_config, 'test_dset_params'):
            test_dset_params.update(train_config.test_dset_params)
            if 'subset' in test_dset_params:
                test_subset = test_dset_params['subset']
                # delete because not positional kwarg
                del test_dset_params['subset']

        train_dset = dset_class(train_config.datadir,
                                subset=train_subset,
                                **train_dset_params)
        train_dset = train_dset.get_dataset(batch_size,
                                            repeat=True,
                                            shuffle=False,
                                            num_preprocess_threads=12)

        if hasattr(train_config, 'max_test_samples'):
            raise ValueError('max_test_samples attribute deprecated')
        test_dset = dset_class(train_config.datadir,
                               subset=test_subset,
                               **test_dset_params)
        test_dset = test_dset.get_dataset(batch_size,
                                          repeat=False,
                                          shuffle=False,
                                          num_preprocess_threads=12)

        # set up inputs
        training_pl = tf.placeholder(tf.bool)
        handle_pl = tf.placeholder(tf.string, shape=[])
        base_iterator = tf.data.Iterator.from_string_handle(
            handle_pl, train_dset.output_types, train_dset.output_shapes)
        inputs = base_iterator.get_next()

        split_gpus = False
        if hasattr(config.model, 'split_gpus'):
            split_gpus = config.model.split_gpus

        # create the network distributed over multi-GPUs:
        loss, train_op, train_summary_op, test_summary_op, _ = tru.setup_training(
            opts,
            graph,
            optim,
            inputs,
            training_pl,
            factory,
            global_step,
            clip_value=GRAD_CLIP,
            split_gpus=split_gpus)

        # run the training loop:
        if args.restore_optim:
            restore_vars = 'all'
        else:
            restore_vars = 'model'

        tru.train_loop(opts,
                       graph,
                       loss,
                       train_dset,
                       training_pl,
                       handle_pl,
                       train_op,
                       train_summary_op,
                       test_summary_op,
                       NUM_STEPS,
                       global_step,
                       checkpoint_fname,
                       test_dataset=test_dset,
                       ignore_missing_vars=args.ignore_missing_vars,
                       reset_global_step=args.reset_global_step,
                       vars_to_restore=restore_vars,
                       exclude_vars=[],
                       allow_growth=train_config.allow_growth)
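The learning-rate schedule above is tf.train.exponential_decay with staircase=True, i.e. the rate drops by a constant factor every decay_steps steps. A pure-Python sketch of that formula with made-up hyperparameters (the real values come from train_config.lr):

def staircase_exponential_decay(start_val, global_step, decay_steps, decay_rate):
    """lr = start_val * decay_rate ** floor(global_step / decay_steps)."""
    return start_val * decay_rate ** (global_step // decay_steps)

for step in (0, 999, 1000, 2500):
    lr = staircase_exponential_decay(0.001, step, decay_steps=1000, decay_rate=0.5)
    print(step, lr)   # 0.001, 0.001, 0.0005, 0.00025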
Example #55
0
def weighted_sum_from_feature_columns(columns_to_tensors,
                                      feature_columns,
                                      num_outputs,
                                      weight_collections=None,
                                      trainable=True,
                                      scope=None):
    """A tf.contrib.layers style linear prediction builder based on FeatureColumn.

  Generally a single example in training data is described with feature columns.
  This function generates weighted sum for each num_outputs. Weighted sum refers
  to logits in classification problems. It refers to prediction itself for
  linear regression problems.

  Example:

    ```
    # Building model for training
    feature_columns = (
        real_valued_column("my_feature1"),
        ...
    )
    columns_to_tensor = tf.io.parse_example(...)
    logits = weighted_sum_from_feature_columns(
        columns_to_tensors=columns_to_tensor,
        feature_columns=feature_columns,
        num_outputs=1)
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                   logits=logits)
    ```

  Args:
    columns_to_tensors: A mapping from feature column to tensors. 'string' key
      means a base feature (not-transformed). It can have FeatureColumn as a
      key too. That means that FeatureColumn is already transformed by input
      pipeline. For example, `inflow` may have handled transformations.
    feature_columns: A set containing all the feature columns. All items in the
      set should be instances of classes derived from FeatureColumn.
    num_outputs: An integer specifying number of outputs. Default value is 1.
    weight_collections: List of graph collections to which weights are added.
    trainable: If `True` also add variables to the graph collection
      `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
    scope: Optional scope for variable_scope.

  Returns:
    A tuple containing:

      * A Tensor which represents predictions of a linear model.
      * A dictionary which maps feature_column to corresponding Variable.
      * A Variable which is used for bias.

  Raises:
    ValueError: if FeatureColumn cannot be used for linear predictions.
  """
    columns_to_tensors = columns_to_tensors.copy()
    check_feature_columns(feature_columns)
    with variable_scope.variable_scope(
            scope,
            default_name='weighted_sum_from_feature_columns',
            values=columns_to_tensors.values()):
        output_tensors = []
        column_to_variable = {}
        transformer = _Transformer(columns_to_tensors)
        # pylint: disable=protected-access
        for column in sorted(set(feature_columns), key=lambda x: x.key):
            transformed_tensor = transformer.transform(column)
            try:
                embedding_lookup_arguments = column._wide_embedding_lookup_arguments(
                    transformed_tensor)
                variable, predictions = _create_embedding_lookup(
                    column, columns_to_tensors, embedding_lookup_arguments,
                    num_outputs, trainable, weight_collections)
            except NotImplementedError:
                with variable_scope.variable_scope(
                        None,
                        default_name=column.name,
                        values=columns_to_tensors.values()):
                    tensor = column._to_dense_tensor(transformed_tensor)
                    tensor = _maybe_reshape_input_tensor(tensor,
                                                         column.name,
                                                         output_rank=2)
                    variable = [
                        contrib_variables.model_variable(
                            name='weight',
                            shape=[tensor.get_shape()[1], num_outputs],
                            initializer=init_ops.zeros_initializer(),
                            trainable=trainable,
                            collections=weight_collections)
                    ]
                    predictions = math_ops.matmul(tensor,
                                                  variable[0],
                                                  name='matmul')
            except ValueError as ee:
                raise ValueError(
                    'Error creating weighted sum for column: {}.\n'
                    '{}'.format(column.name, ee))
            output_tensors.append(
                array_ops.reshape(predictions, shape=(-1, num_outputs)))
            column_to_variable[column] = variable
            _log_variable(variable)
            fc._maybe_restore_from_checkpoint(column._checkpoint_path(),
                                              variable)  # pylint: disable=protected-access
        # pylint: enable=protected-access
        predictions_no_bias = math_ops.add_n(output_tensors)
        bias = contrib_variables.model_variable(
            'bias_weight',
            shape=[num_outputs],
            initializer=init_ops.zeros_initializer(),
            trainable=trainable,
            collections=_add_variable_collection(weight_collections))
        _log_variable(bias)
        predictions = nn_ops.bias_add(predictions_no_bias, bias)

        return predictions, column_to_variable, bias
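The prediction assembled above is a sum of per-column matmuls plus a shared bias_weight. A hedged NumPy sketch of that arithmetic, where the column names, shapes, and zero-initialized weights are illustrative assumptions:

import numpy as np

def weighted_sum(columns_to_dense, column_weights, bias):
    """Sum of per-column matmuls plus bias, as in a linear model over columns."""
    terms = [columns_to_dense[name] @ column_weights[name] for name in columns_to_dense]
    return sum(terms) + bias

batch, num_outputs = 4, 1
columns_to_dense = {'age': np.random.randn(batch, 1),
                    'occupation_emb': np.random.randn(batch, 16)}
column_weights = {name: np.zeros((t.shape[1], num_outputs))
                  for name, t in columns_to_dense.items()}
bias = np.zeros(num_outputs)
logits = weighted_sum(columns_to_dense, column_weights, bias)
assert logits.shape == (batch, num_outputs)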
Example #56
0
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents if this is training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, set up the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
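At evaluation and inference time the code above recovers full logits as net @ weights.T + biases and, for evaluation, spreads probability 1/n_labels over each example's true classes. A NumPy sketch of those two steps with illustrative shapes and values:

import numpy as np

def eval_logits_and_targets(net, weights, biases, target_indices, n_classes):
    """Full softmax logits plus 1/n_labels soft targets for multi-label eval."""
    logits = net @ weights.T + biases                     # [batch, n_classes]
    n_labels = target_indices.shape[1]
    targets = np.zeros((target_indices.shape[0], n_classes))
    for row, labels in enumerate(target_indices):
        for label in labels:
            targets[row, label] += 1.0 / n_labels
    return logits, targets

net = np.random.randn(2, 32)                  # final hidden layer activations
weights = np.random.randn(10, 32)             # [n_classes, final_hidden_layer_dim]
biases = np.zeros(10)
target_indices = np.array([[1, 3], [0, 0]])   # n_labels = 2 per example
logits, targets = eval_logits_and_targets(net, weights, biases, target_indices, 10)
assert logits.shape == (2, 10) and np.allclose(targets.sum(axis=1), 1.0)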