def __init__(self,
              relu=True,
              init_zero=False,
              center=True,
              scale=True,
              data_format='channels_last',
              **kwargs):
     """Creates the layer together with its batch-normalization sublayer.

     Args:
       relu: stored as `self.relu`; it is not consulted in this constructor.
       init_zero: when truthy, gamma is initialized with zeros instead of
         ones.
       center: forwarded to `tf.keras.layers.BatchNormalization`.
       scale: forwarded to `tf.keras.layers.BatchNormalization`.
       data_format: 'channels_first' normalizes over axis 1; every other
         value normalizes over the last axis.
       **kwargs: forwarded to the parent class constructor.
     """
     super(BatchNormRelu, self).__init__(**kwargs)
     self.relu = relu
     # Initial value of the learned scale (gamma).
     gamma_initializer = (
         tf.zeros_initializer() if init_zero else tf.ones_initializer())
     channel_axis = 1 if data_format == 'channels_first' else -1
     self.bn = tf.keras.layers.BatchNormalization(
         axis=channel_axis,
         momentum=BATCH_NORM_DECAY,
         epsilon=BATCH_NORM_EPSILON,
         center=center,
         scale=scale,
         fused=False,
         gamma_initializer=gamma_initializer)
Esempio n. 2
0
    def test_shared_sequence_non_sequence_into_input_layer(self):
        """Feeds one shared embedding through a sequence and a dense layer.

        A sequence column and a non-sequence column share a 4-dimensional
        embedding initialized with ones and combined with 'sum'. The
        assertions below expect ones per present sequence step (zeros for
        the padded step) and, for the dense layer, the number of ids in
        each row repeated across the embedding dimension.
        """
        non_seq_col = tf.feature_column.categorical_column_with_identity(
            'non_seq', num_buckets=10)
        seq_col = tf.feature_column.sequence_categorical_column_with_identity(
            'seq', num_buckets=10)
        shared_non_seq, shared_seq = tf.feature_column.shared_embeddings(
            [non_seq_col, seq_col],
            dimension=4,
            combiner='sum',
            initializer=tf.ones_initializer(),
            shared_embedding_collection_name='shared')

        # Both features carry the same sparse ids: two in row 0, one in row 1.
        seq_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                                  values=[0, 1, 2],
                                  dense_shape=[2, 2])
        non_seq_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                                      values=[0, 1, 2],
                                      dense_shape=[2, 2])
        features = {'seq': seq_ids, 'non_seq': non_seq_ids}

        # Run the same feature dict through the sequence layer and then the
        # dense layer, each using its half of the shared embedding.
        sequence_layer = ksfc.SequenceFeatures([shared_seq])
        seq_input, seq_length = sequence_layer(features)
        dense_layer = dense_features.DenseFeatures([shared_non_seq])
        non_seq_input = dense_layer(features)

        with self.cached_session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            fetched = sess.run([seq_input, seq_length, non_seq_input])
            output_seq, output_seq_length, output_non_seq = fetched
            self.assertAllEqual(
                output_seq,
                [[[1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [0, 0, 0, 0]]])
            self.assertAllEqual(output_seq_length, [2, 1])
            self.assertAllEqual(output_non_seq, [[2, 2, 2, 2], [1, 1, 1, 1]])
    def __init__(self,
                 num_groups=None,
                 group_size=None,
                 eps=1e-5,
                 beta_init=tf.zeros_initializer(),
                 gamma_init=tf.ones_initializer(),
                 **kwargs):
        """Stores the group-normalization configuration on the layer.

        Args:
          num_groups: int, the number of channel-groups to normalize over.
            Replaced by 32 when both it and `group_size` are None.
          group_size: int, size of the groups to normalize over.
          eps: float, a small additive constant to avoid /sqrt(0).
          beta_init: initializer for bias, defaults to zeros.
          gamma_init: initializer for scale, defaults to ones.
          **kwargs: other tf.keras.layers.Layer arguments.
        """
        super(GroupNormalization, self).__init__(**kwargs)
        # With no grouping requested at all, fall back to 32 groups.
        nothing_requested = num_groups is None and group_size is None

        self._num_groups = 32 if nothing_requested else num_groups
        self._group_size = group_size
        self._eps = eps
        self._beta_init = beta_init
        self._gamma_init = gamma_init
Esempio n. 4
0
  def __init__(self, hdim, dtype=tf.float32, name="LayerNorm"):
    """Creates the `beta` and `gamma` variables of the layer.

    Args:
      hdim: length of the one-dimensional `beta` and `gamma` variables.
      dtype: dtype of both variables; also stored as `self._dtype`.
      name: passed to the parent constructor and used as the variable scope
        in which both variables are created.
    """
    super(NormLayer, self).__init__(name=name)
    self._dtype = dtype

    param_shape = [hdim]
    with tf.compat.v1.variable_scope(name):
      # beta is initialized with zeros, gamma with ones.
      self.beta = tf.compat.v1.get_variable(
          "beta",
          shape=param_shape,
          dtype=dtype,
          initializer=tf.zeros_initializer())
      self.gamma = tf.compat.v1.get_variable(
          "gamma",
          shape=param_shape,
          dtype=dtype,
          initializer=tf.ones_initializer())
Esempio n. 5
0
 def __init__(self,
              relu=True,
              init_zero=False,
              center=True,
              scale=True,
              data_format='channels_last',
              **kwargs):
     """Creates the layer together with its batch-normalization sublayer.

     `FLAGS.global_bn` selects between
     `tf.keras.layers.experimental.SyncBatchNormalization` and the regular
     `tf.keras.layers.BatchNormalization` (built with `fused=False`).

     Args:
       relu: stored as `self.relu`; it is not consulted in this constructor.
       init_zero: when truthy, gamma is initialized with zeros instead of
         ones.
       center: forwarded to the batch-normalization layer.
       scale: forwarded to the batch-normalization layer.
       data_format: 'channels_first' normalizes over axis 1; every other
         value normalizes over the last axis.
       **kwargs: forwarded to the parent class constructor.
     """
     super(BatchNormRelu, self).__init__(**kwargs)
     self.relu = relu
     gamma_initializer = (
         tf.zeros_initializer() if init_zero else tf.ones_initializer())
     channel_axis = 1 if data_format == 'channels_first' else -1

     use_sync_bn = FLAGS.global_bn
     # Arguments common to both batch-normalization flavours.
     shared_bn_args = dict(
         axis=channel_axis,
         momentum=FLAGS.batch_norm_decay,
         epsilon=BATCH_NORM_EPSILON,
         center=center,
         scale=scale,
         gamma_initializer=gamma_initializer)

     # TODO(srbs): Set fused=True
     # Batch normalization layers with fused=True only support 4D input
     # tensors.
     if use_sync_bn:
         self.bn = tf.keras.layers.experimental.SyncBatchNormalization(
             **shared_bn_args)
     else:
         self.bn = tf.keras.layers.BatchNormalization(
             fused=False, **shared_bn_args)
Esempio n. 6
0
    def call(self, input_tensor):
        """Normalizes `input_tensor` over its last axis.

        On the first call (while `self.beta` / `self.gamma` are still None)
        the offset and scale variables are created with the size of the last
        input dimension and appended to `self._trainable_weights`.

        Args:
          input_tensor: value convertible with `tf.convert_to_tensor`.

        Returns:
          The normalized tensor, with its static shape set to the shape
          reported by `get_shape_list` for the input.
        """
        inputs = tf.convert_to_tensor(input_tensor)
        inputs_shape = get_shape_list(inputs)
        dtype = inputs.dtype.base_dtype
        norm_axis = len(inputs_shape) - 1
        params_shape = [inputs_shape[norm_axis]]

        # Lazily allocate the offset (beta, zeros) and scale (gamma, ones).
        if self.beta is None:
            self.beta = tf.compat.v1.get_variable(
                "beta",
                shape=params_shape,
                dtype=dtype,
                initializer=tf.zeros_initializer(),
                trainable=True)
            self._trainable_weights.append(self.beta)
        if self.gamma is None:
            self.gamma = tf.compat.v1.get_variable(
                "gamma",
                shape=params_shape,
                dtype=dtype,
                initializer=tf.ones_initializer(),
                trainable=True)
            self._trainable_weights.append(self.gamma)

        # Statistics are taken along the last axis only.
        mean, variance = tf.nn.moments(inputs, axes=[norm_axis], keepdims=True)

        # float16 needs a larger epsilon because of its limited representable
        # range.
        if dtype != tf.float16:
            variance_epsilon = 1e-12
        else:
            variance_epsilon = 1e-3

        # Layer normalization expressed through the batch_normalization op.
        outputs = tf.nn.batch_normalization(
            inputs,
            mean,
            variance,
            offset=self.beta,
            scale=self.gamma,
            variance_epsilon=variance_epsilon)
        outputs.set_shape(inputs_shape)
        return outputs
Esempio n. 7
0
    def test_generate_candidates(self,
                                 want_names,
                                 want_subnetwork_losses,
                                 want_mixture_weight_losses,
                                 want_complexities,
                                 learn_mixture_weights=False,
                                 initial_num_layers=0,
                                 previous_ensemble=None):
        """Runs one training step per generated candidate and checks results.

        Args:
          want_names: expected builder names, in generation order.
          want_subnetwork_losses: expected subnetwork loss of each candidate,
            evaluated after both train ops have run once.
          want_mixture_weight_losses: expected mixture-weight loss of each
            candidate, evaluated at the same point.
          want_complexities: expected `subnetwork.complexity` of each
            candidate.
          learn_mixture_weights: forwarded to `simple_dnn.Generator`.
          initial_num_layers: forwarded to `simple_dnn.Generator`.
          previous_ensemble: passed to `generate_candidates` and to
            `build_subnetwork`.
        """
        generator = simple_dnn.Generator(
            feature_columns=[tf.feature_column.numeric_column("x")],
            optimizer=tf.compat.v1.train.GradientDescentOptimizer(.1),
            layer_size=3,
            initial_num_layers=initial_num_layers,
            learn_mixture_weights=learn_mixture_weights,
            seed=42)
        with context.graph_mode(), tf.Graph().as_default() as g:
            iteration_step = tf.compat.v1.train.create_global_step()
            features = {"x": [[1.], [2.]]}
            labels = tf.constant([[0.], [1.]])

            def mean_sigmoid_loss(logits):
                # Mean sigmoid cross-entropy of `logits` against `labels`.
                return tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits, labels=labels))

            got_names = []
            got_subnetwork_losses = []
            got_mixture_weight_losses = []
            got_complexities = []
            candidates = generator.generate_candidates(
                previous_ensemble,
                # The following arguments are not used by
                # simple_dnn.BuilderGenerator's generate_candidates.
                iteration_number=0,
                previous_ensemble_reports=[],
                all_reports=[])
            for builder in candidates:
                got_names.append(builder.name)

                # 1. Build subnetwork graph.
                subnetwork = builder.build_subnetwork(
                    features,
                    logits_dimension=1,
                    training=True,
                    iteration_step=iteration_step,
                    summary=tf.summary,
                    previous_ensemble=previous_ensemble)

                # 2. Build subnetwork train ops.
                subnetwork_loss = mean_sigmoid_loss(subnetwork.logits)
                # NOTE(review): previous_ensemble is None here although
                # build_subnetwork above receives the argument - confirm
                # this is intended.
                subnetwork_train_op = builder.build_subnetwork_train_op(
                    subnetwork,
                    subnetwork_loss,
                    var_list=None,
                    labels=labels,
                    iteration_step=iteration_step,
                    summary=tf.summary,
                    previous_ensemble=None)

                # 3. Build mixture weight train ops.

                # Stop gradients, since mixture-weight training should not
                # propagate beyond the top layer.
                frozen_logits = tf.stop_gradient(subnetwork.logits)

                # Mixture weight will initialize to a one-valued scalar.
                mixture_weight_logits = tf.compat.v1.layers.dense(
                    frozen_logits,
                    units=1,
                    use_bias=False,
                    kernel_initializer=tf.ones_initializer())
                mixture_weight_loss = mean_sigmoid_loss(mixture_weight_logits)
                mixture_weight_train_op = (
                    builder.build_mixture_weights_train_op(
                        mixture_weight_loss,
                        var_list=None,
                        labels=labels,
                        logits=mixture_weight_logits,
                        iteration_step=iteration_step,
                        summary=tf.summary))

                with self.test_session(graph=g) as sess:
                    sess.run(tf.compat.v1.global_variables_initializer())
                    # One step of each train op, then read the metrics.
                    sess.run(subnetwork_train_op)
                    sess.run(mixture_weight_train_op)
                    got_subnetwork_losses.append(sess.run(subnetwork_loss))
                    got_mixture_weight_losses.append(
                        sess.run(mixture_weight_loss))
                    got_complexities.append(sess.run(subnetwork.complexity))

        self.assertEqual(want_names, got_names)
        close_checks = (
            (want_subnetwork_losses, got_subnetwork_losses),
            (want_mixture_weight_losses, got_mixture_weight_losses),
            (want_complexities, got_complexities),
        )
        for want, got in close_checks:
            self.assertAllClose(want, got, atol=1e-3)