Ejemplo n.º 1
0
  def dnn_logit_fn(features, mode):
    """Builds the feed-forward tower and returns its logits.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. Its values are
                handed to the input variable scope, so it is iterated like a
                `dict` of `Tensor`s.
      mode: Optional. Specifies if this training, evaluation or prediction. See
            `ModeKeys`. Dropout is only added when this is `ModeKeys.TRAIN`.

    Returns:
      A single logits `Tensor` when `units` is an `int`; otherwise a list with
      one logits `Tensor` per entry of `units` (the MultiHead case).
    """
    input_values = tuple(six.itervalues(features))
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=input_values,
        partitioner=input_layer_partitioner):
      net = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)

    # Stack the hidden layers, each inside its own 'hiddenlayer_<i>' scope.
    for depth, width in enumerate(hidden_units):
      scope_name = 'hiddenlayer_%d' % depth
      with variable_scope.variable_scope(
          scope_name, values=(net,)) as layer_scope:
        net = core_layers.dense(
            net,
            units=width,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = core_layers.dropout(net, rate=dropout, training=True)
      _add_hidden_layer_summary(net, layer_scope.name)

    def _logits_layer(scope_name, dimension):
      """Adds one linear output layer on top of `net` and summarizes it."""
      with variable_scope.variable_scope(
          scope_name, values=(net,)) as head_scope:
        head_logits = core_layers.dense(
            net,
            units=dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=head_scope)
      _add_hidden_layer_summary(head_logits, head_scope.name)
      return head_logits

    # Single head: one tensor under the plain 'logits' scope.
    if isinstance(units, int):
      return _logits_layer('logits', units)

    # Multiple heads: one tensor per head, each under 'logits_head_<i>'.
    return [
        _logits_layer('logits_head_{}'.format(head_index), logits_dimension)
        for head_index, logits_dimension in enumerate(units)
    ]
Ejemplo n.º 2
0
  def dnn_logit_fn(features, mode):
    """Builds the feed-forward tower (optionally batch-normalized) and logits.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. Its values are
                handed to the input variable scope, so it is iterated like a
                `dict` of `Tensor`s.
      mode: Optional. Specifies if this training, evaluation or prediction. See
            `ModeKeys`. Dropout is only added, and batch normalization only
            runs in training mode, when this is `ModeKeys.TRAIN`.

    Returns:
      A `Tensor` representing the logits.
    """
    training = mode == model_fn.ModeKeys.TRAIN
    input_values = tuple(six.itervalues(features))
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=input_values,
        partitioner=input_layer_partitioner):
      hidden = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)

    for depth, width in enumerate(hidden_units):
      scope_name = 'hiddenlayer_%d' % depth
      with variable_scope.variable_scope(
          scope_name, values=(hidden,)) as layer_scope:
        hidden = core_layers.dense(
            hidden,
            units=width,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=layer_scope)
        if training and dropout is not None:
          hidden = core_layers.dropout(hidden, rate=dropout, training=True)
        if batch_norm:
          # TODO(hjm): In future, if this becomes popular, we can enable
          # customization of the batch normalization params by accepting a
          # list of `BatchNormalization` instances as `batch_norm`.
          #
          # The default momentum 0.99 actually crashes on certain
          # problem, so here we use 0.999, which is the default of
          # tf.contrib.layers.batch_norm.
          hidden = normalization.batch_normalization(
              hidden,
              momentum=0.999,
              training=training,
              name='batchnorm_%d' % depth)
      _add_hidden_layer_summary(hidden, layer_scope.name)

    # Final linear projection to `units` outputs.
    with variable_scope.variable_scope(
        'logits', values=(hidden,)) as output_scope:
      output = core_layers.dense(
          hidden,
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=output_scope)
    _add_hidden_layer_summary(output, output_scope.name)

    return output
Ejemplo n.º 3
0
 def testAttentionCellWrapperCorrectResult(self):
   """Checks AttentionCellWrapper output and state against golden values.

   The same expected arrays are asserted for both `state_is_tuple=False` and
   `state_is_tuple=True`; in the tuple case the state is concatenated along
   axis 1 before comparison.
   """
   num_units = 4
   attn_length = 6
   batch_size = 2
   # Golden output, shape (batch_size, num_units).
   expected_output = np.array(
       [[1.068372, 0.45496, -0.678277, 0.340538],
        [1.018088, 0.378983, -0.572179, 0.268591]],
       dtype=np.float32)
   # Golden flattened state: 36 values per row, i.e. three blocks of
   # `num_units` plus one block of `attn_length * num_units`.
   expected_state = np.array(
       [[0.74946702, 0.34681597, 0.26474735, 1.06485605, 0.38465962,
         0.11420801, 0.10272158, 0.30925757, 0.63899988, 0.7181077,
         0.47534478, 0.33715725, 0.58086717, 0.49446869, 0.7641536,
         0.12814975, 0.92231739, 0.89857256, 0.21889746, 0.38442063,
         0.53481543, 0.8876909, 0.45823169, 0.5905602, 0.78038228,
         0.56501579, 0.03971386, 0.09870267, 0.8074435, 0.66821432,
         0.99211812, 0.12295902, 1.14606023, 0.34370938, -0.79251152,
         0.51843399],
        [0.5179342, 0.48682183, -0.25426468, 0.96810579, 0.28809637,
         0.13607743, -0.11446252, 0.26792109, 0.78047138, 0.63460857,
         0.49122369, 0.52007174, 0.73000264, 0.66986895, 0.73576689,
         0.86301267, 0.87887371, 0.35185754, 0.93417215, 0.64732957,
         0.63173044, 0.66627824, 0.53644657, 0.20477486, 0.98458421,
         0.38277245, 0.03746676, 0.92510188, 0.57714164, 0.84932971,
         0.36127412, 0.12125921, 1.1362772, 0.34361625, -0.78150457,
         0.70582712]],
       dtype=np.float32)
   seed = 12345
   # NOTE(review): the golden values presumably depend on this graph-level
   # seed and on the order in which ops are created below -- regenerate them
   # if statements are reordered.
   random_seed.set_random_seed(seed)
   for state_is_tuple in [False, True]:
     with session.Session() as sess:
       # `reuse` is False on the first pass and True on the second, so the
       # second pass reuses the variables created by the first.
       with variable_scope.variable_scope(
           "state_is_tuple", reuse=state_is_tuple,
           initializer=init_ops.glorot_uniform_initializer()):
         lstm_cell = core_rnn_cell_impl.BasicLSTMCell(
             num_units, state_is_tuple=state_is_tuple)
         cell = rnn_cell.AttentionCellWrapper(
             lstm_cell, attn_length, state_is_tuple=state_is_tuple)
         # Despite the `zeros*` names, these are seeded random-uniform
         # tensors in [0, 1), each with its own op-level seed.
         zeros1 = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 1)
         zeros2 = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 2)
         zeros3 = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 3)
         attn_state_zeros = random_ops.random_uniform(
             (batch_size, attn_length * num_units), 0.0, 1.0, seed=seed + 4)
         # Tuple-form initial state: ((zeros1, zeros2), zeros3, attn_state).
         zero_state = ((zeros1, zeros2), zeros3, attn_state_zeros)
         if not state_is_tuple:
           # The non-tuple cell takes the same pieces concatenated on axis 1.
           zero_state = array_ops.concat([
               zero_state[0][0], zero_state[0][1], zero_state[1], zero_state[2]
           ], 1)
         inputs = random_ops.random_uniform(
             (batch_size, num_units), 0.0, 1.0, seed=seed + 5)
         output, state = cell(inputs, zero_state)
         if state_is_tuple:
           # Flatten the tuple state so both modes compare against the same
           # `expected_state` array.
           state = array_ops.concat(
               [state[0][0], state[0][1], state[1], state[2]], 1)
         sess.run(variables.global_variables_initializer())
         self.assertAllClose(sess.run(output), expected_output)
         self.assertAllClose(sess.run(state), expected_state)
Ejemplo n.º 4
0
 def test_glorot_uniform_initializer(self):
   """Runs the seeded Glorot-uniform initializer through `self._runner`.

   The target mean is 0 and the target standard deviation is
   sqrt(2 / (fan_in + fan_out)) for a (5, 6, 4, 2) tensor.
   """
   shape = (5, 6, 4, 2)
   with self.cached_session():
     n_in, n_out = init_ops._compute_fans(shape)
     expected_std = np.sqrt(2. / (n_in + n_out))
     initializer = init_ops.glorot_uniform_initializer(seed=123)
     self._runner(
         initializer,
         shape,
         target_mean=0.,
         target_std=expected_std)
    def __init__(self,
                 reference_dims,
                 hypothesis_dims,
                 hidden_dims,
                 float_dtype,
                 dropout_attn,
                 training,
                 name,
                 attn_type='multiplicative'):
        """Store the layer configuration and create its parameters.

        All parameters live under a variable scope named `name`. A keys
        projection (`reference_dims` -> `hidden_dims`) is always created; a
        queries projection (`hypothesis_dims` -> `hidden_dims`) and an
        'attention_weight' variable are created only for additive attention
        and are left as `None` otherwise.

        Args:
            reference_dims: input size of the keys projection.
            hypothesis_dims: input size of the queries projection.
            hidden_dims: output size of both projections and shape of the
                attention weight.
            float_dtype: dtype used for the created parameters.
            dropout_attn: stored on the instance; not used in this method.
            training: forwarded to the projection layers.
            name: variable scope name for this layer.
            attn_type: either 'additive' or 'multiplicative'.
        """
        # Keep the configuration on the instance
        self.name = name
        self.training = training
        self.attn_type = attn_type
        self.dropout_attn = dropout_attn
        self.float_dtype = float_dtype
        self.hidden_dims = hidden_dims
        self.hypothesis_dims = hypothesis_dims
        self.reference_dims = reference_dims

        supported_types = ['additive', 'multiplicative']
        assert attn_type in supported_types, 'Attention type {:s} is not supported.'.format(attn_type)

        # Both projections are plain linear maps: no dropout, activation,
        # bias or layer normalization.
        projection_settings = dict(dropout_rate=0.,
                                   activation=None,
                                   use_bias=False,
                                   use_layer_norm=False,
                                   training=self.training)

        # Create the parameters
        with tf.variable_scope(self.name):
            self.queries_projection = None
            self.attn_weight = None
            if attn_type == 'additive':
                self.queries_projection = FeedForwardLayer(self.hypothesis_dims,
                                                           self.hidden_dims,
                                                           float_dtype,
                                                           name='queries_projection',
                                                           **projection_settings)

                self.attn_weight = tf.get_variable(name='attention_weight',
                                                   shape=self.hidden_dims,
                                                   dtype=float_dtype,
                                                   initializer=glorot_uniform_initializer(),
                                                   trainable=True)

            self.keys_projection = FeedForwardLayer(self.reference_dims,
                                                    self.hidden_dims,
                                                    float_dtype,
                                                    name='keys_projection',
                                                    **projection_settings)
Ejemplo n.º 6
0
 def build(self, input_shape):
   """Creates the optional `scale` weight, then defers to the parent `build`.

   Args:
     input_shape: indexable of shapes; the last dimension of
       `input_shape[1]` determines the length of the `scale` weight.
   """
   last_dim = tensor_shape.TensorShape(input_shape[1])[-1]
   # Unwrap `Dimension` objects to their plain value.
   if isinstance(last_dim, tensor_shape.Dimension):
     last_dim = last_dim.value
   if not self.use_scale:
     self.scale = None
   else:
     self.scale = self.add_weight(
         name='scale',
         shape=[last_dim],
         initializer=init_ops.glorot_uniform_initializer(),
         dtype=self.dtype,
         trainable=True)
   super(AdditiveAttention, self).build(input_shape)
Ejemplo n.º 7
0
    def __init__(self, vocabulary_size, embedding_size, hidden_size, float_dtype, name):
        """Create the embedding table and its transpose under scope `name`.

        Args:
            vocabulary_size: number of rows of the embedding table.
            embedding_size: number of columns of the embedding table.
            hidden_size: stored on the instance; not used in this method.
            float_dtype: dtype of the embedding table.
            name: variable scope name for the layer.
        """
        # Keep the configuration on the instance
        self.name = name
        self.float_dtype = float_dtype
        self.hidden_size = hidden_size
        self.vocabulary_size = vocabulary_size

        table_shape = [vocabulary_size, embedding_size]

        # Build the table and expose its transpose as the projection matrix
        with tf.variable_scope(self.name):
            self.embedding_table = tf.get_variable(
                name='embedding_table',
                shape=table_shape,
                dtype=float_dtype,
                initializer=glorot_uniform_initializer(),
                trainable=True)
            self.projection_matrix = tf.transpose(
                self.embedding_table, name='vocab_projection_matrix')
Ejemplo n.º 8
0
  def rnn_logit_fn(features, mode):
    """Builds the sequence input, runs the RNN and projects to logits.

    Args:
      features: This is the first item returned from the `input_fn`
                passed to `train`, `evaluate`, and `predict`. Its values are
                handed to the input variable scope, so it is iterated like a
                `dict` of `Tensor`s.
      mode: Optional. Specifies if this training, evaluation or prediction. See
            `ModeKeys`. It is forwarded to `rnn_cell_fn`.

    Returns:
      A `Tensor` representing the logits.
    """
    input_values = tuple(six.itervalues(features))
    with variable_scope.variable_scope(
        'sequence_input_layer',
        values=input_values,
        partitioner=input_layer_partitioner):
      rnn_inputs, lengths = seq_fc.sequence_input_layer(
          features=features, feature_columns=sequence_feature_columns)
      summary.histogram('sequence_length', lengths)

      # Context features, when configured, are merged into the sequence input.
      if context_feature_columns:
        context = feature_column_lib.input_layer(
            features=features,
            feature_columns=context_feature_columns)
        rnn_inputs = seq_fc.concatenate_context_input(context, rnn_inputs)

    recurrent_cell = rnn_cell_fn(mode)
    # Only the per-step outputs are used; the final state is discarded.
    rnn_outputs, _unused_state = rnn.dynamic_rnn(
        cell=recurrent_cell,
        inputs=rnn_inputs,
        sequence_length=lengths,
        dtype=dtypes.float32,
        time_major=False)
    final_activations = _select_last_activations(rnn_outputs, lengths)

    with variable_scope.variable_scope('logits', values=(rnn_outputs,)):
      return core_layers.dense(
          final_activations,
          units=output_units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer())
Ejemplo n.º 9
0
Archivo: rnn.py Proyecto: imito/odin
  def __init__(self, num_units,
          W_init=None,
          b_init=init_ops.constant_initializer(0.),
          rnn_mode='lstm', num_layers=1,
          skip_input=False, is_bidirectional=False,
          return_states=False, dropout=0., **kwargs):
    """Stores the configuration of the recurrent layer.

    Args:
      num_units: number of units; coerced with `int`.
      W_init: weight initializer. When `None` (the default) a Glorot-uniform
        initializer with a freshly drawn seed is created for this instance.
        Previously the default was built once at definition time, so every
        instance relying on it shared one initializer with one seed.
      b_init: bias initializer.
      rnn_mode: recurrent mode name; coerced with `str`.
      num_layers: number of layers; coerced with `int`.
      skip_input: coerced with `bool`; a warning is emitted when truthy.
      is_bidirectional: coerced with `bool`.
      return_states: coerced with `bool`.
      dropout: stored as given.
      **kwargs: forwarded to the parent initializer.
    """
    super(CudnnRNN, self).__init__(**kwargs)
    # ====== defaults recurrent control ====== #
    self.num_units = int(num_units)
    self.num_layers = int(num_layers)
    self.rnn_mode = str(rnn_mode)
    self.skip_input = bool(skip_input)
    self.is_bidirectional = bool(is_bidirectional)
    self.return_states = bool(return_states)
    self.dropout = dropout

    # Resolve the default per instance so each layer gets its own seed.
    if W_init is None:
      W_init = init_ops.glorot_uniform_initializer(seed=randint())
    self.W_init = W_init
    self.b_init = b_init
    if skip_input:
      wprint("`skip_input` is not supported in Tensorflow.")
Ejemplo n.º 10
0
    def __init__(self,
                 in_size,
                 out_size,
                 float_dtype,
                 dropout_rate,
                 activation,
                 use_bias,
                 use_layer_norm,
                 training,
                 name):
        """Store the layer configuration and create its parameters.

        Parameters are created under a variable scope named `name`:
        'dense_layer_weights' always, 'dense_layer_biases' only when
        `use_bias` is truthy. `self.biases` and `self.layer_norm_layer` are
        always defined and are `None` when the corresponding option is off.

        Args:
            in_size: input size of the layer.
            out_size: output size of the layer; when `None` the weights (and
                biases) have shape `[in_size]`.
            float_dtype: dtype of the created variables.
            dropout_rate: stored on the instance; not used in this method.
            activation: stored on the instance; not used in this method.
            use_bias: whether to create the bias variable.
            use_layer_norm: whether to create a `LayerNormLayer(out_size)`.
            training: stored on the instance; not used in this method.
            name: variable scope name for the layer.
        """
        # Set attributes
        self.in_size = in_size
        self.out_size = out_size
        self.dropout_rate = dropout_rate
        self.activation = activation
        self.use_bias = use_bias
        self.training = training
        self.name = name

        with tf.variable_scope(self.name):
            # Set up layer normalization
            if use_layer_norm:
                self.layer_norm_layer = LayerNormLayer(out_size)
            else:
                self.layer_norm_layer = None

            # Define parameters
            weights_shape = [in_size, out_size] if out_size is not None else [in_size]
            self.weights = tf.get_variable(name='dense_layer_weights',
                                           shape=weights_shape,
                                           dtype=float_dtype,
                                           initializer=glorot_uniform_initializer(),
                                           trainable=True)
            if use_bias:
                biases_shape = [out_size] if out_size is not None else [in_size]
                self.biases = tf.get_variable(name='dense_layer_biases',
                                              shape=biases_shape,
                                              dtype=float_dtype,
                                              initializer=tf.zeros_initializer(),
                                              trainable=True)
            else:
                # Keep the attribute defined, mirroring `layer_norm_layer`,
                # so reading it never raises AttributeError.
                self.biases = None
  def __init__(self, hparams, item, cluster, controller_id=0):
    """HierarchicalController class initializer.

    Declares the controller's variables under the `hparams.name` variable
    scope (opened with `AUTO_REUSE` and a seeded Glorot-uniform default
    initializer) and creates the default-valued `seq2seq_input_layer`
    placeholder.

    Args:
      hparams: All hyper-parameters.
      item: The metagraph to place.
      cluster: The cluster of hardware devices to optimize for.
      controller_id: the id of the controller in a multi-controller setup.
    """
    super(HierarchicalController, self).__init__(item, cluster)
    self.ctrl_id = controller_id
    self.hparams = hparams

    if self.hparams.num_groups is None:
      self.num_groups = min(256, 20 * self.num_devices)
    else:
      self.num_groups = self.hparams.num_groups

    # creates self.op_embeddings and self.type_dict
    self.create_op_embeddings(verbose=False)
    # TODO(azalia) clean up embedding/group_embedding_size names
    self.group_emb_size = (
        2 * self.num_groups + len(self.type_dict) +
        self.hparams.max_num_outputs * self.hparams.max_output_size)
    self.embedding_size = self.group_emb_size
    self.initializer = init_ops.glorot_uniform_initializer(
        seed=self.hparams.seed)

    with variable_scope.variable_scope(
        self.hparams.name,
        initializer=self.initializer,
        reuse=variable_scope.AUTO_REUSE):
      # define parameters of feedforward
      variable_scope.get_variable("w_grouping_ff", [
          1 + self.hparams.max_num_outputs * self.hparams.max_output_size +
          self.hparams.adj_embed_dim, self.hparams.grouping_hidden_size
      ])
      variable_scope.get_variable(
          "w_grouping_softmax",
          [self.hparams.grouping_hidden_size, self.num_groups])

      # Only the encoder variables differ between the two configurations.
      if self.hparams.bi_lstm:
        variable_scope.get_variable("encoder_lstm_forward", [
            self.embedding_size + self.hparams.hidden_size / 2,
            2 * self.hparams.hidden_size
        ])
        variable_scope.get_variable("encoder_lstm_backward", [
            self.embedding_size + self.hparams.hidden_size / 2,
            2 * self.hparams.hidden_size
        ])
      else:
        variable_scope.get_variable("encoder_lstm", [
            self.embedding_size + self.hparams.hidden_size,
            4 * self.hparams.hidden_size
        ])

      # The remaining variables are identical for both configurations; they
      # are declared once here, in the same order as before, after the
      # encoder variables.
      variable_scope.get_variable(
          "device_embeddings", [self.num_devices, self.hparams.hidden_size])
      variable_scope.get_variable(
          "decoder_lstm",
          [2 * self.hparams.hidden_size, 4 * self.hparams.hidden_size])
      variable_scope.get_variable(
          "device_softmax", [2 * self.hparams.hidden_size, self.num_devices])
      variable_scope.get_variable("device_go_embedding",
                                  [1, self.hparams.hidden_size])
      # Forget biases override the scope initializer with a constant.
      for bias_name in ("encoder_forget_bias", "decoder_forget_bias"):
        variable_scope.get_variable(
            bias_name,
            shape=1,
            dtype=dtypes.float32,
            initializer=init_ops.constant_initializer(
                self.hparams.forget_bias_init))
      for attn_name in ("attn_w_1", "attn_w_2"):
        variable_scope.get_variable(
            attn_name, [self.hparams.hidden_size, self.hparams.hidden_size])
      variable_scope.get_variable("attn_v", [self.hparams.hidden_size, 1])

    # Placeholder defaulting to zeros, with a leading dimension of 1.
    seq2seq_input_layer = array_ops.placeholder_with_default(
        array_ops.zeros([1, self.num_groups, self.group_emb_size],
                        dtypes.float32),
        shape=(1, self.num_groups, self.group_emb_size))
    self.seq2seq_input_layer = seq2seq_input_layer
Ejemplo n.º 12
0
 def _initialize(self, X):
     # ====== check inputs dimensions ====== #
     if not hasattr(X, 'shape'):
         raise ValueError("`X` must have `shape` attribute.")
     feat_dim = np.prod(X.shape[1:])
     if self._feat_dim is None:
         self._feat_dim = feat_dim
     # validate input dimension
     if feat_dim != self._feat_dim:
         raise RuntimeError("Feature dimension mismatch %d and %d" %
                            (feat_dim, self.feat_dim))
     # check if tensorflow op initalized
     if hasattr(self, '_f_train'):
         return
     # ====== binary or multi-classes ====== #
     if self.nb_classes == 2:
         out_shape = (None, )
         fn_activation = tf.nn.sigmoid
         fn_loss = tf.losses.sigmoid_cross_entropy
         fn_acc = K.metrics.binary_accuracy
     else:
         out_shape = (None, self.nb_classes)
         fn_activation = tf.nn.softmax
         fn_loss = tf.losses.softmax_cross_entropy
         fn_acc = K.metrics.categorical_accuracy
     # ====== create model ====== #
     with tf.name_scope(self.name, 'logistic_regression'):
         # inputs
         self._X = K.placeholder(shape=(None, self.feat_dim),
                                 dtype=self.dtype,
                                 name='%s_input' % self.name)
         self._y = K.placeholder(shape=out_shape,
                                 dtype=self.dtype,
                                 name='%s_output' % self.name)
         # check the bias
         if is_number(self.fit_intercept):
             b_init = float(self.fit_intercept)
         elif self.fit_intercept is False or \
         self.fit_intercept is None:
             b_init = None
         else:
             b_init = self.fit_intercept
         # create the model and initialize
         with K.variable_dtype(dtype=self.dtype):
             self._model = N.Dense(
                 num_units=self.nb_classes,
                 W_init=init_ops.glorot_uniform_initializer(
                     seed=self._rand_state.randint()),
                 b_init=b_init,
                 activation=K.linear)
             y_logits = self._model(self._X)
         y_prob = fn_activation(y_logits)
         # applying class weights
         class_weights = tf.constant(value=self._class_weight,
                                     dtype=self.dtype,
                                     name="class_weights")
         weights = tf.gather(
             class_weights,
             tf.cast(self._y, 'int32')
             if self.nb_classes == 2 else tf.argmax(self._y, axis=-1))
         # optimizer
         params = [
             v for v in self._model.variables
             if has_roles(v, Weight) or has_roles(v, Bias)
         ]
         losses = fn_loss(self._y, y_logits, weights=weights)
         l1_norm = tf.norm(self._model.get('W'),
                           ord=1) if self.l1 > 0. else 0
         l2_norm = tf.norm(self._model.get('W'),
                           ord=2) if self.l2 > 0. else 0
         losses = losses + self.l1 * l1_norm + self.l2 * l2_norm
         acc = fn_acc(self._y, y_prob)
         updates = self._optimizer.get_updates(losses, params)
         # create function
         if self.confusion_matrix:
             cm = K.metrics.confusion_matrix(y_true=self._y,
                                             y_pred=y_prob,
                                             labels=self.nb_classes)
         metrics = [losses, acc, cm
                    ] if self.confusion_matrix else [losses, acc]
         self._f_train = K.function(inputs=(self._X, self._y),
                                    outputs=metrics,
                                    updates=updates,
                                    training=True)
         self._f_score = K.function(inputs=(self._X, self._y),
                                    outputs=metrics,
                                    training=False)
         self._f_pred_prob = K.function(inputs=self._X,
                                        outputs=y_prob,
                                        training=False)
         self._f_pred_logit = K.function(inputs=self._X,
                                         outputs=y_logits,
                                         training=False)
     return self
Ejemplo n.º 13
0
    def __init__(self, hparams, item, cluster, controller_id=0):
        """HierarchicalController class initializer.

        Declares the controller's variables under the `hparams.name` variable
        scope (opened with `AUTO_REUSE` and a seeded Glorot-uniform default
        initializer) and creates the default-valued `seq2seq_input_layer`
        placeholder whose leading dimension is `hparams.num_children`.

        Args:
          hparams: All hyper-parameters.
          item: The metagraph to place.
          cluster: The cluster of hardware devices to optimize for.
          controller_id: the id of the controller in a multi-controller setup.
        """
        super(HierarchicalController, self).__init__(item, cluster)
        self.ctrl_id = controller_id
        self.hparams = hparams

        if self.hparams.num_groups is None:
            self.num_groups = min(256, 20 * self.num_devices)
        else:
            self.num_groups = self.hparams.num_groups

        # creates self.op_embeddings and self.type_dict
        self.create_op_embeddings(verbose=False)
        # TODO(azalia) clean up embedding/group_embedding_size names
        self.group_emb_size = (
            2 * self.num_groups + len(self.type_dict) +
            self.hparams.max_num_outputs * self.hparams.max_output_size)
        self.embedding_size = self.group_emb_size
        self.initializer = init_ops.glorot_uniform_initializer(
            seed=self.hparams.seed)

        with variable_scope.variable_scope(self.hparams.name,
                                           initializer=self.initializer,
                                           reuse=variable_scope.AUTO_REUSE):
            # define parameters of feedforward
            variable_scope.get_variable("w_grouping_ff", [
                1 +
                self.hparams.max_num_outputs * self.hparams.max_output_size +
                self.hparams.adj_embed_dim, self.hparams.grouping_hidden_size
            ])
            variable_scope.get_variable(
                "w_grouping_softmax",
                [self.hparams.grouping_hidden_size, self.num_groups])

            # Only the encoder variables differ between the two
            # configurations.
            if self.hparams.bi_lstm:
                variable_scope.get_variable("encoder_lstm_forward", [
                    self.embedding_size + self.hparams.hidden_size / 2,
                    2 * self.hparams.hidden_size
                ])
                variable_scope.get_variable("encoder_lstm_backward", [
                    self.embedding_size + self.hparams.hidden_size / 2,
                    2 * self.hparams.hidden_size
                ])
            else:
                variable_scope.get_variable("encoder_lstm", [
                    self.embedding_size + self.hparams.hidden_size,
                    4 * self.hparams.hidden_size
                ])

            # The remaining variables are identical for both configurations;
            # they are declared once here, in the same order as before, after
            # the encoder variables.
            variable_scope.get_variable(
                "device_embeddings",
                [self.num_devices, self.hparams.hidden_size])
            variable_scope.get_variable("decoder_lstm", [
                2 * self.hparams.hidden_size, 4 * self.hparams.hidden_size
            ])
            variable_scope.get_variable(
                "device_softmax",
                [2 * self.hparams.hidden_size, self.num_devices])
            variable_scope.get_variable("device_go_embedding",
                                        [1, self.hparams.hidden_size])
            # Forget biases override the scope initializer with a constant.
            for bias_name in ("encoder_forget_bias", "decoder_forget_bias"):
                variable_scope.get_variable(
                    bias_name,
                    shape=1,
                    dtype=dtypes.float32,
                    initializer=init_ops.constant_initializer(
                        self.hparams.forget_bias_init))
            for attn_name in ("attn_w_1", "attn_w_2"):
                variable_scope.get_variable(
                    attn_name,
                    [self.hparams.hidden_size, self.hparams.hidden_size])
            variable_scope.get_variable("attn_v",
                                        [self.hparams.hidden_size, 1])

        # Placeholder defaulting to zeros, one slice per child.
        seq2seq_input_layer = array_ops.placeholder_with_default(
            array_ops.zeros([
                self.hparams.num_children, self.num_groups, self.group_emb_size
            ], dtypes.float32),
            shape=(self.hparams.num_children, self.num_groups,
                   self.group_emb_size))
        self.seq2seq_input_layer = seq2seq_input_layer
Ejemplo n.º 14
0
def _dnn_model_fn(
    features, labels, mode, head, hidden_units, feature_columns,
    optimizer='Adagrad', activation_fn=nn.relu, dropout=None,
    input_layer_partitioner=None, config=None):
  """Model function for a feed-forward deep neural network.

  Builds the input layer from `feature_columns`, stacks one dense layer per
  entry of `hidden_units`, adds a linear logits layer of width
  `head.logits_dimension`, and lets `head` turn the logits into the result.
  Everything is created inside the 'dnn' variable scope.

  Args:
    features: Dict of `Tensor` (depends on data passed to `train`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`. Forwarded to
      `head` unchanged.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`. Dropout is only applied in `ModeKeys.TRAIN`.
    head: A `head_lib._Head` instance.
    hidden_units: Iterable of integer number of hidden units per layer.
    feature_columns: Iterable of `feature_column._FeatureColumn` model inputs.
    optimizer: String, `tf.Optimizer` object, or callable that creates the
      optimizer to use for training. Resolved through
      `optimizers.get_optimizer_instance` with `_LEARNING_RATE`.
    activation_fn: Activation function applied to each hidden layer.
    dropout: When not `None`, the probability we will drop out a given
      coordinate.
    input_layer_partitioner: Partitioner for input layer. When falsy, a
      `min_max_variable_partitioner` with `min_slice_size` 64 << 20 is used.
    config: `RunConfig` object to configure the runtime settings. Only its
      `num_ps_replicas` is read; a falsy config counts as 0 replicas.

  Returns:
    The value returned by `head.create_estimator_spec` for the DNN logits.
  """
  optimizer = optimizers.get_optimizer_instance(
      optimizer, learning_rate=_LEARNING_RATE)
  if config:
    num_ps_replicas = config.num_ps_replicas
  else:
    num_ps_replicas = 0

  # Both partitioners are plain callables; creating them adds nothing to the
  # graph, so they can be prepared before any scope is opened.
  if not input_layer_partitioner:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
  dnn_partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  feature_tensors = tuple(six.itervalues(features))

  def _train_op_fn(loss):
    """Returns the op that minimizes `loss` and advances the global step."""
    step = training_util.get_global_step()
    return optimizer.minimize(loss, global_step=step)

  with variable_scope.variable_scope(
      'dnn', values=feature_tensors, partitioner=dnn_partitioner):
    with variable_scope.variable_scope(
        'input_from_feature_columns',
        values=feature_tensors,
        partitioner=input_layer_partitioner):
      activations = feature_column_lib.input_layer(
          features=features, feature_columns=feature_columns)

    for depth, width in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % depth,
          values=(activations,)) as layer_scope:
        activations = core_layers.dense(
            activations,
            units=width,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=layer_scope)
        apply_dropout = (
            dropout is not None and mode == model_fn.ModeKeys.TRAIN)
        if apply_dropout:
          activations = core_layers.dropout(
              activations, rate=dropout, training=True)
      _add_hidden_layer_summary(activations, layer_scope.name)

    with variable_scope.variable_scope(
        'logits', values=(activations,)) as logits_scope:
      logits = core_layers.dense(
          activations,
          units=head.logits_dimension,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    # Kept inside the 'dnn' scope so the head builds its ops there, exactly
    # where the logits were created.
    return head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def _weight(shape):
  """Returns a variable of `shape` initialized by seeded Glorot-uniform."""

  def _initial_value():
    return init_ops.glorot_uniform_initializer(seed=0)(shape)

  # The initial value is passed as a callable (not a tensor) so that the
  # variable can be constructed inside loops.
  return variables.Variable(_initial_value)
Ejemplo n.º 16
0
def _dnn_linear_combined_model_fn(
    features, labels, mode, head,
    linear_feature_columns=None, linear_optimizer='Ftrl',
    dnn_feature_columns=None, dnn_optimizer='Adagrad', dnn_hidden_units=None,
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    input_layer_partitioner=None, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Builds a DNN tower under the 'dnn' scope and/or a linear model under the
  'linear' scope, depending on which feature columns are given. When both are
  present their logits are added. Each part is trained by its own optimizer on
  the trainable variables of its own scope.

  Args:
    features: dict of `Tensor` (depends on data passed to `fit`). Its values
      are read with `six.itervalues`, so a mapping is required whenever a
      tower is built.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`. DNN dropout is only applied in `ModeKeys.TRAIN`.
    head: A `Head` instance.
    linear_feature_columns: An iterable containing all the feature columns used
      by the Linear model.
    linear_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the Linear model. Defaults to the Ftrl
      optimizer. The learning rate handed to
      `optimizers.get_optimizer_instance` is
      `_linear_learning_rate(len(linear_feature_columns))`.
    dnn_feature_columns: An iterable containing all the feature columns used by
      the DNN model.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN model. Defaults to the Adagrad
      optimizer. The learning rate handed to
      `optimizers.get_optimizer_instance` is `_DNN_LEARNING_RATE`.
    dnn_hidden_units: List of hidden units per DNN layer. Required when
      `dnn_feature_columns` is given.
    dnn_activation_fn: Activation function applied to each DNN layer. It is
      passed as-is to `core_layers.dense`; this function does not substitute
      a default when `None` is passed.
    dnn_dropout: When not `None`, the probability we will drop out a given DNN
      coordinate.
    input_layer_partitioner: Partitioner for input layer. When falsy, a
      `min_max_variable_partitioner` with `max_partitions=num_ps_replicas` and
      `min_slice_size` 64 << 20 is used. It also partitions the linear scope.
    config: `RunConfig` object to configure the runtime settings. Only its
      `num_ps_replicas` is read; a falsy config counts as 0 replicas.

  Returns:
    The value returned by `head.create_estimator_spec` for the combined
    logits.

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_feature_columns`
      are empty at the same time, or if `dnn_feature_columns` is given without
      `dnn_hidden_units`. `_check_no_sync_replicas_optimizer` may also reject
      an optimizer (presumably a sync-replicas one; see that helper).
  """
  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        'Either linear_feature_columns or dnn_feature_columns must be defined.')
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Build DNN Logits.
  # The scope name doubles as the collection filter in `_train_op_fn`.
  dnn_parent_scope = 'dnn'

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    dnn_optimizer = optimizers.get_optimizer_instance(
        dnn_optimizer, learning_rate=_DNN_LEARNING_RATE)
    _check_no_sync_replicas_optimizer(dnn_optimizer)
    if not dnn_hidden_units:
      raise ValueError(
          'dnn_hidden_units must be defined when dnn_feature_columns is '
          'specified.')
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      # The input layer gets its own partitioner, overriding the DNN one.
      with variable_scope.variable_scope('input',
                                         partitioner=input_layer_partitioner):
        net = feature_column_lib.input_layer(
            features=features,
            feature_columns=dnn_feature_columns)

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            'hiddenlayer_%d' % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = core_layers.dense(
              net,
              units=num_hidden_units,
              activation=dnn_activation_fn,
              kernel_initializer=init_ops.glorot_uniform_initializer(),
              name=dnn_hidden_layer_scope)
          # Dropout is a training-only regularizer.
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = core_layers.dropout(net, rate=dnn_dropout, training=True)
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      # Final layer has no activation: it outputs raw logits for the head.
      with variable_scope.variable_scope(
          'logits',
          values=(net,)) as dnn_logits_scope:
        dnn_logits = core_layers.dense(
            net,
            units=head.logits_dimension,
            activation=None,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  # The scope name doubles as the collection filter in `_train_op_fn`.
  linear_parent_scope = 'linear'

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_optimizer = optimizers.get_optimizer_instance(
        linear_optimizer,
        learning_rate=_linear_learning_rate(len(linear_feature_columns)))
    _check_no_sync_replicas_optimizer(linear_optimizer)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as scope:
      linear_logits = feature_column_lib.linear_model(
          features=features,
          feature_columns=linear_feature_columns,
          units=head.logits_dimension)
      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  # At least one of the two is not None because of the check at the top.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    train_ops = []
    global_step = training_util.get_global_step()
    # Neither `minimize` call receives `global_step`; the step is incremented
    # exactly once below, after both optimizers have run.
    if dnn_logits is not None:
      train_ops.append(
          dnn_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=dnn_parent_scope)))
    if linear_logits is not None:
      train_ops.append(
          linear_optimizer.minimize(
              loss,
              var_list=ops.get_collection(
                  ops.GraphKeys.TRAINABLE_VARIABLES,
                  scope=linear_parent_scope)))

    train_op = control_flow_ops.group(*train_ops)
    # The returned op is the global-step increment, gated on all updates.
    with ops.control_dependencies([train_op]):
      with ops.colocate_with(global_step):
        return state_ops.assign_add(global_step, 1)

  return head.create_estimator_spec(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_train_op_fn,
      logits=logits)
Ejemplo n.º 17
0
  def build(self, input_shape):
    """Create variables of the Cudnn RNN.

    It can be called manually before `__call__()` or automatically through
    `__call__()`. In the former case, subsequent `__call__()`s will skip
    creating variables.

    Validates `input_shape`, records the input size and input spec, creates
    the single `opaque_kernel` variable from the initialized canonical weights
    and biases, and finally marks the layer as built.

    Args:
      input_shape: network input tensor shape, a python list or a TensorShape
        object with 3 dimensions.
    Raises:
      ValueError: if input_shape has wrong dimension or unknown 3rd dimension.
    """
    # Building is a one-time operation; later calls are no-ops.
    if self.built:
      return

    input_shape = tensor_shape.TensorShape(input_shape)
    if input_shape.ndims != 3:
      raise ValueError("Expecting input_shape with 3 dims, got %d" %
                       input_shape.ndims)
    if input_shape[-1].value is None:
      raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                       "should be defined. Found `None`.")
    self._input_size = input_shape[-1].value
    self.input_spec = input_spec.InputSpec(ndim=3, axes={-1: self._input_size})

    self._set_scope(None)

    # Not using the base class `add_variable()` since it calls
    # `tf.get_variable()` with a callable initializer whereas here with a
    # tensor. The difference is mandated to support forward-compatibility with
    # Cudnn.
    # NOTE: `self.built` is always falsy here because of the early return
    # above, so `reuse=self.built` never requests reuse.
    with vs.variable_scope(
        self._scope,
        reuse=self.built,
        custom_getter=self._update_trainable_weights):
      # Fill in default initializers only when the caller supplied none.
      if self._kernel_initializer is None:
        self._kernel_initializer = init_ops.glorot_uniform_initializer(
            seed=self._seed, dtype=self._plain_dtype)
      if self._bias_initializer is None:
        self._bias_initializer = init_ops.constant_initializer(
            0.0, dtype=self._plain_dtype)

      # One initial-value tensor per canonical weight / bias shape.
      weights = [
          self._kernel_initializer(sp, dtype=self._plain_dtype)
          for sp in self.canonical_weight_shapes
      ]
      biases = [
          self._bias_initializer(sp, dtype=self._plain_dtype)
          for sp in self.canonical_bias_shapes
      ]
      # Convert the canonical tensors into the tensor that initializes the
      # opaque kernel variable below.
      opaque_params_t = self._canonical_to_opaque(weights, biases)

      if vs.get_variable_scope().partitioner is not None:
        logging.warn(
            "Partitioner is not supported for Cudnn RNN layer variables, using "
            "it will create forward-compatibility issues with future "
            "CUDA/CuDNN generations.")
      # Initialize opaque params with a tensor with unknown shape, thus couldn't
      # use self.add_variable(name, shape, initializer, ...)
      self.kernel = vs.get_variable(
          "opaque_kernel", dtype=self._plain_dtype,
          initializer=opaque_params_t, validate_shape=False)
    # Create saveable in the outer scope of the cudnn subgraph, such that
    # alternative subgraph with platform-independent rnn cells can load the
    # checkpoints directly.
    # Skipped when the enclosing variable scope is in reuse mode.
    if not (self.built or vs.get_variable_scope().reuse is True):
      self._create_saveable()
    self.built = True
Ejemplo n.º 18
0
 def __init__(self, cell_size):
     """Stores the cell size and picks the initializers.

     `default_initializer` is the initializer of the current variable scope,
     or a Glorot-uniform initializer when the scope defines none.
     `initializer` is always an orthogonal initializer.
     """
     scope_initializer = tf.get_variable_scope().initializer
     self.cell_size = cell_size
     self.default_initializer = (
         scope_initializer or init_ops.glorot_uniform_initializer())
     self.initializer = tf.orthogonal_initializer()
Ejemplo n.º 19
0
def _dnn_model_fn(features,
                  labels,
                  mode,
                  head,
                  hidden_units,
                  feature_columns,
                  optimizer='Adagrad',
                  activation_fn=nn.relu,
                  dropout=None,
                  input_layer_partitioner=None,
                  config=None):
    """Deep Neural Net model_fn.

    Creates, inside the 'dnn' variable scope, an input layer from
    `feature_columns`, one dense hidden layer per entry of `hidden_units`
    and a linear logits layer, then delegates to `head`.

    Args:
      features: Dict of `Tensor` (depends on data passed to `train`).
      labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
        dtype `int32` or `int64` in the range `[0, n_classes)`.
      mode: Defines whether this is training, evaluation or prediction.
        See `ModeKeys`. Dropout is only applied in `ModeKeys.TRAIN`.
      head: A `head_lib._Head` instance.
      hidden_units: Iterable of integer number of hidden units per layer.
      feature_columns: Iterable of `feature_column._FeatureColumn` model
        inputs.
      optimizer: String, `tf.Optimizer` object, or callable that creates the
        optimizer to use for training. Resolved through
        `optimizers.get_optimizer_instance` with `_LEARNING_RATE`.
      activation_fn: Activation function applied to each hidden layer.
      dropout: When not `None`, the probability we will drop out a given
        coordinate.
      input_layer_partitioner: Partitioner for input layer. When falsy, a
        `min_max_variable_partitioner` with `min_slice_size` 64 << 20 is
        used.
      config: `RunConfig` object to configure the runtime settings. Only its
        `num_ps_replicas` is read; a falsy config counts as 0 replicas.

    Returns:
      The value returned by `head.create_estimator_spec` for the DNN logits.
    """

    def _dense(inputs, num_units, activation, scope):
        """Adds one Glorot-initialized dense layer named after `scope`."""
        return core_layers.dense(
            inputs,
            units=num_units,
            activation=activation,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=scope)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        return optimizer.minimize(
            loss, global_step=training_util.get_global_step())

    optimizer = optimizers.get_optimizer_instance(optimizer,
                                                  learning_rate=_LEARNING_RATE)
    num_ps_replicas = 0
    if config:
        num_ps_replicas = config.num_ps_replicas

    model_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas)
    with variable_scope.variable_scope('dnn',
                                       values=tuple(six.itervalues(features)),
                                       partitioner=model_partitioner):
        if not input_layer_partitioner:
            input_layer_partitioner = (
                partitioned_variables.min_max_variable_partitioner(
                    max_partitions=num_ps_replicas, min_slice_size=64 << 20))
        with variable_scope.variable_scope(
                'input_from_feature_columns',
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner):
            hidden = feature_column_lib.input_layer(
                features=features, feature_columns=feature_columns)

        training = mode == model_fn.ModeKeys.TRAIN
        for index, layer_width in enumerate(hidden_units):
            with variable_scope.variable_scope(
                    'hiddenlayer_%d' % index,
                    values=(hidden, )) as layer_scope:
                hidden = _dense(hidden, layer_width, activation_fn,
                                layer_scope)
                if dropout is not None and training:
                    hidden = core_layers.dropout(hidden,
                                                 rate=dropout,
                                                 training=True)
            _add_hidden_layer_summary(hidden, layer_scope.name)

        with variable_scope.variable_scope('logits',
                                           values=(hidden, )) as logits_scope:
            logits = _dense(hidden, head.logits_dimension, None, logits_scope)
        _add_hidden_layer_summary(logits, logits_scope.name)

        # Still inside the 'dnn' scope, as in the layers above.
        return head.create_estimator_spec(features=features,
                                          mode=mode,
                                          labels=labels,
                                          train_op_fn=_train_op_fn,
                                          logits=logits)
Ejemplo n.º 20
0
 def test_cudnn_rnn(self):
     """Cross-checks the backend cuDNN RNN helpers against `cudnn_rnn`.

     Returns immediately when `get_ngpu()` reports no GPU. Otherwise, for
     every combination of RNN mode, layer count, weight initializer, bias
     initializer and direction, it checks that:

     * the opaque parameter vector from `K.params_to_cudnn` has the size
       reported by `cudnn_rnn_opaque_params_size`;
     * the number and shapes of the weights from `K.init_rnn` match the
       canonical shapes of the corresponding `cudnn_rnn` layer;
     * `K.params_to_cudnn` agrees with
       `cudnn_rnn_canonical_to_opaque_params`;
     * the variables created by `K.cudnn_rnn` match the initial ones in
       count and in the last component of their names;
     * `N.CudnnRNN` seeded with those variables reproduces the output sums
       of `K.cudnn_rnn`.
     """
     import itertools  # Local import: only this test needs it.

     if get_ngpu() == 0:
         return
     print()
     batch_size = 2
     time_steps = 5
     input_dim = 12
     hidden_dim = 8
     X = K.variable(value=np.random.rand(batch_size, time_steps, input_dim),
                    dtype='float32',
                    name='X')
     # Same iteration order as six nested loops, outermost factor first.
     configurations = itertools.product(
         ('lstm', 'rnn_relu', 'gru'),  # rnn_mode
         (1, 2),  # num_layers
         (init_ops.glorot_uniform_initializer(seed=1234),
          init_ops.random_normal_initializer(seed=1234)),  # W_init
         (0, 1),  # b_init
         (True, False),  # bidirectional
         (False, ))  # skip_input
     for (rnn_mode, num_layers, W_init, b_init, bidirectional,
          skip_input) in configurations:
         input_mode = 'skip_input' if skip_input else 'linear_input'
         direction = 'bidirectional' if bidirectional else 'unidirectional'
         print('RNNmode:%s' % rnn_mode,
               "#Layers:%d" % num_layers,
               'Bidirectional:%s' % bidirectional,
               'SkipInput:%s' % skip_input)
         weights, biases = K.init_rnn(input_dim=input_dim,
                                      hidden_dim=hidden_dim,
                                      num_gates=rnn_mode,
                                      num_layers=num_layers,
                                      W_init=W_init,
                                      b_init=b_init,
                                      skip_input=skip_input,
                                      cudnn_vector=False,
                                      is_bidirectional=bidirectional,
                                      name=None)
         # ====== check number of params ====== #
         params1 = K.params_to_cudnn(weights, biases)
         n = params1.shape[0].value
         nb_params = cudnn_rnn_ops.cudnn_rnn_opaque_params_size(
             rnn_mode=rnn_mode,
             num_layers=num_layers,
             num_units=hidden_dim,
             input_size=input_dim,
             input_mode=input_mode,
             direction=direction)
         nb_params = K.eval(nb_params)
         assert n == nb_params
         # ====== check canonical shape match ====== #
         kwargs = {
             'num_layers': num_layers,
             'num_units': hidden_dim,
             'input_mode': input_mode,
             'direction': direction
         }
         # A single chain, so an unknown mode fails loudly instead of
         # silently reusing the layer from the previous iteration.
         if rnn_mode == 'lstm':
             rnn = cudnn_rnn.CudnnLSTM(**kwargs)
         elif rnn_mode == 'gru':
             rnn = cudnn_rnn.CudnnGRU(**kwargs)
         elif rnn_mode == 'rnn_relu':
             rnn = cudnn_rnn.CudnnRNNRelu(**kwargs)
         elif rnn_mode == 'rnn_tanh':
             rnn = cudnn_rnn.CudnnRNNTanh(**kwargs)
         else:
             raise ValueError('Unsupported rnn_mode: %s' % rnn_mode)
         rnn.build(input_shape=(None, None, input_dim))
         assert len(weights) == len(rnn.canonical_weight_shapes)
         assert len(biases) == len(rnn.canonical_bias_shapes)
         for w, s in zip(weights, rnn.canonical_weight_shapes):
             assert tuple(w.shape.as_list()) == s
         # ====== check params conversion ====== #
         K.initialize_all_variables()
         params2 = cudnn_rnn_ops.cudnn_rnn_canonical_to_opaque_params(
             rnn_mode=rnn_mode,
             num_layers=num_layers,
             num_units=hidden_dim,
             input_size=input_dim,
             input_mode=input_mode,
             direction=direction,
             weights=weights,
             biases=biases)
         assert np.all(K.eval(params1) == K.eval(params2))
         # ====== odin cudnn implementation ====== #
         name = 'TEST' + uuid(length=25)
         outputs = K.cudnn_rnn(X=X,
                               num_units=hidden_dim,
                               rnn_mode=rnn_mode,
                               num_layers=num_layers,
                               parameters=None,
                               skip_input=skip_input,
                               is_bidirectional=bidirectional,
                               dropout=0.1,
                               name=name)
         K.initialize_all_variables()
         s0 = K.eval(outputs[0]).sum()
         s1 = K.eval(outputs[1]).sum()
         all_variables = K.get_all_variables(scope=name)
         new_weights = [
             i for i in all_variables
             if K.role.has_roles(i, roles=K.role.Weight)
         ]
         new_biases = [
             i for i in all_variables
             if K.role.has_roles(i, roles=K.role.Bias)
         ]
         new_weights, new_biases = K.sort_cudnn_params(new_weights,
                                                       new_biases,
                                                       rnn_mode=rnn_mode)
         # Compare against the NEW lists: `zip` below stops at the shorter
         # sequence, so a missing variable would otherwise go unnoticed.
         assert len(weights) == len(new_weights)
         assert len(biases) == len(new_biases)
         for i, j in zip(weights + biases, new_weights + new_biases):
             assert i.name.split('/')[-1] == j.name.split('/')[-1]
         # ====== CudnnRNN wrapper ====== #
         rnn = N.CudnnRNN(num_units=hidden_dim,
                          W_init=new_weights,
                          b_init=new_biases,
                          rnn_mode=rnn_mode,
                          num_layers=num_layers,
                          skip_input=skip_input,
                          is_bidirectional=bidirectional,
                          return_states=True,
                          dropout=0.)
         outputs = rnn(X)
         K.initialize_all_variables()
         y0 = K.eval(outputs[0]).sum()
         y1 = K.eval(outputs[1]).sum()
         # NOTE(review): exact float equality, although the reference graph
         # used dropout=0.1 and this one dropout=0. Presumably dropout is
         # inactive during K.eval; confirm.
         assert y0 == s0
         assert y1 == s1
Ejemplo n.º 21
0
    def dnn_logit_fn(features, mode):
        """Deep Neural Network logit_fn with an optional attention-LSTM input.

        Relies on names from the enclosing scope: `feature_columns`,
        `rnn_feature_columns`, `input_layer_partitioner`, `hidden_units`,
        `activation_fn`, `dropout`, `units` and `FLAGS`.

        Args:
          features: This is the first item returned from the `input_fn`
                    passed to `train`, `evaluate`, and `predict`. Must be a
                    `dict` of `Tensor`, since its values are read with
                    `six.itervalues`.
          mode: Specifies if this training, evaluation or prediction. See
                `ModeKeys`. Dropout is only applied in `ModeKeys.TRAIN`.

        Returns:
          A `Tensor` representing the logits, produced by a dense layer with
          `units` outputs.
        """
        with variable_scope.variable_scope(
                'input_from_feature_columns',
                values=tuple(six.itervalues(features)),
                partitioner=input_layer_partitioner):
            net = feature_column_lib.input_layer(
                features=features, feature_columns=feature_columns)
            # Identity check: `!= None` would invoke a custom `__ne__`, and is
            # ambiguous for array-like values.
            if rnn_feature_columns is not None:
                # The sequence branch is built inside the input scope, so its
                # variables also fall under `input_layer_partitioner`.
                rnn_features_embedding = feature_column_lib.input_layer(
                    features=features, feature_columns=rnn_feature_columns)
                # Unflatten to [batch, rnn_length, rnn_input_size].
                rnn_features_embedding = tf.reshape(
                    rnn_features_embedding,
                    [-1, FLAGS.rnn_length, FLAGS.rnn_input_size])
                cell = tf.nn.rnn_cell.BasicLSTMCell(FLAGS.rnn_hidden_size)
                # NOTE(review): the attention window is hard-coded to 10.
                att_wrapper = tf.contrib.rnn.AttentionCellWrapper(
                    cell=cell, attn_length=10)
                outputs, _ = tf.nn.dynamic_rnn(att_wrapper,
                                               rnn_features_embedding,
                                               dtype=tf.float32)
                # Flatten all time steps into one vector per example; assumes
                # each step's output has width rnn_hidden_size -- TODO confirm.
                outputs = tf.reshape(
                    outputs, [-1, FLAGS.rnn_length * FLAGS.rnn_hidden_size])
                net = array_ops.concat([net, outputs], 1)

        for layer_id, num_hidden_units in enumerate(hidden_units):
            with variable_scope.variable_scope(
                    'hiddenlayer_%d' % layer_id,
                    values=(net, )) as hidden_layer_scope:
                net = core_layers.dense(
                    net,
                    units=num_hidden_units,
                    activation=activation_fn,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name=hidden_layer_scope)
                # Dropout is a training-only regularizer.
                if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                    net = core_layers.dropout(net, rate=dropout, training=True)
            _add_hidden_layer_summary(net, hidden_layer_scope.name)

        # Final layer has no activation: it outputs raw logits.
        with variable_scope.variable_scope('logits',
                                           values=(net, )) as logits_scope:
            logits = core_layers.dense(
                net,
                units=units,
                activation=None,
                kernel_initializer=init_ops.glorot_uniform_initializer(),
                name=logits_scope)
        _add_hidden_layer_summary(logits, logits_scope.name)

        return logits
Ejemplo n.º 22
0
    def build(self, input_shape):
        """Create variables of the Cudnn RNN.

        It can be called manually before `__call__()` or automatically through
        `__call__()`. In the former case, subsequent `__call__()`s will skip
        creating variables.

        Validates `input_shape`, records the input size and input spec,
        creates the single `opaque_kernel` variable from the initialized
        canonical weights and biases, and finally marks the layer as built.

        Args:
          input_shape: network input tensor shape, a python list or a TensorShape
            object with 3 dimensions.
        Raises:
          ValueError: if input_shape has wrong dimension or unknown 3rd dimension.
        """
        # Building is a one-time operation; later calls are no-ops.
        if self.built:
            return

        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape.ndims != 3:
            raise ValueError("Expecting input_shape with 3 dims, got %d" %
                             input_shape.ndims)
        if input_shape[-1].value is None:
            raise ValueError("The last dimension of the inputs to `CudnnRNN` "
                             "should be defined. Found `None`.")
        self._input_size = input_shape[-1].value
        self.input_spec = base_layer.InputSpec(ndim=3,
                                               axes={-1: self._input_size})

        self._set_scope(None)

        # Not using the base class `add_variable()` since it calls
        # `tf.get_variable()` with a callable initializer whereas here with a
        # tensor. The difference is mandated to support forward-compatibility with
        # Cudnn.
        # The scope is opened with AUTO_REUSE, so an existing "opaque_kernel"
        # under `self._variable_namespace` is reused rather than recreated.
        with vs.variable_scope(self._variable_namespace,
                               reuse=AUTO_REUSE,
                               custom_getter=self._update_trainable_weights):
            # Fill in default initializers only when the caller supplied none.
            if self._kernel_initializer is None:
                self._kernel_initializer = init_ops.glorot_uniform_initializer(
                    seed=self._seed, dtype=self._plain_dtype)
            if self._bias_initializer is None:
                self._bias_initializer = init_ops.constant_initializer(
                    0.0, dtype=self._plain_dtype)

            # One initial-value tensor per canonical weight / bias shape.
            weights = [
                self._kernel_initializer(sp, dtype=self._plain_dtype)
                for sp in self.canonical_weight_shapes
            ]
            biases = [
                self._bias_initializer(sp, dtype=self._plain_dtype)
                for sp in self.canonical_bias_shapes
            ]
            # Convert the canonical tensors into the tensor that initializes
            # the opaque kernel variable below.
            opaque_params_t = self._canonical_to_opaque(weights, biases)

            if vs.get_variable_scope().partitioner is not None:
                logging.warn(
                    "Partitioner is not supported for Cudnn RNN layer variables, using "
                    "it will create forward-compatibility issues with future "
                    "CUDA/CuDNN generations.")
            # Initialize opaque params with a tensor.
            self.kernel = vs.get_variable("opaque_kernel",
                                          initializer=opaque_params_t,
                                          validate_shape=False)
        # Create saveable in the outer scope of the cudnn subgraph, such that
        # alternative subgraph with platform-independent rnn cells can load the
        # checkpoints directly.
        # NOTE: `self.built` is always falsy here because of the early return
        # above, so only the enclosing scope's reuse flag can skip this.
        if not (self.built or vs.get_variable_scope().reuse is True):
            self._create_saveable()
        self.built = True
Ejemplo n.º 23
0
def wide_and_deep(features=None, params=None):
    """Build the head, logits and train-op function of a wide & deep model.

    Args:
        features: dict mapping a column name to its input tensor. Columns that
            are hashed are fed to `string_to_hash_bucket_fast`; continuous
            columns are converted with `tf.to_float`.
        params: dict of model settings. The keys read here are:
            'WIDE', 'DEEP': truthy flags enabling the linear / DNN part.
            'WIDE_CATE_COLS': iterable of `(col, size)` pairs.
            'WIDE_CROSS_COLS': iterable of `(col1, col2, size)` triples.
            'CONTINUOUS_COLS': sequence of continuous column names.
            'DEEP_EMBEDDING_COLS': iterable of
                `(col, vals, embedding_size, col_type)`; `vals` is either an
                int bucket count or a vocabulary sequence.
            'DEEP_SHARED_EMBEDDING_COLS': iterable of
                `(cols, vals, embedding_size, col_type, shared_flag)`; all
                columns in `cols` share a single embedding matrix.
            '_HIDDEN_UNITS': sizes of the hidden dense layers.
            '_LINEAR_LEARNING_RATE', '_DNN_LEARNING_RATE': learning rates for
                the Ftrl (linear) and Adagrad (DNN) optimizers.

    Returns:
        A tuple `(head, logits, train_op_fn)`. `logits` is the sum of the wide
        and deep logits when both parts are enabled, the single enabled part
        otherwise, and `None` when neither part is enabled. `train_op_fn(loss)`
        returns an op that runs the per-part optimizers and then increments
        the global step.
    """
    WIDE_CATE_COLS = params['WIDE_CATE_COLS']
    CONTINUOUS_COLS = params['CONTINUOUS_COLS']
    DEEP_EMBEDDING_COLS = params['DEEP_EMBEDDING_COLS']
    WIDE_CROSS_COLS = params['WIDE_CROSS_COLS']
    DEEP_SHARED_EMBEDDING_COLS = params['DEEP_SHARED_EMBEDDING_COLS']
    _HIDDEN_UNITS = params['_HIDDEN_UNITS']
    _LINEAR_LEARNING_RATE = params['_LINEAR_LEARNING_RATE']
    _DNN_LEARNING_RATE = params['_DNN_LEARNING_RATE']

    # ---------------------------- wide (linear) part -------------------------
    wide_logits = None
    linear_absolute_scope = None
    if params['WIDE']:
        wide_sum = []
        with variable_scope.variable_scope(
                'linear', values=tuple(six.itervalues(features))) as scope:
            # Remembered so the train op can select this part's variables.
            linear_absolute_scope = scope.name

            # One weight per hash bucket for every categorical column.
            for col, size in WIDE_CATE_COLS:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cate" + col)
                indices = string_ops.string_to_hash_bucket_fast(
                    features[col], size, name="wide_hash_" + col)
                wide_sum.append(
                    tf.nn.embedding_lookup(w_wide,
                                           indices,
                                           name="wide_cat_lookup_" + col))

            # Crossed columns: join both string values, then hash the pair.
            for col1, col2, size in WIDE_CROSS_COLS:
                w_wide = tf.get_variable(
                    shape=[size, 1],
                    initializer=init_ops.zeros_initializer,
                    trainable=True,
                    name="Wide_Part_Weights_Cross" + col1 + '_' + col2)
                cross_input = tf.string_join([features[col1], features[col2]],
                                             separator="_")
                indices = string_ops.string_to_hash_bucket_fast(
                    cross_input, size, name="wide_hash_" + col1 + '_' + col2)
                wide_sum.append(
                    tf.nn.embedding_lookup(w_wide,
                                           indices,
                                           name="wide_cross_lookup_" + col1 +
                                           '_' + col2))

            # Continuous columns: a single linear layer with bias.
            w_wide = tf.get_variable(shape=[len(CONTINUOUS_COLS), 1],
                                     initializer=init_ops.zeros_initializer,
                                     trainable=True,
                                     name="Wide_Part_Weights_Continus")
            bias = tf.get_variable(shape=[1],
                                   initializer=init_ops.zeros_initializer,
                                   trainable=True,
                                   name="Wide_Part_Bias")
            x = tf.concat([
                tf.expand_dims(tf.to_float(features[col]), -1)
                for col in CONTINUOUS_COLS
            ],
                          1,
                          name='continus_concat')
            continue_logits = tf.matmul(x, w_wide) + bias

            wide_logits = tf.reduce_sum(wide_sum, 0)
            wide_logits += continue_logits

    # ------------------------------ deep (DNN) part --------------------------
    deep_logits = None
    dnn_absolute_scope = None
    if params['DEEP']:
        with variable_scope.variable_scope(
                'Deep_model',
                values=tuple(six.itervalues(features)),
        ) as scope:
            # Remembered so the train op can select this part's variables.
            dnn_absolute_scope = scope.name

            deep_sum = []
            # Columns with their own embedding matrix.
            for col, vals, embedding_size, col_type in DEEP_EMBEDDING_COLS:
                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_embedding_" + col)

                if col_type != 'int':
                    indices = string_ops.string_to_hash_bucket_fast(
                        features[col], bucket_size, name="deep_hash_" + col)
                else:
                    # 'int' columns are mapped through an explicit vocabulary.
                    table = tf.contrib.lookup.index_table_from_tensor(vals)
                    indices = table.lookup(features[col])
                seq_emb = tf.nn.embedding_lookup(embeddings,
                                                 indices,
                                                 name="deep_lookup_" + col)
                if col_type == 'seq':
                    # Average the embeddings along axis 1.
                    seq_emb = tf.reduce_mean(seq_emb, 1)
                deep_sum.append(seq_emb)

            # Groups of columns that share one embedding matrix.
            for (cols, vals, embedding_size, col_type,
                 shared_flag) in DEEP_SHARED_EMBEDDING_COLS:
                bucket_size = vals if isinstance(vals, int) else len(vals)
                embeddings = tf.get_variable(
                    shape=[bucket_size, embedding_size],
                    initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_shared_embedding_" + '_'.join(cols) +
                    str(shared_flag))
                for col in cols:
                    if col_type != 'int':
                        indices = string_ops.string_to_hash_bucket_fast(
                            features[col],
                            bucket_size,
                            name="deep_shared_hash_" + col + str(shared_flag))
                    else:
                        # The lookup table is built from the vocabulary
                        # (`vals`), matching the non-shared branch; the
                        # embedding size is not a vocabulary.
                        table = tf.contrib.lookup.index_table_from_tensor(
                            vals)
                        indices = table.lookup(features[col])
                    seq_emb = tf.nn.embedding_lookup(
                        embeddings,
                        indices,
                        name="deep_shared_lookup_" + col + str(shared_flag))
                    # NOTE(review): here averaging is keyed off the column
                    # *name* suffix, while the non-shared branch uses
                    # `col_type == 'seq'` -- confirm this is intended.
                    if col.endswith('seq'):
                        seq_emb = tf.reduce_mean(seq_emb, 1)
                    deep_sum.append(seq_emb)

            # Continuous columns are appended as single-width features.
            for col in CONTINUOUS_COLS:
                deep_sum.append(
                    tf.expand_dims(tf.to_float(features[col]),
                                   -1,
                                   name='continuous_' + col))
            curr_layer = tf.concat(deep_sum, 1, name="deep_inputs_layer")

            # Stack of ReLU dense layers followed by a single-unit logit layer.
            for index, layer_size in enumerate(_HIDDEN_UNITS):
                curr_layer = tf.layers.dense(
                    curr_layer,
                    layer_size,
                    activation=tf.nn.relu,
                    kernel_initializer=init_ops.glorot_uniform_initializer(),
                    name="deep_hidden_layer" + str(index))
            deep_logits = tf.layers.dense(curr_layer,
                                          units=1,
                                          name="deep_logits")

    # ------------------------------ head and logits --------------------------
    my_head = head._binary_logistic_head_with_sigmoid_cross_entropy_loss(  # pylint: disable=protected-access
        loss_reduction=losses.Reduction.SUM)

    if deep_logits is not None and wide_logits is not None:
        logits = deep_logits + wide_logits
    elif deep_logits is not None:
        logits = deep_logits
    else:
        logits = wide_logits

    dnn_optimizer = optimizers.get_optimizer_instance(
        'Adagrad', learning_rate=_DNN_LEARNING_RATE)

    def _linear_learning_rate(num_linear_feature_columns):
        """Cap the configured linear rate at 1/sqrt(number of columns)."""
        # 1/sqrt(n) grows without bound as n -> 0, so with no columns the
        # configured rate is the minimum (and we avoid dividing by zero).
        if num_linear_feature_columns <= 0:
            return _LINEAR_LEARNING_RATE
        default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
        return min(_LINEAR_LEARNING_RATE, default_learning_rate)

    linear_optimizer = optimizers.get_optimizer_instance(
        'Ftrl', learning_rate=_linear_learning_rate(len(WIDE_CATE_COLS)))

    def _train_op_fn(loss):
        """Minimize `loss` with each enabled part's optimizer, then step."""
        train_ops = []
        global_step = training_util.get_global_step()
        if deep_logits is not None:
            # Restrict each optimizer to the variables under its own scope.
            train_ops.append(
                dnn_optimizer.minimize(loss,
                                       var_list=ops.get_collection(
                                           ops.GraphKeys.TRAINABLE_VARIABLES,
                                           scope=dnn_absolute_scope)))
        if wide_logits is not None:
            train_ops.append(
                linear_optimizer.minimize(
                    loss,
                    var_list=ops.get_collection(
                        ops.GraphKeys.TRAINABLE_VARIABLES,
                        scope=linear_absolute_scope)))

        train_op = control_flow_ops.group(*train_ops)
        # The global step is incremented once, after all updates have run.
        with ops.control_dependencies([train_op]):
            return state_ops.assign_add(global_step, 1).op

    return my_head, logits, _train_op_fn
Ejemplo n.º 24
0
  def __init__(self,
               units,
               hidden_units,
               feature_columns,
               activation_fn,
               dropout,
               input_layer_partitioner,
               batch_norm,
               shared_state_manager,
               name=None,
               **kwargs):
    """Creates the input, hidden, dropout, batch-norm and logits layers.

    Args:
      units: Number of units of the final (logits) dense layer.
      hidden_units: Iterable with the number of units of each hidden layer.
      feature_columns: Feature columns handed to the input layer.
      activation_fn: Activation used by every hidden dense layer.
      dropout: Dropout rate; when `None` no dropout layers are created.
      input_layer_partitioner: Stored on the instance for later use.
      batch_norm: When truthy, a batch-normalization layer is created for
        every hidden layer.
      shared_state_manager: Forwarded to `FeatureLayer` when the columns are
        v2 feature columns.
      name: Forwarded to the parent constructor.
      **kwargs: Forwarded to the parent constructor.
    """
    super(_DNNModel, self).__init__(name=name, **kwargs)

    # The input layer class depends on which feature-column API is in use.
    if feature_column_v2.is_feature_column_v2(feature_columns):
      input_layer = feature_column_v2.FeatureLayer(
          feature_columns=feature_columns,
          name='input_layer',
          shared_state_manager=shared_state_manager)
    else:
      input_layer = feature_column.InputLayer(
          feature_columns=feature_columns,
          name='input_layer',
          create_scope_now=False)
    self._input_layer = input_layer
    self._add_layer(input_layer, 'input_layer')

    self._dropout = dropout
    self._batch_norm = batch_norm

    self._hidden_layers = []
    self._dropout_layers = []
    self._batch_norm_layers = []
    self._hidden_layer_scope_names = []
    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          'hiddenlayer_%d' % layer_id) as layer_scope:
        # Dense layer bound to (and named after) its own variable scope.
        dense = core_layers.Dense(
            units=num_hidden_units,
            activation=activation_fn,
            kernel_initializer=init_ops.glorot_uniform_initializer(),
            name=layer_scope,
            _scope=layer_scope)
        self._add_layer(dense, layer_scope.name)
        self._hidden_layer_scope_names.append(layer_scope.name)
        self._hidden_layers.append(dense)

        if self._dropout is not None:
          drop = core_layers.Dropout(rate=self._dropout)
          self._add_layer(drop, drop.name)
          self._dropout_layers.append(drop)

        if self._batch_norm:
          bn_name = 'batchnorm_%d' % layer_id
          # The default momentum 0.99 actually crashes on certain
          # problem, so here we use 0.999, which is the default of
          # tf.contrib.layers.batch_norm.
          bn = normalization.BatchNormalization(
              momentum=0.999,
              trainable=True,
              name=bn_name,
              _scope=bn_name)
          self._add_layer(bn, bn.name)
          self._batch_norm_layers.append(bn)

    # Final linear projection to the logits.
    with variable_scope.variable_scope('logits') as logits_scope:
      logits_layer = core_layers.Dense(
          units=units,
          activation=None,
          kernel_initializer=init_ops.glorot_uniform_initializer(),
          name=logits_scope,
          _scope=logits_scope)
      self._logits_layer = logits_layer
      self._add_layer(logits_layer, logits_scope.name)
      self._logits_scope_name = logits_scope.name
    self._input_layer_partitioner = input_layer_partitioner
Ejemplo n.º 25
0
def glorot_uniform():
  """Returns a new Glorot uniform initializer built with default arguments."""
  initializer = init_ops.glorot_uniform_initializer()
  return initializer
Ejemplo n.º 26
0
 def testAttentionCellWrapperCorrectResult(self):
     """Compares AttentionCellWrapper output and state with fixed references.

     The cell is run once with a concatenated state (`state_is_tuple=False`)
     and once with a tuple state (`state_is_tuple=True`); both runs are
     checked against the same expected output and (flattened) state.
     """
     num_units = 4
     attn_length = 6
     batch_size = 2
     # Reference values for the fixed seeds used below.
     # NOTE(review): presumably recorded from an earlier run; they are only
     # meaningful as long as the seeds and the graph construction below stay
     # unchanged -- confirm before regenerating.
     expected_output = np.array([[1.068372, 0.45496, -0.678277, 0.340538],
                                 [1.018088, 0.378983, -0.572179, 0.268591]],
                                dtype=np.float32)
     # One row per batch element; each row is the state flattened in the same
     # order as the concat at the end of the loop body.
     expected_state = np.array(
         [[
             0.74946702, 0.34681597, 0.26474735, 1.06485605, 0.38465962,
             0.11420801, 0.10272158, 0.30925757, 0.63899988, 0.7181077,
             0.47534478, 0.33715725, 0.58086717, 0.49446869, 0.7641536,
             0.12814975, 0.92231739, 0.89857256, 0.21889746, 0.38442063,
             0.53481543, 0.8876909, 0.45823169, 0.5905602, 0.78038228,
             0.56501579, 0.03971386, 0.09870267, 0.8074435, 0.66821432,
             0.99211812, 0.12295902, 1.14606023, 0.34370938, -0.79251152,
             0.51843399
         ],
          [
              0.5179342, 0.48682183, -0.25426468, 0.96810579, 0.28809637,
              0.13607743, -0.11446252, 0.26792109, 0.78047138, 0.63460857,
              0.49122369, 0.52007174, 0.73000264, 0.66986895, 0.73576689,
              0.86301267, 0.87887371, 0.35185754, 0.93417215, 0.64732957,
              0.63173044, 0.66627824, 0.53644657, 0.20477486, 0.98458421,
              0.38277245, 0.03746676, 0.92510188, 0.57714164, 0.84932971,
              0.36127412, 0.12125921, 1.1362772, 0.34361625, -0.78150457,
              0.70582712
          ]],
         dtype=np.float32)
     seed = 12345
     random_seed.set_random_seed(seed)
     for state_is_tuple in [False, True]:
         with session.Session() as sess:
             # `reuse` is False on the first pass and True on the second, so
             # the second pass asks for the variables created by the first
             # one instead of creating new ones.
             with variable_scope.variable_scope(
                     "state_is_tuple",
                     reuse=state_is_tuple,
                     initializer=init_ops.glorot_uniform_initializer()):
                 lstm_cell = core_rnn_cell_impl.BasicLSTMCell(
                     num_units, state_is_tuple=state_is_tuple)
                 cell = rnn_cell.AttentionCellWrapper(
                     lstm_cell, attn_length, state_is_tuple=state_is_tuple)
                 # Despite the `zeros*` names, the initial state pieces are
                 # seeded uniform random tensors drawn between 0.0 and 1.0.
                 zeros1 = random_ops.random_uniform((batch_size, num_units),
                                                    0.0,
                                                    1.0,
                                                    seed=seed + 1)
                 zeros2 = random_ops.random_uniform((batch_size, num_units),
                                                    0.0,
                                                    1.0,
                                                    seed=seed + 2)
                 zeros3 = random_ops.random_uniform((batch_size, num_units),
                                                    0.0,
                                                    1.0,
                                                    seed=seed + 3)
                 attn_state_zeros = random_ops.random_uniform(
                     (batch_size, attn_length * num_units),
                     0.0,
                     1.0,
                     seed=seed + 4)
                 # Tuple layout of the initial state: a pair of tensors, a
                 # third tensor, and the attention-state tensor.
                 zero_state = ((zeros1, zeros2), zeros3, attn_state_zeros)
                 if not state_is_tuple:
                     # Non-tuple mode takes the same pieces concatenated
                     # along axis 1 into a single tensor.
                     zero_state = array_ops.concat([
                         zero_state[0][0], zero_state[0][1], zero_state[1],
                         zero_state[2]
                     ], 1)
                 inputs = random_ops.random_uniform((batch_size, num_units),
                                                    0.0,
                                                    1.0,
                                                    seed=seed + 5)
                 output, state = cell(inputs, zero_state)
                 if state_is_tuple:
                     # Flatten the tuple state so both modes can be compared
                     # with the same `expected_state` array.
                     state = array_ops.concat(
                         [state[0][0], state[0][1], state[1], state[2]], 1)
                 sess.run(variables.global_variables_initializer())
                 self.assertAllClose(sess.run(output), expected_output)
                 self.assertAllClose(sess.run(state), expected_state)
def _weight(shape):
    """Creates a variable of `shape` with a Glorot uniform initial value."""

    def _initial_value():
        # The initial value is supplied through a callable (rather than a
        # tensor) so that the variable can also be constructed inside loops.
        return init_ops.glorot_uniform_initializer(seed=0)(shape)

    return variables.Variable(_initial_value)
Ejemplo n.º 28
0
 def _initialize(self, X):
   """Validate `X` and build the model graph and its functions on first use.

   On the first call this creates the input/output placeholders, the dense
   layer, the weighted loss (plus optional L1/L2 penalties), the accuracy and
   optional confusion-matrix metrics, and stores the compiled functions in
   `_f_train`, `_f_score`, `_f_pred_prob` and `_f_pred_logit`. Later calls
   only validate the feature dimension of `X`.

   Args:
     X: any object with a `shape` attribute; the product of `shape[1:]` is
       taken as the feature dimension.

   Returns:
     `self`, both when the graph is built and when it already existed.

   Raises:
     ValueError: if `X` has no `shape` attribute.
     RuntimeError: if the feature dimension of `X` differs from the one
       recorded on the instance.
   """
   # ====== check inputs dimensions ====== #
   if not hasattr(X, 'shape'):
     raise ValueError("`X` must have `shape` attribute.")
   feat_dim = np.prod(X.shape[1:])
   # the first input seen fixes the feature dimension
   if self._feat_dim is None:
     self._feat_dim = feat_dim
   # validate input dimension
   if feat_dim != self._feat_dim:
     raise RuntimeError("Feature dimension mismatch %d and %d" %
                        (feat_dim, self.feat_dim))
   # graph already built: return `self`, same as the first-call path
   if hasattr(self, '_f_train'):
     return self
   # ====== binary or multi-classes ====== #
   # NOTE(review): in the binary case the labels are shaped `(None,)` while
   # the dense layer below still has `nb_classes` (= 2) units -- confirm the
   # logits and labels shapes agree for `sigmoid_cross_entropy`.
   if self.nb_classes == 2:
     out_shape = (None,)
     fn_activation = tf.nn.sigmoid
     fn_loss = tf.losses.sigmoid_cross_entropy
     fn_acc = K.metrics.binary_accuracy
   else:
     out_shape = (None, self.nb_classes)
     fn_activation = tf.nn.softmax
     fn_loss = tf.losses.softmax_cross_entropy
     fn_acc = K.metrics.categorical_accuracy
   # ====== create model ====== #
   with tf.name_scope(self.name, 'logistic_regression'):
     # inputs
     self._X = K.placeholder(shape=(None, self.feat_dim),
                             dtype=self.dtype,
                             name='%s_input' % self.name)
     self._y = K.placeholder(shape=out_shape,
                             dtype=self.dtype,
                             name='%s_output' % self.name)
     # check the bias: a number becomes the initial bias value, False/None
     # is passed on as `None`, anything else is passed through unchanged
     if is_number(self.fit_intercept):
       b_init = float(self.fit_intercept)
     elif self.fit_intercept is False or \
     self.fit_intercept is None:
       b_init = None
     else:
       b_init = self.fit_intercept
     # create the model and initialize
     with K.variable_dtype(dtype=self.dtype):
       # NOTE(review): `randint()` is called without arguments; confirm that
       # the type of `_rand_state` supports this.
       W_init = init_ops.glorot_uniform_initializer(
           seed=self._rand_state.randint())
       self._model = N.Dense(num_units=self.nb_classes,
                             W_init=W_init,
                             b_init=b_init,
                             activation=K.linear)
       y_logits = self._model(self._X)
     y_prob = fn_activation(y_logits)
     # applying class weights: pick one weight per sample from its label
     class_weights = tf.constant(value=self._class_weight,
                                 dtype=self.dtype,
                                 name="class_weights")
     weights = tf.gather(class_weights,
                         tf.cast(self._y, 'int32') if self.nb_classes == 2 else
                         tf.argmax(self._y, axis=-1))
     # optimizer
     params = [v for v in self._model.variables
               if has_roles(v, Weight) or has_roles(v, Bias)]
     losses = fn_loss(self._y, y_logits, weights=weights)
     # the norm ops are only created when the matching penalty is enabled
     l1_norm = tf.norm(self._model.get('W'), ord=1) if self.l1 > 0. else 0
     l2_norm = tf.norm(self._model.get('W'), ord=2) if self.l2 > 0. else 0
     losses = losses + self.l1 * l1_norm + self.l2 * l2_norm
     acc = fn_acc(self._y, y_prob)
     updates = self._optimizer.get_updates(losses, params)
     # create function
     metrics = [losses, acc]
     if self.confusion_matrix:
       metrics.append(
           K.metrics.confusion_matrix(y_true=self._y, y_pred=y_prob,
                                      labels=self.nb_classes))
     self._f_train = K.function(inputs=(self._X, self._y),
                                outputs=metrics,
                                updates=updates,
                                training=True)
     self._f_score = K.function(inputs=(self._X, self._y),
                                outputs=metrics,
                                training=False)
     self._f_pred_prob = K.function(inputs=self._X,
                                    outputs=y_prob,
                                    training=False)
     self._f_pred_logit = K.function(inputs=self._X,
                                     outputs=y_logits,
                                     training=False)
   return self