Ejemplo n.º 1
0
def sampled_softmax_loss(src_emb,
                         pos_ids,
                         neg_num,
                         output_emb_table,
                         output_emb_bias,
                         node_size,
                         s2h=True):
  """Compute the mean sampled-softmax loss for a batch of positive pairs.

  Args:
    src_emb: Source-node embeddings, shape [batch_size, dim].
    pos_ids: Ids of the positive target nodes.
    neg_num: Number of negative classes sampled per batch.
    output_emb_table: Output embedding weights, shape [node_size, dim].
    output_emb_bias: Output bias vector, shape [node_size].
    node_size: Total number of nodes (i.e. the number of classes).
    s2h: If True, stringify the ids and hash them into [0, node_size).

  Returns:
    A 3-element list [mean_loss, None, None].
  """
  labels = pos_ids
  if s2h:
    # Map arbitrary ids into the dense class range via string hashing.
    labels = tf.string_to_hash_bucket_fast(
        tf.as_string(labels),
        node_size,
        name='softmax_loss_to_hash_bucket_oper')

  per_example = tf.nn.sampled_softmax_loss(
      weights=output_emb_table,
      biases=output_emb_bias,
      labels=tf.reshape(labels, [-1, 1]),
      inputs=src_emb,
      num_sampled=neg_num,
      num_classes=node_size,
      partition_strategy='mod',
      remove_accidental_hits=True)

  return [tf.reduce_mean(per_example), None, None]
Ejemplo n.º 2
0
def _get_features_dict(input_dict, include_source_id=False):
  """Build the features dict from an input dict.

  Args:
    input_dict: Mapping keyed by `fields.InputDataFields` members.
    include_source_id: If True, also carry the (sanitized) source id through.

  Returns:
    A dict with the image, hashed-source-id key, shape fields, and any
    optional fields present in `input_dict`.
  """
  # Empty source ids are replaced so hashing never sees an empty string.
  source_id = _replace_empty_string_with_random_number(
      input_dict[fields.InputDataFields.source_id])

  features = {
      fields.InputDataFields.image:
          input_dict[fields.InputDataFields.image],
      HASH_KEY:
          tf.cast(tf.string_to_hash_bucket_fast(source_id, HASH_BINS),
                  tf.int32),
      fields.InputDataFields.true_image_shape:
          input_dict[fields.InputDataFields.true_image_shape],
      fields.InputDataFields.original_image_spatial_shape:
          input_dict[fields.InputDataFields.original_image_spatial_shape],
  }

  if include_source_id:
    features[fields.InputDataFields.source_id] = source_id

  # Copy through whichever optional fields the input actually provides.
  optional_keys = (
      fields.InputDataFields.original_image,
      fields.InputDataFields.image_additional_channels,
      fields.InputDataFields.context_features,
      fields.InputDataFields.valid_context_size,
  )
  for key in optional_keys:
    if key in input_dict:
      features[key] = input_dict[key]
  return features
Ejemplo n.º 3
0
    def encode(self, ego_tensor):
        """Look up (optionally hashed) embeddings for the src ids.

        Args:
          ego_tensor: Object exposing `src.ids` and `src.continuous_attrs`.

        Returns:
          A 2-D tensor of shape [-1, self._dim].
        """
        if self._use_edge:
            # TODO: replace this hack with edge ids.
            ids = tf.cast(ego_tensor.src.continuous_attrs, dtype=tf.int64)
        else:
            ids = ego_tensor.src.ids

        if self._str2hash:
            # Stringify then hash the ids into [0, self._num).
            ids = tf.string_to_hash_bucket_fast(
                tf.as_string(ids),
                self._num,
                name=self._name + 'str_to_hash_bucket_op')

        looked_up = tf.nn.embedding_lookup(
            self._emb_table,
            ids,
            name=self._name + 'ids_embedding_lookup_op')
        return tf.reshape(looked_up, [-1, self._dim])
Ejemplo n.º 4
0
 def hash_in_range(self, buckets, base, limit):
     """True iff the hash bucket of `self.scene_id` lies in [base, limit)."""
     bucket = tf.string_to_hash_bucket_fast(self.scene_id, buckets)
     at_or_above_base = tf.greater_equal(bucket, base)
     below_limit = tf.less(bucket, limit)
     return tf.logical_and(at_or_above_base, below_limit)
Ejemplo n.º 5
0
    def encode(self, input_attrs):
        """Encode [continuous_attrs, categorical_attrs] into one embedding.

        Args:
          input_attrs: A list [continuous_attrs, categorical_attrs].

        Returns:
          The concatenated (and optionally dense-projected) embedding tensor.
        """
        continuous_attrs, categorical_attrs = input_attrs[0], input_attrs[1]

        # Hash each categorical column and look up its embedding.
        cate_embs = []
        if self._categorical_features:
            for idx, attr_name, max_num, _ in self._categorical_features:
                hashed = tf.string_to_hash_bucket_fast(
                    categorical_attrs[:, idx],
                    max_num,
                    name=self._name + 'to_hash_bucket_%s' % (attr_name))
                cate_embs.append(
                    tf.nn.embedding_lookup(
                        self._emb_table[attr_name],
                        hashed,
                        name=self._name + 'embedding_lookup_%s' % (attr_name)))

        con_emb = None
        num_continuous = self._feature_num - len(self._categorical_features)
        if num_continuous > 0:  # contains continuous features
            # Log-compress; the +2 shift keeps the log argument positive
            # (presumably attrs >= -1 — TODO confirm against the data).
            con_emb = tf.log(
                tf.reshape(continuous_attrs, [-1, num_continuous]) + 2)

        with tf.variable_scope(self._name + 'attrs_encoding',
                               reuse=tf.AUTO_REUSE):
            cate_part = None
            if cate_embs:  # at least one categorical embedding
                cate_part = tf.concat(cate_embs, axis=-1, name="cate_concat")

            if con_emb is None:
                print('no continuous feature to emb')
                out = cate_part
            else:
                con_part = con_emb
                if self._use_input_bn:
                    con_part = tf.layers.batch_normalization(con_part,
                                                             training=True)
                if cate_part is None:
                    out = con_part
                else:
                    out = tf.concat([cate_part, con_part],
                                    axis=-1, name='con_cate_concat')

            if self._need_dense:
                out = tf.layers.dense(out, self._output_dim,
                                      activation=self._act, name='dense')

        return out