def test_table_roundtrip(self):
        # Round-trip check: export a graph that contains a lookup table, apply
        # the saved transform inside a fresh graph, and confirm the table still
        # maps strings to their vocabulary index.
        export_path = os.path.join(tempfile.mkdtemp(), 'export')

        # Phase 1: build the lookup graph and write it out as a saved transform.
        with tf.Graph().as_default(), tf.Session().as_default() as session:
            string_input = tf.placeholder(tf.string)
            # Map string through a table, in this case based on a constant tensor.
            vocabulary = tf.constant(['cat', 'dog', 'giraffe'])
            vocab_table = lookup.string_to_index_table_from_tensor(vocabulary)
            saved_transform_io.write_saved_transform_from_session(
                session,
                {'input': string_input},
                {'output': vocab_table.lookup(string_input)},
                export_path)

        # Phase 2: load the saved transform into a brand new graph and run it.
        with tf.Graph().as_default(), tf.Session().as_default() as session:
            # Using a computed input gives confidence that the graphs are fused.
            outputs = saved_transform_io.apply_saved_transform(
                export_path, {'input': tf.constant('dog')})
            # The table must be initialized before the lookup can be evaluated.
            session.run(tf.tables_initializer())
            # 'dog' sits at position 1 of the exported vocabulary.
            self.assertEqual(1, session.run(outputs['output']))
 def input_fn():
     """Build one randomly offset window of lyrics as ``(features, labels)``.

     Reads ``sequence_length``, ``seed``, ``lyrics_list``, ``vocab`` and
     ``input_key_column_name`` from the enclosing scope.

     Returns:
       A tuple ``(features, labels)``. ``features`` maps ``'lyrics'`` to a
       SparseTensor of dense shape ``[sequence_length, 1]`` holding the window
       strings, and ``input_key_column_name`` to a string key of the form
       ``'key_<random int>'``. ``labels`` are the table indices of the window
       shifted one position ahead.
     """
     # Random starting offset in [0, sequence_length).
     offset = random_ops.random_uniform(
         (), minval=0, maxval=sequence_length, dtype=dtypes.int32, seed=seed)
     # Concatenate lyrics_list so inputs and labels wrap when offset > 0.
     doubled_lyrics = lyrics_list + lyrics_list
     window = array_ops.slice(doubled_lyrics, [offset], [sequence_length])
     # One sparse entry per step: row ``step``, column 0.
     positions = array_ops.constant(
         [[step, 0] for step in range(sequence_length)], dtype=dtypes.int64)
     lyrics = sparse_tensor.SparseTensor(
         indices=positions,
         values=window,
         dense_shape=[sequence_length, 1])
     vocab_table = lookup.string_to_index_table_from_tensor(
         mapping=list(vocab), default_value=-1, name='lookup')
     # Labels come from the same window moved one position forward.
     next_tokens = array_ops.slice(
         doubled_lyrics, [offset + 1], [sequence_length])
     labels = vocab_table.lookup(next_tokens)
     key_suffix = string_ops.as_string(
         random_ops.random_uniform(
             (), minval=0, maxval=10000000, dtype=dtypes.int32, seed=seed))
     input_key = string_ops.string_join(['key_', key_suffix])
     features = {'lyrics': lyrics, input_key_column_name: input_key}
     return features, labels
def get_lookup_table_from_tensor(tensor,
                                 oov_buckets,
                                 device='/cpu:0',
                                 name='lookup_table'):
    """Create a string-to-index lookup table under a ``tf.device`` scope.

    Args:
      tensor: vocabulary passed to the table constructor as its mapping.
      oov_buckets: forwarded as ``num_oov_buckets``.
      device: device string handed to ``tf.device``; defaults to ``'/cpu:0'``.
      name: forwarded as the table's ``name``.

    Returns:
      The object returned by ``lookup.string_to_index_table_from_tensor``,
      built with ``default_value=-1``.
    """
    table_options = dict(
        num_oov_buckets=oov_buckets, default_value=-1, name=name)
    with tf.device(device):
        table = lookup.string_to_index_table_from_tensor(
            tensor, **table_options)
    return table
 def input_fn():
   """Return ``(features, labels)`` for one randomly positioned lyrics window.

   Uses ``sequence_length``, ``seed``, ``lyrics_list``, ``vocab`` and
   ``input_key_column_name`` from the enclosing scope.

   Returns:
     ``features``: dict with ``'lyrics'`` (SparseTensor of dense shape
     ``[sequence_length, 1]`` containing the window strings) and
     ``input_key_column_name`` (string ``'key_'`` joined with a random int).
     ``labels``: table indices of the window starting one position later.
   """
   window_start = random_ops.random_uniform(
       (),
       minval=0,
       maxval=sequence_length,
       dtype=dtypes.int32,
       seed=seed)
   # Concatenate lyrics_list so inputs and labels wrap when window_start > 0.
   wrapped_lyrics = lyrics_list + lyrics_list
   window_size = [sequence_length]
   input_tokens = array_ops.slice(wrapped_lyrics, [window_start], window_size)
   # Each token occupies its own row, always in column 0.
   row_col_pairs = [[row, 0] for row in range(sequence_length)]
   sparse_indices = array_ops.constant(row_col_pairs, dtype=dtypes.int64)
   sparse_inputs = sparse_tensor.SparseTensor(
       indices=sparse_indices,
       values=input_tokens,
       dense_shape=[sequence_length, 1])
   token_table = lookup.string_to_index_table_from_tensor(
       mapping=list(vocab), default_value=-1, name='lookup')
   # Labels are the tokens one position after the inputs.
   label_tokens = array_ops.slice(
       wrapped_lyrics, [window_start + 1], window_size)
   labels = token_table.lookup(label_tokens)
   random_id = random_ops.random_uniform(
       (), minval=0, maxval=10000000, dtype=dtypes.int32, seed=seed)
   input_key = string_ops.string_join(
       ['key_', string_ops.as_string(random_id)])
   return {'lyrics': sparse_inputs, input_key_column_name: input_key}, labels
Example #5
0
  def _bow(x):
    """Return one half of a bag-of-words encoding of ``x`` as a SparseTensor.

    ``vocab`` and ``part`` come from the enclosing scope, as do the helpers
    ``get_term_count_per_doc`` and ``segment_indices`` (defined elsewhere;
    presumably per-document term counts and per-document running positions --
    confirm against their definitions).

    Args:
      x: string tensor, tokenized with ``tf.string_split``.

    Returns:
      The SparseTensor of term ids when ``part == 'ids'``, otherwise the
      SparseTensor of float term counts. Both share indices and dense shape.
    """
    tokens = tf.string_split(x)
    # Tokens missing from vocab all receive the extra id len(vocab).
    vocab_table = lookup.string_to_index_table_from_tensor(
        vocab, num_oov_buckets=0, default_value=len(vocab))
    token_ids = vocab_table.lookup(tokens)

    # len(vocab) + 1 leaves room for the out-of-vocabulary id.
    counts = get_term_count_per_doc(token_ids, len(vocab) + 1)

    weights = tf.to_float(counts.values)
    ids = counts.indices[:, 1]

    # Output indices pair column 0 of the count indices with the value
    # segment_indices derives from that same column.
    first_column = counts.indices[:, 0]
    second_column = segment_indices(counts.indices[:, 0],
                                    token_ids.dense_shape[0])
    out_indices = tf.stack([first_column, second_column], 1)
    out_shape = counts.dense_shape

    weights_st = tf.SparseTensor(
        indices=out_indices, values=weights, dense_shape=out_shape)
    ids_st = tf.SparseTensor(
        indices=out_indices, values=ids, dense_shape=out_shape)

    return ids_st if part == 'ids' else weights_st
Example #6
0
    def _map_to_int(x, vocab):
        """Look up every string of ``x`` in a table built from ``vocab``.

        When ``vocab`` is empty, a single placeholder entry is appended before
        the table is built; a non-empty ``vocab`` is passed through unchanged.
        ``num_oov_buckets`` and ``default_value`` come from the enclosing
        scope.

        Args:
          x: a Tensor/SparseTensor of string.
          vocab: a tensor of vocabulary strings (per the original docstring,
            the unique string values within ``x``).

        Returns:
          A Tensor/SparseTensor of indexes (int) of the same shape as ``x``.
        """
        # 1 when vocab has no elements, 0 otherwise.
        padding_count = 1 - tf.minimum(tf.size(vocab), 1)
        padding = tf.fill(
            tf.reshape(padding_count, (1, )), '__dummy_value__index_zero__')
        padded_vocab = tf.concat([vocab, padding], 0)

        vocab_table = lookup.string_to_index_table_from_tensor(
            padded_vocab,
            num_oov_buckets=num_oov_buckets,
            default_value=default_value)
        return vocab_table.lookup(x)
    def _map_to_int(x):
        """Translate the strings in ``x`` to their index in ``vocab``.

        ``vocab`` comes from the enclosing scope. Strings that are not in it
        map to ``len(vocab)``, the table's default value.

        Args:
          x: a Tensor/SparseTensor of string.

        Returns:
          A Tensor/SparseTensor of indexes (int) of the same shape as ``x``.
        """
        missing_id = len(vocab)
        vocab_table = lookup.string_to_index_table_from_tensor(
            vocab, default_value=missing_id)
        indexed = vocab_table.lookup(x)
        return indexed
Example #8
0
  def _map_to_int(x):
    """Replace each string in ``x`` by its position in ``vocab``.

    ``vocab`` is taken from the enclosing scope; ``len(vocab)`` is used as the
    table's default value, so it is the id returned for unknown strings.

    Args:
      x: a Tensor/SparseTensor of string.

    Returns:
      A Tensor/SparseTensor of indexes (int) of the same shape as ``x``.
    """
    return lookup.string_to_index_table_from_tensor(
        vocab, default_value=len(vocab)).lookup(x)
Example #9
0
  def _tfidf(x):
    """Return one half of a tf-idf encoding of ``x`` as a SparseTensor.

    ``vocab``, ``example_count``, ``corpus_size`` and ``part`` come from the
    enclosing scope, as do the helpers ``get_term_count_per_doc`` and
    ``segment_indices`` (defined elsewhere; presumably per-document term
    counts and per-document running positions -- confirm against their
    definitions).

    Args:
      x: string tensor, tokenized with ``tf.string_split``.

    Returns:
      The SparseTensor of term ids when ``part == 'ids'``, otherwise the
      SparseTensor of tf-idf weights. Both share indices and dense shape.
    """
    tokens = tf.string_split(x)
    # Tokens missing from vocab all receive the extra id len(vocab).
    vocab_table = lookup.string_to_index_table_from_tensor(
        vocab, num_oov_buckets=0, default_value=len(vocab))
    token_ids = vocab_table.lookup(tokens)

    # len(vocab) + 1 leaves room for the out-of-vocabulary id.
    counts = get_term_count_per_doc(token_ids, len(vocab) + 1)

    # Append a zero count for the out-of-vocabulary id, then add one to every
    # count so the idf denominator can never be zero.
    doc_counts = tf.to_float(tf.concat([example_count, [0]], 0))
    total_docs = tf.to_float(corpus_size)
    idf = tf.log(total_docs / (1.0 + doc_counts))

    # Tokens per row of token_ids: sum a one for every looked-up token.
    ones_per_token = tf.SparseTensor(
        indices=token_ids.indices,
        values=tf.ones_like(token_ids.values),
        dense_shape=token_ids.dense_shape)
    doc_sizes = tf.to_float(tf.sparse_reduce_sum(ones_per_token, 1))

    # weight = idf(term) * count(term, doc) / size(doc)
    term_idf = tf.gather(idf, counts.indices[:, 1])
    idf_times_count = tf.multiply(term_idf, tf.to_float(counts.values))
    size_per_entry = tf.gather(doc_sizes, counts.indices[:, 0])
    weights = idf_times_count / size_per_entry

    ids = counts.indices[:, 1]

    # Output indices pair column 0 of the count indices with the value
    # segment_indices derives from that same column.
    first_column = counts.indices[:, 0]
    second_column = segment_indices(counts.indices[:, 0],
                                    token_ids.dense_shape[0])
    out_indices = tf.stack([first_column, second_column], 1)
    out_shape = counts.dense_shape

    weights_st = tf.SparseTensor(
        indices=out_indices, values=weights, dense_shape=out_shape)
    ids_st = tf.SparseTensor(
        indices=out_indices, values=ids, dense_shape=out_shape)

    return ids_st if part == 'ids' else weights_st
Example #10
0
 def convert_label(label):
     """Map ``label`` strings to their index in ``['>50K', '<=50K']``.

     Args:
       label: string tensor of labels.

     Returns:
       The result of looking ``label`` up in a table built from that list.
     """
     label_vocab = ['>50K', '<=50K']
     label_table = lookup.string_to_index_table_from_tensor(label_vocab)
     return label_table.lookup(label)
Example #11
0
 def map_to_int(x, vocab):
     """Look up the strings in ``x`` in a table built from ``vocab``.

     ``default_value`` comes from the enclosing scope and is passed to the
     table as its default value.

     Args:
       x: string tensor to convert.
       vocab: vocabulary used as the table's mapping.

     Returns:
       The result of ``table.lookup(x)``.
     """
     return lookup.string_to_index_table_from_tensor(
         vocab, default_value=default_value).lookup(x)
Example #12
0
 def preprocessing_fn(inputs):
     """Integerize ``inputs['x']`` against the vocabulary ``['a', 'b']``.

     Args:
       inputs: mapping that contains the string tensor under key ``'x'``.

     Returns:
       A dict with the single key ``'integerized'`` holding the lookup result.
     """
     ab_table = lookup.string_to_index_table_from_tensor(['a', 'b'])
     return {'integerized': ab_table.lookup(inputs['x'])}
Example #13
0
 def _str_to_int(x):
   """Convert the strings in ``x`` to indices into ``vocab``.

   ``vocab`` and ``default_value`` come from the enclosing scope. The table is
   built with no out-of-vocabulary hash buckets and ``default_value`` as its
   default.

   Args:
     x: string tensor to convert.

   Returns:
     The result of ``table.lookup(x)``.
   """
   table_options = dict(num_oov_buckets=0, default_value=default_value)
   vocab_table = lookup.string_to_index_table_from_tensor(
       vocab, **table_options)
   return vocab_table.lookup(x)
Example #14
0
 def _apply_vocab(x, vocab):
     """Look up the strings in ``x`` in a table built from ``vocab``.

     ``num_oov_buckets`` and ``default_value`` come from the enclosing scope
     and are forwarded to the table constructor unchanged.

     Args:
       x: string tensor to convert.
       vocab: vocabulary used as the table's mapping.

     Returns:
       The result of ``table.lookup(x)``.
     """
     table_options = dict(
         num_oov_buckets=num_oov_buckets, default_value=default_value)
     vocab_table = lookup.string_to_index_table_from_tensor(
         vocab, **table_options)
     return vocab_table.lookup(x)