def test_table_roundtrip(self):
    """Export a table-lookup transform, re-apply it, and check the result.

    A graph that maps a string placeholder through an index table is written
    with ``write_saved_transform_from_session``. The saved transform is then
    applied to a constant input in a fresh graph and the looked-up index is
    compared against the expected value.
    """
    export_dir = tempfile.mkdtemp()
    export_path = os.path.join(export_dir, 'export')

    # Phase 1: build the lookup graph and write it out.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        string_placeholder = tf.placeholder(tf.string)
        # The table's vocabulary comes from a constant tensor.
        vocabulary = tf.constant(['cat', 'dog', 'giraffe'])
        index_table = lookup.string_to_index_table_from_tensor(vocabulary)
        looked_up = index_table.lookup(string_placeholder)
        saved_transform_io.write_saved_transform_from_session(
            session,
            {'input': string_placeholder},
            {'output': looked_up},
            export_path)

    # Phase 2: load the saved transform into a new graph and run it.
    with tf.Graph().as_default(), tf.Session().as_default() as session:
        # Using a computed input gives confidence that the graphs are fused.
        applied = saved_transform_io.apply_saved_transform(
            export_path, {'input': tf.constant('dog')})
        session.run(tf.tables_initializer())
        looked_up_index = session.run(applied['output'])
        self.assertEqual(1, looked_up_index)
def input_fn():
    """Build sparse lyric inputs, integer labels and a random string key.

    A random start offset is drawn, a window of ``sequence_length`` items is
    sliced from the doubled lyrics list as the inputs, and the window shifted
    by one position is mapped through a vocabulary table to form the labels.
    ``sequence_length``, ``seed``, ``lyrics_list``, ``vocab`` and
    ``input_key_column_name`` are read from outside this function.

    Returns:
      A ``(features, labels)`` tuple. ``features`` maps ``'lyrics'`` to a
      SparseTensor with dense shape ``[sequence_length, 1]`` and
      ``input_key_column_name`` to the string join of ``'key_'`` and a random
      integer rendered as a string.
    """
    offset = random_ops.random_uniform(
        (), minval=0, maxval=sequence_length, dtype=dtypes.int32, seed=seed)

    # Doubling the list lets a window that starts at offset > 0 wrap around.
    doubled_lyrics = lyrics_list + lyrics_list
    window = array_ops.slice(doubled_lyrics, [offset], [sequence_length])

    # One sparse entry per position, each placed in column 0.
    positions = array_ops.constant(
        [[step, 0] for step in range(sequence_length)], dtype=dtypes.int64)
    sparse_lyrics = sparse_tensor.SparseTensor(
        indices=positions,
        values=window,
        dense_shape=[sequence_length, 1])

    vocab_table = lookup.string_to_index_table_from_tensor(
        mapping=list(vocab), default_value=-1, name='lookup')
    shifted_window = array_ops.slice(
        doubled_lyrics, [offset + 1], [sequence_length])
    labels = vocab_table.lookup(shifted_window)

    random_id = random_ops.random_uniform(
        (), minval=0, maxval=10000000, dtype=dtypes.int32, seed=seed)
    key = string_ops.string_join(['key_', string_ops.as_string(random_id)])

    features = {'lyrics': sparse_lyrics, input_key_column_name: key}
    return features, labels
def get_lookup_table_from_tensor(tensor, oov_buckets, device='/cpu:0',
                                 name='lookup_table'):
    """Create a string-to-index lookup table under a device scope.

    Args:
      tensor: vocabulary passed to
        ``lookup.string_to_index_table_from_tensor``.
      oov_buckets: forwarded as ``num_oov_buckets``.
      device: device string used for the ``tf.device`` scope in which the
        table is created.
      name: name given to the table.

    Returns:
      The table returned by ``lookup.string_to_index_table_from_tensor``,
      built with ``default_value=-1``.
    """
    table_options = {
        'num_oov_buckets': oov_buckets,
        'default_value': -1,
        'name': name,
    }
    with tf.device(device):
        table = lookup.string_to_index_table_from_tensor(
            tensor, **table_options)
    return table
def input_fn():
    """Produce a ``(features, labels)`` pair from a random lyrics window.

    The features hold a sparse, single-column view of ``sequence_length``
    consecutive lyric tokens starting at a random offset, plus a random
    string key. The labels are the vocabulary indices of the same window
    shifted forward by one token. ``sequence_length``, ``seed``,
    ``lyrics_list``, ``vocab`` and ``input_key_column_name`` come from
    outside this function.

    Returns:
      Tuple of a feature dict (keys ``'lyrics'`` and
      ``input_key_column_name``) and the label tensor.
    """
    window_size = [sequence_length]
    first_token = random_ops.random_uniform(
        (), minval=0, maxval=sequence_length, dtype=dtypes.int32, seed=seed)

    # The list is repeated so that windows starting past index 0 wrap around.
    wrapped_lyrics = lyrics_list + lyrics_list
    input_tokens = array_ops.slice(wrapped_lyrics, [first_token], window_size)

    # Sparse coordinates (row, 0) for every row in the window.
    coords = array_ops.constant(
        [[row, 0] for row in range(sequence_length)], dtype=dtypes.int64)
    lyrics_feature = sparse_tensor.SparseTensor(
        indices=coords, values=input_tokens,
        dense_shape=[sequence_length, 1])

    token_to_index = lookup.string_to_index_table_from_tensor(
        mapping=list(vocab), default_value=-1, name='lookup')
    next_tokens = array_ops.slice(
        wrapped_lyrics, [first_token + 1], window_size)
    labels = token_to_index.lookup(next_tokens)

    key_suffix = string_ops.as_string(
        random_ops.random_uniform(
            (), minval=0, maxval=10000000, dtype=dtypes.int32, seed=seed))
    input_key = string_ops.string_join(['key_', key_suffix])

    return {'lyrics': lyrics_feature, input_key_column_name: input_key}, labels
def _bow(x):
    """Build sparse term ids or term-count weights for the strings in `x`.

    `x` is split with ``tf.string_split``, each token is mapped to an integer
    through a table built from ``vocab`` (tokens not in the table map to
    ``len(vocab)``), and per-document term counts are obtained from
    ``get_term_count_per_doc``. ``vocab`` and ``part`` are read from outside
    this function.

    Args:
      x: string tensor accepted by ``tf.string_split``.

    Returns:
      A SparseTensor of term ids when ``part == 'ids'``, otherwise a
      SparseTensor of float term counts. Both share the same indices and
      dense shape.
    """
    vocab_size = len(vocab)
    tokens = tf.string_split(x)
    vocab_table = lookup.string_to_index_table_from_tensor(
        vocab, num_oov_buckets=0, default_value=vocab_size)
    token_ids = vocab_table.lookup(tokens)

    # NOTE(review): presumably a SparseTensor whose indices are
    # (document, term id) and whose values are counts - confirm against
    # get_term_count_per_doc.
    counts = get_term_count_per_doc(token_ids, vocab_size + 1)

    weights = tf.to_float(counts.values)
    term_ids = counts.indices[:, 1]

    # Output coordinates: first column of the count indices paired with
    # whatever segment_indices derives from that same column.
    first_column = counts.indices[:, 0]
    second_column = segment_indices(
        counts.indices[:, 0], token_ids.dense_shape[0])
    out_indices = tf.stack([first_column, second_column], 1)
    out_shape = counts.dense_shape

    weights_st = tf.SparseTensor(
        indices=out_indices, values=weights, dense_shape=out_shape)
    ids_st = tf.SparseTensor(
        indices=out_indices, values=term_ids, dense_shape=out_shape)

    return ids_st if part == 'ids' else weights_st
def _map_to_int(x, vocab):
    """Look up every string of `x` in an index table built from `vocab`.

    When `vocab` is empty, a single placeholder entry is appended before the
    table is created; a non-empty `vocab` is used unchanged.
    ``num_oov_buckets`` and ``default_value`` are read from outside this
    function.

    Args:
      x: a Tensor/SparseTensor of strings.
      vocab: a tensor containing the unique string values within `x`.

    Returns:
      A Tensor/SparseTensor of integer indexes with the same shape as `x`.
    """
    # 1 when vocab has no elements, 0 otherwise.
    padding_count = 1 - tf.minimum(tf.size(vocab), 1)
    padding = tf.fill(
        tf.reshape(padding_count, (1,)), '__dummy_value__index_zero__')
    padded_vocab = tf.concat([vocab, padding], 0)

    index_table = lookup.string_to_index_table_from_tensor(
        padded_vocab,
        num_oov_buckets=num_oov_buckets,
        default_value=default_value)
    return index_table.lookup(x)
def _map_to_int(x):
    """Convert the strings in `x` to vocabulary indexes.

    The table is built from ``vocab`` (read from outside this function) and
    uses ``len(vocab)`` as its default value.

    Args:
      x: a Tensor/SparseTensor of strings.

    Returns:
      A Tensor/SparseTensor of integer indexes with the same shape as `x`.
    """
    fallback_index = len(vocab)
    index_table = lookup.string_to_index_table_from_tensor(
        vocab, default_value=fallback_index)
    indexes = index_table.lookup(x)
    return indexes
def _tfidf(x):
    """Build sparse term ids or tf-idf weights for the strings in `x`.

    `x` is split with ``tf.string_split`` and mapped to integers through a
    table built from ``vocab`` (tokens not in the table map to
    ``len(vocab)``). Each weight is ``idf * term_count / document_size``,
    where ``idf = log(corpus_size / (1 + example_count))``. ``vocab``,
    ``example_count``, ``corpus_size`` and ``part`` are read from outside
    this function.

    Args:
      x: string tensor accepted by ``tf.string_split``.

    Returns:
      A SparseTensor of term ids when ``part == 'ids'``, otherwise a
      SparseTensor of tf-idf weights. Both share the same indices and dense
      shape.
    """
    vocab_size = len(vocab)
    tokens = tf.string_split(x)
    vocab_table = lookup.string_to_index_table_from_tensor(
        vocab, num_oov_buckets=0, default_value=vocab_size)
    token_ids = vocab_table.lookup(tokens)

    # NOTE(review): presumably a SparseTensor whose indices are
    # (document, term id) and whose values are counts - confirm against
    # get_term_count_per_doc.
    counts = get_term_count_per_doc(token_ids, vocab_size + 1)

    # A zero count is appended to example_count, giving one more entry than
    # the input has; one is then added to every count so that the division
    # below can never be by zero.
    padded_example_count = tf.to_float(tf.concat([example_count, [0]], 0))
    total_docs = tf.to_float(corpus_size)
    smoothed_example_count = 1.0 + padded_example_count
    idf = tf.log(total_docs / smoothed_example_count)

    # Summing a one per token along axis 1 yields each document's size.
    one_per_token = tf.SparseTensor(
        indices=token_ids.indices,
        values=tf.ones_like(token_ids.values),
        dense_shape=token_ids.dense_shape)
    doc_sizes = tf.to_float(tf.sparse_reduce_sum(one_per_token, 1))

    idf_per_entry = tf.gather(idf, counts.indices[:, 1])
    count_per_entry = tf.to_float(counts.values)
    weighted_counts = tf.multiply(idf_per_entry, count_per_entry)
    size_per_entry = tf.gather(doc_sizes, counts.indices[:, 0])
    weights = weighted_counts / size_per_entry
    term_ids = counts.indices[:, 1]

    # Output coordinates: first column of the count indices paired with
    # whatever segment_indices derives from that same column.
    first_column = counts.indices[:, 0]
    second_column = segment_indices(
        counts.indices[:, 0], token_ids.dense_shape[0])
    out_indices = tf.stack([first_column, second_column], 1)
    out_shape = counts.dense_shape

    weights_st = tf.SparseTensor(
        indices=out_indices, values=weights, dense_shape=out_shape)
    ids_st = tf.SparseTensor(
        indices=out_indices, values=term_ids, dense_shape=out_shape)

    return ids_st if part == 'ids' else weights_st
def convert_label(label):
    """Map a label string to its position in ``['>50K', '<=50K']``.

    Args:
      label: string tensor to look up.

    Returns:
      The result of looking `label` up in a table built from the two label
      strings.
    """
    label_vocabulary = ['>50K', '<=50K']
    label_table = lookup.string_to_index_table_from_tensor(label_vocabulary)
    label_index = label_table.lookup(label)
    return label_index
def map_to_int(x, vocab):
    """Look up `x` in an index table built from `vocab`.

    ``default_value`` is read from outside this function and passed to the
    table.

    Args:
      x: string tensor to look up.
      vocab: vocabulary used to build the table.

    Returns:
      The result of ``table.lookup(x)``.
    """
    index_table = lookup.string_to_index_table_from_tensor(
        vocab, default_value=default_value)
    indexes = index_table.lookup(x)
    return indexes
def preprocessing_fn(inputs):
    """Integerize ``inputs['x']`` against the vocabulary ``['a', 'b']``.

    Args:
      inputs: mapping that contains the key ``'x'``.

    Returns:
      A dict with the single key ``'integerized'`` holding the lookup result.
    """
    vocabulary = ['a', 'b']
    index_table = lookup.string_to_index_table_from_tensor(vocabulary)
    return {'integerized': index_table.lookup(inputs['x'])}
def _str_to_int(x):
    """Look up `x` in an index table built from ``vocab``.

    The table is created with zero out-of-vocabulary buckets and with
    ``default_value``; both ``vocab`` and ``default_value`` are read from
    outside this function.

    Args:
      x: string tensor to look up.

    Returns:
      The result of ``table.lookup(x)``.
    """
    table_options = {'num_oov_buckets': 0, 'default_value': default_value}
    index_table = lookup.string_to_index_table_from_tensor(
        vocab, **table_options)
    return index_table.lookup(x)
def _apply_vocab(x, vocab):
    """Look up `x` in an index table built from `vocab`.

    ``num_oov_buckets`` and ``default_value`` are read from outside this
    function and forwarded to the table constructor.

    Args:
      x: string tensor to look up.
      vocab: vocabulary used to build the table.

    Returns:
      The result of ``table.lookup(x)``.
    """
    table_options = {
        'num_oov_buckets': num_oov_buckets,
        'default_value': default_value,
    }
    index_table = lookup.string_to_index_table_from_tensor(
        vocab, **table_options)
    return index_table.lookup(x)