def __init__(self,
             input_size,
             batch_size,
             data_generator_creator,
             max_steps=None):
    """Create the feeding placeholders, a FIFO queue and its enqueue op.

    Args:
        input_size: Size of the last axis of the `inputs` placeholder;
            also forwarded to the parent initializer.
        batch_size: Leading dimension of the `inputs` and
            `sequence_lengths` placeholders.
        data_generator_creator: Stored on the instance unchanged; it is
            not called here.
        max_steps: Stored as `steps_left`. Defaults to None.
    """
    super().__init__(input_size)

    self.batch_size = batch_size
    self.data_generator_creator = data_generator_creator
    self.steps_left = max_steps

    with tf.device("/cpu:0"):
        # The middle axis of `inputs` is left unspecified (None).
        inputs_shape = [batch_size, None, input_size]
        self.inputs = tf.placeholder(tf.float32, inputs_shape, name='inputs')
        self.sequence_lengths = tf.placeholder(
            tf.int32, [batch_size], name='sequence_lengths')
        self.labels = tf.sparse_placeholder(tf.int32, name='labels')

        # The third component is tf.string because the sparse labels are
        # enqueued in serialized form (see below).
        component_dtypes = [tf.float32, tf.int32, tf.string]
        self.queue = tf.FIFOQueue(dtypes=component_dtypes, capacity=100)

        labels_as_strings = tf.serialize_many_sparse(self.labels)
        queue_components = [
            self.inputs, self.sequence_lengths, labels_as_strings]
        self.enqueue_op = self.queue.enqueue(queue_components)
Beispiel #2
0
    def __init__(self,
                 input_size,
                 batch_size,
                 data_generator_creator,
                 max_steps=None):
        """Set up the placeholders and the queue that examples are fed into.

        Args:
            input_size: Feature dimension of `inputs`; also passed to the
                parent initializer.
            batch_size: Number of sequences per fed batch.
            data_generator_creator: Kept on the instance for later use; it
                is not invoked in this method.
            max_steps: Initial value of `steps_left` (None by default).
        """
        super().__init__(input_size)
        self.batch_size = batch_size
        self.data_generator_creator = data_generator_creator
        self.steps_left = max_steps

        with tf.device("/cpu:0"):
            # Placeholders for one batch. `inputs` has dimensions
            # [batch_size, max_time, input_size].
            self.inputs = tf.placeholder(
                tf.float32, [batch_size, None, input_size], name='inputs')
            self.sequence_lengths = tf.placeholder(
                tf.int32, [batch_size], name='sequence_lengths')
            self.labels = tf.sparse_placeholder(tf.int32, name='labels')

            # Queue holding (inputs, sequence_lengths, serialized labels).
            queue_dtypes = [tf.float32, tf.int32, tf.string]
            self.queue = tf.FIFOQueue(dtypes=queue_dtypes, capacity=100)

            # Sparse tensors cannot be enqueued directly, so the labels are
            # serialized into a string tensor first.
            labels_serialized = tf.serialize_many_sparse(self.labels)

            batch_components = [
                self.inputs,
                self.sequence_lengths,
                labels_serialized,
            ]
            self.enqueue_op = self.queue.enqueue(batch_components)
  def testSerializeManySparse(self):
    """Check the static shape produced by serializing a one-row sparse tensor."""
    nonzero_indices = tf.constant([[0, 1]], dtype=tf.int64)
    nonzero_values = tf.constant([2], dtype=tf.int64)
    sparse_input = tf.SparseTensor(
        indices=nonzero_indices,
        values=nonzero_values,
        dense_shape=[1, 2])

    with self.cached_session():
      # The op name and output type are deliberately passed positionally.
      serialized = tf.serialize_many_sparse(
          sparse_input, 'serialize_name', tf.string)
      expected_shape = (1, 3)
      self.assertEqual(expected_shape, serialized.shape)
Beispiel #4
0
  def testSerializeManySparse(self):
    """Serialize a [1, 2] sparse tensor and verify the result has shape (1, 3)."""
    # A single non-zero entry (value 2) at position [0, 1].
    entry_positions = tf.constant([[0, 1]], dtype=tf.int64)
    entry_values = tf.constant([2], dtype=tf.int64)
    sp = tf.SparseTensor(
        indices=entry_positions, values=entry_values, dense_shape=[1, 2])

    with self.cached_session():
      result = tf.serialize_many_sparse(sp, 'serialize_name', tf.string)
      self.assertEqual((1, 3), result.shape)
Beispiel #5
0
    def benchmarkVeryLarge2DFloatSparseTensor(self):
        """Benchmark a tensors-map round trip against serialize/deserialize.

        Builds a [64, 10000] sparse tensor holding 10000 string values,
        checks that both round trips yield equal values, indices and shape,
        then hands each round-trip op to `run_op_benchmark`.
        """
        np.random.seed(127)
        num_elements = 10000
        batch_size = 64
        row_ids = np.random.randint(batch_size,
                                    size=num_elements,
                                    dtype=np.int64)
        col_ids = np.arange(num_elements, dtype=np.int64)
        # Sorting the (row, column) pairs puts the indices in row-major order.
        sorted_pairs = sorted(zip(row_ids, col_ids))
        indices_np = np.asarray(sorted_pairs, dtype=np.int64)
        values_list = ["feature_value_for_embedding_lookup"] * num_elements
        shape_np = np.asarray([batch_size, num_elements], dtype=np.int64)

        with tf.Session() as sess, tf.device("/cpu:0"):
            indices_var = tf.Variable(indices_np)
            values_var = tf.Variable(values_list)
            shape_var = tf.Variable(shape_np)
            st = tf.SparseTensor(indices_var, values_var, shape_var)

            # Path 1: store in, then fetch from, the sparse tensors map.
            handles = add_many_sparse_to_tensors_map(st)
            via_map = take_many_sparse_from_tensors_map(
                sparse_map_op=handles.op, sparse_handles=handles)

            # Path 2: serialize to strings, then deserialize.
            serialized = tf.serialize_many_sparse(st)
            via_strings = tf.deserialize_many_sparse(
                serialized, dtype=values_var.dtype)

            tf.global_variables_initializer().run()

            map_result = sess.run(via_map)
            strings_result = sess.run(via_strings)
            for field in ("values", "indices", "shape"):
                np.testing.assert_equal(getattr(map_result, field),
                                        getattr(strings_result, field))

            timed_ops = (
                (via_map.values.op,
                 "benchmark_very_large_2d_float_st_tensor_maps"),
                (via_strings.values.op,
                 "benchmark_very_large_2d_float_st_serialization"),
            )
            for op, label in timed_ops:
                self.run_op_benchmark(sess, op, min_iters=2000, name=label)
  def benchmarkVeryLarge2DFloatSparseTensor(self):
    """Benchmark a tensors-map round trip against serialize/deserialize.

    Builds a [64, 10000] sparse tensor holding 10000 string values, verifies
    that both round trips produce equal values, indices and shape, and then
    passes each round-trip op to `run_op_benchmark` with min_iters=2000.
    """
    np.random.seed(127)
    num_elements = 10000
    batch_size = 64
    indices_batch = np.random.randint(
        batch_size, size=num_elements, dtype=np.int64)
    indices_value = np.arange(num_elements, dtype=np.int64)
    # Sorting the (batch, value) pairs puts the indices in row-major order.
    indices = np.asarray(
        sorted(zip(indices_batch, indices_value)), dtype=np.int64)
    values = ["feature_value_for_embedding_lookup"] * num_elements
    shape = np.asarray([batch_size, num_elements], dtype=np.int64)
    with tf.Session() as sess:
      with tf.device("/cpu:0"):
        indices = tf.Variable(indices)
        values = tf.Variable(values)
        shape = tf.Variable(shape)
        st = tf.SparseTensor(indices, values, shape)

        # Path 1: store in, then fetch from, the sparse tensors map.
        st_handles = add_many_sparse_to_tensors_map(st)
        st_roundtrip = take_many_sparse_from_tensors_map(
            sparse_map_op=st_handles.op, sparse_handles=st_handles)
        st_roundtrip_op = st_roundtrip.values.op

        # Path 2: serialize to strings, then deserialize.
        st_serialized = tf.serialize_many_sparse(st)
        st_deserialized = tf.deserialize_many_sparse(
            st_serialized, dtype=values.dtype)
        st_deserialized_op = st_deserialized.values.op

        # tf.initialize_all_variables() is deprecated;
        # tf.global_variables_initializer() is its direct replacement.
        tf.global_variables_initializer().run()

        st_roundtrip_values = sess.run(st_roundtrip)
        st_deserialized_values = sess.run(st_deserialized)
        np.testing.assert_equal(
            st_roundtrip_values.values, st_deserialized_values.values)
        np.testing.assert_equal(
            st_roundtrip_values.indices, st_deserialized_values.indices)
        np.testing.assert_equal(
            st_roundtrip_values.shape, st_deserialized_values.shape)

        self.run_op_benchmark(
            sess, st_roundtrip_op, min_iters=2000,
            name="benchmark_very_large_2d_float_st_tensor_maps")
        self.run_op_benchmark(
            sess, st_deserialized_op, min_iters=2000,
            name="benchmark_very_large_2d_float_st_serialization")
 def testSerializeManyDeserializeManyRoundTrip(self):
   """Serialize then deserialize a fed string sparse tensor and compare."""
   with self.test_session(use_gpu=False) as sess:
     # Three string entries in a [4, 5] sparse tensor, so N == 4.
     fed_indices = np.array([[0, 0], [0, 1], [2, 0]], dtype=np.int64)
     fed_values = np.array([b"a", b"b", b"c"])
     fed_shape = np.array([4, 5], dtype=np.int64)

     sp_placeholder = self._SparseTensorPlaceholder(dtype=tf.string)
     serialized = tf.serialize_many_sparse(sp_placeholder)
     deserialized = tf.deserialize_many_sparse(serialized, dtype=tf.string)

     feeds = {
         sp_placeholder.indices: fed_indices,
         sp_placeholder.values: fed_values,
         sp_placeholder.shape: fed_shape,
     }
     serialized_out, deserialized_out = sess.run(
         [serialized, deserialized], feed_dict=feeds)

     self.assertEqual(serialized_out.shape, (4, 3))
     # The deserialized tensor must reproduce exactly what was fed in.
     expected_by_field = (
         ("indices", fed_indices),
         ("values", fed_values),
         ("shape", fed_shape),
     )
     for field, expected in expected_by_field:
       self.assertAllEqual(getattr(deserialized_out, field), expected)
 def testSerializeManyDeserializeManyRoundTrip(self):
     """Round-trip a string sparse tensor through serialize/deserialize.

     Feeds a [4, 5] sparse tensor with three entries, then checks the
     serialized shape is (4, 3) and the deserialized indices, values and
     shape equal the fed ones.
     """
     with self.test_session(use_gpu=False) as sess:
         # N == 4 because the fed dense shape is [4, 5].
         expected_indices = np.array(
             [[0, 0], [0, 1], [2, 0]], dtype=np.int64)
         expected_values = np.array([b"a", b"b", b"c"])
         expected_shape = np.array([4, 5], dtype=np.int64)

         placeholder = self._SparseTensorPlaceholder(dtype=tf.string)
         serialized_op = tf.serialize_many_sparse(placeholder)
         deserialized_op = tf.deserialize_many_sparse(
             serialized_op, dtype=tf.string)

         fetches = [serialized_op, deserialized_op]
         feed = {placeholder.indices: expected_indices,
                 placeholder.values: expected_values,
                 placeholder.shape: expected_shape}
         serialized_result, roundtrip = sess.run(fetches, feed_dict=feed)

         self.assertEqual(serialized_result.shape, (4, 3))
         self.assertAllEqual(roundtrip.indices, expected_indices)
         self.assertAllEqual(roundtrip.values, expected_values)
         self.assertAllEqual(roundtrip.shape, expected_shape)