def __init__(self, capacity):
    """
    Create one placeholder per entry of DATA_FORMAT and a FIFO queue fed by them.

    :param capacity: Capacity passed to the underlying tf.FIFOQueue.
    :type capacity: Int
    """
    self.names = []
    self.placeholders = []
    for field, spec in DATA_FORMAT.items():
        dtype, shape = spec
        placeholder_name = '{}_placeholder'.format(field)
        if shape is not None:
            placeholder = tf.placeholder(
                dtype, shape=shape, name=placeholder_name)
        else:
            # A shape of None marks the field as sparse.
            placeholder = tf.sparse_placeholder(dtype, name=placeholder_name)
        self.placeholders.append(placeholder)
        self.names.append(field)

    # NOTE(review): _store_sparse_tensors is defined elsewhere; presumably it
    # makes sparse placeholders enqueueable and records how to restore them
    # after dequeue -- confirm against the helper. It receives a copy so that
    # self.placeholders is left untouched.
    tensor_list, self.sparse_info = _store_sparse_tensors(
        list(self.placeholders), False)
    queue_shapes = _shapes([tensor_list], None, False)
    queue_dtypes = _dtypes([tensor_list])

    self.example_queue = tf.FIFOQueue(
        capacity, queue_dtypes, queue_shapes, names=self.names)
    # The queue is created with `names`, so elements are enqueued as a dict
    # keyed by field name.
    named_tensors = dict(zip(self.names, tensor_list))
    self.enqueue_op = self.example_queue.enqueue(named_tensors)
    self._test_inputs = None
def __init__(self, capacity, unsupervised=False):
    """
    Create one placeholder per data-format entry and a FIFO queue fed by them.

    :param capacity: Capacity passed to the underlying tf.FIFOQueue.
    :type capacity: Int
    :param unsupervised: Use DATA_FORMAT_UNSUPERVISED / DATA_KEY_ORDER_UNSUPERVISED
        instead of DATA_FORMAT / DATA_KEY_ORDER.
    :type unsupervised: Bool
    """
    if not unsupervised:
        self.data_format = DATA_FORMAT
        self.data_key_order = DATA_KEY_ORDER
    else:
        self.data_format = DATA_FORMAT_UNSUPERVISED
        self.data_key_order = DATA_KEY_ORDER_UNSUPERVISED

    self.names = []
    self.placeholders = []
    for field, spec in self.data_format.items():
        dtype, shape = spec
        placeholder_name = '{}_placeholder'.format(field)
        if shape is not None:
            placeholder = tf.placeholder(
                dtype, shape=shape, name=placeholder_name)
        else:
            # A shape of None marks the field as sparse.
            placeholder = tf.sparse_placeholder(dtype, name=placeholder_name)
        self.placeholders.append(placeholder)
        self.names.append(field)

    # NOTE(review): _store_sparse_tensors is defined elsewhere; presumably it
    # makes sparse placeholders enqueueable and records how to restore them
    # after dequeue -- confirm against the helper. It receives a copy so that
    # self.placeholders is left untouched.
    tensor_list, self.sparse_info = _store_sparse_tensors(
        list(self.placeholders), False)
    queue_shapes = _shapes([tensor_list], None, False)
    queue_dtypes = _dtypes([tensor_list])

    self.example_queue = tf.FIFOQueue(
        capacity, queue_dtypes, queue_shapes, names=self.names)
    # The queue is created with `names`, so elements are enqueued as a dict
    # keyed by field name.
    named_tensors = dict(zip(self.names, tensor_list))
    self.enqueue_op = self.example_queue.enqueue(named_tensors)
    self._test_inputs = None
def _custom_shuffle_batch(tensors, batch_size, capacity, min_after_dequeue,
                          keep_input, num_threads=1, seed=None,
                          enqueue_many=False, shapes=None,
                          allow_smaller_final_batch=False, shared_name=None,
                          name=None, shuffle=False):
    """Helper function for `shuffle_batch` and `maybe_shuffle_batch`.

    Batches `tensors` through a queue. When `shuffle` is true the queue is a
    `RandomShuffleQueue`; otherwise a `FIFOQueue` is used and
    `min_after_dequeue` / `seed` are not passed to it.

    Raises:
      ValueError: If eager execution is enabled, or if `capacity` is not
        strictly greater than `min_after_dequeue` (checked for both queue
        kinds, since their difference is used as a divisor below).
    """
    if context.executing_eagerly():
        raise ValueError(
            "Input pipelines based on Queues are not supported when eager execution"
            " is enabled. Please use tf.data to ingest data into your model"
            " instead.")

    tensor_list = tf_input._as_tensor_list(tensors)
    scope_values = list(tensor_list) + [keep_input]
    with ops.name_scope(name, "shuffle_batch", scope_values) as name:
        if capacity <= min_after_dequeue:
            raise ValueError(
                "capacity %d must be bigger than min_after_dequeue %d."
                % (capacity, min_after_dequeue))

        tensor_list = tf_input._validate(tensor_list)
        keep_input = tf_input._validate_keep_input(keep_input, enqueue_many)
        tensor_list, sparse_info = tf_input._store_sparse_tensors(
            tensor_list, enqueue_many, keep_input)
        types = tf_input._dtypes([tensor_list])
        shapes = tf_input._shapes([tensor_list], shapes, enqueue_many)

        # Arguments shared by both queue flavours.
        queue_kwargs = dict(
            capacity=capacity,
            dtypes=types,
            shapes=shapes,
            shared_name=shared_name)
        if not shuffle:
            # Remove shuffle property
            batch_queue = data_flow_ops.FIFOQueue(**queue_kwargs)
        else:
            batch_queue = data_flow_ops.RandomShuffleQueue(
                min_after_dequeue=min_after_dequeue,
                seed=seed,
                **queue_kwargs)

        tf_input._enqueue(batch_queue, tensor_list, num_threads, enqueue_many,
                          keep_input)

        # Fraction of the space above `min_after_dequeue` that is occupied.
        surplus = math_ops.to_float(
            math_ops.maximum(0, batch_queue.size() - min_after_dequeue))
        per_slot = 1. / (capacity - min_after_dequeue)
        fill_fraction = surplus * per_slot
        summary_name = ("fraction_over_%d_of_%d_full"
                        % (min_after_dequeue, capacity - min_after_dequeue))
        summary.scalar(summary_name, fill_fraction)

        dequeue = (batch_queue.dequeue_up_to if allow_smaller_final_batch
                   else batch_queue.dequeue_many)
        batch = dequeue(batch_size, name=name)
        batch = tf_input._restore_sparse_tensors(batch, sparse_info)
        return tf_input._as_original_type(tensors, batch)
def tf_shuffle_batch_join(tensors_list, batch_size, capacity, do_dequeue,
                          min_after_dequeue, seed=None, enqueue_many=False,
                          shapes=None, allow_smaller_final_batch=False,
                          shared_name=None, name=None):
    """
    Custom version of tf.train.shuffle_batch_join which only dequeues from the
    queue when a tf.cond switch is set.

    When `do_dequeue` evaluates to false, the queue is left untouched and a
    batch of ones (one tensor per entry of `tensors_list[0]`) is returned in
    place of real data.

    :param tensors_list: Data pipeline tensors.
    :type tensors_list: List of Dict
    :param batch_size: Train and test batch size.
    :type batch_size: Int
    :param capacity: The maximum number of elements in the queue.
    :type capacity: Int
    :param do_dequeue: Switch for dequeuing
    :type do_dequeue: tf.Bool
    :param min_after_dequeue: Minimum number elements in the queue after a
                              dequeue.
    :type min_after_dequeue: Int
    :param seed: Seed for the random shuffling within the queue.
    :type seed: Int
    :param enqueue_many: Whether each tensor in tensor_list is a single
                         example.
    :type enqueue_many: Bool
    :param shapes: The shapes for each example. Defaults to the inferred
                   shapes for tensor_list.
    :type shapes: List
    :param allow_smaller_final_batch: Allow the final batch to be smaller if
                                      there are insufficient items left in
                                      the queue.
    :type allow_smaller_final_batch: Bool
    :param shared_name: If set, this queue will be shared under the given
                        name across multiple sessions.
    :type shared_name: String
    :param name: A name for the operations.
    :type name: String

    :returns: A list or dictionary of tensors with the types as tensors_list
    :rtype: List or Dict
    """
    tensor_list_list = _as_tensor_list_list(tensors_list)
    with ops.name_scope(name, "shuffle_batch_join",
                        _flatten(tensor_list_list)) as name:
        tensor_list_list = _validate_join(tensor_list_list)
        tensor_list_list, sparse_info = _store_sparse_tensors_join(
            tensor_list_list, enqueue_many)
        types = _dtypes(tensor_list_list)
        shapes = _shapes(tensor_list_list, shapes, enqueue_many)
        queue = data_flow_ops.RandomShuffleQueue(
            capacity=capacity,
            min_after_dequeue=min_after_dequeue,
            seed=seed,
            dtypes=types,
            shapes=shapes,
            shared_name=shared_name)
        _enqueue_join(queue, tensor_list_list, enqueue_many)

        # Fraction of the space above `min_after_dequeue` that is occupied.
        full = (math_ops.cast(
            math_ops.maximum(0, queue.size() - min_after_dequeue),
            dtypes.float32) * (1. / (capacity - min_after_dequeue)))
        summary_name = (
            "queue/%sfraction_over_%d_of_%d_full" %
            (name, min_after_dequeue, capacity - min_after_dequeue))
        summary.scalar(summary_name, full)

        def do_dequeue_func():
            # Real path: pull a batch out of the shuffle queue.
            if allow_smaller_final_batch:
                dequeued = queue.dequeue_up_to(batch_size)
            else:
                dequeued = queue.dequeue_many(batch_size, name=name)
            dequeued = _restore_sparse_tensors(dequeued, sparse_info)
            return _as_original_type(tensors_list[0], dequeued)

        def do_not_dequeue_func():
            # Stand-in path: emit ones without consuming anything from the
            # queue. Previously the `allow_smaller_final_batch` case computed
            # a batch size (via `tf.select`, which is not available in
            # TensorFlow >= 1.0) and then returned None, which tf.cond cannot
            # use as a branch output.
            if allow_smaller_final_batch:
                # Mirror dequeue_up_to: at most `batch_size` rows, fewer when
                # the queue currently holds fewer elements.
                leading_dim = math_ops.minimum(queue.size(), batch_size)
            else:
                leading_dim = batch_size
            # dtype follows each source tensor because tf.cond requires both
            # branches to produce outputs of the same types.
            # NOTE(review): this iterates tensors_list[0] directly, so it
            # assumes a list/tuple of dense tensors with fully known static
            # shapes; a dict would yield its keys here -- confirm callers.
            return [
                tf.ones(shape=[leading_dim] + t.get_shape().as_list(),
                        dtype=t.dtype)
                for t in tensors_list[0]
            ]

        dequeued = tf.cond(do_dequeue, do_dequeue_func, do_not_dequeue_func)
        return dequeued