def __init__(self, audio_dir, coord, sample_rate, gc_enabled,
             receptive_field, sample_size=None, silence_threshold=None,
             queue_size=32):
    """Audio reader feeding sample chunks (and optional speaker ids) into queues.

    Args:
        audio_dir: Directory searched (via find_files) for audio files.
        coord: tf.train.Coordinator used to stop feeder threads.
        sample_rate: Sample rate the reader operates at.
        gc_enabled: Whether global conditioning (per-file ids) is used.
        receptive_field: Model context length in samples.
        sample_size: Optional length of each enqueued chunk.
        silence_threshold: Optional threshold for silence trimming.
        queue_size: Capacity of the sample queue.

    Raises:
        ValueError: If no audio files are found, or gc is enabled but file
            names do not carry an id.
    """
    self.audio_dir = audio_dir
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.receptive_field = receptive_field
    self.silence_threshold = silence_threshold
    self.gc_enabled = gc_enabled
    self.threads = []

    # Variable-length (N, 1) float chunks go through a padding queue.
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size,
                                     ['float32'],
                                     shapes=[(None, 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])

    if self.gc_enabled:
        # A parallel scalar queue carries the global-condition id.
        self.id_placeholder = tf.placeholder(dtype=tf.int32, shape=())
        self.gc_queue = tf.PaddingFIFOQueue(queue_size, ['int32'],
                                            shapes=[()])
        self.gc_enqueue = self.gc_queue.enqueue([self.id_placeholder])

    # TODO Find a better way to check this.
    # Checking inside the AudioReader's thread makes it hard to terminate
    # the execution of the script, so we do it in the constructor for now.
    files = find_files(audio_dir)
    if not files:
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
    if self.gc_enabled and not_all_have_id(files):
        raise ValueError("Global conditioning is enabled, but file names "
                         "do not conform to pattern having id.")

    # Determine the number of mutually-exclusive categories we will
    # accomodate in our embedding table.
    if self.gc_enabled:
        _, self.gc_category_cardinality = get_category_cardinality(files)
        # Add one to the largest index to get the number of categories,
        # since tf.nn.embedding_lookup expects zero-indexing. This
        # means one or more at the bottom correspond to unused entries
        # in the embedding lookup table. But that's a small waste of memory
        # to keep the code simpler, and preserves correspondance between
        # the id one specifies when generating, and the ids in the
        # file names.
        self.gc_category_cardinality += 1
        print("Detected --gc_cardinality={}".format(
            self.gc_category_cardinality))
    else:
        self.gc_category_cardinality = None
def __init__(self, metadata_filename, coord, receptive_field,
             gc_enable=False, sample_size=None, queue_size=128,
             npy_dataroot=None, num_mels=None, speaker_id=None):
    """Reader feeding (audio, mel features[, speaker id]) into a padded queue.

    The queue layout depends on configuration: with `gc_enable` a third
    scalar int32 slot carries the speaker id; with `hparams.triphone` the
    feature slot is three mel frames wide.
    """
    self.metadata_filename = metadata_filename
    self.coord = coord
    self.receptive_field = receptive_field
    self.sample_size = sample_size
    self.queue_size = queue_size
    self.gc_enable = gc_enable
    self.npy_dataroot = npy_dataroot
    self.num_mels = num_mels
    self.speaker_id = speaker_id
    self.threads = []

    # Waveform samples and local (mel) conditioning features.
    self._placeholders = [
        tf.placeholder(tf.float32, shape=None),
        tf.placeholder(tf.float32, shape=None),
    ]

    if self.gc_enable:
        # Third placeholder: global-condition (speaker) id.
        self._placeholders.append(tf.placeholder(tf.int32, shape=None))
        self.queue = tf.PaddingFIFOQueue(
            self.queue_size,
            [tf.float32, tf.float32, tf.int32],
            shapes=[(None, 1), (None, self.num_mels), ()],
            name='input_queue')
    elif hparams.triphone:
        # Triphone mode stacks three frames of mel features per step.
        self.queue = tf.PaddingFIFOQueue(
            self.queue_size,
            [tf.float32, tf.float32],
            shapes=[(None, 1), (None, self.num_mels * 3)],
            name='input_queue')
    else:
        self.queue = tf.PaddingFIFOQueue(
            self.queue_size,
            [tf.float32, tf.float32],
            shapes=[(None, 1), (None, self.num_mels)],
            name='input_queue')

    self.enqueue = self.queue.enqueue(self._placeholders)
def __init__(self, config):
    """Build the queue-fed input pipeline, loss, and clipped-gradient train op.

    `config` supplies learning-rate schedule, clipping, and optimizer choice;
    input tensor specs come from `add_model_specific_valuables`.
    """
    self.config = config

    input_props = self.add_model_specific_valuables(config)
    self.queue_input_tensors = [tf.placeholder(dtype, shape)
                                for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    # NOTE(review): input_tensors is passed as a single argument here
    # (unlike the starred form used elsewhere) — confirm that
    # get_predictions_and_loss expects the packed sequence.
    self.predictions, self.loss = self.get_predictions_and_loss(
        self.input_tensors)

    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(
        self.config["learning_rate"], self.global_step,
        self.config["decay_frequency"], self.config["decay_rate"],
        staircase=True)

    # Clip global gradient norm before applying updates.
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients,
                                          self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer,
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(
        zip(gradients, trainable_params), global_step=self.global_step)
def __init__(self, config):
    """Load embeddings/vocabularies and build the queue-fed training graph.

    Sets up the input queue from a fixed list of (dtype, shape) specs,
    wires predictions/loss, and creates a clipped-gradient train op.
    """
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"])
                           for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.embedding_dicts = [
        util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
        for emb in config["embeddings"]
    ]
    self.max_mention_width = config["max_mention_width"]
    self.max_context_width = config["max_context_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.eval_data = None  # Load eval data lazily.

    # (dtype, shape) specs for each queue-fed input tensor, in order.
    input_props = [
        (tf.float32, [None, None, self.embedding_size]),  # Text embeddings.
        (tf.int32, [None, None, None]),                   # Character indices.
        (tf.int32, [None]),                               # Text lengths.
        (tf.int32, [None]),                               # Speaker IDs.
        (tf.int32, []),                                   # Genre.
        (tf.bool, []),                                    # Is training.
        (tf.int32, [None]),                               # Gold starts.
        (tf.int32, [None]),                               # Gold ends.
        (tf.int32, [None]),                               # Cluster ids.
    ]

    self.queue_input_tensors = [tf.placeholder(dtype, shape)
                                for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(
        *self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(
        self.config["learning_rate"], self.global_step,
        self.config["decay_frequency"], self.config["decay_rate"],
        staircase=True)

    # Clip global gradient norm before applying updates.
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients,
                                          self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer,
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(
        zip(gradients, trainable_params), global_step=self.global_step)
def __init__(self, data_dir, testdata_dir, coord, receptive_field,
             prediction_lag=360, quantization_thresholds=None,
             sample_size=None, queue_size=10):
    """Reader feeding feature+target matrices into a padded FIFO queue.

    Args:
        data_dir: Directory whose subdirectories hold the training data.
        testdata_dir: Directory holding the test data.
        coord: tf.train.Coordinator used to stop feeder threads.
        receptive_field: Model context length in samples.
        prediction_lag: Offset used for the prediction target
            (presumably steps ahead — confirm in compute_statistics).
        quantization_thresholds: Thresholds for bucketing values into
            categories. Defaults to [0.01, 0.025].
        sample_size: Optional chunk length for each enqueued sample.
        queue_size: Capacity of the input queue.

    Raises:
        ValueError: If no directories are found in `data_dir`.
    """
    # Fix: a mutable list must never be a default argument value — it is
    # created once and shared across all calls. The previous default
    # [0.01, 0.025] is preserved via a None sentinel.
    if quantization_thresholds is None:
        quantization_thresholds = [0.01, 0.025]
    self.data_dir = data_dir
    self.testdata_dir = testdata_dir
    self.coord = coord
    self.sample_size = sample_size
    self.receptive_field = receptive_field
    self.prediction_lag = prediction_lag
    self.quantization_thresholds = quantization_thresholds
    self.threads = []
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    # Each queue element is a (time, num_features + 1) matrix; the extra
    # column carries the target.
    self.queue = tf.PaddingFIFOQueue(
        queue_size, ['float32'],
        shapes=[(None, self.num_features() + 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])
    self.cache = [None] * 10
    self.test_cache = [None] * 10
    self.category_cardinalities = [0] * (self.num_return_categories())
    dirs = find_dirs(data_dir)
    if not dirs:
        raise ValueError("No directories found in '{}'.".format(data_dir))
    self.compute_statistics(dirs)
def testBlockingEnqueueBeforeClose(self):
    """A blocked enqueue completes once a dequeue frees space, even when a
    close was requested while the enqueue was blocked."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(4, tf.float32, ((),))
        values = [10.0, 20.0, 30.0, 40.0]
        fill_op = queue.enqueue_many((values,))
        blocking_enqueue_op = queue.enqueue((50.0,))
        close_op = queue.close()
        dequeued_t = queue.dequeue()

        fill_op.run()  # Queue is now full; the next enqueue will block.

        def blocking_enqueue():
            # Expect the operation to succeed once the dequeue op runs.
            sess.run(blocking_enqueue_op)

        enqueue_thread = self.checkedThread(target=blocking_enqueue)
        enqueue_thread.start()

        # The close_op should run after the blocking_enqueue_op has blocked.
        # TODO(mrry): Figure out how to do this without sleeping.
        time.sleep(0.1)

        def close():
            sess.run(close_op)

        close_thread = self.checkedThread(target=close)
        close_thread.start()

        # The dequeue will unblock both threads.
        self.assertEqual(10.0, dequeued_t.eval())
        enqueue_thread.join()
        close_thread.join()

        for expected in [20.0, 30.0, 40.0, 50.0]:
            self.assertEqual(expected, dequeued_t.eval())
        self.assertEqual(0, queue.size().eval())
def testBlockingDequeueFromClosedQueue(self):
    """Dequeue drains remaining elements, then fails once the queue closes."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(10, tf.float32, ((),))
        values = [10.0, 20.0, 30.0, 40.0]
        fill_op = queue.enqueue_many((values,))
        close_op = queue.close()
        dequeued_t = queue.dequeue()

        fill_op.run()

        def dequeue():
            for value in values:
                self.assertEqual([value], sess.run(dequeued_t))
            # Expect the operation to fail due to the queue being closed.
            with self.assertRaisesRegexp(tf.errors.OutOfRangeError,
                                         "is closed and has insufficient"):
                sess.run(dequeued_t)

        dequeue_thread = self.checkedThread(target=dequeue)
        dequeue_thread.start()
        # The close_op should run after the dequeue_thread has blocked.
        # TODO(mrry): Figure out how to do this without sleeping.
        time.sleep(0.1)
        close_op.run()
        dequeue_thread.join()
def testMixtureOfDequeueAndDequeueMany(self):
    """Randomly interleaves dequeue and dequeue_many until 250 elements drain."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(10, tf.int32, shapes=((),))
        fill_op = queue.enqueue_many((np.arange(250, dtype=np.int32),))
        dequeued_t = queue.dequeue()
        count_placeholder = tf.placeholder(tf.int32, shape=())
        dequeue_many_t = queue.dequeue_many(count_placeholder)

        def enqueue():
            sess.run(fill_op)

        enqueue_thread = self.checkedThread(target=enqueue)
        enqueue_thread.start()

        drained = 0
        while drained < 250:
            # With equal probability, run Dequeue or dequeue_many.
            if random.random() > 0.5:
                self.assertEqual(drained, dequeued_t.eval())
                drained += 1
            else:
                count = random.randint(0, min(20, 250 - drained))
                expected = np.arange(drained, drained + count,
                                     dtype=np.int32)
                self.assertAllEqual(
                    expected,
                    dequeue_many_t.eval({count_placeholder: count}))
                drained += count

        queue.close().run()
        enqueue_thread.join()
        self.assertEqual(0, queue.size().eval())
def testParallelEnqueueAndDequeue(self):
    """Runs 10 enqueue and 10 dequeue threads concurrently on one queue."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(50, tf.float32, shapes=((),))
        # Pre-fill to 49/50 so enqueues and dequeues contend at capacity.
        queue.enqueue_many(([10.0] * 49,)).run()
        enqueue_op = queue.enqueue((20.0,))
        dequeued_t = queue.dequeue()

        def enqueue():
            for _ in xrange(100):
                sess.run(enqueue_op)

        def dequeue():
            for _ in xrange(100):
                self.assertTrue(sess.run(dequeued_t) in (10.0, 20.0))

        enqueue_threads = [self.checkedThread(target=enqueue)
                           for _ in range(10)]
        dequeue_threads = [self.checkedThread(target=dequeue)
                           for _ in range(10)]
        for thread in enqueue_threads + dequeue_threads:
            thread.start()
        for thread in enqueue_threads + dequeue_threads:
            thread.join()

        # Dequeue the initial count of elements to clean up.
        cleanup_elems = queue.dequeue_many(49).eval()
        for elem in cleanup_elems:
            self.assertTrue(elem in (10.0, 20.0))
def prefetch(tensor_dict, capacity):
    """Creates a prefetch queue for tensors.

    Builds a padding FIFO queue that asynchronously enqueues tensor_dicts,
    so preprocessed tensors are readily available for consumers; call
    `.dequeue()` on the returned queue to get a tensor_dict.

    Example input pipeline when you don't need batching:
      key, string_tensor = slim.parallel_reader.parallel_read(...)
      tensor_dict = decoder.decode(string_tensor)
      tensor_dict = preprocessor.preprocess(tensor_dict, ...)
      prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
      tensor_dict = prefetch_queue.dequeue()
      outputs = Model(tensor_dict)

    For input pipelines with batching, refer to core/batcher.py.

    Args:
      tensor_dict: a dictionary of tensors to prefetch.
      capacity: the size of the prefetch queue.

    Returns:
      a FIFO prefetcher queue
    """
    names = list(tensor_dict.keys())
    dtypes = [t.dtype for t in tensor_dict.values()]
    shapes = [t.get_shape() for t in tensor_dict.values()]
    queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes, shapes=shapes,
                                names=names, name='prefetch_queue')
    enqueue_op = queue.enqueue(tensor_dict)
    runner = tf.train.queue_runner.QueueRunner(queue, [enqueue_op])
    tf.train.queue_runner.add_queue_runner(runner)
    # Export queue fullness so it can be monitored in TensorBoard.
    tf.summary.scalar(
        'queue/%s/fraction_of_%d_full' % (queue.name, capacity),
        tf.to_float(queue.size()) * (1. / capacity))
    return queue
def prefetch(self, tensor_dict, capacity):
    """Creates a prefetch queue for tensors.

    Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns
    a queue whose dequeue op evaluates to a tensor_dict. Useful for
    prefetching preprocessed tensors so data is ready for consumers.

    Args:
      tensor_dict: a dictionary of tensors to prefetch.
      capacity: the size of the prefetch queue.

    Returns:
      a FIFO prefetcher queue
    """
    names = list(tensor_dict.keys())
    dtypes = [t.dtype for t in tensor_dict.values()]
    shapes = [t.get_shape() for t in tensor_dict.values()]
    queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes, shapes=shapes,
                                names=names, name='prefetch_queue')
    enqueue_op = queue.enqueue(tensor_dict)
    runner = tf.train.queue_runner.QueueRunner(queue, [enqueue_op])
    tf.train.queue_runner.add_queue_runner(runner)
    return queue
def __init__(self,
             audio_dir: str,
             coord: tf.train.Coordinator,
             sample_rate: int,
             audio_patterns=None,
             sample_size=2 ** 16,
             silence_threshold=0.3,
             queue_size=32,
             enqueue_num_per_piece=2,
             ):
    """Audio reader that enqueues fixed-length sample windows.

    Raises:
        ValueError: If no files matching `audio_patterns` exist in
            `audio_dir`.
    """
    if audio_patterns is None:
        audio_patterns = ['*.ogg']
    self.audio_dir = audio_dir
    self.coord = coord
    self.audio_patterns = audio_patterns
    self.sample_rate = sample_rate
    self.sample_size = sample_size
    self.queue_size = queue_size
    # Windows are fixed-length, so the queue shape is fully determined.
    self.sample_placeholder = tf.placeholder(
        dtype=tf.float32,
        shape=(sample_size,),
        name='sample',
    )
    self.queue = tf.PaddingFIFOQueue(
        queue_size, [tf.float32], shapes=[(sample_size,)]
    )
    self.enqueue = self.queue.enqueue([self.sample_placeholder])
    self.threads = []
    self.enqueue_num_per_piece = enqueue_num_per_piece
    self.silence_threshold = silence_threshold
    if not find_files(self.audio_dir, self.audio_patterns):
        raise ValueError('file not found')
def __init__(self, feat_array, feature_normalization, coord, logdir,
             queue_size=128):
    """Feeds rows of a 2-D feature matrix into a padded FIFO queue.

    `feat_array` is indexed as (example, feature); its dimensions define
    the queue element shape.
    """
    self.feat_array = feat_array
    self.normalize = feature_normalization
    self.num_data = feat_array.shape[0]
    self.dimension = feat_array.shape[1]
    self.coord = coord
    self.logdir = logdir
    self.threads = []
    print('Total amount of data: ', self.num_data)
    print("Input feature dimension: ", self.dimension)
    # Make sure normalization factors have been calculated
    if self.normalize:
        normalize(self.feat_array, self.logdir)
    self.feature_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.feature_queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                             shapes=[[self.dimension]])
    self.feature_enqueue = self.feature_queue.enqueue(
        [self.feature_placeholder])
def get_batch_producer(path=FLAGS.path,
                       batch_size=FLAGS.batch_size,
                       prefetch_size=FLAGS.capacity,
                       num_of_threads=FLAGS.threads,
                       scope='batch_producer'):
    """Builds a padded-batch producer backed by a multi-threaded queue.

    Reads examples from the TFRecord at `path`, pushes them through a
    padding queue with `num_of_threads` enqueue threads, and returns a
    dequeue_many op yielding padded batches.
    """
    with tf.name_scope(scope):
        filename_queue = tf.train.string_input_producer(
            [path], name='filename_producer')

        with tf.name_scope('example_producer'):
            data, seq_len, label, td_feature = parse_example(filename_queue)
            data = tf.placeholder_with_default(data, [None], name='data')
            label = tf.cast(label, tf.int32, name='label')
            seq_len = tf.cast(seq_len, tf.int32, name='seq_length')
            td_feature = tf.placeholder_with_default(td_feature, [None],
                                                     name='td_feature')

        with tf.name_scope('padded_batch_producer'):
            padding_queue = tf.PaddingFIFOQueue(
                capacity=prefetch_size,
                dtypes=[tf.float32, tf.int32, tf.int32, tf.float32],
                shapes=[[None], [], [], [None]],
                name='padding_queue')
            push_op = padding_queue.enqueue(
                [data, seq_len, label, td_feature],
                name='push_single_example')
            runner = tf.train.QueueRunner(padding_queue,
                                          [push_op] * num_of_threads)
            tf.train.add_queue_runner(runner)
            batch_op = padding_queue.dequeue_many(n=batch_size,
                                                  name='pop_batch')
    return batch_op
def __init__(self, data_reader, batch_size, indices=None, shuffle=True,
             capacity=5000):
    """Queue-backed batch feeder over `data_reader`'s named tensors.

    Queue components are keyed by `data_reader.names`, with dtypes and
    shapes ordered to match.
    """
    self.data_reader = data_reader
    self.batch_size = batch_size
    self.shuffle = shuffle
    self._is_running = False
    if indices is None:
        self.indices = list(range(len(self.data_reader)))
    else:
        self.indices = indices

    # Queue setup: align shapes/dtypes with the reader's name order.
    self.names = data_reader.names
    self.shapes = [data_reader.shapes[n] for n in self.names]
    self.dtypes = [data_reader.dtypes[n] for n in self.names]
    self.placeholders = {
        name: tf.placeholder(dt, shape=shape, name=name)
        for dt, name, shape in zip(self.dtypes, self.names, self.shapes)
    }
    self.queue = tf.PaddingFIFOQueue(capacity, dtypes=self.dtypes,
                                     shapes=self.shapes, names=self.names)
    self.enqueue_op = self.queue.enqueue(self.placeholders)
    self.dequeue_op = self.queue.dequeue()
def testEnqueueAndBlockingDequeue(self):
    """A dequeue on an empty queue blocks until elements are enqueued."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(3, tf.float32, ((),))
        values = [10.0, 20.0, 30.0]
        enqueue_ops = [queue.enqueue((v,)) for v in values]
        dequeued_t = queue.dequeue()

        def enqueue():
            # The enqueue_ops should run after the dequeue op has blocked.
            # TODO(mrry): Figure out how to do this without sleeping.
            time.sleep(0.1)
            for op in enqueue_ops:
                sess.run(op)

        results = []

        def dequeue():
            for _ in xrange(len(values)):
                results.append(sess.run(dequeued_t))

        enqueue_thread = self.checkedThread(target=enqueue)
        dequeue_thread = self.checkedThread(target=dequeue)
        enqueue_thread.start()
        dequeue_thread.start()
        enqueue_thread.join()
        dequeue_thread.join()

        for value, result in zip(values, results):
            self.assertEqual([value], result)
def testMultiDequeueMany(self):
    """dequeue_many and single dequeue both work on a two-component queue."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(10, (tf.float32, tf.int32),
                                    shapes=((), (2,)))
        float_elems = [10.0, 20.0, 30.0, 40.0, 50.0,
                       60.0, 70.0, 80.0, 90.0, 100.0]
        int_elems = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
                     [13, 14], [15, 16], [17, 18], [19, 20]]
        enqueue_op = queue.enqueue_many((float_elems, int_elems))
        dequeued_t = queue.dequeue_many(4)
        dequeued_single_t = queue.dequeue()

        enqueue_op.run()

        # First batch of four; dequeue_many should report a static shape.
        float_val, int_val = sess.run(dequeued_t)
        self.assertAllEqual(float_elems[0:4], float_val)
        self.assertAllEqual(int_elems[0:4], int_val)
        self.assertEqual(float_val.shape, dequeued_t[0].get_shape())
        self.assertEqual(int_val.shape, dequeued_t[1].get_shape())

        # Second batch of four.
        float_val, int_val = sess.run(dequeued_t)
        self.assertAllEqual(float_elems[4:8], float_val)
        self.assertAllEqual(int_elems[4:8], int_val)

        # A single element via plain dequeue.
        float_val, int_val = sess.run(dequeued_single_t)
        self.assertAllEqual(float_elems[8], float_val)
        self.assertAllEqual(int_elems[8], int_val)
        self.assertEqual(float_val.shape, dequeued_single_t[0].get_shape())
        self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape())
def __init__(self, audio_dir, coord, sample_rate, sample_size=None,
             silence_threshold=None, quantization_channels=256,
             queue_size=256, pattern='*.wav'):
    """Audio reader feeding variable-length (N, 1) sample chunks into a queue.

    Raises:
        ValueError: If no files matching `pattern` exist in `audio_dir`.
    """
    self.audio_dir = audio_dir
    self.pattern = pattern
    self.quantization_channels = quantization_channels
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.silence_threshold = silence_threshold
    self.threads = []
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                     shapes=[(None, 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])
    # TODO Find a better way to check this.
    # Checking inside the AudioReader's thread makes it
    # hard to terminate the execution of the script, so
    # we do it in the constructor for now.
    if not find_files(audio_dir, self.pattern):
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
def testMixtureOfEnqueueAndEnqueueMany(self):
    """Randomly interleaves enqueue and enqueue_many while a reader drains."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(10, tf.int32, shapes=((),))
        single_placeholder = tf.placeholder(tf.int32, shape=())
        enqueue_op = queue.enqueue((single_placeholder,))
        many_placeholder = tf.placeholder(tf.int32, shape=(None,))
        enqueue_many_op = queue.enqueue_many((many_placeholder,))
        dequeued_t = queue.dequeue()
        close_op = queue.close()

        def dequeue():
            for i in xrange(250):
                self.assertEqual(i, sess.run(dequeued_t))

        dequeue_thread = self.checkedThread(target=dequeue)
        dequeue_thread.start()

        pushed = 0
        while pushed < 250:
            # With equal probability, run Enqueue or enqueue_many.
            if random.random() > 0.5:
                enqueue_op.run({single_placeholder: pushed})
                pushed += 1
            else:
                count = random.randint(0, min(20, 250 - pushed))
                batch = np.arange(pushed, pushed + count, dtype=np.int32)
                enqueue_many_op.run({many_placeholder: batch})
                pushed += count

        close_op.run()
        dequeue_thread.join()
        self.assertEqual(0, queue.size().eval())
def __init__(self, data_dir, coord, symbol_list, year_range, symbol_first,
             data_win_len, receptive_field, queue_size=500):
    """Market-data reader that enqueues transaction windows for training."""
    # System initialization.
    self.db_manager = DBManager(data_dir)
    self.preprocessor = Preprocessor()
    self.coord = coord
    self.threads = []
    # Processing parameters.
    self.data_dir = data_dir
    self.symbol_list = symbol_list
    self.year_range = year_range
    self.symbol_first = symbol_first
    self.data_win_len = data_win_len
    self.receptive_field = receptive_field
    # Queue setup: variable-length (N, 1) float windows.
    self.trans_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.trans_queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                           shapes=[(None, 1)])
    self.trans = self.trans_queue.enqueue([self.trans_placeholder])
    # For multithreading: iterate (symbol, year) pairs, with precedence
    # order chosen by symbol_first.
    if self.symbol_first:
        self.yield_list = itertools.product(self.symbol_list,
                                            self.year_range)
    else:
        self.yield_list = itertools.product(self.year_range,
                                            self.symbol_list)
def testBlockingDequeueMany(self):
    """dequeue_many blocks until an enqueue_many supplies enough elements."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(10, tf.float32, ((),))
        values = [10.0, 20.0, 30.0, 40.0]
        enqueue_op = queue.enqueue_many((values,))
        dequeued_t = queue.dequeue_many(4)

        dequeued_elems = []

        def enqueue():
            # The enqueue_op should run after the dequeue op has blocked.
            # TODO(mrry): Figure out how to do this without sleeping.
            time.sleep(0.1)
            sess.run(enqueue_op)

        def dequeue():
            dequeued_elems.extend(sess.run(dequeued_t).tolist())

        enqueue_thread = self.checkedThread(target=enqueue)
        dequeue_thread = self.checkedThread(target=dequeue)
        enqueue_thread.start()
        dequeue_thread.start()
        enqueue_thread.join()
        dequeue_thread.join()

        self.assertAllEqual(values, dequeued_elems)
def dynamic_rnn_batch(file_list, hparams):
    """Reads batches of SequenceExamples from TFRecord and pads them.

    Can deal with variable length SequenceExamples by padding each batch to
    the length of the longest sequence with zeros.

    Args:
      file_list: List of TFRecord files containing SequenceExamples.
      hparams: HParams instance containing model hyperparameters.

    Returns:
      inputs: Tensor of shape [batch_size, examples_per_sequence,
          one_hot_length] with floats indicating the next note event.
      labels: Tensor of shape [batch_size, examples_per_sequence] with
          int64s indicating the prediction for next note event given the
          notes up to this point in the inputs sequence.
      lengths: Tensor vector of shape [batch_size] with the length of the
          SequenceExamples before padding.
    """
    _, _, sequences = input_sequence_example(file_list, hparams)
    length = tf.shape(sequences['inputs'])[0]
    queue = tf.PaddingFIFOQueue(
        capacity=1000,
        dtypes=[tf.float32, tf.int64, tf.int32],
        shapes=[(None, hparams.one_hot_length), (None,), ()])

    # The number of threads for enqueuing.
    num_threads = 4
    enqueue_ops = [queue.enqueue([sequences['inputs'],
                                  sequences['labels'],
                                  length])] * num_threads
    tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops))
    return queue.dequeue_many(hparams.batch_size)
def testClosedBlockingDequeueManyRestoresPartialBatch(self):
    """Elements grabbed by a failed dequeue_many are restored in order."""
    with self.test_session() as sess:
        queue = tf.PaddingFIFOQueue(4, (tf.float32, tf.float32), ((), ()))
        elems_a = [1.0, 2.0, 3.0]
        elems_b = [10.0, 20.0, 30.0]
        enqueue_op = queue.enqueue_many((elems_a, elems_b))
        dequeued_a_t, dequeued_b_t = queue.dequeue_many(4)
        cleanup_dequeue_a_t, cleanup_dequeue_b_t = queue.dequeue()
        close_op = queue.close()

        enqueue_op.run()

        def dequeue():
            # Only three elements are available, so dequeue_many(4) must
            # fail once the queue is closed.
            with self.assertRaises(tf.errors.OutOfRangeError):
                sess.run([dequeued_a_t, dequeued_b_t])

        dequeue_thread = self.checkedThread(target=dequeue)
        dequeue_thread.start()
        # The close_op should run after the dequeue_thread has blocked.
        # TODO(mrry): Figure out how to do this without sleeping.
        time.sleep(0.1)
        close_op.run()
        dequeue_thread.join()

        # Test that the elements in the partially-dequeued batch are
        # restored in the correct order.
        for elem_a, elem_b in zip(elems_a, elems_b):
            val_a, val_b = sess.run([cleanup_dequeue_a_t,
                                     cleanup_dequeue_b_t])
            self.assertEqual(elem_a, val_a)
            self.assertEqual(elem_b, val_b)
        self.assertEqual(0, queue.size().eval())
def add_placeholder(self):
    """Create input/target placeholders and the padded feeding queue.

    Shapes come from the instance config (X_shape / Y_shape); the queue
    capacity comes from self.queue_size.
    """
    self.sample_placeholder = tf.placeholder(dtype=tf.float32,
                                             shape=self.config.X_shape)
    self.target_placeholder = tf.placeholder(dtype=tf.float32,
                                             shape=self.config.Y_shape)
    self.queue = tf.PaddingFIFOQueue(
        self.queue_size, ['float32', 'float32'],
        shapes=[self.config.X_shape, self.config.Y_shape])
    self.enqueue = self.queue.enqueue(
        [self.sample_placeholder, self.target_placeholder])
def __init__(self, nb_audio_dir, wb_audio_dir, coord, sample_rate,
             sample_size=None, silence_threshold=None, queue_size=32):
    """Reader pairing narrowband and wideband audio in one padded queue.

    Args:
        nb_audio_dir: Directory of narrowband audio files.
        wb_audio_dir: Directory of wideband audio files.
        coord: tf.train.Coordinator used to stop feeder threads.
        sample_rate: Sample rate the reader operates at.
        sample_size: Optional chunk length for each enqueued sample.
        silence_threshold: Optional threshold for silence trimming.
        queue_size: Capacity of the paired-sample queue.

    Raises:
        ValueError: If either directory contains no audio files.
    """
    self.nb_audio_dir = nb_audio_dir
    self.wb_audio_dir = wb_audio_dir
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.silence_threshold = silence_threshold
    self.threads = []
    self.nb_sample_placeholder = tf.placeholder(dtype=tf.float32,
                                                shape=None)
    self.wb_sample_placeholder = tf.placeholder(dtype=tf.float32,
                                                shape=None)
    # Each queue element is a (narrowband, wideband) pair of (N, 1) chunks.
    self.queue = tf.PaddingFIFOQueue(queue_size, ['float32', 'float32'],
                                     shapes=[(None, 1), (None, 1)])
    self.enqueue = self.queue.enqueue(
        [self.nb_sample_placeholder, self.wb_sample_placeholder])

    nb_files = find_files(nb_audio_dir)
    wb_files = find_files(wb_audio_dir)
    if not nb_files:
        raise ValueError("No audio files found in '{}'".format(nb_audio_dir))
    if not wb_files:
        raise ValueError("No audio files found in '{}'".format(wb_audio_dir))
    # Fix: dropped the redundant bare `return` that ended this constructor;
    # __init__ implicitly returns None.
def __init__(self, audio_dir, audio_output_dir, coord, sample_rate,
             sample_size=None, silence_threshold=None, queue_size=256,
             step_length=100):
    """Reader pairing input audio with target output audio in one queue.

    Raises:
        ValueError: If no audio files are found in `audio_dir`.
    """
    self.step_length = step_length
    self.audio_dir = audio_dir
    self.audio_output_dir = audio_output_dir
    self.sample_rate = sample_rate
    self.coord = coord
    self.sample_size = sample_size
    self.silence_threshold = silence_threshold
    self.threads = []
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.output_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    # Each queue element is an (input, output) pair of (N, 1) chunks.
    self.queue = tf.PaddingFIFOQueue(queue_size, ['float32', 'float32'],
                                     shapes=[(None, 1), (None, 1)])
    self.enqueue = self.queue.enqueue(
        [self.sample_placeholder, self.output_placeholder])
    print("step is {}.".format(self.step_length))
    # TODO Find a better way to check this.
    # Checking inside the AudioReader's thread makes it hard to terminate
    # the execution of the script, so we do it in the constructor for now.
    if not find_files(audio_dir):
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
def __init__(self, data, batch_size, padding_queue_cap=1000,
             random_queue_cap=400000, dtypes=None, shapes=None,
             threads_q1=100, threads_q2=10):
    """Two-stage input pipeline: shuffle queue feeding a padding batch queue.

    Stage 1 enqueues `data` into a RandomShuffleQueue (threads_q1 runner
    threads); stage 2 moves single shuffled elements into a
    PaddingFIFOQueue (threads_q2 runner threads) whose `dequeue_many`
    yields padded batches.

    Args:
        data: Tensor(s) to enqueue into the shuffle queue.
        batch_size: Number of elements per dequeued batch.
        padding_queue_cap: Capacity of the padding queue.
        random_queue_cap: Capacity of the shuffle queue; min_after_dequeue
            is half of this.
        dtypes: Queue component dtypes. Defaults to [tf.int32].
        shapes: Queue component shapes. Defaults to [[None, 1]].
        threads_q1: Enqueue threads for the shuffle queue.
        threads_q2: Enqueue threads for the padding queue.
    """
    # Fix: lists are mutable and must not appear as default argument
    # values (they would be shared across calls). The previous defaults
    # are preserved via None sentinels.
    if dtypes is None:
        dtypes = [tf.int32]
    if shapes is None:
        shapes = [[None, 1]]
    input_queue = tf.RandomShuffleQueue(
        capacity=random_queue_cap,
        min_after_dequeue=int(random_queue_cap * 0.5),
        dtypes=dtypes)
    input_enqueue_op = input_queue.enqueue(data)
    qr_input = tf.train.QueueRunner(input_queue,
                                    [input_enqueue_op] * threads_q1)
    tf.train.add_queue_runner(qr_input)
    non_paddled_input = input_queue.dequeue()
    self.non_paddled_input = non_paddled_input
    padding_queue = tf.PaddingFIFOQueue(capacity=padding_queue_cap,
                                        dtypes=dtypes, shapes=shapes)
    padding_enqueue_op = padding_queue.enqueue(non_paddled_input)
    qr_padding = tf.train.QueueRunner(padding_queue,
                                      [padding_enqueue_op] * threads_q2)
    tf.train.add_queue_runner(qr_padding)
    self.dequeue_batch = padding_queue.dequeue_many(batch_size)
def testDtypes(self):
    """Round-trips every supported dtype through the padding queue."""
    with self.test_session() as sess:
        dtypes = [tf.float32, tf.float64, tf.int32, tf.uint8, tf.int16,
                  tf.int8, tf.int64, tf.bool, tf.complex64, tf.complex128]
        shape = (32, 4, 128)
        queue = tf.PaddingFIFOQueue(32, dtypes, [shape[1:]] * len(dtypes))

        input_tuple = []
        for dtype in dtypes:
            np_dtype = dtype.as_numpy_dtype
            np_array = np.random.randint(-10, 10, shape)
            # Coerce the random integers into each component dtype.
            if dtype == tf.bool:
                np_array = np_array > 0
            elif dtype in (tf.complex64, tf.complex128):
                np_array = np.sqrt(np_array.astype(np_dtype))
            else:
                np_array = np_array.astype(np_dtype)
            input_tuple.append(np_array)

        queue.enqueue_many(input_tuple).run()

        output_tuple_t = queue.dequeue_many(32)
        output_tuple = sess.run(output_tuple_t)

        for input_elem, output_elem in zip(input_tuple, output_tuple):
            self.assertAllEqual(input_elem, output_elem)
def __init__(self, txt_files, thread_count, batch_size, numcep,
             numcontext):
    """Queue-backed feeder of (features, feature_len, labels, label_len).

    Feature rows are `numcep + 2 * numcep * numcontext` wide (cepstral
    coefficients plus left/right context frames).
    """
    self._coord = None
    self._numcep = numcep
    feature_width = numcep + (2 * numcep * numcontext)
    self._x = tf.placeholder(tf.float32, [None, feature_width])
    self._x_length = tf.placeholder(tf.int32, [])
    self._y = tf.placeholder(tf.int32, [None, ])
    self._y_length = tf.placeholder(tf.int32, [])
    self._example_queue = tf.PaddingFIFOQueue(
        shapes=[[None, feature_width], [], [None, ], []],
        dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
        capacity=2 * self._get_device_count() * batch_size)
    self._enqueue_op = self._example_queue.enqueue(
        [self._x, self._x_length, self._y, self._y_length])
    # Cancel pending enqueues on close so feeder threads can exit.
    self._close_op = self._example_queue.close(
        cancel_pending_enqueues=True)
    self._txt_files = txt_files
    self._batch_size = batch_size
    self._numcontext = numcontext
    self._thread_count = thread_count
    self._files_circular_list = self._create_files_circular_list()
def __init__(self, files_list, thread_count, batch_size, numcep,
             numcontext, next_index=lambda x: x + 1):
    """Queue feeder keyed by utterance id alongside features and labels.

    Feature rows are `numcep + 2 * numcep * numcontext` wide; each queue
    element is (uttid, features, feature_len, labels, label_len).
    """
    self._coord = None
    self._numcep = numcep
    feature_width = numcep + (2 * numcep * numcontext)
    self._uttid = tf.placeholder(tf.string, [])
    self._x = tf.placeholder(tf.float32, [None, feature_width])
    self._x_length = tf.placeholder(tf.int32, [])
    self._y = tf.placeholder(tf.int32, [None, ])
    self._y_length = tf.placeholder(tf.int32, [])
    self.example_queue = tf.PaddingFIFOQueue(
        shapes=[[], [None, feature_width], [], [None, ], []],
        dtypes=[tf.string, tf.float32, tf.int32, tf.int32, tf.int32],
        capacity=2 * self._get_device_count() * batch_size)
    self._enqueue_op = self.example_queue.enqueue(
        [self._uttid, self._x, self._x_length, self._y, self._y_length])
    # Cancel pending enqueues on close so feeder threads can exit.
    self._close_op = self.example_queue.close(cancel_pending_enqueues=True)
    self.batch_size = batch_size
    self._numcontext = numcontext
    self._thread_count = thread_count
    self._files_list = self._create_files_list(files_list)
    self._next_index = next_index