def __init__(self,
             audio_dir,
             coord,
             sample_rate,
             gc_enabled,
             receptive_field,
             sample_size=None,
             silence_threshold=None,
             queue_size=32):
    """Set up the audio reader: store parameters, build the TF input
    queue(s), and validate the audio directory up front.

    Args:
        audio_dir: Directory scanned (via find_files) for audio files.
        coord: Coordinator used by the reader threads.
        sample_rate: Audio sample rate (stored; not used in this method).
        gc_enabled: Whether global conditioning is enabled.
        receptive_field: Model receptive field (stored).
        sample_size: Optional size of audio pieces (stored).
        silence_threshold: Optional silence-trimming threshold (stored).
        queue_size: Capacity of the padding FIFO queue(s).

    Raises:
        ValueError: If no audio files are found, or if global conditioning
            is enabled but the file names do not carry an id.
    """
    # Plain parameter bookkeeping.
    self.audio_dir = audio_dir
    self.coord = coord
    self.sample_rate = sample_rate
    self.receptive_field = receptive_field
    self.sample_size = sample_size
    self.silence_threshold = silence_threshold
    self.gc_enabled = gc_enabled
    self.threads = []

    # Sample queue: variable-length (None, 1) float32 pieces, padded on
    # dequeue_many by PaddingFIFOQueue.
    self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
    self.queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                     shapes=[(None, 1)])
    self.enqueue = self.queue.enqueue([self.sample_placeholder])

    if self.gc_enabled:
        # Parallel queue carrying the scalar category id for each sample.
        self.id_placeholder = tf.placeholder(dtype=tf.int32, shape=())
        self.gc_queue = tf.PaddingFIFOQueue(queue_size, ['int32'],
                                            shapes=[()])
        self.gc_enqueue = self.gc_queue.enqueue([self.id_placeholder])

    # TODO Find a better way to check this.
    # Checking inside the AudioReader's thread makes it hard to terminate
    # the execution of the script, so we do it in the constructor for now.
    matched_files = find_files(audio_dir)
    if not matched_files:
        raise ValueError("No audio files found in '{}'.".format(audio_dir))
    if self.gc_enabled and not_all_have_id(matched_files):
        raise ValueError("Global conditioning is enabled, but file names "
                         "do not conform to pattern having id.")

    # Determine the number of mutually-exclusive categories we will
    # accomodate in our embedding table.
    if not self.gc_enabled:
        self.gc_category_cardinality = None
    else:
        _, self.gc_category_cardinality = get_category_cardinality(
            matched_files)
        # Add one to the largest index to get the number of categories,
        # since tf.nn.embedding_lookup expects zero-indexing. This means
        # one or more entries at the bottom of the embedding table are
        # unused, but that small waste keeps the code simple and preserves
        # the correspondence between generation-time ids and file-name ids.
        self.gc_category_cardinality += 1
        print("Detected --gc_cardinality={}".format(
            self.gc_category_cardinality))
# Example #2 (0)
    def __init__(self,
                 metadata_filename,
                 coord,
                 receptive_field,
                 gc_enable=False,
                 sample_size=None,
                 queue_size=128,
                 npy_dataroot=None,
                 num_mels=None,
                 speaker_id=None):
        """Store reader parameters and build the padding FIFO input queue.

        The queue carries (audio, mel) pairs; with gc_enable it also carries
        a scalar int32 speaker id, and under hparams.triphone the mel slot
        is three frames wide (num_mels * 3).
        """
        # Plain parameter bookkeeping.
        self.metadata_filename = metadata_filename
        self.coord = coord
        self.receptive_field = receptive_field
        self.sample_size = sample_size
        self.queue_size = queue_size
        self.gc_enable = gc_enable
        self.npy_dataroot = npy_dataroot
        self.num_mels = num_mels
        self.speaker_id = speaker_id
        self.threads = []

        # One placeholder per queued tensor: audio samples and mel frames.
        self._placeholders = [
            tf.placeholder(tf.float32, shape=None),
            tf.placeholder(tf.float32, shape=None)
        ]

        # Pick the queue components for the current configuration, then
        # build a single queue instead of three near-identical calls.
        if self.gc_enable:
            # Extra scalar int32 component for the speaker id.
            self._placeholders.append(tf.placeholder(tf.int32, shape=None))
            component_dtypes = [tf.float32, tf.float32, tf.int32]
            component_shapes = [(None, 1), (None, self.num_mels), ()]
        elif hparams.triphone:
            component_dtypes = [tf.float32, tf.float32]
            component_shapes = [(None, 1), (None, self.num_mels * 3)]
        else:
            component_dtypes = [tf.float32, tf.float32]
            component_shapes = [(None, 1), (None, self.num_mels)]
        self.queue = tf.PaddingFIFOQueue(self.queue_size,
                                         component_dtypes,
                                         shapes=component_shapes,
                                         name='input_queue')

        self.enqueue = self.queue.enqueue(self._placeholders)
# Example #3 (0)
    def __init__(self, config):
        """Build the input queue, loss, and training op from `config`.

        Reads config keys: learning_rate, decay_frequency, decay_rate,
        max_gradient_norm, optimizer ("adam" or "sgd").
        """
        self.config = config

        # (dtype, shape) pairs declared by the subclass hook.
        tensor_specs = self.add_model_specific_valuables(config)
        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in tensor_specs
        ]
        spec_dtypes, spec_shapes = zip(*tensor_specs)
        input_queue = tf.PaddingFIFOQueue(capacity=10,
                                          dtypes=spec_dtypes,
                                          shapes=spec_shapes)
        self.enqueue_op = input_queue.enqueue(self.queue_input_tensors)
        self.input_tensors = input_queue.dequeue()

        self.predictions, self.loss = self.get_predictions_and_loss(
            self.input_tensors)

        # Global step with a manual reset op; learning rate decays in
        # staircase steps every `decay_frequency` steps.
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step = tf.assign(self.global_step, 0)
        lr = tf.train.exponential_decay(self.config["learning_rate"],
                                        self.global_step,
                                        self.config["decay_frequency"],
                                        self.config["decay_rate"],
                                        staircase=True)

        # Clipped-gradient training op over all trainable variables.
        params = tf.trainable_variables()
        grads = tf.gradients(self.loss, params)
        grads, _ = tf.clip_by_global_norm(grads,
                                          self.config["max_gradient_norm"])
        optimizer_classes = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }
        opt = optimizer_classes[self.config["optimizer"]](lr)
        self.train_op = opt.apply_gradients(zip(grads, params),
                                            global_step=self.global_step)
    def __init__(self, config):
        """Load embeddings and vocab, then build the input queue, loss,
        and training op from `config`."""
        self.config = config
        self.embedding_info = [(emb["size"], emb["lowercase"])
                               for emb in config["embeddings"]]
        self.embedding_size = sum(size for size, _ in self.embedding_info)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.embedding_dicts = [
            util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
            for emb in config["embeddings"]
        ]
        self.max_mention_width = config["max_mention_width"]
        self.max_context_width = config["max_context_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.eval_data = None  # Load eval data lazily.

        # (dtype, shape) spec of every queued tensor, in enqueue order.
        input_specs = [
            (tf.float32, [None, None, self.embedding_size]),  # Text embeddings.
            (tf.int32, [None, None, None]),  # Character indices.
            (tf.int32, [None]),  # Text lengths.
            (tf.int32, [None]),  # Speaker IDs.
            (tf.int32, []),  # Genre.
            (tf.bool, []),  # Is training.
            (tf.int32, [None]),  # Gold starts.
            (tf.int32, [None]),  # Gold ends.
            (tf.int32, [None]),  # Cluster ids.
        ]

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_specs
        ]
        spec_dtypes, spec_shapes = zip(*input_specs)
        input_queue = tf.PaddingFIFOQueue(capacity=10,
                                          dtypes=spec_dtypes,
                                          shapes=spec_shapes)
        self.enqueue_op = input_queue.enqueue(self.queue_input_tensors)
        self.input_tensors = input_queue.dequeue()

        # NOTE: the queued tensors are unpacked as positional arguments.
        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)

        # Global step with a manual reset op; staircase learning-rate decay.
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step = tf.assign(self.global_step, 0)
        lr = tf.train.exponential_decay(self.config["learning_rate"],
                                        self.global_step,
                                        self.config["decay_frequency"],
                                        self.config["decay_rate"],
                                        staircase=True)

        # Clipped-gradient training op over all trainable variables.
        params = tf.trainable_variables()
        grads = tf.gradients(self.loss, params)
        grads, _ = tf.clip_by_global_norm(grads,
                                          self.config["max_gradient_norm"])
        optimizer_classes = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }
        opt = optimizer_classes[self.config["optimizer"]](lr)
        self.train_op = opt.apply_gradients(zip(grads, params),
                                            global_step=self.global_step)
# Example #5 (0)
    def __init__(self,
                 data_dir,
                 testdata_dir,
                 coord,
                 receptive_field,
                 prediction_lag=360,
                 quantization_thresholds=None,
                 sample_size=None,
                 queue_size=10):
        """Set up the data reader: store parameters, build the input queue,
        and compute statistics for the data directories.

        Args:
            data_dir: Directory scanned (via find_dirs) for data.
            testdata_dir: Directory holding test data (stored).
            coord: Coordinator used by the reader threads.
            receptive_field: Model receptive field (stored).
            prediction_lag: Lag used for predictions (stored).
            quantization_thresholds: Thresholds for quantization; defaults
                to [0.01, 0.025] when None.
            sample_size: Optional size of data pieces (stored).
            queue_size: Capacity of the padding FIFO queue.

        Raises:
            ValueError: If no directories are found under data_dir.
        """
        # BUG FIX: the default was the mutable literal [0.01, 0.025],
        # which is evaluated once and shared by every instance constructed
        # without the argument. Use the None-sentinel idiom instead; the
        # effective default is unchanged.
        if quantization_thresholds is None:
            quantization_thresholds = [0.01, 0.025]

        self.data_dir = data_dir
        self.testdata_dir = testdata_dir
        self.coord = coord
        self.sample_size = sample_size
        self.receptive_field = receptive_field
        self.prediction_lag = prediction_lag
        self.quantization_thresholds = quantization_thresholds
        self.threads = []

        # Each queued row carries num_features() inputs plus one extra
        # column (presumably the target — confirm against the enqueue code).
        self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                         shapes=[(None,
                                                  self.num_features() + 1)])
        self.enqueue = self.queue.enqueue([self.sample_placeholder])

        # 10-slot caches for train and test data; slot semantics are
        # defined by the reader methods (not visible here).
        self.cache = [None] * 10
        self.test_cache = [None] * 10
        self.category_cardinalities = [0] * (self.num_return_categories())

        dirs = find_dirs(data_dir)
        if not dirs:
            raise ValueError("No directories found in '{}'.".format(data_dir))
        self.compute_statistics(dirs)
  def testBlockingEnqueueBeforeClose(self):
    """Closing a full queue must not cancel an already-blocked enqueue."""
    with self.test_session() as sess:
      # Capacity 4, scalar float32 elements.
      q = tf.PaddingFIFOQueue(4, tf.float32, ((),))
      elems = [10.0, 20.0, 30.0, 40.0]
      enqueue_op = q.enqueue_many((elems,))
      blocking_enqueue_op = q.enqueue((50.0,))
      close_op = q.close()
      dequeued_t = q.dequeue()

      # Fill the queue to capacity so the next enqueue blocks.
      enqueue_op.run()

      def blocking_enqueue():
        # Expect the operation to succeed once the dequeue op runs.
        sess.run(blocking_enqueue_op)
      enqueue_thread = self.checkedThread(target=blocking_enqueue)
      enqueue_thread.start()

      # The close_op should run after the blocking_enqueue_op has blocked.
      # TODO(mrry): Figure out how to do this without sleeping.
      time.sleep(0.1)

      def close():
        sess.run(close_op)
      close_thread = self.checkedThread(target=close)
      close_thread.start()

      # The dequeue will unblock both threads.
      self.assertEqual(10.0, dequeued_t.eval())
      enqueue_thread.join()
      close_thread.join()

      # The pending enqueue (50.0) must have landed before the close.
      for elem in [20.0, 30.0, 40.0, 50.0]:
        self.assertEqual(elem, dequeued_t.eval())
      self.assertEqual(0, q.size().eval())
  def testBlockingDequeueFromClosedQueue(self):
    """Dequeue drains a closed queue, then fails with OutOfRangeError."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(10, tf.float32, ((),))
      elems = [10.0, 20.0, 30.0, 40.0]
      enqueue_op = q.enqueue_many((elems,))
      close_op = q.close()
      dequeued_t = q.dequeue()

      enqueue_op.run()

      def dequeue():
        # Elements enqueued before the close remain dequeuable after it.
        for elem in elems:
          self.assertEqual([elem], sess.run(dequeued_t))
        # Expect the operation to fail due to the queue being closed.
        with self.assertRaisesRegexp(tf.errors.OutOfRangeError,
                                     "is closed and has insufficient"):
          sess.run(dequeued_t)

      dequeue_thread = self.checkedThread(target=dequeue)
      dequeue_thread.start()
      # The close_op should run after the dequeue_thread has blocked.
      # TODO(mrry): Figure out how to do this without sleeping.
      time.sleep(0.1)
      close_op.run()
      dequeue_thread.join()
  def testMixtureOfDequeueAndDequeueMany(self):
    """Randomly interleaves dequeue and dequeue_many against one big
    enqueue_many; values must still come out as the sequence 0..249."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(10, tf.int32, shapes=((),))
      enqueue_op = q.enqueue_many((np.arange(250, dtype=np.int32),))
      dequeued_t = q.dequeue()
      count_placeholder = tf.placeholder(tf.int32, shape=())
      dequeuemany_t = q.dequeue_many(count_placeholder)

      # Capacity is only 10, so enqueuing all 250 elements completes
      # gradually as the main thread dequeues; run it on its own thread.
      def enqueue():
        sess.run(enqueue_op)
      enqueue_thread = self.checkedThread(target=enqueue)
      enqueue_thread.start()

      elements_dequeued = 0
      while elements_dequeued < 250:
        # With equal probability, run Dequeue or dequeue_many.
        if random.random() > 0.5:
          self.assertEqual(elements_dequeued, dequeued_t.eval())
          elements_dequeued += 1
        else:
          count = random.randint(0, min(20, 250 - elements_dequeued))
          expected_range = np.arange(elements_dequeued,
                                     elements_dequeued + count,
                                     dtype=np.int32)
          self.assertAllEqual(
              expected_range, dequeuemany_t.eval({count_placeholder: count}))
          elements_dequeued += count

      q.close().run()
      enqueue_thread.join()
      self.assertEqual(0, q.size().eval())
  def testParallelEnqueueAndDequeue(self):
    """Ten enqueue and ten dequeue threads run concurrently; every value
    dequeued must be one of the two values ever enqueued."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(50, tf.float32, shapes=((),))
      # Pre-fill to one below capacity so enqueues and dequeues contend.
      initial_elements = [10.0] * 49
      q.enqueue_many((initial_elements,)).run()

      enqueue_op = q.enqueue((20.0,))
      dequeued_t = q.dequeue()

      def enqueue():
        for _ in xrange(100):
          sess.run(enqueue_op)
      def dequeue():
        # Only the two known values can ever come out.
        for _ in xrange(100):
          self.assertTrue(sess.run(dequeued_t) in (10.0, 20.0))

      enqueue_threads = [self.checkedThread(target=enqueue) for _ in range(10)]
      dequeue_threads = [self.checkedThread(target=dequeue) for _ in range(10)]
      for enqueue_thread in enqueue_threads:
        enqueue_thread.start()
      for dequeue_thread in dequeue_threads:
        dequeue_thread.start()
      for enqueue_thread in enqueue_threads:
        enqueue_thread.join()
      for dequeue_thread in dequeue_threads:
        dequeue_thread.join()

      # Dequeue the initial count of elements to clean up.
      cleanup_elems = q.dequeue_many(49).eval()
      for elem in cleanup_elems:
        self.assertTrue(elem in (10.0, 20.0))
# Example #10 (0)
def prefetch(tensor_dict, capacity):
    """Create a prefetch queue for tensors.

    Builds a PaddingFIFOQueue that asynchronously enqueues tensor_dicts and
    returns the queue, whose dequeue op evaluates to a tensor_dict. Useful
    for prefetching preprocessed tensors so data is readily available for
    consumers. A QueueRunner feeding the queue is registered with the
    default graph, and a queue-fullness scalar summary is exported.

    Example input pipeline when you don't need batching:
    ----------------------------------------------------
    key, string_tensor = slim.parallel_reader.parallel_read(...)
    tensor_dict = decoder.decode(string_tensor)
    tensor_dict = preprocessor.preprocess(tensor_dict, ...)
    prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
    tensor_dict = prefetch_queue.dequeue()
    outputs = Model(tensor_dict)
    ----------------------------------------------------
    For input pipelines with batching, refer to core/batcher.py

    Args:
        tensor_dict: a dictionary of tensors to prefetch.
        capacity: the size of the prefetch queue.

    Returns:
        a FIFO prefetcher queue
    """
    queue_names = list(tensor_dict.keys())
    queue_dtypes = [t.dtype for t in tensor_dict.values()]
    queue_shapes = [t.get_shape() for t in tensor_dict.values()]
    queue = tf.PaddingFIFOQueue(capacity,
                                dtypes=queue_dtypes,
                                shapes=queue_shapes,
                                names=queue_names,
                                name='prefetch_queue')
    runner = tf.train.queue_runner.QueueRunner(queue,
                                               [queue.enqueue(tensor_dict)])
    tf.train.queue_runner.add_queue_runner(runner)
    # Export how full the queue is, as a fraction of its capacity.
    tf.summary.scalar(
        'queue/%s/fraction_of_%d_full' % (queue.name, capacity),
        tf.to_float(queue.size()) * (1. / capacity))
    return queue
# Example #11 (0)
    def prefetch(self, tensor_dict, capacity):
        """Creates a prefetch queue for tensors.

    Builds a PaddingFIFOQueue that asynchronously enqueues tensor_dicts and
    registers a QueueRunner for it with the default graph, so preprocessed
    tensors are readily available to consumers via the returned queue's
    dequeue op.

    Args:
        tensor_dict: a dictionary of tensors to prefetch.
        capacity: the size of the prefetch queue.

    Returns:
        a FIFO prefetcher queue
    """
        queue_names = list(tensor_dict.keys())
        queue_dtypes = [t.dtype for t in tensor_dict.values()]
        queue_shapes = [t.get_shape() for t in tensor_dict.values()]
        queue = tf.PaddingFIFOQueue(capacity,
                                    dtypes=queue_dtypes,
                                    shapes=queue_shapes,
                                    names=queue_names,
                                    name='prefetch_queue')
        runner = tf.train.queue_runner.QueueRunner(
            queue, [queue.enqueue(tensor_dict)])
        tf.train.queue_runner.add_queue_runner(runner)
        return queue
# Example #12 (0)
 def __init__(self,
              audio_dir: str,
              coord: tf.train.Coordinator,
              sample_rate: int,
              audio_patterns=None,
              sample_size=2 ** 16,
              silence_threshold=0.3,
              queue_size=32,
              enqueue_num_per_piece=2,
              ):
     """Store reader settings, build the fixed-length sample queue, and
     verify that the audio directory holds at least one matching file.

     Raises:
         ValueError: if no file matches audio_patterns under audio_dir.
     """
     # Default pattern resolved here to avoid a mutable default argument.
     self.audio_patterns = ['*.ogg'] if audio_patterns is None else audio_patterns
     self.audio_dir = audio_dir
     self.coord = coord
     self.sample_rate = sample_rate
     self.sample_size = sample_size
     self.queue_size = queue_size
     self.silence_threshold = silence_threshold
     self.enqueue_num_per_piece = enqueue_num_per_piece
     self.threads = []

     # Fixed-length (sample_size,) float32 pieces.
     self.sample_placeholder = tf.placeholder(
         dtype=tf.float32,
         shape=(sample_size,),
         name='sample',
     )
     self.queue = tf.PaddingFIFOQueue(
         queue_size,
         [tf.float32],
         shapes=[(sample_size,)]
     )
     self.enqueue = self.queue.enqueue([self.sample_placeholder])

     if not find_files(self.audio_dir, self.audio_patterns):
         raise ValueError('file not found')
    def __init__(self,
                 feat_array,
                 feature_normalization,
                 coord,
                 logdir,
                 queue_size=128):
        """Store the feature matrix, optionally normalize it, and build the
        fixed-dimension feature queue.

        Args:
            feat_array: Array of features; at least 2-D, indexed as
                (num_data, dimension).
            feature_normalization: Whether to run normalize() on feat_array
                (presumably in place — confirm with normalize()'s impl).
            coord: Coordinator used by the reader threads.
            logdir: Directory handed to normalize() for its factors.
            queue_size: Capacity of the padding FIFO queue.
        """

        self.feat_array = feat_array
        self.normalize = feature_normalization
        self.num_data = feat_array.shape[0]
        self.dimension = feat_array.shape[1]
        self.coord = coord
        self.logdir = logdir
        self.threads = []

        print('Total amount of data: ', self.num_data)
        print("Input feature dimension: ", self.dimension)

        # Make sure normalization factors have been calculated
        if self.normalize:
            normalize(self.feat_array, self.logdir)

        # Each queue element is a single feature vector of fixed dimension.
        self.feature_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.feature_queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                                 shapes=[[self.dimension]])
        self.feature_enqueue = self.feature_queue.enqueue(
            [self.feature_placeholder])
# Example #14 (0)
def get_batch_producer(path=FLAGS.path,
                       batch_size=FLAGS.batch_size,
                       prefetch_size=FLAGS.capacity,
                       num_of_threads=FLAGS.threads,
                       scope='batch_producer'):
    """Build a padded-batch input pipeline from one TFRecord file.

    Examples are read via parse_example, pushed through a PaddingFIFOQueue
    fed by `num_of_threads` queue-runner threads, and returned as a
    dequeue_many op producing padded batches of
    (data, seq_len, label, td_feature).
    """
    with tf.name_scope(scope):
        filename_queue = tf.train.string_input_producer(
            [path], name='filename_producer')
        with tf.name_scope('example_producer'):
            data, seq_len, label, td_feature = parse_example(filename_queue)
            # Named placeholders-with-defaults allow feeding at run time.
            data = tf.placeholder_with_default(data, [None], name='data')
            label = tf.cast(label, tf.int32, name='label')
            seq_len = tf.cast(seq_len, tf.int32, name='seq_length')
            td_feature = tf.placeholder_with_default(td_feature, [None],
                                                     name='td_feature')
        with tf.name_scope('padded_batch_producer'):
            pad_queue = tf.PaddingFIFOQueue(
                capacity=prefetch_size,
                dtypes=[tf.float32, tf.int32, tf.int32, tf.float32],
                shapes=[[None], [], [], [None]],
                name='padding_queue')

            push_op = pad_queue.enqueue([data, seq_len, label, td_feature],
                                        name='push_single_example')
            tf.train.add_queue_runner(
                tf.train.QueueRunner(pad_queue, [push_op] * num_of_threads))
            batch_op = pad_queue.dequeue_many(n=batch_size, name='pop_batch')
    return batch_op
# Example #15 (0)
    def __init__(self,
                 data_reader,
                 batch_size,
                 indices=None,
                 shuffle=True,
                 capacity=5000):
        """Wrap `data_reader` in a padding FIFO queue.

        Builds one placeholder per named component of the reader and the
        matching enqueue/dequeue ops; `indices` defaults to all of the
        reader's items.
        """
        self.data_reader = data_reader
        self.batch_size = batch_size
        self.shuffle = shuffle
        self._is_running = False

        if indices is None:
            indices = list(range(len(self.data_reader)))
        self.indices = indices

        # setup queue: order shapes/dtypes consistently by component name.
        self.names = data_reader.names
        self.shapes = [data_reader.shapes[n] for n in self.names]
        self.dtypes = [data_reader.dtypes[n] for n in self.names]
        self.placeholders = {
            name: tf.placeholder(dt, shape=shape, name=name)
            for dt, name, shape in zip(self.dtypes, self.names, self.shapes)
        }

        self.queue = tf.PaddingFIFOQueue(capacity,
                                         dtypes=self.dtypes,
                                         shapes=self.shapes,
                                         names=self.names)
        self.enqueue_op = self.queue.enqueue(self.placeholders)
        self.dequeue_op = self.queue.dequeue()
  def testEnqueueAndBlockingDequeue(self):
    """A blocked dequeue is satisfied, in FIFO order, by later enqueues."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(3, tf.float32, ((),))
      elems = [10.0, 20.0, 30.0]
      enqueue_ops = [q.enqueue((x,)) for x in elems]
      dequeued_t = q.dequeue()

      def enqueue():
        # The enqueue_ops should run after the dequeue op has blocked.
        # TODO(mrry): Figure out how to do this without sleeping.
        time.sleep(0.1)
        for enqueue_op in enqueue_ops:
          sess.run(enqueue_op)

      results = []

      def dequeue():
        for _ in xrange(len(elems)):
          results.append(sess.run(dequeued_t))

      enqueue_thread = self.checkedThread(target=enqueue)
      dequeue_thread = self.checkedThread(target=dequeue)
      enqueue_thread.start()
      dequeue_thread.start()
      enqueue_thread.join()
      dequeue_thread.join()

      # FIFO order is preserved across the blocking dequeues.
      for elem, result in zip(elems, results):
        self.assertEqual([elem], result)
  def testMultiDequeueMany(self):
    """dequeue_many on a two-component queue returns aligned slices."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(10, (tf.float32, tf.int32),
                              shapes=((), (2,)))
      float_elems = [
          10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0]
      int_elems = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
                   [11, 12], [13, 14], [15, 16], [17, 18], [19, 20]]
      enqueue_op = q.enqueue_many((float_elems, int_elems))
      dequeued_t = q.dequeue_many(4)
      dequeued_single_t = q.dequeue()

      enqueue_op.run()

      # First batch of 4; the returned arrays must match the ops' static
      # shapes.
      float_val, int_val = sess.run(dequeued_t)
      self.assertAllEqual(float_elems[0:4], float_val)
      self.assertAllEqual(int_elems[0:4], int_val)
      self.assertEqual(float_val.shape, dequeued_t[0].get_shape())
      self.assertEqual(int_val.shape, dequeued_t[1].get_shape())

      # Second batch of 4.
      float_val, int_val = sess.run(dequeued_t)
      self.assertAllEqual(float_elems[4:8], float_val)
      self.assertAllEqual(int_elems[4:8], int_val)

      # A single dequeue yields the ninth element of each component.
      float_val, int_val = sess.run(dequeued_single_t)
      self.assertAllEqual(float_elems[8], float_val)
      self.assertAllEqual(int_elems[8], int_val)
      self.assertEqual(float_val.shape, dequeued_single_t[0].get_shape())
      self.assertEqual(int_val.shape, dequeued_single_t[1].get_shape())
# Example #18 (0)
    def __init__(self,
                 audio_dir,
                 coord,
                 sample_rate,
                 sample_size=None,
                 silence_threshold=None,
                 quantization_channels=256,
                 queue_size=256,
                 pattern='*.wav'):
        """Store reader parameters, build the sample queue, and check that
        the audio directory holds at least one file matching `pattern`.

        Raises:
            ValueError: if find_files(audio_dir, pattern) yields nothing.
        """
        # Parameter bookkeeping, in signature order.
        self.audio_dir = audio_dir
        self.coord = coord
        self.sample_rate = sample_rate
        self.sample_size = sample_size
        self.silence_threshold = silence_threshold
        self.quantization_channels = quantization_channels
        self.pattern = pattern
        self.threads = []

        # Queue of variable-length (None, 1) float32 audio pieces.
        self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                         shapes=[(None, 1)])
        self.enqueue = self.queue.enqueue([self.sample_placeholder])

        # TODO Find a better way to check this.
        # Checking inside the AudioReader's thread makes it
        # hard to terminate the execution of the script, so
        # we do it in the constructor for now.
        matched = find_files(audio_dir, self.pattern)
        if not matched:
            raise ValueError("No audio files found in '{}'.".format(audio_dir))
  def testMixtureOfEnqueueAndEnqueueMany(self):
    """Randomly interleaves enqueue and enqueue_many against one dequeuing
    thread; values must come out as the exact sequence 0..249."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(10, tf.int32, shapes=((),))
      enqueue_placeholder = tf.placeholder(tf.int32, shape=())
      enqueue_op = q.enqueue((enqueue_placeholder,))
      enqueuemany_placeholder = tf.placeholder(
          tf.int32, shape=(None,))
      enqueuemany_op = q.enqueue_many((enqueuemany_placeholder,))

      dequeued_t = q.dequeue()
      close_op = q.close()

      def dequeue():
        # FIFO order must be preserved regardless of enqueue batching.
        for i in xrange(250):
          self.assertEqual(i, sess.run(dequeued_t))
      dequeue_thread = self.checkedThread(target=dequeue)
      dequeue_thread.start()

      elements_enqueued = 0
      while elements_enqueued < 250:
        # With equal probability, run Enqueue or enqueue_many.
        if random.random() > 0.5:
          enqueue_op.run({enqueue_placeholder: elements_enqueued})
          elements_enqueued += 1
        else:
          count = random.randint(0, min(20, 250 - elements_enqueued))
          range_to_enqueue = np.arange(elements_enqueued,
                                       elements_enqueued + count,
                                       dtype=np.int32)
          enqueuemany_op.run({enqueuemany_placeholder: range_to_enqueue})
          elements_enqueued += count

      close_op.run()
      dequeue_thread.join()
      self.assertEqual(0, q.size().eval())
    def __init__(self,
                 data_dir,
                 coord,
                 symbol_list,
                 year_range,
                 symbol_first,
                 data_win_len,
                 receptive_field,
                 queue_size=500):
        """Set up DB access, the transaction input queue, and the
        (symbol, year) iteration order used by the reader threads."""
        # system initialize
        self.db_manager = DBManager(data_dir)
        self.preprocessor = Preprocessor()
        self.coord = coord
        self.threads = []

        # processing params
        self.data_dir = data_dir
        self.symbol_list = symbol_list
        self.year_range = year_range
        self.symbol_first = symbol_first
        self.data_win_len = data_win_len
        self.receptive_field = receptive_field

        # queue setup: variable-length (None, 1) float32 pieces.
        self.trans_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.trans_queue = tf.PaddingFIFOQueue(queue_size, ['float32'],
                                               shapes=[(None, 1)])
        self.trans = self.trans_queue.enqueue([self.trans_placeholder])

        # for multithreading: iterate (symbol, year) pairs in the order
        # requested by symbol_first.
        if self.symbol_first:
            self.yield_list = itertools.product(self.symbol_list,
                                                self.year_range)
        else:
            self.yield_list = itertools.product(self.year_range,
                                                self.symbol_list)
  def testBlockingDequeueMany(self):
    """dequeue_many(4) blocks until enough elements have been enqueued."""
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(10, tf.float32, ((),))
      elems = [10.0, 20.0, 30.0, 40.0]
      enqueue_op = q.enqueue_many((elems,))
      dequeued_t = q.dequeue_many(4)

      dequeued_elems = []

      def enqueue():
        # The enqueue_op should run after the dequeue op has blocked.
        # TODO(mrry): Figure out how to do this without sleeping.
        time.sleep(0.1)
        sess.run(enqueue_op)

      def dequeue():
        dequeued_elems.extend(sess.run(dequeued_t).tolist())

      enqueue_thread = self.checkedThread(target=enqueue)
      dequeue_thread = self.checkedThread(target=dequeue)
      enqueue_thread.start()
      dequeue_thread.start()
      enqueue_thread.join()
      dequeue_thread.join()

      self.assertAllEqual(elems, dequeued_elems)
def dynamic_rnn_batch(file_list, hparams):
    """Read SequenceExamples from TFRecord files and pad them into batches.

    Variable-length SequenceExamples are handled by padding each batch with
    zeros up to the length of the longest sequence in that batch.

    Args:
      file_list: List of TFRecord files containing SequenceExamples.
      hparams: HParams instance containing model hyperparameters.

    Returns:
      inputs: Tensor of shape [batch_size, examples_per_sequence,
          one_hot_length] with floats indicating the next note event.
      labels: Tensor of shape [batch_size, examples_per_sequence] with int64s
          indicating the prediction for next note event given the notes up to
          this point in the inputs sequence.
      lengths: Tensor vector of shape [batch_size] with the length of the
          SequenceExamples before padding.
    """
    _, _, sequences = input_sequence_example(file_list, hparams)

    # Unpadded length of this sequence; consumers use it to mask the padding.
    seq_length = tf.shape(sequences['inputs'])[0]

    padding_queue = tf.PaddingFIFOQueue(
        capacity=1000,
        dtypes=[tf.float32, tf.int64, tf.int32],
        shapes=[(None, hparams.one_hot_length), (None,), ()])

    # Run the same enqueue op from several threads to keep the queue full.
    num_enqueue_threads = 4
    enqueue_op = padding_queue.enqueue(
        [sequences['inputs'], sequences['labels'], seq_length])
    tf.train.add_queue_runner(
        tf.train.QueueRunner(padding_queue,
                             [enqueue_op] * num_enqueue_threads))
    return padding_queue.dequeue_many(hparams.batch_size)
  def testClosedBlockingDequeueManyRestoresPartialBatch(self):
    # Verifies that when a blocked dequeue_many is cancelled by closing the
    # queue, any elements it had already taken are put back into the queue
    # in their original order.
    with self.test_session() as sess:
      q = tf.PaddingFIFOQueue(4, (tf.float32, tf.float32), ((), ()))
      elems_a = [1.0, 2.0, 3.0]
      elems_b = [10.0, 20.0, 30.0]
      enqueue_op = q.enqueue_many((elems_a, elems_b))
      # Only 3 elements are enqueued, so dequeue_many(4) can never complete
      # and will block until the queue is closed.
      dequeued_a_t, dequeued_b_t = q.dequeue_many(4)
      cleanup_dequeue_a_t, cleanup_dequeue_b_t = q.dequeue()
      close_op = q.close()

      enqueue_op.run()

      def dequeue():
        # Closing the queue aborts the blocked dequeue_many with
        # OutOfRangeError rather than returning a short batch.
        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run([dequeued_a_t, dequeued_b_t])

      dequeue_thread = self.checkedThread(target=dequeue)
      dequeue_thread.start()
      # The close_op should run after the dequeue_thread has blocked.
      # TODO(mrry): Figure out how to do this without sleeping.
      time.sleep(0.1)

      close_op.run()
      dequeue_thread.join()
      # Test that the elements in the partially-dequeued batch are
      # restored in the correct order.
      for elem_a, elem_b in zip(elems_a, elems_b):
        val_a, val_b = sess.run([cleanup_dequeue_a_t, cleanup_dequeue_b_t])
        self.assertEqual(elem_a, val_a)
        self.assertEqual(elem_b, val_b)
      self.assertEqual(0, q.size().eval())
# Example #24
 def add_placeholder(self):
   """Create the sample/target feed placeholders and the padding queue."""
   x_shape = self.config.X_shape
   y_shape = self.config.Y_shape
   self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=x_shape)
   self.target_placeholder = tf.placeholder(dtype=tf.float32, shape=y_shape)
   # Queue pads both tensors to the declared shapes on dequeue_many.
   self.queue = tf.PaddingFIFOQueue(
       self.queue_size, ['float32', 'float32'], shapes=[x_shape, y_shape])
   self.enqueue = self.queue.enqueue(
       [self.sample_placeholder, self.target_placeholder])
# Example #25
    def __init__(self,
                 nb_audio_dir,
                 wb_audio_dir,
                 coord,
                 sample_rate,
                 sample_size=None,
                 silence_threshold=None,
                 queue_size=32):
        """Reader that feeds paired narrowband/wideband audio into one queue.

        Args:
            nb_audio_dir: Directory containing the narrowband audio files.
            wb_audio_dir: Directory containing the wideband audio files.
            coord: Coordinator used to stop the reader threads.
            sample_rate: Sample rate the audio is expected to have
                (consumed by the reader thread, not visible in this block).
            sample_size: Optional chunk size -- TODO confirm semantics
                against the reader thread.
            silence_threshold: Optional silence-trimming threshold --
                presumably used by the reader thread; verify there.
            queue_size: Capacity of the padding FIFO queue.

        Raises:
            ValueError: If either directory contains no audio files.
        """
        self.nb_audio_dir = nb_audio_dir
        self.wb_audio_dir = wb_audio_dir
        self.sample_rate = sample_rate
        self.coord = coord
        self.sample_size = sample_size
        self.silence_threshold = silence_threshold
        self.threads = []

        # Fail fast: validate the data directories before building any
        # graph ops (the original checked only after constructing them,
        # and ended with a redundant `return`).
        nb_files = find_files(nb_audio_dir)
        wb_files = find_files(wb_audio_dir)
        if not nb_files:
            raise ValueError("No audio files found in '{}'".format(nb_audio_dir))
        if not wb_files:
            raise ValueError("No audio files found in '{}'".format(wb_audio_dir))

        # One placeholder per stream; shape=None lets the feed decide, and
        # the queue pads both to (None, 1) on dequeue_many.
        self.nb_sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.wb_sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
        self.queue = tf.PaddingFIFOQueue(queue_size,
                                         ['float32', 'float32'],
                                         shapes=[(None, 1), (None, 1)])
        self.enqueue = self.queue.enqueue(
            [self.nb_sample_placeholder, self.wb_sample_placeholder])
 def __init__(self,
              audio_dir,
              audio_output_dir,
              coord,
              sample_rate,
              sample_size=None,
              silence_threshold=None,
              queue_size=256,
              step_length=100):
     """Set up a padded queue fed with (input, target) audio pairs.

     Args:
         audio_dir: Directory with the input audio files.
         audio_output_dir: Directory with the corresponding target audio.
         coord: Coordinator used to stop the reader threads.
         sample_rate: Expected sample rate -- stored here, consumed by the
             reader thread (not visible in this block).
         sample_size: Optional chunk size -- TODO confirm semantics against
             the reader thread.
         silence_threshold: Optional silence threshold -- presumably used
             by the reader thread; verify there.
         queue_size: Capacity of the padding FIFO queue.
         step_length: Stride parameter; printed below, consumed elsewhere.

     Raises:
         ValueError: If no audio files are found in audio_dir.
     """
     self.step_length = step_length
     self.audio_dir = audio_dir
     self.audio_output_dir = audio_output_dir
     self.sample_rate = sample_rate
     self.coord = coord
     self.sample_size = sample_size
     self.silence_threshold = silence_threshold
     self.threads = []
     # shape=None lets each feed supply a variable-length sample; the queue
     # pads both tensors to (None, 1) on dequeue.
     self.sample_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
     self.output_placeholder = tf.placeholder(dtype=tf.float32, shape=None)
     self.queue = tf.PaddingFIFOQueue(queue_size,
                                      ['float32', 'float32'],
                                      shapes=[(None, 1), (None, 1)])
     self.enqueue = self.queue.enqueue([self.sample_placeholder, self.output_placeholder])
     print("step is {}.".format(self.step_length))
     # TODO Find a better way to check this.
     # Checking inside the AudioReader's thread makes it hard to terminate
     # the execution of the script, so we do it in the constructor for now.
     # NOTE(review): audio_output_dir is not validated here -- confirm intended.
     if not find_files(audio_dir):
         raise ValueError("No audio files found in '{}'.".format(audio_dir))
# Example #27
    def __init__(self,
                 data,
                 batch_size,
                 padding_queue_cap=1000,
                 random_queue_cap=400000,
                 dtypes=None,
                 shapes=None,
                 threads_q1=100,
                 threads_q2=10):
        """Build a shuffle-then-pad input pipeline.

        `data` is pushed into a RandomShuffleQueue by `threads_q1` runner
        threads; dequeued elements are then re-enqueued into a
        PaddingFIFOQueue by `threads_q2` threads, and `dequeue_batch`
        yields padded batches of `batch_size`.

        Args:
            data: Tensor(s) to enqueue (one element per enqueue call).
            batch_size: Number of elements per dequeued batch.
            padding_queue_cap: Capacity of the padding queue.
            random_queue_cap: Capacity of the shuffle queue; its
                min_after_dequeue is half of this.
            dtypes: Element dtypes; defaults to [tf.int32].
            shapes: Element shapes; defaults to [[None, 1]].
            threads_q1: Enqueue threads for the shuffle queue.
            threads_q2: Enqueue threads for the padding queue.
        """
        # Fix: the originals were mutable default arguments ([tf.int32],
        # [[None, 1]]); use None sentinels with the same effective defaults.
        if dtypes is None:
            dtypes = [tf.int32]
        if shapes is None:
            shapes = [[None, 1]]

        input_queue = tf.RandomShuffleQueue(capacity=random_queue_cap,
                                            min_after_dequeue=int(
                                                random_queue_cap * 0.5),
                                            dtypes=dtypes)

        input_enqueue_op = input_queue.enqueue(data)
        qr_input = tf.train.QueueRunner(input_queue,
                                        [input_enqueue_op] * threads_q1)
        tf.train.add_queue_runner(qr_input)
        non_paddled_input = input_queue.dequeue()
        self.non_paddled_input = non_paddled_input

        padding_queue = tf.PaddingFIFOQueue(capacity=padding_queue_cap,
                                            dtypes=dtypes,
                                            shapes=shapes)

        padding_enqueue_op = padding_queue.enqueue(non_paddled_input)
        qr_padding = tf.train.QueueRunner(padding_queue,
                                          [padding_enqueue_op] * threads_q2)
        tf.train.add_queue_runner(qr_padding)
        self.dequeue_batch = padding_queue.dequeue_many(batch_size)
  def testDtypes(self):
    # Round-trip one batch of every supported dtype through the queue and
    # check the values come back unchanged.
    with self.test_session() as sess:
      dtypes = [tf.float32, tf.float64, tf.int32, tf.uint8, tf.int16, tf.int8,
                tf.int64, tf.bool, tf.complex64, tf.complex128]
      shape = (32, 4, 128)
      q = tf.PaddingFIFOQueue(32, dtypes, [shape[1:]] * len(dtypes))

      def random_array(dtype):
        # Draw small integers, then coerce to the target dtype.
        raw = np.random.randint(-10, 10, shape)
        if dtype == tf.bool:
          return raw > 0
        if dtype in (tf.complex64, tf.complex128):
          return np.sqrt(raw.astype(dtype.as_numpy_dtype))
        return raw.astype(dtype.as_numpy_dtype)

      input_tuple = [random_array(dtype) for dtype in dtypes]

      q.enqueue_many(input_tuple).run()

      output_tuple = sess.run(q.dequeue_many(32))

      for input_elem, output_elem in zip(input_tuple, output_tuple):
        self.assertAllEqual(input_elem, output_elem)
# Example #29
 def __init__(self, txt_files, thread_count, batch_size, numcep,
              numcontext):
     """Feeder that queues (features, feature_len, labels, label_len).

     Args:
         txt_files: Transcript files turned into a circular list below.
         thread_count: Number of enqueue threads to use.
         batch_size: Batch size; queue capacity is
             2 * device_count * batch_size.
         numcep: Number of cepstral coefficients per frame.
         numcontext: Context frames on each side; feature width is
             numcep + 2 * numcep * numcontext.
     """
     self._coord = None
     self._numcep = numcep
     # Per-example feature matrix: [time, numcep + 2*numcep*numcontext].
     self._x = tf.placeholder(tf.float32,
                              [None, numcep + (2 * numcep * numcontext)])
     self._x_length = tf.placeholder(tf.int32, [])
     # Label sequence (variable length) and its length.
     self._y = tf.placeholder(tf.int32, [
         None,
     ])
     self._y_length = tf.placeholder(tf.int32, [])
     self._example_queue = tf.PaddingFIFOQueue(
         shapes=[[None, numcep + (2 * numcep * numcontext)], [], [
             None,
         ], []],
         dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
         capacity=2 * self._get_device_count() * batch_size)
     self._enqueue_op = self._example_queue.enqueue(
         [self._x, self._x_length, self._y, self._y_length])
     # cancel_pending_enqueues lets close() unblock feeder threads.
     self._close_op = self._example_queue.close(
         cancel_pending_enqueues=True)
     self._txt_files = txt_files
     self._batch_size = batch_size
     self._numcontext = numcontext
     self._thread_count = thread_count
     self._files_circular_list = self._create_files_circular_list()
# Example #30
 def __init__(self,
              files_list,
              thread_count,
              batch_size,
              numcep,
              numcontext,
              next_index=lambda x: x + 1):
     """Feeder that queues (uttid, features, feature_len, labels, label_len).

     Args:
         files_list: Input files; normalized via _create_files_list.
         thread_count: Number of enqueue threads to use.
         batch_size: Batch size; queue capacity is
             2 * device_count * batch_size.
         numcep: Number of cepstral coefficients per frame.
         numcontext: Context frames on each side; feature width is
             numcep + 2 * numcep * numcontext.
         next_index: Callable mapping an index to the next one to read;
             defaults to sequential (+1) traversal.
     """
     self._coord = None
     self._numcep = numcep
     # Utterance id travels with the example through the queue.
     self._uttid = tf.placeholder(tf.string, [])
     # Per-example feature matrix: [time, numcep + 2*numcep*numcontext].
     self._x = tf.placeholder(tf.float32,
                              [None, numcep + (2 * numcep * numcontext)])
     self._x_length = tf.placeholder(tf.int32, [])
     # Label sequence (variable length) and its length.
     self._y = tf.placeholder(tf.int32, [
         None,
     ])
     self._y_length = tf.placeholder(tf.int32, [])
     self.example_queue = tf.PaddingFIFOQueue(
         shapes=[[], [None, numcep + (2 * numcep * numcontext)], [], [
             None,
         ], []],
         dtypes=[tf.string, tf.float32, tf.int32, tf.int32, tf.int32],
         capacity=2 * self._get_device_count() * batch_size)
     self._enqueue_op = self.example_queue.enqueue(
         [self._uttid, self._x, self._x_length, self._y, self._y_length])
     # cancel_pending_enqueues lets close() unblock feeder threads.
     self._close_op = self.example_queue.close(cancel_pending_enqueues=True)
     self.batch_size = batch_size
     self._numcontext = numcontext
     self._thread_count = thread_count
     self._files_list = self._create_files_list(files_list)
     self._next_index = next_index