Example #1
    def __init__(self,
                 dataset_source,
                 dataset_target,
                 shuffle=True,
                 num_epochs=None,
                 common_queue_capacity=4096,
                 common_queue_min=1024,
                 seed=None):

        if seed is None:
            seed = np.random.randint(10e8)

        _, data_source = parallel_read(dataset_source.data_sources,
                                       reader_class=dataset_source.reader,
                                       num_epochs=num_epochs,
                                       num_readers=1,
                                       shuffle=False,
                                       capacity=common_queue_capacity,
                                       min_after_dequeue=common_queue_min,
                                       seed=seed)

        data_target = ""
        if dataset_target is not None:
            _, data_target = parallel_read(dataset_target.data_sources,
                                           reader_class=dataset_target.reader,
                                           num_epochs=num_epochs,
                                           num_readers=1,
                                           shuffle=False,
                                           capacity=common_queue_capacity,
                                           min_after_dequeue=common_queue_min,
                                           seed=seed)

        # Optionally shuffle the data
        if shuffle:
            shuffle_queue = tf.RandomShuffleQueue(
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                dtypes=[tf.string, tf.string],
                seed=seed)
            enqueue_ops = [shuffle_queue.enqueue([data_source, data_target])]
            tf.train.add_queue_runner(
                tf.train.QueueRunner(shuffle_queue, enqueue_ops))
            data_source, data_target = shuffle_queue.dequeue()

        # Decode source items
        items = dataset_source.decoder.list_items()
        tensors = dataset_source.decoder.decode(data_source, items)

        if dataset_target is not None:
            # Decode target items
            items2 = dataset_target.decoder.list_items()
            tensors2 = dataset_target.decoder.decode(data_target, items2)

            # Merge items and results
            items = items + items2
            tensors = tensors + tensors2

        super(ParallelDatasetProvider,
              self).__init__(items_to_tensors=dict(zip(items, tensors)),
                             num_samples=dataset_source.num_samples)
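
A minimal usage sketch for the provider above, hedged: it assumes TF 1.x-style slim Dataset objects for source and target, and the item names 'source_tokens' and 'target_tokens' are hypothetical (they depend on the decoders attached to the two datasets).

    # Hypothetical usage of ParallelDatasetProvider (names are illustrative).
    provider = ParallelDatasetProvider(dataset_source, dataset_target,
                                       shuffle=True, num_epochs=None)
    source, target = provider.get(['source_tokens', 'target_tokens'])
    # Batching and queue runners (tf.train.batch, tf.train.start_queue_runners)
    # are still the caller's responsibility.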
Example #2
    def __init__(self, dataset_source, dataset_target, shuffle=True, num_epochs=None,
                 common_queue_capacity=4096, common_queue_min=1024, seed=None):

        if seed is None:
            seed = np.random.randint(10e8)

        _, data_source = parallel_read(
            dataset_source.data_sources,
            reader_class=dataset_source.reader,
            num_epochs=num_epochs,
            num_readers=1,
            shuffle=False,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed)

        data_target = ""
        if dataset_target is not None:
            _, data_target = parallel_read(
                dataset_target.data_sources,
                reader_class=dataset_target.reader,
                num_epochs=num_epochs,
                num_readers=1,
                shuffle=False,
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                seed=seed)

        # Optionally shuffle the data
        if shuffle:
            shuffle_queue = tf.RandomShuffleQueue(
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                dtypes=[tf.string, tf.string],
                seed=seed)
            enqueue_ops = [shuffle_queue.enqueue([data_source, data_target])]
            tf.train.add_queue_runner(
                tf.train.QueueRunner(shuffle_queue, enqueue_ops))
            data_source, data_target = shuffle_queue.dequeue()

        # Decode source items
        items = dataset_source.decoder.list_items()
        tensors = dataset_source.decoder.decode(data_source, items)

        if dataset_target is not None:
            # Decode target items
            items2 = dataset_target.decoder.list_items()
            tensors2 = dataset_target.decoder.decode(data_target, items2)

            # Merge items and results
            items = items + items2
            tensors = tensors + tensors2

        super(ParallelDatasetProvider, self).__init__(items_to_tensors=dict(zip(items, tensors)),
                                                      num_samples=dataset_source.num_samples)
Example #3
  def __init__(self, dataset, num_readers=1, shuffle=True, num_epochs=None,
               common_queue_capacity=256, common_queue_min=128):
    """Creates a DatasetDataProvider.

    Args:
      dataset: An instance of the Dataset class.
      num_readers: The number of parallel readers to use.
      shuffle: Whether to shuffle the data sources and common queue when
        reading.
      num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
      common_queue_capacity: The capacity of the common queue.
      common_queue_min: The minimum number of elements in the common queue after
        a dequeue.
    """
    _, data = parallel_reader.parallel_read(
        dataset.data_sources,
        reader_class=dataset.reader,
        num_epochs=num_epochs,
        num_readers=num_readers,
        shuffle=shuffle,
        capacity=common_queue_capacity,
        min_after_dequeue=common_queue_min)

    items = dataset.decoder.list_items()
    tensors = dataset.decoder.decode(data, items)

    super(DatasetDataProvider, self).__init__(
        items_to_tensors=dict(zip(items, tensors)),
        num_samples=dataset.num_samples)
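
A provider built this way is normally consumed through get() plus a batching queue. A minimal sketch, assuming TF 1.x with tf.contrib.slim and a hypothetical dataset whose decoder exposes 'image' and 'label' items:

    import tensorflow as tf

    slim = tf.contrib.slim

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, num_readers=4, shuffle=True,
        common_queue_capacity=256, common_queue_min=128)
    image, label = provider.get(['image', 'label'])  # decoded tensors by item name
    images, labels = tf.train.batch(
        [image, label], batch_size=32, num_threads=2, capacity=64)
    # The queue runners registered by parallel_read and tf.train.batch must be
    # started via tf.train.start_queue_runners() inside a session.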
Example #4
    def __init__(self, dataset, num_readers=1, reader_kwargs=None, shuffle=True, num_epochs=None,
                 common_queue_capacity=256, common_queue_min=128, record_key='__record_key__',
                 seed=None, scope=None):
        key, data = parallel_read(
            dataset.data_sources,
            reader_class=dataset.reader,
            num_epochs=num_epochs,
            num_readers=num_readers,
            reader_kwargs=reader_kwargs,
            shuffle=shuffle,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed,
            scope=scope)

        items = dataset.decoder.list_items()
        tensors = dataset.decoder.decode(data, items)

        if record_key in items:
            raise ValueError('The item name used for `record_key` cannot also be '
                             'used for a dataset item: %s' % record_key)
        # items.append(record_key)
        # tensors.append(key)

        super(DatasetDataProvider, self).__init__(items_to_tensors=dict(zip(items, tensors)),
                                                  num_samples=dataset.num_samples)
Example #5
    def __init__(self,
                 dataset,
                 num_readers=1,
                 reader_kwargs=None,
                 shuffle=True,
                 num_epochs=None,
                 common_queue_capacity=256,
                 common_queue_min=128,
                 record_key='record_key',
                 seed=None,
                 scope=None):
        """Creates a DatasetDataProvider.

    Note: if `num_epochs` is not `None`, a local counter `epochs` will be
    created by the relevant function. Use `local_variables_initializer()` to
    initialize local variables.

    Args:
      dataset: An instance of the Dataset class.
      num_readers: The number of parallel readers to use.
      reader_kwargs: An optional dict of kwargs for the reader.
      shuffle: Whether to shuffle the data sources and common queue when
        reading.
      num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
      common_queue_capacity: The capacity of the common queue.
      common_queue_min: The minimum number of elements in the common queue after
        a dequeue.
      record_key: The item name to use for the dataset record keys in the
        provided tensors.
      seed: The seed to use if shuffling.
      scope: Optional name scope for the ops.
    Raises:
      ValueError: If `record_key` matches one of the items in the dataset.
    """
        key, data = parallel_reader.parallel_read(
            dataset.data_sources,
            reader_class=dataset.reader,
            num_epochs=num_epochs,
            num_readers=num_readers,
            reader_kwargs=reader_kwargs,
            shuffle=shuffle,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed,
            scope=scope)

        items = dataset.decoder.list_items()
        tensors = dataset.decoder.decode(data, items)

        if record_key in items:
            raise ValueError(
                'The item name used for `record_key` cannot also be '
                'used for a dataset item: %s' % record_key)
        items.append(record_key)
        tensors.append(key)

        super(DatasetDataProvider,
              self).__init__(items_to_tensors=dict(zip(items, tensors)),
                             num_samples=dataset.num_samples)
Example #6
  def __init__(self,
               dataset,
               num_readers=1,
               reader_kwargs=None,
               shuffle=True,
               num_epochs=None,
               common_queue_capacity=256,
               common_queue_min=128,
               record_key='record_key',
               seed=None,
               scope=None):
    """Creates a DatasetDataProvider.

    Note: if `num_epochs` is not `None`, a local counter `epochs` will be
    created by the relevant function. Use `local_variables_initializer()` to
    initialize local variables.

    Args:
      dataset: An instance of the Dataset class.
      num_readers: The number of parallel readers to use.
      reader_kwargs: An optional dict of kwargs for the reader.
      shuffle: Whether to shuffle the data sources and common queue when
        reading.
      num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
      common_queue_capacity: The capacity of the common queue.
      common_queue_min: The minimum number of elements in the common queue after
        a dequeue.
      record_key: The item name to use for the dataset record keys in the
        provided tensors.
      seed: The seed to use if shuffling.
      scope: Optional name scope for the ops.
    Raises:
      ValueError: If `record_key` matches one of the items in the dataset.
    """
    key, data = parallel_reader.parallel_read(
        dataset.data_sources,
        reader_class=dataset.reader,
        num_epochs=num_epochs,
        num_readers=num_readers,
        reader_kwargs=reader_kwargs,
        shuffle=shuffle,
        capacity=common_queue_capacity,
        min_after_dequeue=common_queue_min,
        seed=seed,
        scope=scope)

    items = dataset.decoder.list_items()
    tensors = dataset.decoder.decode(data, items)

    if record_key in items:
      raise ValueError('The item name used for `record_key` cannot also be '
                       'used for a dataset item: %s' % record_key)
    items.append(record_key)
    tensors.append(key)

    super(DatasetDataProvider, self).__init__(
        items_to_tensors=dict(zip(items, tensors)),
        num_samples=dataset.num_samples)
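
The record_key plumbing above appends the raw record key alongside the decoded items, so it can be fetched by name like any other item. A hedged sketch (TF 1.x / tf.contrib.slim; 'image' is a hypothetical item name):

    import tensorflow as tf

    slim = tf.contrib.slim

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, record_key='record_key', shuffle=False)
    image, key = provider.get(['image', 'record_key'])
    # `key` is the string tensor that parallel_read returned for each record.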
Example #7
    def __init__(self,
                 dataset,
                 num_readers=1,
                 shuffle=True,
                 num_epochs=None,
                 common_queue_capacity=256,
                 common_queue_min=128,
                 bgr_flips=None):
        """Creates a DatasetDataProvider.

    Args:
      dataset: An instance of the Dataset class.
      num_readers: The number of parallel readers to use.
      shuffle: Whether to shuffle the data sources and common queue when
        reading.
      num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
      common_queue_capacity: The capacity of the common queue.
      common_queue_min: The minimum number of elements in the common queue after
        a dequeue.
    """
        self.num_channels_stream = []
        if bgr_flips is not None:
            bgr_flips = bgr_flips.split(',')
        img_str, label = parallel_reader.parallel_read(
            dataset.data_sources,
            reader_class=dataset.reader,
            num_epochs=num_epochs,
            num_readers=num_readers,
            shuffle=shuffle,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min)

        items = dataset.decoder.list_items()
        imgs = dataset.decoder.decode(img_str, items)
        num_streams = len(imgs[0])
        final_imgs = []
        for sid in range(num_streams):
            self.num_channels_stream.append(
                imgs[0][sid].get_shape().as_list()[-1])
            img_stream = []
            for bid in range(len(imgs)):
                img_stream.append(imgs[bid][sid])
            img = tf.pack(img_stream)
            if bgr_flips is not None and bgr_flips[sid] == 'True':
                logging.info('BGR flipping stream %d' % sid)
                img = tf.reverse(img, [False, False, False, True])
            final_imgs.append(img)

        img = tf.concat(3, final_imgs)
        tensors = [img, label]

        super(DatasetDataProvider,
              self).__init__(items_to_tensors=dict(zip(items, tensors)),
                             num_samples=dataset.num_samples)
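
Hedged sketch of how this multi-stream variant might be instantiated; the comma-separated bgr_flips string selects, per stream, whether the channel order is reversed (the two-stream dataset here is hypothetical):

    # 'True,False' => flip stream 0 from RGB to BGR, leave stream 1 as-is.
    provider = DatasetDataProvider(dataset, num_readers=4, shuffle=True,
                                   bgr_flips='True,False')
    channels_per_stream = provider.num_channels_stream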
Example #8
  def __init__(self,
               dataset,
               num_readers=1,
               reader_kwargs=None,
               shuffle=True,
               num_epochs=None,
               common_queue_capacity=256,
               common_queue_min=128,
               record_key='record_key',
               seed=None,
               scope=None):
    """Creates a DatasetDataProvider.

    Args:
      dataset: An instance of the Dataset class.
      num_readers: The number of parallel readers to use.
      reader_kwargs: An optional dict of kwargs for the reader.
      shuffle: Whether to shuffle the data sources and common queue when
        reading.
      num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
      common_queue_capacity: The capacity of the common queue.
      common_queue_min: The minimum number of elements in the common queue after
        a dequeue.
      record_key: The item name to use for the dataset record keys in the
        provided tensors.
      seed: The seed to use if shuffling.
      scope: Optional name scope for the ops.
    Raises:
      ValueError: If `record_key` matches one of the items in the dataset.
    """
    key, data = parallel_reader.parallel_read(
        dataset.data_sources,
        reader_class=dataset.reader,
        num_epochs=num_epochs,
        num_readers=num_readers,
        reader_kwargs=reader_kwargs,
        shuffle=shuffle,
        capacity=common_queue_capacity,
        min_after_dequeue=common_queue_min,
        seed=seed,
        scope=scope)

    items = dataset.decoder.list_items()
    tensors = dataset.decoder.decode(data, items)

    super(FeaturedDataProvider, self).__init__(
        items_to_tensors=dict(zip(items, tensors)),
        num_samples=dataset.num_samples)
Example #9
  def __init__(self, dataset, num_readers=1, shuffle=True, num_epochs=None,
               common_queue_capacity=256, common_queue_min=128,
               bgr_flips=None):
    """Creates a DatasetDataProvider.

    Args:
      dataset: An instance of the Dataset class.
      num_readers: The number of parallel readers to use.
      shuffle: Whether to shuffle the data sources and common queue when
        reading.
      num_epochs: The number of times each data source is read. If left as None,
        the data will be cycled through indefinitely.
      common_queue_capacity: The capacity of the common queue.
      common_queue_min: The minimum number of elements in the common queue after
        a dequeue.
    """
    self.num_channels_stream = []
    if bgr_flips is not None:
      bgr_flips = bgr_flips.split(',')
    img_str, label = parallel_reader.parallel_read(
        dataset.data_sources,
        reader_class=dataset.reader,
        num_epochs=num_epochs,
        num_readers=num_readers,
        shuffle=shuffle,
        capacity=common_queue_capacity,
        min_after_dequeue=common_queue_min)

    items = dataset.decoder.list_items()
    imgs = dataset.decoder.decode(img_str, items)
    num_streams = len(imgs[0])
    final_imgs = []
    for sid in range(num_streams):
      self.num_channels_stream.append(imgs[0][sid].get_shape().as_list()[-1])
      img_stream = []
      for bid in range(len(imgs)):
        img_stream.append(imgs[bid][sid])
      img = tf.pack(img_stream)
      if bgr_flips is not None and bgr_flips[sid] == 'True':
        logging.info('BGR flipping stream %d' % sid)
        img = tf.reverse(img, [False, False, False, True])
      final_imgs.append(img)

    img = tf.concat(3, final_imgs)
    tensors = [img, label]

    super(DatasetDataProvider, self).__init__(
        items_to_tensors=dict(zip(items, tensors)),
        num_samples=dataset.num_samples)
Example #10
def main():
  reader = tf.TFRecordReader
  data_sources = ["traineh.tfrecord"]
  _, data = parallel_reader.parallel_read(
      data_sources,
      reader_class=reader,
      num_epochs=1,
      num_readers=1,
      shuffle=False,
      capacity=256,
      min_after_dequeue=1)

  context_features, sequence_features = parsing_ops.parse_single_sequence_example(
      data,
      context_features={
          'video_id': tf.VarLenFeature(tf.string),
          'labels': tf.VarLenFeature(tf.int64),
      },
      sequence_features={
          'inc3': tf.FixedLenSequenceFeature(1, tf.string)
      },
      example_name="")

  with tf.Session() as sess:
    sess.run(tf.initialize_local_variables())
    sess.run(tf.initialize_all_variables())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
      while not coord.should_stop():
        meta = sess.run(context_features)
        vid = meta['video_id'].values[0]
        labels = meta['labels'].values

        inc3_fea = sess.run(sequence_features)['inc3']
        frame_feas = []
        for r in inc3_fea:
          v = np.fromstring(r[0], dtype=np.uint8)
          frame_feas.append(v[None, :])
        frame_feas = np.vstack(frame_feas)
        print(vid, labels)
        print(frame_feas.shape)
        # Do something here
    except tf.errors.OutOfRangeError:
      print('Finished extracting.')
    finally:
      coord.request_stop()
      coord.join(threads)
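
The frame features decoded above arrive as quantized uint8 vectors; the "Do something here" step would typically dequantize them back to floats. A sketch using the formula from the public YouTube-8M starter code (the ±2 quantization bounds are that code's defaults, assumed here):

    def dequantize(feat, max_quantized_value=2.0, min_quantized_value=-2.0):
        # Map uint8 values in [0, 255] back to floats in [min, max].
        quantized_range = max_quantized_value - min_quantized_value
        scalar = quantized_range / 255.0
        bias = (quantized_range / 512.0) + min_quantized_value
        return feat * scalar + bias

    # e.g. inside the loop above: frame_feas = dequantize(frame_feas)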
Example #11
  def testTFRecordReader(self):
    with self.test_session():
      self._tfrecord_paths = test_utils.create_tfrecord_files(
          self.get_temp_dir(), num_files=3)

    key, value = parallel_reader.parallel_read(
        self._tfrecord_paths, reader_class=io_ops.TFRecordReader, num_readers=3)

    sv = supervisor.Supervisor(logdir=self.get_temp_dir())
    with sv.prepare_or_wait_for_session() as sess:
      sv.start_queue_runners(sess)

      flowers = 0
      num_reads = 100
      for _ in range(num_reads):
        current_key, _ = sess.run([key, value])
        if 'flowers' in str(current_key):
          flowers += 1
      self.assertGreater(flowers, 0)
      self.assertEquals(flowers, num_reads)
Example #12
    def testTFRecordReader(self):
        with self.test_session():
            self._tfrecord_paths = test_utils.create_tfrecord_files(
                self.get_temp_dir(), num_files=3)

        key, value = parallel_reader.parallel_read(
            self._tfrecord_paths,
            reader_class=io_ops.TFRecordReader,
            num_readers=3)

        sv = supervisor.Supervisor(logdir=self.get_temp_dir())
        with sv.managed_session() as sess:

            flowers = 0
            num_reads = 100
            for _ in range(num_reads):
                current_key, _ = sess.run([key, value])
                if 'flowers' in str(current_key):
                    flowers += 1
            self.assertGreater(flowers, 0)
            self.assertEquals(flowers, num_reads)
Example #13
    def _get_tensor_and_example(
        self,
        mode: tf.estimator.ModeKeys,
        shuffle: bool = False,
        num_epochs: Optional[int] = None
    ) -> Tuple[Dict[bytes, tf.Tensor], bytes]:
        """Read and decode the serialized tf.Example into tensors.

    Args:
      mode: One of tf.estimator.ModeKeys {TRAIN,EVAL,INFER}.
      shuffle: Whether to shuffle the input.
      num_epochs: Number of times a tf.Example will be visited in generating the
        input. If set to None, each Example will be cycled indefinitely.

    Returns:
      Tuple with:
      A dictionary that maps tensorflow.Example feature names to tensors.
      serialized_example: bytes, a serialized example.
    """
        dataset = self._data[mode]
        if mode == tf.estimator.ModeKeys.INFER:
            serialized_example = tf.placeholder(
                dtype=tf.string, shape=[], name='input_serialized_examples')
        else:
            _, serialized_example = parallel_reader.parallel_read(
                dataset.data_sources,
                reader_class=dataset.reader,
                num_epochs=num_epochs,
                num_readers=self._num_readers,
                shuffle=shuffle,
                capacity=self._queue_capacity,
                min_after_dequeue=self._queue_min)
        items = dataset.decoder.list_items()
        tensors = dataset.decoder.decode(serialized_example, items)

        return dict(zip(items, tensors)), serialized_example
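
A hedged usage sketch for the method above; it is hypothetical and assumes the enclosing class wires up self._data, self._num_readers, self._queue_capacity and self._queue_min as used in the body, and that 'image' is one of the decoder's item names:

    features, serialized = self._get_tensor_and_example(
        mode=tf.estimator.ModeKeys.TRAIN, shuffle=True, num_epochs=None)
    image = features['image']  # one decoded tensor per item name
    # In INFER mode, `serialized` is the 'input_serialized_examples'
    # placeholder rather than a queue-backed tensor.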
Example #14
    def __init__(self,
                 dataset1,
                 dataset2,
                 schemas=None,
                 shuffle=True,
                 num_epochs=None,
                 common_queue_capacity=4096,
                 common_queue_min=1024,
                 seed=None):

        if seed is None:
            seed = np.random.randint(10e8)

        _, data_source = parallel_reader.parallel_read(
            dataset1.data_sources,
            reader_class=dataset1.reader,
            num_epochs=num_epochs,
            num_readers=1,
            shuffle=False,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed)

        data_target = ""
        if dataset2 is not None:
            _, data_target = parallel_reader.parallel_read(
                dataset2.data_sources,
                reader_class=dataset2.reader,
                num_epochs=num_epochs,
                num_readers=1,
                shuffle=False,
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                seed=seed)

        data_schemas = ""
        print("schemas.data_sources", schemas.data_sources)
        if schemas is not None:
            _, data_schemas = parallel_reader.parallel_read(
                schemas.data_sources,
                reader_class=schemas.reader,
                num_epochs=num_epochs,
                num_readers=1,
                shuffle=False,
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                seed=seed)

        # Optionally shuffle the data
        if shuffle:
            shuffle_queue = tf.RandomShuffleQueue(
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                dtypes=[tf.string, tf.string, tf.string],
                seed=seed)
            enqueue_ops = []
            enqueue_ops.append(
                shuffle_queue.enqueue([data_source, data_target,
                                       data_schemas]))
            tf.train.add_queue_runner(
                tf.train.QueueRunner(shuffle_queue, enqueue_ops))
            data_source, data_target, data_schemas = shuffle_queue.dequeue()

        # Decode source items
        items = dataset1.decoder.list_items()
        tensors = dataset1.decoder.decode(data_source, items)

        if dataset2 is not None:
            # Decode target items
            items2 = dataset2.decoder.list_items()
            print("items2", items2)
            print("data_target", data_target)
            tensors2 = dataset2.decoder.decode(data_target, items2)

            # Merge items and results
            items = items + items2
            tensors = tensors + tensors2
        if schemas is not None:
            items_schema = schemas.decoder.list_items()
            tensors_schema = schemas.decoder.decode(data_schemas, items_schema)
            print("items_schema", items_schema)
            print("tensor_schema", tensors_schema)
            items = items + items_schema
            tensors = tensors + tensors_schema

        super(TripleDataProvider,
              self).__init__(items_to_tensors=dict(zip(items, tensors)),
                             num_samples=dataset1.num_samples)
Example #15
def _convert_Youtube8M_tfrecord_to_numpy(tfrecord_filename):
    '''
        Function:
                _convert_Youtube8M_tfrecord_to_numpy
                i.e. parse each data component according to the example prototxt
        Input:
                <string> tfrecord_filename
        Output:
                <list> total_rgb_feat, <list> total_audio_feat, <list> total_label
    '''

    reader = tf.TFRecordReader

    _, data = parallel_reader.parallel_read(data_sources=tfrecord_filename,
                                            reader_class=reader,
                                            num_epochs=1,
                                            num_readers=1,
                                            shuffle=False,
                                            capacity=256,
                                            min_after_dequeue=1)

    # build-up fileQueue and exampleQueue for tfrecords.file...
    context_feat, seq_feat = parsing_ops.parse_single_sequence_example(
        data,
        context_features={
            'video_id': tf.VarLenFeature(tf.string),
            'labels': tf.VarLenFeature(tf.int64)
        },
        sequence_features={
            'rgb': tf.FixedLenSequenceFeature([], tf.string),
            'audio': tf.FixedLenSequenceFeature([], tf.string)
        },
        example_name=" ")

    # standard framework for example parsing...
    with tf.Session() as sess:

        #--- initialize variables in tensorflow session ---#
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())

        #--- start-up coordinator to manage the QueueRunner threads ---#
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        #--- training operations ---#
        try:
            total_rgb_feat = []
            total_audio_feat = []
            total_label = []

            while not coord.should_stop():

                video_context, video_features = sess.run(
                    (context_feat, seq_feat))

                #--- extract 'video_id' and 'labels' from context features ---#
                video_id = video_context['video_id'].values[0]
                labels = video_context['labels'].values

                #--- one-hot vector for labels ---#
                labels = sess.run(
                    tf.sparse_to_dense(labels, (4716, ),
                                       1,
                                       validate_indices=False))

                #--- extract 'rgb' and 'audio' features from video features ---#
                hex_rgb_feat = video_features['rgb']
                hex_audio_feat = video_features['audio']

                rgb_feat = []
                audio_feat = []

                #--- convert hex data i.e. hex_rgb_feat to numpy.uint8 format ---#
                for ii in range(len(hex_rgb_feat)):
                    single_rgb_feat = np.fromstring(hex_rgb_feat[ii],
                                                    dtype=np.uint8)
                    single_audio_feat = np.fromstring(hex_audio_feat[ii],
                                                      dtype=np.uint8)

                    rgb_feat.append(single_rgb_feat)
                    audio_feat.append(single_audio_feat)

                #--- reshape e.g. [[1,2], [3,4]] -> [1,2; 3,4]
                rgb_feat = np.vstack(rgb_feat)
                audio_feat = np.vstack(audio_feat)

                #--- dequantize the rgb and audio features... ---#
                rgb_feat = _dequantize(rgb_feat, 2, -2)
                audio_feat = _dequantize(audio_feat, 2, -2)

                #--- padding or crop to fixed nframe=300... ---#
                rgb_feat = _frame_padding(input_feat=rgb_feat,
                                          padding_value=0,
                                          target_nframe=300)
                audio_feat = _frame_padding(input_feat=audio_feat,
                                            padding_value=0,
                                            target_nframe=300)

                total_rgb_feat.append(rgb_feat)
                total_audio_feat.append(audio_feat)
                total_label.append(labels)

        except tf.errors.OutOfRangeError:
            print('!All video features have been exported...')
        finally:
            coord.request_stop()
            coord.join(threads=threads)

        return total_rgb_feat, total_audio_feat, total_label

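The helpers _dequantize and _frame_padding are not shown in this snippet. Plausible implementations, hedged as assumptions that only match the call sites above (dequantization with bounds ±2; pad or crop each feature matrix to target_nframe rows), might look like:

    import numpy as np

    def _dequantize(feat, max_quantized_value=2, min_quantized_value=-2):
        # Assumed helper: inverse of the YouTube-8M uint8 quantization.
        quantized_range = max_quantized_value - min_quantized_value
        scalar = quantized_range / 255.0
        bias = (quantized_range / 512.0) + min_quantized_value
        return feat * scalar + bias

    def _frame_padding(input_feat, padding_value=0, target_nframe=300):
        # Assumed helper: zero-pad (or crop) along the frame axis to a fixed
        # number of frames.
        num_frames, feat_dim = input_feat.shape
        if num_frames >= target_nframe:
            return input_feat[:target_nframe]
        pad = np.full((target_nframe - num_frames, feat_dim), padding_value,
                      dtype=input_feat.dtype)
        return np.vstack([input_feat, pad])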
Example #16
    def __init__(self,
                 dataset1,
                 dataset2,
                 dataset3,
                 shuffle=True,
                 num_epochs=None,
                 common_queue_capacity=4096,
                 common_queue_min=1024,
                 seed=None):

        if seed is None:
            seed = np.random.randint(10e8)

        _, data_source_query = parallel_reader.parallel_read(
            dataset1.data_sources,
            reader_class=dataset1.reader,
            num_epochs=num_epochs,
            num_readers=1,
            shuffle=False,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed)

        _, data_source_candidate = parallel_reader.parallel_read(
            dataset2.data_sources,
            reader_class=dataset2.reader,
            num_epochs=num_epochs,
            num_readers=1,
            shuffle=False,
            capacity=common_queue_capacity,
            min_after_dequeue=common_queue_min,
            seed=seed)

        data_target = ""
        if dataset3 is not None:
            _, data_target = parallel_reader.parallel_read(
                dataset3.data_sources,
                reader_class=dataset3.reader,
                num_epochs=num_epochs,
                num_readers=1,
                shuffle=False,
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                seed=seed)

        # Optionally shuffle the data
        if shuffle:
            shuffle_queue = tf.RandomShuffleQueue(
                capacity=common_queue_capacity,
                min_after_dequeue=common_queue_min,
                dtypes=[tf.string, tf.string, tf.string],
                seed=seed)
            enqueue_ops = []
            enqueue_ops.append(
                shuffle_queue.enqueue(
                    [data_source_query, data_source_candidate, data_target]))
            tf.train.add_queue_runner(
                tf.train.QueueRunner(shuffle_queue, enqueue_ops))
            (data_source_query, data_source_candidate,
             data_target) = shuffle_queue.dequeue()

        # Decode source query items
        items = dataset1.decoder.list_items()
        tensors = dataset1.decoder.decode(data_source_query, items)

        # Decode source candidate items
        items2 = dataset2.decoder.list_items()
        tensors2 = dataset2.decoder.decode(data_source_candidate, items2)
        items = items + items2
        tensors = tensors + tensors2
        if dataset3 is not None:
            # Decode target items
            items3 = dataset3.decoder.list_items()
            tensors3 = dataset3.decoder.decode(data_target, items3)

            items = items + items3
            tensors = tensors + tensors3

        # Merge items and results
        #items = items + items2 + items3
        #tensors = tensors + tensors2 + tensors3
        print("items:{}".format(items))
        print("tensors:{}".format(tensors))

        super(TripleDataProvider,
              self).__init__(items_to_tensors=dict(zip(items, tensors)),
                             num_samples=dataset1.num_samples)