Example #1
    def __init__(self, file_names, batch_size):
        '''
        No need to initialize an spt.DataFlow here;
        build a tf.data.Dataset instead.
        '''
        # wrap the class's generator-style input_fn as a tf.data.Dataset of uint8 tensors
        self.dataset = Dataset.from_generator(AlteredDataFlow.input_fn, tf.uint8)
        self.dataset = self.dataset.batch(batch_size)
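A minimal, self-contained sketch of the same pattern, assuming a hypothetical generator `gen_frames` that stands in for `AlteredDataFlow.input_fn`; only `from_generator` and `batch` mirror the snippet above.

import numpy as np
import tensorflow as tf

def gen_frames():
    # hypothetical generator: yields a few dummy uint8 "images"
    for _ in range(3):
        yield np.zeros((32, 32, 3), dtype=np.uint8)

# wrap the generator, then batch, exactly as in the example above
dataset = tf.data.Dataset.from_generator(gen_frames, output_types=tf.uint8)
dataset = dataset.batch(2)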
Example #2
  def __call__(self, params):
    """Input function which provides a single batch for train or eval."""
    # Retrieves the batch size for the current shard. The # of shards is
    # computed according to the input pipeline deployment. See
    # `tf.contrib.tpu.RunConfig` for details.
    batch_size = params['batch_size']
    logging.info('call ToyModelInput() with batch size {}'.format(batch_size))

    ds = Dataset.from_tensor_slices((self._images, self._labels)).repeat()

    dataset = ds.batch(batch_size, drop_remainder=True).prefetch(2)

    return dataset
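The batch size is read from `params['batch_size']` because the TPU runtime fills `params` per shard. Below is a hedged, in-memory sketch of the same slice / repeat / fixed-size-batch / prefetch pipeline; the `images` and `labels` arrays and their shapes are placeholders for `self._images` and `self._labels`.

import numpy as np
import tensorflow as tf

# placeholder arrays standing in for self._images / self._labels
images = np.random.rand(100, 28, 28, 1).astype(np.float32)
labels = np.random.randint(0, 10, size=(100,)).astype(np.int32)

# same pipeline shape as above: slice, repeat, drop-remainder batches, small prefetch
ds = tf.data.Dataset.from_tensor_slices((images, labels)).repeat()
ds = ds.batch(32, drop_remainder=True).prefetch(2)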
Example #3
def create_multi_increasing_dataset(value,
                                    shapes=[[1, 32, 32, 4], [1, 8]],
                                    dtypes=[np.float32, np.float32],
                                    repeat=True):
  def _get_one_input(data):
    result = []
    for i in range(len(shapes)):
      result.append(
          math_ops.cast(
              gen_array_ops.broadcast_to(data, shape=shapes[i]),
              dtype=dtypes[i]))
    return result

  dataset = Dataset.range(value).map(_get_one_input)
  if repeat:
    dataset = dataset.repeat()
  return dataset
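To see what the helper produces, one can iterate a few elements. This is a sketch assuming TF 2.x eager execution; it uses the public `tf.broadcast_to` / `tf.cast` in place of the internal `gen_array_ops` / `math_ops` modules, with the same default shapes as above.

import tensorflow as tf

# public-API equivalent of _get_one_input for the default shapes/dtypes
def get_one_input(data):
    return (tf.cast(tf.broadcast_to(data, [1, 32, 32, 4]), tf.float32),
            tf.cast(tf.broadcast_to(data, [1, 8]), tf.float32))

dataset = tf.data.Dataset.range(10).map(get_one_input).repeat()
for big, small in dataset.take(2):
    print(big.shape, small.shape)   # (1, 32, 32, 4) (1, 8)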
Example #4
    def _get_dataset(self, x, y, name_scope='data'):
        '''
        Build the input pipeline for (x, y) and return its iterator
        together with the next-element op.
        :param name_scope:
        :param x:
        :param y:
        :return: iterator (run its initializer before use) and its get_next op
        '''
        batch_size = self._batch_size
        with tf.name_scope(name=name_scope):
            dataset = Dataset.from_tensor_slices((x, y))
            dataset = dataset.shuffle(buffer_size=batch_size * 10) \
                .repeat() \
                .map(map_func=map_fn, num_parallel_calls=8) \
                .batch(batch_size=batch_size) \
                .prefetch(buffer_size=batch_size * 10)

            # note: make_initializable_iterator returns the iterator itself;
            # the caller must run its .initializer op before pulling batches
            init_op = dataset.make_initializable_iterator(shared_name='init_op')
            next_op = init_op.get_next(name='next_op')
        return init_op, next_op
    def __init__(self,
                 mode,
                 batch_size,
                 num_classes,
                 shuffle=True,
                 buffer_size=1000):
        """Create a new ImageDataGenerator.
        Recieves a path string to a text file, which consists of many lines,
        where each line has first a path string to an image and seperated by
        a space an integer, referring to the class number. Using this data,
        this class will create TensrFlow datasets, that can be used to train
        e.g. a convolutional neural network.
        Args:
            mode: Either 'training' or 'inference'. Depending on this value,
                different parsing functions will be used.
            batch_size: Number of images per batch.
            num_classes: Number of classes in the dataset.
            shuffle: Wether or not to shuffle the data in the dataset and the
                initial file list.
            buffer_size: Number of images used as buffer for TensorFlows
                shuffling of the dataset.
        Raises:
            ValueError: If an invalid mode is passed.
        """

        self.num_classes = num_classes
        self.duplicated_tag_list = []

        fp = open("/media/jiwunghyun/DATA/nus-wide/Concepts81.txt", 'r')
        tag_list = []
        for i in range(81):
            a = fp.readline().split('\n')[0]
            tag_list.append(a)
        fp.close()

        # read the 1000-tag vocabulary
        fp2 = open(
            "/media/jiwunghyun/DATA/nus-wide/NUS_WID_Tags/TagList1k.txt", 'r')
        tag1000_list = []
        for i in range(1000):
            a = fp2.readline().split('\n')[0]
            tag1000_list.append(a)
        fp2.close()

        # collect indices of 1000-tag entries that also appear among the 81 concepts
        for i in range(len(tag1000_list)):
            for j in range(len(tag_list)):
                if tag1000_list[i] == tag_list[j]:
                    self.duplicated_tag_list.append(i)

        # retrieve the data from the text file
        if mode == 'training':
            self._read_train_txt_file()
        elif mode == 'inference':
            self._read_test_txt_file()

        # number of samples in the dataset
        self.data_size = len(self.labels)

        # initial shuffling of the file and label lists (together!)
        if shuffle:
            self._shuffle_lists()

        # convert lists to TF tensor
        self.img_paths = convert_to_tensor(self.img_paths, dtype=dtypes.string)
        self.labels = convert_to_tensor(self.labels, dtype=dtypes.int32)
        self.noisy_tags = convert_to_tensor(self.noisy_tags,
                                            dtype=dtypes.int32)

        # create dataset
        data = Dataset.from_tensor_slices(
            (self.img_paths, self.labels, self.noisy_tags))

        # distinguish between train/infer. when calling the parsing functions
        if mode == 'training':
            data = data.map(self._parse_function_train)

        elif mode == 'inference':
            data = data.map(self._parse_function_inference)

        else:
            raise ValueError("Invalid mode '%s'." % (mode))

        # shuffle the first `buffer_size` elements of the dataset
        if shuffle:
            data = data.shuffle(buffer_size=buffer_size)

        # create a new dataset with batches of images
        data = data.batch(batch_size)

        self.data = data
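The constructor leaves the finished pipeline in `self.data`. A hedged sketch of consuming it in the TF 1.x graph mode this example targets, assuming an already-constructed instance `gen` of the class above and assuming the parse functions keep the (image, label, noisy_tags) triple structure:

import tensorflow as tf

# pull batches from the pipeline built in __init__
iterator = gen.data.make_one_shot_iterator()
next_images, next_labels, next_noisy_tags = iterator.get_next()

with tf.Session() as sess:
    img_batch, label_batch, tag_batch = sess.run(
        [next_images, next_labels, next_noisy_tags])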