Python TFRecordDatasetの例、tensorflow.contrib.data.TFRecordDataset Pythonの例

コード例 #1

0

ファイルを表示

ファイル: FlowIO.py プロジェクト: luolugithub/VGG-TensorFlow

 def get_batch_data(self):
     """Build the TFRecord input pipeline and fetch one batch of tensors.

     The records in ``self.data`` are parsed with ``self.parse``, each image
     is passed through ``self.total_image_norm`` together with the target
     shape ``[image_w, image_h, 3]``, and the stream is repeated
     indefinitely, optionally shuffled (buffer of 1000) and batched. The
     iterator is initialised on ``self.sess`` before returning.

     :return: image tensor, label tensor (labels reshaped to ``[-1, 1]``)
     """
     def _normalize(image, label):
         # Only the image is transformed; the label passes through untouched.
         normalized = self.total_image_norm(
             image, [self.image_w, self.image_h, 3])
         return normalized, label

     records = (data.TFRecordDataset(self.data)
                .map(self.parse)
                .map(_normalize)
                .repeat())
     if self.shuffle:
         records = records.shuffle(1000)

     batch_iter = records.batch(self.batch_size).make_initializable_iterator()
     images, labels = batch_iter.get_next()

     # Initialise local variables first, then the iterator itself.
     self.sess.run(tf.local_variables_initializer())
     self.sess.run(batch_iter.initializer)

     return images, tf.reshape(labels, [-1, 1])

コード例 #2

0

ファイルを表示

    def launch_tfrecord_dataset(self):
        """Build a batched dataset from ``self.tfrecord_path_list``.

        Every record is decoded into ``(pic_name, pic_class, attr_label,
        img)``. The dataset is repeated ``self.repeat_epoch_num`` times and
        shuffled with ``self.shuffle_buffer_size`` when those attributes are
        not ``None``, then batched with ``self.batch_size``.

        :return: the batched dataset with four fields:
                 pic_name, pic_class, attr_label, img
        """
        feature_spec = {
            'pic_name_feat': tf.FixedLenFeature([1], tf.string),
            'pic_class_feat': tf.FixedLenFeature([1], tf.string),
            'attr_label_feat': tf.VarLenFeature(tf.float32),
            'img_feat': tf.FixedLenFeature([], tf.string),
        }

        def _decode_example(example_proto):
            """Turn one serialized example into the four output tensors."""
            parsed = tf.parse_single_example(example_proto, feature_spec)

            pic_name = tf.cast(parsed['pic_name_feat'], dtype=tf.string)
            pic_class = tf.cast(parsed['pic_class_feat'], dtype=tf.string)
            # The variable-length feature is parsed as sparse; densify it.
            attr_label = tf.sparse_tensor_to_dense(parsed['attr_label_feat'])

            # The image bytes are decoded as JPEG. Alternatives the author
            # left disabled: tf.decode_raw(img, out_type=tf.uint8) (author
            # was unsure how it differs from decode_jpeg),
            # tf.image.per_image_standardization(img) (image normalization)
            # and tf.random_crop(img, (224, 224, 3)) (fixed-size input only).
            img = tf.image.decode_jpeg(parsed['img_feat'], channels=3)

            return pic_name, pic_class, attr_label, img

        records = cb_data.TFRecordDataset(self.tfrecord_path_list)
        dataset = records.map(_decode_example)

        if self.repeat_epoch_num is not None:
            # Repeat for the requested number of epochs.
            dataset = dataset.repeat(self.repeat_epoch_num)

        if self.shuffle_buffer_size is not None:
            # The buffer should generally exceed the dataset length for the
            # whole dataset to be shuffled.
            dataset = dataset.shuffle(buffer_size=self.shuffle_buffer_size)

        # Author's note: with variable-length fields, padding is needed, e.g.
        #   dataset.padded_batch(self.batch_size,
        #       padded_shapes=([-1], [-1], [-1], [-1, -1, 3]))
        # where padded_shapes is a tuple with one entry per returned field
        # and -1 pads to the longest element in the batch. Plain batch() is
        # what is actually used here.
        return dataset.batch(self.batch_size)

コード例 #3

0

ファイルを表示

 def __init__(self, split='train',
              modality='fixed_num',
              is_training=True, dummy=False):
     """Resolve the shard files and, unless ``dummy``, build the iterator.

     The file pattern comes from ``du.get_file_pattern`` and is expanded
     with ``glob`` into ``self.file_names``. When ``dummy`` is false a
     string placeholder for the file names is created, the records are
     mapped through the parser chosen from ``is_training`` / ``split``,
     shuffled with ``config.size_shuffle_buffer`` and exposed through
     ``self.iterator``. ``self.unpack_data(dummy, split)`` is always called
     last.
     """
     pattern = du.get_file_pattern(config.dataset_dir,
                                   config.dataset_name,
                                   split, modality, config.num_shards,
                                   is_training)
     self.file_names = glob(pattern)

     if not dummy:
         self.file_names_placeholder = tf.placeholder(tf.string, shape=[None])

         # Training always uses the train parser; otherwise only the split
         # literally named 'tests' gets the test parser.
         if is_training:
             parser = train_parser
         else:
             parser = test_parser if split == 'tests' else eval_parser

         records = data.TFRecordDataset(self.file_names_placeholder)
         shuffled = records.map(parser).shuffle(config.size_shuffle_buffer)
         self.iterator = shuffled.make_initializable_iterator()

     self.unpack_data(dummy, split)

コード例 #4

0

ファイルを表示

ファイル: data_utils.py プロジェクト: sarathknv/keras-mobile-colorizer

def _construct_dataset(record_path, batch_size, sess):
    """Create the ZLIB-compressed TFRecord pipeline and its next-batch op.

    Args:
        record_path: path of the TFRecord file to read.
        batch_size: number of records per batch.
        sess: session used to run the iterator initializer.

    Returns:
        ``(dataset, next_batch)`` where ``next_batch`` is the tensor tuple
        ``(image_l, image_ab, image_features)`` produced by the iterator.
    """
    feature_spec = {
        'image_l':
        tf.FixedLenFeature([IMAGE_SIZE, IMAGE_SIZE, 1], tf.float32),
        'image_ab':
        tf.FixedLenFeature([IMAGE_SIZE, IMAGE_SIZE, 2], tf.float32),
        'image_features':
        tf.FixedLenFeature([1000], tf.float32),
    }

    def _decode(serialized_example):
        """Parse one serialized example into its three float tensors."""
        parsed = tf.parse_single_example(serialized_example,
                                         features=feature_spec)
        return (parsed['image_l'], parsed['image_ab'],
                parsed['image_features'])

    # Pipeline order: parse -> repeat forever -> batch -> shuffle. The
    # shuffle comes after batching, so whole batches are shuffled.
    dataset = (tfdata.TFRecordDataset([record_path], 'ZLIB')
               .map(_decode,
                    num_threads=2,
                    output_buffer_size=2 * batch_size)
               .repeat()
               .batch(batch_size)
               .shuffle(buffer_size=5))

    batch_iterator = dataset.make_initializable_iterator()
    sess.run(batch_iterator.initializer)
    next_batch = batch_iterator.get_next()

    return dataset, next_batch

コード例 #5

0

ファイルを表示

ファイル: mnist_to_tf.py プロジェクト: ISRyuu/ISNNTF


import tensorflow.contrib.data as tdata
from convert_to_tfrecords import parse_function_maker

if __name__ == '__main__':
    tr, v, te = load_from_minst("mnist.pkl.gz")
    convert_to_tfrecords('training', tr, 28, 28, 1)
    convert_to_tfrecords('validation', v, 28, 28, 1)
    convert_to_tfrecords('test', te, 28, 28, 1)
    exit()
    mbs = 10
    file = 'MNIST_GZ/training.tfrecords.gz'
    vfile = 'MNIST_GZ/validation.tfrecords.gz'
    file_placeholder = tf.placeholder(dtype=tf.string)
    dataset = tdata.TFRecordDataset(file_placeholder, compression_type='GZIP')
    dataset = dataset.map(parse_function_maker(784))
    dataset = dataset.batch(mbs)
    # currently there is no option like "allow_smaller_final_batch" in tf.train.batch
    # using the filter is an alternative way.
    dataset = dataset.filter(lambda x, y: tf.equal(tf.shape(y)[0], mbs))
    # iterate the whole dataset once an initiation.
    dataset = dataset.repeat(1)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()

    sess = tf.Session()
    sess.run(iterator.initializer, feed_dict={file_placeholder: vfile})

    while True:
        try:

コード例 #6

0

ファイルを表示

def get_dataset(filenames, shape):
    """Return a TFRecord dataset over ``filenames`` with parsed examples.

    Each record is mapped through the parser returned by
    ``get_example_parser(shape)``.
    """
    records = data.TFRecordDataset(filenames)
    parser = get_example_parser(shape)
    return records.map(parser)

コード例 #7

0

ファイルを表示

ファイル: dataset_provider.py プロジェクト: NoicFank/ChestRayXNet

    def get_dataset(self, fname_pattern):
        """Build the repeated, shuffled and batched dataset for a file pattern.

        Args:
            fname_pattern: a list of TFRecord file names, a single path
                ending in ``.tfrecord``, or a prefix that is expanded with
                ``glob`` as ``<prefix>_*-of-*.tfrecord``.

        Returns:
            ``(tfnames, n_samples, dataset)``: the resolved file list, the
            total number of records counted across those files, and the
            batched dataset whose elements are dicts with the keys
            ``'image'`` and ``'label'``.

        Raises:
            ValueError: if no file matches ``fname_pattern`` or the matched
                files contain no records.
        """
        n_readers = 10  # parallel calls used when mapping the parse function

        def get_num_tfrecords(tfname):
            """Count the records of one TFRecord file by iterating over it."""
            return sum(1 for _ in tf.python_io.tf_record_iterator(tfname))

        def get_file(fname_pattern):
            """Resolve ``fname_pattern`` to file names and count records."""
            logging.debug("fname_pattern: %s", fname_pattern)
            if isinstance(fname_pattern, list):
                # A list is taken to be the file name list itself.
                tfnames = fname_pattern
            elif os.path.splitext(
                    os.path.basename(fname_pattern))[1] == ".tfrecord":
                # A .tfrecord extension means a single file, not a pattern.
                tfnames = [fname_pattern]
            else:
                tfnames = glob.glob(fname_pattern + "_*-of-*.tfrecord")

            if not tfnames:
                raise ValueError("Can not find file pattern: %s" %
                                 fname_pattern)

            n_samples = 0
            for fname in tfnames:
                n = get_num_tfrecords(fname)
                logging.info("file: %s, has %d records", fname, n)
                n_samples += n

            if n_samples == 0:
                raise ValueError("No records found fname_pattern: %s" %
                                 fname_pattern)

            return tfnames, n_samples

        def sampling_filter(example):
            """Keep label-1 examples; keep label-0 ones at random.

            A label-0 example is accepted when a uniform random draw is
            less than or equal to ``self.sampling_ratio``.
            """
            first_label = example['label'][0]
            return tf.logical_or(
                tf.equal(first_label, 1),
                tf.logical_and(
                    tf.equal(first_label, 0),
                    tf.less_equal(tf.random_uniform([], dtype=tf.float32),
                                  self.sampling_ratio)))

        def _parse_fn(example_proto):
            """Decode one serialized example into an image/label dict."""
            keys_to_features = {
                'image/encoded':
                tf.FixedLenFeature((), tf.string, default_value=''),
                'image/format':
                tf.FixedLenFeature((), tf.string, default_value='png'),
                # NOTE(review): the meaning of the default label '10' is not
                # visible here -- confirm against the record writer.
                'image/class/label':
                tf.FixedLenFeature((), tf.string, default_value='10'),
            }
            parsed = tf.parse_single_example(example_proto, keys_to_features)
            image = tf.image.decode_image(parsed['image/encoded'],
                                          channels=1)

            # Resize and crop with the project helpers (presumably to
            # image_shape[0] x image_shape[1] -- confirm in ImageUtil).
            image = ImageUtil.aspect_preserving_resize(image,
                                                       self.image_shape[0])
            image = ImageUtil._central_crop([image], self.image_shape[0],
                                            self.image_shape[1])[0]

            # Standardize with the configured mean and standard deviation.
            image = tf.to_float(image)
            image = tf.subtract(image, self.total_mean)
            image = tf.div(image, self.std)
            image.set_shape(self.image_shape)

            # The label string is split with an empty delimiter and each
            # piece is converted to a number, giving a label vector.
            label = tf.string_to_number(
                tf.string_split([parsed['image/class/label']],
                                delimiter="").values, tf.float32)
            if self.class_id != -1:
                # Keep only the entry for the configured class.
                label = tf.slice(label, [self.class_id], [1])
            else:
                logging.debug("no need to slice")

            label = tf.to_int64(label)
            logging.debug("label.shape: %s", label.get_shape())

            return {'image': image, 'label': label}

        tfnames, n_samples = get_file(fname_pattern)
        dataset = tf_data.TFRecordDataset(tfnames)

        dataset = dataset.map(_parse_fn, num_parallel_calls=n_readers)
        logging.info("map, %s, %s", dataset.output_types,
                     dataset.output_shapes)

        if self.sampling_ratio < 1.0:
            dataset = dataset.filter(sampling_filter)
        elif self.sampling_ratio > 1.0:
            repeat_num = tf.cast(self.sampling_ratio, tf.int64)
            base_num = tf.cast(1, tf.int64)

            def oversample_count(example):
                """Repeat count: ``repeat_num`` for label 1, otherwise 1."""
                # tf.cond needs a scalar predicate, and 'label' is a vector,
                # so test its first entry exactly as sampling_filter does.
                return tf.cond(tf.equal(example['label'][0], 1),
                               lambda: repeat_num,
                               lambda: base_num)

            dataset = dataset.flat_map(
                lambda x: tf.data.Dataset.from_tensors(x).repeat(
                    oversample_count(x)))

        dataset = dataset.repeat(None)
        # NOTE: the author marked this shuffle buffer size as needing tuning.
        dataset = dataset.shuffle(buffer_size=self.batch_size * 100)
        dataset = dataset.batch(self.batch_size)

        return tfnames, n_samples, dataset

コード例 #8

0

ファイルを表示

ファイル: read_tfrecord.py プロジェクト: NoicFank/ChestRayXNet

    return tfnames  # , n_samples


def parse_tfrecord(example_proto):
    """Decode one serialized example into ``{'image', 'label'}`` tensors.

    The encoded image is decoded with a single channel and converted to
    float. The label string is split with an empty delimiter, each piece is
    converted to a number, and the result is cast to int64.
    """
    feature_spec = {
        'image/encoded':
        tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
        tf.FixedLenFeature((), tf.string, default_value='png'),
        # NOTE(review): the meaning of the default label '10' is not visible
        # here -- confirm against the record writer.
        'image/class/label':
        tf.FixedLenFeature((), tf.string, default_value='10'),
    }
    parsed = tf.parse_single_example(example_proto, feature_spec)

    decoded = tf.image.decode_image(parsed['image/encoded'], channels=1)
    image = tf.to_float(decoded)

    pieces = tf.string_split([parsed['image/class/label']],
                             delimiter="").values
    label = tf.to_int64(tf.string_to_number(pieces, tf.float32))

    return {'image': image, 'label': label}


if __name__ == "__main__":
    # Smoke test: resolve the training records, parse them and print the
    # symbolic image tensor of the next element.
    # NOTE(review): the path literal ends with a space; kept unchanged --
    # confirm whether that is intended.
    tf_names = get_file('data/tfrecords/train ')
    dataset = tf_data.TFRecordDataset(tf_names)  # tf.data.TFRecordDataset
    dataset = dataset.map(parse_tfrecord, num_parallel_calls=1)
    # A Dataset cannot be indexed by key; the element dict produced by
    # parse_tfrecord is only reachable through an iterator.
    next_element = dataset.make_one_shot_iterator().get_next()
    image = next_element['image']
    # The call form of print works on both Python 2 and Python 3.
    print(image)

コード例 #9

0

ファイルを表示

ファイル: vis.py プロジェクト: jackd/crohme

import tensorflow as tf
import numpy as np
import tensorflow.contrib.data as data
# from crohme.data import DatasetManager
from create import records_filename
import matplotlib.pyplot as plt
from dataset import get_example_parser

# Shape handed to both the example parser and the record file name helper.
output_shape = (256, 64)
name = 'train2011'
batch_size = 4

parse_image = get_example_parser(output_shape)


def _records_dataset(kind):
    """Return the parsed TFRecord dataset for record kind ``kind``."""
    path = records_filename(name, kind, output_shape)
    return data.TFRecordDataset([path]).map(parse_image)


tex_dataset = _records_dataset('tex')
hme_dataset = _records_dataset('hme')

# Pair the two datasets element by element.
combined = data.Dataset.zip((tex_dataset, hme_dataset))

batched_iterator = combined.batch(batch_size).make_one_shot_iterator()
tex_tf, hme_tf = batched_iterator.get_next()

# Evaluate a single batch of both streams.
with tf.Session() as sess:
    tex, hme = sess.run([tex_tf, hme_tf])

# One figure per pair: first dataset's image on top, second one's below.
for tex_image, hme_image in zip(tex, hme):
    fig, (ax0, ax1) = plt.subplots(2, 1)
    ax0.imshow(tex_image)
    ax1.imshow(hme_image)