Python ImageReaderの例、build_data.ImageReader Pythonの例

コード例 #1

0

ファイルを表示

ファイル: build_cityscapes_data.py プロジェクト: zzzzzzrc/models

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Args:
      dataset_split: The dataset split (e.g., train, val).

    Raises:
      RuntimeError: If the number of images and labels differ, if a loaded
        image and its label have different shapes, or if an image filename
        does not match `_IMAGE_FILENAME_RE`.
    """
    image_files = _get_files('image', dataset_split)
    label_files = _get_files('label', dataset_split)

    num_images = len(image_files)
    num_labels = len(label_files)
    if num_images != num_labels:
        raise RuntimeError(
            "The number of images and labels doesn't match: {} {}".format(
                num_images, num_labels))

    # float() keeps the division exact under Python 2, where int / int
    # truncates and would make the ceil() a no-op.
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        shard_filename = '%s_fine-%05d-of-%05d.tfrecord' % (
            dataset_split, shard_id, _NUM_SHARDS)
        output_filename = os.path.join(FLAGS.output_dir, shard_filename)
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image; the context manager closes the handle
                # instead of leaving it to the garbage collector.
                with tf.gfile.FastGFile(image_files[i], 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                with tf.gfile.FastGFile(label_files[i], 'rb') as label_file:
                    seg_data = label_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example.
                re_match = _IMAGE_FILENAME_RE.search(image_files[i])
                if re_match is None:
                    raise RuntimeError('Invalid image filename: ' +
                                       image_files[i])
                filename = os.path.basename(re_match.group(1))
                example = build_data.image_seg_to_tfexample(
                    image_data, filename, height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #2

0

ファイルを表示

def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
    """Converts the ADE20k dataset into tfrecord format.

    Images are every `*.jpg` directly under `dataset_dir`, processed in a
    random order; each is paired with the `.png` of the same base name under
    `dataset_label_dir`.

    Args:
      dataset_split: Dataset split (e.g., train, val).
      dataset_dir: Dir in which the dataset is located.
      dataset_label_dir: Dir in which the annotations are located.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    img_names = tf.gfile.Glob(os.path.join(dataset_dir, '*.jpg'))
    random.shuffle(img_names)
    # Pair each image with its annotation. splitext() only drops the final
    # extension, so base names that themselves contain dots stay intact.
    seg_names = [
        os.path.join(
            dataset_label_dir,
            os.path.splitext(os.path.basename(img_name))[0] + '.png')
        for img_name in img_names
    ]

    num_images = len(img_names)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('jpeg', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir, '%s-%05d-of-%05d.tfrecord' %
            (dataset_split, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image; the context manager closes the handle.
                image_filename = img_names[i]
                with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                with tf.gfile.FastGFile(seg_names[i], 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example. The full image path (not just the
                # base name) is what gets recorded as the filename.
                example = build_data.image_seg_to_tfexample(
                    image_data, image_filename, height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #3

0

ファイルを表示

ファイル: build_pqr_data.py プロジェクト: rajatverma396/deeplaaptraning

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Args:
      dataset_split: Path to a text file listing one image base name per
        line (e.g., train.txt, test.txt). The last four characters of its
        base name are dropped to form the dataset name.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)
    # Close the split file deterministically instead of leaking the handle.
    with open(dataset_split, 'r') as split_file:
        filenames = [line.strip('\n') for line in split_file]
    num_images = len(filenames)
    # float() keeps the division exact under Python 2, where int / int
    # truncates and would make the ceil() a no-op.
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('jpeg', channels=3)
    label_reader = build_data.ImageReader('jpeg', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image. The extension is fixed to '.jpg' here
                # rather than taken from FLAGS.image_format.
                image_filename = os.path.join(FLAGS.image_folder,
                                              filenames[i] + '.jpg')
                with tf.gfile.GFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation. The extension
                # is fixed to '.jpg' rather than taken from
                # FLAGS.label_format.
                seg_filename = os.path.join(
                    FLAGS.semantic_segmentation_folder,
                    filenames[i] + '.jpg')
                with tf.gfile.GFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, filenames[i], height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #4

0

ファイルを表示

def create_tfrecords(src_files, file_ids, n_shards, sub_seq, use_tif,
                     output_dir):
    """Writes the given images (and optional labels) into sharded TFRecords.

    Args:
      src_files: Sequence of keys into `file_ids`, in output order.
      file_ids: Mapping from each entry of `src_files` to a 4-tuple whose
        last two items are the label path (or None when there is no label)
        and the image path. The first two items are not used here.
      n_shards: Number of shard files to produce.
      sub_seq: Prefix used for the shard file names.
      use_tif: If true, image dimensions are read with a 'png' reader,
        otherwise with a 'jpeg' reader.
        NOTE(review): the flag name suggests TIFF input but a PNG decoder
        is selected -- confirm this is intended.
      output_dir: Directory for the shard files; created if missing.

    Raises:
      RuntimeError: If an image and its label have different shapes.
    """
    image_format = 'png' if use_tif else 'jpeg'
    image_reader = build_data.ImageReader(image_format, channels=1)
    label_reader = build_data.ImageReader('png', channels=1)

    n_images = len(src_files)
    n_per_shard = int(math.ceil(n_images / float(n_shards)))

    os.makedirs(output_dir, exist_ok=True)

    print('Creating {} shards with {} images ({} per shard)'.format(
        n_shards, n_images, n_per_shard))

    for shard_id in range(n_shards):
        output_file_path = os.path.join(
            output_dir,
            '{:s}-{:05d}-of-{:05d}.tfrecord'.format(sub_seq, shard_id,
                                                    n_shards))

        with tf.python_io.TFRecordWriter(output_file_path) as tfrecord_writer:
            start_idx = shard_id * n_per_shard
            end_idx = min((shard_id + 1) * n_per_shard, n_images)

            for img_id in tqdm(range(start_idx, end_idx), ncols=50):
                # Only the label and image paths of the record are needed.
                _, _, seg_src_path, img_src_path = file_ids[src_files[img_id]]

                # Context managers close the handles deterministically.
                with tf.gfile.FastGFile(img_src_path, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                seg_data = None
                if seg_src_path is not None:
                    with tf.gfile.FastGFile(seg_src_path, 'rb') as seg_file:
                        seg_data = seg_file.read()
                    seg_height, seg_width = label_reader.read_image_dims(
                        seg_data)

                    if height != seg_height or width != seg_width:
                        raise RuntimeError(
                            'Shape mismatch found between image and label')

                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, img_src_path, height, width, seg_data)

                tfrecord_writer.write(example.SerializeToString())

コード例 #5

0

ファイルを表示

ファイル: build_voc2012_data.py プロジェクト: haydenh96/Tenses

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Each example is written to the shard file it belongs to. Previously the
    conversion loop sat outside the shard writer's `with` block, so every
    shard file was left empty and examples went to a separate, never-closed
    writer that was re-created on each iteration.

    Args:
      dataset_split: Path to a text file listing one image file name per
        line. The last four characters of its base name are dropped to
        form the dataset name.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)
    with open(dataset_split, 'r') as split_file:
        filenames = [line.strip('\n') for line in split_file]
    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('jpeg', channels=3)
    label_reader = build_data.ImageReader('jpg', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                    i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image. Entries from the split file are used
                # as-is; no extension is appended.
                image_filename = os.path.join(FLAGS.image_folder,
                                              filenames[i])
                with tf.gfile.GFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                seg_filename = os.path.join(
                    FLAGS.semantic_segmentation_folder, filenames[i])
                with tf.gfile.GFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example and store it in the current shard.
                example = build_data.image_seg_to_tfexample(
                    image_data, filenames[i], height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #6

0

ファイルを表示

def _convert_dataset(dataset_split):
  """Converts the specified dataset split to TFRecord format.

  No separate annotation is read: the encoded image bytes are stored as the
  segmentation data of each example as well.

  Args:
    dataset_split: Path to a text file (e.g., val.txt) listing one image
      base name per line. The last four characters of its base name are
      dropped to form the dataset name.
  """
  dataset = os.path.basename(dataset_split)[:-4]
  sys.stdout.write('Processing ' + dataset)
  # Close the split file deterministically instead of leaking the handle.
  with open(dataset_split, 'r') as split_file:
    filenames = [line.strip('\n') for line in split_file]
  num_images = len(filenames)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

  image_reader = build_data.ImageReader('jpeg', channels=3)

  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        FLAGS.output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image; the context manager closes the handle.
        image_filename = os.path.join(
            FLAGS.image_folder, filenames[i] + '.' + FLAGS.image_format)
        with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
          image_data = image_file.read()
        height, width = image_reader.read_image_dims(image_data)

        # The image itself doubles as the segmentation payload.
        seg_data = image_data
        # Convert to tf example.
        example = build_data.image_seg_to_tfexample(
            image_data, filenames[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()

コード例 #7

0

ファイルを表示

def _convert_dataset(dataset_split, dataset_dir, dataset_label_dir):
    """Writes all images and their masks into one 'Carvana' TFRecord file.

    Args:
      dataset_split: Split name used in the output file name
        (`Carvana_<dataset_split>.tfrecord` under `FLAGS.build_datadir`).
      dataset_dir: Directory the image files are read from.
      dataset_label_dir: Directory holding `<image base name>_mask.png`
        for every image.
    """
    image_reader = build_data.ImageReader('jpeg', channels=3)
    # NOTE(review): names are listed from FLAGS.train_image_folder but the
    # files are read from dataset_dir -- confirm both always point at the
    # same folder. Hidden (dot-prefixed) entries are skipped.
    image_names = sorted(
        name for name in os.listdir(FLAGS.train_image_folder)
        if not name.startswith('.'))

    output_filename = '%s_%s.tfrecord' % ('Carvana', dataset_split)
    output_filename = os.path.join(FLAGS.build_datadir, output_filename)
    total = 0
    # Pre-bind idx so the summary below also works for an empty folder.
    idx = 0
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            if idx % 5 == 0:
                print('total', total, 'file(s), process', idx, 'file(s).')

            data_path = os.path.join(dataset_dir, image_name)
            # Drops the last four characters of the image name (assumed to
            # be a three-letter extension plus the dot).
            label_path = os.path.join(dataset_label_dir,
                                      image_name[:-4] + '_mask.png')

            # Context managers close the handles deterministically.
            with tf.gfile.GFile(data_path, 'rb') as image_file:
                image_data = image_file.read()
            with tf.gfile.GFile(label_path, 'rb') as label_file:
                seg_data = label_file.read()
            height, width = image_reader.read_image_dims(image_data)
            try:
                tf_example = build_data.image_seg_to_tfexample(
                    image_data, image_name, height, width, seg_data)
                if tf_example is not None:
                    tfrecord_writer.write(tf_example.SerializeToString())
            except ValueError:
                # Logging-style lazy formatting: extra positional arguments
                # need a matching placeholder in the message.
                tf.logging.warning('Invalid example: %s, ignoring.',
                                   image_name)

            total += 1
    print('total', total, 'file(s), process', idx, 'file(s).')

コード例 #8

0

ファイルを表示

def _convert_siamese_dataset(split,
                             image_folder,
                             annos,
                             tfrecord_base_dir,
                             num_shards=10):
    """Writes image-pair annotations into sharded TFRecord files.

    Args:
      split: Split name used as the shard file-name prefix.
      image_folder: Directory containing the images named in `annos`.
      annos: Sequence of rows indexed as
        (id_a, id_b, image_a_name, image_b_name, label). The input is not
        modified; a shuffled deep copy is processed.
      tfrecord_base_dir: Directory the shard files are written to.
      num_shards: Number of shard files to produce.
    """
    # Shuffle a private copy so the caller's list keeps its order.
    # NOTE: the repeated shuffles are kept so the resulting order for a
    # given random seed stays the same as before.
    sf_annos = copy.deepcopy(annos)
    random.shuffle(sf_annos)
    random.shuffle(sf_annos)
    random.shuffle(sf_annos)

    num_annos = len(sf_annos)
    num_per_shard = int(math.ceil(num_annos / float(num_shards)))

    image_reader = build_data.ImageReader('jpeg', channels=3)

    for shard_id in range(num_shards):
        output_filename = os.path.join(
            tfrecord_base_dir,
            '%s-%05d-of-%05d.tfrecord' % (split, shard_id, num_shards))
        start_idx = shard_id * num_per_shard
        end_idx = min((shard_id + 1) * num_per_shard, num_annos)
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_annos, shard_id))
                sys.stdout.flush()

                # Read both images of the pair; context managers close the
                # handles deterministically.
                row = sf_annos[i]
                id_a = row[0]
                id_b = row[1]
                im_a_name = row[2]
                with tf.gfile.FastGFile(
                        os.path.join(image_folder, im_a_name), 'rb') as file_a:
                    im_a = file_a.read()
                im_a_h, im_a_w = image_reader.read_image_dims(im_a)
                im_b_name = row[3]
                with tf.gfile.FastGFile(
                        os.path.join(image_folder, im_b_name), 'rb') as file_b:
                    im_b = file_b.read()
                im_b_h, im_b_w = image_reader.read_image_dims(im_b)
                label = int(row[4])

                # Make one tf example per pair.
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'id_a': _bytes_list_feature(id_a),
                        'im_a': _bytes_list_feature(im_a),
                        'im_a_name': _bytes_list_feature(im_a_name),
                        'im_a_h': _int64_list_feature(im_a_h),
                        'im_a_w': _int64_list_feature(im_a_w),
                        'id_b': _bytes_list_feature(id_b),
                        'im_b': _bytes_list_feature(im_b),
                        'im_b_name': _bytes_list_feature(im_b_name),
                        'im_b_h': _int64_list_feature(im_b_h),
                        'im_b_w': _int64_list_feature(im_b_w),
                        'label': _int64_list_feature(label),
                    }))
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #9

0

ファイルを表示

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to numpy format.

    Every image and label is resized to 160x160 (bilinear for images,
    nearest for labels). Two arrays are saved under `FLAGS.output_dir`:
    `x_<dataset>` holding the images transposed to (N, 3, 160, 160), and
    `y_<dataset>` holding the labels as (N, 160, 160).

    Args:
      dataset_split: Path to a text file listing one image base name per
        line (e.g., train.txt, test.txt). The last four characters of its
        base name are dropped to form the dataset name.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)
    # Close the split file deterministically instead of leaking the handle.
    with open(dataset_split, 'r') as split_file:
        filenames = [line.strip('\n') for line in split_file]
    num_images = len(filenames)

    img_np = np.empty((num_images, 160, 160, 3), dtype=np.uint8)
    label_np = np.empty((num_images, 160, 160), dtype=np.uint8)

    for i in tqdm(range(num_images)):
        # Read and resize the image.
        image_filename = os.path.join(FLAGS.image_folder,
                                      filenames[i] + '.' + FLAGS.image_format)
        image_data = imresize(imread(image_filename), (160, 160), 'bilinear')
        height, width = image_data.shape[0:2]
        # Read and resize the semantic segmentation annotation; 'nearest'
        # avoids blending neighbouring label values.
        seg_filename = os.path.join(FLAGS.semantic_segmentation_folder,
                                    filenames[i] + '.' + FLAGS.label_format)
        seg_data = imresize(imread(seg_filename), (160, 160), 'nearest')
        seg_height, seg_width = seg_data.shape[0:2]
        if height != seg_height or width != seg_width:
            raise RuntimeError('Shape mismatched between image and label.')
        # Slice assignment copies the data into the preallocated arrays.
        img_np[i] = image_data
        label_np[i] = seg_data

    # os.path.join works whether or not output_dir ends with a separator;
    # plain concatenation silently wrote next to the directory otherwise.
    np.save(os.path.join(FLAGS.output_dir, 'x_' + dataset),
            np.transpose(img_np, (0, 3, 1, 2)))
    np.save(os.path.join(FLAGS.output_dir, 'y_' + dataset), label_np)

コード例 #10

0

ファイルを表示

ファイル: receipt_dataset.py プロジェクト: zoujuny/tf_doc_localisation

def _convert_dataset(dataset_split):
    """Converts images and their point annotations to TFRecord format.

    Image/annotation pairs are visited in a random order. For each pair the
    first line of the annotation file is parsed as comma-separated integers.

    Args:
        dataset_split: "train" or "eval"

    Raises:
        AssertionError: If the number of image files and annotation files
            differ.
        ValueError: If an image and its annotation file have different base
            names, or an annotation value is not an integer.
    """
    image_files = _get_files("image", dataset_split)
    txt_files = _get_files("txt", dataset_split)

    # Raised explicitly (rather than via `assert`) so the check still runs
    # when Python is started with -O.
    if len(image_files) != len(txt_files):
        raise AssertionError(
            "number of image files and number of txts files must be same, if not, check your dataset dir"
        )

    idx_list = list(range(len(image_files)))
    random.shuffle(idx_list)

    num_images = len(image_files)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
    image_reader = build_data.ImageReader("jpg", channels=3)

    # makedirs also creates missing parent directories.
    if not os.path.exists(tfrecord_output_dir):
        os.makedirs(tfrecord_output_dir)

    for shard_id in range(_NUM_SHARDS):
        shard_filename = "%s-%02d-of-%02d.tfrecord" % (dataset_split, shard_id,
                                                       _NUM_SHARDS)
        output_filename = os.path.join(tfrecord_output_dir, shard_filename)

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)

            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()

                image_path = image_files[idx_list[i]]
                txt_path = txt_files[idx_list[i]]

                # The context manager closes the handle deterministically.
                with tf.gfile.GFile(image_path, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                # Only the first line of the annotation file is used.
                with open(txt_path, 'r') as txt_f:
                    content = txt_f.readline().strip().split(',')
                    points = [int(value) for value in content]

                filename = os.path.basename(image_path)
                txtname = os.path.basename(txt_path)

                if filename.split('.')[0] != txtname.split('.')[0]:
                    raise ValueError('filename != txtname')

                example = build_data.image_label_to_tfexample(
                    image_data, filename, height, width, points)
                tfrecord_writer.write(example.SerializeToString())

        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #11

0

ファイルを表示

def _convert_dataset(dataset_split):
  """Converts the specified dataset split to GZIP-compressed TFRecords.

  Each image is paired with the 'posemap' dataset of the HDF5 label file at
  the same index; infinite posemap values are replaced by 255.

  Args:
    dataset_split: The dataset split (e.g., train, val).

  Raises:
    RuntimeError: If a loaded image and its posemap have different height
      or width, or if an image filename does not match
      `_IMAGE_FILENAME_RE`.
  """
  image_files = _get_files('image', dataset_split)
  label_files = _get_files('label', dataset_split)
  num_images = len(image_files)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
  # Single-argument print(...) behaves the same on Python 2 and 3; the
  # former print statements were a syntax error on Python 3.
  print('num_images, num_labels, num_per_shard:  %s %s %s' % (
      num_images, len(label_files), num_per_shard))

  image_reader = build_data.ImageReader('jpg', channels=3)

  for shard_id in range(_NUM_SHARDS):
    shard_filename = '%s-%05d-of-%05d.tfrecord' % (
        dataset_split, shard_id, _NUM_SHARDS)
    output_filename = os.path.join(FLAGS.output_dir, shard_filename)
    options = tf.python_io.TFRecordOptions(
        tf.python_io.TFRecordCompressionType.GZIP)
    with tf.python_io.TFRecordWriter(
        output_filename, options=options) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, num_images, shard_id))
        sys.stdout.flush()
        # Read the image; the context manager closes the handle.
        with tf.gfile.FastGFile(image_files[i], 'rb') as image_file:
          image_data = image_file.read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the pose map annotation.
        with h5py.File(label_files[i], 'r') as hf:
          posemap_data = hf['posemap'][:]
        # Replace infinities with 255.
        posemap_data[posemap_data == np.inf] = 255.
        print('%s %s %s' % (np.sum(posemap_data), np.max(posemap_data),
                            np.min(posemap_data)))
        posemap_height, posemap_width = posemap_data.shape[:2]
        if height != posemap_height or width != posemap_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        re_match = _IMAGE_FILENAME_RE.search(image_files[i])
        if re_match is None:
          raise RuntimeError('Invalid image filename: ' + image_files[i])
        filename = os.path.basename(re_match.group(1))
        example = build_data.image_posemap_to_tfexample(
            image_data, filename, height, width, posemap_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()

コード例 #12

0

ファイルを表示

def _convert_dataset(dataset_split, image_names, labels_df):
    """Converts images with run-length-encoded masks into tfrecord format.

    For every image the four masks keyed `<image id>.jpg_1` ..
    `<image id>.jpg_4` are decoded, merged by `masks_to_mask`, PNG-encoded
    and stored as the segmentation of the example.

    Args:
      dataset_split: Dataset split (e.g., train, val).
      image_names: Sequence of image file paths.
      labels_df: Table with `ImageId_ClassId` and `EncodedPixels` columns.

    Raises:
      KeyError: If one of the four mask keys of an image is missing from
        `labels_df`.
    """
    num_images = len(image_names)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))
    image_class_id_to_rle_mask = dict(
        zip(labels_df.ImageId_ClassId, labels_df.EncodedPixels))

    image_reader = build_data.ImageReader('jpeg', channels=3)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir, '%s-%05d-of-%05d.tfrecord' %
            (dataset_split, shard_id, _NUM_SHARDS))

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image; the context manager closes the handle.
                image_name = image_names[i]
                with tf.gfile.FastGFile(image_name, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                # Build the segmentation annotation. The class loop uses
                # its own name so it cannot shadow (or, on Python 2,
                # overwrite) the image index `i`.
                image_id = image_name.split('/')[-1].split('.')[0]
                rle_masks = [
                    image_class_id_to_rle_mask['{}.jpg_{}'.format(
                        image_id, class_id)] for class_id in range(1, 5)
                ]
                masks = [
                    image_utils.rle_to_mask(rle_mask, height, width)
                    for rle_mask in rle_masks
                ]
                mask = masks_to_mask(masks)
                mask_data = image_utils.numpy_to_bytes(mask, 'png')

                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, image_name, height, width, mask_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #13

0

ファイルを表示

def convert_dataset(dataset, names, num_shared=4):
    """Converts the named images and labels into sharded TFRecord files.

    Images are read from `<dataset_folder>/train/<name><data_format>` and
    labels from `<dataset_folder>/seg/<name><seg_format>`; shards are
    written to `<dataset_folder>/tfrecord/`.

    Args:
      dataset: Dataset name, used as the shard file-name prefix.
      names: Sequence of base names shared by each image and its label.
      num_shared: Number of shard files to produce.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    sys.stdout.write('Processing ' + dataset)
    sys.stdout.write('\n')
    sys.stdout.flush()
    num = len(names)
    num_per_shard = int(math.ceil(num / float(num_shared)))

    image_reader = build_data.ImageReader('jpg', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)
    for shard_id in range(num_shared):
        output_filename = os.path.join(
            dataset_folder, 'tfrecord',
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, num_shared))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num, shard_id))
                sys.stdout.flush()
                # Read the image; the context manager closes the handle.
                image_filename = os.path.join(dataset_folder, 'train',
                                              names[i] + data_format)
                with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                # Read the semantic segmentation annotation.
                seg_filename = os.path.join(dataset_folder, 'seg',
                                            names[i] + seg_format)
                with tf.gfile.FastGFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)

                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, names[i], height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #14

0

ファイルを表示

def convert_dataset(dataset, is_ship_list, nan_ship_list, train_df):
    """Writes a balanced set of ship / no-ship examples into TFRecord shards.

    Both input lists are randomly down-sampled to the length of the shorter
    one. For each index, one entry from the ship list and one from the
    no-ship list are written to the current shard.

    Args:
      dataset: 'train' (ship entries go through `aug_write_tfexample`) or
        'val' (ship entries go through `write_tfexample`).
      is_ship_list: Entries handed to the ship-side writer.
      nan_ship_list: Entries handed to `write_tfexample`.
      train_df: Passed through unchanged to the example writers.

    Raises:
      RuntimeError: If `dataset` is neither 'train' nor 'val' (detected
        when the first pair is processed).
    """
    sys.stdout.write('Processing ' + dataset + '\n')
    sys.stdout.flush()

    pair_count = min(len(is_ship_list), len(nan_ship_list))
    sys.stdout.write('Number of train samples: %d\n' % (2 * pair_count))
    sys.stdout.flush()

    # Down-sample both lists to the same size (ship list is sampled first).
    ship_items = random.sample(is_ship_list, pair_count)
    empty_items = random.sample(nan_ship_list, pair_count)
    shard_size = int(math.ceil(pair_count / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('jpg', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        shard_name = '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id,
                                                   _NUM_SHARDS)
        shard_path = os.path.join(FLAGS.output_dir, shard_name)
        first = shard_id * shard_size
        last = min(first + shard_size, pair_count)
        with tf.python_io.TFRecordWriter(shard_path) as tfrecord_writer:
            for pair_idx in range(first, last):
                progress = '\r>> Converting pair of images %d/%d shard %d' % (
                    pair_idx + 1, pair_count, shard_id)
                sys.stdout.write(progress)
                sys.stdout.flush()

                # Guard clause: only the two known splits are accepted.
                if dataset != 'train' and dataset != 'val':
                    raise RuntimeError('Dataset must be val/train.')
                # Training ship examples go through the augmenting writer.
                ship_writer = (aug_write_tfexample
                               if dataset == 'train' else write_tfexample)
                ship_writer(ship_items[pair_idx], image_reader, label_reader,
                            train_df, tfrecord_writer)
                write_tfexample(empty_items[pair_idx], image_reader,
                                label_reader, train_df, tfrecord_writer)

        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #15

0

ファイルを表示

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Each example stores the encoded image bytes, its name, its dimensions and
    the finger-root values looked up for that image.

    Args:
      dataset_split: Path to the split list file, one image name (without
        extension) per line. The file's base name minus its last four
        characters is used as the prefix of the output shard names.

    Raises:
      ValueError: If the coordinates looked up for an image do not contain
        exactly 6 values.
    """
    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)
    # Close the list file deterministically instead of leaking the handle.
    with open(dataset_split, 'r') as split_file:
        filenames = [x.strip('\n') for x in split_file]
    coordinate_filenames = 'figCon'
    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('jpg', channels=1)  # 3

    # Read the finger roots data.
    coordinates_filename = os.path.join(
        FLAGS.coordinates_filename_folder,
        coordinate_filenames + '.' + FLAGS.label_format)
    with tf.gfile.FastGFile(coordinates_filename, 'r') as coordinates_file:
        coordinates_data = coordinates_file.read()
    label_reader = build_data.TxtReader(coordinates_data)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
                    i + 1, num_images, shard_id))
                sys.stdout.flush()

                # Read the image.
                image_name = filenames[i] + '.' + FLAGS.image_format
                image_filename = os.path.join(FLAGS.image_folder, image_name)
                with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                # Coordinates are looked up by the image name with extension.
                finger_roots = label_reader.read_coordinates_data(image_name)
                if len(finger_roots) != 6:
                    # Interpolate the value into the message; wrapping it in a
                    # 1-tuple keeps the formatting correct for tuple values.
                    raise ValueError(
                        'finger roots data: %s is illegal' % (finger_roots,))

                # Convert to tf example.
                example = build_data.image_coordinates_to_tfexample(
                    image_data, filenames[i], height, width, finger_roots)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #16

0

ファイルを表示

ファイル: 2_build_city_seq_data.py プロジェクト: RuiZhang-ICT/dataset_preprocessing

def _convert_dataset(seq_idx):
    """Converts one image sequence to a single TFRecord file.

    Args:
      seq_idx: Sequence identifier, forwarded to `_get_files` to list the
        image files and the label files of the sequence.

    Raises:
      RuntimeError: If a loaded image and its label have different shapes, or
        if an image filename does not match `_IMAGE_FILENAME_RE`.
    """
    image_files = _get_files('image', seq_idx)
    label_files = _get_files('label', seq_idx)

    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    # NOTE(review): the shard name does not depend on seq_idx, so converting
    # several sequences into the same output directory overwrites this file
    # -- confirm that is intended.
    shard_filename = 'seq-%05d-of-%05d.tfrecord' % (0, 1)
    output_filename = os.path.join(usr_dir, FLAGS.cityscapes_root,
                                   FLAGS.output_dir, shard_filename)
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
        # enumerate() replaces the Python-2-only xrange(); labels are still
        # looked up by position, so a short label list raises IndexError.
        for i, image_path in enumerate(image_files):
            # Read the image.
            with tf.gfile.FastGFile(image_path, 'rb') as image_file:
                image_data = image_file.read()
            height, width = image_reader.read_image_dims(image_data)
            # Read the semantic segmentation annotation.
            with tf.gfile.FastGFile(label_files[i], 'rb') as label_file:
                seg_data = label_file.read()
            seg_height, seg_width = label_reader.read_image_dims(seg_data)
            if height != seg_height or width != seg_width:
                raise RuntimeError('Shape mismatched between image and label.')
            # Convert to tf example.
            re_match = _IMAGE_FILENAME_RE.search(image_path)
            if re_match is None:
                raise RuntimeError('Invalid image filename: ' + image_path)
            filename = os.path.basename(re_match.group(1))
            example = build_data.image_seg_to_tfexample(
                image_data, filename, height, width, seg_data)
            tfrecord_writer.write(example.SerializeToString())

コード例 #17

0

ファイルを表示

ファイル: makerecord.py プロジェクト: bhlarson/EmbeddedClassification

def WriteRecords(args, datasets, imbddata):
    """Shuffle `imbddata` in place and write it out as sharded TFRecords.

    Entries are consumed sequentially: each shard of each dataset takes
    `int(len(imbddata) * ratio / args.shards)` entries, except the very last
    shard of the last dataset, which takes everything that is left.

    Args:
      args: Options object; `args.shards` (shards per dataset) and `args.out`
        (output directory) are read here, and `args` is forwarded to
        `Example`.
      datasets: List of dicts with 'name' and 'ratio' keys, e.g.
        [{'name':'training', 'ratio':0.7}, {'name':'validation', 'ratio':0.3}]
      imbddata: List of records; shuffled in place, then each entry is
        converted with `Example`.
    """
    # Shuffle records between datasets and shards.
    random.shuffle(imbddata)

    image_reader = build_data.ImageReader('jpeg', channels=3)

    total_entries = len(imbddata)
    cursor = 0
    for dataset_idx, dataset in enumerate(datasets):
        for shard_id in range(args.shards):
            shard_name = '%s-%05d-of-%05d.tfrecord' % (
                dataset['name'], shard_id, args.shards)
            shard_path = os.path.join(args.out, shard_name)

            on_last_dataset = dataset_idx == len(datasets) - 1
            on_last_shard = shard_id == args.shards - 1
            if on_last_dataset and on_last_shard:
                # The final shard absorbs all rounding leftovers.
                stop = total_entries
            else:
                stop = cursor + int(
                    total_entries * dataset['ratio'] / args.shards)
            shard_len = stop - cursor

            print('{} start {} stop {}'.format(shard_path, cursor, stop))
            progress = ProgressBar(total=shard_len,
                                   prefix='{}'.format(dataset['name']),
                                   suffix='of {}'.format(shard_len),
                                   decimals=3,
                                   length=75,
                                   fill='%',
                                   zfill='-')
            with tf.io.TFRecordWriter(shard_path) as tfrecord_writer:
                for entry_idx in range(cursor, stop):
                    record = Example(args, entry_idx, imbddata[entry_idx],
                                     image_reader)
                    tfrecord_writer.write(record.SerializeToString())
                    # Refresh the bar only on every 100th global index.
                    if not entry_idx % 100:
                        progress.print_progress_bar(entry_idx - cursor)
            progress.print_progress_bar(shard_len)
            cursor = stop

            sys.stdout.write('\n')
            sys.stdout.flush()

コード例 #18

0

ファイルを表示

def _convert_dataset():
    """Converts the images listed by `_get_files('image')` to TFRecord format.

    The images are split evenly over `_NUM_SHARDS` files named
    'train-XXXXX-of-YYYYY.tfrecord' inside `FLAGS.output_dir`. Only image
    data is written; no labels are read.

    Raises:
      RuntimeError: If an image filename does not match `_IMAGE_FILENAME_RE`.
    """
    image_files = _get_files('image')
    print(len(image_files))
    num_images = len(image_files)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('png', channels=3)

    for shard_id in range(_NUM_SHARDS):
        shard_filename = '%s-%05d-of-%05d.tfrecord' % ('train', shard_id,
                                                       _NUM_SHARDS)
        output_filename = os.path.join(FLAGS.output_dir, shard_filename)
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image; the context manager closes the handle
                # instead of leaking one per image.
                with tf.gfile.FastGFile(image_files[i], 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Convert to tf example.
                re_match = _IMAGE_FILENAME_RE.search(image_files[i])
                if re_match is None:
                    raise RuntimeError('Invalid image filename: ' +
                                       image_files[i])
                filename = os.path.basename(re_match.group(1))
                example = build_data.image_to_tfexample(
                    image_data, filename, height, width)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #19

0

ファイルを表示

def _convert_dataset(dataset, train_isship_list, train_nanship_list, train_df):
    """Write balanced label-1 / label-0 example pairs into sharded TFRecords.

    Both lists are randomly down-sampled to the length of the shorter one.
    For every index, one entry of `train_isship_list` is written with
    label=1, followed by one entry of `train_nanship_list` with label=0.

    Args:
      dataset: Name used as the prefix of the output shard files.
      train_isship_list: Samples written with label=1.
      train_nanship_list: Samples written with label=0.
      train_df: Forwarded unchanged to `write_tfexample`.
    """
    out = sys.stdout
    out.write('Processing ' + dataset)

    pair_count = min(len(train_isship_list), len(train_nanship_list))
    print('Number of train samples: ', 2 * pair_count)

    positives = random.sample(train_isship_list, pair_count)
    negatives = random.sample(train_nanship_list, pair_count)
    shard_size = int(math.ceil(pair_count / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('jpg', channels=3)

    for shard_id in range(_NUM_SHARDS):
        shard_name = '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id,
                                                   _NUM_SHARDS)
        shard_path = os.path.join(FLAGS.output_dir, shard_name)
        with tf.python_io.TFRecordWriter(shard_path) as tfrecord_writer:
            first = shard_id * shard_size
            last = min(first + shard_size, pair_count)
            for pair_idx in range(first, last):
                out.write(
                    '\r>> Converting pair of images %d/%d shard %d' %
                    (pair_idx + 1, pair_count, shard_id))
                out.flush()
                # Positive sample first, then its negative counterpart.
                for sample, label in ((positives[pair_idx], 1),
                                      (negatives[pair_idx], 0)):
                    write_tfexample(sample,
                                    image_reader,
                                    train_df,
                                    tfrecord_writer,
                                    label=label)
        out.write('\n')
        out.flush()

コード例 #20

0

ファイルを表示

# Flat conversion script: reads every split list in ITL_listDir and, per
# split, opens ITL_numShards TFRecord writers in ITL_outputDir.
# NOTE(review): this snippet ends after the annotation bytes are read; no
# write to tfrecord_writer is visible here.

# Directory the '<name>.png' annotation files are read from.
ITL_anoDir = 'e:/temp/itl/models/research/deeplab/datasets/itl/segmentationclass/'
# Directory the '<name>.jpg' image files are read from.
ITL_imageDir = 'e:/temp/itl/models/research/deeplab/datasets/itl/jpegimages/'
# Directory that holds the '*.txt' split lists.
ITL_listDir = 'e:/temp/itl/models/research/deeplab/datasets/itl/imagesets/'
# Directory the '.tfrecord' shard files are written to.
ITL_outputDir = 'e:/temp/itl/models/research/deeplab/datasets/itl/tfrecord/'
# Number of shard files produced per split.
ITL_numShards = 4

# Every '*.txt' file in the list directory is treated as one dataset split.
ITL_datasetSplits = tf.gfile.Glob(os.path.join(ITL_listDir, '*.txt'))

for ITL_datasetSplit in ITL_datasetSplits:
	# Split name = list file name with its last four characters removed.
	ITL_dataset = os.path.basename(ITL_datasetSplit)[:-4]
	print('Processing ' + ITL_dataset)
	# One entry per line of the list file, trailing newline stripped.
	# NOTE(review): the file object returned by open() is never closed.
	ITL_filenames = [x.strip('\n') for x in open(ITL_datasetSplit, 'r')]
	ITL_numImages = len(ITL_filenames)
	# Ceiling division so the shards together cover every entry.
	ITL_numPerShard = int(math.ceil(ITL_numImages / float(ITL_numShards)))

	ITL_imageReader = build_data.ImageReader('jpeg', channels=3)
	# NOTE(review): ITL_labelReader is not used in the visible part of the loop.
	ITL_labelReader = build_data.ImageReader('png', channels=1)

	for ITL_shardId in range(ITL_numShards):
		ITL_outputFilename = os.path.join(ITL_outputDir, '%s-%05d-of-%05d.tfrecord' % (ITL_dataset, ITL_shardId, ITL_numShards))
		with tf.python_io.TFRecordWriter(ITL_outputFilename) as tfrecord_writer:
			# Half-open index range [start, end) handled by this shard.
			ITL_startIdx = ITL_shardId * ITL_numPerShard
			ITL_endIdx = min((ITL_shardId + 1) * ITL_numPerShard, ITL_numImages)
			for i in range(ITL_startIdx, ITL_endIdx):
				# NOTE(review): print() appends a newline, so the leading '\r'
				# does not keep the progress on a single line.
				print('\r>> Converting image %d/%d shard %d' % (i + 1, len(ITL_filenames), ITL_shardId))
				sys.stdout.flush()
				# Read the encoded image bytes and query their dimensions.
				# NOTE(review): the FastGFile handles below are never closed.
				ITL_imageFilename = os.path.join(ITL_imageDir, ITL_filenames[i] + '.jpg')
				ITL_imageData = tf.gfile.FastGFile(ITL_imageFilename, 'rb').read()
				height, width = ITL_imageReader.read_image_dims(ITL_imageData)
				# Read the encoded annotation bytes for the same entry.
				ITL_anoFilename = os.path.join(ITL_anoDir, ITL_filenames[i] + '.png')
				ITL_anoData = tf.gfile.FastGFile(ITL_anoFilename, 'rb').read()

コード例 #21

0

ファイルを表示

ファイル: build_duts_data.py プロジェクト: zhbli/cvpr19

def _convert_dataset(dataset_split):
    """Converts a split (image, label, superpixel map) to TFRecord format.

    Args:
      dataset_split: Path to the split list file, one image name (without
        extension) per line. Its base name minus the last four characters
        must be 'train' or 'test' and selects which FLAGS folders are read.

    Raises:
      ValueError: If the split name is neither 'train' nor 'test'.
      RuntimeError: If image, label and superpixel map differ in shape.
    """
    dataset = os.path.basename(dataset_split)[:-4]  # train or test
    print('Processing ' + dataset)
    # Close the list file deterministically instead of leaking the handle.
    with open(dataset_split, 'r') as split_file:
        filenames = [x.strip('\n') for x in split_file]
    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    # The folders depend only on the split, so pick them once up front. An
    # explicit raise replaces `assert False`, which is stripped under -O.
    if dataset == 'train':
        image_folder = FLAGS.train_image_folder
        seg_folder = FLAGS.train_semantic_segmentation_folder
        superpixel_folder = FLAGS.train_superpixel_folder
    elif dataset == 'test':
        image_folder = FLAGS.test_image_folder
        seg_folder = FLAGS.test_semantic_segmentation_folder
        superpixel_folder = FLAGS.test_superpixel_folder
    else:
        raise ValueError(
            "Dataset split must be 'train' or 'test', got %r" % (dataset,))

    image_reader = build_data.ImageReader('jpeg', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)
    superpixel_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                print('\r>> Converting image %d/%d shard %d' %
                      (i + 1, num_images, shard_id))
                image_filename = os.path.join(
                    image_folder, filenames[i] + '.' + FLAGS.image_format)
                seg_filename = os.path.join(
                    seg_folder, filenames[i] + '.' + FLAGS.label_format)
                # Superpixel maps use the label file format as well.
                superpixel_filename = os.path.join(
                    superpixel_folder,
                    filenames[i] + '.' + FLAGS.label_format)

                # Read the image.
                with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                with tf.gfile.FastGFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                # Read the super pixels.
                with tf.gfile.FastGFile(superpixel_filename,
                                        'rb') as superpixel_file:
                    superpixel_data = superpixel_file.read()
                sp_height, sp_width = superpixel_reader.read_image_dims(
                    superpixel_data)
                if not (height == seg_height == sp_height
                        and width == seg_width == sp_width):
                    raise RuntimeError(
                        'Shape mismatched between image, label and super pixel map.'
                    )

                # Convert to tf example.
                example = build_data.image_seg_sp_to_tfexample(
                    image_data, filenames[i], height, width, seg_data,
                    superpixel_data)
                tfrecord_writer.write(example.SerializeToString())
        print('\n')

コード例 #22

0

ファイルを表示

def _convert_dataset(dataset_split, _NUM_SHARDS, structure_path, plane):
    """Converts the specified dataset split to TFRecord format.

    Images are read from '<structure_path>/processed/PNGImages' and labels
    from '<structure_path>/processed/SegmentationClass' (both PNG). Shards
    are written to '<structure_path>/tfrecord_<plane>', which is created if
    it does not exist.

    Args:
      dataset_split: Path to the split list file, one image name (without
        extension) per line. Its base name minus the last four characters is
        used as the prefix of the output shard names.
      _NUM_SHARDS: Number of shard files to write.
      structure_path: Root directory holding the 'processed' folder.
      plane: Suffix appended to the 'tfrecord_' output directory name.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """
    image_folder = os.path.join(structure_path, 'processed', 'PNGImages')
    semantic_segmentation_folder = os.path.join(structure_path, 'processed',
                                                'SegmentationClass')
    image_format = label_format = 'png'

    # Build the output directory path once and reuse it below.
    output_dir = os.path.join(structure_path, 'tfrecord' + '_' + plane)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)

    # Close the list file deterministically instead of leaking the handle.
    with open(dataset_split, 'r') as split_file:
        filenames = [x.strip('\n') for x in split_file]
    # Sorted order makes the shard contents deterministic.
    filenames.sort()
    print(filenames)
    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image.
                image_filename = os.path.join(
                    image_folder, filenames[i] + '.' + image_format)
                with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                seg_filename = os.path.join(semantic_segmentation_folder,
                                            filenames[i] + '.' + label_format)
                with tf.gfile.FastGFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example; the name is passed as UTF-8 bytes.
                example = build_data.image_seg_to_tfexample(
                    image_data, str.encode(filenames[i], 'utf-8'), height,
                    width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #23

0

ファイルを表示

ファイル: build_cityscapes_data.py プロジェクト: hubertsgithub/block_annotation

def _convert_dataset(dataset_split,
                     dataset_name='',
                     class_ignore_value=255,
                     num_shards=1,
                     num_imgs=-1,
                     remaining_imgs_type=None,
                     remaining_imgs_num_or_ratio=None,
                     overwrite=False,
                     verbose=True,
                     shuffle=False,
                     shuffle_seed=1234):
    """Converts the specified dataset split to TFRecord format.

    Args:
      dataset_split: The dataset split (e.g., train, val).
      dataset_name: The dataset name (e.g., train_hints, val_hints); used as
        the tfrecord name and, when empty, replaced by dataset_split for
        that purpose. If it contains 'coarse' or 'block', the
        'label_coarse' / 'label_block' label files are used instead of
        'label'.
      class_ignore_value: Not used by this function.
      num_shards: Number of shard files to write.
      num_imgs: Number of images converted with their regular labels; a
        negative value means all images.
      remaining_imgs_type: Where the labels of the images beyond num_imgs
        come from when remaining_imgs_num_or_ratio is set.
        None: use the regular label files as-is.
        path -- string: use every file found under this directory (sorted)
          as alternative labels; their count must equal the number of
          regular label files.
      remaining_imgs_num_or_ratio: How many images beyond num_imgs to add.
        None: don't use remaining images.
        ratio -- float between 0 and 1: use ratio * num_remaining_imgs.
        num -- integer: use at most num remaining images.
        Only supported together with shuffle=True.
      overwrite: If False, shards whose output file already exists are
        skipped.
      verbose: If True, print the image and label path of every example.
      shuffle: If True, images and labels are permuted with one shared
        random permutation before conversion.
      shuffle_seed: Seed for the permutation. NOTE: this reseeds NumPy's
        global random state.

    Raises:
      RuntimeError: If loaded image and label have different shape.
      TypeError: If remaining_imgs_type is neither a string nor None, or if
        remaining_imgs_num_or_ratio is neither a float, an int nor None.
      NotImplementedError: If remaining images are requested without shuffle.
    """
    sys.stdout.flush()
    print('###############################################')
    sys.stdout.write('\rWorking on: {}\n'.format(dataset_name))

    print('Using full res images and labels...')
    image_files = list(_get_files('image', dataset_split))
    if 'coarse' in dataset_name:
        label_kind = 'label_coarse'
    elif 'block' in dataset_name:
        label_kind = 'label_block'
    else:
        label_kind = 'label'
    label_files = list(_get_files(label_kind, dataset_split))

    if num_imgs < 0:
        num_images = len(image_files)
    else:
        num_images = num_imgs

    remaining_imgs_label_files = None
    num_remaining_images = 0
    if remaining_imgs_num_or_ratio is not None:
        num_leftover = len(image_files) - num_images
        # Reject unsupported types here; previously they left
        # num_remaining_images undefined and surfaced as a NameError later.
        # bool is excluded explicitly because it is a subclass of int.
        if isinstance(remaining_imgs_num_or_ratio, bool):
            raise TypeError(
                'remaining_imgs_num_or_ratio should be a float, an int or None')
        if isinstance(remaining_imgs_num_or_ratio, float):
            assert 0 <= remaining_imgs_num_or_ratio <= 1
            num_remaining_images = int(remaining_imgs_num_or_ratio *
                                       num_leftover)
        elif isinstance(remaining_imgs_num_or_ratio, int):
            assert 0 <= remaining_imgs_num_or_ratio
            num_remaining_images = min(remaining_imgs_num_or_ratio,
                                       num_leftover)
        else:
            raise TypeError(
                'remaining_imgs_num_or_ratio should be a float, an int or None')

        if remaining_imgs_type is None:
            remaining_imgs_label_files = list(label_files)
        elif isinstance(remaining_imgs_type, str):
            # The placeholder was previously printed literally as "{}".
            print("Searching {} for label files.".format(remaining_imgs_type))
            remaining_imgs_label_files = []
            for root, _, filenames in os.walk(remaining_imgs_type):
                for filename in fnmatch.filter(filenames, "*"):
                    remaining_imgs_label_files.append(
                        os.path.join(root, filename))

            remaining_imgs_label_files = sorted(remaining_imgs_label_files)
            assert len(remaining_imgs_label_files) == len(
                label_files
            ), 'Expected {} alternative label files; found {}'.format(
                len(label_files), len(remaining_imgs_label_files))
        else:
            raise TypeError("remaining_imgs_type should be a string or None")

    if shuffle:
        shuffled_idxs = np.arange(len(image_files))
        np.random.seed(shuffle_seed)
        np.random.shuffle(shuffled_idxs)

        print('Using indices {} ...'.format(shuffled_idxs[:10]))

        # Apply the same permutation to images and labels to keep them paired.
        image_files = np.array(image_files)[shuffled_idxs]
        label_files = np.array(label_files)[shuffled_idxs]

        # Alternative label files
        if remaining_imgs_label_files is not None:
            remaining_imgs_label_files = np.array(
                remaining_imgs_label_files)[shuffled_idxs]

            # First num_images regular labels, followed by the next
            # num_remaining_images labels taken from the alternative list.
            label_files = list(label_files)[:num_images] + list(
                remaining_imgs_label_files)[num_images:num_images +
                                            num_remaining_images]

            assert len(label_files) == num_images + num_remaining_images, (
                len(label_files), num_images, num_remaining_images)
            num_images = num_images + num_remaining_images

    if not shuffle and remaining_imgs_label_files is not None:
        raise NotImplementedError("This is not going to work; check the code")

    num_per_shard = int(math.ceil(num_images / float(num_shards)))

    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    # Fall back to the split name for the shard file names. Done only here,
    # after the label kind was chosen from the original dataset_name.
    if dataset_name == '':
        dataset_name = dataset_split

    for shard_id in range(num_shards):
        shard_filename = '%s-%05d-of-%05d.tfrecord' % (dataset_name, shard_id,
                                                       num_shards)
        output_filename = os.path.join(FLAGS.output_dir, shard_filename)
        if os.path.exists(output_filename) and not overwrite:
            print('File exists. Skipping. {}'.format(output_filename))
            continue

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)

            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()

                # Read the image; context managers close the handles instead
                # of leaking two per example.
                with tf.gfile.FastGFile(image_files[i], 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)

                # Read the semantic segmentation annotation.
                with tf.gfile.FastGFile(label_files[i], 'rb') as label_file:
                    seg_data = label_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if verbose:
                    sys.stdout.write('\r\nUsing\n {}\n {}\n'.format(
                        image_files[i], label_files[i]))

                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')

                # Convert to tf example. The example name is the image file
                # name without its postfix and extension.
                image_suffix = (_POSTFIX_MAP['image'] + '.' +
                                _DATA_FORMAT_MAP['image'])
                filename = os.path.basename(image_files[i]).replace(
                    image_suffix, '')

                example = build_data.image_seg_to_tfexample(
                    image_data=image_data,
                    filename=filename,
                    height=height,
                    width=width,
                    seg_data=seg_data,
                )
                tfrecord_writer.write(example.SerializeToString())

        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #24

0

ファイルを表示

ファイル: build_pascal.py プロジェクト: Ryuta-S/ppanet

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Args:
        dataset_split: Path to the split list file, one sample name per line.
            '<FLAGS.image_dir>/<name>.jpg' and
            '<FLAGS.annotation_dir>/<name>.png' are read for every name. The
            file's base name minus its last four characters is used as the
            prefix of the output shard names.

    Raises:
        RuntimeError: If loaded image and label have different shape.
    """

    # get the image and annotation path from dataset text.
    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset + '\n')

    img_list = []
    ann_list = []
    img_dir = FLAGS.image_dir
    ann_dir = FLAGS.annotation_dir

    # Both lists are appended in lockstep, so they always have equal length.
    with open(dataset_split, 'r') as f:
        for row in f:
            row = row.strip()
            img_list.append(os.path.join(img_dir, row) + '.jpg')
            ann_list.append(os.path.join(ann_dir, row) + '.png')

    image_reader = build_data.ImageReader(channels=3, img_format='jpeg')
    label_reader = build_data.ImageReader(channels=1)

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    num_img = len(img_list)
    # Force float division and an int result: on Python 2 the bare
    # `num_img / NUM_SHARD` floor-divides and math.ceil returns a float,
    # which range() below rejects.
    num_img_per_shard = int(math.ceil(num_img / float(NUM_SHARD)))

    print('The number of %s image: %d' % (dataset_split, num_img))
    for shard_id in range(NUM_SHARD):
        # Shard file names are numbered starting from 1.
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id + 1, NUM_SHARD))
        start_idx = shard_id * num_img_per_shard
        end_idx = min((shard_id + 1) * num_img_per_shard, num_img)

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            for i in range(start_idx, end_idx):
                img_path = img_list[i]
                ann_path = ann_list[i]
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_img, shard_id))
                sys.stdout.flush()

                # Read the image; context managers close the handles instead
                # of leaking two per example.
                with tf.gfile.GFile(img_path, 'rb') as img_file:
                    img_data = img_file.read()
                height, width = image_reader.read_image_dims(img_data)

                # Read the annotation.
                with tf.gfile.GFile(ann_path, 'rb') as ann_file:
                    ann_data = ann_file.read()
                ann_height, ann_width = label_reader.read_image_dims(ann_data)
                if height != ann_height or width != ann_width:
                    raise RuntimeError(
                        'Shape mismatched between image and annotations.')

                # Convert to tf example.
                # NOTE(review): the file name is passed first here, unlike
                # the other converters on this page -- confirm against this
                # project's build_data.image_seg_to_tfexample signature.
                example = build_data.image_seg_to_tfexample(
                    os.path.basename(img_path), img_data, height, width,
                    ann_data)
                tfrecord_writer.write(example.SerializeToString())
            sys.stdout.write('\n')
            sys.stdout.flush()

コード例 #25

0

ファイルを表示

ファイル: build_617_data.py プロジェクト: abhineet123/river_ice_segmentation

def _convert_dataset(db_name):
    """Converts the specified dataset split to TFRecord format.

    Images are read from '<FLAGS.db_root_dir>/<db_name>/images' and labels
    from '<FLAGS.db_root_dir>/<db_name>/labels'. Image and label files are
    paired by position after sorting both lists by file name without
    extension. Shards are written to
    '<FLAGS.db_root_dir>/<FLAGS.output_dir>/tfrecord'.

    Args:
      db_name: The dataset split (e.g., train, test).

    Raises:
      SystemError: If no images or no labels are found, if their counts
        differ, or if a paired image and label differ in base name.
      RuntimeError: If loaded image and label have different shape.
    """

    def _stem(path):
        # File name without directory and extension; used to pair files.
        return os.path.splitext(os.path.basename(path))[0]

    output_dir = os.path.join(FLAGS.db_root_dir, FLAGS.output_dir, 'tfrecord')
    sys.stdout.write('Processing {}\n\n'.format(db_name))
    images = os.path.join(FLAGS.db_root_dir, db_name, 'images',
                          '*.{}'.format(FLAGS.image_format))
    print('Reading images from: {}'.format(images))

    # glob returns files in arbitrary order; sort so that images and labels
    # line up by position.
    image_filenames = sorted(glob.glob(images), key=_stem)
    # glob never returns None, so test for an empty result instead.
    if not image_filenames:
        raise SystemError('No images found at {}'.format(images))

    if FLAGS.create_dummy_labels:
        labels_path = os.path.join(FLAGS.db_root_dir, db_name, 'labels')
        if not os.path.isdir(labels_path):
            os.makedirs(labels_path)
        print('Creating dummy labels at: {}'.format(labels_path))
        for image_filename in image_filenames:
            image = misc.imread(image_filename)
            # Only the first two dimensions are needed; this also works for
            # images loaded without a channel axis.
            height, width = image.shape[:2]
            dummy_label = np.zeros((height, width), dtype=np.uint8)
            out_fname = _stem(image_filename) + '.{}'.format(
                FLAGS.label_format)
            misc.imsave(os.path.join(labels_path, out_fname), dummy_label)
        print('Done')

    labels = os.path.join(FLAGS.db_root_dir, db_name, 'labels',
                          '*.{}'.format(FLAGS.label_format))
    print('Reading labels from: {}'.format(labels))

    seg_filenames = sorted(glob.glob(labels), key=_stem)
    if not seg_filenames:
        raise SystemError('No labels found at {}'.format(labels))

    num_images = len(image_filenames)
    num_labels = len(seg_filenames)

    if num_images != num_labels:
        raise SystemError(
            'Mismatch between image and label file counts: {}, {}'.format(
                num_images, num_labels))

    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    # NOTE(review): both readers are built for 3-channel PNG regardless of
    # FLAGS.image_format / FLAGS.label_format -- confirm this is intended.
    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=3)

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    print('Writing tfrecords to: {}'.format(output_dir))

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            output_dir,
            '%s-%05d-of-%05d.tfrecord' % (db_name, shard_id, _NUM_SHARDS))
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                image_filename = image_filenames[i]
                seg_filename = seg_filenames[i]
                # Compare the image name with the LABEL name; the check used
                # to compare the image name with itself and never fired.
                f1 = _stem(image_filename)
                f2 = _stem(seg_filename)
                if f1 != f2:
                    raise SystemError(
                        'Mismatch between image and label filenames: {}, {}'.
                        format(f1, f2))

                # Read the image as raw bytes ('rb'); text mode would try to
                # decode the encoded image data on Python 3.
                with tf.gfile.FastGFile(image_filename, 'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                with tf.gfile.FastGFile(seg_filename, 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')
                # Convert to tf example; the full image path is stored as the
                # example's file name.
                example = build_data.image_seg_to_tfexample(
                    image_data, image_filename, height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #26

0

ファイルを表示

def _convert_dataset(split, image_folder, anno_file, cls_name_to_index,
                     tfrecord_base_dir, num_shards):
    """Writes one classification split to sharded TFRecords and summarizes it.

    Args:
        split: Split name; used as the shard filename prefix and stored in
            the returned statistics.
        image_folder: Directory holding the images. Annotated image names are
            joined onto it; for the test set its '.jpg' files are listed.
        anno_file: Path to a comma-separated annotation file whose first two
            fields are the image name and the class name, or None for the
            test set (no labels).
        cls_name_to_index: Lookup from class name to integer label.
        tfrecord_base_dir: Directory the shard files are written to.
        num_shards: Number of shard files to produce.

    Returns:
        A dict with 'split_name', 'total_examples', 'per_class_num' and the
        mean/std of the image heights and widths.
    """
    # Each entry is [image_path, class_name, class_index]; height and width
    # are appended once the image has been read.
    anno_raw = []
    if anno_file is not None:
        with open(anno_file, 'r') as f:
            for line in f:
                # A blank (e.g. trailing) line has no class field to parse.
                if not line.strip():
                    continue
                # Presumably skips the header row; note that any row whose
                # text contains "Image" is dropped as well.
                if "Image" in line:
                    continue
                fields = line.strip().split(',')
                im_name = os.path.join(image_folder, fields[0].strip())
                im_cls = fields[1].strip()
                anno_raw.append([im_name, im_cls, cls_name_to_index[im_cls]])
        # One shuffle would be just as random; three are kept so the
        # resulting order under a fixed random seed does not change.
        for _ in range(3):
            random.shuffle(anno_raw)
    else:  # test set: no annotations, use placeholder class and index
        for one_im in os.listdir(image_folder):
            if one_im.endswith('.jpg'):
                anno_raw.append(
                    [os.path.join(image_folder, one_im), 'test_image', 999999])

    # Read every image and write it to its shard.
    num_images = len(anno_raw)
    num_per_shard = int(math.ceil(num_images / float(num_shards)))

    image_reader = build_data.ImageReader('jpeg', channels=3)

    for shard_id in range(num_shards):
        output_filename = os.path.join(
            tfrecord_base_dir,
            '%s-%05d-of-%05d.tfrecord' % (split, shard_id, num_shards))
        start_idx = shard_id * num_per_shard
        end_idx = min((shard_id + 1) * num_per_shard, num_images)
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                image_path, image_label, image_index = anno_raw[i]
                # Close the handle deterministically instead of leaking one
                # per image.
                with tf.gfile.FastGFile(image_path, 'rb') as image_file:
                    image_data = image_file.read()
                image_name = os.path.basename(image_path)
                image_height, image_width = \
                    image_reader.read_image_dims(image_data)

                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'image': _bytes_list_feature(image_data),
                        'image_name': _bytes_list_feature(image_name),
                        'class_name': _bytes_list_feature(image_label),
                        'label': _int64_list_feature(image_index),
                        'height': _int64_list_feature(image_height),
                        'width': _int64_list_feature(image_width),
                    }))
                tfrecord_writer.write(example.SerializeToString())
                # Keep the dimensions for the statistics computed below.
                anno_raw[i].extend([image_height, image_width])
        sys.stdout.write('\n')
        sys.stdout.flush()

    # Summarize the split.
    split_statistics = {}
    split_statistics['split_name'] = split
    split_statistics['total_examples'] = len(anno_raw)
    height_all = []
    width_all = []
    per_class_num = {}
    for anno in anno_raw:
        one_cls = anno[1]
        height_all.append(anno[3])
        width_all.append(anno[4])
        per_class_num[one_cls] = per_class_num.get(one_cls, 0) + 1
    split_statistics['per_class_num'] = per_class_num
    split_statistics['height_mean'] = float(np.mean(height_all))
    split_statistics['height_std'] = float(np.std(height_all))
    split_statistics['width_mean'] = float(np.mean(width_all))
    split_statistics['width_std'] = float(np.std(width_all))

    return split_statistics

コード例 #27

0

ファイルを表示

ファイル: build_gta_pfd_data.py プロジェクト: muaz-git/dataset_modifier_deeplab

def _convert_dataset(dataset_split, dataset):
    """Converts the specified dataset split to TFRecord format.

    Image/label pairs whose shapes differ are reported on stdout and skipped
    instead of aborting the conversion.

    Args:
      dataset_split: Split name used as the shard filename prefix.
      dataset: Indexable pair; dataset[0] holds the image file names and
        dataset[1] the label file names, aligned by position.

    Raises:
      RuntimeError: If the number of images and labels differ, or if an
        image and its label do not have the same file name.
    """

    # These are prepended to the file names by plain string concatenation,
    # so the flag values must already end with a path separator.
    img_dir_name = FLAGS.img_dir_name
    labels_dir_name = FLAGS.trainIDs_dir_name
    output_dir = FLAGS.output_dir

    image_files = dataset[0]
    label_files = dataset[1]

    if len(image_files) != len(label_files):
        raise RuntimeError('Length mismatch image and label.')

    num_images = len(image_files)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        shard_filename = '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id,
                                                       _NUM_SHARDS)
        output_filename = os.path.join(output_dir, shard_filename)
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()
                # Read the image; the handle is closed as soon as it is read.
                with tf.gfile.FastGFile(img_dir_name + image_files[i],
                                        'rb') as image_file:
                    image_data = image_file.read()
                height, width = image_reader.read_image_dims(image_data)
                # Read the semantic segmentation annotation.
                with tf.gfile.FastGFile(labels_dir_name + label_files[i],
                                        'rb') as label_file:
                    seg_data = label_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)
                if height != seg_height or width != seg_width:
                    # Mismatched pairs are skipped, not fatal.
                    print(
                        "Shape mismatched between image and label. height. Ignoring."
                    )
                    continue

                # Image and label must share the same relative file name.
                if image_files[i] != label_files[i]:
                    raise RuntimeError('image file name : ' + image_files[i] +
                                       ' is not equal to label file name : ' +
                                       label_files[i])
                filename = os.path.basename(image_files[i])

                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, filename, height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())
        sys.stdout.write('\n')
        sys.stdout.flush()

コード例 #28

0

ファイルを表示

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Every image and mask is resized to 400x624, dumped as a PNG under
    check_localization/test_images/ for visual inspection, re-encoded and
    written to the shard. Shards whose output file already exists are
    skipped, and the function stops after writing the first missing shard.

    Args:
      dataset_split: Path to a text file with one
        "<image file>,<segmentation file>" pair per line. Its base name minus
        the last four characters is used as the shard filename prefix.

    Raises:
      RuntimeError: If loaded image and label have different shape.
    """

    if not os.path.isdir(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    dataset = os.path.basename(dataset_split)[:-4]
    sys.stdout.write('Processing ' + dataset)
    # Close the list file once it has been parsed.
    with open(dataset_split, 'r') as list_file:
        filenames = [x.strip('\n').split(',') for x in list_file]
    num_images = len(filenames)
    num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

    image_reader = build_data.ImageReader('png', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(_NUM_SHARDS):
        output_filename = os.path.join(
            FLAGS.output_dir,
            '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
        # Shards written by a previous run are left untouched.
        if os.path.isfile(output_filename):
            continue

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_per_shard
            end_idx = min((shard_id + 1) * num_per_shard, num_images)

            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()

                # Read the image data.
                image_filename = os.path.join(FLAGS.image_folder,
                                              filenames[i][0])
                with tf.gfile.FastGFile(image_filename, 'rb') as f:
                    image_data = f.read()

                # Resize the image.
                image = image_reader.decode_image(image_data)
                image = resize_image(image)
                image = tf.image.resize_images(
                    image,
                    size=[400, 624],
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False)

                # Entering the session itself (rather than .as_default())
                # still makes it the default for eval() and also closes it
                # on exit, so sessions do not pile up per image.
                with tf.Session():
                    img = image.eval()
                cv2.imwrite(
                    "check_localization/test_images/tfrecord-test-image-{}.png"
                    .format(i), img)

                height, width = image.shape[:2]
                image_data = image_reader.encode_image(image)

                # Read the semantic segmentation annotation.
                seg_filename = os.path.join(FLAGS.semantic_segmentation_folder,
                                            filenames[i][1])
                with tf.gfile.FastGFile(seg_filename, 'rb') as f:
                    seg_data = f.read()

                # Resize the mask.
                # NOTE(review): bilinear interpolation blends neighbouring
                # class ids in a label mask; nearest-neighbour is the usual
                # choice. Left unchanged because what encode_image expects
                # is not visible here -- confirm before switching.
                seg_image = label_reader.decode_image(seg_data)
                seg_image = resize_image(seg_image)
                seg_image = tf.image.resize_images(
                    seg_image,
                    size=[400, 624],
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False)

                with tf.Session():
                    mask = seg_image.eval()
                cv2.imwrite(
                    "check_localization/test_images/tfrecord-test-annot-{}.png"
                    .format(i), mask)

                seg_height, seg_width = seg_image.shape[:2]
                seg_data = label_reader.encode_image(seg_image)

                if height != seg_height or width != seg_width:
                    raise RuntimeError(
                        'Shape mismatched between image and label.')

                # Convert to tf example.
                example = build_data.image_seg_to_tfexample(
                    image_data, filenames[i][0], height, width, seg_data)
                tfrecord_writer.write(example.SerializeToString())

        sys.stdout.write('\n')
        sys.stdout.flush()

        # Only one new shard is written per call. NOTE(review): presumably
        # deliberate, since resize ops are added to the graph on every
        # iteration and re-running resumes at the next missing shard --
        # confirm with the author.
        break

コード例 #29

0

ファイルを表示

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Annotations are the '*.png' files under
    FLAGS.annotation_dir/<dataset_split>2017; the matching image is the
    '.jpg' of the same base name under FLAGS.image_dir/<dataset_split>2017.

    Args:
        dataset_split: The dataset split (e.g., train, val, trainval)

    Raises:
        ValueError: If the image belonging to an annotation does not exist.
        RuntimeError: If loaded image and label have different shape.
    """

    # Get the annotation paths.
    pattern = os.path.join(FLAGS.annotation_dir, '%s2017' % dataset_split,
                           '*.png')
    ann_list = sorted(glob.glob(pattern))

    image_reader = build_data.ImageReader(channels=3)
    label_reader = build_data.ImageReader(channels=1)

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    num_img = len(ann_list)
    # Float division plus int() keeps this a usable range() bound on
    # Python 2 as well, where "/" truncates and math.ceil returns a float.
    num_img_per_shard = int(math.ceil(num_img / float(NUM_SHARD)))

    print('The number of %s image: %d' % (dataset_split, num_img))
    for shard_id in range(NUM_SHARD):
        # Shard files are numbered from 1 here, while the progress message
        # below reports the 0-based shard_id.
        output_filename = os.path.join(
            FLAGS.output_dir, '%s-%05d-of-%05d.tfrecord' %
            (dataset_split, shard_id + 1, NUM_SHARD))
        start_idx = shard_id * num_img_per_shard
        end_idx = min((shard_id + 1) * num_img_per_shard, num_img)

        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            for i in range(start_idx, end_idx):
                ann_path = ann_list[i]
                # Same parent directory name and base name as the annotation.
                img_path = os.path.join(
                    FLAGS.image_dir,
                    ann_path.split(os.sep)[-2],
                    os.path.basename(ann_path).replace('.png', '.jpg'))
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_img, shard_id))
                sys.stdout.flush()

                if not os.path.exists(img_path):
                    raise ValueError('image {} dont exists'.format(img_path))

                # Read the image; handles are closed right after reading.
                with tf.gfile.GFile(img_path, 'rb') as img_file:
                    img_data = img_file.read()
                height, width = image_reader.read_image_dims(img_data)

                # Read the annotation.
                with tf.gfile.GFile(ann_path, 'rb') as ann_file:
                    ann_data = ann_file.read()
                ann_height, ann_width = label_reader.read_image_dims(ann_data)
                if height != ann_height or width != ann_width:
                    raise RuntimeError(
                        'Shape mismatched between image and annotation.')

                # Convert to tf example.
                # NOTE(review): the file name is passed before the image
                # bytes here; confirm this matches the parameter order of
                # this project's build_data.image_seg_to_tfexample.
                example = build_data.image_seg_to_tfexample(
                    os.path.basename(img_path), img_data, height, width,
                    ann_data)
                tfrecord_writer.write(example.SerializeToString())
            sys.stdout.write('\n')
            sys.stdout.flush()

コード例 #30

0

ファイルを表示

ファイル: build_PubLayNet_tfrecords.py プロジェクト: cherish24/PubLayNet_tfrecords

def _convert_dataset(dataset_split):
    """Converts the specified dataset split to TFRecord format.

    Every segmentation mask is paired with the raw image that has the same
    base name. Pairs whose dimensions differ are reported and skipped.

    Args:
        dataset_split: The dataset split (e.g., train, val). 'train' selects
            the training directories; any other value selects the
            validation ones.
    Raises:
        ValueError: If no raw image matches a segmentation mask's name.
    """
    sys.stdout.write('Processing ' + dataset_split)

    if dataset_split == 'train':
        seg_base_dir = 'PubLayNet/SegmentationClass/train/'
        raw_base_dir = 'PubLayNet/RawImages/train_data/'
    else:
        seg_base_dir = 'PubLayNet/SegmentationClass/val/'
        raw_base_dir = 'PubLayNet/RawImages/val_data/val/'

    def _name_only(path):
        # Second-to-last dot-separated piece of the last '/' component,
        # i.e. the file name without its extension.
        return path.split('/')[-1].split('.')[-2]

    # Sorted so that shard contents do not depend on directory listing order.
    seg_file_names = sorted(
        glob.glob(seg_base_dir + "**/*.png", recursive=True))
    raw_file_names = sorted(
        glob.glob(raw_base_dir + "**/*.jpg", recursive=True))

    # Name -> path lookup built once, replacing a linear list search per
    # image. setdefault keeps the first path when a name occurs twice.
    raw_path_by_name = {}
    for raw_path in raw_file_names:
        raw_path_by_name.setdefault(_name_only(raw_path), raw_path)

    num_images = len(seg_file_names)
    num_shards = int(math.ceil(num_images / float(num_docs_per_shard)))

    image_reader = build_data.ImageReader('jpeg', channels=3)
    label_reader = build_data.ImageReader('png', channels=1)

    for shard_id in range(num_shards):
        output_filename = os.path.join(
            './PubLayNet/tfrecord',
            '%s-%05d-of-%05d.tfrecord' % (dataset_split, shard_id, num_shards))
        with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
            start_idx = shard_id * num_docs_per_shard
            end_idx = min((shard_id + 1) * num_docs_per_shard, num_images)
            for i in range(start_idx, end_idx):
                sys.stdout.write('\r>> Converting image %d/%d shard %d' %
                                 (i + 1, num_images, shard_id))
                sys.stdout.flush()

                # Read the semantic segmentation annotation.
                seg_name_only = _name_only(seg_file_names[i])
                with tf.io.gfile.GFile(seg_file_names[i], 'rb') as seg_file:
                    seg_data = seg_file.read()
                seg_height, seg_width = label_reader.read_image_dims(seg_data)

                # Read the raw image that shares the mask's name.
                matched_raw_path = raw_path_by_name.get(seg_name_only)
                if matched_raw_path is None:
                    raise ValueError(
                        'No raw image found for segmentation mask {}'.format(
                            seg_name_only))
                with tf.io.gfile.GFile(matched_raw_path, 'rb') as raw_file:
                    image_data = raw_file.read()
                height, width = image_reader.read_image_dims(image_data)

                if height != seg_height or width != seg_width:
                    # Mismatched pairs are skipped rather than treated as
                    # fatal.
                    print(
                        'The raw image and segmentation mask do not have the same dimensions.'
                    )
                    print('Skipping image {}'.format(seg_name_only))
                else:
                    # Convert to tf example.
                    example = build_data.image_seg_to_tfexample(
                        image_data, seg_name_only, height, width, seg_data)
                    tfrecord_writer.write(example.SerializeToString())
                sys.stdout.write('\n')
                sys.stdout.flush()