def main(image_directory: str, annotation_directory: str, output_path_training_split: str,
         output_path_validation_split: str, output_path_test_split: str, label_map_path: str, number_of_shards: int,
         included_classes: List[str]):
    os.makedirs(os.path.dirname(output_path_training_split), exist_ok=True)
    os.makedirs(os.path.dirname(output_path_validation_split), exist_ok=True)
    os.makedirs(os.path.dirname(output_path_test_split), exist_ok=True)
    label_map_dict = label_map_util.get_label_map_dict(label_map_path)
    all_jpg_image_paths = glob(f"{image_directory}/**/*.jpg", recursive=True)
    all_png_image_paths = glob(f"{image_directory}/**/*.png", recursive=True)
    all_image_paths = all_jpg_image_paths + all_png_image_paths
    all_annotation_paths = glob(f"{annotation_directory}/**/*.json", recursive=True)

    # Filter out the dataset.json files, which are complete dataset annotations
    all_annotation_paths = [a for a in all_annotation_paths if "dataset.json" not in a]

    training_sample_indices, validation_sample_indices, test_sample_indices = get_training_validation_test_indices(
        all_image_paths)

    all_annotation_paths = sorted(all_annotation_paths)
    all_image_paths = sorted(all_image_paths)

    if len(all_image_paths) != len(all_annotation_paths):
        print(f"Not every image has annotations: {len(all_image_paths)} images, "
              f"{len(all_annotation_paths)} annotation files")

    for annotation_path, image_path in zip(all_annotation_paths, all_image_paths):
        if os.path.splitext(os.path.basename(image_path))[0] not in annotation_path:
            print("Invalid annotations detected: {0}, {1}".format(image_path, annotation_path))

    print(f"Exporting\n"
          f"- {len(training_sample_indices)} training samples\n"
          f"- {len(validation_sample_indices)} validation samples\n"
          f"- {len(test_sample_indices)} test samples")

    with contextlib2.ExitStack() as tf_record_close_stack:
        training_tf_records = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path_training_split, number_of_shards)
        validation_tf_records = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path_validation_split, number_of_shards)
        test_tf_records = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path_test_split, number_of_shards)
        index = 0
        for tf_example in annotations_to_tf_example_list(all_image_paths, all_annotation_paths, label_map_dict, included_classes):
            shard_index = index % number_of_shards

            # Test split membership before incrementing, so the index matches the
            # 0-based indices returned by get_training_validation_test_indices.
            if index in training_sample_indices:
                training_tf_records[shard_index].write(tf_example.SerializeToString())
            elif index in validation_sample_indices:
                validation_tf_records[shard_index].write(tf_example.SerializeToString())
            elif index in test_sample_indices:
                test_tf_records[shard_index].write(tf_example.SerializeToString())
            index += 1
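
All of the examples on this page follow the same pattern: open one writer per shard on a contextlib2.ExitStack, then write example i to shard i % num_shards. For reference, here is a minimal self-contained sketch of that pattern (an illustration, not taken from any of the examples) using plain tf.io.TFRecordWriter in place of the Object Detection API helper; the shard naming mirrors the -NNNNN-of-NNNNN convention that open_sharded_output_tfrecords produces.

import contextlib
import tensorflow as tf

def write_sharded(serialized_examples, base_path, num_shards):
    """Round-robin a stream of serialized tf.train.Examples across shards."""
    with contextlib.ExitStack() as stack:
        # One writer per shard; the ExitStack closes them all when the block exits.
        writers = [
            stack.enter_context(tf.io.TFRecordWriter(
                '{}-{:05d}-of-{:05d}'.format(base_path, i, num_shards)))
            for i in range(num_shards)
        ]
        for index, example in enumerate(serialized_examples):
            writers[index % num_shards].write(example)
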
def create_tf_record(output_filename, num_shards, label_map_dict,
                     annotations_dir, image_dir, examples):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))
            mask_path = os.path.join(annotations_dir, example + '.png')
            image_path = os.path.join(image_dir, example + '.jpg')

            try:
                tf_example = dict_to_tf_example(example, mask_path,
                                                label_map_dict, image_path)
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
                    print("done")
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', image_path)
Example No. 3
def main(image_dir, csv_input, output_path, num_shards=1):
    path = image_dir
    examples = pd.read_csv(csv_input)
    grouped = split(examples, 'filename')
    num = 0

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        for index, group in enumerate(grouped):
            num += 1
            tf_example = create_tf_example(group, path)
            output_shard_index = index % num_shards
            output_tfrecords[output_shard_index].write(
                tf_example.SerializeToString())
            if num % 100 == 0:  # print progress every 100 conversions
                print(num)

    print('Successfully created the TFRecords: {}'.format(output_path))
Example No. 4
def write_tf_record_shard(self, path_tf_record, path_yaml, dir_yaml_data, num_shards):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, path_tf_record, num_shards)
        with open(path_yaml, 'rb') as f:
            examples = yaml.safe_load(f.read())
        count = 0
        for example in examples:
            print("example:", example)

            # Pick the filename key that matches the annotation format.
            if self.input_type == self.INPUT_TYPES[IDX_BOSCH]:
                key_file = 'path'
            elif self.input_type == self.INPUT_TYPES[IDX_SLOTH]:
                key_file = 'filename'
            else:
                continue  # unknown input type, skip

            filename = example[key_file]
            filename = os.path.abspath(os.path.join(os.path.dirname(dir_yaml_data), filename))
            if not os.path.exists(filename):
                print(filename, " does not exist.")
                continue
            count = count + 1
            if MAX_COUNT_DATA != 0 and count > MAX_COUNT_DATA:
                break

            if self.input_type == self.INPUT_TYPES[IDX_BOSCH]:
                tf_example = self.create_tf_example_bosch(example, filename)
            else:
                tf_example = self.create_tf_example_sloth(example, filename)

            output_shard_index = count % num_shards
            output_tfrecords[output_shard_index].write(tf_example.SerializeToString())
Example No. 5
def convert_to_tf_records(images_data: List[ImageData],
                          label_map: Dict[str, int],
                          filepath: Union[str, Path],
                          num_workers: int = 1,
                          num_shards: int = 2,
                          max_pictures_per_worker: int = 1000,
                          use_thumbnail: Tuple[int, int] = None):
    logger.info('Create tf_records from data.')

    # np.array_split's second argument is a number of chunks, not a chunk size,
    # so derive the chunk count from max_pictures_per_worker.
    num_chunks = max(1, int(np.ceil(len(images_data) / max_pictures_per_worker)))
    data_chunks = np.array_split(images_data, num_chunks)
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = creation_util.open_sharded_output_tfrecords(
            exit_stack=tf_record_close_stack,
            base_path=filepath,
            num_shards=num_shards)
        records_written = 0
        for data_chunk in tqdm(data_chunks):
            tf_records = Parallel(n_jobs=num_workers)(
                delayed(tf_record_from_image_data)(image_data=image_data,
                                                   label_map=label_map,
                                                   use_thumbnail=use_thumbnail)
                for image_data in data_chunk)

            # Keep a running counter so sharding stays round-robin across chunks.
            for tf_record in tf_records:
                output_shard_index = records_written % num_shards
                output_tfrecords[output_shard_index].write(
                    tf_record.SerializeToString())
                records_written += 1

    logger.info(
        f"tf_records saved to {filepath}-?????-of-{str(num_shards).zfill(5)}.")
Example No. 6
def create_tf_record(output_filename, num_shards, label_map_dict, image_dir,
                     examples):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))

            try:
                tf_example = dict_to_tf_example(example, label_map_dict,
                                                image_dir)
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', example)
Example No. 7
def main(_):
    labels = dict(Person=1, Car=2)

    train = Path('./object_detection/smart_spy/dataset/train')
    test = Path('./object_detection/smart_spy/dataset/test')
    # train = Path("/tmp/ds/train")
    # test = Path("/tmp/ds/test")

    for ds in [train, test]:
        num_shards = 50
        output_filebase = f"./object_detection/smart_spy/dataset/tfrecords/{ds.name}/tf.record"

        image_path_list = chain(ds.glob("Person/*.jpg"), ds.glob("Car/*.jpg"))
        examples = [
            to_image_info(i, i.parent.name, labels[i.parent.name])
            for i in image_path_list
        ]

        with contextlib2.ExitStack() as tf_record_close_stack:
            output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
                tf_record_close_stack, output_filebase, num_shards
            )
            for index, example in enumerate(examples):
                tf_example = create_tf_example(example)
                output_shard_index = index % num_shards
                output_tfrecords[output_shard_index].write(tf_example.SerializeToString())
Example No. 8
def main(_):
    path = os.path.join(FLAGS.image_dir)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')

    num_shards = 100

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, FLAGS.output_path, num_shards)
        index = 0
        for example in grouped:
            tf_example = create_tf_example(example, path)
            output_shard_index = index % num_shards
            output_tfrecords[output_shard_index].write(
                tf_example.SerializeToString())
            index = index + 1
Example No. 9
def create_tf_record(image_dir_path, annotations_dir_path, tfrecord_dir_path,
                     label_map_dict, images_filename):
    """Creates a TFRecord file from data.

  Args:
    image_dir_path: Directory where image files are stored.
    annotations_dir_path: Directory where annotation files are stored.
    tfrecord_dir_path: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    images_filename: Examples to parse and save to tf record.
  """
    masks_dir_path = os.path.join(annotations_dir_path, FLAGS.masks_dir)
    xmls_dir_path = os.path.join(annotations_dir_path, FLAGS.xmls_dir)
    with contextlib2.ExitStack() as tf_record_close_stack:
        tfrecord_path = os.path.join(tfrecord_dir_path,
                                     FLAGS.tfrecord_filename)
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, tfrecord_path, FLAGS.num_shards)
        for idx, filename in enumerate(images_filename):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(images_filename))
            image_path = os.path.join(image_dir_path, filename + '.jpg')
            mask_path = os.path.join(masks_dir_path, filename + '.png')
            xml_path = os.path.join(xmls_dir_path, filename + '.xml')
            try:
                tf_example = image_to_tf_data(image_path, mask_path, xml_path,
                                              label_map_dict, filename)
                if tf_example:
                    shard_idx = idx % FLAGS.num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
                    logging.info('done')
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', image_path)
def create_tf_record_from_images_list(images, annotations_index,
                                      dataset_base_dir, category_index,
                                      original_category_index, output_path):
    num_shards = 1 + (len(images) // FLAGS.images_per_shard)
    total_annot_skipped = 0
    total_empty_annot_skipped = 0
    total_image_skipped = 0

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)

        for index, image in enumerate(images):
            image_id = image['id']
            if image_id not in annotations_index:
                annotations_index[image_id] = []
            tf_example, annot_skipped, empty_annot_skipped = create_tf_example(
                image, dataset_base_dir, annotations_index[image_id],
                category_index, original_category_index)
            total_annot_skipped += annot_skipped
            total_empty_annot_skipped += empty_annot_skipped

            if tf_example is not None:
                output_shard_index = index % num_shards
                output_tfrecords[output_shard_index].write(
                    tf_example.SerializeToString())
            else:
                total_image_skipped += 1

        tf.compat.v1.logging.info('Finished writing, skipped %d bboxes.',
                                  total_annot_skipped)
        tf.compat.v1.logging.info('Skipped %d bboxes on empty images.',
                                  total_empty_annot_skipped)
        tf.compat.v1.logging.info('%d images not found.', total_image_skipped)
def run(images_path, description_file, output_path, no_bbox=False):
    i = 0
    with open(description_file) as f:
        if no_bbox:
            writer = tf.python_io.TFRecordWriter(output_path)
            while True:
                try:
                    tf_example = parse_test_example(f, images_path)
                except IOError:
                    break
                writer.write(tf_example.SerializeToString())
                i += 1
            writer.close()
        else:
            num_shards = 10
            print("Processing {}".format(images_path))
            with contextlib2.ExitStack() as tf_record_close_stack:
                output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
                    tf_record_close_stack, output_path, num_shards)
                while True:
                    try:
                        tf_example = parse_example(f, images_path)
                    except IOError:
                        break
                    output_shard_index = i % num_shards
                    output_tfrecords[output_shard_index].write(
                        tf_example.SerializeToString())
                    i += 1

    print("Successfully created records for {} images\n".format(i))
Example No. 12
def _create_tf_record_from_bdd_annotations(annotations_file, image_dir,
                                           output_path, num_shards):
    """Loads BDD annotation json files and converts to tf.Record format.
    Args:
      annotations_file: JSON file containing bounding box annotations.
      image_dir: Directory containing the image files.
      output_path: Path to output tf.Record file.
      num_shards: number of output file shards.
    """
    with contextlib2.ExitStack() as tf_record_close_stack, \
            tf.gfile.GFile(annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)

        groundtruth_data = json.load(fid)
        category_index = label_map_util.create_category_index_from_labelmap(
            'bdd_label_map.pbtxt', use_display_name=False)
        total_num_annotations_skipped = 0
        for idx, frame in enumerate(groundtruth_data):
            if idx % 100 == 0:
                tf.logging.info('On image {} of {}'.format(
                    idx + 1, len(groundtruth_data)))
            tf_example, num_annotations_skipped = create_tf_example(
                frame, image_dir, category_index)
            total_num_annotations_skipped += num_annotations_skipped
            shard_idx = idx % num_shards
            output_tfrecords[shard_idx].write(tf_example.SerializeToString())
        tf.logging.info('Finished writing, skipped {} annotations.'.format(
            total_num_annotations_skipped))
Example No. 13
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
                                            output_path, include_masks,
                                            num_shards):
    with contextlib2.ExitStack() as tf_record_close_stack, tf.gfile.GFile(
            annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        annotations_index = {}
        if 'annotations' in groundtruth_data:
            for annotation in groundtruth_data['annotations']:
                image_id = annotation['image_id']
                if image_id not in annotations_index:
                    annotations_index[image_id] = []
                annotations_index[image_id].append(annotation)

        for idx, image in enumerate(images):
            # Some images may have no annotations; default to an empty list.
            annotations_list = annotations_index.get(image['id'], [])
            tf_example = create_tf_example(image, annotations_list, image_dir,
                                           category_index, include_masks)
            shard_idx = idx % num_shards
            output_tfrecords[shard_idx].write(tf_example.SerializeToString())
Example No. 14
def process_list(path, output_path, num_shards, min_size, do_write=True):
    global total_faces
    global num_filtered

    with open(path) as f:
        content = [l.strip() for l in f]
    examples = []
    i = 0
    while i < len(content):
        image_path = os.path.join(r'X:\wider-face\WIDER_all\images', content[i])
        i += 1
        num_boxes = int(content[i])
        i += 1
        boxes = []
        for j in range(i, i + num_boxes):
            total_faces += 1
            box = [int(a) for a in content[j].split()[:4]]
            if (min_size == 0) or (box[2] > min_size and box[3] > min_size):
                boxes += [box]
            else:
                num_filtered += 1
        # Entries with zero boxes still carry one dummy box line in the list file.
        i += max(num_boxes, 1)
        examples += [(image_path, boxes)]

    if do_write:
        with contextlib2.ExitStack() as tf_record_close_stack:
            output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
                tf_record_close_stack, output_path, num_shards)
            for index, example in tqdm(enumerate(examples),
                                       total=len(examples)):
                tf_example = create_tf_example(example)
                output_shard_index = index % num_shards
                output_tfrecords[output_shard_index].write(
                    tf_example.SerializeToString())
    else:
        return examples
Example No. 15
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))
            xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

            if not os.path.exists(xml_path):
                logging.warning('Could not find %s, ignoring example.', xml_path)
                continue
            with tf.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            try:
                tf_example = dict_to_tf_example(
                    data,
                    label_map_dict,
                    image_dir)
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', xml_path)
Example No. 16
def main(_):
    record_files = list_records(FLAGS.path_to_records)
    logging.debug("Number of records to be processed: {}".format(
        len(record_files)))
    next_example = read_records(record_files, shuffle=FLAGS.shuffle)

    # To maximize file I/O throughput, split the training data into pieces.
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_records = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, FLAGS.output_file, FLAGS.num_shards)

        with tf.Session() as sess:
            index = 0
            while True:
                try:
                    serialized_example = sess.run(next_example)
                    index += 1
                    output_shard_index = index % FLAGS.num_shards
                    output_records[output_shard_index].write(
                        serialized_example)

                    logging.debug("Samples processed: {}".format(index))

                except tf.errors.OutOfRangeError:
                    break
Example No. 17
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
    """Creates a TFRecord file from examples.

    Args:
      output_filename: Path to where output file is saved.
      num_shards: Number of shards for output file.
      label_map_dict: The label map dictionary.
      annotations_dir: Directory where annotation files are stored.
      image_dir: Directory where image files are stored.
      examples: Examples to parse and save to tf record.
      faces_only: If True, generates bounding boxes for pet faces.  Otherwise
        generates bounding boxes (as well as segmentations for full pet bodies).
      mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
        smaller file sizes.
    """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)

        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))

            list_file_name = example.split('.')
            json_file = os.path.join(annotations_dir,
                                     '.'.join(list_file_name[:-1]) + '.json')
            with open(json_file) as f:
                json_data = json.load(f)

            try:
                tf_example = dict_to_tf_example(json_data,
                                                label_map_dict,
                                                image_dir,
                                                example,
                                                faces_only=faces_only,
                                                mask_type=mask_type)
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', json_file)
Example No. 18
def _create_tf_record_from_wfs_annotations(annotations_file, image_root_dir,
                                           output_path, include_masks,
                                           num_shards):
    with contextlib2.ExitStack() as tf_record_close_stack, tf.gfile.GFile(
            annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        annotations_index = {}
        if 'annotations' in groundtruth_data:
            tf.logging.info(
                'Found groundtruth annotations. Building annotations index.')
            for annotation in groundtruth_data['annotations']:
                image_id = annotation['image_id']
                if image_id not in annotations_index:
                    annotations_index[image_id] = []
                annotations_index[image_id].append(annotation)
        missing_annotation_count = 0
        for image in images:
            image_id = image['id']
            if image_id not in annotations_index:
                missing_annotation_count += 1
                annotations_index[image_id] = []
        tf.logging.info('%d images are missing annotations.',
                        missing_annotation_count)

        total_num_annotations_skipped = 0
        not_found_images = 0
        for idx, image in enumerate(images):
            annotations_list = annotations_index[image['id']]
            # Back-filled images have an empty annotations list and no category
            # to derive the image folder from, so skip them.
            if not annotations_list:
                continue
            category_name = category_index[annotations_list[0]
                                           ['category_id']]['name']
            category_folder = get_directory_name(category_name)
            image_dir = os.path.join(image_root_dir, category_folder)

            result, tf_example, num_annotations_skipped = create_tf_example(
                image, annotations_list, image_dir, category_index,
                include_masks)

            if result is None:
                not_found_images += 1
                continue

            total_num_annotations_skipped += num_annotations_skipped
            shard_idx = idx % num_shards
            output_tfrecords[shard_idx].write(tf_example.SerializeToString())

        print("\n")
        tf.logging.info('Finished writing, skipped %d annotations.',
                        total_num_annotations_skipped)
Example No. 19
def create_tf_record(
    output_filename,
    num_shards,
    label_map_dict,
    annotations_dir,
    image_dir,
    examples,
    faces_only=True,
    mask_type="png",
):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info("On image %d of %d", idx, len(examples))
            xml_path = os.path.join(annotations_dir, "xmls", example + ".xml")
            mask_path = os.path.join(annotations_dir, "trimaps",
                                     example + ".png")

            if not os.path.exists(xml_path):
                logging.warning("Could not find %s, ignoring example.",
                                xml_path)
                continue
            with tf.gfile.GFile(xml_path, "r") as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)["annotation"]

            try:
                tf_example = dict_to_tf_example(
                    data,
                    mask_path,
                    label_map_dict,
                    image_dir,
                    faces_only=faces_only,
                    mask_type=mask_type,
                )
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
            except ValueError:
                logging.warning("Invalid example: %s, ignoring.", xml_path)
Example No. 20
def write_records(examples, output_path, num_shards):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        for index, example in tqdm(enumerate(examples), total=len(examples)):
            tf_example = create_tf_example(example)
            output_shard_index = index % num_shards
            output_tfrecords[output_shard_index].write(
                tf_example.SerializeToString())
def encode_to_tfr_record(test_feature, out_tfr_file):
    num_shards = 100
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, out_tfr_file, num_shards)
        for index, example in enumerate(test_feature):
            tf_example = create_tf_example(example)
            output_shard_index = index % num_shards
            output_tfrecords[output_shard_index].write(
                tf_example.SerializeToString())
Example No. 22
def _create_tf_record_from_coco_annotations(annotations_file, image_dir,
                                            output_path, include_masks,
                                            num_shards):
    """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: number of output file shards.
  """
    with contextlib2.ExitStack() as tf_record_close_stack, \
        tf.gfile.GFile(annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        annotations_index = {}
        if 'annotations' in groundtruth_data:
            tf.logging.info(
                'Found groundtruth annotations. Building annotations index.')
            for annotation in groundtruth_data['annotations']:
                image_id = annotation['image_id']
                if image_id not in annotations_index:
                    annotations_index[image_id] = []
                annotations_index[image_id].append(annotation)
        missing_annotation_count = 0
        for image in images:
            image_id = image['id']
            if image_id not in annotations_index:
                missing_annotation_count += 1
                annotations_index[image_id] = []
        tf.logging.info('%d images are missing annotations.',
                        missing_annotation_count)

        total_num_annotations_skipped = 0
        for idx, image in enumerate(images):
            if idx % 100 == 0:
                tf.logging.info('On image %d of %d', idx, len(images))
            annotations_list = annotations_index[image['id']]
            _, tf_example, num_annotations_skipped, is_empty = create_tf_example(
                image, annotations_list, image_dir, category_index,
                include_masks)
            total_num_annotations_skipped += num_annotations_skipped
            shard_idx = idx % num_shards
            if not is_empty:
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
        tf.logging.info('Finished writing, skipped %d annotations.',
                        total_num_annotations_skipped)
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_box_annotations_csv', 'input_images_directory',
        'input_label_map', 'output_tf_record_path_prefix'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
    all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
    if FLAGS.input_image_label_annotations_csv:
        all_label_annotations = pd.read_csv(
            FLAGS.input_image_label_annotations_csv)
        all_label_annotations.rename(
            columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
    else:
        all_label_annotations = None
    all_images = tf.gfile.Glob(
        os.path.join(FLAGS.input_images_directory, '*.jpg'))
    all_image_ids = [
        os.path.splitext(os.path.basename(v))[0] for v in all_images
    ]
    all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
    all_annotations = pd.concat(
        [all_box_annotations, all_image_ids, all_label_annotations])

    tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
            FLAGS.num_shards)

        for counter, image_data in enumerate(
                all_annotations.groupby('ImageID')):
            tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...',
                                   1000, counter)

            image_id, image_annotations = image_data
            # In OID image file names are formed by appending ".jpg" to the image ID.
            image_path = os.path.join(FLAGS.input_images_directory,
                                      image_id + '.jpg')
            with tf.gfile.Open(image_path) as image_file:
                encoded_image = image_file.read()

            tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
                image_annotations, label_map, encoded_image)
            if tf_example:
                shard_idx = int(image_id, 16) % FLAGS.num_shards
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
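
Note that this example departs from round-robin sharding: the shard is derived deterministically from the hexadecimal image ID (int(image_id, 16) % num_shards), so the same image always lands in the same shard regardless of iteration order. A sketch of the same idea for arbitrary string IDs, using hashlib for a hash that is stable across processes (the built-in hash() is salted per run):

import hashlib

def shard_for_id(image_id, num_shards):
    # Stable across runs and machines, unlike the built-in hash().
    digest = hashlib.md5(image_id.encode('utf8')).hexdigest()
    return int(digest, 16) % num_shards
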
Example No. 24
def _create_tf_record_from_rsna_set(parsed, pids, num_shards, record_name):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, os.path.join(FLAGS.output_dir, record_name),
            num_shards)
        for index, pid in enumerate(pids):
            data = parsed[pid]
            tf_example = create_tf_example(FLAGS.dicom_dir, pid, data['label'],
                                           data['boxes'])
            output_shard_index = index % num_shards
            output_tfrecords[output_shard_index].write(
                tf_example.SerializeToString())
Example No. 25
def shard_tf(output_filebase, examples, num_shards=10):
    count = len(examples)
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filebase, num_shards)
        for index, example in enumerate(examples):
            print("Creating example {}/{}".format(index, count), end="\r")

            tf_example = create_tf_example(example)
            output_shard_index = index % num_shards
            output_tfrecords[output_shard_index].write(
                tf_example.SerializeToString())
Example No. 26
def create_tf_record(output_filename, file_pars):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, 1)
        for data, label in file_pars:
            try:
                tf_example = dict_to_tf_example(data, label)
                if tf_example:
                    output_tfrecords[0].write(tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', data)
Example No. 27
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_filename, num_shards)
    for idx, example in enumerate(examples):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples))
      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
      mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')

      if not os.path.exists(xml_path):
        logging.warning('Could not find %s, ignoring example.', xml_path)
        continue
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      try:
        tf_example = dict_to_tf_example(
            data,
            mask_path,
            label_map_dict,
            image_dir,
            faces_only=faces_only,
            mask_type=mask_type)
        if tf_example:
          shard_idx = idx % num_shards
          output_tfrecords[shard_idx].write(tf_example.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', xml_path)
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     mask_type='png'):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, f_example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))
            example = os.path.splitext(f_example)[0]
            xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
            mask_path = os.path.join(annotations_dir, 'trimaps',
                                     example + '.png')

            if not os.path.exists(xml_path):
                logging.warning('Could not find %s, ignoring example.',
                                xml_path)
                continue
            with tf.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            try:
                tf_example = dict_to_tf_example(data,
                                                mask_path,
                                                label_map_dict,
                                                image_dir,
                                                mask_type=mask_type)
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(
                        tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', xml_path)
Example No. 29
  def test_sharded_tfrecord_writes(self):
    with contextlib2.ExitStack() as tf_record_close_stack:
      output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
          tf_record_close_stack,
          os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), 10)
      for idx in range(10):
        # TFRecordWriter.write expects bytes under Python 3.
        output_tfrecords[idx].write('test_{}'.format(idx).encode('utf-8'))

    for idx in range(10):
      tf_record_path = '{}-{:05d}-of-00010'.format(
          os.path.join(tf.test.get_temp_dir(), 'test.tfrec'), idx)
      records = list(tf.python_io.tf_record_iterator(tf_record_path))
      self.assertAllEqual(records, ['test_{}'.format(idx).encode('utf-8')])
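
The test above also shows the shard naming scheme on disk: base-00000-of-00010 through base-00009-of-00010. A typical way to read such a set back with tf.data (a sketch; assumes TensorFlow 2.x, where tf.data.AUTOTUNE is available, and the shard files produced above):

import tensorflow as tf

# Glob all shards, shuffle file order, and interleave reads for throughput.
files = tf.data.Dataset.list_files('test.tfrec-?????-of-00010', shuffle=True)
dataset = files.interleave(tf.data.TFRecordDataset,
                           cycle_length=4,
                           num_parallel_calls=tf.data.AUTOTUNE)
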
Example No. 30
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: number of output file shards.
  """
  with contextlib2.ExitStack() as tf_record_close_stack, \
      tf.gfile.GFile(annotations_file, 'r') as fid:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_path, num_shards)
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in annotations_index:
          annotations_index[image_id] = []
        annotations_index[image_id].append(annotation)
    missing_annotation_count = 0
    for image in images:
      image_id = image['id']
      if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      shard_idx = idx % num_shards
      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
Example No. 31
def gen_tfrecord(panda_df, output_path, num_shards = 10):
	"""Creates a TFRecord of the current dataframe into the output file"""
	with contextlib2.ExitStack() as tf_record_close_stack:
		writer = tf_record_creation_util.open_sharded_output_tfrecords(
			tf_record_close_stack, output_path, num_shards)
		grouped = split(panda_df, 'filename')
		for idx, group in enumerate(grouped):
			if idx % 100 == 0:
				print("On image " + str(idx) + " of " + str(len(grouped)))
			tf_example = create_tf_example(group, "./images/raw")
			shard_idx = idx % num_shards
			writer[shard_idx].write(tf_example.SerializeToString())

		print("Successfully created the TFRecords: {}".format(output_path))
Example No. 32
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     use_alt_names):
    """Creates a TFRecord file from examples.

    Args:
        output_filename: Path to where output file is saved.
        num_shards: Number of shards for output file.
        label_map_dict: The label map dictionary.
        annotations_dir: Directory where annotation files are stored.
        image_dir: Directory where image files are stored.
        examples: Examples to parse and save to tf record.
        use_alt_names: use alternative class name mapping.
    """

    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)

        for idx, example in enumerate(examples):
            if idx % 10 == 0:
                logger.info('On image %d of %d', idx, len(examples))

            xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

            if not os.path.exists(xml_path):
                logger.warning('Could not find %s, ignoring example.', xml_path)
                continue

            with tf.io.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
                xml = etree.fromstring(xml_str)
                data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
                try:
                    tf_example = dict_to_tf_example(
                        data=data,
                        label_map_dict=label_map_dict,
                        image_subdirectory=image_dir,
                        use_alt_names=use_alt_names)

                    if tf_example:
                        shard_idx = idx % num_shards
                        output_tfrecords[shard_idx].write(tf_example.SerializeToString())
                except ValueError:
                    logger.warning('Invalid example: %s, ignoring.', xml_path)
Example No. 33
def main(_):
  tf.logging.set_verbosity(tf.logging.INFO)

  required_flags = [
      'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
      'output_tf_record_path_prefix'
  ]
  for flag_name in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError('Flag --{} is required'.format(flag_name))

  label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
  all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
  if FLAGS.input_image_label_annotations_csv:
    all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
    all_label_annotations.rename(
        columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
  else:
    all_label_annotations = None
  all_images = tf.gfile.Glob(
      os.path.join(FLAGS.input_images_directory, '*.jpg'))
  all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
  all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
  all_annotations = pd.concat(
      [all_box_annotations, all_image_ids, all_label_annotations])

  tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))

  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
        FLAGS.num_shards)

    for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
      tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
                             counter)

      image_id, image_annotations = image_data
      # In OID image file names are formed by appending ".jpg" to the image ID.
      image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
      with tf.gfile.Open(image_path) as image_file:
        encoded_image = image_file.read()

      tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
          image_annotations, label_map, encoded_image)
      if tf_example:
        shard_idx = int(image_id, 16) % FLAGS.num_shards
        output_tfrecords[shard_idx].write(tf_example.SerializeToString())

def create_tf_record(output_filename, num_shards, examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack,
            output_filename,
            num_shards)
        for idx, example in enumerate(examples):
            img_path = os.path.join(read_bucket, example)
            if not os.path.isfile(img_path):
                continue
            with tf.gfile.GFile(img_path, 'rb') as fid:
                encoded_jpg = fid.read()
            encoded_jpg_io = io.BytesIO(encoded_jpg)
            image = PIL.Image.open(encoded_jpg_io)
            if image.format != 'JPEG':
                raise ValueError('Image format not JPEG')
            key = hashlib.sha256(encoded_jpg).hexdigest()

            width, height = image.size

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []  # 'coke', 'pepsi', 'coke'...
            classes = []  # 1, 2, 1...
            difficult_obj = []
            truncated = []
            poses = []

            for annotation in annotations[example]:
                xmins.append(annotation['x'])
                xmaxs.append(annotation['x2'])
                ymins.append(annotation['y'])
                ymaxs.append(annotation['y2'])
                classes_text.append(annotation['label'].encode('utf8'))
                classes.append(1)  # temporary, I need to assign labels to actual ids
                difficult_obj.append(0)
                truncated.append(0)
                poses.append(''.encode('utf8'))

            try:
                feature_dict = {
                    'image/height': dataset_util.int64_feature(height),
                    'image/width': dataset_util.int64_feature(width),
                    'image/filename': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/source_id': dataset_util.bytes_feature(example.encode('utf8')),
                    'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
                    'image/encoded': dataset_util.bytes_feature(encoded_jpg),
                    'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
                    'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label': dataset_util.int64_list_feature(classes),
                    'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
                    'image/object/truncated': dataset_util.int64_list_feature(truncated),
                    'image/object/view': dataset_util.bytes_list_feature(poses)
                }
                tf_example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                print('Invalid example, ignoring.')
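
The last example hard-codes every class id to 1 (see the "temporary" comment in it). With a label map dictionary like the ones used elsewhere on this page, the lookup is one line per annotation; a sketch, assuming label_map_dict maps class names to integer ids (e.g. {'coke': 1, 'pepsi': 2}):

def class_ids_for(annotations, label_map_dict):
    # Map each annotation's label string to its integer id from the label map.
    return [label_map_dict[annotation['label']] for annotation in annotations]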