def main(_):

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    image_dir = FLAGS.image_dir
    annotations_dir = FLAGS.annotations_dir
    logging.info('Reading from dataset: %s', annotations_dir)
    examples_list = os.listdir(annotations_dir)

    for idx, example in enumerate(examples_list):
        if example.endswith('.xml'):
            if idx % 50 == 0:
                print('On image %d of %d' % (idx, len(examples_list)))

            path = os.path.join(annotations_dir, example)
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, image_dir, label_map_dict)
            writer.write(tf_example.SerializeToString())

    writer.close()
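Every example below funnels the raw VOC XML through dataset_util.recursive_parse_xml_to_dict. For reference, the helper in the TensorFlow Object Detection API's dataset_util is roughly the following; 'object' tags are accumulated into a list because an image can contain several annotated objects (a sketch of the public helper, details may vary by version):

def recursive_parse_xml_to_dict(xml):
    """Recursively parses an lxml element into a Python dict;
    repeated 'object' tags are accumulated into a list."""
    if not xml:  # leaf node: no children
        return {xml.tag: xml.text}
    result = {}
    for child in xml:
        child_result = recursive_parse_xml_to_dict(child)
        if child.tag != 'object':
            result[child.tag] = child_result[child.tag]
        else:
            if child.tag not in result:
                result[child.tag] = []
            result[child.tag].append(child_result[child.tag])
    return {xml.tag: result}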
Example #2
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('Set arg must be in {}'.format(SETS))
    set_name = FLAGS.set

    data_dir = FLAGS.data_dir
    # Validate data directory
    if not data_dir:
        raise ValueError('Did not specify required arg "data_dir"')
    elif not os.path.isdir(data_dir):
        raise ValueError(
            'data_dir arg does not appear to be a real directory, please check it.'
        )

    output_path_root = FLAGS.output_path
    # Infer the full output path unless it looks like a full path
    if not os.path.isabs(output_path_root):
        # Then it's a relative path, as intended
        output_path_root = os.path.join(data_dir, output_path_root)
    if not os.path.isdir(output_path_root):
        raise ValueError(
            'Could not validate that the target output directory exists, please check the arg.'
        )
    tf_filename = set_name + '.tfrecord'
    output_path = os.path.join(output_path_root, tf_filename)
    writer = tf.python_io.TFRecordWriter(output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    samples_path_root = os.path.join(data_dir, 'ImageSets', 'Main')
    if not os.path.isdir(samples_path_root):
        raise ValueError(
            'Expected ImageSets/Main under the data directory, did not find it.')
    # Get the names of .txt files in there and infer the one we need
    for rootpath, dirnames, filenames in os.walk(samples_path_root):
        for filename in filenames:
            if filename.endswith('.txt'):
                # In the worst case we'll use whatever .txt file is there,
                samples_path = os.path.join(rootpath, filename)
                # but preferably one whose name contains the set name.
                if set_name in filename:
                    break

    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    # Should not need to validate samples_list since we derived it from existing TXT's
    samples_list = dataset_util.read_samples_list(samples_path)
    for idx, sample in enumerate(samples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(samples_list))
        path = os.path.join(annotations_dir, sample + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_record = dict_to_tf_record(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
        writer.write(tf_record.SerializeToString())

    writer.close()
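The dict_to_tf_example / dict_to_tf_record helpers these snippets call vary from repo to repo (extra arguments for masks, difficult flags, image subdirectories), so none of them is canonical. As a point of reference, a minimal VOC-style version looks roughly like the sketch below, using the TF Object Detection feature-key convention; the signature and the PIL dependency are illustrative assumptions, not the original helper:

import io
import os

import tensorflow as tf
from PIL import Image  # assumed available for decoding image dimensions
from object_detection.utils import dataset_util


def dict_to_tf_example(data, image_dir, label_map_dict):
    # Read and decode the image to recover its dimensions.
    img_path = os.path.join(image_dir, data['filename'])
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    # Normalize box coordinates to [0, 1] as the OD API expects.
    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes, classes_text = [], []
    for obj in data.get('object', []):
        xmins.append(float(obj['bndbox']['xmin']) / width)
        xmaxs.append(float(obj['bndbox']['xmax']) / width)
        ymins.append(float(obj['bndbox']['ymin']) / height)
        ymaxs.append(float(obj['bndbox']['ymax']) / height)
        classes_text.append(obj['name'].encode('utf8'))
        classes.append(label_map_dict[obj['name']])

    return tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(
            data['filename'].encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))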
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir

    os.makedirs(os.path.dirname(FLAGS.output_path), exist_ok=True)

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    examples_path = os.path.join(data_dir, FLAGS.set + '.txt')
    annotations_dir = os.path.join(
        os.path.split(data_dir)[0], FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in tqdm(
            enumerate(examples_list),
            desc="Parsing annotations from {0} set into TF-Example".format(
                FLAGS.set),
            total=len(examples_list)):
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, FLAGS.set,
                                        label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
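dataset_util.read_examples_list, used throughout these snippets, is a small helper; in the TF Object Detection API it amounts to reading the image-set file and keeping the first whitespace-delimited token of each line (VOC set files look like `000012 1`). Roughly:

def read_examples_list(path):
    """Reads an image-set file and returns the example IDs."""
    with tf.gfile.GFile(path) as fid:
        lines = fid.readlines()
    # VOC set files may carry a trailing label flag; keep only the ID.
    return [line.strip().split(' ')[0] for line in lines]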
Example #4
def main(_):
    data_dir = FLAGS.data_dir

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    annotations_path = os.path.join(data_dir, "annotations")
    idx = 0
    tf_idx = 0
    file_list = os.listdir(annotations_path)
    print(annotations_path)
    # file_list = shuffle(file_list)
    while idx < len(file_list):
        tf_filename = get_output_filename(FLAGS.output_path, tf_idx)
        writer = tf.python_io.TFRecordWriter(tf_filename)
        j = 0
        while idx < len(file_list) and j < int(FLAGS.samples_per_file):
            fname = file_list[idx]
            path = os.path.join(annotations_path, fname[0:-4] + ".xml")
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, data_dir, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
            idx += 1
            j += 1
        tf_idx += 1
        writer.close()
Example #5
def main(_):
    data_dir = FLAGS.data_dir
    output_dir = FLAGS.output_dir
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    images_dir = os.path.join(data_dir, 'images')
    if not os.path.exists(images_dir):
        raise RuntimeError('`data_dir` must contain an `images` directory')

    annotations_dir = os.path.join(data_dir, 'annotations')
    if not os.path.exists(annotations_dir):
        raise RuntimeError('`data_dir` must contain an `annotations` directory of XML files')

    for s in SETS:

        writer = tf.python_io.TFRecordWriter(
            os.path.join(output_dir, '{}.record'.format(s)))

        examples_path = os.path.join(data_dir, 'set', '{}.txt'.format(s))
        examples_list = dataset_util.read_examples_list(examples_path)

        for idx, example in enumerate(examples_list):
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, images_dir, label_map_dict)
            writer.write(tf_example.SerializeToString())
        print('[INFO] `{}.record` saved to `{}` successfully'.format(s, output_dir))

        writer.close()
def generate_tfrecord(data_dir, tf_record_path):
    data_dir = os.path.abspath(data_dir)
    output_path = os.path.abspath(tf_record_path)
    ignore_difficult_instances = False
    writer = tf.python_io.TFRecordWriter(output_path)
    label_map_dict = {'0': 0, '1': 1}
    from glob import glob
    xml_files = glob(os.path.join(data_dir, '*.xml'))
    img_files=glob(os.path.join(data_dir,'*.jpg'))+ \
              glob(os.path.join(data_dir, '*.png'))
    get_bld_filename = lambda path: os.path.splitext(os.path.basename(path))[0]
    get_bld_set = lambda filenames: set(map(get_bld_filename, filenames))
    xml_4_check = get_bld_set(xml_files)
    img_4_check = get_bld_set(img_files)
    diff = xml_4_check.symmetric_difference(img_4_check)
    if len(diff) != 0:
        for item in diff:
            print(item, 'xml or img file lost')
        raise ValueError('please check the data again')

    # Sort both lists by base filename so XMLs and images pair up in zip().
    xml_files.sort(key=get_bld_filename)
    img_files.sort(key=get_bld_filename)
    for idx, (xml_path, img_path) in enumerate(zip(xml_files, img_files)):
        if idx % 100 == 0:
            print('On image {} of {}'.format(idx, len(xml_files)))

        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read().encode('utf8')
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        data['folder'] = data_dir
        data['filename'] = img_path
        tf_example = dict_to_tf_example(data, data_dir, label_map_dict,
                                        ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
    with contextlib2.ExitStack() as tf_record_close_stack:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_filename, num_shards)
        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))
            xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')


            if not os.path.exists(xml_path):
                logging.warning('Could not find %s, ignoring example.', xml_path)
                continue
            with tf.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            try:
                tf_example = dict_to_tf_example(
                    data,
                    label_map_dict,
                    image_dir)
                if tf_example:
                    shard_idx = idx % num_shards
                    output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', xml_path)
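The sharded variant above relies on tf_record_creation_util.open_sharded_output_tfrecords, which opens num_shards writers registered on the ExitStack so they are all closed together when the block exits; records are then round-robined across shards via `idx % num_shards`. The helper is roughly the following sketch of the public utility:

def open_sharded_output_tfrecords(exit_stack, base_path, num_shards):
    """Opens num_shards TFRecord writers, all registered on exit_stack."""
    tf_record_output_filenames = [
        '{}-{:05d}-of-{:05d}'.format(base_path, idx, num_shards)
        for idx in range(num_shards)
    ]
    return [
        exit_stack.enter_context(tf.python_io.TFRecordWriter(file_name))
        for file_name in tf_record_output_filenames
    ]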
Example #8
def main(_):
    data_dir = FLAGS.data_dir

    output_dir = os.path.dirname(FLAGS.output_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading dataset.')
    examples_path = os.path.join(data_dir, 'example', 'cooking_example.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        print('example', example)
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(
            data,
            FLAGS.data_dir,
            label_map_dict,
            example,
            FLAGS.ignore_difficult_instances,
        )
        writer.write(tf_example.SerializeToString())

    writer.close()
Example #9
def main(_):
    images_dir = FLAGS.images_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    os.chdir(images_dir)
    file_types = ('*.jpg', '*.jpeg')
    image_files = []
    for file_type in file_types:
        image_files.extend(glob.glob(file_type))
    annotations_dir = os.path.join(images_dir, FLAGS.annotations_dir)
    for idx, image_file in enumerate(image_files):
        print(idx, image_file)
        annotation_path = os.path.join(
            annotations_dir,
            os.path.splitext(image_file)[0] + '.xml')
        if not os.path.exists(annotation_path):
            print('Could not find %s, ignoring example.' % annotation_path)
            continue
        with tf.gfile.GFile(annotation_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        tf_example = dict_to_tf_example(data, annotations_dir,
                                        FLAGS.images_dir, label_map_dict,
                                        FLAGS.include_masks,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
    writer.close()
Example #10
def fun(path, imgpath):
    img = cv.imread(imgpath)
    line = open(path).read()
    xml = etree.fromstring(line)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
    image_list = []
    box_list = []
    for obj in data['object']:
        box_list.append([
            int(obj['bndbox']['ymin']),
            int(obj['bndbox']['ymax']),
            int(obj['bndbox']['xmin']),
            int(obj['bndbox']['xmax'])
        ])
    # boxe43 is assumed to be a module-level list of candidate crop boxes.
    for i, box in enumerate(boxe43):
        _path = os.path.basename(imgpath).split(
            '.jpg')[0] + "%02d" % i + '.jpg'
        if listIou(box_list, box):
            _path = os.path.join(DIR_yc, _path)
            cv.imwrite(_path, img[box[0]:box[1], box[2]:box[3]])
        else:
            if random.randint(0, 100) < 60: continue
            _path = os.path.join(DIR_zc, _path)
            cv.imwrite(_path, img[box[0]:box[1], box[2]:box[3]])
Example #11
def create_train_records_from_xml_files(label_dirs,
                                        xml_filenames_list,
                                        output_dir,
                                        shard_num=4):
    etrees = [
        read_etree_from_path(os.path.join(label_dir, filename))
        for label_dir, filenames in zip(label_dirs, xml_filenames_list)
        for filename in filenames
    ]
    datas = [
        dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        for xml in etrees
    ]
    datas = [data for data in datas if data.get('object', None)]

    trainset_dir = output_dir
    evalset_dir = _join_path(output_dir, 'eval')
    print("Detected {} valid xml files".format(len(datas)))
    shard_size = len(datas) // shard_num
    processes = []
    for shard_index in range(shard_num):
        start_i = shard_index * shard_size
        end_i = (shard_index +
                 1) * shard_size if shard_index < shard_num - 1 else len(datas)
        process = PipelineProcess(datas[start_i:end_i],
                                  trainset_dir=trainset_dir,
                                  evalset_dir=evalset_dir)
        processes.append(process)
        process.start()

    for process in processes:
        process.join()
        print('Pipeline #{} joined.'.format(process.pid))

    print("done.")
Example #12
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    data_dir = FLAGS.data_dir

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    print(FLAGS.label_map_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    #for year in years:
    logging.info('Reading from QUIZ dataset.')
    examples_path = os.path.join(data_dir, 'split',
                                 'computer_' + FLAGS.set + '.txt')
    print(examples_path)
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
def main(_):
    # if FLAGS.set not in SETS:
    #   raise ValueError('set must be in : {}'.format(SETS))
    # if FLAGS.year not in YEARS:
    #   raise ValueError('year must be in : {}'.format(YEARS))

    data_dir = FLAGS.data_dir
    # years = ['VOC2007', 'VOC2012']
    # if FLAGS.year != 'merged':
    #   years = [FLAGS.year]

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    # for year in years:
    # logging.info('Reading from PASCAL %s dataset.', year)

    # examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
    #                              'aeroplane_' + FLAGS.set + '.txt')

    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    # examples_list = dataset_util.read_examples_list(examples_path)

    # Dataset filenames, deterministically shuffled.
    examples_list = sorted(os.listdir(annotations_dir))
    random.seed(42)
    random.shuffle(examples_list)

    dataset_len = len(examples_list)
    idx = 0
    # for idx, example in enumerate(examples_list):
    split_type = 'test'
    tf_filename = _get_output_filename(FLAGS.output_path, name='', idx=0, type=split_type)
    writer = tf.python_io.TFRecordWriter(tf_filename)
    write_train_flag = False
    while idx < dataset_len:
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        if (100.0 * idx / dataset_len) > FLAGS.test_percentage and not write_train_flag:
            # Switch from the test writer to the train writer, closing the
            # test file so its records are flushed.
            writer.close()
            split_type = 'train'
            tf_filename = _get_output_filename(FLAGS.output_path, name='', idx=0, type=split_type)
            writer = tf.python_io.TFRecordWriter(tf_filename)
            write_train_flag = True

        example = examples_list[idx]
        path = os.path.join(annotations_dir, example)
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
        idx += 1

    writer.close()
def create_tf_record(output_filename,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    path = os.path.join(annotations_dir, 'xmls', example + '.xml')

    if not os.path.exists(path):
      logging.warning('Could not find %s, ignoring example.', path)
      continue
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
    writer.write(tf_example.SerializeToString())

  writer.close()
def create_tf_record(output_filename,
                     label_map_dict,
                     annotation_dir,
                     image_dir,
                     examples,
                     mask_type='png'):
    writer = tf.python_io.TFRecordWriter(output_filename)
    for idx, example in enumerate(examples):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))
        xml_path = annotation_dir + 'xmls/' + example + '.xml'
        mask_dir = annotation_dir + 'masks/'

        if not os.path.exists(xml_path):
            logging.warning('Could not find %s, ignoring example.', xml_path)
            continue
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        try:
            tf_example = dict_to_tf_example(data,
                                            mask_dir,
                                            label_map_dict,
                                            image_dir,
                                            mask_type=mask_type)
            writer.write(tf_example.SerializeToString())
        except ValueError:
            logging.warning('Invalid example: %s, ignoring.', xml_path)

    writer.close()
Example #16
def create_tf_record(
    output_filename,
    #num_shards,
    label_map_dict,
    annotations_dir,
    image_dir,
    examples,
    #faces_only=True,
    #mask_type='png'
):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
    with contextlib2.ExitStack() as tf_record_close_stack:
        writer = tf.python_io.TFRecordWriter(output_filename)
        #output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        #    tf_record_close_stack, output_filename) #num_shards)

        for idx, example in enumerate(examples):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples))
            xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
            #mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')

            if not os.path.exists(xml_path):
                logging.warning('Could not find %s, ignoring example.',
                                xml_path)
                continue
            with tf.gfile.GFile(xml_path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            try:
                tf_example = dict_to_tf_example(
                    data,
                    #mask_path,
                    label_map_dict,
                    image_dir,
                    #faces_only=faces_only,
                    #mask_type=mask_type
                )
                writer.write(tf_example.SerializeToString())
                #if tf_example:
                #shard_idx = idx % num_shards
                # output_tfrecords[shard_idx].write(tf_example.SerializeToString())
            except ValueError:
                logging.warning('Invalid example: %s, ignoring.', xml_path)
        writer.close()
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 'aeroplane_' + FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())

  writer.close()
def create_tf_record(output_filename, label_map_dict, annotations_dir,
                     image_dir, examples):
    """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
    writer = tf.python_io.TFRecordWriter(output_filename)
    for idx, example in enumerate(examples):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples))
        xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

        if not os.path.exists(xml_path):
            logging.warning('Could not find %s, ignoring example.', xml_path)
            continue
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        try:
            tf_example = dict_to_tf_example(data, label_map_dict, image_dir)
            writer.write(tf_example.SerializeToString())
        except ValueError:
            logging.warning('Invalid example: %s, ignoring.', xml_path)

    writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    writer = tf_io.TFRecordWriter(FLAGS.output_path)

    logging.info('Reading from the all_drones dataset.')
    files_list_file = os.path.join(FLAGS.data_dir, FLAGS.set + '.txt')

    examples_list = dataset_util.read_examples_list(files_list_file)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
            logging.info('Saving the tfrecord file to %s', FLAGS.output_path)
        path = os.path.join(FLAGS.annotations_dir, example + '.xml')
        with tf_gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.raw_data_dir, FLAGS.label_map_path,
                                        FLAGS.ignore_difficult_instances,\
                                        is_debug=FLAGS.is_debug)
        writer.write(tf_example.SerializeToString())

    writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from CVHCI dataset.')
    # examples_path = os.path.join(data_dir, 'ImageSets', 'aeroplane_' + FLAGS.set + '.txt') <-- could be important!!!
    examples_path = os.path.join(data_dir, 'ImageSets', FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)

    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))

        path = os.path.join(annotations_dir, example + '.xml')

        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()

        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.image_dir, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    data_dir = FLAGS.data_dir
    label_map_dict = label_map_util.get_label_map_dict(
        '/home/zju/models/research/object_detection/Det_datasets/pascal_label_map.pbtxt'
    )
    txt_path = os.path.join('Det_datasets', 'train_list.txt')
    examples_list = read_examples_list(txt_path)
    for idx, example in enumerate(examples_list):
        path = os.path.join('Det_datasets', example)
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)

        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
        print(data)
        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
    print('************************')
    print('FINISHED')
    print('************************')
def main(_):
    data_dir = FLAGS.data_dir

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from UMDfaces dataset.')
    examples_path = os.path.join(data_dir, 'ImageSets', FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
        if idx % 100 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        if idx % 1000 == 0:
            print("Working on #{}/{} image".format(idx, len(examples_list)))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
Example #24
def main(_):
    data_dir = FLAGS.data_dir

    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from {} dataset at {}...'.format(
        FLAGS.set, data_dir))
    annotations_dir = os.path.join(data_dir, FLAGS.set, 'annotations')
    idx = 0
    for xml_file in os.listdir(annotations_dir):
        # Skip anything that is not an annotation XML.
        if not xml_file.endswith('.xml'):
            continue
        if idx % 10 == 0:
            logging.info('On image %d', idx)
        xml_path = os.path.join(annotations_dir, xml_file)
        with tf.gfile.GFile(xml_path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())
        idx += 1

    writer.close()
Example #25
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))

  data_dir = FLAGS.data_dir
  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)

  # Keep only .xml files and strip the extension.
  examples_list = [
      el[:-4] for el in os.listdir(annotations_dir) if el.endswith('.xml')
  ]

  for idx, example in enumerate(examples_list):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())

  writer.close()
Example #26
def main(_):

    # data directory
    data_dir = FLAGS.data_dir

    # for output
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    # label map dictionary
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    # path to a text file containing the list of jpeg file names (without the .jpeg extension)
    examples_path = os.path.join(data_dir, 'filename_list.txt')
    examples_list = dataset_util.read_examples_list(examples_path)

    # path to folder containing annotations
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)

    for example in examples_list:

        # path to xml file
        path = os.path.join(annotations_dir, example + '.xml')

        # read xml file
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        # to tf.Example format and write to output file
        tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                                FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
def main(_):
    data_dir = FLAGS.data_dir
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    logging.info('Reading from dataset.')
    images_dir = os.path.join(data_dir, FLAGS.images_dir)
    images_path = os.listdir(images_dir)
    annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
    print(annotations_dir)
    examples_list = [os.path.splitext(x)[0] for x in images_path]
    for idx, example in enumerate(examples_list):
        if idx % 10 == 0:
            logging.info('On image %d of %d', idx, len(examples_list))
        path = os.path.join(annotations_dir, example + '.xml')
        with tf.gfile.GFile(path, 'r') as fid:
            xml_str = fid.read()
        xml = etree.fromstring(xml_str)
        data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

        tf_example = dict_to_tf_example(data, FLAGS.data_dir, FLAGS.images_dir,
                                        label_map_dict,
                                        FLAGS.ignore_difficult_instances)
        writer.write(tf_example.SerializeToString())

    writer.close()
def main(_):
    if FLAGS.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    data_dir = FLAGS.data_dir

    train_writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    validation_writer = tf.python_io.TFRecordWriter(FLAGS.output_path +
                                                    ".validation")

    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

    logging.info('Reading from dataset.')
    annotations_dir = data_dir
    files = glob.glob(os.path.join(data_dir, "*.xml"))
    random.shuffle(files)
    train_files = files[:int(len(files) * 0.8)]
    validation_files = files[int(len(files) * 0.8):]
    for writer, files in [(train_writer, train_files),
                          (validation_writer, validation_files)]:
        for idx, xml_file in enumerate(files):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(files))
            path = xml_file
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

            tf_example = dict_to_tf_example(data, path, label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())

        writer.close()
Example #29
def main(_):
    data_dir = FLAGS.data_dir
    years = ['VOC2007', 'VOC2012']
    if FLAGS.year != 'merged':
        years = [FLAGS.year]
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
    for year in years:
        logging.info('Reading from PASCAL %s dataset', year)
        examples_path = FLAGS.set
        annotations_dir = FLAGS.annotations_dir
        print(examples_path)
        examples_list = dataset_util.read_examples_list(examples_path)
        for idx, example in enumerate(examples_list):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(examples_list))
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data, FLAGS.data_dir,
                                            label_map_dict,
                                            FLAGS.ignore_difficult_instances)
            writer.write(tf_example.SerializeToString())
    writer.close()
Example #30
def main(_):
    args = process_command_line()

    if args.set not in SETS:
        raise ValueError('set must be in : {}'.format(SETS))

    output = os.path.join(args.data_dir, args.output_path)

    # touch the file if it doesn't already exist
    if not os.path.exists(output):
        with open(output, 'a'):
            os.utime(output)

    writer = tf.python_io.TFRecordWriter(output)
    label_map_dict = label_map_util.get_label_map_dict(
        os.path.join(args.data_dir, args.label_map_path))
    label_count = {}
    for label_name in label_map_dict:
        label_count[label_name] = 0
    for c in args.collection:
        print('Reading from {0} dataset.'.format(c))
        examples_path = os.path.join(args.data_dir, c, args.set + '.txt')
        png_dir = '{0}/{1}/PNGImages'.format(args.data_dir, c)
        annotations_dir = '{0}/{1}/Annotations'.format(args.data_dir, c)

        with open(examples_path) as fid:
            lines = fid.readlines()
            examples_list = [line.strip() for line in lines]

        for idx, example in enumerate(examples_list):
            if idx % 50 == 0:
                print('Processing image {0} of {1}'.format(
                    idx, len(examples_list)))
            file = os.path.join(annotations_dir, example)
            with open(file, 'r') as fid:
                xml_str = fid.read()
            try:
                xml = etree.fromstring(xml_str)
                data = dataset_util.recursive_parse_xml_to_dict(
                    xml)['annotation']
                tf_example, count = dict_to_tf_example(data, args.data_dir,
                                                       label_map_dict,
                                                       args.labels, png_dir)
                if tf_example:
                    for key, item in count.items():
                        label_count[key] += item
                    writer.write(tf_example.SerializeToString())
                else:
                    print('No objects found in {0}'.format(example))

            except Exception as ex:
                print(ex)
                continue

    writer.close()
    ttl_objs = 0
    for key, item in label_count.items():
        print('{0} {1}'.format(key, item))
        ttl_objs += item
    print('Done. Found {0} examples in {1} set'.format(ttl_objs, args.set))
def main(_):
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))

  data_dir = FLAGS.data_dir

  output_file = os.path.join(FLAGS.output_dir, 'helmet_' + FLAGS.set + '.record')
  writer = tf.python_io.TFRecordWriter(output_file)

  label_map_file = os.path.join(data_dir, FLAGS.label_map_file)
  label_map_dict = label_map_util.get_label_map_dict(label_map_file)
  examples_path = os.path.join(data_dir, 'ImageSets', 'Main',
                                 'helmet_' + FLAGS.set + '.txt')
  annotations_dir = os.path.join(data_dir, FLAGS.annotations_dir)
  examples_list = dataset_util.read_examples_list(examples_path)

  for idx, example in enumerate(examples_list):
    if idx % 5 == 0:
      logging.info('On image %d of %d', idx, len(examples_list))
    path = os.path.join(annotations_dir, example + '.xml')
    with tf.gfile.GFile(path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                    FLAGS.ignore_difficult_instances)
    writer.write(tf_example.SerializeToString())

  writer.close()
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces.  Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_filename, num_shards)
    for idx, example in enumerate(examples):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples))
      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
      mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')

      if not os.path.exists(xml_path):
        logging.warning('Could not find %s, ignoring example.', xml_path)
        continue
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      try:
        tf_example = dict_to_tf_example(
            data,
            mask_path,
            label_map_dict,
            image_dir,
            faces_only=faces_only,
            mask_type=mask_type)
        if tf_example:
          shard_idx = idx % num_shards
          output_tfrecords[shard_idx].write(tf_example.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', xml_path)
Example #34
def create_tf_record(output_filename,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
  """
  writer = tf.python_io.TFRecordWriter(output_filename)
  for idx, example in enumerate(examples):
    if idx % 100 == 0:
      logging.info('On image %d of %d', idx, len(examples))
    xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

    if not os.path.exists(xml_path):
      logging.warning('Could not find %s, ignoring example.', xml_path)
      continue
    with tf.gfile.GFile(xml_path, 'r') as fid:
      xml_str = fid.read()
    xml = etree.fromstring(xml_str)
    data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

    try:
      tf_example = dict_to_tf_example(
          data,
          label_map_dict,
          image_dir)
      writer.write(tf_example.SerializeToString())
    except ValueError:
      logging.warning('Invalid example: %s, ignoring.', xml_path)

  writer.close()
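As a sanity check after generating a record file, it can be read back with tf.data using the standard TF Object Detection feature keys. A minimal TF1-style sketch, assuming a subset of the feature keys written above; the record path is hypothetical:

import tensorflow as tf

# Subset of the standard TF Object Detection feature keys.
feature_spec = {
    'image/encoded': tf.FixedLenFeature([], tf.string),
    'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
    'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
    'image/object/class/label': tf.VarLenFeature(tf.int64),
}

dataset = tf.data.TFRecordDataset(['train.record'])  # hypothetical path
dataset = dataset.map(
    lambda record: tf.parse_single_example(record, feature_spec))
iterator = dataset.make_one_shot_iterator()
first_example = iterator.get_next()

with tf.Session() as sess:
    parsed = sess.run(first_example)
    print(parsed['image/object/class/label'])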