def _process_dataset(name, filenames, labels, bbox_info, num_shards):
    """Convert a list of image files into sharded TFRecord files.

    :param name: unique string identifying the split (e.g. 'train', 'validation').
    :param filenames: list of strings; image file paths.
    :param labels: list of integers; label ids parallel to ``filenames``.
    :param bbox_info: nested dict keyed by label id then file id, or None
        when bounding-box info is not used.
    :param num_shards: number of shards to split the dataset into.
    """
    assert len(filenames) == len(labels)

    num_images = len(filenames)
    shard_offsets = dataset_utils.make_shard_offsets(
        num_images, FLAGS.num_threads, num_shards)
    shard_output_filenames = dataset_utils.make_shard_filenames(
        name, num_images, FLAGS.num_threads, num_shards)

    def _run_thread(thread_index):
        # Each worker handles its own slice of offsets and output file names.
        _process_image_files_batch(thread_index,
                                   shard_offsets[thread_index],
                                   shard_output_filenames[thread_index],
                                   filenames, labels, bbox_info)

    dataset_utils.thread_execute(FLAGS.num_threads, _run_thread)
    dataset_utils.log('%s: Finished writing all %d images in data set.' %
                      (datetime.now(), num_images))
def _process_image_files_batch(thread_index, offsets, output_filenames,
                               filenames, labels, bbox_info):
    """Read one thread's slice of the image list and write TFRecord shards.

    :param thread_index: index of the worker thread running this batch.
    :param offsets: list of (start, end) offset pairs, one per shard assigned
        to this thread.
    :param output_filenames: output shard file names, parallel to ``offsets``.
    :param filenames: full list of image file paths.
    :param labels: full list of integer labels, parallel to ``filenames``.
    :param bbox_info: dict mapping file id to bounding boxes, or None when
        bounding boxes are not used.
    """
    assert len(offsets) == len(output_filenames)
    assert len(filenames) == len(labels)

    num_files_in_thread = offsets[-1][1] - offsets[0][0]
    counter = 0
    # A single thread may be assigned more than one shard.
    for offset, output_filename in zip(offsets, output_filenames):
        output_file = os.path.join(FLAGS.output_dir, output_filename)
        writer = tf.python_io.TFRecordWriter(output_file)

        # offset holds the (start, end) index range of the current shard.
        files_in_shard = np.arange(offset[0], offset[1], dtype=int)
        shard_counter = 0
        for i in files_in_shard:
            filename = filenames[i]
            label = labels[i]

            # Bounding boxes are keyed by the file name without its extension.
            file_id = os.path.splitext(os.path.basename(filename))[0]
            bbox = None if bbox_info is None else bbox_info[file_id]

            image_data, height, width, image_format = _process_image(
                filename, bbox)

            # NOTE(review): the format returned by _process_image is ignored
            # and 'jpg' is always recorded — confirm every input is JPEG.
            example = data_util.convert_to_example_without_bbox(
                image_data, 'jpg', label, height, width)
            writer.write(example.SerializeToString())

            counter += 1
            shard_counter += 1
            if not counter % 1000:
                dataset_utils.log(
                    '%s [thread %2d]: Processed %d of %d images in thread batch.'
                    % (datetime.now(), thread_index, counter,
                       num_files_in_thread))

        writer.close()
        dataset_utils.log(
            '%s [thread %2d]: Wrote %d images to %s' %
            (datetime.now(), thread_index, shard_counter, output_file))
def main(unused_argv):
    """Build Naver-Food TFRecord shards for the train and validation splits."""
    if FLAGS.data_dir is None or FLAGS.output_dir is None:
        parser.print_help()
        return

    dataset_utils.log('Make Naver-Food TFRecord dataset by label.')

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    bbox_info = None
    if FLAGS.use_bbox:
        bbox_info = _get_bbox_info(FLAGS.data_dir)
        dataset_utils.log(' - Use bounding box info. (opt. ON)')

    source_dir = os.path.join(FLAGS.data_dir, 'images')

    # Train split is written as 128 shards, validation as 16.
    filenames_train, labels_train, id_to_name, total = _find_image_files(
        'train', source_dir)
    dataset_utils.log('Convert [train] dataset.')
    _process_dataset('train', filenames_train, labels_train, bbox_info, 128)

    filenames_val, labels_val, id_to_name, total = _find_image_files(
        'validation', source_dir)
    dataset_utils.log('Convert [validation] dataset.')
    _process_dataset('validation', filenames_val, labels_val, bbox_info, 16)
def main(unused_argv):
    """Build FGVC-Aircraft TFRecord shards.

    Extracts the dataset archive if needed, folds the official val split into
    a deterministically shuffled training set, and converts the training and
    test splits to sharded TFRecord files.
    """
    if (FLAGS.data_dir is None) or (FLAGS.output_dir is None):
        parser.print_help()
        return

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    data_path = FLAGS.data_dir
    image_filepath = os.path.join(data_path, "fgvc-aircraft-2013b.tar.gz")

    # Extract the archive only once; --force-local keeps tar from treating a
    # Windows drive-letter path (C:...) as a remote host.
    if not os.path.exists(os.path.join(data_path, 'fgvc-aircraft-2013b')):
        subprocess.call([
            "tar", "zxvf",
            image_filepath.replace("\\", "/"), "-C",
            data_path.replace("\\", "/"), "--force-local"
        ])

    data_path = os.path.join(data_path, 'fgvc-aircraft-2013b')
    train_filenames, train_labels = _get_filenames_and_labels(
        data_path, 'data/images_train.txt', 'data/images_variant_train.txt',
        'data/variants.txt')

    # The official val split is merged into training.
    val_filenames, val_labels = _get_filenames_and_labels(
        data_path, 'data/images_val.txt', 'data/images_variant_val.txt',
        'data/variants.txt')
    train_filenames.extend(val_filenames)
    train_labels.extend(val_labels)

    # Fixed seed: repeated runs shuffle identically.
    shuffled_index = list(range(len(train_filenames)))
    random.seed(12345)
    random.shuffle(shuffled_index)
    train_filenames = [train_filenames[i] for i in shuffled_index]
    train_labels = [train_labels[i] for i in shuffled_index]

    validation_filenames, validation_labels = _get_filenames_and_labels(
        data_path,
        'data/images_test.txt',
        'data/images_variant_test.txt',
        'data/variants.txt',
        shuffle=False)

    dataset_utils.log('Convert [train] dataset.')
    # BUG FIX: _process_dataset takes (name, filenames, labels, bbox_info,
    # num_shards); the bbox_info argument was missing, so the shard count
    # bound to bbox_info and num_shards was never supplied.
    _process_dataset('train', train_filenames, train_labels, None,
                     FLAGS.train_shards)

    dataset_utils.log('Convert [validation] dataset.')
    _process_dataset('validation', validation_filenames, validation_labels,
                     None, FLAGS.validation_shards)
def main(unused_argv):
    """Build TFRecord shards for the train and validation splits."""
    if (FLAGS.data_dir is None) or (FLAGS.output_dir is None):
        parser.print_help()
        return

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    if FLAGS.use_bbox:
        bbox_info = _get_bbox_info()
        dataset_utils.log(' - Use bounding box info. (opt. ON)')
    else:
        bbox_info = None

    filenames, labels, total = _find_image_files('train', FLAGS.data_dir)
    # BUG FIX: _process_dataset requires a num_shards argument; it was
    # omitted, which raises a TypeError at call time.
    _process_dataset('train', filenames, labels, bbox_info,
                     FLAGS.train_shards)

    filenames, labels, total = _find_image_files('validation', FLAGS.data_dir)
    _process_dataset('validation', filenames, labels, bbox_info,
                     FLAGS.validation_shards)
# --- Example #6 (score: 0) ---
def main(_):
    """Build UEC-food100 TFRecord shards for the train and validation splits."""
    if (FLAGS.data_dir is None) or (FLAGS.output_dir is None):
        parser.print_help()
        return

    # Each thread writes a whole number of shards, so the shard counts must
    # be divisible by the thread count.
    assert not FLAGS.train_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
    assert not FLAGS.validation_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.validation_shards')
    print('Saving results to {}'.format(FLAGS.output_dir))

    dataset_utils.log('Make UEC-food100 TFRecord dataset.')

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dropped the no-op format(...) wrappers around the literal paths.
    label_path = os.path.join(FLAGS.data_dir, "meta/labels.txt")
    train_path = os.path.join(FLAGS.data_dir, "meta/train.txt")
    validation_path = os.path.join(FLAGS.data_dir, "meta/test.txt")

    train_filenames, train_labels = _get_filenames_and_labels(
        FLAGS.data_dir, train_path, label_path, shuffle=True)

    validation_filenames, validation_labels = _get_filenames_and_labels(
        FLAGS.data_dir, validation_path, label_path, shuffle=False)

    dataset_utils.log('Convert [train] dataset.')
    # BUG FIX: insert bbox_info=None so the shard count binds to the
    # num_shards parameter of _process_dataset (5-argument signature).
    _process_dataset('train', train_filenames, train_labels, None,
                     FLAGS.train_shards)

    dataset_utils.log('Convert [validation] dataset.')
    _process_dataset('validation', validation_filenames, validation_labels,
                     None, FLAGS.validation_shards)
# --- Example #7 (score: 0) ---
def main(_):
    """Build TFRecord shards from FLAGS.input_dir for train and validation."""
    if (FLAGS.input_dir is None) or (FLAGS.output_dir is None):
        parser.print_help()
        return

    # Each thread writes a whole number of shards, so the shard counts must
    # be divisible by the thread count.
    assert not FLAGS.train_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards')
    assert not FLAGS.validation_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.validation_shards')
    print('Saving results to {}'.format(FLAGS.output_dir))

    # NOTE(review): removed commented-out paths referencing the Stanford
    # Online Products dataset; this 'UEC-food100' log message looks like a
    # copy-paste leftover — confirm which dataset this script targets.
    dataset_utils.log('Make UEC-food100 TFRecord dataset.')

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    train_filenames, train_labels = get_filenames_and_labels(FLAGS.input_dir)
    validation_filenames, validation_labels = get_filenames_and_labels(
        FLAGS.input_dir, is_training=False)

    dataset_utils.log('Convert [train] dataset.')
    # BUG FIX: insert bbox_info=None so the shard count binds to the
    # num_shards parameter of _process_dataset (5-argument signature).
    _process_dataset('train', train_filenames, train_labels, None,
                     FLAGS.train_shards)

    dataset_utils.log('Convert [validation] dataset.')
    _process_dataset('validation', validation_filenames, validation_labels,
                     None, FLAGS.validation_shards)
def main(unused_argv):
    """Build TFRecord shards for train/validation found under FLAGS.data_dir."""
    if (FLAGS.data_dir is None) or (FLAGS.output_dir is None):
        parser.print_help()
        return

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    if FLAGS.use_bbox:
        bbox_info = _get_bbox_info()
        dataset_utils.log(' - Use bounding box info. (opt. ON)')
    else:
        bbox_info = None

    filenames_train, filenames_val, labels_train, labels_val, total_train, total_val \
      = _find_image_files(FLAGS.data_dir)

    print('filenames_train', len(filenames_train))
    dataset_utils.log('Convert [train] dataset.')
    # BUG FIX: bbox_info was computed above but never passed, and the shard
    # count was binding to the bbox_info parameter of _process_dataset;
    # removed the stale commented-out conversion block as well.
    _process_dataset('train', filenames_train, labels_train, bbox_info,
                     FLAGS.train_shards)

    dataset_utils.log('Convert [validation] dataset.')
    _process_dataset('validation', filenames_val, labels_val, bbox_info,
                     FLAGS.validation_shards)
def main(_):
    """Build UEC-food100 TFRecord shards, optionally with bounding-box info."""
    if (FLAGS.data_dir is None) or (FLAGS.output_dir is None):
        parser.print_help()
        return

    # Each thread must write a whole number of shards.
    assert not FLAGS.train_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.train_shards'
    )
    assert not FLAGS.validation_shards % FLAGS.num_threads, (
        'Please make the FLAGS.num_threads commensurate with FLAGS.validation_shards'
    )
    print('Saving results to {}'.format(FLAGS.output_dir))

    dataset_utils.log('Make UEC-food100 TFRecord dataset.')

    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    bbox_info = None
    if FLAGS.use_bbox:
        bbox_info = _get_bbox_info(FLAGS.data_dir)
        dataset_utils.log(' - Use bounding box info. (opt. ON)')

    # val{0..3}.txt form the training set; val4.txt is held out for validation.
    train_filenames, train_labels = _get_filenames_and_labels(
        [0, 1, 2, 3], shuffle=True)
    validation_filenames, validation_labels = _get_filenames_and_labels(
        [4], shuffle=False)

    dataset_utils.log('Convert [train] dataset.')
    _process_dataset('train', train_filenames, train_labels, bbox_info,
                     FLAGS.train_shards)

    dataset_utils.log('Convert [validation] dataset.')
    _process_dataset('validation', validation_filenames, validation_labels,
                     bbox_info, FLAGS.validation_shards)

    dataset_utils.log('Make UEC-food100 TFRecord dataset. [OK]')