def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.output_dir:
        FLAGS.output_dir = os.path.dirname(FLAGS.generation_file)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Load generations.
    vocab = text_utils.Vocab.load(FLAGS.vocab_path)
    captions = load_synthetic_captions(FLAGS.generation_file, vocab)
    images = []
    # Avoid shadowing the `captions` dict with the per-image caption list.
    for image_id, image_captions in captions.items():
        metadata = image_utils.ImageMetadata(image_id=image_id,
                                             captions=image_captions,
                                             objects=FLAGS.features)
        images.append(metadata)

    # Dump to sharded TFRecords.
    io_utils.convert_to_tfrecords(dataset=images,
                                  num_shards=FLAGS.num_shards,
                                  basename=os.path.join(
                                      FLAGS.output_dir, "train"),
                                  example_fn=io_utils.caption_example)
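All of these examples rely on the same module set. A minimal preamble they appear to assume, reconstructed from the names used in the snippets (the `language.capwap` paths are an educated guess from the CapWAP utility names, not something the excerpts confirm):

import collections
import json
import os

from absl import app
from absl import flags

# tf.logging only exists in the TF1-style API, so the v1 compat layer is assumed.
import tensorflow.compat.v1 as tf

# Assumed locations of the CapWAP utility modules.
from language.capwap.utils import image_utils
from language.capwap.utils import io_utils
from language.capwap.utils import text_utils

FLAGS = flags.FLAGS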
Example #2
def main(argv):
  if len(argv) > 1:
    raise app.UsageError("Too many command-line arguments.")

  tf.logging.set_verbosity(tf.logging.INFO)
  tf.io.gfile.makedirs(FLAGS.output_dir)

  pattern = "%s/%s-*" % (FLAGS.sqa_path, FLAGS.split)
  questions = load_synthetic_questions(pattern)

  # Convert to ImageMetadata.
  images = []
  for image_id, data in questions.items():
    metadata = image_utils.ImageMetadata(
        image_id=image_id,
        question_ids=data["question_ids"],
        questions=data["questions"],
        answers=data["answers"],
        captions=data["captions"],
        objects="%s/%s_features.hdf5" % (FLAGS.coco_path, FLAGS.split))
    images.append(metadata)

  # Dump to sharded TFRecords.
  io_utils.convert_to_tfrecords(
      dataset=images,
      num_shards=FLAGS.num_shards,
      basename=os.path.join(FLAGS.output_dir, FLAGS.split),
      example_fn=io_utils.vqa_example)
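`convert_to_tfrecords` writes `num_shards` files under `basename`. Assuming it follows the usual TensorFlow sharding convention (e.g. `train-00000-of-00016`; the exact pattern lives in `io_utils` and is not shown here), a downstream reader would pick the shards up with a glob:

# Hypothetical reader side: match all shards written for the "train" basename.
files = tf.io.gfile.glob(os.path.join(FLAGS.output_dir, "train-*"))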
Example #3
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Load data and re-split according to Karpathy paper.
    splits = io_utils.load_karpathy_splits(FLAGS.splits)
    vocab = text_utils.Vocab.load(FLAGS.vocab_path)
    captions = collections.defaultdict(list)
    for split in ["train", "val"]:
        captions_file = ("%s/annotations/captions_%s2014.json" %
                         (FLAGS.coco_path, split))
        for image_id, split_captions in load_captions(captions_file,
                                                      vocab).items():
            captions[image_id].extend(split_captions)

    for split, image_ids in splits.items():
        # Convert to RCInputs.
        inputs = []
        for image_id, image_captions in captions.items():
            if image_id not in image_ids:
                continue
            for input_ids, input_mask, segment_ids in image_captions:
                inputs.append(
                    RCInputs(image_id, input_ids, input_mask, segment_ids))

        # Dump to sharded TFRecords.
        io_utils.convert_to_tfrecords(
            dataset=inputs,
            num_shards=getattr(FLAGS, "%s_shards" % split),
            basename=os.path.join(FLAGS.output_dir, split),
            example_fn=rc_example)
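`RCInputs` and `rc_example` are defined elsewhere in the source script. Judging from the positional constructor call above, a plausible minimal stand-in for `RCInputs` is a namedtuple (hypothetical, for illustration only):

# Field order inferred from RCInputs(image_id, input_ids, input_mask, segment_ids).
RCInputs = collections.namedtuple(
    "RCInputs", ["image_id", "input_ids", "input_mask", "segment_ids"])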
Example #4
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Load VIZWIZ data.
    vocab = text_utils.Vocab.load(FLAGS.vocab_path)
    vizwiz_data = collections.defaultdict(
        lambda: collections.defaultdict(list))
    for split in ["train", "val"]:
        questions_file = "%s/Annotations/%s.json" % (FLAGS.vizwiz_path, split)
        for image_id, entry in load_questions(questions_file, vocab).items():
            for k, v in entry.items():
                vizwiz_data[image_id][k].extend(v)

    # Convert to ImageMetadata.
    images = []
    for image_id, data in vizwiz_data.items():
        images.append(
            image_utils.ImageMetadata(image_id=image_id,
                                      question_ids=data["question_ids"],
                                      questions=data["questions"],
                                      answers=data["answers"],
                                      objects="%s/features.hdf5" %
                                      FLAGS.vizwiz_path))

    # Load pre-computed random splits.
    # (This is for backwards compatibility with previous experiments).
    splits_file = "%s/capwap_splits.json" % FLAGS.vizwiz_path
    with tf.io.gfile.GFile(splits_file, "r") as f:
        split_ids = json.load(f)
    splits = dict(
        test=[im for im in images if im.image_id in split_ids["test"]],
        val=[im for im in images if im.image_id in split_ids["val"]],
        train=[im for im in images if im.image_id in split_ids["train"]])

    # Make sure we have the right images.
    assert len(splits["train"]) == len(split_ids["train"])
    assert len(splits["val"]) == len(split_ids["val"])
    assert len(splits["test"]) == len(split_ids["test"])

    # Dump to sharded TFRecords and also RC format.
    for split, split_images in splits.items():
        io_utils.convert_to_tfrecords(
            dataset=split_images,
            num_shards=getattr(FLAGS, "%s_shards" % split),
            basename=os.path.join(FLAGS.output_dir, split),
            example_fn=io_utils.vqa_example)
        output_file = os.path.join(FLAGS.output_dir, split + ".json")
        io_utils.convert_to_rc(split_images, output_file)
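Several of these scripts resolve the shard count with `getattr(FLAGS, "%s_shards" % split)`, which presupposes one integer flag per split. A sketch of the flag definitions that pattern assumes (the default values here are made up):

flags.DEFINE_integer("train_shards", 256, "Number of output shards for the train split.")
flags.DEFINE_integer("val_shards", 4, "Number of output shards for the val split.")
flags.DEFINE_integer("test_shards", 8, "Number of output shards for the test split.")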
Example #5
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Load and aggregate all VQA data.
    vocab = text_utils.Vocab.load(FLAGS.vocab_path)
    questions = collections.defaultdict(lambda: collections.defaultdict(list))
    for split in ["train", "val"]:
        questions_file = ("%s/v2_OpenEnded_mscoco_%s2014_questions.json" %
                          (FLAGS.vqa_path, split))
        annotations_file = ("%s/v2_mscoco_%s2014_annotations.json" %
                            (FLAGS.vqa_path, split))
        split_questions = load_questions(questions_file, annotations_file,
                                         vocab)
        for image_id, entry in split_questions.items():
            for k, v in entry.items():
                questions[image_id][k].extend(v)

    # Re-split according to Karpathy splits.
    splits = io_utils.load_karpathy_splits(FLAGS.splits)
    for split, image_ids in splits.items():
        # Convert to ImageMetadata.
        images = []
        for image_id, data in questions.items():
            if image_id not in image_ids:
                continue
            images.append(
                image_utils.ImageMetadata(image_id=image_id,
                                          question_ids=data["question_ids"],
                                          questions=data["questions"],
                                          answers=data["answers"],
                                          objects="%s/%s_features.hdf5" %
                                          (FLAGS.coco_path, split)))

        # Dump to sharded TFRecords.
        io_utils.convert_to_tfrecords(
            dataset=images,
            num_shards=getattr(FLAGS, "%s_shards" % split),
            basename=os.path.join(FLAGS.output_dir, split),
            example_fn=io_utils.vqa_example)

        # And to RC formatted file.
        output_file = os.path.join(FLAGS.output_dir, split + ".json")
        io_utils.convert_to_rc(images, output_file)
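`load_karpathy_splits` is used throughout as a mapping from split name to a collection of image ids (`for split, image_ids in splits.items()` followed by membership tests). Assuming that interface, its return value looks roughly like this (the ids are illustrative):

# Inferred shape of io_utils.load_karpathy_splits(...): split name -> image ids.
# A set makes the repeated `image_id not in image_ids` checks O(1).
splits = {
    "train": {9, 25, 30},
    "val": {42, 73},
    "test": {11, 64},
}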
Example #6
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Create dataset splits.
    metadata_file = "%s/image_data.json" % FLAGS.v7w_path
    dataset_file = os.path.join(FLAGS.v7w_path, "dataset_v7w_telling.json")
    karpathy_splits = io_utils.load_karpathy_splits(FLAGS.splits)
    splits = make_splits(dataset_file, metadata_file, karpathy_splits)

    # Load Visual7W questions.
    vocab = text_utils.Vocab.load(FLAGS.vocab_path)
    questions = load_questions(dataset_file, vocab)

    for split, image_ids in splits.items():
        # Convert to ImageMetadata.
        images = []
        for image_id, data in questions.items():
            if image_id not in image_ids:
                continue
            images.append(
                image_utils.ImageMetadata(image_id=image_id,
                                          question_ids=data["question_ids"],
                                          questions=data["questions"],
                                          answers=data["answers"],
                                          objects="%s/features.hdf5" %
                                          FLAGS.v7w_path))

        # Dump to sharded TFRecords.
        io_utils.convert_to_tfrecords(
            dataset=images,
            num_shards=getattr(FLAGS, "%s_shards" % split),
            basename=os.path.join(FLAGS.output_dir, split),
            example_fn=io_utils.vqa_example)

        # And to RC formatted file.
        output_file = os.path.join(FLAGS.output_dir, split + ".json")
        io_utils.convert_to_rc(images, output_file)
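Throughout these examples, `objects` points `ImageMetadata` at a precomputed image-feature file (`*.hdf5`). The real schema lives in `image_utils`, but assuming a conventional HDF5 layout, a quick way to inspect one of these files is:

import h5py

# Hypothetical inspection: list the top-level groups/datasets in a feature file.
with h5py.File("features.hdf5", "r") as f:
    print(list(f.keys())[:5])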
Example #7
def main(argv):
    if len(argv) > 1:
        raise app.UsageError("Too many command-line arguments.")

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.io.gfile.makedirs(FLAGS.output_dir)

    # Load COCO train image ids to be sure not to use them.
    coco_train = io_utils.load_karpathy_splits(FLAGS.splits)["train"]
    vocab = text_utils.Vocab.load(FLAGS.vocab_path)

    # Load pre-computed random splits.
    # (This is for backwards compatibility with previous experiments).
    splits_file = "%s/capwap_splits.json" % FLAGS.gqa_path
    with tf.io.gfile.GFile(splits_file, "r") as f:
        split_ids = json.load(f)

    # Load GQA train.
    filename = "%s/%s_balanced_questions.json" % (FLAGS.gqa_path, "train")
    gqa_data = load_questions(filename, vocab)
    images = []
    for image_id, data in gqa_data.items():
        images.append(
            image_utils.ImageMetadata(image_id=image_id,
                                      question_ids=data["question_ids"],
                                      questions=data["questions"],
                                      answers=data["answers"],
                                      objects="%s/train_features.hdf5" %
                                      FLAGS.gqa_path))

    # Make sure we have the right images.
    images = [im for im in images if im.image_id in split_ids["train"]]
    assert len(images) == len(split_ids["train"])

    # Dump.
    io_utils.convert_to_tfrecords(dataset=images,
                                  num_shards=FLAGS.train_shards,
                                  basename=os.path.join(
                                      FLAGS.output_dir, "train"),
                                  example_fn=io_utils.vqa_example)
    io_utils.convert_to_rc(images, os.path.join(FLAGS.output_dir,
                                                "train.json"))

    # Load GQA dev.
    filename = "%s/%s_balanced_questions.json" % (FLAGS.gqa_path, "val")
    gqa_data = load_questions(filename, vocab)
    images = []
    for image_id, data in gqa_data.items():
        images.append(
            image_utils.ImageMetadata(image_id=image_id,
                                      question_ids=data["question_ids"],
                                      questions=data["questions"],
                                      answers=data["answers"],
                                      objects="%s/val_features.hdf5" %
                                      FLAGS.gqa_path))
    images = [image for image in images if image.image_id not in coco_train]

    # Resplit into dev and test.
    splits = dict(
        val=[im for im in images if im.image_id in split_ids["val"]],
        test=[im for im in images if im.image_id in split_ids["test"]])

    # Make sure we have the right images.
    assert len(splits["val"]) == len(split_ids["val"])
    assert len(splits["test"]) == len(split_ids["test"])

    # Dump to sharded TFRecords and also RC format.
    for split, split_images in splits.items():
        io_utils.convert_to_tfrecords(
            dataset=split_images,
            num_shards=getattr(FLAGS, "%s_shards" % split),
            basename=os.path.join(FLAGS.output_dir, split),
            example_fn=io_utils.vqa_example)
        output_file = os.path.join(FLAGS.output_dir, split + ".json")
        io_utils.convert_to_rc(split_images, output_file)
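Each of these `main(argv)` functions is an absl entry point; the standard boilerplate that actually runs them (omitted from every excerpt) is:

if __name__ == "__main__":
    app.run(main)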