Code Example #1
File: model_utils.py Project: rpeloff/moonshot
import os

# file_io is a project-local helper module providing write_csv.


def save_model(model, output_dir, epoch, step, metric, current_score,
               best_score, name="model"):
    """Save a model and its training progress."""
    assert hasattr(model, "loss") and model.loss is not None
    assert hasattr(model, "optimizer") and model.optimizer is not None

    model.save(os.path.join(output_dir, f"{name}.h5"))

    file_io.write_csv(
        os.path.join(output_dir, f"{name}.step"),
        [epoch, step, metric, current_score, best_score])
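
This helper assumes a Keras-style model (model.save with an .h5 path writes HDF5, and the loss and optimizer attributes are set after compiling) plus the project-local file_io.write_csv. A minimal usage sketch with hypothetical names ("checkpoints", "val_accuracy", best_score tracking):

# Hypothetical usage in a training loop: checkpoint when validation improves.
if current_score > best_score:
    best_score = current_score
    save_model(model, "checkpoints", epoch, step,
               "val_accuracy", current_score, best_score, name="best_model")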
Code Example #2
import hashlib
import os

import numpy as np

from absl import logging

# file_io is a project-local helper module providing write_csv.


def main(argv):
    del argv  # unused

    logging.log(logging.INFO, "Logging application {}".format(__file__))

    # Compute md5 hash of Flickr 30k images
    flickr30k_path = os.path.join("data", "external", "flickr30k_images")
    flickr30k_files = os.listdir(flickr30k_path)

    logging.log(logging.INFO, "Computing Flickr 30k image hashes ...")
    flickr30k_hash = []
    for filename in flickr30k_files:
        with open(os.path.join(flickr30k_path, filename), "rb") as f:
            image_bytes = f.read()
        flickr30k_hash.append(hashlib.md5(image_bytes).hexdigest())

    flickr30k_hash = np.asarray(flickr30k_hash)

    # Compute md5 hash of MSCOCO images
    mscoco_path = os.path.join("data", "external", "mscoco", "train2017")
    mscoco_files = os.listdir(mscoco_path)

    logging.log(logging.INFO, "Computing MSCOCO image hashes ...")
    mscoco_hash = []
    for filename in mscoco_files:
        with open(os.path.join(mscoco_path, filename), "rb") as f:
            image_bytes = f.read()
        mscoco_hash.append(hashlib.md5(image_bytes).hexdigest())

    mscoco_hash = np.asarray(mscoco_hash)

    # Find Flickr 30k images whose hashes also appear in the MSCOCO hashes
    match_idx = np.where(np.isin(flickr30k_hash, mscoco_hash))[0]
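    # (np.isin marks which flickr30k hashes occur in mscoco_hash; np.where
    #  turns the boolean mask into integer indices)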

    mscoco_remove = []
    for index in match_idx:
        mscoco_index = np.where(mscoco_hash == flickr30k_hash[index])[0][0]
        logging.log(
            logging.INFO,
            "Found Flickr30k image {} matching MSCOCO (train 2017) image {}".format(
                flickr30k_files[index],
                mscoco_files[mscoco_index]))
        mscoco_remove.append(mscoco_files[mscoco_index])

    # Write matches to file
    output_path = os.path.join("data", "splits", "mscoco", "remove_flickr30k.txt")
    logging.log(
        logging.INFO,
        "Writing list of Flickr 30k images in MSCOCO dataset: {}".format(output_path))

    file_io.write_csv(
        output_path,
        mscoco_remove)
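
The match loop above rescans mscoco_hash with np.where for every duplicate. An equivalent one-pass variant builds a hash-to-filename map first; a sketch, assuming no two distinct MSCOCO files share an MD5:

# Sketch: constant-time lookups instead of a np.where scan per match.
mscoco_by_hash = dict(zip(mscoco_hash.tolist(), mscoco_files))
mscoco_remove = [mscoco_by_hash[h]
                 for h in flickr30k_hash.tolist() if h in mscoco_by_hash]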
Code Example #3
import copy
import os

import numpy as np

from absl import flags
from absl import logging

# flickr8k, flickraudio, keywords, and file_io are project-local modules;
# their import paths and the FLAGS definitions follow the moonshot project.

FLAGS = flags.FLAGS


def main(argv):
    del argv  # unused

    logging.log(logging.INFO, "Logging application {}".format(__file__))
    if FLAGS.debug:
        logging.set_verbosity(logging.DEBUG)
        logging.log(logging.DEBUG, "Running in debug mode")

    # get flickr 8k text captions
    caption_corpus = flickr8k.load_flickr8k_captions(
        os.path.join("data", "external", "flickr8k_text"),
        splits_dir=os.path.join("data", "splits", "flickr8k"))

    subsets = ["train", "dev", "test"]
    caption_corpus = {
        subset: caption_set
        for (subset, caption_set) in zip(subsets, caption_corpus)
    }

    # get flickr-audio data
    faudio_uid_dict = flickraudio.fetch_isolated_word_lists(
        os.path.join("data", "processed", "flickr_audio", "mfcc"))

    faudio_data = {}
    for subset in subsets:
        faudio_data[subset] = flickraudio.extract_all_uid_metadata(
            faudio_uid_dict[subset])

    # flickr 8k caption keyword filtering
    # ===================================

    caption_keywords = {}
    for subset in subsets:
        # 1. identify and lemmatize keywords with a language model
        caption_keywords[subset] = keywords.process_caption_keywords(
            caption_corpus[subset], spacy_model=FLAGS.spacy_model)

        # 2. filter quality of keyword-image pairs
        caption_keywords[subset] = keywords.filter_keyword_quality(
            caption_keywords[subset], min_caption_occurence=FLAGS.min_captions)

    # flickr 8k one-shot benchmark evaluation data selection
    # ======================================================

    # select random classes for one-shot learning evaluation benchmark
    # from keywords paired with at least 20 and fewer than 100 unique images

    # minimum required keyword-images for one-shot learning and evaluation
    keep_min_keywords = keywords.get_count_limited_keywords(
        caption_keywords["train"], min_occurence=20, use_lemma=True)

    # limit the effect on background data, specifically flickr-audio words
    remove_max_keywords = keywords.get_count_limited_keywords(
        caption_keywords["train"], min_occurence=100, use_lemma=True)

    one_shot_keyword_range = np.asarray(  # sort because of undefined set order
        list(sorted(set(keep_min_keywords) - set(remove_max_keywords))))

    np.random.seed(42)
    rand_idx = np.random.choice(np.arange(len(one_shot_keyword_range)),
                                FLAGS.one_shot_classes,
                                replace=False)

    one_shot_keyword_set = one_shot_keyword_range[rand_idx]
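    # (the fixed seed above makes this keyword draw reproducible across runs)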

    # select 30 of the random keywords, discarding ambiguous terms
    manual_keywords = np.array([
        "asian", "basketball", "bench", "bird", "blonde", "boat", "car",
        "cliff", "climber", "dance", "fire", "floor", "ground", "guitar",
        "hair", "hill", "horse", "obstacle", "paddle", "path", "purple",
        "rope", "sand", "sled", "snowboard", "splash", "suit", "surfboard",
        "throw", "vest"
    ])

    for keyword in manual_keywords:
        assert keyword in one_shot_keyword_set

    one_shot_keyword_set = manual_keywords

    # fetch keyword-image pairs for one-shot evaluation benchmark
    one_shot_caption_keywords = keywords.filter_keep_keywords(
        caption_keywords["train"], one_shot_keyword_set)

    # flickr 8k background data selection and filtering
    # =================================================

    background_caption_keywords = {}
    background_caption_keywords_full = {}
    for subset in subsets:
        # get list of one-shot keywords and semantically similar keywords
        one_shot_remove_words = keywords.find_similar_keywords(
            caption_keywords[subset],
            one_shot_keyword_set,
            threshold=FLAGS.similarity,
            use_lemma=True,
            spacy_model=FLAGS.spacy_model)
        one_shot_remove_words += one_shot_keyword_set.tolist()

        # remove one-shot keywords and associated images from filtered keywords
        background_caption_keywords[subset] = (
            keywords.filter_remove_keyword_images(
                caption_keywords[subset], one_shot_remove_words))

        # store full version of background data before removing long tail
        background_caption_keywords_full[subset] = copy.deepcopy(
            background_caption_keywords[subset])

        # remove the long tail of infrequent keywords, keeping only those with
        # the minimum required images for background training (e.g. meta-learning)
        keep_keywords = keywords.get_count_limited_keywords(
            background_caption_keywords[subset],
            min_occurence=FLAGS.min_occurence,
            use_lemma=True)
        background_caption_keywords[subset] = keywords.filter_keep_keywords(
            background_caption_keywords[subset], keep_keywords)

    # flickr-audio alignment with background and one-shot evaluation data
    # ===================================================================

    # align flickr-audio spoken word image pairs for one-shot/background learning
    # by removing pairs that do not correspond to a keyword-image pair
    faudio_one_shot_data, one_shot_caption_keywords = keywords.filter_flickr_audio_by_keywords(
        faudio_data["train"], one_shot_caption_keywords)

    faudio_background_data = {}
    faudio_background_data_full = {}
    for subset in subsets:
        faudio_background_data[subset], background_caption_keywords[subset] = (
            keywords.filter_flickr_audio_by_keywords(
                faudio_data[subset], background_caption_keywords[subset]))

        faudio_background_data_full[subset], background_caption_keywords_full[
            subset] = (keywords.filter_flickr_audio_by_keywords(
                faudio_data[subset], background_caption_keywords_full[subset]))

    # write one-shot evaluation and background keyword-image set splits
    # =================================================================

    # write keyword set splits to data directory
    if FLAGS.mode == "write" or FLAGS.mode == "both":
        file_io.write_csv(  # keyword list
            os.path.join("data", "splits", "flickr8k",
                         "one_shot_keywords.txt"), one_shot_keyword_set)

        file_io.write_csv(  # one-shot evaluation benchmark split
            os.path.join("data", "splits", "flickr8k",
                         "one_shot_evaluation.csv"),
            *one_shot_caption_keywords,
            column_names=["image_uid", "caption_number", "keyword", "lemma"])

        file_io.write_csv(  # aligned flickr-audio uids for one-shot evaluation
            os.path.join("data", "splits", "flickr8k",
                         "faudio_one_shot_evaluation.txt"),
            faudio_one_shot_data[0])

        for subset in subsets:
            file_io.write_csv(  # background subset split, one-shot data removed
                os.path.join("data", "splits", "flickr8k",
                             "background_{}.csv".format(subset)),
                *background_caption_keywords[subset],
                column_names=[
                    "image_uid", "caption_number", "keyword", "lemma"
                ])

            file_io.write_csv(  # background subset split (with tail), one-shot data removed
                os.path.join("data", "splits", "flickr8k",
                             "background_full_{}.csv".format(subset)),
                *background_caption_keywords_full[subset],
                column_names=[
                    "image_uid", "caption_number", "keyword", "lemma"
                ])

            file_io.write_csv(  # aligned flickr-audio uids for background split
                os.path.join("data", "splits", "flickr8k",
                             "faudio_background_{}.txt".format(subset)),
                faudio_background_data[subset][0])

            file_io.write_csv(  # aligned flickr-audio uids for background split
                os.path.join("data", "splits", "flickr8k",
                             "faudio_background_full_{}.txt".format(subset)),
                faudio_background_data_full[subset][0])

    # output keyword stats; TODO(rpeloff): distribution plots
    if FLAGS.mode == "statistics" or FLAGS.mode == "both":
        keywords.log_keyword_stats(one_shot_caption_keywords,
                                   "one_shot_evaluation")
        for subset in subsets:
            keywords.log_keyword_stats(background_caption_keywords[subset],
                                       "background_{}".format(subset))
            keywords.log_keyword_stats(
                background_caption_keywords_full[subset],
                "background_full_{}".format(subset))

    # save example one-shot evaluation images if specified
    if FLAGS.save_images == "one_shot" or FLAGS.save_images == "both":
        save_keywords = np.asarray(one_shot_keyword_set)

        if FLAGS.num_keywords is not None:
            save_keyword_idx = np.random.choice(np.arange(len(save_keywords)),
                                                FLAGS.num_keywords,
                                                replace=False)
            save_keywords = save_keywords[save_keyword_idx]

        keywords.save_keyword_images(one_shot_caption_keywords,
                                     os.path.join("data", "external",
                                                  "flickr8k_images"),
                                     save_keywords,
                                     os.path.join("figures", "flickr8k",
                                                  "one_shot_keywords"),
                                     max_per_row=5,
                                     max_images=20)

    # save example one-shot background images if specified
    if FLAGS.save_images == "background" or FLAGS.save_images == "both":
        save_keywords = np.unique(background_caption_keywords["train"][3])

        if FLAGS.num_keywords is not None:
            save_keyword_idx = np.random.choice(np.arange(len(save_keywords)),
                                                FLAGS.num_keywords,
                                                replace=False)
            save_keywords = save_keywords[save_keyword_idx]

        keywords.save_keyword_images(background_caption_keywords["train"],
                                     os.path.join("data", "external",
                                                  "flickr8k_images"),
                                     save_keywords,
                                     os.path.join("figures", "flickr8k",
                                                  "background_keywords"),
                                     max_per_row=5,
                                     max_images=20)
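
keywords.get_count_limited_keywords is project code; the comments in Code Example #3 suggest it counts unique images per keyword lemma and keeps those meeting a minimum. A simplified hypothetical sketch (raw lemma counts standing in for unique-image counts):

from collections import Counter

def count_limited_keywords(lemmas, min_occurence):
    """Keep lemmas occurring at least min_occurence times (sketch only)."""
    counts = Counter(lemmas)
    return [lemma for lemma, count in counts.items() if count >= min_occurence]

# Band selection as in Code Example #3: at least 20 and fewer than 100.
# candidates = sorted(set(count_limited_keywords(lemmas, 20))
#                     - set(count_limited_keywords(lemmas, 100)))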
Code Example #4
import copy
import os

import numpy as np

from absl import flags
from absl import logging

# flickr8k, flickr30k, keywords, and file_io are project-local modules;
# their import paths and the FLAGS definitions follow the moonshot project.

FLAGS = flags.FLAGS


def main(argv):
    del argv  # unused

    logging.log(logging.INFO, "Logging application {}".format(__file__))
    if FLAGS.debug:
        logging.set_verbosity(logging.DEBUG)
        logging.log(logging.DEBUG, "Running in debug mode")

    # get flickr 30k text captions with flickr 8k removed
    flickr8k_splits = flickr8k.load_flickr8k_splits(
        os.path.join("data", "splits", "flickr8k"))

    caption_corpus = flickr30k.load_flickr30k_captions(
        os.path.join("data", "external", "flickr30k_text"),
        splits_dir=os.path.join("data", "splits", "flickr30k"),
        flickr8k_splits=flickr8k_splits)

    subsets = ["train", "dev", "test"]
    caption_corpus = {
        subset: caption_set
        for (subset, caption_set) in zip(subsets, caption_corpus)
    }

    # flickr 30k caption keyword filtering
    # ====================================

    caption_keywords = {}
    for subset in subsets:
        # 1. identify and lemmatize keywords with a language model
        caption_keywords[subset] = keywords.process_caption_keywords(
            caption_corpus[subset], spacy_model=FLAGS.spacy_model)

        # 2. filter quality of keyword-image pairs
        caption_keywords[subset] = keywords.filter_keyword_quality(
            caption_keywords[subset], min_caption_occurence=FLAGS.min_captions)

    # flickr 30k one-shot benchmark evaluation data selection
    # =======================================================

    # load one-shot keywords selected from flickr 8k keyword-image pairs
    one_shot_keyword_set = file_io.read_csv(
        os.path.join("data", "splits", "flickr8k", "one_shot_keywords.txt"))[0]

    one_shot_caption_keywords = keywords.filter_keep_keywords(
        caption_keywords["train"], one_shot_keyword_set)

    missing_set = set(one_shot_keyword_set) - set(one_shot_caption_keywords[3])
    if len(missing_set) > 0:
        logging.log(
            logging.INFO,
            "Flickr30k is missing one-shot keywords: {}".format(missing_set))

    # flickr 30k background data selection and filtering
    # ==================================================

    background_caption_keywords = {}
    background_caption_keywords_full = {}
    for subset in subsets:
        # get list of one-shot keywords and semantically similar keywords
        one_shot_remove_words = keywords.find_similar_keywords(
            caption_keywords[subset],
            one_shot_keyword_set,
            threshold=FLAGS.similarity,
            use_lemma=True,
            spacy_model=FLAGS.spacy_model)
        one_shot_remove_words += one_shot_keyword_set

        # remove one-shot keywords and associated images from filtered keywords
        background_caption_keywords[subset] = (
            keywords.filter_remove_keyword_images(
                caption_keywords[subset], one_shot_remove_words))

        # store full version of background data before removing long tail
        background_caption_keywords_full[subset] = copy.deepcopy(
            background_caption_keywords[subset])

        # remove the long tail of infrequent keywords, keeping only those with
        # the minimum required images for background training (e.g. meta-learning)
        keep_keywords = keywords.get_count_limited_keywords(
            background_caption_keywords[subset],
            min_occurence=FLAGS.min_occurence,
            use_lemma=True)
        background_caption_keywords[subset] = keywords.filter_keep_keywords(
            background_caption_keywords[subset], keep_keywords)

    # write one-shot evaluation and background keyword-image set splits
    # =================================================================

    # write keyword set splits to data directory
    if FLAGS.mode == "write" or FLAGS.mode == "both":
        file_io.write_csv(  # one-shot evaluation benchmark split
            os.path.join("data", "splits", "flickr30k",
                         "one_shot_evaluation.csv"),
            *one_shot_caption_keywords,
            column_names=["image_uid", "caption_number", "keyword", "lemma"])

        for subset in subsets:
            file_io.write_csv(  # background subset split, one-shot data removed
                os.path.join("data", "splits", "flickr30k",
                             "background_{}.csv".format(subset)),
                *background_caption_keywords[subset],
                column_names=[
                    "image_uid", "caption_number", "keyword", "lemma"
                ])

            file_io.write_csv(  # background subset split (with tail), one-shot data removed
                os.path.join("data", "splits", "flickr30k",
                             "background_full_{}.csv".format(subset)),
                *background_caption_keywords_full[subset],
                column_names=[
                    "image_uid", "caption_number", "keyword", "lemma"
                ])

    # output keyword stats; TODO(rpeloff): distribution plots
    if FLAGS.mode == "statistics" or FLAGS.mode == "both":
        keywords.log_keyword_stats(one_shot_caption_keywords,
                                   "one_shot_evaluation")
        for subset in subsets:
            keywords.log_keyword_stats(background_caption_keywords[subset],
                                       "background_{}".format(subset))
            keywords.log_keyword_stats(
                background_caption_keywords_full[subset],
                "background_full_{}".format(subset))

    # save example one-shot evaluation images if specified
    if FLAGS.save_images == "one_shot" or FLAGS.save_images == "both":
        save_keywords = np.asarray(one_shot_keyword_set)

        if FLAGS.num_keywords is not None:
            save_keyword_idx = np.random.choice(np.arange(len(save_keywords)),
                                                FLAGS.num_keywords,
                                                replace=False)
            save_keywords = save_keywords[save_keyword_idx]

        keywords.save_keyword_images(one_shot_caption_keywords,
                                     os.path.join("data", "external",
                                                  "flickr30k_images"),
                                     save_keywords,
                                     os.path.join("figures", "flickr30k",
                                                  "one_shot_keywords"),
                                     max_per_row=5,
                                     max_images=20)

    # save example one-shot background images if specified
    if FLAGS.save_images == "background" or FLAGS.save_images == "both":
        save_keywords = np.unique(background_caption_keywords["train"][3])

        if FLAGS.num_keywords is not None:
            save_keyword_idx = np.random.choice(np.arange(len(save_keywords)),
                                                FLAGS.num_keywords,
                                                replace=False)
            save_keywords = save_keywords[save_keyword_idx]

        keywords.save_keyword_images(background_caption_keywords["train"],
                                     os.path.join("data", "external",
                                                  "flickr30k_images"),
                                     save_keywords,
                                     os.path.join("figures", "flickr30k",
                                                  "background_keywords"),
                                     max_per_row=5,
                                     max_images=20)
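
file_io.write_csv and file_io.read_csv are project-local helpers. Their call shapes in these examples (one positional argument per column, an optional column_names header row, and read_csv(...)[0] yielding the first column back) suggest a column-oriented contract. A hypothetical sketch of that contract with the standard csv module, not the project's actual implementation (the skip_header parameter is made up):

import csv

def write_csv(path, *columns, column_names=None):
    # Each positional argument is one column; rows are the zipped columns.
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if column_names is not None:
            writer.writerow(column_names)
        writer.writerows(zip(*columns))

def read_csv(path, skip_header=False):
    # Mirror of write_csv: return a tuple of columns.
    with open(path, newline="", encoding="utf-8") as f:
        rows = list(csv.reader(f))
    if skip_header:
        rows = rows[1:]
    return tuple(list(column) for column in zip(*rows))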