def categorical_knearest_neighbors_from_model_dir(model_dir, categories, k=10):
    word_sequence = load_json_from(model_dir, "word_sequence.json")
    word_embeddings = load_numpy_from(model_dir, "word_embeddings.npy")
    categories_with_neighbors = categorical_knearest_neighbors(
        categories, word_sequence, word_embeddings, k)
    store_json_to(categories_with_neighbors, model_dir,
                  "category_neighbors.json")
def analyse_file(results_dir,
                 results_file,
                 target_file="extern_results_statistics.json"):
    """
        [{
            "caption": "hot dogs and buns on a hot dog bun",
            "question_info": {
                "answer": "red",
                "answer_type": "other",
                "image_id": 42,
                "question": "What color is the flip flop?",
                "question_id": 421
            },
            "type": "word",
            "caption_in_question" : [],
            "caption_in_answer" : []
        },]
        
        For each entry, check whether the caption's stemmed words
        (excluding stop words) are contained in the stemmed words
        (excluding stop words) of the answer and of the question.

        (Question infos are kept track of for the control and
        self-attention runs.)
    """
    result_listing = load_json_from(results_dir, results_file)
    statistics = analyse_listing(result_listing)
    store_json_to(statistics, results_dir, target_file)
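
The function analyse_listing is defined elsewhere. A minimal sketch of the per-entry containment check the docstring describes, assuming NLTK's PorterStemmer and English stop word list (a hypothetical choice, not confirmed by the listing):

from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

STEMMER = PorterStemmer()
STOP_WORDS = set(stopwords.words("english"))

def stemmed_content_words(text):
    # lowercase, drop stop words, stem the rest
    return {STEMMER.stem(w) for w in text.lower().split() if w not in STOP_WORDS}

def annotate_entry(entry):
    # hypothetical per-entry check matching the docstring above
    caption_words = stemmed_content_words(entry["caption"])
    question_words = stemmed_content_words(entry["question_info"]["question"])
    answer_words = stemmed_content_words(entry["question_info"]["answer"])
    entry["caption_in_question"] = sorted(caption_words & question_words)
    entry["caption_in_answer"] = sorted(caption_words & answer_words)
    return entry
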
def evaluate_file(results_dir, results_file, control_file, selfatt_file, ignore_missing=False):
    """
        [{
            "caption": "hot dogs and buns on a hot dog bun",
            "question_info": {
                "answer": "red",
                "answer_type": "other",
                "image_id": 42,
                "question": "What color is the flip flop?",
                "question_id": 421
            },
            "type": "word"
        },]
        
        For each entry, check whether the caption's stemmed words
        (excluding stop words) are contained in the stemmed words
        (excluding stop words) of the answer and of the question.

        (Question infos are kept track of for the control and
        self-attention runs.)
    """ 
    result_listing, skip_list = enrich_result_listing(results_dir, results_file, control_file, selfatt_file)
    
    if not ignore_missing:
        skip_list = []
    result_listing = evaluate_results(result_listing, skip_list)
    
    file_name = "extern_results.json"
    if ignore_missing:
        file_name = "extern_results_ignore_missing.json"
    store_json_to(result_listing, results_dir, file_name)
Example #4
def __write_unknown_words_file(encoded_captions, vocabulary, directory_path, split_name):
    counter = Counter([w for s in encoded_captions for w in s])
    
    json_content = []
    
    # index 1 is the unknown-word (OOV) token here
    total_count = sum(1 for s in encoded_captions if 1 in s)
    json_content.append({"captions with unknown": total_count})
    
    # distribution: how many captions contain exactly k unknown words
    caption_count = Counter(s.count(1) for s in encoded_captions)
    for k in sorted(caption_count.keys()):
        json_content.append({"unknown per caption " + str(k): caption_count[k]})
    
    for idx, word_count in counter.most_common():
        json_content.append({"word": vocabulary.tokenizer.index_word[idx],
                             "total_count": word_count})

    store_json_to(json_content, directory_or_file=directory_path, lookup_filename="mscoco_caption_counts_unknown_{}.json".format(split_name))
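
To illustrate the counting logic above on toy data (index 1 standing in for the unknown-word token):

from collections import Counter

encoded_captions = [[4, 1, 7], [5, 6], [1, 1, 2]]

captions_with_unknown = sum(1 for s in encoded_captions if 1 in s)
unknown_per_caption = Counter(s.count(1) for s in encoded_captions)

print(captions_with_unknown)      # 2
print(dict(unknown_per_caption))  # {1: 1, 0: 1, 2: 1}
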
Example #5
def compute_word_embeddings(model_dir, epoch):
    path_to_model = to_model_path(model_dir, epoch)
    """ lookup model vocabulary """
    vocabulary = Vocabulary.create_vocabulary_from_vocabulary_json(
        model_dir, "", use_nltk=False)
    """ prepare and load model """
    vinput = Input((196, 512))
    model = create_shatt_model_v2(image_features_graph=(vinput, vinput),
                                  caption_max_length=16,
                                  vocabulary_size=len(vocabulary),
                                  dropout_rate=0.,
                                  start_encoding=vocabulary.get_start_symbol(),
                                  image_features_dimensions=196,
                                  embedding_size=512,
                                  hidden_size=1024,
                                  inference_mode=True,
                                  attention_graph=None,
                                  return_attention=True,
                                  use_max_sampler=True)
    model.load_weights(path_to_model, by_name=True)
    """ establish embedding model """
    layer_name = "shatt_word_embeddings"
    layer = model.get_layer(layer_name)
    if layer is None:
        raise Exception("Cannot find layer with name " + layer_name)
    input_words = Input(shape=(1, ), name="embedding_callback_input_words")
    layer_output = layer(input_words)
    layer_output = Flatten(name="embedding_callback_flatten")(layer_output)
    embedding_model = Model(inputs=input_words, outputs=layer_output)
    """ write metadata.tsv """
    word_sequence = vocabulary.get_word_sequence(padding_symbol="<PAD>")
    store_json_to(word_sequence,
                  model_dir,
                  lookup_filename="word_sequence.json")
    """ encode sequence"""
    encoded_word_sequence = vocabulary.get_encoded_word_sequence(
        include_padding=True)

    sequence = WordSequence(encoded_word_sequence, 64)

    processed_count = 0
    expected_num_batches = sequence.get_num_batches()
    results = []
    try:
        for words in sequence.one_shot_iterator():
            words = np.expand_dims(words, axis=-1)
            word_embeddings = embedding_model.predict_on_batch(words)
            results.extend(word_embeddings)
            processed_count = processed_count + 1
            print(">> Computing word embeddings {:d}/{:d} ({:3.0f}%)".format(
                processed_count, expected_num_batches,
                processed_count / expected_num_batches * 100),
                  end="\r")
    except Exception as e:
        print("Exception: ", e)
    results = np.array(results)
    store_numpy_to(results, model_dir, lookup_file_name="word_embeddings.npy")
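
Putting this together with the first example, the embeddings and the neighbor file could be produced roughly as follows (model directory, epoch, and category names are hypothetical):

model_dir = "models/shatt_v2"  # hypothetical path
compute_word_embeddings(model_dir, epoch=20)
categorical_knearest_neighbors_from_model_dir(model_dir, ["dog", "pizza"], k=10)
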
Example #6
def compute_categorical_matches_from_model_dir_at_k(
        dataset_dir,
        model_dir,
        epoch,
        experiment_name="attention_fixed",
        do_strict=False,
        do_tight=True,
        k_list=[1]):
    """
        The computation will make use of the categories that are actually involved.
        Therefore all missing categories are ignored.
        
        Furthermore the computation will provide scores for k-nearest categorical words.
        Therefore the word embeddings and nearest neighbors must be computed before.
        
        In addition, the neighbors file provides a flatten view on the categories 
        (bigrams mapped to unigrams) to easier compute the matches via word matching in the caption.
    """
    import logging
    import sys
    logging.basicConfig(level=logging.DEBUG,
                        format="%(message)s",
                        handlers=[
                            logging.FileHandler("{0}/{1}.log".format(
                                model_dir, "categorical_results")),
                            logging.StreamHandler(sys.stdout)
                        ])
    """ the categories also with two-word occurrences to produce the output"""
    categories = load_categories_by_id(dataset_dir, "validate")
    """ the flatten categories with neighbors """
    categories_neighbors = load_json_from(
        model_dir, lookup_filename="category_neighbors.json")
    """ the experimental results for each box and each image """
    box_results_file_name = RESULT_FILE_PATTERN.format("{}_epoch_{:03}".format(
        experiment_name, epoch))
    box_captions = load_json_from(model_dir, box_results_file_name)
    """ the result captions per image on epoch end """
    results_file_name = RESULT_FILE_PATTERN.format(
        "validate_epoch_{:03}".format(epoch))
    alternating_captions = load_json_from(model_dir, results_file_name)

    if do_tight:
        results = compute_filtered_categorical_matches_at_k(
            categories, categories_neighbors, box_captions,
            alternating_captions, do_strict, k_list)
    else:
        # this branch was never invoked for the thesis results; fail fast
        # instead of falling through with `results` undefined
        #results = compute_filtered_categorical_matches_at_k_allowed(categories, categories_neighbors, box_captions, alternating_captions, do_strict, k_list)
        raise NotImplementedError("do_tight=False is not supported")
    store_json_to(results,
                  model_dir,
                  lookup_filename="categorical_results_all.json")
    # strict means that only results which are correct at k or more are stored
    for k in k_list:
        store_json_to(
            [r for r in results if is_correct_at_k(r, k, k_list, strict=True)],
            model_dir,
            lookup_filename="categorical_results_correct_at_{}.json".format(k))
Example #7
 def write_results_file(self, vocabulary, target_path, file_infix):
     """
         Results Format
         results = [result]
         
         result {
         "image_id": int,
         "caption": str
         }
     """
     try:
         if os.path.isdir(target_path):
             directory_path = target_path
         else:
             directory_path = os.path.dirname(target_path)
             if directory_path == "":
                 directory_path = "."
         results = [{
             "image_id": str(image_id),
             "caption": predicted_caption
         } for (image_id, predicted_caption
                ) in self.get_captions_by_image_id(vocabulary).items()]
         return store_json_to(
             results, directory_path,
             "mscoco_caption_shatt_{}_results.json".format(file_infix))
     except Exception as e:
         print("Cannot write results file: " + str(e))
Example #8
def store_prepared_boxes_as_file(boxes,
                                 target_directory_path_or_file,
                                 split_name=None):
    lookup_filename = DEFAULT_PREPARED_BOXES_FILE_NAME
    if split_name:
        lookup_filename = DEFAULT_PREPARED_BOXES_SPLIT_FILE_NAME_PATTERN.format(
            split_name)
    return store_json_to(boxes, target_directory_path_or_file, lookup_filename)
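
Every example in this listing delegates to store_json_to, whose implementation is not shown. A minimal sketch that matches the call sites above, assuming the lookup filename is appended when a directory is given (the body is guessed, only the signature is taken from the calls):

import json
import os

def store_json_to(content, directory_or_file, lookup_filename=None):
    # hypothetical sketch: resolve a directory plus lookup filename,
    # or accept a full file path directly
    if lookup_filename and os.path.isdir(directory_or_file):
        target_path = os.path.join(directory_or_file, lookup_filename)
    else:
        target_path = directory_or_file
    with open(target_path, "w") as f:
        json.dump(content, f, indent=4)
    return target_path
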
Example #9
def store_categories_as_file(categories,
                             target_directory_path_or_file,
                             split_name=None):
    lookup_filename = DEFAULT_CATEGORIES_FILE_NAME
    if split_name:
        lookup_filename = DEFAULT_CATEGORIES_SPLIT_FILE_NAME_PATTERN.format(
            split_name)
    return store_json_to(categories, target_directory_path_or_file,
                         lookup_filename)
Example #10
 def write_results_file(self, target_path, file_infix):
     try:
         if os.path.isdir(target_path):
             directory_path = target_path
         else:
             directory_path = os.path.dirname(target_path)
             if directory_path == "":
                 directory_path = "."
         final_results = self.to_final_results()
         return store_json_to(final_results, directory_path, RESULT_FILE_PATTERN.format(file_infix))
     except Exception as e:
         print("Cannot write results file: " + str(e))
Example #11
def analyse_capability(captions, categories, name, model_dir):
    category_names = {w for c in categories for w in c["name"].split(" ")}
    word_counter = Counter(
        w for c in captions for w in c if w in category_names)

    print(name, "Total", len(category_names))
    missing_cats = [c for c in category_names if c not in word_counter]
    print(name, "Existing", len(category_names) - len(missing_cats))
    print(name, "Missing", len(missing_cats), missing_cats)

    analysed_categories = []
    for c in categories:
        analysed_category = {}
        analysed_category["category"] = c
        for w in c["name"].split(" "):
            analysed_category["count_{}".format(w)] = word_counter[
                w]  # is missing when count = 0
        analysed_categories.append(analysed_category)
    store_json_to(
        analysed_categories,
        model_dir,
        lookup_filename="categorical_capability_{}.json".format(name))
Example #12
def store_tokenizer(tokenizer, target_directory_path_or_file, split_name):
    lookup_filename = DEFAULT_VOCABULARY_FILE_NAME
    if split_name:    
        lookup_filename = "mscoco_vocabulary_{}.json".format(split_name) 
    return store_json_to(json.loads(tokenizer.to_json()), target_directory_path_or_file, lookup_filename)
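
The stored vocabulary can be turned back into a tokenizer with Keras' tokenizer_from_json, which expects a JSON string rather than a parsed object. A round-trip sketch (the file name is hypothetical):

import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json

with open("mscoco_vocabulary_train.json") as f:
    tokenizer = tokenizer_from_json(json.dumps(json.load(f)))
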