예제 #1
0
def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    """Score one validation caption against every training image with CIDEr.

    The caption ``val_caps[val_imgids[i]][j]`` is used as the single candidate
    for every id in ``train_imgids``; the resulting mapping is handed to
    ``Cider.compute_score`` together with ``gts`` and ``train_imgids``.

    Returns:
        The second value returned by ``Cider.compute_score`` (the ``scores``
        part of ``score, scores``), wrapped in ``np.array``.
    """
    # The lookup stays inside the comprehension so nothing is indexed when
    # ``train_imgids`` is empty, and every id gets its own one-element list.
    candidates = {
        train_id: [val_caps[val_imgids[i]][j]] for train_id in train_imgids
    }

    _, per_image_scores = Cider().compute_score(gts, candidates, train_imgids)
    return np.array(per_image_scores)
예제 #2
0
class CaptionEvaluater(object):
    """Score predicted captions against ground truth with BLEU, ROUGE and CIDEr.

    Typical use: call ``set_ground_truth`` once, then ``evaluate`` for each
    set of predictions. Captions are stripped of punctuation and ``<unk>``
    markers before they are handed to the scorers.
    """

    # Regex character class of everything stripped from captions. The
    # characters are escaped so that "\", "]", "^" and "-" are matched
    # literally instead of being interpreted as character-class syntax.
    remove_pattern = r"[{}]".format(re.escape(string.punctuation + "、。,."))

    def __init__(self, ):
        self.blue_scorer = Bleu(4)
        self.rouge_scorer = Rouge()
        self.cider_scorer = Cider()
        # Cleaned ground-truth captions; None until set_ground_truth() is called.
        self.truth = None

    def remove_punctuation(self, line):
        """Return ``line`` without ``<unk>`` markers and punctuation characters."""
        # "<unk>" has to go first: once "<" and ">" are stripped as
        # punctuation the marker could no longer be recognised.
        line = line.replace("<unk>", "")
        return re.sub(self.remove_pattern, "", line)

    def trnasform_utf8(self, line):
        """Return ``line`` unchanged (kept as a hook for callers; currently a no-op)."""
        return line

    def _clean_captions(self, captions_by_image):
        """Return a new dict with every caption passed through remove_punctuation."""
        # Real lists are required: a lazy ``map`` object would be exhausted by
        # the first scorer and leave the remaining scorers with no captions.
        return {
            img: [self.remove_punctuation(caption) for caption in captions]
            for img, captions in captions_by_image.items()
        }

    def set_ground_truth(self, ground_truth):
        '''
        Store the reference captions used by ``evaluate``.

        ground_truth should be a python dictionary whose shape is:
            {"image_identifier": ["a caption", "a similar caption", ...], ...}
        "image_identifier" can be either string or number.

        The captions are cleaned into a new dictionary; the dictionary passed
        in is not modified.
        '''
        self.truth = self._clean_captions(ground_truth)

    def evaluate(self, predicetd_captions):
        '''
        Score predicted captions against the stored ground truth.

        predicetd_captions should be a python dictionary whose shape is:
            {"image_identifier": ["the predicted caption"], ...}
        "image_identifier" needs to be the same as used in the ground truth.
        Make sure there is only one caption per image, even though it is
        wrapped in a python list.

        Returns a dictionary with the keys "bleu-0" .. "bleu-3", "rouge" and
        "cider". NOTE: the BLEU keys are zero-based, so "bleu-0" holds the
        first value returned by the BLEU scorer (BLEU-1 according to the
        comment in get_bleu).

        Raises ValueError if set_ground_truth() has not been called yet.
        '''
        if self.truth is None:
            raise ValueError(
                "set_ground_truth() must be called before evaluate()")

        cleaned = self._clean_captions(predicetd_captions)

        results = {}
        for i, score in enumerate(self.get_bleu(cleaned)[0]):
            results["bleu-%d" % i] = score
        results["rouge"] = self.get_rouge(cleaned)[0]
        results["cider"] = self.get_cider(cleaned)[0]

        return results

    def get_bleu(self, predicetd_captions):
        """Return ``(score, scores)`` from the BLEU scorer for already-cleaned captions."""
        score, scores = self.blue_scorer.compute_score(self.truth,
                                                       predicetd_captions)
        # score is a python list [bleu-1, bleu-2, bleu-3, bleu-4]
        return score, scores

    def get_rouge(self, predicetd_captions):
        """Return ``(score, scores)`` from the ROUGE scorer for already-cleaned captions."""
        score, scores = self.rouge_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores

    def get_cider(self, predicetd_captions):
        """Return ``(score, scores)`` from the CIDEr scorer for already-cleaned captions."""
        score, scores = self.cider_scorer.compute_score(
            self.truth, predicetd_captions)
        return score, scores
예제 #3
0
def calc_cider(gts, res):
    """Run a fresh ``Cider`` scorer on ``gts`` and ``res``.

    Returns the ``(score, scores)`` pair unpacked from
    ``Cider.compute_score(gts, res)``.
    """
    overall, per_item = Cider().compute_score(gts, res)
    return overall, per_item
예제 #4
0
# Report the BLEU results; score_bleu_4 and score_bleu_C are computed earlier
# in the script (not visible in this excerpt).
# NOTE(review): the second label reads 'Blue-Cumulative' — looks like a typo
# for 'Bleu'; left as-is because it is program output.
print('Bleu-4: %f' % score_bleu_4)
print('Blue-Cumulative: %f' % score_bleu_C)

# CIDEr scores
# Toy references and hypotheses keyed by sample id: sample 1 differs by one
# word, sample 2 is identical, sample 3 shares no words with its reference.
ref_dict = {
    '1': ['the quick brown fox jumped over the lazy dog'],
    '2': ['test test test test'],
    '3': ['here is one sentence']
}
hyp_dict = {
    '1': ['the fast brown fox jumped over the lazy dog'],
    '2': ['test test test test'],
    '3': ['this statement shares no words']
}
cider_eval = Cider()
# compute_score's result is indexed below as [0] (printed as the average) and
# [1][i] (printed as the per-sample values).
cider_score = cider_eval.compute_score(ref_dict, hyp_dict)
# NOTE(review): every value is divided by 10 before printing — presumably to
# undo a 10x scaling applied by this Cider implementation; confirm.
print('Cider Average: %f' % (cider_score[0] / 10))
print('Cider sample 1: %f' % (cider_score[1][0] / 10))
print('Cider sample 2: %f' % (cider_score[1][1] / 10))
print('Cider sample 3: %f' % (cider_score[1][2] / 10))

# METEOR scores
# The scorer is reached through the gateway's entry point — presumably a py4j
# gateway to a Java METEOR server that must already be running; confirm.
gateway = JavaGateway()
meteor_eval = gateway.entry_point
# Alternative scorer and alternative sentence pair, kept disabled:
# meteor_eval = Meteor
# ref_str = "the quick brown fox jumped over the lazy dog"
# hyp_str = "the fast brown fox jumped over the lazy dog"
ref_str = "Man on ocean beach flying several kites on windy day"
hyp_str = "a person on a beach flying a kite"
# The hypothesis is passed first and the reference second.
meteor_score = meteor_eval.compute_score(hyp_str, ref_str)
print('Meteor: %f' % meteor_score)
class CaptionExtractor:
    """Pick guidance captions for images from the captions of their nearest neighbors.

    ``self.captions`` maps an image file name to the list of its captions. It
    is loaded with ``helpers.get_data('captions')``; when that mapping is empty
    it is rebuilt from the annotations file and saved with ``helpers.save_obj``.
    """

    def __init__(self):
        logging.info("New 'CaptionExtractor' instance has been initialized.")

        # Variables for computing metrics and performing transformations
        self.stemmer = nltk.stem.WordNetLemmatizer()
        self.vectorizer = CountVectorizer()

        # Variables related to assisting in the generating guidance captions
        self.captions = helpers.get_data('captions')
        self.cider = Cider(n=FLAGS.ngrams)

        # ETL: only needed when no cached captions dictionary was available
        if not self.captions:
            self.annotations_data, self.images_data = self.get_annotations()
            self.make_caption_representations()

            # Save the dictionary for future use
            helpers.save_obj(self.captions, 'captions')

    @staticmethod
    def clean_sentence(sentence):
        """Remove every character of ``sentence`` that is not a word character or whitespace."""
        return re.sub(r'[^\w\s]', '', sentence)

    @staticmethod
    def get_annotations(path=None):
        """Load the annotations file and return ``(annotations, images)``.

        :param path: JSON file with 'annotations' and 'images' entries;
                     defaults to ``helpers.get_captions_path()``
        :return: tuple of ``data['annotations']`` and ``data['images']``
        """
        # Resolve the default lazily: as a default argument it would be
        # evaluated once, while the class body is being executed.
        if path is None:
            path = helpers.get_captions_path()

        with open(path) as data_file:
            data = json.load(data_file)
        return data['annotations'], data['images']

    @staticmethod
    def stem_word(stemmer, word):
        """Lowercase ``word`` and lemmatize it with ``stemmer``."""
        return stemmer.lemmatize(word.lower())

    def get_guidance_caption(self, nearest_neighbors, inference=False):
        """
        Return the guidance caption for each example in a batch

        One process is started per example; inside it, one process per distinct
        candidate caption computes that caption's CIDEr score against the
        captions of the example's neighbors. For inference the best scoring
        candidate is chosen, otherwise a random one among the top ``FLAGS.k``.

        NOTE: the process targets are nested closures, which cannot be pickled,
        so (assuming ``Process`` is ``multiprocessing.Process``) this relies on
        the 'fork' start method.

        :param nearest_neighbors: set of nearest neighbors for a batch of images [batch size, FLAGS.n]
        :param inference: whether or not this is for inference (vs training)
        :return: list with ``FLAGS.batch_size`` entries, the guidance caption of
                 example ``i`` at position ``i``; a slot whose worker did not
                 write a caption keeps its initial integer value
        """

        with Manager() as manager:
            # Pre-filled with 0..batch_size-1; each worker overwrites its own slot
            guidance_caption = manager.list(range(FLAGS.batch_size))

            def stem(extractor, stemmer, caption):
                caption = extractor.tokenize_sentence(stemmer, caption)
                caption = ' '.join(caption)
                caption = extractor.clean_sentence(caption)
                return caption

            # Get the guidance caption for a particular example given its nearest neighboring image filenames
            def get_example_guidance(neighbors, index):
                stemmer = nltk.stem.WordNetLemmatizer()

                # Filter full captions list to get captions relevant to our neighbors
                # (a set keeps the membership test below O(1) per caption entry)
                neighbor_names = {
                    os.path.basename(neighbor.decode('UTF-8'))
                    for neighbor in neighbors
                }
                captions = {
                    k: v
                    for k, v in self.captions.items() if k in neighbor_names
                }

                # Flatten candidate captions into one list and stem all their words
                candidates = [
                    stem(self, stemmer, candidate)
                    for candidate in itertools.chain.from_iterable(
                        captions.values())
                ]

                # Compute CIDEr scores in parallel
                with Manager() as cider_manager:
                    total_scores = cider_manager.dict()
                    cider_threads = []
                    cider_lock = Lock()

                    def update_scores(c):
                        ref = {filename: [c] for filename in captions}
                        score, _ = self.cider.compute_score(captions, ref)

                        with cider_lock:
                            total_scores[c] = score

                    # Scores are keyed by caption text, so identical candidates
                    # only need to be scored once
                    for candidate in dict.fromkeys(candidates):
                        ct = Process(target=update_scores, args=(candidate, ))
                        ct.start()
                        cider_threads.append(ct)

                    for ct in cider_threads:
                        ct.join()

                    # Copy out of the proxy before the manager shuts down
                    score_by_candidate = total_scores.copy()

                # Look every candidate up by its text so that scores[i] belongs
                # to candidates[i]. The proxy's own value order follows worker
                # completion and must not be used for positional indexing. A
                # candidate whose worker produced no score ranks last.
                scores = [
                    score_by_candidate.get(candidate, float('-inf'))
                    for candidate in candidates
                ]

                if inference:
                    # Select the highest scoring caption
                    score_index = scores.index(max(scores))
                    guidance = candidates[score_index]
                else:
                    # Select a random caption from the top k to prevent over-fitting during learning
                    k = min(FLAGS.k, len(scores))
                    indices = np.argpartition(scores, -k)[-k:]
                    top_captions = [
                        candidates[top_index] for top_index in indices
                    ]
                    guidance = random.choice(top_captions)

                guidance_caption[index] = guidance

            # Iterate through each example's candidate captions and select the appropriate guidance caption
            threads = []
            for i, n in enumerate(nearest_neighbors):
                t = Process(target=get_example_guidance, args=(
                    n,
                    i,
                ))
                t.start()
                threads.append(t)

            for t in threads:
                t.join()
            return list(guidance_caption)

    def make_caption_representations(self):
        """Fill ``self.captions`` with ``{image file name: [captions]}``.

        Reads ``self.images_data`` and ``self.annotations_data``. Images without
        any annotation get no entry. File names are inserted in image order and
        captions are kept in annotation order.
        """
        # Group the captions by image id in a single pass instead of rescanning
        # every annotation for every image.
        captions_by_image_id = {}
        for annotation in self.annotations_data:
            captions_by_image_id.setdefault(annotation['image_id'],
                                            []).append(annotation['caption'])

        for image in self.images_data:
            image_captions = captions_by_image_id.get(image['id'])
            if image_captions:
                self.captions.setdefault(image['file_name'],
                                         []).extend(image_captions)

    def tokenize_sentence(self, stemmer, sentence):
        """Tokenize ``sentence`` with nltk and return the list of stemmed words."""
        return [
            self.stem_word(stemmer, word)
            for word in nltk.word_tokenize(sentence)
        ]