def compute_score(gts, val_caps, train_imgids, val_imgids, i, j):
    # Score the j-th caption of the i-th validation image against the ground-truth
    # captions of every training image: the same candidate caption is used as the
    # hypothesis for each training image id.
    res = {}
    for imgid in train_imgids:
        res[imgid] = [val_caps[val_imgids[i]][j]]
    scorer = Cider()
    score, scores = scorer.compute_score(gts, res, train_imgids)
    return np.array(scores)
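# Hypothetical call (indices chosen only for illustration): compare the first caption
# of the first validation image to every training image's references. Note that the
# Cider variant used here takes the image-id list as a third argument, unlike the
# stock pycocoevalcap scorer.
similarities = compute_score(gts, val_caps, train_imgids, val_imgids, 0, 0)
print(similarities.shape)  # one CIDEr score per training image id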
class CaptionEvaluater(object):
    def __init__(self):
        self.bleu_scorer = Bleu(4)
        self.rouge_scorer = Rouge()
        self.cider_scorer = Cider()
        self.truth = None
        remove = string.punctuation + "、。,."
        self.remove_pattern = "[{}]".format(re.escape(remove))  # character class of punctuation to strip

    def remove_punctuation(self, line):
        # Drop <unk> tokens and the ideographic full stop before stripping punctuation.
        line = line.replace(u"<unk>", "")
        line = line.replace(u"。", "")
        return re.sub(self.remove_pattern, "", line)

    def transform_utf8(self, line):
        # return u' '.join(line).encode('utf-8').strip()
        return line

    def set_ground_truth(self, ground_truth):
        '''
        ground_truth should be a python dictionary of the form
        {"image_identifier": ["a caption", "a similar caption", ...], ...}
        "image_identifier" can be either a string or a number.
        '''
        for img in ground_truth:
            # ground_truth[img] = list(map(self.transform_utf8, ground_truth[img]))
            ground_truth[img] = list(map(self.remove_punctuation, ground_truth[img]))
        self.truth = ground_truth

    def evaluate(self, predicted_captions):
        '''
        predicted_captions should be a python dictionary of the form
        {"image_identifier": ["the predicted caption"], ...}
        "image_identifier" must match the keys used in the ground truth.
        Each value must contain exactly one caption, even though it is a list.
        '''
        for img in predicted_captions:
            # predicted_captions[img] = list(map(self.transform_utf8, predicted_captions[img]))
            predicted_captions[img] = list(map(self.remove_punctuation, predicted_captions[img]))
        results = {}
        for i, score in enumerate(self.get_bleu(predicted_captions)[0]):
            results["bleu-%d" % i] = score
        results["rouge"] = self.get_rouge(predicted_captions)[0]
        results["cider"] = self.get_cider(predicted_captions)[0]
        return results

    def get_bleu(self, predicted_captions):
        # score is a python list [bleu-1, bleu-2, bleu-3, bleu-4]
        score, scores = self.bleu_scorer.compute_score(self.truth, predicted_captions)
        return score, scores

    def get_rouge(self, predicted_captions):
        score, scores = self.rouge_scorer.compute_score(self.truth, predicted_captions)
        return score, scores

    def get_cider(self, predicted_captions):
        score, scores = self.cider_scorer.compute_score(self.truth, predicted_captions)
        return score, scores
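# A minimal usage sketch of CaptionEvaluater (the image ids and captions below are
# made up for illustration; assumes Bleu, Rouge, and Cider are the pycocoevalcap scorers).
evaluater = CaptionEvaluater()
evaluater.set_ground_truth({
    'img_0': ['a man rides a horse', 'a person riding a horse'],
    'img_1': ['two dogs play in the grass', 'dogs playing outside'],
})
results = evaluater.evaluate({
    'img_0': ['a man riding a horse'],
    'img_1': ['dogs playing on the grass'],
})
print(results)  # {'bleu-0': ..., 'bleu-1': ..., 'bleu-2': ..., 'bleu-3': ..., 'rouge': ..., 'cider': ...}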
def calc_cider(gts, res):
    # gts: {image_id: [reference captions]}, res: {image_id: [candidate caption]}
    cider = Cider()
    score, scores = cider.compute_score(gts, res)
    return score, scores
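# Example call for calc_cider; the ids and captions are made up for illustration and
# the Cider class is assumed to be the pycocoevalcap implementation.
gts = {
    '0': ['a cat sits on a mat', 'a cat is sitting on a mat'],
    '1': ['a group of people standing on a beach'],
}
res = {
    '0': ['a cat sitting on the mat'],
    '1': ['people stand on a sandy beach'],
}
avg_score, per_image_scores = calc_cider(gts, res)
print(avg_score)         # corpus-level CIDEr score
print(per_image_scores)  # one score per image id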
print('Bleu-4: %f' % score_bleu_4)
print('Bleu-Cumulative: %f' % score_bleu_C)

# CIDEr scores
ref_dict = {
    '1': ['the quick brown fox jumped over the lazy dog'],
    '2': ['test test test test'],
    '3': ['here is one sentence']
}
hyp_dict = {
    '1': ['the fast brown fox jumped over the lazy dog'],
    '2': ['test test test test'],
    '3': ['this statement shares no words']
}
cider_eval = Cider()
cider_score = cider_eval.compute_score(ref_dict, hyp_dict)
# The division by 10 undoes the 10x scaling applied by this CIDEr implementation.
print('Cider Average: %f' % (cider_score[0] / 10))
print('Cider sample 1: %f' % (cider_score[1][0] / 10))
print('Cider sample 2: %f' % (cider_score[1][1] / 10))
print('Cider sample 3: %f' % (cider_score[1][2] / 10))

# METEOR scores (computed through a py4j gateway to a Java-side METEOR scorer)
gateway = JavaGateway()
meteor_eval = gateway.entry_point
# meteor_eval = Meteor
# ref_str = "the quick brown fox jumped over the lazy dog"
# hyp_str = "the fast brown fox jumped over the lazy dog"
ref_str = "Man on ocean beach flying several kites on windy day"
hyp_str = "a person on a beach flying a kite"
meteor_score = meteor_eval.compute_score(hyp_str, ref_str)
print('Meteor: %f' % meteor_score)
class CaptionExtractor:
    def __init__(self):
        logging.info("New 'CaptionExtractor' instance has been initialized.")

        # Variables for computing metrics and performing transformations
        self.stemmer = nltk.stem.WordNetLemmatizer()
        self.vectorizer = CountVectorizer()

        # Variables related to assisting in generating the guidance captions
        self.captions = helpers.get_data('captions')
        self.cider = Cider(n=FLAGS.ngrams)

        # ETL
        if len(self.captions.keys()) == 0:
            self.annotations_data, self.images_data = self.get_annotations()
            self.make_caption_representations()

            # Save the dictionary for future use
            helpers.save_obj(self.captions, 'captions')

    # Remove anything that is not a word character or space in a sentence
    @staticmethod
    def clean_sentence(sentence):
        return re.sub(r'[^\w\s]', '', sentence)

    # Retrieve the MSCOCO annotations and images data in the form of dictionaries
    @staticmethod
    def get_annotations(path=helpers.get_captions_path()):
        with open(path) as data_file:
            data = json.load(data_file)
        return data['annotations'], data['images']

    # Lowercase a word and stem (lemmatize) it
    @staticmethod
    def stem_word(stemmer, word):
        return stemmer.lemmatize(word.lower())

    def get_guidance_caption(self, nearest_neighbors, inference=False):
        """
        Return the guidance caption for each example in a batch

        :param nearest_neighbors: set of nearest neighbors for a batch of images [batch size, FLAGS.n]
        :param inference: whether or not this is for inference (vs training)
        :return: guidance caption for each example of shape [batch size, 1]
        """
        with Manager() as manager:
            guidance_caption = manager.list(range(FLAGS.batch_size))

            def stem(extractor, stemmer, caption):
                caption = extractor.tokenize_sentence(stemmer, caption)
                caption = ' '.join(caption)
                caption = extractor.clean_sentence(caption)
                return caption

            # Get the guidance caption for a particular example given its nearest neighboring image filenames
            def get_example_guidance(neighbors, index):
                stemmer = nltk.stem.WordNetLemmatizer()

                # Filter the full captions list to get the captions relevant to our neighbors
                neighbors = [os.path.basename(neighbor.decode('UTF-8')) for neighbor in neighbors]
                captions = {k: v for k, v in self.captions.items() if k in neighbors}

                # Flatten the candidate captions into one list and stem all their words
                candidates = list(itertools.chain(*captions.values()))
                candidates = [stem(self, stemmer, candidate) for candidate in candidates]

                # Compute CIDEr scores in parallel
                with Manager() as cider_manager:
                    total_scores = cider_manager.dict()
                    cider_threads = []
                    cider_lock = Lock()

                    def update_scores(c):
                        ref = {filename: [c] for filename in captions.keys()}
                        score, _ = self.cider.compute_score(captions, ref)

                        with cider_lock:
                            total_scores[c] = score

                    for candidate in candidates:
                        ct = Process(target=update_scores, args=(candidate,))
                        ct.start()
                        cider_threads.append(ct)

                    [ct.join() for ct in cider_threads]
                    scores = [value for value in total_scores.values()]

                if inference:
                    # Select the highest scoring caption
                    score_index = scores.index(max(scores))
                    guidance = candidates[score_index]
                else:
                    # Select a random caption from the top k to prevent over-fitting during learning
                    k = FLAGS.k if len(scores) >= FLAGS.k else len(scores)
                    indices = np.argpartition(scores, -k)[-k:]
                    top_captions = [candidates[top_index] for top_index in indices]
                    guidance = top_captions[random.randint(0, k - 1)]

                guidance_caption[index] = guidance

            # Iterate through each example's candidate captions and select the appropriate guidance caption
            threads = []
            for i, n in enumerate(nearest_neighbors):
                t = Process(target=get_example_guidance, args=(n, i,))
                t.start()
                threads.append(t)

            [t.join() for t in threads]

            return list(guidance_caption)

    # Create a dictionary storing training image names with their associated captions
    def make_caption_representations(self):
        # Iterate through the annotations data and find all captions belonging to each image
        for image in self.images_data:
            for annotation in self.annotations_data:
                filename = image['file_name']
                image_id = image['id']

                if annotation['image_id'] == image_id:
                    if filename not in self.captions:
                        self.captions[filename] = []
                    self.captions[filename].append(annotation['caption'])

    # Tokenize a given sentence
    def tokenize_sentence(self, stemmer, sentence):
        return [self.stem_word(stemmer, word) for word in nltk.word_tokenize(sentence)]
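# A rough usage sketch for CaptionExtractor. The neighbor filenames are hypothetical,
# and FLAGS (batch_size, n, k, ngrams), the helpers module, and the MSCOCO captions
# file are assumed to be configured elsewhere; FLAGS.batch_size must match the number
# of rows passed in.
extractor = CaptionExtractor()
nearest_neighbors = [
    [b'COCO_train2014_000000000009.jpg', b'COCO_train2014_000000000025.jpg'],
]
guidance = extractor.get_guidance_caption(nearest_neighbors, inference=True)
print(guidance[0])  # highest-CIDEr candidate caption drawn from the neighbors' captions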