def setup(self, metrics, noatt_gt_path, regions_gt_path, example_ids):
        """Instantiate the requested text scorers and load the ground truth.

        Args:
            metrics: Iterable of metric names. The names 'BLEU-1' to
                'BLEU-4', 'ROUGE-L', 'METEOR', 'CIDEr' and 'SPICE' each get
                a scorer stored in ``self.scorers`` under that name; any
                other name is ignored by this method.
            noatt_gt_path: Path to a JSON file whose 'gts' entry holds the
                base ground-truth captions. The entry is passed through
                ``PTBTokenizer.tokenize`` and the result is indexed by the
                part of each example id that precedes the first '_'.
            regions_gt_path: Path to a JSON file whose 'gts' entry is
                tokenized the same way and indexed by the full example id,
                or None. When None, ``self.gts_regions`` is not assigned
                by this method.
            example_ids: Example ids to keep. It is iterated once per
                ground-truth file, so it should be re-iterable.

        NOTE(review): a missing id presumably raises KeyError from the
        tokenized mapping -- confirm against PTBTokenizer.tokenize.
        """
        # Factories are wrapped in lambdas so that a scorer is only
        # constructed when its metric was actually requested.
        scorer_factories = {
            'BLEU-1': lambda: Bleu(n=1),
            'BLEU-2': lambda: Bleu(n=2),
            'BLEU-3': lambda: Bleu(n=3),
            'BLEU-4': lambda: Bleu(n=4),
            'ROUGE-L': lambda: Rouge(),
            'METEOR': lambda: Meteor(),
            'CIDEr': lambda: Cider(),
            'SPICE': lambda: Spice(),
        }

        # Load scorers; names without a factory are skipped on purpose.
        self.scorers = dict()
        for metric in metrics:
            factory = scorer_factories.get(metric)
            if factory is not None:
                self.scorers[metric] = factory()

        # Load all ground truth captions for text metrics evaluation.
        # The encoding is explicit so decoding does not depend on the
        # platform locale.
        with open(noatt_gt_path, encoding='utf-8') as gt_file:
            gts_all_base = PTBTokenizer.tokenize(json.load(gt_file)['gts'])

        # Match our example_ids to the base captions (without the _X)
        self.gts_all = {
            example_id: gts_all_base[example_id.split('_')[0]]
            for example_id in example_ids
        }

        if regions_gt_path is not None:
            with open(regions_gt_path, encoding='utf-8') as gt_file:
                gts_regions_base = PTBTokenizer.tokenize(
                    json.load(gt_file)['gts'])

            # Region ground truth is keyed by the full example id.
            self.gts_regions = {
                example_id: gts_regions_base[example_id]
                for example_id in example_ids
            }
    # NOTE(review): this is the body of a loop whose header is outside this
    # view; `i` and `pred_cap` are presumably bound by that loop -- confirm.
    # Sum the nw_aligner score of the prediction against every ground-truth
    # caption of example i.
    score_nw = 0.
    for c in gt_captions[i]:
        score = nw_aligner.score(c, pred_cap)
        score_nw += score
    # Keep the mean score over this example's ground-truth captions.
    scores_nw.append(score_nw / len(gt_captions[i]))

    # Record the references and the prediction under the same key; the
    # prediction is wrapped in a one-element list.
    gts[i] = gt_captions[i]
    gen[i] = [pred_cap]

# Tokenize the collected references and predictions once; every scorer
# below is fed the same tokenized pair.
gts_t = PTBTokenizer.tokenize(gts)
gen_t = PTBTokenizer.tokenize(gen)

# val_bleu is paired element-wise with the four labels below, so each
# printed line carries one BLEU score with a consistently spelled label.
val_bleu, _ = Bleu(n=4).compute_score(gts_t, gen_t)
method = ['Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4']
for metric, score in zip(method, val_bleu):
    print(metric, score)

# Each remaining scorer is created right before use and its first return
# value is printed; the second return value is discarded.
val_meteor, _ = Meteor().compute_score(gts_t, gen_t)
print('METEOR', val_meteor)

val_rouge, _ = Rouge().compute_score(gts_t, gen_t)
print('ROUGE_L', val_rouge)

val_cider, _ = Cider().compute_score(gts_t, gen_t)
print('CIDEr', val_cider)

val_spice, _ = Spice().compute_score(gts_t, gen_t)
print('SPICE', val_spice)

# Mean of the per-example alignment scores accumulated in scores_nw.
print('NW Alignment Score', np.mean(scores_nw))