Example #1
0
def make_meters():
    """Create a fresh set of meters keyed by metric name.

    Every metric gets a ``logger.AvgMeter`` except ``'epoch_time'``, which
    gets a ``logger.SumMeter``.

    Returns:
        dict: metric name -> newly constructed meter instance.
    """
    averaged = ('loss', 'acc1', 'acc5', 'batch_time', 'data_time')
    meters = {metric: logger.AvgMeter() for metric in averaged}
    # The epoch duration uses a SumMeter rather than an AvgMeter.
    meters['epoch_time'] = logger.SumMeter()
    return meters
Example #2
0
def construct_logger(name: str) -> logger.Experiment:
    """Create an experiment logger with a ``"test"`` meter group attached.

    Args:
        name: Passed straight through to ``logger.Experiment``.

    Returns:
        The ``logger.Experiment`` instance, after ``add_meters("test", ...)``
        has been called with one ``logger.AvgMeter`` per test metric.
    """
    experiment = logger.Experiment(name)
    test_metric_names = ("acc1", "acc5", "acc10", "bleu_score", "batch_time")
    test_meters = {metric: logger.AvgMeter() for metric in test_metric_names}
    experiment.add_meters("test", test_meters)
    return experiment
def make_meters():
    """Create a fresh set of meters keyed by metric name.

    Each entry of the table below pairs a metric name with the meter class
    used for it; a new instance is constructed per metric, in table order.

    Returns:
        dict: metric name -> newly constructed meter instance.
    """
    meter_table = (
        ('loss', logger.AvgMeter),
        ('loss_a', logger.AvgMeter),
        ('loss_q', logger.AvgMeter),
        ('batch_time', logger.AvgMeter),
        ('data_time', logger.AvgMeter),
        ('epoch_time', logger.SumMeter),  # the only summed meter
        ('bleu_score', logger.AvgMeter),
        ('acc1', logger.AvgMeter),
        ('acc5', logger.AvgMeter),
        ('acc10', logger.AvgMeter),
        ('dual_loss', logger.AvgMeter),
    )
    return {metric: meter_cls() for metric, meter_cls in meter_table}
Example #4
0
def make_meters():
    """Create a fresh set of meters keyed by metric name.

    All metrics use ``logger.AvgMeter`` except those listed in
    ``summed_metrics``, which use ``logger.SumMeter``.

    Returns:
        dict: metric name -> newly constructed meter instance, in the order
        given by ``metric_names``.
    """
    metric_names = (
        "loss",
        "loss_a",
        "loss_q",
        "batch_time",
        "data_time",
        "epoch_time",
        "bleu_score",
        "acc1",
        "acc5",
        "acc10",
        "dual_loss",
    )
    summed_metrics = {"epoch_time"}

    meters = {}
    for metric in metric_names:
        if metric in summed_metrics:
            meters[metric] = logger.SumMeter()
        else:
            meters[metric] = logger.AvgMeter()
    return meters
Example #5
0
def evaluate(loader, model, logger, print_freq=10, sampling_num=5):
    """Evaluate ``model`` on ``loader`` and collect per-sample results.

    The model is switched to eval/testing mode, every batch is run through
    it, and accuracy / BLEU / NLG meters are updated. A one-line summary of
    the averaged meters is printed at the end and testing mode is switched
    back off.

    Args:
        loader: Iterable of batches. Each batch is indexed with the keys
            ``'visual'``, ``'answer'``, ``'question'`` and ``'image'``;
            ``loader.dataset`` must expose ``wid_to_word`` and
            ``aid_to_ans``.
        model: Wrapped model; ``model.module.set_testing`` is called, and
            calling ``model(visual, question, answer)`` must return the
            4-tuple ``(output_answer, g_answers, g_answers_score,
            generated_q)``.
        logger: Experiment logger; ``logger.reset_meters('test')`` must
            return a dict containing at least the ``acc1``, ``acc5``,
            ``acc10``, ``bleu_score`` and ``batch_time`` meters.
        print_freq: Unused; kept for interface compatibility.
        sampling_num: Forwarded to ``set_testing`` as ``sample_num``.

    Returns:
        list[dict]: One entry per evaluated sample with the keys
        ``'image'``, ``'numeric_result'`` and ``'readable_result'``.
    """
    # Local import: this block needs torch.no_grad() and cannot assume a
    # file-level ``import torch`` is present.
    import torch

    model.eval()
    model.module.set_testing(True, sample_num=sampling_num)
    meters = logger.reset_meters('test')

    # Extra NLG meters are attached to the 'test' meter dict on the fly.
    nlg_metric_names = ('Bleu_1', 'Bleu_2', 'Bleu_3', 'Bleu_4',
                        'METEOR', 'ROUGE_L', 'CIDEr')
    for metric_name in nlg_metric_names:
        meters[metric_name] = logger2.AvgMeter()

    wid_to_word = loader.dataset.wid_to_word
    aid_to_ans = loader.dataset.aid_to_ans

    results = []
    end = time.time()
    for sample in loader:
        batch_size = sample['visual'].size(0)
        target_answer = sample['answer']

        # ``async`` is a reserved word since Python 3.7; ``non_blocking`` is
        # the PyTorch replacement. ``torch.no_grad()`` supersedes the removed
        # ``volatile=True`` flag for inference-only forward passes.
        with torch.no_grad():
            input_visual = sample['visual'].cuda(non_blocking=True)
            input_answer = sample['answer'].cuda(non_blocking=True)
            input_question = sample['question'].cuda(non_blocking=True)
            output_answer, g_answers, g_answers_score, generated_q = model(
                input_visual, input_question, input_answer)

        # Move the generated questions to the CPU once per batch. The NLG
        # scores must be computed *after* the forward pass so that they are
        # based on this batch's generated questions.
        generated_q_cpu = generated_q.cpu().data
        nlg_metrics = calculate_nlg_score(generated_q_cpu,
                                          sample['question'],
                                          wid_to_word)
        bleu_score = calculate_bleu_score(generated_q_cpu,
                                          sample['question'],
                                          wid_to_word)
        acc1, acc5, acc10 = utils.accuracy(output_answer.cpu().data,
                                           target_answer,
                                           topk=(1, 5, 10))
        meters['acc1'].update(acc1[0], n=batch_size)
        meters['acc5'].update(acc5[0], n=batch_size)
        meters['acc10'].update(acc10[0], n=batch_size)
        meters['bleu_score'].update(bleu_score, n=batch_size)
        for metric_name in nlg_metric_names:
            meters[metric_name].update(nlg_metrics[metric_name],
                                       n=batch_size)

        g_answers = g_answers.cpu().data
        g_answers_score = g_answers_score.cpu().data

        for j in range(batch_size):
            new_question = generated_q_cpu[j].tolist()
            new_answer = g_answers[j]
            new_answer_score = g_answers_score[j]
            sampled_aqa = [
                [new_question, new_answer, new_answer_score],
            ]
            # The first token of the ground-truth question is skipped
            # (presumably a start-of-sequence marker -- TODO confirm).
            gt_question_tokens = sample['question'][j][1:]
            num_result = {
                'gt_question': gt_question_tokens.tolist(),
                'gt_answer': sample['answer'][j],
                'augmented_qa': sampled_aqa,
            }
            readable_result = {
                'gt_question':
                translate_tokens(gt_question_tokens, wid_to_word),
                'gt_answer':
                aid_to_ans[sample['answer'][j]],
                'augmented_qa': [
                    [
                        translate_tokens(item[0], wid_to_word),  # question
                        aid_to_ans[item[1]],  # answer
                    ] for item in sampled_aqa
                ],
            }
            results.append({
                'image': sample['image'][j],
                'numeric_result': num_result,
                'readable_result': readable_result,
            })
        # measure elapsed time
        meters['batch_time'].update(time.time() - end, n=batch_size)
        end = time.time()

    print('* [Evaluation] Result: Acc@1:{acc1.avg:.3f}\t'
          'Acc@5:{acc5.avg:.3f}\tAcc@10:{acc10.avg:.3f}\t'
          'Time: {batch_time.avg:.3f}\t'
          'Bleu_1: {Bleu_1.avg:.5f}\t'
          'Bleu_2: {Bleu_2.avg:.5f}\t'
          'Bleu_3: {Bleu_3.avg:.5f}\t'
          'Bleu_4: {Bleu_4.avg:.5f}\t'
          'METEOR: {METEOR.avg:.5f}\t'
          'ROUGE_L: {ROUGE_L.avg:.5f}\t '
          'CIDEr: {CIDEr.avg:.5f}\t'
          'BLEU: {bleu_score.avg:.5f}'.format(acc1=meters['acc1'],
                                              acc5=meters['acc5'],
                                              acc10=meters['acc10'],
                                              batch_time=meters['batch_time'],
                                              Bleu_1=meters['Bleu_1'],
                                              Bleu_2=meters['Bleu_2'],
                                              Bleu_3=meters['Bleu_3'],
                                              Bleu_4=meters['Bleu_4'],
                                              METEOR=meters['METEOR'],
                                              ROUGE_L=meters['ROUGE_L'],
                                              CIDEr=meters['CIDEr'],
                                              bleu_score=meters['bleu_score']))

    model.module.set_testing(False)
    return results