"""Evaluates the model"""
import argparse
import logging
import os

import numpy as np
import torch

import utils
import model.net2 as net
import model.data_loader2 as data_loader


def evaluate(model, loss_fn, dataloader, metrics, params, writer=None, global_step=0):
    """Evaluate the model on all batches of `dataloader`.
    Args:
        model: (torch.nn.Module) the neural network
        loss_fn: a function that takes batch_output and batch_labels and computes the loss for the batch
        dataloader: (DataLoader) a torch.utils.data.DataLoader object that fetches evaluation data
        metrics: (dict) a dictionary of functions that compute a metric using the output and labels of each batch
        params: (Params) hyperparameters
        writer: (optional) a summary writer (e.g. TensorBoard); if given, mean metrics are logged to it
        global_step: (int) step value passed to the writer when logging
    Returns:
        metrics_mean: (dict) mean value of each metric (and the loss) over all batches
    """
    # set model to evaluation mode
    model.eval()

    # pick the device once for the whole evaluation loop
    device = torch.device('cuda' if params.cuda else 'cpu')
    dtype = torch.float32  # inputs and labels are handled as float32 throughout

    # summary for current eval loop
    summ = []

    # no gradients are needed during evaluation
    with torch.no_grad():
        # compute metrics over the dataset
        for data_batch, labels_batch in dataloader:
            # each batch is a tuple of three input tensors; move them
            # (and the labels) to the target device
            x, x2, x3 = data_batch
            x = x.to(device=device, dtype=dtype)
            x2 = x2.to(device=device, dtype=dtype)
            x3 = x3.to(device=device, dtype=dtype)
            data_batch = (x, x2, x3)
            labels_batch = labels_batch.to(device=device, dtype=dtype)

            # compute model output and loss
            output_batch = model(data_batch)
            loss = loss_fn(output_batch, labels_batch)

            # move to cpu and convert to numpy arrays for the metric functions
            output_batch = output_batch.detach().cpu().numpy()
            labels_batch = labels_batch.detach().cpu().numpy()

            # compute all metrics on this batch
            summary_batch = {metric: metrics[metric](output_batch, labels_batch)
                             for metric in metrics}
            summary_batch['loss'] = loss.item()
            summ.append(summary_batch)
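    # `summ` now holds one {metric_name: value} dict per batch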
    # compute mean of each metric over all batches
    metrics_mean = {metric: np.mean([x[metric] for x in summ]) for metric in summ[0]}
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v) for k, v in metrics_mean.items())
    logging.info("- Eval metrics : " + metrics_string)

    if writer is not None:
        for k, v in metrics_mean.items():
            # dollar_value is reported in the log but not written to the summary
            if k != "dollar_value":
                writer.add_scalar(tag=k, global_step=global_step, scalar_value=v)
    return metrics_mean
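
# A minimal usage sketch with TensorBoard logging (assuming
# torch.utils.tensorboard.SummaryWriter; `val_dl` and `epoch` are placeholders,
# not defined in this file):
#   writer = SummaryWriter(log_dir=os.path.join(model_dir, 'tb'))
#   evaluate(model, loss_fn, val_dl, metrics, params, writer=writer, global_step=epoch)
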
def runEvaluate(model_dir, data_dir, restore_file):
    """
    Evaluate the model on the test set.
    """
    # Load the parameters
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(231)
    if params.cuda:
        torch.cuda.manual_seed(231)

    # Get the logger
    utils.set_logger(os.path.join(model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # fetch dataloaders
    dataloaders = data_loader.fetch_dataloader(['test'], data_dir, params)
    test_dl = dataloaders['test']
    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)
    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")

    # Reload weights from the saved file
    utils.load_checkpoint(os.path.join(model_dir, restore_file + '.pth.tar'), model)

    # Evaluate
    test_metrics = evaluate(model, loss_fn, test_dl, metrics, params)
    save_path = os.path.join(model_dir, "metrics_test_{}.json".format(restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
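    # Note: assuming utils.save_dict_to_json serializes the dict as-is, the
    # saved file is a flat {metric_name: value} JSON mapping, e.g.
    # {"loss": 0.123} (illustrative values).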


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', default='data/HOUSES_SPLIT,data/HOUSES_SATELLITE_SPLIT',
                        help="Comma-separated list of directories containing the datasets")
    parser.add_argument('--model_dir', default='experiments/MSE/both_images/',
                        help="Directory containing params.json")
    parser.add_argument('--restore_file', default='best',
                        help="name of the file in --model_dir containing weights to load")
    args = parser.parse_args()

    # --data_dir may name several datasets, separated by commas
    data_dir = args.data_dir.split(",")
    print("data dirs:", data_dir)
    print("model dir:", args.model_dir)
    runEvaluate(model_dir=args.model_dir, data_dir=data_dir, restore_file=args.restore_file)