Example #1
def compute_loss(labels,
                 output,
                 src,
                 criterion,
                 validation_dataset,
                 probabilistic=None,
                 output_std=None,
                 m=1):
    # Warning: this assumes the src target is 1-D
    if not probabilistic and isinstance(output, torch.Tensor):
        if len(labels.shape) != len(output.shape):
            print("Mismatched shapes -> labels:", labels.shape, "output:", output.shape)
            if len(labels.shape) > 1:
                if labels.shape[1] == output.shape[1]:
                    labels = labels.unsqueeze(2)
                else:
                    labels = labels.unsqueeze(0)
    if probabilistic:
        if not isinstance(output_std, torch.Tensor):
            print("Converted output_std to a tensor")
            output_std = torch.from_numpy(output_std)
        if not isinstance(output, torch.Tensor):
            output = torch.from_numpy(output)
        output_dist = torch.distributions.Normal(output, output_std)
    if validation_dataset:
        if probabilistic:
            unscaled_out = validation_dataset.inverse_scale(output)
            try:
                output_std = numpy_to_tvar(output_std)
            except Exception:
                pass
            output_dist = torch.distributions.Normal(unscaled_out, output_std)
        else:
            output = validation_dataset.inverse_scale(output.cpu())
            labels = validation_dataset.inverse_scale(labels.cpu())
            src = validation_dataset.inverse_scale(src.cpu())
    if probabilistic:
        loss = -output_dist.log_prob(labels.float()).sum()  # FIX THIS
        loss = loss.numpy()
    elif isinstance(criterion, GaussianLoss):
        g_loss = GaussianLoss(output[0], output[1])
        loss = g_loss(labels)
    elif isinstance(criterion, MASELoss):
        assert len(labels.shape) == len(output.shape)
        loss = criterion(labels.float(), output, src, m)
    else:
        assert len(labels.shape) == len(output.shape)
        assert labels.shape[0] == output.shape[0]
        loss = criterion(output, labels.float())
    return loss
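
Below is a minimal usage sketch for this version of compute_loss, assuming only standard PyTorch. The shapes and the MSELoss criterion are illustrative; passing validation_dataset=None skips the unscaling branch entirely.

import torch

batch_size, seq_len = 4, 20
labels = torch.randn(batch_size, seq_len)
output = torch.randn(batch_size, seq_len)
src = torch.randn(batch_size, seq_len)  # only consumed by the MASELoss branch

# With matching shapes and a plain criterion, the call falls through to the
# final branch: criterion(output, labels.float())
criterion = torch.nn.MSELoss()
loss = compute_loss(labels, output, src, criterion, validation_dataset=None)
print(loss.item())
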
Example #2
def torch_single_train(model: PyTorchForecast,
                       opt: optim.Optimizer,
                       criterion: Type[torch.nn.modules.loss._Loss],
                       data_loader: DataLoader,
                       takes_target: bool,
                       meta_data_model: PyTorchForecast,
                       meta_data_model_representation: torch.Tensor,
                       forward_params: Dict = None) -> float:
    # Use None instead of a mutable default: forward_params is mutated below
    if forward_params is None:
        forward_params = {}
    i = 0
    running_loss = 0.0
    for src, trg in data_loader:
        opt.zero_grad()
        # Convert to CPU/GPU/TPU
        src = src.to(model.device)
        trg = trg.to(model.device)
        # TODO: figure out how to avoid recomputing this every iteration
        if meta_data_model:
            representation = meta_data_model.model.generate_representation(
                meta_data_model_representation)
            forward_params["meta_data"] = representation
        if takes_target:
            forward_params["t"] = trg
        output = model.model(src, **forward_params)
        labels = trg[:, :, 0]
        if isinstance(criterion, GaussianLoss):
            g_loss = GaussianLoss(output[0], output[1])
            loss = g_loss(labels)
        else:
            loss = criterion(output, labels.float())
        # TODO: fix Gaussian loss
        if loss > 100:
            print("Warning: high loss detected")
        # Check for a bad loss before backpropagating so NaN/inf gradients
        # never reach the optimizer step
        if torch.isnan(loss) or torch.isinf(loss):
            raise ValueError(
                "Error: infinite or NaN loss detected. Try normalizing data or performing interpolation"
            )
        loss.backward()
        opt.step()
        running_loss += loss.item()
        i += 1
    print("The running loss is:")
    print(running_loss)
    print("The number of items in train is: ")
    print(i)
    total_loss = running_loss / float(i)
    return total_loss
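
A hedged sketch of driving torch_single_train across epochs. The model and DataLoader setup here are placeholders (in flood_forecast these are normally built from a configuration file), so treat the names forecast_model and train_loader as assumptions.

import torch
import torch.optim as optim

n_epochs = 5  # hypothetical value
# forecast_model (a PyTorchForecast) and train_loader are assumed to exist
opt = optim.Adam(forecast_model.model.parameters(), lr=1e-3)
criterion = torch.nn.MSELoss()
for epoch in range(n_epochs):
    epoch_loss = torch_single_train(
        forecast_model, opt, criterion, train_loader,
        takes_target=False, meta_data_model=None,
        meta_data_model_representation=None)
    print(f"Epoch {epoch}: mean training loss {epoch_loss:.4f}")
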
Example #3
def compute_loss(labels,
                 output,
                 src,
                 criterion,
                 validation_dataset,
                 probabilistic=None,
                 output_std=None,
                 m=1):
    """Function for computing the loss

    :param labels: The real values for the target. Shape can be variable but should follow (batch_size, time)
    :type labels: torch.Tensor
    :param output: The output of the model
    :type output: torch.Tensor
    :param src: The source values (only really needed for the MASELoss function)
    :type src: torch.Tensor
    :param criterion: [description]
    :type criterion: [type]
    :param validation_dataset: Only passed when unscaling of data is needed.
    :type validation_dataset: torch.utils.data.dataset
    :param probabilistic: Whether the model is a probabalistic returns a distribution, defaults to None
    :type probabilistic: [type], optional
    :param output_std: The standard distribution, defaults to None
    :type output_std: [type], optional
    :param m: The number of targs defaults to 1
    :type m: int, optional
    :return: Returns the computed loss
    :rtype: float
"""
    if isinstance(criterion, GaussianLoss):
        if len(output[0].shape) > 2:
            g_loss = GaussianLoss(output[0][:, :, 0], output[1][:, :, 0])
        else:
            g_loss = GaussianLoss(output[0][:, 0], output[1][:, 0])
        loss = g_loss(labels)
        return loss
    if not probabilistic and isinstance(output, torch.Tensor):
        if len(labels.shape) != len(output.shape):
            if len(labels.shape) > 1:
                if labels.shape[1] == output.shape[1]:
                    labels = labels.unsqueeze(2)
                else:
                    labels = labels.unsqueeze(0)
    if probabilistic:
        if not isinstance(output_std, torch.Tensor):
            print("Converted output_std to a tensor")
            output_std = torch.from_numpy(output_std)
        if not isinstance(output, torch.Tensor):
            output = torch.from_numpy(output)
        output_dist = torch.distributions.Normal(output, output_std)
    if validation_dataset:
        src, output, labels, output_dist = handle_scaling(
            validation_dataset, src, output, labels, probabilistic, m,
            output_std)
    if probabilistic:
        loss = -output_dist.log_prob(labels.float()).sum()  # FIX THIS?
    elif isinstance(criterion, MASELoss):
        assert len(labels.shape) == len(output.shape)
        loss = criterion(labels.float(), output, src, m)
    else:
        assert len(labels.shape) == len(output.shape)
        assert labels.shape[0] == output.shape[0]
        loss = criterion(output, labels.float())
    return loss
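
The probabilistic branch is easiest to see in isolation. A minimal sketch, assuming the model emitted a mean and standard deviation as NumPy arrays: compute_loss converts both to tensors and scores the labels under a Normal distribution.

import numpy as np
import torch

labels = torch.randn(4, 20)
mean = np.random.randn(4, 20)
std = np.abs(np.random.randn(4, 20)) + 0.1  # a standard deviation must be positive

# Returns the summed negative log-likelihood of the labels under
# Normal(mean, std); src and criterion are unused on this path
nll = compute_loss(labels, mean, src=None, criterion=None,
                   validation_dataset=None, probabilistic=True,
                   output_std=std)
print(float(nll))
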
Example #4
def evaluate_model(
    model: Type[TimeSeriesModel],
    model_type: str,
    target_col: List[str],
    evaluation_metrics: List,
    inference_params: Dict,
    eval_log: Dict,
) -> Tuple[Dict, pd.DataFrame, int, pd.DataFrame]:
    """
    A function to evaluate a model. Called automatically at end of training.
    Can be imported for continuing to evaluate a model in other places as well.


    .. highlight:: python
    .. code-block:: python

        from flood_forecast.evaluator import evaluate_model
        forecast_model = PyTorchForecast()
        evaluate_model(forecast_model, )
        ...
    '''
    """
    if model_type == "PyTorch":
        (
            df_train_and_test,
            end_tensor,
            forecast_history,
            forecast_start_idx,
            test_data,
            df_predictions,
            # df_prediction_samples_std_dev,
        ) = infer_on_torch_model(model, **inference_params)
        # TODO: turn this into a general function
        g_loss = False
        probabilistic = "probabilistic" in inference_params
        if isinstance(end_tensor, tuple) and not probabilistic:
            end_tensor_0 = end_tensor[1]
            end_tensor = end_tensor[0]
            g_loss = True
        print("transform end tens preform")
        if test_data.scale:
            print("Un-transforming data")
            if probabilistic:
                print("Running probabilistic unscaling on infer_on_torch_model output")
                end_tensor_mean = test_data.inverse_scale(end_tensor[0].detach().reshape(-1, 1))
                end_tensor_list = flatten_list_function(end_tensor_mean.numpy().tolist())
                end_tensor_mean = end_tensor_mean.squeeze(1)
            else:
                if "n_targets" in model.params:
                    end_tensor = test_data.inverse_scale(end_tensor.detach())
                else:
                    end_tensor = test_data.inverse_scale(end_tensor.detach().reshape(-1, 1))
                end_tensor_list = flatten_list_function(end_tensor.numpy().tolist())
                end_tensor = end_tensor.squeeze(1)  # Removing extra dim from reshape?
            history_length = model.params["dataset_params"]["forecast_history"]
            if "n_targets" in model.params:
                df_train_and_test.loc[df_train_and_test.index[history_length:],
                                      "preds"] = end_tensor[:, 0].numpy().tolist()
                for i, target in enumerate(target_col):
                    df_train_and_test["pred_" + target] = 0
                    df_train_and_test.loc[df_train_and_test.index[history_length:],
                                          "pred_" + target] = end_tensor[:, i].numpy().tolist()
            else:
                df_train_and_test.loc[df_train_and_test.index[history_length:], "preds"] = end_tensor_list
                df_train_and_test["pred_" + target_col[0]] = 0
                df_train_and_test.loc[df_train_and_test.index[history_length:],
                                      "pred_" + target_col[0]] = end_tensor_list
        print("Current historical dataframe ")
        print(df_train_and_test)
    for evaluation_metric in model.crit:
        idx = 0
        for target in target_col:
            labels = torch.from_numpy(df_train_and_test[target][forecast_history:].to_numpy())
            evaluation_metric_function = evaluation_metric
            if "probabilistic" in inference_params:
                s = evaluation_metric_function(
                    torch.distributions.Normal(end_tensor[0], end_tensor[1][0]),
                    labels,
                )
            elif isinstance(evaluation_metric_function, MASELoss):
                s = evaluation_metric_function(
                    labels,
                    end_tensor,
                    torch.from_numpy(
                        df_train_and_test[target][:forecast_history].to_numpy()
                    )
                )
            elif g_loss:
                g = GaussianLoss(end_tensor.unsqueeze(1), end_tensor_0.unsqueeze(1))
                s = g(labels.unsqueeze(1))
            else:
                if "n_targets" in model.params:
                    s = evaluation_metric_function(
                        labels,
                        end_tensor[:, idx],
                    )
                else:
                    s = evaluation_metric_function(
                        labels,
                        end_tensor,
                    )
            idx += 1
            eval_log[target + "_" + evaluation_metric.__class__.__name__] = s

    # Explain model behaviour using shap
    if "probabilistic" in inference_params:
        print("Probabilistic explainability currently not supported.")
    elif "n_targets" in model.params:
        print("Multitask forecasting support coming soon")
    elif g_loss:
        print("SHAP not yet supported for these models with multiple outputs")
    else:
        deep_explain_model_summary_plot(
            model, test_data, inference_params["datetime_start"]
        )
        deep_explain_model_heatmap(model, test_data, inference_params["datetime_start"])

    return eval_log, df_train_and_test, forecast_start_idx, df_predictions
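
A hedged sketch of calling evaluate_model after training. The inference_params keys mirror what the function reads (it checks for "probabilistic" and uses "datetime_start"); the remaining values and the target column name are placeholders, not documented defaults.

# forecast_model is assumed to be a fitted PyTorchForecast instance
inference_params = {
    "datetime_start": "2020-01-01",   # read by the SHAP explainability calls
    # "probabilistic": True,          # enable for probabilistic models
}
eval_log, df_train_and_test, forecast_start_idx, df_predictions = evaluate_model(
    forecast_model,
    model_type="PyTorch",
    target_col=["cfs"],        # hypothetical target column
    evaluation_metrics=[],     # metrics are actually read from model.crit here
    inference_params=inference_params,
    eval_log={},
)
print(eval_log)
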
Example #5
def compute_validation(validation_loader: DataLoader,
                       model,
                       epoch: int,
                       sequence_size: int,
                       criterion: Type[torch.nn.modules.loss._Loss],
                       device: torch.device,
                       decoder_structure=False,
                       meta_data_model=None,
                       use_wandb: bool = False,
                       meta_model=None,
                       val_or_test="validation_loss",
                       probabilistic=False) -> float:
    """
    Function to compute the validation or the test loss
    """
    print('compute_validation')
    model.eval()
    loop_loss = 0.0
    with torch.no_grad():
        i = 0
        loss_unscaled_full = 0.0
        for src, targ in validation_loader:
            src = src.to(device)
            targ = targ.to(device)
            i += 1
            if decoder_structure:
                if type(model).__name__ == "SimpleTransformer":
                    targ_clone = targ.detach().clone()
                    output = greedy_decode(
                        model, src, targ.shape[1], targ_clone,
                        device=device)[:, :, 0]
                else:
                    if probabilistic:
                        output, output_std = simple_decode(model,
                                                           src,
                                                           targ.shape[1],
                                                           targ,
                                                           1,
                                                           probabilistic=probabilistic)
                        output, output_std = output[:, :, 0], output_std[0]
                        output_dist = torch.distributions.Normal(output, output_std)
                    else:
                        output = simple_decode(model=model,
                                               src=src,
                                               max_seq_len=targ.shape[1],
                                               real_target=targ,
                                               output_len=1,
                                               probabilistic=probabilistic)[:, :, 0]
            else:
                if probabilistic:
                    output_dist = model(src.float())
                    output = output_dist.mean.detach().numpy()
                    output_std = output_dist.stddev.detach().numpy()
                else:
                    output = model(src.float())
            labels = targ[:, :, 0]
            validation_dataset = validation_loader.dataset
            if validation_dataset.scale:
                unscaled_labels = validation_dataset.inverse_scale(labels)
                if probabilistic:
                    unscaled_out = validation_dataset.inverse_scale(output)
                    try:
                        output_std = numpy_to_tvar(output_std)
                    except Exception:
                        pass
                    unscaled_dist = torch.distributions.Normal(unscaled_out, output_std)
                    loss_unscaled = -unscaled_dist.log_prob(unscaled_labels.float()).sum()  # FIX THIS
                    loss_unscaled_full += len(labels.float()) * loss_unscaled.item()
                else:
                    # unscaled_src = validation_dataset.scale.inverse_transform(src.cpu())
                    unscaled_out = validation_dataset.inverse_scale(output.cpu())
                    unscaled_labels = validation_dataset.inverse_scale(labels.cpu())
                    loss_unscaled = criterion(unscaled_out, unscaled_labels.float())
                    loss_unscaled_full += len(labels.float()) * loss_unscaled.item()
                if i % 10 == 0 and use_wandb:
                    wandb.log({"trg": unscaled_labels, "model_pred": unscaled_out})
            if probabilistic:
                loss = -output_dist.log_prob(labels.float()).sum()  # FIX THIS
                loss = loss.numpy()
            elif isinstance(criterion, GaussianLoss):
                g_loss = GaussianLoss(output[0], output[1])
                loss = g_loss(labels)
            else:
                loss = criterion(output, labels.float())
            loop_loss += len(labels.float()) * loss.item()
    if use_wandb:
        if loss_unscaled_full:
            tot_unscaled_loss = loss_unscaled_full / (len(validation_loader.dataset) - 1)
            wandb.log({'epoch': epoch,
                       val_or_test: loop_loss / (len(validation_loader.dataset) - 1),
                       "unscaled_" + val_or_test: tot_unscaled_loss})
        else:
            wandb.log({'epoch': epoch, val_or_test: loop_loss /
                       (len(validation_loader.dataset) - 1)})
    model.train()
    return loop_loss / (len(validation_loader.dataset) - 1)
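
A minimal sketch of calling compute_validation at the end of an epoch, assuming a validation DataLoader and a trained PyTorchForecast already exist; the sequence_size lookup key is an assumption about the params dictionary, not a documented field.

import torch

epoch = 1  # hypothetical epoch counter
val_loss = compute_validation(
    validation_loader,                 # assumed DataLoader yielding (src, targ)
    forecast_model.model,
    epoch,
    sequence_size=forecast_model.params["dataset_params"]["forecast_length"],  # assumed key
    criterion=torch.nn.MSELoss(),
    device=forecast_model.device,
    decoder_structure=False,
)
print(f"Validation loss: {val_loss:.4f}")
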
Example #6
def compute_loss(labels, output, src, criterion, validation_dataset, probabilistic=None, output_std=None, m=1):
    """Function for computing the loss

    :param labels: The real forecasted values
    :type labels: torch.Tensor
    :param output: The output of the model
    :type output: torch.Tensor
    :param src: The source values (only really needed for MASELoss)
    :type src: torch.Tensor
    :param criterion: [description]
    :type criterion: [type]
    :param validation_dataset: [description]
    :type validation_dataset: [type]
    :param probabilistic: [description], defaults to None
    :type probabilistic: [type], optional
    :param output_std: [description], defaults to None
    :type output_std: [type], optional
    :param m: [description], defaults to 1
    :type m: int, optional
    :return: [description]
    :rtype: [type]
    """
    if not probabilistic and isinstance(output, torch.Tensor):
        if len(labels.shape) != len(output.shape):
            if len(labels.shape) > 1:
                if labels.shape[1] == output.shape[1]:
                    labels = labels.unsqueeze(2)
                else:
                    labels = labels.unsqueeze(0)
    if probabilistic:
        if not isinstance(output_std, torch.Tensor):
            print("Converted output_std to a tensor")
            output_std = torch.from_numpy(output_std)
        if not isinstance(output, torch.Tensor):
            output = torch.from_numpy(output)
        output_dist = torch.distributions.Normal(output, output_std)
    if validation_dataset:
        if probabilistic:
            unscaled_out = validation_dataset.inverse_scale(output)
            try:
                output_std = numpy_to_tvar(output_std)
            except Exception:
                pass
            output_dist = torch.distributions.Normal(unscaled_out, output_std)
        elif m > 1:
            output = validation_dataset.inverse_scale(output.cpu())
            labels = validation_dataset.inverse_scale(labels.cpu())
        elif len(output.shape) == 3:
            output = output.cpu().numpy().transpose(0, 2, 1)
            labels = labels.cpu().numpy().transpose(0, 2, 1)
            output = validation_dataset.inverse_scale(torch.from_numpy(output))
            labels = validation_dataset.inverse_scale(torch.from_numpy(labels))
            src_numpy = src.cpu().numpy().transpose(0, 2, 1)
            src = validation_dataset.inverse_scale(torch.from_numpy(src_numpy))
        else:
            output = validation_dataset.inverse_scale(output.cpu().transpose(1, 0))
            labels = validation_dataset.inverse_scale(labels.cpu().transpose(1, 0))
            src = validation_dataset.inverse_scale(src.cpu().transpose(1, 0))
    if probabilistic:
        loss = -output_dist.log_prob(labels.float()).sum()  # FIX THIS
    elif isinstance(criterion, GaussianLoss):
        g_loss = GaussianLoss(output[0], output[1])
        loss = g_loss(labels)
    elif isinstance(criterion, MASELoss):
        assert len(labels.shape) == len(output.shape)
        loss = criterion(labels.float(), output, src, m)
    else:
        assert len(labels.shape) == len(output.shape)
        assert labels.shape[0] == output.shape[0]
        loss = criterion(output, labels.float())
    return loss
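
Finally, a sketch of the MASELoss branch, the only path that actually consumes src (the in-sample history used to scale the error). MASELoss comes from flood_forecast; the "mean" constructor argument and the shapes here are assumptions for illustration.

import torch

labels = torch.randn(4, 20)
output = torch.randn(4, 20)
src = torch.randn(4, 30)  # historical values preceding the forecast window

mase = MASELoss("mean")  # "mean" baseline is an assumed constructor argument
# Dispatches to the MASELoss branch: criterion(labels.float(), output, src, m)
loss = compute_loss(labels, output, src, mase, validation_dataset=None, m=1)
print(loss.item())
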