Example #1
def get_pred_interval(predictions, criterion, targets):
    """
    Get the upper and lower limits and expected values of predictions

    Parameters
    ----------
    predictions : torch.Tensor
        The predicted values
    criterion : callable
        One of the metrics from utils.eval_metrics
    targets : torch.Tensor
        The actual (not predicted) values

    Returns
    -------
    torch.Tensor
        The upper limit of the predictions
    torch.Tensor
        The lower limit of the predictions
    torch.Tensor
        The expected values of the predictions
    """
    # Better future solution: save the criterion as a 'loss' class object with a 'name' attribute
    # detect the criterion from its string representation
    if ('nll_gaus' in str(criterion)) or ('crps' in str(criterion)):
        # loss_type = 'nll_gaus'
        expected_values = predictions[:, :, 0:1]  # expected values: mu
        sigma = torch.sqrt(predictions[:, :, -1:].exp())  # sigma = sqrt(exp(log_variance))
        # TODO: make the 95% prediction interval configurable
        y_pred_upper = expected_values + 1.96 * sigma
        y_pred_lower = expected_values - 1.96 * sigma
    elif 'quantile' in str(criterion):
        #loss_type = 'pinball'
        y_pred_lower = predictions[:, :, 0:1]
        y_pred_upper = predictions[:, :, 1:2]
        expected_values = predictions[:, :, -1:]
    elif 'rmse' in str(criterion):
        expected_values = predictions
        rmse = metrics.rmse(targets, expected_values.unsqueeze(0))
        # To cover roughly 95% of the error magnitudes, the prediction interval
        # is approximated as the model output +/- 2 * RMSE.
        y_pred_lower = expected_values - 2 * rmse
        y_pred_upper = expected_values + 2 * rmse
    elif ('mse' in str(criterion)) or ('mape' in str(criterion)):
        # loss_type = 'mis'
        expected_values = predictions
        # point forecast only: use dummy interval bounds
        y_pred_lower = 0
        y_pred_upper = 1

        # TODO: add all criterion possibilities
    else:
        raise ValueError('Invalid criterion: ' + str(criterion))

    return y_pred_upper, y_pred_lower, expected_values
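
A minimal usage sketch for the Gaussian-NLL branch follows. The dummy criterion, the toy tensors, and the assumption that get_pred_interval is importable from this module are all illustrative, not part of the original code.

import torch


def nll_gaus_dummy(*args):
    # stand-in criterion; get_pred_interval only inspects its name via str()
    pass


batch_size, horizon = 4, 24
mu = torch.zeros(batch_size, horizon, 1)
log_var = torch.zeros(batch_size, horizon, 1)  # exp(0) = 1, so sigma = 1
predictions = torch.cat([mu, log_var], dim=2)  # channel 0: mu, channel -1: log-variance
targets = torch.zeros(batch_size, horizon, 1)

upper, lower, expected = get_pred_interval(predictions, nll_gaus_dummy, targets)
# with sigma = 1 the bounds are mu +/- 1.96, i.e. a 95% Gaussian prediction interval
assert torch.allclose(upper, expected + 1.96)
assert torch.allclose(lower, expected - 1.96)
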
Example #2
        final_clusters = clusters_contours.reshape(k, coef_size - 1)
        print(final_clusters)
        for xi in range(len(clusters)):
            cluster_indx = clusters[xi] - 1
            X1[xi, 1:] = final_clusters[cluster_indx, :]

        template_dct_features = X1.reshape(len(dct_features), )

        ### reconstruction of f0 file ###
        recons_f0_contour = prosody_funcs.DCT_reconstruction(
            template_dct_features, stat_features, coef_size)

        ### evaluation metrics ###
        print('RMSE: ' + str(
            eval_metrics.rmse(recons_f0_contour,
                              interp_f0_arr[0:len(recons_f0_contour)])))
        print('CORR: ' + str(
            eval_metrics.corr(recons_f0_contour,
                              interp_f0_arr[0:len(recons_f0_contour)])))

        ### plot ###
        #prosody_funcs.plot_dct(dct_features)
        prosody_funcs.plot_DBR(interp_f0_arr, recons_f0_contour)

### Directory of files processing ###
    DFP = True
    if DFP:

        prosodydecomp = True
        hierarcluster = True
        templatefeats = False
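
For reference, the RMSE and CORR values printed above are the standard root-mean-square error and Pearson correlation. A minimal NumPy sketch of these definitions (assuming eval_metrics.rmse and eval_metrics.corr follow them) is:

import numpy as np


def rmse(reference, prediction):
    # root-mean-square error between two equal-length 1-D arrays
    reference = np.asarray(reference, dtype=float)
    prediction = np.asarray(prediction, dtype=float)
    return np.sqrt(np.mean((reference - prediction) ** 2))


def corr(reference, prediction):
    # Pearson correlation coefficient between two equal-length 1-D arrays
    return np.corrcoef(np.asarray(reference, dtype=float),
                       np.asarray(prediction, dtype=float))[0, 1]


# e.g. rmse(recons_f0_contour, interp_f0_arr[:len(recons_f0_contour)])
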
Example #3
            net, test_data_loader, horizon, number_of_targets)

        net.eval()
        # TODO: check the model type (e.g. gnll)
        criterion = net.criterion
        # get metrics parameters
        y_pred_upper, y_pred_lower, record_expected_values = mh.get_pred_interval(
            record_output, criterion, df[target_id])

        # rescale(test_output, test_targets)
        # dt.rescale_manually(..)

        # calculate the metrics
        mse_horizon = metrics.mse(record_targets, [record_expected_values],
                                  total=False)
        rmse_horizon = metrics.rmse(record_targets, [record_expected_values],
                                    total=False)
        sharpness_horizon = metrics.sharpness(None,
                                              [y_pred_upper, y_pred_lower],
                                              total=False)
        coverage_horizon = metrics.picp(record_targets,
                                        [y_pred_upper, y_pred_lower],
                                        total=False)
        mis_horizon = metrics.mis(record_targets, [y_pred_upper, y_pred_lower],
                                  alpha=0.05,
                                  total=False)
        # collect total metrics, disregarding the development over the horizon
        mse = metrics.mse(record_targets, [record_expected_values])
        rmse = metrics.rmse(record_targets, [record_expected_values])
        mase = metrics.mase(record_targets, [record_expected_values], 7 * 24)
        rae = metrics.rae(record_targets, [record_expected_values])
        mae = metrics.mae(record_targets, [record_expected_values])
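
The fragment above evaluates each metric twice: resolved over the forecast horizon (total=False) and aggregated into a single value. A minimal torch sketch of that distinction for RMSE (the exact reductions inside utils.eval_metrics may differ) is:

import torch

# toy data: (samples, horizon) targets and point forecasts
targets = torch.randn(100, 24)
expected = targets + 0.1 * torch.randn(100, 24)

# total=True style: one scalar over all samples and horizon steps
rmse_total = torch.sqrt(torch.mean((targets - expected) ** 2))

# total=False style: one value per step of the forecast horizon
rmse_per_step = torch.sqrt(torch.mean((targets - expected) ** 2, dim=0))

print(rmse_total.item(), rmse_per_step.shape)  # scalar vs. tensor of length 24
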
Example #4
                    op1 = open(output_dur_file, 'w')
                    for j in range(len(pred_dur_features)):
                        fstr = ' '.join(map(str, pred_dur_features[j][0:5]))
                        op1.write(fstr + '\n')
                    op1.close()

        if modifyLabels:
            if not os.path.exists(out_mod_lab_dir):
                os.makedirs(out_mod_lab_dir)
            filelist = os.path.join(work_dir,
                                    'Data/fileList/' + speaker + '_test.scp')
            list_of_files = io_funcs.load_file_list(filelist)
            modify_phone_labels(label_align_dir, out_dur_dir, out_mod_lab_dir,
                                list_of_files)

        if skip_pause:
            ph_all_files = np.array(ph_all_files)
            org_dur_all_files[ph_all_files == 'pau'] = 0
            pred_dur_all_files[ph_all_files == 'pau'] = 0
            org_dur_all_files = org_dur_all_files[org_dur_all_files > 0]
            pred_dur_all_files = pred_dur_all_files[pred_dur_all_files > 0]

        if calcRMSE:
            ### evaluation metrics ###
            rmse_error = eval_metrics.rmse(org_dur_all_files,
                                           pred_dur_all_files)
            print('RMSE: ' + str(rmse_error))
            print('CORR: ' + str(
                eval_metrics.corr(org_dur_all_files, pred_dur_all_files)))
def eval_forecast(forecasts,
                  endog_val,
                  upper_limits,
                  lower_limits,
                  seasonality=24,
                  alpha=0.05,
                  total=True):
    """
    Calculate evaluation metrics

    Returned values depend on the value of the 'total' parameter.

    - When total is True, returns overall values calculated using the following metrics:
      mse, rmse, mase, rae, mae, sharpness, coverage, mis, qs
    - When total is False, returns values over the horizon, calculated using the following metrics:
      rmse, sharpness, coverage, mis

    Parameters
    ----------
    forecasts : array_like
        The calculated forecasts
    endog_val : array_like
        The reference or measured target variable, if available
    upper_limits : array_like
        The upper interval for the given forecasts
    lower_limits : array_like
        The lower interval for the given forecasts
    seasonality : int, default = 24
        The seasonality of the data
    alpha : float, default = 0.05
        The significance level for the prediction interval (required for MIS)
    total : bool, default = True
        - When total is set to True, metrics calculate an overall value
        - When total is set to False, metrics are calculated over the horizon

    Returns
    -------
    ### If **total** is True:
    torch.Tensor
        overall value calculated using mse
    torch.Tensor
        overall value calculated using rmse
    torch.Tensor
        overall value calculated using mase
    torch.Tensor
        overall value calculated using rae
    torch.Tensor
        overall value calculated using mae
    torch.Tensor
        overall value calculated using sharpness
    torch.Tensor
        overall value calculated using coverage
    torch.Tensor
        overall value calculated using mis
    torch.Tensor
        overall value calculated using qs

    ### If **total** is False:
    torch.Tensor
        rmse over the horizon
    torch.Tensor
        sharpness over the horizon
    torch.Tensor
        coverage over the horizon
    torch.Tensor
        mis over the horizon
    """
    # TODO: the parameter 'seasonality' is unused
    forecasts = torch.tensor(forecasts)
    true_values = torch.tensor(endog_val)
    upper_limits = torch.tensor(upper_limits)
    lower_limits = torch.tensor(lower_limits)
    #torch.tensor([i[target].T.values for i in output_matrix]).reshape(forecasts.shape).type(torch.FloatTensor)
    #true_values = torch.tensor([i.T.values for i in endog_val).reshape(forecasts.shape).type(torch.FloatTensor)
    #upper_limits = torch.tensor([i.values for i in upper_limits]).reshape(forecasts.shape).type(torch.FloatTensor)
    #lower_limits = torch.tensor([i.values for i in lower_limits]).reshape(forecasts.shape).type(torch.FloatTensor)

    if total:
        mse = metrics.mse(true_values, [forecasts])
        rmse = metrics.rmse(true_values, [forecasts])
        mase = metrics.mase(
            true_values, [forecasts], 7 * 24
        )  # MASE always needs a reference value; here a seasonality of 7 * 24 time steps is assumed
        rae = metrics.rae(true_values, [forecasts])
        mae = metrics.mae(true_values, [forecasts])
        sharpness = metrics.sharpness(None, [upper_limits, lower_limits])
        coverage = metrics.picp(true_values, [upper_limits, lower_limits])
        mis = metrics.mis(true_values, [upper_limits, lower_limits],
                          alpha=alpha)
        qs = metrics.pinball_loss(true_values, [upper_limits, lower_limits],
                                  [0.025, 0.975])
        return mse, rmse, mase, rae, mae, sharpness, coverage, mis, qs
    else:
        rmse_horizon = metrics.rmse(true_values, [forecasts], total)
        sharpness_horizon = metrics.sharpness(None,
                                              [upper_limits, lower_limits],
                                              total)
        coverage_horizon = metrics.picp(true_values,
                                        [upper_limits, lower_limits], total)
        mis_horizon = metrics.mis(true_values, [upper_limits, lower_limits],
                                  alpha=alpha,
                                  total=total)
        return rmse_horizon, sharpness_horizon, coverage_horizon, mis_horizon
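
A minimal call sketch for eval_forecast follows. The toy arrays, the fixed +/- 2 interval, and the (samples, horizon) shape are illustrative assumptions; the horizon is chosen larger than the 7 * 24 seasonality hard-coded for MASE.

import numpy as np

samples, horizon = 10, 336
forecasts = np.random.rand(samples, horizon)
actuals = forecasts + 0.1 * np.random.randn(samples, horizon)
upper_limits = forecasts + 2.0  # toy symmetric interval around the point forecast
lower_limits = forecasts - 2.0

# overall scores
mse, rmse, mase, rae, mae, sharpness, coverage, mis, qs = eval_forecast(
    forecasts, actuals, upper_limits, lower_limits, alpha=0.05, total=True)

# scores resolved over the forecast horizon
rmse_h, sharpness_h, coverage_h, mis_h = eval_forecast(
    forecasts, actuals, upper_limits, lower_limits, alpha=0.05, total=False)
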
Example #8
def performance_test(net,
                     data_loader,
                     score_type='mis',
                     option=0.05,
                     avg_on_horizon=True,
                     horizon=1,
                     number_of_targets=1):
    """
    Determine the score of the given model, using the metric from utils.eval_metrics specified by score_type

    Return a single float value (total score) or a 1-D array (score over horizon) based on
    the value of avg_on_horizon

    Parameters
    ----------
    net : utils.fc_network.EncoderDecoder
        The model for which the score is to be determined
    data_loader : utils.tensorloader.CustomTensorDataLoader
        Contains the input data and targets
    score_type : string, default = 'mis'
        The name of the metric to use for scoring. See functions in utils.eval_metrics for
        possible values.
    option : float or list, default = 0.05
        An optional parameter used by the 'mis' and 'quantile_score' metrics. For 'mis' it is
        the value of 'alpha'; for 'quantile_score' it is the list of quantiles.
    avg_on_horizon : bool, default = True
        Determines whether the return value is a float (total score, when True) or a 1-D array
        (score over the horizon, when False).
    horizon : int, default = 1
        The horizon for the prediction
    number_of_targets : int, default = 1
        The number of targets for the prediction.

    Returns
    -------
    float or 1-D array (torch.Tensor)
        Either the total score (float) or the score over the horizon (array), depending on the
        value of avg_on_horizon
    """
    # check performance
    targets, raw_output = get_prediction(
        net, data_loader, horizon,
        number_of_targets)  # data_loader should be the test data loader
    [y_pred_upper, y_pred_lower,
     expected_values] = get_pred_interval(raw_output, net.criterion, targets)
    # get the upper and lower prediction interval, depending on the loss function used for training
    if ('mis' in str(score_type)):
        output = [y_pred_upper, y_pred_lower]
        score = metrics.mis(targets,
                            output,
                            alpha=option,
                            total=avg_on_horizon)
    elif ('nll_gauss' in str(score_type)):
        output = raw_output  # only valid if the net was trained with Gaussian NLL or CRPS
        score = metrics.nll_gauss(targets, output, total=avg_on_horizon)
    elif ('quant' in str(score_type)):
        output = expected_values
        score = metrics.quantile_score(targets,
                                       output,
                                       quantiles=option,
                                       total=avg_on_horizon)
    elif ('crps' in str(score_type)):
        output = raw_output  # only valid if the net was trained with Gaussian NLL or CRPS
        score = metrics.crps_gaussian(targets, output, total=avg_on_horizon)
    elif (score_type == 'mse'):
        output = expected_values
        score = metrics.mse(targets, output, total=avg_on_horizon)
    elif (score_type == 'rmse'):
        output = expected_values
        score = metrics.rmse(targets,
                             output.unsqueeze(0),
                             total=avg_on_horizon)
    elif ('mape' in str(score_type)):
        output = expected_values
        score = metrics.mape(targets, output, total=avg_on_horizon)
    else:
        # TODO: handle an undefined performance score (e.g. raise an exception here)
        score = None
    return score
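
A minimal usage sketch for performance_test; net and test_data_loader are assumed to come from the surrounding project (a trained utils.fc_network.EncoderDecoder and a utils.tensorloader.CustomTensorDataLoader, as named in the docstring), and the horizon value is a placeholder.

# single aggregated mean interval score over the test set
mis_score = performance_test(net, test_data_loader, score_type='mis', option=0.05,
                             avg_on_horizon=True, horizon=24, number_of_targets=1)

# per-step RMSE over the forecast horizon instead of one aggregate value
rmse_over_horizon = performance_test(net, test_data_loader, score_type='rmse',
                                     avg_on_horizon=False, horizon=24, number_of_targets=1)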