Example #1
    def ori_correc(self, overwrite=False):
        """Output corrected orientations based on the input directions.

        Expects two arrays of numbers (orientations and directions).
        """

        correc = convert180to360(self.orientations)  #Put orientations in a 360 degrees format
        flipped = flip(correc)  #Flips orientations
        minus = flip(correc, -90)
        plus = flip(correc, 90)

        #Calculate correlation for the 4 possibilities
        correl = dict()
        correl['flipped'] = ma.corrcoef(ma.masked_invalid(flipped),
                                        ma.masked_invalid(self.directions))[1, 0]
        correl['minus'] = ma.corrcoef(ma.masked_invalid(minus),
                                      ma.masked_invalid(self.directions))[1, 0]
        correl['plus'] = ma.corrcoef(ma.masked_invalid(plus),
                                     ma.masked_invalid(self.directions))[1, 0]
        correl['correc'] = ma.corrcoef(ma.masked_invalid(correc),
                                       ma.masked_invalid(self.directions))[1, 0]

        #get optimum
        opti = max(correl.items(), key=operator.itemgetter(1))[0]

        #Outputs better corrected orientations
        if opti == 'flipped':
            print("Orientations have been corrected and flipped 180 degrees.")
            print("Correlation is ", correl['flipped'])
            if overwrite:
                self.orientations = flipped
            else:
                return flipped

        elif opti == 'minus':
            print("Orientations have been corrected and flipped -90 degrees.")
            print("Correlation is ", correl['minus'])
            if overwrite:
                self.orientations = minus
            else:
                return minus

        elif opti == 'plus':
            print("Orientations have been corrected and flipped +90 degrees.")
            print("Correlation is ", correl['plus'])
            if overwrite:
                self.orientations = plus
            else:
                return plus

        elif opti == 'correc':
            print("Orientations have been corrected only.")
            print("Correlation is ", correl['correc'])
            if overwrite:
                self.orientations = correc
            else:
                return correc
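A hedged usage sketch for the method above; the instance name cells is hypothetical and assumes an object of the surrounding class with orientations and directions already populated:

# Hypothetical instance of the class defining ori_correc above.
best = cells.ori_correc()          # returns the best-correlated variant
cells.ori_correc(overwrite=True)   # or overwrite self.orientations in place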
Example #2
def pears_corr_obs(corr_obs_ts, corr_ts, use_log):
    """
    Pearson-Correlation of modeled and observed damages
    ----------
    corr_obs_ts : np.array
        observed damages
    corr_ts : np.array
        damages to be correlated
    use_log : string
        correlation in log space
    Returns
    -------
    CorrelationObject

    """
    if use_log:
        a = ma.masked_invalid(np.log10(corr_obs_ts).replace([-np.inf, np.inf],
                                                            [np.nan, np.nan]))
        b = ma.masked_invalid(np.log10(corr_ts))
        msk = (~a.mask & ~b.mask)
        corrcoef = ma.corrcoef(a[msk], b[msk])

        # corrcoef = stats.spearmanr(a[msk], b[msk])

    else:
        a = ma.masked_invalid(corr_obs_ts.replace([-np.inf, np.inf],
                                                  [np.nan, np.nan]))
        b = ma.masked_invalid(corr_ts)
        msk = (~a.mask & ~b.mask)
        corrcoef = ma.corrcoef(a[msk], b[msk])

        #  corrcoef = stats.spearmanr(a[msk], b[msk])

    return corrcoef
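A minimal usage sketch, assuming numpy.ma is imported as ma in the module; the observed series is passed as a pandas Series because the function calls .replace on it:

import numpy as np
import pandas as pd

obs = pd.Series([1.0, 2.0, np.nan, 4.0, 5.0])
mod = np.array([1.1, 1.9, 3.2, 4.1, 5.3])
r = pears_corr_obs(obs, mod, use_log=False)[0, 1]   # cross term of the 2x2 matrix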
Example #3
def do_analysis(medians_df, voa_pred_df, p533_pred_df):
    # Build a masked array of median values for each hour.  The mask hides missing UTC values.
    medians_ma = ma.masked_values(
        [medians_df['median_pwr'].get(utc, 1.e20) for utc in range(0, 24)],
        1.e20)
    #print(type(medians_ma))

    p533_corr = ma.corrcoef(medians_ma, np.array(p533_pred_df['rx_pwr']))[0, 1]
    p533_rmse = get_rmse(medians_ma, np.array(p533_pred_df['rx_pwr']))
    voa_corr = ma.corrcoef(medians_ma, np.array(voa_pred_df['rx_pwr']))[0, 1]
    voa_rmse = get_rmse(medians_ma, np.array(voa_pred_df['rx_pwr']))

    voacap_residuals = voa_pred_df['rx_pwr'].subtract(medians_df['median_pwr'])
    p533_residuals = p533_pred_df['rx_pwr'].subtract(medians_df['median_pwr'])

    # OR the mask with a mask for prob muf <= 0.03 (1 day) (True = value is masked.)
    medians_ma.mask = medians_ma.mask | np.array(
        [x <= 0.03 for x in voa_pred_df['muf_day'].tolist()])

    muf_day = np.array(voa_pred_df['muf_day'].tolist())
    p533_corr_gt_1d = ma.corrcoef(medians_ma,
                                  np.array(p533_pred_df['rx_pwr']))[0, 1]
    p533_rmse_gt_1d = get_rmse(medians_ma, np.array(p533_pred_df['rx_pwr']))
    voa_corr_gt_1d = ma.corrcoef(medians_ma,
                                 np.array(voa_pred_df['rx_pwr']))[0, 1]
    voa_rmse_gt_1d = get_rmse(medians_ma, np.array(voa_pred_df['rx_pwr']))

    #p533_residuals_gt_1d = p533_pred_df['rx_pwr'].subtract(medians_df['median_pwr'])
    voacap_residuals_gt_1d = np.ma.masked_where(muf_day <= 0.03,
                                                voacap_residuals).compressed()
    #voacap_residuals_gt_1d = voa_pred_df['rx_pwr'].subtract(medians_df['median_pwr'])
    p533_residuals_gt_1d = np.ma.masked_where(muf_day <= 0.03,
                                              p533_residuals).compressed()

    voa_residual_mean_gt_1d = np.mean(voacap_residuals_gt_1d)
    voa_residual_sd_gt_1d = np.std(voacap_residuals_gt_1d)
    p533_residual_mean_gt_1d = np.mean(p533_residuals_gt_1d)
    p533_residual_sd_gt_1d = np.std(p533_residuals_gt_1d)

    #print(voacap_residuals)

    return ({
        "p533_rmse": p533_rmse,
        "p533_corr": p533_corr,
        "voa_rmse": voa_rmse,
        "voa_corr": voa_corr,
        "p533_rmse_gt_1d": p533_rmse_gt_1d,
        "p533_corr_gt_1d": p533_corr_gt_1d,
        "voa_rmse_gt_1d": voa_rmse_gt_1d,
        "voa_corr_gt_1d": voa_corr_gt_1d
    }, voacap_residuals, p533_residuals, voacap_residuals_gt_1d,
            p533_residuals_gt_1d)
Example #4
def crosscorrcoef(x, y):

    """Take the numpy.ma.corrcoef function that deals with missing data,
    and automatically return the cross-correlation element of the matrix."""

    # return cross correlation
    return corrcoef(x, y)[0, 1]
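A quick check of the helper, assuming corrcoef has been imported from numpy.ma as the surrounding module does:

import numpy as np
import numpy.ma as ma

x = ma.masked_invalid(np.array([1.0, 2.0, np.nan, 4.0]))
y = ma.masked_invalid(np.array([2.1, 3.9, 6.0, 8.2]))
print(crosscorrcoef(x, y))   # one scalar, close to 1.0 here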
Example #5
    def autocorr_sA_sB(self, sep=1):
        """Compute the autocorrelation across windows separated by a distance.

        NOTE : this is meant to be a faster
            alternative to the monte-carlo sampling approach

        """
        assert self.chrom_total_dict is not None
        corrs = []
        rec_dists = []
        for c in self.chrom_total_dict:
            # Grabbing the current version of the data for this chromosome
            cur_tot_data = self.chrom_total_dict[c]
            win_vars = cur_tot_data[0, :]
            rec_midpts = cur_tot_data[2, :]
            mask_weights = cur_tot_data[3, :]
            # Weight according to a mask - if it exists
            x = mask_weights * win_vars
            # Compute the empirical correlation here
            x1s = x[sep:]
            x2s = x[:-sep]
            # Setting the mask here
            a = ma.masked_invalid(x1s)
            b = ma.masked_invalid(x2s)
            corr_est = ma.corrcoef(a, b)[0, 1]
            # Should it be the mean or something else here
            rec_dist_mean = np.nanmean(rec_midpts[sep:] - rec_midpts[:-sep])
            corrs.append(corr_est)
            rec_dists.append(rec_dist_mean)
        corrs = np.array(corrs, dtype=np.float32)
        rec_dists = np.array(rec_dists, dtype=np.float32)
        return (rec_dists, corrs)
Example #6
def correlate_all(M):
  """Return all pairs correlation matrix.

  Note: dot product optimizations cannot be as readily applied
  due to different pairs discarded due to missing values per pair.
  """
  return ma.corrcoef(M)
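A hedged sketch of calling this on a small row-variable matrix with missing entries:

import numpy as np
import numpy.ma as ma

M = ma.masked_invalid(np.array([[1.0, 2.0, 3.0, 4.0],
                                [2.0, np.nan, 6.1, 7.9],
                                [4.0, 3.1, np.nan, 1.0]]))
R = correlate_all(M)   # 3x3 masked matrix of pairwise-complete correlations
print(R)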
Example #7
def get_mutual_info(x: np.array,
                    y: np.array,
                    n_bins: int = None,
                    normalize: bool = False) -> float:
    """
    Get mutual info score for x and y described in
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3512994&download=yes (p.16).

    :param x: (np.array) x vector
    :param y: (np.array) y vector
    :param n_bins: (int) number of bins for discretization, if None get number of bins based on correlation coefficient.
    :param normalize: (bool) True to normalize the result to [0, 1].
    :return: (float) mutual info score.
    """

    good_indices = ~(np.isnan(x) | np.isnan(y) | np.isinf(x) | np.isinf(y))
    if n_bins is None:
        import numpy.ma as ma
        corr_coef = ma.corrcoef(ma.masked_invalid(x),
                                ma.masked_invalid(y))[0][1]
        n_bins = get_optimal_number_of_bins(x[good_indices].shape[0],
                                            corr_coef=corr_coef)

    contingency = np.histogram2d(x[good_indices], y[good_indices], n_bins)[0]
    mutual_info = mutual_info_score(
        None, None, contingency=contingency)  # Mutual information
    if normalize is True:
        marginal_x = ss.entropy(np.histogram(x[good_indices],
                                             n_bins)[0])  # Marginal for x
        marginal_y = ss.entropy(np.histogram(y[good_indices],
                                             n_bins)[0])  # Marginal for y
        mutual_info /= min(marginal_x, marginal_y)
    return mutual_info
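A hedged usage sketch; it passes n_bins explicitly to sidestep the get_optimal_number_of_bins helper, and assumes mutual_info_score (scikit-learn) and scipy.stats as ss are imported as in the source module:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=1000)
y = x + rng.normal(scale=0.5, size=1000)
mi = get_mutual_info(x, y, n_bins=10, normalize=True)   # normalized to [0, 1]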
Example #8
def corr_coefficient(predictions,targets,bias=False):
    """Calculates the correlation coefficient (the 'r' in '-squared' between two series.
   
    For time series where the targets are serially correlated and may span only a fraction
    of the natural variability, this statistic may not be appropriate and Murphy (1988) explains
    why caution should be exercised in using this statistic.
    
    Parameters
    ----------
    predictions, targets : array_like
        Time series to analyze
    
    bias : boolean
        Deprecated and ignored; retained only for backward compatibility
    
    Returns
    -------
    r : float
        Correlation coefficient
    """
    
    
    from numpy.ma import corrcoef, masked_invalid
    y = masked_invalid(predictions)
    z = masked_invalid(targets)
    # ma.corrcoef's `bias` argument is deprecated and ignored in modern NumPy;
    # passing it positionally would actually bind to `rowvar`, so it is dropped.
    return corrcoef(y, z)[0, 1]
Example #9
def compute_hess_corr(eigval_col, eigvec_col, fdnm=""):
    posN = len(eigval_col)
    T0 = time()
    corr_mat_log = np.zeros((posN, posN))
    corr_mat_lin = np.zeros((posN, posN))
    for eigi in tqdm(range(posN)):
        for eigj in range(posN):
            eva_i, evc_i = eigval_col[eigi], eigvec_col[eigi]  # torch.from_numpy(eigvect_j).cuda()
            eva_j, evc_j = eigval_col[eigj], eigvec_col[eigj]  # torch.from_numpy(eigval_j).cuda()
            inpr = evc_i.T @ evc_j
            vHv_ij = np.diag((inpr * eva_j[np.newaxis, :]) @ inpr.T)
            corr_mat_log[eigi, eigj] = ma.corrcoef(
                ma.masked_invalid(np.log10(vHv_ij)),
                ma.masked_invalid(np.log10(eva_j)))[0, 1]
            corr_mat_lin[eigi, eigj] = np.corrcoef(vHv_ij, eva_j)[0, 1]
            # corr_mat_log[eigi, eigj] = corr_nan_torch(vHv_ij.log10(), eva_j.log10())
            # corr_mat_lin[eigi, eigj] = corr_nan_torch(vHv_ij, eva_j)
            # vHv_ij = np.diag(eigvec_col[eigi].T @ H_col[eigj] @ eigvec_col[eigi])

    print("%.1f sec" %
          (time() - T0))  # 582.2 secs for the 1000 by 1000 mat. not bad!
    np.savez(join(figdir, "Hess_%s_corr_mat.npz" % fdnm),
             corr_mat_log=corr_mat_log,
             corr_mat_lin=corr_mat_lin)
    return corr_mat_log, corr_mat_lin
Example #10
def get_pearson(pred, climdat):
    """
    pearson correlation of model predicted data and damage time series

    Parameters
    ----------
    pred : GLM
        fitted model whose predict() output is correlated with climdat
    climdat : np.array
        damage time series

    Returns
    -------
    float
        Pearson correlation coefficient

    """
    a = ma.masked_invalid(climdat)
    b = ma.masked_invalid(pred.predict())
    msk = (~a.mask & ~b.mask)
    corrcoef = ma.corrcoef(a[msk], b[msk])

    # corrcoef = stats.spearmanr(a[msk], b[msk])

    return corrcoef[0, 1]
Example #11
def gen_binned_estimates(rec_dists, s1, s2, **kwargs):
    """Get binned estimates of the correlation in segregating sites."""
    _, bins = np.histogram(rec_dists, **kwargs)
    bin_idx = np.digitize(rec_dists, bins)
    bin_idx = bin_idx - 1

    # Setting up the accumulator vectors here
    rec_rate_mean = np.zeros(np.max(bin_idx))
    rec_rate_se = np.zeros(np.max(bin_idx))
    corr_s1_s2 = np.zeros(np.max(bin_idx))
    se_r = np.zeros(np.max(bin_idx))
    for i in range(np.max(bin_idx)):
        cur_idx = bin_idx == i
        n_pairs = np.sum(cur_idx)
        cur_rec_rates = rec_dists[cur_idx]
        rec_rate = np.nanmean(cur_rec_rates)
        se_rec_rate = np.nanstd(cur_rec_rates)

        # TODO : take concatenation of the masks
        corr_s1_s2_cur = ma.corrcoef(
            ma.masked_invalid(s1[cur_idx]), ma.masked_invalid(s2[cur_idx])
        )[0, 1]
        se_r_cur = np.sqrt((1.0 - corr_s1_s2_cur ** 2) / (n_pairs - 2))

        # Set the accumulators to return
        rec_rate_mean[i] = rec_rate
        rec_rate_se[i] = se_rec_rate
        corr_s1_s2[i] = corr_s1_s2_cur
        se_r[i] = se_r_cur

    # Return the different accumulators as we have them
    return (rec_rate_mean, rec_rate_se, corr_s1_s2, se_r)
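A synthetic-data sketch, assuming numpy as np and numpy.ma as ma are in scope; keyword arguments pass through to np.histogram:

import numpy as np

rng = np.random.default_rng(0)
rec_dists = rng.uniform(0.0, 1.0, size=500)
s1 = rng.poisson(10, size=500).astype(float)
s2 = s1 + rng.normal(scale=2.0, size=500)
rec_mean, rec_se, corr, se_r = gen_binned_estimates(rec_dists, s1, s2, bins=10)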
Example #12
    def _get_corr_arr(self):
        corr_data = getattr(self.yarn_data, self.var_enum_)
        #corr_data = corr_data[prod( corr_data >= 0, axis=1, dtype=bool )]
        import numpy.ma as ma
        corr_data = ma.masked_array(corr_data, mask=self.yarn_data.mask_arr)
        # note: ma.corrcoef returns slightly different values than np.corrcoef
        #print ma.corrcoef( corr_data, rowvar=False, allow_masked=True, bias=False )
        return ma.corrcoef(corr_data, rowvar=False, allow_masked=True)
Example #13
def r_rmse(obs_series, model_series):
    R = ma.corrcoef(ma.masked_invalid(obs_series), ma.masked_invalid(model_series))
    # Keep only positions where *both* series are valid so x and y stay aligned.
    valid = ~np.isnan(obs_series) & ~np.isnan(model_series)
    x = obs_series[valid]
    y = model_series[valid]
    rmse = np.sqrt(((y - x) ** 2).mean())
    format_R = float("{0:.2f}".format(R[0, 1]))
    format_rmse = float("{0:.2f}".format(rmse))
    return format_R, format_rmse
Example #14
def match(dbz1, dbz2):
    """
    input:  two armor.pattern.DBZ objects
    output: just their correlation
    **** PROBLEM:  need to resolve the grid problem with e.g. interpolation

    """
    size = dbz1.matrix.size
    return ma.corrcoef(dbz1.matrix.reshape(size), dbz2.matrix.reshape(size))[0,1]
Example #15
def nancorr(x, y):
    """
    r = nancorr(x,y)
    Calculate correlation matrix, treating NaN values as missing data
    """
    x_msk = ma.masked_invalid(x)
    y_msk = ma.masked_invalid(y)
    r = ma.corrcoef(x_msk, y_msk)
    return r
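A minimal demonstration of the NaN-tolerant correlation:

import numpy as np

x = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
y = np.array([2.0, np.nan, 6.1, 7.9, 10.2])
r = nancorr(x, y)
print(r[0, 1])   # r is a 2x2 masked matrix; r[0, 1] is the correlation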
Example #16
def test_crossval_Melanoma():
    """ Tests the cross val function that creates the train and test data. """
    data = ImportMelanoma().to_numpy()
    train_X, test_X = split_data(data)
    full_X = impute(train_X)

    print(
        ma.corrcoef(ma.masked_invalid(full_X.flatten()),
                    ma.masked_invalid(test_X.flatten())))
Example #17
def make_scatterplot_heights(preds, lbls, preds_horavg, lbls_horavg, heights,
                             component, time_step):
    #NOTE1: the second-to-last input of this function is a string indicating the name of the component being plotted.
    for k in range(len(heights) + 1):
        if k == len(heights):
            preds_height = preds_horavg[:] / (utau_ref**2.)
            lbls_height = lbls_horavg[:] / (utau_ref**2.)
        else:
            preds_height = preds[k, :, :] / (utau_ref**2.)
            lbls_height = lbls[k, :, :] / (utau_ref**2.)

        preds_height = preds_height.flatten()
        lbls_height = lbls_height.flatten()

        #Make scatterplots of Smagorinsky/CNN fluxes versus labels
        corrcoef = np.round(
            ma.corrcoef(preds_height, lbls_height)[0, 1], 3
        )  #Calculate, extract, and round off Pearson correlation coefficient from correlation matrix
        plt.figure()
        plt.scatter(lbls_height, preds_height, s=6, marker='o', alpha=0.2)
        if k == len(heights):
            #plt.xlim([-0.004, 0.004])
            #plt.ylim([-0.004, 0.004])
            #plt.xlim([-0.000004, 0.000004])
            #plt.ylim([-0.000004, 0.000004])
            plt.xlim([-2.0, 2.0])
            plt.ylim([-2.0, 2.0])
        else:
            plt.xlim([-2.0, 2.0])
            plt.ylim([-2.0, 2.0])
            #plt.xlim([-15.0, 15.0])
            #plt.ylim([-15.0, 15.0])
            #plt.xlim([-40.0, 40.0])
            #plt.ylim([-40.0, 40.0])
            #plt.xlim([-0.0005, 0.0005])
            #plt.ylim([-0.0005, 0.0005])
        axes = plt.gca()
        plt.plot(axes.get_xlim(), axes.get_ylim(), 'b--')
        #plt.gca().set_aspect('equal',adjustable='box')
        plt.xlabel(r'$\rm \frac{\tau_{wu}^{DNS}}{u_{\tau}^2} \,\ {[-]}$',
                   fontsize=20)
        plt.ylabel(r'$\rm \frac{\tau_{wu}^{smag}}{u_{\tau}^2} \,\ {[-]}$',
                   fontsize=20)
        #plt.title("ρ = " + str(corrcoef),fontsize = 20)
        plt.axhline(c='black')
        plt.axvline(c='black')
        plt.xticks(fontsize=16, rotation=90)
        plt.yticks(fontsize=16, rotation=0)
        plt.tight_layout()  # apply the layout before saving so it affects the file
        if k == len(heights):
            plt.savefig("Scatter_Smagorinsky_tau_" + component + "_horavg.png",
                        dpi=200)
        else:
            plt.savefig("Scatter_Smagorinsky_tau_" + component + "_" +
                        str(heights[k]) + ".png",
                        dpi=200)
        plt.close()
Example #18
def squared_angular_distance(x: np.array, y: np.array) -> float:
    """
    Returns a modification of angular distance where square of correlation coefficient is used.

    :param x: (np.array) X vector
    :param y: (np.array) Y vector
    :return: (float) squared angular distance
    """

    corr_coef = ma.corrcoef(ma.masked_invalid(x), ma.masked_invalid(y))[0][1]
    return np.sqrt(0.5 * (1 - corr_coef**2))
Example #19
def repeatImputation(data, linear=False, numIter=20):
    """ Repeat imputation and calculate the average of cost for 20 iterations. """
    coefs = []
    for _ in range(numIter):
        train_X, test_X = split_data(data)
        full_X = impute(train_X, linear)
        corr_coef = ma.corrcoef(ma.masked_invalid(full_X.flatten()),
                                ma.masked_invalid(test_X.flatten()))
        coefs.append(corr_coef[0][1])
    print(f"average corr coef: {sum(coefs)/len(coefs)}")
    return coefs
Example #20
def angular_distance(x: np.array, y: np.array) -> float:
    """
    Returns angular distance between two vectors. Angular distance is a slight modification of correlation which
    satisfies metric conditions.

    :param x: (np.array) X vector.
    :param y: (np.array) Y vector.
    :return: (float) angular distance.
    """
    corr_coef = ma.corrcoef(ma.masked_invalid(x), ma.masked_invalid(y))[0][1]
    return np.sqrt(0.5 * (1 - corr_coef))
Example #21
def absolute_angular_distance(x: np.array, y: np.array) -> float:
    """
    Returns a modification of angular distance where absolute value of correlation coefficient is used.

    :param x: (np.array) x vector
    :param y: (np.array) y vector
    :return: (float) absolute angular distance
    """

    corr_coef = ma.corrcoef(ma.masked_invalid(x), ma.masked_invalid(y))[0][1]
    return np.sqrt(0.5 * (1 - abs(corr_coef)))
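A hedged side-by-side of the three distance variants on an anti-correlated pair, assuming numpy.ma is imported as ma as the functions require; it shows where the variants differ:

import numpy as np

rng = np.random.default_rng(1)
x = rng.normal(size=200)
y = -0.9 * x + rng.normal(scale=0.3, size=200)   # strong negative correlation

print(angular_distance(x, y))            # large: the sign of rho matters
print(absolute_angular_distance(x, y))   # small: uses |rho|
print(squared_angular_distance(x, y))    # small: uses rho**2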
Example #22
def ccf(x, y, lags):
    x = x - x.mean()  # remove mean
    y = y - y.mean()
    if isinstance(lags, int):
        lags = range(lags)
    C = ma.zeros((len(lags), 1))
    for i, l in enumerate(lags):
        if l == 0:
            C[i] = 1
        else:
            C[i] = ma.corrcoef(x[:-l], y[l:])[0, 1]
    return C
Example #23
def maxcorr(x, y, **options):
    """
    (rmax,lag,ind) = maxcorr(x,y,**'maxlag'=int(len(x)/4)):
    Calculate the maximum lagged correlation between two 1D arrays
    Inputs:
    x,y are 1D arrays
    Options
    'maxlag' the maximum number of lagged correlations to calculate (default: 1/4 of array length)
    Output:
    rmax is the correlation coefficient with the maximum absolute value
    lag is the lag of the maximum correlation (positive: y lags x)
    ind is the index of that maximum within the array of lags
    """

    nrows = len(x)
    maxlag = int(np.floor(nrows / 4))
    if 'maxlag' in options:
        maxlag = options['maxlag']

    # use masked arrays (mask NaNs)
    x = ma.masked_invalid(x)
    y = ma.masked_invalid(y)

    lags = np.arange(-maxlag, maxlag + 1)
    rs = np.zeros(np.shape(lags))
    for ni, lag in enumerate(lags):
        if lag < 0:
            rs[ni] = ma.corrcoef(x[-lag:], y[:lag])[0, 1]
        elif lag > 0:
            rs[ni] = ma.corrcoef(x[:-lag], y[lag:])[0, 1]
        else:
            rs[ni] = ma.corrcoef(x, y)[0, 1]

    ind = ma.argmax(np.abs(rs))
    rmax = rs[ind]
    lag = lags[ind]

    return (rmax, lag, ind)
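A usage sketch with a known shift, assuming numpy as np and numpy.ma as ma are imported as above:

import numpy as np

rng = np.random.default_rng(2)
base = rng.normal(size=300)
x = base[:-5]
y = base[5:]                      # y holds x shifted forward by 5 samples
rmax, lag, ind = maxcorr(x, y, maxlag=20)
print(rmax, lag)                  # expect rmax near 1.0 at lag = -5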
Example #24
def rm_pears_corr_obs(corr_obs_ts, corr_ts, use_log):
    """
    Pearson-Correlation of modeled and observed damages, applying a running
    mean before (3yr)
    ----------
    corr_obs_ts : np.array
        observed damages
    corr_ts : np.array
        damages to be correlated
    use_log : string
        correlation in log space
    Returns
    -------
    CorrelationObject

    """
    rm_obs = runmean(np.array(corr_obs_ts), 1)
    rm_ts = runmean(np.array(corr_ts), 1)

    if use_log:
        # ma.masked_invalid already masks NaN and +/-inf, and rm_obs is an
        # ndarray (no .replace method), so the pandas-style replace is dropped
        a = ma.masked_invalid(np.log10(rm_obs))
        b = ma.masked_invalid(np.log10(rm_ts))
        msk = (~a.mask & ~b.mask)
        corrcoef = ma.corrcoef(a[msk], b[msk])

        #  corrcoef = stats.spearmanr(a[msk], b[msk])

    else:
        a = ma.masked_invalid(rm_obs)
        b = ma.masked_invalid(rm_ts)
        msk = (~a.mask & ~b.mask)
        corrcoef = ma.corrcoef(a[msk], b[msk])

        # corrcoef = stats.spearmanr(a[msk], b[msk])

    return corrcoef
Example #25
def normalised_corr(dataFrame, tot_mod_dam, tot_pred_dam):
    """
    This function adjusts for vulnerability, applying a GDP fit, either in the
    log space or the linear space. All relevant columns are normalised before
    they are correlated.
    Parameters
    ----------
    dataFrame : pd.DataFrame
        damage data with predicted, modeled and recorded (NatCat) columns
    tot_mod_dam : float
        total modeled damages
    tot_pred_dam : float
        total predicted damages

    Returns
    -------
    np.ma.MaskedArray, np.ma.MaskedArray
        correlation matrices of the normalised predicted and modeled damages

    """
    facE = tot_pred_dam/tot_mod_dam
    facV = tot_pred_dam/tot_pred_dam
    facNatCat = tot_pred_dam/dataFrame['natcat_flood_damages_2005_CPI'].sum()

    pred_norm = dataFrame['Impact_Pred'] * facV
    mod_norm = dataFrame['Impact_2y_Flopros'] * facE
    natCat_norm = dataFrame['natcat_flood_damages_2005_CPI'] * facNatCat

    a = ma.masked_invalid(natCat_norm.replace([-np.inf, np.inf],
                                              [np.nan, np.nan]))
    b = ma.masked_invalid(pred_norm)
    msk = (~a.mask & ~b.mask)
    pred_corrcoef = ma.corrcoef(a[msk], b[msk])

    a = ma.masked_invalid(natCat_norm.replace([-np.inf, np.inf],
                                              [np.nan, np.nan]))
    b = ma.masked_invalid(mod_norm)
    msk = (~a.mask & ~b.mask)
    mod_corrcoef = ma.corrcoef(a[msk], b[msk])

    return pred_corrcoef, mod_corrcoef
Example #26
def apply_pca_pearson(client_trace, server_trace):
    """
    Applies PCA to input data and compares transformed data via
    Pearson correlation.
    """

    coeffs = []
    n_components = 3

    for feature in range(len(client_trace[0])):
        tmp_coeff = 0
        try:
            pca = decomposition.PCA(n_components)
            pca.fit(client_trace)
            client_pca = pca.transform(client_trace)

            pca.fit(server_trace)
            server_pca = pca.transform(server_trace)

        except Exception as err:
            print('Problem applying PCA:', err)

        try:
            for i in range(n_components):
                shrinked_client = client_pca[0:1000]
                shrinked_server = server_pca[0:1000]

                shrinked_client = [row[i] for row in shrinked_client]
                shrinked_server = [row[i] for row in shrinked_server]

                limitation = min(len(shrinked_client), len(shrinked_server))

                shrinked_client = shrinked_client[0:limitation]
                shrinked_server = shrinked_server[0:limitation]

                cor = corrcoef(transpose(shrinked_client),
                               transpose(shrinked_server))
                correlation_coefficient = abs(cor.data[1][0])

                if correlation_coefficient > tmp_coeff:
                    tmp_coeff = correlation_coefficient

        except Exception as err:
            print('Error applying PCA-Pearson:', err)

        coeffs.append(tmp_coeff)

    return coeffs
Example #27
    def test_corrcoef(self):

        r = ma.masked_equal(np.load("data/ml-1m/rating.npy"), 0)
        # sim = ma.corrcoef(r[0], r[2412])
        # print(sim)

        # print(np.corrcoef(r[0].filled(0), r[2412].filled(0)))

        sim2 = ma.corrcoef(ma.vstack([r[0], r[2412]]))
        print(sim2)

        print(ma.dot(r[0], r[2412])/math.sqrt(ma.dot(r[0],r[0]))/math.sqrt(ma.dot(r[2412],r[2412])))

        r0_m = r[0] - ma.mean(r[0])
        r1_m = r[2412] - ma.mean(r[2412])
        print(ma.dot(r0_m, r1_m)/math.sqrt(ma.dot(r0_m,r0_m))/math.sqrt(ma.dot(r1_m,r1_m)))
Example #28
    def predict_corr(self, *views: Tuple[np.ndarray, ...], **kwargs) -> np.ndarray:
        """
        Predicts the correlation for the given data using the fit model

        :param views: numpy arrays with the same number of rows (samples) separated by commas
        :param kwargs: any additional keyword arguments required by the given model
        :return: all_corrs: an array of the pairwise correlations (k,k,self.latent_dims) where k is the number of views
        :rtype: np.ndarray
        """
        # Takes two views and predicts their out of sample correlation using trained model
        transformed_views = self.transform(*views, **kwargs)
        all_corrs = []
        for x, y in itertools.product(transformed_views, repeat=2):
            all_corrs.append(np.diag(ma.corrcoef(x.T, y.T)[:self.latent_dims, self.latent_dims:]))
        all_corrs = np.array(all_corrs).reshape((len(views), len(views), self.latent_dims))
        return all_corrs
Example #29
def masked_corrcoef2d(arr1, arr2):
    """
    Correlation coefficient of two 2 dimensional masked arrays.

    Parameters
    ----------
    arr1 : np.array
        2D array.
    arr2 : np.array
        2D array.

    See also
    --------
    numpy.corrcoef : NumPy corrcoef function.
    numpy.ma : NumPy mask module.

    Returns
    -------
    corr : np.array
        correlation coefficient from np.corrcoef.

    Example
    --------
    >>> import numpy.ma as ma
    >>> a = np.reshape(np.arange(10), (2,5))
    >>> v = np.reshape(np.arange(10), (2,5))
    >>> mask = np.zeros((2, 5), dtype=bool)
    >>> mask[1:, 3:] = True
    >>> v = ma.masked_array(v, mask=mask)
    >>> print(v)
    [[0 1 2 3 4]
     [5 6 7 -- --]]
    >>> masked_corrcoef2d(a, v)
    masked_array(data =
     [[1.0 1.0]
     [1.0 1.0]],
                 mask =
     [[False False]
     [False False]],
           fill_value = 1e+20)
    <BLANKLINE>
    """
    import numpy.ma as ma
    a_ = np.reshape(arr1, (1, arr1.size))
    v_ = np.reshape(arr2, (1, arr2.size))
    corr = ma.corrcoef(a_, v_)
    return corr
Example #30
def apply_pearson(client_trace, server_trace):

    coeffs = []
    for feature in range(len(client_trace[0])):
        feature_client = [row[feature] for row in client_trace]
        feature_server = [row[feature] for row in server_trace]

        limitation = min(len(feature_client), len(feature_server))

        feature_client = feature_client[0:limitation]
        feature_server = feature_server[0:limitation]
        
        try:
            cor = corrcoef(feature_client, feature_server)
            correlation_coefficient = abs(cor.data[1][0])
            coeffs.append(correlation_coefficient)

        except Exception as err:
            print('Error applying Pearson:', err)
            coeffs.append(0)

    return coeffs
Example #31
def plotMinFFD(df):
    from statsmodels.tsa.stattools import adfuller
    import numpy.ma as ma
    out = pd.DataFrame(
        columns=['adfStat', 'pVal', 'lags', 'nObs', '95% conf', 'corr'])
    for d in np.linspace(0, 1, 21):
        df1 = np.log(df[[
            'Close'
        ]]).resample('1D').last()  # Convert to daily observations
        df2 = fracDiff(df1, d, thres=.01)
        corr = ma.corrcoef(ma.masked_invalid(df1.loc[df2.index, 'Close']),
                           ma.masked_invalid(df2['Close']))[0, 1]
        df2 = adfuller(df2['Close'], maxlag=1, regression='c', autolag=None)
        out.loc[d] = list(df2[:4]) + [df2[4]['5%']] + [corr]  # Append critical values
    out[['adfStat', 'corr']].plot(secondary_y='adfStat')
    plt.axhline(out['95% conf'].mean(),
                linewidth=1,
                color='r',
                linestyle='dotted')
    plt.show()
    return out
Example #32
def acf(x, lags=500, exclude=None):
    if exclude is None:
        exclude = np.zeros(x.shape)
    exclude = np.cumsum(exclude.astype(int))

    # from stackexchange
    x = x - x.mean()  # remove mean
    if isinstance(lags, int):
        lags = range(lags)

    C = ma.zeros((len(lags),))
    for i, l in enumerate(lags):
        if l == 0:
            C[i] = 1
        else:
            x0 = x[:-l].copy()
            x1 = x[l:].copy()
            reject = (exclude[l:]-exclude[:-l])>0
            x0[reject] = ma.masked
            x1[reject] = ma.masked
            C[i] = ma.corrcoef(x0, x1)[0, 1]
    return C
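A hedged usage sketch on white noise, assuming numpy as np and numpy.ma as ma are in scope:

import numpy as np
import numpy.ma as ma

rng = np.random.default_rng(3)
x = ma.masked_invalid(rng.normal(size=1000))
C = acf(x, lags=20)
print(C[0], C[1])   # C[0] is 1 by construction; later lags sit near 0 for noise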
Example #33
def KGEglobal(s, o):
    warnings.filterwarnings("ignore", message="divide by zero encountered")
    warnings.filterwarnings("ignore", message="invalid value encountered")
    warnings.filterwarnings("ignore", message="Mean of empty slice")
    warnings.filterwarnings("ignore", message="Degrees of freedom")

    B = np.nanmean(s, axis=0) / np.nanmean(o, axis=0)
    pbias = np.nansum((s - o), axis=0) / np.nansum(o, axis=0)
    y = (np.nanstd(s, axis=0) / np.nanmean(s, axis=0)) / (
        np.nanstd(o, axis=0) / np.nanmean(o, axis=0))
    NS = 1 - np.nansum((s - o)**2, axis=0) / np.nansum(
        (o - np.nanmean(o, axis=0))**2, axis=0)

    r = np.empty(s.shape[1])
    for i in range(s.shape[1]):
        s1 = ma.masked_invalid(s[:, i])
        o1 = ma.masked_invalid(o[:, i])
        msk = (~o1.mask & ~s1.mask)
        r[i] = ma.corrcoef(o1[msk], s1[msk]).data[0, 1]

    KGE = 1 - np.sqrt((r - 1)**2 + (B - 1)**2 + (y - 1)**2)

    return KGE, NS, r, pbias
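A sketch with two synthetic station matrices (rows are time steps, columns are stations), assuming numpy as np, numpy.ma as ma, and warnings are imported as in the source module:

import numpy as np

rng = np.random.default_rng(4)
o = rng.normal(loc=10.0, scale=2.0, size=(100, 3))   # observed
s = o + rng.normal(scale=0.5, size=(100, 3))         # simulated
s[::7, 0] = np.nan                                   # some missing model values
KGE, NS, r, pbias = KGEglobal(s, o)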
Example #34
# Calculate BM x/y ratio.

bm_x = np.nanstd(x_list, axis=0)
bm_y = np.nanstd(y_list, axis=0)
bm_avr = bm_x / bm_y

# %%
column_fail_xy_ratio = [
    column for (column, bm) in enumerate(bm_avr)
    if bm < xy_ratio_min or bm > xy_ratio_max
]

corrcoefs = [
    abs(
        ma.corrcoef(ma.masked_invalid(x_list[:, aoi]),
                    ma.masked_invalid(y_list[:, aoi]))[0, 1])
    for aoi in range(valid_num_aoi)
]

column_fail_corrcoef = [
    column for (column, corrcoef) in enumerate(corrcoefs)
    if corrcoef > try_corrcoef
]

column_delete = sorted(list(set(column_fail_xy_ratio + column_fail_corrcoef)))

x_list = np.delete(x_list, column_delete, axis=1)
y_list = np.delete(y_list, column_delete, axis=1)
aoi_ids = np.delete(aoi_ids, column_delete)

valid_num_aoi = len(aoi_ids)
Example #35
    if temperature_change_options[j] == 0.5:
        np.savetxt("saved_data/x_05_degree_warming_cmip5.csv",
                   x_array[j, :, :],
                   delimiter=",")
        np.savetxt("saved_data/y_05_degree_warming_cmip5.csv",
                   y_array[j, :, :],
                   delimiter=",")
    else:
        np.savetxt("saved_data/x_" + str(t) + "_degree_warming_cmip5.csv",
                   x_array[j, :, :],
                   delimiter=",")
        np.savetxt("saved_data/y_" + str(t) + "_degree_warming_cmip5.csv",
                   y_array[j, :, :],
                   delimiter=",")

    # saving the r coefficient for x_array and y_array at each temperature change
    x_array_flatten = x_array[j, :, :]
    x_array_flatten = x_array_flatten.flatten()
    y_array_flatten = y_array[j, :, :]
    y_array_flatten = y_array_flatten.flatten()
    r_coefficient = ma.corrcoef(ma.masked_invalid(x_array_flatten),
                                ma.masked_invalid(y_array_flatten))
    print('CMIP5 r-coefficient (all rcps)', t, r_coefficient)
    if temperature_change_options[j] == 0.5:
        np.savetxt("saved_data/cmip5_xy_rcoefficient_05_degree_warming.csv",
                   r_coefficient,
                   delimiter=",")
    else:
        np.savetxt("saved_data/cmip5_xy_rcoefficient_" + str(t) +
                   "_degree_warming.csv",
                   r_coefficient,
                   delimiter=",")

    # saving the observational derived constrained values
    if temperature_change_options[j] == 0.5:
        np.savetxt("saved_data/obs_constraint_05_degree_warming_cmip5.csv",
                   obs_array[j, :],
Example #36
def Taylor_diag(series,names):
    """ Taylor Diagram : obs is reference data sample
        in a full diagram (0 --> npi)
        --------------------------------------------------------------------------
        Input: series     - dict with all time series (lists) to analyze  
               series[0]  - is the observation, the reference by default.
    """
    from matplotlib.projections import PolarAxes
    corr, std = {}, {}
    for i in series.keys():
        corr[i] = ma.corrcoef(series[0], series[i])[1, 0]
        std[i] = ma.std(series[i]) / ma.std(series[0])

    ref = 1  # ma.std(series[0])
    #print(corr)
    
    rlocs = np.concatenate((np.arange(0,-10,-0.25),[-0.95,-0.99],np.arange(0,10,0.25),[0.95,0.99]))
    str_rlocs = np.concatenate((np.arange(0,10,0.25),[0.95,0.99],np.arange(0,10,0.25),[0.95,0.99]))
    tlocs = np.arccos(rlocs)        # Conversion to polar angles
    gl1 = GF.FixedLocator(tlocs)    # Positions
    tf1 = GF.DictFormatter(dict(zip(tlocs, map(str,rlocs))))
    

    str_locs2 = np.arange(-10,11,0.5)
    tlocs2 =  np.arange(-10,11,0.5)      # Conversion to polar angles
       
    g22 = GF.FixedLocator(tlocs2)  
    tf2 = GF.DictFormatter(dict(zip(tlocs2, map(str,str_locs2))))

    
    
    
    tr = PolarAxes.PolarTransform()
    
    smin = 0
    smax = 2.5

    ghelper = FA.GridHelperCurveLinear(tr,
                                           extremes=(0,np.pi, # 1st quadrant
                                                     smin,smax),
                                           grid_locator1=gl1,
                                           #grid_locator2=g11,
                                           tick_formatter1=tf1,
                                           tick_formatter2=tf2,
                                           )
    fig = plt.figure(figsize=(10,5), dpi=100)
    ax = FA.FloatingSubplot(fig, 111, grid_helper=ghelper)

    fig.add_subplot(ax)
    ax.axis["top"].set_axis_direction("bottom") 
    ax.axis["top"].toggle(ticklabels=True, label=True)
    ax.axis["top"].major_ticklabels.set_axis_direction("top")
    ax.axis["top"].label.set_axis_direction("top")
    ax.axis["top"].label.set_text("Correlation Coefficient")

    ax.axis["left"].set_axis_direction("bottom") 
    ax.axis["left"].label.set_text("Standard Deviation")

    ax.axis["right"].set_axis_direction("top") 
    ax.axis["right"].toggle(ticklabels=True, label=True)
    ax.axis["right"].set_visible(True)
    ax.axis["right"].major_ticklabels.set_axis_direction("bottom")
    #ax.axis["right"].label.set_text("Standard Deviation")

    ax.axis["bottom"].set_visible(False) 

    ax.grid(True)

    ax = ax.get_aux_axes(tr)

    t = np.linspace(0, np.pi)
    r = np.zeros_like(t) + ref
    ax.plot(t,r, 'k--', label='_')


    rs,ts = np.meshgrid(np.linspace(smin,smax),
                            np.linspace(0,np.pi))

    
    rms = np.sqrt(ref**2 + rs**2 - 2*ref*rs*np.cos(ts))
    CS =ax.contour(ts, rs,rms,cmap=cm.bone)
    plt.clabel(CS, inline=1, fontsize=10)
    

    ax.plot(np.arccos(0.9999),ref,'k',marker='*',ls='', ms=10)
    aux = range(1,len(corr))
    #del aux[ref]


    
    colors = plt.matplotlib.cm.jet(np.linspace(0,1,len(corr)))
    
    for i in aux:
        ax.plot(np.arccos(corr[i]), std[i],c=colors[i],alpha=0.7,marker='o',label="%s" %names[i])
        ax.text(np.arccos(corr[i]), std[i],"%s"%i)
    plt.legend(bbox_to_anchor=(1.5, 1), prop=dict(size='large'), loc='best')
    plt.savefig('example.png', dpi=500)
    return
Example #37
def process(t,matrix_name,normalization,order,iterations,exposant,gaussian_number,convolution_sigma):
    s = np.copy(t)
    mat = s
    if matrix_name != "raw":
    
        print "Normalizing with "+str(normalization)+" norm..."
        
        if normalization == "fragment-wise":
            floatorder = np.float64(order)
            s_norm_x = np.linalg.norm(s, ord=floatorder, axis=0)
            s_norm_y = np.linalg.norm(s, ord=floatorder, axis=1)
            s_norm = np.tensordot(s_norm_x,s_norm_y,axes=0)
            s[s_norm!=0] = s[s_norm!=0]/s_norm[s_norm!=0]
            print "Normalized "+str(normalization)+" with order "+str(order)
            
        elif normalization == "matrix-wise":
            
            floatorder = np.float64(order)
            s_norm = np.linalg.norm(s, ord=floatorder)
            s = s/s_norm
            print "Normalized "+str(normalization)+" with order "+str(order)
            
        elif normalization == "SCN":
            
            for iteration in range(1,iterations):
                sumrow = s.sum(axis=1)[:,None]
                sumcols = s.sum(axis=0)[None,:]
                s[sumrow!=0] = s[sumrow!=0]/sumrow[sumrow!=0]
                s[sumcols!=0] = s[sumcols!=0]/sumcols[sumcols!=0]
                print "Normalized "+str(iteration+1)+" time"+str("" if iteration <= 1 else "s")
            
            s = (s+s.T)/2
        
        elif normalization == "mirnylib":
            
            s_mirny = ntls.iterativeCorrection(s, iterations)[0]
            s = s_mirny
            print "Normalized "+str(iterations)+" time"+str("" if iterations <= 1 else "s")

        elif normalization == "sparsity":
            M = s.sum()
            sums = s.sum(axis=0)
            C = [[sums[i]*sums[j] for i in range(len(sums))] for j in range(len(sums))]/M
            s_coverage = s
            s_coverage[C!=0] /= C[C!=0]
            s = s_coverage
            
            print "Normalized for "+str(normalization)
            
        else:
            print "Error in normalization, using matrix-wise by default"
            s_norm = np.linalg.norm(s)
            s /= s_norm
        
        #Apply log or power
        try:
            s_exp = s**exposant
            s = s_exp
            print("Applied " + str(exposant) + " power to matrix")
        except (TypeError, ValueError):
            # raising a matrix to a string power fails; interpret the string
            if exposant in ["log10", "log", "ln10"]:
                s = np.log10(s.astype(float))
                print("Applied base-10 logarithm to matrix")
            elif exposant in ["ln", "logarithm", "logarithme"]:
                s = np.log(s.astype(float))
                print("Applied natural logarithm to matrix")
            elif exposant in ["ln2", "log2"]:
                s = np.log2(s.astype(float))
                print("Applied base-2 logarithm to matrix")
            else:
                print("Warning, no valid normalization function encountered, ignoring")
        
        if matrix_name != "normalized":
            
            if "correlation" in matrix_name:
                s_corr = corrcoef(s)
                s_corr[s_corr<0] = 0
                s = s_corr
                print "Applied correlation function"
            
            if matrix_name != "correlation":
                
                if not "convolution" in matrix_name:
                    print "Error in matrix mode, using raw by default"
                    s = mat
                    
                else:
                    print "Convoluting..."
                    for i in range(0,gaussian_number):
                        s_gauss = ndimage.filters.gaussian_filter(s,convolution_sigma)
                        s = s_gauss
                        print "Convoluted "+str(i+1)+" time"+str("" if i+1 <= 1 else "s")
    return s
Example #38
def corr(a, b):
    phi0 = a.matrix.flatten()
    phi1 = b.matrix.flatten()
    return ma.corrcoef(phi0, phi1)
Example #39
                # Could also do each year's pattern corr rather than cumulative mean DONE
                # OR, use the ens mean pattern as the pattern to compare against -- not useful I think,
                #        for what we want to know: is the pattern of response for each ensemble member random
                #        or dependent on the boundary condition.
                # @@ Also, would be nice to have multiple variables in one plot and/or multiple simulations

                if pattcorryr:
                    # yearly anomaly pattern corr w/ the time mean pattern
                    tmp = fldpseazm[yr, lat > corrlim, ...] - fldcseazmtm[lat > corrlim, ...]
                else:
                    # time-integrated anomaly pattern corr w/ the end anomaly pattern
                    tmp = np.mean(fldpseazm[0:yr, lat > corrlim, ...], axis=0) - fldcseazmtm[lat > corrlim, ...]

                tmpmean = fldpseazmtm[lat > corrlim, ...] - fldcseazmtm[lat > corrlim, ...]  # the end pattern

                tmpcorr = ma.corrcoef(tmp.flatten() * weights.flatten(), tmpmean.flatten() * weights.flatten())
                plotd[yr] = tmpcorr[0, 1]
                testd[yr] = cutl.pattcorr(
                    tmp.flatten() * weights.flatten(), tmpmean.flatten() * weights.flatten()
                )  # @@ same result as built-in method

                """ from canam4sims_analens.py. modify for here
                ensmem = fldpdict[sim][moidx,lat>corrlim,...] - fldcdict[sim][moidx,lat>corrlim,...]
                obsbc = fldp2[moidx,lat>corrlim,...] - fldc2[moidx,lat>corrlim,...]

                # weight the fields by area
                areas = cutl.calc_cellareas(lat,lon)
                areas = areas[lat>corrlim,:]
                areas = ma.masked_where(lmask[lat>corrlim,:]==-1,areas)
                weights = areas / np.sum(np.sum(areas,axis=1),axis=0)
                """
Example #40
    def metric(self, sim, obs, time, obsbase=None):
        return corrcoef(sim, obs)[0, 1]
