Example no. 1
    def _calc_mean(self, data=None):

        if data is None:
            data = self._data

        if self._min_cov_det:
            lg.details("Use MCD for expectation value estimation")
            # Fit on the (possibly user-supplied) data, not only on self._data.
            mcd = MCD(support_fraction=self._mcd_supp_frac).fit(
                data.transpose())
            ydata = mcd.location_
        else:
            ydata = np.mean(data, axis=1)

        return ydata
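A minimal sketch of what the MCD branch computes, assuming that MCD is (an alias of) scikit-learn's MinCovDet and that self._data has shape (n_points, n_samples), so the transpose feeds samples as rows; the toy shapes and the support fraction of 0.75 are hypothetical:

import numpy as np
from sklearn.covariance import MinCovDet  # assumed stand-in for MCD above

rng = np.random.default_rng(0)
data = rng.normal(loc=1.0, scale=0.1, size=(5, 100))  # (n_points, n_samples)
data[:, :5] += 10.0                                   # a few outlier samples

# plain mean over samples (axis=1), as in the else branch
plain_mean = np.mean(data, axis=1)

# robust mean: fit on samples-as-rows, i.e. the transposed array
mcd = MinCovDet(support_fraction=0.75).fit(data.transpose())
robust_mean = mcd.location_

print(plain_mean)   # pulled toward the outliers (around 1.5)
print(robust_mean)  # close to 1.0 for every point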
Example no. 2
    def _calc_cov(self, data=None):

        if data is None:
            data = self._data

        if self._min_cov_det:
            lg.details("Use MCD for covariance estimation")
            # Fit on the (possibly user-supplied) data, not only on self._data.
            mcd = MCD(support_fraction=self._mcd_supp_frac).fit(
                data.transpose())
            cov = mcd.covariance_
        else:
            cov = calc_cov(data)

        if not self._sample_data:
            cov /= self._nconfs  # For a fit we have to normalize like an error
        return cov
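Under the same assumptions, a sketch of the covariance counterpart; the contamination level and support_fraction=0.8 (standing in for self._mcd_supp_frac) are made up for illustration:

import numpy as np
from sklearn.covariance import MinCovDet  # assumed stand-in for MCD above

rng = np.random.default_rng(1)
nconfs = 200
data = rng.normal(size=(4, nconfs))  # (n_points, n_configurations)
data[:, :10] *= 8.0                  # contaminate a few configurations

naive_cov = np.cov(data)             # analogous to the calc_cov else branch
robust_cov = MinCovDet(support_fraction=0.8).fit(data.transpose()).covariance_

print(np.diag(naive_cov))   # inflated by the contaminated configurations
print(np.diag(robust_cov))  # stays close to 1 on the diagonal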
Example no. 3
    def _calc_cov_and_mean(self, data=None):

        if data is None:
            data = self._data

        if self._min_cov_det:
            lg.details("Use MCD for mean and covariance estimation")
            ind = self._xdata < self.xmax()
            # Fit on the (possibly user-supplied) data, not only on self._data.
            mcd = MCD(support_fraction=self._mcd_supp_frac).fit(
                data.transpose())
            ydata = mcd.location_
            cov = mcd.covariance_
        else:
            ydata = std_mean(data, axis=1)
            cov = calc_cov(data)

        if not self._sample_data:
            cov /= self._nconfs  # For a fit we have to normalize like an error
        edata = np.sqrt(np.diag(cov))
        return ydata, edata, cov
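The combined routine avoids two separate MCD fits: a single fit provides both location_ and covariance_, and the error bars then follow from the diagonal of the normalized covariance. A sketch under the same assumptions as above:

import numpy as np
from sklearn.covariance import MinCovDet  # assumed stand-in for MCD above

rng = np.random.default_rng(2)
nconfs = 400
data = rng.normal(loc=3.0, scale=0.5, size=(6, nconfs))  # (n_points, n_configurations)

mcd = MinCovDet().fit(data.transpose())  # one fit yields mean and covariance
ydata = mcd.location_
cov = mcd.covariance_ / nconfs           # normalize like an error of the mean
edata = np.sqrt(np.diag(cov))            # one error bar per point, ~ 0.5/sqrt(nconfs)

print(ydata)
print(edata)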
Example no. 4
    return corr

if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  # .fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
    #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2,2,i+1)
        plot_corr(c, xnames=None, title=titles[i],
              normcolor=normcolor, ax=ax)

    images = [c for ax in fig.axes for c in ax.get_children() if isinstance(c, mpl.image.AxesImage)]
    fig.subplots_adjust(bottom=0.1, right=0.9, top=0.9)
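For readers without the surrounding statsmodels script, a self-contained sketch of the same comparison with the plotting stripped out; cov2corr is written inline here (statsmodels ships an equivalent helper), MinCovDet stands in for the MCD import from the much older scikit-learn API, and the toy data rr is made up:

import numpy as np
from sklearn.covariance import LedoitWolf, OAS, MinCovDet

def cov2corr(cov):
    # rescale a covariance matrix into a correlation matrix
    std = np.sqrt(np.diag(cov))
    return cov / np.outer(std, std)

rng = np.random.default_rng(3)
mix = np.array([[1.0, 0.5, 0.0, 0.0],
                [0.0, 1.0, 0.3, 0.0],
                [0.0, 0.0, 1.0, 0.0],
                [0.0, 0.0, 0.0, 1.0]])
rr = rng.normal(size=(200, 4)) @ mix     # 200 observations, 4 correlated variables

corr_raw = cov2corr(np.cov(rr, rowvar=False))
corr_lw = cov2corr(LedoitWolf().fit(rr).covariance_)
corr_oas = cov2corr(OAS().fit(rr).covariance_)
corr_mcd = cov2corr(MinCovDet().fit(rr).covariance_)

for name, c in [('raw', corr_raw), ('lw', corr_lw), ('oas', corr_oas), ('mcd', corr_mcd)]:
    print(name, np.round(c[0], 3))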
Example no. 5

if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  #.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
        #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2, 2, i + 1)
        plot_corr(c, xnames=None, title=titles[i], normcolor=normcolor, ax=ax)

    images = [
        c for ax in fig.axes for c in ax.get_children()
        if isinstance(c, mpl.image.AxesImage)
    ]
    fig.subplots_adjust(bottom=0.1, right=0.9, top=0.9)
Example no. 6
def RobustMD_flsification(d_var, d_obs, plt_OrNot, Q_quantile):
    '''
    Falsify the prior using the Robust Mahalanobis Distance (RMD).
    d_var: the data variable, shape (n x p)
    d_obs: the observed data variable, shape (1 x p)
    plt_OrNot: True or False, whether to plot the distribution of the calculated RMDs
    Q_quantile: the quantile of the RMD distribution to report; 95 or 97.5 is suggested
    example: RobustMD_flsification(d_pri, d_obs, True, 95) computes RMD_obs and the RMDs
    of all prior realizations, returns RMD_obs and RMD_Q95, and plots them.
    '''

    mcd = MCD(random_state=0).fit(d_var)
    cov_inv = np.linalg.inv(mcd.covariance_)  # invert the robust covariance once
    new_obs = d_obs - mcd.location_
    md_obs = np.sqrt(new_obs.dot(cov_inv).dot(new_obs.T))
    print('Robust Mahalanobis Distance of d_obs = ',
          md_obs[0, 0].round(decimals=3))
    md_samples = []
    for i in range(len(d_var)):
        sample = d_var[i:i + 1, :] - mcd.location_
        md_samples.append(np.sqrt(sample.dot(cov_inv).dot(sample.T))[0, 0])
    md_samples = np.asarray(md_samples)
    print(str(Q_quantile) + 'th Quantile of Robust Mahalanobis Distance is',
          stats.scoreatpercentile(md_samples, Q_quantile).round(decimals=3))

    if plt_OrNot:
        plt.figure(figsize=(6, 5))
        plt.scatter(np.arange(1, (len(d_var) + 1)),
                    md_samples,
                    c=abs(md_samples),
                    cmap='winter_r',
                    s=50,
                    vmax=md_samples.max(),
                    vmin=md_samples.min(),
                    linewidths=1,
                    edgecolor='k')
        plt.scatter([0],
                    md_obs,
                    c=md_obs,
                    cmap='winter_r',
                    marker='D',
                    s=110,
                    vmax=md_samples.max(),
                    vmin=md_samples.min(),
                    linewidths=3,
                    edgecolor='red')
        plt.ylabel('Robust Mahalanobis dist', fontsize=12)
        plt.xlabel('realization No.', fontsize=12)
        plt.xlim(-8, 259)
        plt.hlines(y=stats.scoreatpercentile(md_samples, Q_quantile),
                   xmin=-10,
                   xmax=259,
                   colors='red',
                   linewidths=2,
                   linestyles='--')
        cbar = plt.colorbar(fraction=0.035)
        cbar.ax.set_ylabel('RMD')
        plt.title(
            'Prior falsification using Robust Mahalanobis Distance outlier detection',
            fontsize=18,
            loc='left',
            style='italic')

    return md_obs[0, 0].round(decimals=3), stats.scoreatpercentile(
        md_samples, Q_quantile).round(decimals=3)
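A hedged usage sketch for the falsification helper above, on synthetic data (the ensemble size, dimensionality, and the +4 shift of the observation are made up). The imports provide the module-level names the function relies on; note that MinCovDet also exposes mahalanobis(), which returns squared robust distances, so the manual computation can be cross-checked:

import numpy as np
from scipy import stats
from sklearn.covariance import MinCovDet as MCD  # assumed source of the MCD name above
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
d_pri = rng.normal(size=(250, 10))        # hypothetical prior ensemble
d_obs = rng.normal(size=(1, 10)) + 4.0    # observation deliberately far from the prior

rmd_obs, rmd_q95 = RobustMD_flsification(d_pri, d_obs, False, 95)
if rmd_obs > rmd_q95:
    print('prior falsified: the observation is an outlier of the prior ensemble')

# cross-check against the built-in squared Mahalanobis distance
mcd = MCD(random_state=0).fit(d_pri)
print(np.sqrt(mcd.mahalanobis(d_obs))[0].round(3))  # should agree with rmd_obs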
Example no. 7
n_samples = 125
n_outliers = 25
n_features = 2

# generate data
gen_cov = np.eye(n_features)
gen_cov[0, 0] = 2.
X = np.dot(np.random.randn(n_samples, n_features), gen_cov)
# add some outliers
outliers_cov = np.eye(n_features)
outliers_cov[np.arange(1, n_features), np.arange(1, n_features)] = 7.
X[-n_outliers:] = np.dot(np.random.randn(n_outliers, n_features), outliers_cov)

# fit a Minimum Covariance Determinant (MCD) robust estimator to data
robust_cov = MCD().fit(X, reweight=None)

# compare estimators learnt from the full data set with true parameters
emp_cov = EmpiricalCovariance().fit(X)


# Display results
fig = pl.figure()
# variables and parameters for cosmetic
offset_left = fig.subplotpars.left
offset_bottom = fig.subplotpars.bottom
width = fig.subplotpars.right - offset_left
subfig1 = pl.subplot(3, 1, 1)
subfig2 = pl.subplot(3, 1, 2)
subfig3 = pl.subplot(3, 1, 3)
# computation
for i, n_outliers in enumerate(range_n_outliers):
    for j in range(repeat):
        # generate data
        X = np.random.randn(n_samples, n_features)
        # add some outliers
        outliers_index = np.random.permutation(n_samples)[:n_outliers]
        outliers_offset = 10. * \
            (np.random.randint(2, size=(n_outliers, n_features)) - 0.5)
        X[outliers_index] += outliers_offset
        inliers_mask = np.ones(n_samples).astype(bool)
        inliers_mask[outliers_index] = False

        # fit a Minimum Covariance Determinant (MCD) robust estimator to data
        S = MCD().fit(X, reweight=None)
        # compare robust estimates with the true location and covariance
        err_loc_mcd[i, j] = np.sum(S.location_ ** 2)
        err_cov_mcd[i, j] = S.error_norm(np.eye(n_features))
        # fit a reweighted MCD robust estimator to data
        S = MCD().fit(X)
        # compare robust estimates with the true location and covariance
        err_loc_mcd_reweighted[i, j] = np.sum(S.location_ ** 2)
        err_cov_mcd_reweighted[i, j] = S.error_norm(np.eye(n_features))
        # compare estimators learnt from the full data set with true parameters
        err_loc_emp_full[i, j] = np.sum(X.mean(0) ** 2)
        err_cov_emp_full[i, j] = EmpiricalCovariance().fit(X).error_norm(
            np.eye(n_features))
        # compare with an empirical covariance learnt from a pure data set
        # (i.e. "perfect" MCD)
        pure_X = X[inliers_mask]