Example #1
    def __init__(self, window=float('inf'), mu_estimator=None, cov_estimator=None,
                 min_history=None, max_leverage=1., method='mpt', q=0.01, gamma=0., allow_cash=False, **kwargs):
        """
        :param window: Window for calculating mean and variance. Use float('inf') for entire history.
        :param mu_estimator: Estimator of expected returns; either 'historical', 'sharpe', or a custom estimator object.
        :param cov_estimator: Covariance estimator; one of 'empirical', 'ledoit-wolf', 'graph-lasso', 'oas', or a scikit-learn estimator.
        :param min_history: Use zero weights for the first min_history periods.
        :param max_leverage: Max leverage to use.
        :param method: Optimization objective - can be "mpt", "sharpe", or "variance".
        :param q: Depends on method; e.g. for "mpt" it is the risk tolerance parameter (higher means lower aversion to risk).
        :param gamma: Penalize changing weights (can be a number or a Series with individual weights such as fees).
        :param allow_cash: Allow holding cash (weights don't have to sum to 1).
        """
        if np.isinf(window):
            window = int(1e+8)
            min_history = min_history or 50
        else:
            min_history = min_history or window

        super(MPT, self).__init__(min_history=min_history, **kwargs)
        self.window = window
        self.max_leverage = max_leverage
        self.method = method
        self.q = q
        self.gamma = gamma
        self.allow_cash = allow_cash

        if cov_estimator is None:
            cov_estimator = 'empirical'

        if isinstance(cov_estimator, basestring):
            if cov_estimator == 'empirical':
                # use pandas covariance in init_step
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == 'ledoit-wolf':
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == 'graph-lasso':
                # renamed GraphicalLasso in scikit-learn >= 0.22
                cov_estimator = covariance.GraphLasso()
            elif cov_estimator == 'oas':
                cov_estimator = covariance.OAS()
            else:
                raise NotImplementedError('Unknown covariance estimator {}'.format(cov_estimator))

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator)

        if mu_estimator is None:
            mu_estimator = MuEstimator()

        if isinstance(mu_estimator, basestring):
            if mu_estimator == 'historical':
                mu_estimator = HistoricalEstimator(window)
            elif mu_estimator == 'sharpe':
                mu_estimator = MuEstimator()
            else:
                raise NotImplementedError('Unknown mu estimator {}'.format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
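
The string options above all resolve to scikit-learn covariance estimators sharing the same fit()/covariance_ interface. A minimal standalone sketch of that dispatch on synthetic returns, assuming only numpy and scikit-learn (MPT, CovarianceEstimator, and the mu estimators are project classes not shown here):

import numpy as np
from sklearn import covariance

rets = np.random.default_rng(0).normal(scale=0.01, size=(250, 4))  # hypothetical daily returns
for name, est in [('empirical', covariance.EmpiricalCovariance()),
                  ('ledoit-wolf', covariance.LedoitWolf()),
                  ('oas', covariance.OAS())]:
    est.fit(rets)
    print(name, est.covariance_.shape)  # each exposes the fitted matrix as covariance_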
Example #2
import numpy
import scipy.stats
import sklearn.covariance as skcov  # aliases assumed from the calls below


def robust_mahalanobis_with_chi2(feat, prob_reject, ret_dist=False):
    '''Reject outliers using one-class classification based on the mahalanobis distance
    estimate from a robust covariance as calculated by minimum covariance determinant.
    
    :Parameters:
        
        feat : array
               2D array where each row is a feature and each column a factor
        prob_reject : float
                      Probability threshold for rejecting outliers
        ret_dist : bool
                   If True, also return the Mahalanobis distances
    
    :Returns:
        
        sel : array
              Boolean selection array for each feature
        dist : array, optional
               Mahalanobis distance of each feature (only when ret_dist is True)
    '''

    feat -= numpy.median(feat, axis=0)  # alternatives: feat.mean(axis=0) or scipy.stats.mstats.mode(feat, 0)[0]
    try:
        robust_cov = skcov.MinCovDet().fit(feat)
    except Exception:  # MCD can fail (e.g. too few samples); fall back to empirical
        robust_cov = skcov.EmpiricalCovariance().fit(feat)
    dist = robust_cov.mahalanobis(feat)
    cut = scipy.stats.chi2.ppf(prob_reject, feat.shape[1])
    sel = dist < cut
    return (sel, dist) if ret_dist else sel
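
A hypothetical usage sketch: plant a few gross outliers in synthetic data and check that they are rejected at the chosen chi-squared threshold. Note the function median-centres feat in place, so pass a copy if the original matters:

rng = numpy.random.default_rng(0)
feat = rng.normal(size=(500, 3))
feat[:10] += 8.0                       # ten planted outliers
keep = robust_mahalanobis_with_chi2(feat.copy(), prob_reject=0.975)
print(int(keep.sum()), 'of', len(keep), 'rows kept')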
Example #3
    def __init__(self, dim, estimator='OAS', **kwargs):
        """
        :param dim: Dimensionality of the Gaussian.
        :param estimator: Name of the scikit-learn covariance estimator to use.
        """
        super(SKGaussianParams, self).__init__(dim, **kwargs)
        if estimator == 'EmpiricalCovariance':
            self._estimator = covariance.EmpiricalCovariance(
                assume_centered=True)
        elif estimator == 'LedoitWolf':
            self._estimator = covariance.LedoitWolf(assume_centered=True)
        elif estimator == 'MinCovDet':
            self._estimator = covariance.MinCovDet(assume_centered=True)
        elif estimator == 'OAS':
            self._estimator = covariance.OAS(assume_centered=True)
        elif estimator == 'ShrunkCovariance':
            self._estimator = covariance.ShrunkCovariance(assume_centered=True)
        else:
            raise ValueError('Unknown estimator: {}'.format(estimator))
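
Every branch passes assume_centered=True, which makes the estimator treat the data mean as zero instead of subtracting it. A minimal illustration assuming only scikit-learn (SKGaussianParams itself is project code):

import numpy as np
from sklearn.covariance import OAS

x = np.random.default_rng(1).normal(loc=5.0, scale=1.0, size=(200, 3))
print(OAS(assume_centered=False).fit(x).covariance_[0, 0])  # ~1, the variance
print(OAS(assume_centered=True).fit(x).covariance_[0, 0])   # ~26, the raw second moment (1 + 5**2)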
Example #4
    def __init__(
        self,
        window=None,
        mu_estimator=None,
        cov_estimator=None,
        mu_window=None,
        cov_window=None,
        min_history=None,
        bounds=None,
        max_leverage=1.0,
        method="mpt",
        q=0.01,
        gamma=0.0,
        optimizer_options=None,
        force_weights=None,
        **kwargs,
    ):
        """
        :param window: Window for calculating mean and variance. Use None for entire history.
        :param mu_estimator: Estimator of expected returns; either 'historical', 'sharpe', or a custom estimator object.
        :param cov_estimator: Covariance estimator; one of 'empirical', 'ledoit-wolf', 'graph-lasso', 'oas', 'single-index', or a scikit-learn estimator.
        :param mu_window: Window for the mu estimator (defaults to window).
        :param cov_window: Window for the covariance estimator (defaults to window).
        :param min_history: Use zero weights for the first min_history periods. Default is 1 year.
        :param max_leverage: Max leverage to use.
        :param method: Optimization objective - can be "mpt", "sharpe", or "variance".
        :param q: Depends on method; e.g. for "mpt" it is the risk tolerance parameter (higher means lower aversion to risk),
            from https://en.wikipedia.org/wiki/Modern_portfolio_theory#Efficient_frontier_with_no_risk-free_asset
            q=2 is equivalent to full-Kelly, q=1 is equivalent to half-Kelly
        :param gamma: Penalize changing weights (can be a number or a Series with individual weights such as fees).
        """
        super().__init__(min_history=min_history, **kwargs)
        mu_window = mu_window or window
        cov_window = cov_window or window
        self.method = method
        self.q = q
        self.gamma = gamma
        self.bounds = bounds or {}
        self.force_weights = force_weights
        self.max_leverage = max_leverage
        self.optimizer_options = optimizer_options or {}

        if bounds and max_leverage != 1:
            raise NotImplementedError(
                "max_leverage cannot be used with bounds; consider removing max_leverage and replacing it with bounds"
            )

        if cov_estimator is None:
            cov_estimator = "empirical"

        if isinstance(cov_estimator, string_types):
            if cov_estimator == "empirical":
                # use pandas covariance in init_step
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == "ledoit-wolf":
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == "graph-lasso":
                cov_estimator = covariance.GraphLasso()
            elif cov_estimator == "oas":
                cov_estimator = covariance.OAS()
            elif cov_estimator == "single-index":
                cov_estimator = SingleIndexCovariance()
            else:
                raise NotImplementedError(
                    "Unknown covariance estimator {}".format(cov_estimator)
                )

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

        if mu_estimator is None:
            mu_estimator = SharpeEstimator()

        if isinstance(mu_estimator, string_types):
            if mu_estimator == "historical":
                mu_estimator = HistoricalEstimator(window=mu_window)
            elif mu_estimator == "sharpe":
                mu_estimator = SharpeEstimator()
            else:
                raise NotImplementedError("Unknown mu estimator {}".format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
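
The docstring's q follows the cited Wikipedia objective, minimize w'Σw - q·μ'w, whose unconstrained optimum is w* = (q/2)·Σ⁻¹μ; that is why q=2 recovers the full-Kelly weights Σ⁻¹μ and q=1 half-Kelly. A numeric sketch with illustrative numbers only:

import numpy as np

mu = np.array([0.05, 0.08])                   # hypothetical expected returns
sigma = np.array([[0.04, 0.01],
                  [0.01, 0.09]])              # hypothetical covariance
for q in (1.0, 2.0):                          # half- and full-Kelly per the docstring
    w = (q / 2) * np.linalg.solve(sigma, mu)  # w* = (q/2) * inv(Sigma) @ mu
    print(q, np.round(w, 3))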
Example #5
    def __init__(self,
                 mu_estimator=None,
                 cov_estimator=None,
                 cov_window=None,
                 min_history=None,
                 bounds=None,
                 max_leverage=1.,
                 method='mpt',
                 q=0.01,
                 gamma=0.,
                 optimizer_options=None,
                 force_weights=None,
                 **kwargs):
        """
        :param cov_window: Window for calculating covariance. Use None for entire history.
        :param mu_estimator: Estimator of expected returns; either 'historical', 'sharpe', or a custom estimator object.
        :param cov_estimator: Covariance estimator; one of 'empirical', 'ledoit-wolf', 'graph-lasso', 'oas', 'single-index', or a scikit-learn estimator.
        :param min_history: Use zero weights for the first min_history periods. Default is 1 year.
        :param max_leverage: Max leverage to use.
        :param method: Optimization objective - can be "mpt", "sharpe", or "variance".
        :param q: Depends on method; e.g. for "mpt" it is the risk tolerance parameter (higher means lower aversion to risk).
        :param gamma: Penalize changing weights (can be a number or a Series with individual weights such as fees).
        """
        super().__init__(min_history=min_history, **kwargs)
        self.method = method
        self.q = q
        self.gamma = gamma
        self.bounds = bounds
        self.force_weights = force_weights
        self.max_leverage = max_leverage
        self.optimizer_options = optimizer_options or {}

        if cov_estimator is None:
            cov_estimator = 'empirical'

        if isinstance(cov_estimator, string_types):
            if cov_estimator == 'empirical':
                # use pandas covariance in init_step
                cov_estimator = covariance.EmpiricalCovariance()
            elif cov_estimator == 'ledoit-wolf':
                cov_estimator = covariance.LedoitWolf()
            elif cov_estimator == 'graph-lasso':
                cov_estimator = covariance.GraphLasso()
            elif cov_estimator == 'oas':
                cov_estimator = covariance.OAS()
            elif cov_estimator == 'single-index':
                cov_estimator = SingleIndexCovariance()
            else:
                raise NotImplementedError(
                    'Unknown covariance estimator {}'.format(cov_estimator))

        # handle sklearn models
        if isinstance(cov_estimator, BaseEstimator):
            cov_estimator = CovarianceEstimator(cov_estimator,
                                                window=cov_window)

        if mu_estimator is None:
            mu_estimator = SharpeEstimator()

        if isinstance(mu_estimator, string_types):
            if mu_estimator == 'historical':
                mu_estimator = HistoricalEstimator(window=cov_window)
            elif mu_estimator == 'sharpe':
                mu_estimator = SharpeEstimator()
            else:
                raise NotImplementedError(
                    'Unknown mu estimator {}'.format(mu_estimator))

        self.cov_estimator = cov_estimator
        self.mu_estimator = mu_estimator
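
This variant adds a 'single-index' option. SingleIndexCovariance is project code, but the underlying Sharpe market-model estimate, Σ ≈ ββ'σ_m² + diag(residual variances), can be sketched directly; the equal-weight market proxy here is an assumption:

import numpy as np

def single_index_cov(returns):
    # returns: (T, N) array of asset returns
    market = returns.mean(axis=1)              # equal-weighted market proxy
    var_m = market.var(ddof=1)
    beta = np.array([np.cov(returns[:, j], market, ddof=1)[0, 1] / var_m
                     for j in range(returns.shape[1])])
    resid = returns - np.outer(market, beta)   # idiosyncratic component
    return np.outer(beta, beta) * var_m + np.diag(resid.var(axis=0, ddof=1))

rets = np.random.default_rng(2).normal(scale=0.01, size=(300, 5))
print(single_index_cov(rets).shape)            # (5, 5)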
Example #6
    def __init__(self, data_root = '../Astrometric_Data/Gaia_DR2_subsamples/',
                        data_file_name = 'GaiaDR2_RC_sample_Mcut_0p0_0p75_Ccut_1p0_1p5Nstars_1333998.csv',
                        binning_type = 'linear', #linear #input
                        Rmin = 6000, Rmax = 10000, num_R_bins = 10,
                        phimin = -np.pi/4, phimax=np.pi/4, num_phi_bins = 3,
                        Zmin = -2000, Zmax = 2000, num_Z_bins = 10,
                        input_R_edges = None,
                        input_phi_edges = None,
                        input_Z_edges = None,
                        N_samplings = 100,
                        N_cores = 1,
                        solar_pomo_means = np.array([8200.,0.,20.8, 10.,248.,7.]),
                        solar_pomo_stds = np.array([100., 0., 0.3, 1., 3., 0.5]),
                        calculate_covariance = True,
                        positions_only = False,
                        velocities_to_zero = False
                        ):
        self.data_root = data_root
        self.data_file_name = data_file_name

        self.binning_type = binning_type
        self.Rmin = Rmin
        self.Rmax = Rmax
        self.num_R_bins = num_R_bins
        self.phimin = phimin
        self.phimax = phimax
        self.num_phi_bins = num_phi_bins
        self.Zmin = Zmin
        self.Zmax = Zmax
        self.num_Z_bins = num_Z_bins
        self.input_R_edges = input_R_edges
        self.input_phi_edges = input_phi_edges
        self.input_Z_edges = input_Z_edges
        self.N_samplings = N_samplings
        self.N_cores = N_cores
        self.calculate_covariance = calculate_covariance
        self.positions_only = positions_only
        self.velocities_to_zero = velocities_to_zero

        # Set Constants and Parameters
        deg_to_rad = np.pi/180
        mas_to_rad = (np.pi/6.48E8)
        maspyr_to_radps = np.pi/(6.48E8 * 31557600)

        # Solar Position and Motion model
        self.solar_pomo_means = solar_pomo_means
        self.solar_pomo_stds = solar_pomo_stds
        self.solar_pomo_covariances = np.identity(6) * self.solar_pomo_stds**2
        """
        Bland Hawthorn et al 2016 review
        R0 = 8200±100 pc
        Z0 = 25±5 pc
        Vgsun = 248±3 km/s, tangential velocity relative to Sgr A*
        Usun = 10.0±1 km/s, radial, positive towards the galactic center
        Vsun = 11.0±2 km/s, in direction of rotation
        Wsun = 7.0±0.5 km/s, vertical upwards positive

        Bennet & Bovy 2018
        Z0 = 20.8 ± 0.3 pc
        """
        # Open data file
        datab = pd.read_csv(self.data_root + self.data_file_name) #astrometric_data_table

        # Construct Means and Covariance Matrices
        if self.positions_only:
            astrometric_means = np.array([datab['ra'].values * deg_to_rad, #rad
                                    datab['dec'].values * deg_to_rad, #rad
                                    datab['parallax'].values]).T #mas
        elif self.velocities_to_zero:
            astrometric_means = np.array([datab['ra'].values * deg_to_rad, #rad
                                    datab['dec'].values * deg_to_rad, #rad
                                    datab['parallax'].values, #mas
                                    0. * datab['pmra'].values,
                                    0. * datab['pmdec'].values,
                                    0. * datab['radial_velocity'].values]).T #km/s
        else:
            astrometric_means = np.array([datab['ra'].values * deg_to_rad, #rad
                                    datab['dec'].values * deg_to_rad, #rad
                                    datab['parallax'].values, #mas
                                    datab['pmra'].values * maspyr_to_radps, #rad/s
                                    datab['pmdec'].values * maspyr_to_radps, #rad/s
                                    datab['radial_velocity'].values]).T #km/s

        Nstars = datab['ra'].values.shape[0]
        Nzeros = np.zeros(Nstars)

        if self.positions_only:
            astrometric_covariances = np.array([[(datab['ra_error'].values*mas_to_rad)**2,
                datab['ra_dec_corr'].values * datab['ra_error'].values * datab['dec_error'].values * mas_to_rad**2,
                datab['ra_parallax_corr'].values * datab['ra_error'].values * datab['parallax_error'].values * mas_to_rad],
                [Nzeros, (datab['dec_error'].values*mas_to_rad)**2,
                datab['dec_parallax_corr'].values * datab['dec_error'].values * datab['parallax_error'].values * mas_to_rad],
                [Nzeros, Nzeros, datab['parallax_error'].values**2]])
            astrometric_covariances = np.transpose(astrometric_covariances, (2,0,1)) #Rearrange
            astrometric_covariances = np.array([astrometric_covariances[ii] + astrometric_covariances[ii].T - \
                                            np.diagonal(astrometric_covariances[ii])*np.identity(3) \
                                            for ii in range(Nstars)]) #Symmetrize
        else:
            astrometric_covariances = np.array([[(datab['ra_error'].values*mas_to_rad)**2,
                datab['ra_dec_corr'].values * datab['ra_error'].values * datab['dec_error'].values * mas_to_rad**2,
                datab['ra_parallax_corr'].values * datab['ra_error'].values * datab['parallax_error'].values * mas_to_rad,
                datab['ra_pmra_corr'].values * datab['ra_error'].values * datab['pmra_error'].values * mas_to_rad * maspyr_to_radps,
                datab['ra_pmdec_corr'].values * datab['ra_error'].values * datab['pmdec_error'].values * mas_to_rad * maspyr_to_radps,
                Nzeros],
                [Nzeros, (datab['dec_error'].values*mas_to_rad)**2,
                datab['dec_parallax_corr'].values * datab['dec_error'].values * datab['parallax_error'].values * mas_to_rad,
                datab['dec_pmra_corr'].values * datab['dec_error'].values * datab['pmra_error'].values * mas_to_rad * maspyr_to_radps,
                datab['dec_pmdec_corr'].values * datab['dec_error'].values * datab['pmdec_error'].values * mas_to_rad * maspyr_to_radps,
                Nzeros],
                [Nzeros, Nzeros, datab['parallax_error'].values**2,
                datab['parallax_pmra_corr'].values * datab['parallax_error'].values * datab['pmra_error'].values * maspyr_to_radps,
                datab['parallax_pmdec_corr'].values * datab['parallax_error'].values * datab['pmdec_error'].values * maspyr_to_radps,
                Nzeros],
                [Nzeros,Nzeros,Nzeros, (datab['pmra_error'].values * maspyr_to_radps)**2,
                datab['pmra_pmdec_corr'].values * datab['pmra_error'].values * datab['pmdec_error'].values * maspyr_to_radps**2,
                Nzeros],
                [Nzeros, Nzeros, Nzeros, Nzeros, (datab['pmdec_error'].values * maspyr_to_radps)**2, Nzeros],
                [Nzeros, Nzeros, Nzeros, Nzeros, Nzeros, datab['radial_velocity_error'].values**2]])

            astrometric_covariances = np.transpose(astrometric_covariances, (2,0,1)) #Rearrange
            astrometric_covariances = np.array([astrometric_covariances[ii] + astrometric_covariances[ii].T - \
                                        np.diagonal(astrometric_covariances[ii])*np.identity(6) \
                                        for ii in range(Nstars)]) #Symmetrize
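        # cov = L @ L.T: the Cholesky factors below let each Monte Carlo pass
        # draw a correlated astrometric sample per star as mean + L @ z with
        # z ~ N(0, I), presumably inside sample_transform_bin.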
        cholesky_astrometric_covariances = np.linalg.cholesky(astrometric_covariances)

        #Calculate epoch_T matrix
        epoch_T = calc_epoch_T('J2000')

        # Determine Binning
        if binning_type == 'input':
            self.R_edges = self.input_R_edges
            self.phi_edges = self.input_phi_edges
            self.Z_edges = self.input_Z_edges
            self.num_R_bins = len(self.input_R_edges)-1
            self.num_phi_bins = len(self.input_phi_edges)-1
            self.num_Z_bins = len(self.input_Z_edges)-1
        elif binning_type == 'linear':
            self.R_edges = np.linspace(self.Rmin, self.Rmax, self.num_R_bins+1)
            self.phi_edges = np.linspace(self.phimin, self.phimax, self.num_phi_bins+1)
            self.Z_edges = np.linspace(self.Zmin, self.Zmax, self.num_Z_bins+1)
        elif binning_type == 'quartile':
            galactocentric_means = astrometric_to_galactocentric(
                    astrometric_means[:,0], astrometric_means[:,1],
                    astrometric_means[:,2], Nzeros,
                    Nzeros, Nzeros,
                    self.solar_pomo_means[0], self.solar_pomo_means[1],
                    self.solar_pomo_means[2], 0.,0.,0.,
                    epoch_T)
            Rg_vec_means = galactocentric_means[0]
            phig_vec_means = galactocentric_means[1]
            Zg_vec_means = galactocentric_means[2]

            physt_hist = physt_h3([Rg_vec_means, phig_vec_means, Zg_vec_means], "quantile",
                        (self.num_R_bins+2,self.num_phi_bins+2, self.num_Z_bins+2))
            self.R_edges = physt_hist.numpy_bins[0][1:-1]
            self.phi_edges = physt_hist.numpy_bins[1][1:-1]
            self.Z_edges = physt_hist.numpy_bins[2][1:-1]

        # Calculate bin centers,edge mesh, and volumes
        self.R_bin_centers = (self.R_edges[1:] + self.R_edges[:-1])/2
        self.phi_bin_centers = (self.phi_edges[1:] + self.phi_edges[:-1])/2
        self.Z_bin_centers = (self.Z_edges[1:] + self.Z_edges[:-1])/2
        self.R_data_coords_mesh, self.phi_data_coords_mesh, self.Z_data_coords_mesh\
            = np.meshgrid(self.R_bin_centers, self.phi_bin_centers, self.Z_bin_centers, indexing='ij')

        self.R_edges_mesh, self.phi_edges_mesh, self.Z_edges_mesh \
            = np.meshgrid(self.R_edges, self.phi_edges, self.Z_edges, indexing='ij')


        # Bin volume of a cylindrical wedge: V = 1/2 * |dphi| * |R_out^2 - R_in^2| * |dZ|
        self.bin_vol_grid = np.zeros([len(self.R_edges) - 1, len(self.phi_edges)-1, len(self.Z_edges)-1])
        for (rr,pp,zz), dummy in np.ndenumerate(self.bin_vol_grid):
            self.bin_vol_grid[rr,pp,zz] = 0.5 * abs(self.phi_edges[pp+1]-self.phi_edges[pp])\
                            * abs(self.R_edges[rr+1]**2 - self.R_edges[rr]**2)\
                            * abs(self.Z_edges[zz+1] - self.Z_edges[zz])

        # Build cache file name
        if not os.path.isdir(data_root + '/oscar_cache_files/'):
            os.mkdir(data_root + '/oscar_cache_files/')

        if self.positions_only:
            cache_prefix = 'oscar_cache_positions_only_'
        elif self.velocities_to_zero:
            cache_prefix = 'oscar_cache_velocities_to_zero_'
        else:
            cache_prefix = 'oscar_cache_'
        cache_file_name = cache_prefix \
            + hashlib.md5(np.concatenate([self.R_edges,self.phi_edges,self.Z_edges])).hexdigest()\
            + hashlib.md5(np.concatenate([self.solar_pomo_means, self.solar_pomo_covariances.flatten()])).hexdigest()\
            + '_' + str(self.N_samplings)\
            + data_file_name.split('.')[0] + '.dat'

        # Search for cache file
        if os.path.isfile(data_root + '/oscar_cache_files/' + cache_file_name):
            print('Previous sampling found, pulling data from cache.')
            cache_dataframe = pd.read_pickle(data_root + '/oscar_cache_files/' + cache_file_name)

            self.data_mean = cache_dataframe['data_mean']
            self.data_cov = cache_dataframe['data_cov']
            self.data_corr = cache_dataframe['data_corr']
            self.data_std_total = cache_dataframe['data_std_total']
            self.data_mean_grids = cache_dataframe['data_mean_grids']
            self.data_var_from_cov = cache_dataframe['data_var_from_cov']
            self.data_var_avg_from_samples = cache_dataframe['data_var_avg_from_samples']
            self.data_std_total_grids = cache_dataframe['data_std_total_grids']
            self.skewness_stat_grids = cache_dataframe['skewness_stat_grids']
            self.skewness_pval_grids = cache_dataframe['skewness_pval_grids']
            self.kurtosis_stat_grids = cache_dataframe['kurtosis_stat_grids']
            self.kurtosis_pval_grids = cache_dataframe['kurtosis_pval_grids']
            self.gaussianity_stat_grids = cache_dataframe['gaussianity_stat_grids']
            self.gaussianity_pval_grids = cache_dataframe['gaussianity_pval_grids']
            self.R_data_coords_mesh = cache_dataframe['R_data_coords_mesh']
            self.phi_data_coords_mesh = cache_dataframe['phi_data_coords_mesh']
            self.Z_data_coords_mesh = cache_dataframe['Z_data_coords_mesh']
            self.R_edges_mesh = cache_dataframe['R_edges_mesh']
            self.phi_edges_mesh = cache_dataframe['phi_edges_mesh']
            self.Z_edges_mesh = cache_dataframe['Z_edges_mesh']
            self.counts_grid = cache_dataframe['counts_grid']
            self.nu_dat_grid = cache_dataframe['nu_dat_grid']
            self.vbar_R1_dat_grid = cache_dataframe['vbar_R1_dat_grid']
            self.vbar_p1_dat_grid = cache_dataframe['vbar_p1_dat_grid']
            self.vbar_T1_dat_grid = cache_dataframe['vbar_T1_dat_grid']
            self.vbar_Z1_dat_grid = cache_dataframe['vbar_Z1_dat_grid']
            self.vbar_RR_dat_grid = cache_dataframe['vbar_RR_dat_grid']
            self.vbar_pp_dat_grid = cache_dataframe['vbar_pp_dat_grid']
            self.vbar_TT_dat_grid = cache_dataframe['vbar_TT_dat_grid']
            self.vbar_ZZ_dat_grid = cache_dataframe['vbar_ZZ_dat_grid']
            self.vbar_Rp_dat_grid = cache_dataframe['vbar_Rp_dat_grid']
            self.vbar_RT_dat_grid = cache_dataframe['vbar_RT_dat_grid']
            self.vbar_RZ_dat_grid = cache_dataframe['vbar_RZ_dat_grid']
            self.vbar_pZ_dat_grid = cache_dataframe['vbar_pZ_dat_grid']
            self.vbar_TZ_dat_grid = cache_dataframe['vbar_TZ_dat_grid']
            self.counts_std_grid = cache_dataframe['counts_std_grid']
            self.nu_std_grid = cache_dataframe['nu_std_grid']
            self.vbar_R1_std_grid = cache_dataframe['vbar_R1_std_grid']
            self.vbar_p1_std_grid = cache_dataframe['vbar_p1_std_grid']
            self.vbar_T1_std_grid = cache_dataframe['vbar_T1_std_grid']
            self.vbar_Z1_std_grid = cache_dataframe['vbar_Z1_std_grid']
            self.vbar_RR_std_grid = cache_dataframe['vbar_RR_std_grid']
            self.vbar_pp_std_grid = cache_dataframe['vbar_pp_std_grid']
            self.vbar_TT_std_grid = cache_dataframe['vbar_TT_std_grid']
            self.vbar_ZZ_std_grid = cache_dataframe['vbar_ZZ_std_grid']
            self.vbar_Rp_std_grid = cache_dataframe['vbar_Rp_std_grid']
            self.vbar_RT_std_grid = cache_dataframe['vbar_RT_std_grid']
            self.vbar_RZ_std_grid = cache_dataframe['vbar_RZ_std_grid']
            self.vbar_pZ_std_grid = cache_dataframe['vbar_pZ_std_grid']
            self.vbar_TZ_std_grid = cache_dataframe['vbar_TZ_std_grid']

            self.median_vertex_dev_vector = cache_dataframe['median_vertex_dev_vector']
            self.mean_vertex_dev_vector = cache_dataframe['mean_vertex_dev_vector']
            self.vertex_dev_3sig_lower = cache_dataframe['vertex_dev_3sig_lower']
            self.vertex_dev_2sig_lower = cache_dataframe['vertex_dev_2sig_lower']
            self.vertex_dev_1sig_lower = cache_dataframe['vertex_dev_1sig_lower']
            self.vertex_dev_1sig_upper = cache_dataframe['vertex_dev_1sig_upper']
            self.vertex_dev_2sig_upper = cache_dataframe['vertex_dev_2sig_upper']
            self.vertex_dev_3sig_upper = cache_dataframe['vertex_dev_3sig_upper']

        else:
            print('No previous sampling found, running from scratch')

            if N_cores == 1:
                #Linear Sample Transform Bin
                all_binned_data_vectors = []
                all_binned_std_vectors = []
                all_vertex_dev_vectors = []
                start = time.time()
                for jj in range(N_samplings):
                    print('Sample ', jj+1, ' of ', N_samplings)
                    binned_data_vector, binned_std_vector,\
                    vertex_deviation_vector = sample_transform_bin(
                                        astrometric_means, astrometric_covariances,
                                        cholesky_astrometric_covariances,
                                        self.solar_pomo_means, self.solar_pomo_covariances,
                                        epoch_T,jj,
                                        self.R_edges, self.phi_edges, self.Z_edges,
                                        positions_only = self.positions_only)
                    all_binned_data_vectors.append(binned_data_vector)
                    all_binned_std_vectors.append(binned_std_vector)
                    all_vertex_dev_vectors.append(vertex_deviation_vector)

                all_binned_data_vectors = np.array(all_binned_data_vectors)
                all_binned_std_vectors = np.array(all_binned_std_vectors)
                all_vertex_dev_vectors = np.array(all_vertex_dev_vectors)
                print('\nLinear Sampling, Transforming, Binning takes ', time.time()-start, ' s')
                print('Time per sample: ', (time.time()-start)/N_samplings, ' s\n')

            else:
                #Multiprocessor Pool
                print('Starting Parallel Sampling')
                start = time.time()
                pool = mp.Pool(processes=self.N_cores)
                results = [pool.apply_async(sample_transform_bin,
                                            (astrometric_means, astrometric_covariances,
                                            cholesky_astrometric_covariances,
                                            self.solar_pomo_means, self.solar_pomo_covariances,
                                            epoch_T, seed,
                                            self.R_edges, self.phi_edges, self.Z_edges),
                                            dict(positions_only = self.positions_only)) for seed in range(N_samplings)]

                output = [p.get() for p in results]
                all_binned_data_vectors = np.array([output[ii][0] for ii in range(N_samplings)])
                all_binned_std_vectors = np.array([output[ii][1] for ii in range(N_samplings)])
                all_vertex_dev_vectors = np.array([output[ii][2] for ii in range(N_samplings)])
                end = time.time()
                print('Parallel Sampling, Transforming, Binning takes ', end-start, ' s')
                print('Wall time per sample: ', (end-start)/N_samplings)

            #Calculate means and covariances, Skewness, Kurtosis
            if self.positions_only:
                grid_shape = (1, len(self.R_edges)-1, len(self.phi_edges)-1, len(self.Z_edges)-1)
            else:
                grid_shape = (14, len(self.R_edges)-1, len(self.phi_edges)-1, len(self.Z_edges)-1)
            subvector_length = (len(self.R_edges)-1)*(len(self.phi_edges)-1)*(len(self.Z_edges)-1)

            plot_sample_hist(all_binned_data_vectors, grid_shape, subvector_length,
                                    number_of_samples = 10)

            self.data_mean = np.mean(all_binned_data_vectors, axis=0)
            self.data_median = np.median(all_binned_data_vectors, axis=0)

            self.std_mean = np.mean(all_binned_std_vectors, axis=0)
            self.std_median = np.median(all_binned_std_vectors, axis=0)

            if self.calculate_covariance:
                covariance_fit = sklcov.EmpiricalCovariance().fit(all_binned_data_vectors)
                self.data_cov = covariance_fit.covariance_
                self.data_var_from_cov = np.diag(self.data_cov)
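                # Covariance to correlation: corr = D^-1 Sigma D^-1 with D = diag(sigma),
                # built via the outer product of the vector 1/sigma.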
                data_sigma_inv = 1/np.sqrt(np.diag(self.data_cov))
                data_sigma_inv = data_sigma_inv.reshape(len(data_sigma_inv), 1)
                self.data_corr = np.dot(data_sigma_inv, data_sigma_inv.T) * self.data_cov
            else:
                self.data_cov = np.zeros(1)
                self.data_var_from_cov = np.var(all_binned_data_vectors, axis=0)
                self.data_corr = np.zeros(1)

            #Combine the mean sample variances with variances from the covariance fit
            #   (eg the variance between the means).
            counts_subvectors = all_binned_data_vectors[:,0:subvector_length]
            if positions_only:
                counts_repeated = np.hstack([counts_subvectors]*1)
            else:
                counts_repeated = np.hstack([counts_subvectors]*14)

            self.data_var_avg_from_samples = np.sum(counts_repeated * \
                (np.nan_to_num(all_binned_std_vectors)**2),axis=0)/np.sum(counts_repeated,axis=0)
            self.data_std_total = np.sqrt(self.data_var_from_cov + self.data_var_avg_from_samples)

            #BODGE TEST CODE 5 JUNE 2019
            #Standard dev on the means
            self.data_std_total = np.sqrt(self.data_var_from_cov)
            #Standard error on the means plus mean error from each sample
            # self.data_std_total = np.sqrt(self.data_var_from_cov/N_samplings
            #                                 + self.data_var_avg_from_samples)

            #Gaussianity test using D’Agostino and Pearson’s tests
            self.skewness_stat, self.skewness_pval = stats.skewtest(all_binned_data_vectors)
            self.kurtosis_stat, self.kurtosis_pval = stats.kurtosistest(all_binned_data_vectors)
            self.gaussianity_stat, self.gaussianity_pval = stats.normaltest(all_binned_data_vectors)

            # Reshape
            self.data_mean_grids = self.data_mean.reshape(grid_shape)
            self.data_std_total_grids = self.data_std_total.reshape(grid_shape)
            self.skewness_stat_grids = self.skewness_stat.reshape(grid_shape)
            self.skewness_pval_grids = self.skewness_pval.reshape(grid_shape)
            self.kurtosis_stat_grids = self.kurtosis_stat.reshape(grid_shape)
            self.kurtosis_pval_grids = self.kurtosis_pval.reshape(grid_shape)
            self.gaussianity_stat_grids = self.gaussianity_stat.reshape(grid_shape)
            self.gaussianity_pval_grids = self.gaussianity_pval.reshape(grid_shape)

            # Pull out means and errors
            if positions_only:
                self.counts_grid = self.data_mean_grids[0]
                self.counts_std_grid = self.data_std_total_grids[0]

                self.vbar_R1_dat_grid = self.vbar_p1_dat_grid = \
                self.vbar_T1_dat_grid = self.vbar_Z1_dat_grid = \
                self.vbar_RR_dat_grid = self.vbar_pp_dat_grid = \
                self.vbar_TT_dat_grid = self.vbar_ZZ_dat_grid = \
                self.vbar_Rp_dat_grid = self.vbar_RT_dat_grid = \
                self.vbar_RZ_dat_grid = self.vbar_pZ_dat_grid = \
                self.vbar_TZ_dat_grid = \
                self.vbar_R1_std_grid = self.vbar_p1_std_grid = \
                self.vbar_T1_std_grid = self.vbar_Z1_std_grid = \
                self.vbar_RR_std_grid = self.vbar_pp_std_grid = \
                self.vbar_TT_std_grid = self.vbar_ZZ_std_grid = \
                self.vbar_Rp_std_grid = self.vbar_RT_std_grid = \
                self.vbar_RZ_std_grid = self.vbar_pZ_std_grid = \
                self.vbar_TZ_std_grid = 0.*self.counts_grid

            else:
                self.counts_grid,\
                self.vbar_R1_dat_grid, self.vbar_p1_dat_grid,\
                self.vbar_T1_dat_grid, self.vbar_Z1_dat_grid,\
                self.vbar_RR_dat_grid, self.vbar_pp_dat_grid,\
                self.vbar_TT_dat_grid, self.vbar_ZZ_dat_grid,\
                self.vbar_Rp_dat_grid, self.vbar_RT_dat_grid,\
                self.vbar_RZ_dat_grid, self.vbar_pZ_dat_grid,\
                self.vbar_TZ_dat_grid = self.data_mean_grids

                self.counts_std_grid,\
                self.vbar_R1_std_grid, self.vbar_p1_std_grid,\
                self.vbar_T1_std_grid, self.vbar_Z1_std_grid,\
                self.vbar_RR_std_grid, self.vbar_pp_std_grid,\
                self.vbar_TT_std_grid, self.vbar_ZZ_std_grid,\
                self.vbar_Rp_std_grid, self.vbar_RT_std_grid,\
                self.vbar_RZ_std_grid, self.vbar_pZ_std_grid,\
                self.vbar_TZ_std_grid = self.data_std_total_grids

            # Calculate tracer density
            self.nu_dat_grid = self.counts_grid/self.bin_vol_grid
            self.nu_std_grid = self.counts_std_grid/self.bin_vol_grid

            # Process Vertex Deviation
            all_vertex_dev_vectors = np.ma.masked_where(np.isnan(all_vertex_dev_vectors), all_vertex_dev_vectors)

            self.median_vertex_dev_vector = np.median(all_vertex_dev_vectors, axis=0).reshape(grid_shape[1:])
            self.mean_vertex_dev_vector = np.mean(all_vertex_dev_vectors, axis=0).reshape(grid_shape[1:])

            # Percentile bands approximating Gaussian 1/2/3 sigma intervals
            # (roughly 15.9, 2.3 and 0.15 per cent in each tail)
            self.vertex_dev_3sig_lower = np.percentile(all_vertex_dev_vectors, 100*0.0015, axis=0).reshape(grid_shape[1:])
            self.vertex_dev_2sig_lower = np.percentile(all_vertex_dev_vectors, 100*0.0225, axis=0).reshape(grid_shape[1:])
            self.vertex_dev_1sig_lower = np.percentile(all_vertex_dev_vectors, 100*0.158, axis=0).reshape(grid_shape[1:])
            self.vertex_dev_1sig_upper = np.percentile(all_vertex_dev_vectors, 100*0.8415, axis=0).reshape(grid_shape[1:])
            self.vertex_dev_2sig_upper = np.percentile(all_vertex_dev_vectors, 100*0.9775, axis=0).reshape(grid_shape[1:])
            self.vertex_dev_3sig_upper = np.percentile(all_vertex_dev_vectors, 100*0.9985, axis=0).reshape(grid_shape[1:])

            # Build dictionary then save to dataframe
            dictionary = {'data_mean' : self.data_mean,
                            'data_cov': self.data_cov,
                            'data_corr' : self.data_corr,
                            'data_var_from_cov' : self.data_var_from_cov,
                            'data_var_avg_from_samples' : self.data_var_avg_from_samples,
                            'data_std_total' : self.data_std_total,
                            'data_mean_grids' : self.data_mean_grids,
                            'data_std_total_grids': self.data_std_total_grids,
                            'skewness_stat_grids' : self.skewness_stat_grids,
                            'skewness_pval_grids' : self.skewness_pval_grids,
                            'kurtosis_stat_grids' : self.kurtosis_stat_grids,
                            'kurtosis_pval_grids' : self.kurtosis_pval_grids,
                            'gaussianity_stat_grids' : self.gaussianity_stat_grids,
                            'gaussianity_pval_grids' : self.gaussianity_pval_grids,
                            'R_data_coords_mesh' : self.R_data_coords_mesh,
                            'phi_data_coords_mesh' : self.phi_data_coords_mesh,
                            'Z_data_coords_mesh' : self.Z_data_coords_mesh,
                            'R_edges_mesh' : self.R_edges_mesh,
                            'phi_edges_mesh' : self.phi_edges_mesh,
                            'Z_edges_mesh' : self.Z_edges_mesh,
                            'counts_grid' : self.counts_grid,
                            'nu_dat_grid' : self.nu_dat_grid,
                            'vbar_R1_dat_grid' : self.vbar_R1_dat_grid,
                            'vbar_p1_dat_grid' : self.vbar_p1_dat_grid,
                            'vbar_T1_dat_grid' : self.vbar_T1_dat_grid,
                            'vbar_Z1_dat_grid' : self.vbar_Z1_dat_grid,
                            'vbar_RR_dat_grid' : self.vbar_RR_dat_grid,
                            'vbar_pp_dat_grid' : self.vbar_pp_dat_grid,
                            'vbar_TT_dat_grid' : self.vbar_TT_dat_grid,
                            'vbar_ZZ_dat_grid' : self.vbar_ZZ_dat_grid,
                            'vbar_Rp_dat_grid' : self.vbar_Rp_dat_grid,
                            'vbar_RT_dat_grid' : self.vbar_RT_dat_grid,
                            'vbar_RZ_dat_grid' : self.vbar_RZ_dat_grid,
                            'vbar_pZ_dat_grid' : self.vbar_pZ_dat_grid,
                            'vbar_TZ_dat_grid' : self.vbar_TZ_dat_grid,
                            'counts_std_grid' : self.counts_std_grid,
                            'nu_std_grid' : self.nu_std_grid,
                            'vbar_R1_std_grid' : self.vbar_R1_std_grid,
                            'vbar_p1_std_grid' : self.vbar_p1_std_grid,
                            'vbar_T1_std_grid' : self.vbar_T1_std_grid,
                            'vbar_Z1_std_grid' : self.vbar_Z1_std_grid,
                            'vbar_RR_std_grid' : self.vbar_RR_std_grid,
                            'vbar_pp_std_grid' : self.vbar_pp_std_grid,
                            'vbar_TT_std_grid' : self.vbar_TT_std_grid,
                            'vbar_ZZ_std_grid' : self.vbar_ZZ_std_grid,
                            'vbar_Rp_std_grid' : self.vbar_Rp_std_grid,
                            'vbar_RT_std_grid' : self.vbar_RT_std_grid,
                            'vbar_RZ_std_grid' : self.vbar_RZ_std_grid,
                            'vbar_pZ_std_grid' : self.vbar_pZ_std_grid,
                            'vbar_TZ_std_grid' : self.vbar_TZ_std_grid,
                            'median_vertex_dev_vector' : self.median_vertex_dev_vector,
                            'mean_vertex_dev_vector' : self.mean_vertex_dev_vector,
                            'vertex_dev_3sig_lower' : self.vertex_dev_3sig_lower ,
                            'vertex_dev_2sig_lower' : self.vertex_dev_2sig_lower ,
                            'vertex_dev_1sig_lower' : self.vertex_dev_1sig_lower ,
                            'vertex_dev_1sig_upper' : self.vertex_dev_1sig_upper,
                            'vertex_dev_2sig_upper' : self.vertex_dev_2sig_upper,
                            'vertex_dev_3sig_upper' : self.vertex_dev_3sig_upper
                            }

            cache_dataframe = pd.Series(dictionary)
            cache_dataframe.to_pickle(data_root + '/oscar_cache_files/' + cache_file_name)