def __init__(self, window=float('inf'), mu_estimator=None, cov_estimator=None,
             min_history=None, max_leverage=1., method='mpt', q=0.01, gamma=0.,
             allow_cash=False, **kwargs):
    """
    :param window: Window for calculating mean and variance. Use float('inf') for entire history.
    :param mu_estimator: Estimator of expected returns - string ('historical' or 'sharpe') or a custom estimator.
    :param cov_estimator: Covariance estimator - string ('empirical', 'ledoit-wolf', 'graph-lasso', 'oas') or an sklearn estimator.
    :param min_history: Use zero weights for the first min_history periods.
    :param max_leverage: Max leverage to use.
    :param method: Optimization objective - can be "mpt", "sharpe" or "variance".
    :param q: Depends on method, e.g. for "mpt" it is the risk aversion parameter (higher means lower aversion to risk).
    :param gamma: Penalize changing weights (can be a number or a Series with individual weights such as fees).
    :param allow_cash: Allow holding cash (weights don't have to sum to 1).
    """
    if np.isinf(window):
        window = int(1e+8)
        min_history = min_history or 50
    else:
        min_history = min_history or window

    super(MPT, self).__init__(min_history=min_history, **kwargs)
    self.window = window
    self.max_leverage = max_leverage
    self.method = method
    self.q = q
    self.gamma = gamma
    self.allow_cash = allow_cash

    if cov_estimator is None:
        cov_estimator = 'empirical'

    if isinstance(cov_estimator, str):
        if cov_estimator == 'empirical':
            # use pandas covariance in init_step
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == 'ledoit-wolf':
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == 'graph-lasso':
            cov_estimator = covariance.GraphLasso()
        elif cov_estimator == 'oas':
            cov_estimator = covariance.OAS()
        else:
            raise NotImplementedError('Unknown covariance estimator {}'.format(cov_estimator))

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator)

    if mu_estimator is None:
        mu_estimator = MuEstimator()

    if isinstance(mu_estimator, str):
        if mu_estimator == 'historical':
            mu_estimator = HistoricalEstimator(window)
        elif mu_estimator == 'sharpe':
            mu_estimator = MuEstimator()
        else:
            raise NotImplementedError('Unknown mu estimator {}'.format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
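
# --- Usage sketch (illustrative only, not part of the class above) ---
# The string dispatch above maps names onto sklearn covariance estimators before
# they are wrapped in CovarianceEstimator. A minimal sketch of what two of those
# strings resolve to, assuming a matrix of daily returns with one column per
# asset; the `returns` data below is synthetic.
import numpy as np
from sklearn import covariance

rng = np.random.default_rng(0)
returns = rng.normal(0.0005, 0.01, size=(250, 5))     # 250 days x 5 assets (synthetic)

lw = covariance.LedoitWolf().fit(returns)              # what 'ledoit-wolf' resolves to
emp = covariance.EmpiricalCovariance().fit(returns)    # what 'empirical' resolves to
print(lw.covariance_.shape, lw.shrinkage_)             # (5, 5) shrunk covariance and its shrinkage
print(emp.covariance_.shape)                           # (5, 5) sample covariance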
def robust_mahalanobis_with_chi2(feat, prob_reject, ret_dist=False):
    '''Reject outliers using one-class classification based on the Mahalanobis
    distance estimated from a robust covariance as calculated by the minimum
    covariance determinant.

    :Parameters:
        feat : array
               2D array where each row is a feature and each column a factor
        prob_reject : float
                      Probability threshold for rejecting outliers
        ret_dist : bool
                   If True, also return the Mahalanobis distances

    :Returns:
        sel : array
              Boolean selection array for each feature
        dist : array
               Mahalanobis distance for each feature (only if ret_dist is True)
    '''
    # Center on the per-column median (more robust than the mean)
    feat -= numpy.median(feat, axis=0)  # feat.mean(axis=0) # scipy.stats.mstats.mode(feat, 0)[0]
    try:
        robust_cov = skcov.MinCovDet().fit(feat)
    except Exception:
        # MinCovDet can fail (e.g. too few samples); fall back to the empirical covariance
        robust_cov = skcov.EmpiricalCovariance().fit(feat)
    dist = robust_cov.mahalanobis(feat)  # - scipy.stats.mstats.mode(feat, 0)[0])
    cut = scipy.stats.chi2.ppf(prob_reject, feat.shape[1])
    sel = dist < cut
    return (sel, dist) if ret_dist else sel
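
# --- Illustrative check of the chi-squared cutoff (synthetic data, not from the source) ---
# Squared Mahalanobis distances of d-dimensional Gaussian data follow a chi-squared
# distribution with d degrees of freedom, so chi2.ppf(prob_reject, d) is the cutoff
# used above. A small demo, assuming the modules are available under the aliases
# used in the function (numpy, scipy.stats, skcov):
import numpy
import scipy.stats
import sklearn.covariance as skcov

rng = numpy.random.default_rng(1)
inliers = rng.normal(0, 1, size=(200, 3))
outliers = rng.normal(8, 1, size=(5, 3))
feat = numpy.vstack([inliers, outliers])

# pass a copy: the function centers feat in place
sel = robust_mahalanobis_with_chi2(feat.copy(), prob_reject=0.975)
print(sel.sum(), 'of', len(feat), 'points kept')  # the 5 shifted points should be rejected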
def __init__(self, dim, estimator='OAS', **kwargs):
    """
    :param dim: Dimensionality, passed through to the parent class.
    :param estimator: Name of the sklearn covariance estimator to use
        ('EmpiricalCovariance', 'LedoitWolf', 'MinCovDet', 'OAS' or 'ShrunkCovariance').
    """
    super(SKGaussianParams, self).__init__(dim, **kwargs)
    if estimator == 'EmpiricalCovariance':
        self._estimator = covariance.EmpiricalCovariance(assume_centered=True)
    elif estimator == 'LedoitWolf':
        self._estimator = covariance.LedoitWolf(assume_centered=True)
    elif estimator == 'MinCovDet':
        self._estimator = covariance.MinCovDet(assume_centered=True)
    elif estimator == 'OAS':
        self._estimator = covariance.OAS(assume_centered=True)
    elif estimator == 'ShrunkCovariance':
        self._estimator = covariance.ShrunkCovariance(assume_centered=True)
    else:
        raise ValueError('Unknown estimator: {}'.format(estimator))
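
# --- Illustrative note on assume_centered=True (synthetic example, not from the source) ---
# All estimators above are built with assume_centered=True, i.e. sklearn does not
# subtract the sample mean before estimating the covariance, so the caller is
# expected to pass already-centered data. A minimal sketch with zero-mean input:
import numpy as np
from sklearn import covariance

rng = np.random.default_rng(2)
x = rng.normal(0, 1, size=(500, 4))
x_centered = x - x.mean(axis=0)

oas = covariance.OAS(assume_centered=True).fit(x_centered)
print(oas.covariance_.shape, round(oas.shrinkage_, 3))  # (4, 4) shrunk second-moment estimate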
def __init__(
    self,
    window=None,
    mu_estimator=None,
    cov_estimator=None,
    mu_window=None,
    cov_window=None,
    min_history=None,
    bounds=None,
    max_leverage=1.0,
    method="mpt",
    q=0.01,
    gamma=0.0,
    optimizer_options=None,
    force_weights=None,
    **kwargs,
):
    """
    :param window: Window for calculating mean and variance. Use None for entire history.
    :param mu_estimator: Estimator of expected returns - string ("historical", "sharpe") or a custom estimator.
    :param cov_estimator: Covariance estimator - string ("empirical", "ledoit-wolf", "graph-lasso",
        "oas", "single-index") or an sklearn estimator.
    :param min_history: Use zero weights for the first min_history periods. Default is 1 year.
    :param max_leverage: Max leverage to use.
    :param method: Optimization objective - can be "mpt", "sharpe" or "variance".
    :param q: Depends on method, e.g. for "mpt" it is the risk aversion parameter
        (higher means lower aversion to risk). From
        https://en.wikipedia.org/wiki/Modern_portfolio_theory#Efficient_frontier_with_no_risk-free_asset
        q=2 is equivalent to full Kelly, q=1 is equivalent to half Kelly.
    :param gamma: Penalize changing weights (can be a number or a Series with individual weights such as fees).
    """
    super().__init__(min_history=min_history, **kwargs)

    mu_window = mu_window or window
    cov_window = cov_window or window

    self.method = method
    self.q = q
    self.gamma = gamma
    self.bounds = bounds or {}
    self.force_weights = force_weights
    self.max_leverage = max_leverage
    self.optimizer_options = optimizer_options or {}

    if bounds and max_leverage != 1:
        raise NotImplementedError(
            "max_leverage cannot be used with bounds, consider removing max_leverage and replacing it with bounds"
        )

    if cov_estimator is None:
        cov_estimator = "empirical"

    if isinstance(cov_estimator, string_types):
        if cov_estimator == "empirical":
            # use pandas covariance in init_step
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == "ledoit-wolf":
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == "graph-lasso":
            cov_estimator = covariance.GraphLasso()
        elif cov_estimator == "oas":
            cov_estimator = covariance.OAS()
        elif cov_estimator == "single-index":
            cov_estimator = SingleIndexCovariance()
        else:
            raise NotImplementedError(
                "Unknown covariance estimator {}".format(cov_estimator)
            )

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

    if mu_estimator is None:
        mu_estimator = SharpeEstimator()

    if isinstance(mu_estimator, string_types):
        if mu_estimator == "historical":
            mu_estimator = HistoricalEstimator(window=mu_window)
        elif mu_estimator == "sharpe":
            mu_estimator = SharpeEstimator()
        else:
            raise NotImplementedError("Unknown mu estimator {}".format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
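
# --- Worked note on the q parameter (illustrative, synthetic numbers) ---
# The docstring above ties q to Kelly sizing. For expected excess returns mu and
# return covariance sigma, the unconstrained full-Kelly weights are sigma^{-1} mu
# and half Kelly is half of that; per the docstring's convention these correspond
# to q=2 and q=1 respectively. A small numeric sketch of those reference points:
import numpy as np

mu = np.array([0.05, 0.03])                  # expected excess returns (synthetic)
sigma = np.array([[0.04, 0.01],
                  [0.01, 0.02]])             # return covariance (synthetic)

full_kelly = np.linalg.solve(sigma, mu)      # reference point the docstring calls q=2
half_kelly = 0.5 * full_kelly                # reference point the docstring calls q=1
print(full_kelly, half_kelly)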
def __init__(self, mu_estimator=None, cov_estimator=None, cov_window=None,
             min_history=None, bounds=None, max_leverage=1., method='mpt',
             q=0.01, gamma=0., optimizer_options=None, force_weights=None,
             **kwargs):
    """
    :param mu_estimator: Estimator of expected returns - string ('historical', 'sharpe') or a custom estimator.
    :param cov_estimator: Covariance estimator - string ('empirical', 'ledoit-wolf', 'graph-lasso',
        'oas', 'single-index') or an sklearn estimator.
    :param cov_window: Window for calculating mean and variance. Use None for entire history.
    :param min_history: Use zero weights for the first min_history periods. Default is 1 year.
    :param max_leverage: Max leverage to use.
    :param method: Optimization objective - can be "mpt", "sharpe" or "variance".
    :param q: Depends on method, e.g. for "mpt" it is the risk aversion parameter (higher means lower aversion to risk).
    :param gamma: Penalize changing weights (can be a number or a Series with individual weights such as fees).
    """
    super().__init__(min_history=min_history, **kwargs)

    self.method = method
    self.q = q
    self.gamma = gamma
    self.bounds = bounds
    self.force_weights = force_weights
    self.max_leverage = max_leverage
    self.optimizer_options = optimizer_options or {}

    if cov_estimator is None:
        cov_estimator = 'empirical'

    if isinstance(cov_estimator, string_types):
        if cov_estimator == 'empirical':
            # use pandas covariance in init_step
            cov_estimator = covariance.EmpiricalCovariance()
        elif cov_estimator == 'ledoit-wolf':
            cov_estimator = covariance.LedoitWolf()
        elif cov_estimator == 'graph-lasso':
            cov_estimator = covariance.GraphLasso()
        elif cov_estimator == 'oas':
            cov_estimator = covariance.OAS()
        elif cov_estimator == 'single-index':
            cov_estimator = SingleIndexCovariance()
        else:
            raise NotImplementedError(
                'Unknown covariance estimator {}'.format(cov_estimator))

    # handle sklearn models
    if isinstance(cov_estimator, BaseEstimator):
        cov_estimator = CovarianceEstimator(cov_estimator, window=cov_window)

    if mu_estimator is None:
        mu_estimator = SharpeEstimator()

    if isinstance(mu_estimator, string_types):
        if mu_estimator == 'historical':
            mu_estimator = HistoricalEstimator(window=cov_window)
        elif mu_estimator == 'sharpe':
            mu_estimator = SharpeEstimator()
        else:
            raise NotImplementedError(
                'Unknown mu estimator {}'.format(mu_estimator))

    self.cov_estimator = cov_estimator
    self.mu_estimator = mu_estimator
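
# --- Refactoring sketch (hypothetical, not part of the library) ---
# The same string-to-estimator if/elif chain appears in each __init__ above. One
# possible consolidation is a module-level lookup table; the helper name
# `_resolve_cov_estimator` and the (deliberately small) mapping are illustrative.
from sklearn import covariance

_COV_ESTIMATORS = {
    'empirical': covariance.EmpiricalCovariance,
    'ledoit-wolf': covariance.LedoitWolf,
    'oas': covariance.OAS,
}

def _resolve_cov_estimator(spec):
    """Return an sklearn covariance estimator for a string spec; pass objects through."""
    if isinstance(spec, str):
        try:
            return _COV_ESTIMATORS[spec]()
        except KeyError:
            raise NotImplementedError('Unknown covariance estimator {}'.format(spec))
    return spec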
def __init__(self,
             data_root='../Astrometric_Data/Gaia_DR2_subsamples/',
             data_file_name='GaiaDR2_RC_sample_Mcut_0p0_0p75_Ccut_1p0_1p5Nstars_1333998.csv',
             binning_type='linear',  # 'linear', 'input' or 'quartile'
             Rmin=6000, Rmax=10000, num_R_bins=10,
             phimin=-np.pi/4, phimax=np.pi/4, num_phi_bins=3,
             Zmin=-2000, Zmax=2000, num_Z_bins=10,
             input_R_edges=None, input_phi_edges=None, input_Z_edges=None,
             N_samplings=100, N_cores=1,
             solar_pomo_means=np.array([8200., 0., 20.8, 10., 248., 7.]),
             solar_pomo_stds=np.array([100., 0., 0.3, 1., 3., 0.5]),
             calculate_covariance=True,
             positions_only=False,
             velocities_to_zero=False):
    self.data_root = data_root
    self.data_file_name = data_file_name
    self.binning_type = binning_type
    self.Rmin = Rmin
    self.Rmax = Rmax
    self.num_R_bins = num_R_bins
    self.phimin = phimin
    self.phimax = phimax
    self.num_phi_bins = num_phi_bins
    self.Zmin = Zmin
    self.Zmax = Zmax
    self.num_Z_bins = num_Z_bins
    self.input_R_edges = input_R_edges
    self.input_phi_edges = input_phi_edges
    self.input_Z_edges = input_Z_edges
    self.N_samplings = N_samplings
    self.N_cores = N_cores
    self.calculate_covariance = calculate_covariance
    self.positions_only = positions_only
    self.velocities_to_zero = velocities_to_zero

    # Set Constants and Parameters
    deg_to_rad = np.pi/180
    mas_to_rad = np.pi/6.48E8
    maspyr_to_radps = np.pi/(6.48E8 * 31557600)

    # Solar Position and Motion model
    self.solar_pomo_means = solar_pomo_means
    self.solar_pomo_stds = solar_pomo_stds
    self.solar_pomo_covariances = np.identity(6) * self.solar_pomo_stds**2
    """
    Bland-Hawthorn et al 2016 review
    R0 = 8200±100 pc
    Z0 = 25±5 pc
    Vgsun = 248±3 km/s, tangential velocity relative to Sgr A*
    Usun = 10.0±1 km/s, radial, positive towards the galactic center
    Vsun = 11.0±2 km/s, in direction of rotation
    Wsun = 7.0±0.5 km/s, vertical upwards positive

    Bennett & Bovy 2018
    Z0 = 20.8 ± 0.3 pc
    """

    # Open data file
    datab = pd.read_csv(self.data_root + self.data_file_name)  # astrometric_data_table

    # Construct Means and Covariance Matrices
    if self.positions_only:
        astrometric_means = np.array([datab['ra'].values * deg_to_rad,   # rad
                                      datab['dec'].values * deg_to_rad,  # rad
                                      datab['parallax'].values]).T       # mas
    elif self.velocities_to_zero:
        astrometric_means = np.array([datab['ra'].values * deg_to_rad,   # rad
                                      datab['dec'].values * deg_to_rad,  # rad
                                      datab['parallax'].values,          # mas
                                      0. * datab['pmra'].values,
                                      0. * datab['pmdec'].values,
                                      0. * datab['radial_velocity'].values]).T  # km/s
    else:
        astrometric_means = np.array([datab['ra'].values * deg_to_rad,          # rad
                                      datab['dec'].values * deg_to_rad,         # rad
                                      datab['parallax'].values,                 # mas
                                      datab['pmra'].values * maspyr_to_radps,   # rad/s
                                      datab['pmdec'].values * maspyr_to_radps,  # rad/s
                                      datab['radial_velocity'].values]).T       # km/s

    Nstars = datab['ra'].values.shape[0]
    Nzeros = np.zeros(Nstars)

    if self.positions_only:
        astrometric_covariances = np.array([
            [(datab['ra_error'].values*mas_to_rad)**2,
             datab['ra_dec_corr'].values * datab['ra_error'].values * datab['dec_error'].values * mas_to_rad**2,
             datab['ra_parallax_corr'].values * datab['ra_error'].values * datab['parallax_error'].values * mas_to_rad],
            [Nzeros,
             (datab['dec_error'].values*mas_to_rad)**2,
             datab['dec_parallax_corr'].values * datab['dec_error'].values * datab['parallax_error'].values * mas_to_rad],
            [Nzeros, Nzeros, datab['parallax_error'].values**2]])
        astrometric_covariances = np.transpose(astrometric_covariances, (2, 0, 1))  # Rearrange
        astrometric_covariances = np.array([astrometric_covariances[ii] + astrometric_covariances[ii].T -
                                            np.diagonal(astrometric_covariances[ii])*np.identity(3)
                                            for ii in range(Nstars)])  # Symmetrize
    else:
        astrometric_covariances = np.array([
            [(datab['ra_error'].values*mas_to_rad)**2,
             datab['ra_dec_corr'].values * datab['ra_error'].values * datab['dec_error'].values * mas_to_rad**2,
             datab['ra_parallax_corr'].values * datab['ra_error'].values * datab['parallax_error'].values * mas_to_rad,
             datab['ra_pmra_corr'].values * datab['ra_error'].values * datab['pmra_error'].values * mas_to_rad * maspyr_to_radps,
             datab['ra_pmdec_corr'].values * datab['ra_error'].values * datab['pmdec_error'].values * mas_to_rad * maspyr_to_radps,
             Nzeros],
            [Nzeros,
             (datab['dec_error'].values*mas_to_rad)**2,
             datab['dec_parallax_corr'].values * datab['dec_error'].values * datab['parallax_error'].values * mas_to_rad,
             datab['dec_pmra_corr'].values * datab['dec_error'].values * datab['pmra_error'].values * mas_to_rad * maspyr_to_radps,
             datab['dec_pmdec_corr'].values * datab['dec_error'].values * datab['pmdec_error'].values * mas_to_rad * maspyr_to_radps,
             Nzeros],
            [Nzeros, Nzeros,
             datab['parallax_error'].values**2,
             datab['parallax_pmra_corr'].values * datab['parallax_error'].values * datab['pmra_error'].values * maspyr_to_radps,
             datab['parallax_pmdec_corr'].values * datab['parallax_error'].values * datab['pmdec_error'].values * maspyr_to_radps,
             Nzeros],
            [Nzeros, Nzeros, Nzeros,
             (datab['pmra_error'].values * maspyr_to_radps)**2,
             datab['pmra_pmdec_corr'].values * datab['pmra_error'].values * datab['pmdec_error'].values * maspyr_to_radps**2,
             Nzeros],
            [Nzeros, Nzeros, Nzeros, Nzeros,
             (datab['pmdec_error'].values * maspyr_to_radps)**2,
             Nzeros],
            [Nzeros, Nzeros, Nzeros, Nzeros, Nzeros,
             datab['radial_velocity_error'].values**2]])
        astrometric_covariances = np.transpose(astrometric_covariances, (2, 0, 1))  # Rearrange
        astrometric_covariances = np.array([astrometric_covariances[ii] + astrometric_covariances[ii].T -
                                            np.diagonal(astrometric_covariances[ii])*np.identity(6)
                                            for ii in range(Nstars)])  # Symmetrize

    cholesky_astrometric_covariances = np.linalg.cholesky(astrometric_covariances)

    # Calculate epoch_T matrix
    epoch_T = calc_epoch_T('J2000')

    # Determine Binning
    if binning_type == 'input':
        self.R_edges = self.input_R_edges
        self.phi_edges = self.input_phi_edges
        self.Z_edges = self.input_Z_edges
        self.num_R_bins = len(self.input_R_edges) - 1
        self.num_phi_bins = len(self.input_phi_edges) - 1
        self.num_Z_bins = len(self.input_Z_edges) - 1
    elif binning_type == 'linear':
        self.R_edges = np.linspace(self.Rmin, self.Rmax, self.num_R_bins+1)
        self.phi_edges = np.linspace(self.phimin, self.phimax, self.num_phi_bins+1)
        self.Z_edges = np.linspace(self.Zmin, self.Zmax, self.num_Z_bins+1)
    elif binning_type == 'quartile':
        galactocentric_means = astrometric_to_galactocentric(
            astrometric_means[:, 0], astrometric_means[:, 1], astrometric_means[:, 2],
            Nzeros, Nzeros, Nzeros,
            self.solar_pomo_means[0], self.solar_pomo_means[1], self.solar_pomo_means[2],
            0., 0., 0., epoch_T)
        Rg_vec_means = galactocentric_means[0]
        phig_vec_means = galactocentric_means[1]
        Zg_vec_means = galactocentric_means[2]

        physt_hist = physt_h3([Rg_vec_means, phig_vec_means, Zg_vec_means], "quantile",
                              (self.num_R_bins+2, self.num_phi_bins+2, self.num_Z_bins+2))
        self.R_edges = physt_hist.numpy_bins[0][1:-1]
        self.phi_edges = physt_hist.numpy_bins[1][1:-1]
        self.Z_edges = physt_hist.numpy_bins[2][1:-1]

    # Calculate bin centers, edge mesh, and volumes
    self.R_bin_centers = (self.R_edges[1:] + self.R_edges[:-1])/2
    self.phi_bin_centers = (self.phi_edges[1:] + self.phi_edges[:-1])/2
    self.Z_bin_centers = (self.Z_edges[1:] + self.Z_edges[:-1])/2

    self.R_data_coords_mesh, self.phi_data_coords_mesh, self.Z_data_coords_mesh \
        = np.meshgrid(self.R_bin_centers, self.phi_bin_centers, self.Z_bin_centers, indexing='ij')
    self.R_edges_mesh, self.phi_edges_mesh, self.Z_edges_mesh \
        = np.meshgrid(self.R_edges, self.phi_edges, self.Z_edges, indexing='ij')

    # Volume of each cylindrical bin: 0.5 * |dphi| * |R_outer^2 - R_inner^2| * |dZ|
    self.bin_vol_grid = np.zeros([len(self.R_edges) - 1, len(self.phi_edges) - 1, len(self.Z_edges) - 1])
    for (rr, pp, zz), dummy in np.ndenumerate(self.bin_vol_grid):
        self.bin_vol_grid[rr, pp, zz] = 0.5 * abs(self.phi_edges[pp+1] - self.phi_edges[pp]) \
                                        * abs(self.R_edges[rr+1]**2 - self.R_edges[rr]**2) \
                                        * abs(self.Z_edges[zz+1] - self.Z_edges[zz])

    # Build cache file name
    if not os.path.isdir(data_root + '/oscar_cache_files/'):
        os.mkdir(data_root + '/oscar_cache_files/')

    if self.positions_only:
        cache_file_name = 'oscar_cache_positions_only_' \
            + hashlib.md5(np.concatenate([self.R_edges, self.phi_edges, self.Z_edges])).hexdigest() \
            + hashlib.md5(np.concatenate([self.solar_pomo_means, self.solar_pomo_covariances.flatten()])).hexdigest() \
            + '_' + str(self.N_samplings) \
            + data_file_name.split('.')[0] + '.dat'
    elif self.velocities_to_zero:
        cache_file_name = 'oscar_cache_velocities_to_zero_' \
            + hashlib.md5(np.concatenate([self.R_edges, self.phi_edges, self.Z_edges])).hexdigest() \
            + hashlib.md5(np.concatenate([self.solar_pomo_means, self.solar_pomo_covariances.flatten()])).hexdigest() \
            + '_' + str(self.N_samplings) \
            + data_file_name.split('.')[0] + '.dat'
    else:
        cache_file_name = 'oscar_cache_' \
            + hashlib.md5(np.concatenate([self.R_edges, self.phi_edges, self.Z_edges])).hexdigest() \
            + hashlib.md5(np.concatenate([self.solar_pomo_means, self.solar_pomo_covariances.flatten()])).hexdigest() \
            + '_' + str(self.N_samplings) \
            + data_file_name.split('.')[0] + '.dat'

    # Search for cache file
    if os.path.isfile(data_root + '/oscar_cache_files/' + cache_file_name):
        print('Previous sampling found, pulling data from cache.')
        cache_dataframe = pd.read_pickle(data_root + '/oscar_cache_files/' + cache_file_name)
        self.data_mean = cache_dataframe['data_mean']
        self.data_cov = cache_dataframe['data_cov']
        self.data_corr = cache_dataframe['data_corr']
        self.data_std_total = cache_dataframe['data_std_total']
        self.data_mean_grids = cache_dataframe['data_mean_grids']
        self.data_var_from_cov = cache_dataframe['data_var_from_cov']
        self.data_var_avg_from_samples = cache_dataframe['data_var_avg_from_samples']
        self.data_std_total_grids = cache_dataframe['data_std_total_grids']
        self.skewness_stat_grids = cache_dataframe['skewness_stat_grids']
        self.skewness_pval_grids = cache_dataframe['skewness_pval_grids']
        self.kurtosis_stat_grids = cache_dataframe['kurtosis_stat_grids']
        self.kurtosis_pval_grids = cache_dataframe['kurtosis_pval_grids']
        self.gaussianity_stat_grids = cache_dataframe['gaussianity_stat_grids']
        self.gaussianity_pval_grids = cache_dataframe['gaussianity_pval_grids']
        self.R_data_coords_mesh = cache_dataframe['R_data_coords_mesh']
        self.phi_data_coords_mesh = cache_dataframe['phi_data_coords_mesh']
        self.Z_data_coords_mesh = cache_dataframe['Z_data_coords_mesh']
        self.R_edges_mesh = cache_dataframe['R_edges_mesh']
        self.phi_edges_mesh = cache_dataframe['phi_edges_mesh']
        self.Z_edges_mesh = cache_dataframe['Z_edges_mesh']
        self.counts_grid = cache_dataframe['counts_grid']
        self.nu_dat_grid = cache_dataframe['nu_dat_grid']
        self.vbar_R1_dat_grid = cache_dataframe['vbar_R1_dat_grid']
        self.vbar_p1_dat_grid = cache_dataframe['vbar_p1_dat_grid']
        self.vbar_T1_dat_grid = cache_dataframe['vbar_T1_dat_grid']
        self.vbar_Z1_dat_grid = cache_dataframe['vbar_Z1_dat_grid']
        self.vbar_RR_dat_grid = cache_dataframe['vbar_RR_dat_grid']
        self.vbar_pp_dat_grid = cache_dataframe['vbar_pp_dat_grid']
        self.vbar_TT_dat_grid = cache_dataframe['vbar_TT_dat_grid']
        self.vbar_ZZ_dat_grid = cache_dataframe['vbar_ZZ_dat_grid']
        self.vbar_Rp_dat_grid = cache_dataframe['vbar_Rp_dat_grid']
        self.vbar_RT_dat_grid = cache_dataframe['vbar_RT_dat_grid']
        self.vbar_RZ_dat_grid = cache_dataframe['vbar_RZ_dat_grid']
        self.vbar_pZ_dat_grid = cache_dataframe['vbar_pZ_dat_grid']
        self.vbar_TZ_dat_grid = cache_dataframe['vbar_TZ_dat_grid']
        self.counts_std_grid = cache_dataframe['counts_std_grid']
        self.nu_std_grid = cache_dataframe['nu_std_grid']
        self.vbar_R1_std_grid = cache_dataframe['vbar_R1_std_grid']
        self.vbar_p1_std_grid = cache_dataframe['vbar_p1_std_grid']
        self.vbar_T1_std_grid = cache_dataframe['vbar_T1_std_grid']
        self.vbar_Z1_std_grid = cache_dataframe['vbar_Z1_std_grid']
        self.vbar_RR_std_grid = cache_dataframe['vbar_RR_std_grid']
        self.vbar_pp_std_grid = cache_dataframe['vbar_pp_std_grid']
        self.vbar_TT_std_grid = cache_dataframe['vbar_TT_std_grid']
        self.vbar_ZZ_std_grid = cache_dataframe['vbar_ZZ_std_grid']
        self.vbar_Rp_std_grid = cache_dataframe['vbar_Rp_std_grid']
        self.vbar_RT_std_grid = cache_dataframe['vbar_RT_std_grid']
        self.vbar_RZ_std_grid = cache_dataframe['vbar_RZ_std_grid']
        self.vbar_pZ_std_grid = cache_dataframe['vbar_pZ_std_grid']
        self.vbar_TZ_std_grid = cache_dataframe['vbar_TZ_std_grid']
        self.median_vertex_dev_vector = cache_dataframe['median_vertex_dev_vector']
        self.mean_vertex_dev_vector = cache_dataframe['mean_vertex_dev_vector']
        self.vertex_dev_3sig_lower = cache_dataframe['vertex_dev_3sig_lower']
        self.vertex_dev_2sig_lower = cache_dataframe['vertex_dev_2sig_lower']
        self.vertex_dev_1sig_lower = cache_dataframe['vertex_dev_1sig_lower']
        self.vertex_dev_1sig_upper = cache_dataframe['vertex_dev_1sig_upper']
        self.vertex_dev_2sig_upper = cache_dataframe['vertex_dev_2sig_upper']
        self.vertex_dev_3sig_upper = cache_dataframe['vertex_dev_3sig_upper']
    else:
        print('No previous sampling found, running from scratch')

        if N_cores == 1:
            # Linear Sample Transform Bin
            all_binned_data_vectors = []
            all_binned_std_vectors = []
            all_vertex_dev_vectors = []
            start = time.time()
            for jj in range(N_samplings):
                print('Sample ', jj+1, ' of ', N_samplings)
                binned_data_vector, binned_std_vector, \
                    vertex_deviation_vector = sample_transform_bin(
                        astrometric_means, astrometric_covariances,
                        cholesky_astrometric_covariances,
                        self.solar_pomo_means, self.solar_pomo_covariances,
                        epoch_T, jj,
                        self.R_edges, self.phi_edges, self.Z_edges,
                        positions_only=self.positions_only)
                all_binned_data_vectors.append(binned_data_vector)
                all_binned_std_vectors.append(binned_std_vector)
                all_vertex_dev_vectors.append(vertex_deviation_vector)

            all_binned_data_vectors = np.array(all_binned_data_vectors)
            all_binned_std_vectors = np.array(all_binned_std_vectors)
            all_vertex_dev_vectors = np.array(all_vertex_dev_vectors)
            print('\nLinear Sampling, Transforming, Binning takes ', time.time()-start, ' s')
            print('Time per sample: ', (time.time()-start)/N_samplings, ' s\n')
        else:
            # Multiprocessor Pool
            print('Starting Parallel Sampling')
            start = time.time()
            pool = mp.Pool(processes=self.N_cores)
            results = [pool.apply_async(sample_transform_bin,
                                        (astrometric_means, astrometric_covariances,
                                         cholesky_astrometric_covariances,
                                         self.solar_pomo_means, self.solar_pomo_covariances,
                                         epoch_T, seed,
                                         self.R_edges, self.phi_edges, self.Z_edges),
                                        dict(positions_only=self.positions_only))
                       for seed in range(N_samplings)]
            output = [p.get() for p in results]
            all_binned_data_vectors = np.array([output[ii][0] for ii in range(N_samplings)])
            all_binned_std_vectors = np.array([output[ii][1] for ii in range(N_samplings)])
            all_vertex_dev_vectors = np.array([output[ii][2] for ii in range(N_samplings)])
            end = time.time()
            print('Parallel Sampling, Transforming, Binning takes ', end-start, ' s')
            print('Wall time per sample: ', (end-start)/N_samplings)

        # Calculate means and covariances, Skewness, Kurtosis
        if self.positions_only:
            grid_shape = (1, len(self.R_edges)-1, len(self.phi_edges)-1, len(self.Z_edges)-1)
        else:
            grid_shape = (14, len(self.R_edges)-1, len(self.phi_edges)-1, len(self.Z_edges)-1)
        subvector_length = (len(self.R_edges)-1)*(len(self.phi_edges)-1)*(len(self.Z_edges)-1)

        plot_sample_hist(all_binned_data_vectors, grid_shape, subvector_length,
                         number_of_samples=10)

        self.data_mean = np.mean(all_binned_data_vectors, axis=0)
        self.data_median = np.median(all_binned_data_vectors, axis=0)
        self.std_mean = np.mean(all_binned_std_vectors, axis=0)
        self.std_median = np.median(all_binned_std_vectors, axis=0)

        if self.calculate_covariance:
            covariance_fit = sklcov.EmpiricalCovariance().fit(all_binned_data_vectors)
            self.data_cov = covariance_fit.covariance_
            self.data_var_from_cov = np.diag(self.data_cov)
            data_sigma_inv = 1/np.sqrt(np.diag(self.data_cov))
            data_sigma_inv = data_sigma_inv.reshape(len(data_sigma_inv), 1)
            self.data_corr = np.dot(data_sigma_inv, data_sigma_inv.T) * self.data_cov
        else:
            self.data_cov = np.zeros(1)
            self.data_var_from_cov = np.var(all_binned_data_vectors, axis=0)
            self.data_corr = np.zeros(1)

        # Combine the mean sample variances with variances from the covariance fit
        # (e.g. the variance between the means).
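        # The per-bin variance attributed to the astrometric errors is the
        # count-weighted average of the per-sample variances,
        #   var_avg = sum_j n_j * sigma_j**2 / sum_j n_j   (j indexes samplings),
        # which is then added to the variance between sample means obtained from
        # the covariance fit above.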
        counts_subvectors = all_binned_data_vectors[:, 0:subvector_length]
        if positions_only:
            counts_repeated = np.hstack([counts_subvectors]*1)
        else:
            counts_repeated = np.hstack([counts_subvectors]*14)

        self.data_var_avg_from_samples = np.sum(counts_repeated *
                                                (np.nan_to_num(all_binned_std_vectors)**2), axis=0) \
                                         / np.sum(counts_repeated, axis=0)
        self.data_std_total = np.sqrt(self.data_var_from_cov + self.data_var_avg_from_samples)

        # BODGE TEST CODE 5 JUNE 2019
        # Standard dev on the means
        self.data_std_total = np.sqrt(self.data_var_from_cov)
        # Standard error on the means plus mean error from each sample
        # self.data_std_total = np.sqrt(self.data_var_from_cov/N_samplings
        #                               + self.data_var_avg_from_samples)

        # Gaussianity test using D'Agostino and Pearson's tests
        self.skewness_stat, self.skewness_pval = stats.skewtest(all_binned_data_vectors)
        self.kurtosis_stat, self.kurtosis_pval = stats.kurtosistest(all_binned_data_vectors)
        self.gaussianity_stat, self.gaussianity_pval = stats.normaltest(all_binned_data_vectors)

        # Reshape
        self.data_mean_grids = self.data_mean.reshape(grid_shape)
        self.data_std_total_grids = self.data_std_total.reshape(grid_shape)
        self.skewness_stat_grids = self.skewness_stat.reshape(grid_shape)
        self.skewness_pval_grids = self.skewness_pval.reshape(grid_shape)
        self.kurtosis_stat_grids = self.kurtosis_stat.reshape(grid_shape)
        self.kurtosis_pval_grids = self.kurtosis_pval.reshape(grid_shape)
        self.gaussianity_stat_grids = self.gaussianity_stat.reshape(grid_shape)
        self.gaussianity_pval_grids = self.gaussianity_pval.reshape(grid_shape)

        # Pull out means and errors
        if positions_only:
            self.counts_grid = self.data_mean_grids[0]
            self.counts_std_grid = self.data_std_total_grids[0]
            self.vbar_R1_dat_grid = self.vbar_p1_dat_grid = \
                self.vbar_T1_dat_grid = self.vbar_Z1_dat_grid = \
                self.vbar_RR_dat_grid = self.vbar_pp_dat_grid = \
                self.vbar_TT_dat_grid = self.vbar_ZZ_dat_grid = \
                self.vbar_Rp_dat_grid = self.vbar_RT_dat_grid = \
                self.vbar_RZ_dat_grid = self.vbar_pZ_dat_grid = \
                self.vbar_TZ_dat_grid = \
                self.vbar_R1_std_grid = self.vbar_p1_std_grid = \
                self.vbar_T1_std_grid = self.vbar_Z1_std_grid = \
                self.vbar_RR_std_grid = self.vbar_pp_std_grid = \
                self.vbar_TT_std_grid = self.vbar_ZZ_std_grid = \
                self.vbar_Rp_std_grid = self.vbar_RT_std_grid = \
                self.vbar_RZ_std_grid = self.vbar_pZ_std_grid = \
                self.vbar_TZ_std_grid = 0.*self.counts_grid
        else:
            self.counts_grid, \
                self.vbar_R1_dat_grid, self.vbar_p1_dat_grid, \
                self.vbar_T1_dat_grid, self.vbar_Z1_dat_grid, \
                self.vbar_RR_dat_grid, self.vbar_pp_dat_grid, \
                self.vbar_TT_dat_grid, self.vbar_ZZ_dat_grid, \
                self.vbar_Rp_dat_grid, self.vbar_RT_dat_grid, \
                self.vbar_RZ_dat_grid, self.vbar_pZ_dat_grid, \
                self.vbar_TZ_dat_grid = self.data_mean_grids

            self.counts_std_grid, \
                self.vbar_R1_std_grid, self.vbar_p1_std_grid, \
                self.vbar_T1_std_grid, self.vbar_Z1_std_grid, \
                self.vbar_RR_std_grid, self.vbar_pp_std_grid, \
                self.vbar_TT_std_grid, self.vbar_ZZ_std_grid, \
                self.vbar_Rp_std_grid, self.vbar_RT_std_grid, \
                self.vbar_RZ_std_grid, self.vbar_pZ_std_grid, \
                self.vbar_TZ_std_grid = self.data_std_total_grids

        # Calculate tracer density
        self.nu_dat_grid = self.counts_grid/self.bin_vol_grid
        self.nu_std_grid = self.counts_std_grid/self.bin_vol_grid

        # Process Vertex Deviation
        all_vertex_dev_vectors = np.ma.masked_where(np.isnan(all_vertex_dev_vectors),
                                                    all_vertex_dev_vectors)
        self.median_vertex_dev_vector = np.median(all_vertex_dev_vectors, axis=0).reshape(grid_shape[1:])
        self.mean_vertex_dev_vector = np.mean(all_vertex_dev_vectors, axis=0).reshape(grid_shape[1:])
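        # The percentile levels below approximate two-sided Gaussian 1/2/3 sigma
        # bounds (about 15.9/84.1, 2.3/97.7 and 0.15/99.85 per cent) of the vertex
        # deviation distribution across samplings.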
        self.vertex_dev_3sig_lower = np.percentile(all_vertex_dev_vectors, 100*0.0015, axis=0).reshape(grid_shape[1:])
        self.vertex_dev_2sig_lower = np.percentile(all_vertex_dev_vectors, 100*0.0225, axis=0).reshape(grid_shape[1:])
        self.vertex_dev_1sig_lower = np.percentile(all_vertex_dev_vectors, 100*0.158, axis=0).reshape(grid_shape[1:])
        self.vertex_dev_1sig_upper = np.percentile(all_vertex_dev_vectors, 100*0.8415, axis=0).reshape(grid_shape[1:])
        self.vertex_dev_2sig_upper = np.percentile(all_vertex_dev_vectors, 100*0.9775, axis=0).reshape(grid_shape[1:])
        self.vertex_dev_3sig_upper = np.percentile(all_vertex_dev_vectors, 100*0.9985, axis=0).reshape(grid_shape[1:])

        # Build dictionary then save to dataframe
        dictionary = {'data_mean': self.data_mean,
                      'data_cov': self.data_cov,
                      'data_corr': self.data_corr,
                      'data_var_from_cov': self.data_var_from_cov,
                      'data_var_avg_from_samples': self.data_var_avg_from_samples,
                      'data_std_total': self.data_std_total,
                      'data_mean_grids': self.data_mean_grids,
                      'data_std_total_grids': self.data_std_total_grids,
                      'skewness_stat_grids': self.skewness_stat_grids,
                      'skewness_pval_grids': self.skewness_pval_grids,
                      'kurtosis_stat_grids': self.kurtosis_stat_grids,
                      'kurtosis_pval_grids': self.kurtosis_pval_grids,
                      'gaussianity_stat_grids': self.gaussianity_stat_grids,
                      'gaussianity_pval_grids': self.gaussianity_pval_grids,
                      'R_data_coords_mesh': self.R_data_coords_mesh,
                      'phi_data_coords_mesh': self.phi_data_coords_mesh,
                      'Z_data_coords_mesh': self.Z_data_coords_mesh,
                      'R_edges_mesh': self.R_edges_mesh,
                      'phi_edges_mesh': self.phi_edges_mesh,
                      'Z_edges_mesh': self.Z_edges_mesh,
                      'counts_grid': self.counts_grid,
                      'nu_dat_grid': self.nu_dat_grid,
                      'vbar_R1_dat_grid': self.vbar_R1_dat_grid,
                      'vbar_p1_dat_grid': self.vbar_p1_dat_grid,
                      'vbar_T1_dat_grid': self.vbar_T1_dat_grid,
                      'vbar_Z1_dat_grid': self.vbar_Z1_dat_grid,
                      'vbar_RR_dat_grid': self.vbar_RR_dat_grid,
                      'vbar_pp_dat_grid': self.vbar_pp_dat_grid,
                      'vbar_TT_dat_grid': self.vbar_TT_dat_grid,
                      'vbar_ZZ_dat_grid': self.vbar_ZZ_dat_grid,
                      'vbar_Rp_dat_grid': self.vbar_Rp_dat_grid,
                      'vbar_RT_dat_grid': self.vbar_RT_dat_grid,
                      'vbar_RZ_dat_grid': self.vbar_RZ_dat_grid,
                      'vbar_pZ_dat_grid': self.vbar_pZ_dat_grid,
                      'vbar_TZ_dat_grid': self.vbar_TZ_dat_grid,
                      'counts_std_grid': self.counts_std_grid,
                      'nu_std_grid': self.nu_std_grid,
                      'vbar_R1_std_grid': self.vbar_R1_std_grid,
                      'vbar_p1_std_grid': self.vbar_p1_std_grid,
                      'vbar_T1_std_grid': self.vbar_T1_std_grid,
                      'vbar_Z1_std_grid': self.vbar_Z1_std_grid,
                      'vbar_RR_std_grid': self.vbar_RR_std_grid,
                      'vbar_pp_std_grid': self.vbar_pp_std_grid,
                      'vbar_TT_std_grid': self.vbar_TT_std_grid,
                      'vbar_ZZ_std_grid': self.vbar_ZZ_std_grid,
                      'vbar_Rp_std_grid': self.vbar_Rp_std_grid,
                      'vbar_RT_std_grid': self.vbar_RT_std_grid,
                      'vbar_RZ_std_grid': self.vbar_RZ_std_grid,
                      'vbar_pZ_std_grid': self.vbar_pZ_std_grid,
                      'vbar_TZ_std_grid': self.vbar_TZ_std_grid,
                      'median_vertex_dev_vector': self.median_vertex_dev_vector,
                      'mean_vertex_dev_vector': self.mean_vertex_dev_vector,
                      'vertex_dev_3sig_lower': self.vertex_dev_3sig_lower,
                      'vertex_dev_2sig_lower': self.vertex_dev_2sig_lower,
                      'vertex_dev_1sig_lower': self.vertex_dev_1sig_lower,
                      'vertex_dev_1sig_upper': self.vertex_dev_1sig_upper,
                      'vertex_dev_2sig_upper': self.vertex_dev_2sig_upper,
                      'vertex_dev_3sig_upper': self.vertex_dev_3sig_upper}

        cache_dataframe = pd.Series(dictionary)
        cache_dataframe.to_pickle(data_root + '/oscar_cache_files/' + cache_file_name)
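
# --- Usage sketch (illustrative; the cache path below mirrors the defaults above) ---
# Whichever branch runs, the cache pickle holds a pandas Series keyed by the field
# names built above, so a saved run can be inspected without re-sampling:
import glob
import pandas as pd

cache_files = glob.glob('../Astrometric_Data/Gaia_DR2_subsamples/oscar_cache_files/*.dat')
if cache_files:
    cache = pd.read_pickle(cache_files[0])
    print(sorted(cache.index)[:5])       # available grids and statistics
    print(cache['counts_grid'].shape)    # (num_R_bins, num_phi_bins, num_Z_bins)
    print(cache['nu_dat_grid'].shape, cache['nu_std_grid'].shape)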