def analyse(self, data_frame=None): """Perform PCE analysis on input `data_frame`. Parameters ---------- data_frame : :obj:`pandas.DataFrame` Input data for analysis. Returns ------- dict: Contains analysis results in sub-dicts with keys - ['statistical_moments', 'percentiles', 'sobol_indices', 'correlation_matrices', 'output_distributions'] """ if data_frame is None: raise RuntimeError("Analysis element needs a data frame to " "analyse") elif data_frame.empty: raise RuntimeError( "No data in data frame passed to analyse element") qoi_cols = self.qoi_cols results = { 'statistical_moments': {}, 'percentiles': {}, 'sobols_first': {k: {} for k in qoi_cols}, 'sobols_second': {k: {} for k in qoi_cols}, 'sobols_total': {k: {} for k in qoi_cols}, 'correlation_matrices': {}, 'output_distributions': {}, } # Get the Polynomial P = self.sampler.P # Get the PCE variante to use (Regression or Projection) regression = self.sampler.regression # Compute nodes (and weights) if regression: nodes = cp.generate_samples(order=self.sampler.n_samples, domain=self.sampler.distribution, rule=self.sampler.rule) else: nodes, weights = cp.generate_quadrature( order=self.sampler.quad_order, dist=self.sampler.distribution, rule=self.sampler.rule, sparse=self.sampler.quad_sparse, growth=self.sampler.quad_growth) # Extract output values for each quantity of interest from Dataframe samples = {k: [] for k in qoi_cols} for run_id in data_frame.run_id.unique(): for k in qoi_cols: data = data_frame.loc[data_frame['run_id'] == run_id][k] samples[k].append(data.values) # Compute descriptive statistics for each quantity of interest for k in qoi_cols: # Approximation solver if regression: if samples[k][0].dtype == object: for i in range(self.sampler.count): samples[k][i] = samples[k][i].astype("float64") fit = cp.fit_regression(P, nodes, samples[k], "T") else: fit = cp.fit_quadrature(P, nodes, weights, samples[k]) # Statistical moments mean = cp.E(fit, self.sampler.distribution) var = cp.Var(fit, self.sampler.distribution) std = cp.Std(fit, self.sampler.distribution) results['statistical_moments'][k] = { 'mean': mean, 'var': var, 'std': std } # Percentiles (Pxx) P10 = cp.Perc(fit, 10, self.sampler.distribution) P90 = cp.Perc(fit, 90, self.sampler.distribution) results['percentiles'][k] = {'p10': P10, 'p90': P90} # Sensitivity Analysis: First, Second and Total Sobol indices sobols_first_narr = cp.Sens_m(fit, self.sampler.distribution) sobols_second_narr = cp.Sens_m2(fit, self.sampler.distribution) sobols_total_narr = cp.Sens_t(fit, self.sampler.distribution) sobols_first_dict = {} sobols_second_dict = {} sobols_total_dict = {} ipar = 0 i = 0 for param_name in self.sampler.vary.get_keys(): j = self.sampler.params_size[ipar] sobols_first_dict[param_name] = sobols_first_narr[i:i + j] sobols_second_dict[param_name] = sobols_second_narr[i:i + j] sobols_total_dict[param_name] = sobols_total_narr[i:i + j] i += j ipar += 1 results['sobols_first'][k] = sobols_first_dict results['sobols_second'][k] = sobols_second_dict results['sobols_total'][k] = sobols_total_dict # Correlation matrix results['correlation_matrices'][k] = cp.Corr( fit, self.sampler.distribution) # Output distributions results['output_distributions'][k] = cp.QoI_Dist( fit, self.sampler.distribution) return results
def analyse(self, data_frame=None): """Perform PCE analysis on input `data_frame`. Parameters ---------- data_frame : pandas DataFrame Input data for analysis. Returns ------- PCEAnalysisResults Use it to get the sobol indices and other information. """ def sobols(P, coefficients): """ Utility routine to calculate sobols based on coefficients """ A = np.array(P.coefficients) != 0 multi_indices = np.array( [P.exponents[A[:, i]].sum(axis=0) for i in range(A.shape[1])]) sobol_mask = multi_indices != 0 _, index = np.unique(sobol_mask, axis=0, return_index=True) index = np.sort(index) sobol_idx_bool = sobol_mask[index] sobol_idx_bool = np.delete(sobol_idx_bool, [0], axis=0) n_sobol_available = sobol_idx_bool.shape[0] if len(coefficients.shape) == 1: n_out = 1 else: n_out = coefficients.shape[1] n_coeffs = coefficients.shape[0] sobol_poly_idx = np.zeros([n_coeffs, n_sobol_available]) for i_sobol in range(n_sobol_available): sobol_poly_idx[:, i_sobol] = np.all( sobol_mask == sobol_idx_bool[i_sobol], axis=1) sobol = np.zeros([n_sobol_available, n_out]) for i_sobol in range(n_sobol_available): sobol[i_sobol] = np.sum(np.square( coefficients[sobol_poly_idx[:, i_sobol] == 1]), axis=0) idx_sort_descend_1st = np.argsort(sobol[:, 0], axis=0)[::-1] sobol = sobol[idx_sort_descend_1st, :] sobol_idx_bool = sobol_idx_bool[idx_sort_descend_1st] sobol_idx = [0 for _ in range(sobol_idx_bool.shape[0])] for i_sobol in range(sobol_idx_bool.shape[0]): sobol_idx[i_sobol] = np.array( [i for i, x in enumerate(sobol_idx_bool[i_sobol, :]) if x]) var = ((coefficients[1:]**2).sum(axis=0)) sobol = sobol / var return sobol, sobol_idx, sobol_idx_bool if data_frame is None: raise RuntimeError("Analysis element needs a data frame to " "analyse") elif data_frame.empty: raise RuntimeError( "No data in data frame passed to analyse element") qoi_cols = self.qoi_cols results = { 'statistical_moments': {}, 'percentiles': {}, 'sobols_first': {k: {} for k in qoi_cols}, 'sobols_second': {k: {} for k in qoi_cols}, 'sobols_total': {k: {} for k in qoi_cols}, 'correlation_matrices': {}, 'output_distributions': {}, 'fit': {}, 'Fourier_coefficients': {}, } # Get sampler informations P = self.sampler.P nodes = self.sampler._nodes weights = self.sampler._weights regression = self.sampler.regression # Extract output values for each quantity of interest from Dataframe # samples = {k: [] for k in qoi_cols} # for run_id in data_frame[('run_id', 0)].unique(): # for k in qoi_cols: # data = data_frame.loc[data_frame[('run_id', 0)] == run_id][k] # samples[k].append(data.values.flatten()) samples = {k: [] for k in qoi_cols} for k in qoi_cols: samples[k] = data_frame[k].values # Compute descriptive statistics for each quantity of interest for k in qoi_cols: # Approximation solver if regression: fit, fc = cp.fit_regression(P, nodes, samples[k], retall=1) else: fit, fc = cp.fit_quadrature(P, nodes, weights, samples[k], retall=1) results['fit'][k] = fit results['Fourier_coefficients'][k] = fc # Percentiles: 1%, 10%, 50%, 90% and 99% P01, P10, P50, P90, P99 = cp.Perc( fit, [1, 10, 50, 90, 99], self.sampler.distribution).squeeze() results['percentiles'][k] = { 'p01': P01, 'p10': P10, 'p50': P50, 'p90': P90, 'p99': P99 } if self.sampling: # use chaospy's sampling method # Statistical moments mean = cp.E(fit, self.sampler.distribution) var = cp.Var(fit, self.sampler.distribution) std = cp.Std(fit, self.sampler.distribution) results['statistical_moments'][k] = { 'mean': mean, 'var': var, 'std': std } # Sensitivity Analysis: First, Second and Total Sobol indices sobols_first_narr = cp.Sens_m(fit, self.sampler.distribution) sobols_second_narr = cp.Sens_m2(fit, self.sampler.distribution) sobols_total_narr = cp.Sens_t(fit, self.sampler.distribution) sobols_first_dict = {} sobols_second_dict = {} sobols_total_dict = {} for i, param_name in enumerate(self.sampler.vary.vary_dict): sobols_first_dict[param_name] = sobols_first_narr[i] sobols_second_dict[param_name] = sobols_second_narr[i] sobols_total_dict[param_name] = sobols_total_narr[i] results['sobols_first'][k] = sobols_first_dict results['sobols_second'][k] = sobols_second_dict results['sobols_total'][k] = sobols_total_dict else: # use PCE coefficients # Statistical moments mean = fc[0] var = np.sum(fc[1:]**2, axis=0) std = np.sqrt(var) results['statistical_moments'][k] = { 'mean': mean, 'var': var, 'std': std } # Sensitivity Analysis: First, Second and Total Sobol indices sobol, sobol_idx, _ = sobols(P, fc) varied = [_ for _ in self.sampler.vary.get_keys()] S1 = {_: np.zeros(sobol.shape[-1]) for _ in varied} ST = {_: np.zeros(sobol.shape[-1]) for _ in varied} #S2 = {_ : {__: np.zeros(sobol.shape[-1]) for __ in varied} for _ in varied} #for v in varied: del S2[v][v] S2 = { _: np.zeros((len(varied), sobol.shape[-1])) for _ in varied } for n, si in enumerate(sobol_idx): if len(si) == 1: v = varied[si[0]] S1[v] = sobol[n] elif len(si) == 2: v1 = varied[si[0]] v2 = varied[si[1]] #S2[v1][v2] = sobol[n] #S2[v2][v1] = sobol[n] S2[v1][si[1]] = sobol[n] S2[v2][si[0]] = sobol[n] for i in si: ST[varied[i]] += sobol[n] results['sobols_first'][k] = S1 results['sobols_second'][k] = S2 results['sobols_total'][k] = ST # Correlation matrix results['correlation_matrices'][k] = cp.Corr( fit, self.sampler.distribution) # Output distributions results['output_distributions'][k] = cp.QoI_Dist( fit, self.sampler.distribution) return PCEAnalysisResults(raw_data=results, samples=data_frame, qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))
def analyse(self, data_frame=None): """Perform PCE analysis on input `data_frame`. Parameters ---------- data_frame : :obj:`pandas.DataFrame` Input data for analysis. Returns ------- dict: Contains analysis results in sub-dicts with keys - ['statistical_moments', 'percentiles', 'sobol_indices', 'correlation_matrices', 'output_distributions'] """ if data_frame is None: raise RuntimeError("Analysis element needs a data frame to " "analyse") elif data_frame.empty: raise RuntimeError( "No data in data frame passed to analyse element") qoi_cols = self.qoi_cols results = { 'statistical_moments': {}, 'percentiles': {}, 'sobols_first': {k: {} for k in qoi_cols}, 'sobols_second': {k: {} for k in qoi_cols}, 'sobols_total': {k: {} for k in qoi_cols}, 'correlation_matrices': {}, 'output_distributions': {}, } # Get sampler informations P = self.sampler.P nodes = self.sampler._nodes weights = self.sampler._weights regression = self.sampler.regression # Extract output values for each quantity of interest from Dataframe samples = {k: [] for k in qoi_cols} for run_id in data_frame[('run_id', 0)].unique(): for k in qoi_cols: data = data_frame.loc[data_frame[('run_id', 0)] == run_id][k] samples[k].append(data.values.flatten()) # Compute descriptive statistics for each quantity of interest for k in qoi_cols: # Approximation solver if regression: fit = cp.fit_regression(P, nodes, samples[k]) else: fit = cp.fit_quadrature(P, nodes, weights, samples[k]) # Statistical moments mean = cp.E(fit, self.sampler.distribution) var = cp.Var(fit, self.sampler.distribution) std = cp.Std(fit, self.sampler.distribution) results['statistical_moments'][k] = { 'mean': mean, 'var': var, 'std': std } # Percentiles: 10% and 90% P10 = cp.Perc(fit, 10, self.sampler.distribution) P90 = cp.Perc(fit, 90, self.sampler.distribution) results['percentiles'][k] = {'p10': P10, 'p90': P90} # Sensitivity Analysis: First, Second and Total Sobol indices sobols_first_narr = cp.Sens_m(fit, self.sampler.distribution) sobols_second_narr = cp.Sens_m2(fit, self.sampler.distribution) sobols_total_narr = cp.Sens_t(fit, self.sampler.distribution) sobols_first_dict = {} sobols_second_dict = {} sobols_total_dict = {} for i, param_name in enumerate(self.sampler.vary.vary_dict): sobols_first_dict[param_name] = sobols_first_narr[i] sobols_second_dict[param_name] = sobols_second_narr[i] sobols_total_dict[param_name] = sobols_total_narr[i] results['sobols_first'][k] = sobols_first_dict results['sobols_second'][k] = sobols_second_dict results['sobols_total'][k] = sobols_total_dict # Correlation matrix results['correlation_matrices'][k] = cp.Corr( fit, self.sampler.distribution) # Output distributions results['output_distributions'][k] = cp.QoI_Dist( fit, self.sampler.distribution) return PCEAnalysisResults(raw_data=results, samples=data_frame, qois=self.qoi_cols, inputs=list(self.sampler.vary.get_keys()))