# * LICENSE. * # ********************************************************************************** import logging log = logging.getLogger('macro.esk204_apply_query_to_pandas_df') from eskapade import ConfigObject, ProcessManager, DataStore from eskapade import core_ops, analysis log.debug('Now parsing configuration file esk204_apply_query_to_pandas_df') ######################################################################################### # --- minimal analysis information settings = ProcessManager().service(ConfigObject) settings['analysisName'] = 'esk204_apply_query_to_pandas_df' settings['version'] = 0 ######################################################################################### # --- Analysis values, settings, helper functions, configuration flags. # generate a dummy dataframe and add to datastore # to this dataset selections are applied below, during link execution. # NB: realize that, normally, such a dataframe is read or constructed on the fly # during link execution. from numpy.random import randn from pandas import DataFrame
# * modification, are permitted according to the terms listed in the file * # * LICENSE. * # ********************************************************************************** import logging log = logging.getLogger('macro.esk109_debugging_tips') from eskapade import ConfigObject, ProcessManager, DataStore from eskapade import core_ops log.debug('Now parsing configuration file esk109_debugging_tips') ######################################################################################### # --- minimal analysis information proc_mgr = ProcessManager() settings = proc_mgr.service(ConfigObject) settings['analysisName'] = 'esk109_debugging_tips' settings['version'] = 0 ######################################################################################### # --- Analysis values, settings, helper functions, configuration flags. msg = r""" To end the run_eskapade.py session with an interactive ipython shell, from the cmd line use the this flag: -i """ log.info(msg)
# ********************************************************************************** import logging log = logging.getLogger('macro.esk410_testing_correlations_between_categories') from eskapade import ConfigObject, ProcessManager from eskapade import core_ops, analysis, root_analysis, visualization log.debug( 'Now parsing configuration file esk410_testing_correlations_between_categories' ) ######################################################################################### # --- minimal analysis information proc_mgr = ProcessManager() settings = proc_mgr.service(ConfigObject) settings['analysisName'] = 'esk410_testing_correlations_between_categories' settings['version'] = 0 ######################################################################################### # --- Analysis values, settings, helper functions, configuration flags. input_files = [os.environ['ESKAPADE'] + '/data/mock_accounts.csv.gz'] ######################################################################################### # --- now set up the chains and links based on configuration flags ch = proc_mgr.add_chain('Data')
# * # * Authors: * # * KPMG Big Data team, Amstelveen, The Netherlands # * * # * Redistribution and use in source and binary forms, with or without * # * modification, are permitted according to the terms listed in the file * # * LICENSE. * # ********************************************************************************** from eskapade import ConfigObject, ProcessManager from eskapade import core_ops import tempfile ######################################################################################### # --- minimal analysis information settings = ProcessManager().service(ConfigObject) settings['analysisName'] = 'esk108_eventlooper' settings['version'] = 0 ######################################################################################### # --- Analysis values, settings, helper functions, configuration flags. settings['do_map'] = False if not 'do_map' in settings else settings['do_map'] settings['do_reduce'] = False if not 'do_reduce' in settings else settings[ 'do_reduce'] settings[ 'TESTING'] = False if not 'TESTING' in settings else settings['TESTING'] # --- create dummy example dataset, which is used below if settings['TESTING']:
def execute(self):
    """Execute ConvertDataFrame2RooDataSet

    Convert the pandas dataframe stored in the datastore under ``read_key``
    into a RooDataSet.

    1. Validate the input dataframe, resolve ``map_to_factorized`` and
       ``read_key_vars`` from the datastore when given as keys, and
       determine the columns to convert.  String/object columns and columns
       listed in ``ignore_columns`` are dropped; categorical columns are
       always kept.
    2. Convert the selected columns with ``data_conversion.df_to_rds`` and
       optionally build a RooNDKeysPdf of the resulting dataset.
    3. Optionally delete the input dataframe, then store the dataset and its
       observables either in the workspace or in the datastore.

    :returns: StatusCode.Success
    :raises TypeError: if the object under ``read_key`` is not a DataFrame
    :raises AssertionError: if an input check fails
    :raises RuntimeError: if the dataset cannot be put into the workspace
    """
    proc_mgr = ProcessManager()
    ds = proc_mgr.service(DataStore)
    ws = proc_mgr.service(RooFitManager).ws

    # 1a. basic checks on contents of the data frame
    assert self.read_key in ds, 'key %s not in DataStore' % self.read_key
    df = ds[self.read_key]
    if not isinstance(df, pd.DataFrame):
        raise TypeError('retrieved object "%s" not of type pandas DataFrame' % self.read_key)
    assert len(df.index) > 0, 'dataframe "%s" is empty' % self.read_key

    # 1b. retrieve map_to_factorized from ds if it's a string
    if self.map_to_factorized:
        if isinstance(self.map_to_factorized, str):
            assert len(self.map_to_factorized), 'map_to_factorized needs to be a filled string'
            assert self.map_to_factorized in ds, \
                'map_to_factorized key "%s" not found in datastore' % self.map_to_factorized
            self.map_to_factorized = ds[self.map_to_factorized]
        assert isinstance(self.map_to_factorized, dict), 'map_to_factorized needs to be a dict'

    # 1c. retrieve read_key_vars rooargset from datastore
    if self.read_key_vars:
        assert isinstance(self.read_key_vars, str) and len(self.read_key_vars), \
            'read_key_vars should be a filled string'
        assert self.read_key_vars in ds, 'read_key_vars not in datastore'
        varset = ds[self.read_key_vars]
        assert isinstance(varset, ROOT.RooArgSet), 'read_key_vars is not a RooArgSet'
        self._varset = varset
    if self._varset:
        # varset overrules provided columns
        self.columns = [rv.GetName() for rv in self._varset]

    # 1d. check all columns
    if not self.columns:
        self.columns = df.columns.tolist()
    # iterate over a copy: self.columns is pruned inside the loop
    for col in self.columns[:]:
        assert col in df.columns, 'column "%s" not in dataframe "%s"' % (col, self.read_key)
        dt = df[col].dtype.type
        # keep categorical observables -- convert these to roocategories in conversion
        # NOTE(review): this also keeps categorical columns that are listed in
        # ignore_columns -- confirm that this is intended
        if issubclass(dt, pd.types.dtypes.CategoricalDtypeType):
            continue
        if (dt is np.string_) or (dt is np.object_):
            # reject all string-based columns
            self.log().warning('Skipping string-based column "%s"', col)
            self.columns.remove(col)
        elif col in self.ignore_columns:
            # elif: a string-based column has already been removed above;
            # removing it a second time would raise a ValueError
            self.columns.remove(col)
    self.log().debug('Picking up columns: %s', self.columns)

    # 2. do conversion of df to roodataset
    #    self.map_to_factorized are categorical variables to be turned into roocategories
    rds, obs_vars, _, map_to_original = data_conversion.df_to_rds(
        df[self.columns],
        rf_varset=self._varset,
        category_vars=self.map_to_factorized,
        name=self.read_key,
        store_index=self.store_index)

    # create pdf of dataset as well?
    if self.create_keys_pdf:
        obs_list = ROOT.RooArgList(obs_vars)
        keys_name = self.create_keys_pdf
        keys_pdf = ROOT.RooNDKeysPdf(keys_name, keys_name, obs_list, rds, 'ma')

    # 3a. remove original df?
    if self.rm_original:
        del ds[self.read_key]

    if self.into_ws:
        # 3b. put objects from the datastore into the workspace
        try:
            ws[self.store_key] = rds
            ws.defineSet(self.store_key_vars, obs_vars)
        except Exception as exc:
            raise RuntimeError('could not import object "%s" into rooworkspace' % self.read_key) from exc
    else:
        # 3c. put objects into datastore
        ds[self.store_key_vars] = obs_vars
        ds[self.store_key] = rds

    # 3d. workspace doesn't like keys pdf, so always keep in ds
    if self.create_keys_pdf:
        ds[keys_name] = keys_pdf

    # 3e. store the mapping back to the original categories and the dataset length
    ds[self.sk_map_to_original] = map_to_original
    n_rds = rds.numEntries()
    ds['n_' + self.store_key] = n_rds

    self.log().debug('Stored roodataset "%s" with length: %d', self.store_key, n_rds)

    return StatusCode.Success
def execute(self):
    """Execute ApplySelectionToDf

    Applies queries or column selection to a pandas DataFrame.
    Input dataframe is not overwritten, unless told to do so in kwargs.

    1. Apply queries, in order of provided query list.
    2. Select columns (if provided).

    If a query or the column selection fails and ``continueIfFailure`` is
    set, an empty dataframe is stored instead (with the original columns
    for a failed query, with the selected columns for a failed selection).

    :returns: StatusCode.Success
    :raises ValueError: if a query or the column selection fails and
        ``continueIfFailure`` is not set
    :raises AssertionError: if the input is missing or not a DataFrame, or
        if neither queries nor a column selection were provided
    """
    ds = ProcessManager().service(DataStore)

    assert self.readKey in ds, 'Key %s not in DataStore.' % self.readKey
    input_df = ds[self.readKey]
    assert isinstance(input_df, pd.DataFrame), \
        'Object with key %s is not a pandas DataFrame.' % self.readKey

    # Sentinel for "no output dataframe produced yet".  None cannot be used:
    # query() returns None when called with inplace=True in kwargs.
    unset = object()
    df = unset

    # 1. apply queries to input dataframe.
    #    input dataframe is not overwritten, unless told to do so in kwargs.
    for query in self.querySet:
        # the first query runs on the input, every next one on the previous result
        source = input_df if df is unset else df
        try:
            df = source.query(query, **self.kwargs)
        except Exception as exc:
            if not self.continueIfFailure:
                raise ValueError('Failed to apply query <%s> to dataframe <%s>.'
                                 % (query, self.readKey)) from exc
            # fall back to an empty frame and skip the remaining queries
            df = pd.DataFrame(columns=input_df.columns)
            break

    # 2. apply column selection to input dataframe.
    #    input dataframe is not overwritten.
    if len(self.selectColumns):
        if df is unset:
            df = input_df.copy(deep=False)
        try:
            df = df[self.selectColumns]
        except Exception as exc:
            if not self.continueIfFailure:
                raise ValueError('Failed to select columns <%s> of dataframe <%s>.'
                                 % (str(self.selectColumns), self.readKey)) from exc
            df = pd.DataFrame(columns=self.selectColumns)

    assert df is not unset, 'No dataframe available for storage?'

    n_rows = len(df.index)
    ds[self.storeKey] = df
    ds['n_' + self.storeKey] = n_rows
    self.log().info('Stored dataframe with key <%s> and length <%d>.', self.storeKey, n_rows)

    return StatusCode.Success
def execute(self):
    """Execute UncorrelationHypothesisTester

    Test pairs (combinations) of observables in a RooDataSet against the
    hypothesis that they are uncorrelated, and report the results.

    1. Fetch the RooDataSet (from workspace or datastore) and its variables,
       match the configured column names/patterns to the dataset variables,
       resolve ``map_to_original`` and open the report page lists.
    2. For each combination: fill a RooDataHist, calculate the significance
       of the uncorrelated hypothesis and, if the input data allow it, the
       normalized residuals.
    3. Create heatmaps, latex tables and histograms of the results.
    4. Store histograms, report pages and result maps in the datastore.

    Combinations for which no residuals could be calculated are skipped in
    the residual heatmaps, tables and histograms.

    :returns: StatusCode.Success
    :raises TypeError: if the retrieved object is not a RooDataSet
    :raises AssertionError: if an input check fails or a column/pattern
        does not match any variable in the dataset
    """
    proc_mgr = ProcessManager()
    ds = proc_mgr.service(DataStore)

    # 1a. basic checks on contents of the roodataset
    if self.from_ws:
        ws = proc_mgr.service(RooFitManager).ws
        rds = ws.data(self.read_key)
        assert rds is not None, 'Key %s not in workspace' % self.read_key
    else:
        assert self.read_key in ds, 'key "%s" not found in datastore' % self.read_key
        rds = ds[self.read_key]
    if not isinstance(rds, ROOT.RooDataSet):
        raise TypeError('retrieved object "%s" not of type RooDataSet, but: %s' % (self.read_key, type(rds)))
    assert rds.numEntries() > 0, 'RooDataSet "%s" is empty' % self.read_key

    # 1b. retrieve read_key_vars rooargset from datastore
    if self.read_key_vars:
        assert isinstance(self.read_key_vars, str) and len(self.read_key_vars), \
            'read_key_vars should be a filled string'
        assert self.read_key_vars in ds, 'read_key_vars not in datastore'
        varset = ds[self.read_key_vars]
        assert isinstance(varset, ROOT.RooArgSet), 'read_key_vars is not a RooArgSet'
    else:
        # first record in dataset
        varset = rds.get(0)
    self._all_columns = [rv.GetName() for rv in varset]
    assert len(self._all_columns) >= 2, 'need at least two variables in roodataset %s.' % self.read_key

    def match_patterns(patterns):
        """Expand names/fnmatch patterns into a sorted list of unique dataset columns"""
        matched = set()
        for pattern in patterns:
            hits = fnmatch.filter(self._all_columns, pattern)
            if not hits:
                raise AssertionError('column or pattern "%s" not present in roodataset "%s"'
                                     % (pattern, self.read_key))
            matched.update(hits)
        return sorted(matched)

    # 1c. check provided columns
    #     match all columns/pattern in self.columns to _all_columns
    if isinstance(self.columns, bool):
        self.columns = self._all_columns if self.columns else []
    self.columns = match_patterns(self.columns)

    # 1d. retrieve left and right pair columns (multiplied as left x right)
    self.x_columns = match_patterns(self.x_columns)
    # a column is never paired with itself: drop x columns from the y columns
    self.y_columns = sorted(c for c in match_patterns(self.y_columns) if c not in self.x_columns)

    # 1e. retrieve map_to_original from ds
    if self.map_to_original:
        if isinstance(self.map_to_original, str):
            assert len(self.map_to_original), 'map_to_original needs to be a filled string'
            assert self.map_to_original in ds, 'map_to_original key not found in datastore'
            mto = ds[self.map_to_original]
        elif isinstance(self.map_to_original, dict):
            mto = self.map_to_original
        assert isinstance(mto, dict), 'map_to_original needs to be a dict'
        # pandas replace() will not do transformations that are identical,
        # including int 0/1 to bool. skip those column-transformations
        self.mto = copy.copy(mto)
        for c, c_mto in mto.items():
            # overlap between keys and values: true in case of identical transformation
            if set(c_mto.keys()) & set(c_mto.values()):
                self.log().debug('Identical transformation for column "%s". Skipping column', c)
                del self.mto[c]

    # 1f. create report pages
    # data scientist report
    self.pages = []
    if self.pages_key:
        self.pages = ds.get(self.pages_key, [])
        assert isinstance(self.pages, list), 'Pages key %s does not refer to a list' % self.pages_key
    # client report
    self.clientpages = []
    if self.clientpages_key:
        self.clientpages = ds.get(self.clientpages_key, [])
        assert isinstance(self.clientpages, list), \
            'Client pages key %s does not refer to a list' % self.clientpages_key

    # 1g. initialize significance_matrix
    nx = ny = 0
    x_cols = y_cols = []
    if len(self.columns):
        nx = ny = len(self.columns)
        x_cols = y_cols = self.columns
    if len(self.x_columns) or len(self.y_columns):
        nx = len(self.x_columns)
        ny = len(self.y_columns)
        x_cols = self.x_columns
        y_cols = self.y_columns
    significance_matrix = np.zeros((ny, nx))
    symmetrize = True if self.columns else False
    n_bins = nx * ny if not symmetrize else nx * nx - nx
    n_unique = n_bins if not symmetrize else (nx * nx - nx) / 2

    # 2a. loop over unique column pairs and add to combinations
    for idx, c1 in enumerate(self.columns):
        for c2 in self.columns[idx + 1:]:
            self.combinations.append([c1, c2])
    # add left-right pair combinations
    if self.x_columns and self.inproduct:
        assert len(self.x_columns) == len(self.y_columns)
    for i, c1 in enumerate(self.x_columns):
        if self.inproduct:
            # pair x and y columns by position
            self.combinations.append([c1, self.y_columns[i]])
        else:
            # pair every x column with every y column
            for c2 in self.y_columns:
                self.combinations.append([c1, c2])

    # 2b. loop over all combinations: calculate significance and residuals
    n_combos = len(self.combinations)
    n_entries = rds.numEntries()
    for i_c, combo in enumerate(self.combinations):
        combo_name = ':'.join(combo)
        # make roodatahist for each combination
        obsset = ROOT.RooArgSet()
        for c in combo:
            obsset.add(varset.find(c))
        catCutStr = '1'
        for j, var in enumerate(obsset):
            if isinstance(var, ROOT.RooRealVar):
                # separate name: n_bins holds the matrix bin count reported in the stats table below
                n_var_bins = self._n_bins(combo, j)
                var.setBins(n_var_bins)
            elif isinstance(var, ROOT.RooCategory):
                for ic in self._ignore_categories(combo, j):
                    if not var.isValidLabel(ic):
                        continue
                    catCutStr += ' && (%s!=%s::%s)' % (var.GetName(), var.GetName(), ic)
        rdh = ROOT.RooDataHist(combo_name, combo_name, obsset)
        # remove specific categories (e.g. nan) if this has been requested so.
        red = rds.reduce(ROOT.RooFit.Cut(catCutStr))
        rdh.add(red)
        del red

        # a) calculate global significance of combo
        self.log().debug('Now processing combination (%d/%d): %s with %d bins and %d entries',
                         i_c + 1, n_combos, str(combo), rdh.numEntries(), rdh.sumEntries())
        Zi = ROOT.Eskapade.ABCD.SignificanceOfUncorrelatedHypothesis(rdh, obsset, self.nsims_per_significance)
        self.significance_map[combo_name] = Zi
        # only pairs that are part of the matrix axes are filled in
        if len(combo) == 2 and combo[0] in x_cols and combo[1] in y_cols:
            x = x_cols.index(combo[0])
            y = y_cols.index(combo[1])
            significance_matrix[y, x] = Zi
            if symmetrize:
                significance_matrix[x, y] = Zi

        # b) calculate residuals
        success = ROOT.Eskapade.ABCD.checkInputData(rdh)
        self.log().debug('Combination %s has significance: %f. Can calculate residuals? %s',
                         str(combo), Zi, success)
        if not success:
            self.log().warning('Cannot calculate residuals for combination: %s. Skipping.', str(combo))
            del rdh
            continue
        residi = ROOT.Eskapade.ABCD.GetNormalizedResiduals(rdh, obsset)
        dfri = data_conversion.rds_to_df(residi)
        del rdh
        del residi

        # do the mapping of roofit categories back to original format
        if self.mto:
            dfri.replace(self.mto, inplace=True)
        self.residuals_map[combo_name] = dfri

    # below, create report page for each variable in data frame
    # create resulting heatmaps and histograms

    # 1. make significance heatmap
    f_path = self.results_path + self.prefix + 'all_correlation_significance.pdf'
    var_label = 'Significance correlation matrix (s.d.)'
    vis_utils.plot_correlation_matrix(significance_matrix, x_cols, y_cols, f_path, var_label, -5, 5)
    if nx > 0 and ny > 0:
        flat = significance_matrix.ravel()
        stats = [('entries', n_entries), ('bins', n_bins), ('unique', n_unique),
                 ('> 0', (flat > 0).sum()), ('< 0', (flat < 0).sum()),
                 ('avg', np.average(flat)), ('max', max(flat)), ('min', min(flat))]
    else:
        stats = []
    stats_table = tabulate.tabulate(stats, tablefmt='latex')
    self.pages.append(self.page_template.replace('VAR_LABEL', var_label)
                      .replace('VAR_STATS_TABLE', stats_table)
                      .replace('VAR_HISTOGRAM_PATH', f_path))

    # table of significances, sorted from high to low
    significance = {key: [val] for key, val in self.significance_map.items()}
    dfsignificance = (pd.DataFrame(significance).stack().reset_index(level=1)
                      .rename(columns={'level_1': 'Questions', 0: 'Significance'})
                      .sort_values(by='Significance', ascending=False))
    keep_cols = ['Questions', 'Significance']
    table = latex_residuals_table(dfsignificance, keep_cols, self.z_threshold, normResidCol='Significance')
    if table:
        self.clientpages.append(self.table_template.replace('VAR_LABEL', 'Significance')
                                .replace('VAR_STATS_TABLE', table))

    # 2a. create one residual table containing the top non-noncorrelating answers
    resid_all = []
    if len(self.combinations) > 1:
        # create one dataframe containing all data
        resid_list = []
        ndim_max = 2
        for key in list(self.residuals_map.keys()):
            if abs(self.significance_map[key]) < self.z_threshold:
                continue
            dftmp = self.residuals_map[key].copy()
            resid_list.append(self._format_df(dftmp, key))
            ndim_max = max(ndim_max, len(key.split(':')))
        # convert top residuals into latex table
        if len(resid_list) >= 1:
            resid_all = resid_list[0]
            if len(resid_list) > 1:
                resid_all = pd.concat(resid_list, ignore_index=True)
            # order by absolute normalized residual, largest first
            resid_all = resid_all.reindex(resid_all.normResid.abs().sort_values(ascending=False).index)
            dim_cols = ['question_%d' % i for i in range(ndim_max)] + \
                       ['answer_%d' % i for i in range(ndim_max)]
            keep_cols = dim_cols + ['num_entries', 'abcd', 'abcd_error', 'pValue', 'normResid']
            table = latex_residuals_table(resid_all, keep_cols, self.z_threshold)
            self.pages.append(self.table_template.replace('VAR_LABEL', 'Most significant outliers')
                              .replace('VAR_STATS_TABLE', table))
            keep_cols = dim_cols + ['num_entries', 'abcd', 'normResid']
            table = latex_residuals_table(resid_all, keep_cols, self.z_threshold)
            self.clientpages.append(self.table_template.replace('VAR_LABEL', 'Most significant outliers')
                                    .replace('VAR_STATS_TABLE', table))

    # combinations skipped in the residuals calculation have no entry in residuals_map
    residual_combos = [combo for combo in self.combinations if ':'.join(combo) in self.residuals_map]

    # 2b. make residuals heatmaps
    for combo in residual_combos:
        if len(combo) != 2:
            continue
        residi = self.residuals_map[':'.join(combo)]
        mat_normresiduals, x_vals, y_vals = extract_matrix(residi, combo[0], combo[1])
        mat_observed, x_vals, y_vals = extract_matrix(residi, combo[0], combo[1], 'num_entries')
        f_path = self.results_path + self.prefix + 'normalized_residuals_heatmap_' + '_'.join(combo) + '.pdf'
        vis_utils.plot_correlation_matrix(mat_normresiduals, x_vals, y_vals, f_path, 'significance relation',
                                          -5, 5, x_label=combo[0], y_label=combo[1],
                                          matrix_numbers=mat_observed, print_both_numbers=self.verbose_plots)
        stats = [('entries', residi['num_entries'].sum()), ('bins', len(residi.index)),
                 ('> 0', (residi['normResid'] > 0).sum()), ('< 0', (residi['normResid'] < 0).sum()),
                 ('avg', residi['normResid'].mean()), ('max', residi['normResid'].max()),
                 ('min', residi['normResid'].min())]
        stats_table = tabulate.tabulate(stats, tablefmt='latex')
        self.pages.append(self.page_template.replace('VAR_LABEL', 'relation (abcd): ' + ' vs '.join(combo))
                          .replace('VAR_STATS_TABLE', stats_table)
                          .replace('VAR_HISTOGRAM_PATH', f_path))

    # 2c. make residuals tables
    for combo in residual_combos:
        residi = self.residuals_map[':'.join(combo)]
        keep_cols = combo + ['num_entries', 'abcd', 'abcd_error', 'pValue', 'normResid']
        table = latex_residuals_table(residi, keep_cols, self.z_threshold)
        if not table:
            continue
        self.pages.append(self.table_template.replace('VAR_LABEL', 'outliers: ' + ' vs '.join(combo))
                          .replace('VAR_STATS_TABLE', table))

    # 2d. make residuals histograms
    p_all = ROOT.TH1F('p_all', 'p_all', 20, 0, 1)
    z_all = ROOT.TH1F('z_all', 'z_all', 50, -10, 10)
    for combo in residual_combos:
        combo_name = ':'.join(combo)
        residi = self.residuals_map[combo_name]
        root_numpy.fill_hist(p_all, residi['pValue'].values)
        root_numpy.fill_hist(z_all, residi['normResid'].values)
        p_i = ROOT.TH1F('p_' + combo_name, 'p_' + combo_name, 20, 0, 1)
        z_i = ROOT.TH1F('z_' + combo_name, 'z_' + combo_name, 40, -8, 8)
        root_numpy.fill_hist(p_i, residi['pValue'].values)
        root_numpy.fill_hist(z_i, residi['normResid'].values)
        self.hist_dict['normalized residuals: ' + ' vs '.join(combo)] = z_i
        self.hist_dict['p-values: ' + ' vs '.join(combo)] = p_i
    self.hist_dict['all normalized residuals'] = z_all
    self.hist_dict['all p-values'] = p_all

    # 3. storage
    if self.hist_dict_key:
        ds[self.hist_dict_key] = self.hist_dict
    if self.pages_key:
        ds[self.pages_key] = self.pages
    if self.clientpages_key:
        # ds.get() above returns a fresh list when the key is absent, so the
        # client pages need to be written back to end up in the datastore
        ds[self.clientpages_key] = self.clientpages
    if self.sk_significance_map:
        ds[self.sk_significance_map] = self.significance_map
        self.log().debug('Stored significance map in data store under key: %s', self.sk_significance_map)
    if self.sk_residuals_map:
        ds[self.sk_residuals_map] = self.residuals_map
        self.log().debug('Stored residuals map in data store under key: %s', self.sk_residuals_map)
    if self.sk_residuals_overview and len(resid_all) > 0:
        ds[self.sk_residuals_overview] = resid_all
        self.log().debug('Stored residuals list in data store under key: %s', self.sk_residuals_overview)

    return StatusCode.Success
def initialize(self):
    """Initialize UncorrelationHypothesisTester

    Check the types and values of the link arguments, read the report
    templates, make sure that the results directory exists, normalize the
    file prefix and ``var_ignore_categories``, and load the Eskapade RooFit
    library.

    :returns: StatusCode.Success
    :raises TypeError: if ``map_to_original`` is neither a dict nor a
        string, or if a ``var_ignore_categories`` value is neither a string
        nor a list
    :raises AssertionError: if the results path exists but is not a
        directory, or if both ``columns`` and ``x_columns``/``y_columns``
        are set (or ``x_columns`` is set without ``y_columns``)
    """
    # check input arguments
    self.check_arg_types(read_key=str, significance_key=str, sk_significance_map=str,
                         sk_residuals_map=str, sk_residuals_overview=str,
                         default_number_of_bins=int, nsims_per_significance=int, prefix=str,
                         z_threshold=float, pages_key=str, clientpages_key=str, hist_dict_key=str)
    self.check_arg_types(recurse=True, allow_none=True, columns=str)
    self.check_arg_types(recurse=True, allow_none=True, x_columns=str)
    self.check_arg_types(recurse=True, allow_none=True, y_columns=str)
    self.check_arg_types(recurse=True, allow_none=True, ignore_categories=str)
    self.check_arg_types(recurse=True, allow_none=True, var_ignore_categories=str)
    self.check_arg_vals('read_key')
    self.check_arg_vals('significance_key')

    if self.map_to_original and not isinstance(self.map_to_original, (str, dict)):
        raise TypeError('map_to_original needs to be a dict or string (to fetch a dict from the datastore)')

    # get I/O configuration
    io_conf = ProcessManager().service(ConfigObject).io_conf()

    # read report templates
    with open(persistence.io_path('templates', io_conf, 'df_summary_report.tex')) as templ_file:
        self.report_template = templ_file.read()
    with open(persistence.io_path('templates', io_conf, 'df_summary_report_page.tex')) as templ_file:
        self.page_template = templ_file.read()
    with open(persistence.io_path('templates', io_conf, 'df_summary_table_page.tex')) as templ_file:
        self.table_template = templ_file.read()

    # get path to results directory
    if not self.results_path:
        self.results_path = persistence.io_path('results_data', io_conf, 'report')
    if self.results_path and not self.results_path.endswith('/'):
        self.results_path = self.results_path + '/'

    # check if output directory exists
    if os.path.exists(self.results_path):
        # check if path is a directory
        if not os.path.isdir(self.results_path):
            self.log().critical('output path "%s" is not a directory', self.results_path)
            raise AssertionError('output path is not a directory')
    else:
        # create directory; exist_ok covers a directory created by another
        # process between the check above and this call
        self.log().debug('Making output directory "%s"', self.results_path)
        os.makedirs(self.results_path, exist_ok=True)

    # prefix for file storage
    if self.prefix and not self.prefix.endswith('_'):
        self.prefix = self.prefix + '_'

    # check provided columns
    # Truthiness instead of len(): also works when columns is a bool or None.
    # Raised explicitly so that the check is not stripped when running with -O.
    if self.columns and (self.x_columns or self.y_columns):
        raise AssertionError('Set either columns OR x_columns and y_columns.')
    if self.x_columns and (self.columns or not self.y_columns):
        raise AssertionError('Set either columns OR x_columns and y_columns.')
    self._all_columns = []

    # check that var_ignore_categories are set correctly.
    for col, ic in self.var_ignore_categories.items():
        if isinstance(ic, str):
            # a single category is wrapped into a list
            self.var_ignore_categories[col] = [ic]
        elif not isinstance(ic, list):
            raise TypeError('var_ignore_categories key "%s" needs to be a string or list of strings' % col)

    # load roofit classes
    roofit_utils.load_libesroofit()

    return StatusCode.Success
def execute(self):
    """Execute WsUtils

    Operations are executed in this order:

    1. put objects from the datastore into rooworkspace
    2. execute rooworkspace factory commands
    3. pass the workspace to (a list of) functions, to execute bits of (workspace) code
    4. simulate data from a pdf
    5. fit a pdf to a dataset
    6. make a plot of a dataset, pdf, or function
    7. move objects from the workspace to the datastore
    8. remove objects from the workspace

    :returns: StatusCode.Success
    :raises TypeError: if ``pages_key`` does not refer to a list
    :raises AssertionError: if a key is missing in the datastore/workspace,
        or if a simulate/fit/plot item is not an (args, kwargs) tuple
    :raises RuntimeError: if an object cannot be copied between the
        datastore and the workspace
    """
    proc_mgr = ProcessManager()
    ds = proc_mgr.service(DataStore)
    ws = proc_mgr.service(RooFitManager).ws

    # --- open existing report pages
    if self.pages_key:
        self.pages = ds.get(self.pages_key, [])
        if not isinstance(self.pages, list):
            raise TypeError('pages key "{}" does not refer to a list'.format(self.pages_key))
        elif len(self.pages) > 0:
            self.log().debug('Retrieved %d report pages under key "%s"', len(self.pages), self.pages_key)

    # --- put objects from the datastore into the workspace
    #     by doing this here, the object can be picked up by the factory
    for key in self.copy_into_ws:
        assert key in ds, 'key "%s" not found in datastore' % key
        try:
            ws[key] = ds[key]
            if self.rm_original:
                del ds[key]
        except Exception as exc:
            raise RuntimeError('could not import object "%s" into rooworkspace' % key) from exc

    # --- workspace factory commands
    #     by doing this here, the previously imported objects
    #     can be picked up by the factory
    for cmd in self.factory:
        ws.factory(cmd)

    # --- pass ws to list of functions, to execute bits of (workspace) code
    #     by doing this here, the objects previously created can be picked up.
    for func in self.apply:
        func(ws)

    # --- simulation
    #     needs input pdf and observables to generate
    for i, tp in enumerate(self._simulate):
        assert isinstance(tp, tuple) and len(tp) == 2, \
            'simulate item "%d" needs to be an args, kwargs tuple' % i
        self.do_simulate(ds, ws, *tp[0], **tp[1])

    # --- fitting
    #     needs input pdf and dataset to fit
    for i, tp in enumerate(self._fit):
        assert isinstance(tp, tuple) and len(tp) == 2, \
            'fit item "%d" needs to be an args, kwargs tuple' % i
        self.do_fit(ds, ws, *tp[0], **tp[1])

    # --- plotting
    #     needs single observable, pdf and/or dataset
    for i, tp in enumerate(self._plot):
        assert isinstance(tp, tuple) and len(tp) == 2, \
            'plot item "%d" needs to be an args, kwargs tuple' % i
        self.do_plot(ds, ws, *tp[0], **tp[1])

    # --- storage into ds
    #     put (clones of) objects from the workspace into the datastore
    for key in self.copy_into_ds:
        assert key in ws, 'key "%s" not found in workspace' % key
        try:
            ds[key] = ws[key].Clone()
            # schedule removal of the original; skip keys already scheduled,
            # e.g. by a previous execution of this link
            if self.rm_original and key not in self.rm_from_ws:
                self.rm_from_ws.append(key)
        except Exception as exc:
            raise RuntimeError('could not import object "%s" from workspace into ds' % key) from exc

    # --- deletion
    #     try to remove keys from the workspace; failure is not fatal
    for key in self.rm_from_ws:
        try:
            ws.cd()
            ROOT.gDirectory.Delete("%s;*" % key)
        except Exception:
            self.log().warning('Could not remove "%s" from workspace. Pass', key)

    # storage
    if self.pages_key:
        ds[self.pages_key] = self.pages
        self.log().debug('%d report pages stored under key: %s', len(self.pages), self.pages_key)

    return StatusCode.Success
# * Redistribution and use in source and binary forms, with or without * # * modification, are permitted according to the terms listed in the file * # * LICENSE. * # ********************************************************************************** import logging log = logging.getLogger('macro.esk103_printdatastore') from eskapade import ConfigObject, ProcessManager from eskapade import core_ops log.debug('Now parsing configuration file esk103_printdatastore') ######################################################################################### # --- minimal analysis information settings = ProcessManager().service(ConfigObject) settings['analysisName'] = 'esk103_printdatastore' settings['version'] = 0 ######################################################################################### # --- for this macro, fill the datastore with some dummy information from eskapade import DataStore ProcessManager().service(DataStore)['hello'] = 'world' ProcessManager().service(DataStore)['d'] = {'a': 1, 'b': 2, 'c': 3} ######################################################################################### # --- now set up the chains and links based on configuration flags proc_mgr = ProcessManager()