def across_cols(self,
                opath_leaf: str,
                all_cols: tp.List[str],
                col_index: int,
                inc_exps: tp.Optional[str]) -> None:
    """Collate column-wise across controller .csvs and write the results.

    The criteria of interest varies across the rows of controller .csvs. We
    take row `index` from a given dataframe and take the rows specified by
    the `inc_exps` and append them to a results dataframe column-wise, which
    we then write the file system.

    Arguments:
        opath_leaf: Leaf name (no extension) for the output files.
        all_cols: All column names present in the input .csvs.
        col_index: Index of the column of interest.
        inc_exps: Optional specification of which experiments to include.
    """
    # One input/output file pair per statistics extension (mean, stddev, ...).
    for ext in config.kStatsExtensions.values():
        stat_ipath = os.path.join(self.ipath_stem,
                                  self.ipath_leaf + ext)
        stat_opath = os.path.join(self.opath_stem,
                                  opath_leaf + ext)
        df = self._accum_df_by_col(stat_ipath,
                                   stat_opath,
                                   all_cols,
                                   col_index,
                                   inc_exps)

        if df is not None:
            # Reuse the already-computed output path rather than duplicating
            # the os.path.join() (the original recomputed it here, inviting
            # drift between the two copies).
            storage.DataFrameWriter('storage.csv')(df,
                                                   stat_opath,
                                                   index=False)
def __call__(self):
    """Average gathered run dataframes and write per-statistic .csv files.

    Concatenates all gathered dataframes, groups by row index, and applies
    the statistics kernels selected by ``avg_opts['dist_stats']``, writing
    one output file per statistics extension.
    """
    csv_concat = pd.concat(self.gathered_dfs)

    # Create directory for averaged .csv files for imagizing later.
    if self.gather_spec.for_imagizing:
        sierra.core.utils.dir_create_checked(
            os.path.join(self.stat_root, self.gather_spec.csv_stem),
            exist_ok=True)

    by_row_index = csv_concat.groupby(csv_concat.index)

    # Accumulate the outputs of every selected kernel. The original
    # overwrote `dfs` in each branch, so 'all' silently discarded all but
    # the last kernel's outputs, and an unexpected dist_stats value left
    # `dfs` unbound (NameError).
    dfs = {}
    if self.avg_opts['dist_stats'] in ['none', 'all']:
        dfs.update(sierra.core.stat_kernels.mean.from_groupby(by_row_index))

    if self.avg_opts['dist_stats'] in ['conf95', 'all']:
        dfs.update(sierra.core.stat_kernels.conf95.from_groupby(by_row_index))

    if self.avg_opts['dist_stats'] in ['bw', 'all']:
        dfs.update(sierra.core.stat_kernels.bw.from_groupby(by_row_index))

    for ext, df in dfs.items():
        opath = os.path.join(self.stat_root,
                             self.gather_spec.csv_stem,
                             self.gather_spec.csv_leaf + ext)
        writer = storage.DataFrameWriter(self.avg_opts['storage_medium'])
        # NaNs can appear for runs of unequal length; zero-fill before write.
        writer(df.fillna(0), opath, index=False)
def __call__(self):
    """Collate gathered per-run dataframes into per-(csv, column) outputs.

    Builds one dataframe per (csv_leaf, col) pair with one column per run,
    then writes each to the batch collation root.
    """
    collated = {}

    # Iterate runs and their gathered dataframes in lockstep. The original
    # used list.index(run) inside the loop, which is O(n^2) and returns the
    # wrong slot if two runs share a name.
    for run, run_dfs in zip(self.gathered_runs, self.gathered_dfs):
        for (csv_leaf, col), csv_df in run_dfs.items():
            # Invert performance if configured.
            if self.invert_perf and csv_leaf in self.intra_perf_csv:
                csv_df = 1.0 / csv_df

                # Because of the requirement that P(N) >= 0 for flexibility
                # (1/0 = inf gives a crash with DTW), if the current level
                # of performance is 0, it stays 0.
                #
                # This is a bit of a hack. But also not a hack at all,
                # because infinite performance is not possible. This is...
                # Schrodinger's Hack.
                csv_df = csv_df.replace([-np.inf, np.inf], 0)

            if (csv_leaf, col) not in collated:
                collated[(csv_leaf, col)] = pd.DataFrame(
                    index=csv_df.index, columns=self.gathered_runs)

            collated[(csv_leaf, col)][run] = csv_df

    for (csv_leaf, col), df in collated.items():
        writer = storage.DataFrameWriter(self.storage_medium)
        writer(df.fillna(0),
               os.path.join(self.batch_stat_collate_root,
                            self.exp_leaf + '-' + csv_leaf + '-' + col + '.csv'),
               index=False)
def __call__(self,
             main_config: types.YAMLDict,
             criteria: bc.IConcreteBatchCriteria) -> None:
    """Run all enabled intra-experiment models for each experiment in range.

    For each experiment, sets up per-experiment cmdopts paths, runs each
    model that opts in via ``run_for_exp()``, and writes the resulting
    dataframes (plus a ``.legend`` file) into the experiment's model root.
    """
    exp_to_run = utils.exp_range_calc(self.cmdopts,
                                      self.cmdopts['batch_output_root'],
                                      criteria)
    exp_dirnames = criteria.gen_exp_dirnames(self.cmdopts)

    for exp in exp_to_run:
        exp = os.path.split(exp)[1]
        exp_index = exp_dirnames.index(exp)

        cmdopts = copy.deepcopy(self.cmdopts)

        cmdopts["exp0_output_root"] = os.path.join(
            self.cmdopts["batch_output_root"], exp_dirnames[0])
        cmdopts["exp0_stat_root"] = os.path.join(
            self.cmdopts["batch_stat_root"], exp_dirnames[0])

        cmdopts["exp_input_root"] = os.path.join(
            self.cmdopts['batch_input_root'], exp)
        cmdopts["exp_output_root"] = os.path.join(
            self.cmdopts['batch_output_root'], exp)
        cmdopts["exp_graph_root"] = os.path.join(
            self.cmdopts['batch_graph_root'], exp)
        cmdopts["exp_stat_root"] = os.path.join(
            self.cmdopts["batch_stat_root"], exp)
        cmdopts["exp_model_root"] = os.path.join(
            cmdopts['batch_model_root'], exp)

        utils.dir_create_checked(cmdopts['exp_model_root'], exist_ok=True)

        for model in self.models:
            if not model.run_for_exp(criteria, cmdopts, exp_index):
                self.logger.debug(
                    "Skip running intra-experiment model from '%s' for exp%s",
                    str(model), exp_index)
                continue

            # Run the model
            self.logger.debug("Run intra-experiment model '%s' for exp%s",
                              str(model), exp_index)
            dfs = model.run(criteria, exp_index, cmdopts)

            for df, csv_stem in zip(dfs, model.target_csv_stems()):
                path_stem = os.path.join(cmdopts['exp_model_root'], csv_stem)

                # Write model legend file so the generated graph can find
                # it. Match by identity: `df` came directly out of `dfs`,
                # so `is` finds its position. The original compared
                # `search.values.all() == df.values.all()`, which compares
                # two scalar truth values and can match the wrong model.
                with open(path_stem + '.legend', 'w') as f:
                    for j, candidate in enumerate(dfs):
                        if candidate is df:
                            f.write(model.legend_names()[j])
                            break

                # Write model .csv file
                storage.DataFrameWriter('storage.csv')(df,
                                                       path_stem + '.model',
                                                       index=False)
def _gen_paired_heatmaps(self,
                         batch_leaf: str,
                         criteria: bc.BivarBatchCriteria,
                         cmdopts: types.Cmdopts,
                         dest_stem: str,
                         title: str,
                         label: str,
                         comp_type: str) -> None:
    """
    Generates a set of :class:`~sierra.core.graphs.heatmap.Heatmap` graphs
    a controller of primary interest against all other controllers (one
    graph per pairing), after input files have been gathered from each
    controller into :attr:`cc_csv_root`. Only valid if the comparison type
    is ``HMscale`` or ``HMdiff`` (the docstring previously said
    ``scale2D``/``diff2D``, which did not match the code).
    """
    # Fail fast on an unsupported comparison type; previously an unknown
    # value crashed later with NameError on the unbound `plot_df`.
    if comp_type not in ('HMscale', 'HMdiff'):
        raise ValueError(f"Unsupported comparison type '{comp_type}'")

    opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf, dest_stem, None)

    csv_pattern_root = os.path.join(self.cc_csv_root, opath_leaf)
    pattern = csv_pattern_root + "*.csv"
    self.logger.debug("Generating paired heatmaps from pattern='%s'",
                      pattern)

    paths = [f for f in glob.glob(pattern) if re.search('_[0-9]+', f)]

    if len(paths) < 2:
        # logger.warn() is deprecated in favor of logger.warning().
        self.logger.warning(("Not enough matches from pattern='%s'--skipping "
                             "paired heatmap generation"),
                            pattern)
        return

    # The first match (index 0) is the primary controller of interest; all
    # others are compared against it.
    ref_df = storage.DataFrameReader('storage.csv')(paths[0])

    for i in range(1, len(paths)):
        df = storage.DataFrameReader('storage.csv')(paths[i])

        if comp_type == 'HMscale':
            plot_df = df / ref_df
        else:  # comp_type == 'HMdiff' (guaranteed by the guard above)
            plot_df = df - ref_df

        leaf = LeafGenerator.from_batch_leaf(batch_leaf, dest_stem, [0, i])
        ipath = os.path.join(self.cc_csv_root, leaf) + ".csv"
        opath = os.path.join(self.cc_graph_root, leaf) + config.kImageExt

        storage.DataFrameWriter('storage.csv')(plot_df, ipath, index=False)

        Heatmap(input_fpath=ipath,
                output_fpath=opath,
                title=title,
                transpose=self.cmdopts['transpose_graphs'],
                zlabel=self._gen_zaxis_label(label, comp_type),
                xlabel=criteria.graph_xlabel(cmdopts),
                ylabel=criteria.graph_ylabel(cmdopts),
                xtick_labels=criteria.graph_xticklabels(cmdopts),
                ytick_labels=criteria.graph_yticklabels(cmdopts)).generate()
def __call__(self,
             criteria: bc.IConcreteBatchCriteria,
             target: dict,
             stat_collate_root: str) -> None:
    """Collate bivariate statistics files from the batch for one graph."""
    self.logger.info(
        "Stage4: Collating bivariate files from batch in %s for graph '%s'...",
        self.cmdopts['batch_output_root'], target['src_stem'])
    self.logger.trace(json.dumps(target, indent=4))

    exp_dirs = utils.exp_range_calc(self.cmdopts,
                                    self.cmdopts['batch_output_root'],
                                    criteria)
    xlabels, ylabels = utils.bivar_exp_labels_calc(exp_dirs)

    # Build the set of statistics extensions additively. The original
    # if/elif chain made the 'bw' branch unreachable for dist_stats='all',
    # and any other value (e.g. 'none') left `exts` unbound -> NameError.
    # 'none' -> mean matches the treatment in the sibling stats gatherer.
    exts = []
    if self.cmdopts['dist_stats'] in ['none', 'all']:
        exts += [config.kStatsExtensions['mean']]

    if self.cmdopts['dist_stats'] in ['conf95', 'all']:
        exts += [
            config.kStatsExtensions['mean'],
            config.kStatsExtensions['stddev']
        ]

    if self.cmdopts['dist_stats'] in ['bw', 'all']:
        exts += [
            config.kStatsExtensions['min'],
            config.kStatsExtensions['max'],
            config.kStatsExtensions['mean'],
            config.kStatsExtensions['whislo'],
            config.kStatsExtensions['whishi'],
            config.kStatsExtensions['cilo'],
            config.kStatsExtensions['cihi'],
            config.kStatsExtensions['median']
        ]

    # Drop duplicates while preserving order ('mean' is selected by
    # multiple branches when dist_stats='all').
    exts = list(dict.fromkeys(exts))

    stats = [
        BivarGraphCollationInfo(df_ext=ext, xlabels=xlabels, ylabels=ylabels)
        for ext in exts
    ]

    for diri in exp_dirs:
        # We get full paths back from the exp dirs calculation, and we need
        # to work with path leaves
        diri = os.path.split(diri)[1]
        self._collate_exp(target, diri, stats)

    for stat in stats:
        if stat.all_srcs_exist:
            storage.DataFrameWriter('storage.csv')(
                stat.df,
                os.path.join(stat_collate_root,
                             target['dest_stem'] + stat.df_ext),
                index=False)
        elif stat.some_srcs_exist:
            self.logger.warning(
                "Not all experiments in '%s' produced '%s%s'",
                self.cmdopts['batch_output_root'],
                target['src_stem'], stat.df_ext)
def _gen_csvs_for_2D_or_3D(self,
                           cmdopts: types.Cmdopts,
                           batch_leaf: str,
                           controller: str,
                           src_stem: str,
                           dest_stem: str) -> None:
    """Generate per-controller .csv files for 2D/3D intra-scenario graphs.

    Because each ``.csv`` file corresponding to performance measures is a
    2D array, we just copy and rename the performance measure ``.csv``
    files for each controller into :attr:`cc_csv_root`.

    :class:`~sierra.core.graphs.stacked_surface_graph.StackedSurfaceGraph`
    expects an ``_[0-9]+.csv`` pattern for each 2D surface to graph in
    order to disambiguate which files belong to which controller without
    having the controller name in the filepath (contains dots), so we do
    that here. :class:`~sierra.core.graphs.heatmap.Heatmap` does not
    require that, but for the heatmap set it IS helpful to have an easy
    way to differentiate primary vs. other controllers, so we do it
    unconditionally here to handle both cases.
    """
    self.logger.debug("Gathering data for '%s' from %s -> %s",
                      controller, src_stem, dest_stem)

    csv_ipath = os.path.join(cmdopts['batch_stat_collate_root'],
                             src_stem + ".csv")

    # Some experiments might not generate the necessary performance
    # measure .csvs for graph generation, which is OK.
    if not utils.path_exists(csv_ipath):
        self.logger.warning("%s missing for controller '%s'",
                            csv_ipath, controller)
        return

    # Read the collated performance measure, then write it back out under a
    # leaf name carrying this controller's index.
    df = storage.DataFrameReader('storage.csv')(csv_ipath)

    controller_idx = self.controllers.index(controller)
    opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf,
                                               dest_stem,
                                               [controller_idx])
    csv_opath_stem = os.path.join(self.cc_csv_root, opath_leaf)

    storage.DataFrameWriter('storage.csv')(df,
                                           csv_opath_stem + '.csv',
                                           index=False)
def __call__(self,
             main_config: types.YAMLDict,
             criteria: bc.IConcreteBatchCriteria) -> None:
    """Run all enabled inter-experiment models against the batch.

    Each model that opts in via ``run_for_batch()`` is run; its dataframes
    are written as ``.model`` files, and for single-row (1D) results a
    ``.legend`` file is written so the generated line graph can find it.
    """
    cmdopts = copy.deepcopy(self.cmdopts)
    utils.dir_create_checked(cmdopts['batch_model_root'], exist_ok=True)
    utils.dir_create_checked(cmdopts['batch_graph_collate_root'],
                             exist_ok=True)

    for model in self.models:
        if not model.run_for_batch(criteria, cmdopts):
            self.logger.debug("Skip running inter-experiment model '%s'",
                              str(model))
            continue

        # Run the model
        self.logger.debug("Run inter-experiment model '%s'", str(model))
        dfs = model.run(criteria, cmdopts)

        for df, csv_stem in zip(dfs, model.target_csv_stems()):
            path_stem = os.path.join(cmdopts['batch_model_root'], csv_stem)

            # Write model .csv file
            storage.DataFrameWriter('storage.csv')(df,
                                                   path_stem + '.model',
                                                   index=False)

            # 1D dataframe -> line graph with legend
            if len(df.index) == 1:
                # Write model legend file so the generated graph can find
                # it. Match by identity: `df` came directly out of `dfs`,
                # so `is` finds its position. The original compared
                # `search.values.all() == df.values.all()`, which compares
                # two scalar truth values and can match the wrong model.
                with open(path_stem + '.legend', 'w') as f:
                    for j, candidate in enumerate(dfs):
                        if candidate is df:
                            f.write(model.legend_names()[j])
                            break
def generate(self):
    """Generate the heatmap set: data, stddev, model, and model error.

    Computes the model error dataframe (model - data), writes it to the
    model root, then hands all four input/output path pairs to
    :class:`HeatmapSet` for graph generation.
    """
    stem = self.target_stem

    # Input paths (statistics + model roots).
    data_ipath = os.path.join(self.exp_stat_root, stem + '.csv')
    stddev_ipath = os.path.join(self.exp_stat_root, stem + '.stddev')
    model_ipath = os.path.join(self.exp_model_root, stem + '.model')
    model_error_ipath = os.path.join(self.exp_model_root,
                                     stem + '-HM-model-error.csv')

    # Output paths (graph root).
    data_opath = os.path.join(self.exp_graph_root,
                              stem + '-HM' + config.kImageExt)
    stddev_opath = os.path.join(self.exp_graph_root,
                                stem + '-HM-stddev' + config.kImageExt)
    model_opath = os.path.join(self.exp_graph_root,
                               stem + '-HM-model' + config.kImageExt)
    model_error_opath = os.path.join(self.exp_graph_root,
                                     stem + '-HM-model-error' + config.kImageExt)

    # Write the error .csv to the filesystem
    reader = storage.DataFrameReader('storage.csv')
    data_df = reader(data_ipath)
    model_df = reader(model_ipath)
    storage.DataFrameWriter('storage.csv')(model_df - data_df,
                                           model_error_ipath,
                                           index=False)

    titles = [self.target_title,
              self.target_title + ' (Stddev)',
              self.target_title + ' (Model)',
              self.target_title + ' (Model Error)']

    HeatmapSet(ipaths=[data_ipath, stddev_ipath,
                       model_ipath, model_error_ipath],
               opaths=[data_opath, stddev_opath,
                       model_opath, model_error_opath],
               titles=titles,
               xlabel='X',
               ylabel='Y',
               **self.kwargs).generate()
def _gen_csv(self,
             cmdopts: types.Cmdopts,
             batch_leaf: str,
             src_stem: str,
             dest_stem: str) -> None:
    """Generate the .csv files needed for inter-scenario graph generation.

    Generates:

    - ``.csv`` file containing results for each scenario the controller is
      being compared across, 1 per line.

    - ``.stddev`` file containing stddev for the generated ``.csv`` file, 1
      per line.

    - ``.model`` file containing model predictions for controller behavior
      during each scenario, 1 per line (not generated if models were not
      run the performance measures we are generating graphs for).

    - ``.legend`` file containing legend values for models to plot (not
      generated if models were not run for the performance measures we are
      generating graphs for).
    """
    collate_root = cmdopts['batch_stat_collate_root']
    csv_ipath = os.path.join(cmdopts['batch_output_root'],
                             collate_root,
                             src_stem + ".csv")
    stddev_ipath = os.path.join(cmdopts['batch_output_root'],
                                collate_root,
                                src_stem + ".stddev")

    model_ipath_stem = os.path.join(cmdopts['batch_model_root'], src_stem)
    model_opath_stem = os.path.join(self.sc_model_root,
                                    dest_stem + "-" + self.controller)
    opath_stem = os.path.join(self.sc_csv_root,
                              dest_stem + "-" + self.controller)

    # Some experiments might not generate the necessary performance measure
    # .csvs for graph generation, which is OK.
    if not utils.path_exists(csv_ipath):
        self.logger.warning("%s missing for controller %s",
                            csv_ipath, self.controller)
        return

    # Collect performance measure results. Append to existing dataframe if
    # it exists, otherwise start a new one.
    data_df = self._accum_df(csv_ipath, opath_stem + '.csv', src_stem)
    storage.DataFrameWriter('storage.csv')(data_df,
                                           opath_stem + '.csv',
                                           index=False)

    # Collect performance results stddev. Append to existing dataframe if
    # it exists, otherwise start a new one.
    stddev_df = self._accum_df(stddev_ipath,
                               opath_stem + '.stddev',
                               src_stem)
    if stddev_df is not None:
        storage.DataFrameWriter('storage.csv')(stddev_df,
                                               opath_stem + '.stddev',
                                               index=False)

    # Collect performance results models and legends. Append to existing
    # dataframes if they exist, otherwise start new ones.
    model_df = self._accum_df(model_ipath_stem + '.model',
                              model_opath_stem + '.model',
                              src_stem)
    if model_df is not None:
        storage.DataFrameWriter('storage.csv')(model_df,
                                               model_opath_stem + '.model',
                                               index=False)

        # Append this scenario's legend entry; 'a' mode accumulates one
        # line per scenario across calls.
        with open(model_opath_stem + '.legend', 'a') as f:
            _, scenario, _ = rdg.parse_batch_leaf(batch_leaf)
            sgp = pm.module_load_tiered(
                project=cmdopts['project'],
                path='generators.scenario_generator_parser')
            kw = sgp.ScenarioGeneratorParser().to_dict(scenario)
            f.write("{0} Prediction\n".format(kw['scenario_tag']))