def _accum_df_by_row(self,
                     ipath: str,
                     opath: str,
                     index: int,
                     inc_exps: tp.Optional[str]) -> tp.Optional[pd.DataFrame]:
    """
    Append row ``index`` of the ``.csv`` at ``ipath`` to the dataframe
    previously accumulated at ``opath``.

    Nothing is written by this function; the accumulated dataframe is
    returned to the caller.

    Arguments:
        ipath: Path to the ``.csv`` file to draw the row from.

        opath: Path to the ``.csv`` file holding the rows accumulated so
               far. If it exists it is read and extended.

        index: Index label (as understood by ``.loc``) of the row to take
               from the input dataframe.

        inc_exps: If not ``None``, it is handed to
                  ``utils.exp_include_filter()`` together with the input
                  column names and ``self.n_exp`` to choose which columns
                  are kept. If ``None``, all columns are kept.

    Returns:
        The accumulated dataframe including the new row, or ``None`` if
        ``ipath`` does not exist.
    """
    if utils.path_exists(opath):
        cum_df = storage.DataFrameReader('storage.csv')(opath)
    else:
        cum_df = None

    if not utils.path_exists(ipath):
        return None

    t = storage.DataFrameReader('storage.csv')(ipath)

    if inc_exps is not None:
        cols = utils.exp_include_filter(inc_exps,
                                        list(t.columns),
                                        self.n_exp)
    else:
        cols = t.columns

    # Select the row as a 1-row dataframe (note the list around ``index``)
    # so that it keeps its index label and per-column dtypes.
    row = t.loc[[index], cols]

    if cum_df is None:
        # Nothing accumulated yet: the selected row is the whole result.
        return row

    # DataFrame.append() was removed in pandas 2.0; pd.concat() is the
    # supported equivalent.
    return pd.concat([cum_df, row])
def _gen_paired_heatmaps(self,
                         batch_leaf: str,
                         criteria: bc.BivarBatchCriteria,
                         cmdopts: types.Cmdopts,
                         dest_stem: str,
                         title: str,
                         label: str,
                         comp_type: str) -> None:
    """
    Generates a set of :class:`~sierra.core.graphs.heatmap.Heatmap` graphs
    comparing a reference controller against all other controllers (one
    graph per pairing), after input files have been gathered from each
    controller into :attr:`cc_csv_root`.

    The reference dataframe is the first matching ``.csv`` file in sorted
    path order.

    Arguments:
        batch_leaf: Batch leaf used to build the input/output file leaves.

        criteria: Batch criteria used to obtain axis labels and tick labels.

        cmdopts: Command line options, passed to the criteria label/tick
                 generators.

        dest_stem: Stem of the gathered ``.csv`` files to pair up.

        title: Title for each generated heatmap.

        label: Passed to ``self._gen_zaxis_label()`` to build the Z label.

        comp_type: ``HMscale`` to plot ``other / reference``, or ``HMdiff``
                   to plot ``other - reference``.

    Raises:
        ValueError: If at least two input files were found but
                    ``comp_type`` is neither ``HMscale`` nor ``HMdiff``.
    """
    opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf, dest_stem, None)
    csv_pattern_root = os.path.join(self.cc_csv_root, opath_leaf)
    pattern = csv_pattern_root + "*.csv"
    self.logger.debug("Generating paired heatmaps from pattern='%s'",
                      pattern)

    # glob.glob() returns paths in arbitrary order; sort so that the choice
    # of reference file (paths[0]) is deterministic.
    # NOTE(review): assumes the primary controller's file sorts first
    # (e.g. a '_0' suffix) -- confirm against LeafGenerator.
    paths = sorted(f for f in glob.glob(pattern) if re.search('_[0-9]+', f))

    if len(paths) < 2:
        self.logger.warning(("Not enough matches from pattern='%s'--skipping "
                             "paired heatmap generation"),
                            pattern)
        return

    # Fail with a clear message instead of an UnboundLocalError on plot_df.
    if comp_type not in ('HMscale', 'HMdiff'):
        raise ValueError(
            "Bad comparison type '{0}': must be 'HMscale' or 'HMdiff'".format(
                comp_type))

    ref_df = storage.DataFrameReader('storage.csv')(paths[0])

    for i, path in enumerate(paths[1:], start=1):
        df = storage.DataFrameReader('storage.csv')(path)

        if comp_type == 'HMscale':
            plot_df = df / ref_df
        else:  # 'HMdiff'
            plot_df = df - ref_df

        leaf = LeafGenerator.from_batch_leaf(batch_leaf, dest_stem, [0, i])
        ipath = os.path.join(self.cc_csv_root, leaf) + ".csv"
        opath = os.path.join(self.cc_graph_root, leaf) + config.kImageExt

        # The heatmap reads its data from the filesystem, so the computed
        # comparison has to be written out first.
        storage.DataFrameWriter('storage.csv')(plot_df, ipath, index=False)

        Heatmap(input_fpath=ipath,
                output_fpath=opath,
                title=title,
                transpose=self.cmdopts['transpose_graphs'],
                zlabel=self._gen_zaxis_label(label, comp_type),
                xlabel=criteria.graph_xlabel(cmdopts),
                ylabel=criteria.graph_ylabel(cmdopts),
                xtick_labels=criteria.graph_xticklabels(cmdopts),
                ytick_labels=criteria.graph_yticklabels(cmdopts)).generate()
def _gather_item_from_sims(
        self,
        item: GatherSpec,
        runs: tp.List[str]) -> tp.Dict[GatherSpec, tp.List[pd.DataFrame]]:
    """
    Read the ``.csv`` file described by ``item`` from each of the given runs.

    Arguments:
        item: Specification of the file to gather. If
              ``item.for_imagizing()`` is true the file is looked up under
              the ``item.csv_stem`` subdirectory of the run's output
              directory; otherwise directly in that directory.

        runs: Names of the run directories under ``self.exp_output_root``.

    Returns:
        A dictionary mapping ``item`` to the list of dataframes read (one
        per run, in the order of ``runs``). Empty if ``runs`` is empty.
    """
    gathered = {}  # type: tp.Dict[GatherSpec, tp.List[pd.DataFrame]]

    for run in runs:
        run_output_root = os.path.join(self.exp_output_root,
                                       run,
                                       self.run_metrics_leaf)

        if item.for_imagizing():
            item_path = os.path.join(run_output_root,
                                     item.csv_stem,
                                     item.csv_leaf + '.csv')
        else:
            item_path = os.path.join(run_output_root,
                                     item.csv_leaf + '.csv')

        reader = storage.DataFrameReader(self.gather_opts['storage_medium'])
        df = reader(item_path, index_col=False)

        # If the first column was parsed as generic objects, coerce it to
        # floats. Use .iloc for positional access: integer keys on a
        # label-indexed Series (df.dtypes[0]) are deprecated in pandas.
        if df.dtypes.iloc[0] == 'object':
            df[df.columns[0]] = df[df.columns[0]].apply(float)

        gathered.setdefault(item, []).append(df)

    return gathered
def generate(self) -> None:
    """
    Render the scatterplot for ``self.input_fpath`` to ``self.output_fpath``.

    Does nothing (beyond a debug message) if the input file does not exist.
    """
    if not sierra.core.utils.path_exists(self.input_fpath):
        self.logger.debug(
            "Not generating 2D scatterplot: %s does not exist",
            self.input_fpath)
        return

    # Load the data and let pandas create the base scatterplot
    reader = storage.DataFrameReader('storage.csv')
    df = reader(self.input_fpath)
    ax = df.plot.scatter(x=self.xcol, y=self.ycol)

    # Optional regression line
    if self.regression:
        self._plot_regression(df)

    # Ticks, axis labels, title
    sizes = self.text_size
    ax.tick_params(labelsize=sizes['tick_label'])
    ax.set_xlabel(self.xlabel, fontsize=sizes['xyz_label'])
    ax.set_ylabel(self.ylabel, fontsize=sizes['xyz_label'])
    ax.set_title(self.title, fontsize=sizes['title'])

    # Write the (square) figure out
    side = sierra.core.config.kGraphBaseSize
    fig = ax.get_figure()
    fig.set_size_inches(side, side)
    fig.savefig(self.output_fpath,
                bbox_inches='tight',
                dpi=sierra.core.config.kGraphDPI)

    # Close explicitly so figures do not accumulate in memory (fig.clf()
    # does not close everything)
    plt.close(fig)
def gather_csvs_from_run(
        self,
        run: str) -> tp.Dict[tp.Tuple[str, str], pd.DataFrame]:
    """
    Gather the intra-experiment performance data from a single run.

    The file and column to read are taken from the ``intra_perf_csv`` and
    ``intra_perf_col`` entries under ``sierra.perf`` in the main
    configuration.

    Arguments:
        run: Name of the run directory under ``self.exp_output_root``.

    Returns:
        A dictionary with a single <(file stem, performance column),
        data> entry, where the file stem is the configured ``.csv`` file
        name up to its first ``.`` and the data is the configured column
        of the dataframe read from that file.
    """
    perf_config = self.main_config['sierra']['perf']
    leaf = perf_config['intra_perf_csv'].split('.')[0]
    col = perf_config['intra_perf_col']

    csv_path = os.path.join(self.exp_output_root,
                            run,
                            self.run_metrics_leaf,
                            leaf + '.csv')

    read = storage.DataFrameReader(self.storage_medium)
    perf_df = read(csv_path, index_col=False)

    return {(leaf, col): perf_df[col]}
def generate(self) -> None:
    """
    Plot the ``.csv`` at ``self.input_fpath`` as a heatmap written to
    ``self.output_fpath``; a missing input file is only logged.
    """
    if utils.path_exists(self.input_fpath):
        # Load the data and hand it to the plotting routine
        reader = storage.DataFrameReader('storage.csv')
        self._plot_df(reader(self.input_fpath), self.output_fpath)
    else:
        self.logger.debug("Not generating heatmap: %s does not exist",
                          self.input_fpath)
def generate(self) -> None:
    """
    Generate the stacked surface graph from every ``.csv`` file matching
    ``self.input_stem_pattern + '*.csv'`` whose path contains ``_<digits>``.

    Files are read in sorted path order so that the assignment of surfaces
    to colormaps is the same on every filesystem. Nothing is generated
    (beyond a debug message) if no file matches.

    Raises:
        AssertionError: If more than ``StackedSurfaceGraph.kMaxSurfaces``
                        files match.
    """
    # glob.glob() returns paths in arbitrary order; sort for a deterministic
    # surface -> colormap mapping.
    paths = sorted(f for f in glob.glob(self.input_stem_pattern + '*.csv')
                   if re.search('_[0-9]+', f))
    dfs = [storage.DataFrameReader('storage.csv')(f) for f in paths]

    if not dfs:  # empty list
        self.logger.debug(
            "Not generating stacked surface graph: %s did not match any .csv files",
            self.input_stem_pattern)
        return

    assert len(dfs) <= StackedSurfaceGraph.kMaxSurfaces,\
        "Too many surfaces to plot: {0} > {1}".format(len(dfs),
                                                      StackedSurfaceGraph.kMaxSurfaces)

    # Scaffold graph
    plt.figure(figsize=(config.kGraphBaseSize, config.kGraphBaseSize))
    ax = plt.axes(projection='3d')
    x = np.arange(len(dfs[0].columns))
    y = dfs[0].index
    X, Y = np.meshgrid(x, y)

    # Use non-quantitative colormaps in order to get really nice looking
    # surfaces that change color with Z value. From
    # https://stackoverflow.com/questions/55501860/how-to-put-multiple-colormap-patches-in-a-matplotlib-legend
    colors = [plt.cm.Greens, plt.cm.Reds, plt.cm.Purples, plt.cm.Oranges]
    legend_cmap_handles = [
        mpl.patches.Rectangle((0, 0), 1, 1) for _ in colors
    ]
    legend_handler_map = dict(
        zip(legend_cmap_handles,
            [HandlerColormap(c, num_stripes=8) for c in colors]))

    # Plot surfaces
    self._plot_surfaces(X, Y, ax, colors, dfs)

    # Add title
    ax.set_title(self.title, fontsize=24)

    # Add X,Y,Z labels
    self._plot_labels(ax)

    # Add X,Y ticks
    self._plot_ticks(ax, x, y)

    # Add legend
    self._plot_legend(ax, legend_cmap_handles, legend_handler_map)

    # Output figures
    fig = ax.get_figure()
    fig.set_size_inches(10, 10)
    self._save_figs(fig, ax)
def _read_stats(self) -> tp.Dict[str, pd.DataFrame]:
    """
    Read the statistics needed for the configured ``self.stats`` setting.

    Returns:
        A dictionary containing a ``stddev`` dataframe if ``self.stats`` is
        ``conf95`` or ``all`` and the stddev file exists; empty otherwise.
    """
    dfs = {}

    if self.stats not in ['conf95', 'all']:
        return dfs

    stddev_ext = config.kStatsExtensions['stddev']
    stddev_ipath = os.path.join(self.stats_root, self.input_stem + stddev_ext)

    if not utils.path_exists(stddev_ipath):
        self.logger.warning("Stddev file not found for '%s'",
                            self.input_stem)
        return dfs

    read = storage.DataFrameReader('storage.csv')
    dfs['stddev'] = read(stddev_ipath)
    return dfs
def _accum_df(self,
              ipath: str,
              opath: str,
              src_stem: str) -> tp.Optional[pd.DataFrame]:
    """
    Append the last row of the ``.csv`` at ``ipath`` to the dataframe
    previously accumulated at ``opath``.

    Nothing is written by this function; the accumulated dataframe is
    returned to the caller.

    Arguments:
        ipath: Path to the ``.csv`` file to draw the row from. It is
               expected to contain exactly one row; if it does not, a
               warning is logged and only the last row is used.

        opath: Path to the ``.csv`` file holding the rows accumulated so
               far. If it exists it is read and extended.

        src_stem: Stem of the input file; only used in log messages.

    Returns:
        The accumulated dataframe including the new row, or ``None`` if
        ``ipath`` does not exist.
    """
    if utils.path_exists(opath):
        cum_df = storage.DataFrameReader('storage.csv')(opath)
    else:
        cum_df = None

    if not utils.path_exists(ipath):
        return None

    t = storage.DataFrameReader('storage.csv')(ipath)

    if len(t.index) != 1:
        self.logger.warning(
            "'%s.csv' is a collated inter-experiment csv, not a summary inter-experiment csv:  # rows %s != 1",
            src_stem,
            len(t.index))
        self.logger.warning("Truncating '%s.csv' to last row", src_stem)

    # Select the last row as a 1-row dataframe (note the list around the
    # label) so that it keeps its index label and per-column dtypes.
    last_row = t.loc[[t.index[-1]], t.columns.to_list()]

    if cum_df is None:
        # Nothing accumulated yet: the selected row is the whole result.
        return last_row

    # DataFrame.append() was removed in pandas 2.0; pd.concat() is the
    # supported equivalent.
    return pd.concat([cum_df, last_row])
def generate(self):
    """
    Generate the heatmap set (data, stddev, model, model error) for
    ``self.target_stem``.

    The model error is computed here as ``model - data`` and written to a
    ``.csv`` file under ``self.exp_model_root`` before the heatmaps are
    generated.
    """
    stem = self.target_stem
    title = self.target_title
    img_ext = config.kImageExt

    # Inputs
    data_ipath = os.path.join(self.exp_stat_root, stem + '.csv')
    stddev_ipath = os.path.join(self.exp_stat_root, stem + '.stddev')
    model_ipath = os.path.join(self.exp_model_root, stem + '.model')
    model_error_ipath = os.path.join(self.exp_model_root,
                                     stem + '-HM-model-error.csv')

    # Outputs
    data_opath = os.path.join(self.exp_graph_root, stem + '-HM' + img_ext)
    stddev_opath = os.path.join(self.exp_graph_root,
                                stem + '-HM-stddev' + img_ext)
    model_opath = os.path.join(self.exp_graph_root,
                               stem + '-HM-model' + img_ext)
    model_error_opath = os.path.join(self.exp_graph_root,
                                     stem + '-HM-model-error' + img_ext)

    # Compute the model error and persist it so it can be plotted like the
    # other inputs
    read = storage.DataFrameReader('storage.csv')
    data_df = read(data_ipath)
    model_df = read(model_ipath)
    write = storage.DataFrameWriter('storage.csv')
    write(model_df - data_df, model_error_ipath, index=False)

    heatmaps = HeatmapSet(
        ipaths=[data_ipath, stddev_ipath, model_ipath, model_error_ipath],
        opaths=[data_opath, stddev_opath, model_opath, model_error_opath],
        titles=[title,
                title + ' (Stddev)',
                title + ' (Model)',
                title + ' (Model Error)'],
        xlabel='X',
        ylabel='Y',
        **self.kwargs)
    heatmaps.generate()
def _accum_df_by_col(self,
                     ipath: str,
                     opath: str,
                     all_cols: tp.List[str],
                     col_index: int,
                     inc_exps: tp.Optional[str]) -> tp.Optional[pd.DataFrame]:
    """
    Append column ``col_index`` of the ``.csv`` at ``ipath``, turned into a
    row, to the dataframe previously accumulated at ``opath``.

    Nothing is written by this function; the accumulated dataframe is
    returned to the caller.

    Arguments:
        ipath: Path to the ``.csv`` file to draw the column from.

        opath: Path to the ``.csv`` file holding the rows accumulated so
               far. If it exists it is read and extended.

        all_cols: Column names to assign to the transposed input dataframe.

        col_index: Position of the input column to select (i.e. the row
                   label of the transposed dataframe after its index has
                   been reset).

        inc_exps: If not ``None``, it is handed to
                  ``utils.exp_include_filter()`` together with the input
                  index and ``self.n_exp`` to choose which entries of the
                  transposed dataframe are kept. If ``None``, everything
                  is kept.

    Returns:
        The accumulated dataframe including the new row, or ``None`` if
        ``ipath`` does not exist.
    """
    if utils.path_exists(opath):
        cum_df = storage.DataFrameReader('storage.csv')(opath)
    else:
        cum_df = None

    if not utils.path_exists(ipath):
        return None

    t = storage.DataFrameReader('storage.csv')(ipath)

    if inc_exps is not None:
        cols_from_index = utils.exp_include_filter(inc_exps,
                                                   list(t.index),
                                                   self.n_exp)
    else:
        cols_from_index = slice(None, None, None)

    # We need to turn each column of the .csv on the filesystem into a
    # row in the .csv which we want to write out, so we transpose, fix
    # the index, and then set the columns of the new transposed
    # dataframe.
    tp_df = t.transpose()
    tp_df = tp_df.reset_index(drop=True)
    tp_df = tp_df[cols_from_index]
    tp_df.columns = all_cols

    # Select the row as a 1-row dataframe (note the list around the label)
    # so that it keeps its index label and per-column dtypes.
    row = tp_df.loc[[col_index], :]

    if cum_df is None:
        # Nothing accumulated yet: the selected row is the whole result.
        return row

    # DataFrame.append() was removed in pandas 2.0; pd.concat() is the
    # supported equivalent.
    return pd.concat([cum_df, row])
def generate(self) -> None:
    """
    Plot the mean data for ``self.input_stem`` (all columns, or only
    ``self.cols`` if set) together with any statistics and model data, and
    save the figure to ``self.output_fpath``.

    Does nothing (beyond a debug message) if the mean file does not exist.
    """
    mean_fpath = os.path.join(self.stats_root,
                              self.input_stem + config.kStatsExtensions['mean'])

    if not utils.path_exists(mean_fpath):
        self.logger.debug("Not generating %s: %s does not exist",
                          self.output_fpath,
                          mean_fpath)
        return

    data_df = storage.DataFrameReader('storage.csv')(mean_fpath)
    model = self._read_models()
    stat_dfs = self._read_stats()

    # Plot either every column or just the requested subset; the legend
    # gets half as many columns as there are plotted columns (at least 1).
    to_plot = data_df.columns if self.cols is None else self.cols
    ncols = max(1, int(len(to_plot) / 2.0))
    ax = self._plot_selected_cols(data_df, stat_dfs, to_plot, model)

    self._plot_ticks(ax)
    self._plot_legend(ax, model[1], ncols)

    # Title
    ax.set_title(self.title, fontsize=self.text_size['title'])

    # X,Y labels (each is optional)
    if self.xlabel is not None:
        ax.set_xlabel(self.xlabel, fontsize=self.text_size['xyz_label'])

    if self.ylabel is not None:
        ax.set_ylabel(self.ylabel, fontsize=self.text_size['xyz_label'])

    # Write the (square) figure out
    side = config.kGraphBaseSize
    fig = ax.get_figure()
    fig.set_size_inches(side, side)
    fig.savefig(self.output_fpath, bbox_inches='tight', dpi=config.kGraphDPI)

    # Close explicitly so figures do not accumulate in memory (fig.clf()
    # does not close everything)
    plt.close(fig)
def _gen_csvs_for_2D_or_3D(self,
                           cmdopts: types.Cmdopts,
                           batch_leaf: str,
                           controller: str,
                           src_stem: str,
                           dest_stem: str) -> None:
    """Copy one controller's collated performance ``.csv`` into
    :attr:`cc_csv_root` for 2D/3D comparison graphs.

    The source ``.csv`` is already a 2D array, so it is read from
    ``cmdopts['batch_stat_collate_root']`` and written out unchanged under a
    new leaf built from ``batch_leaf``, ``dest_stem``, and the position of
    ``controller`` in ``self.controllers``.

    :class:`~sierra.core.graphs.stacked_surface_graph.StackedSurfaceGraph`
    expects an ``_[0-9]+.csv`` pattern for each 2D surface to graph, in
    order to tell which file belongs to which controller without having the
    controller name (which contains dots) in the file path.
    :class:`~sierra.core.graphs.heatmap.Heatmap` does not require that, but
    it makes primary vs. other controllers easy to tell apart for the
    heatmap set, so it is done unconditionally.

    If the source ``.csv`` does not exist a warning is logged and nothing is
    written.
    """
    self.logger.debug("Gathering data for '%s' from %s -> %s",
                      controller,
                      src_stem,
                      dest_stem)

    collate_root = cmdopts['batch_stat_collate_root']
    csv_ipath = os.path.join(collate_root, src_stem + ".csv")

    # Not every experiment generates the performance measure .csvs needed
    # for graph generation, which is OK.
    if utils.path_exists(csv_ipath):
        df = storage.DataFrameReader('storage.csv')(csv_ipath)

        controller_index = self.controllers.index(controller)
        opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf,
                                                   dest_stem,
                                                   [controller_index])
        csv_opath = os.path.join(self.cc_csv_root, opath_leaf) + '.csv'

        storage.DataFrameWriter('storage.csv')(df, csv_opath, index=False)
    else:
        self.logger.warning("%s missing for controller '%s'",
                            csv_ipath,
                            controller)
def _read_models(self) -> tp.Tuple[pd.DataFrame, tp.List[str]]:
    """
    Read the model data and its legend for ``self.input_stem``.

    Returns:
        A (model dataframe, legend lines) tuple. If ``self.model_root`` is
        ``None`` or the ``.model`` file does not exist, ``(None, [])`` is
        returned. If the model exists but its ``.legend`` file does not, a
        warning is logged and the legend defaults to
        ``['Model Prediction']``.
    """
    no_model = (None, [])

    if self.model_root is None:
        return no_model

    stem_path = os.path.join(self.model_root, self.input_stem)
    model_fpath = stem_path + '.model'
    model_legend_fpath = stem_path + '.legend'

    if not utils.path_exists(model_fpath):
        return no_model

    model = storage.DataFrameReader('storage.csv')(model_fpath)

    if not utils.path_exists(model_legend_fpath):
        self.logger.warning("No legend file for model '%s' found",
                            model_fpath)
        return (model, ['Model Prediction'])

    # One legend entry per line of the legend file
    with open(model_legend_fpath, 'r') as f:
        model_legend = f.read().splitlines()

    return (model, model_legend)
def generate(self) -> None:
    """
    Plot the mean data for ``self.input_stem`` as lines, decorated with any
    model data and configured statistics, and save the figure to
    ``self.output_fpath``.

    Does nothing (beyond a debug message) if the mean file does not exist.
    """
    mean_fpath = os.path.join(self.stats_root,
                              self.input_stem + config.kStatsExtensions['mean'])

    if not utils.path_exists(mean_fpath):
        self.logger.debug("Not generating %s: %s does not exist",
                          self.output_fpath,
                          mean_fpath)
        return

    data_dfy = storage.DataFrameReader('storage.csv')(mean_fpath)
    model = self._read_models()

    fig, ax = plt.subplots()

    # Lines first, then their legend
    self._plot_lines(data_dfy, model)
    self._plot_legend(model)

    # Statistics, as configured
    stat_dfs = self._read_stats()
    self._plot_stats(ax, self.xticks, data_dfy, stat_dfs)

    # X,Y labels
    label_size = self.text_size['xyz_label']
    plt.ylabel(self.ylabel, fontsize=label_size)
    plt.xlabel(self.xlabel, fontsize=label_size)

    # Ticks
    self._plot_ticks(ax)

    # Title
    plt.title(self.title, fontsize=self.text_size['title'])

    # Write the (square) figure out
    side = config.kGraphBaseSize
    fig = ax.get_figure()
    fig.set_size_inches(side, side)
    fig.savefig(self.output_fpath, bbox_inches='tight', dpi=config.kGraphDPI)

    # Close explicitly so figures do not accumulate in memory (fig.clf()
    # does not close everything)
    plt.close(fig)
def _collate_exp(self,
                 target: dict,
                 exp_dir: str,
                 stats: tp.List[BivarGraphCollationInfo]) -> None:
    """
    Collate the ``target['col']`` column of one experiment into each of the
    given collation dataframes.

    For every entry of ``stats`` the source file
    ``target['src_stem'] + stat.df_ext`` under the experiment's statistics
    directory is read, and its target column is stored in ``stat.df`` at
    the (row, column) obtained by splitting ``exp_dir`` on ``+``. The
    ``all_srcs_exist``/``some_srcs_exist`` flags of each entry are updated
    according to whether its source file was found.
    """
    exp_stat_root = os.path.join(self.cmdopts['batch_stat_root'], exp_dir)

    for stat in stats:
        csv_ipath = os.path.join(exp_stat_root,
                                 target['src_stem'] + stat.df_ext)

        if utils.path_exists(csv_ipath):
            stat.some_srcs_exist = True

            data_df = storage.DataFrameReader('storage.csv')(csv_ipath)
            col = target['col']

            assert col in data_df.columns.values,\
                "{0} not in columns of {1}, which has {2}".format(col,
                                                                  csv_ipath,
                                                                  data_df.columns)

            row_label, col_label = exp_dir.split('+')
            stat.df.loc[row_label, col_label] = data_df[col].to_numpy()
        else:
            stat.all_srcs_exist = False
def _collate_exp(self,
                 target: dict,
                 exp_dir: str,
                 stats: tp.List[UnivarGraphCollationInfo]) -> None:
    """
    Collate the ``target['col']`` column of one experiment into each of the
    given collation dataframes.

    For every entry of ``stats`` the source file
    ``target['src_stem'] + stat.df_ext`` under the experiment's statistics
    directory is read. If ``target['summary']`` is truthy only the last
    value of the target column is stored (row 0, column ``exp_dir`` of
    ``stat.df``); otherwise the whole column is stored as column
    ``exp_dir``. The ``all_srcs_exist``/``some_srcs_exist`` flags of each
    entry are updated according to whether its source file was found.
    """
    exp_stat_root = os.path.join(self.cmdopts['batch_stat_root'], exp_dir)

    for stat in stats:
        csv_ipath = os.path.join(exp_stat_root,
                                 target['src_stem'] + stat.df_ext)

        if utils.path_exists(csv_ipath):
            stat.some_srcs_exist = True

            data_df = storage.DataFrameReader('storage.csv')(csv_ipath)

            assert target['col'] in data_df.columns.values,\
                "{0} not in columns of {1}".format(target['col'],
                                                   target['src_stem'] + stat.df_ext)

            if not target.get('summary', False):
                stat.df[exp_dir] = data_df[target['col']]
            else:
                last = data_df.index[-1]
                stat.df.loc[0, exp_dir] = data_df.loc[last, target['col']]
        else:
            stat.all_srcs_exist = False
def _verify_exp(self): """ Verify the integrity of all :term:`Experimental Runs <Experimental Run>` in an :term:`Experiment`. Specifically: - All runs produced all ``.csv`` files. - All runs ``.csv`` files with the same name have the same # rows and columns. - No simulation ``.csv``files contain NaNs. """ experiments = os.listdir(self.exp_output_root) self.logger.info('Verifying results in %s...', self.exp_output_root) start = time.time() for exp1 in experiments: csv_root1 = os.path.join(self.exp_output_root, exp1, self.run_metrics_leaf) for exp2 in experiments: csv_root2 = os.path.join(self.exp_output_root, exp2, self.run_metrics_leaf) if not os.path.isdir(csv_root2): continue for csv in os.listdir(csv_root2): path1 = os.path.join(csv_root1, csv) path2 = os.path.join(csv_root2, csv) # .csvs for rendering that we don't verify (for now...) if os.path.isdir(path1) or os.path.isdir(path2): self.logger.debug( "Not verifying '%s': contains rendering data", path1) continue assert (sierra.core.utils.path_exists(path1) and sierra.core.utils.path_exists(path2)),\ "Either {0} or {1} does not exist".format( path1, path2) # Verify both dataframes have same # columns, and that column sets are identical reader = storage.DataFrameReader( self.gather_opts['storage_medium']) df1 = reader(path1) df2 = reader(path2) assert (len(df1.columns) == len(df2.columns)), \ "Dataframes from {0} and {1} do not have same # columns".format( path1, path2) assert(sorted(df1.columns) == sorted(df2.columns)),\ "Columns from {0} and {1} not identical".format( path1, path2) # Verify the length of all columns in both dataframes is the same for c1 in df1.columns: assert(all(len(df1[c1]) == len(df1[c2]) for c2 in df1.columns)),\ "Not all columns from {0} have same length".format( path1) assert(all(len(df1[c1]) == len(df2[c2]) for c2 in df1.columns)),\ "Not all columns from {0} and {1} have same length".format(path1, path2) elapsed = int(time.time() - start) sec = datetime.timedelta(seconds=elapsed) 
self.logger.info("Done verifying results in %s: %s", self.exp_output_root, sec)
def _read_stats(self) -> tp.Dict[str, list]:
    """
    Read the statistics files needed for the configured ``self.stats``
    setting.

    - ``conf95`` or ``all``: the ``stddev`` file.

    - ``bw`` or ``all``: the ``whislo``, ``whishi``, ``cilo``, ``cihi``,
      ``median``, ``q1`` and ``q3`` files.

    Returns:
        A dictionary mapping each statistic name to the dataframe read from
        its file. A statistic whose file does not exist is left out, and a
        warning is logged for it.
    """
    dfs = {}
    want_conf95 = self.stats == 'conf95' or self.stats == 'all'
    want_bw = self.stats == 'bw' or self.stats == 'all'

    def _ipath(stat_name: str) -> str:
        # Path of the statistics file for the given statistic
        return os.path.join(self.stats_root,
                            self.input_stem + config.kStatsExtensions[stat_name])

    def _read_if_present(stat_name: str, ipath: str) -> None:
        # Read one statistic into dfs, or warn that its file is missing
        if utils.path_exists(ipath):
            dfs[stat_name] = storage.DataFrameReader('storage.csv')(ipath)
        else:
            self.logger.warning(stat_name + " file not found for '%s'",
                                self.input_stem)

    if want_conf95:
        _read_if_present('stddev', _ipath('stddev'))

    if want_bw:
        # Resolve all paths up front, then read them in a fixed order
        bw_ipaths = {
            name: _ipath(name)
            for name in ('whislo', 'whishi', 'median', 'q1', 'q3', 'cihi', 'cilo')
        }
        for name in ('whislo', 'whishi', 'cilo', 'cihi', 'median', 'q1', 'q3'):
            _read_if_present(name, bw_ipaths[name])

    return dfs
def _gen_csvs_for_1D(self,
                     cmdopts: types.Cmdopts,
                     criteria: bc.IConcreteBatchCriteria,
                     batch_leaf: str,
                     controller: str,
                     src_stem: str,
                     dest_stem: str,
                     primary_axis: int,
                     inc_exps: tp.Optional[str]) -> None:
    """Helper function for generating a set of .csv files for use in
    intra-scenario graph generation.

    Because we are targeting linegraphs, we draw the i-th row/col (as
    configured) from the performance results of each controller .csv, and
    concatenate them into a new .csv file which can be given to
    :class:`~sierra.core.graphs.summary_line_graph.SummaryLineGraph`.

    Arguments:
        cmdopts: Command line options; ``batch_stat_collate_root`` is the
                 directory the source ``.csv`` is read from.

        criteria: The batch criteria; its ``criteria1``/``criteria2`` give
                  the number of experiments along each axis.

        batch_leaf: Batch leaf used to build the output file leaves.

        controller: Name of the controller; only used in log messages.

        src_stem: Stem of the source ``.csv`` file.

        dest_stem: Stem used to build the output file leaves.

        primary_axis: If 0, one output is generated per row of the source
                      ``.csv``; otherwise one per Y label of the batch
                      experiment.

        inc_exps: Experiment inclusion specification, passed through to the
                  filter/preparer.
    """
    self.logger.debug("Gathering data for '%s' from %s -> %s",
                      controller,
                      src_stem,
                      dest_stem)

    csv_ipath = os.path.join(cmdopts['batch_stat_collate_root'],
                             src_stem + ".csv")

    # Some experiments might not generate the necessary performance measure
    # .csvs for graph generation, which is OK.
    if not utils.path_exists(csv_ipath):
        self.logger.warning("%s missing for controller '%s'",
                            csv_ipath,
                            controller)
        return

    if primary_axis == 0:
        preparer = StatsPreparer(ipath_stem=cmdopts['batch_stat_collate_root'],
                                 ipath_leaf=src_stem,
                                 opath_stem=self.cc_csv_root,
                                 n_exp=criteria.criteria2.n_exp())

        n_rows = len(storage.DataFrameReader('storage.csv')(csv_ipath).index)

        for i in range(n_rows):
            opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf,
                                                       dest_stem,
                                                       [i])
            preparer.across_rows(opath_leaf=opath_leaf,
                                 index=i,
                                 inc_exps=inc_exps)
    else:
        preparer = StatsPreparer(ipath_stem=cmdopts['batch_stat_collate_root'],
                                 ipath_leaf=src_stem,
                                 opath_stem=self.cc_csv_root,
                                 n_exp=criteria.criteria1.n_exp())

        exp_dirs = criteria.gen_exp_dirnames(cmdopts)
        xlabels, ylabels = utils.bivar_exp_labels_calc(exp_dirs)
        xlabels = utils.exp_include_filter(inc_exps,
                                           xlabels,
                                           criteria.criteria1.n_exp())

        # Iterate over positions directly rather than looking each label up
        # with ylabels.index(): that is linear per label, and would map
        # duplicate labels to the same (first) position.
        for col_index in range(len(ylabels)):
            opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf,
                                                       dest_stem,
                                                       [col_index])
            preparer.across_cols(opath_leaf=opath_leaf,
                                 col_index=col_index,
                                 all_cols=xlabels,
                                 inc_exps=inc_exps)
def generate(self) -> None:
    """
    Generate two side-by-side heatmaps sharing one colorbar from the
    ``.csv`` files matching ``self.input_stem_pattern`` whose path contains
    ``_<digits>``.

    Files are read in sorted path order so that which file ends up on the
    left (paired with ``self.legend[0]``) and which on the right (paired
    with ``self.legend[1]``) is the same on every filesystem. Nothing is
    generated (beyond a debug message) unless exactly
    ``DualHeatmap.kCardinality`` files match.
    """
    # glob.glob() returns paths in arbitrary order; sort for a deterministic
    # left/right assignment.
    paths = sorted(f for f in glob.glob(self.input_stem_pattern)
                   if re.search('_[0-9]+', f))
    dfs = [storage.DataFrameReader('storage.csv')(f) for f in paths]

    if not dfs or len(dfs) != DualHeatmap.kCardinality:
        self.logger.debug(
            "Not generating dual heatmap graph: %s did not match %s .csv files",
            self.input_stem_pattern,
            DualHeatmap.kCardinality)
        return

    # Scaffold graph
    fig, axes = plt.subplots(ncols=2,
                             figsize=(config.kGraphBaseSize * 2.0,
                                      config.kGraphBaseSize))
    y = np.arange(len(dfs[0].columns))
    x = dfs[0].index
    ax1, ax2 = axes

    # Find min, max so the shared colorbar makes sense
    minval = min(dfs[0].min().min(), dfs[1].min().min())
    maxval = max(dfs[0].max().max(), dfs[1].max().max())

    # Plot heatmaps
    im1 = ax1.matshow(dfs[0],
                      interpolation='none',
                      vmin=minval,
                      vmax=maxval)
    im2 = ax2.matshow(dfs[1],
                      interpolation='none',
                      vmin=minval,
                      vmax=maxval)

    # Add titles
    fig.suptitle(self.title, fontsize=self.text_size['title'])
    ax1.xaxis.set_ticks_position('bottom')
    ax1.yaxis.set_ticks_position('left')
    ax2.xaxis.set_ticks_position('bottom')
    ax2.yaxis.set_ticks_position('left')

    if self.legend is not None:
        ax1.set_title("\n".join(textwrap.wrap(self.legend[0], 20)),
                      size=self.text_size['legend_label'])
        ax2.set_title("\n".join(textwrap.wrap(self.legend[1], 20)),
                      size=self.text_size['legend_label'])

    # Add colorbar.
    #
    # Add, then remove the colorbar for the heatmap on the left so that they
    # both end up the same size. Not pythonic, but it works.
    self._plot_colorbar(fig, im1, ax1, remove=True)
    self._plot_colorbar(fig, im2, ax2, remove=False)

    # Add X,Y,Z labels:
    #
    # - X labels are needed on both heatmaps.
    # - Y label only needed on left heatmap.
    self._plot_labels(ax1, xlabel=True, ylabel=True)
    self._plot_labels(ax2, xlabel=True, ylabel=False)

    # Add X,Y ticks:
    #
    # - X tick labels needed on both heatmaps
    # - Y tick labels only needed on left heatmap.
    self._plot_ticks(ax1, x, y, xlabels=True, ylabels=True)
    self._plot_ticks(ax2, x, y, xlabels=True, ylabels=False)

    # Output figures
    fig.subplots_adjust(wspace=0.0, hspace=0.0)
    fig.savefig(self.output_fpath,
                bbox_inches='tight',
                dpi=config.kGraphDPI)

    # Prevent memory accumulation (fig.clf() does not close everything)
    plt.close(fig)