def across_cols(self,
                    opath_leaf: str,
                    all_cols: tp.List[str],
                    col_index: int,
                    inc_exps: tp.Optional[str]) -> None:
        """
        The criteria of interest varies across the rows of controller .csvs. We take
        row `index` from a given dataframe and take the rows specified by the
        `inc_exps` and append them to a results dataframe column-wise, which we
        then write the file system.

        """
        for k in config.kStatsExtensions.keys():
            stat_ipath = os.path.join(self.ipath_stem,
                                      self.ipath_leaf + config.kStatsExtensions[k])
            stat_opath = os.path.join(self.opath_stem,
                                      opath_leaf + config.kStatsExtensions[k])
            df = self._accum_df_by_col(
                stat_ipath, stat_opath, all_cols, col_index, inc_exps)

            if df is not None:
                storage.DataFrameWriter('storage.csv')(df, stat_opath, index=False)
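The _accum_df_by_col() helper called above is not shown on this page. A minimal sketch of the semantics the caller appears to assume, with hypothetical names and a guessed `inc_exps` format (pd.read_csv() stands in for storage.DataFrameReader('storage.csv')):

import os
import typing as tp

import pandas as pd

def _accum_df_by_col_sketch(ipath: str,
                            opath: str,
                            all_cols: tp.List[str],
                            col_index: int,
                            inc_exps: tp.Optional[str]) -> tp.Optional[pd.DataFrame]:
    if not os.path.exists(ipath):
        return None

    # Start from the previously written results file if it exists, so repeated
    # calls accumulate column-wise.
    accum = pd.read_csv(opath) if os.path.exists(opath) else pd.DataFrame()

    src = pd.read_csv(ipath)
    col = all_cols[col_index]
    series = src[col]

    # inc_exps (hypothetically "lo-hi") restricts which experiment rows are
    # included; None means all rows.
    if inc_exps is not None:
        lo, hi = (int(x) for x in inc_exps.split('-'))
        series = series.iloc[lo:hi + 1]

    accum[col] = series.reset_index(drop=True)
    return accum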
    def __call__(self):
        csv_concat = pd.concat(self.gathered_dfs)

        # Create directory for averaged .csv files for imagizing later.
        if self.gather_spec.for_imagizing:
            sierra.core.utils.dir_create_checked(
                os.path.join(self.stat_root, self.gather_spec.csv_stem),
                exist_ok=True)

        by_row_index = csv_concat.groupby(csv_concat.index)

        # Accumulate results from each selected kernel; with 'all', each
        # kernel contributes its own set of extensions rather than
        # overwriting the previous kernel's results.
        dfs = {}

        if self.avg_opts['dist_stats'] in ['none', 'all']:
            dfs.update(sierra.core.stat_kernels.mean.from_groupby(by_row_index))

        if self.avg_opts['dist_stats'] in ['conf95', 'all']:
            dfs.update(sierra.core.stat_kernels.conf95.from_groupby(by_row_index))

        if self.avg_opts['dist_stats'] in ['bw', 'all']:
            dfs.update(sierra.core.stat_kernels.bw.from_groupby(by_row_index))

        for ext in dfs.keys():
            opath = os.path.join(self.stat_root, self.gather_spec.csv_stem,
                                 self.gather_spec.csv_leaf + ext)
            writer = storage.DataFrameWriter(self.avg_opts['storage_medium'])

            writer(dfs[ext].fillna(0), opath, index=False)
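The stat kernels above share a simple contract: from_groupby() maps a pandas GroupBy over the concatenated run dataframes to a dict of extension -> dataframe, which the write loop then iterates. A hedged sketch of a mean kernel under that assumption (the '.mean' extension string is illustrative; SIERRA's config.kStatsExtensions defines the real ones):

import pandas as pd

def mean_from_groupby_sketch(by_row_index) -> dict:
    # Collapse rows with the same index (the same timestep across runs) to
    # their mean, mirroring the groupby(csv_concat.index) call above.
    return {'.mean': by_row_index.mean()}

# Usage with the same grouping as above:
csv_concat = pd.concat([pd.DataFrame({'perf': [1.0, 2.0]}),
                        pd.DataFrame({'perf': [3.0, 4.0]})])
dfs = mean_from_groupby_sketch(csv_concat.groupby(csv_concat.index))
print(dfs['.mean'])  # perf = [2.0, 3.0]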
    def __call__(self):
        collated = {}
        for run, run_dfs in zip(self.gathered_runs, self.gathered_dfs):
            for csv_leaf, col in run_dfs.keys():
                csv_df = run_dfs[(csv_leaf, col)]
                # Invert performance if configured.
                if self.invert_perf and csv_leaf in self.intra_perf_csv:
                    csv_df = 1.0 / csv_df

                    # Because of the requirement that P(N) >= 0 for flexibility
                    # (1/0 = inf gives a crash with DTW), if the current level
                    # of performance is 0, it stays 0.
                    #
                    # This is a bit of a hack. But also not a hack at all,
                    # because infinite performance is not possible. This
                    # is... Schrodinger's Hack.
                    csv_df = csv_df.replace([-np.inf, np.inf], 0)

                if (csv_leaf, col) not in collated:
                    collated[(csv_leaf,
                              col)] = pd.DataFrame(index=csv_df.index,
                                                   columns=self.gathered_runs)
                collated[(csv_leaf, col)][run] = csv_df

        for (csv_leaf, col) in collated.keys():
            writer = storage.DataFrameWriter(self.storage_medium)
            writer(collated[(csv_leaf, col)].fillna(0),
                   os.path.join(
                       self.batch_stat_collate_root,
                       self.exp_leaf + '-' + csv_leaf + '-' + col + '.csv'),
                   index=False)
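A toy demonstration of the inversion-and-clamp step above: inverting a zero performance value yields inf, which would crash DTW downstream, so it is clamped back to 0.

import numpy as np
import pandas as pd

perf = pd.DataFrame({'run0': [2.0, 0.0, 4.0]})
inverted = (1.0 / perf).replace([-np.inf, np.inf], 0)
print(inverted['run0'].tolist())  # [0.5, 0.0, 0.25]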
    def __call__(self, main_config: types.YAMLDict,
                 criteria: bc.IConcreteBatchCriteria) -> None:
        exp_to_run = utils.exp_range_calc(self.cmdopts,
                                          self.cmdopts['batch_output_root'],
                                          criteria)
        exp_dirnames = criteria.gen_exp_dirnames(self.cmdopts)

        for i, exp in enumerate(exp_to_run):
            exp = os.path.split(exp)[1]
            exp_index = exp_dirnames.index(exp)

            cmdopts = copy.deepcopy(self.cmdopts)
            cmdopts["exp0_output_root"] = os.path.join(
                self.cmdopts["batch_output_root"], exp_dirnames[0])
            cmdopts["exp0_stat_root"] = os.path.join(
                self.cmdopts["batch_stat_root"], exp_dirnames[0])

            cmdopts["exp_input_root"] = os.path.join(
                self.cmdopts['batch_input_root'], exp)
            cmdopts["exp_output_root"] = os.path.join(
                self.cmdopts['batch_output_root'], exp)
            cmdopts["exp_graph_root"] = os.path.join(
                self.cmdopts['batch_graph_root'], exp)
            cmdopts["exp_stat_root"] = os.path.join(
                self.cmdopts["batch_stat_root"], exp)
            cmdopts["exp_model_root"] = os.path.join(
                cmdopts['batch_model_root'], exp)

            utils.dir_create_checked(cmdopts['exp_model_root'], exist_ok=True)

            for model in self.models:
                if not model.run_for_exp(criteria, cmdopts, exp_index):
                    self.logger.debug(
                        "Skipping intra-experiment model '%s' for exp%s",
                        str(model), exp_index)
                    continue

                # Run the model
                self.logger.debug("Run intra-experiment model '%s' for exp%s",
                                  str(model), exp_index)
                dfs = model.run(criteria, exp_index, cmdopts)
                for df, csv_stem in zip(dfs, model.target_csv_stems()):
                    path_stem = os.path.join(cmdopts['exp_model_root'],
                                             csv_stem)

                    # Write model legend file so the generated graph can find
                    # it. Match by identity to find this dataframe's position
                    # in the model's output list; the original boolean
                    # comparison of .values.all() results always matched the
                    # first dataframe.
                    with open(path_stem + '.legend', 'w') as f:
                        for j, search in enumerate(dfs):
                            if search is df:
                                legend = model.legend_names()[j]
                                f.write(legend)
                                break

                    # Write model .csv file
                    storage.DataFrameWriter('storage.csv')(df,
                                                           path_stem +
                                                           '.model',
                                                           index=False)
    def _gen_paired_heatmaps(self,
                             batch_leaf: str,
                             criteria: bc.BivarBatchCriteria,
                             cmdopts: types.Cmdopts,
                             dest_stem: str,
                             title: str,
                             label: str,
                             comp_type: str) -> None:
        """
        Generates a set of :class:`~sierra.core.graphs.heatmap.Heatmap` graphs a
        controller of primary interest against all other controllers (one graph
        per pairing), after input files have been gathered from each controller
        into :attr:`cc_csv_root`. Only valid if the comparison type is
        ``scale2D`` or ``diff2D``.

        """
        opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf, dest_stem, None)
        csv_pattern_root = os.path.join(self.cc_csv_root, opath_leaf)
        pattern = csv_pattern_root + "*.csv"
        self.logger.debug("Generating paired heatmaps from pattern='%s'",
                          pattern)

        paths = [f for f in glob.glob(pattern) if re.search('_[0-9]+', f)]

        if len(paths) < 2:
            self.logger.warning(("Not enough matches from pattern='%s'--skipping "
                                 "paired heatmap generation"),
                                pattern)
            return

        ref_df = storage.DataFrameReader('storage.csv')(paths[0])

        for i in range(1, len(paths)):
            df = storage.DataFrameReader('storage.csv')(paths[i])

            if comp_type == 'HMscale':
                plot_df = df / ref_df
            elif comp_type == 'HMdiff':
                plot_df = df - ref_df
            else:
                raise ValueError(
                    "Unsupported comparison type '{0}'".format(comp_type))

            leaf = LeafGenerator.from_batch_leaf(
                batch_leaf, dest_stem, [0, i])
            ipath = os.path.join(self.cc_csv_root, leaf) + ".csv"
            opath = os.path.join(self.cc_graph_root,
                                 leaf) + config.kImageExt

            storage.DataFrameWriter('storage.csv')(plot_df, ipath, index=False)

            Heatmap(input_fpath=ipath,
                    output_fpath=opath,
                    title=title,
                    transpose=self.cmdopts['transpose_graphs'],
                    zlabel=self._gen_zaxis_label(label, comp_type),
                    xlabel=criteria.graph_xlabel(cmdopts),
                    ylabel=criteria.graph_ylabel(cmdopts),
                    xtick_labels=criteria.graph_xticklabels(cmdopts),
                    ytick_labels=criteria.graph_yticklabels(cmdopts)).generate()
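On toy 2D data, the two comparison modes above reduce to an elementwise ratio (HMscale) or an elementwise difference (HMdiff) against the reference controller's surface:

import pandas as pd

ref_df = pd.DataFrame([[1.0, 2.0], [4.0, 8.0]])
df = pd.DataFrame([[2.0, 2.0], [2.0, 16.0]])

print(df / ref_df)  # HMscale: >1 where the controller outperforms the reference
print(df - ref_df)  # HMdiff: >0 where the controller outperforms the reference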
    def __call__(self, criteria: bc.IConcreteBatchCriteria, target: dict,
                 stat_collate_root: str) -> None:
        self.logger.info(
            "Stage4: Collating bivariate files from batch in %s for graph '%s'...",
            self.cmdopts['batch_output_root'], target['src_stem'])
        self.logger.trace(json.dumps(target, indent=4))

        exp_dirs = utils.exp_range_calc(self.cmdopts,
                                        self.cmdopts['batch_output_root'],
                                        criteria)

        xlabels, ylabels = utils.bivar_exp_labels_calc(exp_dirs)

        # Always collate the means; additional statistics depend on the
        # selected distribution statistics. Using independent checks (rather
        # than if/elif) lets 'all' pick up both sets, and avoids an unbound
        # 'exts' when dist_stats is 'none'.
        exts = [config.kStatsExtensions['mean']]

        if self.cmdopts['dist_stats'] in ['conf95', 'all']:
            exts += [config.kStatsExtensions['stddev']]

        if self.cmdopts['dist_stats'] in ['bw', 'all']:
            exts += [
                config.kStatsExtensions['min'],
                config.kStatsExtensions['max'],
                config.kStatsExtensions['whislo'],
                config.kStatsExtensions['whishi'],
                config.kStatsExtensions['cilo'],
                config.kStatsExtensions['cihi'],
                config.kStatsExtensions['median']
            ]

        stats = [
            BivarGraphCollationInfo(df_ext=ext,
                                    xlabels=xlabels,
                                    ylabels=ylabels) for ext in exts
        ]

        for diri in exp_dirs:
            # The exp dirs calculation returns full paths; we only need the
            # path leaves.
            diri = os.path.split(diri)[1]
            self._collate_exp(target, diri, stats)

        for stat in stats:
            if stat.all_srcs_exist:
                storage.DataFrameWriter('storage.csv')(
                    stat.df,
                    os.path.join(stat_collate_root,
                                 target['dest_stem'] + stat.df_ext),
                    index=False)

            elif stat.some_srcs_exist:
                self.logger.warning(
                    "Not all experiments in '%s' produced '%s%s'",
                    self.cmdopts['batch_output_root'], target['src_stem'],
                    stat.df_ext)
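A sketch of the per-extension collation record the loops above rely on, with field names inferred from usage (the real BivarGraphCollationInfo may differ):

import dataclasses
import typing as tp

import pandas as pd

@dataclasses.dataclass
class BivarGraphCollationInfoSketch:
    df_ext: str            # statistics extension, e.g. '.mean'
    xlabels: tp.List[str]  # experiment labels along the X criteria axis
    ylabels: tp.List[str]  # experiment labels along the Y criteria axis

    def __post_init__(self):
        # One cell per (Y, X) experiment pair, filled in by _collate_exp().
        self.df = pd.DataFrame(index=self.ylabels, columns=self.xlabels)
        # Updated as each experiment directory is processed.
        self.all_srcs_exist = True
        self.some_srcs_exist = False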
    def _gen_csvs_for_2D_or_3D(self,
                               cmdopts: types.Cmdopts,
                               batch_leaf: str,
                               controller: str,
                               src_stem: str,
                               dest_stem: str) -> None:
        """Helper function for generating a set of .csv files for use in intra-scenario
        graph generation (1 per controller) for 2D/3D comparison types. Because
        each ``.csv`` file corresponding to performance measures are 2D arrays,
        we actually just copy and rename the performance measure ``.csv`` files
        for each controllers into :attr:`cc_csv_root`.

        :class:`~sierra.core.graphs.stacked_surface_graph.StackedSurfaceGraph`
        expects an ``_[0-9]+.csv`` pattern for each 2D surfaces to graph in
        order to disambiguate which files belong to which controller without
        having the controller name in the filepath (contains dots), so we do
        that here. :class:`~sierra.core.graphs.heatmap.Heatmap` does not require
        that, but for the heatmap set we generate it IS helpful to have an easy
        way to differentiate primary vs. other controllers, so we do it
        unconditionally here to handle both cases.

        """
        self.logger.debug("Gathering data for '%s' from %s -> %s",
                          controller, src_stem, dest_stem)

        csv_ipath = os.path.join(
            cmdopts['batch_stat_collate_root'], src_stem + ".csv")

        # Some experiments might not generate the necessary performance measure .csvs for
        # graph generation, which is OK.
        if not utils.path_exists(csv_ipath):
            self.logger.warning(
                "%s missing for controller '%s'", csv_ipath, controller)
            return

        df = storage.DataFrameReader('storage.csv')(csv_ipath)

        opath_leaf = LeafGenerator.from_batch_leaf(batch_leaf,
                                                   dest_stem,
                                                   [self.controllers.index(controller)])

        csv_opath_stem = os.path.join(self.cc_csv_root, opath_leaf)
        storage.DataFrameWriter('storage.csv')(
            df, csv_opath_stem + '.csv', index=False)
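The controller index passed to LeafGenerator.from_batch_leaf() above is what produces the _[0-9]+ suffix matched elsewhere (e.g. re.search('_[0-9]+', f) in _gen_paired_heatmaps()). Illustratively, assuming the leaf format simply appends the index:

controllers = ['primary-controller', 'other-controller']
dest_stem = 'pm-raw'  # hypothetical destination stem

for idx, controller in enumerate(controllers):
    # e.g. pm-raw_0.csv (primary), pm-raw_1.csv (other)
    print('{0}_{1}.csv'.format(dest_stem, idx))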
    def __call__(self, main_config: types.YAMLDict,
                 criteria: bc.IConcreteBatchCriteria) -> None:

        cmdopts = copy.deepcopy(self.cmdopts)

        utils.dir_create_checked(cmdopts['batch_model_root'], exist_ok=True)
        utils.dir_create_checked(cmdopts['batch_graph_collate_root'],
                                 exist_ok=True)

        for model in self.models:
            if not model.run_for_batch(criteria, cmdopts):
                self.logger.debug("Skip running inter-experiment model '%s'",
                                  str(model))
                continue

            # Run the model
            self.logger.debug("Run inter-experiment model '%s'", str(model))

            dfs = model.run(criteria, cmdopts)

            for df, csv_stem in zip(dfs, model.target_csv_stems()):
                path_stem = os.path.join(cmdopts['batch_model_root'], csv_stem)

                # Write model .csv file
                storage.DataFrameWriter('storage.csv')(df,
                                                       path_stem + '.model',
                                                       index=False)

                # 1D dataframe -> line graph with legend
                if len(df.index) == 1:
                    # Write model legend file so the generated graph can find
                    # it. Match by identity to find this dataframe's position
                    # in the model's output list; the original boolean
                    # comparison of .values.all() results always matched the
                    # first dataframe.
                    with open(path_stem + '.legend', 'w') as f:
                        for j, search in enumerate(dfs):
                            if search is df:
                                legend = model.legend_names()[j]
                                f.write(legend)
                                break
    def generate(self):
        data_ipath = os.path.join(self.exp_stat_root,
                                  self.target_stem + '.csv')
        data_opath = os.path.join(self.exp_graph_root,
                                  self.target_stem + '-HM' + config.kImageExt)
        stddev_ipath = os.path.join(self.exp_stat_root,
                                    self.target_stem + '.stddev')
        stddev_opath = os.path.join(self.exp_graph_root,
                                    self.target_stem + '-HM-stddev' + config.kImageExt)

        model_ipath = os.path.join(self.exp_model_root,
                                   self.target_stem + '.model')
        model_opath = os.path.join(self.exp_graph_root,
                                   self.target_stem + '-HM-model' + config.kImageExt)

        model_error_ipath = os.path.join(self.exp_model_root,
                                         self.target_stem + '-HM-model-error.csv')
        model_error_opath = os.path.join(self.exp_graph_root,
                                         self.target_stem + '-HM-model-error' + config.kImageExt)

        # Write the model error (model - data) .csv to the filesystem
        data_df = storage.DataFrameReader('storage.csv')(data_ipath)
        model_df = storage.DataFrameReader('storage.csv')(model_ipath)
        storage.DataFrameWriter('storage.csv')(model_df - data_df,
                                               model_error_ipath,
                                               index=False)

        HeatmapSet(ipaths=[data_ipath, stddev_ipath, model_ipath, model_error_ipath],
                   opaths=[data_opath, stddev_opath,
                           model_opath, model_error_opath],
                   titles=[self.target_title,
                           self.target_title + ' (Stddev)',
                           self.target_title + ' (Model)',
                           self.target_title + ' (Model Error)'],
                   xlabel='X',
                   ylabel='Y',
                   **self.kwargs).generate()
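The error surface written above is model - data, so positive cells indicate over-prediction and negative cells under-prediction. A toy example:

import pandas as pd

data_df = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]])
model_df = pd.DataFrame([[1.5, 2.0], [2.0, 4.5]])

# 0.5 = over-prediction, -1.0 = under-prediction, 0.0 = exact
print(model_df - data_df)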
    def _gen_csv(self, cmdopts: types.Cmdopts, batch_leaf: str, src_stem: str,
                 dest_stem: str) -> None:
        """
        Helper function for generating a set of .csv files for use in
        inter-scenario graph generation.

        Generates:

        - ``.csv`` file containing results for each scenario the controller is
          being compared across, 1 per line.

        - ``.stddev`` file containing stddev for the generated ``.csv`` file, 1
          per line.

        - ``.model`` file containing model predictions for controller behavior
          during each scenario, 1 per line (not generated if models were not
          run for the performance measures we are generating graphs for).

        - ``.legend`` file containing legend values for models to plot (not
          generated if models were not run for the performance measures we are
          generating graphs for).

        """

        csv_ipath = os.path.join(cmdopts['batch_output_root'],
                                 cmdopts['batch_stat_collate_root'],
                                 src_stem + ".csv")
        stddev_ipath = os.path.join(cmdopts['batch_output_root'],
                                    cmdopts['batch_stat_collate_root'],
                                    src_stem + ".stddev")

        model_ipath_stem = os.path.join(cmdopts['batch_model_root'], src_stem)
        model_opath_stem = os.path.join(self.sc_model_root,
                                        dest_stem + "-" + self.controller)

        opath_stem = os.path.join(self.sc_csv_root,
                                  dest_stem + "-" + self.controller)

        # Some experiments might not generate the necessary performance measure
        # .csvs for graph generation, which is OK.
        if not utils.path_exists(csv_ipath):
            self.logger.warning("%s missing for controller %s", csv_ipath,
                                self.controller)
            return

        # Collect performance measure results. Append to existing dataframe if
        # it exists, otherwise start a new one.
        data_df = self._accum_df(csv_ipath, opath_stem + '.csv', src_stem)
        storage.DataFrameWriter('storage.csv')(data_df,
                                               opath_stem + '.csv',
                                               index=False)

        # Collect performance results stddev. Append to existing dataframe if it
        # exists, otherwise start a new one.
        stddev_df = self._accum_df(stddev_ipath, opath_stem + '.stddev',
                                   src_stem)
        if stddev_df is not None:
            storage.DataFrameWriter('storage.csv')(stddev_df,
                                                   opath_stem + '.stddev',
                                                   index=False)

        # Collect performance results models and legends. Append to existing
        # dataframes if they exist, otherwise start new ones.
        model_df = self._accum_df(model_ipath_stem + '.model',
                                  model_opath_stem + '.model', src_stem)
        if model_df is not None:
            storage.DataFrameWriter('storage.csv')(model_df,
                                                   model_opath_stem + '.model',
                                                   index=False)
            with open(model_opath_stem + '.legend', 'a') as f:
                _, scenario, _ = rdg.parse_batch_leaf(batch_leaf)
                sgp = pm.module_load_tiered(
                    project=cmdopts['project'],
                    path='generators.scenario_generator_parser')
                kw = sgp.ScenarioGeneratorParser().to_dict(scenario)
                f.write("{0} Prediction\n".format(kw['scenario_tag']))