Example #1
def _updated_jb_config(report_config):
    """Override default jupyter-book options.

    report_config: PRESC config options for the report

    Returns the updated JB config as a YAML-formatted string that can be
    written to a _config.yml.
    """
    with open(REPORT_SOURCE_PATH / JB_CONFIG_FILENAME) as f:
        jb_config = yaml.load(f, Loader=yaml.FullLoader)

    jb_config["title"] = report_config["title"].get()
    jb_config["author"] = report_config["author"].get()

    # Add any page exclusions
    # First compile the overall list of report pages from the TOC.
    with open(REPORT_SOURCE_PATH / JB_TOC_FILENAME) as f:
        toc_str = f.read()

    stripped_lines = [x.strip() for x in toc_str.split("\n")]
    all_pages = [x[8:] for x in stripped_lines if x.startswith("- file: ")]
    incl_pages = include_exclude_list(
        all_pages,
        report_config["evaluations_include"].get(),
        report_config["evaluations_exclude"].get(),
    )
    if "landing" not in incl_pages:
        incl_pages.append("landing")

    to_exclude = [f"{p}.ipynb" for p in all_pages if p not in incl_pages]
    if to_exclude:
        jb_config["exclude_patterns"] = to_exclude

    return yaml.dump(jb_config)
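
For context on where the exclusion list comes from: the report pages are read out of the jupyter-book TOC by slicing off the "- file: " prefix (8 characters), and the resulting list is what gets filtered through include_exclude_list. A minimal, runnable sketch of that parsing, using a hypothetical _toc.yml excerpt (the real page names in the PRESC report source may differ):

toc_str = """\
format: jb-book
root: landing
chapters:
  - file: conditional_metric
  - file: spatial_distribution
"""

# Same parsing as in _updated_jb_config: keep the text after "- file: ".
stripped_lines = [x.strip() for x in toc_str.split("\n")]
all_pages = [x[8:] for x in stripped_lines if x.startswith("- file: ")]
print(all_pages)  # ['conditional_metric', 'spatial_distribution']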
Example #2
    def display(self, colnames=None):
        """Computes and displays the conditional metric result for each specified column.

        Parameters
        ----------
        colnames : list of str
            A list of column names to run the evaluation over, creating a plot
            for each. If not supplied, defaults to columns specified in the config.
        """
        eval_config = self._config["evaluations"]["conditional_metric"]
        if colnames:
            incl = colnames
            excl = None
        else:
            incl = eval_config["columns_include"].get()
            excl = eval_config["columns_exclude"].get()
        cols = include_exclude_list(self._test_dataset.column_names,
                                    included=incl,
                                    excluded=excl)

        for colname in cols:
            metrics = _get_metrics_for_column(colname=colname,
                                              eval_config=eval_config)
            for metric in metrics:
                function = metric.get("function")
                display_name = metric.get("display_name")
                eval_result = self.compute_for_column(colname, metric=function)
                eval_result.display_result(xlab=colname, ylab=display_name)
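
The lookups above imply a config layout roughly like the sketch below. This is an illustrative guess: "columns_include", "columns_exclude", "function" and "display_name" are the keys referenced in the code, but the grouping under a "metrics" key and the example values are assumptions, not the actual PRESC defaults.

conditional_metric_config = {
    "columns_include": "*",    # read via eval_config["columns_include"].get()
    "columns_exclude": None,   # read via eval_config["columns_exclude"].get()
    # Assumed grouping: _get_metrics_for_column (not shown here) is expected to
    # yield per-metric entries carrying a "function" and a "display_name".
    "metrics": {
        "accuracy": {
            "function": "accuracy_score",
            "display_name": "Accuracy",
        },
    },
}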
Example #3
def test_include_exclude():
    vals = ["a", "b", "c", "d", "e"]

    assert include_exclude_list(vals) == vals
    assert include_exclude_list(vals, included=vals) == vals
    assert include_exclude_list(vals, included=None) == []
    assert include_exclude_list(vals, excluded="*") == []
    assert include_exclude_list(vals, excluded=vals) == []

    assert include_exclude_list(vals, included=["c", "b", "e",
                                                "y"]) == ["c", "b", "e"]
    assert include_exclude_list(vals, excluded=["a", "e",
                                                "z"]) == ["b", "c", "d"]
    assert include_exclude_list(vals,
                                included=["c", "b", "e", "y"],
                                excluded=["e"]) == ["c", "b"]
    assert include_exclude_list(vals,
                                included=["c", "b", "e", "y"],
                                excluded="*") == []
    assert include_exclude_list(vals, included=None, excluded=["a"]) == []
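
The assertions pin down the expected semantics: included defaults to everything, included=None selects nothing, "*" acts as a wildcard for either argument, names not in the base list are silently dropped, and the order of the included list is preserved. A minimal implementation sketch consistent with this test (not necessarily the actual PRESC implementation):

def include_exclude_list(values, included="*", excluded=None):
    # Hypothetical implementation matching the assertions above.
    if included is None:
        return []
    if included == "*":
        included = list(values)
    if excluded == "*":
        return []
    excluded = excluded or []
    # Keep only known values, in the order given by `included`.
    return [v for v in included if v in values and v not in excluded]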
Example #4
    def compute(self, **kwargs):
        """Compute the evaluation for the given datasets.

        Parameters
        ----------
        kwargs:
            On-the-fly overrides to the config option values for the computation.

        Returns
        -------
        SpatialDistributionResult
        """
        eval_config = PrescConfig(self._config)
        eval_config = eval_config["evaluations"]["spatial_distribution"]
        if kwargs:
            eval_config.set(kwargs)

        # Feature columns to include in the distance computation.
        feats_incl = eval_config["features_include"].get()
        feats_excl = eval_config["features_exclude"].get()
        feats = include_exclude_list(
            self._test_dataset.feature_names, included=feats_incl, excluded=feats_excl
        )
        num_feats = []
        categ_feats = []
        for col in feats:
            if is_discrete(self._test_dataset.features[col]):
                categ_feats.append(col)
            else:
                num_feats.append(col)

        # Figure out which distance metric to use for each feature.
        dist_metrics_num, dist_metrics_categ = _get_distance_metrics_by_column(
            num_feats, categ_feats, eval_config
        )

        return compute_spatial_distribution(
            test_features=self._test_dataset.features,
            test_labs_true=self._test_dataset.labels,
            test_labs_pred=self._test_pred,
            base_features=self._train_dataset.features,
            base_labs=self._train_dataset.labels,
            numerical_dist_metric=dist_metrics_num,
            categorical_dist_metric=dist_metrics_categ,
            summary=eval_config["summary_agg"].get(),
        )
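
The kwargs hook above lets individual options be overridden per call without editing the global config. A hypothetical usage sketch (`sde` stands for an already-constructed spatial distribution evaluation; "id" and "median" are example values, not verified defaults):

# Per-call override of config options via compute(**kwargs).
result = sde.compute(features_exclude=["id"], summary_agg="median")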
    def display(self, colnames=None):
        """Computes and displays the conditional distribution result for each
        specified column.

        Parameters
        ----------
        colnames : list of str
            A list of column names to run the evaluation over, creating a plot
            for each. If not supplied, defaults to columns specified in the config.
        """
        if colnames:
            incl = colnames
            excl = None
        else:
            eval_config = self._config["evaluations"]["conditional_distribution"]
            incl = eval_config["columns_include"].get()
            excl = eval_config["columns_exclude"].get()
        cols = include_exclude_list(
            self._test_dataset.column_names, included=incl, excluded=excl
        )

        for colname in cols:
            eval_result = self.compute_for_column(colname)
            eval_result.display_result(xlab=colname)