예제 #1
0
    def show(self, short: bool = False, keys_limit: int = 10):
        """Write the summary and per-rule details, then render every figure."""
        # Deferred import; likely avoids a circular dependency with
        # arche.report -- confirm against module layout.
        from arche.report import Report

        Report.write_summary(self)
        Report.write_rule_details(self, short=short, keys_limit=keys_limit)
        for figure in self.figures:
            pio.show(figure)
예제 #2
0
    def show(self, short: bool = False, keys_limit: int = 10):
        """Clear the notebook output, then write summary, details and figures."""
        # Deferred import; likely avoids a circular dependency with
        # arche.report -- confirm against module layout.
        from arche.report import Report

        IPython.display.clear_output()
        Report.write_summary(self)
        Report.write_rule_details(self, short=short, keys_limit=keys_limit)
        for figure in self.figures:
            figure.show()
예제 #3
0
def test_write_detailed_errors(mocker, errors, short, keys_limit,
                               expected_messages):
    """Each expected message is rendered through display_markdown."""
    mocker.patch(
        "pandas.Series.sample", return_value=pd.Series("5"), autospec=True
    )
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)
    Report.write_detailed_errors(errors, short, keys_limit)
    expected_calls = [mocker.call(message) for message in expected_messages]
    markdown_mock.assert_has_calls(expected_calls)
예제 #4
0
def test_write_detailed_errors(mocker, errors, short, keys_limit,
                               expected_messages):
    """Each expected message is rendered as HTML, in any order."""
    mocker.patch(
        "pandas.Series.sample", return_value=["5"], autospec=True
    )
    html_mock = mocker.patch("arche.report.HTML", autospec=True)
    Report.write_detailed_errors(errors, short, keys_limit)
    expected_calls = [mocker.call(message) for message in expected_messages]
    html_mock.assert_has_calls(expected_calls, any_order=True)
예제 #5
0
def test_report_call(mocker, get_df, capsys, messages, expected_details):
    """Calling the report renders both rules' names and plots exactly twice."""
    mocked_display = mocker.patch("arche.report.display_html", autospec=True)

    report = Report()
    for message in messages:
        outcome = create_result(*message, stats=[get_df])
        report.save(outcome)
    report()

    # The rendered iframe is the first positional argument of the first call.
    rendered = get_report_from_iframe(mocked_display.mock_calls[0][1][0])
    assert rendered.count("Plotly.newPlot") == 2
    assert rendered.count("rule name here - INFO") == 2
    assert rendered.count("other result there - INFO") == 2
예제 #6
0
파일: arche.py 프로젝트: zanachka/arche
 def __init__(
     self,
     source: Union[str, pd.DataFrame, RawItems],
     schema: Optional[SchemaSource] = None,
     target: Optional[Union[str, pd.DataFrame]] = None,
     count: Optional[int] = None,
     start: Optional[Union[str, int]] = None,
     filters: Optional[api.Filters] = None,
     expand: Optional[bool] = None,
 ):
     """
     Args:
         source: a data source to validate, accepts job keys, pandas df, lists
         schema: a JSON schema source used to run validation
         target: a data source to compare with
         count: the amount of items to read from start
         start: an item key to start reading from
         filters: Scrapinghub filtering, see
         https://python-scrapinghub.readthedocs.io/en/latest/client/apidocs.html#scrapinghub.client.items.Items # noqa
         expand: deprecated; a truthy value only triggers a deprecation warning
     """
     # 'expand' no longer has any effect; kept for backward compatibility
     # until the 0.3.7 release.
     if expand:
         maintenance.deprecate(
             "'expand' parameter is deprecated and will be removed in the next 0.3.7"
             " release. See CHANGES.md for more details.",
             gone_in="0.3.7",
         )
     if isinstance(source, str) and target == source:
         raise ValueError(
             "'target' is equal to 'source'. Data to compare should have different sources."
         )
     if isinstance(source, pd.DataFrame):
         logger.warning(
             "Pandas stores `NA` (missing) data differently, "
             "which might affect schema validation. "
             "Should you care, consider passing raw data in array-like types.\n"
             "For more details, see https://pandas.pydata.org/pandas-docs/"
             "stable/user_guide/gotchas.html#nan-integer-na-values-and-na-type-promotions"
         )
     self.source = source
     self._schema = None  # built lazily by the `schema` property
     self.schema_source = None
     if schema:
         self.schema = schema  # presumably a property setter; confirm in class
     self.target = target
     self.start = start
     self.count = count
     self.filters = filters
     self._source_items = None  # caches for lazily-fetched items
     self._target_items = None
     self.report = Report()
예제 #7
0
    def __init__(
        self,
        source: Union[str, pd.DataFrame, RawItems],
        schema: Optional[sr.SchemaSource] = None,
        target: Optional[Union[str, pd.DataFrame]] = None,
        start: int = 0,
        count: Optional[int] = None,
        filters: Optional[api.Filters] = None,
        expand: bool = True,
    ):
        """Set up a validation session.

        Args:
            source: a data source to validate, accepts job keys, pandas df, lists
            schema: a JSON schema source used to run validation
            target: a data source to compare with
            start: an item number to start reading from
            count: the amount of items to read from start
            filters: Scrapinghub filtering, see
            https://python-scrapinghub.readthedocs.io/en/latest/client/apidocs.html#scrapinghub.client.items.Items # noqa
            expand: if True, use flattened data in garbage rules, affects performance
            see flatten_df
        """
        # Comparing a source with itself is meaningless.
        if isinstance(source, str) and target == source:
            raise ValueError(
                "'target' is equal to 'source'. Data to compare should have different sources."
            )
        if isinstance(source, pd.DataFrame):
            logging.warning(
                "Pandas stores `NA` (missing) data differently, "
                "which might affect schema validation. "
                "Should you care, consider passing raw data in array-like types.\n"
                "For more details, see https://pandas.pydata.org/pandas-docs/"
                "stable/user_guide/gotchas.html#nan-integer-na-values-and-na-type-promotions"
            )
        self.source = source
        self.target = target
        self.start = start
        self.count = count
        self.filters = filters
        self.expand = expand
        # Schema state is initialized empty, then populated via the setter.
        self._schema = None
        self.schema_source = None
        if schema:
            self.schema = sr.get_schema(schema)
        # Item caches for the lazy *_items properties.
        self._source_items = None
        self._target_items = None

        self.report = Report()
예제 #8
0
def test_sample_keys(mocker, keys, limit, expected_sample):
    """sample_keys returns the (mocked) pandas sample as item URLs."""
    mocker.patch(
        "pandas.Series.sample",
        return_value=[f"{SH_URL}/112358/13/21/item/5"],
        autospec=True,
    )
    sample = Report().sample_keys(keys, limit)
    assert sample == expected_sample
예제 #9
0
def test_report_call_arguments(mocker):
    """A single saved result's detail message is rendered exactly once."""
    mocked_display = mocker.patch("arche.report.display_html", autospec=True)
    outcome = create_result(
        "rule name here", {Level.INFO: [("summary", "very detailed message")]}
    )

    Report()(outcome)
    # The rendered iframe is the first positional argument of the first call.
    rendered = get_report_from_iframe(mocked_display.mock_calls[0][1][0])
    assert rendered.count("very detailed message") == 1
예제 #10
0
    def __init__(
        self,
        source: str,
        schema: Optional[SchemaSource] = None,
        target: Optional[str] = None,
        start: int = 0,
        count: Optional[int] = None,
        filters: Optional[api.Filters] = None,
        expand: bool = True,
    ):
        """Set up a validation session.

        Args:
            source: a data source to validate. Supports job or collection keys
            schema: a JSON schema source used to run validation
            target: a data source to compare with
            start: an item number to start reading from
            count: the amount of items to read from start
            filters: Scrapinghub filtering
            expand: if enabled, use flattened data in garbage rules, affects performance, see flatten_df # noqa
        """
        self.source = source
        # A target identical to the source carries no comparison value.
        if target != self.source:
            self.target = target
        else:
            logger.warning(
                "'target' is the same as 'source', and will be ignored")
            self.target = None
        self.start = start
        self.count = count
        self.filters = filters
        self.expand = expand
        self.schema_source = schema
        self._schema = get_schema(schema) if schema else None
        # Item caches for the lazy *_items properties.
        self._source_items = None
        self._target_items = None

        self.report = Report()
예제 #11
0
def test_write_details(mocker, get_df, capsys, messages, expected_details):
    """write_details prints the detail text and shows the last figure."""
    mock_pio_show = mocker.patch("plotly.io.show", autospec=True)
    report = Report()
    for message in messages:
        outcome = create_result(*message, stats=[get_df])
        report.save(outcome)
    report.write_details()
    # `outcome` still refers to the last created result after the loop.
    mock_pio_show.assert_called_with(outcome.figures[0])
    assert capsys.readouterr().out == expected_details
예제 #12
0
def test_write_details(mocker, get_df, capsys, messages, expected_details):
    """write_details shows figures and emits markdown for every detail."""
    mock_pio_show = mocker.patch("plotly.io.show", autospec=True)
    markdown_mock = mocker.patch("arche.report.display_markdown", autospec=True)

    report = Report()
    for message in messages:
        outcome = create_result(*message, stats=[get_df])
        report.save(outcome)
    report.write_details()
    # `outcome` still refers to the last created result after the loop.
    mock_pio_show.assert_called_with(outcome.figures[0])
    markdown_mock.assert_has_calls(
        [mocker.call(detail) for detail in expected_details], any_order=True
    )
예제 #13
0
def test_dqr_empty_report(mocker, get_job_items, get_schema):
    """A freshly built DataQualityReport contains exactly 4 figures."""
    mocker.patch(
        "arche.data_quality_report.DataQualityReport.plot_to_notebook",
        autospec=True,
    )
    mocker.patch(
        "arche.tools.api.get_response_status_count",
        return_value=(10, 0, 0, 0),
        autospec=True,
    )
    # Stub out the remaining job-stat helpers with fixed numbers.
    for api_path, stub_value in (
        ("arche.tools.api.get_runtime_s", 60),
        ("arche.tools.api.get_items_count", 1000),
        ("arche.tools.api.get_requests_count", 1000),
    ):
        mocker.patch(api_path, return_value=stub_value, autospec=True)
    dqr = DataQualityReport(
        items=get_job_items, schema=Schema(get_schema), report=Report()
    )
    assert len(dqr.figures) == 4
예제 #14
0
파일: arche.py 프로젝트: zanachka/arche
class Arche:
    """Run validation and comparison rules over a data source.

    Rule outcomes accumulate in ``self.report``; items are fetched lazily
    through the ``source_items``/``target_items`` properties.
    """

    def __init__(
        self,
        source: Union[str, pd.DataFrame, RawItems],
        schema: Optional[SchemaSource] = None,
        target: Optional[Union[str, pd.DataFrame]] = None,
        count: Optional[int] = None,
        start: Optional[Union[str, int]] = None,
        filters: Optional[api.Filters] = None,
        expand: Optional[bool] = None,
    ):
        """
        Args:
            source: a data source to validate, accepts job keys, pandas df, lists
            schema: a JSON schema source used to run validation
            target: a data source to compare with
            count: the amount of items to read from start
            start: an item key to start reading from
            filters: Scrapinghub filtering, see
            https://python-scrapinghub.readthedocs.io/en/latest/client/apidocs.html#scrapinghub.client.items.Items # noqa
            expand: deprecated; a truthy value only triggers a deprecation warning
        """
        # 'expand' no longer has any effect; kept for backward compatibility
        # until the 0.3.7 release.
        if expand:
            maintenance.deprecate(
                "'expand' parameter is deprecated and will be removed in the next 0.3.7"
                " release. See CHANGES.md for more details.",
                gone_in="0.3.7",
            )
        if isinstance(source, str) and target == source:
            raise ValueError(
                "'target' is equal to 'source'. Data to compare should have different sources."
            )
        if isinstance(source, pd.DataFrame):
            logger.warning(
                "Pandas stores `NA` (missing) data differently, "
                "which might affect schema validation. "
                "Should you care, consider passing raw data in array-like types.\n"
                "For more details, see https://pandas.pydata.org/pandas-docs/"
                "stable/user_guide/gotchas.html#nan-integer-na-values-and-na-type-promotions"
            )
        self.source = source
        self._schema = None  # built lazily by the `schema` property
        self.schema_source = None
        if schema:
            self.schema = schema  # goes through the property setter below
        self.target = target
        self.start = start
        self.count = count
        self.filters = filters
        self._source_items = None  # caches for the lazy *_items properties
        self._target_items = None
        self.report = Report()

    @property
    def source_items(self):
        # Fetched once on first access; see get_items for accepted kinds.
        if not self._source_items:
            self._source_items = self.get_items(self.source, self.count,
                                                self.start, self.filters)
        return self._source_items

    @property
    def target_items(self):
        # None when no comparison target was configured.
        if self.target is None:
            return None
        if not self._target_items:
            self._target_items = self.get_items(self.target, self.count,
                                                self.start, self.filters)
        return self._target_items

    @property
    def schema(self):
        # Built on first access from schema_source, if one was provided.
        if not self._schema and self.schema_source:
            self._schema = Schema(self.schema_source)
        return self._schema

    @schema.setter
    def schema(self, schema_source):
        # Keep the raw source around so the getter could rebuild it.
        self.schema_source = schema_source
        self._schema = Schema(schema_source)

    @staticmethod
    def get_items(
        source: Union[str, pd.DataFrame, RawItems],
        count: Optional[int],
        start: Optional[str],
        filters: Optional[api.Filters],
    ) -> Items:
        """Wrap `source` in the Items subclass matching its kind.

        Raises:
            ValueError: if `source` is a string but neither a job nor a
                collection key.
        """
        if isinstance(source, pd.DataFrame):
            return Items.from_df(source)
        elif isinstance(source, Iterable) and not isinstance(source, str):
            return Items.from_array(cast(RawItems, source))
        elif helpers.is_job_key(source):
            # Jobs address items by integer position; default to 0.
            return JobItems(source, count, int(start or 0), filters)
        elif helpers.is_collection_key(source):
            # Collections address items by string key.
            return CollectionItems(source, count, start, filters)
        else:
            raise ValueError(
                f"'{source}' is not a valid job or collection key")

    def save_result(self, rule_result):
        """Store a rule outcome in the shared report."""
        self.report.save(rule_result)

    def report_all(self,
                   short: bool = False,
                   uniques: Optional[List[Union[str, List[str]]]] = None
                   ) -> None:
        """Report on all included rules.

        Args:
            short: if True, cap the number of shown keys at 10
            uniques: see `arche.rules.duplicates.find_by`
        """
        if uniques:
            self.uniques = uniques
        self.run_all_rules()
        IPython.display.clear_output()
        self.report(keys_limit=10 if short else None)

    def run_all_rules(self) -> None:
        """Run metadata, general, comparison and schema rule groups."""
        # Metadata rules only apply to Scrapinghub jobs.
        if isinstance(self.source_items, JobItems):
            self.check_metadata(self.source_items.job)
            if self.target_items:
                self.compare_metadata(self.source_items.job,
                                      self.target_items.job)
        self.run_general_rules()
        self.run_comparison_rules()
        self.run_schema_rules()

    def data_quality_report(self, bucket: Optional[str] = None):
        """Render a data quality report; requires a schema, jobs only."""
        if helpers.is_collection_key(str(self.source)):
            raise ValueError("Collections are not supported")
        if not self.schema:
            raise ValueError("Schema is empty")
        IPython.display.clear_output()
        DataQualityReport(self.source_items, self.schema, self.report, bucket)

    def run_general_rules(self):
        # Schema-free rules: garbage symbols, field coverage (ignoring
        # underscore-prefixed meta columns), category stats and, when
        # `self.uniques` was set via report_all, duplicate detection.
        self.save_result(garbage_symbols(self.source_items.df))
        df = self.source_items.df
        self.save_result(
            coverage_rules.check_fields_coverage(
                df.drop(columns=df.columns[df.columns.str.startswith("_")])))
        self.save_result(category_rules.get_categories(df))
        if getattr(self, "uniques", None):
            self.save_result(
                duplicate_rules.find_by(self.source_items.df, self.uniques))

    def validate_with_json_schema(self) -> None:
        """Run JSON schema check and output results. It will try to find all errors, but
        there are no guarantees. Slower than `check_with_json_schema()`
        """
        res = schema_rules.validate(self.schema.raw, self.source_items.raw,
                                    self.source_items.df.index)
        self.save_result(res)
        # NOTE(review): unlike glance(), output goes through self.report(res)
        # rather than res.show() -- confirm this asymmetry is intended.
        self.report(res)

    def glance(self) -> None:
        """Run JSON schema check and output results. In most cases it will return
        only the first error per item. Usable for big jobs as it's about 100x faster than
        `validate_with_json_schema()`.
        """
        res = schema_rules.validate(
            self.schema.raw,
            self.source_items.raw,
            self.source_items.df.index,
            fast=True,
        )
        self.save_result(res)
        res.show()

    def run_schema_rules(self) -> None:
        """Validate against the schema and run tag-driven rules."""
        if not self.schema:
            return
        self.save_result(
            schema_rules.validate(self.schema.raw, self.source_items.raw,
                                  self.source_items.df.index))

        target_columns = (self.target_items.df.columns.values
                          if self.target_items else None)

        check_tags_result = schema_rules.check_tags(
            self.source_items.df.columns.values, target_columns,
            self.schema.tags)
        self.save_result(check_tags_result)
        # Tag errors make the tag-dependent rules below meaningless.
        if check_tags_result.errors:
            return

        self.run_customized_rules(self.source_items, self.schema.tags)
        self.compare_with_customized_rules(self.source_items,
                                           self.target_items, self.schema.tags)

    def run_customized_rules(self, items, tagged_fields):
        # Rules driven by schema tags: price sanity, tag-based duplicates,
        # and category coverage (tags plus schema enums).
        self.save_result(price_rules.compare_was_now(items.df, tagged_fields))
        self.save_result(duplicate_rules.find_by_tags(items.df, tagged_fields))
        self.save_result(
            category_rules.get_coverage_per_category(
                items.df,
                tagged_fields.get("category", []) + self.schema.enums))

    # NOTE(review): lru_cache on instance methods keeps `self` alive for the
    # cache lifetime (flake8-bugbear B019); presumably acceptable for
    # notebook-scoped Arche objects -- confirm.
    @lru_cache(maxsize=32)
    def check_metadata(self, job):
        self.save_result(metadata_rules.check_outcome(job))
        self.save_result(metadata_rules.check_errors(job))

    @lru_cache(maxsize=32)
    def compare_metadata(self, source_job, target_job):
        # Pairwise job metadata comparisons; each produces its own result.
        self.save_result(
            metadata_rules.compare_spider_names(source_job, target_job))
        self.save_result(
            metadata_rules.compare_number_of_scraped_items(
                source_job, target_job))
        self.save_result(coverage_rules.get_difference(source_job, target_job))
        self.save_result(
            metadata_rules.compare_response_ratio(source_job, target_job))
        self.save_result(metadata_rules.compare_runtime(
            source_job, target_job))
        self.save_result(
            metadata_rules.compare_finish_time(source_job, target_job))

    @lru_cache(maxsize=32)
    def run_comparison_rules(self):
        # Source-vs-target dataframe comparisons; skipped without a target.
        if not self.target_items:
            return
        for r in [
                coverage_rules.compare_scraped_fields, compare_boolean_fields
        ]:
            self.save_result(r(self.source_items.df, self.target_items.df))

    def compare_with_customized_rules(self, source_items, target_items,
                                      tagged_fields):
        # Tag-driven source-vs-target comparisons; skipped without a target.
        if not target_items:
            return
        self.save_result(
            category_rules.get_difference(
                source_items.df,
                target_items.df,
                tagged_fields.get("category", []) + self.schema.enums,
            ))
        for r in [
                price_rules.compare_prices_for_same_urls,
                price_rules.compare_names_for_same_urls,
                price_rules.compare_prices_for_same_names,
        ]:
            self.save_result(r(source_items.df, target_items.df,
                               tagged_fields))
        self.save_result(
            compare.tagged_fields(
                source_items.df,
                target_items.df,
                tagged_fields,
                ["product_url_field", "name_field"],
            ))
예제 #15
0
def test_wipe():
    """wipe() drops all previously saved results."""
    report = Report()
    outcome = create_result("dummy", {Level.INFO: [("outcome",)]})
    report.save(outcome)
    report.wipe()
    assert report.results == {}
예제 #16
0
class Arche:
    """Validate a Scrapinghub job/collection and compare it with a target.

    Rule outcomes accumulate in ``self.report``; items are fetched lazily
    through the ``source_items``/``target_items`` properties.
    """

    def __init__(
        self,
        source: str,
        schema: Optional[sr.SchemaSource] = None,
        target: Optional[str] = None,
        start: int = 0,
        count: Optional[int] = None,
        filters: Optional[api.Filters] = None,
        expand: bool = True,
    ):
        """
        Args:
            source: a data source to validate. Supports job or collection keys
            schema: a JSON schema source used to run validation
            target: a data source to compare with
            start: an item number to start reading from
            count: the amount of items to read from start
            filters: Scrapinghub filtering
            expand: if enabled, use flattened data in garbage rules, affects performance, see flatten_df # noqa
        """
        self.source = source
        # A target identical to the source carries no comparison value.
        if target == self.source:
            logger.warning("'target' is the same as 'source', and will be ignored")
            self.target = None
        else:
            self.target = target
        self.start = start
        self.count = count
        self.filters = filters
        self.expand = expand
        self.schema_source = None  # populated via the `schema` setter below
        self._schema = None
        if schema:
            self.schema = sr.get_schema(schema)
        self._source_items = None  # caches for the lazy *_items properties
        self._target_items = None

        self.report = Report()

    @property
    def source_items(self):
        # Fetched once on first access; see get_items.
        if not self._source_items:
            self._source_items = self.get_items(
                self.source, self.start, self.count, self.filters, self.expand
            )
        return self._source_items

    @property
    def target_items(self):
        # None when no comparison target was configured.
        if not self.target:
            return None
        if not self._target_items:
            self._target_items = self.get_items(
                self.target, self.start, self.count, self.filters, self.expand
            )
        return self._target_items

    @property
    def schema(self):
        # Built on first access from schema_source, if one was provided.
        if not self._schema and self.schema_source:
            self._schema = sr.get_schema(self.schema_source)
        return self._schema

    @schema.setter
    def schema(self, schema_source):
        # Keep the raw source around so the getter could rebuild it.
        self.schema_source = schema_source
        self._schema = sr.get_schema(schema_source)

    @staticmethod
    def get_items(
        source: str,
        start: int,
        count: Optional[int],
        filters: Optional[api.Filters],
        expand: bool,
    ) -> Union[JobItems, CollectionItems]:
        """Wrap `source` into job or collection items.

        Raises:
            ValueError: if `source` is not a valid job/collection key, or
                `start` is used with a collection (unsupported by the API).
        """
        if helpers.is_job_key(source):
            return JobItems(
                key=source, start=start, count=count, filters=filters, expand=expand
            )
        elif helpers.is_collection_key(source):
            if start:
                raise ValueError("Collections API does not support 'start' parameter")
            return CollectionItems(
                key=source, count=count, filters=filters, expand=expand
            )
        else:
            raise ValueError(f"'{source}' is not a valid job or collection key")

    def save_result(self, rule_result):
        """Store a rule outcome in the shared report."""
        self.report.save(rule_result)

    def basic_json_schema(self, items_numbers: Optional[List[int]] = None):
        """Prints a json schema based on data from `self.source`

        Args:
            items_numbers: array of item numbers to create a schema from
        """
        # Deprecated wrapper: warns, then delegates to the module-level
        # basic_json_schema helper.
        maintenance.deprecate(
            "'Arche.basic_json_schema()' was deprecated in 2019.03.25 and "
            "will be removed in 2019.04.22.",
            replacement="Use 'basic_json_schema()' instead",
            gone_in="0.4.0",
        )
        schema.basic_json_schema(self.source, items_numbers)

    def report_all(self):
        """Run every rule, then print summaries followed by short details."""
        self.run_all_rules()
        self.report.write_summaries()
        self.report.write("\n" * 2)
        self.report.write_details(short=True)

    def run_all_rules(self):
        """Run metadata, general, comparison and schema rule groups."""
        # Metadata rules only apply to Scrapinghub jobs.
        if helpers.is_job_key(self.source_items.key):
            self.check_metadata(self.source_items.job)
            if self.target_items:
                self.compare_metadata(self.source_items.job, self.target_items.job)
        self.run_general_rules()
        self.run_comparison_rules()
        self.run_schema_rules()

    def data_quality_report(self, bucket: Optional[str] = None):
        """Build a data quality report; requires a schema, jobs only."""
        if helpers.is_collection_key(self.source):
            raise ValueError("Collections are not supported")
        if not self.schema:
            raise ValueError("Schema is empty")
        # Ensure the report has at least a schema validation result.
        if not self.report.results:
            self.save_result(
                schema_rules.validate(
                    self.schema, items_dicts=self.source_items.dicts, fast=False
                )
            )

        DataQualityReport(self.source_items, self.schema, self.report, bucket)

    # NOTE(review): lru_cache on instance methods keeps `self` alive for the
    # cache lifetime (flake8-bugbear B019); presumably acceptable for
    # notebook-scoped Arche objects -- confirm.
    @lru_cache(maxsize=32)
    def run_general_rules(self):
        # Schema-free rules: garbage symbols and field coverage (dropping
        # the _type/_key meta columns).
        self.save_result(garbage_symbols(self.source_items))
        self.save_result(
            coverage_rules.check_fields_coverage(
                self.source_items.df.drop(columns=["_type", "_key"])
            )
        )

    def validate_with_json_schema(self):
        """Run JSON schema check and output results. It will try to find all errors, but
        there are no guarantees. Slower than `check_with_json_schema()`
        """
        res = schema_rules.validate(
            self.schema, items_dicts=self.source_items.dicts, fast=False
        )
        self.save_result(res)
        res.show()

    def glance(self):
        """Run JSON schema check and output results. In most cases it will stop after
        the first error per item. Usable for big jobs as it's about 100x faster than
        `validate_with_json_schema()`.
        """
        res = schema_rules.validate(
            self.schema, items_dicts=self.source_items.dicts, fast=True
        )
        self.save_result(res)
        res.show()

    def run_schema_rules(self):
        """Validate against the schema and run tag-driven rules."""
        if not self.schema:
            return

        self.save_result(schema_rules.validate(self.schema, self.source_items.dicts))

        tagged_fields = sr.Tags().get(self.schema)
        target_columns = (
            self.target_items.df.columns.values if self.target_items else None
        )

        check_tags_result = schema_rules.check_tags(
            self.source_items.df.columns.values, target_columns, tagged_fields
        )
        self.save_result(check_tags_result)
        # Tag errors make the tag-dependent rules below meaningless.
        if check_tags_result.errors:
            return

        self.run_customized_rules(self.source_items, tagged_fields)
        self.compare_with_customized_rules(
            self.source_items, self.target_items, tagged_fields
        )

    def run_customized_rules(self, items, tagged_fields):
        # Rules driven by schema tags: price sanity, uniqueness, whole-item
        # duplicates, and per-category coverage.
        self.save_result(price_rules.compare_was_now(items.df, tagged_fields))
        self.save_result(duplicate_rules.check_uniqueness(items.df, tagged_fields))
        self.save_result(duplicate_rules.check_items(items.df, tagged_fields))
        self.save_result(
            category_rules.get_coverage_per_category(
                items.df, tagged_fields.get("category", [])
            )
        )

    @lru_cache(maxsize=32)
    def check_metadata(self, job):
        # Standalone job health checks.
        self.save_result(metadata_rules.check_outcome(job))
        self.save_result(metadata_rules.check_errors(job))
        self.save_result(metadata_rules.check_response_ratio(job))

    @lru_cache(maxsize=32)
    def compare_metadata(self, source_job, target_job):
        # Pairwise job metadata comparisons; each produces its own result.
        self.save_result(metadata_rules.compare_spider_names(source_job, target_job))
        self.save_result(metadata_rules.compare_errors(source_job, target_job))
        self.save_result(
            metadata_rules.compare_number_of_scraped_items(source_job, target_job)
        )
        self.save_result(coverage_rules.get_difference(source_job, target_job))
        self.save_result(metadata_rules.compare_response_ratio(source_job, target_job))
        self.save_result(metadata_rules.compare_runtime(source_job, target_job))
        self.save_result(metadata_rules.compare_finish_time(source_job, target_job))

    @lru_cache(maxsize=32)
    def run_comparison_rules(self):
        # Source-vs-target dataframe comparisons; skipped without a target.
        if not self.target_items:
            return
        self.save_result(
            coverage_rules.compare_scraped_fields(
                self.source_items.df, self.target_items.df
            )
        )
        self.save_result(
            compare_boolean_fields(self.source_items.df, self.target_items.df)
        )

    def compare_with_customized_rules(self, source_items, target_items, tagged_fields):
        # Tag-driven source-vs-target comparisons; skipped without a target.
        if not target_items:
            return
        self.save_result(
            category_rules.get_difference(
                source_items.key,
                target_items.key,
                source_items.df,
                target_items.df,
                tagged_fields.get("category", []),
            )
        )
        self.save_result(
            price_rules.compare_prices_for_same_urls(
                source_items.df, target_items.df, tagged_fields
            )
        )
        self.save_result(
            price_rules.compare_names_for_same_urls(
                source_items.df, target_items.df, tagged_fields
            )
        )
        self.save_result(
            price_rules.compare_prices_for_same_names(
                source_items.df, target_items.df, tagged_fields
            )
        )
예제 #17
0
def test_write_details(capsys, messages, expected_details):
    """write_details prints the saved results' details to stdout."""
    report = Report()
    for message in messages:
        report.save(create_result(*message))
    report.write_details()
    captured = capsys.readouterr()
    assert captured.out == expected_details
예제 #18
0
def test_write_rule_details(capsys, message, expected_details):
    """write_rule_details prints the expected text for a single result."""
    result = create_result("rule name here", message)
    Report.write_rule_details(result)
    captured = capsys.readouterr()
    assert captured.out == expected_details
예제 #19
0
def test_save():
    """save() stores the result keyed by its rule name."""
    report = Report()
    outcome = create_result("dummy", {Level.INFO: [("outcome",)]})
    report.save(outcome)
    assert report.results == {outcome.name: outcome}
예제 #20
0
def test_sample_keys(mocker, keys, limit, sample_mock, expected_sample):
    """sample_keys delegates sampling to pandas.Series.sample."""
    mocker.patch(
        "pandas.Series.sample", return_value=sample_mock, autospec=True
    )
    sample = Report.sample_keys(keys, limit)
    assert sample == expected_sample
예제 #21
0
    def show(self, short: bool = False, keys_limit: int = 10):
        """Clear the notebook output, then render this result via a Report."""
        # Deferred import; likely avoids a circular dependency with
        # arche.report -- confirm against module layout.
        from arche.report import Report

        IPython.display.clear_output()
        limit = keys_limit if short else None
        Report()(self, limit)
예제 #22
0
def test_write_none_rule_details(capsys):
    """A summary-only result produces no detail output at all."""
    result = create_result("rule name here", {Level.INFO: [("summary",)]})
    Report.write_rule_details(result)
    captured = capsys.readouterr()
    assert not captured.out
예제 #23
0
def test_write_rule_details(capsys, message, expected_details):
    """Details are rendered as a text/markdown display payload."""
    result = create_result("rule name here", message)
    Report.write_rule_details(result)
    expected_output = f"{{'text/markdown': '{expected_details}'}}\n"
    assert capsys.readouterr().out == expected_output