def parse_page(page, url, dump=False):
    """
    Parse the page using all parsing methods available.

    The Twitter fields are parsed first, then the article content. Both results are merged into the same
    report, the article values being applied last.

    :param str page: the content of the fatality page
    :param str url: detail page URL
    :param bool dump: when true, write `page` to a file in `constant.DUMP_DIR` named after the last
        non-empty path segment of `url`
    :return: the report updated with the Twitter and article fields.
    :rtype: model.Report
    """
    # NOTE(review): the case number looks like a placeholder that the parsed fields are expected to
    # overwrite -- confirm against `model.Report`.
    report = model.Report(case='19-123456')

    # Parse the twitter fields.
    twitter_report, twitter_err = twitter.parse(page)
    report.update(twitter_report)

    # Parse the page.
    article_report, article_err = article.parse_content(page)
    report.update(article_report)

    if twitter_err or article_err:  # pragma: no cover
        # Each section is only rendered when its parser reported at least one error.
        twitter_err_str = ('\nTwitter fields:\n\t * ' + '\n\t * '.join(twitter_err)) if twitter_err else ''
        article_err_str = ('\nArticle fields:\n\t * ' + '\n\t * '.join(article_err)) if article_err else ''
        logger.debug(f'Errors while parsing {url}:{twitter_err_str}{article_err_str}')

    # Dump the file.
    if dump:
        dump_dir = Path(constant.DUMP_DIR)
        dump_dir.mkdir(parents=True, exist_ok=True)
        # Strip trailing slashes first: otherwise a URL ending with "/" yields an empty file name, and
        # `dump_dir / ''` is the directory itself, which cannot be written to.
        dump_file_name = url.rstrip('/').rsplit('/', 1)[-1]
        dump_file = dump_dir / dump_file_name
        dump_file.write_text(page)

    return report
def test_parse_page_content_02(self, page, expected, errors):
    """Check the parsing of special-case content against the expected report or error count."""
    report, parse_errors = article.parse_content(page)

    # As soon as errors are expected or reported, only their number is compared.
    if errors or parse_errors:
        assert errors == len(parse_errors)
        return

    assert report == expected
def test_parse_page_content_00(self, page, expected, errors):
    """Check that parsing a stored test page yields the expected report or error count."""
    content = load_test_page(page)
    report, parse_errors = article.parse_content(content)

    # As soon as errors are expected or reported, only their number is compared.
    if errors or parse_errors:
        assert errors == len(parse_errors)
        return

    assert report == expected
def test_dumped_page(page_dump):
    """
    Parse a previously dumped page to make offline debugging possible.

    Run the following command:
      `pytest -s -n0 -x -vvv -m dump`
    """
    try:
        dumped_page = load_dumped_page(page_dump)
    except FileNotFoundError:
        # Re-raise with a hint explaining how to create the missing dump.
        raise FileNotFoundError(f'Dump file "{page_dump}" not found: run "scrapd --dump" first.')

    # Neither parser may report an error on the dumped page.
    _, twitter_errors = twitter.parse(dumped_page)
    assert not twitter_errors

    _, article_errors = article.parse_content(dumped_page)
    assert not article_errors
def test_parse_page_content_03(self, page, expected, errors):
    """Ensure parsing the page raises a `ValueError` stating that a date is mandatory."""
    expected_message = 'a date is mandatory'
    with pytest.raises(ValueError, match=expected_message):
        _report, _parse_errors = article.parse_content(page)
def test_parse_page_content_01(self):
    """Ensure content without a case number makes the parser raise a `ValueError`."""
    content = 'There is no case number here.'
    with pytest.raises(ValueError):
        article.parse_content(content)