async def fetch_and_parse(session, url):
    """
    Fetch a fatality detail page and parse every deceased person found on it.

    :param aiohttp.ClientSession session: aiohttp session
    :param str url: detail page URL
    :return: one dictionary per parsed person, each augmented with the page
        link and a unique ID.
    :rtype: list
    :raises ValueError: if the fetched page is empty, or if no entry could be
        extracted from it.
    """
    # Retrieve the page.
    page = await fetch_detail_page(session, url)
    if not page:
        raise ValueError(f'The URL {url} returned a 0-length content.')

    # Parse it and decorate each record.
    entries = []
    for person_index, d in enumerate(parsing.parse_page(page, url)):
        # Add the link.
        d[Fields.LINK] = url

        # Add a unique ID: the position suffix keeps records that share the
        # same case number distinguishable.
        d[Fields.ID] = f"{d[Fields.CASE]}-{person_index}"
        entries.append(d)

    if not entries:
        raise ValueError(f'No data could be extracted from the page {url}.')

    return entries
def test_parse_page_00(filename, expected):
    """
    Check that the first record parsed from a test page matches the expectation.

    The 'Notes' entry is dropped from the parsed record before comparing when
    the expected data does not define one.
    """
    html = (TEST_DATA_DIR / filename).read_text()
    first_record = next(parsing.parse_page(html, fake.uri()))

    # Only compare notes when the scenario explicitly expects them.
    notes_not_expected = 'Notes' not in expected
    if 'Notes' in first_record and notes_not_expected:
        del first_record['Notes']

    assert first_record == expected
def test_no_DOB_field_when_DOB_not_provided():
    """
    Check that a page stating only an age ("Hispanic male, 19 years of age")
    yields a first record with no truthy DOB value.
    """
    html = (TEST_DATA_DIR / 'traffic-fatality-20-4').read_text()
    records = parsing.parse_page(html, 'fake_url')
    first_record = next(records)
    dob = first_record.get(Fields.DOB)
    assert not dob
def test_parse_page_01(mocker, filename, expected):
    """
    Ensure parse_page calls parse_page_content exactly once, even when the
    latter reports parsing errors.

    ``expected`` is unused here; it is kept so the signature stays unchanged.
    """
    data = {}
    parsing_errors = ['one error']
    page_fd = TEST_DATA_DIR / filename
    page = page_fd.read_text()
    pc = mocker.patch('scrapd.core.parsing.parse_page_content', return_value=(data, parsing_errors))

    # Exhaust the result: other tests in this module drive parse_page with
    # next(), so merely calling it may not execute any parsing at all.
    list(parsing.parse_page(page, fake.uri()))

    # `pc.called_once` is not a mock assertion: it is an auto-created (always
    # truthy) attribute on older Pythons and raises AttributeError on 3.12+.
    pc.assert_called_once()
def test_multiple_deceased(filename, expected):
    """
    Check the second record yielded by parsing.parse_page.

    Every key listed in ``expected`` must be present in that second record
    with an equal value; keys absent from ``expected`` are not compared.
    """
    records = parsing.parse_page(load_test_page(filename), 'fake_url')

    # Discard the first record; only the second one is under test.
    next(records)
    second_record = next(records)

    for field in expected:
        assert second_record[field] == expected[field]
def test_parse_page_get_location(filename, expected):
    """Check that the 'Location' value of the first parsed record matches the expectation."""
    html = (TEST_DATA_DIR / filename).read_text()
    first_record = next(parsing.parse_page(html, fake.uri()))
    location = first_record['Location']
    assert location == expected
def test_parse_page_with_missing_data():
    """Check that a page holding only a case number yields no record at all."""
    incomplete_page = "Case: 19-1234567"
    record_iter = parsing.parse_page(incomplete_page, fake.uri())

    # Asking for a first record must signal exhaustion immediately.
    with pytest.raises(StopIteration):
        next(record_iter)