async def test_filter(self, m_aioresponses) -> None:
    """Check that the ``wikipedia`` template filter renders the retrieved extract.

    Three links are fed through the filter while a single mocked API response
    is registered on ``m_aioresponses`` for the English-language Amsterdam
    entry. The rendered template output must equal that entry's extract.

    :param m_aioresponses: HTTP mock on which the API response is registered
        (presumably injected by a decorator outside this view).
    """
    entry_url = 'https://en.wikipedia.org/wiki/Amsterdam'
    links = [
        Link(entry_url),
        # Add a link to Wikipedia, but using a locale that's not used by the site, to test it's ignored.
        Link('https://nl.wikipedia.org/wiki/Amsterdam'),
        # Add a link that doesn't point to Wikipedia at all to test it's ignored.
        Link('https://example.com'),
    ]
    api_url = 'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2'
    title = 'Amstelredam'
    extract = 'De hoofdstad van Nederland.'
    api_response_body = {
        'query': {
            'pages': [
                {
                    'title': title,
                    'extract': extract,
                },
            ],
        }
    }
    m_aioresponses.get(api_url, payload=api_response_body)
    with TemporaryDirectory() as output_directory_path:
        with TemporaryDirectory() as cache_directory_path:
            configuration = Configuration(
                output_directory_path, 'https://ancestry.example.com')
            configuration.cache_directory_path = cache_directory_path
            configuration.plugins[Wikipedia] = None
            async with Site(configuration) as site:
                actual = await site.jinja2_environment.from_string(
                    '{% for entry in (links | wikipedia) %}{{ entry.content }}{% endfor %}'
                ).render_async(links=links)
    # Use assertEqual: the assertEquals alias is deprecated and no longer
    # exists on TestCase in Python 3.12+.
    self.assertEqual(extract, actual)
async def test_populate_should_populate_existing_link(self, m_retriever) -> None:
    """Populating must fill in the metadata of an existing Wikipedia link.

    The mock retriever returns a fixed English entry. After ``populate()``
    the source must still have exactly one link, and that link must carry a
    label, locale, media type, description and relationship.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    entry_language = 'en'
    entry_name = 'Amsterdam'
    m_retriever.get_entry.return_value = Entry(
        entry_language, entry_name, 'Amsterdam', 'Capitol of the Netherlands')

    resource = IdentifiableSource('the_source', 'The Source')
    link = Link('https://en.wikipedia.org/wiki/Amsterdam')
    resource.links.add(link)

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with Site(configuration) as site:
            site.ancestry.sources[resource.id] = resource
            populator = _Populator(site, m_retriever)
            await populator.populate()

    # The retriever is consulted exactly once, for the linked entry.
    m_retriever.get_entry.assert_called_once_with(entry_language, entry_name)
    self.assertEqual(1, len(resource.links))
    self.assertEqual('Amsterdam', link.label)
    self.assertEqual('en', link.locale)
    self.assertEqual(MediaType('text/html'), link.media_type)
    self.assertIsNotNone(link.description)
    self.assertEqual('external', link.relationship)
async def test_populate_should_ignore_resource_without_links(self, m_retriever) -> None:
    """Populating must not add links to a source that has none.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    resource = Source('the_source', 'The Source')

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            app.ancestry.entities.append(resource)
            populator = _Populator(app, m_retriever)
            await populator.populate()

    # The link collection must still be empty afterwards.
    self.assertSetEqual(set(), resource.links)
async def test_populate_link_should_set_locale(
    self,
    expected: str,
    entry_language: str,
    locale: Optional[str],
    m_retriever,
) -> None:
    """Check the locale a link ends up with after ``populate_link()``.

    :param expected: locale the link must have after population.
    :param entry_language: language used both as the URL subdomain and as
        the language argument to ``populate_link()``.
    :param locale: locale assigned to the link before population.
    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('http://%s.wikipedia.org/wiki/Amsterdam' % entry_language)
    link.locale = locale

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            populator = _Populator(app, m_retriever)
            await populator.populate_link(link, entry_language)

    self.assertEqual(expected, link.locale)
async def test_populate_should_ignore_resource_without_link_support(self, m_retriever) -> None:
    """Populating must cope with a citation added to the ancestry.

    The test contains no assertions; it passes when ``populate()`` completes
    without raising.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    resource = Citation('the_citation', Source('The Source'))

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            app.ancestry.entities.append(resource)
            populator = _Populator(app, m_retriever)
            await populator.populate()
async def test_populate_link_should_set_relationship(
    self,
    expected: str,
    relationship: Optional[str],
    m_retriever,
) -> None:
    """Check the relationship a link ends up with after ``populate_link()``.

    :param expected: relationship the link must have after population.
    :param relationship: relationship assigned to the link before population.
    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('http://en.wikipedia.org/wiki/Amsterdam')
    link.relationship = relationship

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            populator = _Populator(app, m_retriever)
            await populator.populate_link(link, 'en')

    self.assertEqual(expected, link.relationship)
async def test_populate_link_should_convert_http_to_https(self, m_retriever) -> None:
    """``populate_link()`` must rewrite an ``http`` Wikipedia URL to ``https``.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('http://en.wikipedia.org/wiki/Amsterdam')
    entry_language = 'nl'

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            populator = _Populator(app, m_retriever)
            await populator.populate_link(link, entry_language)

    # Only the scheme is expected to change; host and path stay the same.
    self.assertEqual('https://en.wikipedia.org/wiki/Amsterdam', link.url)
async def test_populate_should_add_translation_links(self, m_retriever) -> None:
    """Populating must add a link for a translation in a configured locale.

    The source starts with one English link. The mock retriever reports an
    English and a Dutch translation, and the configuration is limited to the
    ``en-US`` and ``nl-NL`` locales. Afterwards the source must have two
    links, the new one being the populated Dutch link.

    :param m_retriever: mock retriever handed to ``Populator`` (presumably
        injected by a decorator outside this view).
    """
    sut = Populator(m_retriever)

    entry_language = 'en'
    entry_name = 'Amsterdam'
    added_entry_language = 'nl'
    added_entry_name = 'Amsterdam'
    # get_entry() yields the English entry first, then the Dutch one.
    m_retriever.get_entry.side_effect = [
        Entry(entry_language, entry_name, 'Amsterdam', 'Capitol of the Netherlands'),
        Entry(added_entry_language, added_entry_name, 'Amsterdam', 'Hoofdstad van Nederland'),
    ]
    m_retriever.get_translations.return_value = {
        entry_language: entry_name,
        added_entry_language: added_entry_name,
    }

    resource = IdentifiableSource('the_source', 'The Source')
    link_en = Link('https://en.wikipedia.org/wiki/Amsterdam')
    resource.links.add(link_en)
    ancestry = Ancestry()
    ancestry.sources[resource.id] = resource

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        # Replace whatever locales the configuration starts with.
        configuration.locales.clear()
        configuration.locales['en-US'] = LocaleConfiguration('en-US', 'en')
        configuration.locales['nl-NL'] = LocaleConfiguration('nl-NL', 'nl')
        async with Site(configuration) as site:
            await sut.populate(ancestry, site)

    expected_calls = [
        call(entry_language, entry_name),
        call(added_entry_language, added_entry_name),
    ]
    m_retriever.get_entry.assert_has_calls(expected_calls)
    m_retriever.get_translations.assert_called_once_with(entry_language, entry_name)
    self.assertEqual(2, len(resource.links))

    # The only link besides the original English one is the added Dutch link.
    link_nl = resource.links.difference({link_en}).pop()
    self.assertEqual('Amsterdam', link_nl.label)
    self.assertEqual('nl', link_nl.locale)
    self.assertEqual('text/html', link_nl.media_type)
    self.assertIsNotNone(link_nl.description)
    self.assertEqual('external', link_nl.relationship)
async def test_populate_should_ignore_resource_without_links(self, m_retriever) -> None:
    """Populating must not add links to an identifiable source that has none.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    resource = IdentifiableSource('the_source', 'The Source')

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with Site(configuration) as site:
            site.ancestry.sources[resource.id] = resource
            populator = _Populator(site, m_retriever)
            await populator.populate()

    # The link collection must still be empty afterwards.
    self.assertSetEqual(set(), resource.links)
async def test_populate_should_ignore_resource_without_link_support(self, m_retriever) -> None:
    """Populating must cope with a citation registered in the ancestry.

    The test contains no assertions; it passes when ``populate()`` completes
    without raising.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    resource = IdentifiableCitation('the_citation', Source('The Source'))

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with Site(configuration) as site:
            site.ancestry.citations[resource.id] = resource
            populator = _Populator(site, m_retriever)
            await populator.populate()
async def test_populate_link_should_set_label(
    self,
    expected: str,
    label: Optional[str],
    m_retriever,
) -> None:
    """Check the label a link ends up with after ``populate_link()``.

    A fixed entry titled ``The city of Amsterdam`` is passed to
    ``populate_link()`` together with the link.

    :param expected: label the link must have after population.
    :param label: label assigned to the link before population.
    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('http://en.wikipedia.org/wiki/Amsterdam')
    link.label = label
    entry = Entry(
        'en',
        'The_city_of_Amsterdam',
        'The city of Amsterdam',
        'Amsterdam, such a lovely place!',
    )

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            populator = _Populator(app, m_retriever)
            await populator.populate_link(link, 'en', entry)

    self.assertEqual(expected, link.label)
async def test_populate_link_should_set_description(
    self,
    expected: str,
    description: str,
    m_retriever,
) -> None:
    """Check the description a link ends up with after ``populate_link()``.

    :param expected: description the link must have after population.
    :param description: description assigned to the link before population.
    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('http://en.wikipedia.org/wiki/Amsterdam')
    link.description = description
    entry_language = 'en'

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with Site(configuration) as site:
            populator = _Populator(site, m_retriever)
            await populator.populate_link(link, entry_language)

    self.assertEqual(expected, link.description)
async def test_populate_link_should_set_media_type(
    self,
    expected: MediaType,
    media_type: Optional[MediaType],
    m_retriever,
) -> None:
    """Check the media type a link ends up with after ``populate_link()``.

    :param expected: media type the link must have after population.
    :param media_type: media type assigned to the link before population.
    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('http://en.wikipedia.org/wiki/Amsterdam')
    link.media_type = media_type

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with Site(configuration) as site:
            populator = _Populator(site, m_retriever)
            await populator.populate_link(link, 'en')

    self.assertEqual(expected, link.media_type)
async def test_populate_should_ignore_non_wikipedia_links(self, m_retriever) -> None:
    """Populating must leave a source's non-Wikipedia link set unchanged.

    :param m_retriever: mock retriever handed to ``_Populator`` (presumably
        injected by a decorator outside this view).
    """
    link = Link('https://example.com')
    resource = IdentifiableSource('the_source', 'The Source')
    resource.links.add(link)

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        async with App(configuration) as app:
            app.ancestry.sources[resource.id] = resource
            populator = _Populator(app, m_retriever)
            await populator.populate()

    # The source must still hold exactly the one original link.
    self.assertSetEqual({link}, resource.links)
async def test_post_parse(self, m_aioresponses) -> None:
    """Parsing a site with the Wikipedia plugin enabled must populate links.

    Two mocked API responses are registered on ``m_aioresponses``: one with
    the entry's title and extract, one with an empty list of language links.
    After ``parse()`` the source must still have exactly one link, populated
    with the entry title as its label.

    :param m_aioresponses: HTTP mock on which the API responses are
        registered (presumably injected by a decorator outside this view).
    """
    resource = IdentifiableSource('the_source', 'The Source')
    link = Link('https://en.wikipedia.org/wiki/Amsterdam')
    resource.links.add(link)

    # Mocked response for the entry extract request.
    entry_title = 'Amstelredam'
    entry_extract = 'Capitol of the Netherlands'
    entry_page = {
        'title': entry_title,
        'extract': entry_extract,
    }
    m_aioresponses.get(
        'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2',
        payload={'query': {'pages': [entry_page]}},
    )

    # Mocked response for the translations request: no language links.
    translations_page = {
        'langlinks': [],
    }
    m_aioresponses.get(
        'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=langlinks&lllimit=500&format=json&formatversion=2',
        payload={'query': {'pages': [translations_page]}},
    )

    with TemporaryDirectory() as output_dir, TemporaryDirectory() as cache_dir:
        configuration = Configuration(output_dir, 'https://example.com')
        configuration.cache_directory_path = cache_dir
        configuration.plugins[Wikipedia] = None
        async with Site(configuration) as site:
            site.ancestry.sources[resource.id] = resource
            await parse(site)

    self.assertEqual(1, len(resource.links))
    self.assertEqual(entry_title, link.label)
    self.assertEqual('en', link.locale)
    self.assertEqual(MediaType('text/html'), link.media_type)
    self.assertIsNotNone(link.description)
    self.assertEqual('external', link.relationship)