Beispiel #1
0
    async def test_post_parse(self):
        """Check what is marked private after parsing with ``Privatizer`` enabled.

        A person with a birth presence, a private source owning a file, and a
        private citation owning a file are placed in the site's ancestry.
        After ``parse(site)`` the person and both files must report a truthy
        ``private`` value.
        """
        individual = Person('P0')
        # The presence object is not kept; it is created only for whatever it
        # registers on the person and event during construction.
        Presence(individual, Subject(), Event(Birth()))

        private_source = IdentifiableSource('S0', 'The Source')
        private_source.private = True
        source_file = File('F0', __file__)
        private_source.files.append(source_file)

        private_citation = IdentifiableCitation('C0', Source('The Source'))
        private_citation.private = True
        citation_file = File('F0', __file__)
        private_citation.files.append(citation_file)

        with TemporaryDirectory() as output_directory_path:
            configuration = Configuration(
                output_directory_path, 'https://example.com')
            # Enable the plugin under test without any plugin configuration.
            configuration.plugins[Privatizer] = None
            async with Site(configuration) as site:
                ancestry = site.ancestry
                ancestry.people[individual.id] = individual
                ancestry.sources[private_source.id] = private_source
                ancestry.citations[private_citation.id] = private_citation
                await parse(site)

            self.assertTrue(individual.private)
            self.assertTrue(source_file.private)
            self.assertTrue(citation_file.private)
Beispiel #2
0
 def test_should_remove_contained_by(self) -> None:
     """After ``anonymize_source()`` the source's ``contained_by`` is ``None``."""
     identifiable = IdentifiableSource('S0', 'The Source')
     identifiable.contained_by = Source('The Source')
     anonymize_source(identifiable, AnonymousSource())
     self.assertIsNone(identifiable.contained_by)
Beispiel #3
0
 def test_with_private_source_should_anonymize(
         self, m_anonymize_source) -> None:
     """``anonymize()`` hands a private source to ``anonymize_source`` once.

     ``m_anonymize_source`` is the mock standing in for ``anonymize_source``;
     the second call argument is not inspected (``ANY``).
     """
     private_source = IdentifiableSource('S0', 'The Source')
     private_source.private = True
     tree = Ancestry()
     tree.sources[private_source.id] = private_source
     anonymize(tree)
     m_anonymize_source.assert_called_once_with(private_source, ANY)
Beispiel #4
0
 def test_with_public_source_should_not_anonymize(
         self, m_anonymize_source) -> None:
     """``anonymize()`` never calls ``anonymize_source`` for a public source.

     ``m_anonymize_source`` is the mock standing in for ``anonymize_source``.
     """
     public_source = IdentifiableSource('S0', 'The Source')
     public_source.private = False
     tree = Ancestry()
     tree.sources[public_source.id] = public_source
     anonymize(tree)
     m_anonymize_source.assert_not_called()
Beispiel #5
0
 def test_privatize_source_should_not_privatize_if_public(self):
     """A public source stays public and its file's privacy stays ``None``."""
     public_source = IdentifiableSource('S0', 'The Source')
     public_source.private = False
     attachment = File('F0', __file__)
     public_source.files.append(attachment)

     privatize_source(public_source)

     # Compare against the literal False: the value must not merely be falsy.
     self.assertEqual(False, public_source.private)
     self.assertIsNone(attachment.private)
Beispiel #6
0
 def test_privatize_source_should_privatize_if_private(self):
     """``privatize_source()`` on a private source also makes its file private."""
     private_source = IdentifiableSource('S0', 'The Source')
     private_source.private = True
     attachment = File('F0', __file__)
     private_source.files.append(attachment)

     privatize_source(private_source)

     self.assertTrue(private_source.private)
     self.assertTrue(attachment.private)
Beispiel #7
0
 def test_privatize_source_should_privatize_if_private(self):
     """``privatize()`` on an ancestry makes a private source's file private."""
     private_source = IdentifiableSource('S0', 'The Source')
     private_source.private = True
     attachment = File('F0', __file__)
     private_source.files.append(attachment)

     tree = Ancestry()
     tree.sources[private_source.id] = private_source
     privatize(tree)

     self.assertTrue(private_source.private)
     self.assertTrue(attachment.private)
Beispiel #8
0
    def test_clean_should_not_clean_source_with_contains(self) -> None:
        """``clean()`` keeps both a containing source and the source it contains.

        The containment link between the two must also survive.
        """
        ancestry = Ancestry()

        container = IdentifiableSource('S0', 'The Source')
        ancestry.sources[container.id] = container

        contained = IdentifiableSource('S1', 'The Source')
        contained.contained_by = container
        ancestry.sources[contained.id] = contained

        clean(ancestry)

        self.assertEqual(container, ancestry.sources[container.id])
        self.assertEqual(container, contained.contained_by)
        self.assertEqual(contained, ancestry.sources[contained.id])
Beispiel #9
0
 def test_source_should_encode_full(self):
     """Encode a source that has every optional attribute populated.

     The source gets an author, publisher, date, a containing source, a
     contained source, an external link and a citation; the encoded result
     is compared against the expected dictionary via ``assert_encodes``.
     """
     source = IdentifiableSource('the_source', 'The Source')
     source.author = 'The Author'
     source.publisher = 'The Publisher'
     source.date = Date(2000, 1, 1)
     source.contained_by = IdentifiableSource(
         'the_containing_source', 'The Containing Source')

     online_link = Link('https://example.com/the-source')
     online_link.label = 'The Source Online'
     source.links.add(online_link)

     source.contains.append(
         IdentifiableSource('the_contained_source', 'The Contained Source'))
     # The citation is not kept; the expected output below lists it under
     # the source's citations.
     IdentifiableCitation('the_citation', source)

     expected_links = [
         {
             'url': '/en/source/the_source/index.json',
             'relationship': 'canonical',
             'mediaType': 'application/json',
         },
         {
             'url': '/nl/source/the_source/index.json',
             'relationship': 'alternate',
             'locale': 'nl-NL',
         },
         {
             'url': '/en/source/the_source/index.html',
             'relationship': 'alternate',
             'mediaType': 'text/html',
         },
         {
             'url': 'https://example.com/the-source',
             'label': 'The Source Online',
         },
     ]
     expected = {
         '$schema': '/schema.json#/definitions/source',
         '@context': {'name': 'https://schema.org/name'},
         '@type': 'https://schema.org/Thing',
         'id': 'the_source',
         'name': 'The Source',
         'author': 'The Author',
         'publisher': 'The Publisher',
         'contains': ['/en/source/the_contained_source/index.json'],
         'citations': ['/en/citation/the_citation/index.json'],
         'containedBy': '/en/source/the_containing_source/index.json',
         'date': {'year': 2000, 'month': 1, 'day': 1},
         'links': expected_links,
     }
     self.assert_encodes(expected, source, 'source')
Beispiel #10
0
    def test_clean_should_not_clean_event_with_presences_with_people(
            self) -> None:
        """``clean()`` keeps an event that a person is present at.

        The event's place, citation and file, and their links back to the
        event, must be retained as well.
        """
        ancestry = Ancestry()

        cited_source = IdentifiableSource('S1', 'The Source')
        ancestry.sources[cited_source.id] = cited_source

        citation = IdentifiableCitation('C1', cited_source)
        ancestry.citations[citation.id] = citation

        attachment = File('F1', __file__)
        ancestry.files[attachment.id] = attachment

        location = Place('P0', [PlaceName('The Place')])
        ancestry.places[location.id] = location

        # The person is deliberately not added to the ancestry; only the
        # presence below ties the person to the event.
        attendee = Person('P0')

        birth = IdentifiableEvent('E0', Birth())
        birth.citations.append(citation)
        birth.files.append(attachment)
        birth.place = location
        ancestry.events[birth.id] = birth

        Presence(attendee, Subject(), birth)

        clean(ancestry)

        self.assertEqual(birth, ancestry.events[birth.id])
        self.assertIn(birth, location.events)
        self.assertEqual(location, ancestry.places[location.id])
        self.assertIn(birth, citation.facts)
        self.assertEqual(citation, ancestry.citations[citation.id])
        self.assertIn(birth, attachment.resources)
        self.assertEqual(attachment, ancestry.files[attachment.id])
Beispiel #11
0
 def test_citation_should_encode_full(self):
     """Encode a citation that has a source, a description and one fact.

     The encoded result is compared against the expected dictionary via
     ``assert_encodes``; the expected dictionary carries no description key.
     """
     cited_source = IdentifiableSource('the_source', 'The Source')
     citation = IdentifiableCitation('the_citation', cited_source)
     citation.description = 'The Source Description'
     citation.facts.append(IdentifiableEvent('the_event', Birth()))

     expected_links = [
         {
             'url': '/en/citation/the_citation/index.json',
             'relationship': 'canonical',
             'mediaType': 'application/json',
         },
         {
             'url': '/nl/citation/the_citation/index.json',
             'relationship': 'alternate',
             'locale': 'nl-NL',
         },
         {
             'url': '/en/citation/the_citation/index.html',
             'relationship': 'alternate',
             'mediaType': 'text/html',
         },
     ]
     expected = {
         '$schema': '/schema.json#/definitions/citation',
         '@type': 'https://schema.org/Thing',
         'id': 'the_citation',
         'source': '/en/source/the_source/index.json',
         'facts': ['/en/event/the_event/index.json'],
         'links': expected_links,
     }
     self.assert_encodes(expected, citation, 'citation')
Beispiel #12
0
 def test_source_should_encode_minimal(self):
     """Encode a source that only has an ID and a name.

     The expected output has empty ``contains`` and ``citations`` lists and
     the three generated links.
     """
     bare_source = IdentifiableSource('the_source', 'The Source')

     expected_links = [
         {
             'url': '/en/source/the_source/index.json',
             'relationship': 'canonical',
             'mediaType': 'application/json',
         },
         {
             'url': '/nl/source/the_source/index.json',
             'relationship': 'alternate',
             'locale': 'nl-NL',
         },
         {
             'url': '/en/source/the_source/index.html',
             'relationship': 'alternate',
             'mediaType': 'text/html',
         },
     ]
     expected = {
         '$schema': '/schema.json#/definitions/source',
         '@context': {'name': 'https://schema.org/name'},
         '@type': 'https://schema.org/Thing',
         'id': 'the_source',
         'name': 'The Source',
         'contains': [],
         'citations': [],
         'links': expected_links,
     }
     self.assert_encodes(expected, bare_source, 'source')
Beispiel #13
0
    async def test_populate_should_populate_existing_link(
            self, m_retriever) -> None:
        """Populating fills in the details of an existing Wikipedia link.

        ``m_retriever`` is a mock retriever whose ``get_entry`` returns a
        prepared entry. After ``populate()`` the single link on the resource
        must carry the label, locale, media type, a description and the
        ``external`` relationship.
        """
        language = 'en'
        name = 'Amsterdam'
        m_retriever.get_entry.return_value = Entry(
            language, name, 'Amsterdam', 'Capitol of the Netherlands')

        resource = IdentifiableSource('the_source', 'The Source')
        link = Link('https://en.wikipedia.org/wiki/Amsterdam')
        resource.links.add(link)

        with TemporaryDirectory() as output_directory_path, \
                TemporaryDirectory() as cache_directory_path:
            configuration = Configuration(
                output_directory_path, 'https://example.com')
            configuration.cache_directory_path = cache_directory_path
            async with Site(configuration) as site:
                site.ancestry.sources[resource.id] = resource
                await _Populator(site, m_retriever).populate()

        m_retriever.get_entry.assert_called_once_with(language, name)
        self.assertEqual(1, len(resource.links))
        self.assertEqual('Amsterdam', link.label)
        self.assertEqual('en', link.locale)
        self.assertEqual(MediaType('text/html'), link.media_type)
        self.assertIsNotNone(link.description)
        self.assertEqual('external', link.relationship)
Beispiel #14
0
    def test_clean_should_clean_event(self) -> None:
        """``clean()`` removes an event nobody is present at.

        The event's place, citation and file are expected to be removed from
        the ancestry too, and their links to the event must be gone.
        """
        ancestry = Ancestry()

        cited_source = IdentifiableSource('S1', 'The Source')
        ancestry.sources[cited_source.id] = cited_source

        citation = IdentifiableCitation('C1', cited_source)
        ancestry.citations[citation.id] = citation

        attachment = File('F1', __file__)
        ancestry.files[attachment.id] = attachment

        location = Place('P0', [PlaceName('The Place')])
        ancestry.places[location.id] = location

        # No presence is created for this event.
        birth = IdentifiableEvent('E0', Birth())
        birth.citations.append(citation)
        birth.files.append(attachment)
        birth.place = location
        ancestry.events[birth.id] = birth

        clean(ancestry)

        self.assertNotIn(birth.id, ancestry.events)
        self.assertIsNone(birth.place)
        self.assertNotIn(birth, location.events)
        self.assertNotIn(location.id, ancestry.places)
        self.assertNotIn(birth, citation.facts)
        self.assertNotIn(citation.id, ancestry.citations)
        self.assertNotIn(birth, attachment.resources)
        self.assertNotIn(attachment.id, ancestry.files)
Beispiel #15
0
def _load_repository(loader: _Loader, element: ElementTree.Element) -> None:
    """Turn a repository element into a source and store it on the loader.

    The source takes its ID from the element's ``id`` attribute and its name
    from the text of the ``ns:rname`` child. URLs are loaded through
    ``_load_urls``. The result is stored in ``loader._sources`` under the
    element's ``handle`` attribute.
    """
    repository_handle = element.get('handle')

    repository_name = _xpath1(element, './ns:rname').text
    repository = IdentifiableSource(element.get('id'), repository_name)

    _load_urls(repository, element)

    loader._sources[repository_handle] = repository
Beispiel #16
0
 def test_should_remove_files(self) -> None:
     """``anonymize_source()`` moves a source's files to the anonymous source.

     After the call the original source must have no files left, and the
     file must be found on the anonymous source instead.
     """
     source = IdentifiableSource('S0', 'The Source')
     file = File('F0', __file__)
     source.files.append(file)
     anonymous_source = AnonymousSource()
     anonymize_source(source, anonymous_source)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(0, len(source.files))
     self.assertIn(file, anonymous_source.files)
Beispiel #17
0
 def test_should_remove_contains(self) -> None:
     """``anonymize_source()`` moves contained sources to the anonymous source.

     After the call the original source must contain nothing, and the
     contained source must be found on the anonymous source instead.
     """
     source = IdentifiableSource('S0', 'The Source')
     contains = Source('The Source')
     source.contains.append(contains)
     anonymous_source = AnonymousSource()
     anonymize_source(source, anonymous_source)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(0, len(source.contains))
     self.assertIn(contains, anonymous_source.contains)
Beispiel #18
0
 def test_should_remove_citations(self) -> None:
     """``anonymize_source()`` moves a source's citations to the anonymous source.

     After the call the original source must have no citations left, and the
     citation must be found on the anonymous source instead.
     """
     source = IdentifiableSource('S0', 'The Source')
     citation = Citation(source)
     source.citations.append(citation)
     anonymous_source = AnonymousSource()
     anonymize_source(source, anonymous_source)
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(0, len(source.citations))
     self.assertIn(citation, anonymous_source.citations)
Beispiel #19
0
    def test_clean_should_clean_source(self) -> None:
        """``clean()`` removes a source that nothing else refers to."""
        ancestry = Ancestry()
        lonely_source = IdentifiableSource('S0', 'The source')
        ancestry.sources[lonely_source.id] = lonely_source

        clean(ancestry)

        self.assertNotIn(lonely_source.id, ancestry.sources)
Beispiel #20
0
def _parse_repository(ancestry: _IntermediateAncestry,
                      element: Element) -> None:
    """Turn a repository element into a source and store it in the ancestry.

    The source takes its ID from the element's ``id`` attribute and its name
    from the text of the ``ns:rname`` child. URLs are parsed through
    ``_parse_urls``. The result is stored in ``ancestry.sources`` under the
    element's ``handle`` attribute.
    """
    repository_handle = _xpath1(element, './@handle')

    repository_id = _xpath1(element, './@id')
    repository_name = _xpath1(element, './ns:rname').text
    repository = IdentifiableSource(repository_id, repository_name)

    _parse_urls(repository, element)

    ancestry.sources[repository_handle] = repository
Beispiel #21
0
    async def test_populate_should_add_translation_links(
            self, m_retriever) -> None:
        """Populating adds a link for a translation of an existing entry.

        ``m_retriever`` is a mock retriever that yields an English and then a
        Dutch entry, and reports both as translations. With ``en-US`` and
        ``nl-NL`` configured as the only locales, the resource must end up
        with two links, the new one describing the Dutch entry.
        """
        sut = Populator(m_retriever)

        english = Entry(
            'en', 'Amsterdam', 'Amsterdam', 'Capitol of the Netherlands')
        dutch = Entry(
            'nl', 'Amsterdam', 'Amsterdam', 'Hoofdstad van Nederland')
        m_retriever.get_entry.side_effect = [english, dutch]
        m_retriever.get_translations.return_value = {
            'en': 'Amsterdam',
            'nl': 'Amsterdam',
        }

        resource = IdentifiableSource('the_source', 'The Source')
        link_en = Link('https://en.wikipedia.org/wiki/Amsterdam')
        resource.links.add(link_en)
        ancestry = Ancestry()
        ancestry.sources[resource.id] = resource

        with TemporaryDirectory() as output_directory_path, \
                TemporaryDirectory() as cache_directory_path:
            configuration = Configuration(
                output_directory_path, 'https://example.com')
            configuration.cache_directory_path = cache_directory_path
            # Replace whatever locales are configured by default with
            # exactly the two this test needs.
            configuration.locales.clear()
            configuration.locales['en-US'] = LocaleConfiguration(
                'en-US', 'en')
            configuration.locales['nl-NL'] = LocaleConfiguration(
                'nl-NL', 'nl')
            async with Site(configuration) as site:
                await sut.populate(ancestry, site)

        m_retriever.get_entry.assert_has_calls([
            call('en', 'Amsterdam'),
            call('nl', 'Amsterdam'),
        ])
        m_retriever.get_translations.assert_called_once_with(
            'en', 'Amsterdam')
        self.assertEqual(2, len(resource.links))
        # The only link that is not the original English one is the new one.
        link_nl = resource.links.difference({link_en}).pop()
        self.assertEqual('Amsterdam', link_nl.label)
        self.assertEqual('nl', link_nl.locale)
        self.assertEqual('text/html', link_nl.media_type)
        self.assertIsNotNone(link_nl.description)
        self.assertEqual('external', link_nl.relationship)
Beispiel #22
0
 async def test_populate_should_ignore_resource_without_links(
         self, m_retriever) -> None:
     """Populating leaves a resource without links without any links."""
     linkless = IdentifiableSource('the_source', 'The Source')
     with TemporaryDirectory() as output_directory_path, \
             TemporaryDirectory() as cache_directory_path:
         configuration = Configuration(
             output_directory_path, 'https://example.com')
         configuration.cache_directory_path = cache_directory_path
         async with Site(configuration) as site:
             site.ancestry.sources[linkless.id] = linkless
             await _Populator(site, m_retriever).populate()
     self.assertSetEqual(set(), linkless.links)
Beispiel #23
0
    def test_clean_should_clean_citation(self) -> None:
        """``clean()`` removes a citation that has no facts.

        The citation must be gone from the ancestry and from its source's
        citations.
        """
        ancestry = Ancestry()

        cited_source = IdentifiableSource('S0', 'The source')
        ancestry.sources[cited_source.id] = cited_source

        factless = IdentifiableCitation('C0', cited_source)
        ancestry.citations[factless.id] = factless

        clean(ancestry)

        self.assertNotIn(factless.id, ancestry.citations)
        self.assertNotIn(factless, cited_source.citations)
Beispiel #24
0
def _load_source(loader: _Loader, element: ElementTree.Element) -> None:
    """Turn a source element into a source and store it on the loader.

    The source takes its ID from the element's ``id`` attribute and its name
    from the text of the ``ns:stitle`` child. When present, ``ns:reporef``
    links the source to an already loaded repository source, and
    ``ns:sauthor`` / ``ns:spubinfo`` supply the author and publisher. The
    result is stored in ``loader._sources`` under the element's ``handle``
    attribute.
    """
    source_handle = element.get('handle')

    title = _xpath1(element, './ns:stitle').text
    source = IdentifiableSource(element.get('id'), title)

    # Link the source to the repository it refers to, if any. The repository
    # is looked up among the sources the loader already holds.
    reporef = _xpath1(element, './ns:reporef')
    if reporef is not None:
        source.contained_by = loader._sources[reporef.get('hlink')]

    # Copy the optional author and publication info, in that order.
    optional_texts = (
        ('./ns:sauthor', 'author'),
        ('./ns:spubinfo', 'publisher'),
    )
    for query, attribute_name in optional_texts:
        found = _xpath1(element, query)
        if found is not None:
            setattr(source, attribute_name, found.text)

    _load_objref(loader, source, element)
    _load_attribute_privacy(source, element, 'srcattribute')

    loader._sources[source_handle] = source
Beispiel #25
0
    def test_clean_should_not_clean_source_with_files(self) -> None:
        """``clean()`` keeps a source that a file refers to, and the file."""
        ancestry = Ancestry()

        filed_source = IdentifiableSource('S0', 'The Source')
        ancestry.sources[filed_source.id] = filed_source

        attachment = File('F0', __file__)
        attachment.resources.append(filed_source)
        ancestry.files[attachment.id] = attachment

        clean(ancestry)

        self.assertEqual(filed_source, ancestry.sources[filed_source.id])
        # NOTE(review): the source was attached through ``resources`` but is
        # looked up through ``sources``; presumably ``sources`` is derived
        # from ``resources`` - confirm against the File class.
        self.assertIn(filed_source, attachment.sources)
        self.assertEqual(attachment, ancestry.files[attachment.id])
Beispiel #26
0
def _parse_source(ancestry: _IntermediateAncestry, element: Element) -> None:
    """Turn a source element into a source and store it in the ancestry.

    The source takes its ID from the element's ``id`` attribute and its name
    from the text of the ``ns:stitle`` child. When present, the ``hlink`` of
    ``ns:reporef`` links the source to an already parsed repository source,
    and ``ns:sauthor`` / ``ns:spubinfo`` supply the author and publisher.
    The result is stored in ``ancestry.sources`` under the element's
    ``handle`` attribute.
    """
    source_handle = _xpath1(element, './@handle')

    source_id = _xpath1(element, './@id')
    title = _xpath1(element, './ns:stitle').text
    source = IdentifiableSource(source_id, title)

    # Link the source to the repository it refers to, if any. The repository
    # is looked up among the sources already held by the ancestry.
    repository_handle = _xpath1(element, './ns:reporef/@hlink')
    if repository_handle is not None:
        source.contained_by = ancestry.sources[repository_handle]

    # Copy the optional author and publication info, in that order.
    optional_texts = (
        ('./ns:sauthor', 'author'),
        ('./ns:spubinfo', 'publisher'),
    )
    for query, attribute_name in optional_texts:
        found = _xpath1(element, query)
        if found is not None:
            setattr(source, attribute_name, found.text)

    _parse_objref(ancestry, source, element)
    _parse_attribute_privacy(source, element, 'srcattribute')

    ancestry.sources[source_handle] = source
Beispiel #27
0
    def test_clean_should_not_clean_source_with_citations(self) -> None:
        """``clean()`` keeps a source whose citation has a fact.

        The citation and its link to the source must be retained as well.
        """
        ancestry = Ancestry()

        cited_source = IdentifiableSource('S0', 'The Source')
        ancestry.sources[cited_source.id] = cited_source

        citation = IdentifiableCitation('C0', cited_source)
        citation.facts.append(PersonName('Jane'))
        ancestry.citations[citation.id] = citation

        clean(ancestry)

        self.assertEqual(cited_source, ancestry.sources[cited_source.id])
        self.assertEqual(cited_source, citation.source)
        self.assertEqual(citation, ancestry.citations[citation.id])
Beispiel #28
0
 async def test_populate_should_ignore_non_wikipedia_links(
         self, m_retriever) -> None:
     """Populating leaves a resource's only, non-Wikipedia link as it was."""
     other_link = Link('https://example.com')
     resource = IdentifiableSource('the_source', 'The Source')
     resource.links.add(other_link)
     with TemporaryDirectory() as output_directory_path, \
             TemporaryDirectory() as cache_directory_path:
         configuration = Configuration(
             output_directory_path, 'https://example.com')
         configuration.cache_directory_path = cache_directory_path
         async with App(configuration) as app:
             app.ancestry.sources[resource.id] = resource
             await _Populator(app, m_retriever).populate()
     self.assertSetEqual({other_link}, resource.links)
Beispiel #29
0
    async def test_post_parse(self, m_aioresponses) -> None:
        """Parsing a site with ``Wikipedia`` enabled populates a Wikipedia link.

        ``m_aioresponses`` serves prepared payloads for the entry and the
        translations API URLs. After ``parse(site)`` the resource must still
        have exactly one link, labelled with the entry title from the API
        response and carrying locale, media type, description and the
        ``external`` relationship.
        """
        resource = IdentifiableSource('the_source', 'The Source')
        link = Link('https://en.wikipedia.org/wiki/Amsterdam')
        resource.links.add(link)

        # Prepared response for the entry lookup.
        entry_title = 'Amstelredam'
        entry_page = {
            'title': entry_title,
            'extract': 'Capitol of the Netherlands',
        }
        m_aioresponses.get(
            'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=extracts&exintro&format=json&formatversion=2',
            payload={'query': {'pages': [entry_page]}},
        )

        # Prepared response for the translations lookup: no language links.
        m_aioresponses.get(
            'https://en.wikipedia.org/w/api.php?action=query&titles=Amsterdam&prop=langlinks&lllimit=500&format=json&formatversion=2',
            payload={'query': {'pages': [{'langlinks': []}]}},
        )

        with TemporaryDirectory() as output_directory_path, \
                TemporaryDirectory() as cache_directory_path:
            configuration = Configuration(
                output_directory_path, 'https://example.com')
            configuration.cache_directory_path = cache_directory_path
            # Enable the plugin under test without any plugin configuration.
            configuration.plugins[Wikipedia] = None
            async with Site(configuration) as site:
                site.ancestry.sources[resource.id] = resource
                await parse(site)

        self.assertEqual(1, len(resource.links))
        self.assertEqual(entry_title, link.label)
        self.assertEqual('en', link.locale)
        self.assertEqual(MediaType('text/html'), link.media_type)
        self.assertIsNotNone(link.description)
        self.assertEqual('external', link.relationship)
Beispiel #30
0
    def test_clean_should_not_clean_citation_with_facts(self) -> None:
        """``clean()`` keeps a citation that a person cites.

        The person and the person's link to the citation must be retained
        as well.
        """
        ancestry = Ancestry()

        cited_source = IdentifiableSource('S0', 'The Source')
        ancestry.sources[cited_source.id] = cited_source

        citation = IdentifiableCitation('C0', cited_source)
        citation.facts.append(PersonName('Jane'))
        ancestry.citations[citation.id] = citation

        citing_person = Person('P0')
        citing_person.citations.append(citation)
        ancestry.people[citing_person.id] = citing_person

        clean(ancestry)

        self.assertEqual(citation, ancestry.citations[citation.id])
        self.assertIn(citation, citing_person.citations)
        self.assertEqual(citing_person, ancestry.people[citing_person.id])