class TestVIAFClient(DatabaseTest):
    """Tests for VIAFClient lookups and contributor processing.

    HTTP responses come from a DummyHTTPClient preloaded with sample
    VIAF XML files rather than from a live request.
    """

    def setup(self):
        """Build the VIAFClient under test and a logger for the mock client."""
        super(TestVIAFClient, self).setup()
        self.client = VIAFClient(self._db)
        self.log = logging.getLogger("VIAF Client Test")

    def sample_data(self, filename):
        """Return the contents of `filename` from the "viaf" sample set."""
        return sample_data(filename, "viaf")

    def queue_file_in_mock_http(self, filename):
        """Return a DummyHTTPClient with one queued 200 response.

        The response body is the sample file `filename`, served as
        'text/xml'.
        """
        mock_http = DummyHTTPClient()
        body = self.sample_data(filename)
        mock_http.queue_response(200, media_type='text/xml', content=body)
        return mock_http

    def test_process_contributor(self):
        """process_contributor updates, merges, or leaves contributors alone."""
        mock_client = MockVIAFClientLookup(self._db, self.log)
        contributor = self._contributor()[0]

        # An empty lookup result (what VIAFParser#parse_multiple can
        # produce) must leave the contributor untouched.
        mock_client.queue_lookup([])
        mock_client.process_contributor(contributor)
        eq_(contributor.sort_name, '2001')
        eq_(contributor.display_name, None)

        def prime_mindy_lookup():
            # Run a real lookup against the canned XML and hand its
            # result to the mock client as the next lookup outcome.
            mock_http = self.queue_file_in_mock_http("mindy_kaling.xml")
            result = self.client.lookup_by_viaf(
                viaf="9581122", do_get=mock_http.do_get
            )
            mock_client.results = [result]

        # A successful lookup fills in the contributor's names.
        prime_mindy_lookup()
        mock_client.process_contributor(contributor)
        eq_(contributor.sort_name, "Kaling, Mindy")
        eq_(contributor.display_name, "Mindy Kaling")

        # When another contributor already carries the same VIAF number,
        # that original record receives the VIAF data and the contributor
        # being processed is merged into it.
        earliest_contributor = contributor
        # Blank the sort name so the later assertion proves it was rewritten.
        earliest_contributor.sort_name = None

        # A second contributor with a contribution lets us observe the merge.
        contributor = self._contributor()[0]
        edition = self._edition(authors=contributor.sort_name)
        eq_(edition.contributors, {contributor})

        prime_mindy_lookup()
        mock_client.process_contributor(contributor)
        eq_(earliest_contributor.sort_name, "Kaling, Mindy")
        eq_(edition.contributors, {earliest_contributor})
        # The merged-away contributor is gone from the session.
        assert contributor not in self._db

        # If the original contributor's display name differs suspiciously
        # from VIAF's, the new contributor is updated but NOT merged.
        earliest_contributor.display_name = "Mindy L. Kaling"
        earliest_contributor.sort_name = None
        contributor = self._contributor()[0]
        edition = self._edition(authors=contributor.sort_name)

        prime_mindy_lookup()
        mock_client.process_contributor(contributor)
        eq_(contributor.viaf, "9581122")
        eq_(contributor.sort_name, "Kaling, Mindy")
        # The earlier record was neither refreshed nor used as merge target.
        eq_(earliest_contributor.sort_name, None)
        assert earliest_contributor not in edition.contributors

    def test_lookup_by_viaf(self):
        """Looking up a VIAF id yields the matching candidate."""
        # there can be one and only one Mindy
        mock_http = self.queue_file_in_mock_http("mindy_kaling.xml")

        lookup_result = self.client.lookup_by_viaf(
            viaf="9581122", do_get=mock_http.do_get
        )
        selected_candidate, match_confidences, contributor_titles = lookup_result
        eq_(selected_candidate.viaf, "9581122")
        eq_(selected_candidate.sort_name, "Kaling, Mindy")

    def test_lookup_by_name(self):
        """Looking up by name yields the matching candidate."""
        # there can be one and only one Mindy
        mock_http = self.queue_file_in_mock_http("mindy_kaling.xml")

        lookup_result = self.client.lookup_by_name(
            sort_name="Mindy Kaling", do_get=mock_http.do_get
        )
        selected_candidate, match_confidences, contributor_titles = lookup_result
        eq_(selected_candidate.viaf, "9581122")
        eq_(selected_candidate.sort_name, "Kaling, Mindy")
Esempio n. 2
0
    Contributor,
)
from viaf import VIAFClient

from sqlalchemy.sql import text

# One-off repair script: re-derive name fields from VIAF for every
# contributor whose display_name matches the pattern ^Q[0-9], then
# recalculate the presentation of everything they contributed to.
# NOTE(review): such display names are presumably identifiers stored
# by mistake -- confirm against the data before running.
_db = production_session()
viaf_client = VIAFClient(_db)

# `~` is applied as raw SQL via text(); ordering by id keeps the run
# deterministic.
contributors = _db.query(Contributor).filter(
    text("contributors.display_name ~ '^Q[0-9]'")
).order_by(Contributor.id)

# Single-argument print(...) behaves the same as the print statement on
# Python 2 and is also valid on Python 3.
print(contributors.count())

for contributor in contributors:
    # Prefer the exact VIAF id when we have one; otherwise search by name.
    if contributor.viaf:
        lookup = viaf_client.lookup_by_viaf(contributor.viaf)
    else:
        lookup = viaf_client.lookup_by_name(contributor.name)
    # The lookup is performed exactly once per contributor. A second
    # lookup_by_viaf call whose result was never read (and which ran even
    # when contributor.viaf was empty) has been dropped.
    viaf, display_name, family_name, sort_name, wikipedia_name = lookup

    print("%s: %s => %s, %s => %s" % (
        contributor.id,
        contributor.display_name, display_name,
        contributor.wikipedia_name, wikipedia_name,
    ))

    contributor.display_name = display_name
    contributor.wikipedia_name = wikipedia_name
    contributor.family_name = family_name

    # Refresh whatever presents this contributor: the work when the
    # edition has one, otherwise the edition itself.
    for contribution in contributor.contributions:
        edition = contribution.edition
        if edition.work:
            edition.work.calculate_presentation()
        else:
            edition.calculate_presentation()

    # Commit per contributor so progress survives a mid-run failure.
    _db.commit()
Esempio n. 3
0
class TestVIAFClient(DatabaseTest):
    """Checks VIAFClient name/id lookups and process_contributor.

    Lookups are fed canned XML through a DummyHTTPClient's `do_get`.
    """

    def setup(self):
        """Prepare a VIAFClient on the test session plus a test logger."""
        super(TestVIAFClient, self).setup()
        self.client = VIAFClient(self._db)
        self.log = logging.getLogger("VIAF Client Test")

    def sample_data(self, filename):
        """Load the named sample file from the "viaf" directory."""
        return sample_data(filename, "viaf")

    def queue_file_in_mock_http(self, filename):
        """Queue `filename` as a 200 'text/xml' response on a new dummy client."""
        dummy = DummyHTTPClient()
        payload = self.sample_data(filename)
        dummy.queue_response(200, media_type='text/xml', content=payload)
        return dummy

    def test_process_contributor(self):
        """Walk process_contributor through no-op, update, merge and no-merge."""
        lookup_client = MockVIAFClientLookup(self._db, self.log)
        contributor = self._contributor()[0]

        # Case 1: the lookup yields an empty list (as
        # VIAFParser#parse_multiple may) -- nothing changes.
        lookup_client.queue_lookup([])
        lookup_client.process_contributor(contributor)
        eq_(contributor.sort_name, '2001')
        eq_(contributor.display_name, None)

        def stage_lookup():
            # Resolve the sample record through the real client, then make
            # it the mock client's pending result.
            dummy = self.queue_file_in_mock_http("mindy_kaling.xml")
            staged = self.client.lookup_by_viaf(viaf="9581122",
                                                do_get=dummy.do_get)
            lookup_client.results = [staged]

        # Case 2: a successful lookup updates the contributor's names.
        stage_lookup()
        lookup_client.process_contributor(contributor)
        eq_(contributor.sort_name, "Kaling, Mindy")
        eq_(contributor.display_name, "Mindy Kaling")

        # Case 3: a contributor with this VIAF number already exists, so
        # the existing one gets the VIAF data and the newly processed one
        # is merged into it.
        earliest_contributor = contributor
        # Clearing the sort name makes the data refresh observable.
        earliest_contributor.sort_name = None

        # Fresh contributor + contribution, to make the merge observable.
        contributor = self._contributor()[0]
        edition = self._edition(authors=contributor.sort_name)
        eq_(edition.contributors, {contributor})

        stage_lookup()
        lookup_client.process_contributor(contributor)
        eq_(earliest_contributor.sort_name, "Kaling, Mindy")
        eq_(edition.contributors, {earliest_contributor})
        # The newer contributor no longer exists in the session.
        assert contributor not in self._db

        # Case 4: the existing contributor's display name is suspiciously
        # unlike VIAF's, so the new contributor is updated on its own and
        # no merge happens.
        earliest_contributor.display_name = "Mindy L. Kaling"
        earliest_contributor.sort_name = None
        contributor = self._contributor()[0]
        edition = self._edition(authors=contributor.sort_name)

        stage_lookup()
        lookup_client.process_contributor(contributor)
        eq_(contributor.viaf, "9581122")
        eq_(contributor.sort_name, "Kaling, Mindy")
        # The earlier contributor stayed as it was and took no contribution.
        eq_(earliest_contributor.sort_name, None)
        assert earliest_contributor not in edition.contributors

    def test_lookup_by_viaf(self):
        """lookup_by_viaf returns the candidate for the requested VIAF id."""
        # there can be one and only one Mindy
        dummy = self.queue_file_in_mock_http("mindy_kaling.xml")

        (selected_candidate,
         match_confidences,
         contributor_titles) = self.client.lookup_by_viaf(
             viaf="9581122", do_get=dummy.do_get)
        eq_(selected_candidate.viaf, "9581122")
        eq_(selected_candidate.sort_name, "Kaling, Mindy")

    def test_lookup_by_name(self):
        """lookup_by_name returns the candidate matching the given name."""
        # there can be one and only one Mindy
        dummy = self.queue_file_in_mock_http("mindy_kaling.xml")

        contributor_candidate = self.client.lookup_by_name(
            sort_name="Mindy Kaling", do_get=dummy.do_get)
        (selected_candidate,
         match_confidences,
         contributor_titles) = contributor_candidate
        eq_(selected_candidate.viaf, "9581122")
        eq_(selected_candidate.sort_name, "Kaling, Mindy")
    Contributor, 
)
from viaf import VIAFClient

# Maintenance script: for each contributor whose display_name matches
# ^Q[0-9], look the contributor up in VIAF, overwrite its display,
# Wikipedia and family names with the lookup result, and recalculate the
# presentation of each related work/edition.
_db = production_session()
viaf_client = VIAFClient(_db)
from sqlalchemy.sql import text
# The pattern match is passed through as raw SQL via text(); results are
# ordered by contributor id.
contributors = _db.query(Contributor).filter(
    text("contributors.display_name ~ '^Q[0-9]'")
).order_by(Contributor.id)
# Report how many contributors will be processed.
print contributors.count()
for contributor in contributors:
    # Look up by VIAF id when the contributor has one, otherwise by name.
    if contributor.viaf:
        viaf, display_name, family_name, sort_name, wikipedia_name = viaf_client.lookup_by_viaf(contributor.viaf)
    else:
        viaf, display_name, family_name, sort_name, wikipedia_name = viaf_client.lookup_by_name(contributor.name)
    # Log "id: old display => new display, old wikipedia => new wikipedia".
    print "%s: %s => %s, %s => %s" % (
        contributor.id, 
        contributor.display_name, display_name,
        contributor.wikipedia_name, wikipedia_name
    )
    contributor.display_name = display_name
    contributor.wikipedia_name = wikipedia_name
    contributor.family_name = family_name
    # NOTE(review): the values unpacked by this second lookup are not read
    # again in the visible code, and contributor.viaf may be empty here if
    # the name-based branch ran above -- this call looks redundant; confirm
    # before removing.
    viaf, display_name, family_name, sort_name, wikipedia_name = viaf_client.lookup_by_viaf(contributor.viaf)
    # Recalculate presentation on the work when the edition has one,
    # otherwise on the edition itself.
    for contribution in contributor.contributions:
        edition = contribution.edition
        if edition.work:
            edition.work.calculate_presentation()
        else:
            edition.calculate_presentation()
    # NOTE(review): no _db.commit() is visible after this point in this
    # view -- confirm the changes are committed somewhere.