Esempio n. 1
0
 def __init__(self, test_session=None):
     """Build the coverage providers and VIAF client used by this object.

     :param test_session: Optional database session. When truthy it is
         stored as ``self._session`` before any provider is created.
     """
     # Allows tests to run without db session overlap.
     if test_session:
         self._session = test_session
     # NOTE(review): presumably self._db resolves to self._session once
     # one has been set -- confirm against the enclosing class.
     self.coverage = LinkedDataCoverageProvider(self._db)
     self.oclc_classify = OCLCClassifyCoverageProvider(self._db)
     self.viaf = VIAFClient(self._db)
Esempio n. 2
0
 def __init__(self, _db, *args, **kwargs):
     """Constructor.

     :param _db: Database session, handed to any default client built
         here and to the superclass constructor.

     :param api: Optional keyword argument. If present it becomes
         ``self.api`` as-is; otherwise an OCLCLinkedData is created.

     :param viaf_api: Optional keyword argument. If present it becomes
         ``self.viaf`` as-is; otherwise a VIAFClient is created.

     Both keywords are removed from ``kwargs`` before the remaining
     arguments are forwarded to the superclass.
     """
     # Key presence (not truthiness) decides whether a default is built,
     # so an explicitly passed value is always honored.
     self.api = kwargs.pop('api') if 'api' in kwargs else OCLCLinkedData(_db)
     self.viaf = (
         kwargs.pop('viaf_api') if 'viaf_api' in kwargs else VIAFClient(_db)
     )
     super(LinkedDataCoverageProvider, self).__init__(_db, *args, **kwargs)
    def __init__(self, collection, *args, **kwargs):
        """Constructor.

        :param collection: Collection to cover. The database session is
            obtained from it via Session.object_session.

        :param api: Optional keyword argument; when missing or falsy an
            OCLCLinkedData is created instead.

        :param viaf: Optional keyword argument; when missing or falsy a
            VIAFClient is created instead.

        ``registered_only=True`` is always passed to the superclass,
        overriding any value supplied by the caller.
        """
        _db = Session.object_session(collection)

        # Take each client out of kwargs, falling back to a default one.
        self.api = kwargs.pop('api', None) or OCLCLinkedData(_db)
        self.viaf = kwargs.pop('viaf', None) or VIAFClient(_db)

        kwargs['registered_only'] = True
        parent = super(LinkedDataCoverageProvider, self)
        parent.__init__(collection, *args, **kwargs)
Esempio n. 4
0
    def __init__(self, collection, viaf=None, **kwargs):
        """Constructor.

        :param collection: Collection to cover. The database session is
            obtained from it via Session.object_session.

        :param viaf: VIAF client to use; when falsy a VIAFClient is
            created on the collection's session.

        :param api_class: Optional keyword argument (default
            OverdriveAPI). A callable is handed to
            generic_overdrive_api; anything else is used directly as
            the API object.

        :raises CannotLoadConfiguration: if no usable API object results.
        """
        _db = Session.object_session(collection)
        api_class = kwargs.pop('api_class', OverdriveAPI)

        # A non-callable API 'class' is actually an object, probably a
        # mock, so it is used as-is.
        api = (
            self.generic_overdrive_api(_db, api_class)
            if callable(api_class) else api_class
        )
        if not api:
            raise CannotLoadConfiguration(
                """OverdriveBibliographicCoverageProvider requires at least one fully configured Overdrive collection."""
            )

        if not viaf:
            viaf = VIAFClient(_db)
        self.viaf = viaf

        kwargs['registered_only'] = True
        super(OverdriveBibliographicCoverageProvider, self).__init__(
            collection, api_class=api, **kwargs
        )
 def __init__(self, _db, oclcld=None, viaf=None):
     """Constructor.

     :param _db: Database session; stored and used for default clients.
     :param oclcld: OCLC Linked Data client; when falsy an
         OCLCLinkedData is created.
     :param viaf: VIAF client; when falsy a VIAFClient is created.
     """
     self._db = _db

     if not oclcld:
         oclcld = OCLCLinkedData(_db)
     self.oclcld = oclcld

     if not viaf:
         viaf = VIAFClient(_db)
     self.viaf = viaf

     self.log = logging.getLogger("Author name canonicalizer")
Esempio n. 6
0
    def __init__(self,
                 collection,
                 uploader=None,
                 viaf_client=None,
                 linked_data_coverage_provider=None,
                 content_cafe_api=None,
                 overdrive_api_class=OverdriveAPI,
                 **kwargs):
        """Constructor.

        :param collection: Passed through to the superclass constructor.

        :param uploader: Uploader handed to the image mirror and the
            image scaler. When falsy, one is obtained from
            S3Uploader.from_config.

        :param viaf_client: VIAF client; when falsy a VIAFClient is
            created.

        :param linked_data_coverage_provider: Provider stored as
            ``self.oclc_linked_data``; when falsy a
            LinkedDataCoverageProvider sharing the VIAF client is created.

        :param content_cafe_api: Stored as-is as ``self.content_cafe_api``.

        :param overdrive_api_class: Passed to create_overdrive_api.
        """
        parent = super(IdentifierResolutionCoverageProvider, self)
        parent.__init__(collection, **kwargs)

        # This is the metadata wrangler, so every resource we find gets
        # mirrored to S3.
        uploader = uploader or S3Uploader.from_config(self._db)
        self.uploader = uploader

        # Recalculate presentation aggressively: either the work has not
        # been set up at all, or an earlier attempt to set it up failed.
        self.policy = PresentationCalculationPolicy(
            regenerate_opds_entries=True
        )

        self.overdrive_api = self.create_overdrive_api(overdrive_api_class)

        self.content_cafe_api = content_cafe_api

        # Work out the required and the optional coverage providers.
        # Every Identifier in this Collection's catalog is run through
        # all of the relevant ones.
        required, optional = self.providers()
        self.required_coverage_providers = required
        self.optional_coverage_providers = optional

        # Client used whenever a contributor has to be looked up via VIAF.
        if not viaf_client:
            viaf_client = VIAFClient(self._db)
        self.viaf_client = viaf_client

        # Books are never looked up in OCLC Linked Data directly, because
        # no Collection identifies a book by its OCLC Number. Looking a
        # book up through OCLC Classify may associate OCLC Numbers with
        # it, though, and _those_ numbers can be run through OCLC Linked
        # Data.
        #
        # TODO: Many books arrive identified by ISBN and _could_ go
        # through a LinkedDataCoverageProvider if it worked a little
        # differently. That is unlikely to be very useful: those books
        # get looked up through OCLC Classify anyway, which will probably
        # lead us to the same ISBN via OCLC Number.
        if not linked_data_coverage_provider:
            linked_data_coverage_provider = LinkedDataCoverageProvider(
                self._db, viaf_api=self.viaf_client)
        self.oclc_linked_data = linked_data_coverage_provider

        # The ordinary OverdriveBibliographicCoverageProvider does not
        # upload images, so we build our own mirror and scaler.
        #
        # TODO: Subclassing OverdriveBibliographicCoverageProvider to do
        # the scaling and uploading would make this class neater.
        self.image_mirrors = {
            DataSource.OVERDRIVE: OverdriveCoverImageMirror(
                self._db, uploader=uploader),
        }
        self.image_scaler = ImageScaler(
            self._db, self.image_mirrors.values(), uploader=uploader)
Esempio n. 7
0
 def __init__(self, viaf=None):
     """Constructor.

     :param viaf: VIAF client to use; when falsy a VIAFClient is
         created from ``self._db``.
     """
     if not viaf:
         viaf = VIAFClient(self._db)
     self.viaf = viaf
Esempio n. 8
0
 def run(self):
     """Use VIAF to fill in information for all author names.

     A VIAFClient is created on ``self._db`` and run with ``self.force``.
     """
     client = VIAFClient(self._db)
     client.run(self.force)
Esempio n. 9
0
 def setup(self):
     """Run the parent setup, then create the VIAFClient under test
     (on ``self._db``) and a logger for this test class."""
     super(TestVIAFClient, self).setup()
     self.client = VIAFClient(self._db)
     self.log = logging.getLogger("VIAF Client Test")
Esempio n. 10
0
from pdb import set_trace
import os
import sys
bin_dir = os.path.split(__file__)[0]
package_dir = os.path.join(bin_dir, "..")
sys.path.append(os.path.abspath(package_dir))

import re
from core.model import (
    production_session,
    Contributor,
)
from viaf import VIAFClient

_db = production_session()
viaf_client = VIAFClient(_db)
from sqlalchemy.sql import text

# Pick out contributors whose display name starts with "Q" followed by a
# digit, ordered by id.
qid_pattern = text("contributors.display_name ~ '^Q[0-9]'")
contributors = _db.query(Contributor).filter(qid_pattern).order_by(
    Contributor.id)
print(contributors.count())
for contributor in contributors:
    # Look up by VIAF id when the contributor has one, by name otherwise.
    if contributor.viaf:
        result = viaf_client.lookup_by_viaf(contributor.viaf)
    else:
        result = viaf_client.lookup_by_name(contributor.name)
    viaf, display_name, family_name, sort_name, wikipedia_name = result

    # Report the old and new display/wikipedia names before overwriting.
    summary = (contributor.id, contributor.display_name, display_name,
               contributor.wikipedia_name, wikipedia_name)
    print("%s: %s => %s, %s => %s" % summary)
    contributor.display_name = display_name
# NOTE(review): nothing visible in this script commits or flushes _db, so
# the display_name updates may never be persisted -- confirm whether that
# is intentional (dry run) or an omission.
Esempio n. 11
0
class IdentifierResolutionCoverageProvider(CatalogCoverageProvider):
    """Make sure all Identifiers associated with some Collection become
    Works.

    Coverage happens by running the Identifier through _other_
    CoverageProviders, which fill in the blanks with data from
    third-party entities.

    This CoverageProvider may force those other CoverageProviders to
    do their work for each Identifier immediately, or it may simply
    register its Identifiers with those CoverageProviders and allow
    them to complete the work at their own pace.

    Unlike most CoverageProviders, which are invoked from a script,
    this CoverageProvider is invoked from
    URNLookupController.process_urns, and only when a client expresses
    a desire that we look into a specific identifier.
    """

    SERVICE_NAME = "Identifier Resolution Coverage Provider"
    DATA_SOURCE_NAME = DataSource.INTERNAL_PROCESSING

    # These are the only identifier types we have any hope of providing
    # insight into.
    INPUT_IDENTIFIER_TYPES = [
        Identifier.OVERDRIVE_ID,
        Identifier.ISBN,
        Identifier.URI,
    ]
    OPERATION = CoverageRecord.RESOLVE_IDENTIFIER_OPERATION

    # We cover all Collections, regardless of their protocol.
    PROTOCOL = None

    def __init__(self,
                 collection,
                 mirror=None,
                 http_get=None,
                 viaf=None,
                 provide_coverage_immediately=False,
                 force=False,
                 provider_kwargs=None,
                 **kwargs):
        """Constructor.

        :param collection: Handle all Identifiers from this Collection
        that were previously registered with this CoverageProvider.

        :param mirror: A MirrorUploader to use if coverage requires
        uploading any cover images to external storage. If none is
        given, the sitewide MirrorUploader is used; if that cannot be
        loaded, an error is logged and ``self.mirror`` stays unset
        (falsy).

        :param http_get: A drop-in replacement for
        Representation.simple_http_get, to be used if any information
        (such as a book cover) needs to be obtained from the public
        Internet.

        :param viaf: A VIAFClient to use if coverage requires
        gathering information about authors from VIAF. If none is
        given, a new VIAFClient is created.

        :param force: Force CoverageProviders to cover identifiers
        even if they believe they have already done the work. This is
        implied when provide_coverage_immediately is True.

        :param provide_coverage_immediately: If this is True, then
        resolving an identifier means registering it with all of its
        other CoverageProviders *and then attempting to provide
        coverage*.  Registration is considered a success even if the
        other CoverageProviders fail, but the attempt must be made
        immediately.

        If this is False (the default), then resolving an identifier
        just means registering it with all other relevant
        CoverageProviders.

        :param provider_kwargs: Pass this object in as provider_kwargs
        when calling gather_providers at the end of the
        constructor. Used only in testing.

        :param kwargs: Any additional keyword arguments are passed on
        to the superclass constructor.

        """
        _db = Session.object_session(collection)

        # Since we are the metadata wrangler, any resources we find,
        # we mirror using the sitewide MirrorUploader.
        if not mirror:
            try:
                mirror = MirrorUploader.sitewide(_db)
            except CannotLoadConfiguration as e:
                # Best effort: carry on without a mirror rather than
                # refusing to start.
                logging.error(
                    "No storage integration is configured. Cover images will not be stored anywhere.",
                    exc_info=e)
        self.mirror = mirror

        # We're going to be aggressive about recalculating the presentation
        # for this work because either the work is currently not set up
        # at all, or something went wrong trying to set it up.
        presentation = PresentationCalculationPolicy(
            regenerate_opds_entries=True)
        replacement_policy = ReplacementPolicy.from_metadata_source(
            presentation_calculation_policy=presentation,
            mirror=self.mirror,
            http_get=http_get,
        )
        super(IdentifierResolutionCoverageProvider,
              self).__init__(collection,
                             replacement_policy=replacement_policy,
                             **kwargs)

        self.provide_coverage_immediately = provide_coverage_immediately
        # Providing coverage immediately always forces the other providers.
        self.force = force or provide_coverage_immediately

        self.viaf = viaf or VIAFClient(self._db)

        # Instantiate the coverage providers that may be relevant to
        # any given Identifier.
        #
        # Each Identifier in this Collection's catalog will be registered
        # with all relevant providers (if provide_coverage_immediately
        # is False) or immediately covered by all relevant providers
        # (if provide_coverage_immediately is True).
        self.providers = self.gather_providers(provider_kwargs)