コード例 #1
0
 def process_urn(self, urn, collection=None, **kwargs):
     """Turn a URN into a Work suitable for use in an OPDS feed.

     :param urn: URN string identifying the requested work.
     :param collection: Accepted for interface compatibility; not used
         in this excerpt.
     """
     try:
         identifier, is_new = Identifier.parse_urn(self._db, urn)
     except ValueError:
         # parse_urn() raises ValueError on a recognizable but invalid
         # URN; signal the failure downstream with None instead.
         identifier = None
コード例 #2
0
class AnnotationParser(object):
    """Parse a JSON-LD Web Annotation document submitted by a patron."""

    @classmethod
    def parse(cls, _db, data, patron):
        """Validate an annotation document and resolve its target.

        :param _db: Database session.
        :param data: Raw JSON string of the annotation document.
        :param patron: The patron submitting the annotation.
        :return: A ProblemDetail constant when validation fails.
        """
        # Patrons must explicitly opt in before we store annotations.
        if patron.synchronize_annotations != True:
            return PATRON_NOT_OPTED_IN_TO_ANNOTATION_SYNC

        try:
            data = json.loads(data)
            # jsonld.expand() can't handle an explicit null @id; drop it.
            if 'id' in data and data['id'] is None:
                del data['id']
            data = jsonld.expand(data)
        except ValueError:
            return INVALID_ANNOTATION_FORMAT

        # Expansion yields a list; we expect exactly one annotation.
        if not data or not len(data) == 1:
            return INVALID_ANNOTATION_TARGET
        data = data[0]

        target = data.get("http://www.w3.org/ns/oa#hasTarget")
        if not target or not len(target) == 1:
            return INVALID_ANNOTATION_TARGET
        target = target[0]

        source = target.get("http://www.w3.org/ns/oa#hasSource")

        if not source or not len(source) == 1:
            return INVALID_ANNOTATION_TARGET
        source = source[0].get('@id')

        try:
            identifier, ignore = Identifier.parse_urn(_db, source)
        except ValueError:
            # An unparseable source URN means the target is invalid.
            return INVALID_ANNOTATION_TARGET
コード例 #3
0
ファイル: controller.py プロジェクト: rskm1/metadata_wrangler
 def process_urn(self, urn, collection_details=None, **kwargs):
     """Turn a URN into a Work suitable for use in an OPDS feed.

     :param urn: URN string identifying the requested work.
     :param collection_details: Accepted for interface compatibility;
         not used in this excerpt.
     """
     try:
         identifier, is_new = Identifier.parse_urn(self._db, urn)
     except ValueError:
         # parse_urn() raises ValueError on a recognizable but invalid
         # URN; signal the failure downstream with None instead.
         identifier = None
コード例 #4
0
ファイル: opds.py プロジェクト: datalogics-tsmith/circulation
    def page(cls, _db, title, url, annotator=None,
             use_materialized_works=True):
        """Create a feed of content to preload on devices."""
        # Which URNs to preload is a site-wide configuration policy.
        urns = Configuration.policy(Configuration.PRELOADED_CONTENT)
        ids = [Identifier.parse_urn(_db, urn)[0].id for urn in urns]

        if use_materialized_works:
            from core.model import MaterializedWork
            query = _db.query(MaterializedWork).filter(
                MaterializedWork.primary_identifier_id.in_(ids)
            )
            # These objects already live in the materialized view, so
            # skip SQLAlchemy's eager loading for them.
            query = query.options(
                lazyload(MaterializedWork.license_pool, LicensePool.data_source),
                lazyload(MaterializedWork.license_pool, LicensePool.identifier),
                lazyload(MaterializedWork.license_pool, LicensePool.edition),
            )
        else:
            query = _db.query(Work).join(Work.primary_edition).filter(
                Edition.primary_identifier_id.in_(ids)
            )

        feed = cls(_db, title, url, query.all(), annotator)
        annotator.annotate_feed(feed, None)
        return unicode(feed)
コード例 #5
0
    def test_run_once(self):
        """run_once() processes queued OPDS update feeds and returns a
        new timestamp for the monitor.
        """
        # Setup authentication and Metadata Wrangler details.
        lp = self._licensepool(
            None, data_source_name=DataSource.BIBLIOTHECA,
            collection=self.collection
        )
        lp.identifier.type = Identifier.BIBLIOTHECA_ID
        isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
        # Declare the pool's identifier equivalent to the ISBN so that
        # metadata found for the ISBN applies to the pool.
        lp.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
        )
        # No links or measurements yet -- the monitor will add them.
        eq_([], lp.identifier.links)
        eq_([], lp.identifier.measurements)

        # Queue some data to be found.
        responses = (
            'metadata_updates_response.opds',
            'metadata_updates_empty_response.opds',
        )
        for filename in responses:
            data = sample_data(filename, 'opds')
            self.lookup.queue_response(
                200, {'content-type' : OPDSFeed.ACQUISITION_FEED_TYPE}, data
            )

        timestamp = self.ts
        new_timestamp = self.monitor.run_once(timestamp)

        # We have a new value to use for the Monitor's timestamp -- the
        # earliest date seen in the last OPDS feed that contained
        # any entries.
        eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp.finish)
        eq_("Editions processed: 1", new_timestamp.achievements)

        # Normally run_once() doesn't update the monitor's timestamp,
        # but this implementation does, so that work isn't redone if
        # run_once() crashes or the monitor is killed.
        eq_(new_timestamp.finish, self.monitor.timestamp().finish)

        # The original Identifier has information from the
        # mock Metadata Wrangler.
        mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        eq_(3, len(lp.identifier.links))
        [quality] = lp.identifier.measurements
        eq_(mw_source, quality.data_source)

        # Check the URLs we processed.
        url1, url2 = [x[0] for x in self.lookup.requests]

        # The first URL processed was the default one for the
        # MetadataWranglerOPDSLookup.
        eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

        # The second URL processed was whatever we saw in the 'next' link.
        eq_("http://next-link/", url2)
コード例 #6
0
    def test_run_once(self):
        """run_once() ingests two queued OPDS update feeds and records
        a new timestamp for the monitor.
        """
        # Set up authentication and Metadata Wrangler details.
        pool = self._licensepool(
            None,
            data_source_name=DataSource.BIBLIOTHECA,
            collection=self.collection,
        )
        pool.identifier.type = Identifier.BIBLIOTHECA_ID
        equivalent = Identifier.parse_urn(self._db, "urn:isbn:9781594632556")[0]
        pool.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), equivalent, 1
        )
        assert pool.identifier.links == []
        assert pool.identifier.measurements == []

        # Queue some data to be found.
        for name in ("metadata_updates_response.opds",
                     "metadata_updates_empty_response.opds"):
            self.lookup.queue_response(
                200,
                {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE},
                sample_data(name, "opds"),
            )

        timestamp = self.ts
        new_timestamp = self.monitor.run_once(timestamp)

        # The new timestamp value is the earliest date seen in the last
        # OPDS feed that contained any entries.
        assert new_timestamp.finish == datetime_utc(2016, 9, 20, 19, 37, 2)
        assert new_timestamp.achievements == "Editions processed: 1"

        # Unlike most monitors, this one persists its timestamp inside
        # run_once(), so work isn't redone after a crash or kill.
        assert self.monitor.timestamp().finish == new_timestamp.finish

        # The original Identifier picked up information from the mock
        # Metadata Wrangler.
        wrangler = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        assert len(pool.identifier.links) == 3
        [quality] = pool.identifier.measurements
        assert quality.data_source == wrangler

        # Check the URLs we processed.
        first_url, second_url = [req[0] for req in self.lookup.requests]

        # The first URL processed was the default one for the
        # MetadataWranglerOPDSLookup.
        assert first_url == self.lookup.get_collection_url(
            self.lookup.UPDATES_ENDPOINT)

        # The second came from the feed's 'next' link.
        assert second_url == "http://next-link/"
コード例 #7
0
    def parse_identifier(self, urn):
        """Try to parse a URN into an identifier.

        :return: An Identifier if possible; otherwise None.
        """
        if not urn:
            return None
        try:
            result = Identifier.parse_urn(self._db, urn, False)
        except ValueError:
            # The identifier is parseable but invalid, e.g. an
            # ASIN used as an ISBN. Ignore it.
            return None
コード例 #8
0
    def parse_identifier(self, urn):
        """Try to parse a URN into an identifier.

        :return: An Identifier if possible; otherwise None.
        """
        if not urn:
            return None
        try:
            result = Identifier.parse_urn(self._db, urn, False)
        except ValueError:
            # The identifier is parseable but invalid, e.g. an
            # ASIN used as an ISBN. Ignore it.
            return None
コード例 #9
0
    def test_run_once(self):
        """run_once() processes queued OPDS update feeds and returns
        the new timestamp value directly.
        """
        # Setup authentication and Metadata Wrangler details.
        lp = self._licensepool(None,
                               data_source_name=DataSource.BIBLIOTHECA,
                               collection=self.collection)
        lp.identifier.type = Identifier.BIBLIOTHECA_ID
        isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
        # Declare the pool's identifier equivalent to the ISBN so that
        # metadata found for the ISBN applies to the pool.
        lp.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1)
        # No links or measurements yet -- the monitor will add them.
        eq_([], lp.identifier.links)
        eq_([], lp.identifier.measurements)

        # Queue some data to be found.
        responses = (
            'metadata_updates_response.opds',
            'metadata_updates_empty_response.opds',
        )
        for filename in responses:
            data = sample_data(filename, 'opds')
            self.lookup.queue_response(
                200, {'content-type': OPDSFeed.ACQUISITION_FEED_TYPE}, data)

        new_timestamp = self.monitor.run_once(None, None)

        # We have a new value to use for the Monitor's timestamp -- the
        # earliest date seen in the last OPDS feed that contained
        # any entries.
        eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp)

        # Normally run_once() doesn't update the monitor's timestamp,
        # but this implementation does, so that work isn't redone if
        # run_once() crashes or the monitor is killed.
        eq_(new_timestamp, self.monitor.timestamp().timestamp)

        # The original Identifier has information from the
        # mock Metadata Wrangler.
        mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        eq_(3, len(lp.identifier.links))
        [quality] = lp.identifier.measurements
        eq_(mw_source, quality.data_source)

        # Check the URLs we processed.
        url1, url2 = [x[0] for x in self.lookup.requests]

        # The first URL processed was the default one for the
        # MetadataWranglerOPDSLookup.
        eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

        # The second URL processed was whatever we saw in the 'next' link.
        eq_("http://next-link/", url2)
コード例 #10
0
ファイル: controller.py プロジェクト: rskm1/metadata_wrangler
    def add_items(self, collection_details):
        """Adds identifiers to a Collection's catalog.

        :param collection_details: Metadata identifier naming the
            target Collection.
        :return: An OPDS feed response with one message per URN, or a
            ProblemDetail if the client isn't authenticated.
        """
        client = authenticated_client_from_request(self._db)
        if isinstance(client, ProblemDetail):
            return client

        collection, ignore = Collection.from_metadata_identifier(
            self._db, collection_details)

        urns = request.args.getlist('urn')
        messages = []
        for urn in urns:
            try:
                identifier, ignore = Identifier.parse_urn(self._db, urn)
            except Exception:
                # Any failure to parse is reported as an invalid URN
                # rather than aborting the whole request.
                identifier = None

            if not identifier:
                message = OPDSMessage(urn, INVALID_URN.status_code,
                                      INVALID_URN.detail)
            else:
                status = HTTP_OK
                description = "Already in catalog"

                if identifier not in collection.catalog:
                    collection.catalog_identifier(self._db, identifier)
                    status = HTTP_CREATED
                    description = "Successfully added"

                message = OPDSMessage(urn, status, description)

            messages.append(message)

        title = "%s Catalog Item Additions for %s" % (collection.protocol,
                                                      client.url)
        url = cdn_url_for("add",
                          collection_metadata_identifier=collection.name,
                          urn=urns)
        addition_feed = AcquisitionFeed(self._db,
                                        title,
                                        url, [],
                                        VerboseAnnotator,
                                        precomposed_entries=messages)

        return feed_response(addition_feed)
コード例 #11
0
 def handle_import_messages(self, messages_by_id):
     """Turn import messages from the OPDS importer into CoverageFailure
     objects.

     Yields one CoverageFailure per unsuccessful message; successful
     messages are skipped.
     """
     for urn, status_message in messages_by_id.items():
         # A message that indicates success needs no CoverageRecord.
         if status_message.success:
             continue
         # Build an error string like "500" or "500: <details>" so we
         # stop trying this book.
         error = str(status_message.status_code)
         if status_message.message:
             error += ": %s" % status_message.message
         transient = status_message.transient
         identifier_obj, ignore = Identifier.parse_urn(self._db, urn)
         yield CoverageFailure(self, identifier_obj, error, transient)
コード例 #12
0
ファイル: controller.py プロジェクト: rskm1/metadata_wrangler
    def remove_items(self, collection_details):
        """Removes identifiers from a Collection's catalog.

        :param collection_details: Metadata identifier naming the
            target Collection.
        :return: An OPDS feed response with one message per URN, or a
            ProblemDetail if the client isn't authenticated.
        """
        client = authenticated_client_from_request(self._db)
        if isinstance(client, ProblemDetail):
            return client

        collection, ignore = Collection.from_metadata_identifier(
            self._db, collection_details)

        urns = request.args.getlist('urn')
        messages = []
        for urn in urns:
            try:
                identifier, ignore = Identifier.parse_urn(self._db, urn)
            except Exception:
                # Any failure to parse is reported as an invalid URN
                # rather than aborting the whole request.
                identifier = None

            if not identifier:
                message = OPDSMessage(urn, INVALID_URN.status_code,
                                      INVALID_URN.detail)
            else:
                if identifier in collection.catalog:
                    collection.catalog.remove(identifier)
                    message = OPDSMessage(urn, HTTP_OK, "Successfully removed")
                else:
                    message = OPDSMessage(urn, HTTP_NOT_FOUND,
                                          "Not in catalog")

            messages.append(message)

        title = "%s Catalog Item Removal for %s" % (collection.protocol,
                                                    client.url)
        url = cdn_url_for("remove",
                          collection_metadata_identifier=collection.name,
                          urn=urns)
        removal_feed = AcquisitionFeed(self._db,
                                       title,
                                       url, [],
                                       VerboseAnnotator,
                                       precomposed_entries=messages)

        return feed_response(removal_feed)
コード例 #13
0
ファイル: controller.py プロジェクト: rskm1/metadata_wrangler
    def canonicalize_author_name(self):
        """Look up the canonical form of an author's display name,
        optionally scoped to a work identified by URN.

        :return: A plain-text response with the canonical name, a 404
            when none was found, or INVALID_URN for a bad URN.
        """
        urn = request.args.get('urn')
        display_name = request.args.get('display_name')
        if urn:
            try:
                identifier, is_new = Identifier.parse_urn(self._db, urn, False)
            except ValueError:
                # parse_urn() raises ValueError on a recognizable but
                # invalid URN; report it instead of letting the error
                # surface as a server error.
                return INVALID_URN
            if not isinstance(identifier, Identifier):
                return INVALID_URN
        else:
            identifier = None

        author_name = self.canonicalizer.canonicalize_author_name(
            identifier, display_name)
        self.log.info(
            "Incoming display name/identifier: %r/%s. Canonicalizer said: %s",
            display_name, identifier, author_name)

        if not author_name:
            return make_response("", HTTP_NOT_FOUND)
        return make_response(author_name, HTTP_OK,
                             {"Content-Type": "text/plain"})
コード例 #14
0
ファイル: opds.py プロジェクト: datalogics-tarar/circulation
    def page(cls,
             _db,
             title,
             url,
             annotator=None,
             use_materialized_works=True):
        """Create a feed of content to preload on devices."""
        # Which URNs to preload is a site-wide configuration policy.
        urns = Configuration.policy(Configuration.PRELOADED_CONTENT)
        identifier_ids = [
            Identifier.parse_urn(_db, urn)[0].id for urn in urns
        ]

        if use_materialized_works:
            from core.model import MaterializedWork
            query = _db.query(MaterializedWork).filter(
                MaterializedWork.primary_identifier_id.in_(identifier_ids))

            # These objects already live in the materialized view, so
            # skip SQLAlchemy's eager loading for them.
            query = query.options(
                lazyload(MaterializedWork.license_pool,
                         LicensePool.data_source),
                lazyload(MaterializedWork.license_pool,
                         LicensePool.identifier),
                lazyload(MaterializedWork.license_pool,
                         LicensePool.presentation_edition),
            )
        else:
            query = _db.query(Work).join(Work.presentation_edition).filter(
                Edition.primary_identifier_id.in_(identifier_ids))

        feed = cls(_db, title, url, query.all(), annotator)
        annotator.annotate_feed(feed, None)
        return unicode(feed)
コード例 #15
0
    def test_run_once(self):
        """run_once() fetches the update feed and applies its metadata
        to the catalogued identifiers.
        """
        # Setup authentication and Metadata Wrangler details.
        self._external_integration(ExternalIntegration.METADATA_WRANGLER,
                                   ExternalIntegration.METADATA_GOAL,
                                   username=u'abc',
                                   password=u'def',
                                   url=self._url)

        # Create an identifier and its equivalent to work with the OPDS
        # feed.
        collection = self._collection(protocol=ExternalIntegration.BIBLIOTHECA,
                                      external_account_id=u'lib')
        lp = self._licensepool(None,
                               data_source_name=DataSource.BIBLIOTHECA,
                               collection=collection)
        lp.identifier.type = Identifier.BIBLIOTHECA_ID
        isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
        lp.identifier.equivalent_to(
            DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1)
        # No links or measurements yet -- the monitor will add them.
        eq_([], lp.identifier.links)
        eq_([], lp.identifier.measurements)

        # Queue some data to be found.
        data = sample_data('metadata_isbn_response.opds', 'opds')
        lookup = MockMetadataWranglerOPDSLookup.from_config(
            self._db, collection)
        lookup.queue_response(200,
                              {'content-type': OPDSFeed.ACQUISITION_FEED_TYPE},
                              data)

        monitor = MetadataWranglerCollectionUpdateMonitor(
            self._db, collection, lookup)
        monitor.run_once(None, None)

        # The original Identifier has information from the
        # mock Metadata Wrangler.
        mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
        eq_(3, len(lp.identifier.links))
        [quality] = lp.identifier.measurements
        eq_(mw_source, quality.data_source)
コード例 #16
0
    def remove_items(self):
        """Removes identifiers from the authenticated Collection's
        catalog.

        :return: An OPDS feed response with one message per URN, or a
            ProblemDetail if authentication fails.
        """
        collection = self.authenticated_collection_from_request()
        if isinstance(collection, ProblemDetail):
            return collection

        urns = request.args.getlist('urn')
        messages = []
        for urn in urns:
            try:
                identifier, ignore = Identifier.parse_urn(self._db, urn)
            except Exception:
                # Any failure to parse is reported as an invalid URN
                # rather than aborting the whole request.
                identifier = None
            if not identifier:
                message = OPDSMessage(
                    urn, INVALID_URN.status_code, INVALID_URN.detail
                )
            elif identifier in collection.catalog:
                collection.catalog.remove(identifier)
                message = OPDSMessage(
                    urn, HTTP_OK, "Successfully removed"
                )
            else:
                message = OPDSMessage(
                    urn, HTTP_NOT_FOUND, "Not in collection catalog"
                )
            messages.append(message)

        title = "%s Catalog Item Removal" % collection.name
        url = cdn_url_for("remove", urn=urns)
        removal_feed = AcquisitionFeed(
            self._db, title, url, [], VerboseAnnotator,
            precomposed_entries=messages
        )

        return feed_response(removal_feed)
コード例 #17
0
ファイル: coverage.py プロジェクト: datalogics/circulation
    def _process_batch(self, client_method, success_codes, batch):
        """Send a batch of identifiers to the remote lookup service and
        sort the per-identifier responses into successes and failures.

        :param client_method: Lookup-client method to call with the
            mapped batch of identifiers.
        :param success_codes: OPDSMessage status codes that count as
            success.
        :param batch: The identifiers to process.
        :return: A list of successfully processed identifiers and
            CoverageFailure objects.
        """
        results = list()
        id_mapping = self.create_identifier_mapping(batch)
        # Materialize the keys: in Python 3, dict.keys() is a view with
        # no remove() method, so mapped_batch.remove() below would fail.
        mapped_batch = list(id_mapping.keys())

        try:
            response = client_method(mapped_batch)
            self.lookup_client.check_content_type(response)
        except RemoteIntegrationException as e:
            # The whole batch failed; report a failure for every member.
            return [
                self.failure(id_mapping[obj], e.debug_message)
                for obj in mapped_batch
            ]

        for message in self.process_feed_response(response, id_mapping):
            try:
                identifier, _new = Identifier.parse_urn(self._db, message.urn)
                mapped_batch.remove(identifier)
            except ValueError:
                # For some reason this URN can't be parsed (or wasn't in
                # the batch). This shouldn't happen.
                continue

            if message.status_code in success_codes:
                result = id_mapping[identifier]
                results.append(result)
            elif message.status_code == 400:
                # The URN couldn't be recognized. (This shouldn't happen,
                # since if we can parse it here, we can parse it on MW, too.)
                exception = "%s: %s" % (message.status_code, message.message)
                failure = self.failure(identifier, exception)
                results.append(failure)
            else:
                exception = "Unknown OPDSMessage status: %s" % message.status_code
                failure = self.failure(identifier, exception)
                results.append(failure)

        return results
コード例 #18
0
    )
from threem import ThreeMAPI
from overdrive import OverdriveAPI
from axis import Axis360API

from circulation import CirculationAPI
from circulation_exceptions import *

# Command-line arguments: patron barcode and PIN, plus the URNs of the
# book to borrow and the book to place on hold.
barcode, pin, borrow_urn, hold_urn = sys.argv[1:5]
email = os.environ.get('DEFAULT_NOTIFICATION_EMAIL_ADDRESS', '*****@*****.**')

_db = production_session()
# Find or create the Patron record for this barcode.
patron, ignore = get_one_or_create(
    _db, Patron, authorization_identifier=barcode)

# Resolve both URNs to Identifier objects and look up their license
# pools. (Third argument True -- presumably allows autocreation of the
# Identifier; confirm against Identifier.parse_urn.)
borrow_identifier = Identifier.parse_urn(_db, borrow_urn, True)[0]
hold_identifier = Identifier.parse_urn(_db, hold_urn, True)[0]
borrow_pool = borrow_identifier.licensed_through
hold_pool = hold_identifier.licensed_through

# Instantiate only the vendor APIs these identifiers actually need.
if any(x.type == Identifier.THREEM_ID for x in [borrow_identifier, hold_identifier]):
    threem = ThreeMAPI(_db)
else:
    threem = None

if any(x.type == Identifier.OVERDRIVE_ID for x in [borrow_identifier, hold_identifier]):
    overdrive = OverdriveAPI(_db)
else:
    overdrive = None

if any(x.type == Identifier.AXIS_360_ID for x in [borrow_identifier, hold_identifier]):
コード例 #19
0
class AnnotationParser(object):
    """Parse a JSON-LD Web Annotation document submitted by a patron."""

    @classmethod
    def parse(cls, _db, data, patron):
        """Turn a serialized annotation into an Annotation object.

        :param _db: Database session.
        :param data: Raw JSON string of the annotation document.
        :param patron: The patron submitting the annotation.
        :return: An Annotation on success, or a ProblemDetail constant
            when the document is invalid or the patron hasn't opted in.
        """
        # Patrons must explicitly opt in before we store annotations.
        if patron.synchronize_annotations != True:
            return PATRON_NOT_OPTED_IN_TO_ANNOTATION_SYNC

        try:
            data = json.loads(data)
            data = jsonld.expand(data)
        except ValueError:
            return INVALID_ANNOTATION_FORMAT

        # Expansion yields a list; we expect exactly one annotation.
        if not data or not len(data) == 1:
            return INVALID_ANNOTATION_TARGET
        data = data[0]

        target = data.get("http://www.w3.org/ns/oa#hasTarget")
        if not target or not len(target) == 1:
            return INVALID_ANNOTATION_TARGET
        target = target[0]

        source = target.get("http://www.w3.org/ns/oa#hasSource")

        if not source or not len(source) == 1:
            return INVALID_ANNOTATION_TARGET
        source = source[0].get('@id')

        try:
            identifier, ignore = Identifier.parse_urn(_db, source)
        except ValueError:
            # An unparseable source URN means the target is invalid;
            # don't let the ValueError escape as a server error.
            return INVALID_ANNOTATION_TARGET

        motivation = data.get("http://www.w3.org/ns/oa#motivatedBy")
        if not motivation or not len(motivation) == 1:
            return INVALID_ANNOTATION_MOTIVATION
        motivation = motivation[0].get('@id')
        if motivation not in Annotation.MOTIVATIONS:
            return INVALID_ANNOTATION_MOTIVATION

        # Only books the patron currently has on loan may be annotated.
        loans = patron.loans
        loan_identifiers = [loan.license_pool.identifier for loan in loans]
        if identifier not in loan_identifiers:
            return INVALID_ANNOTATION_TARGET

        content = data.get("http://www.w3.org/ns/oa#hasBody")
        if content and len(content) == 1:
            content = content[0]
        else:
            content = None

        target = json.dumps(target)
        extra_kwargs = {}
        if motivation == Annotation.IDLING:
            # A given book can only have one 'idling' annotation.
            pass
        elif motivation == Annotation.BOOKMARKING:
            # A given book can only have one 'bookmarking' annotation
            # per target.
            extra_kwargs['target'] = target

        annotation, ignore = Annotation.get_one_or_create(
            _db,
            patron=patron,
            identifier=identifier,
            motivation=motivation,
            **extra_kwargs)
        annotation.target = target
        if content:
            annotation.content = json.dumps(content)
        annotation.active = True
        annotation.timestamp = datetime.now()

        return annotation
コード例 #20
0
from overdrive import OverdriveAPI
from threem import ThreeMAPI

from circulation import CirculationAPI
from core.model import Identifier, Patron, get_one_or_create, production_session

# Command-line arguments: patron barcode and PIN, plus the URNs of the
# book to borrow and the book to place on hold.
barcode, pin, borrow_urn, hold_urn = sys.argv[1:5]
email = os.environ.get("DEFAULT_NOTIFICATION_EMAIL_ADDRESS",
                       "*****@*****.**")

_db = production_session()
# Find or create the Patron record for this barcode.
patron, ignore = get_one_or_create(_db,
                                   Patron,
                                   authorization_identifier=barcode)

# Resolve both URNs to Identifier objects and look up their license
# pools. (Third argument True -- presumably allows autocreation of the
# Identifier; confirm against Identifier.parse_urn.)
borrow_identifier = Identifier.parse_urn(_db, borrow_urn, True)[0]
hold_identifier = Identifier.parse_urn(_db, hold_urn, True)[0]
borrow_pool = borrow_identifier.licensed_through
hold_pool = hold_identifier.licensed_through

# Instantiate only the vendor APIs these identifiers actually need.
if any(x.type == Identifier.THREEM_ID
       for x in [borrow_identifier, hold_identifier]):
    threem = ThreeMAPI(_db)
else:
    threem = None

if any(x.type == Identifier.OVERDRIVE_ID
       for x in [borrow_identifier, hold_identifier]):
    overdrive = OverdriveAPI(_db)
else:
    overdrive = None