def process_urn(self, urn, collection=None, **kwargs):
    """Turn a URN into a Work suitable for use in an OPDS feed."""
    try:
        identifier, is_new = Identifier.parse_urn(self._db, urn)
    except ValueError as e:
        identifier = None

class AnnotationParser(object):

    @classmethod
    def parse(cls, _db, data, patron):
        if patron.synchronize_annotations != True:
            return PATRON_NOT_OPTED_IN_TO_ANNOTATION_SYNC

        try:
            data = json.loads(data)
            if 'id' in data and data['id'] is None:
                del data['id']
            data = jsonld.expand(data)
        except ValueError as e:
            return INVALID_ANNOTATION_FORMAT

        if not data or not len(data) == 1:
            return INVALID_ANNOTATION_TARGET
        data = data[0]

        target = data.get("http://www.w3.org/ns/oa#hasTarget")
        if not target or not len(target) == 1:
            return INVALID_ANNOTATION_TARGET
        target = target[0]

        source = target.get("http://www.w3.org/ns/oa#hasSource")
        if not source or not len(source) == 1:
            return INVALID_ANNOTATION_TARGET
        source = source[0].get('@id')

        try:
            identifier, ignore = Identifier.parse_urn(_db, source)
        except ValueError as e:
            return INVALID_ANNOTATION_TARGET

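# For reference, a minimal sketch of an annotation document that would
# pass the checks in AnnotationParser.parse() above. The compact keys
# ("target", "source", "motivation") come from the standard Web
# Annotation JSON-LD context, which jsonld.expand() rewrites into the
# absolute oa# IRIs the parser looks for; the URN and the motivation
# IRI below are illustrative values, not confirmed constants, and
# expanding the remote @context requires network access.
import json

annotation_document = json.dumps({
    "@context": "http://www.w3.org/ns/anno.jsonld",
    "type": "Annotation",
    "motivation": "http://librarysimplified.org/terms/annotation/idling",
    "target": {
        "source": "urn:isbn:9781594632556",
    },
})
# AnnotationParser.parse(_db, annotation_document, patron) expands the
# JSON-LD, walks oa#hasTarget -> oa#hasSource -> @id, and hands the
# URN to Identifier.parse_urn(). The fuller variant of this parser
# further below also requires the patron to hold an active loan for
# the identifier.
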
def process_urn(self, urn, collection_details=None, **kwargs):
    """Turn a URN into a Work suitable for use in an OPDS feed."""
    try:
        identifier, is_new = Identifier.parse_urn(self._db, urn)
    except ValueError as e:
        identifier = None

def page(cls, _db, title, url, annotator=None, use_materialized_works=True):
    """Create a feed of content to preload on devices."""
    configured_content = Configuration.policy(Configuration.PRELOADED_CONTENT)

    identifiers = [
        Identifier.parse_urn(_db, urn)[0] for urn in configured_content
    ]
    identifier_ids = [identifier.id for identifier in identifiers]

    if use_materialized_works:
        from core.model import MaterializedWork
        q = _db.query(MaterializedWork)
        q = q.filter(MaterializedWork.primary_identifier_id.in_(identifier_ids))

        # Avoid eager loading of objects that are contained in the
        # materialized view.
        q = q.options(
            lazyload(MaterializedWork.license_pool, LicensePool.data_source),
            lazyload(MaterializedWork.license_pool, LicensePool.identifier),
            lazyload(MaterializedWork.license_pool, LicensePool.edition),
        )
    else:
        q = _db.query(Work).join(Work.primary_edition)
        q = q.filter(Edition.primary_identifier_id.in_(identifier_ids))

    works = q.all()
    feed = cls(_db, title, url, works, annotator)

    annotator.annotate_feed(feed, None)
    content = unicode(feed)
    return content

def test_run_once(self):
    # Setup authentication and Metadata Wrangler details.
    lp = self._licensepool(
        None, data_source_name=DataSource.BIBLIOTHECA,
        collection=self.collection
    )
    lp.identifier.type = Identifier.BIBLIOTHECA_ID
    isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
    lp.identifier.equivalent_to(
        DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
    )
    eq_([], lp.identifier.links)
    eq_([], lp.identifier.measurements)

    # Queue some data to be found.
    responses = (
        'metadata_updates_response.opds',
        'metadata_updates_empty_response.opds',
    )
    for filename in responses:
        data = sample_data(filename, 'opds')
        self.lookup.queue_response(
            200, {'content-type': OPDSFeed.ACQUISITION_FEED_TYPE}, data
        )

    timestamp = self.ts
    new_timestamp = self.monitor.run_once(timestamp)

    # We have a new value to use for the Monitor's timestamp -- the
    # earliest date seen in the last OPDS feed that contained
    # any entries.
    eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp.finish)
    eq_("Editions processed: 1", new_timestamp.achievements)

    # Normally run_once() doesn't update the monitor's timestamp,
    # but this implementation does, so that work isn't redone if
    # run_once() crashes or the monitor is killed.
    eq_(new_timestamp.finish, self.monitor.timestamp().finish)

    # The original Identifier has information from the
    # mock Metadata Wrangler.
    mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    eq_(3, len(lp.identifier.links))
    [quality] = lp.identifier.measurements
    eq_(mw_source, quality.data_source)

    # Check the URLs we processed.
    url1, url2 = [x[0] for x in self.lookup.requests]

    # The first URL processed was the default one for the
    # MetadataWranglerOPDSLookup.
    eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

    # The second URL processed was whatever we saw in the 'next' link.
    eq_("http://next-link/", url2)

def test_run_once(self):
    # Setup authentication and Metadata Wrangler details.
    lp = self._licensepool(
        None, data_source_name=DataSource.BIBLIOTHECA,
        collection=self.collection
    )
    lp.identifier.type = Identifier.BIBLIOTHECA_ID
    isbn = Identifier.parse_urn(self._db, "urn:isbn:9781594632556")[0]
    lp.identifier.equivalent_to(
        DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
    )
    assert [] == lp.identifier.links
    assert [] == lp.identifier.measurements

    # Queue some data to be found.
    responses = (
        "metadata_updates_response.opds",
        "metadata_updates_empty_response.opds",
    )
    for filename in responses:
        data = sample_data(filename, "opds")
        self.lookup.queue_response(
            200, {"content-type": OPDSFeed.ACQUISITION_FEED_TYPE}, data
        )

    timestamp = self.ts
    new_timestamp = self.monitor.run_once(timestamp)

    # We have a new value to use for the Monitor's timestamp -- the
    # earliest date seen in the last OPDS feed that contained
    # any entries.
    assert datetime_utc(2016, 9, 20, 19, 37, 2) == new_timestamp.finish
    assert "Editions processed: 1" == new_timestamp.achievements

    # Normally run_once() doesn't update the monitor's timestamp,
    # but this implementation does, so that work isn't redone if
    # run_once() crashes or the monitor is killed.
    assert new_timestamp.finish == self.monitor.timestamp().finish

    # The original Identifier has information from the
    # mock Metadata Wrangler.
    mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    assert 3 == len(lp.identifier.links)
    [quality] = lp.identifier.measurements
    assert mw_source == quality.data_source

    # Check the URLs we processed.
    url1, url2 = [x[0] for x in self.lookup.requests]

    # The first URL processed was the default one for the
    # MetadataWranglerOPDSLookup.
    assert self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT) == url1

    # The second URL processed was whatever we saw in the 'next' link.
    assert "http://next-link/" == url2

def parse_identifier(self, urn):
    """Try to parse a URN into an identifier.

    :return: An Identifier if possible; otherwise None.
    """
    if not urn:
        return None
    try:
        result = Identifier.parse_urn(self._db, urn, False)
    except ValueError as e:
        # The identifier is parseable but invalid, e.g. an
        # ASIN used as an ISBN. Ignore it.
        return None

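# Every call site in this collection relies on the same parse_urn()
# contract: it returns an (identifier, is_new) 2-tuple and raises
# ValueError for a URN it cannot accept. A minimal sketch of that
# contract; `_db` is assumed to be an open database session and the
# URN values are illustrative.
identifier, is_new = Identifier.parse_urn(_db, "urn:isbn:9781594632556")

try:
    Identifier.parse_urn(_db, "not-a-valid-urn")
except ValueError:
    # Invalid or unrecognized URNs surface as ValueError, which the
    # callers above convert into None, an OPDSMessage, or a problem
    # detail response.
    identifier = None
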
def test_run_once(self):
    # Setup authentication and Metadata Wrangler details.
    lp = self._licensepool(
        None, data_source_name=DataSource.BIBLIOTHECA,
        collection=self.collection
    )
    lp.identifier.type = Identifier.BIBLIOTHECA_ID
    isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
    lp.identifier.equivalent_to(
        DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
    )
    eq_([], lp.identifier.links)
    eq_([], lp.identifier.measurements)

    # Queue some data to be found.
    responses = (
        'metadata_updates_response.opds',
        'metadata_updates_empty_response.opds',
    )
    for filename in responses:
        data = sample_data(filename, 'opds')
        self.lookup.queue_response(
            200, {'content-type': OPDSFeed.ACQUISITION_FEED_TYPE}, data
        )

    new_timestamp = self.monitor.run_once(None, None)

    # We have a new value to use for the Monitor's timestamp -- the
    # earliest date seen in the last OPDS feed that contained
    # any entries.
    eq_(datetime.datetime(2016, 9, 20, 19, 37, 2), new_timestamp)

    # Normally run_once() doesn't update the monitor's timestamp,
    # but this implementation does, so that work isn't redone if
    # run_once() crashes or the monitor is killed.
    eq_(new_timestamp, self.monitor.timestamp().timestamp)

    # The original Identifier has information from the
    # mock Metadata Wrangler.
    mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    eq_(3, len(lp.identifier.links))
    [quality] = lp.identifier.measurements
    eq_(mw_source, quality.data_source)

    # Check the URLs we processed.
    url1, url2 = [x[0] for x in self.lookup.requests]

    # The first URL processed was the default one for the
    # MetadataWranglerOPDSLookup.
    eq_(self.lookup.get_collection_url(self.lookup.UPDATES_ENDPOINT), url1)

    # The second URL processed was whatever we saw in the 'next' link.
    eq_("http://next-link/", url2)

def add_items(self, collection_details):
    """Adds identifiers to a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection, ignore = Collection.from_metadata_identifier(
        self._db, collection_details
    )

    urns = request.args.getlist('urn')
    messages = []
    for urn in urns:
        message = None
        identifier = None
        try:
            identifier, ignore = Identifier.parse_urn(self._db, urn)
        except Exception as e:
            identifier = None

        if not identifier:
            message = OPDSMessage(
                urn, INVALID_URN.status_code, INVALID_URN.detail
            )
        else:
            status = HTTP_OK
            description = "Already in catalog"
            if identifier not in collection.catalog:
                collection.catalog_identifier(self._db, identifier)
                status = HTTP_CREATED
                description = "Successfully added"
            message = OPDSMessage(urn, status, description)

        messages.append(message)

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = cdn_url_for(
        "add", collection_metadata_identifier=collection.name, urn=urns
    )
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(addition_feed)

def handle_import_messages(self, messages_by_id):
    """Turn import messages from the OPDS importer into
    CoverageFailure objects.
    """
    for identifier, message in messages_by_id.items():
        # If the message indicates success but we didn't actually
        # get the data, treat it as a transient error.
        #
        # If the message does not indicate success, create a
        # CoverageRecord with the error so we stop trying this
        # book.
        if not message.success:
            exception = str(message.status_code)
            if message.message:
                exception += ": %s" % message.message
            transient = message.transient
            identifier_obj, ignore = Identifier.parse_urn(self._db, identifier)
            yield CoverageFailure(self, identifier_obj, exception, transient)

def remove_items(self, collection_details):
    """Removes identifiers from a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection, ignore = Collection.from_metadata_identifier(
        self._db, collection_details
    )

    urns = request.args.getlist('urn')
    messages = []
    for urn in urns:
        message = None
        identifier = None
        try:
            identifier, ignore = Identifier.parse_urn(self._db, urn)
        except Exception as e:
            identifier = None

        if not identifier:
            message = OPDSMessage(
                urn, INVALID_URN.status_code, INVALID_URN.detail
            )
        else:
            if identifier in collection.catalog:
                collection.catalog.remove(identifier)
                message = OPDSMessage(urn, HTTP_OK, "Successfully removed")
            else:
                message = OPDSMessage(urn, HTTP_NOT_FOUND, "Not in catalog")

        messages.append(message)

    title = "%s Catalog Item Removal for %s" % (collection.protocol, client.url)
    url = cdn_url_for(
        "remove", collection_metadata_identifier=collection.name, urn=urns
    )
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(removal_feed)

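# add_items() and remove_items() both read repeated `urn` query
# parameters via request.args.getlist('urn'). A sketch of a matching
# client request; the host, path, and HTTP method are assumptions,
# and a real call also needs the client credentials that
# authenticated_client_from_request() verifies.
import requests

response = requests.get(
    "https://metadata.example.com/collection-name/remove",  # hypothetical URL
    params=[
        ("urn", "urn:isbn:9781594632556"),
        ("urn", "urn:isbn:9780316074247"),
    ],
)
# The response is an OPDS acquisition feed whose precomposed entries
# are per-URN OPDSMessage elements: e.g. 200 "Successfully removed",
# 404 "Not in catalog", or the INVALID_URN problem detail for a URN
# that could not be parsed.
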
def canonicalize_author_name(self):
    urn = request.args.get('urn')
    display_name = request.args.get('display_name')
    if urn:
        identifier, is_new = Identifier.parse_urn(self._db, urn, False)
        if not isinstance(identifier, Identifier):
            return INVALID_URN
    else:
        identifier = None

    author_name = self.canonicalizer.canonicalize_author_name(
        identifier, display_name
    )
    self.log.info(
        "Incoming display name/identifier: %r/%s. Canonicalizer said: %s",
        display_name, identifier, author_name
    )
    if not author_name:
        return make_response("", HTTP_NOT_FOUND)
    return make_response(author_name, HTTP_OK, {"Content-Type": "text/plain"})

def page(cls, _db, title, url, annotator=None, use_materialized_works=True):
    """Create a feed of content to preload on devices."""
    configured_content = Configuration.policy(Configuration.PRELOADED_CONTENT)

    identifiers = [
        Identifier.parse_urn(_db, urn)[0] for urn in configured_content
    ]
    identifier_ids = [identifier.id for identifier in identifiers]

    if use_materialized_works:
        from core.model import MaterializedWork
        q = _db.query(MaterializedWork)
        q = q.filter(
            MaterializedWork.primary_identifier_id.in_(identifier_ids)
        )

        # Avoid eager loading of objects that are contained in the
        # materialized view.
        q = q.options(
            lazyload(MaterializedWork.license_pool, LicensePool.data_source),
            lazyload(MaterializedWork.license_pool, LicensePool.identifier),
            lazyload(MaterializedWork.license_pool, LicensePool.presentation_edition),
        )
    else:
        q = _db.query(Work).join(Work.presentation_edition)
        q = q.filter(Edition.primary_identifier_id.in_(identifier_ids))

    works = q.all()
    feed = cls(_db, title, url, works, annotator)

    annotator.annotate_feed(feed, None)
    content = unicode(feed)
    return content

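# A sketch of a call site for the preload feed above. `PreloadFeed`
# stands in for whichever class owns this page() classmethod and
# `my_annotator` for a configured OPDS annotator; both names are
# assumptions. Note that annotator must not be left as None, since
# annotate_feed() is called on it unconditionally, and the URN list
# comes from the PRELOADED_CONTENT configuration policy rather than
# from the caller.
content = PreloadFeed.page(
    _db,
    title="Preloaded titles",
    url="https://circulation.example.com/preload",  # illustrative URL
    annotator=my_annotator,
)
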
def test_run_once(self):
    # Setup authentication and Metadata Wrangler details.
    self._external_integration(
        ExternalIntegration.METADATA_WRANGLER,
        ExternalIntegration.METADATA_GOAL,
        username=u'abc', password=u'def', url=self._url
    )

    # Create an identifier and its equivalent to work with the OPDS
    # feed.
    collection = self._collection(
        protocol=ExternalIntegration.BIBLIOTHECA,
        external_account_id=u'lib'
    )
    lp = self._licensepool(
        None, data_source_name=DataSource.BIBLIOTHECA,
        collection=collection
    )
    lp.identifier.type = Identifier.BIBLIOTHECA_ID
    isbn = Identifier.parse_urn(self._db, u'urn:isbn:9781594632556')[0]
    lp.identifier.equivalent_to(
        DataSource.lookup(self._db, DataSource.BIBLIOTHECA), isbn, 1
    )
    eq_([], lp.identifier.links)
    eq_([], lp.identifier.measurements)

    # Queue some data to be found.
    data = sample_data('metadata_isbn_response.opds', 'opds')
    lookup = MockMetadataWranglerOPDSLookup.from_config(self._db, collection)
    lookup.queue_response(
        200, {'content-type': OPDSFeed.ACQUISITION_FEED_TYPE}, data
    )

    monitor = MetadataWranglerCollectionUpdateMonitor(
        self._db, collection, lookup
    )
    monitor.run_once(None, None)

    # The original Identifier has information from the
    # mock Metadata Wrangler.
    mw_source = DataSource.lookup(self._db, DataSource.METADATA_WRANGLER)
    eq_(3, len(lp.identifier.links))
    [quality] = lp.identifier.measurements
    eq_(mw_source, quality.data_source)

def remove_items(self):
    collection = self.authenticated_collection_from_request()
    if isinstance(collection, ProblemDetail):
        return collection

    urns = request.args.getlist('urn')
    messages = []
    for urn in urns:
        message = None
        identifier = None
        try:
            identifier, ignore = Identifier.parse_urn(self._db, urn)
        except Exception as e:
            identifier = None

        if not identifier:
            message = OPDSMessage(
                urn, INVALID_URN.status_code, INVALID_URN.detail
            )
        else:
            if identifier in collection.catalog:
                collection.catalog.remove(identifier)
                message = OPDSMessage(
                    urn, HTTP_OK, "Successfully removed"
                )
            else:
                message = OPDSMessage(
                    urn, HTTP_NOT_FOUND, "Not in collection catalog"
                )
        if message:
            messages.append(message)

    title = "%s Catalog Item Removal" % collection.name
    url = cdn_url_for("remove", urn=urns)
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(removal_feed)

def _process_batch(self, client_method, success_codes, batch):
    results = list()
    id_mapping = self.create_identifier_mapping(batch)
    mapped_batch = id_mapping.keys()

    try:
        response = client_method(mapped_batch)
        self.lookup_client.check_content_type(response)
    except RemoteIntegrationException as e:
        return [
            self.failure(id_mapping[obj], e.debug_message)
            for obj in mapped_batch
        ]

    for message in self.process_feed_response(response, id_mapping):
        try:
            identifier, _new = Identifier.parse_urn(self._db, message.urn)
            mapped_batch.remove(identifier)
        except ValueError as e:
            # For some reason this URN can't be parsed. This
            # shouldn't happen.
            continue

        if message.status_code in success_codes:
            result = id_mapping[identifier]
            results.append(result)
        elif message.status_code == 400:
            # The URN couldn't be recognized. (This shouldn't happen,
            # since if we can parse it here, we can parse it on MW, too.)
            exception = "%s: %s" % (message.status_code, message.message)
            failure = self.failure(identifier, exception)
            results.append(failure)
        else:
            exception = "Unknown OPDSMessage status: %s" % message.status_code
            failure = self.failure(identifier, exception)
            results.append(failure)

    return results

)
from threem import ThreeMAPI
from overdrive import OverdriveAPI
from axis import Axis360API
from circulation import CirculationAPI
from circulation_exceptions import *

barcode, pin, borrow_urn, hold_urn = sys.argv[1:5]
email = os.environ.get(
    'DEFAULT_NOTIFICATION_EMAIL_ADDRESS', '*****@*****.**'
)

_db = production_session()
patron, ignore = get_one_or_create(
    _db, Patron, authorization_identifier=barcode
)

borrow_identifier = Identifier.parse_urn(_db, borrow_urn, True)[0]
hold_identifier = Identifier.parse_urn(_db, hold_urn, True)[0]
borrow_pool = borrow_identifier.licensed_through
hold_pool = hold_identifier.licensed_through

if any(x.type == Identifier.THREEM_ID
       for x in [borrow_identifier, hold_identifier]):
    threem = ThreeMAPI(_db)
else:
    threem = None

if any(x.type == Identifier.OVERDRIVE_ID
       for x in [borrow_identifier, hold_identifier]):
    overdrive = OverdriveAPI(_db)
else:
    overdrive = None

if any(x.type == Identifier.AXIS_360_ID
       for x in [borrow_identifier, hold_identifier]):

class AnnotationParser(object):

    @classmethod
    def parse(cls, _db, data, patron):
        if patron.synchronize_annotations != True:
            return PATRON_NOT_OPTED_IN_TO_ANNOTATION_SYNC

        try:
            data = json.loads(data)
            data = jsonld.expand(data)
        except ValueError as e:
            return INVALID_ANNOTATION_FORMAT

        if not data or not len(data) == 1:
            return INVALID_ANNOTATION_TARGET
        data = data[0]

        target = data.get("http://www.w3.org/ns/oa#hasTarget")
        if not target or not len(target) == 1:
            return INVALID_ANNOTATION_TARGET
        target = target[0]

        source = target.get("http://www.w3.org/ns/oa#hasSource")
        if not source or not len(source) == 1:
            return INVALID_ANNOTATION_TARGET
        source = source[0].get('@id')
        identifier, ignore = Identifier.parse_urn(_db, source)

        motivation = data.get("http://www.w3.org/ns/oa#motivatedBy")
        if not motivation or not len(motivation) == 1:
            return INVALID_ANNOTATION_MOTIVATION
        motivation = motivation[0].get('@id')
        if motivation not in Annotation.MOTIVATIONS:
            return INVALID_ANNOTATION_MOTIVATION

        loans = patron.loans
        loan_identifiers = [loan.license_pool.identifier for loan in loans]
        if identifier not in loan_identifiers:
            return INVALID_ANNOTATION_TARGET

        content = data.get("http://www.w3.org/ns/oa#hasBody")
        if content and len(content) == 1:
            content = content[0]
        else:
            content = None

        target = json.dumps(target)
        extra_kwargs = {}
        if motivation == Annotation.IDLING:
            # A given book can only have one 'idling' annotation.
            pass
        elif motivation == Annotation.BOOKMARKING:
            # A given book can only have one 'bookmarking' annotation
            # per target.
            extra_kwargs['target'] = target

        annotation, ignore = Annotation.get_one_or_create(
            _db, patron=patron, identifier=identifier,
            motivation=motivation, **extra_kwargs
        )

        annotation.target = target
        if content:
            annotation.content = json.dumps(content)
        annotation.active = True
        annotation.timestamp = datetime.now()

        return annotation

from overdrive import OverdriveAPI
from threem import ThreeMAPI
from circulation import CirculationAPI
from core.model import Identifier, Patron, get_one_or_create, production_session

barcode, pin, borrow_urn, hold_urn = sys.argv[1:5]
email = os.environ.get(
    "DEFAULT_NOTIFICATION_EMAIL_ADDRESS", "*****@*****.**"
)

_db = production_session()
patron, ignore = get_one_or_create(
    _db, Patron, authorization_identifier=barcode
)

borrow_identifier = Identifier.parse_urn(_db, borrow_urn, True)[0]
hold_identifier = Identifier.parse_urn(_db, hold_urn, True)[0]
borrow_pool = borrow_identifier.licensed_through
hold_pool = hold_identifier.licensed_through

if any(x.type == Identifier.THREEM_ID
       for x in [borrow_identifier, hold_identifier]):
    threem = ThreeMAPI(_db)
else:
    threem = None

if any(x.type == Identifier.OVERDRIVE_ID
       for x in [borrow_identifier, hold_identifier]):
    overdrive = OverdriveAPI(_db)
else:
    overdrive = None