def metadata_needed_for(self, collection_details):
    """Returns identifiers in the collection that could benefit from
    distributor metadata on the circulation manager.
    """
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection = collection_from_details(
        self._db, client, collection_details
    )

    resolver = IdentifierResolutionCoverageProvider
    unresolved_identifiers = collection.unresolved_catalog(
        self._db, resolver.DATA_SOURCE_NAME, resolver.OPERATION
    )

    # Omit identifiers that currently have metadata pending for
    # the IntegrationClientCoverImageCoverageProvider.
    data_source = DataSource.lookup(
        self._db, collection.name, autocreate=True
    )
    is_awaiting_metadata = self._db.query(
        CoverageRecord.id, CoverageRecord.identifier_id
    ).filter(
        CoverageRecord.data_source_id == data_source.id,
        CoverageRecord.status == CoverageRecord.REGISTERED,
        CoverageRecord.operation == IntegrationClientCoverImageCoverageProvider.OPERATION,
    ).subquery()

    unresolved_identifiers = unresolved_identifiers.outerjoin(
        is_awaiting_metadata,
        Identifier.id == is_awaiting_metadata.c.identifier_id
    ).filter(is_awaiting_metadata.c.id == None)

    # Add a message for each unresolved identifier.
    pagination = load_pagination_from_request(default_size=25)
    if isinstance(pagination, ProblemDetail):
        return pagination
    feed_identifiers = pagination.apply(unresolved_identifiers).all()
    messages = list()
    for identifier in feed_identifiers:
        messages.append(OPDSMessage(
            identifier.urn, HTTP_ACCEPTED, "Metadata needed."
        ))

    title = "%s Metadata Requests for %s" % (collection.protocol, client.url)
    metadata_request_url = self.collection_feed_url(
        'metadata_needed_for', collection
    )

    request_feed = AcquisitionFeed(
        self._db, title, metadata_request_url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    self.add_pagination_links_to_feed(
        pagination, unresolved_identifiers, request_feed,
        'metadata_needed_for', collection
    )

    return feed_response(request_feed)

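# Illustrative sketch (not part of the controller): the query above is an
# anti-join -- a LEFT OUTER JOIN against a subquery of REGISTERED coverage
# records, keeping only the rows with no match. A minimal, self-contained
# demonstration of the same pattern; the toy Item/Coverage models here are
# assumptions for illustration only.
def _example_anti_join():
    from sqlalchemy import Column, ForeignKey, Integer, String, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import Session

    Base = declarative_base()

    class Item(Base):
        __tablename__ = 'item'
        id = Column(Integer, primary_key=True)

    class Coverage(Base):
        __tablename__ = 'coverage'
        id = Column(Integer, primary_key=True)
        item_id = Column(Integer, ForeignKey('item.id'))
        status = Column(String)

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    session = Session(bind=engine)

    pending = session.query(Coverage.id, Coverage.item_id).filter(
        Coverage.status == 'registered'
    ).subquery()

    # Outer join items to their pending records, then keep only items
    # whose joined side is entirely NULL -- i.e. nothing is pending.
    return session.query(Item).outerjoin(
        pending, Item.id == pending.c.item_id
    ).filter(pending.c.id == None)
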
def remove_items(self, collection_details):
    """Removes identifiers from a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection = collection_from_details(
        self._db, client, collection_details
    )

    urns = request.args.getlist('urn')
    messages = []
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)

    for urn in failures:
        message = OPDSMessage(
            urn, INVALID_URN.status_code, INVALID_URN.detail
        )
        messages.append(message)

    # Find the IDs of the subset of provided identifiers that are
    # in the catalog, so we know which ones to delete and give a
    # 200 message. Also get a SQLAlchemy clause that selects only
    # those IDs.
    matching_ids, identifier_match_clause = self._in_catalog_subset(
        collection, identifiers_by_urn
    )

    # Use that clause to delete all of the relevant catalog entries.
    delete_stmt = collections_identifiers.delete().where(
        identifier_match_clause
    )
    self._db.execute(delete_stmt)

    # IDs that matched get a 200 message; all others get a 404 message.
    for urn, identifier in identifiers_by_urn.items():
        if identifier.id in matching_ids:
            status = HTTP_OK
            description = "Successfully removed"
        else:
            status = HTTP_NOT_FOUND
            description = "Not in catalog"
        message = OPDSMessage(urn, status, description)
        messages.append(message)

    title = "%s Catalog Item Removal for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url("remove", collection, urn=urns)
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    return feed_response(removal_feed)

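# Illustrative sketch (not part of the controller): `remove_items` deletes
# catalog rows in bulk with a single Core DELETE statement, rather than
# removing ORM objects one at a time as the older version further down does.
# A minimal demonstration of that pattern; the toy `catalog` table and
# engine here are assumptions for illustration only.
def _example_bulk_delete():
    from sqlalchemy import (
        Column, Integer, MetaData, Table, create_engine, insert
    )

    metadata = MetaData()
    catalog = Table(
        'catalog', metadata,
        Column('collection_id', Integer),
        Column('identifier_id', Integer),
    )
    engine = create_engine('sqlite://')
    metadata.create_all(engine)

    with engine.begin() as connection:
        connection.execute(insert(catalog), [
            dict(collection_id=1, identifier_id=10),
            dict(collection_id=1, identifier_id=11),
            dict(collection_id=2, identifier_id=10),
        ])
        # One round trip removes every matching row.
        delete_stmt = catalog.delete().where(
            catalog.c.identifier_id.in_([10, 11])
        )
        connection.execute(delete_stmt)
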
def suppressed(self):
    this_url = self.url_for('suppressed')
    annotator = AdminAnnotator(self.circulation)
    pagination = load_pagination_from_request()
    if isinstance(pagination, ProblemDetail):
        return pagination
    opds_feed = AdminFeed.suppressed(
        _db=self._db, title="Hidden Books",
        url=this_url, annotator=annotator,
        pagination=pagination
    )
    return feed_response(opds_feed)

def complaints(self):
    this_url = self.url_for('complaints')
    annotator = AdminAnnotator(self.circulation)
    pagination = load_pagination_from_request()
    if isinstance(pagination, ProblemDetail):
        return pagination
    opds_feed = AdminFeed.complaints(
        _db=self._db, title="Complaints",
        url=this_url, annotator=annotator,
        pagination=pagination
    )
    return feed_response(opds_feed)

def add_items(self, collection_details):
    """Adds identifiers to a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection = collection_from_details(
        self._db, client, collection_details
    )

    urns = request.args.getlist('urn')
    messages = []
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)

    for urn in failures:
        message = OPDSMessage(
            urn, INVALID_URN.status_code, INVALID_URN.detail
        )
        messages.append(message)

    # Find the subset of incoming identifiers that are already
    # in the catalog.
    already_in_catalog, ignore = self._in_catalog_subset(
        collection, identifiers_by_urn
    )

    # Everything else needs to be added to the catalog.
    needs_to_be_added = [
        x for x in identifiers_by_urn.values()
        if x.id not in already_in_catalog
    ]
    collection.catalog_identifiers(needs_to_be_added)

    for urn, identifier in identifiers_by_urn.items():
        if identifier.id in already_in_catalog:
            status = HTTP_OK
            description = "Already in catalog"
        else:
            status = HTTP_CREATED
            description = "Successfully added"
        messages.append(OPDSMessage(urn, status, description))

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url('add', collection, urn=urns)
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    return feed_response(addition_feed)

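# Illustrative sketch (not part of the controller): identifiers arrive as
# repeated `urn` query parameters, which Flask exposes through
# `request.args.getlist`. A minimal demonstration using a throwaway test
# request context; the path and sample URNs are assumptions for
# illustration only.
def _example_getlist():
    from flask import Flask, request

    app = Flask(__name__)
    with app.test_request_context(
        '/add?urn=urn:isbn:9780316075978&urn=urn:isbn:9781453219539'
    ):
        # -> ['urn:isbn:9780316075978', 'urn:isbn:9781453219539']
        return request.args.getlist('urn')
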
def add_items(self, collection_details):
    """Adds identifiers to a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection, ignore = Collection.from_metadata_identifier(
        self._db, collection_details
    )

    urns = request.args.getlist('urn')
    messages = []
    for urn in urns:
        message = None
        identifier = None
        try:
            identifier, ignore = Identifier.parse_urn(self._db, urn)
        except Exception as e:
            identifier = None

        if not identifier:
            message = OPDSMessage(
                urn, INVALID_URN.status_code, INVALID_URN.detail
            )
        else:
            status = HTTP_OK
            description = "Already in catalog"
            if identifier not in collection.catalog:
                collection.catalog_identifier(self._db, identifier)
                status = HTTP_CREATED
                description = "Successfully added"
            message = OPDSMessage(urn, status, description)

        messages.append(message)

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = cdn_url_for(
        "add", collection_metadata_identifier=collection.name, urn=urns
    )
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    return feed_response(addition_feed)

def remove_items(self, collection_details):
    """Removes identifiers from a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection, ignore = Collection.from_metadata_identifier(
        self._db, collection_details
    )

    urns = request.args.getlist('urn')
    messages = []
    for urn in urns:
        message = None
        identifier = None
        try:
            identifier, ignore = Identifier.parse_urn(self._db, urn)
        except Exception as e:
            identifier = None

        if not identifier:
            message = OPDSMessage(
                urn, INVALID_URN.status_code, INVALID_URN.detail
            )
        else:
            if identifier in collection.catalog:
                collection.catalog.remove(identifier)
                message = OPDSMessage(urn, HTTP_OK, "Successfully removed")
            else:
                message = OPDSMessage(urn, HTTP_NOT_FOUND, "Not in catalog")

        messages.append(message)

    title = "%s Catalog Item Removal for %s" % (collection.protocol, client.url)
    url = cdn_url_for(
        "remove", collection_metadata_identifier=collection.name, urn=urns
    )
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    return feed_response(removal_feed)

def updates_feed(self):
    collection = self.authenticated_collection_from_request()
    if isinstance(collection, ProblemDetail):
        return collection

    last_update_time = request.args.get('last_update_time', None)
    if last_update_time:
        last_update_time = datetime.strptime(
            last_update_time, "%Y-%m-%dT%H:%M:%SZ"
        )
    updated_works = collection.works_updated_since(self._db, last_update_time)

    pagination = load_pagination_from_request()
    if isinstance(pagination, ProblemDetail):
        return pagination
    works = pagination.apply(updated_works).all()
    title = "%s Updates" % collection.name

    def update_url(time=last_update_time, page=None):
        kw = dict(_external=True)
        if time:
            kw.update({'last_update_time': last_update_time})
        if page:
            kw.update(page.items())
        return cdn_url_for("updates", **kw)

    update_feed = AcquisitionFeed(
        self._db, title, update_url(), works, VerboseAnnotator
    )

    if len(updated_works.all()) > pagination.size + pagination.offset:
        update_feed.add_link_to_feed(
            update_feed.feed, rel="next",
            href=update_url(page=pagination.next_page)
        )
    if pagination.offset > 0:
        update_feed.add_link_to_feed(
            update_feed.feed, rel="first",
            href=update_url(page=pagination.first_page)
        )
    previous_page = pagination.previous_page
    if previous_page:
        update_feed.add_link_to_feed(
            update_feed.feed, rel="previous",
            href=update_url(page=previous_page)
        )

    return feed_response(update_feed)

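# Illustrative sketch (not part of the controller): the three `if` blocks
# above emit standard OPDS paging links. Note that `len(updated_works.all())`
# materializes the whole result set just to count it; the CatalogController
# version further down uses fast_query_count instead. The link arithmetic
# reduces to a small pure function -- `total`, `size`, and `offset` mirror
# the pagination object's fields, and we assume a previous page exists
# whenever the offset is positive. All names here are assumptions for
# illustration only.
def _example_pagination_rels(total, size, offset):
    rels = []
    if total > size + offset:   # rows exist past the current page
        rels.append("next")
    if offset > 0:              # not on the first page
        rels.append("first")
        rels.append("previous")
    return rels

# _example_pagination_rels(total=120, size=50, offset=50)
# -> ['next', 'first', 'previous']
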
def remove_items(self):
    collection = self.authenticated_collection_from_request()
    if isinstance(collection, ProblemDetail):
        return collection

    urns = request.args.getlist('urn')
    messages = []
    for urn in urns:
        message = None
        identifier = None
        try:
            identifier, ignore = Identifier.parse_urn(self._db, urn)
        except Exception as e:
            identifier = None

        if not identifier:
            message = OPDSMessage(
                urn, INVALID_URN.status_code, INVALID_URN.detail
            )
        else:
            if identifier in collection.catalog:
                collection.catalog.remove(identifier)
                message = OPDSMessage(
                    urn, HTTP_OK, "Successfully removed"
                )
            else:
                message = OPDSMessage(
                    urn, HTTP_NOT_FOUND, "Not in collection catalog"
                )
        if message:
            messages.append(message)

    title = "%s Catalog Item Removal" % collection.name
    url = cdn_url_for("remove", urn=urns)
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    return feed_response(removal_feed)

def updates_feed(self, collection_details):
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection, ignore = Collection.from_metadata_identifier(
        self._db, collection_details
    )

    last_update_time = request.args.get('last_update_time', None)
    if last_update_time:
        last_update_time = datetime.strptime(
            last_update_time, "%Y-%m-%dT%H:%M:%SZ"
        )
    updated_works = collection.works_updated_since(self._db, last_update_time)

    pagination = load_pagination_from_request()
    if isinstance(pagination, ProblemDetail):
        return pagination
    works = pagination.apply(updated_works).all()
    title = "%s Collection Updates for %s" % (collection.protocol, client.url)

    def update_url(time=last_update_time, page=None):
        kw = dict(
            _external=True,
            collection_metadata_identifier=collection_details
        )
        if time:
            kw.update({'last_update_time': last_update_time})
        if page:
            kw.update(page.items())
        return cdn_url_for("updates", **kw)

    # Use a Work's cached OPDS entry when one exists; everything else
    # is handed to the feed as an (identifier, work) pair.
    entries = []
    for work in works[:]:
        entry = work.verbose_opds_entry or work.simple_opds_entry
        if entry:
            entries.append(etree.fromstring(entry))
            works.remove(work)

    works = [(work.identifier, work) for work in works]
    update_feed = LookupAcquisitionFeed(
        self._db, title, update_url(), works, VerboseAnnotator,
        precomposed_entries=entries
    )

    if len(updated_works.all()) > pagination.size + pagination.offset:
        update_feed.add_link_to_feed(
            update_feed.feed, rel="next",
            href=update_url(page=pagination.next_page)
        )
    if pagination.offset > 0:
        update_feed.add_link_to_feed(
            update_feed.feed, rel="first",
            href=update_url(page=pagination.first_page)
        )
    previous_page = pagination.previous_page
    if previous_page:
        update_feed.add_link_to_feed(
            update_feed.feed, rel="previous",
            href=update_url(page=previous_page)
        )

    return feed_response(update_feed)

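# Illustrative sketch (not part of the controller): `last_update_time` is
# parsed with the "%Y-%m-%dT%H:%M:%SZ" format, and a malformed value raises
# ValueError -- the CatalogController version below catches this and returns
# INVALID_INPUT, while the version above would surface a server error. A
# minimal demonstration; the helper name is an assumption for illustration
# only.
def _example_parse_update_time(raw):
    from datetime import datetime
    try:
        return datetime.strptime(raw, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return None

# _example_parse_update_time("2018-04-01T12:30:00Z")
#   -> datetime.datetime(2018, 4, 1, 12, 30)
# _example_parse_update_time("April 1, 2018") -> None
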
def add_with_metadata(self, collection_details):
    """Adds identifiers with their metadata to a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client

    collection = collection_from_details(
        self._db, client, collection_details
    )

    data_source = DataSource.lookup(
        self._db, collection.name, autocreate=True
    )

    feed = feedparser.parse(request.data)
    entries = feed.get("entries", [])
    entries_by_urn = {entry.get('id'): entry for entry in entries}

    identifiers_by_urn, invalid_urns = Identifier.parse_urns(
        self._db, entries_by_urn.keys()
    )

    messages = list()
    for urn in invalid_urns:
        messages.append(OPDSMessage(
            urn, INVALID_URN.status_code, INVALID_URN.detail
        ))

    for urn, identifier in identifiers_by_urn.items():
        entry = entries_by_urn[urn]
        status = HTTP_OK
        description = "Already in catalog"

        if identifier not in collection.catalog:
            collection.catalog_identifier(identifier)
            status = HTTP_CREATED
            description = "Successfully added"

        message = OPDSMessage(urn, status, description)

        # Get a cover if it exists.
        image_types = set([Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE])
        images = [
            l for l in entry.get("links", [])
            if l.get("rel") in image_types
        ]
        links = [
            LinkData(image.get("rel"), image.get("href"))
            for image in images
        ]

        # Create an edition to hold the title and author.
        # LicensePool.calculate_work refuses to create a Work when
        # there's no title, and if we have a title, author, and
        # language we can attempt to look up the edition in OCLC.
        title = entry.get("title") or "Unknown Title"
        author = ContributorData(
            sort_name=(entry.get("author") or Edition.UNKNOWN_AUTHOR),
            roles=[Contributor.PRIMARY_AUTHOR_ROLE]
        )
        language = entry.get("dcterms_language")

        presentation = PresentationCalculationPolicy(
            choose_edition=False,
            set_edition_metadata=False,
            classify=False,
            choose_summary=False,
            calculate_quality=False,
            choose_cover=False,
            regenerate_opds_entries=False,
        )
        replace = ReplacementPolicy(
            presentation_calculation_policy=presentation
        )
        metadata = Metadata(
            data_source,
            primary_identifier=IdentifierData(identifier.type, identifier.identifier),
            title=title,
            language=language,
            contributors=[author],
            links=links,
        )

        edition, ignore = metadata.edition(self._db)
        metadata.apply(edition, collection, replace=replace)

        messages.append(message)

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url("add_with_metadata", collection)
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )

    return feed_response(addition_feed)

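# Illustrative sketch (not part of the controller): the incoming request
# body is an OPDS feed parsed with feedparser. A minimal demonstration of
# the entry fields `add_with_metadata` reads; the sample XML below is an
# assumption for illustration only.
def _example_parse_opds_entry():
    import feedparser

    sample = """<?xml version="1.0" encoding="UTF-8"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <entry>
        <id>urn:isbn:9780316075978</id>
        <title>Example Title</title>
        <author><name>Author, Example</name></author>
        <link rel="http://opds-spec.org/image"
              href="http://example.org/cover.jpg"/>
      </entry>
    </feed>"""

    feed = feedparser.parse(sample)
    entry = feed.get("entries", [])[0]
    return (
        entry.get("id"),       # the identifier URN
        entry.get("title"),    # falls back to "Unknown Title" above
        entry.get("author"),   # sort name of the primary author
        [(l.get("rel"), l.get("href")) for l in entry.get("links", [])],
    )
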
class CatalogController(object):
    """A controller to manage a Collection's catalog"""

    log = logging.getLogger("Catalog Controller")

    # Set a default updates feed size lower than the
    # Pagination.DEFAULT_SIZE of 50. Because the updates feed paginates
    # works and ISBNs separately, this not-quite-half should keep the
    # feed at about the expected size overall without impacting
    # non-ISBN collections too much.
    UPDATES_SIZE = 35

    TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

    def __init__(self, _db):
        self._db = _db

    @classmethod
    def collection_feed_url(cls, endpoint, collection, page=None,
                            **param_kwargs):
        kw = dict(
            _external=True,
            collection_metadata_identifier=collection.name
        )
        kw.update(param_kwargs)
        if page:
            kw.update(page.items())
        return cdn_url_for(endpoint, **kw)

    @classmethod
    def add_pagination_links_to_feed(cls, pagination, query, feed, endpoint,
                                     collection, **url_param_kwargs):
        """Adds links for pagination to a given collection's feed."""
        def href_for(page):
            return cls.collection_feed_url(
                endpoint, collection, page=page, **url_param_kwargs
            )

        if fast_query_count(query) > (pagination.size + pagination.offset):
            feed.add_link_to_feed(
                feed.feed, rel="next", href=href_for(pagination.next_page)
            )

        if pagination.offset > 0:
            feed.add_link_to_feed(
                feed.feed, rel="first", href=href_for(pagination.first_page)
            )

        previous_page = pagination.previous_page
        if previous_page:
            feed.add_link_to_feed(
                feed.feed, rel="previous", href=href_for(previous_page)
            )

    def updates_feed(self, collection_details):
        client = authenticated_client_from_request(self._db)
        if isinstance(client, ProblemDetail):
            return client
        collection = collection_from_details(
            self._db, client, collection_details
        )

        last_update_time = request.args.get('last_update_time', None)
        if last_update_time:
            try:
                last_update_time = datetime.strptime(
                    last_update_time, self.TIMESTAMP_FORMAT
                )
            except ValueError as e:
                message = 'The timestamp "%s" is not in the expected format (%s)'
                return INVALID_INPUT.detailed(
                    message % (last_update_time, self.TIMESTAMP_FORMAT)
                )

        pagination = load_pagination_from_request(
            default_size=self.UPDATES_SIZE
        )
        if isinstance(pagination, ProblemDetail):
            return pagination

        precomposed_entries = []
        # Add entries for Works associated with the collection's catalog.
        updated_works = collection.works_updated_since(
            self._db, last_update_time
        )
        works = pagination.apply(updated_works).all()
        annotator = VerboseAnnotator()
        works_for_feed = []
        for work, licensepool, identifier in works:
            entry = work.verbose_opds_entry or work.simple_opds_entry
            if entry:
                # A cached OPDS entry for this Work already exists.
                # Annotate it with LicensePool- and Identifier-specific
                # information. We have to do this ourselves because
                # we're asking LookupAcquisitionFeed to treat these as
                # precomposed entries, meaning they must be complete
                # as-is.
                entry = etree.fromstring(entry)
                annotator.annotate_work_entry(
                    work, licensepool, None, identifier, None, entry
                )
                precomposed_entries.append(entry)
            else:
                # There is no cached OPDS entry. We'll create one later.
                works_for_feed.append((work, identifier))

        title = "%s Collection Updates for %s" % (collection.protocol, client.url)
        url_params = dict()
        if last_update_time:
            url_params = dict(
                last_update_time=last_update_time.strftime(
                    self.TIMESTAMP_FORMAT
                )
            )
        url = self.collection_feed_url('updates', collection, **url_params)
        update_feed = LookupAcquisitionFeed(
            self._db, title, url, works_for_feed, annotator,
            precomposed_entries=precomposed_entries
        )
        self.add_pagination_links_to_feed(
            pagination, updated_works, update_feed, 'updates', collection,
            **url_params
        )
        return feed_response(update_feed)

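# Illustrative sketch (not part of the controller): one way an application
# might expose these controller methods as routes. The URL rules and route
# functions are assumptions for illustration only, though the endpoint
# names match the ones the controller passes to cdn_url_for; this also
# assumes the add/remove methods above live on the same controller.
def _example_wiring(app, _db):
    controller = CatalogController(_db)

    @app.route('/<collection_metadata_identifier>/updates',
               endpoint='updates')
    def updates(collection_metadata_identifier):
        return controller.updates_feed(collection_metadata_identifier)

    @app.route('/<collection_metadata_identifier>/add', endpoint='add')
    def add(collection_metadata_identifier):
        return controller.add_items(collection_metadata_identifier)

    @app.route('/<collection_metadata_identifier>/remove', endpoint='remove')
    def remove(collection_metadata_identifier):
        return controller.remove_items(collection_metadata_identifier)
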