def run_once(self, progress):
    """Check to see if any identifiers we know about are no longer
    present on the remote. If there are any, remove them.

    :param progress: A TimestampData, ignored.
    """
    super(OPDSForDistributorsReaperMonitor, self).run_once(progress)

    # self.seen_identifiers is full of URNs. We need the values
    # that go in Identifier.identifier.
    parsed, failures = Identifier.parse_urns(self._db, self.seen_identifiers)
    seen_ids = [identifier.id for identifier in parsed.values()]

    # At this point we've gone through the feed and collected all the
    # identifiers. Any license pool in this collection whose identifier
    # we did NOT see is no longer available.
    reap_query = (
        self._db.query(LicensePool)
        .join(Identifier)
        .filter(LicensePool.collection_id == self.collection.id)
        .filter(~Identifier.id.in_(seen_ids))
        .filter(LicensePool.licenses_available > 0)
    )
    pools_reaped = reap_query.count()
    self.log.info(
        "Reaping %s license pools for collection %s." % (
            pools_reaped, self.collection.name
        )
    )
    for pool in reap_query:
        pool.licenses_available = 0
        pool.licenses_owned = 0
    self._db.commit()
    achievements = "License pools removed: %d." % pools_reaped
    return TimestampData(achievements=achievements)
def get_identifiers(self, url=None):
    """Pulls mapped identifiers from a feed of SimplifiedOPDSMessages."""
    response = self.get_response(url=url)
    feed_text = response.text

    # Extract the OPDS messages from the raw XML.
    etree_feed = etree.parse(StringIO(response.text))
    messages = self.importer.extract_messages(self.parser, etree_feed)

    # Look up existing Identifiers for the message URNs; never
    # create new ones here.
    identifiers_by_urn, _failures = Identifier.parse_urns(
        self._db, [message.urn for message in messages], autocreate=False
    )
    self.importer.build_identifier_mapping(identifiers_by_urn.keys())

    # Translate each identifier through the importer's mapping,
    # falling back to the identifier itself when it has no mapping.
    mapping = self.importer.identifier_mapping
    mapped_identifiers = [
        mapping.get(identifier, identifier)
        for identifier in identifiers_by_urn.values()
    ]

    # Pagination links come from the feedparser view of the same feed.
    parsed_feed = feedparser.parse(feed_text)
    next_links = self.importer.extract_next_links(parsed_feed)
    return mapped_identifiers, next_links
def get_identifiers(self, url=None):
    """Pulls mapped identifiers from a feed of SimplifiedOPDSMessages."""
    response = self.get_response(url=url)
    feed_text = response.text

    # Extract the OPDS messages from the raw XML.
    etree_feed = etree.parse(StringIO(response.text))
    messages = self.importer.extract_messages(self.parser, etree_feed)

    # Look up existing Identifiers for the message URNs; never
    # create new ones here.
    identifiers_by_urn, _failures = Identifier.parse_urns(
        self._db, [message.urn for message in messages], autocreate=False
    )
    self.importer.build_identifier_mapping(identifiers_by_urn.keys())

    # Translate each identifier through the importer's mapping,
    # falling back to the identifier itself when it has no mapping.
    mapping = self.importer.identifier_mapping
    mapped_identifiers = [
        mapping.get(identifier, identifier)
        for identifier in identifiers_by_urn.values()
    ]

    # Pagination links come from the feedparser view of the same feed.
    parsed_feed = feedparser.parse(feed_text)
    next_links = self.importer.extract_next_links(parsed_feed)
    return mapped_identifiers, next_links
def remove_items(self, collection_details):
    """Removes identifiers from a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client
    collection = collection_from_details(self._db, client, collection_details)

    urns = request.args.getlist('urn')
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)

    # Every unparseable URN gets an INVALID_URN problem message.
    messages = [
        OPDSMessage(urn, INVALID_URN.status_code, INVALID_URN.detail)
        for urn in failures
    ]

    # Find the IDs of the subset of provided identifiers that are
    # in the catalog, so we know which ones to delete and give a
    # 200 message. Also get a SQLAlchemy clause that selects only
    # those IDs.
    matching_ids, identifier_match_clause = self._in_catalog_subset(
        collection, identifiers_by_urn
    )

    # Use that clause to delete all of the relevant catalog entries.
    self._db.execute(
        collections_identifiers.delete().where(identifier_match_clause)
    )

    # IDs that matched get a 200 message; all others get a 404 message.
    for urn, identifier in identifiers_by_urn.items():
        if identifier.id in matching_ids:
            status, description = HTTP_OK, "Successfully removed"
        else:
            status, description = HTTP_NOT_FOUND, "Not in catalog"
        messages.append(OPDSMessage(urn, status, description))

    title = "%s Catalog Item Removal for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url("remove", collection, urn=urns)
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(removal_feed)
def remove_items(self, collection_details):
    """Removes identifiers from a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client
    collection = collection_from_details(self._db, client, collection_details)

    urns = request.args.getlist('urn')
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)

    # Every unparseable URN gets an INVALID_URN problem message.
    messages = [
        OPDSMessage(urn, INVALID_URN.status_code, INVALID_URN.detail)
        for urn in failures
    ]

    # Find the IDs of the subset of provided identifiers that are
    # in the catalog, so we know which ones to delete and give a
    # 200 message. Also get a SQLAlchemy clause that selects only
    # those IDs.
    matching_ids, identifier_match_clause = self._in_catalog_subset(
        collection, identifiers_by_urn
    )

    # Use that clause to delete all of the relevant catalog entries.
    self._db.execute(
        collections_identifiers.delete().where(identifier_match_clause)
    )

    # IDs that matched get a 200 message; all others get a 404 message.
    for urn, identifier in identifiers_by_urn.items():
        if identifier.id in matching_ids:
            status, description = HTTP_OK, "Successfully removed"
        else:
            status, description = HTTP_NOT_FOUND, "Not in catalog"
        messages.append(OPDSMessage(urn, status, description))

    title = "%s Catalog Item Removal for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url("remove", collection, urn=urns)
    removal_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(removal_feed)
def add_items(self, collection_details):
    """Adds identifiers to a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client
    collection = collection_from_details(self._db, client, collection_details)

    urns = request.args.getlist('urn')
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)

    # Every unparseable URN gets an INVALID_URN problem message.
    messages = [
        OPDSMessage(urn, INVALID_URN.status_code, INVALID_URN.detail)
        for urn in failures
    ]

    # Find the subset of incoming identifiers that are already
    # in the catalog.
    already_in_catalog, ignore = self._in_catalog_subset(
        collection, identifiers_by_urn
    )

    # Everything else needs to be added to the catalog.
    needs_to_be_added = [
        identifier for identifier in identifiers_by_urn.values()
        if identifier.id not in already_in_catalog
    ]
    collection.catalog_identifiers(needs_to_be_added)

    # 200 for identifiers that were already cataloged, 201 for the
    # ones we just added.
    for urn, identifier in identifiers_by_urn.items():
        if identifier.id in already_in_catalog:
            status, description = HTTP_OK, "Already in catalog"
        else:
            status, description = HTTP_CREATED, "Successfully added"
        messages.append(OPDSMessage(urn, status, description))

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url('add', collection, urn=urns)
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(addition_feed)
def add_items(self, collection_details):
    """Adds identifiers to a Collection's catalog"""
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client
    collection = collection_from_details(self._db, client, collection_details)

    urns = request.args.getlist('urn')
    identifiers_by_urn, failures = Identifier.parse_urns(self._db, urns)

    # Every unparseable URN gets an INVALID_URN problem message.
    messages = [
        OPDSMessage(urn, INVALID_URN.status_code, INVALID_URN.detail)
        for urn in failures
    ]

    # Find the subset of incoming identifiers that are already
    # in the catalog.
    already_in_catalog, ignore = self._in_catalog_subset(
        collection, identifiers_by_urn
    )

    # Everything else needs to be added to the catalog.
    needs_to_be_added = [
        identifier for identifier in identifiers_by_urn.values()
        if identifier.id not in already_in_catalog
    ]
    collection.catalog_identifiers(needs_to_be_added)

    # 200 for identifiers that were already cataloged, 201 for the
    # ones we just added.
    for urn, identifier in identifiers_by_urn.items():
        if identifier.id in already_in_catalog:
            status, description = HTTP_OK, "Already in catalog"
        else:
            status, description = HTTP_CREATED, "Successfully added"
        messages.append(OPDSMessage(urn, status, description))

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url('add', collection, urn=urns)
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(addition_feed)
def process_urns(self, urns, collection_details=None, **kwargs):
    """Processes URNs submitted via lookup request

    An authenticated request can process up to 30 URNs at once,
    but must specify a collection under which to catalog the
    URNs. This is used when initially recording the fact that
    certain URNs are in a collection, to get a baseline set of
    metadata. Updates on the books should be obtained through the
    CatalogController.

    An unauthenticated request is used for testing. Such a request
    does not have to specify a collection (the "Unaffiliated"
    collection is used), but can only process one URN at a time.

    :return: None or ProblemDetail
    """
    client = authenticated_client_from_request(self._db, required=False)
    if isinstance(client, ProblemDetail):
        return client
    resolve_now = request.args.get('resolve_now', None) is not None
    collection = collection_from_details(self._db, client, collection_details)

    if client:
        # Authenticated access.
        if not collection:
            return INVALID_INPUT.detailed(_("No collection provided."))
        limit = 30
    else:
        # Anonymous access.
        collection = self.default_collection
        limit = 1

    if resolve_now:
        # You can't force-resolve more than one Identifier at a time.
        limit = 1

    if len(urns) > limit:
        return INVALID_INPUT.detailed(
            _("The maximum number of URNs you can provide at once is %d. (You sent %d)") % (limit, len(urns))
        )

    by_urn, bad_urns = Identifier.parse_urns(
        self._db, urns, allowed_types=self.VALID_TYPES
    )
    self.add_urn_failure_messages(bad_urns)

    # Catalog all identifiers.
    collection.catalog_identifiers(by_urn.values())

    # Load all coverage records in a single query to speed up the
    # code that reports on the status of Identifiers that aren't
    # ready.
    self.bulk_load_coverage_records(by_urn.values())

    resolver = IdentifierResolutionCoverageProvider(
        collection, provide_coverage_immediately=resolve_now,
        **self.coverage_provider_kwargs
    )
    for urn, identifier in by_urn.items():
        self.process_identifier(identifier, urn, resolver=resolver)
def add_with_metadata(self, collection_details):
    """Adds identifiers with their metadata to a Collection's catalog.

    Parses an OPDS feed from the request body, catalogs each entry's
    identifier in the collection, and applies the entry's metadata
    (title, author, language, cover links) as an Edition. Returns an
    acquisition feed of per-URN OPDSMessages describing the outcome.
    """
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client
    collection = collection_from_details(self._db, client, collection_details)

    data_source = DataSource.lookup(self._db, collection.name, autocreate=True)

    feed = feedparser.parse(request.data)
    entries = feed.get("entries", [])
    entries_by_urn = {entry.get('id'): entry for entry in entries}

    identifiers_by_urn, invalid_urns = Identifier.parse_urns(
        self._db, entries_by_urn.keys()
    )

    # FIX: the original initialized `messages` twice (`messages = []`
    # followed later by `messages = list()`), a dead reassignment that
    # would silently drop any messages appended in between. Initialize
    # exactly once.
    messages = []
    for urn in invalid_urns:
        messages.append(
            OPDSMessage(urn, INVALID_URN.status_code, INVALID_URN.detail)
        )

    # Loop-invariant: the set of link relations that count as covers.
    image_types = {Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE}

    for urn, identifier in identifiers_by_urn.items():
        entry = entries_by_urn[urn]
        status = HTTP_OK
        description = "Already in catalog"
        if identifier not in collection.catalog:
            collection.catalog_identifier(identifier)
            status = HTTP_CREATED
            description = "Successfully added"
        message = OPDSMessage(urn, status, description)

        # Get a cover if it exists.
        images = [l for l in entry.get("links", []) if l.get("rel") in image_types]
        links = [LinkData(image.get("rel"), image.get("href")) for image in images]

        # Create an edition to hold the title and author. LicensePool.calculate_work
        # refuses to create a Work when there's no title, and if we have a title,
        # author and language we can attempt to look up the edition in OCLC.
        title = entry.get("title") or "Unknown Title"
        author = ContributorData(
            sort_name=(entry.get("author") or Edition.UNKNOWN_AUTHOR),
            roles=[Contributor.PRIMARY_AUTHOR_ROLE]
        )
        language = entry.get("dcterms_language")

        # Apply only the metadata itself; defer all presentation
        # recalculation (quality, summary, cover choice, etc.).
        presentation = PresentationCalculationPolicy(
            choose_edition=False,
            set_edition_metadata=False,
            classify=False,
            choose_summary=False,
            calculate_quality=False,
            choose_cover=False,
            regenerate_opds_entries=False,
        )
        replace = ReplacementPolicy(presentation_calculation_policy=presentation)
        metadata = Metadata(
            data_source,
            primary_identifier=IdentifierData(identifier.type, identifier.identifier),
            title=title,
            language=language,
            contributors=[author],
            links=links,
        )
        edition, ignore = metadata.edition(self._db)
        metadata.apply(edition, collection, replace=replace)
        messages.append(message)

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url("add_with_metadata", collection)
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(addition_feed)
def process_urns(self, urns, collection_details=None, **kwargs):
    """Processes URNs submitted via lookup request

    An authenticated request can process up to 30 URNs at once,
    but must specify a collection under which to catalog the
    URNs. This is used when initially recording the fact that
    certain URNs are in a collection, to get a baseline set of
    metadata. Updates on the books should be obtained through the
    CatalogController.

    An unauthenticated request is used for testing. Such a request
    does not have to specify a collection (the "Unaffiliated"
    collection is used), but can only process one URN at a time.

    :return: None or ProblemDetail
    """
    client = authenticated_client_from_request(self._db, required=False)
    if isinstance(client, ProblemDetail):
        return client
    resolve_now = request.args.get('resolve_now', None) is not None
    collection = collection_from_details(self._db, client, collection_details)

    if client:
        # Authenticated access.
        if not collection:
            return INVALID_INPUT.detailed(_("No collection provided."))
        limit = 30
    else:
        # Anonymous access.
        collection = self.default_collection
        limit = 1

    if resolve_now:
        # You can't force-resolve more than one Identifier at a time.
        limit = 1

    if len(urns) > limit:
        return INVALID_INPUT.detailed(
            _("The maximum number of URNs you can provide at once is %d. (You sent %d)") % (limit, len(urns))
        )

    by_urn, bad_urns = Identifier.parse_urns(
        self._db, urns, allowed_types=self.VALID_TYPES
    )
    self.add_urn_failure_messages(bad_urns)

    # Catalog all identifiers.
    collection.catalog_identifiers(by_urn.values())

    # Load all coverage records in a single query to speed up the
    # code that reports on the status of Identifiers that aren't
    # ready.
    self.bulk_load_coverage_records(by_urn.values())

    resolver = IdentifierResolutionCoverageProvider(
        collection, provide_coverage_immediately=resolve_now,
        **self.coverage_provider_kwargs
    )
    for urn, identifier in by_urn.items():
        self.process_identifier(identifier, urn, resolver=resolver)
def add_with_metadata(self, collection_details):
    """Adds identifiers with their metadata to a Collection's catalog.

    Parses an OPDS feed from the request body, catalogs each entry's
    identifier in the collection, and applies the entry's metadata
    (title, author, language, cover links) as an Edition. Returns an
    acquisition feed of per-URN OPDSMessages describing the outcome.
    """
    client = authenticated_client_from_request(self._db)
    if isinstance(client, ProblemDetail):
        return client
    collection = collection_from_details(self._db, client, collection_details)

    data_source = DataSource.lookup(self._db, collection.name, autocreate=True)

    feed = feedparser.parse(request.data)
    entries = feed.get("entries", [])
    entries_by_urn = {entry.get('id'): entry for entry in entries}

    identifiers_by_urn, invalid_urns = Identifier.parse_urns(
        self._db, entries_by_urn.keys()
    )

    # FIX: the original initialized `messages` twice (`messages = []`
    # followed later by `messages = list()`), a dead reassignment that
    # would silently drop any messages appended in between. Initialize
    # exactly once.
    messages = []
    for urn in invalid_urns:
        messages.append(
            OPDSMessage(urn, INVALID_URN.status_code, INVALID_URN.detail)
        )

    # Loop-invariant: the set of link relations that count as covers.
    image_types = {Hyperlink.IMAGE, Hyperlink.THUMBNAIL_IMAGE}

    for urn, identifier in identifiers_by_urn.items():
        entry = entries_by_urn[urn]
        status = HTTP_OK
        description = "Already in catalog"
        if identifier not in collection.catalog:
            collection.catalog_identifier(identifier)
            status = HTTP_CREATED
            description = "Successfully added"
        message = OPDSMessage(urn, status, description)

        # Get a cover if it exists.
        images = [l for l in entry.get("links", []) if l.get("rel") in image_types]
        links = [LinkData(image.get("rel"), image.get("href")) for image in images]

        # Create an edition to hold the title and author. LicensePool.calculate_work
        # refuses to create a Work when there's no title, and if we have a title,
        # author and language we can attempt to look up the edition in OCLC.
        title = entry.get("title") or "Unknown Title"
        author = ContributorData(
            sort_name=(entry.get("author") or Edition.UNKNOWN_AUTHOR),
            roles=[Contributor.PRIMARY_AUTHOR_ROLE]
        )
        language = entry.get("dcterms_language")

        # Apply only the metadata itself; defer all presentation
        # recalculation (quality, summary, cover choice, etc.).
        presentation = PresentationCalculationPolicy(
            choose_edition=False,
            set_edition_metadata=False,
            classify=False,
            choose_summary=False,
            calculate_quality=False,
            choose_cover=False,
            regenerate_opds_entries=False,
        )
        replace = ReplacementPolicy(presentation_calculation_policy=presentation)
        metadata = Metadata(
            data_source,
            primary_identifier=IdentifierData(identifier.type, identifier.identifier),
            title=title,
            language=language,
            contributors=[author],
        links=links,
        )
        edition, ignore = metadata.edition(self._db)
        metadata.apply(edition, collection, replace=replace)
        messages.append(message)

    title = "%s Catalog Item Additions for %s" % (collection.protocol, client.url)
    url = self.collection_feed_url("add_with_metadata", collection)
    addition_feed = AcquisitionFeed(
        self._db, title, url, [], VerboseAnnotator,
        precomposed_entries=messages
    )
    return feed_response(addition_feed)