def save_proquest_token(self, db, patron, duration, token):
    """Save a ProQuest JWT bearer token for later use.

    The token is stored as a temporary credential attached to the ProQuest
    data source (which is created if it does not exist yet).

    NOTE: The token value is intentionally kept out of the log messages:
    it is a bearer credential and anybody able to read the logs could reuse it.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :param patron: Patron object
    :type patron: core.model.patron.Patron

    :param duration: How long this token can be valid
    :type duration: datetime.timedelta

    :param token: ProQuest JWT bearer token
    :type token: str

    :return: Credential object containing a new ProQuest JWT bearer token
    :rtype: Optional[core.model.credential.Credential]

    :raise ValueError: If one of the arguments has an unexpected type or the token is empty
    """
    if not is_session(db):
        raise ValueError('"db" argument must be a valid SQLAlchemy session')
    if not isinstance(patron, Patron):
        raise ValueError('"patron" argument must be an instance of Patron class')
    if not isinstance(duration, datetime.timedelta):
        raise ValueError(
            '"duration" argument must be an instance of datetime.timedelta class'
        )
    if not isinstance(token, str) or not token:
        raise ValueError('"token" argument must be a non-empty string')

    # Do not log the token itself: it is a secret.
    self._logger.debug("Started saving a ProQuest JWT bearer token")

    data_source = DataSource.lookup(
        db, DataSourceConstants.PROQUEST, autocreate=True
    )
    credential, is_new = Credential.temporary_token_create(
        db,
        data_source,
        ProQuestCredentialType.PROQUEST_JWT_TOKEN.value,
        patron,
        duration,
        token,
    )

    # NOTE(review): this relies on the string form of Credential not exposing
    # the stored secret -- confirm against the Credential model.
    self._logger.debug(
        "Finished saving a ProQuest JWT bearer token: {0} (new = {1})".format(
            credential, is_new
        )
    )

    return credential
def download_all_feed_pages(self, db):
    """Download all available feed pages.

    Pages are requested one by one, starting from page 1 and using
    the page size taken from the configuration. Iteration stops when:
    - a page is reported as empty or incorrect,
    - an HTTP error is raised while downloading a page,
    - the response cannot be parsed as JSON.

    NOTE: This is a generator function, so nothing in its body (including
    the validation of the `db` argument) runs until the result is iterated.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :return: Iterable list of feed pages in a form of Python dictionaries
    :rtype: Iterable[dict]

    :raise ValueError: If `db` is not a valid SQLAlchemy session (raised on the first iteration)
    """
    if not is_session(db):
        raise ValueError('"db" argument must be a valid SQLAlchemy session')

    self._logger.info(
        "Started downloading all of the pages of a paginated OPDS 2.0 feed"
    )

    with self._get_configuration(db) as configuration:
        page = 1

        while True:
            try:
                feed = self._download_feed_page(
                    configuration, page, configuration.page_size
                )

                if self._is_feed_page_empty_or_incorrect(feed):
                    break

                page += 1

                yield feed
            except HTTPError as error:
                # The feed does not tell us how many pages it has,
                # so an HTTP error is treated as "no more pages".
                self._logger.debug(
                    "Got an HTTP error {0}, assuming we reached the end of the feed".format(
                        error
                    )
                )
                break
            except ProQuestAPIInvalidJSONResponseError:
                # The message names the exception that is actually caught here.
                self._logger.exception(
                    "Got unexpected ProQuestAPIInvalidJSONResponseError, "
                    "assuming we reached the end of the feed"
                )
                break

    self._logger.info(
        "Finished downloading all of the pages of a paginated OPDS 2.0 feed"
    )
def download_feed_page(self, db, page, hits_per_page):
    """Download a single page of a paginated OPDS 2.0 feed.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :param page: Page index (max = 32,766)
    :type page: int

    :param hits_per_page: Number of publications on a single page (max = 32,766)
    :type hits_per_page: int

    :return: Python dictionary object containing the feed's page
    :rtype: dict

    :raise ValueError: If `db` is not a valid session, or `page`/`hits_per_page`
        is not an integer in the range [0, MAX_PAGE_INDEX] / [0, MAX_PAGE_SIZE]
    """
    if not is_session(db):
        raise ValueError('"db" argument must be a valid SQLAlchemy session')

    if not isinstance(page, int):
        raise ValueError('"page" argument must be an integer')
    # The upper bound is inclusive: only values strictly greater than the maximum are rejected.
    if page < 0 or page > self.MAX_PAGE_INDEX:
        raise ValueError(
            "Page argument must be a non-negative number not greater than {0}".format(
                self.MAX_PAGE_INDEX
            )
        )

    if not isinstance(hits_per_page, int):
        raise ValueError('"hits_per_page" argument must be an integer')
    if hits_per_page < 0 or hits_per_page > self.MAX_PAGE_SIZE:
        raise ValueError(
            "Hits per page argument must be a non-negative number not greater than {0}".format(
                self.MAX_PAGE_SIZE
            )
        )

    self._logger.info(
        "Started downloading page # {0} ({1} hits) of a paginated OPDS 2.0 feed ".format(
            page, hits_per_page
        )
    )

    with self._get_configuration(db) as configuration:
        feed = self._download_feed_page(configuration, page, hits_per_page)

        self._logger.info(
            "Finished downloading page # {0} ({1} hits) of a paginated OPDS 2.0 feed".format(
                page, hits_per_page
            )
        )

        return feed
def create_token(self, db, affiliation_id):
    """Create a new JWT bearer token.

    Sends a "get" request to the partner authentication token service URL
    taken from the configuration, passing the affiliation ID as the `userName`
    parameter, and extracts the token from the JSON response.

    NOTE: The response payload is intentionally kept out of the log messages
    because it contains the newly issued bearer token.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :param affiliation_id: SAML affiliation ID used as a patron's unique identifier by ProQuest
    :type affiliation_id: str

    :return: New JWT bearer token
    :rtype: str

    :raise ValueError: If `db` is not a valid session or `affiliation_id` is not a non-empty string
    :raise ProQuestAPIMissingJSONPropertyError: If the response does not contain the token field
    """
    if not is_session(db):
        raise ValueError('"db" argument must be a valid SQLAlchemy session')
    if not affiliation_id or not isinstance(affiliation_id, str):
        raise ValueError('"affiliation_id" argument must be a non-empty string')

    self._logger.info(
        "Started creating a new JWT bearer token for affiliation ID {0}".format(
            affiliation_id
        )
    )

    with self._get_configuration(db) as configuration:
        parameters = {"userName": affiliation_id}
        response, response_json = self._send_request(
            configuration,
            "get",
            configuration.partner_auth_token_service_url,
            parameters,
            response_must_be_json=True,
        )

        # Validate the response first, so that success is reported
        # only when a token has actually been received.
        if self.TOKEN_FIELD not in response_json:
            raise ProQuestAPIMissingJSONPropertyError(response, self.TOKEN_FIELD)

        # Do not log the response: it carries the bearer token.
        self._logger.info(
            "Finished creating a new JWT bearer token for affiliation ID {0}".format(
                affiliation_id
            )
        )

        return response_json[self.TOKEN_FIELD]
def lookup_proquest_token(self, db, patron):
    """Find the patron's existing ProQuest JWT bearer token.

    Only non-persistent credentials of the ProQuest JWT token type are looked up;
    the ProQuest data source is created automatically when it is missing.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :param patron: Patron object
    :type patron: core.model.patron.Patron

    :return: Credential object containing the existing ProQuest JWT bearer token (if any)
    :rtype: Optional[core.model.credential.Credential]

    :raise ValueError: If `db` is not a valid session or `patron` is not a Patron
    """
    session_is_valid = is_session(db)
    if not session_is_valid:
        raise ValueError('"db" argument must be a valid SQLAlchemy session')

    patron_is_valid = isinstance(patron, Patron)
    if not patron_is_valid:
        raise ValueError('"patron" argument must be an instance of Patron class')

    log = self._logger
    log.debug("Started looking up for a ProQuest JWT token")

    existing_token = Credential.lookup_by_patron(
        db,
        DataSourceConstants.PROQUEST,
        ProQuestCredentialType.PROQUEST_JWT_TOKEN.value,
        patron,
        allow_persistent_token=False,
        auto_create_datasource=True,
    )

    finished_message = "Finished looking up for a ProQuest JWT token: {0}".format(
        existing_token
    )
    log.debug(finished_message)

    # Any falsy lookup result is reported to the caller as None.
    return existing_token if existing_token else None
def get_book(self, db, token, document_id):
    """Get a book by its ProQuest Doc ID.

    NOTE: There are two different cases to consider:
    - Open-access books: in this case ProQuest API returns the book content.
    - Adobe DRM copy protected books: in this case ProQuest API returns an ACSM file
      containing information about downloading a digital publication.

    NOTE: The JWT bearer token is intentionally kept out of the log messages:
    it is a secret and anybody able to read the logs could reuse it.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :param token: JWT bearer token created using `ProQuestAPIClient.create_token` method
    :type token: str

    :param document_id: ProQuest Doc ID
    :type document_id: str

    :return: Book instance containing either an ACS link to the book or the book content
    :rtype: ProQuestBook

    :raise ValueError: If `db` is not a valid session, or `token`/`document_id`
        is not a non-empty string
    :raise ProQuestAPIMissingJSONPropertyError: If a JSON response does not contain
        the download link field
    """
    if not is_session(db):
        raise ValueError('"db" argument must be a valid SQLAlchemy session')
    if not token or not isinstance(token, str):
        raise ValueError('"token" argument must be a non-empty string')
    if not document_id or not isinstance(document_id, str):
        raise ValueError('"document_id" must be a non-empty string')

    self._logger.info(
        "Started fetching a book link for Doc ID {0}".format(document_id)
    )

    with self._get_configuration(db) as configuration:
        parameters = {"docID": document_id}
        response, response_json = self._send_request(
            configuration,
            "get",
            configuration.download_link_service_url,
            parameters,
            token,
        )

        if not response_json:
            # No JSON payload: the response body is the open-access book itself.
            self._logger.info(
                "Finished fetching an open-access book for Doc ID {0}".format(
                    document_id
                )
            )

            return ProQuestBook(content=bytes(response.content))

        self._logger.info(
            "Finished fetching a download link for Doc ID {0}: {1}".format(
                document_id, response_json
            )
        )

        if self.DOWNLOAD_LINK_FIELD not in response_json:
            raise ProQuestAPIMissingJSONPropertyError(
                response, self.DOWNLOAD_LINK_FIELD
            )

        # The API returns another link leading to either a DRM-free book or ACSM file:
        # - DRM-free books are publicly accessible, meaning that their download links
        #   are not protected by IP whitelisting and we shall pass the link to the client
        #   to avoid proxying the content through Circulation Manager.
        # - DRM-protected download links are protected by IP whitelisting
        #   and can be called only from Circulation Manager,
        #   meaning that Circulation Manager has to download an ACSM file
        #   and proxy it to the client.
        #   However, it shouldn't incur any bad consequences because
        #   ACSM files are usually relatively small.
        link = response_json[self.DOWNLOAD_LINK_FIELD]

        # In the case of DRM-free books we return a link immediately
        # and we'll pass it to the client app.
        if self.DRM_FREE_DOWNLOAD_LINK_KEYWORD in link:
            return ProQuestBook(link=link)

        # In the case of Adobe DRM-protected books we have to download an ACSM file
        # and pass its content to the client app.
        response, _ = self._send_request(
            configuration, "get", link, {}, token, response_must_be_json=False
        )

        self._logger.info(
            "Finished fetching an ACSM file for Doc ID {0}".format(document_id)
        )

        return ProQuestBook(
            content=bytes(response.content),
            content_type=DeliveryMechanism.ADOBE_DRM,
        )
def lookup_patron_affiliation_id(
    self,
    db,
    patron,
    affiliation_attributes=(
        SAMLAttributeType.eduPersonPrincipalName.name,
        SAMLAttributeType.eduPersonScopedAffiliation.name,
    ),
):
    """Look up for patron's SAML affiliation ID.

    The attributes are checked in the order they are listed; the search stops
    at the first attribute present in the patron's SAML subject and the first
    of its values (if any) is returned.

    :param db: Database session
    :type db: sqlalchemy.orm.session.Session

    :param patron: Patron object
    :type patron: core.model.patron.Patron

    :param affiliation_attributes: SAML attributes containing an affiliation ID.
        A falsy value (for example, None or an empty tuple) means "no attributes"
        and always yields None
    :type affiliation_attributes: Tuple

    :return: Patron's SAML affiliation ID (if any)
    :rtype: Optional[str]

    :raise ValueError: If `db` is not a valid session, `patron` is not a Patron,
        or `affiliation_attributes` is a non-empty value that is not a tuple
    """
    if not is_session(db):
        raise ValueError('"db" argument must be a valid SQLAlchemy session')
    if not isinstance(patron, Patron):
        raise ValueError('"patron" argument must be an instance of Patron class')
    if affiliation_attributes and not isinstance(affiliation_attributes, tuple):
        raise ValueError('"affiliation_attributes" argument must be a tuple')

    self._logger.debug(
        "Started looking for SAML affiliation ID in for patron {0} in {1}".format(
            patron, affiliation_attributes
        )
    )

    saml_credential = self._lookup_saml_token(db, patron)

    if not saml_credential:
        self._logger.debug("Patron {0} does not have a SAML token".format(patron))
        return None

    saml_subject = self._extract_saml_subject(saml_credential)

    self._logger.debug(
        "Patron {0} has the following SAML subject: {1}".format(
            patron, saml_subject
        )
    )

    affiliation_id = None

    # The validation above lets falsy values (e.g. None) through,
    # so iterate over an empty tuple instead of failing with TypeError.
    attribute_names = affiliation_attributes or ()

    for attribute_name in attribute_names:
        self._logger.debug("Trying to find attribute {0}".format(attribute_name))

        if attribute_name in saml_subject.attribute_statement.attributes:
            attribute = saml_subject.attribute_statement.attributes[attribute_name]

            self._logger.debug(
                "Found {0} with the following values: {1}".format(
                    attribute, attribute.values
                )
            )

            affiliation_id = first_or_default(attribute.values)
            break

    self._logger.debug(
        "Finished looking for SAML affiliation ID in for patron {0} in {1}: {2}".format(
            patron, affiliation_attributes, affiliation_id
        )
    )

    return affiliation_id