Example #1
0
    def save_proquest_token(self, db, patron, duration, token):
        """Save a ProQuest JWT bearer token for later use.

        The token is stored through ``Credential.temporary_token_create`` for
        the ProQuest data source (looked up with ``autocreate=True``).

        NOTE: The token is a bearer secret, so its value is deliberately
        kept out of the log messages emitted by this method.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param patron: Patron object
        :type patron: core.model.patron.Patron

        :param duration: How long this token can be valid
        :type duration: datetime.timedelta

        :param token: ProQuest JWT bearer token
        :type token: str

        :return: Credential object containing a new ProQuest JWT bearer token
        :rtype: Optional[core.model.credential.Credential]

        :raises ValueError: If any of the arguments has an unexpected type
            or ``token`` is an empty string
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')
        if not isinstance(patron, Patron):
            raise ValueError('"patron" argument must be an instance of Patron class')
        if not isinstance(duration, datetime.timedelta):
            raise ValueError(
                '"duration" argument must be an instance of datetime.timedelta class'
            )
        if not isinstance(token, str) or not token:
            raise ValueError('"token" argument must be a non-empty string')

        # The token value is intentionally not logged: anyone able to read
        # the logs could otherwise reuse it to impersonate the patron.
        self._logger.debug("Started saving a ProQuest JWT bearer token")

        data_source = DataSource.lookup(
            db, DataSourceConstants.PROQUEST, autocreate=True
        )
        credential, is_new = Credential.temporary_token_create(
            db,
            data_source,
            ProQuestCredentialType.PROQUEST_JWT_TOKEN.value,
            patron,
            duration,
            token,
        )

        self._logger.debug(
            "Finished saving a ProQuest JWT bearer token: {0} (new = {1})".format(
                credential, is_new
            )
        )

        return credential
Example #2
0
    def download_all_feed_pages(self, db):
        """Download all available feed pages.

        NOTE: This method is a generator, so nothing in its body (including
        the validation of the ``db`` argument) runs until the caller starts
        iterating over the result.

        Pages are requested one by one starting from page 1, using the page
        size taken from the configuration. Iteration stops as soon as:
        - a page is reported as empty or incorrect,
        - an HTTP error is raised while downloading a page,
        - the response turns out not to be valid JSON.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :return: Iterable list of feed pages in a form of Python dictionaries
        :rtype: Iterable[dict]

        :raises ValueError: (on first iteration) If ``db`` is not a valid session
        """
        if not is_session(db):
            raise ValueError(
                '"db" argument must be a valid SQLAlchemy session')

        self._logger.info(
            "Started downloading all of the pages of a paginated OPDS 2.0 feed"
        )

        with self._get_configuration(db) as configuration:
            page = 1

            while True:
                # Only the download and the sanity check are guarded.
                # The `yield` below is kept outside of the `try` block so that
                # an exception thrown into the generator by its consumer
                # is never mistaken for the end of the feed.
                try:
                    feed = self._download_feed_page(configuration, page,
                                                    configuration.page_size)
                    is_last_page = self._is_feed_page_empty_or_incorrect(feed)
                except HTTPError as error:
                    self._logger.debug(
                        "Got an HTTP error {0}, assuming we reached the end of the feed"
                        .format(error))
                    break
                except ProQuestAPIInvalidJSONResponseError:
                    self._logger.exception(
                        "Got unexpected ProQuestAPIInvalidJSONResponseError, assuming we reached the end of the feed"
                    )
                    break

                if is_last_page:
                    break

                page += 1

                yield feed

        # Reached only when the feed is exhausted; if the consumer stops
        # iterating early the generator is closed before getting here.
        self._logger.info(
            "Finished downloading all of the pages of a paginated OPDS 2.0 feed"
        )
Example #3
0
    def download_feed_page(self, db, page, hits_per_page):
        """Download a single page of a paginated OPDS 2.0 feed.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param page: Page index (max = 32,766); values from 0 up to and
            including ``MAX_PAGE_INDEX`` are accepted
        :type page: int

        :param hits_per_page: Number of publications on a single page (max = 32,766);
            values from 0 up to and including ``MAX_PAGE_SIZE`` are accepted
        :type hits_per_page: int

        :return: Python dictionary object containing the feed's page
        :rtype: dict

        :raises ValueError: If ``db`` is not a valid session, or ``page`` /
            ``hits_per_page`` is not an integer or is out of the allowed range
        """
        if not is_session(db):
            raise ValueError(
                '"db" argument must be a valid SQLAlchemy session')
        if not isinstance(page, int):
            raise ValueError('"page" argument must be an integer')
        # The upper bound is inclusive, so the message says
        # "not greater than" rather than "less than".
        if page < 0 or page > self.MAX_PAGE_INDEX:
            raise ValueError(
                "Page argument must be a non-negative number not greater than {0}".
                format(self.MAX_PAGE_INDEX))
        if not isinstance(hits_per_page, int):
            raise ValueError('"hits_per_page" argument must be an integer')
        if hits_per_page < 0 or hits_per_page > self.MAX_PAGE_SIZE:
            raise ValueError(
                "Hits per page argument must be a non-negative number not greater than {0}"
                .format(self.MAX_PAGE_SIZE))

        self._logger.info(
            "Started downloading page # {0} ({1} hits) of a paginated OPDS 2.0 feed "
            .format(page, hits_per_page))

        with self._get_configuration(db) as configuration:
            feed = self._download_feed_page(configuration, page, hits_per_page)

            self._logger.info(
                "Finished downloading page # {0} ({1} hits) of a paginated OPDS 2.0 feed"
                .format(page, hits_per_page))

            return feed
Example #4
0
    def create_token(self, db, affiliation_id):
        """Create a new JWT bearer token.

        Sends a GET request to the partner auth token service URL taken from
        the configuration, passing the affiliation ID as the ``userName``
        parameter, and extracts the token from the JSON response.

        NOTE: The response body contains the freshly issued bearer token,
        so it is deliberately not written to the log.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param affiliation_id: SAML affiliation ID used as a patron's unique identifier by ProQuest
        :type affiliation_id: str

        :return: New JWT bearer token
        :rtype: str

        :raises ValueError: If ``db`` is not a valid session or
            ``affiliation_id`` is not a non-empty string
        :raises ProQuestAPIMissingJSONPropertyError: If the response
            does not contain the token field
        """
        if not is_session(db):
            raise ValueError(
                '"db" argument must be a valid SQLAlchemy session')
        if not affiliation_id or not isinstance(affiliation_id, str):
            raise ValueError(
                '"affiliation_id" argument must be a non-empty string')

        self._logger.info(
            "Started creating a new JWT bearer token for affiliation ID {0}".
            format(affiliation_id))

        with self._get_configuration(db) as configuration:
            parameters = {"userName": affiliation_id}
            response, response_json = self._send_request(
                configuration,
                "get",
                configuration.partner_auth_token_service_url,
                parameters,
                response_must_be_json=True,
            )

            if self.TOKEN_FIELD not in response_json:
                raise ProQuestAPIMissingJSONPropertyError(
                    response, self.TOKEN_FIELD)

            # Success is reported only once the token is known to be present,
            # and without the token itself: it is a secret that must not
            # end up in log files.
            self._logger.info(
                "Finished creating a new JWT bearer token for affiliation ID {0}"
                .format(affiliation_id))

            return response_json[self.TOKEN_FIELD]
Example #5
0
    def lookup_proquest_token(self, db, patron):
        """Look up an existing JWT bearer token required to use ProQuest API.

        The lookup is delegated to ``Credential.lookup_by_patron`` with
        persistent tokens disallowed.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param patron: Patron object
        :type patron: core.model.patron.Patron

        :return: Credential object containing the existing ProQuest JWT bearer token (if any)
        :rtype: Optional[core.model.credential.Credential]

        :raises ValueError: If ``db`` is not a valid session or
            ``patron`` is not a Patron instance
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')
        if not isinstance(patron, Patron):
            raise ValueError('"patron" argument must be an instance of Patron class')

        self._logger.debug("Started looking up for a ProQuest JWT token")

        lookup_options = {
            "allow_persistent_token": False,
            "auto_create_datasource": True,
        }
        existing_credential = Credential.lookup_by_patron(
            db,
            DataSourceConstants.PROQUEST,
            ProQuestCredentialType.PROQUEST_JWT_TOKEN.value,
            patron,
            **lookup_options
        )

        finish_message = "Finished looking up for a ProQuest JWT token: {0}".format(
            existing_credential
        )
        self._logger.debug(finish_message)

        # Any falsy lookup result is normalized to None.
        return existing_credential or None
Example #6
0
    def get_book(self, db, token, document_id):
        """Get a book by its ProQuest Doc ID.

        NOTE: There are two different cases to consider:
        - Open-access books: in this case ProQuest API returns the book content.
        - Adobe DRM copy protected books: in this case ProQuest API returns an ACSM file containing
        information about downloading a digital publication.

        NOTE: The JWT bearer token is a secret, so it is deliberately
        kept out of the log messages emitted by this method.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param token: JWT bearer token created using `ProQuestAPIClient.create_token` method
        :type token: str

        :param document_id: ProQuest Doc ID
        :type document_id: str

        :return: Book instance containing either an ACS link to the book or the book content
        :rtype: ProQuestBook

        :raises ValueError: If ``db`` is not a valid session, or ``token`` /
            ``document_id`` is not a non-empty string
        :raises ProQuestAPIMissingJSONPropertyError: If a JSON response
            does not contain the download link field
        """
        if not is_session(db):
            raise ValueError(
                '"db" argument must be a valid SQLAlchemy session')
        if not token or not isinstance(token, str):
            raise ValueError('"token" argument must be a non-empty string')
        if not document_id or not isinstance(document_id, str):
            raise ValueError('"document_id" must be a non-empty string')

        self._logger.info(
            "Started fetching a book link for Doc ID {0}".format(document_id))

        with self._get_configuration(db) as configuration:
            parameters = {"docID": document_id}
            response, response_json = self._send_request(
                configuration,
                "get",
                configuration.download_link_service_url,
                parameters,
                token,
            )

            # No JSON payload means that the response body
            # is the content of an open-access book.
            if not response_json:
                self._logger.info(
                    "Finished fetching an open-access book for Doc ID {0}"
                    .format(document_id))

                return ProQuestBook(content=bytes(response.content))

            self._logger.info(
                "Finished fetching a download link for Doc ID {0}: {1}"
                .format(document_id, response_json))

            if self.DOWNLOAD_LINK_FIELD not in response_json:
                raise ProQuestAPIMissingJSONPropertyError(
                    response, self.DOWNLOAD_LINK_FIELD)

            # The API returns another link leading to either a DRM-free book or ACSM file:
            # - DRM-free books are publicly accessible, meaning that their download links
            #   are not protected by IP whitelisting and we shall pass the link to the client
            #   to avoid proxying the content through Circulation Manager.
            # - DRM-protected download links are protected by IP whitelisting
            #   and can be called only from Circulation Manager,
            #   meaning that Circulation Manager has to download an ACSM file
            #   and proxy it to the client.
            #   However, it shouldn't incur any bad consequences because
            #   ACSM files are usually relatively small.
            link = response_json[self.DOWNLOAD_LINK_FIELD]

            # In the case of DRM-free books we return a link immediately
            # and we'll pass it to the client app.
            if self.DRM_FREE_DOWNLOAD_LINK_KEYWORD in link:
                return ProQuestBook(link=link)

            # In the case of Adobe DRM-protected books we have to download an ACSM file
            # and pass its content to the client app.
            response, _ = self._send_request(configuration,
                                             "get",
                                             link, {},
                                             token,
                                             response_must_be_json=False)

            self._logger.info(
                "Finished fetching an ACSM file for Doc ID {0}"
                .format(document_id))

            return ProQuestBook(
                content=bytes(response.content),
                content_type=DeliveryMechanism.ADOBE_DRM,
            )
Example #7
0
    def lookup_patron_affiliation_id(
        self,
        db,
        patron,
        affiliation_attributes=(
            SAMLAttributeType.eduPersonPrincipalName.name,
            SAMLAttributeType.eduPersonScopedAffiliation.name,
        ),
    ):
        """Look up for patron's SAML affiliation ID.

        The attributes are checked in the order they are listed in
        ``affiliation_attributes``; the first one present in the patron's
        SAML subject wins and the first of its values is returned.

        :param db: Database session
        :type db: sqlalchemy.orm.session.Session

        :param patron: Patron object
        :type patron: core.model.patron.Patron

        :param affiliation_attributes: SAML attributes containing an affiliation ID.
            A falsy value (for example, None or an empty tuple) means that
            there are no attributes to look at, so no affiliation ID is found
        :type affiliation_attributes: Optional[Tuple]

        :return: Patron's SAML affiliation ID (if any)
        :rtype: Optional[str]

        :raises ValueError: If ``db`` is not a valid session, ``patron`` is not
            a Patron instance or ``affiliation_attributes`` is a non-empty non-tuple
        """
        if not is_session(db):
            raise ValueError('"db" argument must be a valid SQLAlchemy session')
        if not isinstance(patron, Patron):
            raise ValueError('"patron" argument must be an instance of Patron class')
        if affiliation_attributes and not isinstance(affiliation_attributes, tuple):
            raise ValueError('"affiliation_attributes" argument must be a tuple')

        self._logger.debug(
            "Started looking for SAML affiliation ID in for patron {0} in {1}".format(
                patron, affiliation_attributes
            )
        )

        saml_credential = self._lookup_saml_token(db, patron)

        if not saml_credential:
            self._logger.debug("Patron {0} does not have a SAML token".format(patron))
            return None

        saml_subject = self._extract_saml_subject(saml_credential)

        self._logger.debug(
            "Patron {0} has the following SAML subject: {1}".format(
                patron, saml_subject
            )
        )

        affiliation_id = None

        # The validation above lets falsy values such as None through,
        # so fall back to an empty tuple instead of failing with
        # a TypeError while trying to iterate over a non-iterable.
        for attribute_name in affiliation_attributes or ():
            self._logger.debug("Trying to find attribute {0}".format(attribute_name))

            if attribute_name in saml_subject.attribute_statement.attributes:
                attribute = saml_subject.attribute_statement.attributes[attribute_name]

                self._logger.debug(
                    "Found {0} with the following values: {1}".format(
                        attribute, attribute.values
                    )
                )

                affiliation_id = first_or_default(attribute.values)
                break

        self._logger.debug(
            "Finished looking for SAML affiliation ID in for patron {0} in {1}: {2}".format(
                patron, affiliation_attributes, affiliation_id
            )
        )

        return affiliation_id