Python FormatData Examples, core.metadata_layer.FormatData Python Examples

Example #1

0

Show file

File: odl.py Project: datalogics-dans/circulation

 def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None):
     """Extend the generic OPDS entry detail with ODL license formats.

     The base detail dictionary comes from
     ``OPDSImporter._detail_for_elementtree_entry``. One ``FormatData``
     is then added for every (content type, DRM scheme) pair found in
     the entry's ``odl:license`` tags. A license without any
     ``odl:protection`` tag contributes a single format whose
     ``drm_scheme`` is None.

     :param parser: Parser object providing ``_xpath`` and
         ``text_of_optional_subtag``.
     :param entry_tag: The entry element being processed.
     :param feed_url: Passed through to the base implementation.
     :return: The detail dictionary. When at least one ``odl:license``
         tag is present, ``data['circulation']['formats']`` is created
         if necessary and extended with the formats described above.
     """
     subtag = parser.text_of_optional_subtag
     data = OPDSImporter._detail_for_elementtree_entry(
         parser, entry_tag, feed_url)

     formats = []
     for odl_license_tag in parser._xpath(entry_tag, 'odl:license') or []:
         content_type = subtag(odl_license_tag, 'dcterms:format')
         protection_tags = parser._xpath(odl_license_tag,
                                         'odl:protection') or []
         drm_schemes = [
             subtag(protection_tag, 'dcterms:format')
             for protection_tag in protection_tags
         ]
         # A license with no protection tags still yields one format,
         # recorded with drm_scheme=None.
         for drm_scheme in drm_schemes or [None]:
             formats.append(
                 FormatData(
                     content_type=content_type,
                     drm_scheme=drm_scheme,
                     rights_uri=RightsStatus.IN_COPYRIGHT,
                 ))

     # Register the collected formats exactly once. Extending inside the
     # license loop would re-append the formats of every earlier license
     # on each later iteration, because ``formats`` accumulates across
     # licenses.
     if formats:
         if not data.get('circulation'):
             data['circulation'] = dict()
         circulation = data['circulation']
         if not circulation.get('formats'):
             circulation['formats'] = []
         circulation['formats'].extend(formats)
     return data

Example #2

0

Show file

File: enki.py Project: jonathangreen/circulation

    def extract_circulation(self, primary_identifier, availability, formattype):
        """Turn the 'availability' portion of an Enki API response into
        a CirculationData.

        :param primary_identifier: Identifier handed to the resulting
            CirculationData unchanged.
        :param availability: Mapping read via ``.get`` for the keys
            "totalCopies", "availableCopies", "onHold" and "accessType".
        :param formattype: "PDF" or "EPUB". Any other value is logged as
            an error and the result carries no formats.
        :return: A CirculationData, or None when ``availability`` is
            falsy.
        """
        if not availability:
            return None
        licenses_owned = availability.get("totalCopies", 0)
        licenses_available = availability.get("availableCopies", 0)
        hold = availability.get("onHold", 0)

        # "acs" access means Adobe DRM; everything else is treated as
        # DRM-free.
        drm_type = EnkiAPI.no_drm
        if availability.get("accessType") == "acs":
            drm_type = EnkiAPI.adobe_drm

        content_type = None
        if formattype == "PDF":
            content_type = Representation.PDF_MEDIA_TYPE
        elif formattype == "EPUB":
            content_type = Representation.EPUB_MEDIA_TYPE

        formats = []
        # Identity comparison is the correct test for None (PEP 8).
        if content_type is not None:
            formats.append(FormatData(content_type, drm_scheme=drm_type))
        else:
            self.log.error("Unrecognized formattype: %s", formattype)

        circulationdata = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=primary_identifier,
            formats=formats,
            licenses_owned=int(licenses_owned),
            licenses_available=int(licenses_available),
            licenses_reserved=0,
            patrons_in_hold_queue=int(hold),
        )
        return circulationdata

Example #3

0

Show file

    def test_circulationdata_can_be_deepcopied(self):
        """A populated CirculationData must survive ``deepcopy``.

        Guards against storing something on the CirculationData that
        cannot be copied (e.g., self.log).
        """
        # These two are constructed but not attached to the
        # CirculationData below.
        subject = SubjectData(Subject.TAG, "subject")
        contributor = ContributorData()

        gutenberg_identifier = IdentifierData(Identifier.GUTENBERG_ID, "1")
        download_link = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
        epub_format = FormatData(
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.NO_DRM,
        )
        open_access_rights = RightsStatus.GENERIC_OPEN_ACCESS

        original = CirculationData(
            DataSource.GUTENBERG,
            primary_identifier=gutenberg_identifier,
            links=[download_link],
            licenses_owned=5,
            licenses_available=5,
            licenses_reserved=None,
            patrons_in_hold_queue=None,
            formats=[epub_format],
            default_rights_uri=open_access_rights,
        )

        duplicate = deepcopy(original)

        # Reaching this point means deepcopy raised nothing.
        assert duplicate is not None

Example #4

0

Show file

    def test_rights_status_commercial_link_with_rights(self):
        """An in-copyright DRM link yields a non-open-access pool with a
        single in-copyright delivery mechanism.
        """
        overdrive_identifier = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        encrypted_link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        adobe_format = FormatData(
            content_type=encrypted_link.media_type,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
            link=encrypted_link,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        circulation_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_identifier,
            links=[encrypted_link],
            formats=[adobe_format],
        )

        # Replace the pool's formats with the ones supplied above.
        replace_formats = ReplacementPolicy(formats=True)

        pool, _is_new = circulation_data.license_pool(
            self._db, self._default_collection)
        circulation_data.apply(self._db, pool.collection, replace_formats)

        assert False == pool.open_access
        mechanisms = pool.delivery_mechanisms
        assert 1 == len(mechanisms)
        assert RightsStatus.IN_COPYRIGHT == mechanisms[0].rights_status.uri

Example #5

0

Show file

    def test_license_pool_sets_default_license_values(self):
        """A LicensePool can be created without license counts.

        Nothing is known about how many copies have been licensed, yet
        the pool is still created so format information can be stored.
        """
        overdrive_identifier = IdentifierData(Identifier.OVERDRIVE_ID, "1")
        adobe_pdf = FormatData(
            content_type=Representation.PDF_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        circulation = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_identifier,
            formats=[adobe_pdf],
        )

        collection = self._default_collection
        pool, is_new = circulation.license_pool(self._db, collection)

        assert True == is_new
        assert collection == pool.collection

        # The conservative starting assumption: no licenses are owned,
        # so every counter begins at zero.
        assert 0 == pool.licenses_owned
        assert 0 == pool.licenses_available
        assert 0 == pool.licenses_reserved
        assert 0 == pool.patrons_in_hold_queue

Example #6

0

Show file

    def test_explicit_formatdata(self):
        """Explicit FormatData is added to, or replaces, a pool's
        delivery mechanisms depending on the replacement policy.
        """
        # An edition created with an open-access download gets a
        # delivery mechanism automatically.
        edition, pool = self._edition(with_open_access_download=True)

        # Add a DRM-protected PDF on top of that.
        adobe_pdf = FormatData(
            content_type=Representation.PDF_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        circulation_data = CirculationData(
            formats=[adobe_pdf],
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )
        circulation_data.apply(self._db, pool.collection)

        # Sorted by content type, the two mechanisms unpack as
        # (epub, pdf).
        by_content_type = sorted(
            pool.delivery_mechanisms,
            key=lambda lpdm: lpdm.delivery_mechanism.content_type,
        )
        [epub, pdf] = by_content_type
        assert epub.resource == pool.best_open_access_resource

        pdf_mechanism = pdf.delivery_mechanism
        assert Representation.PDF_MEDIA_TYPE == pdf_mechanism.content_type
        assert DeliveryMechanism.ADOBE_DRM == pdf_mechanism.drm_scheme

        # With a policy that replaces formats, only the manually
        # created format is left.
        replace_formats = ReplacementPolicy(formats=True)
        circulation_data.apply(
            self._db, pool.collection, replace=replace_formats)
        [pdf] = pool.delivery_mechanisms
        assert Representation.PDF_MEDIA_TYPE == pdf.delivery_mechanism.content_type

Example #7

0

Show file

File: opds_for_distributors.py Project: ejcepas/circulation

 def _add_format_data(cls, circulation):
     """Append a bearer-token FormatData for each supported acquisition link.

     Only links whose ``rel`` is the generic OPDS acquisition relation
     and whose media type is listed in
     ``OPDSForDistributorsAPI.SUPPORTED_MEDIA_TYPES`` are considered.

     :param circulation: Object with ``links`` and ``formats``
         attributes; ``formats`` is modified in place.
     """
     for link in circulation.links:
         if link.rel != Hyperlink.GENERIC_OPDS_ACQUISITION:
             continue
         if link.media_type not in OPDSForDistributorsAPI.SUPPORTED_MEDIA_TYPES:
             continue
         bearer_token_format = FormatData(
             content_type=link.media_type,
             drm_scheme=DeliveryMechanism.BEARER_TOKEN,
             link=link,
             rights_uri=RightsStatus.IN_COPYRIGHT,
         )
         circulation.formats.append(bearer_token_format)

Example #8

0

Show file

    def test_apply_removes_old_formats_based_on_replacement_policy(self):
        """``apply`` keeps or drops existing formats according to the
        ``formats`` flag of the ReplacementPolicy.
        """
        edition, pool = self._edition(with_license_pool=True)

        # Reduce the pool to exactly one delivery mechanism: drop
        # whatever it came with, then add an Adobe DRM PDF.
        for existing in pool.delivery_mechanisms:
            self._db.delete(existing)
        old_lpdm = pool.set_delivery_mechanism(
            Representation.PDF_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
            RightsStatus.IN_COPYRIGHT,
            None,
        )

        # A patron has a loan fulfilled through that mechanism.
        patron = self._patron()
        loan, _is_new = pool.loan_to(patron, fulfillment=old_lpdm)
        assert old_lpdm == loan.fulfillment

        # The incoming circulation data names a different format.
        epub_format = FormatData(
            content_type=Representation.EPUB_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        circulation_data = CirculationData(
            formats=[epub_format],
            data_source=edition.data_source,
            primary_identifier=edition.primary_identifier,
        )

        # formats=False: the new format is added and the old one stays.
        keep_policy = ReplacementPolicy(formats=False)
        circulation_data.apply(self._db, pool.collection, keep_policy)

        assert 2 == len(pool.delivery_mechanisms)
        expected_types = {
            Representation.PDF_MEDIA_TYPE,
            Representation.EPUB_MEDIA_TYPE,
        }
        actual_types = {
            lpdm.delivery_mechanism.content_type
            for lpdm in pool.delivery_mechanisms
        }
        assert expected_types == actual_types
        assert old_lpdm == loan.fulfillment

        # formats=True: the old format is deleted and detached from
        # its loan.
        replace_policy = ReplacementPolicy(formats=True)
        circulation_data.apply(self._db, pool.collection, replace_policy)

        assert 1 == len(pool.delivery_mechanisms)
        remaining = pool.delivery_mechanisms[0]
        assert Representation.EPUB_MEDIA_TYPE == remaining.delivery_mechanism.content_type
        assert None == loan.fulfillment

Example #9

0

Show file

    def extract_bibliographic(self, element):
        """Build a Metadata object, with CirculationData attached, from
        one element of an Enki response.

        :param element: Mapping indexed with the keys "isbn", "author",
            "id", "large_image", "title", "publisher" and
            "availability" (itself indexed with "totalCopies",
            "availableCopies", "onHold" and "accessType").
        :return: The Metadata, whose ``circulation`` attribute holds the
            CirculationData built here.
        """
        identifiers = [IdentifierData(Identifier.ISBN, element["isbn"])]

        # An empty author falls back to the "unknown author" marker.
        sort_name = element["author"] or Edition.UNKNOWN_AUTHOR
        contributors = [ContributorData(sort_name=sort_name)]

        primary_identifier = IdentifierData(EnkiAPI.ENKI_ID, element["id"])

        # The thumbnail and the full image share the same URL.
        cover_url = element["large_image"]
        images = [
            LinkData(
                rel=Hyperlink.THUMBNAIL_IMAGE,
                href=cover_url,
                media_type=Representation.PNG_MEDIA_TYPE,
            ),
            LinkData(
                rel=Hyperlink.IMAGE,
                href=cover_url,
                media_type=Representation.PNG_MEDIA_TYPE,
            ),
        ]

        metadata = Metadata(
            data_source=DataSource.ENKI,
            title=element["title"],
            language="eng",
            medium=Edition.BOOK_MEDIUM,
            publisher=element["publisher"],
            primary_identifier=primary_identifier,
            identifiers=identifiers,
            contributors=contributors,
            links=images,
        )

        availability = element["availability"]
        licenses_owned = availability["totalCopies"]
        licenses_available = availability["availableCopies"]
        hold = availability["onHold"]
        if availability["accessType"] == 'acs':
            drm_type = EnkiAPI.adobe_drm
        else:
            drm_type = EnkiAPI.no_drm

        # Every title is recorded as an EPUB here.
        formats = [
            FormatData(
                content_type=Representation.EPUB_MEDIA_TYPE,
                drm_scheme=drm_type,
            )
        ]

        metadata.circulation = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=primary_identifier,
            formats=formats,
            licenses_owned=int(licenses_owned),
            licenses_available=int(licenses_available),
            patrons_in_hold_queue=int(hold),
        )
        return metadata

Example #10

0

Show file

    def test_format_change_may_change_open_access_status(self):
        """Replacing a pool's formats can flip its open-access flag in
        either direction.
        """
        # Every apply() call in this test destroys the old list of
        # formats and recreates it.
        replace_formats = ReplacementPolicy(formats=True)

        # Start from a pool that is not open-access.
        edition, pool = self._edition(with_license_pool=True)
        assert False == pool.open_access

        # News arrives of an open-access delivery mechanism.
        open_access_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.CC_BY_ND,
        )
        with_open_access = CirculationData(
            data_source=pool.data_source,
            primary_identifier=pool.identifier,
            links=[open_access_link],
        )

        # Applying it makes the pool open-access.
        with_open_access.apply(
            self._db, pool.collection, replace=replace_formats)
        assert True == pool.open_access

        # That turns out to be wrong -- the book is in copyright.
        in_copyright_format = FormatData(
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.NO_DRM,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        corrected = CirculationData(
            data_source=pool.data_source,
            primary_identifier=pool.identifier,
            formats=[in_copyright_format],
        )
        corrected.apply(self._db, pool.collection, replace=replace_formats)

        # Only the new delivery mechanism is left, and the pool is no
        # longer open-access.
        assert False == pool.open_access
        assert 1 == len(pool.delivery_mechanisms)

Example #11

0

Show file

    def test_circulationdata_may_require_collection(self):
        """Whether apply() works without a Collection depends on what
        the CirculationData contains.
        """
        overdrive_identifier = IdentifierData(Identifier.OVERDRIVE_ID, "1")
        epub_format = FormatData(
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.NO_DRM,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        circdata = CirculationData(
            DataSource.OVERDRIVE,
            primary_identifier=overdrive_identifier,
            formats=[epub_format],
        )
        circdata.apply(self._db, collection=None)

        # A LicensePoolDeliveryMechanism now exists for the title even
        # though it has no LicensePools.
        identifier_obj, _is_new = overdrive_identifier.load(self._db)
        assert [] == identifier_obj.licensed_through
        [lpdm] = identifier_obj.delivery_mechanisms
        assert DataSource.OVERDRIVE == lpdm.data_source.name
        assert RightsStatus.IN_COPYRIGHT == lpdm.rights_status.uri

        mechanism = lpdm.delivery_mechanism
        assert Representation.EPUB_MEDIA_TYPE == mechanism.content_type
        assert DeliveryMechanism.NO_DRM == mechanism.drm_scheme

        # Information that can only live in a LicensePool makes the
        # same call fail.
        circdata.licenses_owned = 0
        with pytest.raises(ValueError) as excinfo:
            circdata.apply(self._db, collection=None)
        expected_message = (
            "Cannot store circulation information because no Collection was provided."
        )
        assert expected_message in str(excinfo.value)

Example #12

0

Show file

 def set_format(cls, format_received, formats):
     """Record the FormatData for an Odilo format and return its medium.

     :param format_received: Format name used as the key into
         ``cls.format_data_for_odilo_format`` and
         ``cls.odilo_medium_to_simplified_medium``.
     :param formats: List that receives the new FormatData; modified in
         place.
     :return: The value ``cls.odilo_medium_to_simplified_medium`` holds
         for ``format_received``, or None when there is no entry.
     """
     format_data = cls.format_data_for_odilo_format.get(format_received)
     # ``.get`` yields None for a format with no entry. Unpacking that
     # directly would raise an opaque TypeError, so such formats are
     # skipped rather than recorded.
     if format_data is not None:
         content_type, drm_scheme = format_data
         formats.append(FormatData(content_type, drm_scheme))
     return cls.odilo_medium_to_simplified_medium.get(format_received)

Example #13

0

Show file

File: odl2.py Project: ThePalaceProject/circulation

    def _extract_publication_metadata(self, feed, publication, data_source_name):
        """Extract a Metadata object from webpub-manifest-parser's publication.

        The parent implementation builds the base Metadata. This method
        then walks ``publication.licenses`` to collect parsed license
        data and one FormatData per (content type, DRM scheme) pair,
        and stores them on ``metadata.circulation``.

        :param feed: Feed object
        :type feed: opds2_ast.OPDS2Feed

        :param publication: Publication object
        :type publication: opds2_ast.OPDS2Publication

        :param data_source_name: Data source's name
        :type data_source_name: str

        :return: Publication's metadata
        :rtype: Metadata
        """
        metadata = super(ODL2Importer, self)._extract_publication_metadata(
            feed, publication, data_source_name
        )
        formats = []
        licenses = []
        medium = None

        # Read the configured list of license formats to ignore; it is
        # turned into a set for the membership tests below.
        with self._get_configuration(self._db) as configuration:
            skipped_license_formats = configuration.skipped_license_formats

            if skipped_license_formats:
                skipped_license_formats = set(skipped_license_formats)

        if publication.licenses:
            for odl_license in publication.licenses:
                identifier = odl_license.metadata.identifier
                # Each link variable is replaced by its href when a link
                # was found; otherwise it keeps whatever falsy value
                # first_or_default returned.
                checkout_link = first_or_default(
                    odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.BORROW.key)
                )
                if checkout_link:
                    checkout_link = checkout_link.href

                license_info_document_link = first_or_default(
                    odl_license.links.get_by_rel(OPDS2LinkRelationsRegistry.SELF.key)
                )
                if license_info_document_link:
                    license_info_document_link = license_info_document_link.href

                expires = (
                    to_utc(odl_license.metadata.terms.expires)
                    if odl_license.metadata.terms
                    else None
                )
                # NOTE(review): int() raises TypeError if ``terms`` is
                # present but ``concurrency`` is None -- confirm the
                # parser always supplies a value here.
                concurrency = (
                    int(odl_license.metadata.terms.concurrency)
                    if odl_license.metadata.terms
                    else None
                )

                # Without a License Info Document link there is nothing
                # to fetch, so no license data is parsed.
                if not license_info_document_link:
                    parsed_license = None
                else:
                    parsed_license = ODLImporter.get_license_data(
                        license_info_document_link,
                        checkout_link,
                        identifier,
                        expires,
                        concurrency,
                        self.http_get,
                    )

                if parsed_license is not None:
                    licenses.append(parsed_license)

                # DPLA feed doesn't have information about a DRM protection used for audiobooks.
                # We want to try to extract that information from the License Info Document if it's present there.
                license_formats = set(odl_license.metadata.formats)
                if parsed_license and parsed_license.content_types:
                    license_formats |= set(parsed_license.content_types)

                for license_format in license_formats:
                    if (
                        skipped_license_formats
                        and license_format in skipped_license_formats
                    ):
                        continue

                    # The medium is derived from the first non-skipped
                    # format that yields a truthy medium; it is computed
                    # before ``license_format`` may be rewritten below.
                    if not medium:
                        medium = Edition.medium_from_media_type(license_format)

                    if license_format in ODLImporter.LICENSE_FORMATS:
                        # Special case to handle DeMarque audiobooks which
                        # include the protection in the content type
                        drm_schemes = [
                            ODLImporter.LICENSE_FORMATS[license_format][
                                ODLImporter.DRM_SCHEME
                            ]
                        ]
                        license_format = ODLImporter.LICENSE_FORMATS[license_format][
                            ODLImporter.CONTENT_TYPE
                        ]
                    else:
                        drm_schemes = (
                            odl_license.metadata.protection.formats
                            if odl_license.metadata.protection
                            else []
                        )

                    # With no DRM schemes, a single format with
                    # drm_scheme=None is still recorded.
                    for drm_scheme in drm_schemes or [None]:
                        formats.append(
                            FormatData(
                                content_type=license_format,
                                drm_scheme=drm_scheme,
                                rights_uri=RightsStatus.IN_COPYRIGHT,
                            )
                        )

        # Store the collected licenses and formats; the aggregate
        # counters are set to None.
        metadata.circulation.licenses = licenses
        metadata.circulation.licenses_owned = None
        metadata.circulation.licenses_available = None
        metadata.circulation.licenses_reserved = None
        metadata.circulation.patrons_in_hold_queue = None
        metadata.circulation.formats.extend(formats)
        metadata.medium = medium

        return metadata

Example #14

0

Show file

    def update_licensepool_for_identifier(self,
                                          isbn,
                                          availability,
                                          medium,
                                          policy=None):
        """Update availability information for a single book.

        A LicensePool is looked up for the ISBN and created if the book
        has never been seen before; for a new pool, bibliographic
        coverage is ensured for its identifier. The pool then receives
        approximate circulation information: we can tell whether the
        book is available, but not how many copies there are.

        :param isbn: the identifier OneClick uses
        :param availability: boolean denoting if book can be lent to patrons
        :param medium: The name OneClick uses for the book's medium.
        :param policy: Replacement policy handed to
            ``CirculationData.apply``; falls back to
            ``self.default_circulation_replacement_policy`` when falsy.
        :return: A 3-tuple (license pool, whether the pool is new,
            whether circulation changed).
        """
        # Find (or create) the pool that matches the ISBN.
        license_pool, is_new_pool = LicensePool.for_foreign_id(
            self._db,
            DataSource.RB_DIGITAL,
            Identifier.RB_DIGITAL_ID,
            isbn,
            collection=self.collection,
        )
        if is_new_pool:
            # First sighting of this book: make sure its identifier has
            # bibliographic coverage.
            self.bibliographic_coverage_provider.ensure_coverage(
                license_pool.identifier)

        # The exact number of available licenses is unknown; it is
        # either zero (not lendable) or more than zero (lendable).
        licenses_available = 1 if availability else 0

        # The book showed up in availability, so at least one license
        # is owned.
        licenses_owned = 1

        nothing_changed = (
            not is_new_pool
            and license_pool.licenses_owned == licenses_owned
            and license_pool.licenses_available == licenses_available
        )
        if nothing_changed:
            # Optimization: skip CirculationData.apply() entirely.
            return license_pool, is_new_pool, False

        # These medium strings ("ebook") differ from the "fileFormat"
        # strings ("EPUB") seen in a patron's loans. The file format is
        # not available when checking availability before a checkout.
        normalized_medium = medium.lower()
        delivery_type = None
        drm_scheme = None
        if normalized_medium == 'ebook':
            delivery_type = Representation.EPUB_MEDIA_TYPE
            # The DRM scheme is not reported at this point, but some of
            # these EPUBs do have Adobe DRM, and DRM usage may change
            # in the future.
            drm_scheme = DeliveryMechanism.ADOBE_DRM
        elif normalized_medium == 'eaudio':
            # TODO: we can't deliver on this promise yet, but this is
            # how we will be delivering audiobook manifests.
            delivery_type = Representation.AUDIOBOOK_MANIFEST_MEDIA_TYPE

        # A FormatData is recorded only when the medium was recognized.
        formats = []
        if delivery_type:
            formats.append(FormatData(delivery_type, drm_scheme))

        circulation_data = CirculationData(
            data_source=DataSource.RB_DIGITAL,
            primary_identifier=license_pool.identifier,
            licenses_owned=licenses_owned,
            licenses_available=licenses_available,
            formats=formats,
        )

        effective_policy = policy or self.default_circulation_replacement_policy
        license_pool, circulation_changed = circulation_data.apply(
            self._db,
            self.collection,
            replace=effective_policy,
        )

        return license_pool, is_new_pool, circulation_changed