def test_circulationdata_can_be_deepcopied(self):
    """Verify that a fully-populated CirculationData survives deepcopy.

    If something uncopyable (e.g. self.log) were stored on the object,
    deepcopy would raise.
    """
    # Ancillary objects of every kind that might end up referenced
    # by the CirculationData.
    subject = SubjectData(Subject.TAG, "subject")
    contributor = ContributorData()
    primary = IdentifierData(Identifier.GUTENBERG_ID, "1")
    download_link = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
    epub_format = FormatData(
        Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM
    )

    data = CirculationData(
        DataSource.GUTENBERG,
        primary_identifier=primary,
        links=[download_link],
        licenses_owned=5,
        licenses_available=5,
        licenses_reserved=None,
        patrons_in_hold_queue=None,
        formats=[epub_format],
        default_rights_uri=RightsStatus.GENERIC_OPEN_ACCESS,
    )

    # deepcopy raising would fail the test; a non-None copy means success.
    assert deepcopy(data) is not None
def test_update_contributions(self):
    """Metadata.update_contributions(replace=True) swaps out an
    edition's existing contributors for the ones in the Metadata."""
    edition = self._edition()

    # The test edition starts out with a single stand-in contributor,
    # who is about to be destroyed and replaced by new data.
    [original] = edition.contributors

    replacement = ContributorData(
        display_name="Robert Jordan",
        sort_name="Jordan, Robert",
        wikipedia_name="Robert_Jordan",
        viaf="79096089",
        lc="123",
        roles=[Contributor.PRIMARY_AUTHOR_ROLE],
    )
    metadata = Metadata(DataSource.OVERDRIVE, contributors=[replacement])
    metadata.update_contributions(self._db, edition, replace=True)

    # The edition now has exactly one contributor, and it is not the
    # one it started with.
    [current] = edition.contributors
    assert current != original

    # Every field supplied by the Metadata made it onto the new
    # Contributor record.
    expected = dict(
        sort_name="Jordan, Robert",
        display_name="Robert Jordan",
        viaf="79096089",
        lc="123",
        wikipedia_name="Robert_Jordan",
    )
    for field, value in expected.items():
        eq_(value, getattr(current, field))
def test_from_contribution(self):
    """ContributorData.from_contribution copies every field from the
    underlying Contributor."""
    # Make an edition with a single author, then flesh out that
    # author's record.
    edition, pool = self._edition(
        with_license_pool=True, authors=["PrimaryAuthor"]
    )
    contribution = edition.contributions[0]
    source = contribution.contributor
    source.lc = "1234567"
    source.viaf = "ABC123"
    source.aliases = ["Primo"]
    source.display_name = "Test Author For The Win"
    source.family_name = "TestAuttie"
    source.wikipedia_name = "TestWikiAuth"
    source.biography = "He was born on Main Street."

    data = ContributorData.from_contribution(contribution)

    # Every field on the ContributorData mirrors the Contributor.
    for field in ('lc', 'viaf', 'aliases', 'display_name',
                  'family_name', 'wikipedia_name', 'biography'):
        eq_(getattr(data, field), getattr(source, field))
def extract_contributor(cls, parser, author_tag):
    """Turn an <atom:author> tag into a ContributorData object.

    Returns None (after logging) when the tag carries no identifying
    name at all.
    """
    subtag = parser.text_of_optional_subtag
    sort_name = subtag(author_tag, 'simplified:sort_name')
    display_name = subtag(author_tag, 'atom:name')
    family_name = subtag(author_tag, "simplified:family_name")
    wikipedia_name = subtag(author_tag, "simplified:wikipedia_name")

    # TODO: we need a way of conveying roles. I believe Bibframe
    # has the answer.

    # TODO: Also collect VIAF and LC numbers if present. This
    # requires parsing the URIs. Only the metadata wrangler will
    # provide this information.
    viaf = None

    # Without at least one identifying name there is nothing useful
    # to record.
    if not (sort_name or display_name or viaf):
        logging.info("Refusing to create ContributorData for contributor with no sort name, display name, or VIAF.")
        return None

    return ContributorData(
        sort_name=sort_name,
        display_name=display_name,
        family_name=family_name,
        wikipedia_name=wikipedia_name,
        roles=None,
    )
def contributors_from_string(cls, string, role=Contributor.AUTHOR_ROLE):
    """Parse a semicolon-separated list of names into ContributorData
    objects, each assigned the given role.

    Parenthetical annotations (per cls.parenthetical) are stripped
    from each name. An empty or missing string yields an empty list.
    """
    if not string:
        return []
    cleaned = (
        cls.parenthetical.sub("", raw.strip()).strip()
        for raw in string.split(';')
    )
    return [
        ContributorData(sort_name=name, roles=[role]) for name in cleaned
    ]
def test_apply(self):
    """ContributorData.apply copies every field onto an existing
    Contributor and reports whether anything actually changed."""
    target, made_new = self._contributor(
        sort_name="Doe, John", viaf="viaf12345"
    )

    extra = {Contributor.BIRTH_DATE: '2001-01-01'}
    data = ContributorData(
        sort_name="Doerr, John",
        lc="1234567",
        viaf="ABC123",
        aliases=["Primo"],
        display_name="Test Author For The Win",
        family_name="TestAuttie",
        wikipedia_name="TestWikiAuth",
        biography="He was born on Main Street.",
        extra=extra,
    )

    updated, changed = data.apply(target)

    # The first application reports a change and copies every field.
    eq_(changed, True)
    eq_(updated.sort_name, u"Doerr, John")
    eq_(updated.lc, u"1234567")
    eq_(updated.viaf, u"ABC123")
    eq_(updated.aliases, [u"Primo"])
    eq_(updated.display_name, u"Test Author For The Win")
    eq_(updated.family_name, u"TestAuttie")
    eq_(updated.wikipedia_name, u"TestWikiAuth")
    eq_(updated.biography, u"He was born on Main Street.")
    eq_(updated.extra[Contributor.BIRTH_DATE], u"2001-01-01")

    # Applying the same data a second time is a no-op.
    updated, changed = data.apply(updated)
    eq_(changed, False)
def parse_contributor(cls, author, primary_author_found=False):
    """Parse a contributor string such as "Walt Disney Pictures (COR)".

    The optional trailing three-letter code names the contributor's
    role; without one, the first contributor seen is treated as the
    primary author.

    :param author: The string to parse.
    :param primary_author_found: If False, a contributor with no
        three-letter code becomes the primary author; if True, a
        regular author.
    :return: A ContributorData with the parsed name and role.
    """
    if primary_author_found:
        default_author_role = Contributor.AUTHOR_ROLE
    else:
        default_author_role = Contributor.PRIMARY_AUTHOR_ROLE
    role = default_author_role
    match = cls.role_abbreviation.search(author)
    if match:
        role_type = match.groups()[0]
        role = cls.role_abbreviation_to_role.get(
            role_type, Contributor.UNKNOWN_ROLE)
        if role is cls.generic_author:
            role = default_author_role
        # Strip the " (XXX)" role code from the name.
        author = author[:-5].strip()
    # BUG FIX: roles must be a list of roles, not a bare string --
    # every other ContributorData caller (and the sibling
    # parse_contributor implementation) passes [role].
    return ContributorData(sort_name=author, roles=[role])
def test_metadata_can_be_deepcopied(self):
    """A Metadata populated with every kind of constituent object can
    be deepcopied -- i.e. nothing uncopyable (e.g. self.log) ended up
    stored on it."""
    primary = IdentifierData(Identifier.GUTENBERG_ID, "1")

    circulation = CirculationData(
        data_source=DataSource.GUTENBERG,
        primary_identifier=primary,
        licenses_owned=0,
        licenses_available=0,
        licenses_reserved=0,
        patrons_in_hold_queue=0,
    )

    primary_as_data = IdentifierData(
        type=primary.type, identifier=primary.identifier
    )
    other_data = IdentifierData(type=u"abc", identifier=u"def")

    metadata = Metadata(
        DataSource.GUTENBERG,
        subjects=[SubjectData(Subject.TAG, "subject")],
        contributors=[ContributorData()],
        primary_identifier=primary,
        links=[LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")],
        measurements=[MeasurementData(Measurement.RATING, 5)],
        circulation=circulation,
        title="Hello Title",
        subtitle="Subtle Hello",
        sort_title="Sorting Howdy",
        language="US English",
        medium=Edition.BOOK_MEDIUM,
        series="1",
        series_position=1,
        publisher="Hello World Publishing House",
        imprint=u"Follywood",
        issued=datetime.datetime.utcnow(),
        published=datetime.datetime.utcnow(),
        identifiers=[primary_as_data, other_data],
        data_source_last_updated=datetime.datetime.utcnow(),
    )

    # If deepcopy didn't throw an exception we're ok.
    assert deepcopy(metadata) is not None
def parse_contributor(cls, author, primary_author_found=False, force_role=None):
    """Parse an Axis 360 contributor string.

    The contributor string looks like "Butler, Octavia" or "Walt
    Disney Pictures (COR)" or "Rex, Adam (ILT)". The optional
    three-letter code describes the contributor's role in the book.

    :param author: The string to parse.
    :param primary_author_found: If False, a contributor with no
        three-letter code is treated as the primary author; if True,
        as just a regular author.
    :param force_role: If set, this role is assigned unconditionally,
        overriding whatever primary_author_found implies.
    """
    default_author_role = (
        Contributor.AUTHOR_ROLE if primary_author_found
        else Contributor.PRIMARY_AUTHOR_ROLE
    )
    role = default_author_role

    match = cls.role_abbreviation.search(author)
    if match:
        role = cls.role_abbreviation_to_role.get(
            match.groups()[0], Contributor.UNKNOWN_ROLE
        )
        # A generic "author" code falls back to the positional default.
        if role is cls.generic_author:
            role = default_author_role
        # Drop the " (XXX)" suffix from the name.
        author = author[:-5].strip()

    if force_role:
        role = force_role

    return ContributorData(sort_name=author, roles=[role])
def extract_bibliographic(self, element, ns):
    """Turn one Enki bibliographic record into a Metadata object.

    :param element: A dictionary of book attributes from the Enki feed
        (keys used here: "isbn", "author", "id", "title", "publisher").
    :param ns: Unused in this method; presumably a namespace map
        required by the caller's interface -- TODO confirm.
    :return: A Metadata whose .circulation is a CirculationData
        carrying a single EPUB/Adobe-DRM format.
    """
    identifiers = []
    contributors = []
    identifiers.append(IdentifierData(Identifier.ISBN, element["isbn"]))
    sort_name = element["author"]
    # Fall back to a placeholder so every record gets a contributor.
    if not sort_name:
        sort_name = "Unknown"
    contributors.append(ContributorData(sort_name=sort_name))
    primary_identifier = IdentifierData(Identifier.ENKI_ID, element["id"])
    # NOTE(review): "ENGLISH" is not an ISO language code -- confirm
    # downstream consumers expect this literal value.
    metadata = Metadata(
        data_source=DataSource.ENKI,
        title=element["title"],
        language="ENGLISH",
        medium=Edition.BOOK_MEDIUM,
        #series=series,
        publisher=element["publisher"],
        #imprint=imprint,
        #published=publication_date,
        primary_identifier=primary_identifier,
        identifiers=identifiers,
        #subjects=subjects,
        contributors=contributors,
    )
    #TODO: This should parse the content type and look it up in the Enki Delivery Data above. Currently,
    # we assume everything is an ePub that uses Adobe DRM, which is a safe assumption only for now.
    formats = []
    formats.append(
        FormatData(content_type=Representation.EPUB_MEDIA_TYPE,
                   drm_scheme=DeliveryMechanism.ADOBE_DRM))
    circulationdata = CirculationData(
        data_source=DataSource.ENKI,
        primary_identifier=primary_identifier,
        formats=formats,
    )
    metadata.circulation = circulationdata
    return metadata
def book_info_to_metadata(cls, book, include_bibliographic=True, include_formats=True):
    """Turn Overdrive's JSON representation of a book into a Metadata
    object.

    Note: The json data passed into this method is from a different
    file/stream from the json data that goes into the
    book_info_to_circulation() method.

    :param book: Parsed JSON dictionary for one Overdrive title.
    :param include_bibliographic: When True, extract title, language,
        contributors, subjects, identifiers, links and measurements.
    :param include_formats: When True, also build a CirculationData
        carrying the delivery formats.
    :return: A Metadata, or None if the record has no 'id'.
    """
    if not 'id' in book:
        return None
    overdrive_id = book['id']
    primary_identifier = IdentifierData(Identifier.OVERDRIVE_ID, overdrive_id)

    if include_bibliographic:
        title = book.get('title', None)
        sort_title = book.get('sortTitle')
        subtitle = book.get('subtitle', None)
        series = book.get('series', None)
        publisher = book.get('publisher', None)
        imprint = book.get('imprint', None)

        # Only the date portion (first 10 characters) of publishDate
        # is parsed.
        if 'publishDate' in book:
            published = datetime.datetime.strptime(
                book['publishDate'][:10], cls.DATE_FORMAT)
        else:
            published = None

        # Prefer English when present; otherwise pick the
        # alphabetically first language code for determinism.
        languages = [l['code'] for l in book.get('languages', [])]
        if 'eng' in languages or not languages:
            language = 'eng'
        else:
            language = sorted(languages)[0]

        contributors = []
        for creator in book.get('creators', []):
            sort_name = creator['fileAs']
            display_name = creator['name']
            role = creator['role']
            # An unparseable role becomes UNKNOWN_ROLE rather than
            # being dropped.
            roles = cls.parse_roles(overdrive_id, role) or [Contributor.UNKNOWN_ROLE]
            contributor = ContributorData(
                sort_name=sort_name, display_name=display_name,
                roles=roles,
                biography=creator.get('bioText', None))
            contributors.append(contributor)

        subjects = []
        # Overdrive's own subject vocabulary is trusted more (weight
        # 100) than free-form keywords (weight 1).
        for sub in book.get('subjects', []):
            subject = SubjectData(type=Subject.OVERDRIVE,
                                  identifier=sub['value'],
                                  weight=100)
            subjects.append(subject)

        for sub in book.get('keywords', []):
            subject = SubjectData(type=Subject.TAG,
                                  identifier=sub['value'],
                                  weight=1)
            subjects.append(subject)

        extra = dict()
        if 'grade_levels' in book:
            # n.b. Grade levels are measurements of reading level, not
            # age appropriateness. We can use them as a measure of age
            # appropriateness in a pinch, but we weight them less
            # heavily than other information from Overdrive.
            for i in book['grade_levels']:
                subject = SubjectData(type=Subject.GRADE_LEVEL,
                                      identifier=i['value'],
                                      weight=10)
                subjects.append(subject)

        # An unrecognized medium is logged but still mapped to the
        # default (BOOK_MEDIUM) below.
        overdrive_medium = book.get('mediaType', None)
        if overdrive_medium and overdrive_medium not in cls.overdrive_medium_to_simplified_medium:
            cls.log.error("Could not process medium %s for %s",
                          overdrive_medium, overdrive_id)

        medium = cls.overdrive_medium_to_simplified_medium.get(
            overdrive_medium, Edition.BOOK_MEDIUM)

        measurements = []
        if 'awards' in book:
            extra['awards'] = book.get('awards', [])
            num_awards = len(extra['awards'])
            measurements.append(
                MeasurementData(Measurement.AWARDS, str(num_awards)))

        for name, subject_type in (
                ('ATOS', Subject.ATOS_SCORE),
                ('lexileScore', Subject.LEXILE_SCORE),
                ('interestLevel', Subject.INTEREST_LEVEL)):
            if not name in book:
                continue
            identifier = str(book[name])
            subjects.append(
                SubjectData(type=subject_type, identifier=identifier,
                            weight=100))

        for grade_level_info in book.get('gradeLevels', []):
            grade_level = grade_level_info.get('value')
            subjects.append(
                SubjectData(type=Subject.GRADE_LEVEL,
                            identifier=grade_level,
                            weight=100))

        identifiers = []
        links = []
        for format in book.get('formats', []):
            for new_id in format.get('identifiers', []):
                t = new_id['type']
                v = new_id['value']
                orig_v = v
                type_key = None
                if t == 'ASIN':
                    type_key = Identifier.ASIN
                elif t == 'ISBN':
                    type_key = Identifier.ISBN
                    if len(v) == 10:
                        v = isbnlib.to_isbn13(v)
                    if v is None or not isbnlib.is_isbn13(v):
                        # Overdrive sometimes uses invalid values
                        # like "n/a" as placeholders. Ignore such
                        # values to avoid a situation where hundreds of
                        # books appear to have the same ISBN. ISBNs
                        # which fail check digit checks or are invalid
                        # also can occur. Log them for review.
                        cls.log.info("Bad ISBN value provided: %s", orig_v)
                        continue
                elif t == 'DOI':
                    type_key = Identifier.DOI
                elif t == 'UPC':
                    type_key = Identifier.UPC
                elif t == 'PublisherCatalogNumber':
                    continue
                if type_key and v:
                    identifiers.append(IdentifierData(type_key, v, 1))

            # Samples become links.
            if 'samples' in format:
                if not format['id'] in cls.format_data_for_overdrive_format:
                    # Useless to us.
                    continue
                content_type, drm_scheme = cls.format_data_for_overdrive_format.get(
                    format['id'])
                # Only link to samples whose content type is a real
                # media type.
                if Representation.is_media_type(content_type):
                    for sample_info in format['samples']:
                        href = sample_info['url']
                        links.append(
                            LinkData(rel=Hyperlink.SAMPLE,
                                     href=href,
                                     media_type=content_type))

        # A cover and its thumbnail become a single LinkData.
        if 'images' in book:
            images = book['images']
            image_data = cls.image_link_to_linkdata(
                images.get('cover'), Hyperlink.IMAGE)
            for name in ['cover300Wide', 'cover150Wide', 'thumbnail']:
                # Try to get a thumbnail that's as close as possible
                # to the size we use.
                image = images.get(name)
                thumbnail_data = cls.image_link_to_linkdata(
                    image, Hyperlink.THUMBNAIL_IMAGE)
                # A named size also doubles as the full cover if no
                # 'cover' image was found above.
                if not image_data:
                    image_data = cls.image_link_to_linkdata(
                        image, Hyperlink.IMAGE)
                if thumbnail_data:
                    break

            if image_data:
                if thumbnail_data:
                    image_data.thumbnail = thumbnail_data
                links.append(image_data)

        # Descriptions become links.
        short = book.get('shortDescription')
        full = book.get('fullDescription')
        if full:
            links.append(
                LinkData(
                    rel=Hyperlink.DESCRIPTION,
                    content=full,
                    media_type="text/html",
                ))

        # Skip the short description when it's just a prefix of the
        # full one.
        if short and (not full or not full.startswith(short)):
            links.append(
                LinkData(
                    rel=Hyperlink.SHORT_DESCRIPTION,
                    content=short,
                    media_type="text/html",
                ))

        # Add measurements: rating and popularity
        if book.get('starRating') is not None and book['starRating'] > 0:
            measurements.append(
                MeasurementData(quantity_measured=Measurement.RATING,
                                value=book['starRating']))

        if book.get('popularity'):
            measurements.append(
                MeasurementData(quantity_measured=Measurement.POPULARITY,
                                value=book['popularity']))

        metadata = Metadata(
            data_source=DataSource.OVERDRIVE,
            title=title,
            subtitle=subtitle,
            sort_title=sort_title,
            language=language,
            medium=medium,
            series=series,
            publisher=publisher,
            imprint=imprint,
            published=published,
            primary_identifier=primary_identifier,
            identifiers=identifiers,
            subjects=subjects,
            contributors=contributors,
            measurements=measurements,
            links=links,
        )
    else:
        metadata = Metadata(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
        )

    if include_formats:
        formats = []
        for format in book.get('formats', []):
            format_id = format['id']
            if format_id in cls.format_data_for_overdrive_format:
                content_type, drm_scheme = cls.format_data_for_overdrive_format.get(
                    format_id)
                formats.append(FormatData(content_type, drm_scheme))
            elif format_id not in cls.ignorable_overdrive_formats:
                cls.log.error(
                    "Could not process Overdrive format %s for %s",
                    format_id, overdrive_id)

        # Also make a CirculationData so we can write the formats,
        circulationdata = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
            formats=formats,
        )
        metadata.circulation = circulationdata

    return metadata
class TestBibliographicCoverageProvider(DatabaseTest):
    """Tests for BibliographicCoverageProvider behavior (edition/work
    lookup, metadata application, license pool autocreation, and the
    presentation-ready flag) using mock providers."""

    # Canned Overdrive bibliographic data shared by the tests below.
    BIBLIOGRAPHIC_DATA = Metadata(
        DataSource.OVERDRIVE,
        publisher=u'Perfection Learning',
        language='eng',
        title=u'A Girl Named Disaster',
        published=datetime.datetime(1998, 3, 1, 0, 0),
        primary_identifier=IdentifierData(
            type=Identifier.OVERDRIVE_ID,
            identifier=u'ba9b3419-b0bd-4ca7-a24f-26c4246b6b44'),
        identifiers=[
            IdentifierData(type=Identifier.OVERDRIVE_ID,
                           identifier=u'ba9b3419-b0bd-4ca7-a24f-26c4246b6b44'),
            IdentifierData(type=Identifier.ISBN,
                           identifier=u'9781402550805')
        ],
        contributors=[
            ContributorData(sort_name=u"Nancy Farmer",
                            roles=[Contributor.PRIMARY_AUTHOR_ROLE])
        ],
        subjects=[
            SubjectData(type=Subject.TOPIC, identifier=u'Action & Adventure'),
            SubjectData(type=Subject.FREEFORM_AUDIENCE, identifier=u'Young Adult'),
            SubjectData(type=Subject.PLACE, identifier=u'Africa')
        ],
    )

    # Minimal circulation data tied to the same primary identifier.
    CIRCULATION_DATA = CirculationData(
        DataSource.OVERDRIVE,
        primary_identifier=BIBLIOGRAPHIC_DATA.primary_identifier,
    )

    def test_edition(self):
        provider = MockBibliographicCoverageProvider(self._db)
        provider.CAN_CREATE_LICENSE_POOLS = False
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        test_metadata = self.BIBLIOGRAPHIC_DATA

        # Returns a CoverageFailure if the identifier doesn't have a
        # license pool and none can be created.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Returns an Edition otherwise, creating it if necessary.
        edition, lp = self._edition(with_license_pool=True)
        identifier = edition.primary_identifier
        eq_(edition, provider.edition(identifier))

        # The Edition will be created if necessary.
        lp.identifier.primarily_identifies = []
        e2 = provider.edition(identifier)
        assert edition != e2
        assert isinstance(e2, Edition)

    def test_work(self):
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        test_metadata = self.BIBLIOGRAPHIC_DATA
        provider.CAN_CREATE_LICENSE_POOLS = False

        # Returns a CoverageFailure if the identifier doesn't have a
        # license pool.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Returns a CoverageFailure if there's no work available.
        edition, lp = self._edition(with_license_pool=True)
        # Remove edition so that the work won't be calculated
        lp.identifier.primarily_identifies = []
        result = provider.work(lp.identifier)
        assert isinstance(result, CoverageFailure)
        eq_("Work could not be calculated", result.exception)

        # Returns the work if it can be created or found.
        ed, lp = self._edition(with_license_pool=True)
        result = provider.work(lp.identifier)
        eq_(result, lp.work)

    def test_set_metadata(self):
        provider = MockBibliographicCoverageProvider(self._db)
        provider.CAN_CREATE_LICENSE_POOLS = False
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        test_metadata = self.BIBLIOGRAPHIC_DATA
        test_circulationdata = self.CIRCULATION_DATA

        # If there is no LicensePool and it can't be autocreated, a
        # CoverageRecord results.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        edition, lp = self._edition(data_source_name=DataSource.OVERDRIVE,
                                    identifier_type=Identifier.OVERDRIVE_ID,
                                    identifier_id=self.BIBLIOGRAPHIC_DATA.primary_identifier.identifier,
                                    with_license_pool=True)

        # If no metadata is passed in, a CoverageFailure results.
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, None, None)
        assert isinstance(result, CoverageFailure)
        eq_("Received neither metadata nor circulation data from input source",
            result.exception)

        # If no work can be created (in this case, because there's no title),
        # a CoverageFailure results.
        edition.title = None
        old_title = test_metadata.title
        test_metadata.title = None
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, test_metadata, test_circulationdata)
        assert isinstance(result, CoverageFailure)
        eq_("Work could not be calculated", result.exception)
        # Restore the shared class-level fixture for later tests.
        test_metadata.title = old_title

        # Test success
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, test_metadata, test_circulationdata)
        eq_(result, edition.primary_identifier)

        # If there's an exception setting the metadata, a
        # CoverageRecord results. This call raises a ValueError
        # because the primary identifier & the edition's primary
        # identifier don't match.
        test_metadata.primary_identifier = self._identifier(
            identifier_type=Identifier.OVERDRIVE_ID)
        result = provider.set_metadata_and_circulation_data(
            lp.identifier, test_metadata, test_circulationdata)
        assert isinstance(result, CoverageFailure)
        assert "ValueError" in result.exception

    def test_autocreate_licensepool(self):
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # If this constant is set to False, the coverage provider cannot
        # autocreate LicensePools for identifiers.
        provider.CAN_CREATE_LICENSE_POOLS = False
        eq_(None, provider.license_pool(identifier))

        # If it's set to True, the coverage provider can autocreate
        # LicensePools for identifiers.
        provider.CAN_CREATE_LICENSE_POOLS = True
        pool = provider.license_pool(identifier)
        eq_(pool.data_source, provider.output_source)
        eq_(pool.identifier, identifier)

    def test_set_presentation_ready(self):
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        test_metadata = self.BIBLIOGRAPHIC_DATA

        # If the work can't be found, it can't be made presentation ready.
        provider.CAN_CREATE_LICENSE_POOLS = False
        result = provider.set_presentation_ready(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Test success.
        ed, lp = self._edition(with_license_pool=True)
        result = provider.set_presentation_ready(ed.primary_identifier)
        eq_(result, ed.primary_identifier)

    def test_process_batch_sets_work_presentation_ready(self):
        work = self._work(with_license_pool=True,
                          with_open_access_download=True)
        identifier = work.license_pools[0].identifier
        work.presentation_ready = False
        provider = MockBibliographicCoverageProvider(self._db)
        [result] = provider.process_batch([identifier])
        eq_(result, identifier)
        eq_(True, work.presentation_ready)

        # ensure_coverage does the same thing.
        work.presentation_ready = False
        result = provider.ensure_coverage(identifier)
        assert isinstance(result, CoverageRecord)
        eq_(result.identifier, identifier)
        eq_(True, work.presentation_ready)

    def test_failure_does_not_set_work_presentation_ready(self):
        work = self._work(with_license_pool=True,
                          with_open_access_download=True)
        identifier = work.license_pools[0].identifier
        work.presentation_ready = False
        provider = MockFailureBibliographicCoverageProvider(self._db)
        [result] = provider.process_batch([identifier])
        assert isinstance(result, CoverageFailure)
        eq_(False, work.presentation_ready)
def record_info_to_metadata(cls, book, availability):
    """Turn Odilo's JSON representation of a book into a Metadata object.

    Note: The json data passed into this method is from a different
    file/stream from the json data that goes into the
    book_info_to_circulation() method.

    :param book: Parsed JSON dictionary for one Odilo record.
    :param availability: Availability data handed to
        record_info_to_circulation().
    :return: A (Metadata, active) 2-tuple, or None if the record has
        no 'id'. `active` False means the book exists but is no longer
        in the collection.
    """
    if 'id' not in book:
        return None

    odilo_id = book['id']
    primary_identifier = IdentifierData(Identifier.ODILO_ID, odilo_id)
    active = book.get('active')

    title = book.get('title')
    subtitle = book.get('subtitle')
    series = book.get('series')
    series_position = book.get('seriesPosition')

    contributors = []
    sort_author = book.get('author')
    if sort_author:
        roles = [Contributor.AUTHOR_ROLE]
        display_author = sort_name_to_display_name(sort_author)
        contributor = ContributorData(sort_name=sort_author,
                                      display_name=display_author,
                                      roles=roles,
                                      biography=None)
        contributors.append(contributor)

    publisher = book.get('publisher')

    # Metadata --> Marc21 260$c
    published = book.get('publicationDate')
    if not published:
        # yyyyMMdd --> record creation date
        published = book.get('releaseDate')

    if published:
        try:
            published = datetime.datetime.strptime(published, "%Y%m%d")
        except ValueError as e:
            # BUG FIX: e.message is Python-2-only (removed in Python 3);
            # str(e) works on both.
            cls.log.warn('Cannot parse publication date from: ' + published +
                         ', message: ' + str(e))

    # yyyyMMdd --> record last modification date
    last_update = book.get('modificationDate')
    if last_update:
        try:
            last_update = datetime.datetime.strptime(last_update, "%Y%m%d")
        except ValueError as e:
            cls.log.warn('Cannot parse last update date from: ' + last_update +
                         ', message: ' + str(e))

    # Odilo serves mostly Spanish-language content, so 'spa' is the
    # fallback language.
    language = book.get('language', 'spa')

    subjects = []
    for subject in book.get('subjects', []):
        subjects.append(
            SubjectData(type=Subject.TAG, identifier=subject, weight=100))

    for subjectBisacCode in book.get('subjectsBisacCodes', []):
        subjects.append(
            SubjectData(type=Subject.BISAC, identifier=subjectBisacCode,
                        weight=100))

    grade_level = book.get('gradeLevel')
    if grade_level:
        subject = SubjectData(type=Subject.GRADE_LEVEL,
                              identifier=grade_level,
                              weight=10)
        subjects.append(subject)

    # The medium is derived from the delivery formats; BOOK_MEDIUM is
    # the fallback when nothing recognizable was received.
    medium = None
    file_format = book.get('fileFormat')
    formats = []
    for format_received in book.get('formats', []):
        if format_received in cls.format_data_for_odilo_format:
            medium = cls.set_format(format_received, formats)
        elif format_received == OdiloAPI.ACSM and file_format:
            medium = cls.set_format(
                format_received + '_' + file_format.upper(), formats)
        else:
            cls.log.warn('Unrecognized format received: ' + format_received)

    if not medium:
        medium = Edition.BOOK_MEDIUM

    identifiers = []
    isbn = book.get('isbn')
    if isbn:
        # Normalize 10-digit ISBNs to ISBN-13.
        if isbnlib.is_isbn10(isbn):
            isbn = isbnlib.to_isbn13(isbn)
        identifiers.append(IdentifierData(Identifier.ISBN, isbn, 1))

    # A cover
    links = []
    cover_image_url = book.get('coverImageUrl')
    if cover_image_url:
        image_data = cls.image_link_to_linkdata(cover_image_url,
                                                Hyperlink.THUMBNAIL_IMAGE)
        if image_data:
            links.append(image_data)

    original_image_url = book.get('originalImageUrl')
    if original_image_url:
        image_data = cls.image_link_to_linkdata(original_image_url,
                                                Hyperlink.IMAGE)
        if image_data:
            links.append(image_data)

    # Descriptions become links.
    description = book.get('description')
    if description:
        links.append(
            LinkData(rel=Hyperlink.DESCRIPTION, content=description,
                     media_type="text/html"))

    metadata = Metadata(data_source=DataSource.ODILO,
                        title=title,
                        subtitle=subtitle,
                        language=language,
                        medium=medium,
                        series=series,
                        series_position=series_position,
                        publisher=publisher,
                        published=published,
                        primary_identifier=primary_identifier,
                        identifiers=identifiers,
                        subjects=subjects,
                        contributors=contributors,
                        links=links,
                        data_source_last_updated=last_update)

    metadata.circulation = OdiloRepresentationExtractor.record_info_to_circulation(
        availability)
    # 'active' --> means that the book exists but it's no longer in the collection
    # (it could be available again in the future)
    if not active:
        metadata.circulation.licenses_owned = 0
    metadata.circulation.formats = formats

    return metadata, active
# other books in the same series, as well as ISBNs that # are just wrong. Assign these equivalencies at a low # level of confidence. for isbn in d.get('isbns', []): isbn13 = isbn.get('isbn13', None) if isbn13: other_isbns.append( IdentifierData(Identifier.ISBN, isbn13, 0.50)) primary_isbn = primary_isbn13 or primary_isbn10 if primary_isbn: primary_isbn = IdentifierData(Identifier.ISBN, primary_isbn, 0.90) contributors = [] if display_author: contributors.append(ContributorData(display_name=display_author)) metadata = Metadata( data_source=DataSource.NYT, title=title, language='eng', published=published_date, publisher=publisher, contributors=contributors, primary_identifier=primary_isbn, identifiers=other_isbns, ) super(NYTBestSellerListTitle, self).__init__(metadata, first_appearance, most_recent_appearance, annotation)
def isbn_info_to_metadata(cls, book, include_bibliographic=True, include_formats=True):
    """Turn OneClick's JSON representation of a book into a Metadata
    object. Assumes the JSON is in the format that comes from the
    media/{isbn} endpoint.

    TODO: Use the seriesTotal field.

    :param book a json response-derived dictionary of book attributes
    :param include_bibliographic: When True, extract title, language,
        contributors, subjects, identifiers and links.
    :param include_formats: When True, also build a CirculationData
        carrying the delivery formats.
    :return: A Metadata, or None if the record has no 'isbn'.
    """
    if not 'isbn' in book:
        return None
    oneclick_id = book['isbn']
    primary_identifier = IdentifierData(Identifier.ONECLICK_ID, oneclick_id)

    metadata = Metadata(
        data_source=DataSource.ONECLICK,
        primary_identifier=primary_identifier,
    )

    if include_bibliographic:
        title = book.get('title', None)
        # NOTE: An item that's part of a series, will have the seriesName field, and
        # will have its seriesPosition and seriesTotal fields set to >0.
        # An item not part of a series will have the seriesPosition and seriesTotal fields
        # set to 0, and will not have a seriesName at all.
        # Sometimes, series position and total == 0, for many series items (ex: "seriesName": "EngLits").
        # Sometimes, seriesName is set to "Default Blank", meaning "not actually a series".
        series_name = book.get('seriesName', None)

        series_position = book.get('seriesPosition', None)
        if series_position:
            try:
                series_position = int(series_position)
            except ValueError:
                # not big enough deal to stop the whole process
                series_position = None

        # ignored for now
        series_total = book.get('seriesTotal', None)

        # ignored for now
        has_digital_rights = book.get('hasDigitalRights', None)

        publisher = book.get('publisher', None)

        if 'publicationDate' in book:
            published = datetime.datetime.strptime(
                book['publicationDate'][:10], cls.DATE_FORMAT)
        else:
            published = None

        if 'language' in book:
            language = LanguageCodes.string_to_alpha_3(book['language'])
        else:
            language = 'eng'

        contributors = []
        if 'authors' in book:
            authors = book['authors']
            for author in authors.split(";"):
                sort_name = author.strip()
                if sort_name:
                    sort_name = name_tidy(sort_name)
                    display_name = sort_name_to_display_name(sort_name)
                    roles = [Contributor.AUTHOR_ROLE]
                    contributor = ContributorData(sort_name=sort_name,
                                                  display_name=display_name,
                                                  roles=roles)
                    contributors.append(contributor)

        if 'narrators' in book:
            narrators = book['narrators']
            for narrator in narrators.split(";"):
                sort_name = narrator.strip()
                if sort_name:
                    sort_name = name_tidy(sort_name)
                    display_name = sort_name_to_display_name(sort_name)
                    roles = [Contributor.NARRATOR_ROLE]
                    contributor = ContributorData(sort_name=sort_name,
                                                  display_name=display_name,
                                                  roles=roles)
                    contributors.append(contributor)

        subjects = []
        if 'genres' in book:
            # example: "FICTION / Humorous / General"
            genres = book['genres']
            subject = SubjectData(type=Subject.BISAC, identifier=genres,
                                  weight=100)
            subjects.append(subject)

        if 'primaryGenre' in book:
            # example: "humorous-fiction,mystery,womens-fiction"
            genres = book['primaryGenre']
            for genre in genres.split(","):
                subject = SubjectData(type=Subject.ONECLICK,
                                      identifier=genre.strip(), weight=100)
                subjects.append(subject)

        # audience options are: adult, beginning-reader, childrens, young-adult
        # NOTE: In OneClick metadata, audience can be set to "Adult" while
        # publisher is "HarperTeen".
        audience = book.get('audience', None)
        if audience:
            subject = SubjectData(type=Subject.ONECLICK_AUDIENCE,
                                  identifier=audience.strip().lower(),
                                  weight=10)
            subjects.append(subject)

        # options are: "eBook", "eAudio"
        oneclick_medium = book.get('mediaType', None)
        if oneclick_medium and oneclick_medium not in cls.oneclick_medium_to_simplified_medium:
            cls.log.error("Could not process medium %s for %s",
                          oneclick_medium, oneclick_id)

        medium = cls.oneclick_medium_to_simplified_medium.get(
            oneclick_medium, Edition.BOOK_MEDIUM)

        # passed to metadata.apply, the isbn_identifier will create an equivalency
        # between the OneClick-labeled and the ISBN-labeled identifier rows, which
        # will in turn allow us to ask the MetadataWrangler for more info about the book.
        isbn_identifier = IdentifierData(Identifier.ISBN, oneclick_id)
        identifiers = [primary_identifier, isbn_identifier]

        links = []
        # A cover and its thumbnail become a single LinkData.
        # images come in small (ex: 71x108px), medium (ex: 95x140px),
        # and large (ex: 128x192px) sizes
        #
        # BUG FIX: initialize all three so a feed that omits one of the
        # expected sizes doesn't raise NameError below.
        image_data = None
        thumbnail_data = None
        thumbnail_data_backup = None
        if 'images' in book:
            images = book['images']
            for image in images:
                if image['name'] == "large":
                    image_data = cls.image_link_to_linkdata(
                        image['url'], Hyperlink.IMAGE)
                if image['name'] == "medium":
                    thumbnail_data = cls.image_link_to_linkdata(
                        image['url'], Hyperlink.THUMBNAIL_IMAGE)
                if image['name'] == "small":
                    thumbnail_data_backup = cls.image_link_to_linkdata(
                        image['url'], Hyperlink.THUMBNAIL_IMAGE)

            # Fall back to the small image if there was no medium one.
            if not thumbnail_data and thumbnail_data_backup:
                thumbnail_data = thumbnail_data_backup

            if image_data:
                if thumbnail_data:
                    image_data.thumbnail = thumbnail_data
                links.append(image_data)

        # Descriptions become links.
        description = book.get('description', None)
        if description:
            links.append(
                LinkData(
                    # there can be fuller descriptions in the search endpoint output
                    rel=Hyperlink.SHORT_DESCRIPTION,
                    content=description,
                    media_type="text/html",
                ))

        metadata.title = title
        metadata.language = language
        metadata.medium = medium
        metadata.series = series_name
        metadata.series_position = series_position
        metadata.publisher = publisher
        metadata.published = published
        metadata.identifiers = identifiers
        metadata.subjects = subjects
        metadata.contributors = contributors
        metadata.links = links

    if include_formats:
        formats = []
        if metadata.medium == Edition.BOOK_MEDIUM:
            content_type, drm_scheme = cls.oneclick_formats.get(
                "ebook-epub-oneclick")
            formats.append(FormatData(content_type, drm_scheme))
        elif metadata.medium == Edition.AUDIO_MEDIUM:
            content_type, drm_scheme = cls.oneclick_formats.get(
                "audiobook-mp3-oneclick")
            formats.append(FormatData(content_type, drm_scheme))
        else:
            # BUG FIX: `format_id` was never defined in this scope and
            # raised NameError; log the unrecognized medium instead.
            cls.log.warn("Unfamiliar format: %s", metadata.medium)

        # Make a CirculationData so we can write the formats,
        circulationdata = CirculationData(
            data_source=DataSource.ONECLICK,
            primary_identifier=primary_identifier,
            formats=formats,
        )

        metadata.circulation = circulationdata

    return metadata