def test_strptime_utc_error(self):
    """strptime_utc raises ValueError when given a format containing %z."""
    # strptime_utc is only meant for time formats that do not mention
    # a timezone; this one does (the trailing "%z").
    aware_format = "%Y-%m-%dT%H:%M:%S%z"
    with pytest.raises(ValueError) as raised:
        strptime_utc("2020-01-01T12:00:00+0300", aware_format)

    message = str(raised.value)
    assert (
        "Cannot use strptime_utc with timezone-aware format %Y-%m-%dT%H:%M:%S%z"
        in message
    )
Exemple #2
0
 def _epoch_to_struct(cls, epoch_string):
     """Convert an epoch-seconds string into the value strptime_utc returns.

     This turns the time string we get from Enki into something the
     Circulation Manager can make use of. The epoch value is rendered as
     a UTC timestamp string (whole seconds only) and then parsed back
     with ``strptime_utc`` using the same format.

     :param epoch_string: A value accepted by ``float()``, interpreted as
         seconds since the epoch.
     :return: Whatever ``strptime_utc`` returns for the rendered string.
     """
     time_format = "%Y-%m-%dT%H:%M:%S"
     seconds = float(epoch_string)
     utc_struct = time.gmtime(seconds)
     rendered = time.strftime(time_format, utc_struct)
     return strptime_utc(rendered, time_format)
Exemple #3
0
 def _get_datetime(self, data, key):
     """Return the date stored under ``key`` in ``data``, or None.

     A missing or falsy value yields None. A ``datetime.date`` instance
     (``datetime.datetime`` is a subclass, so it qualifies too) is
     returned unchanged. Anything else is parsed with ``strptime_utc``
     using ``CirculationEvent.TIME_FORMAT``.
     """
     value = data.get(key)
     if not value:
         return None
     if isinstance(value, datetime.date):
         # Already a date object; nothing to parse.
         return value
     return strptime_utc(value, CirculationEvent.TIME_FORMAT)
Exemple #4
0
    def test_row_to_metadata_complete_success(self):
        """row_to_metadata carries over the fields of a complete row."""
        target = self.l
        row = self.create_row()
        metadata = target.row_to_metadata(row)

        # Title, first contributor and first identifier come from the row.
        assert row[target.title_field] == metadata.title
        first_contributor = metadata.contributors[0]
        assert row["author"] == first_contributor.display_name
        first_identifier = metadata.identifiers[0]
        assert row["isbn"] == first_identifier.identifier

        # The publication date is the row's value parsed with DATE_FORMAT.
        expected_published = strptime_utc(row["published"], self.DATE_FORMAT)
        assert expected_published == metadata.published

        # The language must match the default language of self.l.
        assert target.default_language == metadata.language
Exemple #5
0
    def test_metadata_to_list_entry_complete_success(self):
        """metadata_to_list_entry builds a fully populated list entry.

        Checks the edition, primary identifier, annotation,
        classifications and appearance dates of the entry created from a
        complete row.
        """
        target = self.l
        row = self.create_row(display_author="Octavia Butler")
        metadata = target.row_to_metadata(row)
        list_entry = target.metadata_to_list_entry(
            self.custom_list, self.data_source, self.now, metadata
        )

        edition = list_entry.edition
        assert row[target.title_field] == edition.title
        assert "Octavia Butler" == edition.author
        assert "Butler, Octavia" == edition.sort_author

        identifier = edition.primary_identifier
        assert Identifier.ISBN == identifier.type
        assert row["isbn"] == identifier.identifier

        # There should be one description: the row's annotation followed
        # by the citation generated for the row.
        expected_annotation = row[target.annotation_field] + target.annotation_citation(
            row
        )
        assert expected_annotation == list_entry.annotation

        # There should be six classifications, two of type 'tag', two
        # of type 'schema:audience', and two of type
        # 'schema:typicalAgeRange'
        classifications = identifier.classifications
        assert 6 == len(classifications)

        def with_subject_type(subject_type):
            # Classifications whose subject has the given type.
            return [c for c in classifications if c.subject.type == subject_type]

        assert 2 == len(with_subject_type(Subject.TAG))
        assert 2 == len(with_subject_type(Subject.FREEFORM_AUDIENCE))
        assert 2 == len(with_subject_type(Subject.AGE_RANGE))

        # First appearance is parsed from the row; the most recent
        # appearance is the timestamp passed to metadata_to_list_entry.
        expected_first = strptime_utc(
            row[target.first_appearance_field], self.DATE_FORMAT
        )
        assert expected_first == list_entry.first_appearance
        assert self.now == list_entry.most_recent_appearance
 def test_strptime_utc(self, expect, date_string, format):
     """strptime_utc(date_string, format) equals datetime_utc(*expect)."""
     parsed = strptime_utc(date_string, format)
     expected = datetime_utc(*expect)
     assert parsed == expected
Exemple #7
0
    def record_info_to_metadata(cls, book, availability):
        """Turn Odilo's JSON representation of a book into a Metadata
        object.

        Note:  The json data passed into this method is from a different file/stream
        from the json data that goes into the book_info_to_circulation() method.

        :param book: Mapping of record fields (read with ``in`` and
            ``.get()``); it must contain an "id" key to be processed.
        :param availability: Passed unchanged to
            ``OdiloRepresentationExtractor.record_info_to_circulation``.
        :return: None when ``book`` has no "id" key; otherwise the tuple
            ``(metadata, active)`` where ``active`` is ``book.get("active")``.
            NOTE(review): callers must handle both shapes.
        """
        if "id" not in book:
            return None

        def parse_date(raw, description):
            # Parse a yyyyMMdd string. An empty or unparseable value yields
            # None so that a raw string is never handed on as if it were a
            # parsed date.
            if not raw:
                return None
            try:
                return strptime_utc(raw, "%Y%m%d")
            except ValueError as e:
                cls.log.warning(
                    "Cannot parse %s from: %s, message: %s", description, raw, e
                )
                return None

        odilo_id = book["id"]
        primary_identifier = IdentifierData(Identifier.ODILO_ID, odilo_id)
        active = book.get("active")

        title = book.get("title")
        subtitle = book.get("subtitle")
        # These keys may be absent or null; treat that like an empty string
        # instead of calling .strip() on None.
        series = (book.get("series") or "").strip() or None
        series_position = (book.get("seriesPosition") or "").strip() or None

        contributors = []
        sort_author = book.get("author")
        if sort_author:
            roles = [Contributor.AUTHOR_ROLE]
            display_author = sort_name_to_display_name(sort_author)
            contributor = ContributorData(
                sort_name=sort_author,
                display_name=display_author,
                roles=roles,
                biography=None,
            )
            contributors.append(contributor)

        publisher = book.get("publisher")

        # Metadata --> Marc21 260$c
        # Fall back to the record creation date (yyyyMMdd) when there is
        # no publication date.
        published = parse_date(
            book.get("publicationDate") or book.get("releaseDate"),
            "publication date",
        )

        # yyyyMMdd --> record last modification date
        last_update = parse_date(book.get("modificationDate"), "last update date")

        language = book.get("language", "spa")

        trusted_weight = Classification.TRUSTED_DISTRIBUTOR_WEIGHT
        # "or []" tolerates a key that is present with a null value.
        subjects = [
            SubjectData(type=Subject.TAG, identifier=tag, weight=trusted_weight)
            for tag in (book.get("subjects") or [])
        ]
        subjects.extend(
            SubjectData(
                type=Subject.BISAC,
                identifier=bisac_code,
                weight=trusted_weight,
            )
            for bisac_code in (book.get("subjectsBisacCodes") or [])
        )

        grade_level = book.get("gradeLevel")
        if grade_level:
            subjects.append(
                SubjectData(
                    type=Subject.GRADE_LEVEL,
                    identifier=grade_level,
                    weight=trusted_weight,
                )
            )

        # set_format() fills `formats` and returns a medium; the medium of
        # the last recognized format wins.
        medium = None
        file_format = book.get("fileFormat")
        formats = []
        for format_received in book.get("formats") or []:
            if format_received in cls.format_data_for_odilo_format:
                medium = cls.set_format(format_received, formats)
            elif format_received == cls.ACSM and file_format:
                medium = cls.set_format(
                    format_received + "_" + file_format.upper(), formats
                )
            else:
                cls.log.warning("Unrecognized format received: %s", format_received)

        if not medium:
            medium = Edition.BOOK_MEDIUM

        identifiers = []
        isbn = book.get("isbn")
        if isbn:
            # Normalize ISBN-10 values to ISBN-13.
            if isbnlib.is_isbn10(isbn):
                isbn = isbnlib.to_isbn13(isbn)
            identifiers.append(IdentifierData(Identifier.ISBN, isbn, 1))

        # A cover
        links = []
        cover_image_url = book.get("coverImageUrl")
        if cover_image_url:
            image_data = cls.image_link_to_linkdata(
                cover_image_url, Hyperlink.THUMBNAIL_IMAGE
            )
            if image_data:
                links.append(image_data)

        original_image_url = book.get("originalImageUrl")
        if original_image_url:
            image_data = cls.image_link_to_linkdata(original_image_url, Hyperlink.IMAGE)
            if image_data:
                links.append(image_data)

        # Descriptions become links.
        description = book.get("description")
        if description:
            links.append(
                LinkData(
                    rel=Hyperlink.DESCRIPTION,
                    content=description,
                    media_type="text/html",
                )
            )

        metadata = Metadata(
            data_source=DataSource.ODILO,
            title=title,
            subtitle=subtitle,
            language=language,
            medium=medium,
            series=series,
            series_position=series_position,
            publisher=publisher,
            published=published,
            primary_identifier=primary_identifier,
            identifiers=identifiers,
            subjects=subjects,
            contributors=contributors,
            links=links,
            data_source_last_updated=last_update,
        )

        metadata.circulation = OdiloRepresentationExtractor.record_info_to_circulation(
            availability
        )
        # 'active' --> means that the book exists but it's no longer in the collection
        # (it could be available again in the future)
        if metadata.circulation:
            if not active:
                metadata.circulation.licenses_owned = 0
            metadata.circulation.formats = formats

        return metadata, active
Exemple #8
0
    def edit(self, identifier_type, identifier):
        """Edit a work's metadata.

        Reads the submitted values from ``flask.request.form`` and records
        them on the library-staff edition of the work identified by
        ``identifier_type`` and ``identifier``. If anything changed, the
        work's presentation is recalculated.

        :return: Whatever ``load_work`` returned if that is a
            ProblemDetail; one of the error constants (for example
            UNKNOWN_ROLE or INVALID_DATE_FORMAT) after rolling back the
            session when a submitted value is invalid; otherwise an empty
            200 ``Response``.
        """
        self.require_librarian(flask.request.library)

        # TODO: It would be nice to use the metadata layer for this, but
        # this code handles empty values differently than other metadata
        # sources. When a staff member deletes a value, that indicates
        # they think it should be empty. This needs to be indicated in the
        # db so that it can overrule other data sources that set a value,
        # unlike other sources which set empty fields to None.

        def text_or_none(value):
            # str(None) would store the literal text "None"; keep a
            # missing form value as None instead.
            return None if value is None else str(value)

        work = self.load_work(flask.request.library, identifier_type, identifier)
        if isinstance(work, ProblemDetail):
            return work

        changed = False

        staff_data_source = DataSource.lookup(self._db, DataSource.LIBRARY_STAFF)
        primary_identifier = work.presentation_edition.primary_identifier
        staff_edition, is_new = get_one_or_create(
            self._db,
            Edition,
            primary_identifier_id=primary_identifier.id,
            data_source_id=staff_data_source.id,
        )
        self._db.expire(primary_identifier)

        new_title = flask.request.form.get("title")
        if new_title and work.title != new_title:
            staff_edition.title = str(new_title)
            changed = True

        new_subtitle = flask.request.form.get("subtitle")
        if work.subtitle != new_subtitle:
            # A value that existed and was cleared is recorded as NO_VALUE.
            if work.subtitle and not new_subtitle:
                new_subtitle = NO_VALUE
            staff_edition.subtitle = text_or_none(new_subtitle)
            changed = True

        # The form data includes roles and names for contributors in the same order.
        new_contributor_roles = flask.request.form.getlist("contributor-role")
        new_contributor_names = [
            str(n) for n in flask.request.form.getlist("contributor-name")
        ]
        # The first author in the form is considered the primary author, even
        # though there's no separate MARC code for that.
        for i, role in enumerate(new_contributor_roles):
            if role == Contributor.AUTHOR_ROLE:
                new_contributor_roles[i] = Contributor.PRIMARY_AUTHOR_ROLE
                break
        roles_and_names = list(zip(new_contributor_roles, new_contributor_names))

        # Remove any contributions that weren't in the form, and remove contributions
        # that already exist from the list so they won't be added again.
        deleted_contributions = False
        for contribution in staff_edition.contributions:
            existing = (contribution.role, contribution.contributor.display_name)
            if existing not in roles_and_names:
                self._db.delete(contribution)
                deleted_contributions = True
                changed = True
            else:
                roles_and_names.remove(existing)
        if deleted_contributions:
            # Ensure the staff edition's contributions are up-to-date when
            # calculating the presentation edition later.
            self._db.refresh(staff_edition)

        # Any remaining roles and names are new contributions.
        for role, name in roles_and_names:
            # There may be one extra role at the end from the input for
            # adding a contributor, in which case it will have no
            # corresponding name and can be ignored.
            if name:
                if role not in Contributor.MARC_ROLE_CODES:
                    self._db.rollback()
                    return UNKNOWN_ROLE.detailed(
                        _(
                            "Role %(role)s is not one of the known contributor roles.",
                            role=role,
                        )
                    )
                contributor = staff_edition.add_contributor(name=name, roles=[role])
                contributor.display_name = name
                changed = True

        new_series = flask.request.form.get("series")
        if work.series != new_series:
            if work.series and not new_series:
                new_series = NO_VALUE
            staff_edition.series = text_or_none(new_series)
            changed = True

        new_series_position = flask.request.form.get("series_position")
        if new_series_position is not None and new_series_position != "":
            try:
                new_series_position = int(new_series_position)
            except ValueError:
                self._db.rollback()
                return INVALID_SERIES_POSITION
        else:
            new_series_position = None
        if work.series_position != new_series_position:
            # A position that existed and was cleared is recorded as NO_NUMBER.
            if work.series_position and new_series_position is None:
                new_series_position = NO_NUMBER
            staff_edition.series_position = new_series_position
            changed = True

        new_medium = flask.request.form.get("medium")
        if new_medium:
            if new_medium not in Edition.medium_to_additional_type:
                self._db.rollback()
                return UNKNOWN_MEDIUM.detailed(
                    _(
                        "Medium %(medium)s is not one of the known media.",
                        medium=new_medium,
                    )
                )
            staff_edition.medium = new_medium
            changed = True

        new_language = flask.request.form.get("language")
        if new_language is not None and new_language != "":
            new_language = LanguageCodes.string_to_alpha_3(new_language)
            if not new_language:
                self._db.rollback()
                return UNKNOWN_LANGUAGE
        else:
            new_language = None
        if new_language != staff_edition.language:
            staff_edition.language = new_language
            changed = True

        new_publisher = flask.request.form.get("publisher")
        if new_publisher != staff_edition.publisher:
            if staff_edition.publisher and not new_publisher:
                new_publisher = NO_VALUE
            staff_edition.publisher = text_or_none(new_publisher)
            changed = True

        new_imprint = flask.request.form.get("imprint")
        if new_imprint != staff_edition.imprint:
            if staff_edition.imprint and not new_imprint:
                new_imprint = NO_VALUE
            staff_edition.imprint = text_or_none(new_imprint)
            changed = True

        new_issued = flask.request.form.get("issued")
        if new_issued is not None and new_issued != "":
            try:
                new_issued = strptime_utc(new_issued, "%Y-%m-%d")
            except ValueError:
                self._db.rollback()
                return INVALID_DATE_FORMAT
        else:
            new_issued = None
        if new_issued != staff_edition.issued:
            staff_edition.issued = new_issued
            changed = True

        # TODO: This lets library staff add a 1-5 rating, which is used in the
        # quality calculation. However, this doesn't work well if there are any
        # other measurements that contribute to the quality. The form will show
        # the calculated quality rather than the staff rating, which will be
        # confusing. It might also be useful to make it more clear how this
        # relates to the quality threshold in the library settings.
        changed_rating = False
        new_rating = flask.request.form.get("rating")
        if new_rating is not None and new_rating != "":
            try:
                new_rating = float(new_rating)
            except ValueError:
                self._db.rollback()
                return INVALID_RATING
            scale = Measurement.RATING_SCALES[DataSource.LIBRARY_STAFF]
            if new_rating < scale[0] or new_rating > scale[1]:
                self._db.rollback()
                return INVALID_RATING.detailed(
                    _(
                        "The rating must be a number between %(low)s and %(high)s.",
                        low=scale[0],
                        high=scale[1],
                    )
                )
            # Compare the rating, normalized to the 0-1 range of the scale,
            # against the work's current quality.
            if (new_rating - scale[0]) / (scale[1] - scale[0]) != work.quality:
                primary_identifier.add_measurement(
                    staff_data_source,
                    Measurement.RATING,
                    new_rating,
                    weight=WorkController.STAFF_WEIGHT,
                )
                changed = True
                changed_rating = True

        changed_summary = False
        new_summary = flask.request.form.get("summary") or ""
        if new_summary != work.summary_text:
            # Remember the current summary only if staff wrote it, so it
            # can be deleted once the replacement link exists.
            old_summary = None
            if work.summary and work.summary.data_source == staff_data_source:
                old_summary = work.summary

            work.presentation_edition.primary_identifier.add_link(
                Hyperlink.DESCRIPTION, None, staff_data_source, content=new_summary
            )

            # Delete previous staff summary
            if old_summary:
                for link in old_summary.links:
                    self._db.delete(link)
                self._db.delete(old_summary)

            changed = True
            changed_summary = True

        if changed:
            # Even if the presentation doesn't visibly change, we want
            # to regenerate the OPDS entries and update the search
            # index for the work, because that might be the 'real'
            # problem the user is trying to fix.
            policy = PresentationCalculationPolicy(
                classify=True,
                regenerate_opds_entries=True,
                regenerate_marc_record=True,
                update_search_index=True,
                calculate_quality=changed_rating,
                choose_summary=changed_summary,
            )
            work.calculate_presentation(policy=policy)

        return Response("", 200)