예제 #1
0
    def process_item(self, identifier):
        """Look up Overdrive metadata for one identifier and apply it.

        :param identifier: Object handed to the metadata lookup; its
            ``identifier`` attribute is quoted in error messages.
        :return: A CoverageFailure if Overdrive reports a known error code
            or no metadata can be extracted from the response; otherwise
            whatever ``set_metadata`` returns.
        """
        book_info = self.api.metadata_lookup(identifier)

        # Map the error codes we know about to a message template.
        error_code = book_info.get('errorCode')
        if error_code == 'NotFound':
            template = "ID not recognized by Overdrive: %s"
        elif error_code == 'InvalidGuid':
            template = "Invalid Overdrive ID: %s"
        else:
            template = None

        if template is not None:
            # These failures are recorded as persistent (transient=False).
            return CoverageFailure(
                identifier,
                template % identifier.identifier,
                data_source=self.output_source,
                transient=False,
            )

        metadata = OverdriveRepresentationExtractor.book_info_to_metadata(
            book_info
        )
        if metadata:
            return self.set_metadata(
                identifier,
                metadata,
                metadata_replacement_policy=self.metadata_replacement_policy,
            )

        # Nothing usable came back; record it as a transient failure.
        return CoverageFailure(
            identifier,
            "Could not extract metadata from Overdrive data: %r" % book_info,
            data_source=self.output_source,
            transient=True,
        )
예제 #2
0
    def import_from_feed(self, feed, even_if_no_author=False,
                         immediately_presentation_ready=False,
                         feed_url=None):
        """Import the entries of a feed as editions, pools and works.

        Entries that already have a failure reported by
        ``extract_feed_data`` are skipped. An exception raised while
        importing one entry is recorded as a persistent CoverageFailure
        for that entry only; the remaining entries are still processed.

        :param feed: The feed, passed to ``extract_feed_data``.
        :param even_if_no_author: Forwarded to the edition and work helpers.
        :param immediately_presentation_ready: Forwarded to the edition and
            work helpers.
        :param feed_url: Passed to ``extract_feed_data`` with the feed.

        NOTE(review): no return statement is visible in this method, so the
        collected editions, pools, works and failures are not handed back
        to the caller -- confirm whether one is missing.
        """
        # Keep track of editions that were imported. Pools and works
        # for those editions may be looked up or created.
        imported_editions = {}
        pools = {}
        works = {}

        # If parsing the overall feed throws an exception, we should
        # address that before moving on. Let the exception propagate.
        # `failures` holds CoverageFailures that note business logic errors
        # and non-success download statuses, keyed like `metadata_objs`.
        metadata_objs, failures = self.extract_feed_data(feed, feed_url)

        def record_failure(key):
            # Must be called from inside an `except` block: the traceback
            # of the exception being handled becomes the failure message.
            identifier, ignore = Identifier.parse_urn(self._db, key)
            failures[key] = CoverageFailure(
                identifier, traceback.format_exc(),
                data_source=self.data_source, transient=False
            )

        # Make editions. If there is a problem, make sure the associated
        # pool and work aren't created.
        for key, metadata in metadata_objs.items():
            # key is identifier.urn here

            # If there's a status message about this item, don't try to
            # import it.
            if key in failures:
                continue

            try:
                # Create an edition. This will also create a pool if
                # there's circulation data.
                edition = self.import_edition_from_metadata(
                    metadata, even_if_no_author, immediately_presentation_ready
                )
            except Exception as e:
                # Rather than scratch the whole import, treat this as a
                # failure that only applies to this item.
                self.log.error("Error importing an OPDS item", exc_info=e)
                record_failure(key)
                # Clean up any edition we might have recorded.
                imported_editions.pop(key, None)
                # Move on to the next item, don't create a work.
                continue
            else:
                if edition:
                    imported_editions[key] = edition

            try:
                pool, work = self.update_work_for_edition(
                    edition, even_if_no_author, immediately_presentation_ready
                )
            except Exception:
                record_failure(key)
            else:
                if pool:
                    pools[key] = pool
                if work:
                    works[key] = work
예제 #3
0
    def detail_for_elementtree_entry(
            cls, parser, entry_tag, data_source, feed_url=None,
    ):
        """Turn an <atom:entry> tag into a dictionary of metadata that can be
        used as keyword arguments to the Metadata constructor.

        :param parser: Object whose ``_xpath1`` locates the entry's atom:id.
        :param entry_tag: The <atom:entry> element to examine.
        :param data_source: Attached to any CoverageFailure that is created;
            also used to obtain the database session.
        :param feed_url: Forwarded to ``_detail_for_elementtree_entry``.
        :return: A 3-tuple (identifier, kwargs, failure). All three are None
            when the entry has no usable atom:id. On success ``failure`` is
            None; if extracting the details raised, ``kwargs`` is None and
            ``failure`` is a transient CoverageFailure carrying the
            traceback.
        """
        id_tag = parser._xpath1(entry_tag, 'atom:id')
        if id_tag is None or not id_tag.text:
            # This <entry> tag doesn't identify a book so we
            # can't derive any information from it.
            return None, None, None
        identifier = id_tag.text

        try:
            data = cls._detail_for_elementtree_entry(
                parser, entry_tag, feed_url
            )
        except Exception:
            # Report the problem against this entry instead of letting
            # the exception propagate to the caller.
            _db = Session.object_session(data_source)
            identifier_obj, ignore = Identifier.parse_urn(_db, identifier)
            failure = CoverageFailure(
                identifier_obj, traceback.format_exc(), data_source,
                transient=True
            )
            return identifier, None, failure

        return identifier, data, None
예제 #4
0
    def process_batch(self, identifiers):
        """Fetch availability for a batch of identifiers and record results.

        :param identifiers: The identifiers to cover; also used to discard
            any book the response mentions that was not requested.
        :return: A list with one entry per processed book (the outcome of
            ``set_metadata``/``handle_success``) plus a persistent
            CoverageFailure for each requested ID missing from the response.
        """
        requested_ids = self.api.create_identifier_strings(identifiers)
        availability_response = self.api.availability(title_ids=requested_ids)
        covered_ids = set()
        outcomes = []

        parsed_items = self.parser.process_all(availability_response.content)
        for metadata, availability in parsed_items:
            identifier, is_new = metadata.primary_identifier.load(self._db)
            if identifier in identifiers:
                covered_ids.add(identifier.identifier)
                outcome = self.set_metadata(identifier, metadata)
                if not isinstance(outcome, CoverageFailure):
                    outcome = self.handle_success(identifier)
                outcomes.append(outcome)
            # Otherwise Theta told us about a book we didn't ask for.
            # This shouldn't happen, but if it does we do nothing further.

        # Every requested ID that the response never mentioned gets a
        # CoverageFailure of its own.
        for missing_id in requested_ids:
            if missing_id in covered_ids:
                continue
            identifier, ignore = Identifier.for_foreign_id(
                self._db, Identifier.THETA_ID, missing_id)
            outcomes.append(
                CoverageFailure(identifier,
                                "Book not in collection",
                                data_source=self.output_source,
                                transient=False))
        return outcomes
예제 #5
0
    def update_metadata(self,
                        catalog_item,
                        identifier=None,
                        metadata_replacement_policy=None):
        """Create db objects corresponding to the book info passed in.

        Note: CoverageProvider.handle_success, which is responsible for
        setting the work to be presentation-ready, is expected to be
        handled in the calling code.

        :param catalog_item: JSON representation of the book's metadata,
            coming from OneClick.
        :param identifier: Identifier to apply the metadata to. When falsy,
            one is loaded from the extracted metadata's primary identifier.
        :param metadata_replacement_policy: Forwarded to ``set_metadata``.
        :return: CoverageFailure or a database object (Work, Identifier,
            etc.)
        """
        extracted = OneClickRepresentationExtractor.isbn_info_to_metadata(
            catalog_item)

        if not extracted:
            # Generate a CoverageFailure to let the system know to revisit
            # this book.
            # TODO: if did not create a Work, but have a CoverageFailure for
            # the isbn, check that re-processing that coverage would
            # generate the work.
            message = (
                "Could not extract metadata from OneClick data: %r"
                % catalog_item
            )
            return CoverageFailure(identifier,
                                   message,
                                   data_source=self.output_source,
                                   transient=True)

        if not identifier:
            # Convert IdentifierData into an Identifier, if we can.
            identifier, made_new = extracted.primary_identifier.load(
                _db=self._db)

            if not identifier:
                message = (
                    "Could not create identifier for OneClick data: %r"
                    % catalog_item
                )
                return CoverageFailure(identifier,
                                       message,
                                       data_source=self.output_source,
                                       transient=True)

        return self.set_metadata(
            identifier,
            extracted,
            metadata_replacement_policy=metadata_replacement_policy)
예제 #6
0
    def process_batch(self, works):
        """Send a batch of works to the search index.

        :param works: Passed unchanged to the search index client's
            ``bulk_update``.
        :return: a mixed list of Works and CoverageFailure objects: the
            reported successes first, then one CoverageFailure per
            reported (work, error) failure.
        """
        indexed, rejected = self.search_index_client.bulk_update(works)
        return list(indexed) + [
            CoverageFailure(work, error) for (work, error) in rejected
        ]
예제 #7
0
 def process_item(self, identifier):
     """Fetch bibliographic data for one identifier and apply it.

     :param identifier: The identifier to look up.
     :return: Whatever ``set_metadata`` returns, or a transient
         CoverageFailure when the lookup yields nothing.
     """
     # max_age=0: never accept a cached representation. Either this is
     # the first run (so the cache is empty) or we are correcting an
     # earlier failure (so the cached representation might be wrong).
     looked_up = self.api.bibliographic_lookup(identifier, max_age=0)
     if looked_up:
         return self.set_metadata(identifier, looked_up)
     return CoverageFailure(
         identifier, "3M bibliographic lookup failed.",
         data_source=self.output_source, transient=True
     )
예제 #8
0
    def process_item(self, identifier):
        """Update one book's bibliographic metadata from OneClick.

        OneClick availability information is served separately from
        the book's metadata.  Furthermore, the metadata returned by the
        "book by isbn" request is less comprehensive than the data returned
        by the "search titles/genres/etc." endpoint.

        This method hits the "by isbn" endpoint and updates the bibliographic
        metadata returned by it.

        :param identifier: The identifier to look up and update.
        :return: The result of ``update_metadata``, or a transient
            CoverageFailure if the request failed or returned nothing.
        """
        try:
            response_dictionary = self.api.get_metadata_by_isbn(identifier)
        except (BadResponseException, IOError) as error:
            # `.message` is an empty string for exceptions built with more
            # than one argument (e.g. IOError(errno, strerror)) and does
            # not exist on Python 3, so fall back to str(error) rather
            # than recording a blank failure.
            message = getattr(error, 'message', None) or str(error)
            return CoverageFailure(identifier,
                                   message,
                                   data_source=self.output_source,
                                   transient=True)

        if not response_dictionary:
            message = "Cannot find OneClick metadata for %r" % identifier
            return CoverageFailure(identifier,
                                   message,
                                   data_source=self.output_source,
                                   transient=True)

        result = self.update_metadata(response_dictionary, identifier,
                                      self.metadata_replacement_policy)

        if isinstance(result, Identifier):
            # calls work.set_presentation_ready() for us
            self.handle_success(result)

        return result
예제 #9
0
    def data_detail_for_feedparser_entry(cls, entry, data_source):
        """Turn an entry dictionary created by feedparser into dictionaries of data
        that can be used as keyword arguments to the Metadata and CirculationData constructors.

        :param entry: The feedparser entry; its 'id' value identifies the
            book.
        :param data_source: Forwarded to the extraction helper and attached
            to any CoverageFailure; also used to obtain the database session.
        :return: A 3-tuple (identifier, kwargs for Metadata constructor, failure).
            All three are None when the entry has no 'id'. If extraction
            raised, the kwargs are None and ``failure`` is a transient
            CoverageFailure carrying the traceback.
        """
        identifier = entry.get('id')
        if not identifier:
            return None, None, None

        # At this point we can assume that we successfully got some
        # metadata, and possibly a link to the actual book.
        try:
            kwargs_meta = cls._data_detail_for_feedparser_entry(
                entry, data_source
            )
        except Exception:
            # Report the problem against this entry instead of letting
            # the exception propagate to the caller.
            _db = Session.object_session(data_source)
            identifier_obj, ignore = Identifier.parse_urn(_db, identifier)
            failure = CoverageFailure(
                identifier_obj, traceback.format_exc(), data_source,
                transient=True
            )
            return identifier, None, failure

        return identifier, kwargs_meta, None
예제 #10
0
    def test_to_work_coverage_record(self):
        """A CoverageFailure on a Work converts to a WorkCoverageRecord
        whose status reflects whether the failure is transient."""
        work = self._work()

        # Transient failure: check every field of the resulting record.
        failure = CoverageFailure(work, "Bah!", transient=True)
        record = failure.to_work_coverage_record("the_operation")
        assert isinstance(record, WorkCoverageRecord)
        eq_(work, record.work)
        eq_("the_operation", record.operation)
        eq_(CoverageRecord.TRANSIENT_FAILURE, record.status)
        eq_("Bah!", record.exception)

        # Persistent failure: only status and message are checked.
        failure = CoverageFailure(work, "Bah forever!", transient=False)
        record = failure.to_work_coverage_record(operation="the_operation")
        eq_(CoverageRecord.PERSISTENT_FAILURE, record.status)
        eq_("Bah forever!", record.exception)
예제 #11
0
    def test_to_coverage_record(self):
        """A CoverageFailure on an Identifier converts to a CoverageRecord
        whose status reflects whether the failure is transient."""
        gutenberg = DataSource.lookup(self._db, DataSource.GUTENBERG)
        identifier = self._identifier()

        # Transient failure: check every field of the resulting record.
        failure = CoverageFailure(
            identifier, "Bah!", data_source=gutenberg, transient=True
        )
        record = failure.to_coverage_record(operation="the_operation")
        assert isinstance(record, CoverageRecord)
        eq_(identifier, record.identifier)
        eq_(gutenberg, record.data_source)
        eq_("the_operation", record.operation)
        eq_(CoverageRecord.TRANSIENT_FAILURE, record.status)
        eq_("Bah!", record.exception)

        # Persistent failure: only status and message are checked.
        failure = CoverageFailure(
            identifier, "Bah forever!", data_source=gutenberg, transient=False
        )
        record = failure.to_coverage_record(operation="the_operation")
        eq_(CoverageRecord.PERSISTENT_FAILURE, record.status)
        eq_("Bah forever!", record.exception)
예제 #12
0
        description = message.message
        status_code = message.status_code
        if description and status_code:
            exception = u"%s: %s" % (status_code, description)
        elif status_code:
            exception = unicode(status_code)
        elif description:
            exception = description
        else:
            exception = 'No detail provided.'
            
        # All these CoverageFailures are transient because ATM we can
        # only assume that the server will eventually have the data.
        return CoverageFailure(
            identifier, exception, data_source, transient=True
        )
    
    @classmethod
    def detail_for_elementtree_entry(
            cls, parser, entry_tag, data_source, feed_url=None,
    ):

        """Turn an <atom:entry> tag into a dictionary of metadata that can be
        used as keyword arguments to the Metadata contructor.

        :return: A 2-tuple (identifier, kwargs)
        """

        identifier = parser._xpath1(entry_tag, 'atom:id')
        if identifier is None or not identifier.text:
예제 #13
0
 def process_item(self, identifier):
     """Always report a transient failure for the given identifier."""
     # NOTE(review): this call passes `self` as the first argument and the
     # identifier second, unlike the other CoverageFailure calls, which
     # pass the item first -- confirm against the constructor signature.
     failure = CoverageFailure(
         self, identifier, "Bitter failure", transient=True
     )
     return failure
예제 #14
0
 def process_item(self, item):
     """Record the attempt, then fail using this provider's own
     ``output_source`` and ``transient`` settings."""
     self.attempts.append(item)
     failure = CoverageFailure(
         item,
         "What did you expect?",
         self.output_source,
         self.transient,
     )
     return failure
예제 #15
0
 def process_item(self, item):
     """Record the attempt, then always return a CoverageFailure."""
     self.attempts.append(item)
     failure = CoverageFailure(
         item, "What did you expect?", None, False
     )
     return failure
예제 #16
0
    def populate_delta(self, months=1):
        """Apply the library's recent OneClick catalog changes to our database.

        Calls get_delta for the period starting `months` months ago and
        ending now. Metadata is updated for every added title. Every removed
        title that we already know about has its license counts set to zero.

        :param months: How many months back the delta period starts.
        :return: A 2-tuple (items_transmitted, items_updated), or
            (None, None) when get_delta returns nothing.
        """
        today = datetime.datetime.utcnow()
        time_ago = relativedelta(months=months)

        delta = self.get_delta(from_date=(today - time_ago), to_date=today)
        if not delta:
            return None, None

        # Either list may be absent from the response; treat a missing or
        # null value as "no titles" so len() and iteration below are safe.
        items_added = delta[0].get("addedTitles") or []
        items_removed = delta[0].get("removedTitles") or []

        items_transmitted = len(items_added) + len(items_removed)
        items_updated = 0
        coverage_provider = OneClickBibliographicCoverageProvider(_db=self._db)
        for catalog_item in items_added:
            result = coverage_provider.update_metadata(catalog_item)
            if isinstance(result, CoverageFailure):
                continue
            items_updated += 1

            if isinstance(result, Identifier):
                # calls work.set_presentation_ready() for us
                coverage_provider.handle_success(result)

        for catalog_item in items_removed:
            metadata = OneClickRepresentationExtractor.isbn_info_to_metadata(
                catalog_item)

            if not metadata:
                # Without metadata there is no identifier to look up, so
                # this item cannot be processed. Note it and move on.
                self.log.warn(
                    "Could not extract metadata from OneClick data: %r",
                    catalog_item)
                continue

            # convert IdentifierData into Identifier, if can
            identifier, made_new = metadata.primary_identifier.load(
                _db=self._db)
            if not identifier or made_new:
                # Only identifiers that already existed are handled here.
                continue

            # Don't delete works from the database.  Set them to "not ours anymore".
            pool = identifier.licensed_through
            if not pool:
                continue
            if pool.licenses_owned > 0:
                if pool.presentation_edition:
                    self.log.warn("Removing %s (%s) from circulation",
                                  pool.presentation_edition.title,
                                  pool.presentation_edition.author)
                else:
                    self.log.warn(
                        "Removing unknown work %s from circulation.",
                        identifier.identifier)
            pool.licenses_owned = 0
            pool.licenses_available = 0
            pool.licenses_reserved = 0
            pool.patrons_in_hold_queue = 0
            pool.last_checked = today

            items_updated += 1

        # stay data, stay!
        self._db.commit()

        return items_transmitted, items_updated
예제 #17
0
 def process_item(self, item):
     """Record the attempt, then fail using this provider's
     ``output_source``."""
     self.attempts.append(item)
     failure = CoverageFailure(
         item, "Oops!", self.output_source, True
     )
     return failure
예제 #18
0
 def process_item(self, item):
     """Record the attempt, then always return a CoverageFailure."""
     self.attempts.append(item)
     failure = CoverageFailure(
         item, "Oops!", None, True
     )
     return failure