Ejemplo n.º 1
0
    def process_batch(self, items):
        """Look up a batch of items and report the outcome for each edition.

        The primary identifiers of ``items`` are sent to
        ``self.content_lookup``, and the content of the response is
        imported as an OPDS feed.

        :param items: iterable of objects exposing ``primary_identifier``.
        :return: a list containing, for every imported edition, either
            its primary identifier (when it has an open-access download
            URL) or a non-transient CoverageFailure, followed by
            everything ``self.handle_import_messages`` produces for the
            feed's messages.
        """
        lookup_response = self.content_lookup.lookup(
            [item.primary_identifier for item in items]
        )
        feed_importer = OPDSImporter(self._db, DataSource.OA_CONTENT_SERVER)
        # The importer also returns the feed's "next" links; they are
        # not used here.
        editions, messages_by_id, _next_links = feed_importer.import_from_feed(
            lookup_response.content
        )

        outcomes = []

        # Editions that were imported: real successes and apparent ones.
        for edition in editions:
            download_url = edition.open_access_download_url
            if not download_url:
                reason = "Open access content server acknowledged book but gave no open-access download URL."
                outcomes.append(
                    CoverageFailure(
                        self, edition, exception=reason, transient=False
                    )
                )
                continue
            self.log.info(
                "Successfully located open access download ID for %r: %s",
                edition, download_url
            )
            outcomes.append(edition.primary_identifier)

        # Identifiers the server reported a message for.
        outcomes.extend(self.handle_import_messages(messages_by_id))
        return outcomes
Ejemplo n.º 2
0
 def import_feed_response(self, response, id_mapping):
     """Check the content type of an OPDS response, then import its feed.

     :param response: HTTP response; its ``text`` is handed to the importer.
     :param id_mapping: passed to OPDSImporter as ``identifier_mapping``.
     :return: the result of ``OPDSImporter.import_from_feed``.
     """
     self.check_content_type(response)
     feed_importer = OPDSImporter(
         self._db,
         identifier_mapping=id_mapping,
         data_source_name=self.output_source.name,
     )
     feed_body = response.text
     return feed_importer.import_from_feed(feed_body)
Ejemplo n.º 3
0
 def process_feed_response(self, response, id_mapping):
     """Parse an OPDS feed response and return the messages it contains.

     :param response: HTTP response whose ``text`` is an OPDS feed.
     :param id_mapping: passed to OPDSImporter as ``identifier_mapping``.
     :return: the result of ``OPDSImporter.extract_messages``.
     """
     feed_importer = OPDSImporter(
         self._db,
         self.collection,
         data_source_name=self.data_source.name,
         identifier_mapping=id_mapping,
     )
     xml_parser = OPDSXMLParser()
     feed_tree = etree.parse(StringIO(response.text))
     return feed_importer.extract_messages(xml_parser, feed_tree)
Ejemplo n.º 4
0
 def process_feed_response(self, response, id_mapping):
     """Check an OPDS response's content type and return its messages.

     :param response: HTTP response whose ``text`` is an OPDS feed.
     :param id_mapping: passed to OPDSImporter as ``identifier_mapping``.
     :return: the result of ``OPDSImporter.extract_messages``.
     """
     self.check_content_type(response)
     feed_importer = OPDSImporter(
         self._db,
         identifier_mapping=id_mapping,
         data_source_name=self.output_source.name,
     )
     xml_parser = OPDSXMLParser()
     feed_tree = etree.parse(StringIO(response.text))
     return feed_importer.extract_messages(xml_parser, feed_tree)
Ejemplo n.º 5
0
    def import_feed_response(self, response, id_mapping=None):
        """Confirm that an OPDS feed response is usable, then import the feed.

        :param response: HTTP response; its ``status_code``, ``headers``
            and ``text`` are read.
        :param id_mapping: optional value passed to OPDSImporter as
            ``identifier_mapping``. The body previously referenced this
            name without defining it, which raised NameError on every
            otherwise-successful call.
        :return: the result of ``OPDSImporter.import_from_feed``.
        :raise HTTPIntegrationException: if the status code is not 200,
            or the content type is missing or is not
            ``OPDSFeed.ACQUISITION_FEED_TYPE``.
        """
        if response.status_code != 200:
            self.log.error("BAD RESPONSE CODE: %s", response.status_code)
            raise HTTPIntegrationException(response.text)

        # A missing header is reported as a wrong media type rather than
        # surfacing as an unrelated KeyError.
        content_type = response.headers.get('content-type')
        if content_type != OPDSFeed.ACQUISITION_FEED_TYPE:
            raise HTTPIntegrationException("Wrong media type: %s" % content_type)

        importer = OPDSImporter(self._db, identifier_mapping=id_mapping)
        return importer.import_from_feed(response.text)
Ejemplo n.º 6
0
    def import_feed_response(self, response, id_mapping):
        """Verify the media type of an OPDS response and import its feed.

        :param response: HTTP response; its ``headers``, ``url`` and
            ``text`` are read.
        :param id_mapping: passed to OPDSImporter as ``identifier_mapping``.
        :return: the result of ``OPDSImporter.import_from_feed``.
        :raise BadResponseException: if the content type is not
            ``OPDSFeed.ACQUISITION_FEED_TYPE``.
        """
        media_type = response.headers['content-type']
        if media_type != OPDSFeed.ACQUISITION_FEED_TYPE:
            complaint = "Wrong media type: %s" % media_type
            raise BadResponseException.from_response(
                response.url, complaint, response
            )

        feed_importer = OPDSImporter(self._db, identifier_mapping=id_mapping)
        feed_body = response.text
        return feed_importer.import_from_feed(feed_body)
Ejemplo n.º 7
0
 def _detail_for_elementtree_entry(cls, parser, entry_tag, feed_url=None):
     """Add ODL license formats to the base OPDS details for an entry.

     Starts from ``OPDSImporter._detail_for_elementtree_entry`` and, for
     each ``odl:license`` tag under ``entry_tag``, appends one FormatData
     per ``odl:protection`` tag (or a single FormatData with no DRM
     scheme when the license has no protection tags) to
     ``data['circulation']['formats']``.

     :param parser: object providing ``text_of_optional_subtag`` and
         ``_xpath``.
     :param entry_tag: the entry element to inspect.
     :param feed_url: forwarded unchanged to the base implementation.
     :return: the detail dictionary from the base implementation; it is
         left untouched when the entry has no ``odl:license`` tags.
     """
     subtag = parser.text_of_optional_subtag
     data = OPDSImporter._detail_for_elementtree_entry(
         parser, entry_tag, feed_url)

     formats = []
     for odl_license_tag in parser._xpath(entry_tag, 'odl:license') or []:
         content_type = subtag(odl_license_tag, 'dcterms:format')
         protection_tags = parser._xpath(odl_license_tag,
                                         'odl:protection') or []
         drm_schemes = [
             subtag(protection_tag, 'dcterms:format')
             for protection_tag in protection_tags
         ]
         # A license without protection tags still yields one format,
         # with no DRM scheme.
         for drm_scheme in drm_schemes or [None]:
             formats.append(
                 FormatData(
                     content_type=content_type,
                     drm_scheme=drm_scheme,
                     rights_uri=RightsStatus.IN_COPYRIGHT,
                 ))

     # Merge once, after every license has been read. Extending inside
     # the loop re-added the formats of earlier licenses on each later
     # iteration, because ``formats`` accumulates across licenses.
     if formats:
         if not data.get('circulation'):
             data['circulation'] = dict()
         if not data['circulation'].get('formats'):
             data['circulation']['formats'] = []
         data['circulation']['formats'].extend(formats)
     return data
Ejemplo n.º 8
0
    def process_batch(self, batch):
        """Look up a batch, import the returned OPDS feed, and commit.

        :param batch: passed unchanged to ``self.lookup.lookup``.
        :raise Exception: if the response status is not 200, or its
            content type is not ``OPDSFeed.ACQUISITION_FEED_TYPE``.
        """
        lookup_response = self.lookup.lookup(batch)

        if lookup_response.status_code != 200:
            raise Exception(lookup_response.text)

        media_type = lookup_response.headers['content-type']
        if media_type != OPDSFeed.ACQUISITION_FEED_TYPE:
            raise Exception("Wrong media type: %s" % media_type)

        feed_importer = OPDSImporter(
            self._db,
            lookup_response.text,
            overwrite_rels=[Hyperlink.DESCRIPTION, Hyperlink.IMAGE],
        )
        editions, messages_by_id = feed_importer.import_from_feed()
        success_count = len(editions)
        failure_count = len(messages_by_id)
        self.log.info(
            "%d successes, %d failures.", success_count, failure_count
        )
        self._db.commit()
Ejemplo n.º 9
0
    def process_batch(self, batch):
        """Fetch an OPDS feed for ``batch``, import it, and commit the session.

        :param batch: passed unchanged to ``self.lookup.lookup``.
        :raise Exception: if the lookup does not return status 200, or
            the response's content type is not
            ``OPDSFeed.ACQUISITION_FEED_TYPE``.
        """
        feed_response = self.lookup.lookup(batch)

        # Reject anything that is not a successful acquisition feed.
        if feed_response.status_code != 200:
            raise Exception(feed_response.text)
        received_type = feed_response.headers['content-type']
        if received_type != OPDSFeed.ACQUISITION_FEED_TYPE:
            raise Exception("Wrong media type: %s" % received_type)

        importer = OPDSImporter(
            self._db,
            feed_response.text,
            overwrite_rels=[Hyperlink.DESCRIPTION, Hyperlink.IMAGE],
        )
        successes, failures_by_id = importer.import_from_feed()
        self.log.info(
            "%d successes, %d failures.", len(successes), len(failures_by_id)
        )
        self._db.commit()
Ejemplo n.º 10
0
 def _importer(self):
     """Build the OPDSImporter to use for this object's Collection.

     The importer takes its data source name from the collection's data
     source and uses an AuthorNameCanonicalizer as its metadata client.
     """
     target = self.collection
     canonicalizer = AuthorNameCanonicalizer(self._db)
     source_name = target.data_source.name
     return OPDSImporter(
         self._db,
         target,
         data_source_name=source_name,
         metadata_client=canonicalizer,
     )
Ejemplo n.º 11
0
 def _importer(self):
     """Build an OPDSImporter whose network collaborators are unusable."""
     # This test partly verifies that only the OPDS metadata is
     # imported, with no further HTTP requests and no mirroring. Bare
     # object() instances are not real clients, so any attempt to use
     # them crashes.
     fake_metadata_client = object()
     fake_http_get = object()
     return OPDSImporter(
         self._db,
         collection=self.collection,
         metadata_client=fake_metadata_client,
         mirror=None,
         http_get=fake_http_get,
     )
Ejemplo n.º 12
0
 def __init__(self, _db, collection, lookup=None):
     """Set up the monitor's lookup client and OPDS importer.

     :param _db: forwarded to the parent constructor.
     :param collection: forwarded to the parent constructor and, when
         no lookup is given, to ``MetadataWranglerOPDSLookup.from_config``.
     :param lookup: optional lookup client; when falsy, one is built
         from configuration.
     """
     super(MetadataWranglerCollectionMonitor, self).__init__(
         _db, collection
     )
     if not lookup:
         lookup = MetadataWranglerOPDSLookup.from_config(
             self._db, collection=collection
         )
     self.lookup = lookup

     importer_settings = dict(
         data_source_name=DataSource.METADATA_WRANGLER,
         metadata_client=self.lookup,
         map_from_collection=True,
     )
     self.importer = OPDSImporter(
         self._db, self.collection, **importer_settings
     )
Ejemplo n.º 13
0
    def run_once(self, start, cutoff):
        """Fetch updates since ``start`` and import the resulting feed.

        If the lookup client is not authenticated, or fetching or
        checking the updates raises RemoteIntegrationException,
        ``self.keep_timestamp`` is set to False and nothing is imported.

        :param start: passed to ``self.lookup.updates``.
        :param cutoff: not used by this method.
        """
        if not self.lookup.authenticated:
            self.keep_timestamp = False
            return

        try:
            updates_response = self.lookup.updates(start)
            self.lookup.check_content_type(updates_response)
        except RemoteIntegrationException as error:
            self.log.error(
                "Error getting updates for %r: %s",
                self.collection, error.debug_message
            )
            self.keep_timestamp = False
        else:
            feed_importer = OPDSImporter(
                self._db,
                self.collection,
                data_source_name=DataSource.METADATA_WRANGLER,
                metadata_client=self.lookup,
                map_from_collection=True,
            )
            feed_importer.import_from_feed(updates_response.text)