Python OPDSXMLParser Beispiele, core.opds_import.OPDSXMLParser Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_opds.py Projekt: datalogics-tsmith/circulation

    def test_active_loan_feed(self):
        """Check the active-loans feed before and after the patron borrows.

        With no loans the feed must have no entries.  After two loans are
        created, the feed must list exactly those two works, and each
        acquisition link must carry an <opds:availability> child holding
        the loan dates.
        """
        annotator_cls = CirculationManagerLoanAndHoldAnnotator
        patron = self.default_patron

        # With no loans, the feed has no entries.
        empty_xml = unicode(
            annotator_cls.active_loans_for(None, patron, test_mode=True))
        eq_(0, len(feedparser.parse(empty_xml)['entries']))

        start = datetime.datetime.utcnow()
        end = start + datetime.timedelta(days=1)

        # An open-access loan gets a start date but no end date.
        open_access_work = self._work(
            language="eng", with_open_access_download=True)
        open_access_work.license_pools[0].loan_to(patron, start=start)

        # A loan of some other kind of book runs from now until tomorrow.
        licensed_work = self._work(language="eng", with_license_pool=True)
        licensed_work.license_pools[0].loan_to(patron, start=start, end=end)

        # A third work is created but never loaned out.
        self._work(language="eng", with_open_access_download=True)

        # Generate the feed again now that loans exist.
        loans_xml = unicode(
            annotator_cls.active_loans_for(None, patron, test_mode=True))
        entries = feedparser.parse(loans_xml)['entries']

        # The only entries in the feed are the works currently out on
        # loan to this patron.
        eq_(2, len(entries))
        first, second = sorted(entries, key=lambda entry: entry['title'])
        eq_(open_access_work.title, first['title'])
        eq_(licensed_work.title, second['title'])

        # The start and end dates of each loan should be present in an
        # <opds:availability> child of the acquisition link.
        document = etree.fromstring(loans_xml)
        xml_parser = OPDSXMLParser()
        acquisition_links = xml_parser._xpath(
            document,
            "//atom:entry/atom:link[@rel='http://opds-spec.org/acquisition']")
        eq_(2, len(acquisition_links))

        start_s = _strftime(start)
        end_s = _strftime(end)

        # Split the availability tags by whether they carry 'until'.
        without_until = []
        with_until = []
        for link in acquisition_links:
            availability = xml_parser._xpath1(link, "opds:availability")
            if 'until' in availability.attrib:
                with_until.append(availability)
            else:
                without_until.append(availability)

        # Exactly one tag has 'since' but not 'until'...
        [open_ended] = without_until
        eq_(start_s, open_ended.attrib['since'])

        # ... and exactly one has both.
        [bounded] = with_until
        eq_(start_s, bounded.attrib['since'])
        eq_(end_s, bounded.attrib['until'])

Beispiel #2

0

Datei anzeigen

Datei: feedbooks.py Projekt: NYPL-Simplified/content_server

 def rights_uri_from_entry_tag(cls, entry):
     """Compute a rights URI from an entry's rights, source and issue date.

     Looks up the ``atom:rights``, ``dcterms:source`` and
     ``dcterms:issued`` children of ``entry`` and passes their text
     (or None where a child is missing) to RehostingPolicy.rights_uri.
     """
     def text_of(path):
         # Text of the first match for `path`, or None when absent.
         tag = OPDSXMLParser._xpath1(entry, path)
         if tag is None:
             return None
         return tag.text

     rights = text_of('atom:rights')
     source = text_of('dcterms:source')
     publication_year = text_of('dcterms:issued')
     return RehostingPolicy.rights_uri(rights, source, publication_year)

Beispiel #3

0

Datei anzeigen

Datei: feedbooks.py Projekt: NYPL-Simplified/circulation

 def rights_uri_from_entry_tag(cls, entry):
     """Determine the URI that best encapsulates the rights
     status of the downloads associated with this book.

     The rights statement, source and issue date are read from the
     entry's ``atom:rights``, ``dcterms:source`` and ``dcterms:issued``
     children; a missing child contributes None.
     """
     values = []
     for path in ('atom:rights', 'dcterms:source', 'dcterms:issued'):
         tag = OPDSXMLParser._xpath1(entry, path)
         # Keep None for a missing tag, otherwise use its text.
         values.append(tag if tag is None else tag.text)
     rights, source, publication_year = values
     return RehostingPolicy.rights_uri(rights, source, publication_year)

Beispiel #4

0

Datei anzeigen

Datei: feedbooks.py Projekt: wjzhu-class/circulation

 def rights_uri_from_entry_tag(cls, entry):
     """Determine the URI that best encapsulates the rights
     status of the downloads associated with this book.

     Each of the three inputs to RehostingPolicy.rights_uri is the text
     of the matching child tag of ``entry``, or None if that tag is not
     found.
     """
     rights_tag = OPDSXMLParser._xpath1(entry, 'atom:rights')
     rights = None if rights_tag is None else rights_tag.text

     source_tag = OPDSXMLParser._xpath1(entry, 'dcterms:source')
     source = None if source_tag is None else source_tag.text

     issued_tag = OPDSXMLParser._xpath1(entry, 'dcterms:issued')
     publication_year = None if issued_tag is None else issued_tag.text

     return RehostingPolicy.rights_uri(rights, source, publication_year)

Beispiel #5

0

Datei anzeigen

 def __init__(self, _db, collection, lookup=None, provider=None):
     """Set up the monitor with an OPDS XML parser and a coverage provider.

     :param _db: passed through to the parent constructor.
     :param collection: passed to the parent constructor and, when no
         provider is supplied, to MetadataUploadCoverageProvider.
     :param lookup: forwarded to the parent as ``lookup`` and to the
         default provider as ``lookup_client``.
     :param provider: optional provider to use; when it is falsy a
         MetadataUploadCoverageProvider is built instead.
     """
     parent = super(MWAuxiliaryMetadataMonitor, self)
     parent.__init__(_db, collection, lookup=lookup)
     self.parser = OPDSXMLParser()
     if not provider:
         # No usable provider was supplied; build the default one.
         provider = MetadataUploadCoverageProvider(
             collection, lookup_client=lookup)
     self.provider = provider

Beispiel #6

0

Datei anzeigen

Datei: coverage.py Projekt: datalogics/circulation

 def process_feed_response(self, response, id_mapping):
     """Extract messages from an OPDS feed response.

     Parses ``response.text`` as XML and returns whatever
     OPDSImporter.extract_messages yields for the parsed document.
     """
     feed_importer = OPDSImporter(
         self._db, self.collection,
         data_source_name=self.data_source.name,
         identifier_mapping=id_mapping)
     xml_parser = OPDSXMLParser()
     body = StringIO(response.text)
     document = etree.parse(body)
     return feed_importer.extract_messages(xml_parser, document)

Beispiel #7

0

Datei anzeigen

 def process_feed_response(self, response, id_mapping):
     """Confirm the OPDS feed response and extract its messages.

     The response is first passed to check_content_type; its text is
     then parsed as XML and handed to OPDSImporter.extract_messages.
     """
     self.check_content_type(response)
     feed_importer = OPDSImporter(
         self._db,
         identifier_mapping=id_mapping,
         data_source_name=self.output_source.name)
     xml_parser = OPDSXMLParser()
     body = StringIO(response.text)
     document = etree.parse(body)
     return feed_importer.extract_messages(xml_parser, document)

Beispiel #8

0

Datei anzeigen

Datei: test_controller.py Projekt: rskm1/metadata_wrangler

    def test_add_items(self):
        """Adding identifiers yields one <simplified:message> per URN.

        The request carries an already-catalogued URN, an uncatalogued
        URN and an unparseable URN, and each message is checked for its
        id, status code and description.
        """
        invalid_urn = "FAKE AS I WANNA BE"
        catalogued_id = self._identifier()
        uncatalogued_id = self._identifier()
        self.collection.catalog_identifier(self._db, catalogued_id)

        message_path = '/atom:feed/simplified:message'

        request_path = '/?urn=%s&urn=%s&urn=%s' % (
            catalogued_id.urn, uncatalogued_id.urn, invalid_urn)
        with self.app.test_request_context(
                request_path, headers=self.valid_auth):
            response = self.controller.add_items(self.collection.name)

        # None of the identifiers raise or return an error.
        eq_(HTTP_OK, response.status_code)

        # It sends three messages, unpacked in the order the URNs were
        # given in the request.
        root = etree.parse(StringIO(response.data))
        catalogued, uncatalogued, invalid = self.XML_PARSE(root, message_path)

        # The uncatalogued identifier is now in the catalog.
        assert uncatalogued_id in self.collection.catalog
        # It has an accurate response message.
        eq_(uncatalogued_id.urn, self.xml_value(uncatalogued, 'atom:id'))
        eq_('201', self.xml_value(uncatalogued, 'simplified:status_code'))
        eq_('Successfully added',
            self.xml_value(uncatalogued, 'schema:description'))

        # The catalogued identifier is still in the catalog.
        assert catalogued_id in self.collection.catalog
        # And even though it responds 'OK', the message tells you it
        # was already there.
        eq_(catalogued_id.urn, self.xml_value(catalogued, 'atom:id'))
        eq_('200', self.xml_value(catalogued, 'simplified:status_code'))
        eq_('Already in catalog',
            self.xml_value(catalogued, 'schema:description'))

        # An invalid identifier returns a 400 error message.
        eq_(invalid_urn, self.xml_value(invalid, 'atom:id'))
        eq_('400', self.xml_value(invalid, 'simplified:status_code'))
        eq_('Could not parse identifier.',
            self.xml_value(invalid, 'schema:description'))

Beispiel #9

0

Datei anzeigen

    def test_remove_items(self):
        """Removing identifiers reports a per-URN <simplified:message>.

        First a catalogued and an uncatalogued URN are removed, then an
        invalid URN together with the re-catalogued one.  Neither response
        may contain <entry> tags.
        """
        invalid_urn = "FAKE AS I WANNA BE"
        catalogued_id = self._identifier()
        uncatalogued_id = self._identifier()
        self.collection.catalog_identifier(self._db, catalogued_id)

        parser = OPDSXMLParser()
        message_path = '/atom:feed/simplified:message'

        def text_of(node, tag):
            # Text of the first element matching `tag` beneath `node`.
            return parser._xpath(node, tag)[0].text

        first_path = '/?urn=%s&urn=%s' % (
            catalogued_id.urn, uncatalogued_id.urn)
        with self.app.test_request_context(
                first_path, headers=dict(Authorization=self.valid_auth)):

            # The uncatalogued identifier doesn't raise or return an error.
            response = self.controller.remove_items()
            eq_(HTTP_OK, response.status_code)

            # It sends two <simplified:message> tags.
            root = etree.parse(StringIO(response.data))
            removed_msg, missing_msg = parser._xpath(root, message_path)

            eq_("http://www.gutenberg.org/ebooks/2013",
                text_of(removed_msg, 'atom:id'))
            eq_(str(HTTP_OK),
                text_of(removed_msg, 'simplified:status_code'))
            eq_("Successfully removed",
                text_of(removed_msg, 'schema:description'))

            eq_("http://www.gutenberg.org/ebooks/2014",
                text_of(missing_msg, 'atom:id'))
            eq_(str(HTTP_NOT_FOUND),
                text_of(missing_msg, 'simplified:status_code'))
            eq_("Not in collection catalog",
                text_of(missing_msg, 'schema:description'))

            # It sends no <entry> tags.
            eq_([], parser._xpath(root, "//atom:entry"))

            # The catalogued identifier isn't in the catalog.
            assert catalogued_id not in self.collection.catalog
            # But it's still in the database.
            stored = self._db.query(Identifier).filter_by(
                id=catalogued_id.id).one()
            eq_(catalogued_id, stored)

        # Try again, this time including an invalid URN.
        self.collection.catalog_identifier(self._db, catalogued_id)
        second_path = '/?urn=%s&urn=%s' % (invalid_urn, catalogued_id.urn)
        with self.app.test_request_context(
                second_path, headers=dict(Authorization=self.valid_auth)):
            response = self.controller.remove_items()
            eq_(HTTP_OK, int(response.status_code))

            # Once again we get two <simplified:message> tags: the first
            # for the invalid URN, the second for the catalogued one.
            root = etree.parse(StringIO(response.data))
            invalid_msg, removed_msg = parser._xpath(root, message_path)

            eq_(invalid_urn, text_of(invalid_msg, 'atom:id'))
            eq_("400", text_of(invalid_msg, 'simplified:status_code'))
            eq_("Could not parse identifier.",
                text_of(invalid_msg, 'schema:description'))

            eq_("http://www.gutenberg.org/ebooks/2013",
                text_of(removed_msg, 'atom:id'))
            eq_("200", text_of(removed_msg, 'simplified:status_code'))
            eq_("Successfully removed",
                text_of(removed_msg, 'schema:description'))

            # We have no <entry> tags.
            eq_([], parser._xpath(root, "//atom:entry"))

            # The catalogued identifier is still removed.
            assert catalogued_id not in self.collection.catalog

Beispiel #10

0

Datei anzeigen

    def test_active_loan_feed(self):
        """Exercise the active-loans feed with and without loans.

        With no loans, the feed must be empty but still carry the user
        profile link and the <drm:licensor> tag.  After two loans are
        created, the feed must list both works with an
        <opds:availability> tag per acquisition link.
        """
        self.initialize_adobe(self._default_library)
        patron = self._patron()
        annotator_cls = CirculationManagerLoanAndHoldAnnotator

        # No entries in the feed...
        empty_xml = unicode(
            annotator_cls.active_loans_for(None, patron, test_mode=True))
        empty_feed = feedparser.parse(empty_xml)
        eq_(0, len(empty_feed['entries']))

        # ... but we have a link to the User Profile Management
        # Protocol endpoint...
        [profile_link] = [
            link for link in empty_feed['feed']['links']
            if link['rel'] ==
            'http://librarysimplified.org/terms/rel/user-profile'
        ]
        annotator = annotator_cls(None, None, patron, test_mode=True)
        profile_url = annotator.url_for(
            'patron_profile',
            library_short_name=patron.library.short_name,
            _external=True)
        eq_(profile_url, profile_link['href'])

        # ... and we have DRM licensing information.
        empty_tree = etree.fromstring(empty_xml)
        xml_parser = OPDSXMLParser()
        licensor = xml_parser._xpath1(empty_tree, "//atom:feed/drm:licensor")

        adobe_patron_identifier = annotator_cls._adobe_patron_identifier(
            patron)

        # The DRM licensing information includes the Adobe vendor ID
        # and the patron's patron identifier for Adobe purposes.
        licensor_attrs = licensor.attrib
        eq_(self.adobe_vendor_id.username,
            licensor_attrs['{http://librarysimplified.org/terms/drm}vendor'])
        [client_token, device_link] = licensor.getchildren()
        username_setting = (
            ConfigurationSetting.for_library_and_externalintegration(
                self._db, ExternalIntegration.USERNAME,
                self._default_library, self.registry))
        token_prefix = username_setting.value.upper()
        assert client_token.text.startswith(token_prefix)
        assert adobe_patron_identifier in client_token.text
        eq_("{http://www.w3.org/2005/Atom}link", device_link.tag)
        eq_("http://librarysimplified.org/terms/drm/rel/devices",
            device_link.attrib['rel'])

        # Unlike other places this tag shows up, the 'scheme' attribute
        # states explicitly that this <drm:licensor> tag is about an ACS
        # licensing scheme. We're in a <feed>, not a <link> to a
        # specific book, so that context would otherwise be lost.
        eq_('http://librarysimplified.org/terms/drm/scheme/ACS',
            licensor.attrib['{http://librarysimplified.org/terms/drm}scheme'])

        start = datetime.datetime.utcnow()
        end = start + datetime.timedelta(days=1)

        # An open-access loan gets a start date but no end date.
        open_access_work = self._work(
            language="eng", with_open_access_download=True)
        open_access_work.license_pools[0].loan_to(patron, start=start)

        # A loan of some other kind of book runs from now until tomorrow.
        licensed_work = self._work(language="eng", with_license_pool=True)
        licensed_work.license_pools[0].loan_to(patron, start=start, end=end)

        # A third work is created but never loaned out.
        self._work(language="eng", with_open_access_download=True)

        # Generate the feed again now that loans exist.
        loans_xml = unicode(
            CirculationManagerLoanAndHoldAnnotator.active_loans_for(
                None, patron, test_mode=True))
        entries = feedparser.parse(loans_xml)['entries']

        # The only entries in the feed are the works currently out on
        # loan to this patron.
        eq_(2, len(entries))
        first, second = sorted(entries, key=lambda entry: entry['title'])
        eq_(open_access_work.title, first['title'])
        eq_(licensed_work.title, second['title'])

        # The start and end dates of each loan should be present in an
        # <opds:availability> child of the acquisition link.
        loans_tree = etree.fromstring(loans_xml)
        xml_parser = OPDSXMLParser()
        acquisition_links = xml_parser._xpath(
            loans_tree,
            "//atom:entry/atom:link[@rel='http://opds-spec.org/acquisition']")
        eq_(2, len(acquisition_links))

        start_s = _strftime(start)
        end_s = _strftime(end)

        # Split the availability tags by whether they carry 'until'.
        without_until = []
        with_until = []
        for link in acquisition_links:
            availability = xml_parser._xpath1(link, "opds:availability")
            if 'until' in availability.attrib:
                with_until.append(availability)
            else:
                without_until.append(availability)

        # Exactly one tag has 'since' but not 'until'...
        [open_ended] = without_until
        eq_(start_s, open_ended.attrib['since'])

        # ... and exactly one has both.
        [bounded] = with_until
        eq_(start_s, bounded.attrib['since'])
        eq_(end_s, bounded.attrib['until'])

Beispiel #11

0

Datei anzeigen

Datei: test_controller.py Projekt: rskm1/metadata_wrangler

class TestCatalogController(ControllerTest):

    XML_PARSE = OPDSXMLParser()._xpath

    def setup(self):
        """Build the controller, the catalogued collection and two works."""
        super(TestCatalogController, self).setup()
        self.controller = CatalogController(self._db)

        # How the collection looks on the circulation manager.
        circ_collection = self._collection(
            username='******', external_account_id=self._url)

        # How the same collection is recorded / catalogued here: it is
        # named after the remote collection's metadata identifier.
        self.collection = self._collection(
            name=circ_collection.metadata_identifier,
            protocol=circ_collection.protocol)

        # Both works are created with the same options.
        work_options = dict(
            with_license_pool=True, with_open_access_download=True)
        self.work1 = self._work(**work_options)
        self.work2 = self._work(**work_options)

    def xml_value(self, message, tag):
        """Return the text of the first match for ``tag`` under ``message``.

        Raises IndexError when XML_PARSE finds no match.
        """
        matches = self.XML_PARSE(message, tag)
        first_match = matches[0]
        return first_match.text

    def test_updates_feed(self):
        """The updates feed lists catalogued works and honours the
        ``last_update_time`` query argument.
        """
        title_format = u"%s Collection Updates for %s"
        identifier = self.work1.license_pools[0].identifier
        self.collection.catalog_identifier(self._db, identifier)

        with self.app.test_request_context('/', headers=self.valid_auth):
            response = self.controller.updates_feed(self.collection.name)
            # The catalog's updates feed is returned.
            eq_(HTTP_OK, response.status_code)
            parsed = feedparser.parse(response.get_data())
            eq_(parsed.feed.title,
                title_format % (self.collection.protocol, self.client.url))

            # The feed holds the single catalogued work.
            entries = parsed['entries']
            eq_(1, len(entries))
            [entry] = entries
            eq_(self.work1.title, entry['title'])
            eq_(identifier.urn, entry['id'])

        # A time can be passed.
        cutoff = datetime.utcnow()
        timestamp = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
        filtered_path = '/?last_update_time=%s' % timestamp

        # Set back the clock on all of work1's time records.
        day_before = cutoff - timedelta(days=1)
        for record in self.work1.coverage_records:
            record.timestamp = day_before

        with self.app.test_request_context(
                filtered_path, headers=self.valid_auth):
            response = self.controller.updates_feed(self.collection.name)
            eq_(HTTP_OK, response.status_code)
            parsed = feedparser.parse(response.get_data())
            eq_(parsed.feed.title,
                title_format % (self.collection.protocol, self.client.url))

            # The timestamp is included in the url.
            linkified_timestamp = cutoff.strftime("%Y-%m-%d+%H:%M:%S")
            linkified_timestamp = linkified_timestamp.replace(":", "%3A")
            assert parsed['feed']['id'].endswith(linkified_timestamp)
            # And only works updated since the timestamp are returned.
            eq_(0, len(parsed['entries']))

        # Works updated since the timestamp are returned.
        self.work1.coverage_records[0].timestamp = datetime.utcnow()
        with self.app.test_request_context(
                filtered_path, headers=self.valid_auth):
            response = self.controller.updates_feed(self.collection.name)
            parsed = feedparser.parse(response.get_data())
            entries = parsed['entries']
            eq_(1, len(entries))
            [entry] = entries
            eq_(self.work1.title, entry['title'])
            eq_(identifier.urn, entry['id'])

    def test_updates_feed_is_paginated(self):
        """The updates feed exposes the right pagination links.

        With two catalogued works and a page size of one, the first page
        must link only to 'next', and the page after it must link to
        'previous' and 'first' but not 'next'.
        """
        for work in [self.work1, self.work2]:
            self.collection.catalog_identifier(
                self._db, work.license_pools[0].identifier)

        with self.app.test_request_context('/?size=1',
                                           headers=self.valid_auth):
            response = self.controller.updates_feed(self.collection.name)
            links = feedparser.parse(response.get_data())['feed']['links']
            # Collect the rel values once so every assertion inspects
            # every link. The previous 'first' check iterated with one
            # variable name and tested another, so it never looked at
            # each link's own rel.
            rels = [link['rel'] for link in links]
            assert 'next' in rels
            assert 'previous' not in rels
            assert 'first' not in rels

        with self.app.test_request_context('/?size=1&after=1',
                                           headers=self.valid_auth):
            response = self.controller.updates_feed(self.collection.name)
            links = feedparser.parse(response.get_data())['feed']['links']
            rels = [link['rel'] for link in links]
            assert 'previous' in rels
            assert 'first' in rels
            assert 'next' not in rels

    def test_add_items(self):
        """Adding identifiers yields one <simplified:message> per URN.

        The request carries an already-catalogued URN, an uncatalogued
        URN and an unparseable URN, and each message is checked for its
        id, status code and description.
        """
        invalid_urn = "FAKE AS I WANNA BE"
        catalogued_id = self._identifier()
        uncatalogued_id = self._identifier()
        self.collection.catalog_identifier(self._db, catalogued_id)

        message_path = '/atom:feed/simplified:message'

        request_path = '/?urn=%s&urn=%s&urn=%s' % (
            catalogued_id.urn, uncatalogued_id.urn, invalid_urn)
        with self.app.test_request_context(
                request_path, headers=self.valid_auth):
            response = self.controller.add_items(self.collection.name)

        # None of the identifiers raise or return an error.
        eq_(HTTP_OK, response.status_code)

        # It sends three messages, unpacked in the order the URNs were
        # given in the request.
        root = etree.parse(StringIO(response.data))
        catalogued, uncatalogued, invalid = self.XML_PARSE(root, message_path)

        # The uncatalogued identifier is now in the catalog.
        assert uncatalogued_id in self.collection.catalog
        # It has an accurate response message.
        eq_(uncatalogued_id.urn, self.xml_value(uncatalogued, 'atom:id'))
        eq_('201', self.xml_value(uncatalogued, 'simplified:status_code'))
        eq_('Successfully added',
            self.xml_value(uncatalogued, 'schema:description'))

        # The catalogued identifier is still in the catalog.
        assert catalogued_id in self.collection.catalog
        # And even though it responds 'OK', the message tells you it
        # was already there.
        eq_(catalogued_id.urn, self.xml_value(catalogued, 'atom:id'))
        eq_('200', self.xml_value(catalogued, 'simplified:status_code'))
        eq_('Already in catalog',
            self.xml_value(catalogued, 'schema:description'))

        # An invalid identifier returns a 400 error message.
        eq_(invalid_urn, self.xml_value(invalid, 'atom:id'))
        eq_('400', self.xml_value(invalid, 'simplified:status_code'))
        eq_('Could not parse identifier.',
            self.xml_value(invalid, 'schema:description'))

    def test_remove_items(self):
        """Removing identifiers reports a per-URN <simplified:message>.

        First a catalogued and an uncatalogued URN are removed, then an
        invalid URN together with the re-catalogued one.  Neither response
        may contain <entry> tags.
        """
        invalid_urn = "FAKE AS I WANNA BE"
        catalogued_id = self._identifier()
        uncatalogued_id = self._identifier()
        self.collection.catalog_identifier(self._db, catalogued_id)

        message_path = '/atom:feed/simplified:message'

        def check_message(message, urn, status, description):
            # Assert the id, status code and description of one message.
            eq_(urn, self.xml_value(message, 'atom:id'))
            eq_(status, self.xml_value(message, 'simplified:status_code'))
            eq_(description, self.xml_value(message, 'schema:description'))

        first_path = '/?urn=%s&urn=%s' % (
            catalogued_id.urn, uncatalogued_id.urn)
        with self.app.test_request_context(
                first_path, headers=self.valid_auth):

            # The uncatalogued identifier doesn't raise or return an error.
            response = self.controller.remove_items(self.collection.name)
            eq_(HTTP_OK, response.status_code)

            # It sends two <simplified:message> tags.
            root = etree.parse(StringIO(response.data))
            removed_msg, missing_msg = self.XML_PARSE(root, message_path)

            check_message(
                removed_msg, catalogued_id.urn, str(HTTP_OK),
                "Successfully removed")
            check_message(
                missing_msg, uncatalogued_id.urn, str(HTTP_NOT_FOUND),
                "Not in catalog")

            # It sends no <entry> tags.
            eq_([], self.XML_PARSE(root, "//atom:entry"))

            # The catalogued identifier isn't in the catalog.
            assert catalogued_id not in self.collection.catalog
            # But it's still in the database.
            stored = self._db.query(Identifier).filter_by(
                id=catalogued_id.id).one()
            eq_(catalogued_id, stored)

        # Try again, this time including an invalid URN.
        self.collection.catalog_identifier(self._db, catalogued_id)
        second_path = '/?urn=%s&urn=%s' % (invalid_urn, catalogued_id.urn)
        with self.app.test_request_context(
                second_path, headers=self.valid_auth):
            response = self.controller.remove_items(self.collection.name)
            eq_(HTTP_OK, int(response.status_code))

            # Once again we get two <simplified:message> tags.
            root = etree.parse(StringIO(response.data))
            invalid_msg, removed_msg = self.XML_PARSE(root, message_path)

            check_message(
                invalid_msg, invalid_urn, "400",
                "Could not parse identifier.")
            check_message(
                removed_msg, catalogued_id.urn, "200",
                "Successfully removed")

            # We have no <entry> tags.
            eq_([], self.XML_PARSE(root, "//atom:entry"))

            # The catalogued identifier is still removed.
            assert catalogued_id not in self.collection.catalog

    def test_update_client_url(self):
        """update_client_url needs credentials and a client_url argument.

        Without authentication it returns INVALID_CREDENTIALS.  With
        authentication but no URL it returns a 400 INVALID_INPUT problem
        mentioning 'client_url'.  With both it succeeds.
        """
        url = urllib.quote('https://try-me.fake.us/')
        with self.app.test_request_context('/'):
            # Without authentication a ProblemDetail is returned.
            response = self.controller.update_client_url()
            eq_(True, isinstance(response, ProblemDetail))
            eq_(INVALID_CREDENTIALS, response)

        with self.app.test_request_context('/', headers=self.valid_auth):
            # When a URL isn't provided, a ProblemDetail is returned.
            response = self.controller.update_client_url()
            eq_(True, isinstance(response, ProblemDetail))
            eq_(400, response.status_code)
            eq_(INVALID_INPUT.uri, response.uri)
            assert 'client_url' in response.detail

        with self.app.test_request_context('/?client_url=%s' % url,
                                           headers=self.valid_auth):
            response = self.controller.update_client_url()
            # The request was successful.
            eq_(HTTP_OK, response.status_code)
            # The IntegrationClient's URL has been changed. This used to
            # be an assignment to self.client.url, which verified nothing
            # and overwrote the fixture.
            # NOTE(review): the expected value is the literal from that
            # assignment; confirm it matches how the controller
            # normalizes the submitted URL.
            eq_('try-me.fake.us', self.client.url)