Esempio n. 1
0
    def test_retrieve_record(self):
        """
        SCENARIO:  A landing page URL is given to the harvester, and the two
        GET requests are answered with canned content from the test data
        package.

        EXPECTED RESULT:  The identifier that comes back is the UUID found in
        the URL, and the last-modified value is None.
        """
        expected = '0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
        url = ("https://www.northwestknowledge.net"
               "/data/0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0/")

        # Canned bodies standing in for external I/O.
        package = 'tests.data.nkn.0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
        landing_page = ir.read_binary(package, 'index.html')
        metadata_xml = ir.read_binary(package, 'metadata.xml')

        harvester = NKNHarvester()

        # The harvester must log at INFO level or above while it runs.
        log_check = self.assertLogs(logger=harvester.logger, level='INFO')

        with log_check, aioresponses() as mocked:
            # Registered as 1st:  landing page, 2nd:  XML metadata document
            mocked.get(self.regex, body=landing_page)
            mocked.get(self.regex, body=metadata_xml)

            # Four items come back; only the identifier and the
            # last-modified value are checked.
            record = asyncio.run(harvester.retrieve_record(url))
            sid, _, lastmod, _ = record

        self.assertEqual(sid, expected)
        self.assertIsNone(lastmod)
Esempio n. 2
0
    def test_retrieve_record__404_error(self):
        """
        SCENARIO:  A landing page URL is given to the harvester, but the
        remote directory has no metadata.xml document, so the second request
        is answered with a 404.

        EXPECTED RESULT:  A SkipError is raised.  For other clients, this is
        NOT a SkipError.
        """
        url = ("https://www.northwestknowledge.net"
               "/data/94E2D569-200F-44F7-8937-AB4BD0862C91")

        # Only the landing page needs a canned body.  The fixture is taken
        # from the 0a42d2bc-... data package, not from the dataset named in
        # the URL.
        package = 'tests.data.nkn.0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
        landing_page = ir.read_binary(package, 'index.html')

        harvester = NKNHarvester()

        # The harvester must log at INFO level or above while it runs.
        log_check = self.assertLogs(logger=harvester.logger, level='INFO')

        with log_check, aioresponses() as mocked:
            # Registered as 1st:  landing page, 2nd:  XML metadata (404)
            mocked.get(self.regex, body=landing_page)
            mocked.get(self.regex, status=404)

            with self.assertRaises(SkipError):
                coroutine = harvester.retrieve_record(url)
                asyncio.run(coroutine)
Esempio n. 3
0
    def test_unsupported_xml_format(self, mock_logger):
        """
        SCENARIO:  The XML metadata is in a format that is not supported.
        Other tests cover this in general, but NKN is a special case because
        the identifier must be extracted from the XML metadata rather than
        from the JSON-LD.

        The metadata file used here is some sort of ESRI XML file.

        EXPECTED RESULT:  XMLValidationError
        """
        # mock_logger is not used in the body; presumably it is injected by
        # a patch decorator on this method -- confirm against the full file.
        url = ("https://www.northwestknowledge.net"
               "/data/A62BEE88-8F92-4649-BC8D-BC56CE96AE2B")

        package = 'tests.data.nkn.A62BEE88-8F92-4649-BC8D-BC56CE96AE2B'
        metadata_xml = ir.read_binary(package, 'metadata.xml')
        landing_page = ir.read_binary(package, 'index.html')

        harvester = NKNHarvester()

        with aioresponses() as mocked:
            # NOTE(review): the metadata document is registered before the
            # landing page here -- confirm this matches the order in which
            # retrieve_record issues its requests.
            mocked.get(self.regex, body=metadata_xml)
            mocked.get(self.regex, body=landing_page)

            with self.assertRaises(XMLValidationError):
                coroutine = harvester.retrieve_record(url)
                asyncio.run(coroutine)
Esempio n. 4
0
    def test_retrieve_record__500_error(self):
        """
        SCENARIO:  A landing page URL is given to the harvester, but the
        second request is answered with a server error (HTTP 500).  This is
        complementary to test_retrieve_record__404_error.

        EXPECTED RESULT:  An aiohttp.ClientResponseError is raised, rather
        than the SkipError expected in the 404 case.
        """
        url = ("https://www.northwestknowledge.net"
               "/data/94E2D569-200F-44F7-8937-AB4BD0862C91")

        # Only the landing page needs a canned body.  The fixture is taken
        # from the 0a42d2bc-... data package, not from the dataset named in
        # the URL.
        package = 'tests.data.nkn.0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
        landing_page = ir.read_binary(package, 'index.html')

        harvester = NKNHarvester()

        # The harvester must log at INFO level or above while it runs.
        log_check = self.assertLogs(logger=harvester.logger, level='INFO')

        with log_check, aioresponses() as mocked:
            # Registered as 1st:  landing page, 2nd:  XML metadata (500)
            mocked.get(self.regex, body=landing_page)
            mocked.get(self.regex, status=500)

            with self.assertRaises(aiohttp.ClientResponseError):
                coroutine = harvester.retrieve_record(url)
                asyncio.run(coroutine)