예제 #1
0
    def test__read_record__invalid_jsonld(self):
        """
        SCENARIO:  The request for a landing page succeeds (HTTP 200 with a
        text/html body), but the page has invalid JSON-LD.

        EXPECTED RESULT:  retrieve_record raises JsonLdError.
        """
        harvester = ARMHarvester()

        # NOTE(review): the URL names "invalid_jsonld" while the canned body
        # is the "no_dataset_in_jsonld" fixture -- confirm this is intended.
        landing_page_url = (
            'https://www.archive.arm.gov'
            '/metadata/adc/html/nsaqcrad1longC2.c2.invalid_jsonld.html'
        )
        page_body = ir.read_binary(
            'tests.data.arm',
            'nsaqcrad1longC2.c2.no_dataset_in_jsonld.html',
        )

        # The mock is keyed on a regex for the ADC metadata root rather than
        # on the exact landing page URL.
        url_pattern = re.compile('https://www.archive.arm.gov/metadata/adc')

        with aioresponses() as mocked:
            mocked.get(
                url_pattern,
                body=page_body,
                status=200,
                headers={'Content-Type': 'text/html'},
            )

            # assertLogs is the inner context, so the expected exception
            # passes through it on the way out to assertRaises; here it
            # mainly serves to capture the harvester's log output.
            with self.assertRaises(JsonLdError), \
                    self.assertLogs(logger=harvester.logger, level='DEBUG'):
                asyncio.run(harvester.retrieve_record(landing_page_url))
예제 #2
0
    def test__landing_page_is_empty(self):
        """
        SCENARIO:  The request for a landing page succeeds (HTTP 200 with a
        text/html content type), but the page has absolutely no content.

        EXPECTED RESULT:  retrieve_record raises RuntimeError.
        """
        harvester = ARMHarvester()

        landing_page_url = (
            'https://www.archive.arm.gov/metadata/adc/html/met.html'
        )
        page_body = ir.read_binary('tests.data.arm', 'met.html')

        # The mock is keyed on a regex for the ADC metadata root rather than
        # on the exact landing page URL.
        url_pattern = re.compile('https://www.archive.arm.gov/metadata/adc')

        with aioresponses() as mocked:
            mocked.get(
                url_pattern,
                body=page_body,
                status=200,
                headers={'Content-Type': 'text/html'},
            )

            # assertLogs is the inner context, so the expected exception
            # passes through it on the way out to assertRaises; here it
            # mainly serves to capture the harvester's log output.
            with self.assertRaises(RuntimeError), \
                    self.assertLogs(logger=harvester.logger, level='DEBUG'):
                asyncio.run(harvester.retrieve_record(landing_page_url))
예제 #3
0
    def test__retrieve_record__bad_series_identifier(self):
        """
        SCENARIO:  We have a valid landing page URL but the JSON-LD document
        has a series identifier that is not in the format that we want.

        EXPECTED RESULT:  retrieve_record raises RuntimeError, and the
        harvester logs at least one message at DEBUG level or above.
        """
        landing_page_url = (
            'https://www.archive.arm.gov/metadata/adc/html/wsacrcrcal.html'
        )

        harvester = ARMHarvester()

        # Canned responses for the external calls, in registration order:
        #
        #   1) HTML document for the landing page
        #   2) XML document associated with the landing page
        #
        # Each body is paired with its own headers in a single list so the
        # two can never drift out of step; every response uses status 200.
        landing_page_html = ir.read_binary(
            'tests.data.arm', 'wsacrcrcal.bad_series_id.html'
        )
        metadata_xml = ir.read_binary('tests.data.arm', 'wsacrcrcal.xml')
        responses = [
            (landing_page_html, {'Content-Type': 'text/html'}),
            (metadata_xml, {'Content-Type': 'application/xml'}),
        ]

        with aioresponses() as m:
            for body, response_headers in responses:
                m.get(self.regex,
                      body=body, status=200, headers=response_headers)

            # assertRaises is the inner context and absorbs the expected
            # exception, so the outer assertLogs still checks on exit that
            # something was logged.
            with self.assertLogs(logger=harvester.logger, level='DEBUG'):
                with self.assertRaises(RuntimeError):
                    asyncio.run(harvester.retrieve_record(landing_page_url))