Esempio n. 1
0
    def test_load_run(self, mock_harvest_time, mock_check_if_identifier_exists,
                      mock_update_science_metadata,
                      mock_load_science_metadata):
        """
        SCENARIO:  One document is to be loaded for the first time, not
        updated.

        EXPECTED RESULT:  The call counts must reflect that the load routine
        is called and not the update routine. Verify that the sid is a UUID and
        that the pid is the MD5SUM of the metadata document.
        """

        mock_harvest_time.return_value = '1900-01-01T00:00:00Z'
        # The identifier lookup reports that the document is not yet known.
        mock_check_if_identifier_exists.return_value = {'outcome': 'no'}
        mock_update_science_metadata.return_value = True
        mock_load_science_metadata.return_value = True

        harvester = NKNHarvester(host=self.host, port=self.port)

        uuid = '0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
        data_url = re.compile('https://www.northwestknowledge.net/data/')

        # Mocked external GET requests, registered in this order.  Each entry
        # is (URL pattern, response body, status code, response headers).
        responses = [
            # 1) sitemap (raw HTML directory listing)
            (
                data_url,
                ir.read_binary('tests.data.nkn', 'index.html'),
                200,
                {'Content-Type': 'text/html;charset=UTF-8'},
            ),
            # 2) Remote HTML document for record 1 (another directory listing)
            (
                data_url,
                ir.read_binary(f'tests.data.nkn.{uuid}', 'index.html'),
                200,
                {'Content-Type': 'text/html;charset=UTF-8'},
            ),
            # 3) Remote XML document for record 1
            (
                data_url,
                ir.read_binary(f'tests.data.nkn.{uuid}', 'metadata.xml'),
                200,
                {'Content-Type': 'application/xml'},
            ),
        ]

        with aioresponses() as m:
            for pattern, body, status, response_headers in responses:
                m.get(pattern, body=body, status=status,
                      headers=response_headers)

            # assertLogs fails the test if the harvester logs nothing at
            # DEBUG level or above while running.
            with self.assertLogs(logger=harvester.logger, level='DEBUG'):
                asyncio.run(harvester.run())

        self.assertEqual(mock_load_science_metadata.call_count, 1)
        self.assertEqual(mock_update_science_metadata.call_count, 0)

        # Verify the PID and SID passed to the load routine.
        _, kwargs = mock_load_science_metadata.call_args_list[0]
        system_metadata = kwargs['system_metadata']

        actual = system_metadata.identifier.value()
        expected = '679742d8c458378928ed21b2868db95b'
        self.assertEqual(actual, expected)

        actual = system_metadata.seriesId.value()
        expected = uuid
        self.assertEqual(actual, expected)
Esempio n. 2
0
    def test_update_run(self, mock_harvest_time,
                        mock_check_if_identifier_exists,
                        mock_update_science_metadata,
                        mock_load_science_metadata):
        """
        SCENARIO:  One document is to be updated.

        EXPECTED RESULT:  The document is updated, not loaded for the first
        time.  Verify that the sid is a UUID and that the pid is the MD5SUM of
        the metadata document.
        """

        record_date = dt.datetime(2017, 4, 28, 10, 44, 0,
                                  tzinfo=dt.timezone.utc)
        mock_harvest_time.return_value = record_date.strftime(DATETIME_FORMAT)
        # The identifier lookup reports an existing record dated one day
        # before the mocked harvest time.
        mock_check_if_identifier_exists.return_value = {
            'outcome': 'yes',
            'record_date': record_date - dt.timedelta(days=1),
            'current_version_id': 1,
        }
        mock_update_science_metadata.return_value = True
        mock_load_science_metadata.return_value = True

        harvester = NKNHarvester(host=self.host, port=self.port)

        uuid = '0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
        data_url = re.compile('https://www.northwestknowledge.net/data/')
        member_node_url = re.compile('https://nkn.mn.org:443/mn/v2/')

        # Mocked external GET requests, registered in this order.  Each entry
        # is (URL pattern, response body, status code, response headers).
        responses = [
            # 1) sitemap (raw HTML directory listing)
            (
                data_url,
                ir.read_binary('tests.data.nkn', 'index.html'),
                200,
                {'Content-Type': 'text/html;charset=UTF-8'},
            ),
            # 2) Remote HTML document for record 1 (another directory listing)
            (
                data_url,
                ir.read_binary(f'tests.data.nkn.{uuid}', 'index.html'),
                200,
                {'Content-Type': 'text/html;charset=UTF-8'},
            ),
            # 3) Remote XML document for record 1
            (
                data_url,
                ir.read_binary(f'tests.data.nkn.{uuid}', 'metadata.xml'),
                200,
                {'Content-Type': 'application/xml'},
            ),
            # 4) Existing XML document for record 1 (retrieved from the
            #    member node)
            (
                member_node_url,
                ir.read_binary(f'tests.data.nkn.{uuid}', 'metadata.prior.xml'),
                200,
                {'Content-Type': 'application/xml'},
            ),
        ]

        with aioresponses() as m:
            for pattern, body, status, response_headers in responses:
                m.get(pattern, body=body, status=status,
                      headers=response_headers)

            # assertLogs fails the test if the harvester logs nothing at
            # DEBUG level or above while running.
            with self.assertLogs(logger=harvester.logger, level='DEBUG'):
                asyncio.run(harvester.run())

        self.assertEqual(mock_load_science_metadata.call_count, 0)
        self.assertEqual(mock_update_science_metadata.call_count, 1)

        # Verify the PID and SID passed to the update routine.
        _, kwargs = mock_update_science_metadata.call_args_list[0]
        system_metadata = kwargs['system_metadata']

        actual = system_metadata.identifier.value()
        expected = '679742d8c458378928ed21b2868db95b'
        self.assertEqual(actual, expected)

        actual = system_metadata.seriesId.value()
        expected = uuid
        self.assertEqual(actual, expected)