def test_load_run(self, mock_harvest_time, mock_check_if_identifier_exists,
                  mock_update_science_metadata, mock_load_science_metadata):
    """
    SCENARIO:  One document is to be loaded for the first time, not
    updated.

    EXPECTED RESULT:  The call counts must reflect that the load routine
    is called and not the update routine.  Verify that the sid is a UUID
    and that the pid is the MD5SUM of the metadata document.

    The four ``mock_*`` parameters are mock objects supplied by the test
    harness (presumably via patch decorators outside this method -
    confirm against the enclosing class).
    """
    mock_harvest_time.return_value = '1900-01-01T00:00:00Z'
    # The identifier lookup reports 'no', which is what should steer the
    # harvester to the load routine rather than the update routine.
    mock_check_if_identifier_exists.return_value = {'outcome': 'no'}
    mock_update_science_metadata.return_value = True
    mock_load_science_metadata.return_value = True

    harvester = NKNHarvester(host=self.host, port=self.port)

    # External calls to read the:
    #
    #   1) sitemap (raw HTML directory listing)
    #   2) Remote HTML document for record 1 (another directory listing)
    #   3) Remote XML document for record 1
    #
    uuid = '0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
    contents = [
        ir.read_binary('tests.data.nkn', 'index.html'),
        ir.read_binary(f'tests.data.nkn.{uuid}', 'index.html'),
        ir.read_binary(f'tests.data.nkn.{uuid}', 'metadata.xml'),
    ]
    status_codes = [200, 200, 200]
    headers = [
        {'Content-Type': 'text/html;charset=UTF-8'},
        {'Content-Type': 'text/html;charset=UTF-8'},
        {'Content-Type': 'application/xml'},
    ]
    regex = [
        re.compile('https://www.northwestknowledge.net/data/'),
        re.compile('https://www.northwestknowledge.net/data/'),
        re.compile('https://www.northwestknowledge.net/data/'),
    ]

    with aioresponses() as m:
        # Register one canned response per expected request.  The loop
        # targets get their own names so the lists above are not rebound
        # to their last element.
        z = zip(regex, contents, status_codes, headers)
        for pattern, content, status_code, header in z:
            m.get(pattern, body=content, status=status_code,
                  headers=header)

        with self.assertLogs(logger=harvester.logger, level='DEBUG'):
            asyncio.run(harvester.run())

    self.assertEqual(mock_load_science_metadata.call_count, 1)
    self.assertEqual(mock_update_science_metadata.call_count, 0)

    # Verify the PID and SID.  Only the keyword arguments of the single
    # recorded load call are inspected.
    _, kwargs = mock_load_science_metadata.call_args_list[0]

    actual = kwargs['system_metadata'].identifier.value()
    expected = '679742d8c458378928ed21b2868db95b'
    self.assertEqual(actual, expected)

    actual = kwargs['system_metadata'].seriesId.value()
    expected = uuid
    self.assertEqual(actual, expected)
def test_update_run(self, mock_harvest_time,
                    mock_check_if_identifier_exists,
                    mock_update_science_metadata,
                    mock_load_science_metadata):
    """
    SCENARIO:  One document is to be updated.

    EXPECTED RESULT:  The document is updated, not loaded for the first
    time.  Verify that the sid is a UUID and that the pid is the MD5SUM
    of the metadata document.

    The four ``mock_*`` parameters are mock objects supplied by the test
    harness (presumably via patch decorators outside this method -
    confirm against the enclosing class).
    """
    record_date = dt.datetime(2017, 4, 28, 10, 44, 0,
                              tzinfo=dt.timezone.utc)
    mock_harvest_time.return_value = record_date.strftime(DATETIME_FORMAT)
    # The identifier lookup reports an existing record dated one day
    # before ``record_date``, which is what should steer the harvester
    # to the update routine.
    mock_check_if_identifier_exists.return_value = {
        'outcome': 'yes',
        'record_date': record_date - dt.timedelta(days=1),
        'current_version_id': 1,
    }
    mock_update_science_metadata.return_value = True
    mock_load_science_metadata.return_value = True

    harvester = NKNHarvester(host=self.host, port=self.port)

    # External calls to read the:
    #
    #   1) sitemap (raw HTML directory listing)
    #   2) Remote HTML document for record 1 (another directory listing)
    #   3) Remote XML document for record 1
    #   4) Existing XML document for record 1 (retrieved from the member
    #      node)
    #
    uuid = '0a42d2bc-700a-4cf2-a7ac-ad6b892da7f0'
    contents = [
        ir.read_binary('tests.data.nkn', 'index.html'),
        ir.read_binary(f'tests.data.nkn.{uuid}', 'index.html'),
        ir.read_binary(f'tests.data.nkn.{uuid}', 'metadata.xml'),
        ir.read_binary(f'tests.data.nkn.{uuid}', 'metadata.prior.xml'),
    ]
    status_codes = [200, 200, 200, 200]
    headers = [
        {'Content-Type': 'text/html;charset=UTF-8'},
        {'Content-Type': 'text/html;charset=UTF-8'},
        {'Content-Type': 'application/xml'},
        {'Content-Type': 'application/xml'},
    ]
    regex = [
        re.compile('https://www.northwestknowledge.net/data/'),
        re.compile('https://www.northwestknowledge.net/data/'),
        re.compile('https://www.northwestknowledge.net/data/'),
        re.compile('https://nkn.mn.org:443/mn/v2/'),
    ]

    with aioresponses() as m:
        # Register one canned response per expected request.  The loop
        # targets get their own names so the lists above are not rebound
        # to their last element.
        z = zip(regex, contents, status_codes, headers)
        for pattern, content, status_code, header in z:
            m.get(pattern, body=content, status=status_code,
                  headers=header)

        with self.assertLogs(logger=harvester.logger, level='DEBUG'):
            asyncio.run(harvester.run())

    self.assertEqual(mock_load_science_metadata.call_count, 0)
    self.assertEqual(mock_update_science_metadata.call_count, 1)

    # Verify the PID and SID.  Only the keyword arguments of the single
    # recorded update call are inspected.
    _, kwargs = mock_update_science_metadata.call_args_list[0]

    actual = kwargs['system_metadata'].identifier.value()
    expected = '679742d8c458378928ed21b2868db95b'
    self.assertEqual(actual, expected)

    actual = kwargs['system_metadata'].seriesId.value()
    expected = uuid
    self.assertEqual(actual, expected)