def test_ensure_res_metadata_enhanced_refs(self):
    """
    ensure_res_metadata() only enriches references (attaching citation data
    resolved via doi.org) when 'enrich_refs' is enabled, and it records the
    enrichment as annotations rather than in the core NERDm record.
    """
    self.assertFalse(os.path.exists(self.bagdir))
    self.assertIsNone(self.bagr.inpodfile)

    cfg = {
        'enrich_refs': False,
        'doi_resolver': {'client_info': self.doiclientinfo}
    }
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          self.revdir, self.upldir, cfg)
    self.bagr.ensure_res_metadata()

    # with enrichment turned off, the reference has a location but no citation
    refs = self.bagr.resmd['references']
    self.assertEqual(len(refs), 1)
    self.assertIn('doi.org', refs[0]['location'])
    self.assertNotIn('citation', refs[0])

    cfg['enrich_refs'] = True
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          self.revdir, self.upldir, cfg)
    self.bagr.ensure_res_metadata(True)

    # with enrichment turned on, a citation gets attached
    refs = self.bagr.resmd['references']
    self.assertEqual(len(refs), 1)
    self.assertIn('doi.org', refs[0]['location'])
    self.assertIn('citation', refs[0])

    # the core NERDm record remains unenriched...
    rmd = self.bagr.bagbldr.bag.nerd_metadata_for('', False)
    self.assertEqual(len(rmd['references']), 1)
    self.assertIn('doi.org', rmd['references'][0]['location'])
    self.assertNotIn('citation', rmd['references'][0])

    # ...while the citation is recorded among the annotations
    rmd = self.bagr.bagbldr.bag.annotations_metadata_for('')
    self.assertEqual(len(rmd['references']), 1)
    self.assertIn('doi.org', rmd['references'][0]['location'])
    self.assertIn('citation', rmd['references'][0])
def test_fileExaminer_autolaunch(self):
    """
    Show that the asynchronous file-examiner thread does its work when
    autolaunched: prepare() returns with the pre-computed checksum already
    recorded, and the background thread fills in the remaining checksum.
    """
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          self.revdir, asyncexamine=True)
    self.assertIsNotNone(self.bagr.fileExaminer)

    metadir = os.path.join(self.bagdir, 'metadata')
    self.assertFalse(os.path.exists(self.bagdir))
    self.assertIsNone(self.bagr.datafiles)

    try:
        self.bagr.prepare()
    except Exception:
        # don't leave the background thread running if preparation failed
        self.bagr.fileExaminer.thread.join()
        raise

    self.assertTrue(os.path.exists(self.bagdir))
    fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial1.json")
    self.assertIn('checksum', fmd)   # because there's a .sha256 file

    # give the examiner thread a bounded amount of time to finish
    if self.bagr.fileExaminer.thread.is_alive():
        print("waiting for file examiner thread")
        n = 20
        while n > 0 and self.bagr.fileExaminer.thread.is_alive():
            n -= 1
            time.sleep(0.1)
        # Fail on liveness, not on the loop counter: the thread may finish
        # during the final sleep (after n has already reached 0), which the
        # old `if n == 0` check would have misreported as a timeout.
        if self.bagr.fileExaminer.thread.is_alive():
            self.fail("file examiner is taking too long")

    fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial2.json")
    self.assertIn('checksum', fmd)
def test_fileExaminer(self):
    """
    Turn on asyncexamine but suppress autolaunch so the examiner can be
    driven explicitly.  Initially trial2.json has no checksum; after the
    examiner thread is launched and runs to completion, the checksum
    appears and the interim '_status' marker is removed.
    """
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          self.revdir, asyncexamine=True)
    self.assertIsNotNone(self.bagr.fileExaminer)
    self.bagr.fileExaminer_autolaunch = False

    metadir = os.path.join(self.bagdir, 'metadata')
    self.assertFalse(os.path.exists(self.bagdir))
    self.assertIsNone(self.bagr.datafiles)

    self.bagr.prepare()
    self.assertTrue(os.path.exists(self.bagdir))

    # trial1.json ships with a .sha256 file, so its checksum is known already
    fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial1.json")
    self.assertIn('checksum', fmd)
    self.assertIn('_status', fmd)

    # trial2.json's checksum has not been calculated yet
    fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial2.json")
    self.assertIn('_status', fmd)
    self.assertNotIn('checksum', fmd)

    # run the examiner thread to completion
    self.bagr.fileExaminer.launch()
    self.bagr.fileExaminer.thread.join()

    fmd = self.bagr.bagbldr.bag.nerd_metadata_for("trial2.json")
    self.assertIn('checksum', fmd)
    self.assertNotIn('_status', fmd)
def setUp(self):
    """
    Create a scratch bag-parent directory and a bagger that reads from the
    review area of the test SIP.
    """
    self.tf = Tempfiles()
    self.bagparent = self.tf.mkdir("bagger")
    self.revdir = os.path.join(self.testsip, "review")
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          self.revdir)
    self.bagdir = os.path.join(self.bagparent, self.midasid)
def test_ensure_file_metadata_resmd(self):
    """
    ensure_file_metadata() writes a complete NERDm file record: size,
    checksum, and a download URL built from the configured distribution
    service base URL.  A description is included only when the file has a
    matching distribution in the POD record.
    """
    # fix the config and recreate the bagger so the download base URL is set
    self.bagr.cfg.setdefault('bag_builder', {})
    self.bagr.cfg['bag_builder']['distrib_service_baseurl'] = \
        "https://testdata.nist.gov/od/ds"
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          self.revdir, self.upldir,
                                          self.bagr.cfg)
    self.assertFalse(os.path.exists(self.bagdir))
    self.bagr.ensure_res_metadata()

    # trial3a.json comes from the upload area
    destpath = os.path.join("trial3", "trial3a.json")
    dfile = os.path.join(self.upldir, self.midasid[32:], destpath)
    self.bagr.ensure_file_metadata(dfile, destpath, self.bagr.resmd)

    mdfile = os.path.join(self.bagdir, 'metadata', destpath, "nerdm.json")
    self.assertTrue(os.path.exists(self.bagdir))
    self.assertTrue(os.path.exists(mdfile))

    data = midas.read_nerd(mdfile)
    self.assertEqual(data['size'], 70)
    self.assertTrue(data['checksum']['hash'])
    self.assertEqual(data['checksum']['algorithm'],
                     {"@type": "Thing", "tag": "sha256"})
    self.assertTrue(data['downloadURL'])
    self.assertTrue(data['downloadURL'].startswith('https://testdata.nist.gov/'),
                    "Unexpected downloadURL: " + data['downloadURL'])
    # trial3a.json has no matching distribution in _pod.json; thus, no desc
    self.assertNotIn('description', data)

    # trial2.json comes from the review area and does have a matching
    # distribution, so its record picks up a description
    destpath = "trial2.json"   # was a pointless one-arg os.path.join()
    dfile = os.path.join(self.revdir, self.midasid[32:], destpath)
    self.bagr.ensure_file_metadata(dfile, destpath, self.bagr.resmd)

    mdfile = os.path.join(self.bagdir, 'metadata', destpath, "nerdm.json")
    self.assertTrue(os.path.exists(self.bagdir))
    self.assertTrue(os.path.exists(mdfile))

    data = midas.read_nerd(mdfile)
    self.assertEqual(data['size'], 69)
    self.assertTrue(data['checksum']['hash'])
    self.assertEqual(data['checksum']['algorithm'],
                     {"@type": "Thing", "tag": "sha256"})
    self.assertTrue(data['downloadURL'])
    self.assertTrue(data['description'])
def test_ark_ediid(self):
    """
    When the MIDAS ID is an ARK identifier, the bagger derives its name and
    input directories from portions of the ARK, and the resulting NERDm
    metadata carries the full ARK as both the ediid and the @id.
    """
    # make a writable copy of the test SIP so we can rewrite its POD records
    indir = os.path.join(self.bagparent, os.path.basename(self.testsip))
    shutil.copytree(self.testsip, indir)
    self.upldir = os.path.join(indir, "upload")
    self.revdir = os.path.join(indir, "review")

    # stamp the ARK id into both the upload and review POD records
    podf = os.path.join(self.upldir, "1491", "_pod.json")
    with open(podf) as fd:
        pod = json.load(fd)
    pod['identifier'] = self.arkid
    with open(podf, 'w') as fd:
        json.dump(pod, fd, indent=2)
    podf = os.path.join(self.revdir, "1491", "_pod.json")
    with open(podf, 'w') as fd:
        json.dump(pod, fd, indent=2)

    cfg = {'bag_builder': {'validate_id': r'(pdr\d)|(mds[01])'}}
    self.bagr = midas.MIDASMetadataBagger(self.arkid, self.bagparent,
                                          self.revdir, self.upldir,
                                          config=cfg)

    self.assertEqual(self.bagr.midasid, self.arkid)
    self.assertEqual(self.bagr.name, self.arkid[11:])
    self.assertEqual(self.bagr._indirs[0],
                     os.path.join(self.revdir, self.arkid[16:]))
    self.assertEqual(self.bagr._indirs[1],
                     os.path.join(self.upldir, self.arkid[16:]))
    self.assertEqual(os.path.basename(self.bagr.bagbldr.bagdir),
                     self.arkid[11:])

    self.bagr.ensure_base_bag()
    self.assertEqual(self.bagr.bagbldr.id, self.arkid)
    self.assertEqual(os.path.basename(self.bagr.bagbldr.bag.dir),
                     self.arkid[11:])

    self.bagr.ensure_res_metadata(True)
    nerdm = self.bagr.bagbldr.bag.nerd_metadata_for('')
    self.assertEqual(nerdm['ediid'], self.arkid)
    self.assertEqual(nerdm['@id'], self.arkid)
def setUp(self):
    """
    Create a scratch bag-parent directory and a bagger that reads only from
    the upload area of the test SIP (no review directory).
    """
    self.tf = Tempfiles()
    self.bagparent = self.tf.mkdir("bagger")
    self.upldir = os.path.join(self.testsip, "upload")
    self.bagr = midas.MIDASMetadataBagger(self.midasid, self.bagparent,
                                          None, self.upldir)
def test_wrong_ediid(self):
    """
    Constructing a bagger with an ID that matches no SIP directory raises
    SIPDirectoryNotFound.
    """
    with self.assertRaises(midas.SIPDirectoryNotFound):
        self.bagr = midas.MIDASMetadataBagger(self.wrongid, self.bagparent,
                                              self.revdir, self.upldir)