def convert_gml_to_ttl(self, file_name):
    """Convert one GML fixture file into a Turtle (.ttl) file.

    The input is looked up in ``TestPelagiosConversion.gml_data_dir`` and the
    result is written to ``TestPelagiosConversion.temp_result_dir`` using the
    same base name with a ``.ttl`` extension.

    :param file_name: name of the GML file inside the GML data directory
    """
    source_path = os.path.join(TestPelagiosConversion.gml_data_dir, file_name)
    base_name = os.path.splitext(os.path.split(source_path)[1])[0]
    target_path = os.path.join(
        TestPelagiosConversion.temp_result_dir, "%s.ttl" % base_name)

    # Names shaped like "<prefix>_<suffix>" contribute their suffix;
    # every other name gets a random five-character token instead.
    name_parts = base_name.split("_")
    if len(name_parts) == 2:
        specific_part = name_parts[1]
    else:
        specific_part = randomword(5)

    uri_part = "http://%s:%s/earkweb/sip2aip/working_area/aip2dip/%s/" % (
        "127.0.0.1", "8000", "abcdefghijklmnopq")
    converter = PeripleoGmlProcessing(source_path)
    converter.convert_gml(target_path, uri_part, specific_part)
class TestPelagiosConversion(unittest.TestCase):
    """Runs the GML to Turtle conversion over the bundled geodata fixtures.

    Each test only checks that the conversion of one fixture file completes
    without raising; the generated files are written to a per-run temporary
    directory that is removed once the class has finished.
    """

    # Directory holding the GML input fixtures
    gml_data_dir = root_dir + '/earkresources/geodata'
    # Per-run output directory (random suffix chosen at import time)
    temp_result_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    @classmethod
    def setUpClass(cls):
        out_dir = TestPelagiosConversion.temp_result_dir
        if os.path.exists(out_dir):
            return
        os.makedirs(out_dir)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestPelagiosConversion.temp_result_dir)

    def convert_gml_to_ttl(self, file_name):
        """Convert the GML fixture ``file_name`` to a ``.ttl`` file in the temp dir."""
        source_path = os.path.join(TestPelagiosConversion.gml_data_dir, file_name)
        base_name = os.path.splitext(os.path.split(source_path)[1])[0]
        target_path = os.path.join(
            TestPelagiosConversion.temp_result_dir, "%s.ttl" % base_name)

        # "<prefix>_<suffix>" names contribute their suffix, others a random token
        name_parts = base_name.split("_")
        if len(name_parts) == 2:
            specific_part = name_parts[1]
        else:
            specific_part = randomword(5)

        uri_part = "http://%s:%s/earkweb/sip2aip/working_area/aip2dip/%s/" % (
            "127.0.0.1", "8000", "abcdefghijklmnopq")
        converter = PeripleoGmlProcessing(source_path)
        converter.convert_gml(target_path, uri_part, specific_part)

    def test_convert_gml_to_ttl_1994(self):
        self.convert_gml_to_ttl("ob_1994.gml")

    def test_convert_gml_to_ttl_1995(self):
        self.convert_gml_to_ttl("ob_1995.gml")

    def test_convert_gml_to_ttl_1998(self):
        self.convert_gml_to_ttl("ob_1998.gml")

    def test_convert_gml_to_ttl_2002(self):
        self.convert_gml_to_ttl("ob_2002.gml")

    def test_convert_gml_to_ttl_2006(self):
        self.convert_gml_to_ttl("ob_2006.gml")

    def test_convert_gml_to_ttl_2010(self):
        self.convert_gml_to_ttl("ob_2010.gml")

    def test_convert_gml_to_ttl_2015(self):
        self.convert_gml_to_ttl("ob_2015.gml")
class TestManifestCreation(unittest.TestCase):
    """Checks that the manifest created for the compound AIP fixture is consistent.

    The manifest is written into a per-run temporary working directory, which
    is removed once the class has finished.
    """

    # Fixture package the manifest is created for
    aip_compound_dir = root_dir + '/earkresources/AIP-test/AIP-compound'
    # Per-run working directory (random suffix chosen at import time)
    temp_working_dir = root_dir + '/tmp/temp-aip-dir-' + randomutils.randomword(10) + "/"
    manifest_file = os.path.join(temp_working_dir, './manifest.mf')
    manifest_creation = ManifestCreation(temp_working_dir)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestManifestCreation.temp_working_dir)

    def get_file_entry(self, s):
        """Parse one manifest entry of the form ``Name: / Mtime: / Size:``.

        :param s: text of a single manifest entry (three consecutive lines)
        :return: dict with the stripped ``name``, ``time`` and ``size`` strings,
            or ``None`` when ``s`` does not start with such an entry
        """
        # re.VERBOSE makes the blanks inside the pattern insignificant; the
        # blank following each label in the input is captured by ".*" and
        # removed again by strip() below.
        file_pattern = re.compile(
            r"""Name: (?P<name>.*)\nMtime: (?P<time>.*)\nSize: (?P<size>.*)""",
            re.VERBOSE)
        file_match = file_pattern.match(s)
        if file_match is None:
            return None
        return {
            'name': file_match.group("name").strip(),
            'time': file_match.group("time").strip(),
            'size': file_match.group("size").strip(),
        }

    def test_create_manifest(self):
        self.manifest_creation.create_manifest(self.aip_compound_dir, self.manifest_file)
        self.assertTrue(
            os.path.isfile(self.manifest_file),
            "File %s not found in working directory" % self.manifest_file)
        print(self.manifest_file)

        # Entries are separated by blank lines
        with open(self.manifest_file) as manifest:
            entries = manifest.read().split("\n\n")

        for entry in entries:
            file_entry = self.get_file_entry(entry)
            if file_entry is None:
                continue
            # Check existence first so a missing file is reported as a test
            # failure instead of an OSError raised by getsize()
            self.assertTrue(os.path.isfile(file_entry['name']), "File must exist")
            actual_size = int(os.path.getsize(file_entry['name']))
            entry_size = int(file_entry['size'])
            self.assertEqual(actual_size, entry_size, "File size must match")
class TestTaskLogger(unittest.TestCase):
    """Exercises the message/error bookkeeping and file handling of TaskLogger."""

    # Per-run log file (random suffix chosen at import time)
    temp_log_file = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(root_dir + '/tmp/'):
            os.makedirs(root_dir + '/tmp/')

    @classmethod
    def tearDownClass(cls):
        # The file only exists if the test got far enough to create it; do not
        # mask the real failure with an OSError from the cleanup.
        if os.path.exists(TestTaskLogger.temp_log_file):
            os.remove(TestTaskLogger.temp_log_file)

    def test_log(self):
        print(self.temp_log_file)
        tl = TaskLogger(self.temp_log_file)
        self.assertFalse(tl.task_logfile.closed, "File is closed after initialising")

        # Two regular messages: only the message log grows
        tl.addmsg("some message", False, True)
        self.assertEqual(len(tl.log), 1)
        self.assertEqual(len(tl.err), 0)
        self.assertEqual("some message", tl.log[0])
        tl.addmsg("another message", False, True)
        self.assertEqual(len(tl.log), 2)
        self.assertEqual(len(tl.err), 0)

        result = tl.close_logger()
        self.assertTrue(result.success, "Error log is empty, so success must be true")
        self.assertTrue(tl.task_logfile.closed, "File is not closed after finalizing")

        # Reopen and record an error: only the error log grows
        tl.open()
        tl.addmsg("an error occurred", True, True)
        self.assertEqual(len(tl.log), 2)
        self.assertEqual(len(tl.err), 1)
        result = tl.close_logger()
        self.assertFalse(result.success, "Error log is not empty, so success must be false")
class TestExtraction(unittest.TestCase):
    """Extracts the SIP tar.gz fixture and checks that the expected files exist.

    Both tests extract into the same per-run temporary directory, which is
    removed once the class has finished.
    """

    delivery_dir = root_dir + '/earkresources/Delivery-test/'
    # Per-run extraction target (random suffix chosen at import time)
    temp_extract_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    # Top-level directory contained in the fixture package
    contained_sip_dir = './SIP-1f594d58-d09f-46dd-abac-8432068a7f6d/'
    # Files, relative to contained_sip_dir, that must exist after extraction
    expected_files = (
        './METS.xml',
        './schemas/premis-v2-2.xsd',
        './metadata/PREMIS.xml',
        './schemas/IP.xsd',
        './schemas/xlink.xsd',
        './schemas/ExtensionMETS.xsd',
        './schemas/jhove.xsd',
        './content/data/census.sql',
    )

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(TestExtraction.temp_extract_dir):
            os.makedirs(TestExtraction.temp_extract_dir)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestExtraction.temp_extract_dir)

    def _assert_sip_files_extracted(self):
        """Fail unless every expected file exists below the extraction directory."""
        for relative_path in self.expected_files:
            extracted = os.path.join(
                TestExtraction.temp_extract_dir, self.contained_sip_dir, relative_path)
            self.assertTrue(
                os.path.isfile(extracted),
                "File %s not found in extracted directory" % extracted)

    def test_extract_sip(self):
        package_file = self.delivery_dir + 'SIP-sqldump.tar.gz'
        sip_extraction = Extraction()
        result = sip_extraction.extract(package_file, TestExtraction.temp_extract_dir)
        self.assertTrue(result.success)
        print(result.log[0])
        self._assert_sip_files_extracted()

    def test_extract_sip_with_report(self):
        package_file = self.delivery_dir + 'SIP-sqldump.tar.gz'
        sip_extraction = Extraction()
        result = sip_extraction.extract_with_report(
            package_file, TestExtraction.temp_extract_dir)
        self.assertTrue(result.success)
        print(result.log[0])
        self._assert_sip_files_extracted()
def set_default_config_if_not_exists(cfg_file, default_cfg):
    """Add every missing default option to a configuration file.

    Options that already exist keep their current value. The file is only
    (re)written when at least one option had to be added; a file that does
    not exist yet is created in that case.

    :param cfg_file: path of the configuration file to read and update
    :param default_cfg: iterable of ``(section, option, value)`` tuples; the
        section may be the implicit ``DEFAULT`` section
    """
    config = ConfigParser.RawConfigParser()
    # read() silently skips files that do not exist, so a missing cfg_file
    # simply starts from an empty configuration
    config.read(cfg_file)

    changed = False
    for section, option, value in default_cfg:
        # The DEFAULT section always exists implicitly and add_section()
        # rejects its name, so it must never be created explicitly
        if section != ConfigParser.DEFAULTSECT and not config.has_section(section):
            config.add_section(section)
        if config.has_option(section, option):
            continue
        print("Adding missing configuration option with default values: "
              "section: %s, option: %s, value: %s" % (section, option, value))
        config.set(section, option, value)
        changed = True

    if changed:
        with open(cfg_file, 'w') as fp:
            config.write(fp)
        print("Config file written: %s" % cfg_file)


if __name__ == "__main__":
    # Demo: create a throw-away config file filled with example defaults
    config_file = "/tmp/configexample%s.cfg" % randomword(5)
    default_config = [
        ("newsection", "example_a", "foo"),
        ("newsection", "example_b", 1),
        ("newsection", "example_c", "bar"),
    ]
    set_default_config_if_not_exists(config_file, default_config)
class TestExtraction(unittest.TestCase):
    """Extracts ZIP fixtures with ``Unzip`` and checks the resulting tree.

    Each test extracts into its own per-run temporary directory; both
    directories are removed once the class has finished.
    """

    delivery_dir = root_dir + '/earkresources/Delivery-test/'
    # Per-run extraction target of test_extract_sip
    temp_extract_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)
    package_dir = root_dir + '/earkresources/packaging-test/'
    # Per-run extraction target of test_extract_package_with_empty_folder
    temp_extract_dir2 = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(TestExtraction.temp_extract_dir):
            os.makedirs(TestExtraction.temp_extract_dir)
        if not os.path.exists(TestExtraction.temp_extract_dir2):
            os.makedirs(TestExtraction.temp_extract_dir2)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestExtraction.temp_extract_dir)
        shutil.rmtree(TestExtraction.temp_extract_dir2)

    def test_extract_sip(self):
        print("Extracting to %s" % self.temp_extract_dir)
        package_file = self.delivery_dir + 'SIP-sqldump.zip'
        contained_sip_dir = './SIP-1f594d58-d09f-46dd-abac-8432068a7f6d/'
        sip_extraction = Unzip()
        result = sip_extraction.extract_with_report(
            package_file, TestExtraction.temp_extract_dir)
        self.assertTrue(result.success)
        for log in result.log:
            print(log)

        # must be 8 extracted files
        cpt = sum(len(files) for _, _, files in os.walk(TestExtraction.temp_extract_dir))
        self.assertEqual(cpt, 8, "Number of extracted files not as expected")

        expected_files = (
            './METS.xml',
            './schemas/premis-v2-2.xsd',
            './metadata/PREMIS.xml',
            './schemas/IP.xsd',
            './schemas/xlink.xsd',
            './schemas/ExtensionMETS.xsd',
            './schemas/jhove.xsd',
            './content/data/census.sql',
        )
        for relative_path in expected_files:
            extracted = os.path.join(
                TestExtraction.temp_extract_dir, contained_sip_dir, relative_path)
            self.assertTrue(
                os.path.isfile(extracted),
                "File %s not found in extracted directory" % extracted)

    def test_extract_package_with_empty_folder(self):
        package_file = self.package_dir + 'pkg.zip'
        sip_extraction = Unzip()
        result = sip_extraction.extract_with_report(
            package_file, TestExtraction.temp_extract_dir2)
        print("Extracting package %s to %s" % (package_file, self.temp_extract_dir2))
        self.assertTrue(result.success)
        for log in result.log:
            print(log)

        # must be 2 extracted files and 3 extracted directories
        cpt = 0
        ndirs = 0
        for _, dirs, files in os.walk(TestExtraction.temp_extract_dir2):
            cpt += len(files)
            ndirs += len(dirs)
        self.assertEqual(cpt, 2, "Number of extracted files not as expected: %s" % cpt)
        self.assertEqual(
            ndirs, 3, "Number of extracted directories not as expected: %s" % ndirs)

        # NOTE(review): the directory is spelled "pgk" while the archive is
        # named pkg.zip; kept as is, confirm against the fixture's contents.
        items_to_check = (
            os.path.join(TestExtraction.temp_extract_dir2, "pgk", 'emptydir'),
            os.path.join(TestExtraction.temp_extract_dir2, "pgk", 'somedir'),
            os.path.join(TestExtraction.temp_extract_dir2, "pgk", 'somedir/fileinsomedir.txt'),
            os.path.join(TestExtraction.temp_extract_dir2, "pgk", 'testfile.txt'),
        )
        for item in items_to_check:
            self.assertTrue(
                os.path.exists(item),
                "Item %s not found in extracted directory" % item)
class TestExtract(unittest.TestCase):
    """Extracts the same SIP fixture from ZIP and TAR via ``Extract.factory``.

    All tests share one per-run temporary directory; it is emptied after
    every test and removed once the class has finished.
    """

    delivery_dir = root_dir + '/earkresources/Delivery-test/'
    # Per-run extraction target (random suffix chosen at import time)
    temp_extract_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(TestExtract.temp_extract_dir):
            os.makedirs(TestExtract.temp_extract_dir)

    @classmethod
    def tearDownClass(cls):
        # Remove the shared directory so test runs do not pile up temp dirs
        shutil.rmtree(TestExtract.temp_extract_dir, ignore_errors=True)

    def tearDown(self):
        # Runs after failing tests too, so leftovers of one test can never
        # distort the file count checked by the next one
        self.reset_test_dir()

    def reset_test_dir(self):
        """Replace the extraction directory with a fresh, empty one."""
        shutil.rmtree(TestExtract.temp_extract_dir)
        os.makedirs(TestExtract.temp_extract_dir)

    def test_extract_sip_zip(self):
        package_file = self.delivery_dir + 'SIP-sqldump.zip'
        self.extr(package_file)

    def test_extract_sip_tar(self):
        package_file = self.delivery_dir + 'SIP-sqldump.tar'
        self.extr(package_file)

    def extr(self, package_file):
        """Extract ``package_file`` and verify the extracted SIP content.

        The extractor is selected by ``Extract.factory`` from the file
        extension of ``package_file``.

        :param package_file: path of the package to extract
        """
        contained_sip_dir = './SIP-1f594d58-d09f-46dd-abac-8432068a7f6d/'
        _, file_extension = os.path.splitext(package_file)
        sip_extraction = Extract.factory(file_extension)
        result = sip_extraction.extract_with_report(
            package_file, TestExtract.temp_extract_dir)
        self.assertTrue(result.success)
        for log in result.log:
            print(log)

        # must be 8 extracted files
        cpt = sum(len(files) for _, _, files in os.walk(TestExtract.temp_extract_dir))
        self.assertEqual(cpt, 8, "Number of extracted files not as expected")

        expected_files = (
            './METS.xml',
            './schemas/premis-v2-2.xsd',
            './metadata/PREMIS.xml',
            './schemas/IP.xsd',
            './schemas/xlink.xsd',
            './schemas/ExtensionMETS.xsd',
            './schemas/jhove.xsd',
            './content/data/census.sql',
        )
        for relative_path in expected_files:
            extracted = os.path.join(
                TestExtract.temp_extract_dir, contained_sip_dir, relative_path)
            self.assertTrue(
                os.path.isfile(extracted),
                "File %s not found in extracted directory" % extracted)
class TestPairtreeStorage(unittest.TestCase):
    """Tests PairtreeStorage lookups and storing of new package versions.

    Read-only tests run against the bundled test repository; ``test_store``
    writes into a per-run temporary repository that is removed once the
    class has finished.
    """

    source_dir = root_dir + '/earkresources/storage-test/'
    package_file = "bar.tar"
    # Per-run repository used for storing (random suffix chosen at import time)
    repository_storage_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)
    # Bundled read-only repository fixture
    test_repo = root_dir + '/earkresources/test-repo/'

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(TestPairtreeStorage.repository_storage_dir):
            os.makedirs(TestPairtreeStorage.repository_storage_dir)
        # Copy the version marker file of the fixture repository into the
        # temporary repository
        shutil.copy(
            os.path.join(TestPairtreeStorage.test_repo, "pairtree_version0_1"),
            TestPairtreeStorage.repository_storage_dir)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestPairtreeStorage.repository_storage_dir)

    def test_identifier_object_exists(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        existing_identifier = "bar"
        nonexisting_identifier = "foo"
        self.assertEqual(pts.identifier_object_exists(existing_identifier), True)
        self.assertEqual(pts.identifier_object_exists(nonexisting_identifier), False)

    def test_version_exists(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        identifier = "bar"
        self.assertEqual(pts.identifier_version_object_exists(identifier, 3), False)
        self.assertEqual(pts.identifier_version_object_exists(identifier, 2), True)

    def test_next_version(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        identifier = "bar"
        self.assertEqual("00003", pts._next_version(identifier))

    def test_curr_version(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        identifier = "bar"
        self.assertEqual("00002", pts.curr_version(identifier))

    def test_store(self):
        # Storing the same package twice must yield versions 1 and 2
        pts = PairtreeStorage(TestPairtreeStorage.repository_storage_dir)
        pts.store("bar", os.path.join(self.source_dir, self.package_file))
        self.assertEqual(1, pts.curr_version_num("bar"))
        pts.store("bar", os.path.join(self.source_dir, self.package_file))
        self.assertEqual(2, pts.curr_version_num("bar"))

    def test_get_object_path(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        expected = os.path.join(
            TestPairtreeStorage.test_repo, "pairtree_root/ba/r/data/00002/bar.tar")
        actual = pts.get_object_path("bar")
        self.assertEqual(expected, actual)

    def test_get_object_item_stream(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        content = pts.get_object_item_stream(
            "bar", "739f9c5f-c402-42af-a18b-3d0bdc4e8751/METS.xml")
        self.assertTrue(content.startswith("<?xml"))
        logger.debug(content)
def post_tar_file(self, tar_file_path, identifier, progress_reporter=default_reporter):
    """
    Iterate over tar file and post documents it contains to Solr API (extract)

    Every member is extracted into a temporary directory and each resulting
    regular file is posted to ``<self.url>/update/extract``. When a post does
    not return status 200, the file is posted again as a plain document via
    ``self.post_file_document``. The temporary directory is removed and the
    tar file is closed even if an error occurs.

    @type       tar_file_path: string
    @param      tar_file_path: Absolute path to tar file
    @type       identifier: string
    @param      identifier: Identifier of the tar package
    @type       progress_reporter: callable
    @param      progress_reporter: Called with the progress in percent (0 to 100)
    @rtype:     list(dict(string, int))
    @return:    Return list of urls and return codes
    """
    import tarfile

    progress_reporter(0)
    extract_dir = '/tmp/temp-' + randomutils.randomword(10)
    results = []
    num = 0
    mets_entry = "%s/METS.xml" % identifier
    package_type = "IP"

    # NOTE(review): members are extracted without validating their paths; a
    # tar file from an untrusted source could write outside of extract_dir.
    try:
        with tarfile.open(tar_file_path, 'r') as tfile:
            numfiles = sum(1 for tarinfo in tfile if tarinfo.isreg())
            logger.debug("Number of files in tarfile: %s " % numfiles)

            # Determine the package type from the package's METS file; fall
            # back to the default type if it is missing or cannot be loaded.
            try:
                tfile.extract(mets_entry, extract_dir)
            except KeyError:
                logger.warn("METS entry does not exist in TAR file: %s" % mets_entry)
            else:
                mets_path = os.path.join(extract_dir, identifier, "METS.xml")
                if os.path.exists(mets_path):
                    try:
                        mets = ParsedMets(extract_dir)
                        mets.load_mets(mets_path)
                        package_type = mets.get_package_type()
                    except Exception:
                        logger.warn("Error loading METS from package during indexing, assigning default package type instead.")
                else:
                    logger.warn("METS file does not exist: %s" % mets_path)

            for t in tfile:
                tfile.extract(t, extract_dir)
                afile = os.path.join(extract_dir, t.name)
                # Only files can be posted; directory members are skipped
                if not os.path.isfile(afile):
                    continue
                params = SolrDocParams(afile).get_params()
                params['literal.packagetype'] = package_type
                params['literal.package'] = identifier
                params['literal.path'] = t.name
                post_url = '%s/update/extract?%s' % (self.url, urllib.urlencode(params))
                with open(afile, 'rb') as payload:
                    response = requests.post(post_url, files={'file': ('userfile', payload)})
                # The result records the status of the extract request, even
                # when the plain-document fallback below succeeds
                result = {"url": post_url, "status": response.status_code}
                if response.status_code != 200:
                    status = self.post_file_document(afile, identifier, t.name)
                    if status == 200:
                        logger.info("posting file failed for url '%s' with status code: %d (posted plain document instead)" % (post_url, response.status_code))
                    else:
                        logger.info("Unable to create document for url '%s'" % (post_url))
                results.append(result)
                num += 1
                # numfiles counts regular members only, so guard against zero
                # and cap the value in case links were posted as well
                if numfiles:
                    progress_reporter(min(100, num * 100 // numfiles))

            self.commit()
            logger.debug("Files extracted to %s" % extract_dir)
    finally:
        # The directory only exists if at least one member was extracted
        if os.path.isdir(extract_dir):
            shutil.rmtree(extract_dir)

    progress_reporter(100)
    return results