Ejemplo n.º 1
0
 def convert_gml_to_ttl(self, file_name):
     """Convert one GML fixture into a ``.ttl`` file in the temporary result directory.

     The input is looked up under ``TestPelagiosConversion.gml_data_dir`` and the
     output keeps the input's base name with a ``.ttl`` extension.

     :param file_name: name of the GML file, relative to the GML data directory
     """
     source_path = os.path.join(TestPelagiosConversion.gml_data_dir, file_name)
     base_name = os.path.splitext(os.path.basename(source_path))[0]
     target_path = os.path.join(TestPelagiosConversion.temp_result_dir, "%s.ttl" % base_name)

     # Names shaped "<first>_<second>" contribute their second part; any other
     # shape falls back to the result of randomword(5).
     name_parts = base_name.split("_")
     if len(name_parts) == 2:
         specific_part = name_parts[1]
     else:
         specific_part = randomword(5)

     uri_part = "http://%s:%s/earkweb/sip2aip/working_area/aip2dip/%s/" % ("127.0.0.1", "8000", "abcdefghijklmnopq")
     PeripleoGmlProcessing(source_path).convert_gml(target_path, uri_part, specific_part)
Ejemplo n.º 2
0
class TestPelagiosConversion(unittest.TestCase):
    """Runs the GML-to-Turtle conversion over the GML fixtures, one test per file.

    Output is written to a randomly named directory that is created once for the
    class and removed when the class is finished.
    """

    gml_data_dir = root_dir + '/earkresources/geodata'
    temp_result_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    @classmethod
    def setUpClass(cls):
        # Create the output directory unless it is already there.
        if os.path.exists(cls.temp_result_dir):
            return
        os.makedirs(cls.temp_result_dir)

    @classmethod
    def tearDownClass(cls):
        # Remove the output directory together with everything the tests wrote.
        shutil.rmtree(cls.temp_result_dir)

    def convert_gml_to_ttl(self, file_name):
        """Convert one GML fixture into a ``.ttl`` file in the temporary result directory.

        :param file_name: name of the GML file, relative to ``gml_data_dir``
        """
        source_path = os.path.join(TestPelagiosConversion.gml_data_dir, file_name)
        base_name = os.path.splitext(os.path.basename(source_path))[0]
        target_path = os.path.join(TestPelagiosConversion.temp_result_dir, "%s.ttl" % base_name)

        # Names shaped "<first>_<second>" contribute their second part; any other
        # shape falls back to the result of randomword(5).
        name_parts = base_name.split("_")
        if len(name_parts) == 2:
            specific_part = name_parts[1]
        else:
            specific_part = randomword(5)

        uri_part = "http://%s:%s/earkweb/sip2aip/working_area/aip2dip/%s/" % ("127.0.0.1", "8000", "abcdefghijklmnopq")
        PeripleoGmlProcessing(source_path).convert_gml(target_path, uri_part, specific_part)

    # One test method per fixture so that each file is reported separately.

    def test_convert_gml_to_ttl_1994(self):
        self.convert_gml_to_ttl("ob_1994.gml")

    def test_convert_gml_to_ttl_1995(self):
        self.convert_gml_to_ttl("ob_1995.gml")

    def test_convert_gml_to_ttl_1998(self):
        self.convert_gml_to_ttl("ob_1998.gml")

    def test_convert_gml_to_ttl_2002(self):
        self.convert_gml_to_ttl("ob_2002.gml")

    def test_convert_gml_to_ttl_2006(self):
        self.convert_gml_to_ttl("ob_2006.gml")

    def test_convert_gml_to_ttl_2010(self):
        self.convert_gml_to_ttl("ob_2010.gml")

    def test_convert_gml_to_ttl_2015(self):
        self.convert_gml_to_ttl("ob_2015.gml")
Ejemplo n.º 3
0
class TestManifestCreation(unittest.TestCase):
    """Checks that the generated manifest lists existing files with their real sizes."""

    aip_compound_dir = root_dir + '/earkresources/AIP-test/AIP-compound'
    temp_working_dir = root_dir + '/tmp/temp-aip-dir-' + randomutils.randomword(
        10) + "/"
    manifest_file = os.path.join(temp_working_dir, './manifest.mf')

    # Built once at class-definition time and shared by all tests.
    manifest_creation = ManifestCreation(temp_working_dir)

    @classmethod
    def tearDownClass(cls):
        # Remove the working directory together with the generated manifest.
        shutil.rmtree(TestManifestCreation.temp_working_dir)

    def get_file_entry(self, s):
        """Parse one manifest entry made of ``Name:``, ``Mtime:`` and ``Size:`` lines.

        :param s: text of a single manifest entry
        :return: dict with the stripped ``name``, ``time`` and ``size`` strings,
            or ``None`` when ``s`` does not start with such an entry
        """
        # re.VERBOSE makes the literal spaces in the pattern insignificant, so
        # the captured values can carry leading whitespace; they are stripped
        # below.
        file_pattern = re.compile(
            r"""Name: (?P<name>.*)\nMtime: (?P<time>.*)\nSize: (?P<size>.*)""",
            re.VERBOSE)
        file_match = file_pattern.match(s)
        if file_match is None:
            return None
        return {
            'name': file_match.group("name").strip(),
            'time': file_match.group("time").strip(),
            'size': file_match.group("size").strip()
        }

    def test_create_manifest(self):
        self.manifest_creation.create_manifest(self.aip_compound_dir,
                                               self.manifest_file)
        # Report the manifest path in the message (the builtin ``file`` type was
        # being formatted here before).
        self.assertTrue(
            os.path.isfile(self.manifest_file),
            "File %s not found in working directory" % self.manifest_file)
        print(self.manifest_file)
        # Entries are separated by blank lines; close the file once it is read.
        with open(self.manifest_file) as manifest:
            entries = manifest.read().split("\n\n")
        for entry in entries:
            file_entry = self.get_file_entry(entry)
            if file_entry is None:
                continue
            # Check existence first so a missing file fails the assertion
            # instead of raising OSError from getsize().
            self.assertTrue(os.path.isfile(file_entry['name']),
                            "File must exist")
            actual_size = int(os.path.getsize(file_entry['name']))
            entry_size = int(file_entry['size'])
            self.assertEqual(actual_size, entry_size,
                             "File size must match")
Ejemplo n.º 4
0
class TestTaskLogger(unittest.TestCase):
    """Exercises TaskLogger: message/error bookkeeping and opening/closing of the log file."""

    temp_log_file = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    @classmethod
    def tearDownClass(cls):
        # The log file only exists if the test got far enough to create it;
        # guard the removal so cleanup does not hide the original failure.
        if os.path.exists(TestTaskLogger.temp_log_file):
            os.remove(TestTaskLogger.temp_log_file)

    @classmethod
    def setUpClass(cls):
        # Make sure the parent directory of the log file exists.
        if not os.path.exists(root_dir + '/tmp/'):
            os.makedirs(root_dir + '/tmp/')

    def test_log(self):
        print(self.temp_log_file)
        tl = TaskLogger(self.temp_log_file)
        self.assertFalse(tl.task_logfile.closed,
                         "File is closed after initialising")

        # First message: recorded in the log list, the error list stays empty.
        tl.addmsg("some message", False, True)
        self.assertEqual(len(tl.log), 1)
        self.assertEqual(len(tl.err), 0)
        self.assertEqual("some message", tl.log[0])

        # Second message, then close: no errors, so the result reports success.
        tl.addmsg("another message", False, True)
        self.assertEqual(len(tl.log), 2)
        self.assertEqual(len(tl.err), 0)
        result = tl.close_logger()
        self.assertTrue(result.success,
                        "Error log is empty, so success must be true")
        self.assertTrue(tl.task_logfile.closed,
                        "File is not closed after finalizing")

        # Reopen and add an error: the log list is unchanged, the error list
        # grows, and closing now reports failure.
        tl.open()
        tl.addmsg("an error occurred", True, True)
        self.assertEqual(len(tl.log), 2)
        self.assertEqual(len(tl.err), 1)
        result = tl.close_logger()
        self.assertFalse(result.success,
                         "Error log is not empty, so success must be false")
Ejemplo n.º 5
0
class TestExtraction(unittest.TestCase):
    """Extracts the ``SIP-sqldump.tar.gz`` fixture and checks the expected files exist."""

    delivery_dir = root_dir + '/earkresources/Delivery-test/'
    temp_extract_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    # Top-level directory contained in the package and the files expected below it.
    _contained_sip_dir = './SIP-1f594d58-d09f-46dd-abac-8432068a7f6d/'
    _expected_sip_files = (
        './METS.xml',
        './schemas/premis-v2-2.xsd',
        './metadata/PREMIS.xml',
        './schemas/IP.xsd',
        './schemas/xlink.xsd',
        './schemas/ExtensionMETS.xsd',
        './schemas/jhove.xsd',
        './content/data/census.sql',
    )

    @classmethod
    def setUpClass(cls):
        # Create the extraction target unless it is already there.
        if not os.path.exists(TestExtraction.temp_extract_dir):
            os.makedirs(TestExtraction.temp_extract_dir)

    @classmethod
    def tearDownClass(cls):
        # Remove the extraction target together with everything extracted into it.
        shutil.rmtree(TestExtraction.temp_extract_dir)

    def _assert_sip_files_extracted(self):
        """Assert that every expected SIP file exists below the extraction directory."""
        for relative_path in self._expected_sip_files:
            extracted = os.path.join(TestExtraction.temp_extract_dir,
                                     self._contained_sip_dir, relative_path)
            # Interpolate the path with '%' so the message names the file
            # (it was appended with '+' before, leaving a literal "%s").
            self.assertTrue(
                os.path.isfile(extracted),
                "File %s not found in extracted directory" % extracted)

    def test_extract_sip(self):
        package_file = self.delivery_dir + 'SIP-sqldump.tar.gz'
        sip_extraction = Extraction()
        result = sip_extraction.extract(package_file,
                                        TestExtraction.temp_extract_dir)
        self.assertTrue(result.success)
        print(result.log[0])
        self._assert_sip_files_extracted()

    def test_extract_sip_with_report(self):
        package_file = self.delivery_dir + 'SIP-sqldump.tar.gz'
        sip_extraction = Extraction()
        result = sip_extraction.extract_with_report(
            package_file, TestExtraction.temp_extract_dir)
        self.assertTrue(result.success)
        print(result.log[0])
        self._assert_sip_files_extracted()
Ejemplo n.º 6
0

def set_default_config_if_not_exists(cfg_file, default_cfg):
    """Add missing options to a configuration file, leaving existing ones untouched.

    Each default whose option cannot be read from ``cfg_file`` is added (creating
    its section if necessary) and announced on stdout. The file is rewritten
    only when at least one option was added.

    :param cfg_file: path of the configuration file to read and, if needed, rewrite
    :param default_cfg: iterable of ``(section, option, value)`` triples
    """
    parser = ConfigParser.RawConfigParser()
    parser.read(cfg_file)

    added_any = False
    for section, option, value in default_cfg:
        if not parser.has_section(section):
            parser.add_section(section)
        try:
            parser.get(section, option)
        except ConfigParser.NoOptionError:
            print("Adding missing configuration option with default values: section: %s, option: %s, value: %s" % (section, option, value))
            parser.set(section, option, value)
            added_any = True

    # Nothing was missing: leave the file as it is.
    if not added_any:
        return

    with open(cfg_file, 'w') as fp:
        parser.write(fp)
    print("Config file written: %s" % cfg_file)

if __name__ == "__main__":
    # Demo run: write three example defaults (two strings and one integer) into
    # a randomly named configuration file under /tmp.
    set_default_config_if_not_exists(
        "/tmp/configexample%s.cfg" % randomword(5),
        [
            ("newsection", "example_a", "foo"),
            ("newsection", "example_b", 1),
            ("newsection", "example_c", "bar"),
        ],
    )
Ejemplo n.º 7
0
class TestExtraction(unittest.TestCase):
    """Extracts zip fixtures with Unzip and checks the resulting files and directories."""

    delivery_dir = root_dir + '/earkresources/Delivery-test/'
    temp_extract_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)
    package_dir = root_dir + '/earkresources/packaging-test/'
    temp_extract_dir2 = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    # Top-level directory contained in the SIP package and the files expected below it.
    _contained_sip_dir = './SIP-1f594d58-d09f-46dd-abac-8432068a7f6d/'
    _expected_sip_files = (
        './METS.xml',
        './schemas/premis-v2-2.xsd',
        './metadata/PREMIS.xml',
        './schemas/IP.xsd',
        './schemas/xlink.xsd',
        './schemas/ExtensionMETS.xsd',
        './schemas/jhove.xsd',
        './content/data/census.sql',
    )

    @classmethod
    def setUpClass(cls):
        # Each test extracts into its own directory; create both up front.
        if not os.path.exists(TestExtraction.temp_extract_dir):
            os.makedirs(TestExtraction.temp_extract_dir)
        if not os.path.exists(TestExtraction.temp_extract_dir2):
            os.makedirs(TestExtraction.temp_extract_dir2)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestExtraction.temp_extract_dir)
        shutil.rmtree(TestExtraction.temp_extract_dir2)

    @staticmethod
    def _count_entries(top):
        """Return ``(number of files, number of directories)`` found below ``top``."""
        n_files = 0
        n_dirs = 0
        for _root, dirs, files in os.walk(top):
            n_files += len(files)
            n_dirs += len(dirs)
        return n_files, n_dirs

    def test_extract_sip(self):
        print("Extracting to %s" % self.temp_extract_dir)
        package_file = self.delivery_dir + 'SIP-sqldump.zip'
        sip_extraction = Unzip()
        result = sip_extraction.extract_with_report(
            package_file, TestExtraction.temp_extract_dir)
        self.assertTrue(result.success)
        for log in result.log:
            print(log)
        # must be 8 extracted files
        cpt, _ndirs = self._count_entries(TestExtraction.temp_extract_dir)
        self.assertEqual(cpt, 8, "Number of extracted files not as expected")
        for relative_path in self._expected_sip_files:
            extracted = os.path.join(TestExtraction.temp_extract_dir,
                                     self._contained_sip_dir, relative_path)
            # Interpolate the path with '%' so the message names the file
            # (it was appended with '+' before, leaving a literal "%s").
            self.assertTrue(
                os.path.isfile(extracted),
                "File %s not found in extracted directory" % extracted)

    def test_extract_package_with_empty_folder(self):
        package_file = self.package_dir + 'pkg.zip'
        sip_extraction = Unzip()
        result = sip_extraction.extract_with_report(
            package_file, TestExtraction.temp_extract_dir2)
        print("Extracting package %s to %s" % (package_file,
                                               self.temp_extract_dir2))
        self.assertTrue(result.success)
        for log in result.log:
            print(log)
        # must be 2 extracted files and 3 directories
        cpt, ndirs = self._count_entries(TestExtraction.temp_extract_dir2)
        self.assertEqual(cpt, 2,
                         "Number of extracted files not as expected: %s" % cpt)
        self.assertEqual(
            ndirs, 3,
            "Number of extracted directories not as expected: %s" % ndirs)
        # NOTE(review): the directory is spelled "pgk" while the archive is
        # named "pkg.zip" -- confirm this matches the archive's top-level folder.
        items_to_check = (
            os.path.join(TestExtraction.temp_extract_dir2, "pgk", 'emptydir'),
            os.path.join(TestExtraction.temp_extract_dir2, "pgk", 'somedir'),
            os.path.join(TestExtraction.temp_extract_dir2, "pgk",
                         'somedir/fileinsomedir.txt'),
            os.path.join(TestExtraction.temp_extract_dir2, "pgk",
                         'testfile.txt'),
        )
        for item in items_to_check:
            self.assertTrue(
                os.path.exists(item),
                "Item %s not found in extracted directory" % item)
Ejemplo n.º 8
0
class TestExtract(unittest.TestCase):
    """Extracts the SIP fixture in several archive formats through ``Extract.factory``."""

    delivery_dir = root_dir + '/earkresources/Delivery-test/'
    temp_extract_dir = root_dir + '/tmp/temp-' + randomutils.randomword(10)

    # Top-level directory contained in the package and the files expected below it.
    _contained_sip_dir = './SIP-1f594d58-d09f-46dd-abac-8432068a7f6d/'
    _expected_sip_files = (
        './METS.xml',
        './schemas/premis-v2-2.xsd',
        './metadata/PREMIS.xml',
        './schemas/IP.xsd',
        './schemas/xlink.xsd',
        './schemas/ExtensionMETS.xsd',
        './schemas/jhove.xsd',
        './content/data/census.sql',
    )

    @classmethod
    def setUpClass(cls):
        if not os.path.exists(TestExtract.temp_extract_dir):
            os.makedirs(TestExtract.temp_extract_dir)

    @classmethod
    def tearDownClass(cls):
        # Remove the extraction directory so test runs do not pile up temp
        # directories; ignore errors in case it is already gone.
        shutil.rmtree(TestExtract.temp_extract_dir, ignore_errors=True)

    def tearDown(self):
        # Empty the shared directory after every test, including failed ones,
        # so the file count of the next test is not affected by leftovers.
        self.reset_test_dir()

    def reset_test_dir(self):
        """Delete the extraction directory and create it again, empty."""
        shutil.rmtree(TestExtract.temp_extract_dir)
        os.makedirs(TestExtract.temp_extract_dir)

    def test_extract_sip_zip(self):
        package_file = self.delivery_dir + 'SIP-sqldump.zip'
        self.extr(package_file)

    def test_extract_sip_tar(self):
        package_file = self.delivery_dir + 'SIP-sqldump.tar'
        self.extr(package_file)

    def extr(self, package_file):
        """Extract ``package_file`` and verify the number and names of the extracted files.

        The extractor is chosen by ``Extract.factory`` from the file extension.

        :param package_file: path of the archive to extract
        """
        _, file_extension = os.path.splitext(package_file)
        sip_extraction = Extract.factory(file_extension)

        result = sip_extraction.extract_with_report(
            package_file, TestExtract.temp_extract_dir)
        self.assertTrue(result.success)
        for log in result.log:
            print(log)
        # must be 8 extracted files
        cpt = sum(
            len(files) for r, d, files in os.walk(TestExtract.temp_extract_dir))
        self.assertEqual(cpt, 8, "Number of extracted files not as expected")
        for relative_path in self._expected_sip_files:
            extracted = os.path.join(TestExtract.temp_extract_dir,
                                     self._contained_sip_dir, relative_path)
            # Interpolate the path with '%' so the message names the file
            # (it was appended with '+' before, leaving a literal "%s").
            self.assertTrue(
                os.path.isfile(extracted),
                "File %s not found in extracted directory" % extracted)
Ejemplo n.º 9
0
class TestPairtreeStorage(unittest.TestCase):
    """Tests PairtreeStorage lookups against a fixture repository and storing into a temporary one."""

    source_dir = root_dir + '/earkresources/storage-test/'
    package_file = "bar.tar"
    repository_storage_dir = root_dir + '/tmp/temp-' + randomutils.randomword(
        10)
    test_repo = root_dir + '/earkresources/test-repo/'

    @classmethod
    def setUpClass(cls):
        # Prepare an empty temporary repository: create the directory and copy
        # the "pairtree_version0_1" file from the fixture repository into it.
        if not os.path.exists(TestPairtreeStorage.repository_storage_dir):
            os.makedirs(TestPairtreeStorage.repository_storage_dir)
        shutil.copy(
            os.path.join(TestPairtreeStorage.test_repo, "pairtree_version0_1"),
            TestPairtreeStorage.repository_storage_dir)

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(TestPairtreeStorage.repository_storage_dir)

    def test_identifier_object_exists(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        existing_identifier = "bar"
        nonexisting_identifier = "foo"
        self.assertEqual(pts.identifier_object_exists(existing_identifier),
                         True)
        self.assertEqual(pts.identifier_object_exists(nonexisting_identifier),
                         False)

    def test_version_exists(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        identifier = "bar"
        self.assertEqual(pts.identifier_version_object_exists(identifier, 3),
                         False)
        self.assertEqual(pts.identifier_version_object_exists(identifier, 2),
                         True)

    def test_next_version(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        identifier = "bar"
        self.assertEqual("00003", pts._next_version(identifier))

    def test_curr_version(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        identifier = "bar"
        self.assertEqual("00002", pts.curr_version(identifier))

    def test_store(self):
        # Storing the same package twice into the temporary repository must
        # yield version 1 and then version 2.
        pts = PairtreeStorage(TestPairtreeStorage.repository_storage_dir)
        pts.store("bar", os.path.join(self.source_dir, self.package_file))
        self.assertEqual(1, pts.curr_version_num("bar"))
        pts.store("bar", os.path.join(self.source_dir, self.package_file))
        self.assertEqual(2, pts.curr_version_num("bar"))

    def test_get_object_path(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)
        expected = os.path.join(TestPairtreeStorage.test_repo,
                                "pairtree_root/ba/r/data/00002/bar.tar")
        actual = pts.get_object_path("bar")
        self.assertEqual(expected, actual)

    def test_get_object_item_stream(self):
        pts = PairtreeStorage(TestPairtreeStorage.test_repo)

        content = pts.get_object_item_stream(
            "bar", "739f9c5f-c402-42af-a18b-3d0bdc4e8751/METS.xml")
        self.assertTrue(content.startswith("<?xml"))
        logger.debug(content)
Ejemplo n.º 10
0
    def post_tar_file(self, tar_file_path, identifier, progress_reporter=default_reporter):
        """
        Iterate over tar file and post the regular files it contains to Solr API (extract)

        Members are extracted one by one into a temporary directory which is
        removed again before returning, also when an error occurs. If posting a
        file to the extract handler does not return status 200, the file is
        posted again through post_file_document as a fallback.

        @type       tar_file_path: string
        @param      tar_file_path: Absolute path to tar file

        @type       identifier: string
        @param      identifier: Identifier of the tar package

        @type       progress_reporter: callable
        @param      progress_reporter: Called with the progress in percent (0 to 100)

        @rtype: list(dict(string, int))
        @return: Return list of urls and return codes
        """
        progress_reporter(0)
        import tarfile
        extract_dir = '/tmp/temp-' + randomutils.randomword(10)
        results = []

        tfile = tarfile.open(tar_file_path, 'r')
        try:
            # Only regular files are posted; directories, links and other
            # special members cannot be opened and uploaded as documents.
            regular_members = [tarinfo for tarinfo in tfile if tarinfo.isreg()]
            numfiles = len(regular_members)
            logger.debug("Number of files in tarfile: %s " % numfiles)

            num = 0

            # The package type is read from the package's METS file when it is
            # available; otherwise the default "IP" is kept.
            mets_entry = "%s/METS.xml" % identifier
            package_type = "IP"
            try:
                # NOTE(review): members are extracted under their stored names;
                # a tar from an untrusted source could contain names pointing
                # outside extract_dir -- confirm inputs are trusted.
                tfile.extract(mets_entry, extract_dir)
            except KeyError:
                logger.warn("METS entry does not exist in TAR file: %s" % mets_entry)
            else:
                mets_path = os.path.join(extract_dir, identifier, "METS.xml")
                if os.path.exists(mets_path):
                    try:
                        mets = ParsedMets(extract_dir)
                        mets.load_mets(mets_path)
                        package_type = mets.get_package_type()
                    except Exception:
                        # Best effort: an unreadable METS must not stop indexing.
                        logger.warn("Error loading METS from package during indexing, assigning default package type instead.")
                else:
                    logger.warn("METS file does not exist: %s" % mets_path)

            for t in regular_members:
                tfile.extract(t, extract_dir)
                afile = os.path.join(extract_dir, t.name)
                if not os.path.exists(afile):
                    continue

                params = SolrDocParams(afile).get_params()
                params['literal.packagetype'] = package_type
                params['literal.package'] = identifier
                params['literal.path'] = t.name
                post_url = '%s/update/extract?%s' % (self.url, urllib.urlencode(params))
                # Close the uploaded file as soon as the request has completed.
                with open(afile, 'rb') as payload:
                    files = {'file': ('userfile', payload)}
                    response = requests.post(post_url, files=files)
                # The recorded status is that of the extract request, even when
                # the fallback below is used.
                result = {"url": post_url, "status": response.status_code}
                if response.status_code != 200:
                    status = self.post_file_document(afile, identifier, t.name)
                    if status == 200:
                        logger.info("posting file failed for url '%s' with status code: %d (posted plain document instead)" % (post_url, response.status_code))
                    else:
                        logger.info("Unable to create document for url '%s'" % (post_url))
                results.append(result)
                num += 1
                # Integer percentage; numfiles is at least 1 inside this loop.
                percent = num * 100 // numfiles
                progress_reporter(percent)
            self.commit()
            logger.debug("Files extracted to %s" % extract_dir)
        finally:
            tfile.close()
            # The directory does not exist when nothing was extracted.
            shutil.rmtree(extract_dir, ignore_errors=True)
        progress_reporter(100)
        return results