예제 #1
0
파일: test_sync.py 프로젝트: EHRI/resydes
    def test01_no_destination_no_connection(self):
        """
        Process the same unreachable source under three destination setups.

        Each run is expected to finish with exactly one exception and the
        status ``Status.processed_with_exceptions``.
        """
        source_uri = "http://bla.com"
        Config().__set_prop__(Config.key_use_netloc, "False")
        DestinationMap().__remove_destination__(source_uri)
        des.reporter.reset_instance()

        # Run 1: nothing is mapped, so processing returns at the missing destination.
        logger.debug("\n============ no destination =============\n")
        syncer = Relisync(source_uri)
        self.assertEqual(Status.init, syncer.status)
        syncer.process_source()
        self.assertEqual(1, len(syncer.exceptions))
        self.assertEqual("No destination for http://bla.com", syncer.exceptions[0])
        self.assertEqual(Status.processed_with_exceptions, syncer.status)

        # Run 2: a destination is mapped, but no connection can be made;
        # the failure ends up as a caught exception.
        DestinationMap().__set_destination__(source_uri, "destination_x")
        logger.debug("\n============destination, no connection =============\n")
        syncer = Relisync(source_uri)
        syncer.process_source()
        self.assertEqual(1, len(syncer.exceptions))
        self.assertEqual(Status.processed_with_exceptions, syncer.status)
        reporter = des.reporter.instance()
        self.assertEqual(2, len(reporter.sync_status))
        self.assertIsNotNone(reporter.sync_status[0].exception)

        # Run 3: the net location 'bla.com' serves as destination, still no connection.
        Config().__set_prop__(Config.key_use_netloc, "True")
        DestinationMap().__remove_destination__(source_uri)
        logger.debug("\n=========== using netloc, still no connection ==============\n")
        syncer = Relisync(source_uri)
        syncer.process_source()
        self.assertEqual(1, len(syncer.exceptions))
        self.assertEqual(Status.processed_with_exceptions, syncer.status)
        self.assertEqual(3, len(reporter.sync_status))
        self.assertIsNotNone(reporter.sync_status[1].exception)
예제 #2
0
    def test07_process_source(self):
        """
        Read a source with a reachable, readable resourcesync document and
        check that the sitemap is written to the destination folder.
        """
        # Start without sitemaps from an earlier run. A missing folder is not
        # an error here, but only removal failures are ignored: interrupts
        # and programming errors still surface.
        shutil.rmtree("rs/destination/d6/sitemaps", ignore_errors=True)

        # Reset configuration and destination map to the test fixtures.
        Config.__set_config_filename__("test-files/config.txt")
        Config().__drop__()
        DestinationMap.__set_map_filename__("test-files/desmap.txt")
        DestinationMap().__drop__()
        des.reporter.reset_instance()
        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__set_destination__(
            "http://localhost:8000/rs/source/s6", "rs/destination/d6")
        # The listener is what writes received sitemaps to disk.
        des.processor.processor_listeners.append(SitemapWriter())
        base_uri = "http://localhost:8000/rs/source/s6/"

        sdproc = Sodesproc(base_uri)
        sdproc.read_source()
        self.assertEqual(200, sdproc.source_status)
        self.assertEqual(Status.document, sdproc.status)
        self.assertTrue(
            os.path.isfile(
                "rs/destination/d6/sitemaps/.well-known/resourcesync"))
예제 #3
0
    def test_set_filename_once(self):
        """
        Set the map filename twice and assert that both the class and a new
        instance keep reporting the first filename.
        """
        for requested in ("test-files/desmap.txt", "test-files/other-desmap.txt"):
            DestinationMap.__set_map_filename__(requested)
            # Whatever was requested, the first filename is the one expected.
            self.assertEqual("test-files/desmap.txt",
                             DestinationMap._get_map_filename())
            self.assertEqual("test-files/desmap.txt",
                             DestinationMap()._map_filename)
예제 #4
0
    def test_find_local_path(self):
        """
        Map three base uris to local folders and check the base uri and local
        path that find_local_path returns for a resource below each of them.
        """
        desmap = DestinationMap()

        # (mapped uri, local folder, resource uri, expected base uri,
        #  expected local path, extra keyword arguments for find_local_path)
        cases = (
            ("http://a.name.com/path/ignored",
             "local/folder/a",
             "http://a.name.com/path/ignored/but/this/path/remains/file.txt",
             "http://a.name.com/path/ignored",
             "./local/folder/a/but/this/path/remains/file.txt",
             {}),
            # mapped uri with a trailing slash
            ("http://b.name.com/path/ignored/",
             "local/folder/b",
             "http://b.name.com/path/ignored/but/this/path/remains/file.txt",
             "http://b.name.com/path/ignored",
             "./local/folder/b/but/this/path/remains/file.txt",
             {}),
            # an infix is expected between the folder and the remaining path
            ("http://c.name.com/path/ignored",
             "local/folder/c",
             "http://c.name.com/path/ignored/but/this/path/remains/file.txt",
             "http://c.name.com/path/ignored",
             "./local/folder/c/infix/but/this/path/remains/file.txt",
             {"infix": "infix"}),
        )

        for mapped_uri, folder, uri, expected_base, expected_path, extra in cases:
            desmap.__set_destination__(mapped_uri, folder)
            base_uri, local_path = desmap.find_local_path(uri, **extra)
            self.assertEqual(expected_base, base_uri)
            self.assertEqual(expected_path, local_path)
예제 #5
0
def setUpModule():
    """
    Start an HTTP server on port 8000 in a daemon thread and reset the
    configuration and destination map to the test fixtures.
    """
    global server
    server = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
    serving_thread = threading.Thread(target=server.serve_forever, daemon=True)
    logger.debug("Starting server at http://localhost:8000/")
    serving_thread.start()

    proc.processor_listeners.append(SitemapWriter())

    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/discover/", "rs/destination/discover")
예제 #6
0
파일: test_sync.py 프로젝트: EHRI/resydes
    def test_02_change(self):
        """
        Synchronize source s1 from its resource list, change one resource,
        then synchronize from the change list and check the reported counts.
        """
        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")

        # Fresh destination and freshly generated source documents.
        __clear_destination__("d1")
        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")

        logger.debug("\n=========== create ==============\n")
        baseline = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
        baseline.process_source()
        self.assertEqual(0, len(baseline.exceptions))
        self.assertEqual(Status.processed, baseline.status)

        __change_resource__("s1", "resource1.txt")
        __create_changelist__("s1")

        logger.debug("\n=========== change ==============\n")
        incremental = Chanlisync("http://localhost:8000/rs/source/s1/changelist.xml")
        incremental.process_source()

        self.assertEqual(0, len(incremental.exceptions))
        self.assertEqual(Status.processed, incremental.status)
        reporter = des.reporter.instance()
        self.assertEqual(4, len(reporter.sync_status))
        last_entry = reporter.sync_status[3]
        # NOTE: an earlier revision asserted `same == 1` here; None is the
        # current expectation.
        self.assertIsNone(last_entry.same)
        self.assertEqual(0, last_entry.created)
        self.assertEqual(1, last_entry.updated)
        self.assertEqual(0, last_entry.deleted)
        self.assertEqual(0, last_entry.to_delete)
        self.assertIsNone(last_entry.exception)

        reporter.sync_status_to_file("logs/incremental-change.csv")
예제 #7
0
파일: test_sync.py 프로젝트: EHRI/resydes
    def test04_process_baseline_netloc(self):
        """
        Synchronize source s1 without a mapped destination, with the use of
        the net location as destination switched on.
        """
        Config().__set_prop__(Config.key_use_netloc, "True")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__remove_destination__("http://localhost:8000/rs/source/s1")

        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")
        if not os.path.isdir("localhost:8000"):
            logger.debug("Expecting update")
            expected_sync_status_count = 2
        else:
            logger.debug("Expecting only audit")
            expected_sync_status_count = 1

        logger.debug("\n=========================\n")
        syncer = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
        syncer.process_source()

        self.assertEqual(0, len(syncer.exceptions))
        self.assertEqual(Status.processed, syncer.status)
        reporter = des.reporter.instance()
        # The number of sync statuses depends on whether this test is run
        # individually or in a group, so the count is deliberately not asserted:
        # self.assertEqual(expected_sync_status_count, len(reporter.sync_status))

        reporter.sync_status_to_file("logs/baseline-netloc.csv")
예제 #8
0
파일: desrunner.py 프로젝트: EHRI/resydes
    def run(self, sources, task="discover", once=False):
        """
        Run rounds of synchronization until told to stop.

        Every round re-reads the source urls from the file given in 'sources',
        so urls can be added or changed without restarting the application.
        The url --> destination mapping is also re-read each round, from the
        file named by the configuration parameter
        "location_mapper_destination_file".

        A running application can be stopped by creating a file named 'stop'
        in the directory the runner was started from.

        :param sources: the file containing source urls
        :param task: the task to run.
                        - 'wellknown' if all source urls can be discovered by reading
                            the .well-known/resourcesync on each source.
                        - 'capability' if all source urls point to capability lists.
                        - 'discover' if source urls are heterogeneous.
        :param once: True to explore the source urls once and then exit, False otherwise
        :return:
        """
        while True:
            # list of urls
            self.logger.info("Reading source urls from '%s'" % sources)
            self.__read_sources_doc__(sources)

            # New mappings may have been configured: point the map at the
            # configured file and drop it to force a fresh read.
            map_filename = Config().prop(Config.key_location_mapper_destination_file, "conf/desmap.txt")
            DestinationMap.__set_map_filename__(map_filename)
            DestinationMap().__drop__()
            # Set the root of the destination folder if configured
            DestinationMap().set_root_folder(Config().prop(Config.key_destination_root))

            # do all the urls, then report
            self.__do_task__(task)
            self.__do_report__(task)

            # to continue or not to continue
            if once or self.__stop__():
                break

            pause = Config().int_prop(Config.key_sync_pause)
            self.logger.info("Going to sleep for %d seconds." % pause)
            self.logger.debug("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
            time.sleep(pause)

            # a stop may have been requested during the sleep
            if once or self.__stop__():
                break
예제 #9
0
    def test08_try_robots_with_netloc(self):
        """
        With no mapped destination and net location use switched on, the
        discoverer is expected to hand out a Reliproc for the 'loc2' source.
        """
        DestinationMap().__remove_destination__(
            "http://localhost:8000/rs/source/discover/")
        Config().__set_prop__(Config.key_use_netloc, "True")

        source_uri = "http://localhost:8000/rs/source/discover/loc2"
        processor = Discoverer(source_uri).get_processor()

        self.assertIsInstance(processor, proc.Reliproc)
        processor.read_source()
예제 #10
0
파일: test_sync.py 프로젝트: EHRI/resydes
    def test_03_change_delete(self):
        """
        Synchronize source s2, then change one resource and delete another,
        process the change list twice and check the reported counts each time.
        """
        def assert_counts(entry, updated, deleted, to_delete):
            # 'same' is expected to be None and 'created' 0 in both runs.
            self.assertIsNone(entry.same)
            self.assertEqual(0, entry.created)
            self.assertEqual(updated, entry.updated)
            self.assertEqual(deleted, entry.deleted)
            self.assertEqual(to_delete, entry.to_delete)
            self.assertIsNone(entry.exception)

        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__set_destination__("http://localhost:8000/rs/source/s2", "rs/destination/d2")

        __clear_destination__("d2")
        __clear_sources_xml__("s2")
        __add_resource__("s2", "added.txt")
        __create_resourcelist__("s2")

        logger.debug("\n=========== create ==============\n")
        baseline = Relisync("http://localhost:8000/rs/source/s2/resourcelist.xml")
        baseline.process_source()
        self.assertEqual(0, len(baseline.exceptions))
        self.assertEqual(Status.processed, baseline.status)

        __change_resource__("s2", "resource2.txt")
        __delete_resource__("s2", "added.txt")
        __create_changelist__("s2")

        des.reporter.reset_instance()
        logger.debug("\n=========== update + delete ==============\n")
        first_pass = Chanlisync("http://localhost:8000/rs/source/s2/changelist.xml")
        first_pass.process_source()

        self.assertEqual(0, len(first_pass.exceptions))
        self.assertEqual(Status.processed, first_pass.status)

        reporter = des.reporter.instance()
        reporter.sync_status_to_file("logs/incremental-change-delete.csv")
        self.assertEqual(2, len(reporter.sync_status))
        assert_counts(reporter.sync_status[1], updated=1, deleted=1, to_delete=1)

        des.reporter.reset_instance()
        logger.debug("\n=========== no change ==============\n")
        second_pass = Chanlisync("http://localhost:8000/rs/source/s2/changelist.xml")
        second_pass.process_source()

        self.assertEqual(0, len(second_pass.exceptions))
        self.assertEqual(Status.processed, second_pass.status)

        reporter = des.reporter.instance()
        self.assertEqual(1, len(reporter.sync_status))
        assert_counts(reporter.sync_status[0], updated=0, deleted=0, to_delete=0)
예제 #11
0
파일: sync.py 프로젝트: EHRI/resydes
    def process_source(self):
        """
        Look up the destination for self.uri and synchronize to it.

        When no destination is found, the problem is logged, appended to
        self.exceptions and reported. Afterwards self.status is set to
        Status.processed_with_exceptions or Status.processed, depending on
        self.has_exceptions().
        """
        use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
        # Only the destination is needed here; the base uri is discarded.
        _, destination = DestinationMap().find_destination(self.uri, netloc=use_netloc, infix="resources")

        if destination is not None:
            self.__synchronize__(destination)
        else:
            message = "No destination for %s" % self.uri
            self.logger.debug(message)
            self.exceptions.append(message)
            des.reporter.instance().log_status(
                self.uri,
                exception="No destination specified and use of net location prohibited.")

        if self.has_exceptions():
            self.status = Status.processed_with_exceptions
        else:
            self.status = Status.processed
예제 #12
0
    def test_find_destination_with_infix(self):
        """
        Check that find_destination returns the mapped folder with the
        requested infix appended, for two different infixes.
        """
        desmap = DestinationMap()
        desmap.__set_destination__("http://a.name.com/path/ignored",
                                   "local/folder/a")
        uri = "http://a.name.com/path/ignored/but/this/path/remains/file.txt"

        expectations = (
            ("resources", "./local/folder/a/resources"),
            ("sitemaps", "./local/folder/a/sitemaps"),
        )
        for infix, expected_destination in expectations:
            base_uri, destination = desmap.find_destination(uri, infix=infix)
            self.assertEqual("http://a.name.com/path/ignored", base_uri)
            self.assertEqual(expected_destination, destination)
예제 #13
0
 def event_sitemap_received(self, uri, capability, text):
     """
     Write the text of a received sitemap to the local path mapped to its uri.

     The local path is looked up in the DestinationMap with SITEMAP_FOLDER
     as infix. Missing parent folders are created. When no local path is
     found, nothing is written and a warning is logged.

     :param uri: the uri the sitemap was received from
     :param capability: the capability of the sitemap, used in log messages
     :param text: the text to write to the file
     :return:
     """
     config = Config()
     netloc = config.boolean_prop(Config.key_use_netloc, False)
     # Only the local path is used; the base uri is discarded.
     _, local_path = DestinationMap().find_local_path(
         uri, netloc=netloc, infix=SITEMAP_FOLDER)
     if local_path is not None:
         folder = os.path.dirname(local_path)
         # os.makedirs('') raises, so skip it for a bare file name.
         if folder:
             os.makedirs(folder, exist_ok=True)
         with open(local_path, "w") as file:
             file.write(text)
         self.logger.debug("Saved %s '%s'" % (capability, local_path))
     else:
         # Logger.warn is a deprecated alias that was removed in Python 3.13.
         self.logger.warning("Could not save %s. No local path for %s" %
                             (capability, uri))
예제 #14
0
파일: test_sync.py 프로젝트: EHRI/resydes
    def test03_process_baseline(self):
        """
        Synchronize source s1 into an empty destination, then synchronize
        again and check the counts reported for each run.
        """
        def assert_entry(entry, expected_counts):
            # Compare the counters in a fixed order, then the exception.
            for attribute, expected in expected_counts:
                self.assertEqual(expected, getattr(entry, attribute))
            self.assertIsNone(entry.exception)

        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "False")
        DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")

        __clear_destination__("d1")
        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")
        des.reporter.reset_instance()

        logger.debug("\n=========== create ==============\n")
        first_run = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
        first_run.process_source()

        self.assertEqual(0, len(first_run.exceptions))
        self.assertEqual(Status.processed, first_run.status)
        reporter = des.reporter.instance()
        # sync_status count: 1 for audit, 1 for create. expected 2
        self.assertEqual(2, len(reporter.sync_status))
        assert_entry(reporter.sync_status[0], (
            ("same", 0),
            ("created", 3),
            ("updated", 0),
            ("deleted", 0),
            ("to_delete", 0),
        ))

        logger.debug("\n============ update =============\n")
        second_run = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
        second_run.process_source()

        self.assertEqual(0, len(second_run.exceptions))
        self.assertEqual(Status.processed, second_run.status)
        reporter = des.reporter.instance()
        # sync_status count: 1 for audit, 1 for create (both from previous run),
        # 1 for audit, no update. expected 3
        self.assertEqual(3, len(reporter.sync_status))
        assert_entry(reporter.sync_status[2], (
            ("same", 3),
            ("created", 0),
            ("updated", 0),
            ("deleted", 0),
            ("to_delete", 0),
        ))
예제 #15
0
파일: test_sync.py 프로젝트: EHRI/resydes
    def test02_process_audit(self):
        """
        Process the resource list of source s1 with audit-only switched on
        and expect a single sync status without exceptions.
        """
        Config().__set_prop__(Config.key_use_netloc, "False")
        Config().__set_prop__(Config.key_audit_only, "True")
        DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")

        # Fresh destination, fresh source documents, fresh reporter.
        __clear_destination__("d1")
        __clear_sources_xml__("s1")
        __create_resourcelist__("s1")
        des.reporter.reset_instance()

        logger.debug("\n=========================\n")
        auditor = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
        auditor.process_source()

        self.assertEqual(0, len(auditor.exceptions))
        self.assertEqual(Status.processed, auditor.status)

        reporter = des.reporter.instance()
        self.assertEqual(1, len(reporter.sync_status))
        reporter.sync_status_to_file("logs/audit.csv")
예제 #16
0
    def base_line(self, unzipdir):
        """
        Synchronize the unzipped contents of a resource dump with the local resources.

        NOTE: this method is not fully implemented. When the manifest parses
        and a destination is found, the resource lists are compared and a
        NotImplementedError is raised. That error is not caught here, so it
        propagates to the caller.

        An AssertionError (wrong capability, no destination) or a
        SitemapParseError is caught, logged and appended to self.exceptions.

        :param unzipdir: the directory of the unzipped packed contents.
        :return:
        :raises NotImplementedError: after a successful comparison of the lists.
        """
        manifest_file_name = os.path.join(unzipdir, "manifest.xml")
        try:
            sitemap = Sitemap()
            manifest_doc = sitemap.parse_xml(fh=manifest_file_name)
            # the manifest_doc is a resync.resource_container.ResourceContainer
            capability = manifest_doc.capability
            # NOTE(review): assert statements are stripped when Python runs with -O.
            assert capability == CAPA_RESOURCEDUMP_MANIFEST, "Capability is not %s but %s" % (
                CAPA_RESOURCEDUMP_MANIFEST, capability)
            self.status = Status.parsed
            self.__inform_sitemap_received__(capability, manifest_file_name)

            config = Config()
            netloc = config.boolean_prop(Config.key_use_netloc, False)
            base_uri, destination = DestinationMap().find_destination(
                self.pack_uri, netloc=netloc)
            assert destination is not None, "Found no destination folder in DestinationMap"
            mapper = Mapper((base_uri, destination))
            rlb = ResourceListBuilder(mapper=mapper)
            dst_resource_list = rlb.from_disk()
            # Compares on uri
            same, updated, deleted, created = dst_resource_list.compare(
                manifest_doc)

            # Everything below this raise, up to the except clauses, is unreachable.
            raise NotImplementedError("This class is not fully implemented.")

            print(len(same), len(updated), len(deleted), len(created))

            print("same")
            for resource in same:
                print(resource)
            print("updated")
            for resource in updated:
                print(resource)
            print("deleted")
            for resource in deleted:
                print(resource)
            print("created")
            for resource in created:
                print(resource)
                base_uri, local_path = DestinationMap().find_local_path(
                    resource.uri)
                print(base_uri, local_path)

        except AssertionError as err:
            self.logger.debug("%s Error: %s" % (self.pack_uri, str(err)))
            self.status = Status.parse_error
            self.exceptions.append(err)
        except SitemapParseError as err:
            # NOTE(review): the rest of this method uses self.pack_uri; confirm
            # that self.source_uri exists on this class.
            self.logger.debug("%s Unreadable source: %s" %
                              (self.source_uri, str(err)))
            self.status = Status.parse_error
            self.exceptions.append(err)

        # Only reached after a handled exception (the success path raises above).
        # This overwrites the Status.parse_error set in the handlers.
        self.status = Status.processed_with_exceptions if self.has_exceptions(
        ) else Status.processed
예제 #17
0
 def setUp(self):
     """
     Reset the configuration to the test config file, register a
     SitemapWriter as dump listener and map the test source to destination d7.
     """
     Config.__set_config_filename__("test-files/config.txt")
     Config().__drop__()

     des.dump.dump_listeners.append(SitemapWriter())

     source_uri = "http://localhost:8000/rs/source"
     destination_folder = "rs/destination/d7"
     DestinationMap().__set_destination__(source_uri, destination_folder)
예제 #18
0
    def test_find_destination(self):
        """
        Check the base uri and destination that find_destination returns for
        mapped and unmapped uris, with and without a root folder set.
        """
        DestinationMap.__set_map_filename__("test-files/desmap.txt")
        desmap = DestinationMap()

        def check(uri, expected_base, expected_destination, *args, **kwargs):
            # Extra arguments are passed on to find_destination unchanged.
            base_uri, destination = desmap.find_destination(uri, *args, **kwargs)
            self.assertEqual(expected_base, base_uri)
            if expected_destination is None:
                self.assertIsNone(destination)
            else:
                self.assertEqual(expected_destination, destination)

        # Any path on the mapped host, with or without trailing slash.
        for uri in ("http://long.name.com/path/to/resource.xml",
                    "http://long.name.com/path/to/",
                    "http://long.name.com/",
                    "http://long.name.com"):
            check(uri, "http://long.name.com", "./destination1")

        # explicit path to resource in desmap
        check("https://first.com:8080/path1",
              "https://first.com:8080", None)
        check("https://first.com:8080/path1/to/resource.xml",
              "https://first.com:8080/path1/to/resource.xml", "./destination2")
        check("https://first.com:8080/path2/",
              "https://first.com:8080/path2", "./destination3")
        check("https://first.com:8080/path2",
              "https://first.com:8080/path2", "./destination3")

        # unmapped uri with a default destination passed positionally
        check("https://not.mapped.com/resource.xml",
              "https://not.mapped.com", "./default/path", "default/path")

        # unmapped uri with the net location as destination
        check("https://not.mapped.com/resource.xml",
              "https://not.mapped.com", "./not.mapped.com", netloc=True)

        # With a root folder set, the destination is expected below that folder.
        desmap.set_root_folder("foo/bar")
        check("http://long.name.com/path/to/resource.xml",
              "http://long.name.com", "foo/bar/destination1")
예제 #19
0
파일: test_sync.py 프로젝트: EHRI/resydes
 def setUp(self):
     """
     Point Config and DestinationMap at the test files, call __drop__ on
     both and reset the desclient instance.
     """
     config_file = "test-files/config.txt"
     Config.__set_config_filename__(config_file)
     Config().__drop__()

     map_file = "test-files/desmap.txt"
     DestinationMap.__set_map_filename__(map_file)
     DestinationMap().__drop__()

     des.desclient.reset_instance()
예제 #20
0
 def setUp(self):
     """Point DestinationMap at the test map file and call __drop__ on it."""
     map_file = "test-files/desmap.txt"
     DestinationMap.__set_map_filename__(map_file)
     DestinationMap().__drop__()