def test07_process_source(self):
    # Connection and readable resourcesync document: reading the source must
    # write the received sitemap to a file in the destination folder.

    # Best-effort removal of sitemaps left by an earlier run. The folder may
    # not exist yet, so removal errors are ignored -- without a bare
    # ``except`` that would also swallow KeyboardInterrupt/SystemExit.
    shutil.rmtree("rs/destination/d6/sitemaps", ignore_errors=True)

    # Point the singletons at the test files and drop cached instances.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    des.reporter.reset_instance()

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/s6", "rs/destination/d6")

    # The SitemapWriter listener is what persists received sitemaps.
    des.processor.processor_listeners.append(SitemapWriter())

    base_uri = "http://localhost:8000/rs/source/s6/"
    sdproc = Sodesproc(base_uri)
    sdproc.read_source()

    self.assertEqual(200, sdproc.source_status)
    self.assertEqual(Status.document, sdproc.status)
    self.assertTrue(
        os.path.isfile("rs/destination/d6/sitemaps/.well-known/resourcesync"))
def test_02_change(self):
    # Baseline synchronisation of source s1, followed by a change list run
    # after one resource has been modified.
    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")

    logger.debug("\n=========== create ==============\n")
    baseline = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    baseline.process_source()
    self.assertEqual(0, len(baseline.exceptions))
    self.assertEqual(Status.processed, baseline.status)

    __change_resource__("s1", "resource1.txt")
    __create_changelist__("s1")

    logger.debug("\n=========== change ==============\n")
    incremental = Chanlisync("http://localhost:8000/rs/source/s1/changelist.xml")
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)

    # NOTE(review): the reporter is not reset in this test, so the expected
    # count of 4 presumably includes statuses recorded before it ran --
    # verify against the test ordering.
    reporter = des.reporter.instance()
    self.assertEqual(4, len(reporter.sync_status))

    # The last status belongs to the change list run: one update, nothing
    # else. 'same' is expected to be None here rather than a count.
    change_status = reporter.sync_status[3]
    self.assertIsNone(change_status.same)
    for expected, counter in ((0, "created"), (1, "updated"),
                              (0, "deleted"), (0, "to_delete")):
        self.assertEqual(expected, getattr(change_status, counter))
    self.assertIsNone(change_status.exception)

    reporter.sync_status_to_file("logs/incremental-change.csv")
def test01_no_destination_no_connection(self):
    # Three failing scenarios for an unreachable source: no destination,
    # a mapped destination, and the net location used as destination.
    source = "http://bla.com"

    Config().__set_prop__(Config.key_use_netloc, "False")
    DestinationMap().__remove_destination__(source)
    des.reporter.reset_instance()

    # 1. Processing returns early because there is no destination.
    logger.debug("\n============ no destination =============\n")
    sync = Relisync(source)
    self.assertEqual(Status.init, sync.status)
    sync.process_source()
    self.assertEqual(1, len(sync.exceptions))
    self.assertEqual("No destination for http://bla.com", sync.exceptions[0])
    self.assertEqual(Status.processed_with_exceptions, sync.status)

    # 2. A destination is mapped, but the connection fails and the error
    #    ends up as a caught exception.
    DestinationMap().__set_destination__(source, "destination_x")
    logger.debug("\n============destination, no connection =============\n")
    sync = Relisync(source)
    sync.process_source()
    self.assertEqual(1, len(sync.exceptions))
    self.assertEqual(Status.processed_with_exceptions, sync.status)

    reporter = des.reporter.instance()
    self.assertEqual(2, len(reporter.sync_status))
    self.assertIsNotNone(reporter.sync_status[0].exception)

    # 3. Net location 'bla.com' serves as destination; still no connection.
    Config().__set_prop__(Config.key_use_netloc, "True")
    DestinationMap().__remove_destination__(source)
    logger.debug("\n=========== using netloc, still no connection ==============\n")
    sync = Relisync(source)
    sync.process_source()
    self.assertEqual(1, len(sync.exceptions))
    self.assertEqual(Status.processed_with_exceptions, sync.status)

    self.assertEqual(3, len(reporter.sync_status))
    self.assertIsNotNone(reporter.sync_status[1].exception)
def test04_process_baseline_netloc(self):
    # Baseline synchronisation of s1 with the net location as destination.
    Config().__set_prop__(Config.key_use_netloc, "True")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__remove_destination__("http://localhost:8000/rs/source/s1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")

    # If the netloc folder already exists only an audit is expected,
    # otherwise an update as well.
    netloc_folder_exists = os.path.isdir("localhost:8000")
    if not netloc_folder_exists:
        logger.debug("Expecting update")
        expected_sync_status_count = 2
    else:
        logger.debug("Expecting only audit")
        expected_sync_status_count = 1

    logger.debug("\n=========================\n")
    sync = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    sync.process_source()
    self.assertEqual(0, len(sync.exceptions))
    self.assertEqual(Status.processed, sync.status)

    reporter = des.reporter.instance()
    # The status count depends on whether the test runs on its own or as
    # part of a group, so the check below stays disabled:
    #self.assertEqual(expected_sync_status_count, len(reporter.sync_status))
    reporter.sync_status_to_file("logs/baseline-netloc.csv")
def test_03_change_delete(self):
    # Baseline of source s2, then a change list with one update and one
    # deletion, then the same change list once more (nothing left to do).
    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/s2", "rs/destination/d2")
    __clear_destination__("d2")
    __clear_sources_xml__("s2")
    __add_resource__("s2", "added.txt")
    __create_resourcelist__("s2")

    logger.debug("\n=========== create ==============\n")
    baseline = Relisync("http://localhost:8000/rs/source/s2/resourcelist.xml")
    baseline.process_source()
    self.assertEqual(0, len(baseline.exceptions))
    self.assertEqual(Status.processed, baseline.status)

    __change_resource__("s2", "resource2.txt")
    __delete_resource__("s2", "added.txt")
    __create_changelist__("s2")
    des.reporter.reset_instance()

    logger.debug("\n=========== update + delete ==============\n")
    incremental = Chanlisync("http://localhost:8000/rs/source/s2/changelist.xml")
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)

    reporter = des.reporter.instance()
    reporter.sync_status_to_file("logs/incremental-change-delete.csv")
    self.assertEqual(2, len(reporter.sync_status))
    changed = reporter.sync_status[1]
    self.assertIsNone(changed.same)
    for expected, counter in ((0, "created"), (1, "updated"),
                              (1, "deleted"), (1, "to_delete")):
        self.assertEqual(expected, getattr(changed, counter))
    self.assertIsNone(changed.exception)

    des.reporter.reset_instance()

    logger.debug("\n=========== no change ==============\n")
    incremental = Chanlisync("http://localhost:8000/rs/source/s2/changelist.xml")
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)

    reporter = des.reporter.instance()
    self.assertEqual(1, len(reporter.sync_status))
    unchanged = reporter.sync_status[0]
    self.assertIsNone(unchanged.same)
    for counter in ("created", "updated", "deleted", "to_delete"):
        self.assertEqual(0, getattr(unchanged, counter))
    self.assertIsNone(unchanged.exception)
def test04__drop__(self):
    # After __drop__ the class holds no instance, and the next Config()
    # reads the newly configured file.
    Config.__set_config_filename__("test-files/config.txt")
    first = Config()
    self.assertIsNotNone(first)
    self.assertIsNone(first.prop("this_is"))

    first.__drop__()
    self.assertIsNone(Config.__instance__)

    # Only the alternative configuration file defines 'this_is'.
    Config.__set_config_filename__("test-files/alt-config.txt")
    second = Config()
    self.assertIsNotNone(second)
    self.assertNotEqual(first, second)
    self.assertEqual("a_test", second.prop("this_is"))

    second.__drop__()
def __init__(self, config_filename="conf/config.txt"):
    '''
    Create a Runner configured from the file denoted by config_filename.

    Loads the configuration, sets up logging from the configured logging
    configuration file (default "conf/logging.conf") and injects
    dependencies.

    :param config_filename: path of the configuration file
    :return: None
    :raises FileNotFoundError: if the logging configuration file does not
        exist; a FileNotFoundError raised while loading the configuration
        is printed and re-raised
    '''
    try:
        Config.__set_config_filename__(config_filename)
        cfg = Config()
    except FileNotFoundError as err:
        # Logging is not configured at this point, so report on stdout.
        print(err)
        raise err

    log_conf = cfg.prop(Config.key_logging_configuration_file, "conf/logging.conf")
    # For a missing file logging.config.fileConfig raises
    # "KeyError: 'formatters'"; an explicit FileNotFoundError is less
    # confusing. There is no fallback logging configuration.
    if not os.path.isfile(log_conf):
        raise FileNotFoundError("Logging configuration file not found: " + log_conf)
    logging.config.fileConfig(log_conf)

    self.logger = logging.getLogger(__name__)
    self.pid = os.getpid()
    self.sources = None
    self.exceptions = []

    for message in (
        "Started %s with pid %d" % (__file__, self.pid),
        "Configured %s from '%s'" % (self.__class__.__name__, config_filename),
        "Configured logging from '%s'" % log_conf,
    ):
        self.logger.info(message)

    self.__inject_dependencies__(cfg)
def test_inject_dependencies(self):
    # Two listener classes are configured; creating a DesRunner must leave
    # two processor listeners registered.
    listener_classes = "des.processor_listener.SitemapWriter, des.processor.ProcessorListener"
    Config.__set_config_filename__("test-files/config.txt")
    Config().__set_prop__(Config.key_des_processor_listeners, listener_classes)

    # Constructing the runner is what triggers the injection.
    des_runner = DesRunner()

    self.assertEqual(2, len(des.processor.processor_listeners))
def test04_list_prop(self):
    # 'test_list' in the test configuration holds three entries, which
    # list_prop is expected to return in order.
    Config.__set_config_filename__("test-files/config.txt")
    config = Config()

    # Named 'values' rather than 'list' so the builtin is not shadowed.
    values = config.list_prop("test_list")

    self.assertEqual(3, len(values))
    self.assertEqual("foo.bar", values[0])
    self.assertEqual("bar.foo", values[1])
    self.assertEqual("foo.bar.baz", values[2])
def run(self, sources, task="discover", once=False):
    """
    Run the DesRunner.

    A running application can be stopped by creating a file named 'stop' in
    the directory the runner was started from.

    Source urls are read from the file given in param 'sources'. The file is
    read again at the start of every synchronization round, so source urls
    can be extended or changed without restarting the application. Sources
    are mapped to the destinations given in the file denoted by the
    configuration parameter "location_mapper_destination_file".

    :param sources: the file containing source urls
    :param task: the task to run.
        - If source urls can all be discovered by reading the
          .well-known/resourcesync on each source, use 'wellknown'.
        - If all source urls point to capability lists, use 'capability'.
        - If source urls are heterogeneous, use 'discover'.
    :param once: True for exploring source urls once and then exit,
        False otherwise
    :return:
    """
    while True:
        # list of urls
        self.logger.info("Reading source urls from '%s'" % sources)
        self.__read_sources_doc__(sources)

        # Reset the url --> destination map; new mappings may have been
        # configured. Dropping the instance forces a fresh read from file.
        map_filename = Config().prop(Config.key_location_mapper_destination_file,
                                     "conf/desmap.txt")
        DestinationMap.__set_map_filename__(map_filename)
        DestinationMap().__drop__()
        # Set the root of the destination folder if configured
        DestinationMap().set_root_folder(Config().prop(Config.key_destination_root))

        # do all the urls, then report
        self.__do_task__(task)
        self.__do_report__(task)

        # to continue or not to continue
        if once or self.__stop__():
            break

        pause = Config().int_prop(Config.key_sync_pause)
        self.logger.info("Going to sleep for %d seconds." % pause)
        self.logger.debug("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
        time.sleep(pause)

        # A stop may have been requested while sleeping.
        if once or self.__stop__():
            break
def test03_process_baseline(self):
    # Two consecutive baseline runs on source s1: the first creates the
    # resources, the second finds them all unchanged.
    resourcelist_uri = "http://localhost:8000/rs/source/s1/resourcelist.xml"

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")
    des.reporter.reset_instance()

    logger.debug("\n=========== create ==============\n")
    sync = Relisync(resourcelist_uri)
    sync.process_source()
    self.assertEqual(0, len(sync.exceptions))
    self.assertEqual(Status.processed, sync.status)

    reporter = des.reporter.instance()
    # sync_status count: 1 for audit, 1 for create. expected 2
    self.assertEqual(2, len(reporter.sync_status))
    first = reporter.sync_status[0]
    for expected, counter in ((0, "same"), (3, "created"), (0, "updated"),
                              (0, "deleted"), (0, "to_delete")):
        self.assertEqual(expected, getattr(first, counter))
    self.assertIsNone(first.exception)

    logger.debug("\n============ update =============\n")
    sync = Relisync(resourcelist_uri)
    sync.process_source()
    self.assertEqual(0, len(sync.exceptions))
    self.assertEqual(Status.processed, sync.status)

    reporter = des.reporter.instance()
    # sync_status count: 1 for audit, 1 for create (both from previous
    # run), 1 for audit, no update. expected 3
    self.assertEqual(3, len(reporter.sync_status))
    latest = reporter.sync_status[2]
    for expected, counter in ((3, "same"), (0, "created"), (0, "updated"),
                              (0, "deleted"), (0, "to_delete")):
        self.assertEqual(expected, getattr(latest, counter))
    self.assertIsNone(latest.exception)
def test08_try_robots_with_netloc(self):
    # With no explicit destination and the net location enabled, the
    # discoverer must come up with a Reliproc for the loc2 source.
    DestinationMap().__remove_destination__(
        "http://localhost:8000/rs/source/discover/")
    Config().__set_prop__(Config.key_use_netloc, "True")

    found = Discoverer("http://localhost:8000/rs/source/discover/loc2").get_processor()

    self.assertIsInstance(found, proc.Reliproc)
    found.read_source()
def test02_process_audit(self):
    # Audit-only run on source s1: processing succeeds and exactly one
    # sync status is reported.
    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "True")
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")
    des.reporter.reset_instance()

    logger.debug("\n=========================\n")
    audit = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    audit.process_source()
    self.assertEqual(0, len(audit.exceptions))
    self.assertEqual(Status.processed, audit.status)

    status_reporter = des.reporter.instance()
    self.assertEqual(1, len(status_reporter.sync_status))
    status_reporter.sync_status_to_file("logs/audit.csv")
def set_config(self, config_file, section):
    """
    Set the configuration file and section.

    Stores both values on the instance and creates the configuration
    manager from them.

    :param config_file: file name of the config file
    :param section: the section within the config file
    """
    self.config_file, self.section = config_file, section

    # Instantiate the configuration manager from the stored values.
    self.config = Config(self.config_file, self.section)
def test02_new(self):
    # A Config created after setting the filename reports that filename
    # and exposes the properties from the test configuration file.
    config_path = "test-files/config.txt"
    Config.__set_config_filename__(config_path)
    cfg = Config()

    self.assertEqual("test-files/config.txt", cfg._config_filename)
    self.assertEqual("test-files/config.txt", Config.__get_config_filename__())

    logging_conf = cfg.prop(Config.key_logging_configuration_file)
    self.assertEqual("logging.conf", logging_conf)

    desmap_file = cfg.prop(Config.key_location_mapper_destination_file)
    self.assertEqual("test-files/desmap.txt", desmap_file)
def process_source(self):
    """
    Synchronize the source at self.uri with its destination.

    Looks up the destination in the DestinationMap (optionally falling back
    on the net location, depending on configuration). Without a destination
    an exception message is recorded and reported; otherwise the source is
    synchronized. Finally self.status is set to processed or
    processed_with_exceptions.

    :return: None
    """
    use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
    _, destination = DestinationMap().find_destination(
        self.uri, netloc=use_netloc, infix="resources")

    if destination is not None:
        self.__synchronize__(destination)
    else:
        message = "No destination for %s" % self.uri
        self.logger.debug(message)
        self.exceptions.append(message)
        des.reporter.instance().log_status(
            self.uri,
            exception="No destination specified and use of net location prohibited.")

    if self.has_exceptions():
        self.status = Status.processed_with_exceptions
    else:
        self.status = Status.processed
def event_sitemap_received(self, uri, capability, text):
    """
    Save the text of a received sitemap to its local path.

    The local path is looked up in the DestinationMap, using SITEMAP_FOLDER
    as infix. Missing parent directories are created. If no local path can
    be found, a warning is logged and nothing is written.

    :param uri: the uri the sitemap was received from
    :param capability: the capability of the sitemap; used in log messages
    :param text: the sitemap contents to write
    :return: None
    """
    config = Config()
    netloc = config.boolean_prop(Config.key_use_netloc, False)
    # Only the local path is needed; the base uri is discarded.
    _, local_path = DestinationMap().find_local_path(
        uri, netloc=netloc, infix=SITEMAP_FOLDER)

    if local_path is None:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        self.logger.warning(
            "Could not save %s. No local path for %s", capability, uri)
        return

    # os.makedirs("") raises, so only create directories when the path
    # actually has a directory component.
    parent = os.path.dirname(local_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(local_path, "w") as file:
        file.write(text)
    self.logger.debug("Saved %s '%s'", capability, local_path)
def sync_status_to_file(self, filename=None):
    """
    Write the collected sync statuses as CSV lines to a file.

    The file gets a fixed header line followed by one line per item in
    self.sync_status, each formatted with ``%s``.

    :param filename: the file to write. If None, the name is taken from the
        configuration (Config.key_sync_status_report_file), falling back on
        "sync-status.csv".
    :return: None
    """
    if filename is None:
        filename = Config().prop(Config.key_sync_status_report_file, "sync-status.csv")

    header = "date,uri,in_sync,incremental,audit,same,created,updated,deleted,to_delete,exception,origin"
    # The with-statement closes the file; no explicit close() is needed.
    with open(filename, 'w') as file:
        file.write("%s\n" % header)
        # Wrapping the item in a 1-tuple keeps '%' from unpacking an item
        # that happens to be a tuple itself.
        file.writelines("%s\n" % (item,) for item in self.sync_status)

    self.logger.info("Wrote %d source statuses to audit file %s",
                     len(self.sync_status), filename)
def test03_boolean_prop(self):
    # boolean_prop follows the stored string value and falls back on the
    # given default for an unknown key.
    Config.__set_config_filename__("test-files/config.txt")
    cfg = Config()
    netloc_key = Config.key_use_netloc

    self.assertFalse(cfg.boolean_prop(netloc_key))

    cfg.__set_prop__(netloc_key, str(True))
    self.assertTrue(cfg.boolean_prop(netloc_key))

    cfg.__set_prop__(netloc_key, str(False))
    self.assertFalse(cfg.boolean_prop(netloc_key))

    # Unknown key: the default decides.
    self.assertTrue(cfg.boolean_prop("no_key", True))
    self.assertFalse(cfg.boolean_prop("no_key", False))
def setUpModule():
    # Module-level fixture: serve the working directory over HTTP on port
    # 8000 from a daemon thread, then prepare listeners, configuration and
    # the destination map for the tests in this module.
    global server
    server = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
    server_thread = threading.Thread(target=server.serve_forever, daemon=True)
    logger.debug("Starting server at http://localhost:8000/")
    server_thread.start()

    proc.processor_listeners.append(SitemapWriter())

    # Point the singletons at the test files and drop cached instances.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()

    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/discover/", "rs/destination/discover")
def __synchronize__(self, destination):
    """
    Synchronize the source at self.uri with the given destination.

    Delegates the actual work to self.do_synchronize, using the shared
    desclient instance. A ClientFatalError is logged, reported through the
    client and appended to self.exceptions instead of being propagated.

    :param destination: the destination to map self.uri onto
    :return: None
    """
    config = Config()
    checksum = config.boolean_prop(Config.key_use_checksum, True)
    audit_only = config.boolean_prop(Config.key_audit_only, True)
    # Deleting at the destination is only allowed when not merely auditing.
    allow_deletion = not audit_only

    desclient = des.desclient.instance()
    try:
        desclient.set_mappings((self.uri, destination))
        self.do_synchronize(desclient, allow_deletion, audit_only)
    except ClientFatalError as err:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        self.logger.warning("EXCEPTION while syncing %s", self.uri, exc_info=True)
        desclient.log_status(exception=err)
        self.exceptions.append(err)
    finally:
        # A side effect (or a bug) of synchronizing is that the class-level
        # property Client.checksum gets changed. Make sure it is always set
        # back to its configured value before the next source is processed.
        desclient.checksum = checksum
def instance():
    """
    Return the module-wide DesClient, creating it on first use.

    resync.Client is a somewhat heavy class. DesClient inherits from it and
    is adapted to be used during one run of resyncing several sources, so
    one shared instance is kept here for convenience.

    :return: an instance of DesClient
    """
    global _instance
    logger = logging.getLogger(__name__)

    if _instance is not None:
        return _instance

    config = Config()
    # Parameters for the constructor of the resync Client.
    checksum = config.boolean_prop(Config.key_use_checksum, True)
    verbose = False
    # Used by client.baseline_or_audit: an audit-only run is a dry run.
    dryrun = config.boolean_prop(Config.key_audit_only, True)

    _instance = DesClient(checksum, verbose, dryrun)
    logger.debug("Created a new %s [checksum=%s, verbose=%s, dryrun=%s]"
                 % (_instance.__class__.__name__, checksum, verbose, dryrun))
    return _instance
def setUp(self):
    # Every test starts from the test configuration file, with any
    # existing Config instance dropped.
    test_config = "test-files/config.txt"
    Config.__set_config_filename__(test_config)
    Config().__drop__()
def test01_new(self):
    # Creating a Config for a non-existing file must raise
    # FileNotFoundError.
    Config.__set_config_filename__("test-files/no-config.txt")
    self.assertRaises(FileNotFoundError, Config)
def base_line(self, unzipdir):
    """
    Synchronize the unzipped contents of a resource dump with the local
    resources.

    Parses "manifest.xml" in unzipdir, checks that it is a resource dump
    manifest, informs the sitemap listeners and compares the manifest with
    the resources found on disk at the destination. Applying the
    differences is not implemented yet.

    :param unzipdir: the directory of the unzipped packed contents.
    :return: None
    :raises NotImplementedError: always, once the manifest has been parsed
        and compared successfully
    """
    manifest_file_name = os.path.join(unzipdir, "manifest.xml")
    try:
        sitemap = Sitemap()
        # the manifest_doc is a resync.resource_container.ResourceContainer
        manifest_doc = sitemap.parse_xml(fh=manifest_file_name)

        # Explicit raises instead of assert statements: asserts are
        # stripped under 'python -O', which would silently skip these
        # checks. AssertionError is kept because the handler below catches it.
        capability = manifest_doc.capability
        if capability != CAPA_RESOURCEDUMP_MANIFEST:
            raise AssertionError("Capability is not %s but %s" % (
                CAPA_RESOURCEDUMP_MANIFEST, capability))
        self.status = Status.parsed
        self.__inform_sitemap_received__(capability, manifest_file_name)

        config = Config()
        netloc = config.boolean_prop(Config.key_use_netloc, False)
        base_uri, destination = DestinationMap().find_destination(
            self.pack_uri, netloc=netloc)
        if destination is None:
            raise AssertionError("Found no destination folder in DestinationMap")

        mapper = Mapper((base_uri, destination))
        rlb = ResourceListBuilder(mapper=mapper)
        dst_resource_list = rlb.from_disk()
        # Compares on uri
        same, updated, deleted, created = dst_resource_list.compare(manifest_doc)

        # TODO: apply same/updated/deleted/created to the destination. An
        # earlier draft only printed the four collections and looked up
        # local paths via DestinationMap().find_local_path(resource.uri);
        # that code sat after this raise and could never run.
        raise NotImplementedError("This class is not fully implemented.")
    except AssertionError as err:
        self.logger.debug("%s Error: %s" % (self.pack_uri, str(err)))
        self.status = Status.parse_error
        self.exceptions.append(err)
    except SitemapParseError as err:
        # NOTE(review): this message uses self.source_uri while the rest of
        # the method uses self.pack_uri -- confirm the attribute exists.
        self.logger.debug("%s Unreadable source: %s" % (self.source_uri, str(err)))
        self.status = Status.parse_error
        self.exceptions.append(err)

    # Only reached after a handled exception, since success raises above.
    self.status = Status.processed_with_exceptions if self.has_exceptions() else Status.processed
def setUp(self):
    # Use the test configuration, register a SitemapWriter as dump
    # listener and map the source root onto destination d7.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()

    # NOTE(review): a new listener is appended on every setUp; confirm the
    # listener list is cleared elsewhere if duplicates matter.
    sitemap_writer = SitemapWriter()
    des.dump.dump_listeners.append(sitemap_writer)

    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source", "rs/destination/d7")
def setUp(self):
    # Point Config and DestinationMap at their test files, drop existing
    # instances of both and reset the shared desclient.
    config_file = "test-files/config.txt"
    desmap_file = "test-files/desmap.txt"

    Config.__set_config_filename__(config_file)
    Config().__drop__()

    DestinationMap.__set_map_filename__(desmap_file)
    DestinationMap().__drop__()

    des.desclient.reset_instance()