def test01_no_destination_no_connection(self):
    """Process http://bla.com unmapped, mapped without connection, and via netloc."""
    source_uri = "http://bla.com"
    Config().__set_prop__(Config.key_use_netloc, "False")
    DestinationMap().__remove_destination__(source_uri)
    des.reporter.reset_instance()

    # Phase 1: nothing mapped -- processing returns at "no destination".
    logger.debug("\n============ no destination =============\n")
    sync = Relisync(source_uri)
    self.assertEqual(Status.init, sync.status)
    sync.process_source()
    self.assertEqual(1, len(sync.exceptions))
    self.assertEqual("No destination for http://bla.com", sync.exceptions[0])
    self.assertEqual(Status.processed_with_exceptions, sync.status)

    # Phase 2: mapped, but no connection can be made; ends in a caught exception.
    DestinationMap().__set_destination__(source_uri, "destination_x")
    logger.debug("\n============destination, no connection =============\n")
    sync = Relisync(source_uri)
    sync.process_source()
    self.assertEqual(1, len(sync.exceptions))
    self.assertEqual(Status.processed_with_exceptions, sync.status)
    reporter = des.reporter.instance()
    self.assertEqual(2, len(reporter.sync_status))
    self.assertIsNotNone(reporter.sync_status[0].exception)

    # Phase 3: net location 'bla.com' serves as destination, still no connection.
    Config().__set_prop__(Config.key_use_netloc, "True")
    DestinationMap().__remove_destination__(source_uri)
    logger.debug("\n=========== using netloc, still no connection ==============\n")
    sync = Relisync(source_uri)
    sync.process_source()
    self.assertEqual(1, len(sync.exceptions))
    self.assertEqual(Status.processed_with_exceptions, sync.status)
    self.assertEqual(3, len(reporter.sync_status))
    self.assertIsNotNone(reporter.sync_status[1].exception)
def test07_process_source(self):
    """Read the s6 source over HTTP and check that its sitemap is saved locally.

    Expects a server answering on http://localhost:8000/rs/source/s6/ (the
    test asserts a 200 source status) and verifies that the file
    ``rs/destination/d6/sitemaps/.well-known/resourcesync`` exists afterwards.
    """
    # connection and readable resourcesync, write sitemap to file
    # Best-effort removal of output from a previous run. ignore_errors keeps
    # the cleanup silent without a bare ``except`` that would also swallow
    # KeyboardInterrupt and SystemExit.
    shutil.rmtree("rs/destination/d6/sitemaps", ignore_errors=True)

    # Start from freshly loaded configuration, destination map and reporter.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    des.reporter.reset_instance()

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/s6", "rs/destination/d6")
    # The SitemapWriter listener is what is expected to write the file
    # asserted at the end of this test.
    des.processor.processor_listeners.append(SitemapWriter())

    base_uri = "http://localhost:8000/rs/source/s6/"
    sdproc = Sodesproc(base_uri)
    sdproc.read_source()
    self.assertEqual(200, sdproc.source_status)
    self.assertEqual(Status.document, sdproc.status)
    self.assertTrue(
        os.path.isfile(
            "rs/destination/d6/sitemaps/.well-known/resourcesync"))
def test_set_filename_once(self):
    """The map filename keeps its first value; a second set call has no effect."""
    first_name = "test-files/desmap.txt"

    DestinationMap.__set_map_filename__(first_name)
    self.assertEqual(first_name, DestinationMap._get_map_filename())
    self.assertEqual(first_name, DestinationMap()._map_filename)

    # A later attempt with a different name must leave the first one in place.
    DestinationMap.__set_map_filename__("test-files/other-desmap.txt")
    self.assertEqual(first_name, DestinationMap._get_map_filename())
    self.assertEqual(first_name, DestinationMap()._map_filename)
def test_find_local_path(self):
    """find_local_path keeps the uri path below the mapped base, with optional infix."""
    desmap = DestinationMap()
    # (mapped base uri, local folder, uri to resolve, extra keyword arguments,
    #  expected base uri, expected local path)
    cases = (
        ("http://a.name.com/path/ignored", "local/folder/a",
         "http://a.name.com/path/ignored/but/this/path/remains/file.txt", {},
         "http://a.name.com/path/ignored",
         "./local/folder/a/but/this/path/remains/file.txt"),
        # a trailing slash on the mapped base is not part of the reported base uri
        ("http://b.name.com/path/ignored/", "local/folder/b",
         "http://b.name.com/path/ignored/but/this/path/remains/file.txt", {},
         "http://b.name.com/path/ignored",
         "./local/folder/b/but/this/path/remains/file.txt"),
        # the infix is inserted between the local folder and the remaining path
        ("http://c.name.com/path/ignored", "local/folder/c",
         "http://c.name.com/path/ignored/but/this/path/remains/file.txt",
         {"infix": "infix"},
         "http://c.name.com/path/ignored",
         "./local/folder/c/infix/but/this/path/remains/file.txt"),
    )
    for mapped_base, folder, uri, extra, expected_base, expected_path in cases:
        desmap.__set_destination__(mapped_base, folder)
        base_uri, local_path = desmap.find_local_path(uri, **extra)
        self.assertEqual(expected_base, base_uri)
        self.assertEqual(expected_path, local_path)
def setUpModule():
    """Start an HTTP server on port 8000 in a daemon thread and prepare the fixtures.

    The server object is kept in the module-level ``server`` name. Config and
    DestinationMap are pointed at the test files and dropped, a SitemapWriter
    is registered as processor listener and the 'discover' source is mapped.
    """
    global server
    server = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
    serve_thread = threading.Thread(target=server.serve_forever)
    # daemon thread: does not keep the interpreter alive once the tests finish
    serve_thread.daemon = True
    logger.debug("Starting server at http://localhost:8000/")
    serve_thread.start()

    proc.processor_listeners.append(SitemapWriter())

    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()

    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/discover/",
        "rs/destination/discover",
    )
def test_02_change(self):
    """Baseline-sync source s1, change one resource, then process the changelist."""
    source_root = "http://localhost:8000/rs/source/s1"
    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(source_root, "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")

    logger.debug("\n=========== create ==============\n")
    baseline = Relisync(source_root + "/resourcelist.xml")
    baseline.process_source()
    self.assertEqual(0, len(baseline.exceptions))
    self.assertEqual(Status.processed, baseline.status)

    __change_resource__("s1", "resource1.txt")
    __create_changelist__("s1")

    logger.debug("\n=========== change ==============\n")
    incremental = Chanlisync(source_root + "/changelist.xml")
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)

    reporter = des.reporter.instance()
    self.assertEqual(4, len(reporter.sync_status))
    # the fourth entry is the one inspected for the changelist run
    entry = reporter.sync_status[3]
    # self.assertEqual(1, entry.same)  -- earlier expectation, disabled
    self.assertIsNone(entry.same)
    for attribute, expected in (("created", 0), ("updated", 1),
                                ("deleted", 0), ("to_delete", 0)):
        self.assertEqual(expected, getattr(entry, attribute))
    self.assertIsNone(entry.exception)
    reporter.sync_status_to_file("logs/incremental-change.csv")
def test04_process_baseline_netloc(self):
    """Baseline-sync source s1 with the net location as destination."""
    Config().__set_prop__(Config.key_use_netloc, "True")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__remove_destination__("http://localhost:8000/rs/source/s1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")

    # An existing 'localhost:8000' folder means an earlier run already synced.
    already_synced = os.path.isdir("localhost:8000")
    if already_synced:
        logger.debug("Expecting only audit")
    else:
        logger.debug("Expecting update")
    # Only referenced by the disabled assertion further down.
    expected_sync_status_count = 1 if already_synced else 2

    logger.debug("\n=========================\n")
    sync = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    sync.process_source()
    self.assertEqual(0, len(sync.exceptions))
    self.assertEqual(Status.processed, sync.status)

    reporter = des.reporter.instance()
    # depends on whether test is run individually or in group
    #self.assertEqual(expected_sync_status_count, len(reporter.sync_status))
    reporter.sync_status_to_file("logs/baseline-netloc.csv")
def run(self, sources, task="discover", once=False):
    """
    Run the DesRunner: read the sources, synchronize, report, and repeat.

    A running application can be stopped by creating a file named 'stop' in the
    directory the runner was started from.

    Source urls are read from the file given in param 'sources'. This file is
    read at the start of every round, so source urls can be extended or changed
    without restarting the application.

    Sources are mapped to the destinations given in the file denoted by the
    configuration parameter "location_mapper_destination_file".

    :param sources: the file containing source urls
    :param task: the task to run.
        - If source urls can all be discovered by reading the
          .well-known/resourcesync on each source, use 'wellknown'.
        - If all source urls point to capability lists, use 'capability'.
        - If source urls are heterogeneous, use 'discover'.
    :param once: True for exploring source urls once and then exit, False otherwise
    :return:
    """
    while True:
        # list of urls
        self.logger.info("Reading source urls from '%s'" % sources)
        self.__read_sources_doc__(sources)

        # reset url --> destination map. New mappings may be configured
        map_filename = Config().prop(
            Config.key_location_mapper_destination_file, "conf/desmap.txt")
        DestinationMap.__set_map_filename__(map_filename)
        # drop to force fresh read from file
        DestinationMap().__drop__()
        # Set the root of the destination folder if configured
        desmap = DestinationMap()
        desmap.set_root_folder(Config().prop(Config.key_destination_root))

        # do all the urls, then report
        self.__do_task__(task)
        self.__do_report__(task)

        # to continue or not to continue
        if once or self.__stop__():
            break

        pause = Config().int_prop(Config.key_sync_pause)
        self.logger.info("Going to sleep for %d seconds." % pause)
        self.logger.debug("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")
        time.sleep(pause)

        # a stop may have been requested while sleeping
        if once or self.__stop__():
            break
def test08_try_robots_with_netloc(self):
    """With netloc enabled and no mapping, discovery of loc2 yields a Reliproc."""
    DestinationMap().__remove_destination__(
        "http://localhost:8000/rs/source/discover/")
    Config().__set_prop__(Config.key_use_netloc, "True")

    found = Discoverer("http://localhost:8000/rs/source/discover/loc2").get_processor()
    self.assertIsInstance(found, proc.Reliproc)
    found.read_source()
def test_03_change_delete(self):
    """Baseline-sync s2, then apply a changelist holding an update and a delete.

    The same changelist is processed a second time to check that a repeat run
    reports no changes.
    """
    source_root = "http://localhost:8000/rs/source/s2"
    changelist_uri = source_root + "/changelist.xml"

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__(source_root, "rs/destination/d2")
    __clear_destination__("d2")
    __clear_sources_xml__("s2")
    __add_resource__("s2", "added.txt")
    __create_resourcelist__("s2")

    logger.debug("\n=========== create ==============\n")
    baseline = Relisync(source_root + "/resourcelist.xml")
    baseline.process_source()
    self.assertEqual(0, len(baseline.exceptions))
    self.assertEqual(Status.processed, baseline.status)

    __change_resource__("s2", "resource2.txt")
    __delete_resource__("s2", "added.txt")
    __create_changelist__("s2")
    des.reporter.reset_instance()
    #time.sleep(5)

    logger.debug("\n=========== update + delete ==============\n")
    incremental = Chanlisync(changelist_uri)
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)
    reporter = des.reporter.instance()
    reporter.sync_status_to_file("logs/incremental-change-delete.csv")
    self.assertEqual(2, len(reporter.sync_status))
    entry = reporter.sync_status[1]
    self.assertIsNone(entry.same)
    for attribute, expected in (("created", 0), ("updated", 1),
                                ("deleted", 1), ("to_delete", 1)):
        self.assertEqual(expected, getattr(entry, attribute))
    self.assertIsNone(entry.exception)

    des.reporter.reset_instance()
    logger.debug("\n=========== no change ==============\n")
    incremental = Chanlisync(changelist_uri)
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)
    reporter = des.reporter.instance()
    self.assertEqual(1, len(reporter.sync_status))
    entry = reporter.sync_status[0]
    self.assertIsNone(entry.same)
    for attribute in ("created", "updated", "deleted", "to_delete"):
        self.assertEqual(0, getattr(entry, attribute))
    self.assertIsNone(entry.exception)
def process_source(self):
    """Synchronize self.uri into its mapped destination, or record why it cannot.

    The destination is looked up with the "resources" infix. When none is
    found, the problem is appended to self.exceptions and logged to the
    reporter. self.status is set from has_exceptions() in both cases.
    """
    use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
    # only the destination half of the returned pair is needed here
    _, destination = DestinationMap().find_destination(
        self.uri, netloc=use_netloc, infix="resources")

    if destination is not None:
        self.__synchronize__(destination)
    else:
        self.logger.debug("No destination for %s" % self.uri)
        self.exceptions.append("No destination for %s" % self.uri)
        des.reporter.instance().log_status(
            self.uri,
            exception="No destination specified and use of net location prohibited.")

    if self.has_exceptions():
        self.status = Status.processed_with_exceptions
    else:
        self.status = Status.processed
def test_find_destination_with_infix(self):
    """find_destination appends the given infix to the mapped local folder."""
    desmap = DestinationMap()
    desmap.__set_destination__("http://a.name.com/path/ignored", "local/folder/a")
    uri = "http://a.name.com/path/ignored/but/this/path/remains/file.txt"

    expectations = (
        ("resources", "./local/folder/a/resources"),
        ("sitemaps", "./local/folder/a/sitemaps"),
    )
    for infix, expected_destination in expectations:
        base_uri, destination = desmap.find_destination(uri, infix=infix)
        self.assertEqual("http://a.name.com/path/ignored", base_uri)
        self.assertEqual(expected_destination, destination)
def event_sitemap_received(self, uri, capability, text):
    """Save a received sitemap under the local path mapped for its uri.

    The local path is resolved through DestinationMap.find_local_path with
    SITEMAP_FOLDER as infix, honouring the use_netloc configuration switch.
    Missing parent folders are created.

    :param uri: the uri the sitemap was read from
    :param capability: the capability of the sitemap; only used in log messages
    :param text: the sitemap contents, written to the file as-is
    :return:
    """
    netloc = Config().boolean_prop(Config.key_use_netloc, False)
    # the base uri half of the result is not needed here
    _, local_path = DestinationMap().find_local_path(
        uri, netloc=netloc, infix=SITEMAP_FOLDER)

    if local_path is None:
        # Logger.warn is a deprecated alias; warning() is the supported call.
        self.logger.warning("Could not save %s. No local path for %s" % (capability, uri))
        return

    folder = os.path.dirname(local_path)
    # os.makedirs("") raises, so skip it when the path has no directory part.
    if folder:
        os.makedirs(folder, exist_ok=True)
    with open(local_path, "w") as sitemap_file:
        sitemap_file.write(text)
    self.logger.debug("Saved %s '%s'" % (capability, local_path))
def test03_process_baseline(self):
    """Baseline-sync source s1 twice: first run creates, second finds all the same."""
    resourcelist_uri = "http://localhost:8000/rs/source/s1/resourcelist.xml"

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")
    des.reporter.reset_instance()

    logger.debug("\n=========== create ==============\n")
    first_run = Relisync(resourcelist_uri)
    first_run.process_source()
    self.assertEqual(0, len(first_run.exceptions))
    self.assertEqual(Status.processed, first_run.status)

    reporter = des.reporter.instance()
    # sync_status count: 1 for audit, 1 for create. expected 2
    # print(reporter.sync_status_to_string())
    self.assertEqual(2, len(reporter.sync_status))
    entry = reporter.sync_status[0]
    for attribute, expected in (("same", 0), ("created", 3), ("updated", 0),
                                ("deleted", 0), ("to_delete", 0)):
        self.assertEqual(expected, getattr(entry, attribute))
    self.assertIsNone(entry.exception)
    #reporter.sync_status_to_file("logs/baseline.csv")

    logger.debug("\n============ update =============\n")
    second_run = Relisync(resourcelist_uri)
    second_run.process_source()
    self.assertEqual(0, len(second_run.exceptions))
    self.assertEqual(Status.processed, second_run.status)

    reporter = des.reporter.instance()
    # sync_status count: 1 for audit, 1 for create (both from previous run),
    # 1 for audit, no update. expected 3
    self.assertEqual(3, len(reporter.sync_status))
    entry = reporter.sync_status[2]
    for attribute, expected in (("same", 3), ("created", 0), ("updated", 0),
                                ("deleted", 0), ("to_delete", 0)):
        self.assertEqual(expected, getattr(entry, attribute))
    self.assertIsNone(entry.exception)
def test02_process_audit(self):
    """With audit_only enabled, processing s1 leaves a single sync status entry."""
    source_root = "http://localhost:8000/rs/source/s1"

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "True")
    DestinationMap().__set_destination__(source_root, "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")
    des.reporter.reset_instance()

    logger.debug("\n=========================\n")
    audit = Relisync(source_root + "/resourcelist.xml")
    audit.process_source()
    self.assertEqual(0, len(audit.exceptions))
    self.assertEqual(Status.processed, audit.status)

    reporter = des.reporter.instance()
    self.assertEqual(1, len(reporter.sync_status))
    reporter.sync_status_to_file("logs/audit.csv")
def base_line(self, unzipdir):
    """
    Synchronize the unzipped contents of a resource dump with the local resources

    Parses ``manifest.xml`` from ``unzipdir``, checks that its capability is
    CAPA_RESOURCEDUMP_MANIFEST, resolves the destination for ``self.pack_uri``
    and compares the resources found on disk with the manifest.

    NOTE: the method is unfinished. Once the comparison has been made it
    unconditionally raises NotImplementedError, which neither handler below
    catches, so it propagates to the caller. The statements after the
    ``raise`` are unreachable.

    :param unzipdir: the directory of the unzipped packed contents.
    :return:
    :raises NotImplementedError: whenever parsing, destination lookup and
        comparison all complete without a handled error.
    """
    manifest_file_name = os.path.join(unzipdir, "manifest.xml")
    try:
        sitemap = Sitemap()
        manifest_doc = sitemap.parse_xml(fh=manifest_file_name)
        # the manifest_doc is a resync.resource_container.ResourceContainer
        capability = manifest_doc.capability
        # NOTE(review): validation by assert is stripped under ``python -O``;
        # the AssertionError handler below depends on it being active.
        assert capability == CAPA_RESOURCEDUMP_MANIFEST, "Capability is not %s but %s" % (
            CAPA_RESOURCEDUMP_MANIFEST, capability)
        self.status = Status.parsed
        self.__inform_sitemap_received__(capability, manifest_file_name)

        config = Config()
        netloc = config.boolean_prop(Config.key_use_netloc, False)
        base_uri, destination = DestinationMap().find_destination(
            self.pack_uri, netloc=netloc)
        assert destination is not None, "Found no destination folder in DestinationMap"
        mapper = Mapper((base_uri, destination))
        rlb = ResourceListBuilder(mapper=mapper)
        dst_resource_list = rlb.from_disk()
        # Compares on uri
        same, updated, deleted, created = dst_resource_list.compare(
            manifest_doc)

        # Everything up to here works; the actual synchronization is missing.
        raise NotImplementedError("This class is not fully implemented.")

        # --- unreachable from here to the end of the try body ---
        # Debug output sketching the intended follow-up.
        # NOTE(review): the nesting of the last two statements inside the
        # 'created' loop is presumed -- confirm against version control.
        print(len(same), len(updated), len(deleted), len(created))
        print("same")
        for resource in same:
            print(resource)
        print("updated")
        for resource in updated:
            print(resource)
        print("deleted")
        for resource in deleted:
            print(resource)
        print("created")
        for resource in created:
            print(resource)
            base_uri, local_path = DestinationMap().find_local_path(
                resource.uri)
            print(base_uri, local_path)

    except AssertionError as err:
        # wrong capability or no destination found
        self.logger.debug("%s Error: %s" % (self.pack_uri, str(err)))
        self.status = Status.parse_error
        self.exceptions.append(err)
    except SitemapParseError as err:
        # NOTE(review): this message uses self.source_uri while the rest of
        # the method uses self.pack_uri -- confirm the attribute exists.
        self.logger.debug("%s Unreadable source: %s" %
                          (self.source_uri, str(err)))
        self.status = Status.parse_error
        self.exceptions.append(err)

    # Only reached after one of the handled errors above (the success path
    # raises NotImplementedError); overrides the parse_error status set there.
    self.status = Status.processed_with_exceptions if self.has_exceptions(
    ) else Status.processed
def setUp(self):
    """Reload Config from the test file, add a SitemapWriter dump listener, map d7."""
    config_file = "test-files/config.txt"
    Config.__set_config_filename__(config_file)
    Config().__drop__()

    listeners = des.dump.dump_listeners
    listeners.append(SitemapWriter())

    desmap = DestinationMap()
    desmap.__set_destination__("http://localhost:8000/rs/source", "rs/destination/d7")
def test_find_destination(self):
    """Check find_destination against the mappings in test-files/desmap.txt."""
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    desmap = DestinationMap()

    # resource, folder and bare-host forms of the same host share one mapping
    for uri in (
        "http://long.name.com/path/to/resource.xml",
        "http://long.name.com/path/to/",
        "http://long.name.com/",
        "http://long.name.com",
    ):
        base_uri, destination = desmap.find_destination(uri)
        self.assertEqual("http://long.name.com", base_uri)
        self.assertEqual("./destination1", destination)

    # explicit path to resource in desmap
    base_uri, destination = desmap.find_destination("https://first.com:8080/path1")
    self.assertEqual("https://first.com:8080", base_uri)
    self.assertIsNone(destination)

    explicit_cases = (
        ("https://first.com:8080/path1/to/resource.xml",
         "https://first.com:8080/path1/to/resource.xml", "./destination2"),
        ("https://first.com:8080/path2/",
         "https://first.com:8080/path2", "./destination3"),
        ("https://first.com:8080/path2",
         "https://first.com:8080/path2", "./destination3"),
    )
    for uri, expected_base, expected_destination in explicit_cases:
        base_uri, destination = desmap.find_destination(uri)
        self.assertEqual(expected_base, base_uri)
        self.assertEqual(expected_destination, destination)

    unmapped_uri = "https://not.mapped.com/resource.xml"

    # unmapped host, second positional argument supplies the destination
    base_uri, destination = desmap.find_destination(unmapped_uri, "default/path")
    self.assertEqual("https://not.mapped.com", base_uri)
    self.assertEqual("./default/path", destination)

    # unmapped host, netloc=True: the net location is used as destination
    base_uri, destination = desmap.find_destination(unmapped_uri, netloc=True)
    self.assertEqual("https://not.mapped.com", base_uri)
    self.assertEqual("./not.mapped.com", destination)

    # with a root folder set, the destination is prefixed by it
    desmap.set_root_folder("foo/bar")
    base_uri, destination = desmap.find_destination(
        "http://long.name.com/path/to/resource.xml")
    self.assertEqual("http://long.name.com", base_uri)
    self.assertEqual("foo/bar/destination1", destination)
def setUp(self):
    """Point Config and DestinationMap at the test files, drop both, reset desclient."""
    config_file = "test-files/config.txt"
    desmap_file = "test-files/desmap.txt"

    Config.__set_config_filename__(config_file)
    Config().__drop__()

    DestinationMap.__set_map_filename__(desmap_file)
    DestinationMap().__drop__()

    des.desclient.reset_instance()
def setUp(self):
    """Point DestinationMap at the test mapping file and drop its current state."""
    desmap_file = "test-files/desmap.txt"
    DestinationMap.__set_map_filename__(desmap_file)
    desmap = DestinationMap()
    desmap.__drop__()