def test01_no_destination_no_connection(self):
    # Runs Relisync against an unreachable source in three configurations and
    # checks that every run ends in Status.processed_with_exceptions.
    source_uri = "http://bla.com"

    Config().__set_prop__(Config.key_use_netloc, "False")
    DestinationMap().__remove_destination__(source_uri)
    des.reporter.reset_instance()

    logger.debug("\n============ no destination =============\n")
    # Phase 1: nothing mapped and netloc disabled, so processing stops early.
    unmapped_run = Relisync(source_uri)
    self.assertEqual(Status.init, unmapped_run.status)
    unmapped_run.process_source()
    self.assertEqual(1, len(unmapped_run.exceptions))
    self.assertEqual("No destination for http://bla.com", unmapped_run.exceptions[0])
    self.assertEqual(Status.processed_with_exceptions, unmapped_run.status)

    # Phase 2: a destination exists, but the connection attempt fails and the
    # failure is recorded as an exception.
    DestinationMap().__set_destination__(source_uri, "destination_x")
    logger.debug("\n============destination, no connection =============\n")
    mapped_run = Relisync(source_uri)
    mapped_run.process_source()
    self.assertEqual(1, len(mapped_run.exceptions))
    self.assertEqual(Status.processed_with_exceptions, mapped_run.status)

    reporter = des.reporter.instance()
    self.assertEqual(2, len(reporter.sync_status))
    # NOTE(review): index 0 is the entry from phase 1, not the one just
    # added - confirm whether index 1 was intended.
    self.assertIsNotNone(reporter.sync_status[0].exception)

    # Phase 3: net location 'bla.com' serves as destination, still no connection.
    Config().__set_prop__(Config.key_use_netloc, "True")
    DestinationMap().__remove_destination__(source_uri)
    logger.debug("\n=========== using netloc, still no connection ==============\n")
    netloc_run = Relisync(source_uri)
    netloc_run.process_source()
    self.assertEqual(1, len(netloc_run.exceptions))
    self.assertEqual(Status.processed_with_exceptions, netloc_run.status)
    # The reporter obtained after phase 2 is reused here on purpose.
    self.assertEqual(3, len(reporter.sync_status))
    # NOTE(review): index 1 belongs to phase 2 - confirm whether index 2 was intended.
    self.assertIsNotNone(reporter.sync_status[1].exception)
def test_find_destination_with_infix(self):
    # The infix is appended to the mapped destination folder; the part of the
    # URI below the mapped base does not show up in the result.
    mapped_base = "http://a.name.com/path/ignored"
    desmap = DestinationMap()
    desmap.__set_destination__(mapped_base, "local/folder/a")
    uri = "http://a.name.com/path/ignored/but/this/path/remains/file.txt"

    expectations = (
        ("resources", "./local/folder/a/resources"),
        ("sitemaps", "./local/folder/a/sitemaps"),
    )
    for infix, expected_destination in expectations:
        found_base, found_destination = desmap.find_destination(uri, infix=infix)
        self.assertEqual("http://a.name.com/path/ignored", found_base)
        self.assertEqual(expected_destination, found_destination)
def setUpModule():
    """Start a background HTTP server on port 8000 and reset shared test state.

    The server is stored in the module-level ``server`` global. Config and
    DestinationMap are pointed at the files under ``test-files`` and dropped,
    after which a single destination for the discover source is registered.
    """
    global server
    # An empty host name binds the server to all available interfaces.
    server = HTTPServer(('', 8000), SimpleHTTPRequestHandler)
    # Daemon thread: a still-running server must not keep the interpreter alive.
    serve_thread = threading.Thread(target=server.serve_forever, daemon=True)
    logger.debug("Starting server at http://localhost:8000/")
    serve_thread.start()

    proc.processor_listeners.append(SitemapWriter())

    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()

    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    DestinationMap().__set_destination__(
        "http://localhost:8000/rs/source/discover/",
        "rs/destination/discover",
    )
def test_02_change(self):
    # Baseline sync of source s1 followed by a changelist sync after one
    # resource was modified; the changelist run must report one update.
    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")

    logger.debug("\n=========== create ==============\n")
    baseline = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    baseline.process_source()
    self.assertEqual(0, len(baseline.exceptions))
    self.assertEqual(Status.processed, baseline.status)

    __change_resource__("s1", "resource1.txt")
    __create_changelist__("s1")

    logger.debug("\n=========== change ==============\n")
    incremental = Chanlisync("http://localhost:8000/rs/source/s1/changelist.xml")
    incremental.process_source()
    self.assertEqual(0, len(incremental.exceptions))
    self.assertEqual(Status.processed, incremental.status)

    reporter = des.reporter.instance()
    # The reporter is not reset in this test; four entries are expected.
    self.assertEqual(4, len(reporter.sync_status))
    change_entry = reporter.sync_status[3]
    # An earlier version asserted same == 1; the entry currently reports None.
    self.assertIsNone(change_entry.same)
    self.assertEqual(0, change_entry.created)
    self.assertEqual(1, change_entry.updated)
    self.assertEqual(0, change_entry.deleted)
    self.assertEqual(0, change_entry.to_delete)
    self.assertIsNone(change_entry.exception)
    reporter.sync_status_to_file("logs/incremental-change.csv")
def test04_process_baseline_netloc(self):
    # Baseline sync of source s1 with no explicit mapping: the destination is
    # derived from the net location because key_use_netloc is "True".
    Config().__set_prop__(Config.key_use_netloc, "True")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__remove_destination__("http://localhost:8000/rs/source/s1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")

    netloc_folder_present = os.path.isdir("localhost:8000")
    logger.debug("Expecting only audit" if netloc_folder_present else "Expecting update")
    # Kept for the disabled count assertion further down.
    expected_sync_status_count = 1 if netloc_folder_present else 2

    logger.debug("\n=========================\n")
    sync = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    sync.process_source()
    self.assertEqual(0, len(sync.exceptions))
    self.assertEqual(Status.processed, sync.status)

    reporter = des.reporter.instance()
    # The number of status entries depends on whether the test is run
    # individually or in a group, so the count check stays disabled:
    #self.assertEqual(expected_sync_status_count, len(reporter.sync_status))
    reporter.sync_status_to_file("logs/baseline-netloc.csv")
def setUpModule():
    """Serve the working directory over HTTP and initialise shared fixtures.

    Creates the module-level ``server`` (port 8000), runs it on a daemon
    thread, registers a SitemapWriter listener, and resets Config and
    DestinationMap before mapping the discover source.
    """
    global server
    listen_on = ('', 8000)  # empty host: listen on every interface
    server = HTTPServer(listen_on, SimpleHTTPRequestHandler)

    worker = threading.Thread(target=server.serve_forever)
    worker.daemon = True
    logger.debug("Starting server at http://localhost:8000/")
    worker.start()

    proc.processor_listeners.append(SitemapWriter())

    # Point both stores at the test files and drop the current instances.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()

    discover_source = "http://localhost:8000/rs/source/discover/"
    DestinationMap().__set_destination__(discover_source, "rs/destination/discover")
def test_shorten_01(self):
    # Each call to shorten() is fed the URI returned by the previous call.
    # Once only scheme and host are left, the result no longer changes.
    expected_steps = [
        ("http://long.name.com/des/ti/na/tion/path", "/des/ti/na/tion/path"),
        ("http://long.name.com/des/ti/na/tion", "/des/ti/na/tion"),
        ("http://long.name.com/des/ti/na", "/des/ti/na"),
        ("http://long.name.com/des/ti", "/des/ti"),
        ("http://long.name.com/des", "/des"),
        ("http://long.name.com", ""),
        ("http://long.name.com", ""),
    ]
    current_uri = "http://long.name.com/des/ti/na/tion/path/file.xml"
    for expected_uri, expected_path in expected_steps:
        current_uri, current_path = DestinationMap.shorten(current_uri)
        self.assertEqual(expected_uri, current_uri)
        self.assertEqual(expected_path, current_path)
def test_shorten_02(self):
    # Same walk as for http URIs, but with a file URI: the path shrinks one
    # segment per call until only "file://" is left.
    expected_steps = [
        ("file:///Users/you", "/Users/you"),
        ("file:///Users", "/Users"),
        ("file://", ""),
        ("file://", ""),
    ]
    current_uri = "file:///Users/you/git"
    for expected_uri, expected_path in expected_steps:
        current_uri, current_path = DestinationMap.shorten(current_uri)
        self.assertEqual(expected_uri, current_uri)
        self.assertEqual(expected_path, current_path)
def test08_try_robots_with_netloc(self):
    # With the explicit discover mapping removed and netloc enabled, the
    # Discoverer must hand out a Reliproc for the loc2 source.
    DestinationMap().__remove_destination__("http://localhost:8000/rs/source/discover/")
    Config().__set_prop__(Config.key_use_netloc, "True")

    source_uri = "http://localhost:8000/rs/source/discover/loc2"
    found_processor = Discoverer(source_uri).get_processor()
    self.assertIsInstance(found_processor, proc.Reliproc)
    found_processor.read_source()
def test_set_filename_once(self):
    # A second __set_map_filename__ call must not override the first one.
    first_name = "test-files/desmap.txt"

    DestinationMap.__set_map_filename__(first_name)
    self.assertEqual("test-files/desmap.txt", DestinationMap._get_map_filename())
    self.assertEqual("test-files/desmap.txt", DestinationMap()._map_filename)

    DestinationMap.__set_map_filename__("test-files/other-desmap.txt")
    self.assertEqual("test-files/desmap.txt", DestinationMap._get_map_filename())
    self.assertEqual("test-files/desmap.txt", DestinationMap()._map_filename)
def test_03_change_delete(self):
    # Baseline sync of source s2, then a changelist with one update and one
    # deletion, then the same changelist again, which must be a no-op.
    resourcelist_uri = "http://localhost:8000/rs/source/s2/resourcelist.xml"
    changelist_uri = "http://localhost:8000/rs/source/s2/changelist.xml"

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__("http://localhost:8000/rs/source/s2", "rs/destination/d2")
    __clear_destination__("d2")
    __clear_sources_xml__("s2")
    __add_resource__("s2", "added.txt")
    __create_resourcelist__("s2")

    logger.debug("\n=========== create ==============\n")
    baseline = Relisync(resourcelist_uri)
    baseline.process_source()
    self.assertEqual(0, len(baseline.exceptions))
    self.assertEqual(Status.processed, baseline.status)

    __change_resource__("s2", "resource2.txt")
    __delete_resource__("s2", "added.txt")
    __create_changelist__("s2")
    des.reporter.reset_instance()

    logger.debug("\n=========== update + delete ==============\n")
    first_pass = Chanlisync(changelist_uri)
    first_pass.process_source()
    self.assertEqual(0, len(first_pass.exceptions))
    self.assertEqual(Status.processed, first_pass.status)

    reporter = des.reporter.instance()
    reporter.sync_status_to_file("logs/incremental-change-delete.csv")
    self.assertEqual(2, len(reporter.sync_status))
    change_entry = reporter.sync_status[1]
    self.assertIsNone(change_entry.same)
    self.assertEqual(0, change_entry.created)
    self.assertEqual(1, change_entry.updated)
    self.assertEqual(1, change_entry.deleted)
    self.assertEqual(1, change_entry.to_delete)
    self.assertIsNone(change_entry.exception)

    des.reporter.reset_instance()
    logger.debug("\n=========== no change ==============\n")
    second_pass = Chanlisync(changelist_uri)
    second_pass.process_source()
    self.assertEqual(0, len(second_pass.exceptions))
    self.assertEqual(Status.processed, second_pass.status)

    # Fresh reporter after the reset: a single entry with nothing to do.
    reporter = des.reporter.instance()
    self.assertEqual(1, len(reporter.sync_status))
    idle_entry = reporter.sync_status[0]
    self.assertIsNone(idle_entry.same)
    self.assertEqual(0, idle_entry.created)
    self.assertEqual(0, idle_entry.updated)
    self.assertEqual(0, idle_entry.deleted)
    self.assertEqual(0, idle_entry.to_delete)
    self.assertIsNone(idle_entry.exception)
def test_find_local_path(self):
    # find_local_path() replaces the mapped base URI with the destination
    # folder and keeps the rest of the path; an infix, when given, is placed
    # between the two.
    desmap = DestinationMap()
    # (mapped base, destination, uri, extra keyword args, expected base, expected path)
    scenarios = (
        ("http://a.name.com/path/ignored", "local/folder/a",
         "http://a.name.com/path/ignored/but/this/path/remains/file.txt", {},
         "http://a.name.com/path/ignored",
         "./local/folder/a/but/this/path/remains/file.txt"),
        # Mapped with a trailing slash; the returned base has none.
        ("http://b.name.com/path/ignored/", "local/folder/b",
         "http://b.name.com/path/ignored/but/this/path/remains/file.txt", {},
         "http://b.name.com/path/ignored",
         "./local/folder/b/but/this/path/remains/file.txt"),
        ("http://c.name.com/path/ignored", "local/folder/c",
         "http://c.name.com/path/ignored/but/this/path/remains/file.txt", {"infix": "infix"},
         "http://c.name.com/path/ignored",
         "./local/folder/c/infix/but/this/path/remains/file.txt"),
    )
    for mapped_base, folder, uri, extra, expected_base, expected_path in scenarios:
        desmap.__set_destination__(mapped_base, folder)
        found_base, found_path = desmap.find_local_path(uri, **extra)
        self.assertEqual(expected_base, found_base)
        self.assertEqual(expected_path, found_path)
def test07_process_source(self):
    # Connection and readable resourcesync: reading the source description of
    # s6 must write the sitemap to rs/destination/d6/sitemaps.

    # Remove output left over from a previous run. A missing folder is fine;
    # ignore_errors keeps this best-effort without a bare except, which would
    # also swallow KeyboardInterrupt and real programming errors.
    shutil.rmtree("rs/destination/d6/sitemaps", ignore_errors=True)

    # Reset configuration, destination map and reporter to a known state.
    Config.__set_config_filename__("test-files/config.txt")
    Config().__drop__()
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    DestinationMap().__drop__()
    des.reporter.reset_instance()

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__("http://localhost:8000/rs/source/s6", "rs/destination/d6")
    # The listener is what writes received sitemaps to disk.
    des.processor.processor_listeners.append(SitemapWriter())

    base_uri = "http://localhost:8000/rs/source/s6/"
    sdproc = Sodesproc(base_uri)
    sdproc.read_source()

    self.assertEqual(200, sdproc.source_status)
    self.assertEqual(Status.document, sdproc.status)
    self.assertTrue(os.path.isfile("rs/destination/d6/sitemaps/.well-known/resourcesync"))
def process_source(self):
    """Synchronize ``self.uri`` to its destination and set ``self.status``.

    The destination is looked up in the DestinationMap with the infix
    ``"resources"``; whether the net location may be used comes from
    ``Config.key_use_netloc`` (default False). When no destination is found,
    the problem is logged, appended to ``self.exceptions`` and reported, and
    no synchronization takes place.
    """
    use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
    _, destination = DestinationMap().find_destination(
        self.uri, netloc=use_netloc, infix="resources")

    if destination is not None:
        self.__synchronize__(destination)
    else:
        message = "No destination for %s" % self.uri
        self.logger.debug(message)
        self.exceptions.append(message)
        des.reporter.instance().log_status(
            self.uri,
            exception="No destination specified and use of net location prohibited.")

    if self.has_exceptions():
        self.status = Status.processed_with_exceptions
    else:
        self.status = Status.processed
def event_sitemap_received(self, uri, capability, text):
    """Save a received sitemap document below the destination mapped to its URI.

    The local path is resolved through the DestinationMap with the infix
    SITEMAP_FOLDER; whether the net location may be used comes from
    ``Config.key_use_netloc`` (default False).

    :param uri: URI of the sitemap; used to find the local path.
    :param capability: only used in the log messages written here.
    :param text: content written to the local file as-is.
    """
    use_netloc = Config().boolean_prop(Config.key_use_netloc, False)
    # The base URI half of the result is not needed here.
    _, local_path = DestinationMap().find_local_path(
        uri, netloc=use_netloc, infix=SITEMAP_FOLDER)

    if local_path is None:
        # Logger.warn is deprecated; warning() is the supported spelling.
        self.logger.warning("Could not save %s. No local path for %s", capability, uri)
        return

    parent_dir = os.path.dirname(local_path)
    # A bare file name has no directory part, and os.makedirs("") would raise.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(local_path, "w") as sitemap_file:
        sitemap_file.write(text)
    self.logger.debug("Saved %s '%s'", capability, local_path)
def test03_process_baseline(self):
    # Two consecutive baseline syncs of source s1: the first creates three
    # resources, the second finds all three unchanged.
    resourcelist_uri = "http://localhost:8000/rs/source/s1/resourcelist.xml"

    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "False")
    DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")
    des.reporter.reset_instance()

    logger.debug("\n=========== create ==============\n")
    create_run = Relisync(resourcelist_uri)
    create_run.process_source()
    self.assertEqual(0, len(create_run.exceptions))
    self.assertEqual(Status.processed, create_run.status)

    reporter = des.reporter.instance()
    # sync_status count: 1 for audit, 1 for create. expected 2
    self.assertEqual(2, len(reporter.sync_status))
    audit_entry = reporter.sync_status[0]
    self.assertEqual(0, audit_entry.same)
    self.assertEqual(3, audit_entry.created)
    self.assertEqual(0, audit_entry.updated)
    self.assertEqual(0, audit_entry.deleted)
    self.assertEqual(0, audit_entry.to_delete)
    self.assertIsNone(audit_entry.exception)

    logger.debug("\n============ update =============\n")
    update_run = Relisync(resourcelist_uri)
    update_run.process_source()
    self.assertEqual(0, len(update_run.exceptions))
    self.assertEqual(Status.processed, update_run.status)

    reporter = des.reporter.instance()
    # sync_status count: 1 for audit, 1 for create (both from previous run),
    # 1 for audit, no update. expected 3
    self.assertEqual(3, len(reporter.sync_status))
    second_audit = reporter.sync_status[2]
    self.assertEqual(3, second_audit.same)
    self.assertEqual(0, second_audit.created)
    self.assertEqual(0, second_audit.updated)
    self.assertEqual(0, second_audit.deleted)
    self.assertEqual(0, second_audit.to_delete)
    self.assertIsNone(second_audit.exception)
def test02_process_audit(self):
    # With key_audit_only set to "True" a baseline run of source s1 must
    # leave exactly one entry in the reporter.
    Config().__set_prop__(Config.key_use_netloc, "False")
    Config().__set_prop__(Config.key_audit_only, "True")
    DestinationMap().__set_destination__("http://localhost:8000/rs/source/s1", "rs/destination/d1")
    __clear_destination__("d1")
    __clear_sources_xml__("s1")
    __create_resourcelist__("s1")
    des.reporter.reset_instance()

    logger.debug("\n=========================\n")
    audit_run = Relisync("http://localhost:8000/rs/source/s1/resourcelist.xml")
    audit_run.process_source()
    self.assertEqual(0, len(audit_run.exceptions))
    self.assertEqual(Status.processed, audit_run.status)

    report = des.reporter.instance()
    self.assertEqual(1, len(report.sync_status))
    report.sync_status_to_file("logs/audit.csv")
def test_find_destination(self):
    # Looks up destinations for URIs against the map in test-files/desmap.txt,
    # then exercises the default destination, the netloc fallback and a
    # changed root folder.
    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    desmap = DestinationMap()

    # (uri, expected base uri, expected destination)
    mapped_cases = (
        ("http://long.name.com/path/to/resource.xml", "http://long.name.com", "./destination1"),
        ("http://long.name.com/path/to/", "http://long.name.com", "./destination1"),
        ("http://long.name.com/", "http://long.name.com", "./destination1"),
        ("http://long.name.com", "http://long.name.com", "./destination1"),
        # explicit path to resource in desmap
        ("https://first.com:8080/path1", "https://first.com:8080", None),
        ("https://first.com:8080/path1/to/resource.xml",
         "https://first.com:8080/path1/to/resource.xml", "./destination2"),
        ("https://first.com:8080/path2/", "https://first.com:8080/path2", "./destination3"),
        ("https://first.com:8080/path2", "https://first.com:8080/path2", "./destination3"),
    )
    for uri, expected_base, expected_destination in mapped_cases:
        found_base, found_destination = desmap.find_destination(uri)
        self.assertEqual(expected_base, found_base)
        if expected_destination is None:
            self.assertIsNone(found_destination)
        else:
            self.assertEqual(expected_destination, found_destination)

    unmapped_uri = "https://not.mapped.com/resource.xml"

    # Unmapped URI with a default destination as second positional argument.
    found_base, found_destination = desmap.find_destination(unmapped_uri, "default/path")
    self.assertEqual("https://not.mapped.com", found_base)
    self.assertEqual("./default/path", found_destination)

    # Unmapped URI with the net location as destination.
    found_base, found_destination = desmap.find_destination(unmapped_uri, netloc=True)
    self.assertEqual("https://not.mapped.com", found_base)
    self.assertEqual("./not.mapped.com", found_destination)

    # A different root folder replaces the leading "." of the destination.
    desmap.set_root_folder("foo/bar")
    found_base, found_destination = desmap.find_destination(
        "http://long.name.com/path/to/resource.xml")
    self.assertEqual("http://long.name.com", found_base)
    self.assertEqual("foo/bar/destination1", found_destination)
def test_shorten_03(self):
    # Query string and fragment are gone after one shorten() call, together
    # with the last path segment.
    full_uri = ("https://docs.python.org/3.4/library/urllib.parse.html"
                "?highlight=urlparse#urllib.parse.urlparse")
    shortened_uri, shortened_path = DestinationMap.shorten(full_uri)
    self.assertEqual("https://docs.python.org/3.4/library", shortened_uri)
    self.assertEqual("/3.4/library", shortened_path)
def test_find_local_path(self):
    # The mapped base URI is swapped for the destination folder while the
    # remainder of the URI path is kept; an infix goes in between.
    tail = "/but/this/path/remains/file.txt"
    desmap = DestinationMap()

    # Base mapped without trailing slash.
    desmap.__set_destination__("http://a.name.com/path/ignored", "local/folder/a")
    base_a, path_a = desmap.find_local_path("http://a.name.com/path/ignored" + tail)
    self.assertEqual("http://a.name.com/path/ignored", base_a)
    self.assertEqual("./local/folder/a/but/this/path/remains/file.txt", path_a)

    # Base mapped with trailing slash; the returned base has none.
    desmap.__set_destination__("http://b.name.com/path/ignored/", "local/folder/b")
    base_b, path_b = desmap.find_local_path("http://b.name.com/path/ignored" + tail)
    self.assertEqual("http://b.name.com/path/ignored", base_b)
    self.assertEqual("./local/folder/b/but/this/path/remains/file.txt", path_b)

    # With an infix between destination folder and remaining path.
    desmap.__set_destination__("http://c.name.com/path/ignored", "local/folder/c")
    base_c, path_c = desmap.find_local_path(
        "http://c.name.com/path/ignored" + tail, infix="infix")
    self.assertEqual("http://c.name.com/path/ignored", base_c)
    self.assertEqual("./local/folder/c/infix/but/this/path/remains/file.txt", path_c)
def setUp(self):
    """Point DestinationMap at the test map file and drop the current instance."""
    map_file = "test-files/desmap.txt"
    DestinationMap.__set_map_filename__(map_file)
    current_map = DestinationMap()
    current_map.__drop__()
def setUp(self):
    """Reset Config, DestinationMap and the desclient instance before each test."""
    # Configuration first, then the destination map, as in the original order.
    Config.__set_config_filename__("test-files/config.txt")
    current_config = Config()
    current_config.__drop__()

    DestinationMap.__set_map_filename__("test-files/desmap.txt")
    current_map = DestinationMap()
    current_map.__drop__()

    des.desclient.reset_instance()