def test_single_threaded_download_without_manager(self):
    """Crawl the local test site with one crawler and verify the result.

    A single `CrawlerThread` is created and its `run()` method is called
    directly. Afterwards the downloaded directory tree is compared with
    the expected one and the final state of the node tree is checked.
    """
    # NOTE(review): a test method with this exact name is defined twice in
    # this file; if both live in the same class, the later definition
    # silently replaces the earlier one -- confirm which is intended.
    with TempDir() as temp_dir:
        levels = LevelsCreator(temp_dir.get_path()).create()
        address = "file:" + Resources.path(
            __file__, "data/original_site/issues_1.html",
            convert_to_url=True)
        tree = TreeAccessor(_StandardNodeExtended())
        navigator = HTMLMultipageNavigator(address, levels)
        navigator_wrapper = _NavigatorTreeWrapperExtended(navigator, tree)
        crawler = CrawlerThread(navigator_wrapper, tree)
        crawler.run()

        expected_dir = Resources.path(__file__, "data/expected_download")
        actual_dir = temp_dir.get_path()
        # `assertTrue` replaces the deprecated `assert_` alias, which was
        # removed from unittest in Python 3.12.
        self.assertTrue(are_dir_trees_equal(
            expected_dir, actual_dir, ignore=[".gitignore"]))
        self.__check_tree_final_state(tree.get_root())
        # The mapping is forwarded unchanged; its meaning is defined by
        # the helper (presumably per-node expectations -- verify there).
        self.__check_if_each_node_is_processed_once(
            tree.get_root(), {"/root/2011-07-16/06": 0})
def test_single_threaded_download_without_manager(self):
    """Download the local test site with a single crawler and check it.

    Builds the levels, navigator and tree, calls `run()` directly on one
    `CrawlerThread`, then asserts that the downloaded directory tree
    equals the expected one and that the node tree passes the final
    state checks.
    """
    # NOTE(review): this method name appears twice in this file; when both
    # definitions share a class, only the later one is kept by Python --
    # confirm that the duplication is intentional.
    with TempDir() as temp_dir:
        download_root = temp_dir.get_path()
        levels = LevelsCreator(download_root).create()
        address = "file:" + Resources.path(
            __file__, "data/original_site/issues_1.html",
            convert_to_url=True)
        tree = TreeAccessor(_StandardNodeExtended())
        navigator = HTMLMultipageNavigator(address, levels)
        navigator_wrapper = _NavigatorTreeWrapperExtended(navigator, tree)
        crawler = CrawlerThread(navigator_wrapper, tree)
        crawler.run()

        expected_dir = Resources.path(__file__, "data/expected_download")
        actual_dir = temp_dir.get_path()
        # Use `assertTrue`: the `assert_` alias is deprecated and was
        # removed from unittest in Python 3.12.
        self.assertTrue(are_dir_trees_equal(
            expected_dir, actual_dir, ignore=[".gitignore"]))
        self.__check_tree_final_state(tree.get_root())
        # The mapping is passed through as-is; the helper defines how it
        # is interpreted (presumably per-node expectations -- verify).
        self.__check_if_each_node_is_processed_once(
            tree.get_root(), {"/root/2011-07-16/06": 0})
def set_node_type(self, node, is_leaf):
    """Forward to `TreeAccessor.set_node_type` under the reader lock.

    The reader side of the lock is acquired before delegating and is
    always released afterwards, even if the delegated call raises.
    """
    # NOTE(review): a setter guarded by the *reader* side looks deliberate
    # (presumably per-node updates may overlap) -- confirm against RWLock.
    rw_lock = self.__lock
    rw_lock.reader_acquire()
    try:
        TreeAccessor.set_node_type(self, node, is_leaf)
    finally:
        rw_lock.reader_release()
def set_error(self, node):
    """Forward to `TreeAccessor.set_error` under the reader lock.

    The reader side of the lock is acquired before delegating and is
    always released afterwards, even if the delegated call raises.
    """
    # NOTE(review): a setter guarded by the *reader* side looks deliberate
    # (presumably per-node updates may overlap) -- confirm against RWLock.
    rw_lock = self.__lock
    rw_lock.reader_acquire()
    try:
        TreeAccessor.set_error(self, node)
    finally:
        rw_lock.reader_release()
def __init__(self, sentinel):
    """Initialise the base `TreeAccessor` and create the instance lock.

    `sentinel` is passed straight through to `TreeAccessor.__init__`.
    """
    TreeAccessor.__init__(self, sentinel)
    # The lock is created only after the base class has been initialised.
    self.__lock = RWLock()