def build_crawl_dirs():
    """Create the crawl directory layout and point 'latest_crawl' at it.

    Creates the results, crawl and logs directories named in ``cm``,
    copies the configuration file into the logs directory as
    ``config.ini`` and adds a ``latest_crawl`` symlink in the results
    directory whose target is ``cm.CRAWL_DIR``.
    """
    # Same creation order as before: results, then crawl, then logs.
    for directory in (cm.RESULTS_DIR, cm.CRAWL_DIR, cm.LOGS_DIR):
        ut.create_dir(directory)

    # Keep a copy of the configuration next to the logs.
    config_copy = join(cm.LOGS_DIR, 'config.ini')
    copyfile(cm.CONFIG_FILE, config_copy)

    # The link is given the crawl directory path exactly as stored in cm.
    latest_link = join(cm.RESULTS_DIR, 'latest_crawl')
    add_symlink(latest_link, cm.CRAWL_DIR)
def build_crawl_dirs():
    """Create the crawl directory layout and a relative 'latest_crawl' link.

    Creates the results, crawl and logs directories named in ``cm``,
    copies the configuration file into the logs directory as
    ``config.ini`` and adds a ``latest_crawl`` symlink in the results
    directory. The link target is only the base name of
    ``cm.CRAWL_DIR``, not its full path.
    """
    # Same creation order as before: results, then crawl, then logs.
    for directory in (cm.RESULTS_DIR, cm.CRAWL_DIR, cm.LOGS_DIR):
        ut.create_dir(directory)

    # Keep a copy of the configuration next to the logs.
    config_copy = join(cm.LOGS_DIR, 'config.ini')
    copyfile(cm.CONFIG_FILE, config_copy)

    # NOTE(review): a bare base name as target presumably relies on the
    # crawl directory being a direct child of RESULTS_DIR -- confirm.
    latest_link = join(cm.RESULTS_DIR, 'latest_crawl')
    link_target = basename(cm.CRAWL_DIR)
    add_symlink(latest_link, link_target)
def init_crawl_dirs(self):
    """Set up the results and logs directories for this crawl.

    Stores the two paths returned by ``create_crawl_dir()`` as
    ``self.crawl_dir`` and ``self.crawl_logs_dir``, adds a ``latest``
    symlink under ``cm.RESULTS_DIR`` for the crawl directory and records
    the crawl log path in ``self.log_file``.
    """
    crawl_dir, logs_dir = self.create_crawl_dir()
    self.crawl_dir = crawl_dir
    self.crawl_logs_dir = logs_dir

    # 'latest' in the results directory refers to this crawl's directory.
    latest_link = os.path.join(cm.RESULTS_DIR, 'latest')
    add_symlink(latest_link, self.crawl_dir)

    # Only the path is computed here; the file itself is not opened.
    self.log_file = os.path.join(self.crawl_logs_dir, 'crawl.log')
def __init__(self, torrc_dict, url_list, tbb_version,
             experiment=cm.EXP_TYPE_WANG_AND_GOLDBERG, xvfb=False,
             capture_screen=True):
    """Prepare crawl directories, log files and the Tor controller.

    Args:
        torrc_dict: Passed as the first argument to ``TorController``.
        url_list: URLs to visit; stored as ``self.urls``.
        tbb_version: Stored as ``self.tbb_version`` and passed to
            ``TorController``.
        experiment: Stored as ``self.experiment``.
        xvfb: Stored as ``self.xvfb``.
        capture_screen: Stored as ``self.capture_screen``.
    """
    # Placeholders; init_crawl_dirs() fills in the directory attributes.
    self.crawl_dir = self.crawl_logs_dir = self.visit = None
    self.urls = url_list
    self.init_crawl_dirs()

    # Tor log lives in the crawl's logs directory, with a stable link
    # to it in the results directory.
    self.tor_log = os.path.join(self.crawl_logs_dir, "tor.log")
    tor_log_link = os.path.join(cm.RESULTS_DIR, 'latest_tor_log')
    add_symlink(tor_log_link, self.tor_log)

    self.tbb_version = tbb_version
    self.experiment = experiment
    self.tor_controller = TorController(torrc_dict, tbb_version,
                                        self.tor_log)
    # Nothing is started yet: no Tor process and no browser driver.
    self.tor_process = self.tb_driver = None
    self.capture_screen = capture_screen
    self.xvfb = xvfb

    # self.log_file is expected to have been set by init_crawl_dirs().
    add_log_file_handler(wl_log, self.log_file)
    crawl_log_link = os.path.join(cm.RESULTS_DIR, 'latest_crawl_log')
    add_symlink(crawl_log_link, self.log_file)
def __init__(self, url_list, torrc_dict,
             experiment=cm.EXP_TYPE_WANG_AND_GOLDBERG, xvfb=False,
             capture_screen=True, output=cm.RESULTS_DIR):
    """Prepare crawl directories, log files and the Tor controller.

    Args:
        url_list: URLs to visit; stored as ``self.urls``.
        torrc_dict: Passed to ``TorController`` as ``torrc_dict``.
        experiment: Stored as ``self.experiment``.
        xvfb: Stored as ``self.xvfb``.
        capture_screen: Stored as ``self.capture_screen``.
        output: Directory that receives the ``latest_*`` symlinks; it is
            made absolute and stored as ``self.output``.
    """
    # Placeholders; init_crawl_dirs() fills in the directory attributes.
    self.crawl_dir = self.crawl_logs_dir = self.visit = None
    self.output = abspath(output)
    self.urls = url_list
    self.init_crawl_dirs()

    # Tor log lives in the crawl's logs directory, with a stable link
    # to it in the output directory.
    self.tor_log = os.path.join(self.crawl_logs_dir, "tor.log")
    tor_log_link = os.path.join(self.output, 'latest_tor_log')
    add_symlink(tor_log_link, self.tor_log)

    # The TBB version is taken from the module constants, not a parameter.
    self.tbb_version = cm.RECOMMENDED_TBB_VERSION
    self.experiment = experiment
    self.tor_controller = TorController(
        tbb_path=cm.TBB_PATH,
        torrc_dict=torrc_dict,
        tor_log=self.tor_log,
    )
    # Nothing is started yet: no Tor process and no browser driver.
    self.tor_process = self.tb_driver = None
    self.capture_screen = capture_screen
    self.xvfb = xvfb

    # self.log_file is expected to have been set by init_crawl_dirs().
    add_log_file_handler(wl_log, self.log_file)
    crawl_log_link = os.path.join(self.output, 'latest_crawl_log')
    add_symlink(crawl_log_link, self.log_file)
def init_crawl_dirs(self):
    """Set up the results and logs directories for this crawl.

    Stores the two paths returned by ``create_crawl_dir()`` as
    ``self.crawl_dir`` and ``self.crawl_logs_dir``, adds a ``latest``
    symlink under ``self.output`` for the crawl directory and records
    the crawl log path in ``self.log_file``.
    """
    crawl_dir, logs_dir = self.create_crawl_dir()
    self.crawl_dir = crawl_dir
    self.crawl_logs_dir = logs_dir

    # 'latest' in the output directory refers to this crawl's directory.
    latest_link = os.path.join(self.output, 'latest')
    add_symlink(latest_link, self.crawl_dir)

    # Only the path is computed here; the file itself is not opened.
    self.log_file = os.path.join(self.crawl_logs_dir, "crawl.log")
def __init__(self, torrc_dit, url_list, tbb_version, xvfb=False,
             capture_screen=True):
    """Prepare crawl directories, log files and the Tor controller.

    Args:
        torrc_dit: Passed as the first argument to ``TorController``.
            (The name looks like a typo for ``torrc_dict``; it is kept
            because callers may pass it by keyword.)
        url_list: URLs to visit; stored as ``self.urls``.
        tbb_version: Stored as ``self.tbb_version`` and passed to
            ``TorController``.
        xvfb: Stored as ``self.xvfb``.
        capture_screen: Stored as ``self.capture_screen``.
    """
    # Placeholders; init_crawl_dirs() fills in the directory attributes.
    self.crawl_dir = self.crawl_logs_dir = self.visit = None
    self.urls = url_list
    self.init_crawl_dirs()

    # Tor log lives in the crawl's logs directory, with a stable link
    # to it in the results directory.
    self.tor_log = os.path.join(self.crawl_logs_dir, 'tor.log')
    tor_log_link = os.path.join(cm.RESULTS_DIR, 'latest_tor_log')
    add_symlink(tor_log_link, self.tor_log)

    self.tbb_version = tbb_version
    self.tor_controller = TorController(torrc_dit, tbb_version,
                                        self.tor_log)
    # Nothing is started yet: no Tor process and no browser driver.
    self.tor_process = self.tb_driver = None
    self.capture_screen = capture_screen
    self.xvfb = xvfb

    # self.log_file is expected to have been set by init_crawl_dirs().
    add_log_file_handler(wl_log, self.log_file)
    crawl_log_link = os.path.join(cm.RESULTS_DIR, 'latest_crawl_log')
    add_symlink(crawl_log_link, self.log_file)