def build_crawl_dirs():
    """Set up the directory layout used by a crawl.

    Creates the results, crawl and logs directories named in ``cm``,
    copies the configuration file into the logs directory as
    ``config.ini`` and hands the ``latest_crawl`` link name, together
    with the crawl directory, to ``add_symlink``.
    """
    for directory in (cm.RESULTS_DIR, cm.CRAWL_DIR, cm.LOGS_DIR):
        ut.create_dir(directory)

    # Keep a copy of the configuration next to the logs.
    config_copy = join(cm.LOGS_DIR, 'config.ini')
    copyfile(cm.CONFIG_FILE, config_copy)

    latest_link = join(cm.RESULTS_DIR, 'latest_crawl')
    add_symlink(latest_link, cm.CRAWL_DIR)
Example #2
0
def build_crawl_dirs():
    """Create the crawl directory tree and its bookkeeping files.

    The results, crawl and logs directories are created in that order,
    the configuration file is copied to ``config.ini`` inside the logs
    directory, and ``add_symlink`` is called for ``latest_crawl`` in the
    results directory.
    """
    results_dir = cm.RESULTS_DIR
    ut.create_dir(results_dir)
    ut.create_dir(cm.CRAWL_DIR)

    logs_dir = cm.LOGS_DIR
    ut.create_dir(logs_dir)
    copyfile(cm.CONFIG_FILE, join(logs_dir, 'config.ini'))

    # Only the final path component of the crawl directory is passed as
    # the second argument, not the full path.
    link_name = join(results_dir, 'latest_crawl')
    add_symlink(link_name, basename(cm.CRAWL_DIR))
Example #3
0
 def init_crawl_dirs(self):
     """Create results and logs directories for this crawl.

     Stores the pair returned by ``create_crawl_dir`` as ``crawl_dir``
     and ``crawl_logs_dir``, adds a ``latest`` symbolic link in the
     results directory for the crawl directory, and sets ``log_file`` to
     the ``crawl.log`` path inside the crawl logs directory.
     """
     crawl_dir, logs_dir = self.create_crawl_dir()
     self.crawl_dir = crawl_dir
     self.crawl_logs_dir = logs_dir

     # Symbolic link for the crawl directory that was just set up.
     add_symlink(os.path.join(cm.RESULTS_DIR, 'latest'), crawl_dir)

     # Path of the crawl log.
     self.log_file = os.path.join(logs_dir, 'crawl.log')
 def __init__(self,
              torrc_dict,
              url_list,
              tbb_version,
              experiment=cm.EXP_TYPE_WANG_AND_GOLDBERG,
              xvfb=False,
              capture_screen=True):
     """Prepare crawl directories, log files and the Tor controller.

     Args:
         torrc_dict: forwarded to ``TorController``.
         url_list: list of URLs to visit; stored as ``self.urls``.
         tbb_version: stored on the instance and forwarded to
             ``TorController``.
         experiment: stored as ``self.experiment``.
         xvfb: stored as ``self.xvfb``.
         capture_screen: stored as ``self.capture_screen``.
     """
     # Placeholders; init_crawl_dirs() below assigns the directory paths.
     self.crawl_dir = self.crawl_logs_dir = self.visit = None
     self.urls = url_list
     self.init_crawl_dirs()

     # The Tor log sits in the crawl logs directory and gets a symbolic
     # link in the results directory.
     tor_log_path = os.path.join(self.crawl_logs_dir, "tor.log")
     self.tor_log = tor_log_path
     add_symlink(os.path.join(cm.RESULTS_DIR, 'latest_tor_log'),
                 tor_log_path)

     self.tbb_version = tbb_version
     self.experiment = experiment
     self.tor_controller = TorController(torrc_dict, tbb_version,
                                         tor_log_path)
     self.tor_process = self.tb_driver = None
     self.capture_screen = capture_screen
     self.xvfb = xvfb

     # Attach the crawl log to the logger, then add a symbolic link to it.
     crawl_log_path = self.log_file
     add_log_file_handler(wl_log, crawl_log_path)
     add_symlink(os.path.join(cm.RESULTS_DIR, 'latest_crawl_log'),
                 crawl_log_path)
Example #5
0
 def __init__(self, url_list, torrc_dict,
              experiment=cm.EXP_TYPE_WANG_AND_GOLDBERG, xvfb=False,
              capture_screen=True, output=cm.RESULTS_DIR):
     """Prepare crawl directories, log files and the Tor controller.

     Args:
         url_list: list of URLs to visit; stored as ``self.urls``.
         torrc_dict: forwarded to ``TorController`` as ``torrc_dict``.
         experiment: stored as ``self.experiment``.
         xvfb: stored as ``self.xvfb``.
         capture_screen: stored as ``self.capture_screen``.
         output: output directory; its absolute path is stored as
             ``self.output`` and used for the symbolic links created here.
     """
     # Placeholders; init_crawl_dirs() below assigns the directory paths.
     self.crawl_dir = self.crawl_logs_dir = self.visit = None
     self.output = abspath(output)
     self.urls = url_list
     self.init_crawl_dirs()

     # The Tor log sits in the crawl logs directory and gets a symbolic
     # link in the output directory.
     tor_log_path = os.path.join(self.crawl_logs_dir, "tor.log")
     self.tor_log = tor_log_path
     add_symlink(os.path.join(self.output, 'latest_tor_log'), tor_log_path)

     self.tbb_version = cm.RECOMMENDED_TBB_VERSION
     self.experiment = experiment
     controller_kwargs = dict(tbb_path=cm.TBB_PATH,
                              torrc_dict=torrc_dict,
                              tor_log=tor_log_path)
     self.tor_controller = TorController(**controller_kwargs)
     self.tor_process = self.tb_driver = None
     self.capture_screen = capture_screen
     self.xvfb = xvfb

     # Attach the crawl log to the logger, then add a symbolic link to it.
     crawl_log_path = self.log_file
     add_log_file_handler(wl_log, crawl_log_path)
     add_symlink(os.path.join(self.output, 'latest_crawl_log'),
                 crawl_log_path)
Example #6
0
 def init_crawl_dirs(self):
     """Create the results and logs directories for this crawl.

     Records the two paths returned by ``create_crawl_dir``, adds a
     ``latest`` symbolic link under ``self.output`` for the crawl
     directory and sets ``log_file`` to the ``crawl.log`` path inside the
     crawl logs directory.
     """
     dirs = self.create_crawl_dir()
     self.crawl_dir, self.crawl_logs_dir = dirs

     # Symbolic link for the crawl directory that was just set up.
     latest_link = os.path.join(self.output, 'latest')
     add_symlink(latest_link, self.crawl_dir)

     # Path of the crawl log.
     self.log_file = os.path.join(self.crawl_logs_dir, "crawl.log")
Example #7
0
 def __init__(self, torrc_dit, url_list, tbb_version, xvfb=False, capture_screen=True):
     """Prepare crawl directories, log files and the Tor controller.

     Args:
         torrc_dit: forwarded as the first argument to ``TorController``.
         url_list: stored as ``self.urls``.
         tbb_version: stored on the instance and forwarded to
             ``TorController``.
         xvfb: stored as ``self.xvfb``.
         capture_screen: stored as ``self.capture_screen``.
     """
     # Placeholders; init_crawl_dirs() below assigns the directory paths.
     self.crawl_dir = self.crawl_logs_dir = self.visit = None
     self.urls = url_list
     self.init_crawl_dirs()

     # The Tor log sits in the crawl logs directory and gets a symbolic
     # link in the results directory.
     tor_log_path = os.path.join(self.crawl_logs_dir, 'tor.log')
     self.tor_log = tor_log_path
     tor_log_link = os.path.join(cm.RESULTS_DIR, 'latest_tor_log')
     add_symlink(tor_log_link, tor_log_path)

     self.tbb_version = tbb_version
     self.tor_controller = TorController(torrc_dit, tbb_version, tor_log_path)
     self.tor_process = self.tb_driver = None
     self.capture_screen = capture_screen
     self.xvfb = xvfb

     # Attach the crawl log to the logger, then add a symbolic link to it.
     crawl_log_path = self.log_file
     add_log_file_handler(wl_log, crawl_log_path)
     crawl_log_link = os.path.join(cm.RESULTS_DIR, 'latest_crawl_log')
     add_symlink(crawl_log_link, crawl_log_path)