def start_capture(self, pcap_path=None, pcap_filter="", dumpcap_log=None):
    """Start a dumpcap capture, reconfiguring the sniffer if arguments are given.

    Args:
        pcap_path: if truthy, handed to ``set_pcap_path`` before starting.
        pcap_filter: if truthy, handed to ``set_capture_filter`` before
            starting.
        dumpcap_log: optional path of a file that receives dumpcap's stdout
            and stderr; when omitted both streams are piped.

    Raises:
        DumpcapTimeoutError: if dumpcap is still not running after roughly
            DUMPCAP_START_TIMEOUT seconds of polling.
    """
    if pcap_filter:
        self.set_capture_filter(pcap_filter)
    if pcap_path:
        self.set_pcap_path(pcap_path)

    command = "dumpcap -P -a duration:{} -a filesize:{} -i {} -s 0 -f '{}' -w {}".format(
        cm.HARD_VISIT_TIMEOUT, cm.MAX_DUMP_SIZE, self.device,
        self.pcap_filter, self.pcap_file)
    wl_log.info(command)

    if dumpcap_log:
        # The child process gets its own copy of the descriptor, so the
        # parent's handle can be closed as soon as the process is spawned.
        with open(dumpcap_log, "w+") as log_fi:
            self.p0 = subprocess.Popen(command, stdout=log_fi,
                                       stderr=log_fi, shell=True)
    else:
        self.p0 = subprocess.Popen(command, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, shell=True)

    # Poll in 0.1 s steps. Success is decided by whether dumpcap is actually
    # running, not by the sign of the countdown: float arithmetic can leave
    # the countdown at exactly 0, and dumpcap can come up during the very
    # last sleep.
    remaining = DUMPCAP_START_TIMEOUT  # in seconds
    started = self.is_dumpcap_running()
    while not started and remaining > 0:
        time.sleep(0.1)
        remaining -= 0.1
        started = self.is_dumpcap_running()

    if not started:
        raise DumpcapTimeoutError()

    wl_log.debug("dumpcap started in %s seconds" %
                 (DUMPCAP_START_TIMEOUT - remaining))
    self.is_recording = True
def post_visit(self):
    """Run ``ut.filter_pcap`` on this visit's pcap with the controller's guard IPs.

    Any exception raised while filtering is logged together with the pcap
    path; it is not propagated.
    """
    guard_ips = set(self.controller.get_all_guard_ips())
    wl_log.debug("Found %s guards in the consensus.", len(guard_ips))
    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guard_ips)
    except Exception as filter_exc:
        # Best effort: keep crawling and leave a pointer to the raw capture.
        wl_log.error("ERROR: filtering pcap file: %s.", filter_exc)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def post_crawl(crawl_dir, mode, t):
    """Run the parser script (PARSERPYDIR) over a finished crawl directory.

    Args:
        crawl_dir: crawl directory, passed to the parser as ``--dir``.
        mode: when truthy the parser gets ``-m``; when falsy it gets ``-c``
            instead.
        t: when truthy the parser gets ``-t``.

    The parser's exit status is not returned; a non-zero status is logged.
    """
    # TODO: pack crawl
    # TODO: sanity checks
    # An argument list (no shell) keeps a crawl_dir containing spaces or
    # shell metacharacters intact as a single argument.
    cmd = ["python3", PARSERPYDIR, "--dir", crawl_dir]
    if mode:
        cmd.append("-m")
    if t:
        cmd.append("-t")
    if not mode:
        # check screenshot when crawl clean dataset
        cmd.append("-c")

    wl_log.info(" ".join(cmd))
    exit_code = subprocess.call(cmd)
    if exit_code != 0:
        wl_log.warning("Parser exited with status %s", exit_code)
def post_visit(self):
    """Run ``ut.filter_pcap`` on this visit's pcap with guard and bridge IPs.

    The IP set is the controller's guard IPs plus ``cm.My_Bridge_Ips``.
    Any exception raised while filtering is logged together with the pcap
    path; it is not propagated.
    """
    consensus_guards = set(self.controller.get_all_guard_ips())
    # hard-coded bridge ips, used when we set up our own bridges
    guard_ips = consensus_guards.union(cm.My_Bridge_Ips)
    wl_log.debug("Found %s guards in the consensus.", len(guard_ips))
    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guard_ips)
    except Exception as filter_exc:
        # Best effort: keep crawling and leave a pointer to the raw capture.
        wl_log.error("ERROR: filtering pcap file: %s.", filter_exc)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def stop_capture(self):
    """Kill the capture process tree and log what it left on disk."""
    shell_pid = self.p0.pid  # self.p0.pid is the shell pid
    ut.kill_all_children(shell_pid)
    self.p0.kill()
    self.is_recording = False

    if not os.path.isfile(self.pcap_file):
        wl_log.warning('Capture killed but cannot find capture file: %s'
                       % self.pcap_file)
        wl_log.warning('Check %s for error information!' % self.log)
        return

    wl_log.info('Capture killed. Traffic size: %s Bytes %s'
                % (os.path.getsize(self.pcap_file), self.pcap_file))
def _do_instance(self):
    """Perform ``self.job.visits`` visits to the job's URL.

    Each visit creates the job directory, launches the driver, runs
    ``_do_visit``, pauses for ``pause_between_loads`` seconds and then runs
    ``post_visit``.
    """
    for visit_index in range(self.job.visits):
        self.job.visit = visit_index
        ut.create_dir(self.job.path)
        wl_log.info("*** Visit #%s to %s ***", self.job.visit, self.job.url)
        with self.driver.launch():
            try:
                self.driver.set_page_load_timeout(cm.SOFT_VISIT_TIMEOUT)
            except WebDriverException as seto_exc:
                # The visit still goes ahead without the soft timeout.
                wl_log.error("Setting soft timeout %s", seto_exc)
            self._do_visit()
        pause_seconds = float(self.job.config['pause_between_loads'])
        sleep(pause_seconds)
        self.post_visit()
def crawl(self, job):
    """Crawl a set of urls in batches.

    Args:
        job: the crawl job; stored on ``self.job``. Its ``batches`` attribute
            gives the number of batches, ``batch`` is updated to the current
            batch index, and ``config['pause_between_batches']`` is the
            pause (in seconds) after each batch.
    """
    self.job = job
    wl_log.info("Starting new crawl")
    wl_log.info(pformat(self.job))
    # range() instead of xrange(): xrange does not exist on Python 3, and
    # range behaves the same for this loop on Python 2.
    for batch_index in range(self.job.batches):
        self.job.batch = batch_index
        wl_log.info("**** Starting batch %s ***" % self.job.batch)
        self._do_batch()
        sleep(float(self.job.config['pause_between_batches']))
def post_visit(self):
    """Run ``ut.filter_pcap`` on this visit's pcap with the entry-node IPs.

    When the controller's torrc has both ``usebridges`` and ``bridge``
    entries, the IP is parsed from the bridge line; otherwise (or when no
    address can be parsed from it) the controller's guard IPs are used.
    Any exception raised while filtering is logged together with the pcap
    path; it is not propagated.
    """
    torrc = self.controller.torrc_dict
    guard_ips = set()

    if 'usebridges' in torrc and 'bridge' in torrc:
        # Bridge line layout: "[transport] IP:ORPort [fingerprint] [k=v ...]"
        # e.g. "obfs4 52.175.31.228:35000 cert=... iat-mode=0".
        # The transport name is optional, so the address is whichever of the
        # two leading tokens carries a port separator.
        tokens = torrc['bridge'].split()
        address = next((tok for tok in tokens[:2] if ':' in tok), None)
        if address is not None:
            # rsplit keeps a bracketed IPv6 host in one piece.
            # NOTE(review): confirm ut.filter_pcap expects IPv6 hosts
            # without the surrounding brackets.
            host = address.rsplit(':', 1)[0].strip('[]')
            guard_ips.add(host)
            wl_log.info("Found bridge {}".format(guard_ips))
        else:
            wl_log.warning("Cannot parse bridge address from torrc: %s",
                           torrc['bridge'])

    if not guard_ips:
        guard_ips = set(self.controller.get_all_guard_ips())
        wl_log.info("Found %s guards in the consensus.", len(guard_ips))

    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guard_ips)
    except Exception as e:
        # Best effort: keep crawling and leave a pointer to the raw capture.
        wl_log.error("ERROR: filtering pcap file: %s.", e)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def _do_visit(self):
    """Load the job URL under a Sniffer and keep capturing until the video ends.

    The page's ``movie_player`` element is polled once per second until
    ``getPlayerState()`` returns 0. While waiting, ad-skip buttons are
    clicked and playback is (re)started when the player is unstarted or
    paused. Screenshots are saved when ``self.screenshots`` is set. Hard
    timeouts and any other exception are logged, not propagated.
    """
    # Labels for the values returned by getPlayerState().
    # NOTE(review): numbering taken from the YouTube IFrame Player API
    # (-1 unstarted, 0 ended, 1 playing, 2 paused, 3 buffering, 5 cued) --
    # confirm. A dict is used because the states are not contiguous.
    status_labels = {
        -1: 'unstarted',
        0: 'ended',
        1: 'played',
        2: 'paused',
        3: 'buffered',
        5: 'queued',
    }
    get_state_js = "return document.getElementById('movie_player').getPlayerState()"
    play_js = "return document.getElementById('movie_player').playVideo()"

    def take_screenshot(index):
        """Save screenshot number ``index``; return the next index to use."""
        if not self.screenshots:
            return index
        try:
            self.driver.get_screenshot_as_file(self.job.png_file(index))
        except WebDriverException:
            wl_log.error("Cannot get screenshot.")
            return index
        return index + 1

    with Sniffer(path=self.job.pcap_file, filter=cm.DEFAULT_FILTER,
                 device=self.device, dumpcap_log=self.job.pcap_log):
        sleep(1)  # make sure dumpcap is running
        try:
            screenshot_count = 0
            with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                # begin loading page
                self.driver.get(self.job.url)

                # take first screenshot
                screenshot_count = take_screenshot(screenshot_count)

                # check video player status
                player_status = self.driver.execute_script(get_state_js)

                # continue visit capture until video has fully played
                ts = time()
                while player_status != 0:

                    # attempt to simulate user skipping ad
                    if player_status == -1:
                        try:
                            skip_buttons = self.driver.find_elements(
                                By.XPATH,
                                '//button[@class="ytp-ad-skip-button ytp-button"]')
                            wl_log.info(len(skip_buttons))
                            for button in skip_buttons:
                                button.click()
                        except WebDriverException as skip_exc:
                            # Skipping is best effort; record why it failed.
                            wl_log.debug("Could not skip ad: %s", skip_exc)

                    # unpause video if state is unstarted or is for some
                    # reason paused
                    if player_status in (-1, 2):
                        self.driver.execute_script(play_js)

                    # polling delay
                    sleep(1)

                    # check video state again
                    new_ps = self.driver.execute_script(get_state_js)

                    # print progress updates every time the video state
                    # changes or on the screenshot interval
                    ts_new = time()
                    if (player_status != new_ps
                            or ts_new - ts > cm.SCREENSHOT_INTERVAL):
                        # An unexpected state must not abort the capture.
                        label = status_labels.get(
                            player_status,
                            'unknown ({})'.format(player_status))
                        wl_log.debug(
                            'youtube status: {} for {:.2f} seconds'.format(
                                label, ts_new - ts))
                        ts = ts_new
                        # take periodic screenshots
                        screenshot_count = take_screenshot(screenshot_count)
                    player_status = new_ps

        except (cm.HardTimeoutException, TimeoutException):
            wl_log.error("Visit to %s reached hard timeout!", self.job.url)
        except Exception as exc:
            wl_log.error("Unknown exception: %s", exc)