def parse_url_list(file_path, start, stop):
    """Return YouTube watch URLs built from the entries of a file.

    Blank lines and lines starting with ``#`` are discarded first; the
    1-based, inclusive window ``start..stop`` is then taken from the
    remaining entries. An entry without a hostname is treated as a bare
    video ID; any other entry is treated as a YouTube URL. Both forms are
    rewritten to ``https://www.youtube.com/watch?v=<id>`` followed by
    ``&autoplay=0&rel=0``.

    Args:
        file_path: path of the text file, one entry per line.
        start: 1-based position of the first entry to keep. Values below
            1 are treated as 1 (previously they produced a negative slice
            index and silently selected the wrong entries).
        stop: 1-based position of the last entry to keep (inclusive).

    Returns:
        The list of processed URL strings.

    Any exception (unreadable file, entry rejected by the validators, ...)
    is logged with its traceback and the process exits with status -1.
    """
    try:
        with open(file_path) as f:
            # read file contents and split into elements
            entries = [
                line for line in f.read().splitlines()
                if line and not line.startswith('#')
            ]

        # clamp the lower bound so start <= 0 cannot wrap around the list
        first = max(start - 1, 0)
        entries = entries[first:stop]

        # process urls and tokens into valid youtube video strings
        processed_list = []
        for entry in entries:
            # if no hostname, assume string is the video ID
            if not urlparse(entry).hostname:
                if not youtube_id_validation(entry):
                    raise ValueError(
                        'Token is not a valid youtube video ID: {}'.format(entry))
                video_id = entry
            else:
                video_id = youtube_url_validation(entry)
                if not video_id:
                    raise ValueError(
                        'Not a valid youtube URL: {}'.format(entry))

            # stop autoplay and related videos
            # youtube may ignore these parameters
            processed_list.append(
                'https://www.youtube.com/watch?v=' + video_id
                + '&autoplay=0&rel=0')
    except Exception as e:
        wl_log.error("while parsing URL list: {} \n{}".format(
            e, traceback.format_exc()))
        sys.exit(-1)
    return processed_list
def post_visit(self):
    """Filter the visit's pcap using the guard IPs reported by the controller.

    The guard IPs come from ``self.controller.get_all_guard_ips()`` and are
    handed, together with ``self.job.pcap_file``, to ``ut.filter_pcap``.
    A failure while filtering is logged and not re-raised.
    """
    guards = set(self.controller.get_all_guard_ips())
    wl_log.debug("Found %s guards in the consensus.", len(guards))

    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guards)
    except Exception as filter_exc:
        # best effort: leave the capture as it is and point at it in the log
        wl_log.error("ERROR: filtering pcap file: %s.", filter_exc)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def post_visit(self):
    """Filter the visit's pcap using guard IPs plus the configured bridge IPs.

    The allowed set is the union of ``self.controller.get_all_guard_ips()``
    and ``cm.My_Bridge_Ips``; it is handed, together with
    ``self.job.pcap_file``, to ``ut.filter_pcap``. A failure while
    filtering is logged and not re-raised.
    """
    # hard-coded bridge ips, used when we set up our own bridges
    allowed_ips = set(self.controller.get_all_guard_ips()).union(
        cm.My_Bridge_Ips)
    wl_log.debug("Found %s guards in the consensus.", len(allowed_ips))

    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, allowed_ips)
    except Exception as filter_exc:
        # best effort: leave the capture as it is and point at it in the log
        wl_log.error("ERROR: filtering pcap file: %s.", filter_exc)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def _do_instance(self):
    """Perform ``self.job.visits`` visits of the job URL.

    For each visit: record the visit number on the job, create the job
    directory, launch the driver, try to set the soft page-load timeout
    (a failure is only logged), run the visit, then pause and run the
    post-visit step.
    """
    for visit_number in range(self.job.visits):
        # the job object carries the current visit number
        self.job.visit = visit_number
        ut.create_dir(self.job.path)
        wl_log.info("*** Visit #%s to %s ***", self.job.visit, self.job.url)

        with self.driver.launch():
            try:
                self.driver.set_page_load_timeout(cm.SOFT_VISIT_TIMEOUT)
            except WebDriverException as seto_exc:
                wl_log.error("Setting soft timeout %s", seto_exc)
            self._do_visit()

        pause_seconds = float(self.job.config['pause_between_loads'])
        sleep(pause_seconds)
        self.post_visit()
def post_visit(self):
    """Filter the visit's pcap using the bridge IP or the guard IPs.

    When the controller's torrc has both ``usebridges`` and ``bridge``
    entries, the only allowed IP is the host part of the second
    space-separated field of the ``bridge`` value. Otherwise the allowed
    set is ``self.controller.get_all_guard_ips()``. The set is handed,
    together with ``self.job.pcap_file``, to ``ut.filter_pcap``; a failure
    while filtering is logged and not re-raised.
    """
    torrc = self.controller.torrc_dict
    if 'usebridges' in torrc and 'bridge' in torrc:
        # expected shape: "obfs4 <ip>:<port> cert=<...> iat-mode=0"
        address = torrc['bridge'].split(" ")[1]
        guard_ips = {address.split(":")[0]}
        wl_log.info("Found bridge {}".format(guard_ips))
    else:
        guard_ips = set(self.controller.get_all_guard_ips())
        wl_log.info("Found %s guards in the consensus.", len(guard_ips))

    wl_log.info("Filtering packets without a guard IP.")
    try:
        ut.filter_pcap(self.job.pcap_file, guard_ips)
    except Exception as filter_exc:
        # best effort: leave the capture as it is and point at it in the log
        wl_log.error("ERROR: filtering pcap file: %s.", filter_exc)
        wl_log.error("Check pcap: %s", self.job.pcap_file)
def parse_url_list(file_path, start, stop):
    """Return the URLs listed in a file.

    Blank lines and lines starting with ``#`` are discarded first; the
    1-based, inclusive window ``start..stop`` is then taken from the
    remaining entries. Every selected entry must parse to a URL with a
    hostname.

    Args:
        file_path: path of the text file, one URL per line.
        start: 1-based position of the first URL to keep. Values below 1
            are treated as 1 (previously they produced a negative slice
            index and silently selected the wrong entries).
        stop: 1-based position of the last URL to keep (inclusive).

    Returns:
        The list of selected URL strings, unchanged.

    Any exception (unreadable file, entry without a hostname, ...) is
    logged with its traceback and the process exits with status -1.
    """
    try:
        with open(file_path) as f:
            # read file contents and split into elements
            entries = [
                line for line in f.read().splitlines()
                if line and not line.startswith('#')
            ]

        # clamp the lower bound so start <= 0 cannot wrap around the list
        first = max(start - 1, 0)

        processed_list = []
        for url in entries[first:stop]:
            if not urlparse(url).hostname:
                raise ValueError('URL {} has invalid hostname!'.format(url))
            processed_list.append(url)
    except Exception as e:
        wl_log.error("while parsing URL list: {} \n{}".format(
            e, traceback.format_exc()))
        sys.exit(-1)
    return processed_list
def _do_visit(self):
    """Load the job URL once while a Sniffer writes to the job's pcap file.

    The page load, a one second wait and the optional screenshot all run
    under ``ut.timeout(cm.HARD_VISIT_TIMEOUT)``. Timeouts and any other
    exception raised during the visit are logged and not propagated.
    """
    sniffer = Sniffer(path=self.job.pcap_file,
                      filter=cm.DEFAULT_FILTER,
                      device=self.device,
                      dumpcap_log=self.job.pcap_log)
    with sniffer:
        sleep(1)  # make sure dumpcap is running
        try:
            shot_index = 0
            with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                # begin loading page
                self.driver.get(self.job.url)
                sleep(1)  # sleep to catch some lingering AJAX-type traffic

                # take first screenshot
                if self.screenshots:
                    try:
                        self.driver.get_screenshot_as_file(
                            self.job.png_file(shot_index))
                    except WebDriverException:
                        wl_log.error("Cannot get screenshot.")
                    else:
                        shot_index += 1
        except (cm.HardTimeoutException, TimeoutException):
            wl_log.error("Visit to %s reached hard timeout!", self.job.url)
        except Exception as exc:
            wl_log.error("Unknown exception: %s", exc)
def _do_visit(self):
    """Load the job URL and keep capturing until the video player reports 'ended'.

    A Sniffer writes to the job's pcap file for the whole visit. After the
    page load, the ``movie_player`` element is polled once per second via
    ``getPlayerState()``. While the state is not 0 (ended):

    * in state -1 (unstarted) any ad-skip buttons found are clicked;
    * in state -1 or 2 (paused) ``playVideo()`` is called;
    * when the state changes, or ``cm.SCREENSHOT_INTERVAL`` seconds have
      passed since the last report, the state is logged and, if
      screenshots are enabled, a screenshot is taken.

    Everything after the Sniffer start-up runs under
    ``ut.timeout(cm.HARD_VISIT_TIMEOUT)``. Timeouts and any other exception
    raised during the visit are logged and not propagated.
    """
    # Labels for the values getPlayerState() returns. An explicit mapping
    # is used because the states are not contiguous (there is no 4) and
    # include -1; a positional list mislabels state 5.
    status_to_string = {
        -1: 'unstarted',
        0: 'ended',
        1: 'played',
        2: 'paused',
        3: 'buffered',
        5: 'queued',
    }
    js = "return document.getElementById('movie_player').getPlayerState()"

    with Sniffer(path=self.job.pcap_file,
                 filter=cm.DEFAULT_FILTER,
                 device=self.device,
                 dumpcap_log=self.job.pcap_log):
        sleep(1)  # make sure dumpcap is running
        try:
            screenshot_count = 0
            with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                # begin loading page
                self.driver.get(self.job.url)

                # take first screenshot
                if self.screenshots:
                    try:
                        self.driver.get_screenshot_as_file(
                            self.job.png_file(screenshot_count))
                        screenshot_count += 1
                    except WebDriverException:
                        wl_log.error("Cannot get screenshot.")

                # check video player status
                player_status = self.driver.execute_script(js)

                # continue visit capture until the video has fully played
                ts = time()
                while player_status != 0:
                    # attempt to simulate the user skipping an ad
                    if player_status == -1:
                        try:
                            skipAds = self.driver.find_elements(
                                By.XPATH,
                                "//button[@class=\"ytp-ad-skip-button ytp-button\"]"
                            )
                            wl_log.info(len(skipAds))
                            for skipAd in skipAds:
                                skipAd.click()
                        except WebDriverException as e:
                            # best effort: a failed skip must not end the visit
                            wl_log.debug("Could not skip ad: %s", e)

                    # unpause video if state is unstarted or is for some
                    # reason paused
                    if player_status in (-1, 2):
                        self.driver.execute_script(
                            "return document.getElementById('movie_player').playVideo()"
                        )

                    # busy loop delay
                    sleep(1)

                    # check video state again
                    new_ps = self.driver.execute_script(js)

                    # print progress updates every time the video state
                    # changes or on the screenshot interval
                    ts_new = time()
                    if (player_status != new_ps
                            or ts_new - ts > cm.SCREENSHOT_INTERVAL):
                        wl_log.debug(
                            'youtube status: {} for {:.2f} seconds'.format(
                                status_to_string.get(player_status, 'unknown'),
                                ts_new - ts))
                        ts = ts_new

                        # take periodic screenshots
                        if self.screenshots:
                            try:
                                self.driver.get_screenshot_as_file(
                                    self.job.png_file(screenshot_count))
                                screenshot_count += 1
                            except WebDriverException:
                                wl_log.error("Cannot get screenshot.")

                    player_status = new_ps
        except (cm.HardTimeoutException, TimeoutException):
            wl_log.error("Visit to %s reached hard timeout!", self.job.url)
        except Exception as exc:
            wl_log.error("Unknown exception: %s", exc)