Python error 예제들, tbcrawler.log.wl_log.error Python 예제들

예제 #1

0

파일 보기

파일: pytbcrawler.py 프로젝트: notem/tor-browser-crawler-video

def parse_url_list(file_path, start, stop):
    """Return a list of normalized YouTube watch URLs read from a file.

    Empty lines and lines starting with '#' are dropped first; entries
    ``start`` through ``stop`` (1-based, inclusive) of the remaining lines
    are then kept. A ``start`` below 1 is treated as 1.

    Each kept entry is either a bare video ID (no hostname when parsed as a
    URL) or a YouTube URL. Both forms are rewritten to
    ``https://www.youtube.com/watch?v=<id>&autoplay=0&rel=0``.

    Any exception raised while reading or validating (including the
    ValueError raised for an invalid entry) is logged together with its
    traceback, after which ``sys.exit(-1)`` is called.
    """
    try:
        with open(file_path) as f:
            # read file contents and split into elements
            url_list = f.read().splitlines()
        url_list = [url for url in url_list if url and not url.startswith('#')]
        # start is 1-based; clamp the lower bound so that start <= 0 cannot
        # become a negative (counted-from-the-end) slice index
        url_list = url_list[max(start - 1, 0):stop]
        # process urls and tokens into valid youtube video strings
        processed_list = []
        for url in url_list:
            parsed = urlparse(url)
            # if no hostname, assume string is the video ID
            if not parsed.hostname:
                if not youtube_id_validation(url):
                    raise ValueError('Token is not a valid youtube video ID: {}'.format(url))
                url = 'https://www.youtube.com/watch?v=' + url
            else:
                # the value returned by the validator is used as the video ID
                token = youtube_url_validation(url)
                if not token:
                    raise ValueError('Not a valid youtube URL: {}'.format(url))
                url = 'https://www.youtube.com/watch?v=' + token
            # stop autoplay and related videos
            # youtube may ignore these parameters
            url += '&autoplay=0&rel=0'
            processed_list.append(url)
    except Exception as e:
        wl_log.error("while parsing URL list: {} \n{}".format(e, traceback.format_exc()))
        sys.exit(-1)
    return processed_list

예제 #2

0

파일 보기

파일: crawler.py 프로젝트: notem/tor-browser-crawler-video

 def post_visit(self):
     """Filter the job's pcap file by the guard IPs known to the controller.

     The guard IPs come from ``self.controller.get_all_guard_ips()`` and are
     handed, together with ``self.job.pcap_file``, to ``ut.filter_pcap``.
     A failure during filtering is logged and not re-raised.
     """
     guard_ips = set(self.controller.get_all_guard_ips())
     wl_log.debug("Found %s guards in the consensus.", len(guard_ips))
     wl_log.info("Filtering packets without a guard IP.")
     pcap_path = self.job.pcap_file
     try:
         ut.filter_pcap(pcap_path, guard_ips)
     except Exception as filter_err:
         # best effort: report the problem and point at the capture file
         wl_log.error("ERROR: filtering pcap file: %s.", filter_err)
         wl_log.error("Check pcap: %s", self.job.pcap_file)

예제 #3

0

파일 보기

파일: crawler.py 프로젝트: websitefingerprinting/tor-browser-crawler

 def post_visit(self):
     """Filter the job's pcap file by guard IPs plus the configured bridges.

     Starts from the IPs returned by ``self.controller.get_all_guard_ips()``,
     adds the entries of ``cm.My_Bridge_Ips`` and passes the resulting set,
     together with ``self.job.pcap_file``, to ``ut.filter_pcap``. A failure
     during filtering is logged and not re-raised.
     """
     guard_ips = set(self.controller.get_all_guard_ips())
     # hard-coded bridge IPs, used when we set up our own bridges
     guard_ips |= set(cm.My_Bridge_Ips)
     wl_log.debug("Found %s guards in the consensus.", len(guard_ips))
     wl_log.info("Filtering packets without a guard IP.")
     pcap_path = self.job.pcap_file
     try:
         ut.filter_pcap(pcap_path, guard_ips)
     except Exception as filter_err:
         # best effort: report the problem and point at the capture file
         wl_log.error("ERROR: filtering pcap file: %s.", filter_err)
         wl_log.error("Check pcap: %s", self.job.pcap_file)

예제 #4

0

파일 보기

파일: crawler.py 프로젝트: websitefingerprinting/tor-browser-crawler

 def _do_instance(self):
     """Run every visit of the current job, one browser launch per visit.

     For each visit index in ``range(self.job.visits)`` the index is stored
     on ``self.job.visit``, the job directory is created, the driver is
     launched and ``_do_visit`` is run inside that launch context.
     After the context closes the method sleeps for the configured
     ``pause_between_loads`` and then calls ``post_visit``.
     """
     for visit_index in range(self.job.visits):
         self.job.visit = visit_index
         ut.create_dir(self.job.path)
         wl_log.info("*** Visit #%s to %s ***", self.job.visit, self.job.url)
         with self.driver.launch():
             try:
                 self.driver.set_page_load_timeout(cm.SOFT_VISIT_TIMEOUT)
             except WebDriverException as timeout_err:
                 # failing to set the soft timeout is logged; the visit
                 # still goes ahead
                 wl_log.error("Setting soft timeout %s", timeout_err)
             self._do_visit()
         pause_seconds = float(self.job.config['pause_between_loads'])
         sleep(pause_seconds)
         self.post_visit()

예제 #5

0

파일 보기

파일: crawler.py 프로젝트: websitefingerprinting/tbb

 def post_visit(self):
     """Filter the job's pcap file by the bridge IP or the guard IPs.

     When the controller's torrc dict contains both 'usebridges' and
     'bridge', the single IP taken from the 'bridge' entry is used;
     otherwise the IPs from ``self.controller.get_all_guard_ips()`` are
     used. The chosen set is passed with ``self.job.pcap_file`` to
     ``ut.filter_pcap``. A failure during filtering is logged and not
     re-raised.
     """
     torrc = self.controller.torrc_dict
     if 'usebridges' in torrc and 'bridge' in torrc:
         # Example entry (from the original author):
         # Bridge=obfs4 52.175.31.228:35000 cert=Dsk4OM5YMIQsLuqZfOVxaCbsgOxgkX1E3xgPQctB2I1Fy9cs+UvJf4CAwfo0DCGOJrOsIQ iat-mode=0
         # The second space-separated field is "ip:port"; keep only the ip.
         bridge_endpoint = torrc['bridge'].split(" ")[1]
         guard_ips = {bridge_endpoint.split(":")[0]}
         wl_log.info("Found bridge {}".format(guard_ips))
     else:
         guard_ips = set(self.controller.get_all_guard_ips())
         wl_log.info("Found %s guards in the consensus.", len(guard_ips))
     wl_log.info("Filtering packets without a guard IP.")
     pcap_path = self.job.pcap_file
     try:
         ut.filter_pcap(pcap_path, guard_ips)
     except Exception as filter_err:
         # best effort: report the problem and point at the capture file
         wl_log.error("ERROR: filtering pcap file: %s.", filter_err)
         wl_log.error("Check pcap: %s", self.job.pcap_file)

예제 #6

0

파일 보기

def parse_url_list(file_path, start, stop):
    """Return a list of URLs read from a file.

    Empty lines and lines starting with '#' are dropped first; entries
    ``start`` through ``stop`` (1-based, inclusive) of the remaining lines
    are then returned. A ``start`` below 1 is treated as 1.

    Every returned URL must parse with a hostname. Any exception raised
    while reading or validating (including the ValueError raised for a URL
    without a hostname) is logged together with its traceback, after which
    ``sys.exit(-1)`` is called.
    """
    try:
        with open(file_path) as f:
            # read file contents and split into elements
            url_list = f.read().splitlines()
        url_list = [url for url in url_list if url and not url.startswith('#')]
        # start is 1-based; clamp the lower bound so that start <= 0 cannot
        # become a negative (counted-from-the-end) slice index
        url_list = url_list[max(start - 1, 0):stop]
        processed_list = []
        for url in url_list:
            if not urlparse(url).hostname:
                raise ValueError('URL {} has invalid hostname!'.format(url))
            processed_list.append(url)
    except Exception as e:
        wl_log.error("while parsing URL list: {} \n{}".format(e, traceback.format_exc()))
        sys.exit(-1)
    return processed_list

예제 #7

0

파일 보기

    def _do_visit(self):
        """Load the job URL while a Sniffer captures traffic.

        The page load runs under the ``cm.HARD_VISIT_TIMEOUT`` guard. When
        ``self.screenshots`` is truthy one screenshot is attempted after the
        load. Hard timeouts and any other exception are logged and not
        re-raised.
        """
        capture = Sniffer(path=self.job.pcap_file,
                          filter=cm.DEFAULT_FILTER,
                          device=self.device,
                          dumpcap_log=self.job.pcap_log)
        with capture:
            # make sure dumpcap is running
            sleep(1)
            try:
                shot_index = 0
                with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                    # begin loading page
                    self.driver.get(self.job.url)
                    # sleep to catch some lingering AJAX-type traffic
                    sleep(1)

                    # take first screenshot
                    if self.screenshots:
                        try:
                            png_path = self.job.png_file(shot_index)
                            self.driver.get_screenshot_as_file(png_path)
                            shot_index += 1
                        except WebDriverException:
                            wl_log.error("Cannot get screenshot.")

            except (cm.HardTimeoutException, TimeoutException):
                wl_log.error("Visit to %s reached hard timeout!", self.job.url)
            except Exception as visit_err:
                wl_log.error("Unknown exception: %s", visit_err)

예제 #8

0

파일 보기

파일: crawler.py 프로젝트: notem/tor-browser-crawler-video

    def _do_visit(self):
        """Load the job's video page and keep capturing until playback ends.

        While a Sniffer captures traffic and under the
        ``cm.HARD_VISIT_TIMEOUT`` guard, the page is loaded and the
        'movie_player' element is polled once per second through
        ``getPlayerState()`` until it reports state 0 (ended). While the
        player is unstarted (-1) any ad skip buttons found are clicked, and
        while it is unstarted or paused (2) ``playVideo()`` is invoked.

        A progress line is logged whenever the state changes or
        ``cm.SCREENSHOT_INTERVAL`` seconds have passed; when
        ``self.screenshots`` is truthy a screenshot is attempted at the same
        moments (and once right after the page load). Hard timeouts and any
        other exception are logged and not re-raised.
        """
        with Sniffer(path=self.job.pcap_file,
                     filter=cm.DEFAULT_FILTER,
                     device=self.device,
                     dumpcap_log=self.job.pcap_log):
            sleep(1)  # make sure dumpcap is running
            try:
                screenshot_count = 0
                with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                    # begin loading page
                    self.driver.get(self.job.url)

                    # take first screenshot
                    if self.screenshots:
                        try:
                            self.driver.get_screenshot_as_file(
                                self.job.png_file(screenshot_count))
                            screenshot_count += 1
                        except WebDriverException:
                            wl_log.error("Cannot get screenshot.")

                    # check video player status
                    # Labels are keyed by the numeric player state. The
                    # codes are not contiguous (there is no state 4), so a
                    # dict is used: indexing a list would label state 5 as
                    # 'unstarted' and raise on any unexpected value.
                    status_to_string = {
                        -1: 'unstarted',
                        0: 'ended',
                        1: 'played',
                        2: 'paused',
                        3: 'buffered',
                        5: 'queued',
                    }
                    js = "return document.getElementById('movie_player').getPlayerState()"
                    player_status = self.driver.execute_script(js)

                    # continue visit capture until video has fully played
                    ts = time()
                    while player_status != 0:

                        # attempt to simulate user skipping ad
                        if player_status == -1:
                            try:
                                skipAds = self.driver.find_elements(
                                    By.XPATH,
                                    "//button[@class=\"ytp-ad-skip-button ytp-button\"]"
                                )
                                wl_log.info(len(skipAds))
                                for skipAd in skipAds:
                                    skipAd.click()
                            except WebDriverException as e:
                                # best effort: a failed click must not abort
                                # the visit, but leave a trace for debugging
                                wl_log.debug(
                                    "Could not click ad skip button: %s", e)

                        # unpause video if state is unstarted or is for some reason paused
                        if player_status == -1 or player_status == 2:
                            self.driver.execute_script(
                                "return document.getElementById('movie_player').playVideo()"
                            )

                        # busy loop delay
                        sleep(1)

                        # check video state again
                        new_ps = self.driver.execute_script(js)

                        # print progress updates every time the video state changes
                        # or on the screenshot interval
                        ts_new = time()
                        if player_status != new_ps or ts_new - ts > cm.SCREENSHOT_INTERVAL:
                            wl_log.debug(
                                'youtube status: {} for {:.2f} seconds'.format(
                                    status_to_string.get(
                                        player_status,
                                        'unknown ({})'.format(player_status)),
                                    ts_new - ts))
                            ts = ts_new
                            # take periodic screenshots
                            if self.screenshots:
                                try:
                                    self.driver.get_screenshot_as_file(
                                        self.job.png_file(screenshot_count))
                                    screenshot_count += 1
                                except WebDriverException:
                                    wl_log.error("Cannot get screenshot.")
                            player_status = new_ps

            except (cm.HardTimeoutException, TimeoutException):
                wl_log.error("Visit to %s reached hard timeout!", self.job.url)
            except Exception as exc:
                wl_log.error("Unknown exception: %s", exc)