def filter_pcap(pcap_path, iplist):
    """
    Filter capture by TCP packets addressed to any address in ``iplist``.

    The capture is rewritten in place: the original file is first copied
    to ``<pcap_path>.original``, the filtered packets are written back to
    ``pcap_path``, the copy is removed, and the result is made
    world-readable/writable.

    :param pcap_path: path to the pcap file to filter (rewritten in place)
    :param iplist: collection of IP address strings; a TCP packet is kept
        only if its source or destination address is in this collection
    """
    import os  # local import so the (unseen) file header need not change

    ack_num = 0
    pkt_num = 0
    pcap_filtered = []
    orig_pcap = pcap_path + ".original"
    copyfile(pcap_path, orig_pcap)
    with PcapReader(orig_pcap) as preader:
        for p in preader:
            pkt_num += 1
            if 'TCP' in p:
                ip = p.payload
                # Empty TCP payload => bare ACK; drop it and count it.
                if len(ip.payload.payload) == 0:
                    ack_num += 1
                    continue
                if ip.dst in iplist or ip.src in iplist:
                    pcap_filtered.append(p)
    wrpcap(pcap_path, pcap_filtered)
    wl_log.debug("Filter out %d/%d ACK packets." % (ack_num, pkt_num))
    # Use os primitives instead of shelling out: the previous
    # subprocess.call("rm ..."/"chmod ...", shell=True) calls were
    # vulnerable to shell injection through the path and needlessly slow.
    os.remove(orig_pcap)
    os.chmod(pcap_path, 0o777)
    wl_log.debug("Delete raw pcap and change priviledge of pcap file.")
    def start_capture(self, pcap_path=None, pcap_filter="", dumpcap_log=None):
        """Start capture. Configure sniffer if arguments are given.

        :param pcap_path: optional destination pcap file; overrides the
            currently configured path
        :param pcap_filter: optional capture (BPF) filter; overrides the
            currently configured filter
        :param dumpcap_log: optional path; if given, dumpcap's
            stdout/stderr go to this file, otherwise to (unread) pipes
        :raises DumpcapTimeoutError: if dumpcap is not running after
            DUMPCAP_START_TIMEOUT seconds
        """
        if pcap_filter:
            self.set_capture_filter(pcap_filter)
        if pcap_path:
            self.set_pcap_path(pcap_path)
        prefix = ""
        # NOTE(review): shell=True with an interpolated command string is
        # injection-prone if any field were attacker-controlled; assumed
        # safe here since all parts come from local configuration.
        command = '{}dumpcap -P -a duration:{} -a filesize:{} -i {} -s 0 -f \'{}\' -w {}'\
            .format(prefix, cm.HARD_VISIT_TIMEOUT, cm.MAX_DUMP_SIZE, self.device,
                    self.pcap_filter, self.pcap_file)
        wl_log.info(command)
        if dumpcap_log:
            log_fi = open(dumpcap_log, "w+")
            self.p0 = subprocess.Popen(command,
                                       stdout=log_fi,
                                       stderr=log_fi,
                                       shell=True)
            # The child process holds its own copy of the descriptor;
            # close ours so the log file handle is not leaked here.
            log_fi.close()
        else:
            self.p0 = subprocess.Popen(command,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       shell=True)
        # Poll until dumpcap comes up, or give up after the timeout.
        timeout = DUMPCAP_START_TIMEOUT  # in seconds
        while timeout > 0 and not self.is_dumpcap_running():
            time.sleep(0.1)
            timeout -= 0.1
        # Bug fix: the old check (`timeout < 0`) could miss the failure
        # case when the float countdown ended at ~0.0 — the loop would
        # exit with dumpcap NOT running yet no exception raised. Test
        # the actual condition instead.
        if not self.is_dumpcap_running():
            raise DumpcapTimeoutError()
        wl_log.debug("dumpcap started in %s seconds" %
                     (DUMPCAP_START_TIMEOUT - timeout))

        self.is_recording = True
 def post_visit(self):
     """Filter the visit's pcap down to packets involving a guard IP.

     Errors during filtering are logged (with the pcap path) rather
     than propagated.
     """
     guards = set(self.controller.get_all_guard_ips())
     wl_log.debug("Found %s guards in the consensus.", len(guards))
     wl_log.info("Filtering packets without a guard IP.")
     try:
         ut.filter_pcap(self.job.pcap_file, guards)
     except Exception as exc:
         wl_log.error("ERROR: filtering pcap file: %s.", exc)
         wl_log.error("Check pcap: %s", self.job.pcap_file)
 def post_visit(self):
     """Filter the visit's pcap down to packets involving a guard
     (or one of our own bridge) IPs.

     Errors during filtering are logged (with the pcap path) rather
     than propagated.
     """
     guards = set(self.controller.get_all_guard_ips())
     # hard-coded bridge ips, used when we set up our own bridges
     guards.update(cm.My_Bridge_Ips)
     wl_log.debug("Found %s guards in the consensus.", len(guards))
     wl_log.info("Filtering packets without a guard IP.")
     try:
         ut.filter_pcap(self.job.pcap_file, guards)
     except Exception as exc:
         wl_log.error("ERROR: filtering pcap file: %s.", exc)
         wl_log.error("Check pcap: %s", self.job.pcap_file)
    def _do_visit(self):
        """Visit the job's URL under a packet sniffer and drive the YouTube
        player until the video has fully played or a hard timeout fires.

        Side effects: writes a pcap via the Sniffer context manager,
        optionally saves periodic screenshots, and logs player-state
        transitions. Timeouts and all other exceptions are logged, never
        propagated.
        """
        with Sniffer(path=self.job.pcap_file,
                     filter=cm.DEFAULT_FILTER,
                     device=self.device,
                     dumpcap_log=self.job.pcap_log):
            sleep(1)  # make sure dumpcap is running
            try:
                screenshot_count = 0
                with ut.timeout(cm.HARD_VISIT_TIMEOUT):
                    # begin loading page
                    self.driver.get(self.job.url)

                    # take first screenshot (best-effort; failure only logged)
                    if self.screenshots:
                        try:
                            self.driver.get_screenshot_as_file(
                                self.job.png_file(screenshot_count))
                            screenshot_count += 1
                        except WebDriverException:
                            wl_log.error("Cannot get screenshot.")

                    # Map player state code -> label for logging.
                    # Presumably the YouTube IFrame API codes (-1 unstarted,
                    # 0 ended, 1 playing, 2 paused, 3 buffering, 5 cued) —
                    # TODO confirm. State -1 picks 'unstarted' via negative
                    # indexing. NOTE(review): state 5 also resolves to
                    # 'unstarted' (index 5), so a cued video would be
                    # mislabeled in the log output.
                    status_to_string = [
                        'ended', 'played', 'paused', 'buffered', 'queued',
                        'unstarted'
                    ]
                    js = "return document.getElementById('movie_player').getPlayerState()"
                    player_status = self.driver.execute_script(js)

                    # continue visit capture until the video has fully played
                    ts = time()
                    while player_status != 0:  # 0 == ended

                        # attempt to simulate a user skipping ads
                        if player_status == -1:
                            try:
                                skipAds = self.driver.find_elements(
                                    By.XPATH,
                                    "//button[@class=\"ytp-ad-skip-button ytp-button\"]"
                                )
                                wl_log.info(len(skipAds))
                                for skipAd in skipAds:
                                    skipAd.click()
                            except WebDriverException as e:
                                # best-effort: the skip button may be stale/absent
                                pass

                        # unpause video if state is unstarted (-1) or is
                        # for some reason paused (2)
                        if player_status == -1 or player_status == 2:
                            self.driver.execute_script(
                                "return document.getElementById('movie_player').playVideo()"
                            )

                        # busy loop delay
                        sleep(1)

                        # check video state again
                        new_ps = self.driver.execute_script(js)

                        # print progress updates every time the video state changes
                        # or on the screenshot interval
                        ts_new = time()
                        if player_status != new_ps or ts_new - ts > cm.SCREENSHOT_INTERVAL:
                            wl_log.debug(
                                'youtube status: {} for {:.2f} seconds'.format(
                                    status_to_string[player_status],
                                    ts_new - ts))
                            ts = ts_new
                            # take periodic screenshots (best-effort)
                            if self.screenshots:
                                try:
                                    self.driver.get_screenshot_as_file(
                                        self.job.png_file(screenshot_count))
                                    screenshot_count += 1
                                except WebDriverException:
                                    wl_log.error("Cannot get screenshot.")
                            # note: player_status only advances on this
                            # branch, so the log always reports transitions
                            player_status = new_ps

            except (cm.HardTimeoutException, TimeoutException):
                wl_log.error("Visit to %s reached hard timeout!", self.job.url)
            except Exception as exc:
                wl_log.error("Unknown exception: %s", exc)
def parse_arguments():
    """Parse command-line arguments and the crawler configuration file.

    Side effects: sets the global log level from --verbose, and
    overwrites cm.CRAWL_DIR and cm.HARD_VISIT_TIMEOUT from the
    --output and --timeout options; those three options are removed
    from the returned namespace.

    :returns: tuple ``(args, config)`` of the parsed
        ``argparse.Namespace`` and the ``RawConfigParser`` loaded from
        ``cm.CONFIG_FILE``
    """
    # Read configuration file
    config = ConfigParser.RawConfigParser()
    config.read(cm.CONFIG_FILE)

    # Parse arguments
    parser = argparse.ArgumentParser(description='Crawl a list of youtube URLs in multiple batches.')

    # List of urls to be crawled.
    # Fix: dropped the dead `default=cm.VIDEO_LIST` — argparse never
    # applies a default to a required option, so it only misled readers.
    parser.add_argument('-u', '--url-file', required=True,
                        help='Path to the file that contains the list of video URLs to crawl.')
    parser.add_argument('-o', '--output',
                        help='Directory to dump the results (default=./results).',
                        default=cm.CRAWL_DIR)
    parser.add_argument('-c', '--config',
                        help="Crawler tor driver and controller configurations.",
                        choices=config.sections(),
                        default="default")
    parser.add_argument('-b', '--tbb-path',
                        help="Path to the Tor Browser Bundle directory.",
                        default=cm.TBB_DIR)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='increase output verbosity',
                        default=False)
    parser.add_argument('-d', '--device', type=str, default='eth0',
                        help='Device interface on which to capture traffic.')
    parser.add_argument('--timeout', type=int, default=10,
                        help='Hard timeout (minutes) before video capture is interrupted.')

    # Crawler features
    parser.add_argument('-x', '--virtual-display',
                        help='Dimensions of the virtual display, eg 1200x800',
                        default=None)
    parser.add_argument('-s', '--screenshots', action='store_true',
                        help='Capture page screenshots',
                        default=False)

    # Limit crawl
    parser.add_argument('--start', type=int,
                        help='Select URLs starting with this line number: (default: 1).',
                        default=1)
    parser.add_argument('--stop', type=int,
                        help='Select URLs after this line number: (default: EOF).',
                        default=maxsize)

    # Parse arguments
    args = parser.parse_args()

    # Set verbose level, then drop the flag from the namespace
    wl_log.setLevel(DEBUG if args.verbose else INFO)
    del args.verbose

    # Change results dir if output
    cm.CRAWL_DIR = args.output
    del args.output

    # Change video load timeout (minutes -> seconds)
    cm.HARD_VISIT_TIMEOUT = args.timeout*60
    del args.timeout

    wl_log.debug("Command line parameters: %s" % argv)
    return args, config