예제 #1
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Fetch ``url`` with ``requests`` and summarize the outcome.

        Args:
            url: URL to request.
            outdir: Not used by this loader.
            trial_num: Not used by this loader.

        Returns:
            A ``LoadResult`` whose status is ``SUCCESS`` for an HTTP 200
            response (carrying the final URL, the elapsed time in seconds
            and the length of the response content), ``FAILURE_NO_200`` for
            any other status code, ``FAILURE_TIMEOUT`` when the request
            times out, and ``FAILURE_UNKNOWN`` for any other error.
        """
        logging.info('Loading page: %s', url)
        try:
            # Timeout is given 5 s more than the timeout passed to requests,
            # presumably as a backstop in case requests does not give up.
            with Timeout(seconds=self._timeout + 5):
                headers = {}
                if self._user_agent:
                    headers['User-Agent'] = self._user_agent
                response = requests.get(url,
                                        timeout=self._timeout,
                                        headers=headers)

            # A response arrived, but only a 200 counts as a successful load.
            if response.status_code != 200:
                return LoadResult(LoadResult.FAILURE_NO_200, url)

            return LoadResult(LoadResult.SUCCESS,
                              url,
                              final_url=response.url,
                              time=response.elapsed.total_seconds(),
                              size=len(response.content))

        # problem executing request
        except (TimeoutError, requests.exceptions.Timeout):
            logging.exception('Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except Exception as e:
            # logging.exception already appends the active traceback, so it
            # is not formatted into the message a second time.
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
예제 #2
0
    def _load_page_selenium(self, url, outdir):
        """Load ``url`` through the Selenium driver and measure the load time.

        The page is considered loaded once ``document.readyState`` reports
        ``'complete'``.  The load time is ``loadEventEnd - fetchStart`` from
        the values returned by ``TIMINGS_JAVASCRIPT``, divided by 1000.

        Args:
            url: URL to open in the driver.
            outdir: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` (plus the load time and the
            driver's current URL), ``FAILURE_TIMEOUT`` when either
            ``TimeoutError`` or ``TimeoutException`` is raised, or
            ``FAILURE_UNKNOWN`` for any other exception.
        """
        logging.info('Fetching page %s', url)
        try:
            with Timeout(seconds=self._timeout + 5):
                self._selenium_driver.get(url)

                def _document_ready(driver):
                    state = driver.execute_script('return document.readyState')
                    return state == 'complete'

                waiter = WebDriverWait(self._selenium_driver, self._timeout)
                waiter.until(_document_ready)
                logging.debug('Page loaded.')

            # Navigation Timing processing model:
            # http://www.w3.org/TR/navigation-timing/#processing-model
            timings = self._selenium_driver.execute_script(TIMINGS_JAVASCRIPT)
            elapsed = timings['loadEventEnd'] - timings['fetchStart']
            load_time = elapsed / 1000.0

            return LoadResult(
                LoadResult.SUCCESS,
                url,
                time=load_time,
                final_url=self._selenium_driver.current_url,
            )

        except TimeoutError:
            # Raised by the Timeout guard; the '*' prefix tells it apart in
            # the log from the TimeoutException case below.
            logging.exception('* Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except TimeoutException:
            logging.exception('Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except Exception as e:
            logging.exception('Error loading %s: %s' % (url, e))
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
예제 #3
0
    def _load_page_native(self, url, outdir):
        """Load ``url`` by launching the ``FIREFOX`` command on it directly.

        The command line is ``FIREFOX`` followed by the URL, split on
        whitespace and run without a shell.

        Args:
            url: URL handed to the browser command.
            outdir: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` when the command exits normally,
            ``FAILURE_TIMEOUT`` when ``TimeoutError`` is raised, or
            ``FAILURE_UNKNOWN`` for a non-zero exit or any other exception.
        """
        logging.info('Fetching page %s', url)
        try:
            firefox_cmd = '%s %s' % (FIREFOX, url)
            # A variant passing "-profile <self._profile_path>" existed here
            # but is disabled.
            logging.debug('Loading: %s', firefox_cmd)
            argv = firefox_cmd.split()
            with Timeout(seconds=self._timeout + 5):
                subprocess.check_output(argv)

            # TODO: error checking
            # TODO: try to get timing info, final URL, HAR, etc.

            logging.debug('Page loaded.')
            return LoadResult(LoadResult.SUCCESS, url)

        except TimeoutError:
            logging.exception('* Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except subprocess.CalledProcessError as e:
            # Non-zero exit status: include whatever the process printed.
            details = (url, e, e.output)
            logging.exception('Error loading %s: %s\n%s' % details)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        except Exception as e:
            logging.exception('Error loading %s: %s' % (url, e))
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
예제 #4
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Probe the host of ``url`` with ``openssl s_client``.

        A minimal ``GET`` request for the URL's path is written to the
        s_client process on port 443, and its standard output is searched
        for the markers ``false_start=yes`` and ``session_resumption=yes``.

        The command is executed as an argument list rather than through a
        shell, so characters in the URL cannot be interpreted as shell
        syntax.  A side effect is that a missing binary now raises and is
        reported as ``FAILURE_UNKNOWN`` instead of yielding empty output.

        Args:
            url: URL whose host is probed; a non-HTTPS scheme only triggers
                a warning, the connection always targets port 443.
            outdir: Not used by this loader.
            trial_num: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` and the two TLS feature flags,
            ``FAILURE_TIMEOUT`` when ``TimeoutError`` is raised, or
            ``FAILURE_UNKNOWN`` for any other exception.
        """
        import shlex  # local import: only used to word-split OPENSSL_BINARY

        logging.info('Loading page: %s', url)
        try:
            parsed_url = urlparse.urlparse(url)
            path = parsed_url.path or '/'
            if parsed_url.scheme != 'https':
                logging.warning(
                    'Specified protocol was not HTTPS; using HTTPS anyway.')
            get_request = 'GET %s HTTP/1.1\r\nHost: %s\r\n\r\n' % (
                path, parsed_url.netloc)

            # OPENSSL_BINARY is word-split the way the shell used to split
            # it; everything derived from the URL stays a single argument.
            cmd = shlex.split(OPENSSL_BINARY) + [
                's_client',
                '-connect', '%s:443' % parsed_url.netloc,
            ]
            if self._test_false_start:
                cmd.append('-cutthrough')
            if self._test_session_resumption:
                cmd.append('-reconnect')

            logging.debug('Running s_client: %s', cmd)
            with Timeout(seconds=self._timeout + 5):
                p = subprocess.Popen(cmd,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
                stdout, _ = p.communicate(input=get_request)

            logging.debug('s_client returned: %s', stdout.strip())
            # TODO: better OpenSSL error checking here
            return LoadResult(
                LoadResult.SUCCESS,
                url,
                tls_false_start_supported=('false_start=yes' in stdout),
                tls_session_resumption_supported=('session_resumption=yes'
                                                  in stdout))

        # problem running s_client
        except TimeoutError:
            logging.exception('* Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except Exception as e:
            # Popen/communicate never raise CalledProcessError, so a single
            # generic handler suffices.  logging.exception already appends
            # the traceback.
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        finally:
            # Best-effort cleanup of any openssl process left running (for
            # example after a timeout); failure here is only logged.
            try:
                subprocess.check_output(['killall', 'openssl'])
            except Exception as e:
                logging.debug(
                    'Error killing openssl (process might not exist): %s', e)
예제 #5
0
    def _load_page(self, url, outdir, trial_num=None, tag=None):
        """Load ``url`` with chrome-har-capturer, optionally saving a HAR.

        The capturer is executed as an argument list rather than through a
        shell, so characters that are common in URLs (``&``, ``;``, ``?``)
        reach the capturer intact instead of being interpreted by the shell.

        Args:
            url: URL to load.
            outdir: Not used directly; the HAR path comes from
                ``self._outfile_path``.
            trial_num: Trial index.  Trial 0 is treated specially by the
                "first trial only" delay and the ``'first'`` content option.
            tag: Passed to ``self._outfile_path`` and included in the log.

        Returns:
            ``LoadResult`` with ``SUCCESS`` and the HAR path (``/dev/null``
            when HAR saving is off), ``FAILURE_TIMEOUT`` when
            ``TimeoutError`` is raised, or ``FAILURE_UNKNOWN`` for a
            non-zero exit status or any other exception.
        """
        import shlex  # local import: only used to word-split the binary

        # path for new HAR file
        if self._save_har:
            harpath = self._outfile_path(url,
                                         suffix='.har',
                                         trial=trial_num,
                                         tag=tag)
        else:
            harpath = '/dev/null'
        logging.debug('Will save HAR to %s', harpath)

        # build chrome-har-capturer arguments
        onload_delay = self._delay_after_onload
        if self._delay_first_trial_only and trial_num != 0:
            onload_delay = 0
        capturer_args = ['-o', harpath, '-d', '%i' % onload_delay]

        if self._disable_network_cache:
            capturer_args.append('--no-network-cache')

        if self._save_content == 'always' or (
                self._save_content == 'first' and trial_num == 0):
            capturer_args.append('-c')

        # load the specified URL
        logging.info('Fetching page %s (%s)', url, tag)
        try:
            # CHROME_HAR_CAPTURER is word-split the way the shell used to
            # split it; the HAR path and URL stay single arguments.
            capturer_cmd = (shlex.split(CHROME_HAR_CAPTURER) + capturer_args +
                            [url])
            logging.debug('Running capturer: %s', capturer_cmd)
            with Timeout(seconds=self._timeout + 5):
                subprocess.check_call(capturer_cmd,
                                      stdout=self._stdout_file,
                                      stderr=subprocess.STDOUT)

        except TimeoutError:
            logging.error('Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except Exception as e:
            # Covers CalledProcessError too.  check_call does not capture
            # output (it goes to self._stdout_file), so there is no
            # e.output worth logging.
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        logging.debug('Page loaded.')

        return LoadResult(LoadResult.SUCCESS, url, har=harpath)
예제 #6
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Load ``url`` with the external ``TCPLOADER`` program.

        The program receives the URL's scheme, netloc and path (``/`` when
        the path is empty) and, if configured, the user agent.  The last
        line of its output is parsed as ``key=value`` pairs separated by
        ``;``; the keys ``time_seconds``, ``size``, ``server`` and
        ``tcp_fast_open_used`` are read from it.

        The command is executed as an argument list rather than through a
        shell, so neither the URL nor the user agent can be interpreted as
        shell syntax.

        Args:
            url: URL to load.
            outdir: Not used by this loader.
            trial_num: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` and the parsed statistics,
            ``FAILURE_TIMEOUT`` when ``TimeoutError`` is raised, or
            ``FAILURE_UNKNOWN`` for a non-zero exit status, unparsable
            output or any other exception.
        """
        import shlex  # local import: only used to word-split TCPLOADER

        logging.info('Loading page: %s', url)
        try:
            parsed_url = urlparse.urlparse(url)
            path = parsed_url.path or '/'

            # TCPLOADER is word-split the way the shell used to split it;
            # everything else stays a single argument.
            cmd = shlex.split(TCPLOADER) + [
                parsed_url.scheme, parsed_url.netloc, path
            ]
            if self._user_agent:
                cmd.append(self._user_agent)

            logging.debug('Running tcploader: %s', cmd)
            with Timeout(seconds=self._timeout + 5):
                output = subprocess.check_output(cmd)

            logging.debug('tcploader returned: %s', output.strip())

            # Split each field on the first '=' only so that values which
            # themselves contain '=' are kept whole.  A field without '='
            # raises ValueError and is reported as FAILURE_UNKNOWN below.
            stats_line = output.strip().split('\n')[-1]
            returnvals = dict(
                field.split('=', 1) for field in stats_line.split(';'))

            return LoadResult(
                LoadResult.SUCCESS,
                url,
                time=float(returnvals['time_seconds']),
                size=int(returnvals['size']),
                server=returnvals['server'],
                tcp_fast_open_supported=bool(
                    int(returnvals['tcp_fast_open_used'])))

        # problem running tcp_loader
        except TimeoutError:
            logging.exception('* Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except subprocess.CalledProcessError as e:
            # logging.exception already appends the traceback.
            logging.exception('Error loading %s: %s\n%s', url, e, e.output)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        except Exception as e:
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        finally:
            # Best-effort cleanup of any tcp_loader process left running
            # (for example after a timeout); failure here is only logged.
            try:
                subprocess.check_output(['killall', 'tcp_loader'])
            except Exception as e:
                logging.debug(
                    'Error killing tcp_loader (process might not exist): %s',
                    e)
예제 #7
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Load ``url`` by running the ZombieJS loader script.

        The command is ``ENV ZombieJS ZombieLOADER <url> -t <timeout> -v``,
        with ``-p <proxy>`` appended when ``self._proxy`` is set.  No
        ``Timeout`` guard wraps the call; the timeout is only handed to the
        script via ``-t``.

        Args:
            url: URL to load.
            outdir: Not used by this loader.
            trial_num: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` and the raw process output,
            ``FAILURE_TIMEOUT`` if ``TimeoutError`` is raised, or
            ``FAILURE_UNKNOWN`` for a non-zero exit status or any other
            exception.
        """
        logging.info('Loading page: %s', url)
        try:
            # NOTE: a proxy restart (an HTTP GET to port 5678 on the proxy
            # host) used to be triggered at this point; it is disabled.

            Zombie_cmd = [ENV, ZombieJS, ZombieLOADER, url]
            Zombie_cmd += ['-t', str(self._timeout), '-v']
            if self._proxy:
                Zombie_cmd.extend(['-p', self._proxy])

            logging.debug('Running ZombieJS: %s', Zombie_cmd)
            # No Timeout wrapper here: the process should always end.
            raw_output = subprocess.check_output(Zombie_cmd)

            return LoadResult(LoadResult.SUCCESS, url, raw=raw_output)

        # problem running ZombieJS
        except TimeoutError:
            logging.exception('* Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except subprocess.CalledProcessError as e:
            details = (url, e, e.output, traceback.format_exc())
            logging.exception('Error loading %s: %s\n%s\n%s' % details)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        except Exception as e:
            details = (url, e, traceback.format_exc())
            logging.exception('Error loading %s: %s\n%s' % details)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
예제 #8
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Load ``url`` with curl and parse the statistics it prints.

        curl is asked (``-w``) to print
        ``http_code=...;final_url=...;time=...;size=...`` after the
        transfer; the body itself is discarded to ``/dev/null``.

        Args:
            url: URL to load.
            outdir: Not used by this loader.
            trial_num: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` (final URL, time as a float,
            size as an int) when curl reports HTTP 200, ``FAILURE_NO_200``
            for any other code, ``FAILURE_TIMEOUT`` when ``TimeoutError``
            is raised or curl exits with status 28, or ``FAILURE_UNKNOWN``
            for any other failure.
        """
        logging.info('Loading page: %s', url)
        try:
            # Build an argument list directly: only CURL is word-split, so
            # quotes or spaces in the user agent or URL cannot break the
            # command apart.
            curl_cmd = shlex.split(CURL) + [
                '-s', '-S',  # don't show progress meter
                '-L',  # follow redirects
                '-o', '/dev/null',  # don't print file to stdout
                # format for stats at end
                '-w', ('http_code=%{http_code};final_url=%{url_effective};'
                       'time=%{time_total};size=%{size_download}'),
                # TCP connect timeout
                '--connect-timeout', '%i' % self._timeout,
            ]
            if self._disable_network_cache:
                # disable network caches
                curl_cmd += ['--header', 'Cache-Control: max-age=0']
            if self._user_agent:
                curl_cmd += ['--user-agent', self._user_agent]
            curl_cmd.append(url)

            # load the page
            logging.debug('Running curl: %s', curl_cmd)
            with Timeout(seconds=self._timeout + 5):
                output = subprocess.check_output(curl_cmd)
                logging.debug('curl returned: %s', output.strip())

            # curl returned, but may or may not have succeeded.
            # Split each field on the first '=' only: final_url routinely
            # contains '=' in its query string and must not be truncated.
            # (A ';' inside the URL still breaks the field split and ends
            # up as FAILURE_UNKNOWN via the generic handler.)
            stats_line = output.strip().split('\n')[-1]
            returnvals = dict(
                field.split('=', 1) for field in stats_line.split(';'))

            if returnvals['http_code'] != '200':
                return LoadResult(LoadResult.FAILURE_NO_200, url)

            # Some locales print the time with a decimal comma.
            return LoadResult(LoadResult.SUCCESS,
                              url,
                              final_url=returnvals['final_url'],
                              time=float(
                                  returnvals['time'].replace(',', '.')),
                              size=int(returnvals['size']))

        # problem running curl
        except TimeoutError:
            logging.exception('Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except subprocess.CalledProcessError as e:
            logging.exception('Error loading %s: %s\n%s', url, e, e.output)
            # curl exit status 28 means the operation timed out.
            if e.returncode == 28:
                return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        except Exception as e:
            # logging.exception already appends the traceback.
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
예제 #9
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Load ``url`` with the node.js HTTP/2 client and parse its output.

        The client's output is parsed as ``key=value`` pairs separated by
        ``;``; the keys ``http_code``, ``final_url``, ``time`` and ``size``
        are read from it.

        Args:
            url: URL to load.
            outdir: Not used by this loader.
            trial_num: Not used by this loader.

        Returns:
            ``LoadResult`` with ``SUCCESS`` (final URL, time as a float,
            size as an int) when the client reports HTTP 200,
            ``FAILURE_NO_200`` for any other code, ``FAILURE_TIMEOUT`` when
            ``TimeoutError`` is raised or the process exits with status 28,
            or ``FAILURE_UNKNOWN`` for any other failure.
        """
        logging.info('Loading page: %s', url)
        try:
            # NODE and NODEHTTP2 (location of the node.js client HTTP2
            # program) are word-split as before; the URL is kept as one
            # argument so quotes or spaces in it cannot break the command.
            node_cmd = shlex.split(NODE) + shlex.split(NODEHTTP2) + [url]

            # load the page
            logging.debug('Running node.js: %s', node_cmd)
            with Timeout(seconds=self._timeout + 5):
                output = subprocess.check_output(node_cmd)
                logging.debug('NODE returned: %s', output.strip())

            # NODE returned, but may or may not have succeeded.
            # Strip first so a trailing newline does not end up in the last
            # value, and split each field on the first '=' only so that a
            # final_url containing '=' is not truncated.
            returnvals = dict(
                field.split('=', 1) for field in output.strip().split(';'))

            if returnvals['http_code'] != '200':
                return LoadResult(LoadResult.FAILURE_NO_200, url)

            # The time may be printed with a decimal comma.
            return LoadResult(LoadResult.SUCCESS,
                              url,
                              final_url=returnvals['final_url'],
                              time=float(
                                  returnvals['time'].replace(',', '.')),
                              size=int(returnvals['size']))

        # problem running NODE
        except TimeoutError:
            logging.exception('Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except subprocess.CalledProcessError as e:
            logging.exception('Error loading %s: %s\n%s', url, e, e.output)
            # NOTE(review): 28 is curl's timeout exit status; confirm the
            # node client really uses the same convention.
            if e.returncode == 28:
                return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        except Exception as e:
            # logging.exception already appends the traceback.
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
예제 #10
0
    def _load_page(self, url, outdir, trial_num=-1):
        """Load ``url`` with PhantomJS, optionally saving a HAR/screenshot.

        The loader script's output is expected to be the HAR, the separator
        ``*=*=*=*`` and a status line of the form ``SUCCESS:<fields>`` or
        ``FAILURE:<message>``.  On success the fields are ``key=value``
        pairs separated by ``;``, from which ``final_url`` and ``time`` are
        read (``time`` is divided by 1000).

        Args:
            url: URL to load.
            outdir: Directory in which the HAR and screenshot are written.
            trial_num: Trial index used in the output file names.

        Returns:
            ``LoadResult`` with ``SUCCESS`` (final URL, time, HAR path and
            image path), ``FAILURE_TIMEOUT`` when the script reports
            ``timeout`` or ``TimeoutError`` is raised, or
            ``FAILURE_UNKNOWN`` for any other failure or unexpected output.
        """
        # paths for the new HAR file and screenshot
        safeurl = self._sanitize_url(url)
        filename = '%s_trial%d.har' % (safeurl, trial_num)
        imagename = '%s_trial%d.png' % (safeurl, trial_num)

        harpath = os.path.join(outdir, filename)
        if self._save_har:
            logging.debug('Will save HAR to %s', harpath)

        if self._save_screenshot:
            imagepath = os.path.join(outdir, imagename)
            logging.debug('Will save screenshot to %s', imagepath)
        else:
            imagepath = '/dev/null'

        # load the specified URL
        logging.info('Loading page: %s', url)
        try:
            # PHANTOMJS and PHANTOMLOADER are word-split as before; values
            # that vary per call are kept as single arguments.
            phantom_cmd = (PHANTOMJS.split() + ['--ssl-protocol=any'] +
                           PHANTOMLOADER.split() +
                           [url, imagepath, '%d' % self._timeout])
            if self._user_agent:
                # No shell is involved, so the user agent is passed bare:
                # wrapping it in quotes would hand PhantomJS the quote
                # characters (and a leading space) as part of the value.
                phantom_cmd.append(self._user_agent)

            logging.debug('Running PhantomJS: %s', phantom_cmd)
            with Timeout(seconds=self._timeout + 5):
                output = subprocess.check_output(phantom_cmd)
                har, statusline = output.split('*=*=*=*')
                logging.debug('loadspeed.js returned: %s', statusline.strip())

            # PhantomJS returned, but may or may not have succeeded
            fields = statusline.strip().split(':')
            status = fields[0]
            message = ':'.join(fields[1:])

            if status == 'FAILURE':
                if message == 'timeout':
                    logging.error('Timeout fetching %s', url)
                    return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
                logging.error('Error fetching %s: %s', url, message)
                return LoadResult(LoadResult.FAILURE_UNKNOWN, url)

            if status == 'SUCCESS':
                # Save the HAR
                if self._save_har:
                    with open(harpath, 'w') as f:
                        f.write(har)

                # Report status and time.  Split each field on the first
                # '=' only so that a final_url containing '=' is kept whole.
                returnvals = dict(
                    field.split('=', 1) for field in message.split(';'))
                return LoadResult(LoadResult.SUCCESS,
                                  url,
                                  final_url=returnvals['final_url'],
                                  time=float(returnvals['time']) / 1000.0,
                                  har=harpath,
                                  img=imagepath)

            logging.error('loadspeed.js returned unexpected output: %s',
                          output)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)

        # problem running PhantomJS
        except TimeoutError:
            logging.exception('* Timeout fetching %s', url)
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        except subprocess.CalledProcessError as e:
            # logging.exception already appends the traceback.
            logging.exception('Error loading %s: %s\n%s', url, e, e.output)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        except Exception as e:
            logging.exception('Error loading %s: %s', url, e)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)