def _load_page(self, url, outdir, trial_num=-1):
    '''Load the specified URL once with the requests library.

    Returns a LoadResult with the final URL, elapsed time, and body size
    on success, or an appropriate failure status otherwise.
    '''
    # load the specified URL
    logging.info('Loading page: %s', url)
    try:
        # Load the page; the Timeout alarm gets a little slack beyond
        # the per-request HTTP timeout.
        with Timeout(seconds=self._timeout + 5):
            headers = {}
            if self._user_agent:
                headers['User-Agent'] = self._user_agent
            response = requests.get(url, timeout=self._timeout,
                                    headers=headers)

            # received response; may not have been successful
            if response.status_code != 200:
                # BUG FIX: was "LoadResultFAILURE_NO_200" (missing dot),
                # which raised NameError instead of reporting the failure.
                return LoadResult(LoadResult.FAILURE_NO_200, url)
            else:
                return LoadResult(LoadResult.SUCCESS, url,
                                  final_url=response.url,
                                  time=response.elapsed.total_seconds(),
                                  size=len(response.content))

    # problem executing request
    except (TimeoutError, requests.exceptions.Timeout):
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s'
                          % (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page_selenium(self, url, outdir):
    '''Load the specified URL with Selenium and report navigation timing.

    Waits for document.readyState == 'complete', then computes the load
    time from the W3C Navigation Timing data.
    '''
    # load the specified URL (with selenium)
    logging.info('Fetching page %s', url)
    try:
        # load page
        with Timeout(seconds=self._timeout + 5):
            self._selenium_driver.get(url)
            WebDriverWait(self._selenium_driver, self._timeout).until(
                lambda d: d.execute_script('return document.readyState')
                == 'complete')
            logging.debug('Page loaded.')

            # get timing information
            # http://www.w3.org/TR/navigation-timing/#processing-model
            timings = self._selenium_driver.execute_script(
                TIMINGS_JAVASCRIPT)
            load_time = (timings['loadEventEnd']
                         - timings['fetchStart']) / 1000.0
            return LoadResult(LoadResult.SUCCESS, url, time=load_time,
                              final_url=self._selenium_driver.current_url)
    # FIX: the TimeoutError and TimeoutException handlers had identical
    # bodies; merged into a single tuple handler.
    except (TimeoutError, TimeoutException):
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except Exception as e:
        logging.exception('Error loading %s: %s' % (url, e))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page_native(self, url, outdir):
    '''Load the specified URL by launching the Firefox binary directly.'''
    logging.info('Fetching page %s', url)
    try:
        command = '%s %s' % (FIREFOX, url)
        #command = '%s -profile %s %s' % (FIREFOX, self._profile_path, url)
        logging.debug('Loading: %s', command)

        # block until the browser process exits (or the alarm fires)
        with Timeout(seconds=self._timeout + 5):
            subprocess.check_output(command.split())  # TODO: error checking

        # TODO: try to get timing info, final URL, HAR, etc.
        logging.debug('Page loaded.')
        return LoadResult(LoadResult.SUCCESS, url)

    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s' % (url, e))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    '''Probe the URL's host with "openssl s_client" over port 443,
    testing TLS False Start and session resumption support.

    Returns a LoadResult whose tls_* flags reflect the probe output.
    '''
    # load the specified URL
    logging.info('Loading page: %s', url)
    try:
        # Load the page: build a raw GET request for the host's path
        parsed_url = urlparse.urlparse(url)
        path = '/' if parsed_url.path == '' else parsed_url.path
        if parsed_url.scheme != 'https':
            # FIX: logging.warn is a deprecated alias of logging.warning
            logging.warning(
                'Specified protocol was not HTTPS; using HTTPS anyway.')
        get_request = 'GET %s HTTP/1.1\r\nHost: %s\r\n\r\n' %\
            (path, parsed_url.netloc)

        options = ''
        if self._test_false_start:
            options += ' -cutthrough'
        if self._test_session_resumption:
            options += ' -reconnect'

        cmd = '%s s_client -connect %s:443 %s' %\
            (OPENSSL_BINARY, parsed_url.netloc, options)
        logging.debug('Running tcploader: %s', cmd)
        with Timeout(seconds=self._timeout + 5):
            p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            (stdout, stderr) = p.communicate(input=get_request)
            #output = subprocess.check_output(cmd, shell=True)
            logging.debug('s_client returned: %s', stdout.strip())

        # TODO: better OpenSSL error checking here
        return LoadResult(
            LoadResult.SUCCESS, url,
            tls_false_start_supported=('false_start=yes' in stdout),
            tls_session_resumption_supported=(
                'session_resumption=yes' in stdout))

    # problem running tcp_loader
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s' %
                          (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s' %
                          (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    finally:
        # best-effort cleanup of any lingering openssl process
        try:
            subprocess.check_output('killall openssl'.split())
        except Exception as e:
            logging.debug(
                'Error killing openssl (process might not exist): %s', e)
def _load_page(self, url, outdir, trial_num=None, tag=None):
    '''Fetch a page with chrome-har-capturer, optionally saving a HAR.'''
    # path for new HAR file ('/dev/null' discards it when HARs are off)
    if self._save_har:
        harpath = self._outfile_path(url, suffix='.har',
                                     trial=trial_num, tag=tag)
    else:
        harpath = '/dev/null'
    logging.debug('Will save HAR to %s', harpath)

    # assemble chrome-har-capturer flags
    flags = []
    onload_delay = self._delay_after_onload
    if self._delay_first_trial_only and trial_num != 0:
        onload_delay = 0
    flags.append('-d %i' % onload_delay)
    if self._disable_network_cache:
        flags.append('--no-network-cache')
    if self._save_content == 'always' or\
       (self._save_content == 'first' and trial_num == 0):
        flags.append('-c')
    capturer_args = ' ' + ' '.join(flags)

    # load the specified URL
    logging.info('Fetching page %s (%s)', url, tag)
    try:
        capturer_cmd = '%s -o "%s" %s %s' %\
            (CHROME_HAR_CAPTURER, harpath, capturer_args, url)
        logging.debug('Running capturer: %s', capturer_cmd)
        with Timeout(seconds=self._timeout + 5):
            subprocess.check_call(capturer_cmd, shell=True,
                                  stdout=self._stdout_file,
                                  stderr=subprocess.STDOUT)
    except TimeoutError:
        logging.error('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s' % (url, e))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)

    logging.debug('Page loaded.')
    return LoadResult(LoadResult.SUCCESS, url, har=harpath)
def _load_page(self, url, outdir, trial_num=-1):
    '''Load the specified URL with the external tcploader binary and
    parse the "key=value;key=value;..." stats it prints on its last line.
    '''
    logging.info('Loading page: %s', url)
    try:
        # Load the page
        parts = urlparse.urlparse(url)
        path = parts.path if parts.path != '' else '/'
        cmd = '%s %s %s %s' % (TCPLOADER, parts.scheme, parts.netloc, path)
        if self._user_agent:
            cmd += ' "%s"' % self._user_agent
        logging.debug('Running tcploader: %s', cmd)

        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(cmd, shell=True)
        logging.debug('tcploader returned: %s', output.strip())

        # the stats live on the final line of output
        stats = {}
        for field in output.strip().split('\n')[-1].split(';'):
            pieces = field.split('=')
            stats[pieces[0]] = pieces[1]

        return LoadResult(LoadResult.SUCCESS, url,
                          time=float(stats['time_seconds']),
                          size=int(stats['size']),
                          server=stats['server'],
                          tcp_fast_open_supported=bool(
                              int(stats['tcp_fast_open_used'])))

    # problem running tcp_loader
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s' %
                          (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s' %
                          (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    finally:
        # best-effort cleanup of any lingering loader process
        try:
            subprocess.check_output('killall tcp_loader'.split())
        except Exception as e:
            logging.debug(
                'Error killing tcp_loader (process might not exist): %s', e)
def _load_page(self, url, outdir, trial_num=-1):
    '''Load the specified URL through the ZombieJS loader script and
    return its raw output in the LoadResult.
    '''
    logging.info('Loading page: %s', url)
    try:
        # Cause a restart of the proxy
        #if self._proxy:
        #    conn = httplib.HTTPConnection(self._proxy.split(':')[0]+':5678')  # Assume restart always listens on this port for now
        #    conn.request("GET", "/")
        #    resp = conn.getresponse()  # Don't need to do anything with it. Just want to know that the request was acknowledge

        # Load the page
        zombie_cmd = [ENV, ZombieJS, ZombieLOADER, url,
                      '-t', str(self._timeout), '-v']
        if self._proxy:
            zombie_cmd += ['-p', self._proxy]
        logging.debug('Running ZombieJS: %s', zombie_cmd)
        #with Timeout(seconds=self._timeout+5): The process should always end
        output = subprocess.check_output(zombie_cmd)
        return LoadResult(LoadResult.SUCCESS, url, raw=output)

    # problem running ZombieJS
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s' %
                          (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s' %
                          (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    '''Load the specified URL once with curl and parse the stats line
    curl writes via -w (http_code, final_url, time, size).
    '''
    logging.info('Loading page: %s', url)
    try:
        # prepare the curl command
        curl_cmd = CURL
        curl_cmd += ' -s -S'  # don't show progress meter
        curl_cmd += ' -L'  # follow redirects
        curl_cmd += ' -o /dev/null'  # don't print file to stdout
        curl_cmd += ' -w http_code=%{http_code};final_url=%{url_effective};time=%{time_total};size=%{size_download}'  # format for stats at end
        curl_cmd += ' --connect-timeout %i' % self._timeout  # TCP connect timeout
        if self._disable_network_cache:
            curl_cmd += ' --header "Cache-Control: max-age=0"'  # disable network caches
        if self._user_agent:
            curl_cmd += ' --user-agent "%s"' % self._user_agent  # custom user agent
        curl_cmd += ' %s' % url

        # load the page
        logging.debug('Running curl: %s', curl_cmd)
        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(shlex.split(curl_cmd))
        logging.debug('curl returned: %s', output.strip())

        # curl returned, but may or may not have succeeded;
        # the stats are on the last line as 'key=value;key=value;...'
        returnvals = {
            field.split('=')[0]: field.split('=')[1]
            for field in output.split('\n')[-1].split(';')
        }
        if returnvals['http_code'] != '200':
            return LoadResult(LoadResult.FAILURE_NO_200, url)
        else:
            # Report status and time.
            # FIX: use str.replace instead of the deprecated
            # string.replace module function (removed in Python 3);
            # some locales print the total time with a decimal comma.
            # FIX: report size as an int for consistency with the
            # other loaders.
            return LoadResult(LoadResult.SUCCESS, url,
                              final_url=returnvals['final_url'],
                              time=float(
                                  returnvals['time'].replace(',', '.')),
                              size=int(returnvals['size']))

    # problem running curl
    except TimeoutError:
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        # curl exit code 28 means "operation timed out"
        if e.returncode == 28:
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        else:
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s' %
                          (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    '''Load the specified URL with the node.js HTTP/2 client and parse
    the 'key=value;key=value;...' stats line it prints.
    '''
    logging.info('Loading page: %s', url)
    try:
        # prepare the NODE command
        node_cmd = NODE + ' '
        node_cmd += NODEHTTP2 + ' '  # Location of node.js client HTTP2 program
        node_cmd += url

        # load the page
        logging.debug('Running node.js: %s', node_cmd)
        with Timeout(seconds=self._timeout + 5):
            output = subprocess.check_output(shlex.split(node_cmd))
        logging.debug('NODE returned: %s', output.strip())

        # NODE returned, but may or may not have succeeded
        returnvals = {field.split('=')[0]: field.split('=')[1]
                      for field in output.split(';')}
        if returnvals['http_code'] != '200':
            return LoadResult(LoadResult.FAILURE_NO_200, url)
        else:
            # Report status and time.
            # FIX: use str.replace instead of the deprecated
            # string.replace module function (removed in Python 3);
            # some locales print the time with a decimal comma.
            return LoadResult(LoadResult.SUCCESS, url,
                              final_url=returnvals['final_url'],
                              time=float(
                                  returnvals['time'].replace(',', '.')),
                              size=returnvals['size'])

    # problem running NODE
    except TimeoutError:
        logging.exception('Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s' % (url, e, e.output))
        # NOTE(review): 28 mirrors curl's "timed out" exit code; confirm
        # the node client actually uses the same convention.
        if e.returncode == 28:
            return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
        else:
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s' %
                          (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
def _load_page(self, url, outdir, trial_num=-1):
    '''Load the specified URL with PhantomJS (loadspeed.js), optionally
    saving a HAR file and a screenshot.

    loadspeed.js prints the HAR, a '*=*=*=*' separator, then a status
    line of the form 'SUCCESS:key=value;...' or 'FAILURE:message'.
    '''
    # paths for the new HAR file and screenshot
    safeurl = self._sanitize_url(url)
    filename = '%s_trial%d.har' % (safeurl, trial_num)
    imagename = '%s_trial%d.png' % (safeurl, trial_num)
    harpath = os.path.join(outdir, filename)
    if self._save_har:
        logging.debug('Will save HAR to %s', harpath)
    if self._save_screenshot:
        imagepath = os.path.join(outdir, imagename)
        logging.debug('Will save screenshot to %s', imagepath)
    else:
        imagepath = '/dev/null'

    # load the specified URL
    logging.info('Loading page: %s', url)
    try:
        # Load the page
        phantom_cmd = '%s --ssl-protocol=any %s %s %s %d' %\
            (PHANTOMJS, PHANTOMLOADER, url, imagepath, self._timeout)
        phantom_cmd = phantom_cmd.split()
        if self._user_agent:
            # BUG FIX: previously appended ' "%s"' % self._user_agent,
            # which passed the quotes and leading space literally to
            # PhantomJS (the command runs without a shell).
            phantom_cmd.append(self._user_agent)
        logging.debug('Running PhantomJS: %s', phantom_cmd)
        with Timeout(seconds=self._timeout+5):
            output = subprocess.check_output(phantom_cmd)
        har, statusline = output.split('*=*=*=*')
        logging.debug('loadspeed.js returned: %s', statusline.strip())

        # PhantomJS returned, but may or may not have succeeded
        fields = statusline.strip().split(':')
        status = fields[0]
        message = ':'.join(fields[1:])
        if status == 'FAILURE':
            if message == 'timeout':
                logging.error('Timeout fetching %s', url)
                return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
            else:
                logging.error('Error fetching %s: %s', url, message)
                return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
        elif status == 'SUCCESS':
            # Save the HAR (the with-block closes the file;
            # FIX: removed the no-op "f.closed" expression)
            if self._save_har:
                with open(harpath, 'w') as f:
                    f.write(har)

            # Report status and time
            returnvals = {field.split('=')[0]: field.split('=')[1]
                          for field in message.split(';')}
            return LoadResult(LoadResult.SUCCESS, url,
                              final_url=returnvals['final_url'],
                              time=float(returnvals['time'])/1000.0,
                              har=harpath, img=imagepath)
        else:
            logging.error('loadspeed.js returned unexpected output: %s',
                          output)
            return LoadResult(LoadResult.FAILURE_UNKNOWN, url)

    # problem running PhantomJS
    except TimeoutError:
        logging.exception('* Timeout fetching %s', url)
        return LoadResult(LoadResult.FAILURE_TIMEOUT, url)
    except subprocess.CalledProcessError as e:
        logging.exception('Error loading %s: %s\n%s\n%s' %
                          (url, e, e.output, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)
    except Exception as e:
        logging.exception('Error loading %s: %s\n%s' %
                          (url, e, traceback.format_exc()))
        return LoadResult(LoadResult.FAILURE_UNKNOWN, url)