class DevtoolsBrowser(object):
    """Devtools Browser base"""
    CONNECT_TIME_LIMIT = 30

    def __init__(self, options, job, use_devtools_video=True):
        self.options = options
        self.job = job
        self.devtools = None
        self.task = None
        self.event_name = None
        self.browser_version = None
        self.device_pixel_ratio = None
        self.use_devtools_video = use_devtools_video
        self.lighthouse_command = None
        self.support_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'support')
        self.script_dir = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'js')

    def connect(self, task):
        """Connect to the dev tools interface"""
        ret = False
        from internal.devtools import DevTools
        self.devtools = DevTools(self.options, self.job, task,
                                 self.use_devtools_video)
        if task['running_lighthouse']:
            ret = self.devtools.wait_for_available(self.CONNECT_TIME_LIMIT)
        else:
            if self.devtools.connect(self.CONNECT_TIME_LIMIT):
                logging.debug("Devtools connected")
                ret = True
            else:
                task['error'] = "Error connecting to dev tools interface"
                logging.critical(task['error'])
                self.devtools = None
        return ret

    def disconnect(self):
        """Disconnect from dev tools"""
        if self.devtools is not None:
            # Always navigate to about:blank after finishing in case the tab is
            # remembered across sessions
            if self.task is not None and self.task['error'] is None:
                self.devtools.send_command('Page.navigate',
                                           {'url': 'about:blank'},
                                           wait=True)
            self.devtools.close()
            self.devtools = None

    def prepare_browser(self, task):
        """Prepare the running browser (mobile emulation, UA string, etc"""
        if self.devtools is not None:
            # Figure out the native viewport size
            if not self.options.android:
                size = self.devtools.execute_js(
                    "[window.innerWidth, window.innerHeight]")
                if size is not None and len(size) == 2:
                    task['actual_viewport'] = {
                        "width": size[0],
                        "height": size[1]
                    }
            # Get the native device pixel ratio
            if self.device_pixel_ratio is None:
                self.device_pixel_ratio = 1.0
                try:
                    ratio = self.devtools.execute_js('window.devicePixelRatio')
                    if ratio is not None:
                        self.device_pixel_ratio = max(1.0, float(ratio))
                except Exception:
                    pass
            # Clear the caches
            if not task['cached']:
                self.devtools.send_command("Network.clearBrowserCache", {},
                                           wait=True)
                self.devtools.send_command("Network.clearBrowserCookies", {},
                                           wait=True)

            # Mobile Emulation
            if not self.options.android and \
                    'mobile' in self.job and self.job['mobile'] and \
                    'width' in self.job and 'height' in self.job and \
                    'dpr' in self.job:
                width = int(re.search(r'\d+', str(self.job['width'])).group())
                height = int(
                    re.search(r'\d+', str(self.job['height'])).group())
                self.devtools.send_command(
                    "Emulation.setDeviceMetricsOverride", {
                        "width": width,
                        "height": height,
                        "screenWidth": width,
                        "screenHeight": height,
                        "scale": 1,
                        "positionX": 0,
                        "positionY": 0,
                        "deviceScaleFactor": float(self.job['dpr']),
                        "mobile": True,
                        "fitWindow": False,
                        "screenOrientation": {
                            "angle": 0,
                            "type": "portraitPrimary"
                        }
                    },
                    wait=True)
                self.devtools.send_command(
                    "Emulation.setTouchEmulationEnabled", {
                        "enabled": True,
                        "configuration": "mobile"
                    },
                    wait=True)
                if not self.options.throttle and 'throttle_cpu' in self.job:
                    logging.debug('CPU Throttle target: %0.3fx',
                                  self.job['throttle_cpu'])
                    if self.job['throttle_cpu'] > 1:
                        self.devtools.send_command(
                            "Emulation.setCPUThrottlingRate",
                            {"rate": self.job['throttle_cpu']},
                            wait=True)
            # UA String
            ua_string = self.devtools.execute_js("navigator.userAgent")
            if ua_string is not None:
                match = re.search(r'Chrome\/(\d+\.\d+\.\d+\.\d+)', ua_string)
                if match:
                    self.browser_version = match.group(1)
            if 'uastring' in self.job:
                ua_string = self.job['uastring']
            if ua_string is not None and 'AppendUA' in task:
                ua_string += ' ' + task['AppendUA']
            if ua_string is not None:
                self.job['user_agent_string'] = ua_string
            # Disable js
            if self.job['noscript']:
                self.devtools.send_command(
                    "Emulation.setScriptExecutionDisabled", {"value": True},
                    wait=True)
            self.devtools.prepare_browser()

    def on_start_recording(self, task):
        """Start recording"""
        task['page_data'] = {'date': time.time()}
        task['page_result'] = None
        task['run_start_time'] = monotonic.monotonic()
        if self.browser_version is not None and 'browserVersion' not in task[
                'page_data']:
            task['page_data']['browserVersion'] = self.browser_version
            task['page_data']['browser_version'] = self.browser_version
        if not self.options.throttle and 'throttle_cpu' in self.job:
            task['page_data']['throttle_cpu_requested'] = self.job[
                'throttle_cpu_requested']
            if self.job['throttle_cpu'] > 1:
                task['page_data']['throttle_cpu'] = self.job['throttle_cpu']
        if self.devtools is not None:
            self.devtools.start_recording()

    def on_stop_recording(self, task):
        """Stop recording"""
        if self.devtools is not None:
            if self.job['pngScreenShot']:
                screen_shot = os.path.join(task['dir'],
                                           task['prefix'] + '_screen.png')
                self.devtools.grab_screenshot(screen_shot, png=True)
            else:
                screen_shot = os.path.join(task['dir'],
                                           task['prefix'] + '_screen.jpg')
                self.devtools.grab_screenshot(screen_shot,
                                              png=False,
                                              resize=600)
            # Collect end of test data from the browser
            self.collect_browser_metrics(task)
            # Stop recording dev tools (which also collects the trace)
            self.devtools.stop_recording()

    def run_task(self, task):
        """Run an individual test"""
        if self.devtools is not None:
            self.task = task
            logging.debug("Running test")
            end_time = monotonic.monotonic() + task['test_time_limit']
            task['current_step'] = 1
            recording = False
            while len(task['script']) and task['error'] is None and \
                    monotonic.monotonic() < end_time:
                self.prepare_task(task)
                command = task['script'].pop(0)
                if not recording and command['record']:
                    recording = True
                    self.on_start_recording(task)
                self.process_command(command)
                if command['record']:
                    self.devtools.wait_for_page_load()
                    if not task['combine_steps'] or not len(task['script']):
                        self.on_stop_recording(task)
                        recording = False
                        self.on_start_processing(task)
                        self.wait_for_processing(task)
                        self.process_devtools_requests(task)
                        self.step_complete(task)
                        if task['log_data']:
                            # Move on to the next step
                            task['current_step'] += 1
                            self.event_name = None
                    task['navigated'] = True
            self.task = None

    def on_start_processing(self, task):
        """Start any processing of the captured data"""
        if task['log_data']:
            # Start the processing that can run in a background thread
            optimization = OptimizationChecks(self.job, task,
                                              self.get_requests())
            optimization.start()
            # Run the video post-processing
            if self.use_devtools_video and self.job['video']:
                self.process_video()
            self.wappalyzer_detect(task, self.devtools.main_request_headers)
            # wait for the background optimization checks
            optimization.join()

    def wait_for_processing(self, task):
        """Wait for the background processing (if any)"""
        pass

    def execute_js(self, script):
        """Run javascipt"""
        ret = None
        if self.devtools is not None:
            ret = self.devtools.execute_js(script)
        return ret

    def prepare_task(self, task):
        """Format the file prefixes for multi-step testing"""
        if task['current_step'] == 1:
            task['prefix'] = task['task_prefix']
            task['video_subdirectory'] = task['task_video_prefix']
        else:
            task['prefix'] = '{0}_{1:d}'.format(task['task_prefix'],
                                                task['current_step'])
            task['video_subdirectory'] = '{0}_{1:d}'.format(
                task['task_video_prefix'], task['current_step'])
        if task['video_subdirectory'] not in task['video_directories']:
            task['video_directories'].append(task['video_subdirectory'])
        if self.event_name is not None:
            task['step_name'] = self.event_name
        else:
            task['step_name'] = 'Step_{0:d}'.format(task['current_step'])

    def process_video(self):
        """Post process the video"""
        from internal.video_processing import VideoProcessing
        video = VideoProcessing(self.options, self.job, self.task)
        video.process()

    def process_devtools_requests(self, task):
        """Process the devtools log and pull out the requests information"""
        path_base = os.path.join(self.task['dir'], self.task['prefix'])
        devtools_file = path_base + '_devtools.json.gz'
        if os.path.isfile(devtools_file):
            from internal.support.devtools_parser import DevToolsParser
            out_file = path_base + '_devtools_requests.json.gz'
            options = {
                'devtools': devtools_file,
                'cached': task['cached'],
                'out': out_file
            }
            netlog = path_base + '_netlog_requests.json.gz'
            options['netlog'] = netlog if os.path.isfile(netlog) else None
            optimization = path_base + '_optimization.json.gz'
            options['optimization'] = optimization if os.path.isfile(
                optimization) else None
            user_timing = path_base + '_user_timing.json.gz'
            options['user'] = user_timing if os.path.isfile(
                user_timing) else None
            coverage = path_base + '_coverage.json.gz'
            options['coverage'] = coverage if os.path.isfile(
                coverage) else None
            cpu = path_base + '_timeline_cpu.json.gz'
            options['cpu'] = cpu if os.path.isfile(cpu) else None
            parser = DevToolsParser(options)
            parser.process()
            # Cleanup intermediate files that are not needed
            if 'debug' not in self.job or not self.job['debug']:
                if os.path.isfile(netlog):
                    os.remove(netlog)
                if os.path.isfile(optimization):
                    os.remove(optimization)
                if os.path.isfile(coverage):
                    os.remove(coverage)
                if os.path.isfile(devtools_file):
                    os.remove(devtools_file)
            if 'page_data' in parser.result and 'result' in parser.result[
                    'page_data']:
                self.task['page_result'] = parser.result['page_data']['result']

    def run_js_file(self, file_name):
        """Execute one of our js scripts"""
        ret = None
        script = None
        script_file_path = os.path.join(self.script_dir, file_name)
        if os.path.isfile(script_file_path):
            with open(script_file_path, 'rb') as script_file:
                script = script_file.read()
        if script is not None:
            ret = self.devtools.execute_js(script)
        return ret

    def collect_browser_metrics(self, task):
        """Collect all of the in-page browser metrics that we need"""
        user_timing = self.run_js_file('user_timing.js')
        if user_timing is not None:
            path = os.path.join(task['dir'],
                                task['prefix'] + '_timed_events.json.gz')
            with gzip.open(path, 'wb', 7) as outfile:
                outfile.write(json.dumps(user_timing))
        page_data = self.run_js_file('page_data.js')
        if page_data is not None:
            task['page_data'].update(page_data)
        if 'customMetrics' in self.job:
            custom_metrics = {}
            for name in self.job['customMetrics']:
                script = 'var wptCustomMetric = function() {' +\
                         self.job['customMetrics'][name] +\
                         '};try{wptCustomMetric();}catch(e){};'
                custom_metrics[name] = self.devtools.execute_js(script)
            path = os.path.join(task['dir'],
                                task['prefix'] + '_metrics.json.gz')
            with gzip.open(path, 'wb', 7) as outfile:
                outfile.write(json.dumps(custom_metrics))

    def process_command(self, command):
        """Process an individual script command"""
        logging.debug("Processing script command:")
        logging.debug(command)
        if command['command'] == 'navigate':
            self.task['page_data']['URL'] = command['target']
            url = str(command['target']).replace('"', '\"')
            script = 'window.location="{0}";'.format(url)
            script = self.prepare_script_for_record(script)
            self.devtools.start_navigating()
            self.devtools.execute_js(script)
        elif command['command'] == 'logdata':
            self.task['combine_steps'] = False
            if int(re.search(r'\d+', str(command['target'])).group()):
                logging.debug("Data logging enabled")
                self.task['log_data'] = True
            else:
                logging.debug("Data logging disabled")
                self.task['log_data'] = False
        elif command['command'] == 'combinesteps':
            self.task['log_data'] = True
            self.task['combine_steps'] = True
        elif command['command'] == 'seteventname':
            self.event_name = command['target']
        elif command['command'] == 'exec':
            script = command['target']
            if command['record']:
                script = self.prepare_script_for_record(script)
                self.devtools.start_navigating()
            self.devtools.execute_js(script)
        elif command['command'] == 'sleep':
            delay = min(
                60,
                max(0, int(re.search(r'\d+', str(command['target'])).group())))
            if delay > 0:
                time.sleep(delay)
        elif command['command'] == 'setabm':
            self.task['stop_at_onload'] = bool('target' in command and \
                                               int(re.search(r'\d+',
                                                             str(command['target'])).group()) == 0)
        elif command['command'] == 'setactivitytimeout':
            if 'target' in command:
                milliseconds = int(
                    re.search(r'\d+', str(command['target'])).group())
                self.task['activity_time'] = max(
                    0, min(30,
                           float(milliseconds) / 1000.0))
        elif command['command'] == 'setuseragent':
            self.task['user_agent_string'] = command['target']
        elif command['command'] == 'setcookie':
            if 'target' in command and 'value' in command:
                url = command['target'].strip()
                cookie = command['value']
                pos = cookie.find(';')
                if pos > 0:
                    cookie = cookie[:pos]
                pos = cookie.find('=')
                if pos > 0:
                    name = cookie[:pos].strip()
                    value = cookie[pos + 1:].strip()
                    if len(name) and len(value) and len(url):
                        self.devtools.send_command('Network.setCookie', {
                            'url': url,
                            'name': name,
                            'value': value
                        })
        elif command['command'] == 'addheader':
            self.devtools.set_header(command['target'])
        elif command['command'] == 'setheader':
            self.devtools.set_header(command['target'])
        elif command['command'] == 'resetheaders':
            self.devtools.reset_headers()
        elif command['command'] == 'clearcache':
            self.devtools.clear_cache()

    def navigate(self, url):
        """Navigate to the given URL"""
        if self.devtools is not None:
            self.devtools.send_command('Page.navigate', {'url': url},
                                       wait=True)

    def get_requests(self):
        """Get the request details for running an optimization check"""
        requests = None
        if self.devtools is not None:
            requests = self.devtools.get_requests()
        return requests

    def lighthouse_thread(self):
        """Run lighthouse in a thread so we can kill it if it times out"""
        cmd = self.lighthouse_command
        self.task['lighthouse_log'] = cmd + "\n"
        logging.debug(cmd)
        proc = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
        for line in iter(proc.stderr.readline, b''):
            logging.debug(line.rstrip())
            self.task['lighthouse_log'] += line
        proc.communicate()

    def run_lighthouse_test(self, task):
        """Run a lighthouse test against the current browser session"""
        task['lighthouse_log'] = ''
        if 'url' in self.job and self.job['url'] is not None:
            self.job['shaper'].configure(self.job)
            output_path = os.path.join(task['dir'], 'lighthouse.json')
            json_file = os.path.join(task['dir'], 'lighthouse.report.json')
            json_gzip = os.path.join(task['dir'], 'lighthouse.json.gz')
            html_file = os.path.join(task['dir'], 'lighthouse.report.html')
            html_gzip = os.path.join(task['dir'], 'lighthouse.html.gz')
            time_limit = min(int(task['time_limit']), 80)
            command = [
                'lighthouse', '--disable-network-throttling',
                '--disable-cpu-throttling', '--enable-error-reporting',
                '--max-wait-for-load',
                str(int(time_limit * 1000)), '--port',
                str(task['port']), '--output', 'html', '--output', 'json',
                '--output-path', '"{0}"'.format(output_path)
            ]
            if self.job['keep_lighthouse_trace']:
                command.append('--save-assets')
            if self.options.android or 'mobile' not in self.job or not self.job[
                    'mobile']:
                command.append('--disable-device-emulation')
            command.append('"{0}"'.format(self.job['url']))
            cmd = ' '.join(command)
            self.lighthouse_command = cmd
            # Give lighthouse up to 10 minutes to run all of the audits
            try:
                lh_thread = threading.Thread(target=self.lighthouse_thread)
                lh_thread.start()
                lh_thread.join(600)
            except Exception:
                pass
            from .os_util import kill_all
            kill_all('node', True)
            self.job['shaper'].reset()
            # Rename and compress the trace file, delete the other assets
            if self.job['keep_lighthouse_trace']:
                try:
                    lh_trace_src = os.path.join(task['dir'],
                                                'lighthouse-0.trace.json')
                    if os.path.isfile(lh_trace_src):
                        # read the JSON in and re-write it line by line to match the other traces
                        with open(lh_trace_src, 'rb') as f_in:
                            trace = json.load(f_in)
                            if trace is not None and 'traceEvents' in trace:
                                lighthouse_trace = os.path.join(
                                    task['dir'], 'lighthouse_trace.json.gz')
                            with gzip.open(lighthouse_trace, 'wb', 7) as f_out:
                                f_out.write('{"traceEvents":[{}')
                                for trace_event in trace['traceEvents']:
                                    f_out.write(",\n")
                                    f_out.write(json.dumps(trace_event))
                                f_out.write("\n]}")
                except Exception:
                    pass
            # Delete all the left-over lighthouse assets
            files = glob.glob(os.path.join(task['dir'], 'lighthouse-*'))
            for file_path in files:
                try:
                    os.remove(file_path)
                except Exception:
                    pass
            if os.path.isfile(json_file):
                # Remove the raw screenshots if they were stored with the file
                lh_report = None
                with open(json_file, 'rb') as f_in:
                    lh_report = json.load(f_in)
                if lh_report is not None and 'audits' in lh_report and \
                        'screenshots' in lh_report['audits']:
                    del lh_report['audits']['screenshots']
                    with gzip.open(json_gzip, 'wb', 7) as f_out:
                        json.dump(lh_report, f_out)
                else:
                    with open(json_file, 'rb') as f_in:
                        with gzip.open(json_gzip, 'wb', 7) as f_out:
                            shutil.copyfileobj(f_in, f_out)
                try:
                    os.remove(json_file)
                except Exception:
                    pass
                # Extract the audit scores
                if lh_report is not None:
                    audits = {}
                    if 'aggregations' in lh_report:
                        for entry in lh_report['aggregations']:
                            if 'name' in entry and 'total' in entry and \
                                    'scored' in entry and entry['scored']:
                                name = entry['name'].replace(' ', '')
                                audits[name] = entry['total']
                    elif 'reportCategories' in lh_report:
                        for category in lh_report['reportCategories']:
                            if 'name' in category and 'score' in category:
                                category_name = category['name'].replace(
                                    ' ', '')
                                score = float(category['score']) / 100.0
                                audits[category_name] = score
                                if category[
                                        'name'] == 'Performance' and 'audits' in category:
                                    for audit in category['audits']:
                                        if 'id' in audit and 'group' in audit and \
                                                audit['group'] == 'perf-metric' and \
                                                'result' in audit and \
                                                'rawValue' in audit['result']:
                                            name = category_name + '.' + \
                                                    audit['id'].replace(' ', '')
                                            audits[name] = audit['result'][
                                                'rawValue']
                    audits_gzip = os.path.join(task['dir'],
                                               'lighthouse_audits.json.gz')
                    with gzip.open(audits_gzip, 'wb', 7) as f_out:
                        json.dump(audits, f_out)
            if os.path.isfile(html_file):
                # Remove the raw screenshots if they were stored with the file
                with open(html_file, 'rb') as f_in:
                    lh_report = f_in.read()
                    start = lh_report.find('\n    &quot;screenshots')
                    if start >= 0:
                        end = lh_report.find('\n    },', start)
                        if end >= 0:
                            lh_report = lh_report[:start] + lh_report[end + 7:]
                    with gzip.open(html_gzip, 'wb', 7) as f_out:
                        f_out.write(lh_report)
                try:
                    os.remove(html_file)
                except Exception:
                    pass
Exemple #2
0
class DevtoolsBrowser(object):
    """Devtools Browser base"""
    CONNECT_TIME_LIMIT = 30
    CURRENT_VERSION = 1

    def __init__(self, job):
        self.job = job
        self.devtools = None
        self.task = None
        self.event_name = None
        self.support_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'support')
        self.script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'js')

    def connect(self, task):
        """Connect to the dev tools interface"""
        ret = False
        from internal.devtools import DevTools
        self.devtools = DevTools(self.job, task)
        if self.devtools.connect(self.CONNECT_TIME_LIMIT):
            logging.debug("Devtools connected")
            ret = True
        else:
            task['error'] = "Error connecting to dev tools interface"
            logging.critical(task['error'])
            self.devtools = None
        return ret

    def disconnect(self):
        """Disconnect from dev tools"""
        if self.devtools is not None:
            self.devtools.close()
            self.devtools = None

    def prepare_browser(self):
        """Prepare the running browser (mobile emulation, UA string, etc"""
        if self.devtools is not None:
            # Mobile Emulation
            if 'mobile' in self.job and self.job['mobile'] and \
                    'width' in self.job and 'height' in self.job and \
                    'dpr' in self.job:
                self.devtools.send_command("Emulation.setDeviceMetricsOverride",
                                           {"width": int(self.job['width']),
                                            "height": int(self.job['height']),
                                            "screenWidth": int(self.job['width']),
                                            "screenHeight": int(self.job['height']),
                                            "scale": 1,
                                            "positionX": 0,
                                            "positionY": 0,
                                            "deviceScaleFactor": float(self.job['dpr']),
                                            "mobile": True,
                                            "fitWindow": False},
                                           wait=True)
                self.devtools.send_command("Emulation.setVisibleSize",
                                           {"width": int(self.job['width']),
                                            "height": int(self.job['height'])},
                                           wait=True)
            # UA String
            if 'uastring' in self.job:
                ua_string = self.job['uastring']
            else:
                ua_string = self.devtools.execute_js("navigator.userAgent")
            if ua_string is not None and 'keepua' not in self.job or not self.job['keepua']:
                ua_string += ' PTST/{0:d}'.format(self.CURRENT_VERSION)
            if ua_string is not None:
                self.job['user_agent_string'] = ua_string
            # Disable js
            if self.job['noscript']:
                self.devtools.send_command("Emulation.setScriptExecutionDisabled",
                                           {"value": True}, wait=True)

    def on_start_recording(self, _):
        """Start recording"""
        if self.devtools is not None:
            self.devtools.start_recording()

    def on_stop_recording(self, _):
        """Stop recording"""
        if self.devtools is not None:
            self.devtools.stop_recording()

    def run_task(self, task):
        """Run an individual test"""
        if self.devtools is not None:
            self.task = task
            logging.debug("Running test")
            end_time = monotonic.monotonic() + task['time_limit']
            task['current_step'] = 1
            recording = False
            while len(task['script']) and monotonic.monotonic() < end_time:
                self.prepare_task(task)
                command = task['script'].pop(0)
                if not recording and command['record']:
                    recording = True
                    self.on_start_recording(task)
                self.process_command(command)
                if command['record']:
                    self.devtools.wait_for_page_load()
                    if not task['combine_steps'] or not len(task['script']):
                        self.on_stop_recording(task)
                        recording = False
                        if task['log_data']:
                            # Start the processing that can run in a background thread
                            optimization = OptimizationChecks(self.job, task, self.get_requests())
                            optimization.start()
                            trace_thread = threading.Thread(target=self.process_trace)
                            trace_thread.start()
                            # Collect end of test data from the browser
                            if self.job['pngss']:
                                screen_shot = os.path.join(task['dir'],
                                                           task['prefix'] + '_screen.png')
                                self.devtools.grab_screenshot(screen_shot, png=True)
                            else:
                                screen_shot = os.path.join(task['dir'],
                                                           task['prefix'] + '_screen.jpg')
                                self.devtools.grab_screenshot(screen_shot, png=False)
                            self.collect_browser_metrics(task)
                            # Run the rest of the post-processing
                            self.process_video()
                            logging.debug('Waiting for trace processing to complete')
                            trace_thread.join()
                            optimization.join()
                            # Move on to the next step
                            task['current_step'] += 1
                            self.event_name = None
                        self.wait_for_processing()
            self.task = None

    def prepare_task(self, task):
        """Format the file prefixes for multi-step testing"""
        if task['current_step'] == 1:
            task['prefix'] = task['task_prefix']
            task['video_subdirectory'] = task['task_video_prefix']
        else:
            task['prefix'] = '{0}_{1:d}'.format(task['task_prefix'], task['current_step'])
            task['video_subdirectory'] = '{0}_{1:d}'.format(task['task_video_prefix'],
                                                            task['current_step'])
        if task['video_subdirectory'] not in task['video_directories']:
            task['video_directories'].append(task['video_subdirectory'])
        if self.event_name is not None:
            task['step_name'] = self.event_name
        else:
            task['step_name'] = 'Step_{0:d}'.format(task['current_step'])

    def process_video(self):
        """Post process the video"""
        from internal.video_processing import VideoProcessing
        video = VideoProcessing(self.job, self.task)
        video.process()

    def process_trace(self):
        """Post-process the trace file"""
        path_base = os.path.join(self.task['dir'], self.task['prefix'])
        trace_file = path_base + '_trace.json.gz'
        if os.path.isfile(trace_file):
            user_timing = path_base + '_user_timing.json.gz'
            cpu_slices = path_base + '_timeline_cpu.json.gz'
            script_timing = path_base + '_script_timing.json.gz'
            feature_usage = path_base + '_feature_usage.json.gz'
            interactive = path_base + '_interactive.json.gz'
            v8_stats = path_base + '_v8stats.json.gz'
            trace_parser = os.path.join(self.support_path, "trace-parser.py")
            cmd = ['python', trace_parser, '-t', trace_file, '-u', user_timing,
                   '-c', cpu_slices, '-j', script_timing, '-f', feature_usage,
                   '-i', interactive, '-s', v8_stats]
            logging.debug(cmd)
            subprocess.call(cmd)

    def run_js_file(self, file_name):
        """Execute one of our js scripts"""
        ret = None
        script = None
        script_file_path = os.path.join(self.script_dir, file_name)
        if os.path.isfile(script_file_path):
            with open(script_file_path, 'rb') as script_file:
                script = script_file.read()
        if script is not None:
            ret = self.devtools.execute_js(script)
        return ret

    def collect_browser_metrics(self, task):
        """Collect all of the in-page browser metrics that we need"""
        user_timing = self.run_js_file('user_timing.js')
        if user_timing is not None:
            path = os.path.join(task['dir'], task['prefix'] + '_timed_events.json.gz')
            with gzip.open(path, 'wb') as outfile:
                outfile.write(json.dumps(user_timing))
        page_data = self.run_js_file('page_data.js')
        if 'step_name' in task:
            if page_data is None:
                page_data = {}
            page_data['eventName'] = task['step_name']
        if page_data is not None:
            path = os.path.join(task['dir'], task['prefix'] + '_page_data.json.gz')
            with gzip.open(path, 'wb') as outfile:
                outfile.write(json.dumps(page_data))
        if 'customMetrics' in self.job:
            custom_metrics = {}
            for name in self.job['customMetrics']:
                script = 'var wptCustomMetric = function() {' +\
                         self.job['customMetrics'][name] +\
                         '};try{wptCustomMetric();}catch(e){};'
                custom_metrics[name] = self.devtools.execute_js(script)
            path = os.path.join(task['dir'], task['prefix'] + '_metrics.json.gz')
            with gzip.open(path, 'wb') as outfile:
                outfile.write(json.dumps(custom_metrics))

    def process_command(self, command):
        """Process an individual script command"""
        logging.debug("Processing script command:")
        logging.debug(command)
        if command['command'] == 'navigate':
            self.devtools.start_navigating()
            self.devtools.send_command('Page.navigate', {'url': command['target']})
        elif command['command'] == 'logdata':
            self.task['combine_steps'] = False
            if int(command['target']):
                logging.debug("Data logging enabled")
                self.task['log_data'] = True
            else:
                logging.debug("Data logging disabled")
                self.task['log_data'] = False
        elif command['command'] == 'combinesteps':
            self.task['log_data'] = True
            self.task['combine_steps'] = True
        elif command['command'] == 'seteventname':
            self.event_name = command['target']
        elif command['command'] == 'exec':
            if command['record']:
                self.devtools.start_navigating()
            self.devtools.execute_js(command['target'])
        elif command['command'] == 'sleep':
            delay = min(60, max(0, int(command['target'])))
            if delay > 0:
                time.sleep(delay)
        elif command['command'] == 'setabm':
            self.task['stop_at_onload'] = bool('target' in command and int(command['target']) == 0)
        elif command['command'] == 'setactivitytimeout':
            if 'target' in command:
                self.task['activity_time'] = max(0, min(30, int(command['target'])))
        elif command['command'] == 'block':
            block_list = command['target'].split()
            for block in block_list:
                block = block.strip()
                if len(block):
                    logging.debug("Blocking: %s", block)
                    self.devtools.send_command('Network.addBlockedURL', {'url': block})
        elif command['command'] == 'setuseragent':
            self.task['user_agent_string'] = command['target']
        elif command['command'] == 'setcookie':
            if 'target' in command and 'value' in command:
                url = command['target'].strip()
                cookie = command['value']
                pos = cookie.find(';')
                if pos > 0:
                    cookie = cookie[:pos]
                pos = cookie.find('=')
                if pos > 0:
                    name = cookie[:pos].strip()
                    value = cookie[pos+1:].strip()
                    if len(name) and len(value) and len(url):
                        self.devtools.send_command('Network.setCookie',
                                                   {'url': url, 'name': name, 'value': value})

    def navigate(self, url):
        """Navigate to the given URL"""
        if self.devtools is not None:
            self.devtools.send_command('Page.navigate', {'url': url}, wait=True)

    def get_requests(self):
        """Get the request details for running an optimization check"""
        requests = None
        if self.devtools is not None:
            requests = self.devtools.get_requests()
        return requests
Exemple #3
0
class DevtoolsBrowser(object):
    """Devtools Browser base"""
    CONNECT_TIME_LIMIT = 120

    def __init__(self, options, job, use_devtools_video=True):
        self.options = options
        self.job = job
        self.devtools = None
        self.task = None
        self.event_name = None
        self.browser_version = None
        self.device_pixel_ratio = None
        self.use_devtools_video = use_devtools_video
        self.lighthouse_command = None
        self.devtools_screenshot = True
        self.support_path = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'support')
        self.script_dir = os.path.join(
            os.path.abspath(os.path.dirname(__file__)), 'js')

    def connect(self, task):
        """Connect to the dev tools interface"""
        ret = False
        from internal.devtools import DevTools
        self.devtools = DevTools(self.options, self.job, task,
                                 self.use_devtools_video)
        if task['running_lighthouse']:
            ret = self.devtools.wait_for_available(self.CONNECT_TIME_LIMIT)
        else:
            if self.devtools.connect(self.CONNECT_TIME_LIMIT):
                logging.debug("Devtools connected")
                ret = True
            else:
                task['error'] = "Error connecting to dev tools interface"
                logging.critical(task['error'])
                self.devtools = None
        return ret

    def disconnect(self):
        """Disconnect from dev tools"""
        if self.devtools is not None:
            # Always navigate to about:blank after finishing in case the tab is
            # remembered across sessions
            if self.task is not None and self.task['error'] is None:
                self.devtools.send_command('Page.navigate',
                                           {'url': 'about:blank'},
                                           wait=True)
            self.devtools.close()
            self.devtools = None

    def prepare_browser(self, task):
        """Prepare the running browser (mobile emulation, UA string, etc"""
        if self.devtools is not None:
            # Figure out the native viewport size
            if not self.options.android:
                size = self.devtools.execute_js(
                    "[window.innerWidth, window.innerHeight]")
                if size is not None and len(size) == 2:
                    task['actual_viewport'] = {
                        "width": size[0],
                        "height": size[1]
                    }
            # Get the native device pixel ratio
            if self.device_pixel_ratio is None:
                self.device_pixel_ratio = 1.0
                try:
                    ratio = self.devtools.execute_js('window.devicePixelRatio')
                    if ratio is not None:
                        self.device_pixel_ratio = max(1.0, float(ratio))
                except Exception:
                    pass
            # Clear the caches
            if not task['cached']:
                self.devtools.send_command("Network.clearBrowserCache", {},
                                           wait=True)
                self.devtools.send_command("Network.clearBrowserCookies", {},
                                           wait=True)

            # Mobile Emulation
            if not self.options.android and \
                    'mobile' in self.job and self.job['mobile'] and \
                    'width' in self.job and 'height' in self.job and \
                    'dpr' in self.job:
                width = int(re.search(r'\d+', str(self.job['width'])).group())
                height = int(
                    re.search(r'\d+', str(self.job['height'])).group())
                self.devtools.send_command(
                    "Emulation.setDeviceMetricsOverride", {
                        "width": width,
                        "height": height,
                        "screenWidth": width,
                        "screenHeight": height,
                        "scale": 1,
                        "positionX": 0,
                        "positionY": 0,
                        "deviceScaleFactor": float(self.job['dpr']),
                        "mobile": True,
                        "screenOrientation": {
                            "angle": 0,
                            "type": "portraitPrimary"
                        }
                    },
                    wait=True)
                self.devtools.send_command(
                    "Emulation.setTouchEmulationEnabled", {
                        "enabled": True,
                        "configuration": "mobile"
                    },
                    wait=True)
                self.devtools.send_command("Emulation.setScrollbarsHidden",
                                           {"hidden": True},
                                           wait=True)
                if (task['running_lighthouse'] or not self.options.throttle
                    ) and 'throttle_cpu' in self.job:
                    logging.debug('CPU Throttle target: %0.3fx',
                                  self.job['throttle_cpu'])
                    if self.job['throttle_cpu'] > 1:
                        self.devtools.send_command(
                            "Emulation.setCPUThrottlingRate",
                            {"rate": self.job['throttle_cpu']},
                            wait=True)

            # Location
            if 'lat' in self.job and 'lng' in self.job:
                try:
                    lat = float(str(self.job['lat']))
                    lng = float(str(self.job['lng']))
                    self.devtools.send_command(
                        'Emulation.setGeolocationOverride', {
                            'latitude': lat,
                            'longitude': lng,
                            'accuracy': 0
                        })
                except Exception:
                    logging.exception('Error overriding location')

            # UA String
            ua_string = self.devtools.execute_js("navigator.userAgent")
            if ua_string is not None:
                match = re.search(r'Chrome\/(\d+\.\d+\.\d+\.\d+)', ua_string)
                if match:
                    self.browser_version = match.group(1)
            if 'uastring' in self.job:
                ua_string = self.job['uastring']
            if ua_string is not None and 'AppendUA' in task:
                ua_string += ' ' + task['AppendUA']
            if ua_string is not None:
                self.job['user_agent_string'] = ua_string
            # Disable js
            if self.job['noscript']:
                self.devtools.send_command(
                    "Emulation.setScriptExecutionDisabled", {"value": True},
                    wait=True)
            self.devtools.prepare_browser()

    def on_start_recording(self, task):
        """Start recording"""
        task['page_data'] = {'date': time.time()}
        task['page_result'] = None
        task['run_start_time'] = monotonic()
        if self.browser_version is not None and 'browserVersion' not in task[
                'page_data']:
            task['page_data']['browserVersion'] = self.browser_version
            task['page_data']['browser_version'] = self.browser_version
        if not self.options.throttle and 'throttle_cpu' in self.job:
            task['page_data']['throttle_cpu_requested'] = self.job[
                'throttle_cpu_requested']
            if self.job['throttle_cpu'] > 1:
                task['page_data']['throttle_cpu'] = self.job['throttle_cpu']
        if self.devtools is not None:
            self.devtools.start_recording()

    def on_stop_capture(self, task):
        """Do any quick work to stop things that are capturing data"""
        if self.devtools is not None:
            self.devtools.stop_capture()

    def on_stop_recording(self, task):
        """Stop recording"""
        if self.devtools is not None:
            self.devtools.collect_trace()
            if self.devtools_screenshot:
                if self.job['pngScreenShot']:
                    screen_shot = os.path.join(task['dir'],
                                               task['prefix'] + '_screen.png')
                    self.devtools.grab_screenshot(screen_shot, png=True)
                else:
                    screen_shot = os.path.join(task['dir'],
                                               task['prefix'] + '_screen.jpg')
                    self.devtools.grab_screenshot(screen_shot,
                                                  png=False,
                                                  resize=600)
            # Collect end of test data from the browser
            self.collect_browser_metrics(task)
            # Stop recording dev tools (which also collects the trace)
            self.devtools.stop_recording()

    def run_task(self, task):
        """Run an individual test"""
        if self.devtools is not None:
            self.task = task
            logging.debug("Running test")
            end_time = monotonic() + task['test_time_limit']
            task['current_step'] = 1
            recording = False
            while len(task['script']) and task['error'] is None and \
                    monotonic() < end_time:
                self.prepare_task(task)
                command = task['script'].pop(0)
                if not recording and command['record']:
                    recording = True
                    self.on_start_recording(task)
                self.process_command(command)
                if command['record']:
                    self.devtools.wait_for_page_load()
                    if not task['combine_steps'] or not len(task['script']):
                        self.on_stop_capture(task)
                        self.on_stop_recording(task)
                        recording = False
                        self.on_start_processing(task)
                        self.wait_for_processing(task)
                        self.process_devtools_requests(task)
                        self.step_complete(task)  #pylint: disable=no-member
                        if task['log_data']:
                            # Move on to the next step
                            task['current_step'] += 1
                            self.event_name = None
                    task['navigated'] = True
            self.task = None

    def on_start_processing(self, task):
        """Start any processing of the captured data"""
        if task['log_data']:
            # Start the processing that can run in a background thread
            optimization = OptimizationChecks(self.job, task,
                                              self.get_requests())
            optimization.start()
            # Run the video post-processing
            if self.use_devtools_video and self.job['video']:
                self.process_video()
            self.wappalyzer_detect(task, self.devtools.main_request_headers)
            # wait for the background optimization checks
            optimization.join()

    def wait_for_processing(self, task):
        """Wait for the background processing (if any)"""
        pass

    def execute_js(self, script):
        """Run javascipt"""
        ret = None
        if self.devtools is not None:
            ret = self.devtools.execute_js(script)
        return ret

    def prepare_task(self, task):
        """Format the file prefixes for multi-step testing"""
        if task['current_step'] == 1:
            task['prefix'] = task['task_prefix']
            task['video_subdirectory'] = task['task_video_prefix']
        else:
            task['prefix'] = '{0}_{1:d}'.format(task['task_prefix'],
                                                task['current_step'])
            task['video_subdirectory'] = '{0}_{1:d}'.format(
                task['task_video_prefix'], task['current_step'])
        if task['video_subdirectory'] not in task['video_directories']:
            task['video_directories'].append(task['video_subdirectory'])
        if self.event_name is not None:
            task['step_name'] = self.event_name
        else:
            task['step_name'] = 'Step_{0:d}'.format(task['current_step'])

    def process_video(self):
        """Post process the video"""
        from internal.video_processing import VideoProcessing
        video = VideoProcessing(self.options, self.job, self.task)
        video.process()

    def process_devtools_requests(self, task):
        """Process the devtools log and pull out the requests information"""
        path_base = os.path.join(self.task['dir'], self.task['prefix'])
        devtools_file = path_base + '_devtools.json.gz'
        if os.path.isfile(devtools_file):
            from internal.support.devtools_parser import DevToolsParser
            out_file = path_base + '_devtools_requests.json.gz'
            options = {
                'devtools': devtools_file,
                'cached': task['cached'],
                'out': out_file
            }
            netlog = path_base + '_netlog_requests.json.gz'
            options['netlog'] = netlog if os.path.isfile(netlog) else None
            optimization = path_base + '_optimization.json.gz'
            options['optimization'] = optimization if os.path.isfile(
                optimization) else None
            user_timing = path_base + '_user_timing.json.gz'
            options['user'] = user_timing if os.path.isfile(
                user_timing) else None
            coverage = path_base + '_coverage.json.gz'
            options['coverage'] = coverage if os.path.isfile(
                coverage) else None
            cpu = path_base + '_timeline_cpu.json.gz'
            options['cpu'] = cpu if os.path.isfile(cpu) else None
            v8stats = path_base + '_v8stats.json.gz'
            options['v8stats'] = v8stats if os.path.isfile(v8stats) else None
            parser = DevToolsParser(options)
            parser.process()
            # Cleanup intermediate files that are not needed
            if 'debug' not in self.job or not self.job['debug']:
                if os.path.isfile(netlog):
                    os.remove(netlog)
                if os.path.isfile(optimization):
                    os.remove(optimization)
                if os.path.isfile(coverage):
                    os.remove(coverage)
                if os.path.isfile(devtools_file):
                    os.remove(devtools_file)
            if 'page_data' in parser.result and 'result' in parser.result[
                    'page_data']:
                self.task['page_result'] = parser.result['page_data']['result']

    def run_js_file(self, file_name):
        """Execute one of our js scripts"""
        ret = None
        script = None
        script_file_path = os.path.join(self.script_dir, file_name)
        if os.path.isfile(script_file_path):
            with io.open(script_file_path, 'r',
                         encoding='utf-8') as script_file:
                script = script_file.read()
        if script is not None:
            ret = self.devtools.execute_js(script)
        return ret

    def collect_browser_metrics(self, task):
        """Collect all of the in-page browser metrics that we need"""
        user_timing = self.run_js_file('user_timing.js')
        if user_timing is not None:
            path = os.path.join(task['dir'],
                                task['prefix'] + '_timed_events.json.gz')
            with gzip.open(path, GZIP_TEXT, 7) as outfile:
                outfile.write(json.dumps(user_timing))
        page_data = self.run_js_file('page_data.js')
        if page_data is not None:
            task['page_data'].update(page_data)
        if 'customMetrics' in self.job:
            custom_metrics = {}
            for name in self.job['customMetrics']:
                script = 'var wptCustomMetric = function() {' +\
                         self.job['customMetrics'][name] +\
                         '};try{wptCustomMetric();}catch(e){};'
                custom_metrics[name] = self.devtools.execute_js(script)
            path = os.path.join(task['dir'],
                                task['prefix'] + '_metrics.json.gz')
            with gzip.open(path, GZIP_TEXT, 7) as outfile:
                outfile.write(json.dumps(custom_metrics))
        if 'heroElementTimes' in self.job and self.job['heroElementTimes']:
            hero_elements = None
            custom_hero_selectors = {}
            if 'heroElements' in self.job:
                custom_hero_selectors = self.job['heroElements']
            with io.open(os.path.join(self.script_dir, 'hero_elements.js'),
                         'r',
                         encoding='utf-8') as script_file:
                hero_elements_script = script_file.read()
            script = hero_elements_script + '(' + json.dumps(
                custom_hero_selectors) + ')'
            hero_elements = self.devtools.execute_js(script)
            if hero_elements is not None:
                logging.debug('Hero Elements: %s', json.dumps(hero_elements))
                path = os.path.join(task['dir'],
                                    task['prefix'] + '_hero_elements.json.gz')
                with gzip.open(path, GZIP_TEXT, 7) as outfile:
                    outfile.write(json.dumps(hero_elements))

    def process_command(self, command):
        """Process an individual script command"""
        logging.debug("Processing script command:")
        logging.debug(command)
        if command['command'] == 'navigate':
            self.task['page_data']['URL'] = command['target']
            url = str(command['target']).replace('"', '\"')
            script = 'window.location="{0}";'.format(url)
            script = self.prepare_script_for_record(script)  #pylint: disable=no-member
            self.devtools.start_navigating()
            self.devtools.execute_js(script)
        elif command['command'] == 'logdata':
            self.task['combine_steps'] = False
            if int(re.search(r'\d+', str(command['target'])).group()):
                logging.debug("Data logging enabled")
                self.task['log_data'] = True
            else:
                logging.debug("Data logging disabled")
                self.task['log_data'] = False
        elif command['command'] == 'combinesteps':
            self.task['log_data'] = True
            self.task['combine_steps'] = True
        elif command['command'] == 'seteventname':
            self.event_name = command['target']
        elif command['command'] == 'exec':
            script = command['target']
            if command['record']:
                script = self.prepare_script_for_record(script)  #pylint: disable=no-member
                self.devtools.start_navigating()
            self.devtools.execute_js(script)
        elif command['command'] == 'sleep':
            delay = min(
                60,
                max(0, int(re.search(r'\d+', str(command['target'])).group())))
            if delay > 0:
                time.sleep(delay)
        elif command['command'] == 'setabm':
            self.task['stop_at_onload'] = bool('target' in command and int(
                re.search(r'\d+', str(command['target'])).group()) == 0)
        elif command['command'] == 'setactivitytimeout':
            if 'target' in command:
                milliseconds = int(
                    re.search(r'\d+', str(command['target'])).group())
                self.task['activity_time'] = max(
                    0, min(30,
                           float(milliseconds) / 1000.0))
        elif command['command'] == 'setuseragent':
            self.task['user_agent_string'] = command['target']
        elif command['command'] == 'setcookie':
            if 'target' in command and 'value' in command:
                url = command['target'].strip()
                cookie = command['value']
                pos = cookie.find(';')
                if pos > 0:
                    cookie = cookie[:pos]
                pos = cookie.find('=')
                if pos > 0:
                    name = cookie[:pos].strip()
                    value = cookie[pos + 1:].strip()
                    if len(name) and len(value) and len(url):
                        self.devtools.send_command('Network.setCookie', {
                            'url': url,
                            'name': name,
                            'value': value
                        })
        elif command['command'] == 'setlocation':
            try:
                if 'target' in command and command['target'].find(',') > 0:
                    accuracy = 0
                    if 'value' in command and re.match(r'\d+',
                                                       command['value']):
                        accuracy = int(
                            re.search(r'\d+', str(command['value'])).group())
                    parts = command['target'].split(',')
                    lat = float(parts[0])
                    lng = float(parts[1])
                    self.devtools.send_command(
                        'Emulation.setGeolocationOverride', {
                            'latitude': lat,
                            'longitude': lng,
                            'accuracy': accuracy
                        })
            except Exception:
                logging.exception('Error setting location')
        elif command['command'] == 'addheader':
            self.devtools.set_header(command['target'])
        elif command['command'] == 'setheader':
            self.devtools.set_header(command['target'])
        elif command['command'] == 'resetheaders':
            self.devtools.reset_headers()
        elif command['command'] == 'clearcache':
            self.devtools.clear_cache()
        elif command['command'] == 'disablecache':
            disable_cache = bool('target' in command and \
                                 int(re.search(r'\d+',
                                               str(command['target'])).group()) == 1)
            self.devtools.disable_cache(disable_cache)

    def navigate(self, url):
        """Navigate to the given URL"""
        if self.devtools is not None:
            self.devtools.send_command('Page.navigate', {'url': url},
                                       wait=True)

    def get_requests(self):
        """Get the request details for running an optimization check"""
        requests = None
        if self.devtools is not None:
            requests = self.devtools.get_requests()
        return requests

    def lighthouse_thread(self):
        """Run lighthouse in a thread so we can kill it if it times out"""
        cmd = self.lighthouse_command
        self.task['lighthouse_log'] = cmd + "\n"
        logging.debug(cmd)
        proc = subprocess.Popen(cmd, shell=True, stderr=subprocess.PIPE)
        for line in iter(proc.stderr.readline, b''):
            try:
                line = unicode(line)
                logging.debug(line.rstrip())
                self.task['lighthouse_log'] += line
            except Exception:
                logging.exception('Error recording lighthouse log')
        proc.communicate()

    def run_lighthouse_test(self, task):
        """Run a lighthouse test against the current browser session"""
        task['lighthouse_log'] = ''
        if 'url' in self.job and self.job['url'] is not None:
            self.job['shaper'].configure(self.job, task)
            output_path = os.path.join(task['dir'], 'lighthouse.json')
            json_file = os.path.join(task['dir'], 'lighthouse.report.json')
            json_gzip = os.path.join(task['dir'], 'lighthouse.json.gz')
            html_file = os.path.join(task['dir'], 'lighthouse.report.html')
            html_gzip = os.path.join(task['dir'], 'lighthouse.html.gz')
            time_limit = min(int(task['time_limit']), 80)
            command = [
                'lighthouse', '"{0}"'.format(self.job['url']), '--channel',
                'wpt', '--disable-network-throttling',
                '--disable-cpu-throttling', '--throttling-method', 'provided',
                '--enable-error-reporting', '--max-wait-for-load',
                str(int(time_limit * 1000)), '--port',
                str(task['port']), '--output', 'html', '--output', 'json',
                '--output-path', '"{0}"'.format(output_path)
            ]
            if self.job['keep_lighthouse_trace']:
                command.append('--save-assets')
            if not self.job['keep_lighthouse_screenshots']:
                command.extend(['--skip-audits', 'screenshot-thumbnails'])
            if self.options.android or 'mobile' not in self.job or not self.job[
                    'mobile']:
                command.extend(['--emulated-form-factor', 'none'])
                if 'user_agent_string' in self.job:
                    sanitized_user_agent = re.sub(
                        r'[^a-zA-Z0-9_\-.;:/()\[\] ]+', '',
                        self.job['user_agent_string'])
                    command.append(
                        '--chrome-flags="--user-agent=\'{0}\'"'.format(
                            sanitized_user_agent))
            if len(task['block']):
                for pattern in task['block']:
                    pattern = "'" + pattern.replace("'", "'\\''") + "'"
                    command.extend(['--blocked-url-patterns', pattern])
            if 'headers' in task:
                headers_file = os.path.join(task['dir'],
                                            'lighthouse-headers.json')
                with io.open(headers_file, 'w', encoding='utf-8') as f_out:
                    json.dump(task['headers'], f_out)
                command.extend(
                    ['--extra-headers', '"{0}"'.format(headers_file)])
            cmd = ' '.join(command)
            self.lighthouse_command = cmd
            # Give lighthouse up to 10 minutes to run all of the audits
            try:
                lh_thread = threading.Thread(target=self.lighthouse_thread)
                lh_thread.start()
                lh_thread.join(600)
            except Exception:
                logging.exception('Error running lighthouse audits')
            from .os_util import kill_all
            kill_all('node', True)
            self.job['shaper'].reset()
            # Rename and compress the trace file, delete the other assets
            if self.job['keep_lighthouse_trace']:
                try:
                    lh_trace_src = os.path.join(task['dir'],
                                                'lighthouse-0.trace.json')
                    if os.path.isfile(lh_trace_src):
                        # read the JSON in and re-write it line by line to match the other traces
                        with io.open(lh_trace_src, 'r',
                                     encoding='utf-8') as f_in:
                            trace = json.load(f_in)
                            if trace is not None and 'traceEvents' in trace:
                                lighthouse_trace = os.path.join(
                                    task['dir'], 'lighthouse_trace.json.gz')
                            with gzip.open(lighthouse_trace, GZIP_TEXT,
                                           7) as f_out:
                                f_out.write('{"traceEvents":[{}')
                                for trace_event in trace['traceEvents']:
                                    f_out.write(",\n")
                                    f_out.write(json.dumps(trace_event))
                                f_out.write("\n]}")
                except Exception:
                    logging.exception('Error processing lighthouse trace')
            # Delete all the left-over lighthouse assets
            files = glob.glob(os.path.join(task['dir'], 'lighthouse-*'))
            for file_path in files:
                try:
                    os.remove(file_path)
                except Exception:
                    pass
            if os.path.isfile(json_file):
                lh_report = None
                with io.open(json_file, 'r', encoding='utf-8') as f_in:
                    lh_report = json.load(f_in)

                with open(json_file, 'rb') as f_in:
                    with gzip.open(json_gzip, 'wb', 7) as f_out:
                        shutil.copyfileobj(f_in, f_out)
                try:
                    os.remove(json_file)
                except Exception:
                    pass
                # Extract the audit scores
                if lh_report is not None:
                    audits = {}
                    # v1.x
                    if 'aggregations' in lh_report:
                        for entry in lh_report['aggregations']:
                            if 'name' in entry and 'total' in entry and \
                                    'scored' in entry and entry['scored']:
                                name = entry['name'].replace(' ', '')
                                audits[name] = entry['total']
                    # v2.x
                    elif 'reportCategories' in lh_report:
                        for category in lh_report['reportCategories']:
                            if 'name' in category and 'score' in category:
                                category_name = category['name'].replace(
                                    ' ', '')
                                score = float(category['score']) / 100.0
                                audits[category_name] = score
                                if category[
                                        'name'] == 'Performance' and 'audits' in category:
                                    for audit in category['audits']:
                                        if 'id' in audit and 'group' in audit and \
                                                audit['group'] == 'perf-metric' and \
                                                'result' in audit and \
                                                'rawValue' in audit['result']:
                                            name = category_name + '.' + \
                                                audit['id'].replace(' ', '')
                                            audits[name] = audit['result'][
                                                'rawValue']
                    # v3.x
                    elif 'categories' in lh_report:
                        for categoryId in lh_report['categories']:
                            category = lh_report['categories'][categoryId]
                            if 'title' not in category or 'score' not in category:
                                continue

                            category_title = category['title'].replace(' ', '')
                            audits[category_title] = category['score']

                            if categoryId != 'performance' or 'auditRefs' not in category:
                                continue

                            for auditRef in category['auditRefs']:
                                if auditRef['id'] not in lh_report['audits']:
                                    continue
                                if 'group' not in auditRef or auditRef[
                                        'group'] != 'metrics':
                                    continue
                                audit = lh_report['audits'][auditRef['id']]
                                name = category_title + '.' + audit['id']
                                if 'rawValue' in audit:
                                    audits[name] = audit['rawValue']
                                elif 'numericValue' in audit:
                                    audits[name] = audit['numericValue']
                    audits_gzip = os.path.join(task['dir'],
                                               'lighthouse_audits.json.gz')
                    with gzip.open(audits_gzip, GZIP_TEXT, 7) as f_out:
                        json.dump(audits, f_out)
            # Compress the HTML lighthouse report
            if os.path.isfile(html_file):
                try:
                    with open(html_file, 'rb') as f_in:
                        with gzip.open(html_gzip, 'wb', 7) as f_out:
                            shutil.copyfileobj(f_in, f_out)
                    os.remove(html_file)
                except Exception:
                    logging.exception('Error compressing lighthouse report')

    def wappalyzer_detect(self, task, request_headers):
        """Run the wappalyzer detection"""
        # Run the Wappalyzer detection (give it 30 seconds at most)
        completed = False
        if self.devtools is not None:
            try:
                logging.debug('wappalyzer_detect')
                detect_script = self.wappalyzer_script(request_headers)
                response = self.devtools.send_command("Runtime.evaluate", {
                    'expression': detect_script,
                    'awaitPromise': True,
                    'returnByValue': True,
                    'timeout': 30000
                },
                                                      wait=True,
                                                      timeout=30)
                if response is not None and 'result' in response and\
                        'result' in response['result'] and\
                        'value' in response['result']['result']:
                    result = response['result']['result']['value']
                    if result:
                        completed = True
                        logging.debug(result)
                        detected = json.loads(result)
                        if 'categories' in detected:
                            task['page_data']['detected'] = dict(
                                detected['categories'])
                        if 'apps' in detected:
                            task['page_data']['detected_apps'] = dict(
                                detected['apps'])
            except Exception as err:
                logging.exception("Exception running Wappalyzer: %s",
                                  err.__str__())
        if not completed:
            task['page_data']['wappalyzer_failed'] = 1

    def wappalyzer_script(self, response_headers):
        """Build the wappalyzer script to run in-browser"""
        script = None
        try:
            with open(
                    os.path.join(self.support_path, 'Wappalyzer',
                                 'script.js')) as f_in:
                script = f_in.read()
            if script is not None:
                wappalyzer = None
                with open(
                        os.path.join(self.support_path, 'Wappalyzer',
                                     'wappalyzer.js')) as f_in:
                    wappalyzer = f_in.read()
                if wappalyzer is not None:
                    json_data = None
                    with open(
                            os.path.join(self.support_path, 'Wappalyzer',
                                         'apps.json')) as f_in:
                        json_data = f_in.read()
                    if json is not None:
                        # Format the headers as a dictionary of lists
                        headers = {}
                        if response_headers is not None:
                            if isinstance(response_headers, dict):
                                for key in response_headers:
                                    values = []
                                    entry = response_headers[key]
                                    if isinstance(entry, list):
                                        values = entry
                                    elif isinstance(entry, (str, unicode)):
                                        entries = entry.split('\n')
                                        for value in entries:
                                            values.append(value.strip())
                                    if values:
                                        headers[key.lower()] = values
                            elif isinstance(response_headers, list):
                                for pair in response_headers:
                                    if isinstance(pair, (str, unicode)):
                                        parts = pair.split(':', 1)
                                        key = parts[0].strip(' :\n\t').lower()
                                        value = parts[1].strip(' :\n\t')
                                        if key not in headers:
                                            headers[key] = []
                                        headers[key].append(value)
                        script = script.replace('%WAPPALYZER%', wappalyzer)
                        script = script.replace('%JSON%', json_data)
                        script = script.replace('%RESPONSE_HEADERS%',
                                                json.dumps(headers))
        except Exception:
            logging.exception('Error building wappalyzer script')
        return script
class DevtoolsBrowser(object):
    """Devtools Browser base"""
    CONNECT_TIME_LIMIT = 30
    CURRENT_VERSION = 1

    def __init__(self, options, job, use_devtools_video=True):
        self.options = options
        self.job = job
        self.devtools = None
        self.task = None
        self.event_name = None
        self.browser_version = None
        self.use_devtools_video = use_devtools_video
        self.support_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'support')
        self.script_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'js')

    def connect(self, task):
        """Connect to the dev tools interface"""
        ret = False
        from internal.devtools import DevTools
        self.devtools = DevTools(self.options, self.job, task, self.use_devtools_video)
        if task['running_lighthouse']:
            self.devtools.wait_for_available(self.CONNECT_TIME_LIMIT)
        else:
            if self.devtools.connect(self.CONNECT_TIME_LIMIT):
                logging.debug("Devtools connected")
                ret = True
            else:
                task['error'] = "Error connecting to dev tools interface"
                logging.critical(task['error'])
                self.devtools = None
        return ret

    def disconnect(self):
        """Disconnect from dev tools"""
        if self.devtools is not None:
            self.devtools.close()
            self.devtools = None

    def prepare_browser(self, task):
        """Prepare the running browser (mobile emulation, UA string, etc"""
        if self.devtools is not None:
            # Clear the caches
            if not task['cached']:
                self.devtools.send_command("Network.clearBrowserCache", {},
                                           wait=True)
                self.devtools.send_command("Network.clearBrowserCookies", {},
                                           wait=True)

            # Mobile Emulation
            if not self.options.android and \
                    'mobile' in self.job and self.job['mobile'] and \
                    'width' in self.job and 'height' in self.job and \
                    'dpr' in self.job:
                self.devtools.send_command("Emulation.setDeviceMetricsOverride",
                                           {"width":
                                            int(re.search(r'\d+',
                                                          str(self.job['width'])).group()),
                                            "height":
                                            int(re.search(r'\d+',
                                                          str(self.job['height'])).group()),
                                            "screenWidth":
                                            int(re.search(r'\d+',
                                                          str(self.job['width'])).group()),
                                            "screenHeight":
                                            int(re.search(r'\d+',
                                                          str(self.job['height'])).group()),
                                            "scale": 1,
                                            "positionX": 0,
                                            "positionY": 0,
                                            "deviceScaleFactor": float(self.job['dpr']),
                                            "mobile": True,
                                            "fitWindow": False},
                                           wait=True)
                self.devtools.send_command("Emulation.setVisibleSize",
                                           {"width":
                                            int(re.search(r'\d+',
                                                          str(self.job['width'])).group()),
                                            "height":
                                            int(re.search(r'\d+',
                                                          str(self.job['height'])).group())},
                                           wait=True)
            # UA String
            ua_string = self.devtools.execute_js("navigator.userAgent")
            if ua_string is not None:
                match = re.search(r'Chrome\/(\d+\.\d+\.\d+\.\d+)', ua_string)
                if match:
                    self.browser_version = match.group(1)
            if 'uastring' in self.job:
                ua_string = self.job['uastring']
            if ua_string is not None and 'keepua' not in self.job or not self.job['keepua']:
                ua_string += ' PTST/{0:d}'.format(self.CURRENT_VERSION)
            if ua_string is not None:
                self.job['user_agent_string'] = ua_string
            # Disable js
            if self.job['noscript']:
                self.devtools.send_command("Emulation.setScriptExecutionDisabled",
                                           {"value": True}, wait=True)

    def on_start_recording(self, task):
        """Start recording"""
        if self.browser_version is not None and 'browserVersion' not in task['page_data']:
            task['page_data']['browserVersion'] = self.browser_version
        if self.devtools is not None:
            self.devtools.start_recording()

    def on_stop_recording(self, task):
        """Stop recording"""
        if self.devtools is not None:
            if self.job['pngss']:
                screen_shot = os.path.join(task['dir'],
                                           task['prefix'] + '_screen.png')
                self.devtools.grab_screenshot(screen_shot, png=True)
            else:
                screen_shot = os.path.join(task['dir'],
                                           task['prefix'] + '_screen.jpg')
                self.devtools.grab_screenshot(screen_shot, png=False, resize=600)
            # Collect end of test data from the browser
            self.collect_browser_metrics(task)
            # Stop recording dev tools (which also collects the trace)
            self.devtools.stop_recording()

    def run_task(self, task):
        """Run an individual test"""
        if self.devtools is not None:
            self.task = task
            logging.debug("Running test")
            end_time = monotonic.monotonic() + task['time_limit']
            task['current_step'] = 1
            recording = False
            while len(task['script']) and task['error'] is None and \
                    monotonic.monotonic() < end_time:
                self.prepare_task(task)
                command = task['script'].pop(0)
                if not recording and command['record']:
                    recording = True
                    self.on_start_recording(task)
                self.process_command(command)
                if command['record']:
                    self.devtools.wait_for_page_load()
                    if not task['combine_steps'] or not len(task['script']):
                        self.on_stop_recording(task)
                        recording = False
                        self.on_start_processing(task)
                        self.wait_for_processing(task)
                        if task['log_data']:
                            # Move on to the next step
                            task['current_step'] += 1
                            self.event_name = None
            # Always navigate to about:blank after finishing in case the tab is
            # remembered across sessions
            self.devtools.send_command('Page.navigate', {'url': 'about:blank'}, wait=True)
            self.task = None

    def on_start_processing(self, task):
        """Start any processing of the captured data"""
        if task['log_data']:
            # Start the processing that can run in a background thread
            optimization = OptimizationChecks(self.job, task, self.get_requests())
            optimization.start()
            trace_thread = threading.Thread(target=self.process_trace)
            trace_thread.start()
            # Run the rest of the post-processing
            if self.use_devtools_video and  self.job['video']:
                self.process_video()
            # gzip the trace file
            logging.debug('Compressing the trace file')
            trace_file = os.path.join(task['dir'],
                                      task['prefix'] + '_trace.json')
            trace_gzip = trace_file + '.gz'
            if os.path.isfile(trace_file):
                with open(trace_file, 'rb') as f_in:
                    with gzip.open(trace_gzip, 'wb', 7) as f_out:
                        shutil.copyfileobj(f_in, f_out)
            logging.debug('Waiting for trace processing to complete')
            trace_thread.join()
            if os.path.isfile(trace_gzip) and os.path.isfile(trace_file):
                try:
                    os.remove(trace_file)
                except Exception:
                    pass
            optimization.join()

    def wait_for_processing(self, task):
        """Stub for override"""
        pass

    def prepare_task(self, task):
        """Format the file prefixes for multi-step testing"""
        task['page_data'] = {}
        if task['current_step'] == 1:
            task['prefix'] = task['task_prefix']
            task['video_subdirectory'] = task['task_video_prefix']
        else:
            task['prefix'] = '{0}_{1:d}'.format(task['task_prefix'], task['current_step'])
            task['video_subdirectory'] = '{0}_{1:d}'.format(task['task_video_prefix'],
                                                            task['current_step'])
        if task['video_subdirectory'] not in task['video_directories']:
            task['video_directories'].append(task['video_subdirectory'])
        if self.event_name is not None:
            task['step_name'] = self.event_name
        else:
            task['step_name'] = 'Step_{0:d}'.format(task['current_step'])

    def process_video(self):
        """Post process the video"""
        from internal.video_processing import VideoProcessing
        video = VideoProcessing(self.job, self.task)
        video.process()

    def process_trace(self):
        """Post-process the trace file"""
        path_base = os.path.join(self.task['dir'], self.task['prefix'])
        trace_file = path_base + '_trace.json'
        if os.path.isfile(trace_file):
            user_timing = path_base + '_user_timing.json.gz'
            cpu_slices = path_base + '_timeline_cpu.json.gz'
            script_timing = path_base + '_script_timing.json.gz'
            feature_usage = path_base + '_feature_usage.json.gz'
            interactive = path_base + '_interactive.json.gz'
            netlog = path_base + '_netlog_requests.json.gz'
            v8_stats = path_base + '_v8stats.json.gz'
            trace_parser = os.path.join(self.support_path, "trace-parser.py")
            cmd = ['python', trace_parser, '-t', trace_file, '-u', user_timing,
                   '-c', cpu_slices, '-j', script_timing, '-f', feature_usage,
                   '-i', interactive, '-n', netlog, '-s', v8_stats]
            logging.debug(cmd)
            start = monotonic.monotonic()
            subprocess.call(cmd)
            elapsed = monotonic.monotonic() - start
            logging.debug("Time to process trace: %0.3f sec", elapsed)
            # delete the trace file if it wasn't requested
            trace_enabled = bool('trace' in self.job and self.job['trace'])
            timeline_enabled = bool('timeline' in self.job and self.job['timeline'])
            if not trace_enabled and not timeline_enabled:
                try:
                    os.remove(trace_file)
                except Exception:
                    pass

    def run_js_file(self, file_name):
        """Execute one of our js scripts"""
        ret = None
        script = None
        script_file_path = os.path.join(self.script_dir, file_name)
        if os.path.isfile(script_file_path):
            with open(script_file_path, 'rb') as script_file:
                script = script_file.read()
        if script is not None:
            ret = self.devtools.execute_js(script)
        return ret

    def collect_browser_metrics(self, task):
        """Collect all of the in-page browser metrics that we need"""
        user_timing = self.run_js_file('user_timing.js')
        if user_timing is not None:
            path = os.path.join(task['dir'], task['prefix'] + '_timed_events.json.gz')
            with gzip.open(path, 'wb', 7) as outfile:
                outfile.write(json.dumps(user_timing))
        page_data = self.run_js_file('page_data.js')
        if page_data is not None:
            task['page_data'].update(page_data)
        if 'customMetrics' in self.job:
            custom_metrics = {}
            for name in self.job['customMetrics']:
                script = 'var wptCustomMetric = function() {' +\
                         self.job['customMetrics'][name] +\
                         '};try{wptCustomMetric();}catch(e){};'
                custom_metrics[name] = self.devtools.execute_js(script)
            path = os.path.join(task['dir'], task['prefix'] + '_metrics.json.gz')
            with gzip.open(path, 'wb', 7) as outfile:
                outfile.write(json.dumps(custom_metrics))

    def process_command(self, command):
        """Process an individual script command"""
        logging.debug("Processing script command:")
        logging.debug(command)
        if command['command'] == 'navigate':
            self.devtools.start_navigating()
            self.devtools.send_command('Page.navigate', {'url': command['target']})
        elif command['command'] == 'logdata':
            self.task['combine_steps'] = False
            if int(re.search(r'\d+', str(command['target'])).group()):
                logging.debug("Data logging enabled")
                self.task['log_data'] = True
            else:
                logging.debug("Data logging disabled")
                self.task['log_data'] = False
        elif command['command'] == 'combinesteps':
            self.task['log_data'] = True
            self.task['combine_steps'] = True
        elif command['command'] == 'seteventname':
            self.event_name = command['target']
        elif command['command'] == 'exec':
            if command['record']:
                self.devtools.start_navigating()
            self.devtools.execute_js(command['target'])
        elif command['command'] == 'sleep':
            delay = min(60, max(0, int(re.search(r'\d+', str(command['target'])).group())))
            if delay > 0:
                time.sleep(delay)
        elif command['command'] == 'setabm':
            self.task['stop_at_onload'] = bool('target' in command and \
                                               int(re.search(r'\d+',
                                                             str(command['target'])).group()) == 0)
        elif command['command'] == 'setactivitytimeout':
            if 'target' in command:
                self.task['activity_time'] = \
                    max(0, min(30, int(re.search(r'\d+', str(command['target'])).group())))
        elif command['command'] == 'setuseragent':
            self.task['user_agent_string'] = command['target']
        elif command['command'] == 'setcookie':
            if 'target' in command and 'value' in command:
                url = command['target'].strip()
                cookie = command['value']
                pos = cookie.find(';')
                if pos > 0:
                    cookie = cookie[:pos]
                pos = cookie.find('=')
                if pos > 0:
                    name = cookie[:pos].strip()
                    value = cookie[pos+1:].strip()
                    if len(name) and len(value) and len(url):
                        self.devtools.send_command('Network.setCookie',
                                                   {'url': url, 'name': name, 'value': value})

    def navigate(self, url):
        """Navigate to the given URL"""
        if self.devtools is not None:
            self.devtools.send_command('Page.navigate', {'url': url}, wait=True)

    def get_requests(self):
        """Get the request details for running an optimization check"""
        requests = None
        if self.devtools is not None:
            requests = self.devtools.get_requests()
        return requests

    def run_lighthouse_test(self, task):
        """Run a lighthouse test against the current browser session"""
        from threading import Timer
        if 'url' in self.job and self.job['url'] is not None:
            output_path = os.path.join(task['dir'], 'lighthouse.json')
            json_file = os.path.join(task['dir'], 'lighthouse.report.json')
            json_gzip = os.path.join(task['dir'], 'lighthouse.json.gz')
            html_file = os.path.join(task['dir'], 'lighthouse.report.html')
            html_gzip = os.path.join(task['dir'], 'lighthouse.html.gz')
            command = ['lighthouse',
                       '--disable-network-throttling',
                       '--port', str(task['port']),
                       '--output', 'html',
                       '--output', 'json',
                       '--output-path', '"{0}"'.format(output_path)]
            if self.options.android or 'mobile' not in self.job or not self.job['mobile']:
                command.extend(['--disable-device-emulation', '--disable-cpu-throttling'])
            command.append('"{0}"'.format(self.job['url']))
            cmd = ' '.join(command)
            logging.debug(cmd)
            # Give lighthouse up to 10 minutes to run (safety for hung test)
            proc = subprocess.Popen(cmd, shell=True)
            timer = None
            try:
                timer = Timer(600, proc.kill)
                timer.start()
                proc.communicate()
            except Exception:
                logging.debug('Timeout running lighthouse test')
            finally:
                if timer is not None:
                    timer.cancel()
            if os.path.isfile(json_file):
                # Remove the raw screenshots if they were stored with the file
                lh_report = None
                with open(json_file, 'rb') as f_in:
                    lh_report = json.load(f_in)
                if lh_report is not None and 'audits' in lh_report and \
                        'screenshots' in lh_report['audits']:
                    del lh_report['audits']['screenshots']
                    with gzip.open(json_gzip, 'wb', 7) as f_out:
                        json.dump(lh_report, f_out)
                else:
                    with open(json_file, 'rb') as f_in:
                        with gzip.open(json_gzip, 'wb', 7) as f_out:
                            shutil.copyfileobj(f_in, f_out)
                try:
                    os.remove(json_file)
                except Exception:
                    pass
            if os.path.isfile(html_file):
                # Remove the raw screenshots if they were stored with the file
                with open(html_file, 'rb') as f_in:
                    lh_report = f_in.read()
                    start = lh_report.find('\n    &quot;screenshots')
                    if start >= 0:
                        end = lh_report.find('\n    },', start)
                        if end >= 0:
                            lh_report = lh_report[:start] + lh_report[end + 7:]
                    with gzip.open(html_gzip, 'wb', 7) as f_out:
                        f_out.write(lh_report)
                try:
                    os.remove(html_file)
                except Exception:
                    pass