Ejemplo n.º 1
0
    def _check_failure_status(self):
        """Raise the pending command failure, if one has been recorded.

        When `self.failure_status` is falsy this only emits a debug log line.
        Otherwise `self._cleanup_before_fail()` runs first and the recorded
        'ErrorType' decides what is raised:

        * 'ExceedCommandFailureLimit' / 'ExceedLaunchFailureLimit' raise
          `CommandExecutionError` carrying the stored 'CommandSequence'.
        * 'CriticalChildException' re-raises whatever is unpickled from the
          stored 'Exception' entry.

        Any other 'ErrorType' returns normally after the cleanup has run.
        """
        self.logger.debug("Checking command failure status indicator...")
        if not self.failure_status:
            return

        self.logger.debug(
            "TaskManager failure status set, halting command execution.")
        self._cleanup_before_fail()

        # Read the status only after cleanup, as the checks below rely on it.
        status = self.failure_status
        error_type = status['ErrorType']
        if error_type == 'ExceedCommandFailureLimit':
            raise CommandExecutionError(
                "TaskManager exceeded maximum consecutive command "
                "execution failures.",
                status['CommandSequence'])
        if error_type == 'ExceedLaunchFailureLimit':
            raise CommandExecutionError(
                "TaskManager failed to launch browser within allowable "
                "failure limit.", status['CommandSequence'])
        if error_type == 'CriticalChildException':
            # NOTE(review): unpickling is only safe when the payload comes
            # from a trusted source -- confirm where 'Exception' is produced.
            pickled_exc_info = status['Exception']
            reraise(*cPickle.loads(pickled_exc_info))
Ejemplo n.º 2
0
 def _gracefully_fail(self, msg, command):
     """
     Shut the manager down in failure mode, then raise.
     <msg>: message given to the CommandExecutionError that is raised
     <command>: passed along as the second argument of that error
     """
     # The shutdown must happen before the error object is built and raised
     self._shutdown_manager(failure=True)
     failure = CommandExecutionError(msg, command)
     raise failure
Ejemplo n.º 3
0
    def screenshot_full_page(self, suffix='', timeout=30):
        """Queue a command that saves a screenshot of the entire page.

        NOTE: geckodriver v0.15 only supports viewport screenshots. To
        screenshot the entire page we scroll the page using javascript and take
        a viewport screenshot at each location. This method will save the
        parts and a stitched version in the `screenshot_path`. We only scroll
        vertically, so pages that are wider than the viewport will be clipped.
        See: https://github.com/mozilla/geckodriver/issues/570

        The screenshot produced will only include the area originally
        loaded at the start of the command. Sites which dynamically expand as
        the page is scrolled (i.e. infinite scroll) will only go as far as the
        original height.

        NOTE: In geckodriver v0.15 doing any scrolling (or having devtools
        open) seems to break element-only screenshots. So using this command
        will cause any future element-only screenshots to be mis-aligned

        Args:
            suffix: second element of the queued command tuple.
            timeout: added to `self.total_timeout` and stored alongside the
                queued command.

        Raises:
            CommandExecutionError: if `self.contains_get_or_browse` is falsy.
                `self.total_timeout` has already been increased at that point.
        """
        self.total_timeout += timeout
        if not self.contains_get_or_browse:
            # The message previously named the dump page source command
            # (copy-paste slip); it now names this command.
            raise CommandExecutionError(
                "No get or browse request preceding "
                "the screenshot full page command", self)
        command = ('SCREENSHOT_FULL_PAGE', suffix)
        self.commands_with_timeout.append((command, timeout))
Ejemplo n.º 4
0
    def _start_thread(self, browser, command_sequence, condition=None):
        """Record a new site visit and run the command sequence in a thread.

        Returns early (after logging an error) when `self.closing` is set.
        When `self.failure_flag` is set, cleanup runs and
        CommandExecutionError is raised instead of starting a thread.
        Otherwise the browser receives the next visit id, a `site_visits`
        INSERT is sent over `self.sock`, the visit counter is advanced, and
        `self._issue_command` is started on a daemon thread that is also
        stored on `browser.command_thread`.
        """
        # Status flags are checked before any state is touched
        if self.closing:
            self.logger.error(
                "Attempted to execute command on a closed TaskManager")
            return
        if self.failure_flag:
            self.logger.debug(
                "TaskManager failure threshold exceeded, raising CommandExecutionError"
            )
            self._cleanup_before_fail()
            raise CommandExecutionError(
                "TaskManager failure threshold exceeded", command_sequence)

        # Register the visit with the browser and the data aggregator socket
        visit_id = self.next_visit_id
        browser.set_visit_id(visit_id)
        insert_query = (
            "INSERT INTO site_visits (visit_id, crawl_id, site_url) VALUES (?,?,?)")
        insert_values = (visit_id, browser.crawl_id, command_sequence.url)
        self.sock.send((insert_query, insert_values))
        self.next_visit_id += 1

        # Hand the sequence to a daemon worker thread
        worker = threading.Thread(
            target=self._issue_command,
            args=(browser, command_sequence, condition))
        worker.daemon = True
        browser.command_thread = worker
        worker.start()
Ejemplo n.º 5
0
    def recursive_dump_page_source(self, suffix='', timeout=30):
        """Queue a recursive dump of the rendered page source.

        Unlike `dump_page_source`, this includes iframe sources. Archive is
        stored in `manager_params['source_dump_path']` and is keyed by the
        current `visit_id` and top-level url. The source dump is a gzipped json
        file with the following structure:

        {
            'document_url': "http://example.com",
            'source': "<html> ... </html>",
            'iframes': {
                'frame_1': {'document_url': ...,
                            'source': ...,
                            'iframes': { ... }},
                'frame_2': {'document_url': ...,
                            'source': ...,
                            'iframes': { ... }},
                'frame_3': { ... }
            }
        }

        `timeout` is added to `self.total_timeout` before the precondition
        check; CommandExecutionError is raised when
        `self.contains_get_or_browse` is falsy.
        """
        self.total_timeout += timeout
        if self.contains_get_or_browse:
            queued = (('RECURSIVE_DUMP_PAGE_SOURCE', suffix), timeout)
            self.commands_with_timeout.append(queued)
            return
        raise CommandExecutionError(
            "No get or browse request preceding "
            "the dump page source command", self)
Ejemplo n.º 6
0
 def extract_links(self, timeout=30):
     """Queue a command that extracts the links found on the web page.

     `timeout` is added to `self.total_timeout` and stored alongside the
     queued command. Raises CommandExecutionError when
     `self.contains_get_or_browse` is falsy (the timeout has already been
     added at that point).
     """
     self.total_timeout += timeout
     if not self.contains_get_or_browse:
         # The message previously named the dump storage vectors command
         # (copy-paste slip); it now names this command.
         raise CommandExecutionError("No get or browse request preceding "
                                     "the extract links command", self)
     command = ('EXTRACT_LINKS',)
     self.commands_with_timeout.append((command, timeout))
Ejemplo n.º 7
0
 def dump_flash_cookies(self, timeout=60):
     """Queue the DUMP_FLASH_COOKIES command (storage vectors dump).

     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         self.commands_with_timeout.append((('DUMP_FLASH_COOKIES',), timeout))
         return
     raise CommandExecutionError("No get or browse request preceding "
                                 "the dump storage vectors command", self)
Ejemplo n.º 8
0
 def run_custom_function(self, function_handle, func_args=(), timeout=30):
     """Queue a custom function to run, given its function handle.

     `function_handle` and `func_args` are stored unchanged in the queued
     command tuple. `timeout` is added to `self.total_timeout` and stored
     alongside the command. Raises CommandExecutionError when
     `self.contains_get_or_browse` is falsy (the timeout has already been
     added at that point).
     """
     self.total_timeout += timeout
     if not self.contains_get_or_browse:
         # The message previously named the dump page source command
         # (copy-paste slip); it now names this command.
         raise CommandExecutionError("No get or browse request preceding "
                                     "the run custom function command", self)
     command = ('RUN_CUSTOM_FUNCTION', function_handle, func_args)
     self.commands_with_timeout.append((command, timeout))
Ejemplo n.º 9
0
 def dump_page_source(self, dump_name, timeout=30):
     """Queue a dump of the rendered page source under `dump_name`.

     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         queued = (('DUMP_PAGE_SOURCE', dump_name), timeout)
         self.commands_with_timeout.append(queued)
         return
     raise CommandExecutionError("No get or browse request preceding "
                                 "the dump page source command", self)
Ejemplo n.º 10
0
 def save_screenshot(self, screenshot_name, timeout=30):
     """Queue a page screenshot to be saved as `screenshot_name`.

     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         queued = (('SAVE_SCREENSHOT', screenshot_name), timeout)
         self.commands_with_timeout.append(queued)
         return
     raise CommandExecutionError("No get or browse request preceding "
                                 "the save screenshot command", self)
Ejemplo n.º 11
0
 def scroll_bottom(self, timeout):
     """Queue the SCROLL_BOTTOM command.

     `timeout` is added to `self.total_timeout` and stored alongside the
     queued command. Raises CommandExecutionError when
     `self.contains_get_or_browse` is falsy (the timeout has already been
     added at that point).
     """
     self.total_timeout += timeout
     if not self.contains_get_or_browse:
         # The message previously named the jiggle_mouse command
         # (copy-paste slip); it now names this command.
         raise CommandExecutionError(
             "No get or browse request preceding "
             "the scroll_bottom command", self)
     command = ('SCROLL_BOTTOM', )
     self.commands_with_timeout.append((command, timeout))
 def extract_elements(self, selector, timeout=30):
     """Queue text extraction for elements matching the CSS `selector`.

     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         queued = (('EXTRACT_ELEMENTS', selector), timeout)
         self.commands_with_timeout.append(queued)
         return
     raise CommandExecutionError("No get or browse request preceding "
                                 "the extract elements command", self)
Ejemplo n.º 13
0
 def dump_profile_cookies(self, timeout=60):
     """Queue the DUMP_PROFILE_COOKIES command.

     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         self.commands_with_timeout.append(
             (('DUMP_PROFILE_COOKIES',), timeout))
         return
     raise CommandExecutionError("No get or browse request preceding "
                                 "the dump storage vectors command", self)
Ejemplo n.º 14
0
 def save_screenshot(self, suffix='', timeout=30):
     """Queue a screenshot of the current viewport.

     `suffix` is stored as the second element of the queued command.
     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         queued = (('SAVE_SCREENSHOT', suffix), timeout)
         self.commands_with_timeout.append(queued)
         return
     raise CommandExecutionError(
         "No get or browse request preceding "
         "the save screenshot command", self)
Ejemplo n.º 15
0
 def facebook_login(self, url, timeout=120):
     """Queue a facebook login attempt on <url>.

     `timeout` is added to `self.total_timeout` first; CommandExecutionError
     is raised when `self.contains_get_or_browse` is falsy.
     """
     self.total_timeout += timeout
     if self.contains_get_or_browse:
         queued = (('FACEBOOK_LOGIN', url), timeout)
         self.commands_with_timeout.append(queued)
         return
     raise CommandExecutionError(
         "No get or browse request preceding "
         "the facebook_login command", self)
Ejemplo n.º 16
0
 def login(self, logindata, timeout=30):
     '''Queue a login to a specific site.

     `logindata` is serialised with `json.dumps` and stored in the queued
     command tuple. `timeout` is added to `self.total_timeout` and stored
     alongside the command. Raises CommandExecutionError when
     `self.contains_get_or_browse` is falsy (the timeout has already been
     added at that point, and `logindata` is not serialised).
     '''
     self.total_timeout += timeout
     if not self.contains_get_or_browse:
         # The message previously named the dump page source command
         # (copy-paste slip); it now names this command.
         raise CommandExecutionError(
             "No get or browse request preceding "
             "the login command", self)
     command = ("LOGIN", json.dumps(logindata))
     self.commands_with_timeout.append((command, timeout))
Ejemplo n.º 17
0
    def disable_request_filter(self, filter_name, timeout=15):
        """ Disable one of the supported request filters.

        Filters are defined in:
            `automation/Extension/firefox/lib/request-filter.js`

        Supported `filter_name` values:
            * `drop-all`    Drop all subsequent requests (requests are logged)

        `timeout` is added to `self.total_timeout` and stored alongside the
        queued `('REQUEST_FILTER', 'disable', filter_name)` command. Raises
        CommandExecutionError when `self.contains_get_or_browse` is falsy
        (the timeout has already been added at that point).
        """
        self.total_timeout += timeout
        if not self.contains_get_or_browse:
            # The message previously named enable_request_filter
            # (copy-paste slip); it now names this command.
            raise CommandExecutionError(
                "No get or browse request preceding "
                "the disable_request_filter command", self)
        command = ('REQUEST_FILTER', 'disable', filter_name)
        self.commands_with_timeout.append((command, timeout))
Ejemplo n.º 18
0
    def _start_thread(self, browser, command, reset, condition=None):
        """Run `command` for `browser` on a new daemon thread.

        Returns early (after logging an error) when `self.closing` is set.
        When `self.failure_flag` is set, cleanup runs and
        CommandExecutionError is raised instead of starting a thread.
        Otherwise `self._issue_command` is started with
        `(browser, command, reset, condition)` and the thread is stored on
        `browser.command_thread`.
        """
        # Status flags are checked before any thread is created
        if self.closing:
            self.logger.error(
                "Attempted to execute command on a closed TaskManager")
            return
        if self.failure_flag:
            self.logger.debug(
                "TaskManager failure threshold exceeded, raising CommandExecutionError"
            )
            self._cleanup_before_fail()
            raise CommandExecutionError(
                "TaskManager failure threshold exceeded", command)

        # Hand the command to a daemon worker thread
        worker = threading.Thread(
            target=self._issue_command,
            args=(browser, command, reset, condition))
        worker.daemon = True
        browser.command_thread = worker
        worker.start()