Esempio n. 1
0
    def handle_page_location_changed(self, timeout=None):
        '''
        Fetch the body of the document the tab has redirected itself to.

        If the chrome tab has internally redirected (generally because of
        javascript), this collects the captured page-loading events, treats
        the most recent one as the tab's latest location, waits for the
        matching network response, and returns the unpacked body of that
        request.

        Args:
            timeout: Optional timeout forwarded to the transport while
                waiting for the network response of the redirected document.
                When falsy (``None`` or ``0``) no timeout is passed and the
                transport applies its own default.

        Returns:
            The result of ``self.get_unpacked_response_body()`` for the
            request id of the last captured loading event, called with the
            content type detected from the response.

        Raises:
            ChromeError: If no loading events were captured, or if the
                transport returned no network response for the new location.
        '''

        self.log.debug("We may have redirected. Checking.")

        messages = self.transport.recv_all_filtered(
            filter_funcs.capture_loading_events, tab_key=self.tab_id)
        if not messages:
            raise ChromeError("Couldn't track redirect! No idea what to do!")

        # We assume the last document request was the redirect.
        # This is /probably/ kind of a poor practice, and it is unclear what
        # happens if there are non-html documents (or if that can even happen).
        redirect_params = messages[-1]['params']
        document_url = redirect_params['documentURL']
        self.log.info("Probably a redirect! New content url: '%s'",
                      document_url)

        # Only forward a timeout the caller actually asked for. Callers that
        # pass nothing keep the transport's default wait, so their behaviour
        # is unchanged, while an explicit timeout is no longer ignored.
        # NOTE(review): recv_all_filtered() above may accept a timeout too;
        # its signature is not visible here, so it is left alone.
        recv_kwargs = {'tab_key': self.tab_id}
        if timeout:
            recv_kwargs['timeout'] = timeout

        resp = self.transport.recv_filtered(
            filter_funcs.network_response_recieved_for_url(
                document_url, redirect_params['frameId']),
            **recv_kwargs)
        if resp is None:
            # Fail with a library error rather than a TypeError from
            # subscripting None below.
            raise ChromeError(
                "No network response received for redirected url: '%s'"
                % document_url)

        resp_response = resp['params']['response']

        # Content type precedence: fallback < reported mimeType < the actual
        # content-type header (with any ";charset=..." parameter removed).
        ctype = resp_response.get('mimeType', 'application/unknown')

        # HTTP header names are case-insensitive, and the browser reports them
        # with whatever casing the server used, so match without regard to case.
        headers = resp_response.get('headers') or {}
        for header_name, header_value in headers.items():
            if header_name.lower() == 'content-type':
                ctype = header_value.split(";")[0].strip()
                break

        return self.get_unpacked_response_body(
            redirect_params['requestId'], mimetype=ctype)
Esempio n. 2
0
    def blocking_navigate(self, url, timeout=DEFAULT_TIMEOUT_SECS):
        '''
        Do a blocking navigate to url `url`.

        This function triggers a navigation, and then waits for the browser
        to report that the page has loaded. Per the original author's note,
        this roughly corresponds to the javascript `DOMContentLoaded` event.

        Internals:

        After flushing the tab's pending messages and issuing the navigate
        command, this waits, in order, for:

         - the frame-navigated message for the returned frame id
         - "Page.frameStartedLoading"
         - "Page.frameStoppedLoading"
         - the network-response-received message for the returned frame id

        If any of those waits raises ``ChromeResponseNotReceived``, the
        response is assumed to possibly be a binary (non-markup) object, and
        a "Network.responseReceived" message matching the navigation's loader
        id is awaited instead.

        Args:
            url: The URL to navigate the tab to.
            timeout: Timeout passed to each individual wait on the transport
                (not a total budget for the whole call).

        Returns:
            The ``'params'`` member of the response-received message.

        Raises:
            ChromeNavigateTimedOut: If the transport returned no
                response-received message, on either the normal or the
                fallback path.
            AssertionError: If the navigate command's reply lacks
                ``result``, ``frameId`` or ``loaderId``.
        '''

        # Drop any stale messages so the waits below only see events caused
        # by this navigation.
        self.transport.flush(tab_key=self.tab_id)

        self.log.debug("Blocking navigate to URL: '%s'", url)
        ret = self.Page_navigate(url=url)

        assert ("result" in ret), "Missing return content"
        assert ("frameId"
                in ret['result']), "Missing 'frameId' in return content"
        assert ("loaderId"
                in ret['result']), "Missing 'loaderId' in return content"

        expected_id = ret['result']['frameId']
        loader_id = ret['result']['loaderId']

        try:
            self.log.debug("Waiting for frame navigated command response.")
            self.transport.recv_filtered(
                filter_funcs.check_frame_navigated_command(expected_id),
                tab_key=self.tab_id,
                timeout=timeout)

            self.log.debug("Waiting for frameStartedLoading response.")
            self.transport.recv_filtered(
                filter_funcs.check_frame_load_command(
                    "Page.frameStartedLoading"),
                tab_key=self.tab_id,
                timeout=timeout)

            self.log.debug("Waiting for frameStoppedLoading response.")
            self.transport.recv_filtered(
                filter_funcs.check_frame_load_command(
                    "Page.frameStoppedLoading"),
                tab_key=self.tab_id,
                timeout=timeout)
            # NOTE: waiting on the load-event-fired message is currently
            # disabled.

            self.log.debug("Waiting for responseReceived response.")
            resp = self.transport.recv_filtered(
                filter_funcs.network_response_recieved_for_url(
                    url=None, expected_id=expected_id),
                tab_key=self.tab_id,
                timeout=timeout)

            if resp is None:
                raise ChromeNavigateTimedOut("Blocking navigate timed out!")

            return resp['params']
        # The `Page.frameNavigated` event does not get fired for non-markup
        # responses. Therefore, if we time out waiting for that, check to see
        # if we received a binary response.
        except ChromeResponseNotReceived:
            # So this is basically broken, fix is
            # https://bugs.chromium.org/p/chromium/issues/detail?id=831887
            # but that bug report isn't fixed yet.
            self.log.warning(
                "Failed to receive expected response to navigate command. Checking if response is a binary object."
            )
            resp = self.transport.recv_filtered(
                keycheck=filter_funcs.check_frame_loader_command(
                    method_name="Network.responseReceived",
                    loader_id=loader_id),
                tab_key=self.tab_id,
                timeout=timeout)

            # Mirror the primary path: a missing response is a navigation
            # timeout, not a TypeError from subscripting None.
            if resp is None:
                raise ChromeNavigateTimedOut("Blocking navigate timed out!")

            return resp['params']