Beispiel #1
0
 def __init__(self, verbosity=0):
     """
     Initialise the page: remember ``verbosity``, create one empty
     handler list per callback event, connect the main frame's
     ``urlChanged``, ``titleChanged`` and ``loadFinished`` signals to the
     matching ``on_*`` methods and create a fresh ``HarBuilder``.
     """
     super(QWebPage, self).__init__()
     self.verbosity = verbosity
     # Every event name gets its own, independent list of handlers.
     self.callbacks = {
         event_name: []
         for event_name in ("on_request", "on_response_headers", "on_response")
     }
     frame = self.mainFrame()
     frame.urlChanged.connect(self.on_url_changed)
     frame.titleChanged.connect(self.on_title_changed)
     frame.loadFinished.connect(self.on_load_finished)
     # TODO: add signal -- initialLayoutCompleted is not connected to
     # self.on_layout_completed yet.
     self.har = HarBuilder()
Beispiel #2
0
    def __init__(self, verbosity=0):
        """
        Initialise the page: remember ``verbosity``, create one empty
        handler list per callback event, connect the main frame signals
        to the matching ``on_*`` methods, create a fresh ``HarBuilder``
        and enable forwarding of unsupported content to
        ``handleUnsupportedContent``.
        """
        super(QWebPage, self).__init__()
        self.verbosity = verbosity
        # Every event name gets its own, independent list of handlers.
        self.callbacks = {
            event_name: []
            for event_name in ("on_request", "on_response_headers", "on_response")
        }

        frame = self.mainFrame()
        frame.urlChanged.connect(self.on_url_changed)
        frame.titleChanged.connect(self.on_title_changed)
        frame.loadFinished.connect(self.on_load_finished)
        frame.initialLayoutCompleted.connect(self.on_layout_completed)
        self.har = HarBuilder()

        # Default handler for unsupported content: accept the reply and
        # do nothing with it.
        self.unsupported_content_handler = lambda reply: None

        self.setForwardUnsupportedContent(True)
        self.unsupportedContent.connect(self.handleUnsupportedContent)
Beispiel #3
0
 def __init__(self, verbosity=0):
     """
     Initialise the page: remember ``verbosity``, create one empty
     handler list per callback event, connect the main frame's
     ``urlChanged``, ``titleChanged``, ``loadFinished`` and
     ``initialLayoutCompleted`` signals to the matching ``on_*`` methods
     and create a fresh ``HarBuilder``.
     """
     super(QWebPage, self).__init__()
     self.verbosity = verbosity
     # Every event name gets its own, independent list of handlers.
     self.callbacks = {
         event_name: []
         for event_name in ("on_request", "on_response_headers", "on_response")
     }
     frame = self.mainFrame()
     frame.urlChanged.connect(self.on_url_changed)
     frame.titleChanged.connect(self.on_title_changed)
     frame.loadFinished.connect(self.on_load_finished)
     frame.initialLayoutCompleted.connect(self.on_layout_completed)
     self.har = HarBuilder()
Beispiel #4
0
class SplashQWebPage(QWebPage):
    """
    QWebPage subclass that:

    * changes user agent;
    * logs JS console messages;
    * handles alert and confirm windows;
    * returns additional info about render errors;
    * logs HAR events;
    * stores options for various Splash components.
    """
    # RenderErrorInfo describing the last load error, or None. It is set
    # by ``extension`` and cleared by ``acceptNavigationRequest``.
    error_info = None
    # When not None, ``userAgentForUrl`` returns this value.
    custom_user_agent = None
    # NOTE(review): custom_headers, skip_custom_headers, resource_timeout
    # and response_body_enabled are not read inside this class; presumably
    # they are options consumed by other Splash components -- confirm.
    custom_headers = None
    skip_custom_headers = False
    # When True, ``acceptNavigationRequest`` rejects every request.
    navigation_locked = False
    resource_timeout = 0
    response_body_enabled = False

    def __init__(self, verbosity=0):
        """
        Create the page, its callback registry and its HAR builder.

        ``verbosity`` is stored on the instance; a value of 2 or more
        makes ``javaScriptConsoleMessage`` log JS console output.
        """
        # Name this class (not its parent) in super(), exactly like the
        # other overrides below, so that method lookup starts right after
        # SplashQWebPage in the MRO instead of skipping QWebPage.
        super(SplashQWebPage, self).__init__()
        self.verbosity = verbosity
        self.callbacks = {
            "on_request": [],
            "on_response_headers": [],
            "on_response": [],
        }
        self.mainFrame().urlChanged.connect(self.on_url_changed)
        self.mainFrame().titleChanged.connect(self.on_title_changed)
        self.mainFrame().loadFinished.connect(self.on_load_finished)
        # TODO: add signal -- initialLayoutCompleted is not connected to
        # self.on_layout_completed yet; on_load_finished records the
        # "onContentLoad" timing in the meantime.
        self.har = HarBuilder()

    def mainFrame(self):
        """
        Return the page itself.

        Webengine doesn't need a main frame, so the signals that
        ``__init__`` connects through ``mainFrame()`` are looked up on
        the page object.
        """
        return self

    def reset_har(self):
        """Reset the HAR builder of this page."""
        self.har.reset()

    def clear_callbacks(self, event=None):
        """
        Unregister all callbacks for an event. If event is None
        then all callbacks are removed.

        The handler lists are emptied in place, so references to them
        held elsewhere observe the change. An unknown event name raises
        KeyError.
        """
        if event is None:
            # Empty every list directly; no recursion (and therefore no
            # assert-based recursion guard) is needed.
            for handlers in self.callbacks.values():
                del handlers[:]
            return
        del self.callbacks[event][:]

    def on_title_changed(self, title):
        """Signal handler: store the new page title in the HAR log."""
        self.har.store_title(title)

    def on_url_changed(self, url):
        """Signal handler: store the new page URL in the HAR log."""
        self.har.store_url(url)

    def on_load_finished(self, ok):
        """
        Signal handler: store the "onLoad" and "onContentLoad" timings.

        ``ok`` is ignored; both timings are stored regardless of it.
        """
        self.har.store_timing("onLoad")
        self.har.store_timing("onContentLoad")

    def on_layout_completed(self):
        """
        Store the "onContentLoad" timing.

        ``__init__`` currently does not connect this handler to any
        signal.
        """
        self.har.store_timing("onContentLoad")

    def acceptNavigationRequest(self, webFrame, networkRequest,
                                navigationType):
        """
        Decide whether a navigation request may proceed.

        Returns False while ``navigation_locked`` is set. Otherwise the
        previous ``error_info`` is cleared and the decision is delegated
        to the base class.
        """
        if self.navigation_locked:
            return False
        self.error_info = None
        return super(SplashQWebPage,
                     self).acceptNavigationRequest(webFrame, networkRequest,
                                                   navigationType)

    def javaScriptAlert(self, frame, msg):
        """Handle a JS ``alert``: take no action."""
        return

    def javaScriptConfirm(self, frame, msg):
        """Handle a JS ``confirm``: always answer False."""
        return False

    def javaScriptConsoleMessage(self, level, msg, line_number, source_id):
        """Log a JS console message when ``verbosity`` is 2 or more."""
        if self.verbosity >= 2:
            log.msg("JsConsole(%s:%d): %s" % (source_id, line_number, msg),
                    system='render')

    def userAgentForUrl(self, url):
        """
        Return ``custom_user_agent`` when it is set, otherwise the base
        class user agent for ``url``.
        """
        if self.custom_user_agent is None:
            return super(SplashQWebPage, self).userAgentForUrl(url)
        return self.custom_user_agent

    # loadFinished signal handler receives ok=False at least these cases:
    # 1. when there is an error with the page (e.g. the page is not available);
    # 2. when a redirect happened before all related resource are loaded;
    # 3. when page sends headers that are not parsed correctly
    #    (e.g. a bad Content-Type).
    # By implementing ErrorPageExtension we can catch (1) and
    # distinguish it from (2) and (3).
    def extension(self, extension, info=None, errorPage=None):
        """
        Handle ``QWebPage.ErrorPageExtension``.

        For that extension the error is recorded in ``self.error_info``
        (domain name, numeric code, error text, URL), an HTML error page
        is written to ``errorPage.content`` and True is returned.
        For any other extension nothing is done and True is returned.
        """
        if extension == QWebPage.ErrorPageExtension:
            # catch the error, populate self.error_info and return an
            # error page

            info = sip.cast(info, QWebPage.ErrorPageExtensionOption)

            domain = 'Unknown'
            if info.domain == QWebPage.QtNetwork:
                domain = 'Network'
            elif info.domain == QWebPage.Http:
                domain = 'HTTP'
            elif info.domain == QWebPage.WebKit:
                domain = 'WebKit'

            self.error_info = RenderErrorInfo(domain, int(info.error),
                                              str(info.errorString),
                                              str(info.url.toString()))

            # XXX: this page currently goes nowhere
            # NOTE(review): the error text and URL are interpolated into
            # the HTML without escaping -- revisit if this page is ever
            # shown to users.
            content = u"""
                <html><head><title>Failed loading page</title></head>
                <body>
                    <h1>Failed loading page ({0.text})</h1>
                    <h2>{0.url}</h2>
                    <p>{0.type} error #{0.code}</p>
                </body></html>""".format(self.error_info)

            errorPage = sip.cast(errorPage, QWebPage.ErrorPageExtensionReturn)
            errorPage.content = QByteArray(content.encode('utf-8'))
            return True

        # XXX: this method always returns True, even if we haven't
        # handled the extension. Is it correct? When can this method be
        # called with extension which is not ErrorPageExtension if we
        # are returning False in ``supportsExtension`` for such extensions?
        return True

    def supportsExtension(self, extension):
        """Return True only for ``QWebPage.ErrorPageExtension``."""
        return extension == QWebPage.ErrorPageExtension

    def maybe_redirect(self, load_finished_ok):
        """
        Return True if the current webpage state looks like a redirect.
        Use this function from loadFinished handler to ignore spurious
        signals.

        FIXME: This can return True if server returned incorrect
        Content-Type header, but there is no an additional loadFinished
        signal in this case.
        """
        return not load_finished_ok and self.error_info is None

    def is_ok(self, load_finished_ok):
        """Truthy when the load finished OK and no error was recorded."""
        return load_finished_ok and self.error_info is None

    def error_loading(self, load_finished_ok):
        """Truthy when the load finished OK but an error was recorded."""
        return load_finished_ok and self.error_info is not None
Beispiel #5
0
class SplashQWebPage(QWebPage):
    """
    QWebPage subclass that:

    * changes user agent;
    * logs JS console messages;
    * handles alert and confirm windows;
    * returns additional info about render errors;
    * logs HAR events;
    * stores options for various Splash components.
    """
    # RenderErrorInfo describing the last load error, or None. It is set
    # by ``extension`` and cleared by ``acceptNavigationRequest``.
    error_info = None
    # When not None, ``userAgentForUrl`` returns this value.
    custom_user_agent = None
    # NOTE(review): custom_headers, skip_custom_headers and
    # resource_timeout are not read inside this class; presumably they
    # are options consumed by other Splash components -- confirm.
    custom_headers = None
    skip_custom_headers = False
    # When True, ``acceptNavigationRequest`` rejects every request.
    navigation_locked = False
    resource_timeout = 0

    def __init__(self, verbosity=0):
        """
        Create the page, its cookie jar, its callback registry and its
        HAR builder, and connect the main frame signals to the ``on_*``
        handlers.

        ``verbosity`` is stored on the instance; a value of 2 or more
        makes ``javaScriptConsoleMessage`` log JS console output.
        """
        # Name this class (not its parent) in super(), exactly like the
        # other overrides below, so that method lookup starts right after
        # SplashQWebPage in the MRO instead of skipping QWebPage.
        super(SplashQWebPage, self).__init__()
        self.verbosity = verbosity
        self.cookiejar = SplashCookieJar(self)
        self.callbacks = {
            "on_request": [],
            "on_response_headers": [],
            "on_response": [],
        }
        self.mainFrame().urlChanged.connect(self.on_url_changed)
        self.mainFrame().titleChanged.connect(self.on_title_changed)
        self.mainFrame().loadFinished.connect(self.on_load_finished)
        self.mainFrame().initialLayoutCompleted.connect(
            self.on_layout_completed)
        self.har = HarBuilder()

    def reset_har(self):
        """Reset the HAR builder of this page."""
        self.har.reset()

    def on_title_changed(self, title):
        """Signal handler: store the new page title in the HAR log."""
        self.har.store_title(title)

    def on_url_changed(self, url):
        """Signal handler: store the new page URL in the HAR log."""
        self.har.store_url(url)

    def on_load_finished(self, ok):
        """
        Signal handler: store the "onLoad" timing.

        ``ok`` is ignored; the timing is stored regardless of it.
        """
        self.har.store_timing("onLoad")

    def on_layout_completed(self):
        """Signal handler: store the "onContentLoad" timing."""
        self.har.store_timing("onContentLoad")

    def acceptNavigationRequest(self, webFrame, networkRequest,
                                navigationType):
        """
        Decide whether a navigation request may proceed.

        Returns False while ``navigation_locked`` is set. Otherwise the
        previous ``error_info`` is cleared and the decision is delegated
        to the base class.
        """
        if self.navigation_locked:
            return False
        self.error_info = None
        return super(SplashQWebPage, self).acceptNavigationRequest(
            webFrame, networkRequest, navigationType)

    def javaScriptAlert(self, frame, msg):
        """Handle a JS ``alert``: take no action."""
        return

    def javaScriptConfirm(self, frame, msg):
        """Handle a JS ``confirm``: always answer False."""
        return False

    def javaScriptConsoleMessage(self, msg, line_number, source_id):
        """Log a JS console message when ``verbosity`` is 2 or more."""
        if self.verbosity >= 2:
            log.msg("JsConsole(%s:%d): %s" % (source_id, line_number, msg),
                    system='render')

    def userAgentForUrl(self, url):
        """
        Return ``custom_user_agent`` when it is set, otherwise the base
        class user agent for ``url``.
        """
        if self.custom_user_agent is None:
            return super(SplashQWebPage, self).userAgentForUrl(url)
        return self.custom_user_agent

    # loadFinished signal handler receives ok=False at least these cases:
    # 1. when there is an error with the page (e.g. the page is not available);
    # 2. when a redirect happened before all related resource are loaded;
    # 3. when page sends headers that are not parsed correctly
    #    (e.g. a bad Content-Type).
    # By implementing ErrorPageExtension we can catch (1) and
    # distinguish it from (2) and (3).
    def extension(self, extension, info=None, errorPage=None):
        """
        Handle ``QWebPage.ErrorPageExtension``.

        For that extension the error is recorded in ``self.error_info``
        (domain name, numeric code, error text, URL), an HTML error page
        is written to ``errorPage.content`` and True is returned.
        For any other extension nothing is done and True is returned.
        """
        if extension == QWebPage.ErrorPageExtension:
            # catch the error, populate self.error_info and return an
            # error page

            info = sip.cast(info, QWebPage.ErrorPageExtensionOption)

            domain = 'Unknown'
            if info.domain == QWebPage.QtNetwork:
                domain = 'Network'
            elif info.domain == QWebPage.Http:
                domain = 'HTTP'
            elif info.domain == QWebPage.WebKit:
                domain = 'WebKit'

            self.error_info = RenderErrorInfo(
                domain,
                int(info.error),
                six.text_type(info.errorString),
                six.text_type(info.url.toString())
            )

            # XXX: this page currently goes nowhere
            # NOTE(review): the error text and URL are interpolated into
            # the HTML without escaping -- revisit if this page is ever
            # shown to users.
            content = u"""
                <html><head><title>Failed loading page</title></head>
                <body>
                    <h1>Failed loading page ({0.text})</h1>
                    <h2>{0.url}</h2>
                    <p>{0.type} error #{0.code}</p>
                </body></html>""".format(self.error_info)

            errorPage = sip.cast(errorPage, QWebPage.ErrorPageExtensionReturn)
            errorPage.content = QByteArray(content.encode('utf-8'))
            return True

        # XXX: this method always returns True, even if we haven't
        # handled the extension. Is it correct? When can this method be
        # called with extension which is not ErrorPageExtension if we
        # are returning False in ``supportsExtension`` for such extensions?
        return True

    def supportsExtension(self, extension):
        """Return True only for ``QWebPage.ErrorPageExtension``."""
        return extension == QWebPage.ErrorPageExtension

    def maybe_redirect(self, load_finished_ok):
        """
        Return True if the current webpage state looks like a redirect.
        Use this function from loadFinished handler to ignore spurious
        signals.

        FIXME: This can return True if server returned incorrect
        Content-Type header, but there is no an additional loadFinished
        signal in this case.
        """
        return not load_finished_ok and self.error_info is None

    def is_ok(self, load_finished_ok):
        """Truthy when the load finished OK and no error was recorded."""
        return load_finished_ok and self.error_info is None

    def error_loading(self, load_finished_ok):
        """Truthy when the load finished OK but an error was recorded."""
        return load_finished_ok and self.error_info is not None