Ejemplo n.º 1
0
    def doRequest(self, url, baseurl=None, wait_time=None, viewport=None, js_source=None, js_profile=None, console=False):
        self.url = url
        self.wait_time = defaults.WAIT_TIME if wait_time is None else wait_time
        self.js_source = js_source
        self.js_profile = js_profile
        self.console = console
        self.viewport = defaults.VIEWPORT if viewport is None else viewport

        request = QNetworkRequest()
        request.setUrl(QUrl(url))

        if self.viewport != 'full':
            # viewport='full' can't be set if content is not loaded yet
            self._setViewportSize(self.viewport)

        if baseurl:
            self._baseUrl = QUrl(baseurl)
            request.setOriginatingObject(self.web_page.mainFrame())
            self._reply = self.network_manager.get(request)
            self._reply.finished.connect(self._requestFinished)
        else:
            self.web_page.loadFinished.connect(self._loadFinished)
            if self.splash_request.method == 'POST':
                headers = self.splash_request.getAllHeaders()
                for header_name, header_value in headers.items():
                    request.setRawHeader(header_name, header_value)
                self.web_page.mainFrame().load(request,
                                               QNetworkAccessManager.PostOperation,
                                               self.splash_request.content.getvalue())
            else:
                self.web_page.mainFrame().load(request)
Ejemplo n.º 2
0
    def doRequest(self, url, baseurl=None, wait_time=None, viewport=None,
                  js_source=None, js_profile=None, images=None, console=False):
        self.url = url
        self.wait_time = defaults.WAIT_TIME if wait_time is None else wait_time
        self.js_source = js_source
        self.js_profile = js_profile
        self.console = console
        self.viewport = defaults.VIEWPORT if viewport is None else viewport

        self.web_page.settings().setAttribute(QWebSettings.AutoLoadImages, images)

        # setup logging
        if self.verbosity >= 4:
            self.web_page.loadStarted.connect(self._loadStarted)
            self.web_page.mainFrame().loadFinished.connect(self._frameLoadFinished)
            self.web_page.mainFrame().loadStarted.connect(self._frameLoadStarted)
            self.web_page.mainFrame().contentsSizeChanged.connect(self._contentsSizeChanged)

        if self.verbosity >= 3:
            self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._javaScriptWindowObjectCleared)
            self.web_page.mainFrame().initialLayoutCompleted.connect(self._initialLayoutCompleted)

        self.web_page.mainFrame().urlChanged.connect(self._urlChanged)

        # do the request
        request = QNetworkRequest()
        request.setUrl(QUrl(url.decode('utf8')))

        if self.viewport != 'full':
            # viewport='full' can't be set if content is not loaded yet
            self._setViewportSize(self.viewport)

        if getattr(self.splash_request, 'pass_headers', False):
            headers = self.splash_request.getAllHeaders()
            for name, value in headers.items():
                request.setRawHeader(name, value)
                if name.lower() == 'user-agent':
                    self.web_page.custom_user_agent = value

        if baseurl:
            # If baseurl is used, we download the page manually,
            # then set its contents to the QWebPage and let it
            # download related resources and render the result.
            self._baseUrl = QUrl(baseurl.decode('utf8'))
            request.setOriginatingObject(self.web_page.mainFrame())
            self._reply = self.network_manager.get(request)
            self._reply.finished.connect(self._requestFinished)
        else:
            self.web_page.loadFinished.connect(self._loadFinished)

            if self.splash_request.method == 'POST':
                body = self.splash_request.content.getvalue()
                self.web_page.mainFrame().load(
                    request, QNetworkAccessManager.PostOperation, body
                )
            else:
                self.web_page.mainFrame().load(request)
Ejemplo n.º 3
0
    def request_obj(self, url, headers=None):
        """ Return a QNetworkRequest object """
        request = QNetworkRequest()
        request.setUrl(QUrl(url))
        request.setOriginatingObject(self.web_page.mainFrame())

        if headers is not None:
            self.web_page.skip_custom_headers = True
            self._set_request_headers(request, headers)

        return request
Ejemplo n.º 4
0
    def request_obj(self, url, headers=None):
        """ Return a QNetworkRequest object """
        request = QNetworkRequest()
        request.setUrl(QUrl(url))
        request.setOriginatingObject(self.web_page.mainFrame())

        if headers is not None:
            self.web_page.skip_custom_headers = True
            self._set_request_headers(request, headers)

        return request
Ejemplo n.º 5
0
    def get(self, url, html=None, headers=None, data=None):
        """Load given url in webkit and return html when loaded

        url: the URL to load
        html: optional HTML to set instead of downloading
        headers: the headers to attach to the request
        data: the data to POST
        """
        if isinstance(url, basestring):
            # convert string to Qt's URL object
            url = QUrl(url)
        if html:
            # load pre downloaded HTML
            self.setContent(html, baseUrl=url)
            return html

        t1 = time()
        loop = QEventLoop()
        self.loadFinished.connect(loop.quit)
        # need to make network request
        request = QNetworkRequest(url)
        if headers:
            # add headers to request when defined
            for header, value in headers:
                request.setRawHeader(header, value)
        self.page().networkAccessManager().main_url = url
        request.setOriginatingObject(self)
        if data:
            # POST request
            super(Browser,
                  self).load(request, QNetworkAccessManager.PostOperation,
                             data)
        else:
            # GET request
            super(Browser, self).load(request)

        # set a timeout on the download loop
        timer = QTimer()
        timer.setSingleShot(True)
        timer.timeout.connect(loop.quit)
        timer.start(self.timeout * 1000)
        loop.exec_()  # delay here until download finished or timeout

        if timer.isActive():
            # downloaded successfully
            timer.stop()
            parsed_html = self.current_html()
            self.wait(self.delay - (time() - t1))
        else:
            # did not download in time
            common.logger.debug('Timed out: {}'.format(url.toString()))
            parsed_html = ''
        return parsed_html
Ejemplo n.º 6
0
    def request_obj(self, url, headers=None, body=None):
        """ Return a QNetworkRequest object """
        request = QNetworkRequest()
        request.setUrl(to_qurl(url))
        request.setOriginatingObject(self.web_page.mainFrame())

        if headers is not None:
            self.web_page.skip_custom_headers = True
            self._set_request_headers(request, headers)

        if body and not request.hasRawHeader("content-type"):
            # there is POST body but no content-type
            # QT will set this header, but it will complain so better to do this here
            request.setRawHeader("content-type", "application/x-www-form-urlencoded")

        return request
Ejemplo n.º 7
0
    def request_obj(self, url, headers=None, body=None):
        """ Return a QNetworkRequest object """
        request = QNetworkRequest()
        request.setUrl(to_qurl(url))
        request.setOriginatingObject(self.web_page.mainFrame())

        if headers is not None:
            self.web_page.skip_custom_headers = True
            self._set_request_headers(request, headers)

        if body and not request.hasRawHeader("content-type"):
            # there is POST body but no content-type
            # QT will set this header, but it will complain so better to do this here
            request.setRawHeader("content-type",
                                 "application/x-www-form-urlencoded")

        return request
Ejemplo n.º 8
0
    def doRequest(self,
                  url,
                  baseurl=None,
                  wait_time=None,
                  viewport=None,
                  js_source=None,
                  js_profile=None,
                  console=False):
        self.url = url
        self.wait_time = defaults.WAIT_TIME if wait_time is None else wait_time
        self.js_source = js_source
        self.js_profile = js_profile
        self.console = console
        self.viewport = defaults.VIEWPORT if viewport is None else viewport

        request = QNetworkRequest()
        request.setUrl(QUrl(url.decode('utf8')))

        if self.viewport != 'full':
            # viewport='full' can't be set if content is not loaded yet
            self._setViewportSize(self.viewport)

        if getattr(self.splash_request, 'pass_headers', False):
            headers = self.splash_request.getAllHeaders()
            for name, value in headers.items():
                request.setRawHeader(name, value)
                if name.lower() == 'user-agent':
                    self.web_page.custom_user_agent = value

        if baseurl:
            self._baseUrl = QUrl(baseurl.decode('utf8'))
            request.setOriginatingObject(self.web_page.mainFrame())
            self._reply = self.network_manager.get(request)
            self._reply.finished.connect(self._requestFinished)
        else:
            self.web_page.loadFinished.connect(self._loadFinished)
            if self.splash_request.method == 'POST':
                self.web_page.mainFrame().load(
                    request, QNetworkAccessManager.PostOperation,
                    self.splash_request.content.getvalue())
            else:
                self.web_page.mainFrame().load(request)
Ejemplo n.º 9
0
    def doRequest(
        self, url, baseurl=None, wait_time=None, viewport=None, js_source=None, js_profile=None, console=False
    ):
        self.url = url
        self.wait_time = defaults.WAIT_TIME if wait_time is None else wait_time
        self.js_source = js_source
        self.js_profile = js_profile
        self.console = console
        self.viewport = defaults.VIEWPORT if viewport is None else viewport

        request = QNetworkRequest()
        request.setUrl(QUrl(url.decode("utf8")))

        if self.viewport != "full":
            # viewport='full' can't be set if content is not loaded yet
            self._setViewportSize(self.viewport)

        if getattr(self.splash_request, "pass_headers", False):
            headers = self.splash_request.getAllHeaders()
            for name, value in headers.items():
                request.setRawHeader(name, value)
                if name.lower() == "user-agent":
                    self.web_page.custom_user_agent = value

        if baseurl:
            self._baseUrl = QUrl(baseurl.decode("utf8"))
            request.setOriginatingObject(self.web_page.mainFrame())
            self._reply = self.network_manager.get(request)
            self._reply.finished.connect(self._requestFinished)
        else:
            self.web_page.loadFinished.connect(self._loadFinished)
            if self.splash_request.method == "POST":
                self.web_page.mainFrame().load(
                    request, QNetworkAccessManager.PostOperation, self.splash_request.content.getvalue()
                )
            else:
                self.web_page.mainFrame().load(request)
Ejemplo n.º 10
0
    def start(self, url, baseurl=None, wait=None, viewport=None,
                  js_source=None, js_profile=None, images=None, console=False,
                  headers=None, http_method='GET', body=None):

        self.web_page.har_log.store_timing("_onStarted")

        self.url = url
        self.history = []
        self.web_page.settings().setAttribute(QWebSettings.AutoLoadImages, images)
        self.wait_time = defaults.WAIT_TIME if wait is None else wait

        self.js_source = js_source
        self.js_profile = js_profile
        self.console = console
        self.viewport = defaults.VIEWPORT if viewport is None else viewport

        # setup logging
        if self.verbosity >= 4:
            self.web_page.loadStarted.connect(self._loadStarted)
            self.web_page.mainFrame().loadFinished.connect(self._frameLoadFinished)
            self.web_page.mainFrame().loadStarted.connect(self._frameLoadStarted)
            self.web_page.mainFrame().contentsSizeChanged.connect(self._contentsSizeChanged)

        if self.verbosity >= 3:
            self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._javaScriptWindowObjectCleared)
            self.web_page.mainFrame().initialLayoutCompleted.connect(self._initialLayoutCompleted)

        self.web_page.mainFrame().urlChanged.connect(self._urlChanged)

        # do the request
        request = QNetworkRequest()
        request.setUrl(QUrl(url.decode('utf8')))
        self._setHeaders(request, headers)

        if getattr(self.splash_request, 'inspect_me', False):
            # Set http method and request body from the request
            http_method = self.splash_request.method
            body = self.splash_request.content.getvalue()

        if self.viewport != 'full':
            # viewport='full' can't be set if content is not loaded yet,
            # but in other cases it is better to set it earlier.
            self._setViewportSize(self.viewport)

        if baseurl:
            # If baseurl is used, we download the page manually,
            # then set its contents to the QWebPage and let it
            # download related resources and render the result.
            if http_method != 'GET':
                raise NotImplementedError()

            self._baseUrl = QUrl(baseurl.decode('utf8'))
            request.setOriginatingObject(self.web_page.mainFrame())
            self._reply = self.network_manager.get(request)
            self._reply.finished.connect(self._requestFinished)
        else:
            self.web_page.loadFinished.connect(self._loadFinished)
            meth = OPERATION_QT_CONSTANTS[http_method]
            if body is None:  # PyQT doesn't support body=None
                self.web_page.mainFrame().load(request, meth)
            else:
                self.web_page.mainFrame().load(request, meth, body)