def doRequest(self, url, baseurl=None, wait_time=None, viewport=None,
              js_source=None, js_profile=None, console=False):
    """Remember the render options on the instance and start fetching ``url``.

    ``wait_time`` and ``viewport`` fall back to ``defaults.WAIT_TIME`` and
    ``defaults.VIEWPORT`` when they are None; ``js_source``, ``js_profile``
    and ``console`` are stored unchanged.

    With a truthy ``baseurl`` the request is issued through
    ``self.network_manager`` and ``self._requestFinished`` is hooked to the
    reply's ``finished`` signal.  Otherwise the main frame loads the request
    itself: as a POST carrying every header and the body of
    ``self.splash_request`` when that request's method is 'POST', as a plain
    load in all other cases.
    """
    self.url = url
    if wait_time is None:
        wait_time = defaults.WAIT_TIME
    self.wait_time = wait_time
    self.js_source = js_source
    self.js_profile = js_profile
    self.console = console
    if viewport is None:
        viewport = defaults.VIEWPORT
    self.viewport = viewport

    request = QNetworkRequest()
    request.setUrl(QUrl(url))

    # A 'full' viewport can only be applied once content has been loaded.
    if self.viewport != 'full':
        self._setViewportSize(self.viewport)

    if baseurl:
        self._baseUrl = QUrl(baseurl)
        request.setOriginatingObject(self.web_page.mainFrame())
        self._reply = self.network_manager.get(request)
        self._reply.finished.connect(self._requestFinished)
        return

    self.web_page.loadFinished.connect(self._loadFinished)
    if self.splash_request.method != 'POST':
        self.web_page.mainFrame().load(request)
        return

    # POST: forward all incoming headers together with the request body.
    for header_name, header_value in self.splash_request.getAllHeaders().items():
        request.setRawHeader(header_name, header_value)
    self.web_page.mainFrame().load(
        request,
        QNetworkAccessManager.PostOperation,
        self.splash_request.content.getvalue(),
    )
def doRequest(self, url, baseurl=None, wait_time=None, viewport=None,
              js_source=None, js_profile=None, images=None, console=False):
    """Record the render options, hook up debug logging and load ``url``.

    Parameters:
        url: address to load; it is decoded from UTF-8 before being wrapped
            in a QUrl.
        baseurl: when truthy, the page is downloaded through
            ``self.network_manager`` and ``self._requestFinished`` handles
            the reply; it is decoded from UTF-8 and kept in ``self._baseUrl``.
        wait_time: stored in ``self.wait_time``; ``defaults.WAIT_TIME`` is
            used when None.
        viewport: stored in ``self.viewport``; ``defaults.VIEWPORT`` is used
            when None.  Any value other than 'full' is applied immediately.
        js_source, js_profile, console: stored on the instance unchanged.
        images: value for the page's ``QWebSettings.AutoLoadImages``
            attribute.  None leaves the page's current setting untouched.
    """
    self.url = url
    self.wait_time = defaults.WAIT_TIME if wait_time is None else wait_time
    self.js_source = js_source
    self.js_profile = js_profile
    self.console = console
    self.viewport = defaults.VIEWPORT if viewport is None else viewport

    # setAttribute() takes a bool and PyQt does not accept None for it, so
    # calling it with the default ``images=None`` would raise TypeError.
    if images is not None:
        self.web_page.settings().setAttribute(QWebSettings.AutoLoadImages, images)

    # setup logging
    if self.verbosity >= 4:
        self.web_page.loadStarted.connect(self._loadStarted)
        self.web_page.mainFrame().loadFinished.connect(self._frameLoadFinished)
        self.web_page.mainFrame().loadStarted.connect(self._frameLoadStarted)
        self.web_page.mainFrame().contentsSizeChanged.connect(self._contentsSizeChanged)

    if self.verbosity >= 3:
        self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._javaScriptWindowObjectCleared)
        self.web_page.mainFrame().initialLayoutCompleted.connect(self._initialLayoutCompleted)

    self.web_page.mainFrame().urlChanged.connect(self._urlChanged)

    # do the request
    request = QNetworkRequest()
    request.setUrl(QUrl(url.decode('utf8')))

    if self.viewport != 'full':
        # viewport='full' can't be set if content is not loaded yet
        self._setViewportSize(self.viewport)

    if getattr(self.splash_request, 'pass_headers', False):
        # Forward the incoming headers; a User-Agent header is additionally
        # recorded on the page as custom_user_agent.
        headers = self.splash_request.getAllHeaders()
        for name, value in headers.items():
            request.setRawHeader(name, value)
            if name.lower() == 'user-agent':
                self.web_page.custom_user_agent = value

    if baseurl:
        # If baseurl is used, we download the page manually,
        # then set its contents to the QWebPage and let it
        # download related resources and render the result.
        self._baseUrl = QUrl(baseurl.decode('utf8'))
        request.setOriginatingObject(self.web_page.mainFrame())
        self._reply = self.network_manager.get(request)
        self._reply.finished.connect(self._requestFinished)
    else:
        self.web_page.loadFinished.connect(self._loadFinished)

        if self.splash_request.method == 'POST':
            body = self.splash_request.content.getvalue()
            self.web_page.mainFrame().load(
                request, QNetworkAccessManager.PostOperation, body
            )
        else:
            self.web_page.mainFrame().load(request)
def request_obj(self, url, headers=None):
    """Build a QNetworkRequest for ``url`` that originates from the main frame.

    When ``headers`` is not None, ``skip_custom_headers`` is switched on for
    the web page and the headers are applied to the request through
    ``self._set_request_headers``.
    """
    req = QNetworkRequest()
    req.setUrl(QUrl(url))
    req.setOriginatingObject(self.web_page.mainFrame())

    if headers is None:
        return req

    self.web_page.skip_custom_headers = True
    self._set_request_headers(req, headers)
    return req
def get(self, url, html=None, headers=None, data=None):
    """Load ``url`` in webkit and return the resulting HTML.

    url: the URL to load (a string is converted to a QUrl)
    html: optional pre-downloaded HTML; when given it is set as the content
          with ``url`` as base URL and returned without any network request
    headers: iterable of (name, value) pairs added to the request
    data: when truthy, sent as the body of a POST request; otherwise GET

    Returns the current HTML once loading finished, or '' when the load did
    not finish within ``self.timeout`` seconds.
    """
    if isinstance(url, basestring):
        # Qt wants a URL object rather than a plain string
        url = QUrl(url)

    if html:
        self.setContent(html, baseUrl=url)
        return html

    started_at = time()
    event_loop = QEventLoop()
    self.loadFinished.connect(event_loop.quit)

    # build the network request
    request = QNetworkRequest(url)
    for header, value in (headers or ()):
        request.setRawHeader(header, value)
    self.page().networkAccessManager().main_url = url
    request.setOriginatingObject(self)

    parent = super(Browser, self)
    if data:
        parent.load(request, QNetworkAccessManager.PostOperation, data)
    else:
        parent.load(request)

    # single-shot timer that breaks out of the event loop on timeout
    timeout_timer = QTimer()
    timeout_timer.setSingleShot(True)
    timeout_timer.timeout.connect(event_loop.quit)
    timeout_timer.start(self.timeout * 1000)

    # blocks until either the page finished loading or the timer fired
    event_loop.exec_()

    if not timeout_timer.isActive():
        # the timer already fired, so the download did not finish in time
        common.logger.debug('Timed out: {}'.format(url.toString()))
        return ''

    timeout_timer.stop()
    parsed_html = self.current_html()
    self.wait(self.delay - (time() - started_at))
    return parsed_html
def request_obj(self, url, headers=None, body=None):
    """Build a QNetworkRequest for ``url`` that originates from the main frame.

    ``headers`` that are not None are applied through
    ``self._set_request_headers`` after switching ``skip_custom_headers`` on
    for the web page.  When a truthy ``body`` is given and the request has no
    "content-type" header, the form-urlencoded content type is set here.
    """
    req = QNetworkRequest()
    req.setUrl(to_qurl(url))
    req.setOriginatingObject(self.web_page.mainFrame())

    if headers is not None:
        self.web_page.skip_custom_headers = True
        self._set_request_headers(req, headers)

    if body:
        if not req.hasRawHeader("content-type"):
            # There is a POST body but no content-type.  Qt would add the
            # header itself, but it complains about it, so set it up front.
            req.setRawHeader("content-type", "application/x-www-form-urlencoded")

    return req
def doRequest(self, url, baseurl=None, wait_time=None, viewport=None,
              js_source=None, js_profile=None, console=False):
    """Remember the render options on the instance and start fetching ``url``.

    ``url`` and ``baseurl`` are decoded from UTF-8 before being wrapped in
    QUrl objects.  ``wait_time`` and ``viewport`` fall back to
    ``defaults.WAIT_TIME`` and ``defaults.VIEWPORT`` when None.

    If ``self.splash_request`` has a truthy ``pass_headers`` attribute, all of
    its headers are copied onto the outgoing request and a User-Agent header
    is also stored as the page's ``custom_user_agent``.

    With a truthy ``baseurl`` the request goes through
    ``self.network_manager`` and ``self._requestFinished`` handles the reply;
    otherwise the main frame loads the request directly, as a POST with the
    incoming body when ``self.splash_request.method`` is 'POST'.
    """
    self.url = url
    if wait_time is None:
        wait_time = defaults.WAIT_TIME
    self.wait_time = wait_time
    self.js_source = js_source
    self.js_profile = js_profile
    self.console = console
    if viewport is None:
        viewport = defaults.VIEWPORT
    self.viewport = viewport

    request = QNetworkRequest()
    request.setUrl(QUrl(url.decode('utf8')))

    # A 'full' viewport can only be applied once content has been loaded.
    if self.viewport != 'full':
        self._setViewportSize(self.viewport)

    forward_headers = getattr(self.splash_request, 'pass_headers', False)
    if forward_headers:
        for key, val in self.splash_request.getAllHeaders().items():
            request.setRawHeader(key, val)
            if key.lower() == 'user-agent':
                self.web_page.custom_user_agent = val

    if baseurl:
        self._baseUrl = QUrl(baseurl.decode('utf8'))
        request.setOriginatingObject(self.web_page.mainFrame())
        self._reply = self.network_manager.get(request)
        self._reply.finished.connect(self._requestFinished)
        return

    self.web_page.loadFinished.connect(self._loadFinished)
    if self.splash_request.method != 'POST':
        self.web_page.mainFrame().load(request)
        return

    self.web_page.mainFrame().load(
        request,
        QNetworkAccessManager.PostOperation,
        self.splash_request.content.getvalue(),
    )
def doRequest(
    self, url, baseurl=None, wait_time=None, viewport=None, js_source=None, js_profile=None, console=False
):
    """Store the rendering parameters and kick off the page load for ``url``.

    ``wait_time`` / ``viewport`` default to ``defaults.WAIT_TIME`` /
    ``defaults.VIEWPORT`` when None.  ``url`` and ``baseurl`` are decoded
    from UTF-8 before being turned into QUrl objects.

    Headers of ``self.splash_request`` are forwarded only when it has a
    truthy ``pass_headers`` attribute; a User-Agent header among them is
    also assigned to the page's ``custom_user_agent``.

    Without ``baseurl`` the main frame loads the request (POST with the
    incoming body when ``self.splash_request.method`` is "POST").  With
    ``baseurl`` the request is sent through ``self.network_manager`` and the
    reply is handled by ``self._requestFinished``.
    """
    self.url = url
    self.wait_time = wait_time if wait_time is not None else defaults.WAIT_TIME
    self.js_source = js_source
    self.js_profile = js_profile
    self.console = console
    self.viewport = viewport if viewport is not None else defaults.VIEWPORT

    request = QNetworkRequest()
    target = QUrl(url.decode("utf8"))
    request.setUrl(target)

    if self.viewport != "full":
        # "full" has to wait until the content is loaded
        self._setViewportSize(self.viewport)

    if getattr(self.splash_request, "pass_headers", False):
        incoming = self.splash_request.getAllHeaders()
        for header_name, header_value in incoming.items():
            request.setRawHeader(header_name, header_value)
            if header_name.lower() == "user-agent":
                self.web_page.custom_user_agent = header_value

    if not baseurl:
        self.web_page.loadFinished.connect(self._loadFinished)
        load_args = [request]
        if self.splash_request.method == "POST":
            load_args.append(QNetworkAccessManager.PostOperation)
            load_args.append(self.splash_request.content.getvalue())
        self.web_page.mainFrame().load(*load_args)
    else:
        self._baseUrl = QUrl(baseurl.decode("utf8"))
        request.setOriginatingObject(self.web_page.mainFrame())
        self._reply = self.network_manager.get(request)
        self._reply.finished.connect(self._requestFinished)
def start(self, url, baseurl=None, wait=None, viewport=None, js_source=None,
          js_profile=None, images=None, console=False, headers=None,
          http_method='GET', body=None):
    """Record the render options, hook up debug logging and load ``url``.

    Parameters:
        url: address to load; it is decoded from UTF-8 before being wrapped
            in a QUrl.
        baseurl: when truthy, the page is downloaded through
            ``self.network_manager`` and ``self._requestFinished`` handles
            the reply.  Only 'GET' is supported in this mode.
        wait: stored in ``self.wait_time``; ``defaults.WAIT_TIME`` is used
            when None.
        viewport: stored in ``self.viewport``; ``defaults.VIEWPORT`` is used
            when None.  Any value other than 'full' is applied immediately.
        js_source, js_profile, console: stored on the instance unchanged.
        images: value for the page's ``QWebSettings.AutoLoadImages``
            attribute.  None leaves the page's current setting untouched.
        headers: handed to ``self._setHeaders`` together with the request.
        http_method: key looked up in ``OPERATION_QT_CONSTANTS`` to select
            the Qt operation for the main-frame load.
        body: request body; omitted from the load call when None.

    When ``self.splash_request`` has a truthy ``inspect_me`` attribute,
    ``http_method`` and ``body`` are replaced by that request's method and
    content.

    Raises:
        NotImplementedError: ``baseurl`` is given and the effective HTTP
            method is not 'GET'.
    """
    self.web_page.har_log.store_timing("_onStarted")

    self.url = url
    self.history = []

    # setAttribute() takes a bool and PyQt does not accept None for it, so
    # calling it with the default ``images=None`` would raise TypeError.
    if images is not None:
        self.web_page.settings().setAttribute(QWebSettings.AutoLoadImages, images)

    self.wait_time = defaults.WAIT_TIME if wait is None else wait
    self.js_source = js_source
    self.js_profile = js_profile
    self.console = console
    self.viewport = defaults.VIEWPORT if viewport is None else viewport

    # setup logging
    if self.verbosity >= 4:
        self.web_page.loadStarted.connect(self._loadStarted)
        self.web_page.mainFrame().loadFinished.connect(self._frameLoadFinished)
        self.web_page.mainFrame().loadStarted.connect(self._frameLoadStarted)
        self.web_page.mainFrame().contentsSizeChanged.connect(self._contentsSizeChanged)

    if self.verbosity >= 3:
        self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._javaScriptWindowObjectCleared)
        self.web_page.mainFrame().initialLayoutCompleted.connect(self._initialLayoutCompleted)

    self.web_page.mainFrame().urlChanged.connect(self._urlChanged)

    # do the request
    request = QNetworkRequest()
    request.setUrl(QUrl(url.decode('utf8')))
    self._setHeaders(request, headers)

    if getattr(self.splash_request, 'inspect_me', False):
        # Set http method and request body from the request
        http_method = self.splash_request.method
        body = self.splash_request.content.getvalue()

    if self.viewport != 'full':
        # viewport='full' can't be set if content is not loaded yet,
        # but in other cases it is better to set it earlier.
        self._setViewportSize(self.viewport)

    if baseurl:
        # If baseurl is used, we download the page manually,
        # then set its contents to the QWebPage and let it
        # download related resources and render the result.
        if http_method != 'GET':
            raise NotImplementedError()

        self._baseUrl = QUrl(baseurl.decode('utf8'))
        request.setOriginatingObject(self.web_page.mainFrame())
        self._reply = self.network_manager.get(request)
        self._reply.finished.connect(self._requestFinished)
    else:
        self.web_page.loadFinished.connect(self._loadFinished)
        meth = OPERATION_QT_CONSTANTS[http_method]

        if body is None:  # PyQT doesn't support body=None
            self.web_page.mainFrame().load(request, meth)
        else:
            self.web_page.mainFrame().load(request, meth, body)