class Browser(object):
    """QtWebKit-backed browser that performs one task at a time.

    A task is started by :meth:`make` (load a URL) or :meth:`click`
    (dispatch a click on an element).  When the page reports
    ``loadFinished`` and the network manager has no active requests left,
    or when the timeout expires, the callback given at construction time is
    called with a result dict containing ``html``, ``url``, ``successful``
    and, if the network manager collected any, ``errors``.
    """

    def __init__(self, callback, logger, options=None):
        """Build the web view, page and network manager.

        :param callback: callable invoked with the result dict of each task.
        :param logger: object providing ``info()``; also handed to the
            network manager.
        :param options: optional mapping. Recognised keys: ``timeout``
            (seconds, default 30), ``max_request_retries`` (default 3) and
            the booleans ``images``, ``javascript``, ``popups`` and
            ``private_browsing`` (all default ``False``).
        """
        self.logger = logger
        # The options are consumed with pop() below; work on a copy so the
        # caller's dict is left intact and can be reused for another Browser.
        options = dict(options or {})

        self._request_ops = {
            'head': QNetworkAccessManager.HeadOperation,
            'get': QNetworkAccessManager.GetOperation,
            'put': QNetworkAccessManager.PutOperation,
            'post': QNetworkAccessManager.PostOperation,
            'delete': QNetworkAccessManager.DeleteOperation,
        }

        # QTimer works in milliseconds, the option is given in seconds
        self._timeout = int(options.pop('timeout', 30)) * 1000
        # created by _start_task(); None while no task was ever started
        self._timeout_timer = None

        max_request_retries = options.pop('max_request_retries', 3)

        self._network_manager = SmartNetworkAccessManager(logger,
                                                          max_request_retries)
        self._web_page = CraftyWebPage()
        self._web_page.setNetworkAccessManager(self._network_manager)

        self._web_view = QWebView()
        self._web_view.setPage(self._web_page)

        # connect the loadFinished signal to a method defined by us.
        # loadFinished is the signal which is triggered when a page is loaded
        self._web_view.loadFinished.connect(self._load_finished)

        settings = self._web_view.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages,
                              options.pop('images', False))
        settings.setAttribute(QWebSettings.JavascriptEnabled,
                              options.pop('javascript', False))
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows,
                              options.pop('popups', False))
        settings.setAttribute(QWebSettings.PrivateBrowsingEnabled,
                              options.pop('private_browsing', False))
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        settings.setAttribute(QWebSettings.DnsPrefetchEnabled, True)

        # store the callback function which will be called when a request is
        # finished
        self._result_callback = callback

        self._is_task_finished = False
        # component name -> True once its destroyed signal was received
        self._destroyed_status = dict()

    def _prepare_request(self, url, headers):
        """Return a QNetworkRequest for ``url`` carrying ``headers``.

        :param url: the target URL, wrapped in a QUrl.
        :param headers: mapping of header name to header value; ``None`` is
            treated as "no custom headers".
        """
        # create an empty request
        request = QNetworkRequest()
        # assign a url to it
        request.setUrl(QUrl(url))

        # add some custom headers to the request
        for (header_name, header_value) in (headers or {}).items():
            request.setRawHeader(header_name, QByteArray(header_value))

        return request

    def _urlencode_request_data(self, raw_data):
        """Return the items of ``raw_data`` as an urlencoded query string."""
        # the data which we want to send to the server must be urlencoded
        request_data = QUrl()
        for (name, value) in raw_data.items():
            request_data.addQueryItem(name, unicode(value))

        return request_data.encodedQuery()

    def _load_finished(self, ok):
        """
        Called when the page is fully loaded. It will get the html file of
        the loaded page and call the callback function with that result.

        :param ok: the boolean emitted by ``loadFinished``, or the string
            ``'timed_out'`` when invoked by the timeout timer.
        """
        if self._is_task_finished:
            # in case loadFinished fires more than once and we already
            # reported back with a result, don't do that again
            self.logger.info('loadFinished emitted, but task was already '
                             'finished.')
            return

        pending_requests = self._network_manager.active_requests

        if ok == 'timed_out':
            self.logger.info('loadFinished emitted, request timed out.')
            self._network_manager.errors.append('Request timed out.')
            # to avoid treating the request by the driver as successful
            ok = False
        elif len(pending_requests) > 0:
            self.logger.info("loadFinished emitted, waiting for requests:"
                             " {0}".format(pending_requests))
            # check again in a second, keeping the original ok value
            QTimer.singleShot(1000, partial(self._load_finished, ok))
            return

        self.logger.info('loadFinshed emitted, returning result.')
        frame = self._web_view.page().mainFrame()
        url = smart_str(frame.url().toString())
        html = frame.toHtml()

        result = {'html': html,
                  'url': url,
                  'successful': ok}

        if self._network_manager.errors:
            result['errors'] = self._network_manager.errors

        self._finish_task(result)

    def _start_task(self):
        """Mark a new task as running and arm the timeout timer."""
        self._is_task_finished = False

        # a timer left over from a previous task must never fire for this one
        if self._timeout_timer is not None:
            self._timeout_timer.stop()

        self._timeout_timer = QTimer()
        # one timeout per task is enough; _finish_task() stops it anyway
        self._timeout_timer.setSingleShot(True)
        # abusing the ok param of loadFinished
        self._timeout_timer.timeout.connect(
            lambda: self._load_finished('timed_out'))
        self._timeout_timer.start(self._timeout)

    def _finish_task(self, result):
        """Mark the task as finished and hand ``result`` to the callback."""
        self._is_task_finished = True
        # no timer exists when loadFinished arrives before any task started
        if self._timeout_timer is not None:
            self._timeout_timer.stop()
        # calling the callback function which we passed upon instantiation to
        # report the results there
        self._result_callback(result)

    def make(self, method, url, headers, raw_data=None):
        """Start a task that requests ``url`` with the given HTTP ``method``.

        :param method: one of head/get/put/post/delete (case-insensitive);
            any other value raises ``KeyError``.
        :param url: the URL to load.
        :param headers: mapping of custom request headers.
        :param raw_data: optional mapping sent urlencoded as request body.
        """
        request = self._prepare_request(url, headers)
        operation = self._request_ops[method.lower()]
        request_data = self._urlencode_request_data(raw_data or dict())
        self._start_task()
        self._web_view.load(request, operation, request_data)

    def _find_element(self, selector):
        """Return the first element matching ``selector`` in the main frame.

        :raises ElementNotFound: if no element matches.
        """
        main_frame = self._web_page.mainFrame()
        element = main_frame.findFirstElement(selector)
        if element.isNull():
            raise ElementNotFound(selector)
        return element

    def fill_input(self, selector, value):
        """Set the value of the element matching ``selector`` to ``value``.

        :raises ElementNotFound: if no element matches.
        """
        import json

        # Emit the value as a JSON literal, which is also a valid JavaScript
        # literal: quotes, backslashes and line breaks inside the value can
        # then neither break the script nor inject code into it.
        js_value = json.dumps(value)
        js_fill_input = """
            this.setAttribute("value", {0});
            this.value = {0};
        """.format(js_value)

        element = self._find_element(selector)
        element.evaluateJavaScript(js_fill_input)

    def click(self, selector):
        """Start a task by dispatching a click on the matching element.

        The result is reported through the callback once a load finishes or
        the timeout expires.

        :raises ElementNotFound: if no element matches.
        """
        element = self._find_element(selector)
        offset = element.geometry()
        # NOTE(review): offsetY is computed as top - 2 while offsetX is
        # left + 2; confirm the subtraction is intended.
        js_click = """
            function mouse_click(element) {{
                var event = document.createEvent('MouseEvents');
                var offsetX = {0} + 2;  //add 2 pixels otherwise it would
                var offsetY = {1} - 2;  //seem like we clicked on the margin
                event.initMouseEvent(
                    'click',                    //event type
                    true,                       //canBubble
                    true,                       //cancelable
                    document.defaultView,       //view
                    1,                          //detail
                    (window.screenX + offsetX), //screenX - The coords within
                    (window.screenY + offsetY), //screenY - the entire page
                    offsetX,                    //clientX - The coords within
                    offsetY,                    //clientY - the viewport
                    false,                      //ctrlKey
                    false,                      //altKey
                    false,                      //shiftKey
                    false,                      //metaKey
                    0,                          //0=left, 1=middle, 2=right
                    element                     //relatedTarget
                );
                element.dispatchEvent(event);   //Fire the event
            }}
            mouse_click(this);""".format(offset.left(), offset.top())

        self._start_task()
        element.evaluateJavaScript(js_click)

    def _destroyed(self, component):
        """Record that ``component`` was destroyed.

        Calls the shutdown callback once every registered component has
        reported its destruction.
        """
        self._destroyed_status[component] = True
        if all(self._destroyed_status.values()):
            self._shutdown_callback()

    def shutdown(self, callback):
        """Stop all activity and schedule the Qt components for deletion.

        :param callback: callable without arguments, invoked after the web
            page, the web view and the network manager were all destroyed.
        """
        self._shutdown_callback = callback

        # a pending timeout must not fire once the components are going away
        if self._timeout_timer is not None:
            self._timeout_timer.stop()

        self._web_view.stop()
        self._web_view.close()
        # will immediately stop any running javascript code
        self._web_view.settings().setAttribute(QWebSettings.JavascriptEnabled,
                                               False)
        # if any requests were started by javascript after loadFinished was
        # emitted, and before we stopped javascript execution, cancel them
        self._network_manager.abort_requests()

        self._destroyed_status['web_page'] = False
        self._web_page.destroyed.connect(lambda: self._destroyed('web_page'))
        self._web_page.deleteLater()

        self._destroyed_status['web_view'] = False
        self._web_view.destroyed.connect(lambda: self._destroyed('web_view'))
        self._web_view.deleteLater()

        self._destroyed_status['network_manager'] = False
        destroyer = lambda: self._destroyed('network_manager')
        self._network_manager.destroyed.connect(destroyer)
        self._network_manager.deleteLater()