コード例 #1
0
class Browser(object):

    def __init__(self, callback, logger, options=None):
        """
        Build the web view, web page and network manager used for requests.

        :param callback: called with the result whenever a task finishes.
        :param logger: logger used by the browser and handed to the network
            manager.
        :param options: optional dict of settings; recognised keys are
            'timeout' (seconds, default 30), 'max_request_retries'
            (default 3), and the booleans 'images', 'javascript', 'popups'
            and 'private_browsing' (all default False). The dict passed in
            is not modified.
        """
        self.logger = logger
        # Work on a private copy: the keys are consumed with pop() below, and
        # popping from the caller's dict would make a second Browser built
        # from the same dict silently fall back to the defaults.
        options = dict(options or {})

        self._request_ops = {'head': QNetworkAccessManager.HeadOperation,
                             'get': QNetworkAccessManager.GetOperation,
                             'put': QNetworkAccessManager.PutOperation,
                             'post': QNetworkAccessManager.PostOperation,
                             'delete': QNetworkAccessManager.DeleteOperation}

        # the option is given in seconds, the timer works in milliseconds
        self._timeout = int(options.pop('timeout', 30)) * 1000

        max_request_retries = options.pop('max_request_retries', 3)
        self._network_manager = SmartNetworkAccessManager(logger,
                                                          max_request_retries)
        self._web_page = CraftyWebPage()
        self._web_page.setNetworkAccessManager(self._network_manager)

        self._web_view = QWebView()
        self._web_view.setPage(self._web_page)

        # connect the loadFinished signal to a method defined by us.
        # loadFinished is the signal which is triggered when a page is loaded
        self._web_view.loadFinished.connect(self._load_finished)

        settings = self._web_view.settings()
        settings.setAttribute(QWebSettings.AutoLoadImages,
                              options.pop('images', False))
        settings.setAttribute(QWebSettings.JavascriptEnabled,
                              options.pop('javascript', False))
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows,
                              options.pop('popups', False))
        settings.setAttribute(QWebSettings.PrivateBrowsingEnabled,
                              options.pop('private_browsing', False))
        settings.setAttribute(QWebSettings.JavaEnabled, False)
        settings.setAttribute(QWebSettings.PluginsEnabled, False)
        settings.setAttribute(QWebSettings.DnsPrefetchEnabled, True)

        # store the callback function which will be called when a request is
        # finished
        self._result_callback = callback
        self._is_task_finished = False
        self._destroyed_status = dict()
        # assigned for real by _start_task() and shutdown() respectively;
        # declared here so every instance attribute is visible in one place
        self._timeout_timer = None
        self._shutdown_callback = None

    def _prepare_request(self, url, headers):
        """
        Return a QNetworkRequest for url carrying the given raw headers.

        :param url: address the request should point to.
        :param headers: mapping of header name to header value; every pair
            is set on the request as a raw header.
        """
        prepared = QNetworkRequest()
        target = QUrl(url)
        prepared.setUrl(target)

        # copy the custom headers over, one raw header per mapping entry
        for (name, content) in headers.items():
            raw_content = QByteArray(content)
            prepared.setRawHeader(name, raw_content)

        return prepared

    def _urlencode_request_data(self, raw_data):
        """
        Return the entries of raw_data as an encoded query string.

        A scratch QUrl is used as the encoder: each entry is added as a query
        item (the value converted with unicode()) and the encoded query of
        that URL is returned.
        """
        encoder = QUrl()
        for (field, content) in raw_data.items():
            text = unicode(content)
            encoder.addQueryItem(field, text)

        encoded = encoder.encodedQuery()
        return encoded

    def _load_finished(self, ok):
        """
        Report the outcome of a page load through the result callback.

        Connected to the web view's loadFinished signal; it is also invoked
        by the timeout timer armed in _start_task and by the re-check timer
        scheduled below while network requests are still active.

        :param ok: the value emitted by loadFinished, or the string
            'timed_out' when invoked by the timeout timer.
        """
        if self._is_task_finished:
            # in case loadFinished fires more than once and we already
            # reported back with a result, don't do that again
            self.logger.info('loadFinished emitted, but task was already '
                             'finished.')
            return

        pending_requests = self._network_manager.active_requests

        if ok == 'timed_out':
            self.logger.info('loadFinished emitted, request timed out.')
            self._network_manager.errors.append('Request timed out.')
            # to avoid treating the request by the driver as successful
            ok = False
        elif len(pending_requests) > 0:
            self.logger.info("loadFinished emitted, waiting for requests:"
                             " {0}".format(pending_requests))
            # look again in a second, keeping the original ok value
            QTimer.singleShot(1000, partial(self._load_finished, ok))
            return

        self.logger.info('loadFinished emitted, returning result.')
        frame = self._web_view.page().mainFrame()
        url = smart_str(frame.url().toString())
        html = frame.toHtml()

        result = {'html': html,
                  'url': url,
                  'successful': ok}

        # the 'errors' key is only present when the network manager
        # recorded at least one error
        if self._network_manager.errors:
            result['errors'] = self._network_manager.errors

        self._finish_task(result)

    def _start_task(self):
        """
        Mark a new task as running and arm its timeout timer.

        When the timer fires after self._timeout milliseconds,
        _load_finished is called with the string 'timed_out'.
        """
        def on_timeout():
            # abusing the ok param of loadFinished
            return self._load_finished('timed_out')

        self._is_task_finished = False
        timer = self._timeout_timer = QTimer()
        timer.timeout.connect(on_timeout)
        timer.start(self._timeout)

    def _finish_task(self, result):
        """
        Mark the current task as finished and hand result to the callback.

        :param result: value passed unchanged to the callback that was given
            upon instantiation.
        """
        self._is_task_finished = True
        # loadFinished is connected in __init__ but the timeout timer is only
        # created by _start_task, so a result can arrive before any timer
        # exists; in that case there is nothing to stop and the callback must
        # still be called.
        timer = getattr(self, '_timeout_timer', None)
        if timer is not None:
            timer.stop()
        # calling the callback function which we passed upon instantiation to
        # report the results there
        self._result_callback(result)

    def make(self, method, url, headers, raw_data=None):
        """
        Start a task that loads url in the web view.

        :param method: name of a key in self._request_ops, matched
            case-insensitively; an unknown name raises KeyError.
        :param url: address to load.
        :param headers: mapping of raw header names to values.
        :param raw_data: optional mapping sent as the encoded request body.
        """
        prepared = self._prepare_request(url, headers)
        verb = method.lower()
        operation = self._request_ops[verb]
        if raw_data:
            payload = raw_data
        else:
            payload = dict()
        body = self._urlencode_request_data(payload)
        # arm the timeout before the load is kicked off
        self._start_task()
        self._web_view.load(prepared, operation, body)

    def _find_element(self, selector):
        """
        Return the first element of the page's main frame matching selector.

        Raises ElementNotFound(selector) when the lookup yields a null
        element.
        """
        match = self._web_page.mainFrame().findFirstElement(selector)
        if not match.isNull():
            return match

        raise ElementNotFound(selector)

    def fill_input(self, selector, value):
        """
        Set the value of the first element matching selector.

        Both the "value" attribute and the value property of the element are
        set, by evaluating a small script on the element.

        :param selector: selector handed to _find_element, which raises
            ElementNotFound when nothing matches.
        :param value: new value; it is converted to text with format() and
            always ends up in the script as a string literal.
        """
        # local import so this method does not depend on the file's import
        # block
        import json

        # Encode the text as a JSON string, which is also a valid JavaScript
        # string literal. Pasting the raw text between double quotes would
        # break the script (or inject code) as soon as the value contains a
        # quote, a backslash or a line break.
        js_value = json.dumps("{0}".format(value))
        js_fill_input = """
            this.setAttribute("value", {0});
            this.value = {0};
        """.format(js_value)

        element = self._find_element(selector)
        element.evaluateJavaScript(js_fill_input)

    def click(self, selector):
        """
        Dispatch a synthetic left-button click on the first element matching
        selector and start a new task.

        The click coordinates are taken from the element's geometry, moved
        2 pixels inside its top-left corner. The task started here is
        finished through _load_finished (page load or timeout), which reports
        to the result callback.

        :param selector: selector handed to _find_element, which raises
            ElementNotFound when nothing matches.
        """
        element = self._find_element(selector)
        offset = element.geometry()
        # Both offsets are ADDED to the top-left corner: the y axis grows
        # downwards, so subtracting from top() would place the click above
        # the element instead of inside it.
        js_click = """
            function mouse_click(element) {{
                var event = document.createEvent('MouseEvents');
                var offsetX = {0} + 2;  //add 2 pixels otherwise it would
                var offsetY = {1} + 2;  //seem like we clicked on the margin
                event.initMouseEvent(
                    'click',                    //event type
                    true,                       //canBubble
                    true,                       //cancelable
                    document.defaultView,       //view
                    1,                          //detail
                    (window.screenX + offsetX), //screenX - The coords within
                    (window.screenY + offsetY), //screenY - the entire page
                    offsetX,                    //clientX - The coords within
                    offsetY,                    //clientY - the viewport
                    false,                      //ctrlKey
                    false,                      //altKey
                    false,                      //shiftKey
                    false,                      //metaKey
                    0,                          //0=left, 1=middle, 2=right
                    element                     //relatedTarget
                );
                element.dispatchEvent(event);   //Fire the event
            }}
            mouse_click(this);""".format(offset.left(), offset.top())

        # arm the timeout before the click is fired
        self._start_task()
        element.evaluateJavaScript(js_click)

    def _destroyed(self, component):
        """
        Record that component was destroyed; once every tracked component is
        marked as destroyed, call the shutdown callback without arguments.
        """
        status = self._destroyed_status
        status[component] = True

        still_alive = [name for (name, gone) in status.items() if not gone]
        if still_alive:
            return

        self._shutdown_callback()

    def shutdown(self, callback):
        """
        Stop the browser and schedule its Qt components for deletion.

        callback is stored and later invoked by _destroyed, without
        arguments, once the web page, the web view and the network manager
        have all emitted their destroyed signal.
        """
        self._shutdown_callback = callback

        view = self._web_view
        view.stop()
        view.close()
        # will immediately stop any running javascript code
        view.settings().setAttribute(QWebSettings.JavascriptEnabled, False)
        # if any requests were started by javascript after loadFinished was
        # emitted, and before we stopped javascript execution, cancel them
        self._network_manager.abort_requests()

        def notifier(component_name):
            # build a slot taking no arguments that reports this component
            return lambda: self._destroyed(component_name)

        doomed = (('web_page', self._web_page),
                  ('web_view', view),
                  ('network_manager', self._network_manager))
        for (component_name, component) in doomed:
            # mark as pending first, then listen for the destruction and
            # only then ask for the deletion
            self._destroyed_status[component_name] = False
            component.destroyed.connect(notifier(component_name))
            component.deleteLater()