Ejemplo n.º 1
0
    def test_select(self):  # {{{
        """Check ``Select`` against fixed expectations and against WebKit.

        ``self.HTML_IDS`` is parsed with ``etree`` and wrapped in ``Select``;
        the same markup is loaded into a ``QWebPage`` so that each main
        selector can also be run through ``run_webkit_selector`` and the two
        result sets compared (unless ``skip_webkit`` is passed).
        """
        ae = self.ae
        root = etree.fromstring(self.HTML_IDS)
        select = Select(root)

        from PyQt5.Qt import QApplication, QWebPage
        qt_app = QApplication([])
        page = QWebPage()
        page.mainFrame().setHtml(self.HTML_IDS)

        def ids_for(selector):
            # ``id`` attribute of every match, in the order ``select`` yields
            # them (elements without an id contribute None).
            return [node.get('id') for node in select(selector)]

        def pcss(main, *selectors, **kwargs):
            # Returns the ids matched by ``main`` after asserting that every
            # alternative selector matches exactly the same ids and, unless
            # skip_webkit is set, that WebKit agrees on the set of ids.
            expected = ids_for(main)
            for alternative in selectors:
                ae(ids_for(alternative), expected)
            if kwargs.get('skip_webkit'):
                return expected
            webkit_ids = set(run_webkit_selector(page, main))
            own_ids = set(expected)
            message = (
                'WebKit did not match result for: %r. Result: %r WebKit: %r'
                % (main, own_ids, webkit_ids))
            ae(own_ids, webkit_ids, message)
            return expected

        all_ids = pcss('*')
        ae(all_ids[:6], [
            'html', None, 'link-href', 'link-nohref', None, 'outer-div'])
        ae(all_ids[-1:], ['foobar-span'])
        ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div'])
        # case-insensitive in HTML
        ae(pcss('DIV'), ['outer-div', 'li-div', 'foobar-div'])
        ae(pcss('div div'), ['li-div'])
        ae(pcss('div, div div'), ['outer-div', 'li-div', 'foobar-div'])
        ae(pcss('a[name]'), ['name-anchor'])
        # case-insensitive in HTML:
        ae(pcss('a[NAme]'), ['name-anchor'])
        ae(pcss('a[rel]'), ['tag-anchor', 'nofollow-anchor'])
        ae(pcss('a[rel="tag"]'), ['tag-anchor'])
        ae(pcss('a[href*="localhost"]'), ['tag-anchor'])
        ae(pcss('a[href*=""]'), [])
        ae(pcss('a[href^="http"]'), ['tag-anchor', 'nofollow-anchor'])
        ae(pcss('a[href^="http:"]'), ['tag-anchor'])
        ae(pcss('a[href^=""]'), [])
        ae(pcss('a[href$="org"]'), ['nofollow-anchor'])
        ae(pcss('a[href$=""]'), [])
        ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True),
           ['foobar-div'])
        ae(pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), [])
        ae(pcss('div[foobar~="cd"]'), [])
        ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
        # Attribute values are case sensitive
        ae(pcss('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True), [])
        ae(pcss('*[lang|="e"]'), [])
        ae(pcss(':lang("EN")', '*:lang(en-US)', skip_webkit=True),
           ['second-li', 'li-div'])
        ae(pcss(':lang("e")'), [])
        ae(pcss('li:nth-child(1)', 'li:first-child'), ['first-li'])
        ae(pcss('li:nth-child(3)', '#first-li ~ :nth-child(3)'), ['third-li'])
        ae(pcss('li:nth-child(10)'), [])
        ae(pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'),
           ['second-li', 'fourth-li', 'sixth-li'])
        ae(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'),
           ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
        ae(pcss('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
        ae(pcss('li:nth-child(3n+1)'), ['first-li', 'fourth-li', 'seventh-li'])
        ae(pcss('li:nth-last-child(0)'), [])
        ae(pcss('li:nth-last-child(1)', 'li:last-child'), ['seventh-li'])
        ae(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'),
           ['second-li', 'fourth-li', 'sixth-li'])
        ae(pcss('li:nth-last-child(2n+2)'),
           ['second-li', 'fourth-li', 'sixth-li'])
        ae(pcss('ol:first-of-type'), ['first-ol'])
        ae(pcss('ol:nth-child(1)'), [])
        ae(pcss('ol:nth-of-type(2)'), ['second-ol'])
        ae(pcss('ol:nth-last-of-type(1)'), ['second-ol'])
        ae(pcss('span:only-child'), ['foobar-span'])
        ae(pcss('li div:only-child'), ['li-div'])
        ae(pcss('div *:only-child'), ['li-div', 'foobar-span'])
        ae(pcss('p *:only-of-type', skip_webkit=True), ['p-em', 'fieldset'])
        ae(pcss('p:only-of-type', skip_webkit=True), ['paragraph'])
        ae(pcss('a:empty', 'a:EMpty'), ['name-anchor'])
        ae(pcss('li:empty'), ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
        ae(pcss(':root', 'html:root', 'li:root'), ['html'])
        ae(pcss('* :root', 'p *:root'), [])
        ae(pcss('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
        ae(pcss('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
        ae(pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'),
           ['third-li', 'fourth-li'])
        ae(pcss('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
        ae(pcss('li div', 'li > div', 'div div'), ['li-div'])
        ae(pcss('div > div'), [])
        ae(pcss('div>.c', 'div > .c'), ['first-ol'])
        ae(pcss('div + div'), ['foobar-div'])
        ae(pcss('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
        ae(pcss('a[rel="tag"] ~ a'), ['nofollow-anchor'])
        ae(pcss('ol#first-ol li:last-child'), ['seventh-li'])
        ae(pcss('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
        ae(pcss('#outer-div:first-child'), ['outer-div'])
        ae(pcss('#outer-div :first-child'),
           ['name-anchor', 'first-li', 'li-div', 'p-b',
            'checkbox-fieldset-disabled', 'area-href'])
        ae(pcss('a[href]'), ['tag-anchor', 'nofollow-anchor'])
        ae(pcss(':not(*)'), [])
        ae(pcss('a:not([href])'), ['name-anchor'])
        ae(pcss('ol :Not(li[class])', skip_webkit=True),
           ['first-li', 'second-li', 'li-div',
            'fifth-li', 'sixth-li', 'seventh-li'])
        ae(pcss(r'di\a0 v', r'div\['), [])
        ae(pcss(r'[h\a0 ref]', r'[h\]ref]'), [])

        del qt_app
Ejemplo n.º 2
0
    def test_select(self):  # {{{
        """Exercise ``Select`` on ``self.HTML_IDS`` and cross-check with WebKit.

        Each ``pcss`` call returns the ids matched by its first selector,
        asserting on the way that every additional selector matches the same
        ids and (unless ``skip_webkit=True``) that ``run_webkit_selector``
        returns the same set of ids for the first selector.

        The final checks verify that ``body:nth-child`` raises
        ``ExpressionError`` and that a ``Select`` created with
        ``ignore_inappropriate_pseudo_classes=True`` matches at least one
        element for ``p:hover``.
        """
        document = etree.fromstring(self.HTML_IDS)
        select = Select(document)

        from PyQt5.Qt import QApplication, QWebPage
        app = QApplication([])
        w = QWebPage()
        w.mainFrame().setHtml(self.HTML_IDS)

        def select_ids(selector):
            # Yield the ``id`` attribute of each match (None when missing).
            for elem in select(selector):
                yield elem.get('id')

        def pcss(main, *selectors, **kwargs):
            result = list(select_ids(main))
            # All alternative spellings must match exactly what ``main`` does.
            for selector in selectors:
                self.ae(list(select_ids(selector)), result)
            if not kwargs.get('skip_webkit'):
                # Only set equality is checked against WebKit, not ordering.
                wk = set(run_webkit_selector(w, main))
                self.ae(set(result), wk, 'WebKit did not match result for: %r. Result: %r WebKit: %r' % (main, set(result), wk))
            return result

        all_ids = pcss('*')
        self.ae(all_ids[:6], [
            'html', None, 'link-href', 'link-nohref', None, 'outer-div'])
        self.ae(all_ids[-1:], ['foobar-span'])
        self.ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div'])
        self.ae(pcss('DIV'), [
            'outer-div', 'li-div', 'foobar-div'])  # case-insensitive in HTML
        self.ae(pcss('div div'), ['li-div'])
        self.ae(pcss('div, div div'), ['outer-div', 'li-div', 'foobar-div'])
        self.ae(pcss('a[name]'), ['name-anchor'])
        self.ae(pcss('a[NAme]'), ['name-anchor'])  # case-insensitive in HTML:
        self.ae(pcss('a[rel]'), ['tag-anchor', 'nofollow-anchor'])
        self.ae(pcss('a[rel="tag"]'), ['tag-anchor'])
        self.ae(pcss('a[href*="localhost"]'), ['tag-anchor'])
        self.ae(pcss('a[href*=""]'), [])
        self.ae(pcss('a[href^="http"]'), ['tag-anchor', 'nofollow-anchor'])
        self.ae(pcss('a[href^="http:"]'), ['tag-anchor'])
        self.ae(pcss('a[href^=""]'), [])
        self.ae(pcss('a[href$="org"]'), ['nofollow-anchor'])
        self.ae(pcss('a[href$=""]'), [])
        self.ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True), ['foobar-div'])
        self.ae(pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), [])
        self.ae(pcss('div[foobar~="cd"]'), [])
        self.ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
        # Attribute values are case sensitive
        self.ae(pcss('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True), [])
        self.ae(pcss('*[lang|="e"]'), [])
        self.ae(pcss(':lang("EN")', '*:lang(en-US)', skip_webkit=True), ['second-li', 'li-div'])
        self.ae(pcss(':lang("e")'), [])
        self.ae(pcss('li:nth-child(1)', 'li:first-child'), ['first-li'])
        self.ae(pcss('li:nth-child(3)', '#first-li ~ :nth-child(3)'), ['third-li'])
        self.ae(pcss('li:nth-child(10)'), [])
        self.ae(pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'), ['second-li', 'fourth-li', 'sixth-li'])
        self.ae(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
        self.ae(pcss('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
        self.ae(pcss('li:nth-child(3n+1)'), ['first-li', 'fourth-li', 'seventh-li'])
        self.ae(pcss('li:nth-last-child(0)'), [])
        self.ae(pcss('li:nth-last-child(1)', 'li:last-child'), ['seventh-li'])
        self.ae(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'), ['second-li', 'fourth-li', 'sixth-li'])
        self.ae(pcss('li:nth-last-child(2n+2)'), ['second-li', 'fourth-li', 'sixth-li'])
        self.ae(pcss('ol:first-of-type'), ['first-ol'])
        self.ae(pcss('ol:nth-child(1)'), [])
        self.ae(pcss('ol:nth-of-type(2)'), ['second-ol'])
        self.ae(pcss('ol:nth-last-of-type(1)'), ['second-ol'])
        self.ae(pcss('span:only-child'), ['foobar-span'])
        self.ae(pcss('li div:only-child'), ['li-div'])
        self.ae(pcss('div *:only-child'), ['li-div', 'foobar-span'])
        self.ae(pcss('p *:only-of-type', skip_webkit=True), ['p-em', 'fieldset'])
        self.ae(pcss('p:only-of-type', skip_webkit=True), ['paragraph'])
        self.ae(pcss('a:empty', 'a:EMpty'), ['name-anchor'])
        self.ae(pcss('li:empty'), ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
        self.ae(pcss(':root', 'html:root', 'li:root'), ['html'])
        self.ae(pcss('* :root', 'p *:root'), [])
        self.ae(pcss('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
        self.ae(pcss('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
        self.ae(pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'), [
            'third-li', 'fourth-li'])
        self.ae(pcss('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
        self.ae(pcss('li div', 'li > div', 'div div'), ['li-div'])
        self.ae(pcss('div > div'), [])
        self.ae(pcss('div>.c', 'div > .c'), ['first-ol'])
        self.ae(pcss('div + div'), ['foobar-div'])
        self.ae(pcss('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
        self.ae(pcss('a[rel="tag"] ~ a'), ['nofollow-anchor'])
        self.ae(pcss('ol#first-ol li:last-child'), ['seventh-li'])
        self.ae(pcss('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
        self.ae(pcss('#outer-div:first-child'), ['outer-div'])
        self.ae(pcss('#outer-div :first-child'), [
            'name-anchor', 'first-li', 'li-div', 'p-b',
            'checkbox-fieldset-disabled', 'area-href'])
        self.ae(pcss('a[href]'), ['tag-anchor', 'nofollow-anchor'])
        self.ae(pcss(':not(*)'), [])
        self.ae(pcss('a:not([href])'), ['name-anchor'])
        self.ae(pcss('ol :Not(li[class])', skip_webkit=True), [
            'first-li', 'second-li', 'li-div',
            'fifth-li', 'sixth-li', 'seventh-li'])
        self.ae(pcss(r'di\a0 v', r'div\['), [])
        self.ae(pcss(r'[h\a0 ref]', r'[h\]ref]'), [])

        # The result is forced with tuple() so the selector is actually
        # evaluated inside the assertRaises block.
        with self.assertRaises(ExpressionError):
            tuple(select('body:nth-child'))

        # Use a separate name rather than rebinding ``select``: the closures
        # above capture ``select``, so rebinding it would silently change what
        # any later ``pcss`` call tests.
        lenient_select = Select(document, ignore_inappropriate_pseudo_classes=True)
        self.assertGreater(len(tuple(lenient_select('p:hover'))), 0)

        del app
Ejemplo n.º 3
0
    def __init__(self,
                 parent,
                 proxy="",
                 port=0,
                 crawl_speed=CrawlSpeed.Medium,
                 network_access_manager=None):
        """Initialise the page: signals, timing, injected scripts and network.

        Args:
            parent: Passed on to ``QWebPage.__init__``; its ``app`` attribute
                is stored as ``self.app``.
            proxy: Proxy host. A proxy is installed only when ``proxy`` is
                non-empty and ``port`` is non-zero.
            port: Proxy port.
            crawl_speed: ``CrawlSpeed`` member selecting the
                ``wait_for_processing`` / ``wait_for_event`` values
                (presumably seconds -- confirm against the code that waits).
                For any value other than the four handled members the two
                attributes are left unset.
            network_access_manager: Optional network access manager to
                install; when given it is set before the proxy is applied.

        Raises:
            IOError/OSError: If one of the ``js/*.js`` files cannot be read.
                The paths are relative to the current working directory.
        """
        QWebPage.__init__(self, parent)
        self.app = parent.app
        self._js_bridge = JsBridge(self)
        self.loadFinished.connect(self.loadFinishedHandler)
        self.mainFrame().javaScriptWindowObjectCleared.connect(
            self.jsWinObjClearedHandler)
        self.frameCreated.connect(self.frameCreatedHandler)
        self.setViewportSize(QSize(1024, 800))

        # The speeds are mutually exclusive, so a single if/elif chain is used.
        if crawl_speed == CrawlSpeed.Slow:
            self.wait_for_processing = 1
            self.wait_for_event = 2
        elif crawl_speed == CrawlSpeed.Medium:
            self.wait_for_processing = 0.3
            self.wait_for_event = 1
        elif crawl_speed == CrawlSpeed.Fast:
            self.wait_for_processing = 0.1
            self.wait_for_event = 0.5
        elif crawl_speed == CrawlSpeed.Speed_of_Lightning:
            self.wait_for_processing = 0.01
            self.wait_for_event = 0.1

        def read_script(path):
            # The context manager closes the handle even if read() raises.
            with open(path, "r") as script:
                return script.read()

        # JavaScript sources kept as strings on the instance.
        self._lib_js = read_script("js/lib.js")
        self._xhr_observe_js = read_script("js/ajax_observer.js")
        # NOTE: the attribute name (including its spelling) is kept as-is,
        # since code outside this method may refer to it.
        self._timeming_wrapper_js = read_script("js/timing_wrapper.js")
        self._xhr_interception_js = read_script("js/ajax_interceptor.js")
        self._addEventListener = read_script("js/addeventlistener_wrapper.js")
        self._md5 = read_script("js/md5.js")
        self._property_obs_js = read_script("js/property_obs.js")

        enable_plugins = True
        settings = self.settings()
        settings.setAttribute(QWebSettings.PluginsEnabled, enable_plugins)
        settings.setAttribute(QWebSettings.JavaEnabled, enable_plugins)
        # NOTE: the AutoLoadImages override is disabled, so the QWebSettings
        # default for image loading applies.
        settings.setAttribute(QWebSettings.DeveloperExtrasEnabled, True)
        settings.setAttribute(QWebSettings.JavascriptEnabled, True)
        settings.setAttribute(QWebSettings.JavascriptCanOpenWindows, True)

        if network_access_manager:
            self.setNetworkAccessManager(network_access_manager)

        if proxy != "" and port != 0:
            manager = self.networkAccessManager()
            p = QNetworkProxy(QNetworkProxy.HttpProxy, proxy, port, None, None)
            manager.setProxy(p)
            self.setNetworkAccessManager(manager)

        # Connect only now: connecting earlier could attach the slot to the
        # previous manager, which may have been replaced above.
        self.networkAccessManager().finished.connect(self.loadComplete)