Exemple #1
0
    def test_dom_matches(self, plugin, yaml_dict):
        """Check the plugin's "dom" matchers against the fixture's JavaScript.

        The fixture code in ``yaml_dict["dom"]`` is evaluated in a fresh
        interpreter that only has an empty ``window`` object.  Every matcher
        whose check statement evaluates to something other than ``None`` must
        agree with the fixture: matchers with a version statement are compared
        with ``yaml_dict["version"]`` (and end the loop), the others require
        ``yaml_dict["presence"]`` to be truthy.  The test fails if no matcher
        triggered an assertion.
        """
        fixture_js = yaml_dict["dom"]

        js = dukpy.JSInterpreter()
        # Minimal stand-in for the browser's global object.
        js.evaljs("window = {};")
        js.evaljs(fixture_js)

        checked = False  # becomes True once any matcher has been verified
        for check_statement, version_statement in plugin.get_matchers("dom"):
            if js.evaljs(check_statement) is None:
                continue

            if not version_statement:
                # Presence-only matcher: keep looking at the remaining ones.
                assert yaml_dict["presence"]
                checked = True
                continue

            detected_version = js.evaljs(version_statement)
            assert yaml_dict["version"] == detected_version
            checked = True
            break

        assert checked
Exemple #2
0
    def __init__(self, url):
        """Set up page state, the Tk window and the JavaScript interpreter.

        :param url: stored as the home address and as the first history entry.

        Reads ``default.css`` and ``default.js`` from the current working
        directory.
        """
        # Page / navigation state.
        self.source = None
        self.tree = None
        self.scrolly = 0
        self.home = url
        self.history = [url]
        self.index = 0

        # Default stylesheet rules.
        with open("default.css") as css_file:
            css_text = css_file.read()
        self.default_style = list(CSS.parse(css_text))

        # Window, key/mouse bindings and drawing surface.
        root = tkinter.Tk()
        # NOTE(review): self.scroll(...) is called here, at bind time, and its
        # return value is what Tk registers -- presumably a callback; confirm.
        root.bind("<Down>", self.scroll(100))
        root.bind("<space>", self.scroll(400))
        root.bind("<Up>", self.scroll(-100))
        root.bind("<Button-1>", self.handle_click)
        root.focus_set()
        drawing_area = tkinter.Canvas(root, width=800, height=1000)
        drawing_area.pack(side=tkinter.LEFT)
        self.window = root
        self.canvas = drawing_area

        # JavaScript interpreter with the Python callbacks it may call.
        self.js = dukpy.JSInterpreter()
        exported = (
            ("querySelector", self.js_querySelector),
            ("innerHTML", self.js_innerHTML),
            ("get_attr", self.js_getattr),
            ("log", self.js_log),
        )
        for js_name, py_callable in exported:
            self.js.export_function(js_name, py_callable)
        self.js_handles = []

        # Evaluate the bundled runtime script in the new interpreter.
        with open("default.js") as runtime_file:
            self.js.evaljs(runtime_file.read())
Exemple #3
0
    def __init__(self, pac_js, **kwargs):
        """
        Build a PAC evaluator from a string of JavaScript.

        The script is evaluated in a fresh dukpy interpreter after every entry
        of ``function_injections`` has been injected, and a trial lookup is
        performed immediately afterwards.

        :param str pac_js: JavaScript that defines the FindProxyForURL() function.
        :param kwargs: only ``recursion_limit`` is inspected; a truthy value
            emits a warning and is otherwise ignored.
        :raises MalformedPacError: If dukpy raises ``JSRuntimeError`` while the
            script is loaded or during the trial lookup.
        """
        if kwargs.get('recursion_limit'):
            import warnings
            warnings.warn(
                'recursion_limit is deprecated and has no effect. It will be removed in a future release.'
            )

        try:
            context = dukpy.JSInterpreter()
            self._context = context
            for js_name, py_func in function_injections.items():
                _inject_function_into_js(context, js_name, py_func)
            context.evaljs(pac_js)

            # Trial lookup so that problems such as calls to unimplemented
            # functions surface now rather than on first real use.
            self.find_proxy_for_url('/', '0.0.0.0')

        except dukpy.JSRuntimeError as js_error:
            raise MalformedPacError(original_exc=js_error)  # from js_error
Exemple #4
0
    def parse(self):
        """Lex and parse the page, load the CSS rules, run scripts, relayout.

        Populates ``self.text``, ``self.nodes``, ``self.rules``, ``self.js``
        and ``self.js_handles``.  Each script found in the node tree is
        resolved against the last history entry, fetched with a GET request
        and evaluated in the interpreter.  Phases are bracketed with
        ``self.timer`` start/stop calls.
        """
        timer = self.timer

        timer.start("HTML")
        self.text = lex(self.body)
        timer.stop()
        self.nodes = parse(self.text)

        timer.start("Parse CSS")
        self.rules = parse_css(DEFAULT_STYLE)
        timer.stop()
        # Order rules by the score of their first element.
        self.rules.sort(key=lambda rule: rule[0].score())

        timer.start("JS")
        self.js = dukpy.JSInterpreter()
        self.js_handles = {}

        # Python functions callable from JavaScript.
        self.js.export_function("log", print)
        self.js.export_function("querySelectorAll", self.js_querySelectorAll)

        # Built-in runtime first, then the page's own scripts.
        self.js.evaljs(DEFAULT_JS)

        for script_ref in find_scripts(self.nodes, []):
            absolute_url = relative_url(script_ref, self.history[-1])
            host, port, path, _fragment = parse_url(absolute_url)
            _headers, script_source = request('GET', host, port, path)
            self.js.evaljs(script_source)
        timer.stop()

        self.relayout()
Exemple #5
0
def babel_compile(source, reuse_js_ctx=True, **kwargs):
    """Compile the given `source` from ES6 to ES5 using Babeljs.

    :param source: ES6 code, exposed to the script as ``dukpy.es6code``.
    :param reuse_js_ctx: when true, the interpreter that has the Babel
        compiler loaded is kept in the module-level ``BABEL_JS_CTX`` and
        reused by later calls; when false, a one-off ``dukpy.evaljs`` call
        loads the compiler each time.
    :param kwargs: Babel options, exposed as ``dukpy.babel_options``;
        ``presets`` defaults to ``["es2015"]`` when missing or empty.
    :return: the value of the final script statement, an object with the
        ``map`` and ``code`` produced by ``Babel.transform``.
    """
    global BABEL_JS_CTX
    if not kwargs.get('presets'):
        kwargs['presets'] = ["es2015"]

    # Two statements: run the transform, then select the fields to return.
    transform_steps = (
        'var bres, res;'
        'bres = Babel.transform(dukpy.es6code, dukpy.babel_options);',
        'res = {map: bres.map, code: bres.code};')

    if reuse_js_ctx and BABEL_JS_CTX:
        # The cached interpreter already has Babel loaded.
        return BABEL_JS_CTX.evaljs(transform_steps,
                                   es6code=source,
                                   babel_options=kwargs)

    with open(BABEL_COMPILER, 'r') as compiler_file:
        if reuse_js_ctx:
            BABEL_JS_CTX = dukpy.JSInterpreter()
            run_js = BABEL_JS_CTX.evaljs
        else:
            run_js = dukpy.evaljs
        # Prepend the compiler source so Babel is defined before it is used.
        return run_js((compiler_file.read(),) + transform_steps,
                      es6code=source,
                      babel_options=kwargs)
    def test_call_python(self):
        """An exported Python function is reachable through ``call_python``."""
        def _say_hello(num, who):
            # Repeat `who` `num` times, separated by single spaces.
            return 'Hello ' + ' '.join([who] * num)

        js = dukpy.JSInterpreter()
        js.export_function('say_hello', _say_hello)
        greeting = js.evaljs("call_python('say_hello', 3, 'world')")
        assert greeting == 'Hello world world world', greeting
    def test_module_loader_unexisting(self):
        """Requiring an unknown module raises ``JSRuntimeError`` naming it."""
        js = dukpy.JSInterpreter()

        failure = None
        try:
            js.evaljs("require('missing_module');")
        except JSRuntimeError as exc:
            failure = exc

        assert failure is not None, 'should have raised'
        assert 'cannot find module: missing_module' in str(failure)
Exemple #8
0
    def test_install_react(self):
        """Install react packages into ``self.tmpdir`` and render ``TEST_CODE``.

        The compiled JSX is evaluated by an interpreter whose module loader
        has ``self.tmpdir`` registered as a search path.
        """
        for package in ('react', 'react-dom'):
            dukpy.install_jspackage(package, '0.14.8', self.tmpdir)

        compiled = dukpy.jsx_compile(TEST_CODE)

        interpreter = dukpy.JSInterpreter()
        interpreter.loader.register_path(self.tmpdir)
        rendered = interpreter.evaljs(
            compiled, data={'id': 1, 'name': "Alessandro"})
        assert rendered == '<div class="helloworld">Hello Alessandro</div>', rendered
Exemple #9
0
 def test_hello_world(self):
     """Compile a JSX element and render it to static markup with React.

     The interpreter evaluates three pieces in order: the ``require`` calls
     for React, the compiled JSX, and the ``renderToStaticMarkup`` call whose
     value is the result under test.
     """
     jsx = dukpy.jsx_compile('var react_hello = <h1>Hello, world!</h1>;')
     jsi = dukpy.JSInterpreter()
     result = jsi.evaljs([
         '''
         var React = require('react/react'),
          ReactDOM = require('react/react-dom-server');
         ''', jsx, 'ReactDOM.renderToStaticMarkup(react_hello, null);'
     ])
     # The message must reference `result`; the previous `res` was undefined,
     # so a failing assertion raised NameError instead of showing the markup.
     assert result == '<h1>Hello, world!</h1>', result
Exemple #10
0
 def setup_js(self):
     """Create the JS interpreter, export the DOM helpers, load the runtime.

     Also resets the two node/handle lookup dictionaries.  The runtime script
     is read from ``runtime9.js`` in the current working directory.
     """
     self.js = dukpy.JSInterpreter()
     self.node_to_handle = {}
     self.handle_to_node = {}

     # Python callables made available to JavaScript, in export order.
     bindings = (
         ("log", print),
         ("querySelectorAll", self.js_querySelectorAll),
         ("getAttribute", self.js_getAttribute),
         ("innerHTML", self.js_innerHTML),
     )
     for js_name, py_callable in bindings:
         self.js.export_function(js_name, py_callable)

     with open("runtime9.js") as runtime_file:
         self.js.evaljs(runtime_file.read())
Exemple #11
0
    def test_module_loader(self):
        interpreter = dukpy.JSInterpreter()
        res = interpreter.evaljs('''
    babel = require('babel-6.14.0.min');
    babel.transform(dukpy.es6code, {presets: ["es2015"]}).code;
''',
                                 es6code='let i=5;')

        expected = '''"use strict";

var i = 5;'''
        assert res == expected, report_diff(expected, res)
Exemple #12
0
 def setup_js(self):
     """Reset the handle maps and build the JS environment for the page.

     Exports the logging, DOM and cookie helpers to a fresh interpreter and
     then evaluates ``browser/src/runtime.js`` (path relative to the current
     working directory).
     """
     self.node_to_handle = {}
     self.handle_to_node = {}

     environment = dukpy.JSInterpreter()
     self.js_environment = environment

     # Python callables made available to JavaScript, in export order.
     bindings = (
         ("log", print),
         ("querySelectorAll", self.js_querySelectorAll),
         ("getAttribute", self.js_getAttribute),
         ("innerHTML", self.js_innerHTML),
         ("cookie", self.js_cookie),
     )
     for js_name, py_callable in bindings:
         environment.export_function(js_name, py_callable)

     with open("browser/src/runtime.js") as runtime_file:
         environment.evaljs(runtime_file.read())
Exemple #13
0
    def __post_init__(self):
        """Create the Tk window, one JS interpreter per pledge level, and state.

        Four interpreters are created, keyed 0-3.  Per the ``PLEDGE_LEVELS``
        table set at the end, the key is a two-bit mask: bit 0 is 'input',
        bit 1 is 'cookie'.  Every interpreter gets the same exported Python
        functions and evaluates ``runtime.js`` with its own key passed as
        ``pledge``.
        """
        self.window = tkinter.Tk()
        self.canvas = tkinter.Canvas(self.window, width=800, height=600)
        self.canvas.pack()
        for event, handler in (
            ("<Up>", self.scroll_up),
            ("<Down>", self.scroll_down),
            ("<Button-1>", self.handle_click),
        ):
            self.window.bind(event, handler)
        self.history = []
        self.timer = Timer()

        # TODO: interpreter generation and mapping
        # One interpreter per combination of the two pledge bits (0b00CI).
        self.js_interpreters = {
            level: dukpy.JSInterpreter() for level in range(4)
        }

        # Same Python callbacks for every interpreter.
        bindings = (
            ("log", print),
            ("querySelectorAll", self.js_querySelectorAll),
            ("evaluate", self.js_evaluate),
            ("getAttribute", self.js_getAttribute),
            ("setAttribute", self.js_setAttribute),
            ("innerHTML", self.js_innerHTML),
            ("textContent", self.js_textContent),
            ("cookie", self.js_cookie),
            ("sendPost", self.post),
            ("registerListener", self.register_listener),
        )
        for interpreter in self.js_interpreters.values():
            for js_name, py_callable in bindings:
                interpreter.export_function(js_name, py_callable)

        # Load the runtime into each interpreter, tagged with its level.
        for level, interpreter in self.js_interpreters.items():
            with open("runtime.js") as runtime_file:
                # TODO: passing pledge level here
                interpreter.evaljs(runtime_file.read(), pledge=level)

        self.js_handles = {}
        self.jar = {}
        self.pledge_map = {}
        self.PLEDGE_LEVELS = {'input': 0b01, 'cookie': 0b10}
    def parse_purchases_count_v1(self, products_data_js, wb_id):
        """Return the orders count for ``wb_id`` from an inline product script.

        The script text is searched for a ``wb.product.DomReady.init({...});``
        call.  Its object literal is evaluated with dukpy and the
        ``ordersCount`` of the ``data.nomenclatures`` entry whose key equals
        ``wb_id`` is returned.

        :param products_data_js: text of the inline ``<script>`` block.
        :param wb_id: nomenclature key to look up (compared with ``==``).
        :return: the ``ordersCount`` value, or ``None`` when the input is
            empty, the init call is not found, or the evaluated data has no
            matching nomenclature.
        :raises KeyError: if the matching entry has no ``ordersCount`` key.
        """
        if not products_data_js:
            return None

        # Collapse the script so the non-greedy pattern can span what were
        # originally several lines.
        products_data_js = re.sub('\n', '', products_data_js)
        products_data_js = re.sub(r'\s{2,}', '', products_data_js)

        # re.search instead of findall()[0]: a script without the init call
        # yields None here instead of raising IndexError.
        init_match = re.search(r'wb\.product\.DomReady\.init\(({.*?})\);', products_data_js)
        if init_match is None:
            return None

        interpreter = dukpy.JSInterpreter()
        evaled_data = interpreter.evaljs(f'init={init_match.group(1)};init.data;')
        if not isinstance(evaled_data, dict):
            return None

        nomenclatures = evaled_data.get('nomenclatures')
        if not isinstance(nomenclatures, dict):
            return None

        for sku_id, data in nomenclatures.items():
            if sku_id == wb_id:
                return data['ordersCount']
        return None
Exemple #15
0
    def test_js_matches(self, plugin, yaml_dict):
        """Check the plugin's JS matchers against the fixture's JavaScript.

        ``yaml_dict['js']`` is evaluated in an interpreter that only has an
        empty ``window`` object.  For every matcher whose ``check`` statement
        evaluates to something other than ``None``, the value of its
        ``version`` statement must equal ``yaml_dict['version']``.  At least
        one matcher has to trigger.
        """
        fixture_js = yaml_dict['js']

        js = dukpy.JSInterpreter()
        # Minimal stand-in for the browser's global object.
        js.evaljs('window = {};')
        js.evaljs(fixture_js)

        checked = False
        for matcher in plugin.js_matchers:
            if js.evaljs(matcher['check']) is None:
                continue
            detected_version = js.evaljs(matcher['version'])
            assert yaml_dict['version'] == detected_version
            checked = True

        assert checked
Exemple #16
0
    def test_js_matches(self, plugin_name, match, plugin_list):
        """Check a named plugin's JS matchers against the fixture's JavaScript.

        ``match['js']`` is evaluated in an interpreter that only has an empty
        ``window`` object; the plugin is then looked up by name in
        ``plugin_list``.  For every matcher whose ``check`` statement
        evaluates to something other than ``None``, the value of its
        ``version`` statement must equal ``match['version']``.  At least one
        matcher has to trigger.
        """
        fixture_js = match['js']

        js = dukpy.JSInterpreter()
        # Minimal stand-in for the browser's global object.
        js.evaljs('window = {};')
        js.evaljs(fixture_js)

        checked = False
        plugin = get_plugin_by_name(plugin_name, plugin_list)
        for matcher in plugin.js_matchers:
            if js.evaljs(matcher['check']) is None:
                continue
            detected_version = js.evaljs(matcher['version'])
            assert match['version'] == detected_version
            checked = True

        assert checked
    def parse_purchases_count_v2(self, products_data_js, wb_id):
        """Return the selected nomenclature's orders count from a SPA script.

        The script text is searched for a ``wb.spa.init({...});`` call.  Its
        object literal is evaluated with dukpy and the value at
        ``router.ssrModel.selectedNomenclature.ordersCount`` is returned.

        :param products_data_js: text of the inline ``<script>`` block.
        :param wb_id: not used by this variant; kept so both parser versions
            share a signature.
        :return: the ``ordersCount`` value, or ``None`` when the input is
            empty, the init call is not found, or any level of the expected
            path is missing.
        """
        if not products_data_js:
            return None

        # Collapse the script so the non-greedy pattern can span what were
        # originally several lines.
        products_data_js = re.sub('\n', '', products_data_js)
        products_data_js = re.sub(r'\s{2,}', '', products_data_js)

        # These two properties reference variables that are not part of the
        # extracted literal; drop them before evaluation.
        products_data_js = re.sub('routes: routes,', '', products_data_js)
        products_data_js = re.sub('routesDictionary: routesDictionary,', '', products_data_js)

        # re.search instead of findall()[0]: a script without the init call
        # yields None here instead of raising IndexError.
        init_match = re.search(r'wb\.spa\.init\(({.*?})\);', products_data_js)
        if init_match is None:
            return None

        interpreter = dukpy.JSInterpreter()
        node = interpreter.evaljs(f'init={init_match.group(1)};init.router;')

        # Walk the nested objects; a missing level (including an undefined
        # `router`, which evaluates to None) means there is no count.
        for key in ('ssrModel', 'selectedNomenclature', 'ordersCount'):
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node
Exemple #18
0
    def calcUnit(self):
        """Delete all ``UnitSummary`` rows and rebuild them via the JS model.

        ``UnitDataModel.js`` is located through ``finders``, transpiled with
        Babel, and evaluated once per unit in a shared interpreter.  For each
        unit, the attributes listed in the view's ``data_tags`` are copied
        from the resulting JS object onto a new ``UnitSummary`` and saved.
        """
        UnitSummary.objects.all().delete()
        caps = {
            "max_level": UnitSummary.max_level(),
            "max_rank": UnitSummary.max_rank(),
            "max_rarity": UnitSummary.max_rarity(),
            "max_love": UnitSummary.max_love(),
        }

        model_path = finders.find("pcrd_unpack/scripts/elements/UnitDataModel.js")
        with open(model_path) as model_file:
            es6_source = model_file.read()
        es5_source = dukpy.babel_compile(es6_source)["code"]
        interpreter = dukpy.JSInterpreter()

        for unit in UnitListView().get_queryset():
            unit_id = unit.unit_id
            unit_data = UnitJsonView().get_unit_data(unit_id=unit_id)
            context = UnitDetailView().get_context_data(unit_id=unit_id)
            tags = context["data_tags"]

            # The model source is re-evaluated for every unit and a new
            # UnitDataModel is instantiated each time (the original comment
            # attributes this to a dukpy bug).
            script = [
                es5_source, "var udm = new UnitDataModel()",
                "udm.unit_parameter = dukpy['value']",
                "udm.result_ids = dukpy['data_tags']",
                "udm.calc(dukpy['max_level'],dukpy['max_rank'],dukpy['max_rarity'],dukpy['max_love'])",
                "udm;"
            ]
            computed = interpreter.evaljs(
                script, **caps, value=unit_data, data_tags=tags)

            summary = UnitSummary(unit_id=unit_id)
            for tag in tags:
                setattr(summary, tag, computed[tag])
            summary.save()
Exemple #19
0
 def test_interpreter_keeps_context(self):
     """Variables defined by one ``evaljs`` call are visible to the next."""
     js = dukpy.JSInterpreter()

     first = js.evaljs("var o = {'value': 5}; o")
     assert first == {'value': 5}

     # `o` still exists in the same interpreter and can be mutated.
     second = js.evaljs("o.value += 1; o")
     assert second == {'value': 6}
Exemple #20
0
    def test_module_loader_unexisting(self):
        """Requiring an unknown module raises ``JSRuntimeError`` naming it."""
        js = dukpy.JSInterpreter()

        with self.assertRaises(JSRuntimeError) as caught:
            js.evaljs("require('missing_module');")

        message = str(caught.exception)
        assert 'cannot find module: missing_module' in message
Exemple #21
0
    def parse_good(self, response):
        """Scrape one product page and yield the follow-up requests.

        If the page's canonical URL differs from ``response.url``, a single
        request for the canonical URL is yielded and nothing else happens.
        Otherwise an item loader is filled from the page (CSS selectors, meta
        values passed by the category crawl, images, features and the
        purchase count embedded in an inline script) and two kinds of request
        are yielded: one for the reviews page, carrying the loader in
        ``meta``, and -- for parent items only -- one per product variant.

        Spider attributes read via ``getattr`` (all default to ``False``):
        ``skip_images``, ``skip_variants``, ``allow_dupes``.

        :param response: the product page response; ``response.meta`` may
            carry ``parent_item``, ``category_url``, ``category_name`` and
            ``current_position``.
        """
        def clear_url_params(url):
            """Return ``url`` without its query string."""
            return url.split('?')[0]

        def generate_reviews_link(base_url, sort='Asc'):
            """Build the reviews URL for the product page at ``base_url``."""
            # at first it is like https://www.wildberries.ru/catalog/8685970/detail.aspx
            # must be like https://www.wildberries.ru/catalog/8685970/otzyvy?field=Date&order=Asc
            link_param = response.css(
                '#Comments a.show-more::attr(data-link)').get()

            # Raw string: '\.' in a plain literal is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            return re.sub(r'detail\.aspx.*$',
                          f'otzyvy?field=Date&order={sort}&link={link_param}',
                          base_url)

        def add_netloc_to_url(url, base_url):
            """Prefix a scheme-less, host-less ``url`` with ``base_url``'s."""
            url_parsed = urlparse(url)

            if url_parsed.scheme == '' and url_parsed.netloc == '':
                base_url_parsed = urlparse(base_url)

                url = base_url_parsed.scheme + '://' + base_url_parsed.netloc + url

            return url

        skip_images = getattr(self, 'skip_images', False)
        skip_variants = getattr(self, 'skip_variants', False)
        allow_dupes = getattr(self, 'allow_dupes', False)

        current_good_item = WildsearchCrawlerItemWildberries()
        parent_item = response.meta.get('parent_item')

        loader = ItemLoader(item=current_good_item, response=response)

        # category position stats
        wb_category_url = response.meta.get('category_url')
        wb_category_name = response.meta.get('category_name')
        wb_category_position = response.meta.get('current_position')

        canonical_url = response.css('link[rel=canonical]::attr(href)').get()
        canonical_url = add_netloc_to_url(canonical_url, response.url)

        if canonical_url != response.url:
            # Re-request the canonical address and stop processing this one.
            yield response.follow(clear_url_params(canonical_url),
                                  self.parse_good,
                                  dont_filter=allow_dupes,
                                  meta={
                                      'current_position': wb_category_position,
                                      'category_url': wb_category_url
                                  })

            return

        # scraping brand and manufacturer countries
        wb_brand_country = ''
        wb_manufacture_country = ''

        for param in response.css('.params .pp'):
            param_name = param.css('span:nth-of-type(1) b::text').get()
            param_value = param.css('span:nth-of-type(2)::text').get()

            if u'Страна бренда' == param_name:
                wb_brand_country = param_value

            if u'Страна производитель' == param_name:
                wb_manufacture_country = param_value

        wb_id = response.css('div.article span::text').get()

        # fill css selectors fields
        loader.add_css('product_name', '.brand-and-name .name::text')
        loader.add_css('wb_reviews_count', '.count-review i::text')
        loader.add_css('wb_price', '.final-cost::text')
        loader.add_css('wb_rating', '.product-rating span::text')
        loader.add_css('wb_id', 'div.article span::text')

        # fill non-css values
        loader.add_value('wb_id', wb_id)
        loader.add_value('parse_date', datetime.datetime.now().isoformat(" "))
        loader.add_value('marketplace', 'wildberries')
        loader.add_value('product_url', response.url)
        loader.add_value('wb_brand_name',
                         response.css('.brand-and-name .brand::text').get())
        loader.add_value('wb_brand_url',
                         response.css('.brand-logo a::attr(href)').get())
        loader.add_value('wb_brand_logo',
                         response.css('.brand-logo img::attr(src)').get())
        loader.add_value('wb_brand_country', wb_brand_country)
        loader.add_value('wb_manufacture_country', wb_manufacture_country)
        loader.add_value('wb_category_url', wb_category_url)
        loader.add_value('wb_category_name', wb_category_name)
        loader.add_value('wb_category_position', wb_category_position)

        # create list of images
        if skip_images is False:
            image_urls = []

            for tm in response.css('.pv-carousel .carousel a img::attr(src)'):
                # Every 'tm' substring of the thumbnail URL becomes 'big'.
                image_urls.append(tm.get().strip().replace('tm', 'big'))

            loader.add_value('image_urls', image_urls)

        # create list of features
        features = {}

        for feature in response.css('.params .pp'):
            features[feature.css('span:nth-of-type(1) b::text').get().strip(
            )] = feature.css('span:nth-of-type(2)::text').get().strip()

        loader.add_value('features', features)

        # get purchase count from inline JavaScript block with data
        products_data_js = response.xpath(
            '//script[contains(., "wb.product.DomReady.init")]/text()').get()

        if products_data_js is not None and str(products_data_js) != '':
            products_data_js = re.sub('\n', '', products_data_js)
            products_data_js = re.sub(r'\s{2,}', '', products_data_js)

            # re.search instead of findall()[0]: a script that mentions the
            # init call but does not match the pattern no longer raises
            # IndexError; the purchase count is simply left unset.
            init_match = re.search(
                r'wb\.product\.DomReady\.init\(({.*?})\);',
                products_data_js)
            products_init = init_match.group(1) if init_match else ''

            if products_init != '':
                # NOTE(review): this evaluates page-supplied JavaScript inside
                # the dukpy interpreter -- confirm that is acceptable here.
                interpreter = dukpy.JSInterpreter()
                evaled_data = interpreter.evaljs(
                    f'init={products_init};init.data;')

                if evaled_data is not None and 'nomenclatures' in evaled_data.keys(
                ):
                    for sku_id, data in evaled_data['nomenclatures'].items():
                        if sku_id == wb_id:
                            loader.add_value('wb_purchases_count',
                                             data['ordersCount'])
                            break

        if parent_item is not None:
            loader.add_value('wb_parent_id', parent_item.get('wb_id', ''))

        # get reviews dates
        yield response.follow(generate_reviews_link(response.url, 'Asc'),
                              callback=self.parse_good_first_review_date,
                              errback=self.parse_good_errback,
                              meta={'loader': loader},
                              headers={'x-requested-with': 'XMLHttpRequest'})

        # follow goods variants only if we scrape parent item
        if skip_variants is False and parent_item is None:
            for variant in response.css('.options ul li a::attr(href)'):
                yield response.follow(clear_url_params(variant.get()),
                                      callback=self.parse_good,
                                      meta={'parent_item': current_good_item})