def test_dom_matches(self, plugin, yaml_dict):
    """Run the plugin's "dom" matchers against the fixture's JavaScript.

    The fixture code in ``yaml_dict["dom"]`` is evaluated in a fresh
    interpreter. Every matcher is a ``(check, version)`` pair of JS
    statements: when the check evaluates to something other than ``None``
    the matcher is considered present. A present matcher with a version
    statement must yield ``yaml_dict["version"]`` (and ends the search);
    one without a version statement requires ``yaml_dict["presence"]`` to
    be truthy. The test fails if no matcher led to an assertion.
    """
    fixture_js = yaml_dict["dom"]
    js = dukpy.JSInterpreter()
    # Create window browser object
    js.evaljs("window = {};")
    js.evaljs(fixture_js)

    checked = False  # At least one assert was done
    for check_statement, version_statement in plugin.get_matchers("dom"):
        if js.evaljs(check_statement) is None:
            # Matcher not present in this fixture; try the next one.
            continue
        if not version_statement:
            assert yaml_dict["presence"]
            checked = True
            continue
        detected = js.evaljs(version_statement)
        assert yaml_dict["version"] == detected
        checked = True
        break
    assert checked
def __init__(self, url):
    """Initialise browser state, the Tk window and the JS interpreter.

    :param url: start page; stored as ``home`` and as the first history entry.

    Reads ``default.css`` and ``default.js`` from the current working
    directory.
    """
    # Page / navigation state.
    self.source = None
    self.tree = None
    self.scrolly = 0
    self.home = url
    self.history = [url]
    self.index = 0

    with open("default.css") as stylesheet:
        self.default_style = list(CSS.parse(stylesheet.read()))

    window = tkinter.Tk()
    # NOTE(review): self.scroll(n) is *called* here and its return value is
    # what gets bound -- presumably scroll() returns an event handler; verify.
    for key, amount in (("<Down>", 100), ("<space>", 400), ("<Up>", -100)):
        window.bind(key, self.scroll(amount))
    window.bind("<Button-1>", self.handle_click)
    window.focus_set()

    canvas = tkinter.Canvas(window, width=800, height=1000)
    canvas.pack(side=tkinter.LEFT)
    self.window = window
    self.canvas = canvas

    # Expose the Python-side DOM helpers to scripts, then load the runtime.
    self.js = dukpy.JSInterpreter()
    exports = (
        ("querySelector", self.js_querySelector),
        ("innerHTML", self.js_innerHTML),
        ("get_attr", self.js_getattr),
        ("log", self.js_log),
    )
    for js_name, handler in exports:
        self.js.export_function(js_name, handler)
    self.js_handles = []
    with open("default.js") as runtime:
        self.js.evaljs(runtime.read())
def __init__(self, pac_js, **kwargs):
    """
    Load a PAC file from a given string of JavaScript.
    Errors during parsing and validation may raise a specialized exception.

    :param str pac_js: JavaScript that defines the FindProxyForURL() function.
    :raises MalformedPacError: If the JavaScript could not be parsed, does not define FindProxyForURL(),
        or is otherwise invalid.
    """
    # ``recursion_limit`` is still accepted as a keyword but only triggers a warning.
    if kwargs.get('recursion_limit'):
        import warnings
        warnings.warn(
            'recursion_limit is deprecated and has no effect. It will be removed in a future release.'
        )

    try:
        interpreter = self._context = dukpy.JSInterpreter()
        for js_name, py_func in function_injections.items():
            _inject_function_into_js(interpreter, js_name, py_func)
        interpreter.evaljs(pac_js)

        # A test call to weed out errors like unimplemented functions.
        self.find_proxy_for_url('/', '0.0.0.0')
    except dukpy.JSRuntimeError as err:
        # Explicit ``from err`` chaining is intentionally left out, as in the original.
        raise MalformedPacError(original_exc=err)  # from err
def parse(self):
    """Lex and parse the page body, load CSS rules, run scripts, then relayout.

    Three phases are timed through ``self.timer``: "HTML" (lexing),
    "Parse CSS" and "JS". Every script found in the node tree is
    downloaded with a GET request, resolved relative to the last history
    entry, and evaluated in a fresh interpreter.
    """
    self.timer.start("HTML")
    self.text = lex(self.body)
    self.timer.stop()
    self.nodes = parse(self.text)

    self.timer.start("Parse CSS")
    self.rules = parse_css(DEFAULT_STYLE)
    self.timer.stop()
    # Order rules by the score of their first element (the selector).
    self.rules.sort(key=lambda rule: rule[0].score())

    self.timer.start("JS")
    self.js = dukpy.JSInterpreter()
    self.js_handles = dict()
    # Registration
    self.js.export_function("log", print)
    self.js.export_function("querySelectorAll", self.js_querySelectorAll)
    # Run runtime
    self.js.evaljs(DEFAULT_JS)
    for script in find_scripts(self.nodes, []):
        script_url = relative_url(script, self.history[-1])
        host, port, path, _fragment = parse_url(script_url)
        _headers, script_body = request('GET', host, port, path)
        self.js.evaljs(script_body)
    self.timer.stop()

    self.relayout()
def babel_compile(source, reuse_js_ctx=True, **kwargs):
    """Compile the given `source` from ES6 to ES5 using Babeljs.

    Extra keyword arguments are forwarded to ``Babel.transform`` as its
    options; ``presets`` defaults to ``["es2015"]`` when missing or empty.

    With ``reuse_js_ctx`` (the default) the interpreter holding the loaded
    Babel compiler is kept in the module-level ``BABEL_JS_CTX`` and reused
    by later calls; otherwise Babel is loaded from ``BABEL_COMPILER`` for
    this call only.

    Returns the evaluation result of ``{map: bres.map, code: bres.code}``.
    """
    global BABEL_JS_CTX

    if not kwargs.get('presets'):
        kwargs['presets'] = ["es2015"]

    # Script fragments that run the transform once Babel is loaded.
    trans_code = (
        'var bres, res;'
        'bres = Babel.transform(dukpy.es6code, dukpy.babel_options);',
        'res = {map: bres.map, code: bres.code};',
    )

    if reuse_js_ctx and BABEL_JS_CTX:
        # Babel is already loaded in the cached interpreter.
        return BABEL_JS_CTX.evaljs(trans_code, es6code=source,
                                   babel_options=kwargs)

    with open(BABEL_COMPILER, 'r') as babel_js:
        if reuse_js_ctx:
            BABEL_JS_CTX = dukpy.JSInterpreter()
            evaluate = BABEL_JS_CTX.evaljs
        else:
            evaluate = dukpy.evaljs
        # Load the compiler and run the transform in a single evaluation.
        return evaluate((babel_js.read(),) + trans_code,
                        es6code=source, babel_options=kwargs)
def test_call_python(self):
    """A Python function exported to the interpreter is callable via ``call_python``."""

    def _say_hello(num, who):
        repeated = ' '.join(who for _ in range(num))
        return 'Hello ' + repeated

    js = dukpy.JSInterpreter()
    js.export_function('say_hello', _say_hello)

    greeting = js.evaljs("call_python('say_hello', 3, 'world')")
    assert greeting == 'Hello world world world', greeting
def test_module_loader_unexisting(self):
    """Requiring an unknown module raises JSRuntimeError naming the module."""
    interpreter = dukpy.JSInterpreter()

    error = None
    try:
        interpreter.evaljs("require('missing_module');")
    except JSRuntimeError as exc:
        error = exc

    assert error is not None, 'should have raised'
    assert 'cannot find module: missing_module' in str(error)
def test_install_react(self):
    """Install react / react-dom into ``self.tmpdir`` and render TEST_CODE."""
    for package in ('react', 'react-dom'):
        dukpy.install_jspackage(package, '0.14.8', self.tmpdir)

    compiled = dukpy.jsx_compile(TEST_CODE)

    interpreter = dukpy.JSInterpreter()
    # Make the freshly installed packages resolvable by ``require``.
    interpreter.loader.register_path(self.tmpdir)
    rendered = interpreter.evaljs(compiled, data={'id': 1, 'name': "Alessandro"})
    assert rendered == '<div class="helloworld">Hello Alessandro</div>', rendered
def test_hello_world(self):
    """Compile a JSX snippet and render it to static markup with React.

    The compiled JSX defines ``react_hello``; it is evaluated after a
    prelude that requires React and the server-side ReactDOM, and the
    rendered markup is compared against the expected ``<h1>`` element.
    """
    jsx = dukpy.jsx_compile('var react_hello = <h1>Hello, world!</h1>;')
    jsi = dukpy.JSInterpreter()
    result = jsi.evaljs([
        ''' var React = require('react/react'), ReactDOM = require('react/react-dom-server'); ''',
        jsx,
        'ReactDOM.renderToStaticMarkup(react_hello, null);'
    ])
    # The failure message must use ``result``: the previous ``res`` was an
    # undefined name, so a mismatch raised NameError instead of AssertionError.
    assert result == '<h1>Hello, world!</h1>', result
def setup_js(self):
    """Create the JS interpreter, export the DOM helpers and load ``runtime9.js``.

    Also resets the two node/handle lookup tables.
    """
    self.js = dukpy.JSInterpreter()
    self.node_to_handle = {}
    self.handle_to_node = {}

    # Python callables made available to scripts, in registration order.
    exports = (
        ("log", print),
        ("querySelectorAll", self.js_querySelectorAll),
        ("getAttribute", self.js_getAttribute),
        ("innerHTML", self.js_innerHTML),
    )
    for js_name, handler in exports:
        self.js.export_function(js_name, handler)

    with open("runtime9.js") as runtime:
        self.js.evaljs(runtime.read())
def test_module_loader(self):
    """Load Babel through ``require`` and transform a one-line ES6 snippet.

    The ES6 source is passed to the interpreter as the ``es6code`` keyword
    and read from JavaScript as ``dukpy.es6code``. The transformed code is
    compared with ``expected``; on mismatch ``report_diff`` supplies the
    assertion message.
    """
    interpreter = dukpy.JSInterpreter()
    res = interpreter.evaljs(''' babel = require('babel-6.14.0.min'); babel.transform(dukpy.es6code, {presets: ["es2015"]}).code; ''', es6code='let i=5;')
    # NOTE(review): the comparison below is an exact string match, so the
    # whitespace inside this literal must match Babel's output -- confirm
    # against the transformer's real output.
    expected = '''"use strict"; var i = 5;'''
    assert res == expected, report_diff(expected, res)
def setup_js(self):
    """Build the JS environment: export the browser helpers and load the runtime.

    Resets the node/handle lookup tables, creates a fresh interpreter in
    ``self.js_environment`` and evaluates ``browser/src/runtime.js`` in it.
    """
    self.node_to_handle = {}
    self.handle_to_node = {}

    self.js_environment = dukpy.JSInterpreter()
    # Python callables made available to scripts, in registration order.
    exports = (
        ("log", print),
        ("querySelectorAll", self.js_querySelectorAll),
        ("getAttribute", self.js_getAttribute),
        ("innerHTML", self.js_innerHTML),
        ("cookie", self.js_cookie),
    )
    for js_name, handler in exports:
        self.js_environment.export_function(js_name, handler)

    with open("browser/src/runtime.js") as runtime:
        self.js_environment.evaljs(runtime.read())
def __post_init__(self):
    """Create the Tk window, the per-pledge JS interpreters and bookkeeping state.

    Four interpreters are created, keyed 0..3. Each gets the same set of
    exported Python functions and evaluates ``runtime.js`` with its own key
    passed as the ``pledge`` value.
    """
    self.window = tkinter.Tk()
    self.canvas = tkinter.Canvas(self.window, width=800, height=600)
    self.canvas.pack()
    self.window.bind("<Up>", self.scroll_up)
    self.window.bind("<Down>", self.scroll_down)
    self.window.bind("<Button-1>", self.handle_click)
    self.history = []
    self.timer = Timer()

    # TODO: interpreter generation and mapping
    # Two bits to represent input and cookie: 0b00CI
    # If 0, False if 1, True
    self.js_interpreters = {level: dukpy.JSInterpreter() for level in range(4)}

    # map functions per interpreter
    exports = (
        ("log", print),
        ("querySelectorAll", self.js_querySelectorAll),
        ("evaluate", self.js_evaluate),
        ("getAttribute", self.js_getAttribute),
        ("setAttribute", self.js_setAttribute),
        ("innerHTML", self.js_innerHTML),
        ("textContent", self.js_textContent),
        ("cookie", self.js_cookie),
        ("sendPost", self.post),
        ("registerListener", self.register_listener),
    )
    for js in self.js_interpreters.values():
        for js_name, handler in exports:
            js.export_function(js_name, handler)

    # read in actual code -- the file is identical for every interpreter,
    # so read it once instead of re-opening it per interpreter
    with open("runtime.js") as f:
        runtime_source = f.read()
    for level, js in self.js_interpreters.items():
        js.evaljs(runtime_source, pledge=level)  # TODO: passing pledge level here

    self.js_handles = {}
    self.jar = {}
    self.pledge_map = {}
    self.PLEDGE_LEVELS = {
        'input': 0b01,
        'cookie': 0b10
    }
def parse_purchases_count_v1(self, products_data_js, wb_id):
    """Extract the orders count for ``wb_id`` from an inline product script.

    The script text is flattened (newlines and runs of two or more
    whitespace characters removed), the object literal passed to
    ``wb.product.DomReady.init(...)`` is cut out and evaluated with dukpy,
    and ``data.nomenclatures`` is searched for a key equal to ``wb_id``.

    :param products_data_js: text of the inline ``<script>`` block.
    :param wb_id: nomenclature key to look up (compared with ``==``).
    :return: the matching entry's ``ordersCount``, or ``None`` when the init
        call, the ``nomenclatures`` mapping or the id is not found.
    """
    products_data_js = re.sub('\n', '', products_data_js)
    products_data_js = re.sub(r'\s{2,}', '', products_data_js)

    # re.search instead of findall(...)[0]: a script without the init call
    # must yield None, not an IndexError.
    init_match = re.search(r'wb\.product\.DomReady\.init\(({.*?})\);',
                           products_data_js)
    if init_match is None:
        return None

    interpreter = dukpy.JSInterpreter()
    evaled_data = interpreter.evaljs(f'init={init_match.group(1)};init.data;')
    if evaled_data is not None and 'nomenclatures' in evaled_data.keys():
        for sku_id, data in evaled_data['nomenclatures'].items():
            if sku_id == wb_id:
                return data['ordersCount']
    return None
def test_js_matches(self, plugin, yaml_dict):
    """Every present JS matcher of ``plugin`` must report the fixture's version.

    The fixture code in ``yaml_dict['js']`` is evaluated in a fresh
    interpreter; a matcher counts as present when its ``check`` statement
    evaluates to something other than ``None``. At least one matcher has to
    be present.
    """
    fixture_js = yaml_dict['js']
    js = dukpy.JSInterpreter()
    # Create window browser object
    js.evaljs('window = {};')
    js.evaljs(fixture_js)

    checked = False
    for matcher in plugin.js_matchers:
        if js.evaljs(matcher['check']) is None:
            continue
        detected = js.evaljs(matcher['version'])
        assert yaml_dict['version'] == detected
        checked = True
    assert checked
def test_js_matches(self, plugin_name, match, plugin_list):
    """Every present JS matcher of the named plugin must report ``match['version']``.

    The fixture code in ``match['js']`` is evaluated in a fresh interpreter;
    a matcher counts as present when its ``check`` statement evaluates to
    something other than ``None``. At least one matcher has to be present.
    """
    fixture_js = match['js']
    js = dukpy.JSInterpreter()
    # Create window browser object
    js.evaljs('window = {};')
    js.evaljs(fixture_js)

    plugin = get_plugin_by_name(plugin_name, plugin_list)
    checked = False
    for matcher in plugin.js_matchers:
        if js.evaljs(matcher['check']) is None:
            continue
        detected = js.evaljs(matcher['version'])
        assert match['version'] == detected
        checked = True
    assert checked
def parse_purchases_count_v2(self, products_data_js, wb_id):
    """Extract the orders count from an inline ``wb.spa.init(...)`` script.

    The script text is flattened (newlines and runs of two or more
    whitespace characters removed), the ``routes: routes,`` and
    ``routesDictionary: routesDictionary,`` entries are stripped, and the
    object literal passed to ``wb.spa.init(...)`` is evaluated with dukpy.
    The count is read from ``router.ssrModel.selectedNomenclature``.

    :param products_data_js: text of the inline ``<script>`` block.
    :param wb_id: not used by this variant; kept for signature parity with
        ``parse_purchases_count_v1``.
    :return: ``ordersCount`` of the selected nomenclature, or ``None`` when
        the init call or any level of the expected structure is missing.
    """
    products_data_js = re.sub('\n', '', products_data_js)
    products_data_js = re.sub(r'\s{2,}', '', products_data_js)
    # These entries reference identifiers that the extracted literal does not
    # define on its own, so they are removed before evaluation.
    products_data_js = re.sub('routes: routes,', '', products_data_js)
    products_data_js = re.sub('routesDictionary: routesDictionary,', '', products_data_js)

    # re.search instead of findall(...)[0]: a script without the init call
    # must yield None, not an IndexError.
    init_match = re.search(r'wb\.spa\.init\(({.*?})\);', products_data_js)
    if init_match is None:
        return None

    interpreter = dukpy.JSInterpreter()
    node = interpreter.evaljs(f'init={init_match.group(1)};init.router;')

    # Walk router -> ssrModel -> selectedNomenclature, tolerating a missing
    # or null value at any level (evaljs returns None for undefined/null).
    for key in ('ssrModel', 'selectedNomenclature'):
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]
    if isinstance(node, dict) and 'ordersCount' in node:
        return node['ordersCount']
    return None
def calcUnit(self):
    """Rebuild every ``UnitSummary`` row by running ``UnitDataModel.js`` per unit.

    All existing summaries are deleted first. The ES6 model script is
    compiled to ES5 once; for every unit in the list view's queryset the
    script is evaluated with the unit's data and the maximum
    level/rank/rarity/love, and each name in the unit's ``data_tags`` is
    copied from the resulting model object onto a new ``UnitSummary``.
    """
    UnitSummary.objects.all().delete()

    # Upper bounds handed to udm.calc(); exposed to JS through the dukpy object.
    limits = {
        "max_level": UnitSummary.max_level(),
        "max_rank": UnitSummary.max_rank(),
        "max_rarity": UnitSummary.max_rarity(),
        "max_love": UnitSummary.max_love(),
    }

    model_path = finders.find("pcrd_unpack/scripts/elements/UnitDataModel.js")
    with open(model_path) as model_file:
        es6_source = model_file.read()
    es5_source = dukpy.babel_compile(es6_source)["code"]

    # A fresh UnitDataModel is instantiated for every calculation; the
    # original author notes this as a workaround for a dukpy bug.
    script = (
        es5_source,
        "var udm = new UnitDataModel()",
        "udm.unit_parameter = dukpy['value']",
        "udm.result_ids = dukpy['data_tags']",
        "udm.calc(dukpy['max_level'],dukpy['max_rank'],dukpy['max_rarity'],dukpy['max_love'])",
        "udm;",
    )

    interpreter = dukpy.JSInterpreter()
    for unit in UnitListView().get_queryset():
        unit_id = unit.unit_id
        unit_data = UnitJsonView().get_unit_data(unit_id=unit_id)
        context_data = UnitDetailView().get_context_data(unit_id=unit_id)
        tags = context_data["data_tags"]

        model = interpreter.evaljs(script, value=unit_data, data_tags=tags, **limits)

        summary = UnitSummary(unit_id=unit_id)
        for tag in context_data["data_tags"]:
            setattr(summary, tag, model[tag])
        summary.save()
def test_interpreter_keeps_context(self):
    """Variables defined in one ``evaljs`` call are visible in the next."""
    js = dukpy.JSInterpreter()

    initial = js.evaljs("var o = {'value': 5}; o")
    assert initial == {'value': 5}

    # ``o`` must still exist in the same interpreter.
    updated = js.evaljs("o.value += 1; o")
    assert updated == {'value': 6}
def test_module_loader_unexisting(self):
    """Requiring an unknown module raises JSRuntimeError naming the module."""
    js = dukpy.JSInterpreter()

    with self.assertRaises(JSRuntimeError) as raised:
        js.evaljs("require('missing_module');")

    message = str(raised.exception)
    assert 'cannot find module: missing_module' in message
def parse_good(self, response):
    """Parse a product page and schedule the follow-up requests.

    This is a generator callback. Depending on the page it yields:

    * a single request to the canonical URL (query string stripped) when
      the page's canonical link differs from ``response.url`` -- nothing
      else is parsed in that case;
    * otherwise a request for the product's reviews (ascending by date)
      carrying the populated item loader in ``meta['loader']``, followed by
      one request per product variant unless ``skip_variants`` is set or
      this page is itself a variant (``meta['parent_item']`` present).

    Behaviour switches read from the spider instance (all default to
    ``False``): ``skip_images``, ``skip_variants``, ``allow_dupes``.
    """

    def clear_url_params(url):
        # Drop the query string.
        return url.split('?')[0]

    def generate_reviews_link(base_url, sort='Asc'):
        # at first it is like https://www.wildberries.ru/catalog/8685970/detail.aspx
        # must be like https://www.wildberries.ru/catalog/8685970/otzyvy?field=Date&order=Asc
        link_param = response.css(
            '#Comments a.show-more::attr(data-link)').get()
        # Raw string: '\.' in a plain literal is an invalid escape sequence.
        return re.sub(r'detail\.aspx.*$',
                      f'otzyvy?field=Date&order={sort}&link={link_param}',
                      base_url)

    def add_netloc_to_url(url, base_url):
        # Turn a scheme-less, host-less URL into an absolute one using base_url.
        url_parsed = urlparse(url)
        if url_parsed.scheme == '' and url_parsed.netloc == '':
            base_url_parsed = urlparse(base_url)
            url = base_url_parsed.scheme + '://' + base_url_parsed.netloc + url
        return url

    skip_images = getattr(self, 'skip_images', False)
    skip_variants = getattr(self, 'skip_variants', False)
    allow_dupes = getattr(self, 'allow_dupes', False)

    current_good_item = WildsearchCrawlerItemWildberries()
    parent_item = response.meta.get('parent_item')
    loader = ItemLoader(item=current_good_item, response=response)

    # category position stats
    wb_category_url = response.meta.get('category_url')
    wb_category_name = response.meta.get('category_name')
    wb_category_position = response.meta.get('current_position')

    canonical_url = response.css('link[rel=canonical]::attr(href)').get()
    canonical_url = add_netloc_to_url(canonical_url, response.url)

    if canonical_url != response.url:
        # NOTE(review): only the position and category URL are forwarded;
        # category_name and parent_item are not -- confirm this is intended.
        yield response.follow(clear_url_params(canonical_url),
                              self.parse_good,
                              dont_filter=allow_dupes,
                              meta={
                                  'current_position': wb_category_position,
                                  'category_url': wb_category_url
                              })
        return

    # scraping brand and manufacturer countries
    wb_brand_country = ''
    wb_manufacture_country = ''

    for param in response.css('.params .pp'):
        param_name = param.css('span:nth-of-type(1) b::text').get()
        param_value = param.css('span:nth-of-type(2)::text').get()

        if u'Страна бренда' == param_name:
            wb_brand_country = param_value

        if u'Страна производитель' == param_name:
            wb_manufacture_country = param_value

    wb_id = response.css('div.article span::text').get()

    # fill css selectors fields
    loader.add_css('product_name', '.brand-and-name .name::text')
    loader.add_css('wb_reviews_count', '.count-review i::text')
    loader.add_css('wb_price', '.final-cost::text')
    loader.add_css('wb_rating', '.product-rating span::text')
    loader.add_css('wb_id', 'div.article span::text')

    # fill non-css values
    loader.add_value('wb_id', wb_id)
    loader.add_value('parse_date', datetime.datetime.now().isoformat(" "))
    loader.add_value('marketplace', 'wildberries')
    loader.add_value('product_url', response.url)
    loader.add_value('wb_brand_name',
                     response.css('.brand-and-name .brand::text').get())
    loader.add_value('wb_brand_url',
                     response.css('.brand-logo a::attr(href)').get())
    loader.add_value('wb_brand_logo',
                     response.css('.brand-logo img::attr(src)').get())
    loader.add_value('wb_brand_country', wb_brand_country)
    loader.add_value('wb_manufacture_country', wb_manufacture_country)
    loader.add_value('wb_category_url', wb_category_url)
    loader.add_value('wb_category_name', wb_category_name)
    loader.add_value('wb_category_position', wb_category_position)

    # create list of images
    if skip_images is False:
        # Thumbnail URLs are rewritten by replacing every 'tm' with 'big'.
        image_urls = [
            tm.get().strip().replace('tm', 'big')
            for tm in response.css('.pv-carousel .carousel a img::attr(src)')
        ]
        loader.add_value('image_urls', image_urls)

    # create list of features
    features = {}
    for feature in response.css('.params .pp'):
        features[feature.css('span:nth-of-type(1) b::text').get().strip(
        )] = feature.css('span:nth-of-type(2)::text').get().strip()

    loader.add_value('features', features)

    # get purchase count from inline JavaScript block with data
    products_data_js = response.xpath(
        '//script[contains(., "wb.product.DomReady.init")]/text()').get()

    if products_data_js is not None and str(products_data_js) != '':
        products_data_js = re.sub('\n', '', products_data_js)
        products_data_js = re.sub(r'\s{2,}', '', products_data_js)

        # re.search instead of findall(...)[0]: the purchases count is
        # optional, so a script that does not match the pattern must not
        # abort the whole item with an IndexError.
        init_match = re.search(
            r'wb\.product\.DomReady\.init\(({.*?})\);', products_data_js)

        if init_match is not None:
            interpreter = dukpy.JSInterpreter()
            evaled_data = interpreter.evaljs(
                f'init={init_match.group(1)};init.data;')

            if evaled_data is not None and 'nomenclatures' in evaled_data.keys():
                for sku_id, data in evaled_data['nomenclatures'].items():
                    if sku_id == wb_id:
                        loader.add_value('wb_purchases_count',
                                         data['ordersCount'])
                        break

    if parent_item is not None:
        loader.add_value('wb_parent_id', parent_item.get('wb_id', ''))

    # get reviews dates
    yield response.follow(generate_reviews_link(response.url, 'Asc'),
                          callback=self.parse_good_first_review_date,
                          errback=self.parse_good_errback,
                          meta={'loader': loader},
                          headers={'x-requested-with': 'XMLHttpRequest'})

    # follow goods variants only if we scrape parent item
    if skip_variants is False and parent_item is None:
        for variant in response.css('.options ul li a::attr(href)'):
            yield response.follow(clear_url_params(variant.get()),
                                  callback=self.parse_good,
                                  meta={'parent_item': current_good_item})