def executable_code(code_str, space, global_context=True):
    # Compile ``code_str`` onto the shared VM tape and return a zero-argument
    # callable that executes that fragment under either the global object or
    # the currently active execution context.
    # parse first to check if any SyntaxErrors
    parsed = parse(code_str)
    old_tape_len = len(space.byte_generator.exe.tape)
    space.byte_generator.record_state()
    start = space.byte_generator.exe.get_new_label()
    skip = space.byte_generator.exe.get_new_label()
    # Guard jump: normal tape flow must not fall into the fragment.
    space.byte_generator.emit('JUMP', skip)
    space.byte_generator.emit('LABEL', start)
    space.byte_generator.emit(parsed)
    space.byte_generator.emit('NOP')
    space.byte_generator.emit('LABEL', skip)
    space.byte_generator.emit('NOP')
    space.byte_generator.restore_state()
    space.byte_generator.exe.compile(start_loc=old_tape_len)  # dont read the code from the beginning, dont be stupid!
    ctx = space.GlobalObj if global_context else space.exe.current_ctx
    def ex_code():
        # Run only the [start, skip) window of the tape under ctx.
        ret, status, token = space.byte_generator.exe.execute_fragment_under_context(ctx, start, skip)
        # todo Clean up the tape!
        # this is NOT a way to do that because the fragment may contain the executable code! We dont want to remove it
        #del space.byte_generator.exe.tape[old_tape_len:]
        if status == 0:
            # normal completion
            return ret
        elif status==3:
            # status 3 carries a raised JS exception in ``token``
            raise token
        else:
            raise RuntimeError('Unexpected return status during JIT execution: %d' % status)
    return ex_code
def executable_code(code_str, space, global_context=True):
    # Compile ``code_str`` onto the shared VM tape and return a zero-argument
    # callable that executes that fragment under either the global object or
    # the currently active execution context.
    # parse first to check if any SyntaxErrors
    parsed = parse(code_str)
    old_tape_len = len(space.byte_generator.exe.tape)
    space.byte_generator.record_state()
    start = space.byte_generator.exe.get_new_label()
    skip = space.byte_generator.exe.get_new_label()
    # Guard jump: normal tape flow must not fall into the fragment.
    space.byte_generator.emit('JUMP', skip)
    space.byte_generator.emit('LABEL', start)
    space.byte_generator.emit(parsed)
    space.byte_generator.emit('LABEL', skip)
    space.byte_generator.emit('NOP')
    space.byte_generator.restore_state()
    space.byte_generator.exe.compile(start_loc=old_tape_len)  # dont read the code from the beginning, dont be stupid!
    ctx = space.GlobalObj if global_context else space.exe.current_ctx
    def ex_code():
        # Run only the [start, skip) window of the tape under ctx.
        ret, status, token = space.byte_generator.exe.execute_fragment_under_context(ctx, start, skip)
        # todo Clean up the tape!
        # this is NOT a way to do that because the fragment may contain the executable code! We dont want to remove it
        #del space.byte_generator.exe.tape[old_tape_len:]
        if status == 0:
            # normal completion
            return ret
        elif status==3:
            # status 3 carries a raised JS exception in ``token``
            raise token
        else:
            raise RuntimeError('Unexpected return status during JIT execution: %d' % status)
    return ex_code
def handle_data(self, data: str):
    """Harvest file ids out of an ``AF_initDataCallback`` script body.

    Only fires while inside a <script> tag; appends each discovered id
    to ``self.file_ids`` when the blob's key is ``ds:4``.
    """
    if not self._in_script:
        return
    if not data.startswith('AF_initDataCallback'):
        return
    tree = pyjsparser.parse(data)
    callback_props = tree['body'][0]['expression']['arguments'][0]['properties']
    found_key = None
    found_data = None
    for node in callback_props:
        name = node['key']['name']
        if name == 'key':
            found_key = node['value']['value']
        elif name == 'data':
            found_data = node['value']['elements']
    if found_key != 'ds:4' or not found_data:
        return
    for item in found_data[4]['elements']:
        self.file_ids.append(item['elements'][0]['value'])
def getEpisodeEndpoint(soup):
    """Return the jkanime episode URL template scraped from the page script."""
    script_pattern = re.compile(r'var invertir = .')
    container = soup.find('div', id='container')
    script_source = container.find('script', text=script_pattern).text
    tree = parse(script_source)
    call_args = tree['body'][1]['expression']['arguments'][0]['body']['body'][0]['expression']['arguments']
    endpoint = call_args[0]['left']['left']['value']
    # '{}' placeholder is filled in later with the episode number.
    return 'https://jkanime.net' + endpoint + '{}/'
def visit_files(files, visitor, ctx):
    """Yield ``visitor().visit(ast)`` for each JS file, logging failures.

    Failures are logged (with traceback at DEBUG level) and skipped so a
    single bad file does not abort the whole walk.
    """
    for path in files:
        with io.open(path) as handle:
            ctx.logger.info("%s", path)
            try:
                tree = pyjsparser.parse(handle.read())
                yield visitor().visit(tree)
            except Exception as exc:
                if ctx.logger.isEnabledFor(logging.DEBUG):
                    ctx.logger.exception("while visiting %s", path)
                else:
                    ctx.logger.error("%s while visiting %s", exc, path)
def SearchInputInScript(input, script):
    # Search for ``input`` inside a <script> body: first inside JS comments,
    # then inside the parsed top-level AST statements.  Returns a list of
    # hit descriptors; an unparseable script yields [].
    # Test whether the input appears in a JS comment.
    comments = getComment(script)
    index = 0
    ret = []
    for comment in comments:
        if input not in comment["value"]:
            continue
        # Found inside a comment.
        # NOTE(review): because of the ``continue`` above, ``index`` only
        # advances for comments that contain the input — "position" counts
        # matches, not all comments.  Confirm this is intended.
        _type = comment["type"]
        if _type == "Line":
            # single-line comment
            ret.append({
                "type": "InlineComment",
                "position": index,
                "details": {
                    "tagname": "script",
                    "content": comment["value"],
                    "attibutes": []
                }
            })
        elif _type == "Block":
            # multi-line comment
            ret.append({
                "type": "BlockComment",
                "position": index,
                "details": {
                    "tagname": "script",
                    "content": comment["value"],
                    "attibutes": []
                }
            })
        index += 1
    try:
        nodes = parse(script)
    except pyjsparser.pyjsparserdata.JsSyntaxError as e:
        # Not valid JS — give up on AST analysis entirely.
        return []
    index = 0
    for node in nodes["body"]:
        found = analyse(input, node)
        if found:
            ret.append({
                "type": found["type"],
                "position": index,
                "details": {
                    "tagname": "script",
                    "content": found["value"],
                    "attibutes": []
                }
            })
        index += 1
    return ret
def build_code(expr, filename=None):
    """Compile the JS expression *expr* to a Python code object (eval mode).

    :param expr: JavaScript source for a single expression.
    :param filename: optional pseudo-filename; a unique one is generated
        when omitted.
    :raises RuntimeError: if pyjsparser is not installed.
    """
    try:
        import pyjsparser
    except ImportError:
        raise RuntimeError("JSExpr requires pyjsparser to be installed.")
    if filename is None:
        filename = "<js_to_py:{}>".format(uuid.uuid4())
    js_tree = pyjsparser.parse(expr)
    return compile(_transform_js_to_python(js_tree), filename, mode="eval")
def normal_decode(script):
    # De-obfuscate ``script`` by rewriting its AST in place and regenerating
    # source.  The rewrite passes communicate through the module-level
    # hmaps/lmaps/hldel tables, which are reset here before each run.
    global hmaps, lmaps, hldel
    hmaps, lmaps, hldel = {}, {}, []
    tree = pyjsparser.parse(script)
    convenience_tree_remake(tree)
    handle_key_value()
    handle_key_value_input()
    handle_key_values_delete()
    remake_switch()
    remake_binary_exp()
    # convenience_names(tree)  # Compresses function/parameter names; only touches
    # names matching a fixed regex to avoid destroying original parameters — use
    # with caution.
    # print("Warning: names matching _0x[0-9a-zA-Z]{6} are compressed automatically — use carefully!")
    return get_script_from_tree(tree)
def eval_js_vm(js):
    """Parse, compile and run *js* on the bytecode VM; return the result."""
    generator = ByteCodeGenerator(Code())
    space = Space()
    # Wire the generator's executor and the space to each other.
    generator.exe.space = space
    space.exe = generator.exe
    generator.emit(pyjsparser.parse(js))
    fill_space.fill_space(space, generator)
    # print generator.exe.tape
    generator.exe.compile()
    return generator.exe.run(generator.exe.space.GlobalObj)
def getEpisodes(soup):
    """Return episode dicts scraped from the page's ``var episodes`` script.

    Each dict has ``no_episode`` and ``id_episode`` keys; the list is
    reversed so episodes come out oldest-first.
    """
    script_pattern = re.compile(r"var episodes = .")
    script_source = soup.find('script', text=script_pattern).text
    tree = parse(script_source)
    raw_entries = tree['body'][1]['declarations'][0]['init']['elements']
    episodes = [
        {
            'no_episode': entry['elements'][0]['value'],
            'id_episode': entry['elements'][1]['value'],
        }
        for entry in raw_entries
    ]
    episodes.reverse()
    return episodes
def check_sentive_content(self, resp: str) -> set:
    """Scan a JS/JSON response body for sensitive string literals.

    :param resp: raw response text (JavaScript, or a bare JSON object).
    :return: set of matched sensitive contents; empty on parse failure.
    """
    script = resp.strip()
    # Bug fix: the original indexed script[0] unconditionally, raising
    # IndexError on an empty or whitespace-only response.
    if not script:
        return set()
    # A bare JSON object is not a valid JS statement; wrap it in an
    # assignment so the parser accepts it.
    if script[0] == "{":
        script = "d=" + script
    try:
        nodes = parse(script)["body"]
    except pyjsparser.pyjsparserdata.JsSyntaxError:
        return set()
    literals = analyse_Literal(nodes)
    result = set()
    for item in literals:
        v = self.info_search(item)
        if v:
            result.add(v["content"])
    return result
def getStreamOptions(soup):
    """Return [{server_name, link}] for each video host in the page script."""
    script_pattern = re.compile(r'var videos = .')
    script_source = soup.find('script', text=script_pattern).text
    tree = parse(script_source)
    raw_options = tree['body'][5]['declarations'][0]['init']['properties'][0]['value']['elements']
    options = []
    for raw in raw_options:
        name = raw['properties'][1]['value']['value']
        # MEGA keeps its link in a different property slot than other hosts.
        link_slot = 2 if name == 'MEGA' else 3
        options.append({
            'server_name': name,
            'link': raw['properties'][link_slot]['value']['value'],
        })
    return options
def eval_js_vm(js, debug=False):
    """Parse, compile and run *js* on the VM; dump the tape when *debug*."""
    generator = ByteCodeGenerator(Code(debug_mode=debug))
    space = Space()
    # Wire the generator's executor and the space to each other.
    generator.exe.space = space
    space.exe = generator.exe
    generator.emit(pyjsparser.parse(js))
    fill_space.fill_space(space, generator)
    if debug:
        from pprint import pprint
        pprint(generator.exe.tape)
        print()
    generator.exe.compile()
    return generator.exe.run(generator.exe.space.GlobalObj)
def main():
    # Manual smoke test (Python 2): compile esprima.js with the bytecode
    # generator and have the VM-hosted esprima parse its own source.
    from space import Space
    import fill_space
    from pyjsparser import parse
    import json
    a = ByteCodeGenerator(Code())
    s = Space()
    fill_space.fill_space(s, a)
    a.exe.space = s
    s.exe = a.exe
    con = get_file_contents('internals/esprima.js')
    # Append a call that makes the loaded esprima parse its own source text.
    d = parse(con+(''';JSON.stringify(exports.parse(%s), 4, 4)''' % json.dumps(con)))
    # Alternative toy inputs kept for debugging:
    # d = parse('''
    # function x(n) {
    #     log(n)
    #     return x(n+1)
    # }
    # x(0)
    # ''')
    # var v = 333333;
    # while (v) {
    #     v--
    # }
    a.emit(d)
    print a.declared_vars
    print a.exe.tape
    print len(a.exe.tape)
    a.exe.compile()
    def log(this, args):
        # Tiny host function exposed to the VM for debugging output.
        print args[0]
        return 999
    print a.exe.run(a.exe.space.GlobalObj)
def read_js(app, modules):
    """
    :type app: sphinx.application.Sphinx
    :type modules: Dict[str, jsdoc.ModuleDoc]
    """
    default_roots = [os.path.join(app.confdir, '..')]
    roots = map(os.path.normpath, app.config.js_roots or default_roots)
    # Collect every *.js file below a 'static/src/js' directory.
    files = [
        os.path.join(dirpath, filename)
        for root in roots
        for dirpath, _, filenames in os.walk(root)
        if 'static/src/js' in dirpath
        for filename in filenames
        if filename.endswith('.js')
    ]
    modules.update((mod.name, mod) for mod in ABSTRACT_MODULES)
    progress = it(app)(files, "Parsing javascript files...", length=len(files))
    for name in progress:
        with io.open(name) as f:
            tree = pyjsparser.parse(f.read())
            matched = parser.ModuleMatcher(name).visit(tree)
            modules.update((mod.name, mod) for mod in matched)
    _resolve_references(modules)
def getParamsFromHtml(html):
    """Collect candidate parameter names from an HTML document.

    Names come from <input name=...> attributes and from identifiers found
    by analysing inline <script> bodies.  Returns [] as soon as any script
    fails to parse.
    """
    html_parser = MyHTMLParser()
    html_parser.feed(html)
    result = set()
    for token in html_parser.getTokenizer():
        tagname = token["tagname"].lower()
        if tagname == "input":
            for key, value in token["attibutes"]:
                if key == "name":
                    result.add(value)
                    break
        elif tagname == "script":
            try:
                nodes = pyjsparser.parse(token["content"]).get("body", [])
            except pyjsparser.pyjsparserdata.JsSyntaxError:
                return []
            result |= set(analyse_js(nodes))
    return list(result)
def scrapeEpisode(slug, no_episode):
    """Return streaming options [{server_name, link}] for one episode.

    Returns [] when the episode page cannot be fetched.
    """
    url = 'https://jkanime.net/{}/{}/'.format(slug, no_episode)
    response = cfscraper.get(url, headers=randomUserAgent())
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.content, 'html.parser')
    script_pattern = re.compile(r'var video = .')
    script_source = soup.find('script', text=script_pattern).text
    tree = parse(script_source)
    iframe_array = [
        node['expression']['right']['value']
        for node in tree['body']
        if videoExists(node)
    ]
    links = getStreamingOptions(iframe_array)
    server_names = getServerNames(soup)
    # Pair each link with its server name, positionally.
    return [
        {'server_name': server_names[i], 'link': links[i]}
        for i in range(len(links))
    ]
def get_sojson_encoder(script):
    # Locate the sojson string-table decoder inside an obfuscated script.
    # Returns (decoder function name, decoder mapping/engine, remaining AST).
    # NOTE: relies on ``idx`` and ``node`` leaking out of the for loop after
    # ``break`` — the loop must hit the FunctionExpression declaration.
    tree = pyjsparser.parse(script)
    encsname, encseles, encsnumb = None, None, None
    for idx, node in enumerate(tree['body']):
        # The obfuscated string table: a var declared as an ArrayExpression.
        if node['type'] == 'VariableDeclaration' and \
                node.get('declarations'):
            for encs in node.get('declarations'):
                if encs.get('type') == 'VariableDeclarator' and encs.get(
                        'init').get('type') == 'ArrayExpression':
                    encsname = encs.get('id').get('name')
                    encseles = [
                        i.get('value')
                        for i in encs.get('init').get('elements')
                    ]
        # The shuffle call: second argument is the rotation count.
        if node['type'] == 'ExpressionStatement' and node['expression'].get(
                'type') == 'CallExpression':
            encsnumb = node.get('expression').get('arguments')[1].get('value')
        # Stop at the decoder function declaration itself.
        if node['type'] == 'VariableDeclaration' and \
                node.get('declarations') and \
                node.get('declarations')[0].get('init') and \
                node.get('declarations')[0].get('init').get('type') == 'FunctionExpression':
            break
    # Split the AST: prologue (through the decoder) vs the rest of the script.
    oldtree = tree['body'].copy()
    tree['body'] = oldtree[:idx + 1]
    funcname = node.get('declarations')[0].get('id').get('name')
    use_my_rc4 = True
    if use_my_rc4:
        # Fast path: emulate the decoder with a native RC4 implementation.
        s = SojsonRc4(encseles, encsnumb)
        decoder = {}
        decoder[funcname] = s.sojsonrc4
    else:
        # Fallback: actually execute the decoder prologue with js2py.
        decoder = js2py.EvalJs()
        convenience_tree_null(tree)
        decoder.execute(get_script_from_tree(tree))
    tree['body'] = oldtree[idx + 1:]
    return funcname, decoder, tree
def get_flatlist(html):
    # Scrape a cian.ru listing page: find the embedded JS state, extract the
    # "offers" property and convert each offer into a FlatListItem.
    res = BeautifulSoup(html, 'lxml')
    js = pyjsparser.parse(
        next(s for s in res.find_all('script') if '"priceRur"' in s.text).text)
    offers = next(o['value'] for t, o in js_traverse(js)
                  if t == 'Property' and o['key']['value'] == 'offers')
    offers = [js_parse_object_expression(o) for o in offers['elements']]
    return [
        FlatListItem(
            int(o['id']),
            o['fullUrl'],
            # PDF export URL mirrors the offer's path.
            urljoin('https://cian.ru/export/pdf/',
                    urlparse(o['fullUrl']).path[1:]),
            # NOTE(review): values <= 5000 are multiplied by 65 — presumably a
            # non-RUB price converted at a hard-coded rate; confirm semantics.
            (o['bargainTerms']['priceRur'] if o['bargainTerms']['priceRur'] >
             5000 else o['bargainTerms']['priceRur'] * 65),
            o['bargainTerms']['deposit'],
            o['bargainTerms']['clientFee'],
            (o['bargainTerms'].get('agentBonus', 0) or 0),
            [ug['name'] for ug in o['geo']['undergrounds']],
            int(o['roomsCount'] or 1),
            # Fallback: assume one bedroom fewer than rooms, minimum 1.
            int(o['bedroomsCount'] or max(1, int(o['roomsCount'] or 0) - 1)),
            o['description'],
            o['geo']['userInput'],
            [p['fullUrl'] for p in o['photos']],
            o) for o in offers
    ]
def extract_strings(javascript):
    """Return every string literal found anywhere in *javascript* source."""
    print("Extracting strings...", file=sys.stderr)
    tree = parse(javascript)
    collected = []

    def walk(node, acc):
        # Depth-first walk over the nested dict/list AST.
        if node is None:
            return
        if isinstance(node, (list, tuple)):
            for child in node:
                walk(child, acc)
        elif isinstance(node, dict):
            is_str_literal = (
                node.get("type") == "Literal"
                and "value" in node
                and isinstance(node["value"], str)
            )
            if is_str_literal:
                acc.append(node["value"])
            for value in node.values():
                walk(value, acc)
        return acc

    walk(tree, collected)
    return collected
def read_js(app, modules):
    """
    :type app: sphinx.application.Sphinx
    :type modules: Dict[str, jsdoc.ModuleDoc]
    """
    roots = map(
        os.path.normpath,
        app.config.js_roots or [os.path.join(app.confdir, '..')],
    )
    # Every *.js file under a 'static/src/js' directory of any root.
    files = []
    for root in roots:
        for dirpath, _, filenames in os.walk(root):
            if 'static/src/js' not in dirpath:
                continue
            files.extend(
                os.path.join(dirpath, fn)
                for fn in filenames if fn.endswith('.js')
            )
    modules.update((mod.name, mod) for mod in ABSTRACT_MODULES)
    for name in it(app)(files, "Parsing javascript files...", length=len(files)):
        with io.open(name) as f:
            tree = pyjsparser.parse(f.read())
            modules.update(
                (mod.name, mod)
                for mod in parser.ModuleMatcher(name).visit(tree)
            )
    _resolve_references(modules)
def _parse_line(self, line):
    # Parse one line of a JS-style config file and fold its declarations and
    # assignments into ``self._config``:
    #   var x = <literal>            -> scalar entry
    #   var x = new T(a, b, ...)     -> list of literal arguments
    #   x.y = ...                    -> appended [value] rows under key x
    #   x[i].y.z = ...               -> values appended under x at index i
    _parsed_config = pyjsparser.parse(line)
    for i in _parsed_config["body"]:
        if i["type"] == "VariableDeclaration":
            if i["declarations"][0]["init"]["type"] == "Literal":
                self._config[i["declarations"][0]["id"]["name"]] = i[
                    "declarations"][0]["init"]["value"]
            elif i["declarations"][0]["init"]["type"] == "NewExpression":
                # Constructor call: keep only literal arguments.
                temp = []
                for j in i["declarations"][0]["init"]["arguments"]:
                    if j["type"] == "Literal":
                        temp.append(j["value"])
                self._config[i["declarations"][0]["id"]["name"]] = temp
        elif i["type"] == "ExpressionStatement":
            if i["expression"]["type"] == "AssignmentExpression" \
                    and i["expression"]["left"]["type"] == "MemberExpression":
                if i["expression"]["left"]["object"][
                        "type"] == "Identifier":
                    # Simple member assignment: obj.prop = value
                    if not self._config.get(
                            i["expression"]["left"]["object"]["name"]):
                        self._config[i["expression"]["left"]["object"]
                                     ["name"]] = []
                    values_1 = []
                    if i["expression"]["right"]["type"] == "Literal":
                        values = i["expression"]["right"]["value"]
                    elif i["expression"]["right"][
                            "type"] == "NewExpression":
                        values = []
                        for k in i["expression"]["right"]["arguments"]:
                            if k["type"] == "Literal":
                                values.append(k["value"])
                    else:
                        # Unsupported RHS — record a placeholder 0.
                        values = 0
                    values_1.append(values)
                    self._config[i["expression"]["left"]["object"]
                                 ["name"]].append(values_1)
                elif i["expression"]["left"]["object"][
                        "type"] == "MemberExpression":
                    # Nested member assignment: ident[index].prop.sub = value
                    if i["expression"]["left"]["object"]["object"][
                            "type"] == "MemberExpression":
                        if i["expression"]["left"]["object"]["object"][
                                "object"]["type"] == "Identifier":
                            index_1 = i["expression"]["left"]["object"][
                                "object"]["object"]["name"]
                            index_2 = int(i["expression"]["left"]["object"]
                                          ["object"]["property"]["raw"])
                            if not self._config.get(index_1):
                                self._config[index_1] = []
                            if i["expression"]["right"][
                                    "type"] == "Literal":
                                values = i["expression"]["right"]["value"]
                            elif i["expression"]["right"][
                                    "type"] == "NewExpression":
                                values = []
                                for k in i["expression"]["right"][
                                        "arguments"]:
                                    if k["type"] == "Literal":
                                        values.append(k["value"])
                            else:
                                values = 0
                            # NOTE(review): indexing a freshly-created empty
                            # list at index_2 would raise IndexError — this
                            # seems to rely on earlier lines having populated
                            # self._config[index_1]; confirm against callers.
                            if len(self._config[index_1][index_2]) < 2:
                                self._config[index_1][index_2].append([])
                            self._config[index_1][index_2][1].append(
                                values)
def _parse_controller(self):
    # Inspect the generated module's snippet XML (to classify the snippet
    # type) and its animation JS (to find which model the snippet renders),
    # storing results on the module template record.
    module = self._module
    lst_model_name = module.template_model_name.split(";")
    snippet_xml_ids = [
        a for a in self.env["ir.ui.view"].search([
            (
                "arch_fs",
                "=",
                f"{module.template_module_name}/views/snippets.xml",
            ),
            ("inherit_id.key", "=", "website.snippets"),
        ])
    ]
    if snippet_xml_ids:
        if len(snippet_xml_ids) == 1:
            item_found_snippet_type = None
            my_xml = minidom.parseString(snippet_xml_ids[0].arch_db)
            lst_xpath = my_xml.getElementsByTagName("xpath")
            if lst_xpath:
                # Classify the snippet type from the xpath expr target.
                for xml_dom in lst_xpath:
                    for attr, str_item in xml_dom.attributes.items():
                        if attr == "expr":
                            if "snippet_structure" in str_item:
                                module.template_generate_website_snippet_type = (
                                    "structure")
                                item_found_snippet_type = xml_dom
                            elif "snippet_effect" in str_item:
                                module.template_generate_website_snippet_type = (
                                    "effect")
                                item_found_snippet_type = xml_dom
                            elif "snippet_feature" in str_item:
                                module.template_generate_website_snippet_type = (
                                    "feature")
                                item_found_snippet_type = xml_dom
                            elif "snippet_content" in str_item:
                                module.template_generate_website_snippet_type = (
                                    "content")
                                item_found_snippet_type = xml_dom
            if item_found_snippet_type:
                lst_field_name = [
                    a.name
                    for a in self._model_extractor.model_id.field_id
                ]
                if not module.template_module_path_generated_extension:
                    return
                # The stored path extension looks like a printed tuple;
                # normalise it into a relative path.
                relative_path_generated_module = module.template_module_path_generated_extension.replace(
                    "'", "").replace(", ", "/")
                path_generated_module = os.path.normpath(
                    os.path.join(
                        module.path_sync_code,
                        relative_path_generated_module,
                        module.template_module_name,
                        "static",
                        "src",
                        "js",
                        f"website.{module.template_module_name}.animation.js",
                    ))
                lst_js_file = glob.glob(path_generated_module)
                is_in_list = False
                # TODO optimize, this is call for each model, but it's a unique result
                for js_file in lst_js_file:
                    with open(js_file, "r") as f:
                        js_code = f.read()
                        token_js = parse(js_code)
                        lst_field_founded_name = []
                        self.recursive_search_field_text(
                            token_js, lst_field_founded_name)
                        # Validate that all fields referenced by the JS exist
                        # on this model; if so, this is the snippet's model.
                        is_in_list = bool(lst_field_founded_name
                                          and lst_field_name)
                        for field_name in lst_field_founded_name:
                            if field_name not in lst_field_name:
                                is_in_list = False
                                break
                        if is_in_list:
                            if (module.
                                    template_generate_website_snippet_generic_model
                                ):
                                _logger.warning(
                                    "Not supported multiple model in portal"
                                    " controller about snippet.")
                            else:
                                module.template_generate_website_snippet_generic_model = (
                                    self.var_model)
        else:
            _logger.warning("Not support extraction multiple snippet.")
def extractValue(data, key, dump=True, folder="", default=''): """Extract value from dict and pipe through modifiers :param data: :param multikey: :param dump: :return: """ #global jsparser try: # Parse key name, key, pipeline = parseKey(key) # Input: dict. Output: string, number, list or dict value = getDictValue(data, key, dump, default) for idx, modifier in enumerate(pipeline): value = value if type(value) is list else [value] if modifier.startswith('js:'): # Input: list of strings. # Output if dump==True: list of strings # Output if dump==False: list of dict, list, string or number selector = modifier[3:] items = [] for x in value: try: x = x.replace('\\\\"', '\\"') tree = pyjsparser.parse(x) items += jsWalkValues(tree) except Exception as e: items.append({'error': str(e)}) items = [ getDictValue(x, selector, dump=dump, default=[]) for x in items ] # Flatten list if not dumped if not dump: value = flattenList(items) else: value = items elif modifier.startswith('json:'): # Input: list of strings. # Output if dump==True: list of strings # Output if dump==False: list of dict, list, string or number selector = modifier[5:] items = [ getDictValue(json.loads(x), selector, dump=dump) for x in value ] # Flatten list if not dumped if not dump: value = flattenList(items) else: value = items elif modifier.startswith('not:'): selector = modifier[4:] check = [x == selector for x in value] value = not any(check) elif modifier.startswith('is:'): selector = modifier[3:] check = [x == selector for x in value] value = any(check) elif modifier.startswith('re:'): # Input: list of strings. # Output: list of strings selector = modifier[3:] items = [re.findall(selector, x) for x in value] # Flatten (first group in match if re.findall returns multiple groups) value = [] for matches in items: for match in matches: if (type(match) is tuple): value.append(match[0]) else: value.append(match) elif modifier.startswith('css:'): # Input: list of strings. 
# Output: list of strings selector = modifier[4:] value = [extractHtml(x, selector, type='css') for x in value] value = [y for x in value for y in x] elif modifier.startswith('xpath:'): # Input: list of strings. # Output: list of strings selector = modifier[6:] value = [extractHtml(x, selector, type='xpath') for x in value] value = [y for x in value for y in x] # Load file contents (using modifiers after a pipe symbol) elif modifier == 'file': value = value[0] with open(os.path.join(folder, value), 'rb') as file: value = file.read() elif modifier == 'base64': value = value[0] value = b64encode(value.encode('utf-8')).decode('utf-8') elif modifier == 'length': value = len(value) elif modifier == "timestamp": value = [ datetime.fromtimestamp(int(x)).isoformat() for x in value ] elif modifier == "shortdate": value = [ str(datetime.strptime(x, '%a %b %d %H:%M:%S %z %Y')) for x in value ] # If modified in pipeline (otherwise already handled by getDictValue)... if dump and (type(value) is dict): value = json.dumps(value) if dump and (type(value) is list): value = ";".join(value) elif dump and (isinstance(value, int)): value = str(value) return (name, value) except Exception as e: return (None, default)
import sys
from pyjsparser import parse

# Quick smoke test: parse test.js and dump the raw AST.
# NOTE(review): the file handle is never closed — consider a with-block.
test = open("test.js").read()
parsed = parse(test)
print(parsed)


class JSObject(object):
    # Minimal JavaScript object model: a property store plus a prototype
    # link.  Subclasses override apply/construct for callable objects.

    def __init__(self, proto):
        self.storage = {}  # type: dict[unicode, JSObject]
        self.proto = proto  # type: JSObject

    def apply(self, thisArg, argumentsList):
        # [[Call]] — only meaningful on function objects.
        raise Exception("Not Implemented")

    def construct(self, args):
        # [[Construct]] — only meaningful on constructor objects.
        raise Exception("Not Implemented")

    def defineProperty(self, key, desc):
        raise Exception('Not implemented')

    def deleteProperty(self, key):
        # NOTE(review): stores None instead of removing the key — a later
        # lookup will still find the slot; confirm this is intended.
        assert isinstance(key, JSString)
        self.storage[key.string] = None
        return JSUndefined()

    def get(self, key):
        assert isinstance(key, JSString)
def get_flats(html):
    """Yield offer data parsed from the page's embedded JS state."""
    document = BeautifulSoup(html, 'lxml')
    state_script = next(
        tag for tag in document.find_all('script') if '"offerId"' in tag.text
    ).text
    tree = pyjsparser.parse(state_script)
    yield from js_findall_offer_data(tree)
def get_js_bytecode(js):
    """Compile *js* and return the generated bytecode tape (no execution)."""
    generator = ByteCodeGenerator(Code())
    generator.emit(pyjsparser.parse(js))
    return generator.exe.tape
def parse(s, source=None):
    """Parse JS source *s* into module docs, post-process and return them."""
    ast = pyjsparser.parse(s)
    modules = parser.ModuleMatcher(source).visit(ast)
    post(modules)
    return modules
(function(){ console.log(ffff); })(); return a+b; }; try{ function nihao(){ console.log('nihao'); } console.log(123); } catch (_0x59ca51){ console.log(456+123); } var a = 123;''' import json import pyjsparser s = pyjsparser.parse(script) v = json.dumps(s, indent=4) print('========================================') print(v) s['body'][0]['id']['name'] = '傻逼了' # 修改一个函数名 import js2py.py_node_modules.escodegen as escodegen escodegen = escodegen.var.get('escodegen') v = escodegen.get('generate')(s) # 用树重新生成js代码 print('========================================') print(v.to_python())
# -*- coding: utf-8 -*- from pyjsparser import parse from common import * from json import dumps app_data_dir = "../psclient/src/main/res/raw" target_file_name = "learnsets.json" url_js_file = "http://play.pokemonshowdown.com/data/learnsets.js" data = get_remote_data(url_js_file) log("Decoding JavaScript data...") obj = parse(data) properties = obj['body'][0]['expression']['right']['properties'] learnsets = dict() set_log_p() for poke_entry in properties: species = poke_entry['key']['name'] learnset_entry = poke_entry['value']['properties'][0] has_learnset = learnset_entry['value']['type'] == 'ObjectExpression' if has_learnset: move_entries = learnset_entry['value']['properties'] moves = [] for move_entry in move_entries: log_p() key_entry = move_entry['key'] if 'raw' in key_entry.keys(): moves.append(move_entry['key']['raw']) # 'return' move case else: moves.append(move_entry['key']['name'])