def test_recursion(self):
    """Parse a long chain of string concatenations and expect a ``Script``."""
    # One repetition of the concatenated string operands; it is repeated 20
    # times to build a very long ``+`` expression.
    repeated_operands = (
        '""+"%u8300"+"%u2f8d"+""+""+"%u8300"+"%u2f8d"+""+""+"%u8300"+"%u2f8d"+""+""+"%u8300"+'
    )
    prefix = 'var testcode = unescape(""+'
    suffix = '"");'
    script = prefix + repeated_operands * 20 + suffix

    r = parse(script)

    self.assertIsInstance(r, Script)
def test_basic(self):
    """A single ``var`` declaration parses to the expected Program dict."""
    # Build the expected tree bottom-up so each node is easy to read.
    identifier = {"type": "Identifier", "name": "$"}
    literal = {"type": "Literal", "value": "Hello!", "raw": '"Hello!"'}
    declarator = {
        "type": "VariableDeclarator",
        "id": identifier,
        "init": literal,
    }
    declaration = {
        "type": "VariableDeclaration",
        "declarations": [declarator],
        "kind": "var",
    }
    expected = {
        "sourceType": "script",
        "type": "Program",
        "body": [declaration],
    }

    actual = toDict(parse('var $ = "Hello!"'))

    self.assertEqual(expected, actual)
def ecomment_dict(self):
    """
    Return a mapping of event ID to eComment URL, building it on first use.

    The mapping is parsed from the JavaScript files listed in
    ``self.ECOMMENT_JS_URLS``, which contain lines like:

    activateEcomment('750', '138A085F-0AC1-4A33-B2F3-AC3D6D9F710B', 'https://metro.granicusideas.com/meetings/750-finance-budget-and-audit-committee-on-2020-03-16-5-00-pm-test');

    The result is cached on ``self._ecomment_dict``.
    """
    cached = getattr(self, '_ecomment_dict', None)
    if cached is not None:
        return cached

    collected = {}

    # Callback applied by esprima to each node, e.g.,
    # https://esprima.readthedocs.io/en/latest/syntactic-analysis.html#example-console-calls-removal
    def record_activate_call(node, metadata):
        callee = node.callee
        if callee and callee.name == 'activateEcomment':
            # First argument is the event ID, third is the comment URL.
            event_id, _, comment_url = node.arguments
            collected[event_id.value] = comment_url.value

    for url in self.ECOMMENT_JS_URLS:
        response = self.get(url)
        esprima.parse(response.text, delegate=record_activate_call)

    self._ecomment_dict = collected
    return self._ecomment_dict
async def parse_book_from_javascript(bot):
    """Locate the site's main JavaScript bundle and return its URL.

    Fetches ``https://blaseball.com/``, finds the single ``<script>`` tag whose
    ``src`` starts with the main bundle path (first on the CloudFront host
    named by ``bot.config["cloudflare_id"]``, then as a site-relative path),
    and downloads that bundle.

    Returns:
        A ``(book, js_url)`` tuple. ``book`` is currently always ``None``
        because the function returns before the AST parsing step.

    Raises:
        Exception: if the site cannot be reached, or if zero or more than one
            matching script tag is found.
    """
    try:
        response = await request_text("https://blaseball.com/")
    except ssl.CertificateError as err:
        # Keep the original cause attached for debugging.
        raise Exception("Could not connect to blaseball.com") from err
    if not response:
        raise Exception("Could not connect to blaseball.com")

    soup = BeautifulSoup(response, 'html.parser')
    # "\\." yields the same backslash-dot selector text as before without
    # relying on an invalid escape sequence in a non-raw string literal.
    script_tags = soup.select(
        f'script[src^="https://{bot.config["cloudflare_id"]}.cloudfront.net/static/js/main\\."]'
    )
    if not script_tags:
        script_tags = soup.select('script[src^="/static/js/main\\."]')
    if not script_tags:
        raise Exception('Could not find the main JS file.')
    if len(script_tags) > 1:
        raise Exception('More than one main JS files found.')

    script_tag = script_tags[0]
    src = script_tag.attrs['src']
    js_url = urljoin('https://blaseball.com', src)
    js = await request_text(js_url)

    # NOTE(review): this unconditional return makes everything below
    # unreachable. Behaviour is preserved as-is; confirm whether Book parsing
    # is meant to stay disabled, then delete either this return or the dead
    # code that follows.
    return None, js_url

    ast = esprima.parse(js)
    book_of_blaseball_visitor = BookOfBlaseballVisitor()
    book_function_node = book_of_blaseball_visitor.find_book_function_node(ast)
    if book_function_node is None:
        raise Exception(
            'Could not find the FunctionDeclaration node for rendering the Book in the AST.'
        )
    book_parser_visitor = BookParserVisitor()
    return book_parser_visitor.parse_book(book_function_node), js_url


# if __name__ == '__main__':
#     try:
#         book = await parse_book_from_javascript()
#         print(book)
#     except Exception as e:
#         print(e)
async def pre_ast(self):
    """Pre-parse every queued file group into ``self.pre_result``.

    Drains ``self.target_queue``; each item is ``(extension, {'count': ...,
    'list': [...]})``. Depending on the extension and on ``self.lan``:

    * PHP files are parsed and ``define(...)`` calls are recorded in
      ``self.define_dict``.
    * Chrome extensions are unpacked, their ``manifest.json`` is read, and the
      referenced JS/HTML files are queued for later iterations.
    * HTML files have their external scripts collected and their inline
      scripts appended to a sibling ``tmp.js``; the scripts are then queued.
    * JavaScript files are beautified into ``<file>.pretty`` (if missing) and
      parsed with esprima.

    Processing stops early when ``self.lan`` is empty.

    Returns:
        ``True`` always.
    """
    while not self.target_queue.empty():
        fileext = self.target_queue.get()

        if not self.lan:
            break

        if fileext[0] in ext_dict['php'] and 'php' in self.lan:
            # Handling logic for PHP files
            for filepath in fileext[1]['list']:
                all_nodes = []
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'php'
                self.pre_result[filepath]['ast_nodes'] = []

                with codecs.open(filepath, "r", encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()
                # self.pre_result[filepath]['content'] = code_content

                try:
                    parser = make_parser()
                    all_nodes = parser.parse(code_content,
                                             debug=False,
                                             lexer=lexer.clone(),
                                             tracking=True)
                    # Store the parsed nodes in the result dict
                    self.pre_result[filepath]['ast_nodes'] = all_nodes
                except (SyntaxError, AssertionError):
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(
                        filepath, traceback.format_exc()))
                except Exception:
                    # Best effort: log and carry on with the next file, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    logger.warning('[AST] something error, {}'.format(
                        traceback.format_exc()))

                # Search for all constants declared through define()
                for node in all_nodes:
                    if isinstance(node, php.FunctionCall) and node.name == "define":
                        define_params = node.params
                        logger.debug(
                            "[AST][Pretreatment] new define {}={}".format(
                                define_params[0].node, define_params[1].node))
                        self.define_dict[define_params[0].node] = define_params[1].node

        elif fileext[0] in ext_dict['chromeext'] and 'chromeext' in self.lan:
            # Preprocessing for Chrome extensions:
            # extract the JS and HTML files they contain.
            for filepath in fileext[1]['list']:
                child_files = []
                child_files_html = []
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'chromeext'

                # First, unpack the crx archive
                try:
                    target_files_path = un_zip(filepath)
                    self.pre_result[filepath]['target_files_path'] = target_files_path
                except zipfile.BadZipFile:
                    logger.warning(
                        "[Pretreatment][Chrome Ext] file {} not zip".format(filepath))
                    continue
                except OSError:
                    logger.warning(
                        "[Pretreatment][Chrome Ext] file {} unzip error".format(filepath))
                    continue

                # Analyze manifest.json
                manifest_path = os.path.join(target_files_path, "manifest.json")

                # The target may be a single file, which needs special handling
                if not (self.target_directory.endswith("/")
                        or self.target_directory.endswith("\\")
                        ) and not os.path.isdir(self.target_directory):
                    path_list = re.split(r'[\\|/]', self.target_directory)
                    relative_path = os.path.join(path_list[-1] + "_files")
                else:
                    relative_path = target_files_path.split(self.target_directory)[-1]
                    if relative_path.startswith('\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                if os.path.isfile(manifest_path):
                    with codecs.open(manifest_path, "r", encoding='utf-8',
                                     errors='ignore') as fi:
                        manifest_content = fi.read()

                    try:
                        # json.loads() no longer accepts ``encoding`` (removed
                        # in Python 3.9); the content is already a str.
                        manifest = json.loads(manifest_content)
                    except json.decoder.JSONDecodeError:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

                    self.pre_result[filepath]["manifest"] = manifest

                    # Optimization: when only the extension itself is being
                    # scanned, skip collecting its JS and HTML files.
                    if len(self.lan) and self.lan == 'chromeext':
                        logger.debug(
                            "[Pretreatment][Chrome Ext] pass js & html scan...")
                        continue

                    # content scripts
                    if "content_scripts" in manifest:
                        for script in manifest["content_scripts"]:
                            if "js" in script:
                                child_files.extend([
                                    os.path.join(relative_path, js)
                                    for js in script['js']
                                ])

                    # background js
                    if "background" in manifest:
                        if "scripts" in manifest["background"]:
                            child_files.extend([
                                os.path.join(relative_path, js)
                                for js in manifest["background"]["scripts"]
                            ])

                        # background html
                        if "page" in manifest["background"]:
                            child_files_html.append(
                                os.path.join(relative_path,
                                             manifest["background"]["page"]))

                    # popup.html
                    if "browser_action" in manifest:
                        if "default_popup" in manifest["browser_action"]:
                            child_files_html.append(
                                os.path.join(
                                    relative_path,
                                    manifest["browser_action"]["default_popup"]))

                    # web_accessible_resources
                    if "web_accessible_resources" in manifest:
                        for resource in manifest["web_accessible_resources"]:
                            if ".js" in resource:
                                child_files.append(
                                    os.path.join(relative_path, resource))
                            if ".html" in resource:
                                child_files_html.append(
                                    os.path.join(relative_path, resource))

                    # chrome_url_overrides
                    if "chrome_url_overrides" in manifest:
                        for key in manifest["chrome_url_overrides"]:
                            child_files_html.append(
                                os.path.join(
                                    relative_path,
                                    manifest["chrome_url_overrides"][key]))

                    self.pre_result[filepath]["child_files"] = child_files

                    if child_files:
                        # Queue the collected scripts for a later iteration
                        self.target_queue.put(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))
                        # Also extend the file list that was passed in from
                        # outside (shared by reference)
                        self.file_list.append(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                    if child_files_html:
                        self.target_queue.put(('.html', {
                            'count': len(child_files_html),
                            'list': child_files_html
                        }))
                else:
                    logger.warning(
                        "[Pretreatment][Chrome Ext] File {} parse error..."
                        .format(target_files_path))
                    continue

        elif fileext[0] in ext_dict['html'] and 'javascript' in self.lan:
            # For HTML, only the JavaScript it references/contains is collected
            for filepath in fileext[1]['list']:
                filepath = self.get_path(filepath)
                script_list = []

                try:
                    with codecs.open(filepath, "r", encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()
                except OSError:
                    # Covers FileNotFoundError as well
                    continue

                # tmp.js collects all inline javascript code
                tmp_path = os.path.join(os.path.dirname(filepath), "tmp.js")

                # The context manager closes tmp.js even when parsing fails.
                with codecs.open(tmp_path, "a", encoding='utf-8',
                                 errors='ignore') as fi2:
                    try:
                        soup = BeautifulSoup(code_content, "html.parser")
                        script_tag_list = soup.find_all('script')

                        for script_tag in script_tag_list:
                            script_attrs = script_tag.attrs

                            if 'src' in script_attrs:
                                parents_path = os.path.normpath("\\".join(
                                    re.split(r'[\\|/]', filepath)[:-1]))
                                script_path = os.path.join(
                                    parents_path, script_attrs['src'])
                                script_list.append(script_path)
                            else:
                                # No src attribute means inline JS
                                script_content = script_tag.string
                                fi2.write(" \n{}\n ".format(script_content))
                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))
                        continue

                if tmp_path not in script_list:
                    script_list.append(tmp_path)

                # Queue the collected scripts for a later iteration
                self.target_queue.put(('.js', {
                    'count': len(script_list),
                    'list': script_list
                }))
                # Also extend the file list that was passed in from outside
                self.file_list.append(('.js', {
                    'count': len(script_list),
                    'list': script_list
                }))

        elif fileext[0] in ext_dict['javascript'] and 'javascript' in self.lan:
            # Preprocessing for JavaScript: build an AST for later analysis
            for filepath in fileext[1]['list']:
                filepath = self.get_path(filepath)

                if not filepath.endswith(".js"):
                    continue

                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'javascript'
                self.pre_result[filepath]['ast_nodes'] = []

                try:
                    with codecs.open(filepath, "r", encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()
                except OSError:
                    # Covers FileNotFoundError as well
                    continue

                # Beautify the code and write it to a new file
                new_filepath = filepath + ".pretty"

                try:
                    if not os.path.isfile(new_filepath):
                        # Beautify before opening the output so a failure
                        # does not leave an empty, unclosed .pretty file.
                        code_content = jsbeautifier.beautify(code_content)
                        with codecs.open(new_filepath, "w", encoding='utf-8',
                                         errors='ignore') as fi2:
                            fi2.write(code_content)

                    # self.pre_result[filepath]['content'] = code_content
                    all_nodes = esprima.parse(code_content, {"loc": True})

                    # Store the parsed nodes in the result dict
                    self.pre_result[filepath]['ast_nodes'] = all_nodes
                except (SyntaxError, AssertionError):
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(
                        filepath, traceback.format_exc()))
                except esprima.error_handler.Error:
                    logger.warning(
                        '[AST] [ERROR] Invalid regular expression in {}...'
                        .format(filepath))
                except KeyboardInterrupt:
                    # Logger.log() requires a level argument; use warning().
                    logger.warning('[AST stop...')
                    exit()
                except Exception:
                    logger.warning('[AST] something error, {}'.format(
                        traceback.format_exc()))
                    continue

        # Manual garbage collection after each queue item
        gc.collect()

    return True
async def pre_ast(self):
    """Pre-parse every queued file group into ``self.pre_result``.

    Drains ``self.target_queue``; each item is ``(extension, {'count': ...,
    'list': [...]})``. PHP files are parsed and their ``define(...)`` calls
    recorded in ``self.define_dict``; Chrome extensions are unpacked and the
    content scripts named in ``manifest.json`` are queued as ``.js`` files;
    JavaScript files are beautified into ``<file>.pretty`` (if missing) and
    parsed with esprima.

    Returns:
        ``True`` always.
    """
    while not self.target_queue.empty():
        fileext = self.target_queue.get()

        if fileext[0] in ext_dict['php']:
            # Handling logic for PHP files
            for filepath in fileext[1]['list']:
                all_nodes = []
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'php'
                self.pre_result[filepath]['ast_nodes'] = []

                with codecs.open(filepath, "r", encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()
                self.pre_result[filepath]['content'] = code_content

                try:
                    parser = make_parser()
                    all_nodes = parser.parse(code_content,
                                             debug=False,
                                             lexer=lexer.clone(),
                                             tracking=True)
                    # Store the parsed nodes in the result dict
                    self.pre_result[filepath]['ast_nodes'] = all_nodes
                except (SyntaxError, AssertionError):
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(
                        filepath, traceback.format_exc()))
                except Exception:
                    # Best effort: log and carry on, but let
                    # KeyboardInterrupt/SystemExit propagate.
                    logger.warning('[AST] something error, {}'.format(
                        traceback.format_exc()))

                # Search for all constants declared through define()
                for node in all_nodes:
                    if isinstance(node, php.FunctionCall) and node.name == "define":
                        define_params = node.params
                        logger.debug(
                            "[AST][Pretreatment] new define {}={}".format(
                                define_params[0].node, define_params[1].node))
                        self.define_dict[define_params[0].node] = define_params[1].node

        elif fileext[0] in ext_dict['chromeext']:
            # Preprocessing for Chrome extensions:
            # extract the JS files they contain.
            for filepath in fileext[1]['list']:
                child_files = []
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'chromeext'

                # First, unpack the crx archive
                try:
                    target_files_path = un_zip(filepath)
                    self.pre_result[filepath]['target_files_path'] = target_files_path
                except zipfile.BadZipFile:
                    logger.warning(
                        "[Pretreatment][Chrome Ext] file {} not zip".format(filepath))
                    continue

                # Analyze manifest.json
                manifest_path = os.path.join(target_files_path, "manifest.json")

                # The target may be a single file, which needs special handling
                if not (self.target_directory.endswith("/")
                        or self.target_directory.endswith("\\")
                        ) and not os.path.isdir(self.target_directory):
                    relative_path = os.path.join(
                        re.split(r'[\\|/]', self.target_directory)[-1] + "_files")
                else:
                    relative_path = target_files_path.split(self.target_directory)[-1]
                    if relative_path.startswith('\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                if os.path.isfile(manifest_path):
                    with codecs.open(manifest_path, "r", encoding='utf-8',
                                     errors='ignore') as fi:
                        manifest_content = fi.read()

                    try:
                        # json.loads() no longer accepts ``encoding`` (removed
                        # in Python 3.9); the content is already a str.
                        manifest = json.loads(manifest_content)
                    except json.decoder.JSONDecodeError:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

                    self.pre_result[filepath]["manifest"] = manifest

                    if "content_scripts" in manifest:
                        for script in manifest["content_scripts"]:
                            if 'js' in script:
                                child_files.extend([
                                    os.path.join(relative_path, js)
                                    for js in script['js']
                                ])

                    self.pre_result[filepath]["child_files"] = child_files

                    # Queue the content scripts for a later iteration
                    self.target_queue.put(('.js', {
                        'count': len(child_files),
                        'list': child_files
                    }))
                    # Also extend the file list that was passed in from
                    # outside (shared by reference)
                    self.file_list.append(('.js', {
                        'count': len(child_files),
                        'list': child_files
                    }))
                else:
                    logger.warning(
                        "[Pretreatment][Chrome Ext] File {} parse error..."
                        .format(target_files_path))
                    continue

        elif fileext[0] in ext_dict['javascript']:
            # Preprocessing for JavaScript: build an AST for later analysis
            for filepath in fileext[1]['list']:
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'javascript'
                self.pre_result[filepath]['ast_nodes'] = []

                with codecs.open(filepath, "r", encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()

                # Beautify the code and write it to a new file
                new_filepath = filepath + ".pretty"
                if not os.path.isfile(new_filepath):
                    # Beautify before opening the output so a failure does
                    # not leave an empty, unclosed .pretty file behind.
                    code_content = jsbeautifier.beautify(code_content)
                    with codecs.open(new_filepath, "w", encoding='utf-8',
                                     errors='ignore') as fi2:
                        fi2.write(code_content)

                self.pre_result[filepath]['content'] = code_content

                try:
                    all_nodes = esprima.parse(code_content, {"loc": True})
                    # Store the parsed nodes in the result dict
                    self.pre_result[filepath]['ast_nodes'] = all_nodes
                except (SyntaxError, AssertionError):
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(
                        filepath, traceback.format_exc()))
                except esprima.error_handler.Error:
                    logger.warning(
                        '[AST] [ERROR] Invalid regular expression in {}...'
                        .format(filepath))
                except KeyboardInterrupt:
                    # Logger.log() requires a level argument; use warning().
                    logger.warning('[AST stop...')
                    exit()
                except Exception:
                    logger.warning('[AST] something error, {}'.format(
                        traceback.format_exc()))
                    continue

        # Manual garbage collection after each queue item
        gc.collect()

    return True
def pre_ast(self, lan=None):
    """Pre-parse every entry of ``self.file_list`` into ``self.pre_result``.

    PHP files are parsed and their ``define(...)`` calls recorded in
    ``self.define_dict``; Chrome extensions are unpacked and the content
    scripts named in ``manifest.json`` are recorded; JavaScript files are
    parsed with esprima.

    Args:
        lan: optional iterable of language names for the current scan. When
            given and none of them is in ``could_ast_pase_lans``, nothing is
            parsed.

    Returns:
        ``True`` when pretreatment is skipped because of ``lan``; otherwise
        ``None`` once all files have been processed.
    """
    if lan is not None:
        # Check whether any requested language is in the AST-parsable list
        if not set(lan).intersection(could_ast_pase_lans):
            logger.info("[AST][Pretreatment] Current scan target language does not require ast pretreatment...")
            return True

    for fileext in self.file_list:
        if fileext[0] in ext_dict['php']:
            # Handling logic for PHP files
            for filepath in fileext[1]['list']:
                all_nodes = []
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'php'
                self.pre_result[filepath]['ast_nodes'] = []

                # Context manager so the handle is always closed
                with codecs.open(filepath, "r", encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()
                self.pre_result[filepath]['content'] = code_content

                try:
                    parser = make_parser()
                    all_nodes = parser.parse(code_content, debug=False,
                                             lexer=lexer.clone(), tracking=True)
                    # Store the parsed nodes in the result dict
                    self.pre_result[filepath]['ast_nodes'] = all_nodes
                except (SyntaxError, AssertionError):
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))
                except Exception:
                    # Best effort: log and carry on, but let
                    # KeyboardInterrupt/SystemExit propagate.
                    logger.warning('[AST] something error, {}'.format(traceback.format_exc()))

                # Search for all constants declared through define()
                for node in all_nodes:
                    if isinstance(node, php.FunctionCall) and node.name == "define":
                        define_params = node.params
                        logger.debug("[AST][Pretreatment] new define {}={}".format(define_params[0].node, define_params[1].node))
                        self.define_dict[define_params[0].node] = define_params[1].node

        elif fileext[0] in ext_dict['chromeext']:
            # Preprocessing for Chrome extensions:
            # extract the JS files they contain.
            for filepath in fileext[1]['list']:
                # A fresh list per extension; a single shared list would leak
                # one extension's scripts into every later extension's entry.
                child_files = []
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'chromeext'

                # First, unpack the crx archive
                target_files_path = un_zip(filepath)
                self.pre_result[filepath]['target_files_path'] = target_files_path

                # Analyze manifest.json
                manifest_path = os.path.join(target_files_path, "manifest.json")
                relative_path = target_files_path.split(self.target_directory)[-1]

                if relative_path.startswith('\\') or relative_path.startswith("/"):
                    relative_path = relative_path[1:]

                if os.path.isfile(manifest_path):
                    with codecs.open(manifest_path, "r", encoding='utf-8',
                                     errors='ignore') as fi:
                        manifest_content = fi.read()
                    manifest = json.loads(manifest_content)

                    self.pre_result[filepath]["manifest"] = manifest

                    if "content_scripts" in manifest:
                        for script in manifest["content_scripts"]:
                            # Entries without a "js" key are skipped
                            if 'js' in script:
                                child_files.extend([os.path.join(relative_path, js) for js in script['js']])

                    self.pre_result[filepath]["child_files"] = child_files
                else:
                    logger.warning("[Pretreatment][Chrome Ext] File {} parse error...".format(target_files_path))
                    continue

        elif fileext[0] in ext_dict['javascript']:
            # Preprocessing for JavaScript: build an AST for later analysis
            for filepath in fileext[1]['list']:
                filepath = self.get_path(filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'javascript'
                self.pre_result[filepath]['ast_nodes'] = []

                with codecs.open(filepath, "r", encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()
                self.pre_result[filepath]['content'] = code_content

                try:
                    all_nodes = esprima.parse(code_content, {"loc": True})
                    # Store the parsed nodes in the result dict
                    self.pre_result[filepath]['ast_nodes'] = all_nodes
                except (SyntaxError, AssertionError):
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))
                except Exception:
                    logger.warning('[AST] something error, {}'.format(traceback.format_exc()))
def parse_js_file(self):
    """Parse the stored JavaScript source, keep the result, then run ``__set_classes``."""
    parsed = parse(self.__js_file)
    self.__js_file_parsed = parsed
    self.__set_classes()
def esprima_interface(program='var help = 5'):
    """Return the result of ``esprima.parse`` for the given *program* source."""
    parsed_program = esprima.parse(program)
    return parsed_program
def __init_ast(self, script):
    """Parse *script* and store the tree on ``self.ast`` converted with ``esprima.toDict``."""
    # Location tracking on, tolerant mode on.
    parse_options = {'loc': True, 'tolerant': True}
    self.ast = esprima.parse(script, parse_options)
    self.ast = esprima.toDict(self.ast)
def __init_ast(self, script):
    """Parse *script* into ``self.ast``, passing ``self.visitor`` as the parse delegate."""
    # Location tracking on, tolerant mode on.
    parse_options = {'loc': True, 'tolerant': True}
    self.ast = esprima.parse(script, parse_options, delegate=self.visitor)
def extract(self, source):
    """Parse *source* and return one extracted fragment per top-level node."""
    tree = esprima.parse(source, loc=True)
    fragments = []
    for node in tree.body:
        fragments.append(self._extract_fragment(node))
    return fragments
def measure_js_parity(pypath, jspath, outpath=None):
    """
    Get all methods and attributes for classes in psychopy.visual and
    psychojs.visual, for comparison.

    Parameters
    ----------
    pypath : str or Path
        Folder containing the Python source files listed below.
    jspath : str or Path
        Folder containing the JavaScript source files listed below.
    outpath : str, Path or None
        If given, one ``<name>.csv`` comparison table per component is
        written to this folder.

    Returns
    -------
    attrs : dict
        For each component, the constructor params, methods and attributes
        found in the JS and Python class definitions.
    compr : dict
        For each component, lists of names present in both / only Python /
        only JS. When `outpath` is given these lists are padded in place with
        None to a common length.

    Raises
    ------
    ValueError
        If a listed class cannot be found in its source file.
    """

    def _listcomp(a, b):
        """
        Convenience function for quickly getting arrays of differences
        between two lists (a and b).

        Returns
        ===
        justa : list
            Elements only present in list a
        justb : list
            Elements only present in b
        both : list
            Elements present in both lists
        either : list
            Elements present in at least one of the lists
        """
        # Get as sets
        a = set(a)
        b = set(b)
        # Do comparison
        justa = list(a.difference(b))
        justb = list(b.difference(a))
        both = list(a & b)
        either = list(a | b)

        return justa, justb, both, either

    # Pathify paths
    pypath = Path(pypath)
    jspath = Path(jspath)

    # Dict with classes & filenames for visual components which exist in PsychoPy and PsychoJS
    attrs = {
        "ButtonStim": {
            'js': {'file': 'ButtonStim.js', 'cls': "ButtonStim"},
            'py': {'file': 'button.py', 'cls': "ButtonStim"},
        },
        "Form": {
            'js': {'file': 'Form.js', 'cls': "Form"},
            'py': {'file': 'form.py', 'cls': "Form"},
        },
        "ImageStim": {
            'js': {'file': 'ImageStim.js', 'cls': "ImageStim"},
            'py': {'file': 'image.py', 'cls': "ImageStim"},
        },
        "MovieStim3": {
            'js': {'file': 'MovieStim.js', 'cls': "MovieStim"},
            'py': {'file': 'movie3.py', 'cls': "MovieStim3"},
        },
        "Polygon": {
            'js': {'file': 'Polygon.js', 'cls': "Polygon"},
            'py': {'file': 'polygon.py', 'cls': "Polygon"},
        },
        "Rect": {
            'js': {'file': 'Rect.js', 'cls': "Rect"},
            'py': {'file': 'rect.py', 'cls': "Rect"},
        },
        "ShapeStim": {
            'js': {'file': 'ShapeStim.js', 'cls': "ShapeStim"},
            'py': {'file': 'shape.py', 'cls': "ShapeStim"},
        },
        "Slider": {
            'js': {'file': 'Slider.js', 'cls': "Slider"},
            'py': {'file': 'slider.py', 'cls': "Slider"},
        },
        "TextBox2": {
            'js': {'file': 'TextBox.js', 'cls': "TextBox"},
            'py': {'file': 'textbox2/textbox2.py', 'cls': "TextBox2"},
        },
        "TextStim": {
            'js': {'file': 'TextStim.js', 'cls': "TextStim"},
            'py': {'file': 'text.py', 'cls': "TextStim"},
        },
        "BaseVisualStim": {
            'js': {'file': 'VisualStim.js', 'cls': "VisualStim"},
            'py': {'file': 'basevisual.py', 'cls': "BaseVisualStim"},
        },
    }

    # Create blank output arrays
    for name in attrs:
        # Create output array
        arr = {'init': [], 'methods': {}, 'attribs': {}}
        # Append to js and py
        attrs[name]['js'].update(deepcopy(arr))
        attrs[name]['py'].update(deepcopy(arr))

    # For each class, get dicts of methods and attributes
    for name in attrs:
        # --- Parse JS file ---
        # Read as UTF-8 explicitly rather than with the platform default
        with open(jspath / attrs[name]['js']['file'], 'r', encoding='utf-8') as f:
            code = f.read()
        tree = esprima.parse(code, sourceType='module')
        # Get class def
        cls = None
        for node in tree.body:
            if node.type == "ExportNamedDeclaration":
                if (node.declaration.type == "ClassDeclaration"
                        and node.declaration.id.name == attrs[name]['js']['cls']):
                    cls = node
        if cls is None:
            raise ValueError(
                f"Could not find class def for {attrs[name]['js']['cls']} in {attrs[name]['js']['file']}"
            )
        # Get methods & properties
        for node in cls.declaration.body.body:
            if node.value.type == "FunctionExpression":
                # Get flattened list of params
                paramNames = []
                for param in node.value.params:
                    if param.type == "AssignmentPattern":
                        # If parameter is a dict style assignment pattern, break it apart
                        if param.left.type == "ObjectPattern":
                            for prop in param.left.properties:
                                paramNames.append(prop.key.name)
                        # If parameter is an expression, store name
                        elif param.left.type == "Identifier":
                            paramNames.append(param.left.name)
                    elif param.type == "Identifier":
                        paramNames.append(param.name)
                # Skip protected methods
                if node.key.name is None or node.key.name.startswith("_"):
                    continue
                # If it's the constructor method, store params
                if node.kind == "constructor":
                    attrs[name]['js']['init'] = paramNames
                # If it's a getter, store its name & whether it's settable
                # (True only if a setter with this name was already seen)
                elif node.kind == "get":
                    attrs[name]['js']['attribs'][node.key.name] = \
                        node.key.name in attrs[name]['js']['attribs']
                # If it's a setter, store its name & the fact that it's settable
                elif node.kind == "set":
                    attrs[name]['js']['attribs'][node.key.name] = True
                # If it's regular method, store its name and params
                elif node.kind == "method":
                    attrs[name]['js']['methods'][node.key.name] = paramNames

        # --- Parse Py file ---
        with open(pypath / attrs[name]['py']['file'], 'r', encoding='utf-8') as f:
            code = f.read()
        tree = ast.parse(code)
        # Get class def
        cls = None
        for node in tree.body:
            if isinstance(node, ast.ClassDef) and node.name == attrs[name]['py']['cls']:
                cls = node
        if cls is None:
            raise ValueError(
                f"Could not find class def for {attrs[name]['py']['cls']} in {attrs[name]['py']['file']}"
            )
        # Get methods and attributes
        for node in cls.body:
            if isinstance(node, ast.FunctionDef):
                # Get flattened list of params
                paramNames = []
                for param in node.args.args:
                    if param.arg == "self":
                        continue
                    paramNames.append(param.arg)
                # Get string list of decorators
                decorators = []
                for dec in node.decorator_list:
                    if isinstance(dec, ast.Name):
                        decorators.append(dec.id)
                    if isinstance(dec, ast.Attribute):
                        decorators.append(dec.attr)
                # If it's the constructor method, store params
                if node.name == "__init__":
                    attrs[name]['py']['init'] = paramNames
                # Skip protected methods
                elif node.name is None or node.name.startswith("_"):
                    continue
                # If it's a getter, store its name & whether it's settable
                elif "property" in decorators:
                    attrs[name]['py']['attribs'][node.name] = \
                        node.name in attrs[name]['py']['attribs']
                # If it's a setter, store its name & the fact that it's settable
                elif "setter" in decorators:
                    attrs[name]['py']['attribs'][node.name] = True
                # If it's regular method, store its name and params
                else:
                    attrs[name]['py']['methods'][node.name] = paramNames

    # --- Compare ---
    compr = {}
    # Iterate through components
    for name in attrs:
        # Add field to comparison dict
        compr[name] = OrderedDict({})
        # Compare init params, attributes and method names
        for key in ('init', 'attribs', 'methods'):
            # Get lists
            py = attrs[name]['py'][key]
            js = attrs[name]['js'][key]
            # Do comparison
            justpy, justjs, both, _either = _listcomp(py, js)
            # Store in dict
            compr[name][f'{key}_both'] = both
            compr[name][f'{key}_py'] = justpy
            compr[name][f'{key}_js'] = justjs
        # Add empty column
        compr[name]['|||'] = []
        # Compare params for each method
        for key in compr[name]['methods_both']:
            # Get lists
            py = attrs[name]['py']['methods'][key]
            js = attrs[name]['js']['methods'][key]
            # Do comparison
            justpy, justjs, both, _either = _listcomp(py, js)
            # Store in dict
            compr[name][f'{key}_both'] = both
            compr[name][f'{key}_py'] = justpy
            compr[name][f'{key}_js'] = justjs

    # If asked to, save to a table
    if outpath:
        # Pathify output path
        outpath = Path(outpath)
        # Save csv's
        for name, data in compr.items():
            # Pad columns (in place) to the longest one so that they form a
            # rectangular table
            ncols = max(len(val) for val in data.values())
            for column in data.values():
                column.extend([None] * (ncols - len(column)))
            # Make a pandas dataframe
            df = pd.DataFrame(data)
            # Write to csv
            df.to_csv(outpath / f"{name}.csv")

    return attrs, compr
def test(self):
    """Run one fixture: parse or tokenize ``filename`` and compare with ``result_file``.

    The expected JSON may carry its own ``options``; otherwise options are
    derived from the fixture type and from what the expected tree contains.
    A raised ``Error`` is compared via its ``toDict()`` form.
    """
    comment_keys = ('leadingComments', 'trailingComments', 'innerComments')

    def has_attached_comment(tree):
        """Return True if any (nested) dict in *tree* has an attached-comment key."""
        for key, value in tree.items():
            if key in comment_keys:
                return True
            if isinstance(value, dict):
                if has_attached_comment(value):
                    return True
            elif isinstance(value, list):
                if any(isinstance(item, dict) and has_attached_comment(item)
                       for item in value):
                    return True
        return False

    with open(result_file, 'rb') as f:
        expected_json = f.read()
    expected = toDict(json.loads(expected_json.decode('utf-8')))

    options = None
    if isinstance(expected, dict):
        # Not all json failure files include description
        expected.pop('description', None)
        # tokenize is not part of errors
        expected.pop('tokenize', None)
        # Extracts options from tree (if any)
        options = expected.pop('options', None)

    with open(filename, 'rb') as f:
        actual_code = f.read()
    if '.source.' in filename:
        actual_code = SOURCE_RE.sub(r'\2', actual_code).decode('unicode_escape')
    else:
        actual_code = actual_code.decode('utf-8')

    try:
        if result_type == '.tokens':
            if options is None:
                options = {
                    'loc': True,
                    'range': True,
                    'comment': True,
                    'tolerant': True,
                }
            actual = toDict(tokenize(actual_code, options=options))
        else:
            if '.module.' in filename:
                sourceType = 'module'
            else:
                sourceType = 'script'

            if options is None:
                options = {
                    'jsx': True,
                    'comment': 'comments' in expected,
                    'range': True,
                    'loc': True,
                    'tokens': True,
                    'raw': True,
                    'tolerant': 'errors' in expected,
                    'source': None,
                    'sourceType': expected.get('sourceType', sourceType),
                }

            if options.get('comment'):
                options['attachComment'] = has_attached_comment(expected)

            # Mirror the range/loc settings of the expected tokens/comments.
            if expected.get('tokens'):
                first_token = expected['tokens'][0]
                options['range'] = 'range' in first_token
                options['loc'] = 'loc' in first_token

            if expected.get('comments'):
                first_comment = expected['comments'][0]
                options['range'] = 'range' in first_comment
                options['loc'] = 'loc' in first_comment

            if options.get('loc'):
                options['source'] = expected.get('loc', {}).get('source')

            actual = toDict(parse(actual_code, options=options))
    except Error as e:
        actual = e.toDict()

    self.assertEqual(expected, actual)
def astgen(self, inpath, outfile, root=None, configpath=None, pkg_name=None, pkg_version=None, evaluate_smt=False):
    """
    Parse the JavaScript input files and write the collected API usage to `outfile`.

    There are two ways to implement the javascript ast parsing, each of them
    has its pros and cons. One is to directly use the npm esprima module, the
    other is to use the pypi esprima module.

    1. The npm module is the latest version and has lots of features to use
       directly. But it doesn't have a visitor and requires a manual
       implementation.
    2. The pypi module is claimed to be a line by line translation of esprima
       in python, but it may be outdated and inactively maintained. However,
       it contains a visitor similar to python ast.NodeVisitor that we can
       directly use.

    To minimize the effort, the latter is currently used.

    Files that fail to parse are logged and skipped. When `evaluate_smt` is
    true, the SMT check result is stored on the first package's config before
    the result is written.
    """
    analyze_path, is_decompress_path, outfile, root, configpath = self._sanitize_astgen_args(
        inpath=inpath, outfile=outfile, root=root, configpath=configpath, language=self.language)

    # load the config proto
    configpb = AstLookupConfig()
    read_proto_from_file(configpb, configpath, binary=False)
    logging.debug("loaded lookup config from %s:\n%s", configpath, configpb)
    # invoke the language specific ast generators to call functions
    # FIXME: current testdata sometimes fails the analyzer, inspect it!

    # get input files
    infiles, root = self._get_infiles(inpath=analyze_path, root=root, language=self.language)

    # initialize resultpb
    resultpb = PkgAstResults()
    pkg = resultpb.pkgs.add()
    pkg.config.CopyFrom(configpb)
    pkg.pkg_name = pkg_name if pkg_name is not None else basename(analyze_path)
    if pkg_version is not None:
        pkg.pkg_version = pkg_version
    pkg.language = ast_pb2.JAVASCRIPT

    for infile in infiles:
        # Context manager so each input file handle is closed promptly
        with open(infile, 'r') as source_file:
            all_source = source_file.read()
        try:
            # tree = esprima.parseModule(), esprima.parseScript()
            tree = esprima.parse(all_source, options={'loc': True})
        except Exception as e:
            logging.error("Fatal error %s parsing file %s! Skipping this file!", e, infile)
            continue
        visitor = JavaScriptDeclRefVisitor(source=all_source, configpb=configpb)
        visitor.visit(tree)
        logging.warning("collected functions: %s", Counter(visitor.get_declrefs()).items())

        filepb = self._get_filepb(infile, root)
        for base, name, args, source_text, source_range in visitor.get_declrefs():
            api_result = self._get_api_result(base, name, args, source_text, source_range, filepb)
            pkg.api_results.add().CopyFrom(api_result)

    # optionally evaluate smt formula
    if evaluate_smt:
        satisfied = self._check_smt(astgen_results=[resultpb], configpath=configpath)
        resultpb.pkgs[0].config.smt_satisfied = satisfied

    # save resultpb
    write_proto_to_file(resultpb, outfile, binary=False)

    # clean up residues
    self._cleanup_astgen(analyze_path=analyze_path, is_decompress_path=is_decompress_path)
def parse_by_syntax(str_code):
    """Parse *str_code* with esprima and run ``syntax_analysis`` on its body.

    Before parsing, every ``'await '`` is removed and every ``'async '`` is
    replaced with ``'function '``.
    """
    rewritten = str_code.replace('await ', '').replace('async ', 'function ')
    tree = esprima.parse(rewritten)
    return syntax_analysis(tree.body)
def __init_ast(self, script):
    """Parse *script* into ``self.ast``, passing ``self.visitor`` as the parse delegate."""
    # Location tracking on, tolerant mode on.
    parse_options = dict(loc=True, tolerant=True)
    tree = esprima.parse(script, parse_options, delegate=self.visitor)
    self.ast = tree
from __future__ import print_function

import json

import esprima

# Build a CallExpression expression statement manually
# (an `alert` call with the single literal argument 'other alert'):
callee = esprima.nodes.Identifier("alert")
args = [esprima.nodes.Literal("other alert", "'other alert'")]
call = esprima.nodes.CallExpression(callee, args)
other_alert = esprima.nodes.ExpressionStatement(call)

# Add a few expression statements using `parse()`; each entry is the first
# top-level statement of the parsed snippet:
expression_statements = {
    'some_alert': esprima.parse("alert('some alert')").body[0],
    'other_alert': other_alert,
    'console_log': esprima.parse("console.log()").body[0],
}


class MyVisitor(esprima.NodeVisitor):
    """Visitor that rewrites `alert()` calls and extends block statements."""

    def transform_CallExpression(self, node, metadata):
        """Replace a call whose callee is named `alert` with the `console.log()` call node."""
        # If the callee is an `alert()`, change it to `console.log()`:
        if node.callee.name == 'alert':
            # The same pre-parsed `console.log()` expression object is reused
            # on every match; its arguments are overwritten each time.
            new_node = expression_statements['console_log'].expression
            new_node.arguments = node.arguments
            node = new_node
        return self.generic_transform(node, metadata)

    def visit_BlockStatement(self, node):
        """Append the pre-built `some_alert` statement to the block's body."""
        # Add the expression statements to the body:
        # NOTE(review): nothing is returned and `generic_visit` is not called
        # here; confirm whether child nodes are still expected to be visited.
        node.body.append(expression_statements['some_alert'])
def parse_file(self):
    """Parse the stored file contents, keep the result, then run ``_set_classes``."""
    parsed = parse(self._file)
    self._parsed_file = parsed
    self._set_classes()
def __init__(self, javascript='const x = 10;\nconst y = x * 2 + 1;'):
    """Parse *javascript* with esprima and keep the resulting tree on ``self.tree``."""
    # parse code to generate tree
    parsed_tree = esprima.parse(javascript)
    self.tree = parsed_tree