Python parseの例、esprima.parse Pythonの例

コード例 #1

0

ファイルを表示

 def test_recursion(self):
     """Parse a script consisting of one very long string-concatenation chain.

     The operand run is repeated 20 times inside a single ``unescape(...)``
     call (presumably to stress recursion depth in the parser, going by the
     test name). The test passes when ``parse`` returns a ``Script``.
     """
     prefix = 'var testcode = unescape(""+'
     operands = '""+"%u8300"+"%u2f8d"+""+""+"%u8300"+"%u2f8d"+""+""+"%u8300"+"%u2f8d"+""+""+"%u8300"+'
     suffix = '"");'
     source = "".join((prefix, operands * 20, suffix))
     tree = parse(source)
     self.assertIsInstance(tree, Script)

コード例 #2

0

ファイルを表示

    def test_basic(self):
        """Check the dict form of the AST for a one-line ``var`` declaration."""
        # Build the expected tree bottom-up so each node is readable on its own.
        identifier = {"type": "Identifier", "name": "$"}
        literal = {"type": "Literal", "value": "Hello!", "raw": '"Hello!"'}
        declarator = {
            "type": "VariableDeclarator",
            "id": identifier,
            "init": literal,
        }
        declaration = {
            "type": "VariableDeclaration",
            "declarations": [declarator],
            "kind": "var",
        }
        expected = {
            "sourceType": "script",
            "type": "Program",
            "body": [declaration],
        }

        tree = parse('var $ = "Hello!"')
        actual = toDict(tree)

        self.assertEqual(expected, actual)

コード例 #3

0

ファイルを表示

ファイル: events.py プロジェクト: jayktee/python-legistar-scraper

    def ecomment_dict(self):
        """
        Return a mapping of event ID to eComment URL, building it on first use.

        The mapping is extracted from the JavaScript files listed in
        ``self.ECOMMENT_JS_URLS``, which contain calls such as:
        activateEcomment('750', '138A085F-0AC1-4A33-B2F3-AC3D6D9F710B', 'https://metro.granicusideas.com/meetings/750-finance-budget-and-audit-committee-on-2020-03-16-5-00-pm-test');

        The result is cached on ``self._ecomment_dict``.
        """
        if getattr(self, '_ecomment_dict', None) is not None:
            return self._ecomment_dict

        links = {}

        # Delegate invoked by esprima for each node it produces, see
        # https://esprima.readthedocs.io/en/latest/syntactic-analysis.html#example-console-calls-removal
        def collect_ecomment(node, metadata):
            if not (node.callee and node.callee.name == 'activateEcomment'):
                return
            # activateEcomment(event_id, <unused>, comment_url)
            id_node, _, url_node = node.arguments
            links[id_node.value] = url_node.value

        for js_url in self.ECOMMENT_JS_URLS:
            js_source = self.get(js_url).text
            esprima.parse(js_source, delegate=collect_ecomment)

        self._ecomment_dict = links
        return self._ecomment_dict

コード例 #4

0

ファイルを表示

ファイル: parse_blaseball_book.py プロジェクト: tehstone/blaseballwatcher

async def parse_book_from_javascript(bot):
    """Locate the site's main JavaScript bundle and (eventually) parse the Book.

    The blaseball.com front page is fetched and searched for exactly one
    ``<script>`` tag whose ``src`` starts with the CDN path built from
    ``bot.config["cloudflare_id"]`` or, failing that, with the relative
    ``/static/js/main.`` path. The bundle itself is then downloaded.

    Args:
        bot: object exposing a ``config`` mapping with a ``"cloudflare_id"`` key.

    Returns:
        tuple: ``(book, js_url)``. With the early return below in place the
        first element is always ``None``.

    Raises:
        Exception: if the page cannot be fetched, or if zero or more than one
            matching script tag is found.
    """
    try:
        response = await request_text("https://blaseball.com/")
    except ssl.CertificateError as err:
        # Keep the original failure attached as the cause for debugging.
        raise Exception("Could not connect to blaseball.com") from err

    if not response:
        raise Exception("Could not connect to blaseball.com")

    soup = BeautifulSoup(response, 'html.parser')

    # The backslash is escaped explicitly: "\." in a non-raw literal is an
    # invalid escape sequence. The selector text handed to soup.select() is
    # unchanged.
    cdn_selector = (
        f'script[src^="https://{bot.config["cloudflare_id"]}'
        '.cloudfront.net/static/js/main\\."]'
    )
    script_tags = soup.select(cdn_selector)
    if not script_tags:
        script_tags = soup.select('script[src^="/static/js/main\\."]')
    if not script_tags:
        raise Exception('Could not find the main JS file.')
    if len(script_tags) > 1:
        raise Exception('More than one main JS files found.')

    script_tag = script_tags[0]
    src = script_tag.attrs['src']

    js_url = urljoin('https://blaseball.com', src)
    js = await request_text(js_url)
    # NOTE(review): this early return short-circuits the AST parsing below,
    # leaving everything after it unreachable. It looks like a deliberate
    # switch-off, so it is preserved; confirm before removing either half.
    return None, js_url
    ast = esprima.parse(js)

    book_of_blaseball_visitor = BookOfBlaseballVisitor()
    book_function_node = book_of_blaseball_visitor.find_book_function_node(ast)

    if book_function_node is None:
        raise Exception(
            'Could not find the FunctionDeclaration node for rendering the Book in the AST.'
        )

    book_parser_visitor = BookParserVisitor()

    return book_parser_visitor.parse_book(book_function_node), js_url


# if __name__ == '__main__':
#     try:
#         book = await parse_book_from_javascript()
#         print(book)
#     except Exception as e:
#         print(e)

コード例 #5

0

ファイルを表示

    async def pre_ast(self):
        """Pre-process every queued file group and store the results on ``self``.

        Each item taken from ``self.target_queue`` is an ``(extension, info)``
        pair whose ``info['list']`` holds file paths. Depending on the
        extension and on the languages enabled in ``self.lan``:

        * PHP files are parsed; the nodes go to ``self.pre_result`` and any
          ``define`` call found directly in the node list is recorded in
          ``self.define_dict``.
        * Chrome extension packages are unzipped, their ``manifest.json`` is
          loaded, and the JS/HTML files it references are queued.
        * HTML files have their external script paths collected and their
          inline scripts appended to a sibling ``tmp.js``; the resulting list
          is queued as a ``.js`` group.
        * JavaScript files are beautified into ``<file>.pretty`` (only when
          that file does not exist yet) and parsed with esprima.

        Processing stops early when ``self.lan`` is empty.

        Returns:
            bool: always ``True``.
        """

        while not self.target_queue.empty():

            fileext = self.target_queue.get()

            if not self.lan:
                break

            if fileext[0] in ext_dict['php'] and 'php' in self.lan:
                # Handling of PHP files
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    # Context manager so the handle is closed even if read() fails
                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    # self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content,
                                                 debug=False,
                                                 lexer=lexer.clone(),
                                                 tracking=True)

                        # Store the parsed nodes
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except Exception:
                        # Best effort: log and go on, but let KeyboardInterrupt,
                        # SystemExit and task cancellation propagate.
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

                    # Collect all constants declared through define()
                    for node in all_nodes:
                        if isinstance(
                                node,
                                php.FunctionCall) and node.name == "define":
                            define_params = node.params
                            logger.debug(
                                "[AST][Pretreatment] new define {}={}".format(
                                    define_params[0].node,
                                    define_params[1].node))

                            self.define_dict[
                                define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict[
                    'chromeext'] and 'chromeext' in self.lan:

                # Pre-processing for Chrome extensions:
                # extract the JS and HTML files they contain
                for filepath in fileext[1]['list']:
                    child_files = []
                    child_files_html = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First try to unpack the crx
                    try:
                        target_files_path = un_zip(filepath)
                        self.pre_result[filepath][
                            'target_files_path'] = target_files_path

                    except zipfile.BadZipFile:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} not zip".
                            format(filepath))
                        continue

                    except OSError:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} unzip error".
                            format(filepath))
                        continue

                    # Analyse manifest.json
                    manifest_path = os.path.join(target_files_path,
                                                 "manifest.json")

                    # The target may be a single file, which needs special handling
                    if not (self.target_directory.endswith("/")
                            or self.target_directory.endswith("\\")
                            ) and not os.path.isdir(self.target_directory):

                        path_list = re.split(r'[\\|/]', self.target_directory)
                        relative_path = os.path.join(path_list[-1] + "_files")
                    else:
                        relative_path = target_files_path.split(
                            self.target_directory)[-1]

                    if relative_path.startswith(
                            '\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                    if os.path.isfile(manifest_path):
                        with codecs.open(manifest_path,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            manifest_content = fi.read()

                        try:
                            # json.loads() no longer accepts an ``encoding``
                            # argument (removed in Python 3.9); the content is
                            # already decoded text.
                            manifest = json.loads(manifest_content)

                        except json.decoder.JSONDecodeError:
                            logger.warning(
                                "[Pretreatment][Chrome Ext] File {} parse error..."
                                .format(target_files_path))
                            continue

                        self.pre_result[filepath]["manifest"] = manifest

                        # Optimisation: when only the extension itself is being
                        # scanned, skip the JS and HTML discovery
                        if len(self.lan) and self.lan == 'chromeext':
                            logger.debug(
                                "[Pretreatment][Chrome Ext] pass js & html scan..."
                            )
                            continue

                        # content scripts
                        if "content_scripts" in manifest:
                            for script in manifest["content_scripts"]:
                                if "js" in script:
                                    child_files.extend([
                                        os.path.join(relative_path, js)
                                        for js in script['js']
                                    ])

                        # background js
                        if "background" in manifest:
                            if "scripts" in manifest["background"]:
                                child_files.extend([
                                    os.path.join(relative_path, js)
                                    for js in manifest["background"]["scripts"]
                                ])

                            # background html
                            if "page" in manifest["background"]:
                                child_files_html.append(
                                    os.path.join(
                                        relative_path,
                                        manifest["background"]["page"]))

                        # popup.html
                        if "browser_action" in manifest:
                            if "default_popup" in manifest["browser_action"]:
                                child_files_html.append(
                                    os.path.join(
                                        relative_path,
                                        manifest["browser_action"]
                                        ["default_popup"]))

                        # web_accessible_resources
                        if "web_accessible_resources" in manifest:
                            for resource in manifest[
                                    "web_accessible_resources"]:
                                if ".js" in resource:
                                    child_files.append(
                                        os.path.join(relative_path, resource))

                                if ".html" in resource:
                                    child_files_html.append(
                                        os.path.join(relative_path, resource))

                        # chrome_url_overrides
                        if "chrome_url_overrides" in manifest:
                            for key in manifest["chrome_url_overrides"]:
                                child_files_html.append(
                                    os.path.join(
                                        relative_path,
                                        manifest["chrome_url_overrides"][key]))

                        self.pre_result[filepath]["child_files"] = child_files

                        if len(child_files):
                            # Queue the discovered scripts as a new .js group
                            self.target_queue.put(('.js', {
                                'count': len(child_files),
                                'list': child_files
                            }))

                            # file_list was passed in from outside; appending
                            # here makes the new group visible to the caller
                            self.file_list.append(('.js', {
                                'count': len(child_files),
                                'list': child_files
                            }))

                        if len(child_files_html):
                            self.target_queue.put(('.html', {
                                'count':
                                len(child_files_html),
                                'list':
                                child_files_html
                            }))

                    else:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

            elif fileext[0] in ext_dict['html'] and 'javascript' in self.lan:
                # For HTML, only the JavaScript it carries is of interest
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    script_list = []

                    try:
                        with codecs.open(filepath,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            code_content = fi.read()

                    except OSError:
                        # Also covers FileNotFoundError, which subclasses OSError
                        continue

                    # tmp.js collects all inline javascript code
                    tmp_path = os.path.join(os.path.dirname(filepath),
                                            "tmp.js")

                    # The context manager guarantees tmp.js is closed even when
                    # parsing the HTML fails part-way through.
                    with codecs.open(tmp_path,
                                     "a",
                                     encoding='utf-8',
                                     errors='ignore') as fi2:
                        try:
                            soup = BeautifulSoup(code_content, "html.parser")

                            script_tag_list = soup.find_all('script')

                            for script_tag in script_tag_list:
                                script_attrs = script_tag.attrs

                                if 'src' in script_attrs:
                                    parents_path = os.path.normpath("\\".join(
                                        re.split(r'[\\|/]', filepath)[:-1]))

                                    script_path = os.path.join(
                                        parents_path, script_attrs['src'])
                                    script_list.append(script_path)

                                else:
                                    # No src attribute means inline JS.
                                    # NOTE(review): script_tag.string may be
                                    # None, which is written out as the text
                                    # "None" - confirm whether that is intended.
                                    script_content = script_tag.string

                                    fi2.write(" \n{}\n ".format(script_content))

                        except Exception:
                            logger.warning('[AST] something error, {}'.format(
                                traceback.format_exc()))
                            continue

                    if tmp_path not in script_list:
                        script_list.append(tmp_path)

                    # Queue the collected scripts as a new .js group
                    self.target_queue.put(('.js', {
                        'count': len(script_list),
                        'list': script_list
                    }))

                    # file_list was passed in from outside; appending here
                    # makes the new group visible to the caller
                    self.file_list.append(('.js', {
                        'count': len(script_list),
                        'list': script_list
                    }))

            elif fileext[0] in ext_dict[
                    'javascript'] and 'javascript' in self.lan:

                # Pre-processing for JavaScript:
                # the source needs to be parsed for later semantic analysis
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)

                    if not filepath.endswith(".js"):
                        continue

                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    try:
                        with codecs.open(filepath,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            code_content = fi.read()

                    except OSError:
                        # Also covers FileNotFoundError, which subclasses OSError
                        continue

                    # Beautify the code and write it to a new file
                    new_filepath = filepath + ".pretty"

                    try:

                        if not os.path.isfile(new_filepath):
                            # Beautify before opening the output so a failure
                            # does not leave an empty .pretty file behind.
                            code_content = jsbeautifier.beautify(code_content)
                            with codecs.open(new_filepath,
                                             "w",
                                             encoding='utf-8',
                                             errors='ignore') as fi2:
                                fi2.write(code_content)

                        # self.pre_result[filepath]['content'] = code_content

                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Store the parsed nodes
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except esprima.error_handler.Error:
                        logger.warning(
                            '[AST] [ERROR] Invalid regular expression in {}...'
                            .format(filepath))

                    except KeyboardInterrupt:
                        # Logger.log() needs an explicit level as its first
                        # argument; use warning() like the rest of this method.
                        logger.warning('[AST stop...')
                        exit()

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))
                        continue

            # Manually trigger garbage collection after each file group
            gc.collect()

        return True

コード例 #6

0

ファイルを表示

ファイル: pretreatment.py プロジェクト: leiyuch/Cobra-W

    async def pre_ast(self):
        """Pre-process every queued file group and store the results on ``self``.

        Each item taken from ``self.target_queue`` is an ``(extension, info)``
        pair whose ``info['list']`` holds file paths:

        * PHP files are parsed; source and nodes go to ``self.pre_result`` and
          any ``define`` call found directly in the node list is recorded in
          ``self.define_dict``.
        * Chrome extension packages are unzipped, their ``manifest.json`` is
          loaded, and the content scripts it lists are queued as a ``.js``
          group.
        * JavaScript files are beautified into ``<file>.pretty`` (only when
          that file does not exist yet) and parsed with esprima.

        Returns:
            bool: always ``True``.
        """

        while not self.target_queue.empty():

            fileext = self.target_queue.get()

            if fileext[0] in ext_dict['php']:
                # Handling of PHP files
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    # Context manager so the handle is closed even if read() fails
                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content,
                                                 debug=False,
                                                 lexer=lexer.clone(),
                                                 tracking=True)

                        # Store the parsed nodes
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except Exception:
                        # Best effort: log and go on, but let KeyboardInterrupt,
                        # SystemExit and task cancellation propagate.
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

                    # Collect all constants declared through define()
                    for node in all_nodes:
                        if isinstance(
                                node,
                                php.FunctionCall) and node.name == "define":
                            define_params = node.params
                            logger.debug(
                                "[AST][Pretreatment] new define {}={}".format(
                                    define_params[0].node,
                                    define_params[1].node))

                            self.define_dict[
                                define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict['chromeext']:

                # Pre-processing for Chrome extensions:
                # extract the JS and HTML files they contain
                for filepath in fileext[1]['list']:
                    child_files = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First try to unpack the crx
                    try:
                        target_files_path = un_zip(filepath)
                        self.pre_result[filepath][
                            'target_files_path'] = target_files_path

                    except zipfile.BadZipFile:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} not zip".
                            format(filepath))
                        continue

                    # Analyse manifest.json
                    manifest_path = os.path.join(target_files_path,
                                                 "manifest.json")

                    # The target may be a single file, which needs special handling
                    if not (self.target_directory.endswith("/")
                            or self.target_directory.endswith("\\")
                            ) and not os.path.isdir(self.target_directory):
                        relative_path = os.path.join(
                            re.split(r'[\\|/]', self.target_directory)[-1] +
                            "_files")
                    else:
                        relative_path = target_files_path.split(
                            self.target_directory)[-1]

                    if relative_path.startswith(
                            '\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                    if os.path.isfile(manifest_path):
                        with codecs.open(manifest_path,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            manifest_content = fi.read()

                        try:
                            # json.loads() no longer accepts an ``encoding``
                            # argument (removed in Python 3.9); the content is
                            # already decoded text.
                            manifest = json.loads(manifest_content)

                        except json.decoder.JSONDecodeError:
                            logger.warning(
                                "[Pretreatment][Chrome Ext] File {} parse error..."
                                .format(target_files_path))
                            continue

                        self.pre_result[filepath]["manifest"] = manifest

                        if "content_scripts" in manifest:
                            for script in manifest["content_scripts"]:
                                if 'js' in script:
                                    child_files.extend([
                                        os.path.join(relative_path, js)
                                        for js in script['js']
                                    ])

                        self.pre_result[filepath]["child_files"] = child_files

                        # Queue the discovered content scripts as a new .js group
                        self.target_queue.put(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                        # file_list was passed in from outside; appending here
                        # makes the new group visible to the caller
                        self.file_list.append(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                    else:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

            elif fileext[0] in ext_dict['javascript']:

                # Pre-processing for JavaScript:
                # the source needs to be parsed for later semantic analysis
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    # Beautify the code and write it to a new file
                    new_filepath = filepath + ".pretty"

                    if not os.path.isfile(new_filepath):
                        # Beautify before opening the output so a failure does
                        # not leave an empty .pretty file behind.
                        code_content = jsbeautifier.beautify(code_content)
                        with codecs.open(new_filepath,
                                         "w",
                                         encoding='utf-8',
                                         errors='ignore') as fi2:
                            fi2.write(code_content)

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Store the parsed nodes
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except esprima.error_handler.Error:
                        logger.warning(
                            '[AST] [ERROR] Invalid regular expression in {}...'
                            .format(filepath))

                    except KeyboardInterrupt:
                        # Logger.log() needs an explicit level as its first
                        # argument; use warning() like the rest of this method.
                        logger.warning('[AST stop...')
                        exit()

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))
                        continue

            # Manually trigger garbage collection after each file group
            gc.collect()

        return True

コード例 #7

0

ファイルを表示

ファイル: pretreatment.py プロジェクト: p0prxx/Cobra-W

    def pre_ast(self, lan=None):
        """Pre-process every file group in ``self.file_list``.

        * PHP files are parsed; source and nodes go to ``self.pre_result`` and
          any ``define`` call found directly in the node list is recorded in
          ``self.define_dict``.
        * Chrome extension packages are unzipped and their ``manifest.json`` is
          loaded; the content scripts it lists are stored under
          ``"child_files"``.
        * JavaScript files are parsed with esprima.

        Args:
            lan: optional iterable of language names. When given and none of
                them appears in ``could_ast_pase_lans``, nothing is processed.

        Returns:
            ``True`` when the pass is skipped because of ``lan``. The normal
            path has no explicit return in the visible body.
        """

        if lan is not None:
            # Check whether any requested language is in the AST-parsable list
            if not set(lan) & set(could_ast_pase_lans):

                logger.info("[AST][Pretreatment] Current scan target language does not require ast pretreatment...")
                return True

        for fileext in self.file_list:

            if fileext[0] in ext_dict['php']:
                # Handling of PHP files
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    # Context manager: the handle was previously never closed
                    with codecs.open(filepath, "r", encoding='utf-8', errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=True)

                        # Store the parsed nodes
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except Exception:
                        # Best effort: log and go on, but let KeyboardInterrupt
                        # and SystemExit propagate.
                        logger.warning('[AST] something error, {}'.format(traceback.format_exc()))

                    # Collect all constants declared through define()
                    for node in all_nodes:
                        if isinstance(node, php.FunctionCall) and node.name == "define":
                            define_params = node.params
                            logger.debug("[AST][Pretreatment] new define {}={}".format(define_params[0].node, define_params[1].node))

                            self.define_dict[define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict['chromeext']:

                # Pre-processing for Chrome extensions:
                # extract the JS and HTML files they contain
                for filepath in fileext[1]['list']:
                    # One list per package, so entries from one extension do
                    # not leak into the next one's "child_files".
                    child_files = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First try to unpack the crx
                    target_files_path = un_zip(filepath)
                    self.pre_result[filepath]['target_files_path'] = target_files_path

                    # Analyse manifest.json
                    manifest_path = os.path.join(target_files_path, "manifest.json")
                    relative_path = target_files_path.split(self.target_directory)[-1]

                    if relative_path.startswith('\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                    if os.path.isfile(manifest_path):
                        with codecs.open(manifest_path, "r", encoding='utf-8', errors='ignore') as fi:
                            manifest_content = fi.read()

                        try:
                            manifest = json.loads(manifest_content)
                        except ValueError:
                            # json.JSONDecodeError subclasses ValueError; a
                            # broken manifest should not abort the whole pass.
                            logger.warning("[Pretreatment][Chrome Ext] File {} parse error...".format(target_files_path))
                            continue

                        self.pre_result[filepath]["manifest"] = manifest

                        if "content_scripts" in manifest:
                            for script in manifest["content_scripts"]:
                                # A content script entry may carry only CSS
                                if 'js' in script:
                                    child_files.extend([os.path.join(relative_path, js) for js in script['js']])

                        self.pre_result[filepath]["child_files"] = child_files
                    else:
                        logger.warning("[Pretreatment][Chrome Ext] File {} parse error...".format(target_files_path))
                        continue

            elif fileext[0] in ext_dict['javascript']:

                # Pre-processing for JavaScript:
                # the source needs to be parsed for later semantic analysis
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    with codecs.open(filepath, "r", encoding='utf-8', errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Store the parsed nodes
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(traceback.format_exc()))

コード例 #8

0

ファイルを表示

 def parse_js_file(self):
     """Parse the stored JavaScript source, keep the result, then set up classes."""
     parsed = parse(self.__js_file)
     self.__js_file_parsed = parsed
     self.__set_classes()

コード例 #9

0

ファイルを表示

ファイル: main.py プロジェクト: andrewfsanchez/jdeob

def esprima_interface(program='var help = 5'):
    """Return whatever ``esprima.parse`` produces for the JavaScript text *program*."""
    syntax_tree = esprima.parse(program)
    return syntax_tree

コード例 #10

0

ファイルを表示

ファイル: AST2.py プロジェクト: wusenyan/thug

    def __init_ast(self, script):
        """Parse *script* with location info in tolerant mode and store it as a dict in ``self.ast``."""
        parse_options = dict(loc=True, tolerant=True)
        self.ast = esprima.parse(script, parse_options)

        # Replace the node tree with its plain-dict representation.
        self.ast = esprima.toDict(self.ast)

コード例 #11

0

ファイルを表示

ファイル: AST.py プロジェクト: buffer/thug

 def __init_ast(self, script):
     """Parse *script* into ``self.ast``, passing ``self.visitor`` as the parser delegate."""
     parse_options = dict(loc=True, tolerant=True)
     self.ast = esprima.parse(script, parse_options, delegate=self.visitor)

コード例 #12

0

ファイルを表示

ファイル: models.py プロジェクト: kevin-klein/101wiki

    def extract(self, source):
        """Parse *source* and return one extracted fragment per top-level node of the tree."""
        fragments = []
        for top_level_node in esprima.parse(source, loc=True).body:
            fragments.append(self._extract_fragment(top_level_node))
        return fragments

コード例 #13

0

ファイルを表示

ファイル: measure_parity.py プロジェクト: mdcutone/psychopy

def measure_js_parity(pypath, jspath, outpath=None):
    """
    Get all methods and attributes for classes in psychopy.visual and psychojs.visual, for comparison.

    Parameters
    ----------
    pypath : str or Path
        Folder holding the Python source files named in the component table below.
    jspath : str or Path
        Folder holding the JavaScript source files named in the component table below.
    outpath : str or Path, optional
        If given, one ``<component>.csv`` file per component is written into this folder.

    Returns
    -------
    attrs : dict
        Per component, the ``'js'`` and ``'py'`` entries extended with ``'init'`` (list of
        constructor parameter names), ``'methods'`` (name -> list of parameter names) and
        ``'attribs'`` (name -> whether a setter was seen).
    compr : dict
        Per component, an OrderedDict of ``<key>_both`` / ``<key>_py`` / ``<key>_js`` lists.
        When `outpath` is given these lists are padded with None to a common length.

    Raises
    ------
    ValueError
        If the expected class definition cannot be found in a JS or Python file.
    """
    def _listcomp(a, b):
        """
        Convenience function for quickly getting arrays of differences between two lists (a and b).

        Returns
        -------
        justa : list
            Elements only present in list a
        justb : list
            Elements only present in b
        both : list
            Elements present in both lists
        either : list
            Elements present in at least one of the lists
        """
        # Get as sets
        a = set(a)
        b = set(b)
        # Do comparison
        justa = list(a.difference(b))
        justb = list(b.difference(a))
        both = list(a & b)
        either = list(a | b)

        return justa, justb, both, either

    # Pathify paths
    pypath = Path(pypath)
    jspath = Path(jspath)
    # Dict with classes & filenames for visual components which exist in PsychoPy and PsychoJS
    attrs = {
        "ButtonStim": {
            'js': {
                'file': 'ButtonStim.js',
                'cls': "ButtonStim"
            },
            'py': {
                'file': 'button.py',
                'cls': "ButtonStim"
            },
        },
        "Form": {
            'js': {
                'file': 'Form.js',
                'cls': "Form"
            },
            'py': {
                'file': 'form.py',
                'cls': "Form"
            },
        },
        "ImageStim": {
            'js': {
                'file': 'ImageStim.js',
                'cls': "ImageStim"
            },
            'py': {
                'file': 'image.py',
                'cls': "ImageStim"
            },
        },
        "MovieStim3": {
            'js': {
                'file': 'MovieStim.js',
                'cls': "MovieStim"
            },
            'py': {
                'file': 'movie3.py',
                'cls': "MovieStim3"
            },
        },
        "Polygon": {
            'js': {
                'file': 'Polygon.js',
                'cls': "Polygon"
            },
            'py': {
                'file': 'polygon.py',
                'cls': "Polygon"
            }
        },
        "Rect": {
            'js': {
                'file': 'Rect.js',
                'cls': "Rect"
            },
            'py': {
                'file': 'rect.py',
                'cls': "Rect"
            }
        },
        "ShapeStim": {
            'js': {
                'file': 'ShapeStim.js',
                'cls': "ShapeStim"
            },
            'py': {
                'file': 'shape.py',
                'cls': "ShapeStim"
            }
        },
        "Slider": {
            'js': {
                'file': 'Slider.js',
                'cls': "Slider"
            },
            'py': {
                'file': 'slider.py',
                'cls': "Slider"
            }
        },
        "TextBox2": {
            'js': {
                'file': 'TextBox.js',
                'cls': "TextBox"
            },
            'py': {
                'file': 'textbox2/textbox2.py',
                'cls': "TextBox2"
            }
        },
        "TextStim": {
            'js': {
                'file': 'TextStim.js',
                'cls': "TextStim"
            },
            'py': {
                'file': 'text.py',
                'cls': "TextStim"
            }
        },
        "BaseVisualStim": {
            'js': {
                'file': 'VisualStim.js',
                'cls': "VisualStim"
            },
            'py': {
                'file': 'basevisual.py',
                'cls': "BaseVisualStim"
            }
        },
    }
    # Create blank output arrays
    for name in attrs:
        # Create output array
        arr = {'init': [], 'methods': {}, 'attribs': {}}
        # Append to js and py (deep copies, so the two sides never share containers)
        attrs[name]['js'].update(deepcopy(arr))
        attrs[name]['py'].update(deepcopy(arr))

    # For each class, get dicts of methods and attributes
    for name in attrs:

        # --- Parse JS file ---

        # Explicit encoding so the result does not depend on the platform locale
        with open(jspath / attrs[name]['js']['file'], 'r', encoding='utf-8') as f:
            code = f.read()
        tree = esprima.parse(code, sourceType='module')
        # Get class def
        cls = None
        for node in tree.body:
            if node.type != "ExportNamedDeclaration":
                continue
            # `export { a, b }` style exports carry no declaration; skip them
            decl = node.declaration
            if (decl is not None and decl.type == "ClassDeclaration"
                    and decl.id.name == attrs[name]['js']['cls']):
                cls = node
        if cls is None:
            raise ValueError(
                f"Could not find class def for {attrs[name]['js']['cls']} in {attrs[name]['js']['file']}"
            )
        # Get methods & properties
        for node in cls.declaration.body.body:
            if node.value.type == "FunctionExpression":
                # Get flattened list of params
                paramNames = []
                for param in node.value.params:
                    if param.type == "AssignmentPattern":
                        # If parameter is a dict style assignment pattern, break it apart
                        if param.left.type == "ObjectPattern":
                            for prop in param.left.properties:
                                paramNames.append(prop.key.name)
                        # If parameter is an expression, store name
                        elif param.left.type == "Identifier":
                            paramNames.append(param.left.name)
                    elif param.type == "Identifier":
                        paramNames.append(param.name)

                # Skip protected methods
                if node.key.name is None or node.key.name.startswith("_"):
                    continue
                # If it's the constructor method, store params
                if node.kind == "constructor":
                    attrs[name]['js']['init'] = paramNames
                # If it's a getter, store its name & whether it's settable
                # (True only if the matching setter has already been recorded)
                elif node.kind == "get":
                    attrs[name]['js']['attribs'][
                        node.key.
                        name] = node.key.name in attrs[name]['js']['attribs']
                # If it's a setter, store its name & the fact that it's settable
                elif node.kind == "set":
                    attrs[name]['js']['attribs'][node.key.name] = True
                # If it's regular method, store its name and params
                elif node.kind == "method":
                    attrs[name]['js']['methods'][node.key.name] = paramNames

        # --- Parse Py file ---

        with open(pypath / attrs[name]['py']['file'], 'r', encoding='utf-8') as f:
            code = f.read()
        tree = ast.parse(code)
        # Get class def
        cls = None
        for node in tree.body:
            if isinstance(
                    node,
                    ast.ClassDef) and node.name == attrs[name]['py']['cls']:
                cls = node
        if cls is None:
            raise ValueError(
                f"Could not find class def for {attrs[name]['py']['cls']} in {attrs[name]['py']['file']}"
            )
        # Get methods and attributes
        for node in cls.body:
            if isinstance(node, ast.FunctionDef):
                # Get flattened list of params
                paramNames = []
                for param in node.args.args:
                    if param.arg == "self":
                        continue
                    paramNames.append(param.arg)

                # Get string list of decorators
                decorators = []
                for dec in node.decorator_list:
                    if isinstance(dec, ast.Name):
                        decorators.append(dec.id)
                    if isinstance(dec, ast.Attribute):
                        decorators.append(dec.attr)

                # If it's the constructor method, store params
                if node.name == "__init__":
                    attrs[name]['py']['init'] = paramNames
                # Skip protected methods
                elif node.name is None or node.name.startswith("_"):
                    continue
                # If it's a getter, store its name & whether it's settable
                elif "property" in decorators:
                    attrs[name]['py']['attribs'][
                        node.name] = node.name in attrs[name]['py']['attribs']
                # If it's a setter, store its name & the fact that it's settable
                elif "setter" in decorators:
                    attrs[name]['py']['attribs'][node.name] = True
                # If it's regular method, store its name and params
                else:
                    attrs[name]['py']['methods'][node.name] = paramNames

    # --- Compare ---

    compr = {}
    # Iterate through components
    for name in attrs:
        # Add field to comparison dict
        compr[name] = OrderedDict({})

        # Compare init params, attributes and method names
        for key in ('init', 'attribs', 'methods'):
            # Get lists (for the dict-valued keys only the names are compared)
            py = attrs[name]['py'][key]
            js = attrs[name]['js'][key]
            # Do comparison
            justpy, justjs, both, either = _listcomp(py, js)
            # Store in dict
            compr[name][f'{key}_both'] = both
            compr[name][f'{key}_py'] = justpy
            compr[name][f'{key}_js'] = justjs

        # Add empty column
        compr[name]['|||'] = []

        # Compare params for each method
        for key in compr[name]['methods_both']:
            # Get lists
            py = attrs[name]['py']['methods'][key]
            js = attrs[name]['js']['methods'][key]
            # Do comparison
            justpy, justjs, both, either = _listcomp(py, js)
            # Store in dict
            compr[name][f'{key}_both'] = both
            compr[name][f'{key}_py'] = justpy
            compr[name][f'{key}_js'] = justjs

    # If asked to, save to a table. This runs once, after every component has been
    # compared, so each CSV is written a single time.
    if outpath:
        # Pathify output path
        outpath = Path(outpath)
        # Save csv's
        for compname, data in compr.items():
            # Pad columns to max so the DataFrame gets equal-length columns
            ncols = max(len(val) for val in data.values())
            for column in data.values():
                column.extend([None] * (ncols - len(column)))
            # Make a pandas dataframe
            df = pd.DataFrame(data)
            # Write to csv
            df.to_csv(outpath / f"{compname}.csv")

    return attrs, compr

コード例 #14

0

ファイルを表示

        def test(self):
            # Load the expected result fixture and strip the bookkeeping keys from it.
            with open(result_file, 'rb') as fixture:
                expected = toDict(json.loads(fixture.read().decode('utf-8')))
            options = None
            if isinstance(expected, dict):
                # Not all json failure files include description
                expected.pop('description', None)
                # tokenize is not part of errors
                expected.pop('tokenize', None)
                # Extracts options from tree (if any)
                options = expected.pop('options', None)

            # Load the code under test; '.source.' files are unwrapped through SOURCE_RE
            # and then unescaped, everything else is plain UTF-8.
            with open(filename, 'rb') as source_file:
                raw_code = source_file.read()
            if '.source.' not in filename:
                actual_code = raw_code.decode('utf-8')
            else:
                actual_code = SOURCE_RE.sub(r'\2',
                                            raw_code).decode('unicode_escape')

            comment_keys = ('leadingComments', 'trailingComments',
                            'innerComments')

            def has_attached_comment(tree):
                # True when any (nested) dict in `tree` carries one of the comment keys.
                for key, value in tree.items():
                    if key in comment_keys:
                        return True
                    if isinstance(value, dict):
                        children = [value]
                    elif isinstance(value, list):
                        children = [item for item in value
                                    if isinstance(item, dict)]
                    else:
                        continue
                    if any(has_attached_comment(child) for child in children):
                        return True
                return False

            try:
                if result_type == '.tokens':
                    if options is None:
                        options = dict(loc=True,
                                       range=True,
                                       comment=True,
                                       tolerant=True)
                    actual = toDict(tokenize(actual_code, options=options))
                else:
                    if '.module.' in filename:
                        sourceType = 'module'
                    else:
                        sourceType = 'script'
                    if options is None:
                        # Default parser options, derived from what the fixture contains.
                        options = dict(
                            jsx=True,
                            comment='comments' in expected,
                            range=True,
                            loc=True,
                            tokens=True,
                            raw=True,
                            tolerant='errors' in expected,
                            source=None,
                            sourceType=expected.get('sourceType', sourceType),
                        )

                    if options.get('comment'):
                        options['attachComment'] = has_attached_comment(
                            expected)

                    # Mirror range/loc presence from the first expected token, then
                    # from the first expected comment (the later one wins).
                    for listing in ('tokens', 'comments'):
                        entries = expected.get(listing)
                        if entries:
                            first = entries[0]
                            options['range'] = 'range' in first
                            options['loc'] = 'loc' in first

                    if options.get('loc'):
                        expected_loc = expected.get('loc', {})
                        options['source'] = expected_loc.get('source')

                    actual = toDict(parse(actual_code, options=options))
            except Error as e:
                # Failures are compared through their dict form.
                actual = e.toDict()

            self.assertEqual(expected, actual)

コード例 #15

0

ファイルを表示

    def astgen(self,
               inpath,
               outfile,
               root=None,
               configpath=None,
               pkg_name=None,
               pkg_version=None,
               evaluate_smt=False):
        """
        Parse every input JavaScript file, collect the declarations/references reported by
        JavaScriptDeclRefVisitor and write them to `outfile` as a PkgAstResults proto.

        There are two ways to implement the javascript ast parsing, each with its pros and cons.
        One is to directly use the npm esprima module, the other is to use the pypi esprima module.

        1. The npm module is the latest version and has lots of features to use directly. But it doesn't have a visitor
        and requires a manual implementation.
        2. The pypi module is claimed to be a line by line translation of esprima in python, but it may be outdated and
        inactively maintained. However, it contains a visitor similar to python ast.NodeVisitor that we can directly use.

        To minimize the effort, I currently choose the latter.

        :param inpath: input location, passed through `_sanitize_astgen_args`
        :param outfile: where the result proto is written (text format)
        :param root: passed to `_sanitize_astgen_args` / `_get_infiles`
        :param configpath: path of the AstLookupConfig proto to load (text format)
        :param pkg_name: package name to record; defaults to the basename of the analyzed path
        :param pkg_version: package version to record, if given
        :param evaluate_smt: when true, run `_check_smt` and store its result in the package config
        """
        analyze_path, is_decompress_path, outfile, root, configpath = self._sanitize_astgen_args(
            inpath=inpath,
            outfile=outfile,
            root=root,
            configpath=configpath,
            language=self.language)

        # load the config proto
        configpb = AstLookupConfig()
        read_proto_from_file(configpb, configpath, binary=False)
        logging.debug("loaded lookup config from %s:\n%s", configpath,
                      configpb)
        # invoke the language specific ast generators to call functions

        # FIXME: current testdata sometimes fails the analyzer, inspect it!
        # get input files
        infiles, root = self._get_infiles(inpath=analyze_path,
                                          root=root,
                                          language=self.language)

        # initialize resultpb
        resultpb = PkgAstResults()
        pkg = resultpb.pkgs.add()
        pkg.config.CopyFrom(configpb)
        pkg.pkg_name = pkg_name if pkg_name is not None else basename(
            analyze_path)
        if pkg_version is not None:
            pkg.pkg_version = pkg_version
        pkg.language = ast_pb2.JAVASCRIPT
        for infile in infiles:
            # Close each source file right after reading instead of leaking the handle.
            with open(infile, 'r') as source_file:
                all_source = source_file.read()
            try:
                # tree = esprima.parseModule(), esprima.parseScript()
                tree = esprima.parse(all_source, options={'loc': True})
            except Exception as e:
                # A file that cannot be parsed is reported and skipped; the run continues.
                logging.error(
                    "Fatal error %s parsing file %s! Skipping this file!", e,
                    infile)
                continue
            visitor = JavaScriptDeclRefVisitor(source=all_source,
                                               configpb=configpb)
            visitor.visit(tree)
            logging.warning("collected functions: %s",
                            Counter(visitor.get_declrefs()).items())

            filepb = self._get_filepb(infile, root)
            for base, name, args, source_text, source_range in visitor.get_declrefs(
            ):
                api_result = self._get_api_result(base, name, args,
                                                  source_text, source_range,
                                                  filepb)
                pkg.api_results.add().CopyFrom(api_result)

        # optionally evaluate smt formula
        if evaluate_smt:
            satisfied = self._check_smt(astgen_results=[resultpb],
                                        configpath=configpath)
            resultpb.pkgs[0].config.smt_satisfied = satisfied

        # save resultpb
        write_proto_to_file(resultpb, outfile, binary=False)

        # clean up residues
        self._cleanup_astgen(analyze_path=analyze_path,
                             is_decompress_path=is_decompress_path)

コード例 #16

0

ファイルを表示

ファイル: translate_to_golang.py プロジェクト: epheien/ccxt

def parse_by_syntax(str_code):
    """Strip ``await``, turn ``async`` into ``function``, parse, and analyse the tree body."""
    # Applied in this order: first drop 'await ', then rewrite 'async '.
    for old, new in (('await ', ''), ('async ', 'function ')):
        str_code = str_code.replace(old, new)
    return syntax_analysis(esprima.parse(str_code).body)

コード例 #17

0

ファイルを表示

ファイル: AST.py プロジェクト: xianlimei/thug

 def __init_ast(self, script):
     """Parse *script* into ``self.ast``, passing ``self.visitor`` as the parser delegate."""
     parse_options = dict(loc=True, tolerant=True)
     self.ast = esprima.parse(script, parse_options, delegate=self.visitor)

コード例 #18

0

ファイルを表示

from __future__ import print_function

import json
import esprima

# Hand-assemble an expression statement wrapping a call to `alert` from raw node objects:
callee = esprima.nodes.Identifier("alert")
args = [esprima.nodes.Literal("other alert", "'other alert'")]
call = esprima.nodes.CallExpression(callee, args)
other_alert = esprima.nodes.ExpressionStatement(call)

# Statement table: two entries are the first statement of a parsed snippet,
# one is the hand-built statement from above.
expression_statements = dict(
    some_alert=esprima.parse("alert('some alert')").body[0],
    other_alert=other_alert,
    console_log=esprima.parse("console.log()").body[0],
)


class MyVisitor(esprima.NodeVisitor):
    def transform_CallExpression(self, node, metadata):
        # Calls to anything other than `alert` go through unchanged.
        if node.callee.name != 'alert':
            return self.generic_transform(node, metadata)
        # An `alert()` call is swapped for the stored `console.log()` call node,
        # which takes over the original arguments.
        replacement = expression_statements['console_log'].expression
        replacement.arguments = node.arguments
        return self.generic_transform(replacement, metadata)

    def visit_BlockStatement(self, node):
        # Append the stored `some_alert` statement to the block body.
        extra_statement = expression_statements['some_alert']
        node.body.append(extra_statement)

コード例 #19

0

ファイルを表示

ファイル: js_parser_builder.py プロジェクト: MOwneZ/Javascript-UML-Generator-Refactored

 def parse_file(self):
     """Run ``parse`` on ``self._file``, keep the result, then refresh the class set."""
     parsed = parse(self._file)
     self._parsed_file = parsed
     self._set_classes()

コード例 #20

0

ファイルを表示

 def __init__(self, javascript='const x = 10;\nconst y = x * 2 + 1;'):
     #parse code to generate tree
     self.tree = esprima.parse(javascript)