Python clone Examples, phply.phplex.lexer.clone Python Examples

Example #1

0

Show file

File: parser.py Project: xisafe/Cobra-W

def scan_parser(code_content, sensitive_func, vul_lineno, file_path):
    """
    Entry point of the detection: parse the code and analyse it once per
    sensitive function.

    :param code_content: content of the file to inspect
    :param sensitive_func: sensitive functions to look for, passed as a list
    :param vul_lineno: line number of the vulnerable function
    :param file_path: name of the file being inspected
    :return: the module-level ``scan_results`` list, which is reset to an
        empty list at the start of every call
    """
    global scan_results
    scan_results = []

    try:
        ast_nodes = make_parser().parse(code_content,
                                        lexer=lexer.clone(),
                                        tracking=with_line,
                                        debug=False)

        # Check for each sensitive function in turn whether it occurs in the
        # code and, if so, recursively whether its parameters are
        # controllable; the same parsed content is analysed once per entry.
        for sensitive_name in sensitive_func:
            analysis(ast_nodes,
                     sensitive_name,
                     [],
                     int(vul_lineno),
                     file_path,
                     function_params=None)
    except SyntaxError as err:
        logger.warning('[AST] [ERROR]:{e}'.format(e=err))

    return scan_results

Example #2

0

Show file

File: extraction_myphplistener_tests.py Project: tiandiyixian/wirecaml

def get_listener(code, fake_filename='filename.php'):
    """Parse ``code``, traverse the result and return the listener used.

    The listener is a ``MyPHPListener`` named ``fake_filename`` and created
    with a fixed two-entry line map.
    """
    ast_nodes = make_parser().parse(code,
                                    debug=False,
                                    tracking=True,
                                    lexer=lexer.clone())

    php_listener = MyPHPListener(
        name=fake_filename,
        line_map=[(None, None), ('filename.php', 1)],
    )
    php_traverser.traverse(ast_nodes, php_listener)
    return php_listener

Example #3

0

Show file

File: parser.py Project: xisafe/Cobra-W

def deep_parameters_back(node, back_node, function_params, count, file_path):
    """
    Deep recursive back-tracing that follows includes into other files.

    ``parameters_back`` is first run on the name of ``node``. When it reports
    ``is_co == 3``, the ``php.Include`` nodes of ``back_node`` are visited
    from last to first: each included file is read and parsed, and the search
    continues recursively inside it for ``php.Variable(cp)``.

    :param node: node whose name (via ``get_node_name``) is traced
    :param back_node: nodes that are searched
    :param function_params: passed through unchanged to ``parameters_back``
    :param count: current recursion depth; once it exceeds 20 the include
        search is skipped and the current result is returned
    :param file_path: path of the file ``back_node`` comes from; include
        targets are resolved relative to its directory
    :return: the ``(is_co, cp, expr_lineno)`` triple of the last search
    """
    count += 1

    params = get_node_name(node)
    is_co, cp, expr_lineno = parameters_back(params, back_node,
                                             function_params)

    if count > 20:
        logger.warning("[Deep AST] depth too big to auto exit...")
        return is_co, cp, expr_lineno

    if is_co != 3:
        return is_co, cp, expr_lineno

    logger.debug("[Deep AST] try to find include, start deep AST")

    for include_node in back_node[::-1]:
        if not isinstance(include_node, php.Include):
            continue

        # Build the include target next to the *current* file. ``file_path``
        # itself is deliberately left untouched so that every include found
        # in ``back_node`` is resolved against the same directory instead of
        # against the previously tried include.
        path_parts = re.split(r"[\/\\]", file_path)
        path_parts.pop()
        path_parts.append(include_node.expr)
        include_path = "/".join(path_parts)

        try:
            logger.debug("[Deep AST] open new file {file_path}".format(
                file_path=include_path))
            # The context manager closes the handle even when read() fails.
            with open(include_path, 'r') as f:
                file_content = f.read()
        except Exception:
            # Best effort: an unreadable include is skipped, but interpreter
            # level signals such as KeyboardInterrupt are no longer swallowed.
            logger.warning(
                "[Deep AST] error to open new file...continue")
            continue

        parser = make_parser()
        all_nodes = parser.parse(file_content,
                                 debug=False,
                                 lexer=lexer.clone(),
                                 tracking=with_line)

        is_co, cp, expr_lineno = deep_parameters_back(
            php.Variable(cp), all_nodes, function_params, count,
            include_path)

        if is_co == -1:
            break

    return is_co, cp, expr_lineno

Example #4

0

Show file

def scan(code_content, sensitive_func):
    """
    Start the detection: parse the code, dump the parsed nodes and analyse
    them once per sensitive function.

    :param code_content: content of the file to inspect
    :param sensitive_func: sensitive functions to look for, passed as a list
    :return: None
    """
    ast_nodes = make_parser().parse(code_content,
                                    lexer=lexer.clone(),
                                    tracking=with_line,
                                    debug=False)
    pprint.pprint(ast_nodes)

    # Check for each sensitive function whether it occurs in the code and,
    # if so, recursively whether its parameters are controllable.
    for sensitive_name in sensitive_func:
        analysis(ast_nodes, sensitive_name)

Example #5

0

Show file

File: cold.py Project: pombredanne/cold_corpses

def analyze(path):
    """Parse the file at ``path`` and print every parsed item.

    The file is read as latin1 and closed before parsing starts. Items that
    have a ``generic`` attribute are printed as ``generic(with_lineno=True)``;
    all others are printed as they are. ``SyntaxError`` and ``ValueError``
    raised by the parser are reported on stdout and not re-raised.

    The ``print`` calls use a single pre-formatted argument so that the
    function prints the same text on Python 2 and Python 3.
    """
    with codecs.open(path, 'r', 'latin1') as f:
        src = f.read()

    try:
        items = parser.parse(src, tracking=True, lexer=lexer.clone())
    except SyntaxError as e:
        print("Syntax Error %s %s" % (e.filename, e))
    except ValueError as e:
        print("Syntax Error %s" % (e,))
    else:
        for ast in items:
            if hasattr(ast, 'generic'):
                item = ast.generic(with_lineno=True)
            else:
                item = ast
            print(item)

Example #6

0

Show file

    def pre_ast(self):
        """
        Pre-parse every PHP file listed in ``self.file_list``.

        For each path under the ".php" entry, ``self.pre_result[filepath]``
        receives the language tag, the file content and, when parsing
        succeeds, the parsed nodes under ``'ast_nodes'``. Every parsed
        ``define(...)`` call among those nodes is recorded in
        ``self.define_dict``. A file that fails with ``SyntaxError`` is logged
        and contributes no constants.
        """
        for fileext in self.file_list:

            if ".php" == fileext[0]:
                # Handling logic for PHP files.
                for filepath in fileext[1]['list']:
                    # Reset per file: after a failed parse the constant
                    # search below must neither hit an unbound name nor
                    # re-use the nodes of the previous file.
                    all_nodes = []

                    filepath = os.path.join(self.target_directory, filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'

                    # The context manager guarantees the handle is closed.
                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content,
                                                 debug=False,
                                                 lexer=lexer.clone(),
                                                 tracking=True)

                        # Merge into the result dict.
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except SyntaxError:
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    # Search for all constants.
                    for node in all_nodes:
                        if isinstance(
                                node,
                                php.FunctionCall) and node.name == "define":
                            define_params = node.params
                            logger.debug(
                                "[AST][Pretreatment] new define {}={}".format(
                                    define_params[0].node,
                                    define_params[1].node))
                            self.define_dict[
                                define_params[0].node] = define_params[1].node

Example #7

0

Show file

File: sphinxcontrib_phpautodoc.py Project: tk0miya/tk.phpautodoc

    def parse_code(self, filename):
        """Return the parsed tree for ``filename``, using a pickle cache.

        The cache file lives in the environment's ``doctreedir``. When
        ``is_same_mtime(filename, cachename)`` is true the pickled tree is
        loaded; otherwise the file is read as UTF-8, parsed, pickled to the
        cache, and the cache file's times are set to the source file's
        modification time. Errors from reading, parsing or pickling propagate
        to the caller unchanged.
        """
        basedir = self.state.document.settings.env.doctreedir
        cachename = os.path.join(basedir, basename(filename, 'parse'))

        if is_same_mtime(filename, cachename):
            # NOTE(review): pickle.load can execute arbitrary code from the
            # file it reads; this is only safe while doctreedir is trusted.
            with open(cachename, 'rb') as f:
                return pickle.load(f)

        with codecs.open(filename, 'r', 'utf-8') as f:
            tree = parser.parse(f.read(), lexer=lexer.clone())

        with open(cachename, 'wb') as f:
            pickle.dump(tree, f)

        # Stamp the cache with the source's mtime so the two can be compared
        # on the next call.
        mtime = os.stat(filename).st_mtime
        os.utime(cachename, (mtime, mtime))

        return tree

Example #8

0

Show file

File: parser.py Project: LiGhT1EsS/cobra

def scan_parser(code_content, sensitive_func, vul_lineno):
    """
    Entry point of the detection: parse the code and analyse it once per
    sensitive function.

    :param code_content: content of the file to inspect
    :param sensitive_func: sensitive functions to look for, passed as a list
    :param vul_lineno: line number of the vulnerable function
    :return: the module-level ``scan_results`` list, which is reset to an
        empty list at the start of every call
    """
    global scan_results
    scan_results = []

    try:
        php_parser = make_parser()
        ast_nodes = php_parser.parse(
            code_content,
            lexer=lexer.clone(),
            tracking=with_line,
            debug=False,
        )

        # Check for each sensitive function in turn whether it occurs in the
        # code and, if so, recursively whether its parameters are
        # controllable; the same parsed content is analysed once per entry.
        for sensitive_name in sensitive_func:
            analysis(ast_nodes, sensitive_name, [], int(vul_lineno),
                     function_params=None)
    except SyntaxError as err:
        logger.warning('[AST] [ERROR]:{e}'.format(e=err))

    return scan_results

Example #9

0

Show file

File: parser.py Project: zshell/Cobra-W

def anlysis_params(param, code_content, file_path, lineno):
    """
    Relay step that pre-processes the data when called from cast.

    Wraps ``param`` in a ``php.Variable``, parses ``code_content`` and hands
    only the parsed nodes located before ``lineno`` to
    ``deep_parameters_back``.

    :param param: name wrapped in ``php.Variable`` and traced back
    :param code_content: source text that is parsed
    :param file_path: passed through to ``deep_parameters_back``
    :param lineno: nodes whose ``lineno`` is not below ``int(lineno)`` are
        ignored; also passed through to ``deep_parameters_back``
    :return: the ``(is_co, cp, expr_lineno)`` triple from
        ``deep_parameters_back``
    """
    target = php.Variable(param)

    parsed_nodes = make_parser().parse(code_content,
                                       lexer=lexer.clone(),
                                       tracking=with_line,
                                       debug=False)

    # Keep only the nodes that appear before the line in question.
    earlier_nodes = [
        candidate for candidate in parsed_nodes
        if candidate.lineno < int(lineno)
    ]

    is_co, cp, expr_lineno = deep_parameters_back(target, earlier_nodes, None,
                                                  0, file_path, lineno)
    return is_co, cp, expr_lineno

Example #10

0

Show file

File: cfg.py Project: tiandiyixian/wirecaml

def create_graph(path, file):
    """Preprocess and parse ``file``, traverse it and return the graph.

    A ``Preprocessor`` built from ``path`` produces the line map and the
    source text; the text is parsed and traversed with a ``MyPHPListener``,
    whose ``get_graph()`` result is returned.
    """
    # Preprocess the file so that includes are considered. The line map holds
    # a mapping between line number and original file + original line number.
    line_map, file_str = Preprocessor(path).preprocess_file(file)

    # The definition register is reset with every new graph.
    DefinitionRegister.reset()

    php_parser = make_parser()
    php_lexer = lexer.clone()
    ast_nodes = php_parser.parse(file_str,
                                 debug=False,
                                 tracking=True,
                                 lexer=php_lexer)

    graph_listener = MyPHPListener(name=file, line_map=line_map)
    php_traverser.traverse(ast_nodes, graph_listener)

    return graph_listener.get_graph()

Example #11

0

Show file

    async def pre_ast(self):
        """
        Drain ``self.target_queue`` and pre-process every queued file group.

        Each queue entry is a pair whose first item is a file extension and
        whose second item is a dict holding the paths under ``'list'``. The
        extension and ``self.lan`` select one of four branches:

        * PHP: parse the file, store the nodes in
          ``self.pre_result[filepath]['ast_nodes']`` and record every parsed
          ``define(...)`` call in ``self.define_dict``.
        * Chrome extension: unpack the archive with ``un_zip``, read
          ``manifest.json`` and queue the JS / HTML files it references.
        * HTML: collect ``<script src>`` paths, append inline scripts to a
          ``tmp.js`` next to the HTML file and queue them as ``.js`` files.
        * JavaScript: beautify into ``<file>.pretty`` (when that file does
          not exist yet), parse with esprima and store the nodes.

        The loop stops early when ``self.lan`` is empty. Although declared
        ``async``, the body contains no ``await``.

        :return: ``True`` (unless ``exit()`` is reached on KeyboardInterrupt)
        """
        while not self.target_queue.empty():

            fileext = self.target_queue.get()

            if not self.lan:
                break

            if fileext[0] in ext_dict['php'] and 'php' in self.lan:
                # Handling logic for PHP files.
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content,
                                                 debug=False,
                                                 lexer=lexer.clone(),
                                                 tracking=True)

                        # Merge into the result dict.
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except Exception:
                        # Best effort for any other parser failure; signals
                        # such as KeyboardInterrupt are left to propagate.
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

                    # Search for all constants.
                    for node in all_nodes:
                        if isinstance(
                                node,
                                php.FunctionCall) and node.name == "define":
                            define_params = node.params
                            logger.debug(
                                "[AST][Pretreatment] new define {}={}".format(
                                    define_params[0].node,
                                    define_params[1].node))

                            self.define_dict[
                                define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict[
                    'chromeext'] and 'chromeext' in self.lan:

                # Pre-processing for Chrome extensions.
                # Need to extract the JS and HTML inside?
                for filepath in fileext[1]['list']:
                    child_files = []
                    child_files_html = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First try to unpack the crx.
                    try:
                        target_files_path = un_zip(filepath)
                        self.pre_result[filepath][
                            'target_files_path'] = target_files_path

                    except zipfile.BadZipFile:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} not zip".
                            format(filepath))
                        continue

                    except OSError:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} unzip error".
                            format(filepath))
                        continue

                    # Analyse manifest.json.
                    manifest_path = os.path.join(target_files_path,
                                                 "manifest.json")

                    # The target may be a single file, which needs special
                    # handling here.
                    if not self.target_directory.endswith(
                            ("/", "\\")) and not os.path.isdir(
                                self.target_directory):

                        path_list = re.split(r'[\\|/]', self.target_directory)
                        relative_path = os.path.join(path_list[-1] + "_files")
                    else:
                        relative_path = target_files_path.split(
                            self.target_directory)[-1]

                    if relative_path.startswith(('\\', "/")):
                        relative_path = relative_path[1:]

                    if not os.path.isfile(manifest_path):
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

                    with codecs.open(manifest_path,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        manifest_content = fi.read()

                    try:
                        # No ``encoding`` keyword: json.loads rejects it with
                        # a TypeError from Python 3.9 on, and the input is
                        # already a decoded str.
                        manifest = json.loads(manifest_content)

                    except json.decoder.JSONDecodeError:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

                    self.pre_result[filepath]["manifest"] = manifest

                    # Optimisation: when the JS and HTML should not be
                    # inspected any deeper, skip the rest.
                    if len(self.lan) and self.lan == 'chromeext':
                        logger.debug(
                            "[Pretreatment][Chrome Ext] pass js & html scan..."
                        )
                        continue

                    # content scripts
                    if "content_scripts" in manifest:
                        for script in manifest["content_scripts"]:
                            if "js" in script:
                                child_files.extend([
                                    os.path.join(relative_path, js)
                                    for js in script['js']
                                ])

                    # background js
                    if "background" in manifest:
                        if "scripts" in manifest["background"]:
                            child_files.extend([
                                os.path.join(relative_path, js)
                                for js in manifest["background"]["scripts"]
                            ])

                        # background html
                        if "page" in manifest["background"]:
                            child_files_html.append(
                                os.path.join(relative_path,
                                             manifest["background"]["page"]))

                    # popup.html
                    if "browser_action" in manifest:
                        if "default_popup" in manifest["browser_action"]:
                            child_files_html.append(
                                os.path.join(
                                    relative_path,
                                    manifest["browser_action"]
                                    ["default_popup"]))

                    # web_accessible_resources
                    if "web_accessible_resources" in manifest:
                        for resource in manifest["web_accessible_resources"]:
                            if ".js" in resource:
                                child_files.append(
                                    os.path.join(relative_path, resource))

                            if ".html" in resource:
                                child_files_html.append(
                                    os.path.join(relative_path, resource))

                    # chrome_url_overrides
                    if "chrome_url_overrides" in manifest:
                        for key in manifest["chrome_url_overrides"]:
                            child_files_html.append(
                                os.path.join(
                                    relative_path,
                                    manifest["chrome_url_overrides"][key]))

                    self.pre_result[filepath]["child_files"] = child_files

                    if len(child_files):
                        # Add the content_scripts to the queue of files.
                        self.target_queue.put(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                        # Operate on the externally supplied files through
                        # the shallow copy.
                        self.file_list.append(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                    if len(child_files_html):
                        self.target_queue.put(('.html', {
                            'count': len(child_files_html),
                            'list': child_files_html
                        }))

            elif fileext[0] in ext_dict['html'] and 'javascript' in self.lan:
                # html only found js
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    script_list = []

                    try:
                        with codecs.open(filepath,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            code_content = fi.read()

                    except OSError:
                        # Covers FileNotFoundError as well.
                        continue

                    # tmp.js save all inline javascript code
                    tmp_path = os.path.join(os.path.dirname(filepath),
                                            "tmp.js")

                    # Directory of the HTML file. os.path.dirname works with
                    # the platform's separator, whereas joining the parts
                    # with a backslash only yields a usable path on Windows.
                    parents_path = os.path.normpath(os.path.dirname(filepath))

                    try:
                        # The context manager closes tmp.js even when parsing
                        # or writing fails part-way through.
                        with codecs.open(tmp_path,
                                         "a",
                                         encoding='utf-8',
                                         errors='ignore') as fi2:
                            soup = BeautifulSoup(code_content, "html.parser")

                            for script_tag in soup.find_all('script'):
                                script_attrs = script_tag.attrs

                                if 'src' in script_attrs:
                                    script_list.append(
                                        os.path.join(parents_path,
                                                     script_attrs['src']))

                                else:
                                    # No src attribute means inline JS.
                                    script_content = script_tag.string

                                    fi2.write(
                                        " \n{}\n ".format(script_content))

                        if tmp_path not in script_list:
                            script_list.append(tmp_path)

                        # Add the collected scripts to the queue of files.
                        self.target_queue.put(('.js', {
                            'count': len(script_list),
                            'list': script_list
                        }))

                        # Operate on the externally supplied files through
                        # the shallow copy.
                        self.file_list.append(('.js', {
                            'count': len(script_list),
                            'list': script_list
                        }))

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))
                        continue

            elif fileext[0] in ext_dict[
                    'javascript'] and 'javascript' in self.lan:

                # Pre-processing for JavaScript.
                # The JS needs semantic analysis.
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)

                    if not filepath.endswith(".js"):
                        continue

                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    try:
                        with codecs.open(filepath,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            code_content = fi.read()

                    except OSError:
                        # Covers FileNotFoundError as well.
                        continue

                    # Beautify the code and write it to a new file.
                    new_filepath = filepath + ".pretty"

                    try:
                        # NOTE(review): when the .pretty file already exists
                        # the un-beautified content is parsed below, so the
                        # "loc" values can differ between the first and later
                        # runs - confirm this is intended.
                        if not os.path.isfile(new_filepath):
                            # Beautify before opening the target so that a
                            # failure does not leave an empty .pretty file.
                            code_content = jsbeautifier.beautify(code_content)
                            with codecs.open(new_filepath,
                                             "w",
                                             encoding='utf-8',
                                             errors='ignore') as fi2:
                                fi2.write(code_content)

                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Merge into the result dict.
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except esprima.error_handler.Error:
                        logger.warning(
                            '[AST] [ERROR] Invalid regular expression in {}...'
                            .format(filepath))

                    except KeyboardInterrupt:
                        # logger.log() needs an explicit level as its first
                        # argument; use the level-specific call instead.
                        logger.warning('[AST stop...')
                        exit()

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

            # Manual collection?
            gc.collect()

        return True

Example #12

0

Show file

File: parser.py Project: shadowliangliang/cobra

def scan(code_content, vul_function, vul_function_line):
    """Parse ``code_content`` and run ``traversal`` over the exported AST.

    :param code_content: source text handed to the parser built by ``make_parser()``
    :param vul_function: forwarded unchanged to ``traversal``
    :param vul_function_line: forwarded unchanged to ``traversal``
    :return: whatever ``traversal`` returns for the exported nodes
    """
    source_parser = make_parser()
    # A cloned lexer is passed for every parse; line tracking follows the
    # module-level ``with_line`` flag.
    parsed_tree = source_parser.parse(
        code_content,
        debug=False,
        lexer=lexer.clone(),
        tracking=with_line,
    )
    exported_nodes = export(parsed_tree)
    return traversal(exported_nodes, vul_function, vul_function_line)

Example #13

0

Show file

File: pretreatment.py Project: leiyuch/Cobra-W

    async def pre_ast(self):
        """Drain ``self.target_queue`` and pre-process every queued file group.

        Each queue item is indexed as ``item[0]`` (an extension looked up in
        ``ext_dict``) and ``item[1]['list']`` (the file paths of that group).
        Depending on the extension the files are handled as:

        * PHP: the file is parsed and the resulting nodes are stored under
          ``self.pre_result[path]['ast_nodes']``; top-level ``define(...)``
          calls are recorded in ``self.define_dict``.
        * Chrome extension: the archive is unpacked with ``un_zip``, its
          ``manifest.json`` is loaded, and the ``content_scripts`` JS files are
          queued (and appended to ``self.file_list``) as a new ``'.js'`` group.
        * JavaScript: the source is beautified into ``<path>.pretty`` when that
          file does not exist yet, then parsed with ``esprima``.

        Parser failures are logged and leave ``ast_nodes`` as an empty list.

        :return: ``True`` once the queue is empty
        """
        # NOTE(review): ``get()`` is not awaited, so ``target_queue`` is
        # presumably a synchronous queue -- confirm against the constructor.
        while not self.target_queue.empty():

            fileext = self.target_queue.get()

            if fileext[0] in ext_dict['php']:
                # Handling of PHP files.
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    # ``with`` guarantees the handle is released even if the
                    # read fails.
                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content,
                                                 debug=False,
                                                 lexer=lexer.clone(),
                                                 tracking=True)

                        # Store the parsed nodes in the result dictionary.
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except Exception:
                        # ``Exception`` rather than a bare ``except`` so that
                        # KeyboardInterrupt / SystemExit are not swallowed.
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

                    # Collect every constant declared through define().
                    for node in all_nodes:
                        if not (isinstance(node, php.FunctionCall)
                                and node.name == "define"):
                            continue

                        define_params = node.params
                        if len(define_params) < 2:
                            # define() without both name and value cannot be
                            # recorded; skip it instead of raising IndexError.
                            continue

                        logger.debug(
                            "[AST][Pretreatment] new define {}={}".format(
                                define_params[0].node,
                                define_params[1].node))

                        self.define_dict[
                            define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict['chromeext']:

                # Pre-processing for Chrome extensions: the JS (and HTML?)
                # inside the package has to be extracted.
                for filepath in fileext[1]['list']:
                    child_files = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First unpack the crx archive.
                    try:
                        target_files_path = un_zip(filepath)
                        self.pre_result[filepath][
                            'target_files_path'] = target_files_path

                    except zipfile.BadZipFile:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} not zip".
                            format(filepath))
                        continue

                    # Analyse manifest.json.
                    manifest_path = os.path.join(target_files_path,
                                                 "manifest.json")

                    # The scan target may be a single file, which needs its
                    # own way of deriving the relative path.
                    target_is_single_file = (
                        not self.target_directory.endswith(("/", "\\"))
                        and not os.path.isdir(self.target_directory))

                    if target_is_single_file:
                        relative_path = os.path.join(
                            re.split(r'[\\|/]', self.target_directory)[-1] +
                            "_files")
                    else:
                        relative_path = target_files_path.split(
                            self.target_directory)[-1]

                    if relative_path.startswith(('\\', '/')):
                        relative_path = relative_path[1:]

                    if not os.path.isfile(manifest_path):
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

                    with codecs.open(manifest_path,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        manifest_content = fi.read()

                    try:
                        # No ``encoding`` keyword: it was removed from
                        # json.loads in Python 3.9 and raises TypeError there;
                        # the content is already a decoded str.
                        manifest = json.loads(manifest_content)

                    except json.decoder.JSONDecodeError:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

                    self.pre_result[filepath]["manifest"] = manifest

                    if "content_scripts" in manifest:
                        for script in manifest["content_scripts"]:
                            if 'js' in script:
                                child_files.extend([
                                    os.path.join(relative_path, js)
                                    for js in script['js']
                                ])

                    self.pre_result[filepath]["child_files"] = child_files

                    # Queue the content scripts as a new JS file group.
                    self.target_queue.put(('.js', {
                        'count': len(child_files),
                        'list': child_files
                    }))

                    # Also append to file_list; presumably this is a shared
                    # reference to the caller's list -- verify against caller.
                    self.file_list.append(('.js', {
                        'count': len(child_files),
                        'list': child_files
                    }))

            elif fileext[0] in ext_dict['javascript']:

                # Pre-processing for JavaScript: semantic analysis of the JS.
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    # Beautify the code and write it to a new file.
                    new_filepath = filepath + ".pretty"

                    # NOTE(review): when the .pretty file already exists the
                    # raw, un-beautified content is kept and parsed -- confirm
                    # this is intended.
                    if not os.path.isfile(new_filepath):
                        # Beautify before opening the output so a beautifier
                        # failure cannot leave an empty .pretty file behind
                        # (which would suppress beautification on later runs).
                        code_content = jsbeautifier.beautify(code_content)
                        with codecs.open(new_filepath,
                                         "w",
                                         encoding='utf-8',
                                         errors='ignore') as fi2:
                            fi2.write(code_content)

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Store the parsed nodes in the result dictionary.
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except esprima.error_handler.Error:
                        logger.warning(
                            '[AST] [ERROR] Invalid regular expression in {}...'
                            .format(filepath))

                    except KeyboardInterrupt:
                        # NOTE(review): if ``logger`` is a stdlib
                        # logging.Logger, ``log`` needs a level as its first
                        # argument -- confirm this call is valid.
                        logger.log('[AST stop...')
                        exit()

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

            # Manually trigger garbage collection after each file group.
            gc.collect()

        return True

Example #14

0

Show file

File: pretreatment.py Project: p0prxx/Cobra-W

    def pre_ast(self, lan=None):

        if lan is not None:
            # 检查是否在可ast pasre列表中
            if not list(set(lan).intersection(set(could_ast_pase_lans))):

                logger.info("[AST][Pretreatment] Current scan target language does not require ast pretreatment...")
                return True

        for fileext in self.file_list:

            if fileext[0] in ext_dict['php']:
                # 下面是对于php文件的处理逻辑
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    fi = codecs.open(filepath, "r", encoding='utf-8', errors='ignore')
                    code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=True)

                        # 合并字典
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except SyntaxError as e:
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except AssertionError as e:
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except:
                        logger.warning('[AST] something error, {}'.format(traceback.format_exc()))

                    # 搜索所有的常量
                    for node in all_nodes:
                        if isinstance(node, php.FunctionCall) and node.name == "define":
                            define_params = node.params
                            logger.debug("[AST][Pretreatment] new define {}={}".format(define_params[0].node, define_params[1].node))

                            self.define_dict[define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict['chromeext']:
                child_files = []

                # 针对chrome 拓展的预处理
                # 需要提取其中的js和html？
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # 首先想办法解压crx
                    target_files_path = un_zip(filepath)
                    self.pre_result[filepath]['target_files_path'] = target_files_path

                    # 分析manifest.json
                    manifest_path = os.path.join(target_files_path, "manifest.json")
                    relative_path = target_files_path.split(self.target_directory)[-1]

                    if relative_path.startswith('\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                    if os.path.isfile(manifest_path):
                        fi = codecs.open(manifest_path, "r", encoding='utf-8', errors='ignore')
                        manifest_content = fi.read()
                        manifest = json.loads(manifest_content)

                        self.pre_result[filepath]["manifest"] = manifest

                        if "content_scripts" in manifest:
                            for script in manifest["content_scripts"]:
                                child_files.extend([os.path.join(relative_path, js) for js in script['js']])

                        self.pre_result[filepath]["child_files"] = child_files
                    else:
                        logger.warning("[Pretreatment][Chrome Ext] File {} parse error...".format(target_files_path))
                        continue

            elif fileext[0] in ext_dict['javascript']:

                # 针对javascript的预处理
                # 需要对js做语义分析
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    fi = codecs.open(filepath, "r", encoding='utf-8', errors='ignore')
                    code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # 合并字典
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except SyntaxError as e:
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except AssertionError as e:
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except:
                        logger.warning('[AST] something error, {}'.format(traceback.format_exc()))