Esempio n. 1
0
    def parseConfiguration(self):
        """Extract the site title and MySQL settings from the PHP config file.

        The file at ``self.fsconfig`` is parsed as PHP. When its first
        statement is a ``return`` of an array, the ``name`` entry becomes
        ``self.title`` and ``components -> db`` supplies ``self.dbhost``,
        ``self.dbname``, ``self.dbuser`` and ``self.dbpass``. Nothing is read
        or validated when the parse result is empty or does not start with a
        ``return`` statement.

        Raises:
            HHConfigError: if the returned array has no ``name`` entry, or if
                ``self.dbhost`` / ``self.dbname`` is empty after parsing.
        """
        # The database name stops at the next ';' so that trailing DSN options
        # (for example ";charset=utf8") do not end up inside ``self.dbname``.
        dsn_re = re.compile(r"^mysql:host=([^;]+);dbname=([^;]+)(?:;.*)?$")

        def array_get(array, key):
            """Return the value stored under ``key`` in an ``Array`` node, else None."""
            if isinstance(array, Array):
                for element in array.nodes:
                    if element.key == key:
                        return element.value
            return None

        parser = make_parser()
        with open(self.fsconfig) as f:
            ast = parser.parse(f.read(), lexer=phplex.lexer.clone())

        if not ast or not isinstance(ast[0], Return):
            return

        config = ast[0].node
        self.title = array_get(config, "name")
        if not self.title:
            raise HHConfigError(self, "no title given")
        logging.debug("HH.parseConfiguration: title={}".format(self.title))

        # array_get() returns None for anything that is not an Array node, so
        # a missing "components" or "db" entry simply yields no DSN.
        db = array_get(array_get(config, "components"), "db")
        dsn = array_get(db, "dsn")
        if dsn:
            logging.debug("HH.parseConfiguration: dsn={}".format(dsn))
            m = dsn_re.match(dsn)
            if m:
                self.dbhost = m.group(1)
                self.dbname = m.group(2)
                self.dbuser = array_get(db, "username")
                self.dbpass = array_get(db, "password")

        # NOTE(review): relies on dbhost/dbname being initialised elsewhere
        # (presumably in __init__) when no DSN matched - confirm.
        if not self.dbhost or not self.dbname:
            raise HHConfigError(self, "no database given")
Esempio n. 2
0
def scan_parser(code_content, sensitive_func, vul_lineno, file_path):
    """
    Entry point of a scan: parse the PHP source and analyse each sensitive function.

    :param code_content: content of the file to check
    :param sensitive_func: list of sensitive function names to look for
    :param vul_lineno: line number of the vulnerable function call
    :param file_path: name of the file being checked
    :return: the module-level ``scan_results`` list, which is reset to an
        empty list at the start of every call
    """
    global scan_results
    scan_results = []

    try:
        php_parser = make_parser()
        all_nodes = php_parser.parse(code_content,
                                     debug=False,
                                     lexer=lexer.clone(),
                                     tracking=with_line)
        # The whole tree is analysed once per sensitive function, each pass
        # starting with its own empty backtracking list.
        for func in sensitive_func:
            analysis(all_nodes, func, [], int(vul_lineno), file_path,
                     function_params=None)
    except SyntaxError as e:
        logger.warning('[AST] [ERROR]:{e}'.format(e=e))

    return scan_results
def get_listener(code, fake_filename='filename.php'):
    """Parse ``code`` and return the listener after it has walked the tree.

    :param code: PHP source text to parse
    :param fake_filename: file name to report for the snippet; used as the
        listener name and as the file recorded in the line map
    :return: the ``MyPHPListener`` instance after traversal
    """
    parser = make_parser()

    # The file entry used to be hard-coded to 'filename.php', which disagreed
    # with the listener name whenever a caller passed another fake_filename.
    # NOTE(review): index 0 looks like a placeholder so that entry 1 describes
    # source line 1 - confirm against MyPHPListener.
    line_map = [(None, None), (fake_filename, 1)]

    nodes = parser.parse(code, lexer=lexer.clone(), tracking=True, debug=False)
    listener = MyPHPListener(line_map=line_map, name=fake_filename)
    php_traverser.traverse(nodes, listener)

    return listener
Esempio n. 4
0
def deep_parameters_back(node, back_node, function_params, count, file_path):
    """
    Backtrack a parameter, following ``include`` statements into other files.

    :param node: node whose name is backtracked
    :param back_node: nodes to search; includes are looked up in reverse order
    :param function_params: passed through to ``parameters_back``
    :param count: current recursion depth; nothing is followed past depth 20
    :param file_path: path of the file being analysed; every include is
        resolved relative to the directory of this file
    :return: tuple ``(is_co, cp, expr_lineno)``
    """
    count += 1

    params = get_node_name(node)
    is_co, cp, expr_lineno = parameters_back(params, back_node,
                                             function_params)

    if count > 20:
        logger.warning("[Deep AST] depth too big to auto exit...")
        return is_co, cp, expr_lineno

    if is_co == 3:
        logger.debug("[Deep AST] try to find include, start deep AST")

        for candidate in back_node[::-1]:
            if not isinstance(candidate, php.Include):
                continue

            # Build the include path in a separate variable: overwriting
            # file_path here would make every later include resolve relative
            # to the previously included file instead of the current one.
            parent_parts = re.split(r"[\/\\]", file_path)[:-1]
            try:
                include_path = "/".join(parent_parts + [candidate.expr])
            except TypeError:
                # The include target is an expression node, not a literal
                # string, so no path can be derived from it.
                logger.warning(
                    "[Deep AST] include target is not a plain string...continue")
                continue

            try:
                logger.debug("[Deep AST] open new file {file_path}".format(
                    file_path=include_path))
                with open(include_path, 'r') as f:
                    file_content = f.read()
            except (IOError, OSError, ValueError):
                logger.warning(
                    "[Deep AST] error to open new file...continue")
                continue

            parser = make_parser()
            all_nodes = parser.parse(file_content,
                                     debug=False,
                                     lexer=lexer.clone(),
                                     tracking=with_line)

            # Continue backtracking the current candidate inside the included
            # file, which becomes the base path for nested includes.
            is_co, cp, expr_lineno = deep_parameters_back(
                php.Variable(cp), all_nodes, function_params, count,
                include_path)

            if is_co == -1:
                break

    return is_co, cp, expr_lineno
Esempio n. 5
0
def parser(filename):
    """Parse a PHP file and return its exported syntax tree.

    :param filename: path of the PHP file
    :return: ``export(...)`` of the parsed tree, or an empty dict when the
        file does not exist
    """
    if os.path.exists(filename):
        with open(filename) as source_file:
            code = source_file.read()

        # The lexer module is reloaded before each parse and its module-level
        # lexer is then used directly.
        reload(phply.phplex)
        logger.debug('Parse file: %s' % filename)
        tree = make_parser().parse(code,
                                   lexer=phply.phplex.lexer,
                                   tracking=True)
        return export(tree)

    return {}
Esempio n. 6
0
def scan(code_content, sensitive_func):
    """
    Parse PHP source, print the node list and analyse it per sensitive function.

    :param code_content: content of the file to check
    :param sensitive_func: list of sensitive function names to look for
    :return: None
    """
    all_nodes = make_parser().parse(code_content,
                                    debug=False,
                                    lexer=lexer.clone(),
                                    tracking=with_line)
    # Dump the complete node list before the analysis starts.
    pprint.pprint(all_nodes)
    # One analysis pass over the tree for every sensitive function.
    for sensitive_name in sensitive_func:
        analysis(all_nodes, sensitive_name)
Esempio n. 7
0
    def pre_ast(self):
        """Pre-parse every PHP file in ``self.file_list`` and collect constants.

        For each PHP file, ``self.pre_result[filepath]`` receives the keys
        ``language``, ``content`` and, when parsing succeeds, ``ast_nodes``.
        Top-level ``define(name, value)`` calls are recorded in
        ``self.define_dict``. Entries whose extension is not ".php" are
        ignored.
        """
        for fileext in self.file_list:

            if ".php" != fileext[0]:
                continue

            # Handling of PHP files.
            for filepath in fileext[1]['list']:
                # Reset per file: without this a SyntaxError left all_nodes
                # unbound (first file) or pointing at the previous file's tree.
                all_nodes = []

                filepath = os.path.join(self.target_directory, filepath)
                self.pre_result[filepath] = {}
                self.pre_result[filepath]['language'] = 'php'

                # Context manager so the handle is closed even if read() fails.
                with codecs.open(filepath,
                                 "r",
                                 encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()

                self.pre_result[filepath]['content'] = code_content

                try:
                    parser = make_parser()
                    all_nodes = parser.parse(code_content,
                                             debug=False,
                                             lexer=lexer.clone(),
                                             tracking=True)

                    # Store the tree next to the other per-file results.
                    self.pre_result[filepath]['ast_nodes'] = all_nodes

                except SyntaxError:
                    logger.warning('[AST] [ERROR] parser {}: {}'.format(
                        filepath, traceback.format_exc()))

                # Collect all constants declared through define().
                for node in all_nodes:
                    if isinstance(
                            node,
                            php.FunctionCall) and node.name == "define":
                        define_params = node.params
                        logger.debug(
                            "[AST][Pretreatment] new define {}={}".format(
                                define_params[0].node,
                                define_params[1].node))
                        self.define_dict[
                            define_params[0].node] = define_params[1].node
Esempio n. 8
0
    def count_function_calls(self):
        """Count ``FunctionCall`` nodes in the PHP file at ``self.input_file``.

        The count is taken from the textual form of the parsed tree.

        Returns:
            Number of times "FunctionCall" occurs in ``str(tree)``, or None
            when a ``RuntimeError`` is raised while resolving magic constants
            or rendering the tree.
        """
        with open(self.input_file, "r") as source:
            php_code = source.read()

        php_parser = make_parser()

        # The lexer is told the file name with forward slashes only.
        php_lexer = phplex.lexer.clone()
        php_lexer.filename = self.input_file.replace("\\", "/")
        tree = php_parser.parse(php_code, lexer=php_lexer)

        try:
            resolve_magic_constants(tree)
            return str(tree).count("FunctionCall")
        except RuntimeError:
            return None
Esempio n. 9
0
def parseFile(filepath):
    """Return the concatenated inline HTML found at the top level of a PHP file.

    Failures are not raised: the path is appended to ``errored``, the
    exception is printed, and the HTML collected so far is returned.

    :param filepath: path of the PHP file to parse
    :return: inline HTML text in document order ("" when none was collected)
    """
    html_parts = []
    try:
        # Context manager so the handle is closed as soon as the text is read.
        with open(filepath) as source:
            code = source.read()

        parser = make_parser()
        # Parse with a clone so lexer state left over from one file (line
        # counter, current lexing state) cannot leak into the next one.
        parsed = parser.parse(code,
                              debug=False,
                              lexer=lexer.clone(),
                              tracking=True)
        print(parsed)
        for node in parsed:
            if isinstance(node, InlineHTML):
                html_parts.append(node.data)
    except Exception as ex:
        # Record the failing path; this used to reference the undefined
        # name ``file`` instead of the ``filepath`` parameter.
        errored.append(filepath)
        print(ex)

    return "".join(html_parts)
Esempio n. 10
0
def scan_parser(code_content, sensitive_func, vul_lineno):
    """
    Entry point of a scan: parse the PHP source and analyse each sensitive function.

    :param code_content: content of the file to check
    :param sensitive_func: list of sensitive function names to look for
    :param vul_lineno: line number of the vulnerable function call
    :return: the module-level ``scan_results`` list, which is reset to an
        empty list at the start of every call
    """
    global scan_results
    scan_results = []

    try:
        tree = make_parser().parse(code_content,
                                   debug=False,
                                   lexer=lexer.clone(),
                                   tracking=with_line)
        # The file content is analysed once per sensitive function; each pass
        # gets a fresh, empty backtracking list.
        for func in sensitive_func:
            analysis(tree, func, [], int(vul_lineno), function_params=None)
    except SyntaxError as err:
        logger.warning('[AST] [ERROR]:{e}'.format(e=err))

    return scan_results
Esempio n. 11
0
def create_graph(path, file):
    """Build the graph of a PHP file after preprocessing it.

    :param path: passed to ``Preprocessor``
    :param file: file to preprocess; also used as the listener name
    :return: the result of ``get_graph()`` on the listener that walked the tree
    """
    # Preprocessing (so includes are considered) yields the source text plus a
    # line map from line number to original file and original line number.
    line_map, file_str = Preprocessor(path).preprocess_file(file)

    # Every new graph starts from a clean definition register.
    DefinitionRegister.reset()

    # Parse with a fresh parser and a cloned lexer.
    nodes = make_parser().parse(file_str,
                                lexer=lexer.clone(),
                                tracking=True,
                                debug=False)

    graph_listener = MyPHPListener(line_map=line_map, name=file)
    php_traverser.traverse(nodes, graph_listener)

    return graph_listener.get_graph()
Esempio n. 12
0
def anlysis_params(param, code_content, file_path, lineno):
    """
    Intermediate data preparation for calls coming from cast.

    :param lineno: line number; only nodes located before it are considered
    :param param: name wrapped into a ``php.Variable`` before backtracking
    :param code_content: PHP source text to parse
    :param file_path: passed through to ``deep_parameters_back``
    :return: tuple ``(is_co, cp, expr_lineno)`` from ``deep_parameters_back``
    """
    target = php.Variable(param)
    all_nodes = make_parser().parse(code_content,
                                    debug=False,
                                    lexer=lexer.clone(),
                                    tracking=with_line)

    # Keep only the top-level nodes that start before the given line.
    vul_nodes = [node for node in all_nodes if node.lineno < int(lineno)]

    # Backtracking starts at depth 0 and without function parameters.
    is_co, cp, expr_lineno = deep_parameters_back(target, vul_nodes, None, 0,
                                                  file_path, lineno)

    return is_co, cp, expr_lineno
Esempio n. 13
0
def phpparse(data):
    """Parse PHP source text and pretty-print every top-level node.

    Nodes that provide a ``generic()`` method are printed in that form.

    :param data: PHP source text
    :raises SyntaxError: re-raised after being logged when parsing fails;
        any other exception is logged as "Critical error" and re-raised
    """
    parser = make_parser(debug=False)
    lexer = phplexer
    # The line counter is reset on the shared lexer before it is cloned.
    lexer.lineno = 1
    try:
        result = parser.parse(data, lexer=lexer.clone(), debug=False)
    except SyntaxError as e:
        if e.lineno is not None:
            # Extra positional arguments of logger.error() are %-format
            # arguments, so the message needs explicit placeholders.
            logger.error("%s near %r", e, e.text)
        else:
            logger.error(e)
        raise
    except Exception:
        logger.error("Critical error")
        raise

    import pprint
    for item in result:
        if hasattr(item, 'generic'):
            item = item.generic()
        pprint.pprint(item)
    parser.restart()
Esempio n. 14
0
    def compute_avg_argument_length(self):
        """Estimate the average length of the argument text passed to functions.

        The PHP file at ``self.input_file`` is parsed and the names of called
        functions are taken from the textual form of the syntax tree. Every
        textual occurrence of such a name in the source, except those directly
        preceded by the ``function`` keyword, is treated as a call whose
        parenthesised argument text is measured.

        Returns:
            The summed argument-text length divided by the number of distinct
            function names (a float), ``0`` when no function name was found,
            or ``None`` when a ``RuntimeError`` is raised while resolving
            magic constants or rendering the tree as text.
        """

        def closing_offset(text, start):
            """Return the 1-based offset from ``start`` of the ')' that closes
            one already-open parenthesis, or None when it is never closed."""
            depth = 1
            for offset, char in enumerate(text[start:], 1):
                if char == '(':
                    depth += 1
                elif char == ')':
                    depth -= 1
                if depth == 0:
                    return offset
            return None

        with open(self.input_file, "r") as fin:
            _file = fin.read()
        parser = make_parser()
        lexer = phplex.lexer.clone()
        lexer.filename = self.input_file
        output = parser.parse(_file, lexer=lexer)
        try:
            resolve_magic_constants(output)
            # Render the tree once; it used to be re-rendered for every call
            # node, and outside this guard.
            tree_text = str(output)
        except RuntimeError:
            return None

        # Text of every "FunctionCall(...)" node, from its '(' to the
        # matching ')'.
        call_texts = []
        for match in re.finditer('FunctionCall', tree_text):
            start = match.end()
            offset = closing_offset(tree_text, start + 1)
            if offset is not None:
                call_texts.append(tree_text[start:start + offset + 1])

        # The first single-quoted token of a call text is taken as its name.
        names = set()
        for call_text in call_texts:
            opening = call_text.find("'")
            if opening == -1:
                continue
            closing = call_text.find("'", opening + 1)
            if closing == -1:
                closing = len(call_text)
            names.add(call_text[opening + 1:closing])

        # Names starting with "$" are ignored.
        functions = [name for name in names if not name.startswith("$")]

        total_length = 0
        for func in functions:
            # The name is used as a regular expression, as before.
            occurrences = [(m.start(), m.end())
                           for m in re.finditer(func, _file)]

            # Drop occurrences that directly follow the ``function`` keyword.
            # A new list is built because removing from the list while
            # iterating over it skipped the entry after each removal; the
            # ``start >= 9`` guard prevents negative slice indices from
            # wrapping around to the end of the file.
            calls = [
                (start, end) for start, end in occurrences
                if not (start >= 9
                        and _file[start - 9:start - 1] == "function")
            ]

            for _, end in calls:
                # Scanning starts one character past the name, with one
                # parenthesis counted as already open.
                offset = closing_offset(_file, end + 1)
                if offset is not None:
                    total_length += len(_file[end + 1:end + offset])

        if not functions:
            return 0
        return float(total_length) / len(functions)
Esempio n. 15
0
    def parse(self, phpsrc):
        """Read the source of a PHP file in and include them as units.

        ``phpsrc`` (bytes) is decoded with ``self.encoding`` and parsed.
        Units are created through ``self.create_and_add_unit`` for top-level
        ``define()`` calls, for assignments to variables or array offsets,
        and for ``return`` statements that return an array. Input that parses
        to a single inline-HTML node is parsed again with a ``<?php`` opening
        tag prepended.
        """
        def handle_array(prefix, nodes, lexer):
            # Create units for the elements of an array, recursing into nested
            # arrays; element names are appended to ``prefix`` with "->".
            # NOTE(review): the lexer's extract_* helpers look stateful, so
            # the order of these calls presumably matters - confirm.
            prefix += lexer.extract_array()
            for item in nodes:
                assert isinstance(item, ArrayElement)
                # Skip empty keys
                if item.key == '':
                    continue
                # Concatenated and plain keys are quoted; numeric keys are not.
                if isinstance(item.key, BinaryOp):
                    name = '\'{0}\''.format(concatenate(item.key))
                elif isinstance(item.key, (int, float)):
                    name = '{0}'.format(item.key)
                else:
                    name = '\'{0}\''.format(item.key)
                if prefix:
                    name = '{0}->{1}'.format(prefix, name)
                # Only nested arrays and string values are handled; any other
                # value type is ignored.
                if isinstance(item.value, Array):
                    handle_array(name, item.value.nodes, lexer)
                elif isinstance(item.value, six.string_types):
                    self.create_and_add_unit(
                        name,
                        item.value,
                        lexer.extract_quote(),
                        lexer.extract_comments(item.lexpositions[1]),
                    )

        def concatenate(item):
            # Flatten a tree of binary operations into one string: strings are
            # used as they are, variables contribute their name.
            if isinstance(item, six.string_types):
                return item
            elif isinstance(item, Variable):
                return item.name
            assert isinstance(item, BinaryOp)
            return concatenate(item.left) + concatenate(item.right)

        parser = make_parser()
        # Wrap the callable of every grammar production.
        # NOTE(review): presumably this is what provides ``lexpositions`` on
        # the nodes - confirm against wrap_production.
        for item in parser.productions:
            item.callable = wrap_production(item.callable)
        lexer = PHPLexer()
        tree = parser.parse(phpsrc.decode(self.encoding), lexer=lexer, tracking=True)
        # Handle text without PHP start
        if len(tree) == 1 and isinstance(tree[0], InlineHTML):
            return self.parse(b'<?php\n' + phpsrc)
        for item in tree:
            if isinstance(item, FunctionCall):
                # define(...): the second parameter supplies the unit value.
                if item.name == 'define':
                    self.create_and_add_unit(
                        lexer.extract_name('COMMA', *item.lexpositions),
                        item.params[1].node,
                        lexer.extract_quote(),
                        lexer.extract_comments(item.lexpositions[1]),
                    )
            elif isinstance(item, Assignment):
                if isinstance(item.node, ArrayOffset):
                    # Assignment to an array offset: string values and '.'
                    # concatenations become units.
                    name = lexer.extract_name('EQUALS', *item.lexpositions)
                    if isinstance(item.expr, six.string_types):
                        self.create_and_add_unit(
                            name,
                            item.expr,
                            lexer.extract_quote(),
                            lexer.extract_comments(item.lexpositions[1]),
                        )
                    elif isinstance(item.expr, BinaryOp) and item.expr.op == '.':
                        self.create_and_add_unit(
                            name,
                            concatenate(item.expr),
                            lexer.extract_quote(),
                            lexer.extract_comments(item.lexpositions[1]),
                        )
                elif isinstance(item.node, Variable):
                    # Assignment to a plain variable: arrays are expanded
                    # element by element, strings and '.' concatenations
                    # become single units.
                    name = lexer.extract_name('EQUALS', *item.lexpositions)
                    if isinstance(item.expr, Array):
                        handle_array(name, item.expr.nodes, lexer)
                    elif isinstance(item.expr, six.string_types):
                        self.create_and_add_unit(
                            name,
                            item.expr,
                            lexer.extract_quote(),
                            lexer.extract_comments(item.lexpositions[1]),
                        )
                    elif isinstance(item.expr, BinaryOp) and item.expr.op == '.':
                        self.create_and_add_unit(
                            name,
                            concatenate(item.expr),
                            lexer.extract_quote(),
                            lexer.extract_comments(item.lexpositions[1]),
                        )
            elif isinstance(item, Return):
                # A returned array is expanded under the 'return' prefix.
                if isinstance(item.node, Array):
                    handle_array('return', item.node.nodes, lexer)
Esempio n. 16
0
    async def pre_ast(self):
        """Drain ``self.target_queue`` and pre-process every queued file group.

        Each queue item is indexed as ``(extension, info)`` where
        ``info['list']`` holds the file paths. Depending on the extension and
        on the languages enabled in ``self.lan`` the group is handled as PHP,
        Chrome extension, HTML or JavaScript; groups matching none of these
        are skipped. Processing stops early when ``self.lan`` is empty.

        Returns:
            True, always.
        """
        while not self.target_queue.empty():

            fileext = self.target_queue.get()

            if not self.lan:
                break

            if fileext[0] in ext_dict['php'] and 'php' in self.lan:
                self._pre_ast_php(fileext[1]['list'])

            elif fileext[0] in ext_dict[
                    'chromeext'] and 'chromeext' in self.lan:
                self._pre_ast_chromeext(fileext[1]['list'])

            elif fileext[0] in ext_dict['html'] and 'javascript' in self.lan:
                self._pre_ast_html(fileext[1]['list'])

            elif fileext[0] in ext_dict[
                    'javascript'] and 'javascript' in self.lan:
                self._pre_ast_javascript(fileext[1]['list'])

            # Explicit garbage collection after every group.
            gc.collect()

        return True

    def _pre_ast_php(self, file_paths):
        """Parse each PHP file, store its AST and record its define() constants."""
        for filepath in file_paths:
            all_nodes = []

            filepath = self.get_path(filepath)
            self.pre_result[filepath] = {}
            self.pre_result[filepath]['language'] = 'php'
            self.pre_result[filepath]['ast_nodes'] = []

            with codecs.open(filepath,
                             "r",
                             encoding='utf-8',
                             errors='ignore') as fi:
                code_content = fi.read()

            try:
                parser = make_parser()
                all_nodes = parser.parse(code_content,
                                         debug=False,
                                         lexer=lexer.clone(),
                                         tracking=True)

                # Store the tree next to the other per-file results.
                self.pre_result[filepath]['ast_nodes'] = all_nodes

            except (SyntaxError, AssertionError):
                logger.warning('[AST] [ERROR] parser {}: {}'.format(
                    filepath, traceback.format_exc()))

            except Exception:
                # Exception rather than a bare except, so interrupts and
                # task cancellation are not swallowed.
                logger.warning('[AST] something error, {}'.format(
                    traceback.format_exc()))

            # Collect all constants declared through define().
            for node in all_nodes:
                if isinstance(node,
                              php.FunctionCall) and node.name == "define":
                    define_params = node.params
                    logger.debug(
                        "[AST][Pretreatment] new define {}={}".format(
                            define_params[0].node, define_params[1].node))

                    self.define_dict[
                        define_params[0].node] = define_params[1].node

    def _pre_ast_chromeext(self, file_paths):
        """Unpack each Chrome extension, read its manifest and queue the
        JavaScript and HTML files the manifest refers to."""
        for filepath in file_paths:
            child_files = []
            child_files_html = []

            filepath = self.get_path(filepath)
            self.pre_result[filepath] = {}
            self.pre_result[filepath]['language'] = 'chromeext'

            # First unpack the crx archive.
            try:
                target_files_path = un_zip(filepath)
                self.pre_result[filepath][
                    'target_files_path'] = target_files_path

            except zipfile.BadZipFile:
                logger.warning(
                    "[Pretreatment][Chrome Ext] file {} not zip".format(
                        filepath))
                continue

            except OSError:
                logger.warning(
                    "[Pretreatment][Chrome Ext] file {} unzip error".format(
                        filepath))
                continue

            # Analyse manifest.json.
            manifest_path = os.path.join(target_files_path, "manifest.json")

            # The target may be a single file, which needs special handling.
            if not (self.target_directory.endswith("/")
                    or self.target_directory.endswith("\\")
                    ) and not os.path.isdir(self.target_directory):

                path_list = re.split(r'[\\|/]', self.target_directory)
                relative_path = os.path.join(path_list[-1] + "_files")
            else:
                relative_path = target_files_path.split(
                    self.target_directory)[-1]

            if relative_path.startswith('\\') or relative_path.startswith(
                    "/"):
                relative_path = relative_path[1:]

            if not os.path.isfile(manifest_path):
                logger.warning(
                    "[Pretreatment][Chrome Ext] File {} parse error...".
                    format(target_files_path))
                continue

            with codecs.open(manifest_path,
                             "r",
                             encoding='utf-8',
                             errors='ignore') as fi:
                manifest_content = fi.read()

            try:
                # json.loads() lost its ``encoding`` keyword in Python 3.9;
                # the content is already decoded text, so it is not needed.
                manifest = json.loads(manifest_content)

            except json.decoder.JSONDecodeError:
                logger.warning(
                    "[Pretreatment][Chrome Ext] File {} parse error...".
                    format(target_files_path))
                continue

            self.pre_result[filepath]["manifest"] = manifest

            # Optimisation: skip the JS and HTML inspection when only the
            # extension itself is to be analysed.
            if len(self.lan) and self.lan == 'chromeext':
                logger.debug(
                    "[Pretreatment][Chrome Ext] pass js & html scan...")
                continue

            # content scripts
            if "content_scripts" in manifest:
                for script in manifest["content_scripts"]:
                    if "js" in script:
                        child_files.extend([
                            os.path.join(relative_path, js)
                            for js in script['js']
                        ])

            # background js
            if "background" in manifest:
                if "scripts" in manifest["background"]:
                    child_files.extend([
                        os.path.join(relative_path, js)
                        for js in manifest["background"]["scripts"]
                    ])

                # background html
                if "page" in manifest["background"]:
                    child_files_html.append(
                        os.path.join(relative_path,
                                     manifest["background"]["page"]))

            # popup.html
            if "browser_action" in manifest:
                if "default_popup" in manifest["browser_action"]:
                    child_files_html.append(
                        os.path.join(
                            relative_path,
                            manifest["browser_action"]["default_popup"]))

            # web_accessible_resources
            if "web_accessible_resources" in manifest:
                for resource in manifest["web_accessible_resources"]:
                    if ".js" in resource:
                        child_files.append(
                            os.path.join(relative_path, resource))

                    if ".html" in resource:
                        child_files_html.append(
                            os.path.join(relative_path, resource))

            # chrome_url_overrides
            if "chrome_url_overrides" in manifest:
                for key in manifest["chrome_url_overrides"]:
                    child_files_html.append(
                        os.path.join(relative_path,
                                     manifest["chrome_url_overrides"][key]))

            self.pre_result[filepath]["child_files"] = child_files

            if len(child_files):
                # Queue the collected scripts for the JavaScript pass.
                self.target_queue.put(('.js', {
                    'count': len(child_files),
                    'list': child_files
                }))

                # NOTE(review): file_list is presumably the caller's list
                # (shared by reference), so this also updates the files the
                # caller sees - confirm.
                self.file_list.append(('.js', {
                    'count': len(child_files),
                    'list': child_files
                }))

            if len(child_files_html):
                self.target_queue.put(('.html', {
                    'count': len(child_files_html),
                    'list': child_files_html
                }))

    def _pre_ast_html(self, file_paths):
        """Collect the scripts of each HTML file: external ones by path,
        inline ones appended to a ``tmp.js`` next to the HTML file."""
        for filepath in file_paths:
            filepath = self.get_path(filepath)
            script_list = []

            try:
                with codecs.open(filepath,
                                 "r",
                                 encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()

            except OSError:
                # Also covers FileNotFoundError, which is an OSError subclass.
                continue

            # Scripts referenced through ``src`` are resolved relative to the
            # directory of the HTML file. os.path.dirname replaces a manual
            # split/join on backslashes that produced broken paths on POSIX.
            parents_path = os.path.normpath(os.path.dirname(filepath))

            # tmp.js collects all inline JavaScript code.
            tmp_path = os.path.join(os.path.dirname(filepath), "tmp.js")
            fi2 = codecs.open(tmp_path,
                              "a",
                              encoding='utf-8',
                              errors='ignore')

            try:
                # ``with`` closes tmp.js even when parsing or writing fails.
                with fi2:
                    soup = BeautifulSoup(code_content, "html.parser")

                    for script_tag in soup.find_all('script'):
                        script_attrs = script_tag.attrs

                        if 'src' in script_attrs:
                            script_list.append(
                                os.path.join(parents_path,
                                             script_attrs['src']))
                            continue

                        # No ``src`` attribute means inline JavaScript. Tags
                        # without a single string child yield None, which
                        # used to be written out as the text "None".
                        script_content = script_tag.string
                        if script_content is not None:
                            fi2.write(" \n{}\n ".format(script_content))

                if tmp_path not in script_list:
                    script_list.append(tmp_path)

                # Queue the collected scripts for the JavaScript pass.
                self.target_queue.put(('.js', {
                    'count': len(script_list),
                    'list': script_list
                }))

                # NOTE(review): file_list is presumably the caller's list
                # (shared by reference) - confirm.
                self.file_list.append(('.js', {
                    'count': len(script_list),
                    'list': script_list
                }))

            except Exception:
                logger.warning('[AST] something error, {}'.format(
                    traceback.format_exc()))
                continue

    def _pre_ast_javascript(self, file_paths):
        """Beautify each ``.js`` file into ``<file>.pretty`` (when that file
        does not exist yet) and store its esprima syntax tree."""
        for filepath in file_paths:
            filepath = self.get_path(filepath)

            if not filepath.endswith(".js"):
                continue

            self.pre_result[filepath] = {}
            self.pre_result[filepath]['language'] = 'javascript'
            self.pre_result[filepath]['ast_nodes'] = []

            try:
                with codecs.open(filepath,
                                 "r",
                                 encoding='utf-8',
                                 errors='ignore') as fi:
                    code_content = fi.read()

            except OSError:
                # Also covers FileNotFoundError, which is an OSError subclass.
                continue

            # The beautified code is written to a new file.
            new_filepath = filepath + ".pretty"

            try:
                # NOTE(review): when the .pretty file already exists, the raw
                # (not beautified) content is what gets parsed below.
                if not os.path.isfile(new_filepath):
                    # Beautify before opening the output so that a failure
                    # does not leave an empty .pretty file behind.
                    code_content = jsbeautifier.beautify(code_content)
                    with codecs.open(new_filepath,
                                     "w",
                                     encoding='utf-8',
                                     errors='ignore') as fi2:
                        fi2.write(code_content)

                all_nodes = esprima.parse(code_content, {"loc": True})

                # Store the tree next to the other per-file results.
                self.pre_result[filepath]['ast_nodes'] = all_nodes

            except (SyntaxError, AssertionError):
                logger.warning('[AST] [ERROR] parser {}: {}'.format(
                    filepath, traceback.format_exc()))

            except esprima.error_handler.Error:
                logger.warning(
                    '[AST] [ERROR] Invalid regular expression in {}...'.
                    format(filepath))

            except KeyboardInterrupt:
                # Logger.log() requires a level argument; a level-specific
                # method is used instead.
                logger.warning('[AST stop...')
                exit()

            except Exception:
                logger.warning('[AST] something error, {}'.format(
                    traceback.format_exc()))
Esempio n. 17
0
def scan(code_content, vul_function, vul_function_line):
    """Parse PHP source and hand the exported AST to ``traversal``.

    :param code_content: PHP source text to parse
    :param vul_function: forwarded unchanged to ``traversal``
    :param vul_function_line: forwarded unchanged to ``traversal``
    :return: whatever ``traversal`` returns for the exported nodes
    """
    php_parser = make_parser()
    # A cloned lexer is used for every parse; line tracking follows the
    # module-level ``with_line`` switch.
    exported_nodes = export(
        php_parser.parse(
            code_content,
            debug=False,
            lexer=lexer.clone(),
            tracking=with_line,
        )
    )
    return traversal(exported_nodes, vul_function, vul_function_line)
Esempio n. 18
0
    async def pre_ast(self):
        """Drain ``self.target_queue`` and pre-process every queued file group.

        Each queue item is indexed as ``(extension, {'list': [paths, ...]})``.
        The extension selects one of three branches:

        * PHP: read the file, parse it with the phply parser and record each
          ``define(name, value)`` call found directly in the parsed node list
          in ``self.define_dict``.
        * Chrome extension: unpack the archive with ``un_zip``, read
          ``manifest.json`` and push the listed content scripts as a new
          ``'.js'`` group onto the queue (and onto ``self.file_list``).
        * JavaScript: read the file, write a beautified ``<path>.pretty`` copy
          when none exists yet, and parse the source with esprima.

        Per-file results are stored in ``self.pre_result[filepath]``. Parser
        failures are logged; ``'ast_nodes'`` then stays an empty list.

        :return: always ``True``
        """

        while not self.target_queue.empty():

            fileext = self.target_queue.get()

            if fileext[0] in ext_dict['php']:
                # PHP handling
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    # Context manager closes the handle even if read() fails.
                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content,
                                                 debug=False,
                                                 lexer=lexer.clone(),
                                                 tracking=True)

                        # Store the parsed nodes with the file's result
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except Exception:
                        # Not a bare ``except``: task cancellation,
                        # KeyboardInterrupt and SystemExit must propagate.
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))

                    # Collect all constants declared through define()
                    for node in all_nodes:
                        if isinstance(
                                node,
                                php.FunctionCall) and node.name == "define":
                            define_params = node.params

                            # A define() call without both a name and a value
                            # cannot be recorded; skip it instead of raising
                            # IndexError and aborting the whole run.
                            if len(define_params) < 2:
                                continue

                            logger.debug(
                                "[AST][Pretreatment] new define {}={}".format(
                                    define_params[0].node,
                                    define_params[1].node))

                            self.define_dict[
                                define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict['chromeext']:

                # Pretreatment for Chrome extensions:
                # extract the js (and html?) they contain
                for filepath in fileext[1]['list']:
                    child_files = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First unpack the crx archive
                    try:
                        target_files_path = un_zip(filepath)
                        self.pre_result[filepath][
                            'target_files_path'] = target_files_path

                    except zipfile.BadZipFile:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] file {} not zip".
                            format(filepath))
                        continue

                    # Analyse manifest.json
                    manifest_path = os.path.join(target_files_path,
                                                 "manifest.json")

                    # The target may be a single file; that case needs its
                    # own relative path
                    if not (self.target_directory.endswith("/")
                            or self.target_directory.endswith("\\")
                            ) and not os.path.isdir(self.target_directory):
                        relative_path = os.path.join(
                            re.split(r'[\\|/]', self.target_directory)[-1] +
                            "_files")
                    else:
                        relative_path = target_files_path.split(
                            self.target_directory)[-1]

                    if relative_path.startswith(
                            '\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                    if os.path.isfile(manifest_path):
                        with codecs.open(manifest_path,
                                         "r",
                                         encoding='utf-8',
                                         errors='ignore') as fi:
                            manifest_content = fi.read()

                        try:
                            # json.loads() lost its ``encoding`` keyword in
                            # Python 3.9; passing it raises TypeError there.
                            # The content is already decoded text.
                            manifest = json.loads(manifest_content)

                        except json.decoder.JSONDecodeError:
                            logger.warning(
                                "[Pretreatment][Chrome Ext] File {} parse error..."
                                .format(target_files_path))
                            continue

                        self.pre_result[filepath]["manifest"] = manifest

                        if "content_scripts" in manifest:
                            for script in manifest["content_scripts"]:
                                if 'js' in script:
                                    child_files.extend([
                                        os.path.join(relative_path, js)
                                        for js in script['js']
                                    ])

                        self.pre_result[filepath]["child_files"] = child_files

                        # Queue the content scripts so this loop handles them
                        # as a JavaScript group later
                        self.target_queue.put(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                        # NOTE(review): file_list is presumably the list object
                        # passed in by the caller, so this append is visible
                        # outside this class - confirm.
                        self.file_list.append(('.js', {
                            'count': len(child_files),
                            'list': child_files
                        }))

                    else:
                        logger.warning(
                            "[Pretreatment][Chrome Ext] File {} parse error..."
                            .format(target_files_path))
                        continue

            elif fileext[0] in ext_dict['javascript']:

                # Pretreatment for JavaScript:
                # the source needs semantic analysis
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    with codecs.open(filepath,
                                     "r",
                                     encoding='utf-8',
                                     errors='ignore') as fi:
                        code_content = fi.read()

                    # Beautify the code and write it to a sibling file
                    new_filepath = filepath + ".pretty"

                    if not os.path.isfile(new_filepath):
                        # Beautify before creating the output file, so a
                        # beautifier failure cannot leave an empty ".pretty"
                        # file that later runs would treat as already done.
                        code_content = jsbeautifier.beautify(code_content)
                        with codecs.open(new_filepath,
                                         "w",
                                         encoding='utf-8',
                                         errors='ignore') as fi2:
                            fi2.write(code_content)

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Store the parsed nodes with the file's result
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(
                            filepath, traceback.format_exc()))

                    except esprima.error_handler.Error:
                        logger.warning(
                            '[AST] [ERROR] Invalid regular expression in {}...'
                            .format(filepath))

                    except KeyboardInterrupt:
                        # Logger.log() requires a level as its first argument;
                        # use a level-specific method so the message is
                        # emitted instead of raising TypeError.
                        logger.warning('[AST stop...')
                        exit()

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(
                            traceback.format_exc()))
                        continue

            # Manual garbage collection after each file group
            gc.collect()

        return True
Esempio n. 19
0
    def pre_ast(self, lan=None):
        """Pre-process every file group in ``self.file_list``.

        Each entry is indexed as ``(extension, {'list': [paths, ...]})``.
        PHP files are parsed with the phply parser and their ``define()``
        calls recorded in ``self.define_dict``. Chrome extensions are unpacked
        with ``un_zip`` and their ``manifest.json`` content scripts listed.
        JavaScript files are parsed with esprima. Per-file results are stored
        in ``self.pre_result[filepath]``. Parser failures are logged, and
        ``'ast_nodes'`` then stays an empty list.

        :param lan: optional iterable of target languages; when given and
            none of them appears in ``could_ast_pase_lans`` the
            pretreatment is skipped
        :return: ``True`` when the pretreatment is skipped; otherwise the
            method ends without an explicit return value (``None``)
        """

        if lan is not None:
            # Check whether any requested language is AST-parsable
            if set(lan).isdisjoint(could_ast_pase_lans):

                logger.info("[AST][Pretreatment] Current scan target language does not require ast pretreatment...")
                return True

        for fileext in self.file_list:

            if fileext[0] in ext_dict['php']:
                # PHP handling
                for filepath in fileext[1]['list']:
                    all_nodes = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'php'
                    self.pre_result[filepath]['ast_nodes'] = []

                    # The handle used to be left open; close it deterministically.
                    with codecs.open(filepath, "r", encoding='utf-8', errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        parser = make_parser()
                        all_nodes = parser.parse(code_content, debug=False, lexer=lexer.clone(), tracking=True)

                        # Store the parsed nodes with the file's result
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except Exception:
                        # Not a bare ``except``: KeyboardInterrupt and
                        # SystemExit must still stop the scan.
                        logger.warning('[AST] something error, {}'.format(traceback.format_exc()))

                    # Collect all constants declared through define()
                    for node in all_nodes:
                        if isinstance(node, php.FunctionCall) and node.name == "define":
                            define_params = node.params

                            # Skip define() calls lacking a name or a value
                            # rather than raising IndexError.
                            if len(define_params) < 2:
                                continue

                            logger.debug("[AST][Pretreatment] new define {}={}".format(define_params[0].node, define_params[1].node))

                            self.define_dict[define_params[0].node] = define_params[1].node

            elif fileext[0] in ext_dict['chromeext']:

                # Pretreatment for Chrome extensions:
                # extract the js (and html?) they contain
                for filepath in fileext[1]['list']:
                    # One fresh list per extension. A single list created
                    # outside this loop was shared by every extension's result
                    # and accumulated the scripts of all previous ones.
                    child_files = []

                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'chromeext'

                    # First unpack the crx archive
                    target_files_path = un_zip(filepath)
                    self.pre_result[filepath]['target_files_path'] = target_files_path

                    # Analyse manifest.json
                    manifest_path = os.path.join(target_files_path, "manifest.json")
                    relative_path = target_files_path.split(self.target_directory)[-1]

                    if relative_path.startswith('\\') or relative_path.startswith("/"):
                        relative_path = relative_path[1:]

                    if os.path.isfile(manifest_path):
                        with codecs.open(manifest_path, "r", encoding='utf-8', errors='ignore') as fi:
                            manifest_content = fi.read()

                        try:
                            manifest = json.loads(manifest_content)
                        except ValueError:
                            # json.JSONDecodeError is a ValueError subclass; a
                            # malformed manifest skips this extension only.
                            logger.warning("[Pretreatment][Chrome Ext] File {} parse error...".format(target_files_path))
                            continue

                        self.pre_result[filepath]["manifest"] = manifest

                        if "content_scripts" in manifest:
                            for script in manifest["content_scripts"]:
                                # Entries may declare only css; 'js' is optional.
                                if 'js' in script:
                                    child_files.extend([os.path.join(relative_path, js) for js in script['js']])

                        self.pre_result[filepath]["child_files"] = child_files
                    else:
                        logger.warning("[Pretreatment][Chrome Ext] File {} parse error...".format(target_files_path))
                        continue

            elif fileext[0] in ext_dict['javascript']:

                # Pretreatment for JavaScript:
                # the source needs semantic analysis
                for filepath in fileext[1]['list']:
                    filepath = self.get_path(filepath)
                    self.pre_result[filepath] = {}
                    self.pre_result[filepath]['language'] = 'javascript'
                    self.pre_result[filepath]['ast_nodes'] = []

                    with codecs.open(filepath, "r", encoding='utf-8', errors='ignore') as fi:
                        code_content = fi.read()

                    self.pre_result[filepath]['content'] = code_content

                    try:
                        all_nodes = esprima.parse(code_content, {"loc": True})

                        # Store the parsed nodes with the file's result
                        self.pre_result[filepath]['ast_nodes'] = all_nodes

                    except (SyntaxError, AssertionError):
                        logger.warning('[AST] [ERROR] parser {}: {}'.format(filepath, traceback.format_exc()))

                    except Exception:
                        logger.warning('[AST] something error, {}'.format(traceback.format_exc()))
Esempio n. 20
0
    def parse(self, phpsrc):
        """Parse PHP source bytes and register the strings found as units.

        Handles ``define()`` calls, assignments to variables or array
        offsets, and ``return array(...)`` statements. Source consisting of
        inline HTML only is re-parsed with a ``<?php`` prefix.
        """

        def flatten(node):
            """Join the string and variable-name operands of a concatenation."""
            if isinstance(node, str):
                return node
            if isinstance(node, Variable):
                return node.name
            assert isinstance(node, BinaryOp)
            return flatten(node.left) + flatten(node.right)

        def add_unit(lex, name, value, positions):
            """Create a unit, pulling quote style then comments from the lexer."""
            self.create_and_add_unit(
                name,
                value,
                lex.extract_quote(),
                lex.extract_comments(positions[1]),
            )

        def walk_array(prefix, elements, lex):
            """Recursively register the string values of an array literal."""
            prefix = prefix + lex.extract_array()
            for element in elements:
                assert isinstance(element, ArrayElement)
                key = element.key
                if key is None:
                    name = []
                else:
                    # To update lexer current position
                    lex.extract_name("DOUBLE_ARROW", *element.lexpositions)
                    if isinstance(key, BinaryOp):
                        name = f"'{flatten(key)}'"
                    elif isinstance(key, (int, float)):
                        name = "{}".format(key)
                    else:
                        name = "'{}'".format(key)
                if prefix:
                    name = "{}->{}".format(prefix, name)
                value = element.value
                if isinstance(value, Array):
                    walk_array(name, value.nodes, lex)
                elif isinstance(value, str):
                    add_unit(lex, name, value, element.lexpositions)

        parser = make_parser()
        for production in parser.productions:
            production.callable = wrap_production(production.callable)
        lexer = PHPLexer()
        source_text = phpsrc.decode(self.encoding)
        tree = parser.parse(source_text, lexer=lexer, tracking=True)

        # Handle text without PHP start
        if len(tree) == 1 and isinstance(tree[0], InlineHTML):
            return self.parse(b"<?php\n" + phpsrc)

        for item in tree:
            if isinstance(item, FunctionCall):
                if item.name == "define":
                    add_unit(
                        lexer,
                        lexer.extract_name("COMMA", *item.lexpositions),
                        item.params[1].node,
                        item.lexpositions,
                    )
            elif isinstance(item, Assignment):
                # Array-offset and plain-variable targets are treated alike.
                if not isinstance(item.node, (ArrayOffset, Variable)):
                    continue
                name = lexer.extract_name("EQUALS", *item.lexpositions)
                value = item.expr
                if isinstance(value, Array):
                    walk_array(name, value.nodes, lexer)
                elif isinstance(value, str):
                    add_unit(lexer, name, value, item.lexpositions)
                elif isinstance(value, BinaryOp) and value.op == ".":
                    add_unit(lexer, name, flatten(value), item.lexpositions)
            elif isinstance(item, Return):
                if isinstance(item.node, Array):
                    # Adjust extractor position
                    lexer.extract_name("RETURN", *item.lexpositions)
                    walk_array("return", item.node.nodes, lexer)
Esempio n. 21
0
    def parse(self, phpsrc):
        """Parse PHP source bytes and register the strings found as units.

        Handles ``define()`` calls, assignments to variables or array
        offsets, and ``return array(...)`` statements. Array literals are
        only expanded when assigned to a plain variable or returned. Source
        consisting of inline HTML only is re-parsed with a ``<?php`` prefix.
        """

        def flatten(node):
            """Join the string and variable-name operands of a concatenation."""
            if isinstance(node, str):
                return node
            if isinstance(node, Variable):
                return node.name
            assert isinstance(node, BinaryOp)
            return flatten(node.left) + flatten(node.right)

        def add_unit(lex, name, value, positions):
            """Create a unit, pulling quote style then comments from the lexer."""
            self.create_and_add_unit(
                name,
                value,
                lex.extract_quote(),
                lex.extract_comments(positions[1]),
            )

        def walk_array(prefix, elements, lex):
            """Recursively register the string values of an array literal."""
            prefix = prefix + lex.extract_array()
            for element in elements:
                assert isinstance(element, ArrayElement)
                key = element.key
                # Skip empty keys
                if key == '':
                    continue
                if isinstance(key, BinaryOp):
                    name = "'{0}'".format(flatten(key))
                elif isinstance(key, (int, float)):
                    name = "{0}".format(key)
                else:
                    name = "'{0}'".format(key)
                if prefix:
                    name = "{0}->{1}".format(prefix, name)
                value = element.value
                if isinstance(value, Array):
                    walk_array(name, value.nodes, lex)
                elif isinstance(value, str):
                    add_unit(lex, name, value, element.lexpositions)

        parser = make_parser()
        for production in parser.productions:
            production.callable = wrap_production(production.callable)
        lexer = PHPLexer()
        source_text = phpsrc.decode(self.encoding)
        tree = parser.parse(source_text, lexer=lexer, tracking=True)

        # Handle text without PHP start
        if len(tree) == 1 and isinstance(tree[0], InlineHTML):
            return self.parse(b'<?php\n' + phpsrc)

        for item in tree:
            if isinstance(item, FunctionCall):
                if item.name == 'define':
                    add_unit(
                        lexer,
                        lexer.extract_name('COMMA', *item.lexpositions),
                        item.params[1].node,
                        item.lexpositions,
                    )
            elif isinstance(item, Assignment):
                target = item.node
                is_offset = isinstance(target, ArrayOffset)
                if not is_offset and not isinstance(target, Variable):
                    continue
                name = lexer.extract_name('EQUALS', *item.lexpositions)
                value = item.expr
                # Array literals are expanded for plain variables only;
                # array-offset targets accept just strings and concatenations.
                if not is_offset and isinstance(value, Array):
                    walk_array(name, value.nodes, lexer)
                elif isinstance(value, str):
                    add_unit(lexer, name, value, item.lexpositions)
                elif isinstance(value, BinaryOp) and value.op == '.':
                    add_unit(lexer, name, flatten(value), item.lexpositions)
            elif isinstance(item, Return):
                if isinstance(item.node, Array):
                    walk_array('return', item.node.nodes, lexer)
Esempio n. 22
0
#!/usr/bin/env python

# php2python.py - Converts PHP to Python using unparse.py
# Usage: php2python.py < input.php > output.py

import sys
sys.path.append('..')

from phply.phplex import lexer
from phply.phpparse import make_parser
from phply import pythonast

from ast import Module
from unparse import Unparser

# Streams are bound to descriptive names so the ``input`` builtin is no
# longer shadowed at module level.
source_stream = sys.stdin
output = sys.stdout

# Parse the PHP read from stdin, convert every node of the parse result with
# pythonast.from_phpast, and hand the converted nodes to Unparser together
# with the output stream.
parser = make_parser()
php_nodes = parser.parse(source_stream.read(), lexer=lexer)
body = [pythonast.from_phpast(node) for node in php_nodes]
Unparser(body, output)
Esempio n. 23
0
        logging.info("No config file given, using default")
        config_file = DEFAULT_CONFIG
    return readConfig(config_file)


if __name__ == "__main__":
    # Script entry point: parse the PHP file named on the command line and
    # run every rule of the chosen configuration against its AST.
    enableDebug()

    lexer = phplex.lexer

    # Exit with a usage hint instead of an IndexError traceback when the
    # file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <php-file>".format(sys.argv[0]))

    phpFile = sys.argv[1]
    with open(phpFile, "r") as f:
        code = f.read()

    if code:
        # FIXME: assuming it's php. Handle php inside HTML
        parser = make_parser()
        if not code.strip().startswith('<'):
            # NOTE(review): feeding '<?' through the shared lexer first
            # presumably switches it into PHP mode for sources that lack an
            # opening tag - confirm.
            parser.parse('<?', lexer=lexer)
        # Restart line numbering so positions refer to the real source.
        lexer.lineno = 1
        config = chooseConfigFile()
        try:
            rootNode = parser.parse(code, lexer=lexer)
            for rule in config:
                tryAnalyse(rule, rootNode, code)
        except SyntaxError as e:
            print(e, 'near', repr(e.text))

        except Exception:
            # Report unexpected failures, but let KeyboardInterrupt and
            # SystemExit terminate the script normally.
            traceback.print_exc()
Esempio n. 24
0
 def run(self):
     """Import phply's ``make_parser`` and call it with ``debug=False``.

     The returned parser is discarded.
     NOTE(review): presumably called only for the side effect of building
     the parser - confirm against the enclosing class.
     """
     # Imported inside the method, so phply is only loaded when run() executes.
     from phply.phpparse import make_parser
     make_parser(debug=False)