Example #1
0
    def __init__(self, path, filename, initialize=True, deps=[], mips=False):
        """Initializes access to analysis results."""
        self.path = path
        self.filename = filename
        self.deps = deps  # list of summary jars registered as dependencies
        self.mips = mips

        self.bdictionary = None  # BDictionary
        self.interfacedictionary = None  # InterfaceDictionary
        self.x86dictionary = None  # X86Dictionary
        self.mipsdictionary = None  # MIPSDictionary

        self.userdata = None  # UserData

        self.peheader = None
        self.elfheader = None

        self.resultdata = None  # AppResultData
        self.functions = {}  # faddr -> AsmFunction / MIPSFunction
        self.functioninfos = {}  # faddr -> FunctionInfo
        self.functionnames = None  # name -> function address

        self.models = ModelsAccess(self, dlljars=self.deps)

        if initialize and UF.has_bdictionary_file(self.path, self.filename):
            self._get_bdictionary()
            self._get_interface_dictionary()
            if self.mips:
                self._get_mips_dictionary()
            else:
                self._get_x86_dictionary()
            self._get_system_info()
            self._get_user_data()
    def lhs_ast(self, astree: AbstractSyntaxTree, iaddr: str,
                xdata: InstrXData) -> Tuple[ASTLval, List[ASTInstruction]]:
        def indirect_lhs(
                rtype: Optional[BCTyp]
        ) -> Tuple[ASTLval, List[ASTInstruction]]:
            tmplval = astree.mk_returnval_variable_lval(iaddr, rtype)
            tmprhs = astree.mk_lval_expr(tmplval)
            reglval = astree.mk_register_variable_lval("R0")
            return (tmplval, [astree.mk_assign(reglval, tmprhs)])

        calltarget = xdata.call_target(self.ixd)
        tgtname = calltarget.name
        models = ModelsAccess()
        if astree.has_symbol(tgtname):
            fnsymbol = astree.symbol(tgtname)
            if fnsymbol.returns_void:
                return (astree.mk_ignored_lval(), [])
            else:
                return indirect_lhs(fnsymbol.vtype)
        elif models.has_so_function_summary(tgtname):
            summary = models.so_function_summary(tgtname)
            returntype = summary.signature.returntype
            if returntype.is_named_type:
                returntype = cast(MNamedType, returntype)
                typename = returntype.typename
                if typename == "void" or typename == "VOID":
                    return (astree.mk_ignored_lval(), [])
                else:
                    return indirect_lhs(None)
            else:
                return indirect_lhs(None)
        else:
            return indirect_lhs(None)
def summaries_dll_function_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments
    dll: str = args.dll
    fname: str = args.function

    models = ModelsAccess()

    if models.has_dll(dll):
        if models.has_dll_function_summary(dll, fname):
            summary = models.dll_function_summary(dll, fname)
            print("Function summary for dll function: " + fname)
            print(str(summary))
            print("=" * 80)
            exit(0)
        else:
            print("*" * 80)
            print("Function " + fname + " not found in dll " + dll)
            print("  -- try")
            print("       > chkx summaries dll-functions " + dll)
            print("     to see a list of functions included in " + dll)
            print("*" * 80)
            exit(0)
    else:
        print("*" * 80)
        print("Dll " + dll + " not found")
        print("  -- try")
        print("       > chkx summaries dlls")
        print("     to see a list of dlls included")
        print("*" * 80)
        exit(0)
def summaries_dll_functions_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments
    dll: str = args.dll

    models = ModelsAccess()

    if models.has_dll(dll):
        dllfunctions = models.all_function_summaries_in_dll(dll)

        print("Functions in " + dll + " (" + str(len(dllfunctions)) + ")")
        print("=" * 80)
        for f in sorted(dllfunctions, key=lambda f: f.name):
            print("  " + f.name)
        print("=" * 80)

    else:
        print("*" * 80)
        print("Dll " + dll + " not found")
        print("  -- try")
        print("       > chkx summaries dlls")
        print("     to see a list of dlls included")
        print("*" * 80)

    exit(0)
def summaries_enums_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments: none

    models = ModelsAccess()

    enums = models.enum_definitions()
    for enumtype in enums:
        print("\n" + enumtype)
        print(str(enums[enumtype]))

    exit(0)
def summaries_dlls_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments: none

    models = ModelsAccess()

    modeldlls = models.dlls()
    for jar in modeldlls:
        print(jar)
        print("-" * 80)
        for dll in sorted(modeldlls[jar]):
            print("  " + dll)
        print("-" * 80)
    exit(0)
    def __init__(
            self,
            path: str,
            filename: str,
            fileformat: Type[HeaderTy],
            deps: List[str] = []) -> None:
        """Initializes access to analysis results."""
        self._path = path
        self._filename = filename
        self._deps = deps  # list of summary jars registered as dependencies
        self._header_ty: Type[HeaderTy] = fileformat  # currently supported: elf, pe

        self._userdata: Optional[UserData] = None

        self._header: Optional[HeaderTy] = None

        # functions
        self._appresultdata: Optional[AppResultData] = None
        self._functioninfos: Dict[str, FunctionInfo] = {}

        # callgraph
        self._callgraph: Optional[Callgraph] = None

        # summaries
        self.models = ModelsAccess(self.dependencies)

        # application-wide dictionaries
        self._bcdictionary: Optional[BCDictionary] = None
        self._bdictionary: Optional[BDictionary] = None
        self._interfacedictionary: Optional[InterfaceDictionary] = None
        self._bcfiles: Optional[BCFiles] = None

        self._systeminfo: Optional[SystemInfo] = None
def summaries_stats_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments: none

    models = ModelsAccess()

    print(models.stats)
    exit(0)
def summaries_so_functions_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments: none

    models = ModelsAccess()

    # returns a dictionary with so-functions for different jars
    sofunctions = models.all_so_function_summaries()
    for jar in sorted(sofunctions):
        print("\nShared object functions from " + jar + " (" +
              str(len(sofunctions[jar])) + ")")
        print("=" * 80)
        for f in sorted(sofunctions[jar], key=lambda f: f.name):
            print("  " + f.name)
        print("=" * 80)

    total = sum(len(sofunctions[jar]) for jar in sofunctions)
    print("\nTotal: " + str(total) + " summaries")
    exit(0)
def summaries_so_function_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments
    fname: str = args.function

    models = ModelsAccess()

    if models.has_so_function_summary(fname):
        summary = models.so_function_summary(fname)
        print("Function summary for shared-object function: " + fname)
        print(str(summary))
        print("=" * 80)
        exit(0)
    else:
        print("*" * 80)
        print("Function " + fname + " not found in so_functions")
        print("  -- try")
        print("       > chkx summaries so-functions")
        print("     to see a list of shared-object functions included")
        print("*" * 80)
        exit(0)
def summaries_enum_cmd(args: argparse.Namespace) -> NoReturn:

    # arguments
    name: str = args.name

    models = ModelsAccess()

    if models.has_dll_enum_definition(name):
        enumdef = models.dll_enum_definition(name)
        print("Values for enum type " + name)
        print("=" * 80)
        for s in sorted(enumdef):
            print("  " + str(enumdef[s]))
        print("=" * 80)
        exit(0)
    else:
        print("*" * 80)
        print("Enum type " + name + " not found")
        print("  -- try")
        print("       > chkx summaries enums")
        print("     to see a list of enum types included")
        print("*" * 80)
        exit(0)
 def __init__(
         self,
         pathname: str,
         filename: str,
         xnode: ET.Element,
         deps: Sequence[str] = []) -> None:
     self._pathname = pathname
     self._filename = filename
     self.xnode = xnode
     self._models = ModelsAccess(deps)
     self._sectionheaders: Dict[str, H.PESectionHeader] = {}
     self._importtables: Dict[str, E.PEImportDirectoryEntry] = {}
     self._sections: Dict[str, S.PESection] = {}
     self._coffheader: Optional[PECoffFileHeader] = None
     self._optionalheader: Optional[PEOptionalHeader] = None
Example #13
0
class AppAccess(object):
    def __init__(self, path, filename, initialize=True, deps=[], mips=False):
        """Initializes access to analysis results."""
        self.path = path
        self.filename = filename
        self.deps = deps  # list of summary jars registered as dependencies
        self.mips = mips

        self.bdictionary = None  # BDictionary
        self.interfacedictionary = None  # InterfaceDictionary
        self.x86dictionary = None  # X86Dictionary
        self.mipsdictionary = None  # MIPSDictionary

        self.userdata = None  # UserData

        self.peheader = None
        self.elfheader = None

        self.resultdata = None  # AppResultData
        self.functions = {}  # faddr -> AsmFunction / MIPSFunction
        self.functioninfos = {}  # faddr -> FunctionInfo
        self.functionnames = None  # name -> function address

        self.models = ModelsAccess(self, dlljars=self.deps)

        if initialize and UF.has_bdictionary_file(self.path, self.filename):
            self._get_bdictionary()
            self._get_interface_dictionary()
            if self.mips:
                self._get_mips_dictionary()
            else:
                self._get_x86_dictionary()
            self._get_system_info()
            self._get_user_data()

    # Functions ----------------------------------------------------------------

    def get_function_addresses(self):
        self._get_results()
        return self.resultdata.get_function_addresses()

    def has_function(self, faddr):
        return faddr in self.get_function_addresses()

    def get_function(self, faddr):
        if not faddr in self.functions:
            xnode = UF.get_function_results_xnode(self.path, self.filename,
                                                  faddr)
            if self.mips:
                self.functions[faddr] = MIPSFunction(self, xnode)
            else:
                self.functions[faddr] = AsmFunction(self, xnode)
        return self.functions[faddr]

    def get_address_reference(self):
        """Return map of addr -> [ baddr, [ faddr ])."""
        result = {}

        def add(faddr, fn):
            fnref = fn.get_address_reference()  #  addr -> baddr
            for a in fnref:
                if a in result:
                    result[a][1].append(faddr)
                else:
                    result[a] = (fnref[a], [faddr])

        self.iter_functions(add)
        return result

    def has_function_name(self, faddr):
        return self.has_function(faddr) and self.get_function(faddr).has_name()

    def get_function_name(self, faddr):
        if self.has_function_name(faddr):
            return self.get_function(faddr).get_names()[0]

    def is_app_function_name(self, name):
        if self.functionnames is None: self._initialize_functionnames()
        return name in self.functionnames

    def is_unique_app_function_name(self, name):
        return (self.is_app_function_name(name)
                and len(self.functionnames[name]) == 1)

    def get_app_function_address(self, name):
        if self.is_unique_app_function_name(name):
            return self.functionnames[name][0]

    def get_function_info(self, faddr):
        if not faddr in self.functioninfos:
            xnode = UF.get_function_info_xnode(self.path, self.filename, faddr)
            self.functioninfos[faddr] = FunctionInfo(self, faddr, xnode)
        return self.functioninfos[faddr]

    def iter_functions(self, f):
        for faddr in self.get_function_addresses():
            f(faddr, self.get_function(faddr))

    def find_function(self, iaddr):
        for faddr in self.get_function_addresses():
            f = self.get_function(faddr)
            if f.has_instruction(iaddr):
                return f
        return None

    # Misc ---------------------------------------------------------------------

    # returns a dictionary of faddr -> string list
    def get_strings(self):
        result = {}

        def f(faddr, fn):
            result[faddr] = fn.get_strings()

        self.iter_functions(f)
        return result

    def get_md5_profile(self):
        """Creates a dictionary of function md5s.

        Structure:
        -- md5hash -> faddr -> instruction count
        """
        result = {}

        def get_md5(faddr, f):
            md5 = f.get_md5_hash()
            result.setdefault(md5, {})
            result[md5][faddr] = mf = {}
            mf['instrs'] = f.get_instruction_count()
            if f.has_name(): mf['names'] = f.get_names()

        self.iter_functions(get_md5)
        profile = {}
        profile['path'] = self.path
        profile['filename'] = self.filename
        profile['imagebase'] = self.get_pe_header().get_image_base()
        profile['md5s'] = result
        return profile

    def get_calls_to_app_function(self, tgtaddr):
        """Returns a dictionary faddr -> Asm/MIPSInstruction list."""
        result = {}

        def f(faddr, fn):
            calls = fn.get_calls_to_app_function(tgtaddr)
            if len(calls) > 0:
                result[faddr] = calls

        self.iter_functions(f)
        return result

    def get_app_calls(self):
        """Returns a dictionary faddr -> Asm/MIPSInstruction."""
        result = {}

        def f(faddr, fn):
            appcalls = fn.get_app_calls()
            if len(appcalls) > 0:
                result[faddr] = appcalls

        self.iter_functions(f)
        return result

    def get_jump_conditions(self):
        """Returns a dictionary faddr -> iaddr -> { data }."""
        result = {}

        def f(faddr, fn):
            jumpconditions = fn.get_jump_conditions()
            if len(jumpconditions) > 0:
                result[faddr] = jumpconditions

        self.iter_functions(f)
        return result

    def get_call_instructions(self):
        """Returns a dictionary faddr -> Asm/MIPSInstruction."""
        result = {}

        def f(faddr, fn):
            appcalls = fn.get_call_instructions()
            if len(appcalls) > 0:
                result[faddr] = appcalls

        self.iter_functions(f)
        return result

    def get_dll_calls(self):
        result = {}

        def f(faddr, fn):
            dllcalls = fn.get_dll_calls()
            if len(dllcalls) > 0:
                result[faddr] = dllcalls

        self.iter_functions(f)
        return result

    def get_ioc_arguments(self):
        dllcalls = self.get_dll_calls()
        result = {}  # ioc -> role-name -> (faddr,iaddr,arg-value)
        problems = {}

        def setproblem(p, dll, fname, faddr, iaddr, params=None, args=None):
            problems.setdefault(p, {})
            problems[p].setdefault(dll, {})
            problems[p][dll].setdefault(fname, [])
            problems[p][dll][fname].append((faddr, iaddr, params, args))

        for faddr in dllcalls:
            for instr in dllcalls[faddr]:
                tgt = instr.get_call_target().get_stub()
                args = instr.get_call_arguments()
                dll = tgt.get_dll()
                fname = tgt.get_name()
                if self.models.has_dll_summary(dll, fname):
                    summary = self.models.get_dll_summary(dll, fname)
                    params = summary.get_stack_parameters()
                    if not params is None:
                        if len(args) == len(params):
                            for (param, arg) in zip(params, args):
                                iocroles = [
                                    r for r in param.get_roles() if r.is_ioc()
                                ]
                                for r in iocroles:
                                    ioc = r.get_ioc_name()
                                    result.setdefault(ioc, {})
                                    result[ioc].setdefault(r.name, [])
                                    result[ioc][r.name].append(
                                        (faddr, instr.iaddr, arg))
                        else:  #  len(args) != len(params)
                            setproblem('argument mismatch',
                                       dll,
                                       fname,
                                       faddr,
                                       iaddr,
                                       params=len(params),
                                       args=len(args))
                    else:  # no parameters
                        setproblem('no parameters', dll, fname, faddr,
                                   instr.iaddr)
                else:  # no summary
                    setproblem('no summary', dll, fname, faddr, instr.iaddr)
        return (result, problems)

    def get_unresolved_calls(self):
        result = {}

        def f(faddr, fn):
            unrcalls = fn.get_unresolved_calls()
            if len(unrcalls) > 0:
                result[faddr] = unrcalls

        self.iter_functions(f)
        return result

    # Feature extraction -------------------------------------------------------

    def get_branch_predicates(self):
        result = {}

        def f(faddr, fn):
            predicates = fn.get_branch_predicates()
            if len(predicates) > 0:
                result[faddr] = predicates

        self.iter_functions(f)
        return result

    def get_structured_lhs_variables(self):
        result = {}

        def f(faddr, fn):
            lhsvars = fn.get_structured_lhs_variables()
            if len(lhsvars) > 0:
                result[faddr] = lhsvars

        self.iter_functions(f)
        return result

    def get_structured_lhs_instructions(self):
        result = {}

        def f(faddr, fn):
            lhsinstrs = fn.get_structured_lhs_instructions()
            if len(lhsinstrs) > 0:
                result[faddr] = lhsinstrs

        self.iter_functions(f)
        return result

    def get_structured_rhs_expressions(self):
        result = {}

        def f(faddr, fn):
            rhsxprs = fn.get_structured_rhs_expressions()
            if len(rhsxprs) > 0:
                result[faddr] = rhsxprs

        self.iter_functions(f)
        return result

    def get_return_expressions(self):
        result = {}

        def f(faddr, fn):
            retxprs = fn.get_return_expressions()
            if len(retxprs) > 0:
                result[faddr] = retxprs

        self.iter_functions(f)
        return result

    def get_fn_ioc_arguments(self):
        result = {}

        def f(faddr, fn):
            iocargs = fn.get_ioc_arguments()
            if len(iocargs) > 0:
                result[faddr] = iocargs

        self.iter_functions(f)
        return result

    # Global variables ---------------------------------------------------------

    # returns a dictionary of faddr -> gvar -> count
    def get_global_variables(self):
        result = {}

        def f(faddr, fn):
            result[faddr] = fn.get_global_variables()  # gvar -> count

        self.iter_functions(f)
        return result

    # Result Metrics -----------------------------------------------------------

    def get_result_metrics(self):
        x = UF.get_resultmetrics_xnode(self.path, self.filename)
        h = UF.get_resultmetrics_xheader(self.path, self.filename)
        return AppResultMetrics(self, x, h)

    def get_result_metrics_summary(self):
        return self.get_result_metrics().summary()

    # PE data ------------------------------------------------------------------

    def get_pe_header(self):
        self._get_pe_header()
        return self.peheader

    # ELF data  ----------------------------------------------------------------

    def get_elf_header(self):
        self._get_elf_header()
        return self.elfheader

    # Initialization -----------------------------------------------------------

    def _get_pe_header(self):
        if self.peheader is None:
            x = UF.get_pe_header_xnode(self.path, self.filename)
            self.peheader = PEHeader(self, x)

    def _get_elf_header(self):
        if self.elfheader is None:
            x = UF.get_elf_header_xnode(self.path, self.filename)
            self.elfheader = ELFHeader(self, x)

    def _get_user_data(self):
        if self.userdata is None:
            x = UF.get_user_system_data_xnode(self.path, self.filename)
            self.userdata = UserData(self, x)

    def _get_bdictionary(self):
        if self.bdictionary is None:
            x = UF.get_bdictionary_xnode(self.path, self.filename)
            self.bdictionary = BDictionary(self, x)

    def _get_interface_dictionary(self):
        if self.interfacedictionary is None:
            x = UF.get_interface_dictionary_xnode(self.path, self.filename)
            self.interfacedictionary = InterfaceDictionary(self, x)

    def _get_x86_dictionary(self):
        if self.x86dictionary is None:
            x = UF.get_x86_dictionary_xnode(self.path, self.filename)
            self.x86dictionary = X86Dictionary(self, x)

    def _get_mips_dictionary(self):
        if self.mipsdictionary is None:
            x = UF.get_mips_dictionary_xnode(self.path, self.filename)
            self.mipsdictionary = MIPSDictionary(self, x)

    def _get_system_info(self):
        s = UF.get_systeminfo_xnode(self.path, self.filename)
        self.functionsdata = FunctionsData(self, s.find('functions-data'))
        self.stringxrefs = StringsXRefs(self, s.find('string-xreferences'))
        jtnode = s.find('jumptables')
        if not jtnode is None:
            self._get_jump_tables(jtnode)

    def _get_jump_tables(self, jtnode):
        for x in jtnode.findall('jt'):
            self.jumptables[x.get('start')] = JumpTable(self, x)

    def _get_results(self):
        if self.resultdata is None:
            x = UF.get_resultdata_xnode(self.path, self.filename)
            self.resultdata = AppResultData(self, x)

    def _initialize_functionnames(self):
        self.functionnames = {}

        def f(faddr, fn):
            if fn.has_name():
                fnames = fn.get_names()
                for fname in fnames:
                    self.functionnames.setdefault(fname, [])
                    self.functionnames[fname].append(faddr)

        self.iter_functions(f)