Ejemplo n.º 1
0
        def run_decompiler(d, dx, decompiler):
            """
            Attach the requested decompiler to a DalvikVMFormat object.

            Nothing happens when ``decompiler`` is None.  The name is matched
            case-insensitively; an unrecognised name is reported through
            ``self.log`` (``self`` comes from the enclosing scope) and the
            DAD decompiler is used instead.

            :param d: the DalvikVMFormat object
            :type d: :class:`DalvikVMFormat` object
            :param dx: the analysis of the format
            :type dx: :class:`VMAnalysis` object
            :param decompiler: the type of decompiler to use ("dad", "dex2jad", "ded")
            :type decompiler: string
            """
            if decompiler is None:
                return

            wanted = decompiler.lower()

            if wanted == "dex2jad":
                engine = DecompilerDex2Jad(
                    d,
                    androconf.CONF["PATH_DEX2JAR"],
                    androconf.CONF["BIN_DEX2JAR"],
                    androconf.CONF["PATH_JAD"],
                    androconf.CONF["BIN_JAD"],
                    androconf.CONF["TMP_DIRECTORY"])
            elif wanted == "ded":
                engine = DecompilerDed(
                    d,
                    androconf.CONF["PATH_DED"],
                    androconf.CONF["BIN_DED"],
                    androconf.CONF["TMP_DIRECTORY"])
            else:
                # "dad" and every unknown name both end up with DAD; only
                # the unknown case is logged.
                if wanted != "dad":
                    self.log(
                        'info',
                        "Unknown decompiler, use DAD decompiler by default")
                engine = DecompilerDAD(d, dx)

            d.set_decompiler(engine)
Ejemplo n.º 2
0
    def addDEY(self, filename, data, dx=None):
        """
        Add an ODEX file to the session and run the analysis.

        :param filename: key under which the digest is appended in
            ``self.analyzed_files`` (the entry must already exist or the
            mapping must create it on access -- not visible from here)
        :param data: binary data of the ODEX file
        :param dx: an existing Analysis object to extend (optional); a new
            one is created when None
        :return: a tuple of SHA256 hex digest, the DalvikOdexVMFormat object
            parsed from ``data``, and the Analysis object
        """
        digest = hashlib.sha256(data).hexdigest()
        log.debug("add DEY:%s" % digest)
        d = DalvikOdexVMFormat(data)
        log.debug("added DEY:%s" % digest)

        self.analyzed_files[filename].append(digest)
        self.analyzed_digest[digest] = filename

        self.analyzed_dex[digest] = d

        if self.export_ipython:
            d.create_python_export()

        if dx is None:
            dx = Analysis()

        dx.add(d)
        dx.create_xref()

        # The loop uses its own name so that ``d`` keeps referring to the
        # object parsed above; reusing ``d`` made the returned object depend
        # on the iteration order of ``dx.vms``.
        for vm in dx.vms:
            # TODO: allow different decompiler here!
            vm.set_decompiler(DecompilerDAD(vm, dx))
            vm.set_vmanalysis(dx)

        self.analyzed_vms[digest] = dx

        return digest, d, dx
def extract_permissions(file):
    """
    Return the permissions reported by the APK built from ``file``.

    :param file: value handed to ``APK(...)`` (presumably a path to the
        APK file -- confirm against the APK constructor)
    :return: the result of ``APK.get_permissions()``
    """
    a = APK(file)

    # NOTE(review): the DEX analysis below does not influence the return
    # value.  It is kept so that an APK whose DEX cannot be parsed still
    # raises here as before; the second, unused parse/analysis of the same
    # DEX that used to follow has been removed.
    d = DalvikVMFormat(a.get_dex())
    dx = VMAnalysis(d)
    d.set_vmanalysis(dx)
    d.set_decompiler(DecompilerDAD(d, dx))

    return a.get_permissions()
Ejemplo n.º 4
0
def extract_features(file_path):
    """
    Return the permissions reported by the APK at ``file_path``.

    :param file_path: value handed to ``APK(...)``
    :return: the result of ``APK.get_permissions()``, or None when loading
        or analysing the APK raises
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        # ``set_vmanalysis`` is the setter used with ``Analysis`` objects by
        # the other snippets; the previous ``set_Analysis`` /
        # ``analysis.uAnalysis`` calls matched nothing else and, if they do
        # not exist, made this function always return None.
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: any loading/analysis failure yields None.  Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        return None
    return a.get_permissions()
Ejemplo n.º 5
0
    def addDEX(self, filename, data, dx=None, postpone_xref=False):
        """
        Add a DEX file to the Session and run analysis.

        :param filename: the (file)name of the DEX file
        :param data: binary data of the dex file
        :param dx: an existing Analysis Object (optional)
        :param postpone_xref: True if no xref shall be created, and will be called manually
        :return: A tuple of SHA256 Hash, DalvikVMFormat Object and Analysis object
        """
        digest = hashlib.sha256(data).hexdigest()
        log.debug("add DEX:%s" % digest)

        log.debug("Parsing format ...")
        d = DalvikVMFormat(data)
        log.debug("added DEX:%s" % digest)

        self.analyzed_files[filename].append(digest)
        self.analyzed_digest[digest] = filename

        self.analyzed_dex[digest] = d

        if dx is None:
            dx = Analysis()

        dx.add(d)
        if not postpone_xref:
            dx.create_xref()

        # TODO: If multidex: this will called many times per dex, even if already set
        # The loop uses its own name so that ``d`` keeps referring to the
        # DEX parsed above; reusing ``d`` made both the ipython export and
        # the return value depend on the iteration order of ``dx.vms``.
        for vm in dx.vms:
            # TODO: allow different decompiler here!
            vm.set_decompiler(DecompilerDAD(vm, dx))
            vm.set_vmanalysis(dx)
        self.analyzed_vms[digest] = dx

        if self.export_ipython:
            log.debug("Exporting in ipython")
            d.create_python_export()

        return digest, d, dx
def extract_features(file_path):
    """
    Build a feature dictionary for the APK at ``file_path``.

    The dictionary holds manifest metadata, hashes of the raw APK, name
    lists taken from the DEX, a few 0/1 flags, the entropy rate of all
    class/method/field names and, under ``'feature_vectors'``, 0/1 lists
    aligned with the module-level ``API_CALLS``, ``PERMISSIONS``,
    ``INTENTS`` and ``SPECIAL_STRINGS`` sequences.

    :param file_path: value handed to ``APK(...)``
    :return: the feature dictionary, or None when loading or analysing the
        APK raises
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        # A second, unused parse of the same DEX used to happen here.
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: any loading/analysis failure yields None.  Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        return None

    raw = a.get_raw()  # fetched once, hashed twice

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # The 'libraries', 'is_native_code', 'is_crypto_code', 'is_dyn_code' and
    # 'is_reflection_code' entries were already disabled in this variant.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    result['intents'] = list(a.get_elements("action", "name"))

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.get_method(call) else 0 for call in API_CALLS]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS]

    # Search for the presence of intents in a given apk: an intent counts
    # as present when it is contained in any of the declared action names.
    vectors['intents'] = [
        1 if any(wanted in declared for declared in result['intents']) else 0
        for wanted in INTENTS]

    # Check for special strings in code
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS]

    return result
Ejemplo n.º 7
0
def extract_features(file_path):
    """
    Build a feature dictionary for the APK at ``file_path``.

    The dictionary holds manifest metadata, hashes of the raw APK, name
    lists taken from the DEX, two 0/1 flags, the entropy rate of all
    class/method/field names and, under ``'feature_vectors'``, 0/1 lists
    aligned with the module-level ``API_CALLS``, ``PERMISSIONS`` and
    ``SPECIAL_STRINGS`` sequences plus an ``'others'`` list with the flags.

    :param file_path: value handed to ``APK(...)``
    :return: the feature dictionary, or None (after printing the error)
        when loading or analysing the APK raises
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        # A second parse/analysis of the same DEX used to happen here; its
        # result was only referenced from disabled code.
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(e)
        return None

    raw = a.get_raw()  # fetched once, hashed twice

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # The 'is_native_code', 'is_crypto_code', 'is_dyn_code' and
    # 'is_reflection_code' flags were already disabled in this variant.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    vectors = {}
    result['feature_vectors'] = vectors

    vectors['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS]

    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS]

    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS]

    vectors['others'] = [
        result['is_obfuscation'],
        result['is_database'],
    ]

    return result
Ejemplo n.º 8
0
def extract_features(file_path):
    """
    Build a feature dictionary for the APK at ``file_path``.

    The dictionary holds manifest metadata, hashes of the raw APK, six 0/1
    code-property flags, the entropy rate of all class/method/field names
    and, under ``'feature_vectors'``, 0/1 lists aligned with the
    module-level ``API_CALLS``, ``PERMISSIONS`` and ``SPECIAL_STRINGS``
    sequences plus ``'opt_codes'``: a dict mapping the string form of the
    ``NGRAM_THRE`` most frequent opcode n-grams (length ``NGRAM``) to 1.

    :param file_path: value handed to ``APK(...)``
    :return: the feature dictionary, or None when loading or analysing the
        APK raises
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)  # used for the reflection check below
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: any loading/analysis failure yields None.  Unlike a
        # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
        return None

    raw = a.get_raw()  # fetched once, hashed twice

    result = {}
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()

    # The name lists are only used for the entropy rate and are not stored
    # in the result.
    class_names = [c.get_name() for c in d.get_classes()]
    method_names = [m.get_name() for m in d.get_methods()]
    field_names = [f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # Fix: the method names used to be added twice while the field names
    # were computed but never used.
    s_list = []
    s_list.extend(class_names)
    s_list.extend(method_names)
    s_list.extend(field_names)
    result['entropy_rate'] = entropy_rate(s_list)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS]

    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS]

    # Flat opcode-name sequence over all methods, in iteration order.
    opt_seq = []
    for method in d.get_methods():
        for instruction in method.get_instructions():
            opt_seq.append(instruction.get_name())

    # Fix: "+ 1" so the final complete window is counted too; the previous
    # bound stopped one n-gram short.  A sequence shorter than NGRAM still
    # yields no n-grams.
    ngram_counts = Counter(
        tuple(opt_seq[start:start + NGRAM])
        for start in xrange(len(opt_seq) - NGRAM + 1))

    # Keep the NGRAM_THRE most frequent n-grams as presence flags.
    ranked = sorted(ngram_counts.items(),
                    key=lambda item: item[1],
                    reverse=True)
    optcodes = dict()
    for gram, _count in ranked[:NGRAM_THRE]:
        optcodes[str(gram)] = 1
    vectors['opt_codes'] = optcodes

    return result
Ejemplo n.º 9
0
'''
Created on Jun 8, 2014

@author: lyx
'''
from androguard.core.bytecodes.apk import APK
from androguard.core.bytecodes import dvm
from androguard.core.analysis import analysis
from androguard.decompiler.decompiler import DecompilerDAD



if __name__ == '__main__':
    # Load a sample APK, attach the DAD decompiler and print the last class
    # returned by get_classes() followed by the result of its source() call.
    apk = APK('../sampleapk/MyTrojan.apk')
    d = dvm.DalvikVMFormat(apk.get_dex())
    dx = analysis.uVMAnalysis(d)
    d.set_decompiler(DecompilerDAD(d, dx))

    classes = list(d.get_classes())
    if classes:
        # Only the last class is shown, as in the original loop.
        s = classes[-1]
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(s)
        print(s.source())
    else:
        # Previously this case failed with a NameError on ``s``.
        print('no classes found in the DEX file')

    # Disabled instruction dump kept for reference:
    # for current_method in d.get_methods():
    #     x = current_method.get_code()
    # ins = x.get_bc().get_instructions()
    # i = 0
    # for s in ins:
    #     print s.show(i)
    #     i += 1
    # #apk = analyzeAPK('./sampleapk/k9-4.409-release.apk')
Ejemplo n.º 10
0
    def analysis_app(self, log, apkobj, dexobj, analysisobj):
        """
        Produce the call graph and the opcode / permission / tpl feature
        files for one application.

        Output locations are derived from ``log.filename`` by replacing
        ``self.APKPath`` with ``self.CGPath`` (call graph) or with
        ``self.FeaturePath`` plus a sub-directory (CSV files).  The call
        graph is written only when ``call.gml`` does not exist yet; the
        method returns early when the tpl file already exists or when no
        method-to-node mapping can be built from the graph.

        :param log: object whose ``filename`` attribute is the APK path
        :param apkobj: not used by this method
        :param dexobj: DEX object; receives a DAD decompiler and is handed
            to ``Permission``
        :param analysisobj: analysis object providing ``get_call_graph()``
            and ``get_classes()``
        :return: None
        """
        dexobj.set_decompiler(DecompilerDAD(dexobj, analysisobj))
        apk_filename = log.filename

        # [:-4] drops the last four characters (presumably the ".apk"
        # extension -- the CSV names below replace ".apk" explicitly).
        CGpath = apk_filename.replace(self.APKPath, self.CGPath)[:-4]
        CGfilename = os.path.join(CGpath, "call.gml")
        opcodeFilename = apk_filename.replace(
            self.APKPath,
            self.FeaturePath + "/opcode").replace(".apk", ".csv")
        permissionFilename = apk_filename.replace(
            self.APKPath,
            self.FeaturePath + "/permission").replace(".apk", ".csv")
        tplFilename = apk_filename.replace(
            self.APKPath,
            self.FeaturePath + "/tpl").replace(".apk", ".csv")

        output_dirs = (CGpath,
                       os.path.dirname(opcodeFilename),
                       os.path.dirname(permissionFilename),
                       os.path.dirname(tplFilename))
        for directory in output_dirs:
            if not os.path.exists(directory):
                try:
                    os.makedirs(directory)
                except OSError:
                    # Best effort, as before: a failure here (for example
                    # the directory appearing in the meantime) is ignored.
                    pass

        if not os.path.exists(CGfilename):
            G = analysisobj.get_call_graph()  # call graph
            nx.write_gml(G, CGfilename, stringizer=str)  # save the call graph
        self.call_graphs.append(CGfilename)
        # The graph is always re-read from disk, even when just written.
        G = nx.read_gml(CGfilename, label='id')
        if os.path.exists(tplFilename):
            return

        class_functions = defaultdict(
            list)  # mappings of class and its functions
        super_dic = {
        }  # mappings of class and its superclass(for class replacement)
        implement_dic = {}

        opcodeFile = utils.create_csv(self.smali_opcode, opcodeFilename)
        # try/finally closes the CSV on the early return and on errors too;
        # it used to stay open in both cases.
        try:
            method2nodeMap = self.getMethod2NodeMap(G)
            if method2nodeMap == {}:
                _settings.logger.error("%s has call graph error" % log.filename)
                print("%s has call graph error" % log.filename)
                return

            for classes in analysisobj.get_classes():  # all class
                class_name = str(classes.get_class().get_name())
                if classes.extends != "Ljava/lang/Object;":
                    super_dic[class_name] = str(classes.extends)
                    if str(classes.extends) in self.replacemap:
                        implement_dic[class_name] = str(classes.extends)
                if classes.implements:
                    for imp in classes.implements:
                        if str(imp) in self.replacemap:
                            implement_dic[class_name] = str(imp)
                for method in classes.get_methods():
                    if method.is_external():
                        continue
                    m = method.get_method()
                    full_name = str(m.full_name)
                    class_functions[class_name].append(full_name)

                    # Per-method histogram restricted to the known opcodes.
                    opcode = dict.fromkeys(self.smali_opcode, 0)
                    has_instructions = False
                    for ins in m.get_instructions():
                        has_instructions = True
                        op = ins.get_name()
                        if op in opcode:
                            opcode[op] += 1

                    # Methods without instructions produce no CSV row.
                    if has_instructions:
                        try:
                            utils.write_csv(opcode, opcodeFile,
                                            method2nodeMap[full_name][0])
                        except Exception:
                            print("apk: %s, method: %s not exists" %
                                  (log.filename, full_name))
        finally:
            opcodeFile.close()

        cpermission = Permission(G=G,
                                 path=permissionFilename,
                                 class_functions=class_functions,
                                 super_dic=super_dic,
                                 implement_dic=implement_dic,
                                 dexobj=dexobj,
                                 permission=self.permission,
                                 cppermission=self.cppermission,
                                 method2nodeMap=method2nodeMap)
        cpermission.generate()
        class2init = cpermission.getClass2init()
        sensitiveapimap = cpermission.getsensitive_api()
        ctpl = Tpl(log.filename, G, tplFilename, sensitiveapimap,
                   self.permission, class2init)
        ctpl.generate()