Ejemplo n.º 1
0
def write_one_apk_source(apkfile):
    """Write the decompiled source of every class of an APK to a text file.

    While walking the classes, every method found for the class and every
    callee that has not been seen yet is added as a node to a local directed
    graph, and its name is printed for tracing.

    :param apkfile: path of the APK to process
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    # Only the analysis object of the AnalyzeAPK result is used below.
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    with open(
            'F:\\2018年第一学年科研\\APK科研\\数据集\\Word2vec_ao_yuliaoku\\test\\successed_1.txt',
            'w') as txtData:
        for k in d.get_classes():
            class_name = k.get_name()
            print('class_name:' + class_name)
            txtData.writelines(dp.get_source_class(k))
            for m in dx.find_methods(classname=class_name):
                orig_method = m.get_method()
                is_this_external = isinstance(orig_method, ExternalMethod)
                CFG.add_node(orig_method, external=is_this_external)
                # Trace every method reported for this class.
                if is_this_external:
                    print('orig+external::' + orig_method.get_name())
                else:
                    print('orig::' + orig_method.get_name())
                for other_class, callee, offset in m.get_xref_to():
                    # ``Graph.node`` was removed in networkx 2.4; membership
                    # on the graph itself works on every networkx version.
                    if callee in CFG:
                        continue
                    is_external = isinstance(callee, ExternalMethod)
                    CFG.add_node(callee, external=is_external)
                    # NOTE(review): the two labels look swapped (the
                    # non-external branch prints 'external+callee::');
                    # output is kept as-is - confirm the intent.
                    if is_external:
                        print('callee:' + callee.get_name())
                    else:
                        print('external+callee::' + callee.get_name())
Ejemplo n.º 2
0
def get_apis(path, filename):
    """Collect the method references of an APK that target foreign classes.

    Every instruction output of every method that has code is matched
    against a ``Lclass;->name(args)...`` pattern; matches whose class is not
    one of the classes defined in the APK's DEX are kept.

    :param path: path of the APK file
    :param filename: forwarded unchanged to ``writeToTxt``
    :returns: sorted list of the distinct matched strings
    """
    app = apk.APK(path)
    app_dex = dvm.DalvikVMFormat(app.get_dex())
    app_x = analysis.Analysis(app_dex)
    # A set keeps the per-instruction "is this one of our own classes" test cheap.
    own_classes = {cc.get_name() for cc in app_dex.get_classes()}
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')
    methods = set()

    for method in app_dex.get_methods():
        # Methods without a code item have no instructions to scan.
        if method.get_code() is None:
            continue
        g = app_x.get_method(method)

        for block in g.get_basic_blocks().get():
            for ins in block.get_instructions():
                match = call_pattern.search(ins.get_output())
                if match and match.group(1) not in own_classes:
                    methods.add(match.group())

    methods = sorted(methods)
    # The original Python 2 print statements had been split into no-op
    # expression statements; restore them as real calls.
    print("methods:" + "\n")
    print(methods)
    writeToTxt("Methods:", methods, filename)
    return methods
Ejemplo n.º 3
0
 def __init__(self, *args, **kwargs):
     """Load the example DEX file and attach a fresh analysis object to it."""
     super(RenameTest, self).__init__(*args, **kwargs)
     dex_path = "examples/android/TestsAndroguard/bin/classes.dex"
     with open(dex_path, "rb") as dex_file:
         raw_dex = dex_file.read()
     self.d = dvm.DalvikVMFormat(raw_dex)
     self.dx = analysis.Analysis(self.d)
     self.d.set_vmanalysis(self.dx)
Ejemplo n.º 4
0
def Deal_one_apk_new(apkfile):
    """Print every class, method and newly seen callee of an APK.

    Each method reported for a class, and each callee not yet present, is
    added as a node (flagged ``external``) to a local directed graph.

    :param apkfile: path of the APK to process
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    # NOTE(review): vmx/dp are not used below; kept in case constructing them
    # matters - confirm and remove.
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    for k in d.get_classes():
        class_name = k.get_name()
        print('class_name:' + class_name)
        for m in dx.find_methods(classname=class_name):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)
            # Trace every method reported for this class.
            if is_this_external:
                print('orig+externalmethod::' + orig_method.get_name())
            else:
                print('orig::' + orig_method.get_name())
            for other_class, callee, offset in m.get_xref_to():
                # ``Graph.node`` was removed in networkx 2.4; membership on
                # the graph itself works on every networkx version.
                if callee in CFG:
                    continue
                is_external = isinstance(callee, ExternalMethod)
                CFG.add_node(callee, external=is_external)
                # NOTE(review): the two labels look swapped (the non-external
                # branch prints 'external+callee::'); output kept as-is.
                if is_external:
                    print('callee:' + callee.get_name())
                else:
                    print('external+callee::' + callee.get_name())
def Get_APK_Words(apkfile):
    """Print the corpus entry of selected methods of every class in an APK.

    A method is selected when it is not external and its name appears in
    ``all_callback`` or in ``APK_Method_Key_Words.key_registers``. The corpus
    is the first dictionary returned by ``Build_APK_Corpus``, indexed by
    class name and then by method name + descriptor.

    :param apkfile: path of the APK to process
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    a1, d1, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    class_code_dic, is_amd_class_code_dic = Build_APK_Corpus(apkfile)

    for k in d.get_classes():  # visit every class
        all_orig_methods = []  # the non-external methods of this class
        print('class_name+super_name::' + k.get_name() + ':' +
              k.get_superclassname())

        # First pass: register every method of the class in the graph.
        for m in dx.find_methods(classname=k.get_name()):
            orig_method = m.get_method()
            external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=external)
            if not external:
                all_orig_methods.append(orig_method)

        # Second pass: print the corpus entry of the selected methods.
        for m in dx.find_methods(classname=k.get_name()):
            orig_method = m.get_method()
            if isinstance(orig_method, ExternalMethod):
                continue
            method_name = orig_method.get_name()
            if (method_name not in all_callback
                    and method_name not in APK_Method_Key_Words.key_registers):
                continue
            method_key = method_name + orig_method.get_descriptor()
            print('method_name+method_descriptor::' + method_key)
            print(class_code_dic[k.get_name()][method_key])
Ejemplo n.º 6
0
 def __init__(self, name):
     """Load ``name`` through ``auto_vm`` and index its classes by name.

     :param name: value handed to ``auto_vm``
     :raises ValueError: if ``auto_vm`` returns ``None``
     """
     vm = auto_vm(name)
     if vm is None:
         raise ValueError('Format not recognised: %s' % name)
     self.vma = analysis.Analysis(vm)
     self.classes = {
         dvclass.get_name(): dvclass
         for dvclass in vm.get_classes()
     }
Ejemplo n.º 7
0
def test(apkfile):
    """Print the DAD-decompiled source of every class of an APK.

    :param apkfile: path of the APK to decompile
    """
    package = apk.APK(apkfile, False, 'r', None, 2)
    dex = dvm.DalvikVMFormat(package.get_dex())
    dex_analysis = analysis.Analysis(dex)
    # DAD is androguard's built-in decompiler
    dad = decompiler.DecompilerDAD(dex, dex_analysis)
    for dex_class in dex.get_classes():
        print(dad.get_source_class(dex_class))
Ejemplo n.º 8
0
    def AnalysisStart(self, filepath, savefolder):
        """Write per-basic-block features of every method of an APK to a file.

        For each method that has code, one row per basic block is collected:
        ``[1-based index, find_nodes(block), find_loops(graph, block),
        number of next blocks, number of previous blocks, number of
        instructions]``. The rows are handed to ``save_base_feature``
        together with the method's class name and name.

        :param filepath: path of the APK; the part of its last '/'-separated
            component before the first '.' names the output sub-folder
        :param savefolder: prefix the sub-folder name is appended to
        :returns: ``(apk_name, message)`` where message is ``'not valid'``,
            ``'<path> already exist'`` or ``'analysis success'``
        """
        apk_dex, apk_name, apk_flag = self.getdex(filepath)

        # Check validity before parsing, so an invalid APK is reported
        # instead of being fed to the DEX parser.
        if not apk_flag:
            return apk_name, 'not valid'

        d = dvm.DalvikVMFormat(apk_dex)
        x = analysis.Analysis(d)

        # Create the output folder; an existing one means "already handled".
        path = savefolder + filepath.split('/')[-1].split('.')[0] + '/'
        if os.path.exists(path):
            return apk_name, path + ' already exist'
        os.makedirs(path)
        print(path, 'create success')

        base_source = path + apk_name + '_base_source.txt'
        # The context manager closes the file even if the analysis raises.
        with open(base_source, 'w') as bs_file:
            for method in d.get_methods():
                if method.get_code() is None:
                    continue
                g = x.get_method(method)
                graphh = self.get_graph(g)
                self.has_cycle(graphh)
                properity = []
                for index, block in enumerate(g.get_basic_blocks().get(),
                                              start=1):
                    properity.append([
                        index,
                        self.find_nodes(block),
                        self.find_loops(graphh, block),
                        sum(1 for _ in block.get_next()),
                        sum(1 for _ in block.get_prev()),
                        sum(1 for _ in block.get_instructions()),
                    ])
                self.save_base_feature(bs_file, method.get_class_name(),
                                       method.get_name(), properity)
        return apk_name, 'analysis success'
Ejemplo n.º 9
0
def test(file_path):
    """Load and analyse an APK, then call its ``show()``.

    Any exception raised along the way is printed instead of propagated.

    :param file_path: path of the APK to load
    """
    try:
        package = apk.APK(file_path)
        dex = dvm.DalvikVMFormat(package.get_dex())
        analysis.Analysis(dex)
        package.show()
    except Exception as error:
        print(error)
Ejemplo n.º 10
0
def extract_features(file_path):
    """Parse an APK, wire up analysis and decompiler, and return its permissions.

    :param file_path: path of the APK to load
    :returns: the result of the APK object's ``get_permissions()``
    """
    package = APK(file_path)
    dex = DalvikVMFormat(package.get_dex())
    dex_analysis = Analysis(dex)
    # Second parse through the module-qualified names; the result is not
    # used afterwards.
    analysis.Analysis(dvm.DalvikVMFormat(package.get_dex()))
    dex.set_vmanalysis(dex_analysis)
    dex.set_decompiler(DecompilerDAD(dex, dex_analysis))
    return package.get_permissions()
Ejemplo n.º 11
0
def get_compressed_feature_vector(path, api_call_dictionary):
    '''
    Get a compressed feature vector.

    Every basic block that has children starts one sequence (one column):
    the API calls found in the block are written into that column, then
    ``traverse_graph`` continues from one randomly chosen child.

    Parameters
    ----------
    path : the path of the file to get a compressed feature vector;
        a path ending in '.apk' is opened as an APK, anything else is read
        as a raw DEX file

    api_call_dictionary : a dictionary of API calls; it is indexed with
        every matched call string

    Returns
    -------
    feature_vector : a compressed feature vector
        (``max_calls`` x ``max_calls`` integer array)

    recursion_error_count : the number of recursion errors
    '''

    max_sequences = max_calls
    feature_vector = np.zeros((max_calls, max_sequences), dtype = int)

    call_count = 0
    sequence_count = 0

    if path.endswith('.apk'):
        app = apk.APK(path)
        app_dex = dvm.DalvikVMFormat(app.get_dex())
    else:
        # Close the file deterministically instead of leaking the handle.
        with open(path, 'rb') as dex_file:
            raw_dex = dex_file.read()
        app_dex = dvm.DalvikVMFormat(raw_dex)

    app_x = analysis.Analysis(app_dex)
    class_names = [classes.get_name() for classes in app_dex.get_classes()]
    recursion_error_count = 0
    call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

    for method in app_dex.get_methods():
        # Methods without a code item have no basic blocks to walk.
        if method.get_code() is None:
            continue

        g = app_x.get_method(method)

        for i in g.get_basic_blocks().get():
            if i.childs != [] and sequence_count < max_sequences:
                call_count = 0

                for ins in i.get_instructions():
                    output = ins.get_output() # this is a string that contains methods, variables, or anything else
                    match = call_pattern.search(output)

                    if match and match.group(1) not in class_names and call_count < max_calls:
                        feature_vector[call_count, sequence_count] = api_call_dictionary[match.group()]
                        call_count += 1

                rand_child_selected = np.random.randint(len(i.childs))
                recursion_error_count = traverse_graph(i.childs[rand_child_selected][2], feature_vector, class_names, call_count, sequence_count, recursion_error_count, api_call_dictionary)

                sequence_count += 1

    return feature_vector, recursion_error_count
Ejemplo n.º 12
0
def generate_dictionary(apk_folder_directory) -> int:
	'''
	Generate a dictionary mapping all distinct API calls to numbers and pickle the dictionary.

	Only entries whose path ends in '.apk' are scanned. Each new API call
	gets the next free number (the dictionary length at insertion time).

	Parameters
	----------
	apk_folder_directory : the directory of the folder containing APKs for scanning

	Returns
	-------
	dictionary_length : the length of the API call dictionary (-1 if any exception occurs while scanning)
	'''

	api_call_dictionary = {}
	call_pattern = re.compile(r'(L[^;]*;)->[^\(]*\([^\)]*\).*')

	try:
		for entry in os.listdir(apk_folder_directory):
			file_path = os.path.join(apk_folder_directory, entry)

			if not file_path.endswith('.apk'):
				continue

			app = apk.APK(file_path)
			app_dex = dvm.DalvikVMFormat(app.get_dex())
			app_x = analysis.Analysis(app_dex)

			class_names = {classes.get_name() for classes in app_dex.get_classes()}

			for method in app_dex.get_methods():
				# Methods without a code item have no instructions to scan.
				if method.get_code() is None:
					continue

				g = app_x.get_method(method)

				for i in g.get_basic_blocks().get():
					for ins in i.get_instructions():
						output = ins.get_output() # this is a string that contains methods, variables, or anything else
						match = call_pattern.search(output)

						if match and match.group(1) not in class_names:
							api_call = match.group()

							if api_call not in api_call_dictionary:
								api_call_dictionary[api_call] = len(api_call_dictionary)
	except Exception:
		# Keep the documented -1 contract, but let KeyboardInterrupt and
		# SystemExit propagate instead of swallowing them.
		return -1

	with open(get_dictionary_path(apk_folder_directory), 'wb') as dictionary_stream:
		pickle.dump(api_call_dictionary, dictionary_stream, protocol = pickle.DEFAULT_PROTOCOL)

	return len(api_call_dictionary)
Ejemplo n.º 13
0
def Deal_one_apk(apkfile):
    """Print the keyword entries of selected methods of one hard-coded class.

    Only the class ``Lmy/app/client/Client;`` is inspected. For each of its
    non-external methods whose name is in ``all_callback`` or in
    ``APK_Method_Key_Words.key_registers``, the entries of both dictionaries
    returned by ``Update_one_apk_dictory`` are printed.

    :param apkfile: path of the APK to process
    :returns: ``apk_word``, which is never filled here (always ``[]``)
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    # NOTE(review): vmx/dp are not used below; kept in case constructing them
    # matters - confirm and remove.
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    apk_word = []
    class_code_dic, is_amd_class_code_dic = Update_one_apk_dictory(apkfile)
    amd_p1 = 'Lmy/app/client/Client;'
    for k in d.get_classes():
        if k.get_name() != amd_p1:
            continue

        # First pass: register all methods of the class and remember the
        # non-external ones.
        all_orig_methods = []
        for m in dx.find_methods(classname=amd_p1):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)
            if not is_this_external:
                all_orig_methods.append(orig_method)

        # Second pass: handle the selected methods of the class.
        for m in dx.find_methods(classname=amd_p1):
            orig_method = m.get_method()
            if isinstance(orig_method, ExternalMethod):
                continue
            method_name = orig_method.get_name()
            if (method_name not in all_callback
                    and method_name not in APK_Method_Key_Words.key_registers):
                continue
            method_key = method_name + orig_method.get_descriptor()

            callees = []  # callees that were not yet present in the graph
            for other_class, callee, offset in m.get_xref_to():
                # ``Graph.node`` was removed in networkx 2.4; membership on
                # the graph itself works on every networkx version.
                if callee not in CFG:
                    CFG.add_node(callee,
                                 external=isinstance(callee, ExternalMethod))
                    callees.append(callee)

            # NOTE(review): every entry of all_orig_methods was added to the
            # graph in the first pass, so it can never end up in `callees`;
            # this merge looks unreachable - confirm the intended condition.
            for callee in callees:
                if callee in all_orig_methods:
                    class_code_dic[amd_p1][method_key] += class_code_dic[
                        amd_p1][callee.get_name() + callee.get_descriptor()]

            print(class_code_dic[amd_p1][method_key])
            print(is_amd_class_code_dic[amd_p1][method_key])

    return apk_word
Ejemplo n.º 14
0
    def testMethodRename(self):
        """A method renamed via ``set_name`` reports the new name."""
        dex_path = "examples/android/TestsAndroguard/bin/classes.dex"
        with open(dex_path, "rb") as fd:
            raw_dex = fd.read()
        d = dvm.DalvikVMFormat(raw_dex)
        dx = analysis.Analysis(d)
        d.set_vmanalysis(dx)

        old_name = "testDouble"
        new_name = "blablaMyMethod"

        # Exactly one method is expected to carry the old name.
        (meth,) = d.get_method(old_name)
        self.assertEqual(meth.get_name(), old_name)
        meth.set_name(new_name)
        self.assertEqual(meth.get_name(), new_name)
Ejemplo n.º 15
0
def GetAnalyzedDex(sample_name):
    """Return an analysis object with cross references for an APK's DEX.

    :param sample_name: path of the APK to load
    :returns: the analysis object after ``create_xref()`` has been called
    """
    package = APK(sample_name)
    dex = dvm.DalvikVMFormat(package.get_dex())
    dex_analysis = analysis.Analysis(dex)

    # Unwrap the first element should a plain list be handed back.
    dex_analysis = (dex_analysis[0]
                    if type(dex_analysis) is list else dex_analysis)

    dex_analysis.create_xref()
    return dex_analysis
Ejemplo n.º 16
0
    def testMultiDexExternal(self):
        """
        Test if classes are noted as external if not both zips are opened
        """
        from zipfile import ZipFile

        foobar = "Lcom/foobar/foo/Foobar;"
        blafoo = "Lcom/blafoo/bar/Blafoo;"

        with ZipFile("examples/tests/multidex/multidex.apk") as archive:
            raw_dexes = [
                archive.read(member)
                for member in ("classes.dex", "classes2.dex")
            ]
        d1, d2 = (dvm.DalvikVMFormat(raw) for raw in raw_dexes)

        # Only the first DEX loaded: Foobar is internal, Blafoo is unknown.
        first_only = analysis.Analysis()
        first_only.add(d1)
        self.assertIn(foobar, first_only.classes)
        self.assertFalse(first_only.classes[foobar].is_external())
        self.assertNotIn(blafoo, first_only.classes)

        # Only the second DEX loaded: Blafoo is internal, Foobar is unknown...
        second_only = analysis.Analysis()
        second_only.add(d2)
        self.assertIn(blafoo, second_only.classes)
        self.assertFalse(second_only.classes[blafoo].is_external())
        self.assertNotIn(foobar, second_only.classes)

        # ...until the cross references are built: now we "see" the
        # reference to Foobar, and it is external.
        second_only.create_xref()
        self.assertIn(foobar, second_only.classes)
        self.assertTrue(second_only.classes[foobar].is_external())

        # Both DEX files loaded: both classes are internal.
        both = analysis.Analysis()
        both.add(d1)
        both.add(d2)
        self.assertIn(blafoo, both.classes)
        self.assertFalse(both.classes[blafoo].is_external())
        self.assertIn(foobar, both.classes)
        self.assertFalse(both.classes[foobar].is_external())
def Get_Apk_Source_Code(apkfile):
    """Decompile every class of an APK with DAD into one fixed text file.

    :param apkfile: path of the APK to decompile
    """
    package = apk.APK(apkfile, False, 'r', None, 2)
    dex = dvm.DalvikVMFormat(package.get_dex())
    dex_analysis = analysis.Analysis(dex)
    # DAD is androguard's built-in decompiler
    dad = decompiler.DecompilerDAD(dex, dex_analysis)

    # Write the source of each class to the output file.
    output_path = 'F:\\2018年第一学年科研\\APK科研\\数据集\\Word2vec_ao_yuliaoku\\source.txt'
    with open(output_path, 'w') as txtfile:
        for dex_class in dex.get_classes():
            txtfile.writelines(dad.get_source_class(dex_class))
Ejemplo n.º 18
0
    def create_adex(self, log, dexobj):
        """
        Build an Analysis object for a DEX and compute its cross references.

        :param log: an object which corresponds to a unique app (not used here)
        :param androguard.core.bytecodes.dvm.DalvikVMFormat dexobj: a :class:`DalvikVMFormat` object

        :rtype: a :class:`~androguard.core.analysis.analysis.Analysis` object
        """
        dex_analysis = analysis.Analysis(dexobj)
        dex_analysis.create_xref()
        return dex_analysis
Ejemplo n.º 19
0
    def create_adex(self, log, dexobj):
        """
        Build an Analysis object for a DEX and compute its cross references.

        :param log: an object which corresponds to a unique app (not used here)
        :param dexobj: a :class:`DalvikVMFormat` object

        :rtype: a :class:`Analysis` object
        """
        dex_analysis = analysis.Analysis(dexobj)
        dex_analysis.create_xref()
        return dex_analysis
Ejemplo n.º 20
0
def static_dex2(app, result):
    """Store the DEX-derived findings of ``app`` under ``result['dex']``.

    If parsing or analysing the DEX raises, the traceback is printed and
    empty ``apis`` / ``strings`` dictionaries are stored instead.

    :param app: APK object providing ``get_filename()`` and ``get_dex()``
    :param result: dictionary that receives the ``'dex'`` entry with the
        keys ``'apis'``, ``'strings'`` and ``'libs'``
    """
    libs = get_libs_from_dex(app.get_filename())
    app_dex = app_x = None
    try:
        app_dex = dvm.DalvikVMFormat(app.get_dex())
        app_x = analysis.Analysis(app_dex)
    except Exception:
        traceback.print_exc()
        apis, method_strings = {}, {}

    # Only run the extractors when both objects were built successfully.
    if app_dex is not None and app_x is not None:
        apis = gen_apis_one_cnt(app, app_dex, app_x, libs)
        method_strings = gen_strings_from_dex(app, app_dex, app_x, libs)

    result['dex'] = {"apis": apis, "strings": method_strings, "libs": libs}
Ejemplo n.º 21
0
    def __init__(self, name):
        """
        Load every DEX contained in ``name`` into one analysis object and
        index the original classes by name.

        :param name: filename to load
        :raises ValueError: if ``androconf.is_android`` reports neither
            'APK', 'DEX' nor 'DEY'
        """
        self.vma = analysis.Analysis()

        # Proper detection which supports multidex inside APK
        ftype = androconf.is_android(name)
        if ftype == 'APK':
            package = apk.APK(name)
            for raw_dex in package.get_all_dex():
                self.vma.add(dvm.DalvikVMFormat(raw_dex))
        elif ftype == 'DEX':
            self.vma.add(dvm.DalvikVMFormat(read(name)))
        elif ftype == 'DEY':
            self.vma.add(dvm.DalvikOdexVMFormat(read(name)))
        else:
            raise ValueError("Format not recognised for filename '%s'" % name)

        self.classes = {}
        for class_analysis in self.vma.get_classes():
            original = class_analysis.orig_class
            self.classes[original.get_name()] = original
Ejemplo n.º 22
0
def compile_dex(apkfile, filtercfg):
    """Compile the methods of a DEX/APK that the filter selects.

    Methods whose JNI long name exceeds 220 characters are skipped. A
    method whose compilation raises is logged and recorded in the error
    list; the loop then continues with the next method.

    :param apkfile: value handed to ``auto_vm``
    :param filtercfg: configuration handed to ``MethodFilter``
    :returns: ``(compiled_method_code, errors)`` - a dict mapping method
        triples to generated code, and a list of ``'<name>:<error>'`` strings
    """
    show_logging(level=logging.INFO)

    d = auto_vm(apkfile)
    dx = analysis.Analysis(d)
    method_filter = MethodFilter(filtercfg, d)
    compiler = Dex2C(d, dx)

    compiled_method_code = {}
    errors = []

    for m in d.get_methods():
        method_triple = get_method_triple(m)
        jni_longname = JniLongName(*method_triple)
        full_name = ''.join(method_triple)

        if len(jni_longname) > 220:
            logger.debug("name to long %s(> 220) %s" %
                         (jni_longname, full_name))
            continue

        if not method_filter.should_compile(m):
            continue

        logger.debug("compiling %s" % (full_name))
        try:
            code = compiler.get_source_method(m)
        except Exception as e:
            reason = str(e)
            logger.warning("compile method failed:%s (%s)" %
                           (full_name, reason),
                           exc_info=True)
            errors.append('%s:%s' % (full_name, reason))
            continue

        # Empty results are not recorded.
        if code:
            compiled_method_code[method_triple] = code

    return compiled_method_code, errors
Ejemplo n.º 23
0
def Deal_one_apk(apkfile):
    """Print the source, methods and newly seen callees of one hard-coded class.

    Only the class ``Lcom/mix_four/dd/LockReceiver;`` is inspected. Its
    methods and their not-yet-seen callees are added as nodes (flagged
    ``external``) to a local directed graph.

    :param apkfile: path of the APK to process
    :returns: ``apk_word``, which is never filled here (always ``[]``)
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    apk_word = []
    amd_p1 = 'Lcom/mix_four/dd/LockReceiver;'
    for k in d.get_classes():
        if k.get_name() != amd_p1:
            continue
        print('class_name:' + k.get_name())
        print(dp.get_source_class(k))
        for m in dx.find_methods(classname=amd_p1):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            CFG.add_node(orig_method, external=is_this_external)
            # Trace the non-external methods of the class.
            if not is_this_external:
                print('orig::' + orig_method.get_name())
            for other_class, callee, offset in m.get_xref_to():
                # ``Graph.node`` was removed in networkx 2.4; membership on
                # the graph itself works on every networkx version.
                if callee in CFG:
                    continue
                is_external = isinstance(callee, ExternalMethod)
                CFG.add_node(callee, external=is_external)
                # NOTE(review): the two labels look swapped (the non-external
                # branch prints 'external+callee::'); output kept as-is.
                if is_external:
                    print('callee:' + callee.get_name())
                else:
                    print('external+callee::' + callee.get_name())
    return apk_word
Ejemplo n.º 24
0
        def analyze_dex(filename, raw=False, decompiler=None):
            """
            Analyze an android dex file and set everything up for a quicker analysis.

            :param filename: the filename of the android dex file or a buffer which represents the dex file
            :type filename: string
            :param raw: True if you would like to use a buffer (optional)
            :type raw: boolean
            :param decompiler: the type of decompiler to use ("dad", "dex2jad", "ded"); handed to ``run_decompiler``
            :type decompiler: string

            :rtype: return the :class:`DalvikVMFormat`, and :class:`VMAnalysis` objects
            """
            if raw:
                d = DalvikVMFormat(filename)
            else:
                # Close the file deterministically instead of leaking the handle.
                with open(filename, "rb") as dex_file:
                    raw_dex = dex_file.read()
                d = DalvikVMFormat(raw_dex)
            dx = analysis.Analysis(d)
            d.set_vmanalysis(dx)
            run_decompiler(d, dx, decompiler)
            dx.create_xref()
            return d, dx
Ejemplo n.º 25
0
def Update_one_apk_dictory(apkfile):
    """Extend each method's keyword list with the keywords of its callees.

    Starts from the two dictionaries returned by ``Get_one_apk_dictory``
    (both indexed by class name, then by method name + descriptor). For
    every non-external method, the keywords of each newly seen callee are
    appended: taken from the dictionary when the callee is listed there,
    otherwise derived with ``Get_Word`` for callees whose class is listed
    but whose method is not. A method is flagged ``'true'`` in the second
    dictionary when one of its listed callees is flagged ``'true'``.

    :param apkfile: path of the APK to process
    :returns: ``(class_code_dic, is_amd_class_code_dic)``, updated in place
    """
    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    # NOTE(review): vmx/dp are not used below; kept in case constructing them
    # matters - confirm and remove.
    vmx = analysis.Analysis(d)
    dp = decompiler.DecompilerDAD(d, vmx)  # DAD is androguard's built-in decompiler
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    class_code_dic, is_amd_class_code_dic = Get_one_apk_dictory(apkfile)
    for k in d.get_classes():
        class_name = k.get_name()
        for m in dx.find_methods(classname=class_name):  # every method of the class
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            # Record the method itself in the graph.
            CFG.add_node(orig_method, external=is_this_external)

            callees = []  # callees that were not yet present in the graph
            for other_class, callee, offset in m.get_xref_to():
                # ``Graph.node`` was removed in networkx 2.4; membership on
                # the graph itself works on every networkx version.
                if callee not in CFG:
                    CFG.add_node(callee,
                                 external=isinstance(callee, ExternalMethod))
                    callees.append(callee)

            # Open question left by the author: what about a function of one
            # class calling a function of another class?
            if is_this_external:
                continue

            method_key = orig_method.get_name() + orig_method.get_descriptor()
            has_flagged_callee = False
            # Copy, so the stored list is only replaced once it is complete.
            orig_method_key_words = list(class_code_dic[class_name][method_key])

            for callee in callees:
                callee_class = callee.get_class_name()
                if callee_class not in class_code_dic:
                    continue
                callee_key = callee.get_name() + callee.get_descriptor()
                if callee_key in class_code_dic[callee_class]:
                    # Listed callee: reuse its keywords.
                    orig_method_key_words += class_code_dic[callee_class][
                        callee_key]
                    if is_amd_class_code_dic[callee_class][
                            callee_key] == 'true':
                        has_flagged_callee = True
                else:
                    orig_method_key_words += Get_Word(callee_key)

            # Store the extended keyword list for the original method.
            class_code_dic[class_name][method_key] = orig_method_key_words
            if has_flagged_callee:
                is_amd_class_code_dic[class_name][method_key] = 'true'

    return class_code_dic, is_amd_class_code_dic
Ejemplo n.º 26
0
def extract_features(file_path):
    """Collect static metadata and binary feature vectors for one APK.

    The APK at ``file_path`` is loaded with ``AnalyzeAPK``; manifest
    metadata, hashes of the raw file, and the names of all classes, methods
    and fields known to the analysis object are stored in a dict.

    Args:
        file_path: Path of the APK file handed to ``AnalyzeAPK``.

    Returns:
        dict: The collected metadata.  ``result['feature_vectors']`` holds
        three 0/1 lists (``api_calls``, ``permissions``, ``special_strings``)
        ordered like ``API_CALLS``, ``PERMISSIONS`` and ``SPECIAL_STRINGS``.
    """
    result = {}
    # AnalyzeAPK already yields the analysis object; the DEX is not parsed a
    # second time because nothing here needs another VM/analysis pair.
    a, _, dx = AnalyzeAPK(file_path)
    raw = a.get_raw()  # read once, hashed twice below

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = dx.get_strings()
    result['class_names'] = [c.name for c in dx.get_classes()]
    result['method_names'] = [m.name for m in dx.get_methods()]
    result['field_names'] = [f.name for f in dx.get_fields()]
    # These flags are hard-coded placeholders; no detection is performed.
    result['is_native_code'] = 1
    result['is_obfuscation'] = 1
    result['is_crypto_code'] = 1
    result['is_dyn_code'] = 1
    result['is_reflection_code'] = 1
    result['is_database'] = 1

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    feature_vectors = {}
    result['feature_vectors'] = feature_vectors

    # Presence of each API call.  find_methods() returns a lazy iterable,
    # which is truthy even when it yields nothing, so test for a first item
    # instead of testing the iterable itself.
    feature_vectors['api_calls'] = [
        1 if any(True for _ in dx.find_methods(".", call, ".")) else 0
        for call in API_CALLS
    ]

    # Presence of each permission in the manifest.
    feature_vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # Presence of each special string; find_strings() is lazy as well.
    feature_vectors['special_strings'] = [
        1 if any(True for _ in dx.find_strings(word)) else 0
        for word in SPECIAL_STRINGS
    ]

    return result
Ejemplo n.º 27
0
    def testXrefs(self):
        """Test if XREFs produce the correct results.

        Loads the TestsAndroguard DEX file, builds the cross references and
        checks, for ``TestActivity.onCreate`` and ``TestActivity.testCalls``,
        both the outgoing references (ordered by the third element of each
        xref tuple) and the matching incoming reference on every callee.
        """
        with open("examples/android/TestsAndroguard/bin/classes.dex",
                  "rb") as fd:
            d = dvm.DalvikVMFormat(fd.read())
            dx = analysis.Analysis(d)

        dx.create_xref()

        testcls = dx.classes['Ltests/androguard/TestActivity;']
        self.assertIsInstance(testcls, analysis.ClassAnalysis)

        testmeth = list(
            filter(lambda x: x.name == 'onCreate', testcls.get_methods()))[0]

        # find_methods() must return exactly the method found via the class.
        self.assertEqual(
            len(list(dx.find_methods(testcls.name, '^onCreate$'))), 1)
        self.assertEqual(
            list(dx.find_methods(testcls.name, '^onCreate$'))[0], testmeth)

        self.assertIsInstance(testmeth, analysis.MethodClassAnalysis)
        self.assertFalse(testmeth.is_external())
        self.assertIsInstance(testmeth.method, dvm.EncodedMethod)
        self.assertEqual(testmeth.name, 'onCreate')

        # Full names of the callees, sorted by the xref tuple's third element.
        xrefs = list(
            map(
                lambda x: x.full_name,
                map(itemgetter(1),
                    sorted(testmeth.get_xref_to(), key=itemgetter(2)))))
        self.assertEqual(len(xrefs), 5)

        # First, super is called:
        self.assertEqual(
            xrefs.pop(0),
            'Landroid/app/Activity; onCreate (Landroid/os/Bundle;)V')
        # then setContentView (which is in the current class but the method is external)
        self.assertEqual(
            xrefs.pop(0),
            'Ltests/androguard/TestActivity; setContentView (I)V')
        # then getApplicationContext (inside the Toast)
        self.assertEqual(
            xrefs.pop(0),
            'Ltests/androguard/TestActivity; getApplicationContext ()Landroid/content/Context;'
        )
        # then Toast.makeText
        self.assertEqual(
            xrefs.pop(0),
            'Landroid/widget/Toast; makeText (Landroid/content/Context; Ljava/lang/CharSequence; I)Landroid/widget/Toast;'
        )
        # then show()
        self.assertEqual(xrefs.pop(0), 'Landroid/widget/Toast; show ()V')

        # Now, test if the reverse is true
        other = list(dx.find_methods('^Landroid/app/Activity;$', '^onCreate$'))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertTrue(other[0].is_external())
        self.assertTrue(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        other = list(
            dx.find_methods('^Ltests/androguard/TestActivity;$',
                            '^setContentView$'))
        # External because not overwritten in class:
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertTrue(other[0].is_external())
        self.assertFalse(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        other = list(
            dx.find_methods('^Ltests/androguard/TestActivity;$',
                            '^getApplicationContext$'))
        # External because not overwritten in class:
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertTrue(other[0].is_external())
        self.assertFalse(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        other = list(dx.find_methods('^Landroid/widget/Toast;$', '^makeText$'))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertTrue(other[0].is_external())
        self.assertTrue(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        other = list(dx.find_methods('^Landroid/widget/Toast;$', '^show$'))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertTrue(other[0].is_external())
        self.assertTrue(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        # Next test internal calls
        testmeth = list(
            filter(lambda x: x.name == 'testCalls', testcls.get_methods()))[0]

        self.assertEqual(
            len(list(dx.find_methods(testcls.name, '^testCalls$'))), 1)
        self.assertEqual(
            list(dx.find_methods(testcls.name, '^testCalls$'))[0], testmeth)

        self.assertIsInstance(testmeth, analysis.MethodClassAnalysis)
        self.assertFalse(testmeth.is_external())
        self.assertIsInstance(testmeth.method, dvm.EncodedMethod)
        self.assertEqual(testmeth.name, 'testCalls')

        xrefs = list(
            map(
                lambda x: x.full_name,
                map(itemgetter(1),
                    sorted(testmeth.get_xref_to(), key=itemgetter(2)))))
        self.assertEqual(len(xrefs), 4)

        self.assertEqual(xrefs.pop(0),
                         'Ltests/androguard/TestActivity; testCall2 (J)V')
        self.assertEqual(xrefs.pop(0),
                         'Ltests/androguard/TestIfs; testIF (I)I')
        self.assertEqual(xrefs.pop(0),
                         'Ljava/lang/Object; getClass ()Ljava/lang/Class;')
        self.assertEqual(
            xrefs.pop(0),
            'Ljava/io/PrintStream; println (Ljava/lang/Object;)V')

        # Callees defined inside the DEX are neither external nor Android API.
        other = list(
            dx.find_methods('^Ltests/androguard/TestActivity;$',
                            '^testCall2$'))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertFalse(other[0].is_external())
        self.assertFalse(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        other = list(
            dx.find_methods('^Ltests/androguard/TestIfs;$', '^testIF$'))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertFalse(other[0].is_external())
        self.assertFalse(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))

        other = list(dx.find_methods('^Ljava/lang/Object;$', '^getClass$'))
        self.assertEqual(len(other), 1)
        self.assertIsInstance(other[0], analysis.MethodClassAnalysis)
        self.assertTrue(other[0].is_external())
        self.assertTrue(other[0].is_android_api())
        self.assertIn(testmeth.method,
                      map(itemgetter(1), other[0].get_xref_from()))
Ejemplo n.º 28
0
 def testDex(self):
     """Check that an Analysis object can be built from the test DEX file."""
     dex_path = "examples/android/TestsAndroguard/bin/classes.dex"
     with open(dex_path, "rb") as handle:
         dex_bytes = handle.read()
     vm = dvm.DalvikVMFormat(dex_bytes)
     self.assertIsInstance(analysis.Analysis(vm), analysis.Analysis)
Ejemplo n.º 29
0
def Get_one_apk_dictory(apkfile):
    """Build per-class keyword and AMD-flag dictionaries for one APK.

    For every class in the APK's DEX, each non-external method is decompiled
    (together with the non-external callees collected for it) and scanned for
    the substrings in ``APK_Method_Key_Words.amd_key_words`` and
    ``APK_Method_Key_Words.key_Intent``.

    Args:
        apkfile: Path of the APK file to analyse.

    Returns:
        tuple: ``(class_code_dic, is_amd_class_code_dic)``.
        ``class_code_dic`` maps class name -> {method name + descriptor ->
        keyword list built from ``Get_Word``}.  ``is_amd_class_code_dic`` has
        the same keys and stores the string ``'true'`` when at least one AMD
        keyword occurred in the method's collected source, else ``'false'``.
    """
    import re  # local import: only needed to escape class names below

    a = apk.APK(apkfile, False, 'r', None, 2)
    d = dvm.DalvikVMFormat(a.get_dex())
    _, _, dx = AnalyzeAPK(apkfile)
    CFG = nx.DiGraph()
    class_code_dic = {}  # class name -> {method key -> keyword list}
    is_amd_class_code_dic = {}  # class name -> {method key -> 'true'/'false'}
    for k in d.get_classes():
        method_dic = {}  # method key -> keyword list for this class
        is_amd_method_dic = {}
        # find_methods() treats classname as a regular expression; escape it
        # so names containing '$' (inner classes) still match themselves.
        for m in dx.find_methods(classname=re.escape(k.get_name())):
            orig_method = m.get_method()
            is_this_external = isinstance(orig_method, ExternalMethod)
            # Record the originating method as a graph node.
            CFG.add_node(orig_method, external=is_this_external)
            # Only callees that were not yet present in the graph are
            # collected, i.e. each callee is attributed to the first method
            # that references it.
            callees = []
            for _other_class, callee, _offset in m.get_xref_to():
                # `in CFG` is the membership test supported by every networkx
                # release; the `.node` attribute was removed in networkx 2.4.
                if callee not in CFG:
                    CFG.add_node(callee,
                                 external=isinstance(callee, ExternalMethod))
                    callees.append(callee)

            if is_this_external:
                # External methods have no source to decompile.
                continue

            method_key = orig_method.get_name() + orig_method.get_descriptor()

            # Decompiled source of the method itself, one stripped line each.
            orig_method_code = [
                line.strip() for line in orig_method.get_source().split('\n')
            ]
            orig_method_key_words = []
            orig_method_key_words += Get_Word(orig_method.get_name() + '==' +
                                              orig_method.get_descriptor())
            # Append source and name keywords of internal callees, skipping
            # callees that share the method's own name.
            for callee in callees:
                if isinstance(callee, ExternalMethod):
                    continue
                if callee.get_name() == orig_method.get_name():
                    continue
                orig_method_code += [
                    line.strip() for line in callee.get_source().split('\n')
                ]
                orig_method_key_words += Get_Word(callee.get_name() + '==' +
                                                  callee.get_descriptor())

            # Count AMD keyword hits (one per matching source line).
            amd_num = 0
            for key in APK_Method_Key_Words.amd_key_words:
                for code in orig_method_code:
                    if key in code:
                        amd_num += 1
                        orig_method_key_words += Get_Word(key)
            # Intent-related keywords get an extra 'Rbracket' token appended.
            for key in APK_Method_Key_Words.key_Intent:
                for code in orig_method_code:
                    if key in code:
                        key_word = Get_Word(key)
                        key_word.append('Rbracket')
                        orig_method_key_words += key_word

            is_amd_method_dic[method_key] = 'true' if amd_num > 0 else 'false'
            method_dic[method_key] = orig_method_key_words
        class_code_dic[k.get_name()] = method_dic
        is_amd_class_code_dic[k.get_name()] = is_amd_method_dic
    # Both dictionaries are returned for the later processing stages.
    return class_code_dic, is_amd_class_code_dic
Ejemplo n.º 30
0
def extract_features(file_path):
    """Collect static metadata and binary feature vectors for one APK.

    The APK is parsed manually (``APK`` -> ``DalvikVMFormat`` -> ``Analysis``)
    and a DAD decompiler is attached to the VM object.

    Args:
        file_path: Path of the APK file to analyse.

    Returns:
        dict | None: The collected metadata, or ``None`` when loading or
        analysing the APK raised an exception (the exception is printed).
        ``result['feature_vectors']`` holds the 0/1 lists ``api_calls``,
        ``permissions`` and ``special_strings`` (ordered like ``API_CALLS``,
        ``PERMISSIONS`` and ``SPECIAL_STRINGS``) plus ``others``, which is
        ``[is_obfuscation, is_database]``.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # Best-effort loader: report the problem and signal failure with None.
        # print(e) is valid on both Python 2 and Python 3.
        print(e)
        return None

    raw = a.get_raw()  # read once, hashed twice below

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # Only the obfuscation and database flags are computed; the native,
    # crypto, dynamic-code and reflection flags are currently not produced.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    feature_vectors = {}
    result['feature_vectors'] = feature_vectors

    # NOTE(review): "." is passed as class name and descriptor; this looks
    # like it expects regex matching, but get_method_by_name may do an exact
    # lookup -- confirm against the androguard version in use.
    feature_vectors['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS
    ]

    feature_vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    feature_vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]

    feature_vectors['others'] = [
        result['is_obfuscation'],
        result['is_database'],
    ]

    return result