def get_apis(path):

    application = APK(path)
    application_dex = DalvikVMFormat(application.get_dex())
    application_x = Analysis(application_dex)

    methods = set()
    cs = [cc.get_name() for cc in application_dex.get_classes()]

    for method in application_dex.get_methods():
        g = application_x.get_method(method)

        if method.get_code() == None:
            continue

        for i in g.get_basic_blocks().get():
            for ins in i.get_instructions():

                output = ins.get_output()
                match = re.search(r'(L[^;]*;)->[^\(]*\([^\)]*\).*', output)
                if match and match.group(1) not in cs:
                    methods.add(match.group())

    methods = list(methods)
    return methods
def extract_features(file_path):
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        #vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except:
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    #result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(a.get_raw()).hexdigest()
    result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    #result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    #result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    '''result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0'''
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    arr = []
    s = a.get_elements("action", "name")
    for i in s:
        arr.append(i)

    result['intents'] = arr

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk
    result['feature_vectors']['api_calls'] = []
    for call in API_CALLS:
        status = 1 if dx.get_method(call) else 0
        result['feature_vectors']['api_calls'].append(status)

    # Search for the presence of permissions in a given apk
    result['feature_vectors']['permissions'] = []
    for permission in PERMISSIONS:
        status = 1 if permission in result['permissions'] else 0
        result['feature_vectors']['permissions'].append(status)

    #Search for the presence of intents in a given apk
    result['feature_vectors']['intents'] = []
    n = len(INTENTS)
    m = len(result['intents'])
    for i in range(n):
        stri = INTENTS[i]
        flg = False
        for j in range(m):
            if stri in result['intents'][j]:
                flg = True
                break
        if flg:
            status = 1
        else:
            status = 0
        result['feature_vectors']['intents'].append(status)

    #Check for special strings in code
    result['feature_vectors']['special_strings'] = []
    for word in SPECIAL_STRINGS:
        status = 1 if d.get_regex_strings(word) else 0
        result['feature_vectors']['special_strings'].append(status)

    return result
Esempio n. 3
0
class PDG():
    def __init__(self, filename):
        """

        :type self: object
        """
        self.filename = filename
        try:
            self.a = APK(filename)
            self.d = DalvikVMFormat(self.a.get_dex())
            self.d.create_python_export()
            self.dx = Analysis(self.d)
        except zipfile.BadZipfile:
            # if file is not an APK, may be a dex object
            _, self.d, self.dx = AnalyzeDex(self.filename)

        self.d.set_vmanalysis(self.dx)
        self.dx.create_xref()
        self.fcg = self.dx.get_call_graph()
        self.icfg = self.build_icfg()

    def get_graph(self):
        return self.icfg

    def build_icfg(self):
        icfg = nx.DiGraph()
        methods = self.d.get_methods()
        for method in methods:
            for bb in self.dx.get_method(method).basic_blocks.get():
                children = []
                label = self.get_bb_label(bb)
                children = self.get_children(bb, self.dx)
                icfg.add_node(label)
                icfg.add_edges_from([(label, child) for child in children])
        return icfg

    def get_bb_label(self, bb):
        """ Return the descriptive name of a basic block
        """
        return self.get_method_label(bb.method) + (bb.name, )

    def get_method_label(self, method):
        """ Return the descriptive name of a method
        """
        return (method.get_class_name(), method.get_name(),
                method.get_descriptor())

    def get_children(self, bb, dx):
        """ Return the labels of the basic blocks that are children of the
        input basic block in and out of its method
        """
        return self.get_bb_intra_method_children(
            bb) + self.get_bb_extra_method_children(bb, dx)

    def get_bb_intra_method_children(self, bb):
        """ Return the labels of the basic blocks that are children of the
        input basic block within a method
        """
        child_labels = []
        for c_in_bb in bb.get_next():
            next_bb = c_in_bb[2]
            child_labels.append(self.get_bb_label(next_bb))
        return child_labels

    def get_bb_extra_method_children(self, bb, dx):
        """ Given a basic block, find the calls to external methods and
        return the label of the first basic block in these methods
        """

        call_labels = []
        # iterate over calls from bb method to external methods
        try:
            xrefs = dx.get_method_analysis(bb.method).get_xref_to()
        except AttributeError:
            return call_labels
        for xref in xrefs:
            remote_method_offset = xref[2]
            if self.call_in_bb(bb, remote_method_offset):
                try:
                    remote_method = dx.get_method(
                        self.d.get_method_by_idx(remote_method_offset))
                    if remote_method:
                        remote_bb = next(remote_method.basic_blocks.get())
                        call_labels.append(self.get_bb_label(remote_bb))
                except StopIteration:
                    pass
        return call_labels

    def call_in_bb(self, bb, idx):
        return bb.get_start() <= idx <= bb.get_end()
Esempio n. 4
0
def main():
    for path in samples():
        print(path)
        logging.error("Processing" + path)

        tests_apk = [
            "is_valid_APK", "get_filename", "get_app_name", "get_app_icon",
            "get_package", "get_androidversion_code",
            "get_androidversion_name", "get_files", "get_files_types",
            "get_files_crc32", "get_files_information", "get_raw", "get_dex",
            "get_all_dex", "get_main_activity", "get_activities",
            "get_services", "get_receivers", "get_providers",
            "get_permissions", "get_details_permissions",
            "get_requested_aosp_permissions",
            "get_requested_aosp_permissions_details",
            "get_requested_third_party_permissions",
            "get_declared_permissions", "get_declared_permissions_details",
            "get_max_sdk_version", "get_min_sdk_version",
            "get_target_sdk_version", "get_libraries",
            "get_android_manifest_axml", "get_android_manifest_xml",
            "get_android_resources", "get_signature_name",
            "get_signature_names", "get_signature", "get_signatures"
        ]

        tests_dex = [
            "get_api_version",
            "get_classes_def_item",
            "get_methods_id_item",
            "get_fields_id_item",
            "get_codes_item",
            "get_string_data_item",
            "get_debug_info_item",
            "get_header_item",
            "get_class_manager",
            "show",
            # "save",  # FIXME broken
            "get_classes_names",
            "get_classes",
            "get_all_fields",
            "get_fields",
            "get_methods",
            "get_len_methods",
            "get_strings",
            "get_format_type",
            "create_python_export",
            "get_BRANCH_DVM_OPCODES",
            "get_determineNext",
            "get_determineException",
            "print_classes_hierarchy",
            "list_classes_hierarchy",
            "get_format"
        ]

        try:
            # Testing APK
            a = APK(path)
            for t in tests_apk:
                print(t)
                x = getattr(a, t)
                try:
                    x()
                except Exception as aaa:
                    print(aaa)
                    traceback.print_exc()
                    print(path, aaa, file=sys.stderr)
                    logging.exception("{} .. {}".format(path, t))

            # Testing DEX
            dx = Analysis()
            for dex in a.get_all_dex():
                d = DalvikVMFormat(dex)
                dx.add(d)

                # Test decompilation
                for c in d.get_classes():
                    for m in c.get_methods():
                        mx = dx.get_method(m)
                        ms = DvMethod(mx)
                        try:
                            ms.process(doAST=True)
                        except Exception as aaa:
                            print(aaa)
                            traceback.print_exc()
                            print(path, aaa, file=sys.stderr)
                            logging.exception("{} .. {} .. {}".format(
                                path, c.get_name(), m.get_name()))
                        ms2 = DvMethod(mx)
                        try:
                            ms2.process(doAST=False)
                        except Exception as aaa:
                            print(aaa)
                            traceback.print_exc()
                            print(path, aaa, file=sys.stderr)
                            logging.exception("{} .. {} .. {}".format(
                                path, c.get_name(), m.get_name()))

                # DEX tests
                for t in tests_dex:
                    print(t)
                    x = getattr(d, t)
                    try:
                        x()
                    except Exception as aaa:
                        print(aaa)
                        traceback.print_exc()
                        print(path, aaa, file=sys.stderr)
                        logging.exception("{} .. {}".format(path, t))

            # Analysis Tests
            try:
                dx.create_xref()
            except Exception as aaa:
                print(aaa)
                traceback.print_exc()
                print(path, aaa, file=sys.stderr)
                logging.exception("{} .. {} at Analysis".format(path, t))

            # MethodAnalysis tests
            for m in dx.methods.values():
                for bb in m.get_basic_blocks():
                    try:
                        list(bb.get_instructions())
                    except Exception as aaa:
                        print(aaa)
                        traceback.print_exc()
                        print(path, aaa, file=sys.stderr)
                        logging.exception("{} .. {} at BasicBlock {}".format(
                            path, t, m))

        except KeyboardInterrupt:
            raise
        except FileNotFoundError:
            pass
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(path, e, file=sys.stderr)
            logging.exception(path)
def get_apis(path):
    methods = set()
    error_file = open("error_files.txt", "w")

    """
    Get the APIs from an application.
    Parameters:
      path - The path of the application to be decompiled
    Returns:
      A sorted list of APIs with parameters
    """
    try:
        # You can see the documents of androguard to get the further details
        # of the decompilation procedures.
        # 获取APK文件对象
        # application:表示APK对象,在其中可以找到有关 APK 的信息,例如包名、权限、AndroidManifest.xml、resources
        # application_dex:DalvikVMFormat 对象数组,DalvikVMFormat 对应 apk 文件中的 dex 文件,从 dex 文件中我们可以获取类、方法和字符串。
        # application_x:表示 Analysis 对象,其包含链接了关于 classes.dex 信息的特殊的类,甚至可以一次处理许多 dex 文件。
        application = APK(path)
        application_dex = DalvikVMFormat(application.get_dex())
        application_x = Analysis(application_dex)

        # 获得class 对象
        classesList = [classes.get_name() for classes in application_dex.get_classes()]
        # print("classesList:", classesList)
        # 获得methods方法
        for method in application_dex.get_methods():
            methodAnalysis = application_x.get_method(method)
            if method.get_code() is None:
                continue

            for basicBlocks in methodAnalysis.get_basic_blocks().get():
                # 获得jvm指令
                for instructions in basicBlocks.get_instructions():
                    #  这是一个包含方法,变量或其他任何内容的字符串
                    output = instructions.get_output()
                    # print("output", output)

                    # Here we use regular expression to check if it is a function
                    # call. A function call comprises four parts: a class name, a
                    # function name, zero or more parameters, and a return type.
                    # The pattern is actually simple:
                    #
                    #      CLASS NAME: starts with a character L and ends in a right
                    #                  arrow.
                    #   FUNCTION NAME: starts with the right arrow and ends in a
                    #                  left parenthesis.
                    #      PARAMETERS: are between the parentheses.
                    #     RETURN TYPE: is the rest of the string.
                    #
                    # 这里拿到的classList是应用本身的类,第二个匹配的组(一个括号一个组)是调用的类不是应用本身的类,是系统的类
                    # 这里就是通过系统接口调用来做判断。
                    match = re.search(r'(L[^;]*;)->[^\(]*\([^\)]*\).*', output)
                    # print("----")
                    # if match:
                    #     log(match.group() + "----" + match.group(1), "->" + (match.group(1) not in classesList))
                    # print(match.group() + "----" + match.group(1), "->", (match.group(1) not in classesList))
                    # print("isInClassList:", match.group(1), "->", (match.group(1) not in classesList))

                    # match.group():Landroid/app/IntentService;-><init>(Ljava/lang/String;)V
                    # match.group(1):Landroid/app/IntentService;
                    if match and match.group(1) not in classesList:
                        # print(match.group() + "----" + match.group(1))
                        methods.add(match.group())
        methods = list(methods)

    except Exception as e:
        print(e)
        print("path", path)
        error_file.write(path)

    return methods
def main():
    for path in samples():
        print(path)
        logging.error("Processing" + path)

        tests_apk = ["is_valid_APK", "get_filename", "get_app_name", "get_app_icon",
                 "get_package", "get_androidversion_code", "get_androidversion_name",
                 "get_files", "get_files_types", "get_files_crc32", "get_files_information",
                 "get_raw", "get_dex", "get_all_dex", "get_main_activity",
                 "get_activities", "get_services", "get_receivers", "get_providers",
                 "get_permissions", "get_details_permissions", "get_requested_aosp_permissions",
                 "get_requested_aosp_permissions_details", "get_requested_third_party_permissions",
                 "get_declared_permissions", "get_declared_permissions_details", "get_max_sdk_version",
                 "get_min_sdk_version", "get_target_sdk_version", "get_libraries", "get_android_manifest_axml",
                 "get_android_manifest_xml", "get_android_resources", "get_signature_name", "get_signature_names",
                 "get_signature", "get_signatures"]

        tests_dex = ["get_api_version", "get_classes_def_item", "get_methods_id_item", "get_fields_id_item",
                     "get_codes_item", "get_string_data_item",
                     "get_debug_info_item", "get_header_item", "get_class_manager", "show",
                     "save", "get_classes_names", "get_classes",
                     "get_all_fields", "get_fields", "get_methods", "get_len_methods",
                     "get_strings", "get_format_type", "create_python_export",
                     "get_BRANCH_DVM_OPCODES", "get_determineNext",
                     "get_determineException", "print_classes_hierarchy",
                     "list_classes_hierarchy", "get_format"]

        try:
            # Testing APK
            a = APK(path)
            for t in tests_apk:
                print(t)
                x = getattr(a, t)
                try:
                    x()
                except Exception as aaa:
                    print(aaa)
                    traceback.print_exc()
                    print(path, aaa, file=sys.stderr)
                    logging.exception("{} .. {}".format(path, t))


            # Testing DEX
            for dex in a.get_all_dex():
                d = DalvikVMFormat(dex)
                dx = Analysis(d)
                d.set_vmanalysis(dx)

                # Test decompilation
                for c in d.get_classes():
                    for m in c.get_methods():
                        mx = dx.get_method(m)
                        ms = DvMethod(mx)
                        try:
                            ms.process(doAST=True)
                        except Exception as aaa:
                            print(aaa)
                            traceback.print_exc()
                            print(path, aaa, file=sys.stderr)
                            logging.exception("{} .. {} .. {}".format(path, c.get_name(), m.get_name()))
                        ms2 = DvMethod(mx)
                        try:
                            ms2.process(doAST=False)
                        except Exception as aaa:
                            print(aaa)
                            traceback.print_exc()
                            print(path, aaa, file=sys.stderr)
                            logging.exception("{} .. {} .. {}".format(path, c.get_name(), m.get_name()))

                # Other tests
                for t in tests_dex:
                    print(t)
                    x = getattr(d, t)
                    try:
                        x()
                    except Exception as aaa:
                        print(aaa)
                        traceback.print_exc()
                        print(path, aaa, file=sys.stderr)
                        logging.exception("{} .. {}".format(path, t))

        except KeyboardInterrupt:
            raise
        except FileNotFoundError:
            pass
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(path, e, file=sys.stderr)
            logging.exception(path)