예제 #1
0
    def __edit_text_analyse__(self, a: apk.APK, d: DalvikVMFormat,
                              dx: Analysis):
        """Record R$id field names reported by FlowDroid that match a PII keyword.

        The keyword list is read from ``assets/pii_keywords.txt`` next to this
        module and the FlowDroid result from ``results/flowdroid/<name>.xml``,
        where ``<name>`` is the APK file name minus its last four characters.
        Every field of the application's ``L<package>/R$id;`` class whose
        initial value (as a string) is one of the reported resource ids and
        whose name is one of the keywords is appended to
        ``self.leak_id_names``.

        ``self.leak_id_names`` is reset to an empty list before the lookup, so
        it stays empty when the FlowDroid result or the R$id class is missing
        (a message is printed in both cases).

        Args:
            a: the APK; supplies the file name and the package name.
            d: the dex file in which the ``R$id`` class is looked up.
            dx: not used by this method.
        """
        module_dir = os.path.dirname(__file__)

        # load keywords (one per line); a set makes the lookup below O(1)
        with open(os.path.join(module_dir, "assets", 'pii_keywords.txt'),
                  'r') as file:
            keywords = set(file.read().splitlines(False))

        # parse flowdroid results
        folder = os.path.join(module_dir, "results", "flowdroid")

        # [:-4] drops a four-character extension such as ".apk"
        file_name = a.get_filename().split(os.path.sep)[-1][:-4]
        fd_path = os.path.join(folder, file_name + ".xml")

        self.leak_id_names = []

        if not os.path.exists(fd_path):
            print("Flowdroid result doesn't exist")
            return

        resource_list = self.__analyse_flowdroid_result__(fd_path)

        # only the first element (the source resource id) of each entry is used
        resource_ids = [
            s_id
            for s_id, s_method, s_statement, sink_method, sink_statement
            in resource_list
        ]

        # find Resource class
        package_name = a.get_package().replace(".", "/")
        cls = d.get_class("L" + package_name + "/R$id;")  # find resource id
        if cls is None:
            print("This application doesn't have an R class")
            return

        # match ids with keywords
        for field in cls.get_fields():
            value = field.get_init_value()
            if value is None:
                # NOTE(review): androguard appears to leave the init value
                # unset for fields without a static initializer; such a field
                # carries no resource id to compare, so skip it rather than
                # crash on ``None.get_value()``.
                continue

            # resource id -> resource name
            the_value = str(value.get_value())
            field_name = field.get_name()
            if the_value in resource_ids and field_name in keywords:
                self.leak_id_names.append(field_name)
def extract_features(file_path):
    """Extract metadata and binary feature vectors from an APK file.

    Args:
        file_path: path of the APK to analyse.

    Returns:
        ``None`` when the APK cannot be loaded or analysed, otherwise a dict
        holding manifest metadata (versions, SDK levels, permissions,
        components, declared intent actions), the md5/sha256 digests of the
        raw APK, the dex strings and class/method/field names, the
        ``is_obfuscation`` and ``is_database`` flags, ``entropy_rate`` of all
        names, and ``feature_vectors`` with 0/1 lists for ``api_calls``,
        ``permissions``, ``intents`` and ``special_strings`` (one entry per
        item of ``API_CALLS``, ``PERMISSIONS``, ``INTENTS`` and
        ``SPECIAL_STRINGS`` respectively).
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: an unreadable APK yields no features.  Unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        return None

    raw = a.get_raw()

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # NOTE: the 'libraries' entry and the native / crypto / dynamic /
    # reflection code flags are currently disabled.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    result['intents'] = list(a.get_elements("action", "name"))

    names = []
    names.extend(result['class_names'])
    names.extend(result['method_names'])
    names.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.get_method(call) else 0 for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # Search for the presence of intents in a given apk: an intent counts as
    # present when it is a substring of any declared action name.
    vectors['intents'] = [
        1 if any(known in declared for declared in result['intents']) else 0
        for known in INTENTS
    ]

    # Check for special strings in code
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]

    return result
예제 #3
0
class XAPK:
    """Bundle an APK (and an optional OBB expansion file) into an ``.xapk``.

    The first ``*.apk`` and the first ``*.obb`` found in ``folder`` are used.
    ``save()`` writes ``<package_name>_v<version_name>.xapk`` into the same
    folder; the archive contains the APK, the OBB under its install path (if
    any), ``icon.png`` (if an icon is available) and ``manifest.json``.

    Raises:
        FileNotFoundError: from the constructor when ``folder`` contains no
            ``*.apk`` file.
    """

    def __init__(self, folder):
        self.folder = Path(folder)
        # Take the first match of each kind.  Both attributes are always
        # defined so that a missing OBB is a normal case, not an
        # AttributeError later on.
        self.apk_src = next(iter(self.folder.glob('*.apk')), None)
        self.obb_src = next(iter(self.folder.glob('*.obb')), None)
        if self.apk_src is None:
            raise FileNotFoundError(
                'no .apk file found in {}'.format(self.folder))

        self.apk = APK(self.apk_src)
        self.manifest = self.make_manifest()
        # NOTE(review): assumes get_app_icon() may return a falsy value when
        # the app declares no icon -- confirm against the androguard version
        # in use.  In that case no icon.png is written by save().
        icon_path = self.apk.get_app_icon()
        self.icon = self.apk.get_file(icon_path) if icon_path else None

    def make_manifest(self):
        """Build and return the XAPK manifest dict for the loaded APK.

        ``expansions`` holds one entry for the main OBB when an OBB file of
        non-zero size is present, and is empty otherwise.
        """
        apk_size = self.apk_src.stat().st_size
        obb_size = self.obb_src.stat().st_size if self.obb_src else 0

        manifest = {
            'xapk_version': 1,
            'package_name': self.apk.get_package(),
            'name': self.apk.get_app_name(),
            # 'locales_name': {},  # TODO
            'version_code': self.apk.get_androidversion_code(),
            'version_name': self.apk.get_androidversion_name(),
            'min_sdk_version': self.apk.get_min_sdk_version(),
            'target_sdk_version': self.apk.get_target_sdk_version(),
            'permissions': self.apk.get_declared_permissions(),
            'total_size': apk_size + obb_size,
            'expansions': [],
        }

        if obb_size:
            obb_path = ('Android/obb/{package_name}/'
                        'main.{version_code}.{package_name}.obb'
                        ).format(**manifest)
            # list.append, not the non-existent list.push
            manifest['expansions'].append({
                'file': obb_path,
                'install_location': 'EXTERNAL_STORAGE',
                'install_path': obb_path,
            })

        return manifest

    def save(self):
        """Write the ``.xapk`` archive next to the source APK.

        The content is staged in a temporary directory, zipped with DEFLATE
        compression, and the staging directory is always removed afterwards.
        Sets ``self.name`` to the archive's file name.
        """
        self.name = '{package_name}_v{version_name}.xapk'.format(
            **self.manifest)
        zip_path = self.folder.joinpath(self.name)

        zip_dir = tempfile.mkdtemp()
        staging = Path(zip_dir)
        try:
            print('copying apk to temp directory...')
            apk_name = '{package_name}.apk'.format(**self.manifest)
            shutil.copy2(self.apk_src.resolve(), staging.joinpath(apk_name))
            print('apk: OK')

            if self.manifest.get('expansions'):
                print('copying obb to temp directory...')
                obb_name = self.manifest['expansions'][0]['install_path']
                obb_dest = staging.joinpath(obb_name)
                os.makedirs(obb_dest.parent, exist_ok=True)
                shutil.copy2(self.obb_src.resolve(), obb_dest)
                print('obb: OK')
            else:
                print('no obb found')

            if self.icon is not None:
                print('creating icon in temp directory...')
                with open(staging.joinpath('icon.png'), 'wb') as iconfile:
                    iconfile.write(self.icon)
                print('icon: OK')
            else:
                print('no icon found')

            print('creating manifest in temp directory...')
            with open(staging.joinpath('manifest.json'), 'w') as manifestfile:
                # separators is (item_separator, key_separator); the swapped
                # order would emit invalid JSON.
                manifestfile.write(
                    json.dumps(self.manifest, separators=(',', ':')))
            print('manifest: OK')

            print('creating xapk archive...')
            with zipfile.ZipFile(zip_path,
                                 'w',
                                 compression=zipfile.ZIP_DEFLATED) as zfd:
                for root, dirs, files in os.walk(zip_dir):
                    for f in files:
                        filename = os.path.join(root, f)
                        zfd.write(filename, os.path.relpath(filename, zip_dir))
            print('xapk: OK')
        finally:
            print('cleaning up temp directory...')
            shutil.rmtree(zip_dir)
            print('cleanup: OK')
예제 #4
0
def extract_features(file_path):
    """Extract metadata and binary feature vectors from an APK file.

    Args:
        file_path: path of the APK to analyse.

    Returns:
        ``None`` when the APK cannot be loaded or analysed (the error is
        printed), otherwise a dict holding manifest metadata (versions, SDK
        levels, libraries, permissions, components), the md5/sha256 digests
        of the raw APK, the dex strings and class/method/field names, the
        ``is_obfuscation`` and ``is_database`` flags, ``entropy_rate`` of all
        names, and ``feature_vectors`` with 0/1 lists for ``api_calls``,
        ``permissions``, ``special_strings`` and ``others``.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        # print(e) is valid on both Python 2 and Python 3, unlike `print e`.
        print(e)
        return None

    raw = a.get_raw()

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # NOTE: the native / crypto / dynamic / reflection code flags are
    # currently disabled, so 'others' below only carries the two flags left.
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    names = []
    names.extend(result['class_names'])
    names.extend(result['method_names'])
    names.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # One 0/1 entry per known API call, permission and special string.
    vectors['api_calls'] = [
        1 if dx.get_method_by_name(".", call, ".") else 0
        for call in API_CALLS
    ]
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]
    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]
    vectors['others'] = [
        result['is_obfuscation'],
        result['is_database'],
    ]

    return result
예제 #5
0
# Distinguished-name fields recorded for a certificate issuer / subject, in
# the order in which they are serialised.
_CERT_NAME_FIELDS = (
    'commonName',
    'organizationName',
    'organizationalUnitName',
    'countryName',
    'stateOrProvinceName',
    'localityName',
)


def _parse_certificate_name(name_string):
    """Turn a ``key=value, key=value`` name string into a dict of known fields.

    Fields from ``_CERT_NAME_FIELDS`` that do not occur map to ``None``;
    other keys are ignored.  Only the first ``=`` separates key from value,
    so values that themselves contain ``=`` are kept whole.
    """
    fields = dict((field, None) for field in _CERT_NAME_FIELDS)
    for part in name_string.split(','):
        key, _, value = part.lstrip().partition('=')
        if key in fields:
            fields[key] = value
    return fields


def analyze(path):
    """Analyse the APK at ``path`` and store the result in ``tbl_apkinfo``.

    Collects manifest metadata and signing-certificate details, inserts one
    row into ``tbl_apkinfo``, then calls ``androaxml_main`` to write
    ``<md5>.xml`` into ``app.config['OUTPUT_PATH']``.

    Args:
        path: path of the APK file.

    Returns:
        ``True`` on success; ``False`` when the APK has no certificate or any
        step raises an exception.
    """
    try:
        start = process_time()
        a = APK(path)

        certs = set(
            a.get_certificates_der_v3() + a.get_certificates_der_v2() +
            [a.get_certificate_der(x) for x in a.get_signature_names()])
        if not certs:
            # Nothing to fingerprint; previously this surfaced as a NameError
            # that the blanket handler turned into False.
            return False

        # Every certificate is parsed, but only the one visited last is
        # recorded.  NOTE(review): set iteration order is arbitrary, so with
        # several distinct certificates the recorded one is not well defined.
        for cert in certs:
            x509_cert = x509.Certificate.load(cert)
            issuer = _parse_certificate_name(
                get_certificate_name_string(x509_cert.issuer, short=False))
            subject = _parse_certificate_name(
                get_certificate_name_string(x509_cert.subject, short=False))

        # NOTE(review): these digests are computed over the certificate
        # bytes, not over the APK file -- confirm that is what the md5 /
        # sha* columns are meant to hold.
        md5 = hashlib.md5(cert).hexdigest()
        sha1 = hashlib.sha1(cert).hexdigest()
        sha256 = hashlib.sha256(cert).hexdigest()
        sha512 = hashlib.sha512(cert).hexdigest()

        appName = a.get_app_name()
        fileSize = os.stat(a.get_filename()).st_size
        timestamp = time.time()
        dateTime = datetime.fromtimestamp(timestamp)
        timeOfSubmit = dateTime.strftime("%Y-%m-%d %H:%M:%S")
        package = a.get_package()
        androidversionCode = a.get_androidversion_code()
        androidversionName = a.get_androidversion_name()
        minSDKVersion = a.get_min_sdk_version()
        maxSDKVersion = a.get_max_sdk_version()
        targetSDKVersion = a.get_target_sdk_version()
        mainActivity = a.get_main_activity()

        validity = x509_cert['tbs_certificate']['validity']
        attributes = {
            'validFrom':
            validity['not_before'].native.strftime("%Y-%m-%d %H:%M:%S"),
            'validTo':
            validity['not_after'].native.strftime("%Y-%m-%d %H:%M:%S"),
            'serialNumber':
            hex(x509_cert.serial_number),
            'hashAlgorithm':
            x509_cert.hash_algo,
            'signatureAlgorithm':
            x509_cert.signature_algo
        }

        certificateAttributes = json.dumps(attributes)
        certificateIssuer = json.dumps(issuer)
        certificateSubject = json.dumps(subject)

        declaredPermissions = json.dumps(a.get_declared_permissions())
        requestedPermissions = json.dumps(a.get_permissions())
        activities = json.dumps(a.get_activities())
        services = json.dumps(a.get_services())
        receivers = json.dumps(a.get_receivers())
        providers = json.dumps(a.get_providers())

        stop = process_time()
        analysisTime = stop - start

        sql = "INSERT INTO tbl_apkinfo (md5, appName, fileSize, analysisTime, sha1, sha256, sha512, firstSubmission, lastSubmission, package, androidversionCode, androidversionName, minSDKVersion, maxSDKVersion, targetSDKVersion, mainActivity, certificateAttributes, certificateIssuer, certificateSubject,	declaredPermissions, requestedPermissions, activities, services, providers, receivers) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        # A first submission uses the same timestamp for firstSubmission and
        # lastSubmission.
        param = (md5, appName, fileSize, analysisTime, sha1, sha256, sha512,
                 timeOfSubmit, timeOfSubmit, package, androidversionCode,
                 androidversionName, minSDKVersion, maxSDKVersion,
                 targetSDKVersion, mainActivity, certificateAttributes,
                 certificateIssuer, certificateSubject, declaredPermissions,
                 requestedPermissions, activities, services, providers,
                 receivers)

        connect = mysql.connect()
        try:
            cursor = connect.cursor()
            cursor.execute(sql, param)
            connect.commit()
        finally:
            # Release the connection even when the insert fails.
            connect.close()

        androaxml_main(path,
                       os.path.join(app.config['OUTPUT_PATH'], md5 + '.xml'))
        return True
    except Exception:
        # Best effort: callers only get a success flag.  Unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        return False
예제 #6
0
def extract_features(file_path):
    """Extract metadata, code flags and feature vectors from an APK file.

    Args:
        file_path: path of the APK to analyse.

    Returns:
        ``None`` when the APK cannot be loaded or analysed, otherwise a dict
        holding manifest metadata (versions, SDK levels, libraries,
        permissions, components), the md5/sha256 digests of the raw APK, the
        ``is_*`` code flags, ``entropy_rate`` of all class, method and field
        names, and ``feature_vectors`` with 0/1 lists for ``api_calls``,
        ``permissions`` and ``special_strings`` plus ``opt_codes``: a dict
        mapping ``str(ngram)`` to 1 for the ``NGRAM_THRE`` most frequent
        ``NGRAM``-long sequences of instruction names.
    """
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: an unreadable APK yields no features.  Unlike a bare
        # ``except`` this lets KeyboardInterrupt / SystemExit propagate.
        return None

    raw = a.get_raw()

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()

    # The name lists only feed the entropy rate; they are not stored in the
    # result.
    class_names = [c.get_name() for c in d.get_classes()]
    method_names = [m.get_name() for m in d.get_methods()]
    field_names = [f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # Field names were computed but the method names were added twice
    # instead; use each list exactly once.
    names = []
    names.extend(class_names)
    names.extend(method_names)
    names.extend(field_names)
    result['entropy_rate'] = entropy_rate(names)

    vectors = {}
    result['feature_vectors'] = vectors

    # Search for the presence of api calls in a given apk
    vectors['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    vectors['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    vectors['special_strings'] = [
        1 if d.get_regex_strings(word) else 0 for word in SPECIAL_STRINGS
    ]

    # Instruction names of all methods, in iteration order.
    opt_seq = [
        instruction.get_name()
        for method in d.get_methods()
        for instruction in method.get_instructions()
    ]

    # A sequence of length L has L - NGRAM + 1 windows; the "+ 1" keeps the
    # final window, which the previous bound dropped.  range() works on both
    # Python 2 and Python 3.
    ngram_counts = Counter(
        tuple(opt_seq[start:start + NGRAM])
        for start in range(len(opt_seq) - NGRAM + 1))

    # Keep the NGRAM_THRE most frequent n-grams as presence flags.
    vectors['opt_codes'] = dict(
        (str(ngram), 1)
        for ngram, _count in ngram_counts.most_common()[:NGRAM_THRE])

    return result