예제 #1
0
    def testAPKManifest(self):
        from androguard.core.bytecodes.apk import APK
        a = APK("examples/android/TestsAndroguard/bin/TestActivity.apk",
                testzip=True)

        self.assertEqual(a.get_app_name(), "TestsAndroguardApplication")
        self.assertEqual(a.get_app_icon(), "res/drawable-hdpi/icon.png")
        self.assertEqual(a.get_app_icon(max_dpi=120),
                         "res/drawable-ldpi/icon.png")
        self.assertEqual(a.get_app_icon(max_dpi=160),
                         "res/drawable-mdpi/icon.png")
        self.assertEqual(a.get_app_icon(max_dpi=240),
                         "res/drawable-hdpi/icon.png")
        self.assertIsNone(a.get_app_icon(max_dpi=1))
        self.assertEqual(a.get_main_activity(),
                         "tests.androguard.TestActivity")
        self.assertEqual(a.get_package(), "tests.androguard")
        self.assertEqual(a.get_androidversion_code(), '1')
        self.assertEqual(a.get_androidversion_name(), "1.0")
        self.assertEqual(a.get_min_sdk_version(), "9")
        self.assertEqual(a.get_target_sdk_version(), "16")
        self.assertIsNone(a.get_max_sdk_version())
        self.assertEqual(a.get_permissions(), [])
        self.assertEqual(a.get_declared_permissions(), [])
        self.assertTrue(a.is_valid_APK())
예제 #2
0
def extract_attributes(sha256):
    with NamedTemporaryFile() as f:
        f.write(default_storage.open(sha256).read())
        f.seek(0)

        sign = ApplicationSignature.compute_from_apk(f.name)
        package = sign.handle
        sign = sign.to_dict()

        a = APK(f.name)
        sign['uploaded_at'] = datetime.now()
        sign['sha256'] = sha256
        sign['activities'] = a.get_activities()
        sign['features'] = a.get_features()
        sign['libraries'] = a.get_libraries()
        sign['main_activity'] = a.get_activities()
        sign['min_sdk_version'] = a.get_min_sdk_version()
        sign['max_sdk_version'] = a.get_max_sdk_version()
        sign['target_sdk_version'] = a.get_target_sdk_version()
        sign['permissions'] = a.get_permissions()
        sign['aosp_permissions'] = a.get_requested_aosp_permissions()
        sign[
            'third_party_permissions'] = a.get_requested_third_party_permissions(
            )
        sign['providers'] = a.get_providers()
        sign['receivers'] = a.get_receivers()
        sign['services'] = a.get_services()
        sign['is_valid'] = a.is_valid_APK()
        sign['is_signed'] = a.is_signed()
        sign['is_signed_v1'] = a.is_signed_v1()
        sign['is_signed_v2'] = a.is_signed_v2()
        sign['is_signed_v3'] = a.is_signed_v3()

        if not es.exists(settings.ELASTICSEARCH_APK_INDEX, id=sha256):
            es.index(index=settings.ELASTICSEARCH_APK_INDEX,
                     id=sha256,
                     body=sign)
        else:
            es.update(index=settings.ELASTICSEARCH_APK_INDEX,
                      id=sha256,
                      body={'doc': sign},
                      retry_on_conflict=5)
    del a, sign, f
    gc.collect()

    return package
예제 #3
0
    def testAPKManifest(self):
        from androguard.core.bytecodes.apk import APK
        a = APK("examples/android/TestsAndroguard/bin/TestActivity.apk", testzip=True)

        self.assertEqual(a.get_app_name(), "TestsAndroguardApplication")
        self.assertEqual(a.get_app_icon(), "res/drawable-hdpi/icon.png")
        self.assertEqual(a.get_app_icon(max_dpi=120), "res/drawable-ldpi/icon.png")
        self.assertEqual(a.get_app_icon(max_dpi=160), "res/drawable-mdpi/icon.png")
        self.assertEqual(a.get_app_icon(max_dpi=240), "res/drawable-hdpi/icon.png")
        self.assertIsNone(a.get_app_icon(max_dpi=1))
        self.assertEqual(a.get_main_activity(), "tests.androguard.TestActivity")
        self.assertEqual(a.get_package(), "tests.androguard")
        self.assertEqual(a.get_androidversion_code(), '1')
        self.assertEqual(a.get_androidversion_name(), "1.0")
        self.assertEqual(a.get_min_sdk_version(), "9")
        self.assertEqual(a.get_target_sdk_version(), "16")
        self.assertIsNone(a.get_max_sdk_version())
        self.assertEqual(a.get_permissions(), [])
        self.assertEqual(a.get_declared_permissions(), [])
        self.assertTrue(a.is_valid_APK())
def extract_features(file_path):
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        #vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except:
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    #result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(a.get_raw()).hexdigest()
    result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    #result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    #result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    '''result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0'''
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0
    arr = []
    s = a.get_elements("action", "name")
    for i in s:
        arr.append(i)

    result['intents'] = arr

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk
    result['feature_vectors']['api_calls'] = []
    for call in API_CALLS:
        status = 1 if dx.get_method(call) else 0
        result['feature_vectors']['api_calls'].append(status)

    # Search for the presence of permissions in a given apk
    result['feature_vectors']['permissions'] = []
    for permission in PERMISSIONS:
        status = 1 if permission in result['permissions'] else 0
        result['feature_vectors']['permissions'].append(status)

    #Search for the presence of intents in a given apk
    result['feature_vectors']['intents'] = []
    n = len(INTENTS)
    m = len(result['intents'])
    for i in range(n):
        stri = INTENTS[i]
        flg = False
        for j in range(m):
            if stri in result['intents'][j]:
                flg = True
                break
        if flg:
            status = 1
        else:
            status = 0
        result['feature_vectors']['intents'].append(status)

    #Check for special strings in code
    result['feature_vectors']['special_strings'] = []
    for word in SPECIAL_STRINGS:
        status = 1 if d.get_regex_strings(word) else 0
        result['feature_vectors']['special_strings'].append(status)

    return result
예제 #5
0
def extract_features(file_path):
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = Analysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.Analysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception as e:
        print e
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(a.get_raw()).hexdigest()
    result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]
    # result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    # result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    # result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    # result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    result['feature_vectors']['api_calls'] = []
    for call in API_CALLS:
        status = 1 if dx.get_method_by_name(".", call, ".") else 0
        result['feature_vectors']['api_calls'].append(status)

    result['feature_vectors']['permissions'] = []
    for permission in PERMISSIONS:
        status = 1 if permission in result['permissions'] else 0
        result['feature_vectors']['permissions'].append(status)

    result['feature_vectors']['special_strings'] = []
    for word in SPECIAL_STRINGS:
        status = 1 if d.get_regex_strings(word) else 0
        result['feature_vectors']['special_strings'].append(status)

    result['feature_vectors']['others'] = [
        # result['is_reflection_code'],
        # result['is_crypto_code'],
        # result['is_native_code'],
        result['is_obfuscation'],
        result['is_database'],
        # result['is_dyn_code']
    ]

    return result
예제 #6
0
def analyze(path):
    try:
        start = process_time()
        hashfunctions = dict(md5=hashlib.md5,
                             sha1=hashlib.sha1,
                             sha256=hashlib.sha256,
                             sha512=hashlib.sha512)
        a = APK(path)

        certs = set(
            a.get_certificates_der_v3() + a.get_certificates_der_v2() +
            [a.get_certificate_der(x) for x in a.get_signature_names()])

        for cert in certs:
            x509_cert = x509.Certificate.load(cert)

            issuer = {
                'commonName': None,
                'organizationName': None,
                'organizationalUnitName': None,
                'countryName': None,
                'stateOrProvinceName': None,
                'localityName': None
            }
            subject = {
                'commonName': None,
                'organizationName': None,
                'organizationalUnitName': None,
                'countryName': None,
                'stateOrProvinceName': None,
                'localityName': None
            }

            strIssuer = get_certificate_name_string(x509_cert.issuer,
                                                    short=False)
            strSubject = get_certificate_name_string(x509_cert.subject,
                                                     short=False)

            arrIssuer = strIssuer.split(',')
            for i in arrIssuer:
                if i.lstrip().split('=')[0] == 'commonName':
                    issuer['commonName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'organizationName':
                    issuer['organizationName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'organizationalUnitName':
                    issuer['organizationalUnitName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'countryName':
                    issuer['countryName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'stateOrProvinceName':
                    issuer['stateOrProvinceName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'localityName':
                    issuer['localityName'] = i.lstrip().split('=')[1]

            arrSubject = strSubject.split(',')
            for i in arrSubject:
                if i.lstrip().split('=')[0] == 'commonName':
                    subject['commonName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'organizationName':
                    subject['organizationName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'organizationalUnitName':
                    subject['organizationalUnitName'] = i.lstrip().split(
                        '=')[1]
                elif i.lstrip().split('=')[0] == 'countryName':
                    subject['countryName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'stateOrProvinceName':
                    subject['stateOrProvinceName'] = i.lstrip().split('=')[1]
                elif i.lstrip().split('=')[0] == 'localityName':
                    subject['localityName'] = i.lstrip().split('=')[1]

            for k, v in hashfunctions.items():
                if k == 'md5':
                    md5 = v(cert).hexdigest()
                elif k == 'sha1':
                    sha1 = v(cert).hexdigest()
                elif k == 'sha256':
                    sha256 = v(cert).hexdigest()
                elif k == 'sha512':
                    sha512 = v(cert).hexdigest()

        md5 = md5

        appName = a.get_app_name()
        fileSize = os.stat(a.get_filename()).st_size
        sha1 = sha1
        sha256 = sha256
        sha512 = sha512
        timestamp = time.time()
        dateTime = datetime.fromtimestamp(timestamp)
        timeOfSubmit = dateTime.strftime("%Y-%m-%d %H:%M:%S")
        package = a.get_package()
        androidversionCode = a.get_androidversion_code()
        androidversionName = a.get_androidversion_name()
        minSDKVersion = a.get_min_sdk_version()
        maxSDKVersion = a.get_max_sdk_version()
        targetSDKVersion = a.get_target_sdk_version()
        mainActivity = a.get_main_activity()

        attributes = {
            'validFrom':
            x509_cert['tbs_certificate']['validity']
            ['not_before'].native.strftime("%Y-%m-%d %H:%M:%S"),
            'validTo':
            x509_cert['tbs_certificate']['validity']
            ['not_after'].native.strftime("%Y-%m-%d %H:%M:%S"),
            'serialNumber':
            hex(x509_cert.serial_number),
            'hashAlgorithm':
            x509_cert.hash_algo,
            'signatureAlgorithm':
            x509_cert.signature_algo
        }

        certificateAttributes = json.dumps(attributes)
        certificateIssuer = json.dumps(issuer)
        certificateSubject = json.dumps(subject)

        declaredPermissions = json.dumps(a.get_declared_permissions())

        requestedPermissions = json.dumps(a.get_permissions())

        activities = json.dumps(a.get_activities())

        services = json.dumps(a.get_services())

        receivers = json.dumps(a.get_receivers())

        providers = json.dumps(a.get_providers())

        stop = process_time()
        analysisTime = stop - start

        connect = mysql.connect()
        cursor = connect.cursor()

        sql = "INSERT INTO tbl_apkinfo (md5, appName, fileSize, analysisTime, sha1, sha256, sha512, firstSubmission, lastSubmission, package, androidversionCode, androidversionName, minSDKVersion, maxSDKVersion, targetSDKVersion, mainActivity, certificateAttributes, certificateIssuer, certificateSubject,	declaredPermissions, requestedPermissions, activities, services, providers, receivers) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
        param = (md5, appName, fileSize, analysisTime, sha1, sha256, sha512,
                 timeOfSubmit, timeOfSubmit, package, androidversionCode,
                 androidversionName, minSDKVersion, maxSDKVersion,
                 targetSDKVersion, mainActivity, certificateAttributes,
                 certificateIssuer, certificateSubject, declaredPermissions,
                 requestedPermissions, activities, services, providers,
                 receivers)
        cursor.execute(sql, param)

        connect.commit()
        connect.close()

        androaxml_main(path,
                       os.path.join(app.config['OUTPUT_PATH'], md5 + '.xml'))
        return True
    except:
        return False
예제 #7
0
def extract_features(file_path):
    result = {}
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except:
        return None

    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()
    result['md5'] = hashlib.md5(a.get_raw()).hexdigest()
    result['sha256'] = hashlib.sha256(a.get_raw()).hexdigest()
    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()
    #result['strings'] = d.get_strings()
    #result['class_names'] = [c.get_name() for c in d.get_classes()]
    #result['method_names'] = [m.get_name() for m in d.get_methods()]
    #result['field_names'] = [f.get_name() for f in d.get_fields()]
    class_names = [c.get_name() for c in d.get_classes()]
    method_names = [m.get_name() for m in d.get_methods()]
    field_names = [ f.get_name() for f in d.get_fields()]

    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    s_list = []
    #s_list.extend(result['class_names'])
    #s_list.extend(result['method_names'])
    #s_list.extend(result['field_names'])
    s_list.extend(class_names)
    s_list.extend(method_names)
    s_list.extend(method_names)
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk
    result['feature_vectors']['api_calls'] = []
    for call in API_CALLS:
        status = 1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        result['feature_vectors']['api_calls'].append(status)

    # Search for the presence of permissions in a given apk        
    result['feature_vectors']['permissions'] = []
    for permission in PERMISSIONS:
        status = 1 if permission in result['permissions'] else 0
        result['feature_vectors']['permissions'].append(status)

    result['feature_vectors']['special_strings'] = []
    for word in SPECIAL_STRINGS:
        status = 1 if d.get_regex_strings(word) else 0
        result['feature_vectors']['special_strings'].append(status)

    opt_seq = []
    for m in d.get_methods():
        for i in m.get_instructions():
            opt_seq.append(i.get_name())

    optngramlist = [tuple(opt_seq[i:i+NGRAM]) for i in xrange(len(opt_seq) - NGRAM)]
    optngram = Counter(optngramlist)
    optcodes = dict()
    tmpCodes = dict(optngram)
    #for k,v in optngram.iteritems():
    #    if v>=NGRAM_THRE:
            #optcodes[str(k)] = v
    #        optcodes[str(k)] = 1
    tmpCodes = sorted(tmpCodes.items(),key =lambda d:d[1],reverse=True) 
    for value in tmpCodes[:NGRAM_THRE]:
        optcodes[str(value[0])] = 1
    result['feature_vectors']['opt_codes'] = optcodes

    return result