-
Notifications
You must be signed in to change notification settings - Fork 0
/
core2.py
107 lines (90 loc) · 4.1 KB
/
core2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
'''
Author: Ashish Katlam
Description: This code extracts all the features of a given Android application.
All the features are extracted using the Androguard tool.
'''
from androguard.core.bytecodes.dvm import DalvikVMFormat
from androguard.core.analysis.analysis import VMAnalysis
from androguard.decompiler.decompiler import DecompilerDAD
from androguard.core.bytecodes.apk import APK
from androguard.core.analysis import analysis
from androguard.core.bytecodes import dvm
from constants import SPECIAL_STRINGS, DB_REGEX, API_CALLS, PERMISSIONS
import math
import hashlib
# Extract all features for a given application
def extract_features(file_path):
    """Extract static-analysis features from the APK at ``file_path``.

    The APK is opened with Androguard; manifest metadata, hashes of the raw
    file, names found in the dex code, several yes/no code heuristics and
    three binary feature vectors are collected into one dictionary.

    Args:
        file_path: Path of the APK file handed to ``APK``.

    Returns:
        A dict of features. ``result['feature_vectors']`` holds the lists
        ``'api_calls'``, ``'permissions'`` and ``'special_strings'``; each
        contains one 0/1 flag per entry of ``API_CALLS``, ``PERMISSIONS``
        and ``SPECIAL_STRINGS`` respectively, in that order.
        ``None`` is returned when the APK cannot be loaded or analysed.
    """
    try:
        a = APK(file_path)
        d = DalvikVMFormat(a.get_dex())
        dx = VMAnalysis(d)
        # A second dex object with the uVMAnalysis variant is built solely
        # for the reflection check below.
        vm = dvm.DalvikVMFormat(a.get_dex())
        vmx = analysis.uVMAnalysis(vm)
        d.set_vmanalysis(dx)
        d.set_decompiler(DecompilerDAD(d, dx))
    except Exception:
        # Best effort: any loading/analysis failure means "no features".
        # Unlike a bare ``except:``, this does not swallow KeyboardInterrupt
        # or SystemExit, so a long batch run can still be interrupted.
        return None

    result = {}

    # Manifest / package metadata.
    result['android_version_code'] = a.get_androidversion_code()
    result['android_version_name'] = a.get_androidversion_name()
    result['max_sdk'] = a.get_max_sdk_version()
    result['min_sdk'] = a.get_min_sdk_version()
    result['libraries'] = a.get_libraries()
    result['filename'] = a.get_filename()
    result['target_sdk'] = a.get_target_sdk_version()

    # Fetch the raw APK bytes once and hash them twice.
    raw = a.get_raw()
    result['md5'] = hashlib.md5(raw).hexdigest()
    result['sha256'] = hashlib.sha256(raw).hexdigest()

    result['permissions'] = a.get_permissions()
    result['activities'] = a.get_activities()
    result['providers'] = a.get_providers()
    result['services'] = a.get_services()

    # Names and strings taken from the dex code.
    result['strings'] = d.get_strings()
    result['class_names'] = [c.get_name() for c in d.get_classes()]
    result['method_names'] = [m.get_name() for m in d.get_methods()]
    result['field_names'] = [f.get_name() for f in d.get_fields()]

    # Yes/no heuristics, stored as 1/0.
    result['is_native_code'] = 1 if analysis.is_native_code(dx) else 0
    result['is_obfuscation'] = 1 if analysis.is_ascii_obfuscation(d) else 0
    result['is_crypto_code'] = 1 if analysis.is_crypto_code(dx) else 0
    result['is_dyn_code'] = 1 if analysis.is_dyn_code(dx) else 0
    result['is_reflection_code'] = 1 if analysis.is_reflection_code(vmx) else 0
    result['is_database'] = 1 if d.get_regex_strings(DB_REGEX) else 0

    # The entropy rate is computed over class, method and field names,
    # concatenated in that order.
    s_list = []
    s_list.extend(result['class_names'])
    s_list.extend(result['method_names'])
    s_list.extend(result['field_names'])
    result['entropy_rate'] = entropy_rate(s_list)

    result['feature_vectors'] = {}

    # Search for the presence of api calls in a given apk
    result['feature_vectors']['api_calls'] = [
        1 if dx.tainted_packages.search_methods(".", call, ".") else 0
        for call in API_CALLS
    ]

    # Search for the presence of permissions in a given apk
    result['feature_vectors']['permissions'] = [
        1 if permission in result['permissions'] else 0
        for permission in PERMISSIONS
    ]

    # Search for the presence of special strings (regex match) in a given apk
    result['feature_vectors']['special_strings'] = [
        1 if d.get_regex_strings(word) else 0
        for word in SPECIAL_STRINGS
    ]

    return result
def _string_entropy(s):
    """Return the Shannon entropy, in bits, of the characters of ``s``."""
    length = len(s)
    # dict.fromkeys yields each distinct character once; an empty string
    # yields nothing, so no division by zero happens here.
    probabilities = [float(s.count(c)) / length for c in dict.fromkeys(s)]
    return -sum(p * math.log(p) / math.log(2.0) for p in probabilities)


def entropy_rate(data):
    """Return the entropy rate of a collection of strings, rounded to 2 places.

    The value is ``(abs(idealize) - entropy) / idealize`` where ``idealize``
    is the entropy of a uniform distribution over ``len(data)`` items (that
    is, log2 of the item count) and ``entropy`` is the character entropy of
    a string from ``data``.

    Args:
        data: Sized iterable of strings (for example identifier names).

    Returns:
        The rounded rate as a float. ``0.0`` is returned when ``data`` has
        fewer than two items, because ``idealize`` would then be zero (one
        item) or undefined (no items) and the formula cannot be evaluated.
    """
    count = len(data)
    if count < 2:
        return 0.0

    # NOTE(review): the entropy is overwritten on every iteration, so only
    # the last string in ``data`` ends up in the result. This is kept as-is
    # so previously computed feature values stay comparable; confirm whether
    # an aggregate over all strings was intended.
    entropy = 0.0
    for s in data:
        entropy = _string_entropy(s)

    p = 1.0 / count
    idealize = -1.0 * count * p * math.log(p) / math.log(2.0)
    return round((abs(idealize) - entropy) / idealize, 2)
def create_vector_single(apk):
    """Flatten one extracted-feature dict into a single flat feature list.

    The result is the concatenation of the ``'permissions'``,
    ``'api_calls'`` and ``'special_strings'`` lists found under
    ``apk['feature_vectors']`` (in that order), followed by three scalars
    converted with ``int()``: ``apk['entropy_rate']``,
    ``apk['is_crypto_code']`` and ``apk['is_database']``.

    Args:
        apk: Mapping holding the keys listed above.

    Returns:
        A new list; the lists inside ``apk`` are not modified.
    """
    vectors = apk['feature_vectors']
    features = [
        flag
        for group in ('permissions', 'api_calls', 'special_strings')
        for flag in vectors[group]
    ]

    # int() truncates the entropy rate toward zero.
    # NOTE(review): the second scalar is read from 'is_crypto_code'; the
    # previous local was named "native" - confirm which flag was intended.
    features.extend([
        int(apk['entropy_rate']),
        int(apk['is_crypto_code']),
        int(apk['is_database']),
    ])
    return features