def dynamic_analysis2(root):
    """Run dynamic analysis on every app under *root* lacking a cached result.

    For each ``(sub_root, app)`` pair yielded by ``yield_app_paths2(root)``
    the result dict is saved to ``<sub_root>/<app>_info/dynamic.json`` via
    ``save_json``. Apps whose ``dynamic.json`` already exists are skipped.
    Start/finish timestamps and the average time cost of the apps analysed
    in this run are printed.

    Args:
        root: Directory handed to ``yield_app_paths2`` to enumerate apps.
    """
    total_time = 0
    cnt = 0
    print("dynamic start: ",
          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    for sub_root, app in yield_app_paths2(root):
        abs_path = os.path.join(sub_root, app)
        info_dir = abs_path + "_info"
        json_file = os.path.join(info_dir, "dynamic.json")
        if os.path.isfile(json_file):
            # A previous run already produced a result for this app.
            print("dynamic exists")
            continue
        if not os.path.isdir(info_dir):
            os.makedirs(info_dir)
        # presumably dynamic_analysis_one fills this dict in place and
        # returns the elapsed time -- confirm against its definition
        app_dynamic_result = {}
        time_cost = dynamic_analysis_one(abs_path, app_dynamic_result)
        app_dynamic_result['time_cost'] = time_cost
        save_json(app_dynamic_result, json_file)
        cnt += 1
        total_time += time_cost
    print("dynamic finish: ",
          time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    # Guard the average: when nothing was analysed (no apps found, or all of
    # them were already cached) cnt is 0 and the division would raise
    # ZeroDivisionError.
    if cnt:
        print("average time_cost: ", total_time * 1.0 / cnt)
    else:
        print("average time_cost: ", "n/a (no apps analysed)")
def mas_run2(root):
    """Call ``run_apps`` on every app path found under *root*.

    App locations come from ``yield_app_paths2(root)`` as
    ``(directory, file name)`` pairs. Any exception raised while handling
    one app is printed and the loop moves on to the next app.
    """
    for parent_dir, apk_name in yield_app_paths2(root):
        try:
            run_apps(os.path.join(parent_dir, apk_name))
        except Exception as err:
            # Best effort: report the failure and keep going.
            print(err)
def split_data2(data, func, rate=0.7, is_positive=0, feature_root="", nparray=True):
    """Split *data* into train/test feature vectors and labels.

    Every item is converted with ``to_vector(item, func)`` into a
    ``(label, vector)`` pair and routed as follows:

    * label ``0`` (reported as "negative"): goes to the training set while
      the number of training negatives so far is below ``len(data) * rate``,
      otherwise to the test set. Note that this quota is computed from the
      total number of items in *data*, not from the number of negatives.
    * any other label (reported as "positive"): goes to the test set when
      ``key in item[0]`` holds for at least one held-out key, otherwise to
      the training set.

    Held-out keys are built from the directories yielded by
    ``yield_app_paths2(feature_root, print_path=False)``: each directory has
    its first ``len(feature_root) + 1`` characters sliced off, duplicates are
    dropped, the list is passed through ``shuffle`` and the entries from
    index ``int(rate * n)`` onward are held out.

    *is_positive* is accepted but not used by this function.

    Returns:
        ``(train_x, test_x, train_y, test_y)`` -- numpy arrays when
        *nparray* is truthy, plain lists otherwise.
    """
    # A dict is used as an ordered set of unique relative sub-directories.
    unique_dirs = {}
    prefix_len = len(feature_root) + 1
    for sub_root, _app in yield_app_paths2(feature_root, print_path=False):
        unique_dirs[sub_root[prefix_len:]] = 1
    dir_keys = list(unique_dirs)
    shuffle(dir_keys)
    held_out_keys = dir_keys[int(rate * len(dir_keys)):]

    train_x, train_y = [], []
    test_x, test_y = [], []
    train_positive = 0
    train_negative = 0
    test_positive = 0
    test_negative = 0

    for item in data:
        label, vector = to_vector(item, func)
        if label == 0:
            goes_to_test = not (train_negative < len(data) * rate)
            if goes_to_test:
                test_negative += 1
            else:
                train_negative += 1
        else:
            goes_to_test = any(key in item[0] for key in held_out_keys)
            if goes_to_test:
                test_positive += 1
            else:
                train_positive += 1

        if goes_to_test:
            xs, ys = test_x, test_y
        else:
            xs, ys = train_x, train_y
        xs.append(vector)
        ys.append(label)

    print("[train] total: ", len(train_x), " positive: ", train_positive,
          "negative: ", train_negative)
    print("[test] total: ", len(test_x), " positive: ", test_positive,
          "negative: ", test_negative)

    if not nparray:
        return train_x, test_x, train_y, test_y
    return np.array(train_x), np.array(test_x), np.array(train_y), np.array(test_y)
def load_data2(feature_root, feature_name, label_root):
    """Load the feature and label JSON of every app under *feature_root*.

    For each ``(sub_root, app)`` yielded by
    ``yield_app_paths2(feature_root, print_path=False)``:

    * the feature file is ``<sub_root>/<app>_info/<feature_name>``;
    * the label file is ``<label_root>/<relative dir>/<app>.json`` where the
      relative dir is ``sub_root`` with its first ``len(feature_root) + 1``
      characters removed.

    An app is skipped unless the feature file, the label file and the app
    file itself all exist.

    Returns:
        dict mapping the app path to
        ``{'feature': <parsed JSON>, 'label': <parsed JSON>}``.
    """
    loaded = {}
    prefix_len = len(feature_root) + 1
    for sub_root, app in yield_app_paths2(feature_root, print_path=False):
        app_path = os.path.join(sub_root, app)
        info_dir = os.path.join(sub_root, app + "_info")
        feature_path = os.path.join(info_dir, feature_name)
        label_dir = os.path.join(label_root, sub_root[prefix_len:])
        label_path = os.path.join(label_dir, app + ".json")

        # All three files must be present; checked in this order.
        required = (feature_path, label_path, app_path)
        if not all(os.path.isfile(path) for path in required):
            continue

        with open(feature_path, 'r') as feature_file:
            feature = json.load(feature_file)
        with open(label_path, 'r') as label_file:
            label = json.load(label_file)
        loaded[app_path] = {'feature': feature, 'label': label}
    return loaded
def get_sha():
    """Print how many app hashes two hard-coded sample sets have in common.

    Hashes every app found under the two hard-coded roots with ``sha256``
    and prints, in order: the number of distinct hashes under the first
    root, the number under the second root, and the number of hashes of the
    second set that also occur in the first.
    """
    derbin0_feature_root = r"M:\Android_Samples\android_malware\Android_malware\drebin-data\drebin-0"
    andmal2017_feature_root = r"M:\Android_Samples\android_malware\Android_malware\andmal2017"

    # yield_app_paths yields bare app names for this root.
    derbin_hashes = {
        sha256(os.path.join(derbin0_feature_root, app))
        for app in yield_app_paths(derbin0_feature_root, print_path=False)
    }
    # yield_app_paths2 yields (directory, app name) pairs.
    andmal_hashes = {
        sha256(os.path.join(sub_root, app))
        for sub_root, app in yield_app_paths2(andmal2017_feature_root,
                                              print_path=False)
    }

    print(len(derbin_hashes))  # 983
    print(len(andmal_hashes))  # 377
    print(len(andmal_hashes & derbin_hashes))  # 4