def mmdt_feature_merge():
    """Merge two feature files named on the command line.

    Reads two file paths from ``sys.argv[1]`` and ``sys.argv[2]``, loads
    both with ``mmdt_load``, appends the entries of the second file to
    those of the first, and writes the combined list back to the first
    path with ``mmdt_save``. The second file is left untouched.
    """
    target_path, extra_path = sys.argv[1], sys.argv[2]
    merged = mmdt_load(target_path)
    merged.extend(mmdt_load(extra_path))
    mmdt_save(target_path, merged)
def filter_mmdt_hash(name, dlt):
    """Filter a saved entry list by ``mmdt_std`` and remove duplicates.

    Loads the entries stored under ``name``, keeps only those for which
    ``mmdt_std(entry)`` is strictly greater than ``dlt``, de-duplicates
    the survivors, and saves the result back to ``name``. The entry count
    before and after, and every rejected entry, are printed.

    Args:
        name: Path passed to ``mmdt_load`` / ``mmdt_save``.
        dlt: Threshold that ``mmdt_std(entry)`` must exceed for an entry
            to be kept.
    """
    entries = mmdt_load(name)
    print('old len: %d' % len(entries))

    kept = []
    for entry in entries:
        if mmdt_std(entry) > dlt:
            kept.append(entry)
            continue
        print('remove: %s' % (entry))

    # De-duplicating through a set means the saved order is not the
    # order in which the entries were loaded.
    unique_kept = list(set(kept))
    print('new len: %d' % len(unique_kept))
    mmdt_save(name, unique_kept)
def __init__(self):
    """Load stored feature data (if any) and bind the native core library.

    Looks next to this module for ``libcore.<suffix>`` (suffix taken from
    ``ENGINE_SUFFIX[SYSTEM_VER]``), ``mmdt_feature.data`` and
    ``mmdt_feature.label``. The two feature files are optional; when
    present they are read with ``mmdt_load`` into ``self.datas`` and
    ``self.labels``. The library is mandatory and its four exported
    functions are exposed as ``self.py_mmdt_*`` with their ctypes
    signatures declared.

    Raises:
        Exception: If the core library file does not exist; the message
            is the path that was looked up.
    """
    base_dir = os.path.abspath(os.path.dirname(__file__))
    core_lib_path = os.path.join(
        base_dir, "libcore.{}".format(ENGINE_SUFFIX[SYSTEM_VER]))
    feature_path = os.path.join(base_dir, "mmdt_feature.data")
    label_path = os.path.join(base_dir, "mmdt_feature.label")

    self.datas = list()
    self.labels = list()
    self.build_datas = None
    self.build_labels = None

    # The library check comes before any feature file is read.
    if not os.path.exists(core_lib_path):
        raise Exception(core_lib_path)

    if os.path.exists(feature_path):
        self.datas = mmdt_load(feature_path)
    if os.path.exists(label_path):
        self.labels = mmdt_load(label_path)

    core = CDLL(core_lib_path)

    def bind(attr_name, symbol, arg_types, result_type):
        # Expose one exported function on self and declare its signature.
        func = getattr(core, symbol)
        setattr(self, attr_name, func)
        func.argtypes = arg_types
        func.restype = result_type

    bind("py_mmdt_hash", "mmdt_hash",
         [c_char_p, POINTER(MMDT_Data)], c_int)
    bind("py_mmdt_compare", "mmdt_compare",
         [c_char_p, c_char_p], c_double)
    bind("py_mmdt_hash_streaming", "mmdt_hash_streaming",
         [c_char_p, c_uint32, POINTER(MMDT_Data)], c_int)
    bind("py_mmdt_compare_hash", "mmdt_compare_hash",
         [MMDT_Data, MMDT_Data], c_double)
def gen_simple_features(self):
    """Group the stored feature records by their leading hex field.

    Each record loaded from ``self.mmdt_feature_file_name`` is split on
    ``":"`` and must have at least four fields. The first field is parsed
    as a hexadecimal integer and used as the key into
    ``self.simple_datas``; the value appended under that key is the tuple
    ``("<field0>:<field1>", field2, field3)``.

    If the feature file does not exist, a message is printed and the
    process exits with status 0.
    """
    if not os.path.exists(self.mmdt_feature_file_name):
        print('缺少特征文件')
        exit(0)
    else:
        for record in mmdt_load(self.mmdt_feature_file_name):
            fields = record.split(":")
            bucket_key = int(fields[0], 16)
            entry = ("%s:%s" % (fields[0], fields[1]), fields[2], fields[3])
            # Create the bucket on first use, then append to it.
            self.simple_datas.setdefault(bucket_key, []).append(entry)
def gen_knn_features(self):
    """Build the KNN training arrays from the stored feature records.

    Each record loaded from ``self.mmdt_feature_file_name`` is split on
    ``":"`` and must have at least four fields. The second field is read
    as a hex string and converted, two characters at a time, into a list
    of integers that becomes one row of ``self.knn_train_datas``. The
    third and fourth fields are appended to ``self.knn_train_labels`` and
    ``self.knn_train_sha1s`` respectively.

    If the feature file does not exist, a message is printed and the
    process exits with status 0.
    """
    if not os.path.exists(self.mmdt_feature_file_name):
        print('缺少特征文件')
        exit(0)
    else:
        rows = []
        for record in mmdt_load(self.mmdt_feature_file_name):
            fields = record.split(":")
            hex_digest = fields[1]
            # Decode the hex string one two-character pair at a time.
            rows.append([
                int(hex_digest[pos:pos + 2], 16)
                for pos in range(0, len(hex_digest), 2)
            ])
            self.knn_train_labels.append(fields[2])
            self.knn_train_sha1s.append(fields[3])
        self.knn_train_datas = np.array(rows)
def filter_mmdt_hash_simpleclassify(name):
    """Remove duplicate entries from the list stored under ``name``.

    Loads the entries with ``mmdt_load``, de-duplicates them, prints the
    count before and after, and writes the result back to ``name`` with
    ``mmdt_save``.

    Args:
        name: Path passed to ``mmdt_load`` / ``mmdt_save``.
    """
    loaded = mmdt_load(name)
    print('old len: %d' % len(loaded))

    # De-duplicating through a set means the saved order is not the
    # order in which the entries were loaded.
    deduped = list(set(loaded))
    print('new len: %d' % len(deduped))
    mmdt_save(name, deduped)