Example #1
0
def mmdt_feature_merge():
    """
    Merge two feature files named on the command line.

    The files given by ``sys.argv[1]`` and ``sys.argv[2]`` are read with
    ``mmdt_load``; the second file's records are appended to the first's
    and the combined records are written back to the first file with
    ``mmdt_save``.
    """
    target_path, extra_path = sys.argv[1], sys.argv[2]
    merged = mmdt_load(target_path)
    merged.extend(mmdt_load(extra_path))
    mmdt_save(target_path, merged)
Example #2
0
 def filter_mmdt_hash(name, dlt):
     """
     Filter and de-duplicate the records stored in the file ``name``.

     Records are read with ``mmdt_load``. A record is kept only when
     ``mmdt_std(record)`` is strictly greater than ``dlt``; every other
     record is reported on stdout as removed. Duplicate survivors are
     dropped (first occurrence wins, original order kept) and the result
     is written back to ``name`` with ``mmdt_save``.

     Args:
         name: path of the file to filter in place.
         dlt: threshold compared against ``mmdt_std`` of each record.
     """
     datas = mmdt_load(name)
     print('old len: %d' % len(datas))
     new_datas = list()
     for data in datas:
         if mmdt_std(data) > dlt:
             new_datas.append(data)
         else:
             # One-element tuple so a tuple-valued record cannot be
             # mistaken for multiple format arguments.
             print('remove: %s' % (data,))
     # De-duplicate while keeping first-seen order. A plain set() would
     # reshuffle the file on every run because string hashing is randomized
     # per process.
     new_datas = list(dict.fromkeys(new_datas))
     print('new len: %d' % len(new_datas))
     mmdt_save(name, new_datas)
Example #3
0
    def __init__(self):
        """
        Load the native core library and any feature/label files that sit
        next to this module.

        ``self.datas`` and ``self.labels`` start out empty and are replaced
        by the result of ``mmdt_load`` when ``mmdt_feature.data`` /
        ``mmdt_feature.label`` exist. The four exported C functions are
        bound as ``py_mmdt_*`` attributes with their ctypes prototypes.

        Raises:
            Exception: carrying the expected library path when the
                ``libcore`` shared library is not found beside this file.
        """
        def _bind(func, argtypes, restype):
            # Declare the C prototype on a ctypes function and hand it back.
            func.argtypes = argtypes
            func.restype = restype
            return func

        base_dir = os.path.abspath(os.path.dirname(__file__))
        core_lib = os.path.join(
            base_dir, "libcore.{}".format(ENGINE_SUFFIX[SYSTEM_VER]))
        feature_path = os.path.join(base_dir, "mmdt_feature.data")
        label_path = os.path.join(base_dir, "mmdt_feature.label")

        self.datas = []
        self.labels = []
        self.build_datas = self.build_labels = None

        # Fail before touching any data file if the native library is absent.
        if not os.path.exists(core_lib):
            raise Exception(core_lib)

        if os.path.exists(feature_path):
            self.datas = mmdt_load(feature_path)
        if os.path.exists(label_path):
            self.labels = mmdt_load(label_path)

        api = CDLL(core_lib)

        self.py_mmdt_hash = _bind(
            api.mmdt_hash, [c_char_p, POINTER(MMDT_Data)], c_int)
        self.py_mmdt_compare = _bind(
            api.mmdt_compare, [c_char_p, c_char_p], c_double)
        self.py_mmdt_hash_streaming = _bind(
            api.mmdt_hash_streaming,
            [c_char_p, c_uint32, POINTER(MMDT_Data)],
            c_int)
        self.py_mmdt_compare_hash = _bind(
            api.mmdt_compare_hash, [MMDT_Data, MMDT_Data], c_double)
Example #4
0
 def gen_simple_features(self):
     """
     Populate ``self.simple_datas`` from the feature file.

     Each record loaded from ``self.mmdt_feature_file_name`` is split on
     ``":"``. The first field, parsed as a hexadecimal integer, is the
     grouping key; the stored entry is the tuple
     ``("<field0>:<field1>", field2, field3)``.

     If the feature file does not exist, a notice is printed and the
     process exits with status 0.
     """
     if not os.path.exists(self.mmdt_feature_file_name):
         print('缺少特征文件')
         exit(0)

     for record in mmdt_load(self.mmdt_feature_file_name):
         fields = record.split(":")
         bucket = int(fields[0], 16)
         # Build the entry before touching the dict so a malformed record
         # cannot leave an empty bucket behind.
         entry = ("%s:%s" % (fields[0], fields[1]), fields[2], fields[3])
         self.simple_datas.setdefault(bucket, []).append(entry)
Example #5
0
    def gen_knn_features(self):
        """
        Build the KNN training data from the feature file.

        Each record loaded from ``self.mmdt_feature_file_name`` is split on
        ``":"``. The second field is decoded two hex characters at a time
        into a list of integers (one row of training data); the third and
        fourth fields are appended to ``self.knn_train_labels`` and
        ``self.knn_train_sha1s``. The rows are stored as a NumPy array in
        ``self.knn_train_datas``.

        If the feature file does not exist, a notice is printed and the
        process exits with status 0.
        """
        if not os.path.exists(self.mmdt_feature_file_name):
            print('缺少特征文件')
            exit(0)

        rows = []
        for record in mmdt_load(self.mmdt_feature_file_name):
            fields = record.split(":")
            hex_digest = fields[1]
            rows.append([
                int(hex_digest[pos:pos + 2], 16)
                for pos in range(0, len(hex_digest), 2)
            ])
            self.knn_train_labels.append(fields[2])
            self.knn_train_sha1s.append(fields[3])

        self.knn_train_datas = np.array(rows)
Example #6
0
 def filter_mmdt_hash_simpleclassify(name):
     """
     Remove duplicate records from the file ``name`` in place.

     Records are read with ``mmdt_load``, de-duplicated (first occurrence
     wins, original order kept) and written back with ``mmdt_save``. The
     record counts before and after are printed to stdout.

     Args:
         name: path of the file to de-duplicate.
     """
     datas = mmdt_load(name)
     print('old len: %d' % len(datas))
     # dict.fromkeys keeps first-seen order. A plain set() would reshuffle
     # the file on every run because string hashing is randomized per process.
     datas = list(dict.fromkeys(datas))
     print('new len: %d' % len(datas))
     mmdt_save(name, datas)