def filter_mmdt_hash(name, dlt):
    """Prune a stored collection of MMDT entries and write it back.

    The entries are read with ``mmdt_load(name)``. An entry is kept only if
    ``mmdt_std(entry)`` is strictly greater than ``dlt``; every rejected
    entry is reported on stdout. Duplicates among the kept entries are
    dropped and the result is written with ``mmdt_save(name, ...)``.

    Args:
        name: Identifier handed unchanged to ``mmdt_load`` and ``mmdt_save``.
        dlt: Threshold compared against ``mmdt_std(entry)``; entries whose
            value is less than or equal to it are removed.

    Note:
        Entries must be hashable, because duplicates are detected with a set.
    """
    datas = mmdt_load(name)
    print('old len: %d' % len(datas))

    seen = set()
    new_datas = []
    for data in datas:
        if mmdt_std(data) > dlt:
            # Keep the first occurrence only. Deduplicating while iterating
            # (instead of list(set(...))) keeps the saved order stable and
            # reproducible between runs.
            if data not in seen:
                seen.add(data)
                new_datas.append(data)
        else:
            # One-element tuple so an entry that is itself a tuple is still
            # formatted as a single value.
            print('remove: %s' % (data,))

    print('new len: %d' % len(new_datas))
    mmdt_save(name, new_datas)
def classify(self, filename, dlt, classify_type=1):
    """Classify one file and print the outcome as a comma-separated line.

    The hash of ``filename`` is obtained from ``self.mmdt_hash``. When that
    value is falsy a "mmdt_hash is None" message is printed and nothing else
    happens. Otherwise the line ``filename,similarity,label,std`` is printed,
    where ``std`` is ``mmdt_std(hash)``.

    Args:
        filename: Passed to ``self.mmdt_hash`` and echoed in the output.
        dlt: Forwarded unchanged to the selected classifier.
        classify_type: ``1`` selects ``self.simple_classify``, ``2`` selects
            ``self.knn_classify``; any other value reports similarity ``0.0``
            with the label ``'unknown'``.

    Returns:
        None. The result is only printed.
    """
    digest = self.mmdt_hash(filename)
    if not digest:
        print('%s mmdt_hash is None' % filename)
        return

    deviation = mmdt_std(digest)
    if classify_type == 1:
        similarity, tag = self.simple_classify(digest, dlt)
    elif classify_type == 2:
        similarity, tag = self.knn_classify(digest, dlt)
    else:
        # Unrecognised classifier selector: report a neutral result.
        similarity, tag = 0.0, 'unknown'

    print('%s,%f,%s,%f' % (filename, similarity, tag, deviation))
def classify(self, filename, dlt, classify_type=1):
    """Classify one file and print the classifier's result object.

    The hash of ``filename`` is obtained from ``self.mmdt_hash``. When that
    value is falsy a "mmdt_hash is None" message is printed and nothing else
    happens. Otherwise the selected classifier's return value is printed
    as-is.

    Args:
        filename: Passed to ``self.mmdt_hash`` and echoed in the "None"
            message.
        dlt: Forwarded unchanged to the selected classifier.
        classify_type: ``1`` selects ``self.simple_classify_2``, ``2``
            selects ``self.knn_classify_2``; any other value prints a fixed
            "unknown" result.

    Returns:
        None. The result is only printed.
    """
    md = self.mmdt_hash(filename)
    if not md:
        print("%s mmdt_hash is None" % filename)
        return

    if classify_type == 1:
        data = self.simple_classify_2(md, dlt)
    elif classify_type == 2:
        data = self.knn_classify_2(md, dlt)
    else:
        # Fallback for an unrecognised selector.
        # NOTE(review): presumably mirrors the shape returned by the *_2
        # classifiers -- confirm against their implementations.
        data = {
            "label": "unknown",
            "labels": [
                {
                    "label": "unknown",
                    "ratio": "100.00%",
                }
            ],
            "similars": [],
        }
    print(data)
def check_mmdt_hash(self, md):
    """Tell whether ``md`` passes the instance's feature threshold.

    Args:
        md: Value handed to ``mmdt_std``.

    Returns:
        True when ``mmdt_std(md)`` is strictly greater than
        ``self.mmdt_feature_dlt``, otherwise False.
    """
    # bool() keeps the result a plain True/False whatever type the
    # comparison itself produces.
    return bool(mmdt_std(md) > self.mmdt_feature_dlt)