def mmdt_scan_online(timeout=60):
    """Command-line entry point: submit a file's hashes to the online scanner.

    Computes the MD5, SHA-1 and MMDT hash of the file named by
    ``sys.argv[1]``, POSTs them as JSON to the scan endpoint and prints the
    JSON reply indented by four spaces.

    :param timeout: seconds passed to ``requests.post`` as its ``timeout``.
        Without a timeout the call can block indefinitely when the server
        does not answer.
    :raises SystemExit: with a usage message when no file path was given on
        the command line.
    """
    # Validate the command line before doing any work, so a missing argument
    # yields a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'mmdt_scan_online'
        sys.exit('usage: %s <file_path>' % prog)

    file_name = sys.argv[1]
    mmdt = MMDT()
    data = {
        "md5": gen_md5(file_name),
        "sha1": gen_sha1(file_name),
        "file_name": file_name,
        "mmdt": mmdt.mmdt_hash(file_name),
        "data": {}
    }
    r = requests.post(url='http://146.56.242.184/mmdt/scan',
                      json=data,
                      timeout=timeout)
    r_data = r.json()
    # ensure_ascii=False keeps non-ASCII text in the reply readable.
    print(json.dumps(r_data, indent=4, ensure_ascii=False))
def mmdt_classfiy():
    """Command-line entry point: classify one file or every entry of a directory.

    Parses ``-s/--scans`` (required target path), ``-t/--threshold``
    (float, default 0.95) and ``-c/--classify`` (int, default 1) from the
    command line, builds the features for the chosen classify type and calls
    ``MMDT.classify`` on the target. When the target is a directory, each
    entry returned by ``os.listdir`` is classified in turn.

    :raises SystemExit: status 2 from argparse on invalid or missing
        arguments; status 1 if building or parsing the arguments raises any
        other exception.
    """
    import sys  # local import: the original block did not reference ``sys``

    try:
        epilog = r"""
Use like:
    1. use simple classify
    mmdt-classify -s $sample_path -t 0.95 -c 1
    2. use knn classify
    mmdt-classify -s $sample_path -t 0.95 -c 2
    """
        parser = argparse.ArgumentParser(
            prog='python_mmdt malicious file scan tool',
            description=
            'A malicious scanner tool based on mmdt_hash. Version 0.2.2',
            epilog=epilog,
            formatter_class=argparse.RawDescriptionHelpFormatter)
        # The scan target is mandatory: without it ``os.path.isdir(None)``
        # below would raise a TypeError instead of a readable usage error.
        parser.add_argument('-s',
                            '--scans',
                            help='set file/path to scan.',
                            type=str,
                            dest='scans',
                            action='store',
                            required=True)
        parser.add_argument(
            '-t',
            '--threshold',
            help=
            'set threshold value to determine whether the file is a malicious file. (default 0.95)',
            type=float,
            dest='threshold',
            action='store',
            default=0.95)
        parser.add_argument(
            '-c',
            '--classify',
            help=
            'set classify type.set 1 for simple classify, set 2 for knn classify.(default 1)',
            type=int,
            dest='classify_type',
            action='store',
            default=1)
        args = parser.parse_args()
    except Exception as e:
        # argparse reports bad arguments through SystemExit, which is not
        # caught here; this branch only covers unexpected failures and must
        # signal them with a non-zero status.
        print('error: %s' % str(e))
        sys.exit(1)

    mmdt = MMDT()
    threshold = args.threshold
    classify_type = args.classify_type
    target = args.scans
    mmdt.build_features(classify_type)

    if os.path.isdir(target):
        for f in os.listdir(target):
            full_file = os.path.join(target, f)
            mmdt.classify(full_file, threshold, classify_type)
    else:
        mmdt.classify(target, threshold, classify_type)
def test_process(self):
    """Smoke-run the MMDT hashing and comparison calls over the sample files.

    For every entry of the ``samples`` directory next to this file, prints
    the regular hash, the streaming hash, the file-to-itself comparison and
    the comparison of the two hashes. Nothing is asserted.
    """
    engine = MMDT()
    samples_dir = os.path.join(os.path.dirname(__file__), "samples")
    for entry in os.listdir(samples_dir):
        sample = os.path.join(samples_dir, entry)

        plain_hash = engine.mmdt_hash(sample)
        print(plain_hash)

        streamed_hash = engine.mmdt_hash_streaming(sample)
        print(streamed_hash)

        # Compare the file with itself, then the two hash variants.
        print(engine.mmdt_compare(sample, sample))
        print(engine.mmdt_compare_hash(plain_hash, streamed_hash))
def __init__(self, dlt=10.0):
    """Store the feature settings and create the MMDT engine.

    :param dlt: value stored as ``mmdt_feature_dlt`` (default 10.0).
    """
    # Plain configuration values first ...
    self.mmdt_feature_dlt = dlt
    self.mmdt_feature_file_name = 'mmdt_feature.data'
    # ... then the engine instance used by this object.
    self.mmdt = MMDT()
class MMDTFeature(object):
    """Generate and filter MMDT feature files from labelled samples.

    Records produced by :meth:`gen_datas` are strings of the form
    ``<mmdt_hash>:<label>:<sha1>``.
    """

    def __init__(self, dlt=10.0):
        """Store the feature settings and create the MMDT engine.

        :param dlt: threshold compared against ``mmdt_std`` of a hash in
            :meth:`check_mmdt_hash`.
        """
        self.mmdt_feature_file_name = 'mmdt_feature.data'
        self.mmdt_feature_dlt = dlt
        self.mmdt = MMDT()

    @staticmethod
    def read_lables(file_name):
        """Read a comma-separated label file into a dict.

        Each usable line has at least two comma-separated fields; the first
        becomes the key and the second the value. Lines with fewer than two
        fields (blank lines, lines without a comma) are skipped instead of
        raising ``IndexError``.

        :param file_name: path of the label file.
        :returns: dict mapping first field to second field.
        """
        labels = dict()
        with open(file_name, 'r') as target:
            for line in target:
                fields = line.strip().split(',')
                # ``split`` never returns an empty list, so the field count
                # has to be checked explicitly.
                if len(fields) < 2:
                    continue
                labels[fields[0]] = fields[1]
        return labels

    @staticmethod
    def calc_sha1(file_name):
        """Return the hexadecimal SHA-1 digest of a file's contents.

        The file is hashed in fixed-size chunks so large samples are not
        loaded into memory at once.

        :param file_name: path of the file to hash.
        """
        _s = hashlib.sha1()
        with open(file_name, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                _s.update(chunk)
        return _s.hexdigest()

    @staticmethod
    def list_dir(root_dir):
        """Yield ``(full_path, name)`` for every entry of ``root_dir``."""
        for f in os.listdir(root_dir):
            yield os.path.join(root_dir, f), f

    def check_mmdt_hash(self, md):
        """Return True when ``mmdt_std(md)`` exceeds the configured threshold.

        :param md: MMDT hash value passed to ``mmdt_std``.
        """
        return bool(mmdt_std(md) > self.mmdt_feature_dlt)

    @staticmethod
    def filter_mmdt_hash(name, dlt):
        """Rewrite a feature file keeping only unique records above ``dlt``.

        Loads the records with ``mmdt_load``, drops every record whose
        ``mmdt_std`` is not greater than ``dlt`` (printing each removed
        record), de-duplicates the rest and saves them back with
        ``mmdt_save`` under the same name.

        :param name: feature file name passed to ``mmdt_load``/``mmdt_save``.
        :param dlt: threshold a record's ``mmdt_std`` must exceed.
        """
        datas = mmdt_load(name)
        print('old len: %d' % len(datas))
        new_datas = list()
        for data in datas:
            if mmdt_std(data) > dlt:
                new_datas.append(data)
            else:
                print('remove: %s' % (data))
        # De-duplicate; the resulting order is whatever ``set`` yields.
        new_datas = list(set(new_datas))
        print('new len: %d' % len(new_datas))
        mmdt_save(name, new_datas)

    @staticmethod
    def filter_mmdt_hash_simpleclassify(name):
        """Rewrite a feature file with duplicate records removed.

        :param name: feature file name passed to ``mmdt_load``/``mmdt_save``.
        """
        datas = mmdt_load(name)
        print('old len: %d' % len(datas))
        datas = list(set(datas))
        print('new len: %d' % len(datas))
        mmdt_save(name, datas)

    def gen_datas(self, samples_path, samples_label_file):
        """Build the feature file from a directory of samples and a label file.

        Hashes every entry of ``samples_path``; for hashes accepted by
        :meth:`check_mmdt_hash` a ``<mmdt_hash>:<label>:<sha1>`` record is
        collected. All records are saved with ``mmdt_save`` under
        ``self.mmdt_feature_file_name``.

        :param samples_path: directory whose entries are hashed.
        :param samples_label_file: label file read by :meth:`read_lables`;
            labels are looked up by sample file name.
        """
        labels = self.read_lables(samples_label_file)
        datas = list()
        for count, (full_path, file_name) in enumerate(
                self.list_dir(samples_path), 1):
            print('process: %s, %d' % (file_name, count))
            mmdt_hash = self.mmdt.mmdt_hash(full_path)
            if self.check_mmdt_hash(mmdt_hash):
                c_sha1 = self.calc_sha1(full_path)
                # NOTE(review): a sample missing from the label file gets
                # label None, which is written as the text "None".
                label = labels.get(file_name)
                data = '%s:%s:%s' % (mmdt_hash, label, c_sha1)
                datas.append(data)
        mmdt_save(self.mmdt_feature_file_name, datas)
def __init__(self, dlt=10.0):
    """Store the feature settings and create the MMDT engine.

    :param dlt: value stored as ``mmdt_feature_dlt`` (default 10.0).
    """
    # File names used for the feature data and its labels.
    self.mmdt_feature_file_name = 'mmdt_feature.data'
    self.mmdt_feature_label_file_name = 'mmdt_feature.label'

    self.mmdt_feature_dlt = dlt
    # Starts out empty.
    self.mmdt_feature_labels = []
    self.mmdt = MMDT()
def mmdt_compare():
    """Command-line entry point: print the MMDT similarity of two files.

    Passes ``sys.argv[1]`` and ``sys.argv[2]`` to ``MMDT.mmdt_compare`` and
    prints the returned value.

    :raises SystemExit: with a usage message when fewer than two file paths
        were given on the command line.
    """
    # Report a missing argument as a usage error, not an IndexError traceback.
    if len(sys.argv) < 3:
        prog = sys.argv[0] if sys.argv else 'mmdt_compare'
        sys.exit('usage: %s <file_a> <file_b>' % prog)

    mmdt = MMDT()
    sim = mmdt.mmdt_compare(sys.argv[1], sys.argv[2])
    print(sim)
def mmdt_hash():
    """Command-line entry point: print the MMDT hash of one file.

    Passes ``sys.argv[1]`` to ``MMDT.mmdt_hash`` and prints the returned
    value.

    :raises SystemExit: with a usage message when no file path was given on
        the command line.
    """
    # Report a missing argument as a usage error, not an IndexError traceback.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'mmdt_hash'
        sys.exit('usage: %s <file_path>' % prog)

    mmdt = MMDT()
    r = mmdt.mmdt_hash(sys.argv[1])
    print(r)