Ejemplo n.º 1
0
def mmdt_scan_online():
    """Send the hashes of the file named on the command line to the scan service.

    Takes the file path from ``sys.argv[1]``, computes its MD5, SHA-1 and
    mmdt hash, POSTs them as a JSON document to the scan endpoint and
    pretty-prints the JSON reply on stdout.

    Raises:
        SystemExit: if no file path was supplied on the command line.
        requests.exceptions.RequestException: if the request fails or
            times out.
    """
    if len(sys.argv) < 2:
        # Fail with a usage hint rather than an IndexError traceback.
        sys.exit('usage: %s <file>' % sys.argv[0])

    file_name = sys.argv[1]
    mmdt = MMDT()
    data = {
        "md5": gen_md5(file_name),
        "sha1": gen_sha1(file_name),
        "file_name": file_name,
        "mmdt": mmdt.mmdt_hash(file_name),
        "data": {}
    }
    # requests never times out by default; bound both the connect and the
    # read phase so an unreachable server cannot hang the tool forever.
    r = requests.post(url='http://146.56.242.184/mmdt/scan',
                      json=data,
                      timeout=(10, 60))
    r_data = r.json()
    print(json.dumps(r_data, indent=4, ensure_ascii=False))
Ejemplo n.º 2
0
def mmdt_classfiy():
    """Command-line entry point: classify one file, or every entry of a directory.

    Options:
        -s/--scans      file or directory to scan (required).
        -t/--threshold  float threshold passed to ``MMDT.classify`` (default 0.95).
        -c/--classify   classify type, 1 or 2 per the help text (default 1).

    The chosen classify type is first passed to ``MMDT.build_features``.
    When the target is a directory, each name returned by ``os.listdir`` is
    joined to it and classified; the listing is not recursive.

    Raises:
        SystemExit: from argparse on invalid or missing arguments, or with
            status 1 if building the parser fails unexpectedly.
    """
    # Local import: the body needs sys.exit, and the interactive ``exit``
    # helper is only present when the ``site`` module is loaded.
    import sys

    try:
        epilog = r"""
Use like:
    1. use simple classify
    mmdt-classify -s $sample_path -t 0.95 -c 1
    2. use knn classify
    mmdt-classify -s $sample_path -t 0.95 -c 2
        """
        parser = argparse.ArgumentParser(
            prog='python_mmdt malicious file scan tool',
            description=
            'A malicious scanner tool based on mmdt_hash. Version 0.2.2',
            epilog=epilog,
            formatter_class=argparse.RawDescriptionHelpFormatter)
        # required=True: without a target, os.path.isdir(None) below would
        # raise a TypeError instead of a readable usage error.
        parser.add_argument('-s',
                            '--scans',
                            help='set file/path to scan.',
                            type=str,
                            dest='scans',
                            action='store',
                            required=True)
        parser.add_argument(
            '-t',
            '--threshold',
            help=
            'set threshold value to determine whether the file is a malicious file. (default 0.95)',
            type=float,
            dest='threshold',
            action='store',
            default=0.95)
        parser.add_argument(
            '-c',
            '--classify',
            help=
            'set classify type.set 1 for simple classify, set 2 for knn classify.(default 1)',
            type=int,
            dest='classify_type',
            action='store',
            default=1)

        args = parser.parse_args()
    except Exception as e:
        # argparse reports bad arguments through SystemExit, which is not
        # caught here; this branch only covers unexpected failures, so it
        # must report a non-zero status.
        print('error: %s' % str(e))
        sys.exit(1)

    mmdt = MMDT()
    threshold = args.threshold
    classify_type = args.classify_type
    target = args.scans
    mmdt.build_features(classify_type)
    if os.path.isdir(target):
        for f in os.listdir(target):
            mmdt.classify(os.path.join(target, f), threshold, classify_type)
    else:
        mmdt.classify(target, threshold, classify_type)
Ejemplo n.º 3
0
 def test_process(self):
     """Run the hashing and comparison calls over every file in ``samples``.

     For each entry of the ``samples`` directory next to this file, print
     the result of ``mmdt_hash``, ``mmdt_hash_streaming``, ``mmdt_compare``
     of the file against itself, and ``mmdt_compare_hash`` of the two
     hashes. Nothing is asserted; the results are only printed.
     """
     engine = MMDT()
     samples_dir = os.path.join(os.path.dirname(__file__), "samples")
     for entry in os.listdir(samples_dir):
         sample = os.path.join(samples_dir, entry)

         whole_hash = engine.mmdt_hash(sample)
         print(whole_hash)

         streamed_hash = engine.mmdt_hash_streaming(sample)
         print(streamed_hash)

         # Compare the file with itself, path against path.
         file_similarity = engine.mmdt_compare(sample, sample)
         print(file_similarity)

         # Compare the two hashes computed above.
         hash_similarity = engine.mmdt_compare_hash(whole_hash, streamed_hash)
         print(hash_similarity)
Ejemplo n.º 4
0
 def __init__(self, dlt=10.0):
     """Store the feature file name and ``dlt``, and create an ``MMDT`` instance.

     Args:
         dlt: value kept as ``mmdt_feature_dlt`` (default 10.0).
     """
     self.mmdt = MMDT()
     self.mmdt_feature_dlt = dlt
     self.mmdt_feature_file_name = 'mmdt_feature.data'
Ejemplo n.º 5
0
class MMDTFeature(object):
    """Generate and filter mmdt-hash feature data.

    Hashes come from an ``MMDT`` instance; loading, saving and the standard
    deviation measure are delegated to the ``mmdt_load``, ``mmdt_save`` and
    ``mmdt_std`` helpers.
    """

    def __init__(self, dlt=10.0):
        """Create the feature builder.

        Args:
            dlt: threshold that ``mmdt_std`` of a hash must exceed for the
                hash to be kept by ``check_mmdt_hash``.
        """
        self.mmdt_feature_file_name = 'mmdt_feature.data'
        self.mmdt_feature_dlt = dlt
        self.mmdt = MMDT()

    @staticmethod
    def read_lables(file_name):
        """Parse a comma-separated label file into a dict.

        The first field of each line becomes the key and the second field
        the value. Lines with fewer than two fields (blank lines included)
        are skipped.

        Args:
            file_name: path of the text file to read.

        Returns:
            dict mapping first field to second field.
        """
        labels = dict()
        with open(file_name, 'r') as target:
            for line in target:
                fields = line.strip().split(',')
                # str.split never returns an empty list, so test the field
                # count: a blank or comma-less line has no fields[1].
                if len(fields) >= 2:
                    labels[fields[0]] = fields[1]

        return labels

    @staticmethod
    def calc_sha1(file_name):
        """Return the hexadecimal SHA-1 digest of a file's contents.

        The file is read in fixed-size chunks so large samples are not
        loaded into memory at once.
        """
        digest = hashlib.sha1()
        with open(file_name, 'rb') as f:
            for chunk in iter(lambda: f.read(1 << 20), b''):
                digest.update(chunk)
        return digest.hexdigest()

    @staticmethod
    def list_dir(root_dir):
        """Yield ``(full_path, name)`` for each entry of ``root_dir``.

        Entries come from ``os.listdir``; the listing is not recursive.
        """
        for f in os.listdir(root_dir):
            yield os.path.join(root_dir, f), f

    def check_mmdt_hash(self, md):
        """Return True when ``mmdt_std(md)`` exceeds the configured ``dlt``."""
        return bool(mmdt_std(md) > self.mmdt_feature_dlt)

    @staticmethod
    def filter_mmdt_hash(name, dlt):
        """Rewrite a feature file, keeping unique entries with std above ``dlt``.

        Entries loaded with ``mmdt_load(name)`` whose ``mmdt_std`` is not
        greater than ``dlt`` are reported and dropped; the remaining ones
        are de-duplicated and written back with ``mmdt_save``.
        """
        datas = mmdt_load(name)
        print('old len: %d' % len(datas))
        new_datas = list()
        for data in datas:
            if mmdt_std(data) > dlt:
                new_datas.append(data)
            else:
                print('remove: %s' % (data))
        # De-duplicate through dict keys rather than a set so the saved
        # order follows first occurrence instead of hash order.
        new_datas = list(dict.fromkeys(new_datas))
        print('new len: %d' % len(new_datas))
        mmdt_save(name, new_datas)

    @staticmethod
    def filter_mmdt_hash_simpleclassify(name):
        """Rewrite a feature file with duplicate entries removed."""
        datas = mmdt_load(name)
        print('old len: %d' % len(datas))
        # Same first-occurrence de-duplication as in filter_mmdt_hash.
        datas = list(dict.fromkeys(datas))
        print('new len: %d' % len(datas))
        mmdt_save(name, datas)

    def gen_datas(self, samples_path, samples_label_file):
        """Build the feature file from a directory of samples.

        Every entry of ``samples_path`` is hashed. Hashes accepted by
        ``check_mmdt_hash`` are stored as ``"<mmdt_hash>:<label>:<sha1>"``,
        where the label is looked up by file name in
        ``samples_label_file`` (a missing label is formatted as ``None``).
        The collected records are saved to ``self.mmdt_feature_file_name``.

        Args:
            samples_path: directory holding the sample files.
            samples_label_file: label file in the ``read_lables`` format.
        """
        labels = self.read_lables(samples_label_file)
        datas = list()
        for count, (full_path, file_name) in enumerate(
                self.list_dir(samples_path), 1):
            print('process: %s, %d' % (file_name, count))
            mmdt_hash = self.mmdt.mmdt_hash(full_path)
            if self.check_mmdt_hash(mmdt_hash):
                c_sha1 = self.calc_sha1(full_path)
                label = labels.get(file_name)
                datas.append('%s:%s:%s' % (mmdt_hash, label, c_sha1))

        mmdt_save(self.mmdt_feature_file_name, datas)
0
 def __init__(self, dlt=10.0):
     """Store the feature and label file names and ``dlt``; create an ``MMDT``.

     Args:
         dlt: value kept as ``mmdt_feature_dlt`` (default 10.0).
     """
     self.mmdt = MMDT()
     self.mmdt_feature_dlt = dlt
     # Starts out empty.
     self.mmdt_feature_labels = []
     self.mmdt_feature_file_name = 'mmdt_feature.data'
     self.mmdt_feature_label_file_name = 'mmdt_feature.label'
Ejemplo n.º 7
0
def mmdt_compare():
    """Print the result of ``MMDT.mmdt_compare`` for two command-line paths.

    The two paths are taken from ``sys.argv[1]`` and ``sys.argv[2]``.

    Raises:
        SystemExit: if fewer than two paths were supplied.
    """
    if len(sys.argv) < 3:
        # Fail with a usage hint rather than an IndexError traceback.
        sys.exit('usage: %s <file1> <file2>' % sys.argv[0])

    mmdt = MMDT()
    sim = mmdt.mmdt_compare(sys.argv[1], sys.argv[2])
    print(sim)
Ejemplo n.º 8
0
def mmdt_hash():
    """Print the mmdt hash of the file named by ``sys.argv[1]``.

    Raises:
        SystemExit: if no file path was supplied on the command line.
    """
    if len(sys.argv) < 2:
        # Fail with a usage hint rather than an IndexError traceback.
        sys.exit('usage: %s <file>' % sys.argv[0])

    mmdt = MMDT()
    r = mmdt.mmdt_hash(sys.argv[1])
    print(r)