def impfuzzy_comp(list, list_new):
    """Compare impfuzzy hashes pairwise and collect the similarity scores.

    Every entry of both arguments is indexed as ``entry[0]`` (the identifier
    put into the result) and ``entry[2]`` (the impfuzzy hash string).

    Two sets of comparisons are made:

    * every entry of ``list_new`` against every later entry of ``list_new``;
    * if ``list`` is non-empty, every entry of ``list_new`` against every
      entry of ``list``.

    Hashes that do not look like an ssdeep digest, or that are 150 characters
    or longer, are skipped and a warning naming the rejected hash is printed.

    Args:
        list: Previously known entries. The name shadows the builtin but is
            kept because it is part of the public signature.
        list_new: Newly added entries.

    Returns:
        A list of ``[new_id, other_id, score]`` triples, where ``score`` is
        the value returned by ``pyimpfuzzy.hash_compare``.
    """
    ssdeep = re.compile(r"^[0-9]{1,5}:[0-9a-zA-Z/+]+:[0-9a-zA-Z/+]+$", re.DOTALL)

    def is_valid(hash_value):
        return bool(ssdeep.search(hash_value)) and len(hash_value) < 150

    def warn(hash_value):
        print("[!] This impfuzzy hash is not ssdeep format: %s" % hash_value)

    complist = []

    # New entries against each other: each unordered pair is visited once.
    for index, item_new in enumerate(list_new):
        if not is_valid(item_new[2]):
            continue
        for other in list_new[index + 1:]:
            if is_valid(other[2]):
                complist.append([item_new[0], other[0], pyimpfuzzy.hash_compare(item_new[2], other[2])])
            else:
                # Report the hash that was actually rejected.
                warn(other[2])

    # New entries against the already known ones.
    if list:
        for item_new in list_new:
            if not is_valid(item_new[2]):
                warn(item_new[2])
                continue
            for item in list:
                if is_valid(item[2]):
                    complist.append([item_new[0], item[0], pyimpfuzzy.hash_compare(item_new[2], item[2])])
                else:
                    warn(item[2])

    return complist
def dbsearch(path, finp):
    """Print the similarity of ``finp`` against every stored ``fuzImp`` value.

    Reads the ``fuzImp`` column of the ``MalwareDump`` table through the
    connection returned by ``dbConnect()`` and prints one line per row in the
    form ``<path>--<score>``.

    Args:
        path: Label printed in front of each score.
        finp: impfuzzy hash to compare against the stored values.
    """
    conn = dbConnect()
    cursor = conn.cursor()
    stored_rows = cursor.execute('select fuzImp from MalwareDump;').fetchall()
    # NOTE(review): each row is passed to hash_compare as fetched; if the
    # driver returns tuples this probably needs row[0] - confirm against
    # dbConnect().
    for row in stored_rows:
        score = pyimpfuzzy.hash_compare(finp, row)
        print(path + "--" + str(score))
 def run(self, obj, config):
     """Compute the impfuzzy hash of ``obj`` and report similar samples.

     The hash is computed from ``obj.filedata``, stored on ``obj`` when the
     object has no impfuzzy value yet, and reported as an ``impfuzzy_hash``
     result. Stored samples with the same mimetype (when known) and a
     chunk size equal to, double or half of the target's chunk size are then
     compared against it; every sample scoring at least ``threshold`` is
     reported as an ``impfuzzy_match`` result, best match first.

     Args:
         obj: Sample being analysed; ``filedata``, ``md5``, ``id``,
             ``mimetype`` and ``impfuzzy`` are read from it.
         config: Mapping of service options; ``threshold`` (default 50) is
             the minimum score for a match.
     """
     self._info("Impfuzzy: run()")
     threshold = config.get("threshold", 50)
     target_impfuzzy = None
     try:
         target_impfuzzy = pyimpfuzzy.get_impfuzzy_data(obj.filedata.read())
     except Exception:
         # Best effort: a failed computation is reported just below.
         pass
     target_md5 = obj.md5
     if not target_impfuzzy:
         # Call the logger; assigning to logger.error would replace the method.
         logger.error("impfuzzy: Could not generate impfuzzy value for sample: %s" % str(obj.id))
         self._error("Could not generate impfuzzy value for sample")
         return
     # Only fill in the attribute when the sample does not have one yet.
     if not obj.impfuzzy:
         obj.impfuzzy = target_impfuzzy
         obj.save()
         self._info("impfuzzy: Filled-in in the impfuzzy")
     else:
         self._info("impfuzzy attribute already present, not overwriting")
     self._add_result('impfuzzy_hash', target_impfuzzy, {'impfuzzy': target_impfuzzy})
     # setup the sample space to compare against
     # first use the mimetype as a comparator if available
     target_mimetype = obj.mimetype
     query_filter = {}
     if target_mimetype:
         query_filter['mimetype'] = target_mimetype
     # then use only samples whose chunk size is double, equal to or half of
     # the target's chunk size
     chunk_size = int(target_impfuzzy.split(":")[0])
     # Parenthesised so the chunk size is doubled, not the formatted string.
     query_filter["$or"] = [
         {"impfuzzy": {"$regex": "^%d:" % (chunk_size * 2)}},
         {"impfuzzy": {"$regex": "^%d:" % chunk_size}},
         {"impfuzzy": {"$regex": "^%d:" % (chunk_size // 2)}},
     ]
     result_filter = {'md5': 1, 'impfuzzy': 1, 'description': 1}
     candidate_space = Sample.objects(__raw__=query_filter).only(*result_filter)
     match_list = []
     for candidate in candidate_space:
         if "impfuzzy" not in candidate:
             continue
         score = pyimpfuzzy.hash_compare(target_impfuzzy, candidate["impfuzzy"])
         if score >= threshold and candidate["md5"] != target_md5:
             # Grab the md5 and the description for later
             match_list.append({'md5': candidate["md5"], 'description': candidate["description"], 'score': score})
     # finally sort the results, highest score first
     match_list.sort(key=lambda sample: sample["score"], reverse=True)
     for match in match_list:
         # Show the MD5 and the Description
         self._add_result("impfuzzy_match", match["md5"], {'description': match["description"], 'md5': match["md5"], 'score': match["score"]})
     self._info("impfuzzy run() done")
Beispiel #4
0
 def run(self, obj, config):
     """Compute the impfuzzy hash of ``obj`` and report similar samples.

     The hash is computed from ``obj.filedata``, stored on ``obj`` when the
     object has no impfuzzy value yet, and reported as an ``impfuzzy_hash``
     result. Stored samples with the same mimetype (when known) and a
     chunk size equal to, double or half of the target's chunk size are then
     compared against it; every sample scoring at least ``threshold`` is
     reported as an ``impfuzzy_match (MD5)`` result, best match first.

     Args:
         obj: Sample being analysed; ``filedata``, ``md5``, ``id``,
             ``mimetype`` and ``impfuzzy`` are read from it.
         config: Mapping of service options; ``threshold`` (default 50) is
             the minimum score for a match.
     """
     threshold = config.get("threshold", 50)
     target_impfuzzy = None
     try:
         target_impfuzzy = pyimpfuzzy.get_impfuzzy_data(obj.filedata.read())
     except Exception:
         # Best effort: a failed computation is reported just below.
         pass
     target_md5 = obj.md5
     if not target_impfuzzy:
         # Call the logger; assigning to logger.error would replace the method.
         logger.error("impfuzzy: Could not generate impfuzzy value for sample: %s" % str(obj.id))
         self._error("Could not generate impfuzzy value for sample")
         return
     # Only fill in the attribute when the sample does not have one yet.
     if not obj.impfuzzy:
         obj.impfuzzy = target_impfuzzy
         obj.save()
         self._info("impfuzzy: Filled-in in the impfuzzy")
     else:
         self._info("impfuzzy attribute already present, not overwriting")
     self._add_result('impfuzzy_hash', target_impfuzzy, {'impfuzzy': target_impfuzzy})
     # setup the sample space to compare against
     # first use the mimetype as a comparator if available
     target_mimetype = obj.mimetype
     query_filter = {}
     if target_mimetype:
         query_filter['mimetype'] = target_mimetype
     # then use only samples whose chunk size is double, equal to or half of
     # the target's chunk size
     chunk_size = int(target_impfuzzy.split(":")[0])
     # Parenthesised so the chunk size is doubled, not the formatted string.
     query_filter["$or"] = [
         {"impfuzzy": {"$regex": "^%d:" % (chunk_size * 2)}},
         {"impfuzzy": {"$regex": "^%d:" % chunk_size}},
         {"impfuzzy": {"$regex": "^%d:" % (chunk_size // 2)}},
     ]
     result_filter = {'md5': 1, 'impfuzzy': 1, 'description': 1}
     candidate_space = Sample.objects(__raw__=query_filter).only(*result_filter)
     match_list = []
     for candidate in candidate_space:
         if "impfuzzy" not in candidate:
             continue
         score = pyimpfuzzy.hash_compare(target_impfuzzy, candidate["impfuzzy"])
         if score >= threshold and candidate["md5"] != target_md5:
             # Grab the md5 and the description for later
             match_list.append({'md5': candidate["md5"], 'description': candidate["description"], 'score': score})
     # finally sort the results, highest score first
     match_list.sort(key=lambda sample: sample["score"], reverse=True)
     for match in match_list:
         # Show the MD5 and the Description
         self._add_result("impfuzzy_match (MD5)", match["md5"], {'description': match["description"], 'score': match["score"]})
Beispiel #5
0
    fuzzy_lib = ctypes.cdll.LoadLibrary(str(fuzzy_lib_path))
    result_buf = ctypes.create_string_buffer(FUZZY_MAX_RESULT)
    data_buf = ctypes.create_string_buffer(data)
    fuzzy_lib.fuzzy_hash_buf(data_buf, len(data_buf) - 1, result_buf)
    hash_value = result_buf.value.decode("ascii")

    return hash_value


if len(sys.argv) == 4:
    hash_type = sys.argv[1]
    hash1 = sys.argv[2]
    hash2 = sys.argv[3]

    if hash_type == "compare":
        print(pyimpfuzzy.hash_compare(hash1, hash2), end="")
elif len(sys.argv) == 2:
    hash_type = sys.argv[1]

    # Receive data
    data = sys.stdin.buffer.read()

    if hash_type == "ssdeep":
        print(ssdeep(data), end="")
    elif hash_type == "impfuzzy":
        file_type = magic.from_buffer(data)

        if file_type[:4] == "PE32":
            try:
                pe = None
                pe = pefile.PE(data=data)
Beispiel #6
0
import pyimpfuzzy

import sys

#hash1 = pyimpfuzzy.get_impfuzzy("D:\\SRC\\imphash\\samples\\Git-2.20.1-64-bit.exe")
#hash2 = pyimpfuzzy.get_impfuzzy("D:\\SRC\\imphash\\samples\\vlc-3.0.6-win64.exe")
# Two fixed impfuzzy hashes used as the comparison inputs.
hash1 = "48:o4/c+4QjuC5Q4FNO0MeAXGo4E/gjF5J/RscXr9ubudS19WOG/iB:oc94A5TNO0MHYXrMeS1oXiB"
hash2 = "48:I2/dKEE8QyoOttGarYau/LsquqQEHA9vPel1bEX/KA/6UyJra8ESvS55w+0o4Rn2:ImdKNstGoYa6ZGxfOBA"

# Echo both inputs, then their similarity score.
for label, digest in (("ImpFuzzy1: %s", hash1), ("ImpFuzzy2: %s", hash2)):
    print(label % digest)

print("Compare: %i" % pyimpfuzzy.hash_compare(hash1, hash2))
Beispiel #7
0
    def render_text(self, outfd, data):
        """Render the plugin output as a text table.

        Depending on the configuration the plugin works in one of two modes:

        * "search" (``EXEFILE`` or ``COMPIMPFUZZY`` set): the reference
          impfuzzy hashes are computed from the given PE file / directory or
          read from the given hash list file (one hash per line), and every
          module whose hash scores at least the threshold against a
          reference hash is printed together with the score.
        * "list" (``LISTIMPFUZZY`` set): the impfuzzy hash of every module
          is printed.

        Rows of ``data`` whose hash field contains "Error" are skipped.

        Args:
            outfd: Output stream handed to ``table_header`` / ``table_row``.
            data: Iterable of ``(offset, FileName, base, ModName,
                hash_result, fuzzy_result)`` tuples.
        """
        # Default impfuzzy threshold; the THRESHOLD option overrides it below.
        ss_threshold = 30

        if not has_pyimpfuzzy:
            debug.error("pyimpfuzzy must be installed for this plugin")

        files = []
        impfuzzys = []
        if self._config.EXEFILE is not None:
            mode = "search"
            if os.path.isdir(self._config.EXEFILE):
                for root, _dirs, filenames in os.walk(self._config.EXEFILE):
                    for name in filenames:
                        files.append(os.path.join(root, name))
            elif os.path.isfile(self._config.EXEFILE):
                files.append(self._config.EXEFILE)

            for exe_path in files:
                impfuzzys.append(pyimpfuzzy.get_impfuzzy(exe_path))
        elif self._config.COMPIMPFUZZY is not None:
            mode = "search"
            # Context manager so the hash list file is always closed.
            with open(self._config.COMPIMPFUZZY, "r") as hash_file:
                for line in hash_file:
                    impfuzzys.append(line.rstrip())
        elif self._config.LISTIMPFUZZY:
            mode = "list"
        else:
            debug.error(
                "Please set option -e (PE file or directory) or -i (impfuzzy hash list file) or -a (Listing the impfuzzy)")

        if self._config.THRESHOLD is not None:
            ss_threshold = self._config.THRESHOLD

        if mode == "search":
            self.table_header(outfd,
                              [("Process", "[addrpad]"),
                               ("Name", "20"),
                               ("Module Base", "[addrpad]"),
                               ("Module Name",   "20"),
                               ("impfuzzy", "20"),
                               ("Compare", "7")])

            for offset, FileName, base, ModName, hash_result, fuzzy_result in data:
                # The check does not depend on the reference hash, so do it
                # once per row.
                if "Error" in fuzzy_result:
                    continue
                for impfuzzy in impfuzzys:
                    # Compare once and reuse the score for the row.
                    score = pyimpfuzzy.hash_compare(impfuzzy, fuzzy_result)
                    if score >= ss_threshold:
                        self.table_row(outfd, offset, FileName, base, ModName, fuzzy_result,
                                       score)

        if mode == "list":
            self.table_header(outfd,
                              [("Process", "[addrpad]"),
                               ("Name", "20"),
                               ("Module Base", "[addrpad]"),
                               ("Module Name", "20"),
                               ("impfuzzy", "110")])

            for offset, FileName, base, ModName, hash_result, fuzzy_result in data:
                if "Error" not in fuzzy_result:
                    self.table_row(outfd, offset, FileName,
                                   base, ModName, fuzzy_result)
Beispiel #8
0
    def _generator(self, procs, hashlist, threshold):
        """Yield one output row per candidate PE image found in process VADs.

        For every process in ``procs`` the VADs are walked, a PE image is
        rebuilt from the data at each candidate VAD's start address and
        hashed with ``self.calc_hash``.

        * When ``hashlist`` is empty, every candidate is yielded with its
          hash text, or with an "Unable to dump PE" message when the
          rebuild or hashing raised.
        * Otherwise only candidates that could be hashed are considered, and
          a row is yielded for each entry of ``hashlist`` that compares
          against the candidate's hash with a score of at least
          ``threshold`` (so one module may be yielded more than once).

        Each yielded value has the form ``(0, (pid, image file name,
        hex VAD start, VAD file name, result_text))``.
        """
        pe_table_name = intermed.IntermediateSymbolTable.create(
            self.context,
            self.config_path,
            "windows",
            "pe",
            class_types=pe.class_types)

        # NOTE(review): presumably list_vads() skips a VAD when filter_func
        # returns True - confirm against VadInfo.list_vads. With an address
        # configured, only the VAD starting at that address gets a False.
        filter_func = lambda _: False
        if self.config.get('address', None) is not None:
            filter_func = lambda x: x.get_start(
            ) not in [self.config['address']]

        for proc in procs:

            proc_id = "Unknown"
            try:
                proc_id = proc.UniqueProcessId
                proc_layer_name = proc.add_process_layer()
            except exceptions.InvalidAddressException as excp:
                # Log the failure and move on to the next process.
                vollog.debug(
                    "Process {}: invalid address {} in layer {}".format(
                        proc_id, excp.invalid_address, excp.layer_name))
                continue

            for vad in vadinfo.VadInfo.list_vads(proc,
                                                 filter_func=filter_func):

                # this parameter is inherited from the VadInfo plugin. if a user specifies
                # an address, then it bypasses the DLL identification heuristics
                if self.config.get("address", None) is None:

                    # rather than relying on the PEB for DLLs, which can be swapped,
                    # it requires special handling on wow64 processes, and its
                    # unreliable from an integrity standpoint, let's use the VADs instead
                    protection_string = vad.get_protection(
                        vadinfo.VadInfo.protect_values(
                            self.context, self.config['primary'],
                            self.config['nt_symbols']),
                        vadinfo.winnt_protections)

                    # DLLs are write copy...
                    if protection_string != "PAGE_EXECUTE_WRITECOPY":
                        continue

                    # DLLs have mapped files...
                    if isinstance(vad.get_file_name(),
                                  interfaces.renderers.BaseAbsentValue):
                        continue

                try:
                    dos_header = self.context.object(
                        pe_table_name + constants.BANG + "_IMAGE_DOS_HEADER",
                        offset=vad.get_start(),
                        layer_name=proc_layer_name)

                    # Assemble the image in memory: each reconstructed piece
                    # is written at the offset it is reported for.
                    pe_data = io.BytesIO()

                    for offset, data in dos_header.reconstruct():
                        pe_data.seek(offset)
                        pe_data.write(data)

                    pe_data_raw = pe_data.getvalue()

                    pe_data.close()

                    result_text = self.calc_hash(pe_data_raw)
                except Exception:
                    # Any failure while rebuilding or hashing is turned into
                    # a message; the "Unable" prefix is tested below.
                    result_text = "Unable to dump PE at {0:#x}".format(
                        vad.get_start())

                if len(hashlist) == 0:
                    # No reference hashes: list every candidate.
                    yield (0, (proc.UniqueProcessId,
                               proc.ImageFileName.cast(
                                   "string",
                                   max_length=proc.ImageFileName.vol.count,
                                   errors='replace'),
                               format_hints.Hex(vad.get_start()),
                               vad.get_file_name(), result_text))
                elif not "Unable" in result_text:
                    # Reference hashes given: yield once per hash that
                    # reaches the threshold.
                    for hash in hashlist:
                        if pyimpfuzzy.hash_compare(result_text,
                                                   hash) >= threshold:
                            yield (0, (
                                proc.UniqueProcessId,
                                proc.ImageFileName.cast(
                                    "string",
                                    max_length=proc.ImageFileName.vol.count,
                                    errors='replace'),
                                format_hints.Hex(vad.get_start()),
                                vad.get_file_name(), result_text))
def _run_tool_in_dir(tool_dir, argv):
    """Run ``argv`` with ``tool_dir`` as working directory; return output lines.

    The previous working directory is restored even when the tool fails.
    """
    previous_dir = os.getcwd()
    os.chdir(tool_dir)
    try:
        return subprocess.check_output(argv).decode('utf-8').split('\n')
    finally:
        os.chdir(previous_dir)


def analyse(filepath, pefiles_dir, pe, collection, useVT, api_key):
    """Collect hashes and static-analysis attributes for one PE file.

    The values are gathered in the order of the module-level ``HEADER`` list
    and returned as a dict keyed by the ``HEADER`` names. Besides the
    cryptographic and fuzzy hashes, the external tools ``PEiD``, ``trid``,
    ``strings`` and (when ``isUbuntu`` is set) ``winchecksec`` are run, the
    closest already stored sample by impfuzzy score is looked up in
    ``collection``, and stored samples for which the new file is a closer
    match than their recorded one are updated.

    Args:
        filepath: Path of the file to analyse.
        pefiles_dir: Directory checked for ``<sha256>.txt``; if that file
            exists the sample counts as already analysed.
        pe: Parsed PE object providing ``get_imphash()`` and the
            ``DIRECTORY_ENTRY_IMPORT`` / ``DIRECTORY_ENTRY_EXPORT`` tables
            (presumably a ``pefile.PE`` - confirm against the caller).
        collection: Document collection providing ``find()`` and
            ``update()``; its documents carry ``sha256``, ``impfuzzy`` and
            ``nearest value``.
        useVT: VirusTotal is queried only when this equals ``'on'``.
        api_key: VirusTotal API key.

    Returns:
        ``(ret_dict, s256path)`` on success. If the file was already
        analysed, or VirusTotal did not return results, a
        ``(message, sha256_hexdigest)`` tuple is returned instead.
    """
    ret_list = []

    # 'date', 'md5', 'sha1', 'sha256',
    m5 = hashlib.md5()
    s1 = hashlib.sha1()
    s256 = hashlib.sha256()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(8192 * s256.block_size), b''):
            m5.update(chunk)
            s1.update(chunk)
            s256.update(chunk)
    s256path = pefiles_dir + '/' + s256.hexdigest() + '.txt'
    if os.path.isfile(s256path):
        print(filepath, ' already run this program.', file=sys.stderr)
        return os.path.basename(filepath) + 'はすでに解析を完了しています。', s256.hexdigest()

    ret_list.append(datetime.datetime.now().strftime('%Y%m%d'))
    ret_list.append(m5.hexdigest())
    ret_list.append(s1.hexdigest())
    ret_list.append(s256.hexdigest())

    # 'ssdeep', 'imphash', 'impfuzzy'
    ret_list.append(ssdeep.hash_from_file(filepath))
    ret_list.append(pe.get_imphash())
    ret_list.append(pyimpfuzzy.get_impfuzzy(filepath))

    # 'Totalhash', 'AnyMaster', 'AnyMaster_v1_0_1', 'EndGame', 'Crits', 'peHashNG'
    ret_list.append(pehash.totalhash_hex(filepath))
    ret_list.append(pehash.anymaster_hex(filepath))
    ret_list.append(pehash.anymaster_v1_0_1_hex(filepath))
    ret_list.append(pehash.endgame_hex(filepath))
    ret_list.append(pehash.crits_hex(filepath))
    ret_list.append(pehash.pehashng_hex(filepath))

    # 'Platform', 'GUI Program', 'Console Program', 'DLL', 'Packed', 'Anti-Debug'
    # 'mutex', 'contains base64', 'AntiDebugMethod', 'PEiD'
    cwd = os.getcwd()
    peid_lines = _run_tool_in_dir(cwd + '/PEiD', ['./PEiD', filepath])
    # Output lines of the form "  <key> : <value>".
    key_value = re.compile(r'^\s+([^:]+)\s:\s(.+)$')
    res_dict = {}
    for line in peid_lines:
        m = key_value.match(line)
        if m:
            res_dict[m.group(1)] = m.group(2)
    has_base64 = any(line.startswith('  contains base64') for line in peid_lines)

    for key in ('PE', 'GUI Program', 'Console Program', 'DLL', 'Packed',
                'Anti-Debug'):
        ret_list.append(res_dict[key])
    ret_list.append('yes' if 'mutex' in res_dict else 'no')
    ret_list.append('yes' if has_base64 else '')
    # List-like values are flattened to "a|b|c"; a missing key yields ''.
    for key in ('AntiDebug', 'PEiD'):
        if key in res_dict:
            ret_list.append(
                re.sub(r'[\[\]"]', '', res_dict[key]).replace(' ', '|'))
        else:
            ret_list.append('')

    # 'TrID'
    trid_lines = _run_tool_in_dir(cwd + '/trid', ['./trid', filepath])
    # Keep only the lines that start with a percentage.
    trid_hits = [s for s in trid_lines if re.match(r'^\s*[0-9]+\.[0-9]%', s)]
    ret_list.append('\\n'.join(trid_hits))

    # nearest sha256, nearest value
    newfuzzy = ret_list[HEADER.index('impfuzzy')]
    nearest_sha256 = ''
    nearest_value = -1

    # NOTE(review): Cursor.count() and Collection.update() look like the
    # legacy PyMongo API - confirm which driver version is in use before
    # changing them.
    result = collection.find()
    if result.count() > 0:
        overwrites = []
        for doc in result:
            cmpval = pyimpfuzzy.hash_compare(newfuzzy, doc['impfuzzy'])
            if cmpval > nearest_value:
                nearest_sha256 = doc['sha256']
                nearest_value = cmpval
            # The new file is a closer match for this stored sample than the
            # one recorded so far: remember the update.
            if cmpval > doc['nearest value']:
                overwrites.append([{
                    'sha256': doc['sha256']
                }, {
                    '$set': {
                        'nearest sha256': s256.hexdigest(),
                        'nearest value': cmpval
                    }
                }])
        # Applied after the loop, as in the original flow.
        for query, change in overwrites:
            collection.update(query, change)
    ret_list.append(nearest_sha256)
    ret_list.append(nearest_value)

    # ismalware, VirusTotalLink
    if useVT == 'on':
        vt = VirusTotalPublicApi(api_key)
        res = vt.get_file_report(m5.hexdigest())
        if 'results' not in res:
            print('Server error occured.', file=sys.stderr)
            return 'VirusTotalでエラーが発生しました。', s256.hexdigest()
        vt_results = res['results']
        if 'positives' in vt_results and vt_results['positives'] > 0:
            ret_list.append('True')
        else:
            ret_list.append('False')
        if 'permalink' in vt_results:
            ret_list.append(str(vt_results['permalink']))
        else:
            ret_list.append('')
    else:
        ret_list.append('')
        ret_list.append('')

    # strings
    ret_list.append(
        subprocess.check_output(['strings', filepath]).decode('utf-8'))

    # import table
    imports = []
    # A PE without imports has no DIRECTORY_ENTRY_IMPORT attribute.
    for entry in getattr(pe, 'DIRECTORY_ENTRY_IMPORT', []):
        for imp in entry.imports:
            try:
                imports.append(imp.name.decode('utf-8'))
            except (AttributeError, UnicodeDecodeError):
                # Imports without a name (name is None) or with a name that
                # cannot be decoded are skipped.
                continue
    ret_list.append('\n'.join(imports))

    # export table
    if hasattr(pe, 'DIRECTORY_ENTRY_EXPORT'):
        # Skip exports that have no name instead of failing on None.
        exports = [
            s.name.decode('utf-8') for s in pe.DIRECTORY_ENTRY_EXPORT.symbols
            if s.name is not None
        ]
        exports_str = '\n'.join(exports)
    else:
        exports_str = ''
    ret_list.append(exports_str)

    # 'Dynamic Base', 'ASLR', 'High Entropy VA', 'Force Integrity',
    # 'Isolation', 'NX', 'SEH', 'CFG', 'RFG', 'SafeSEH', 'GS',
    # 'Authenticode', '.NET'
    if isUbuntu:
        checksec_lines = _run_tool_in_dir(
            os.getcwd() + '/winchecksec/build', ['./winchecksec', filepath])
        # Output lines of the form: <key>: "<value>"
        quoted_value = re.compile(r'([^:]+):\s"([^"]+)"')
        res_dict = {}
        for line in checksec_lines:
            m = quoted_value.match(line)
            if m:
                k = m.group(1).strip()
                res_dict[k if k != '.NET' else 'dotNET'] = m.group(2)

        for key in ('Dynamic Base', 'ASLR', 'High Entropy VA',
                    'Force Integrity', 'Isolation', 'NX', 'SEH', 'CFG',
                    'RFG', 'SafeSEH', 'GS', 'Authenticode', 'dotNET'):
            ret_list.append(res_dict[key])

    # Indexing (rather than zip) keeps the failure loud when fewer values
    # than HEADER names were collected.
    ret_dict = {}
    for i, column in enumerate(HEADER):
        ret_dict[column] = ret_list[i]

    return ret_dict, s256path