Beispiel #1
0
def _analyze_compressed_file(parent, node, path, nesting_level):
    """Populate ``node`` with metadata for ``path`` and recurse into archives.

    The MIME type, size, MD5, SHA-1 and ssdeep fuzzy hash of the file are
    stored in ``node``. The file is then handed to ``patoolib`` for extraction
    into a temporary directory; every regular file found at the top level of
    that directory is analyzed recursively and appended to
    ``node['compressed_children']``.

    :param parent: node dict of the containing archive, or ``None`` at the root
    :param node: dict that is filled in place
    :param path: filesystem path of the file to analyze
    :param nesting_level: depth of the caller; ``node`` is recorded one deeper
    :return: None
    """
    m_type = mime.from_file(path)
    size = os.path.getsize(path)

    # Feed both digests from a single pass over the file.
    m = md5()
    s = sha1()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            m.update(chunk)
            s.update(chunk)

    node['filename'] = os.path.basename(path)
    node['mime_type'] = m_type
    node['size'] = size
    node['md5'] = m.hexdigest()
    node['sha1'] = s.hexdigest()
    # The fuzzy hash is computed exactly once per file.
    node['fuzzy'] = ssdeep.hash_from_file(path)
    node['nesting_level'] = nesting_level + 1
    node['parent_hash'] = None if parent is None else parent.get('sha1')
    node['compressed_children'] = []

    # If this is a compressed file, analyze it recursively: extract into a
    # fresh directory, hash the extracted files, then delete them when done.
    tmpdir = tempfile.mkdtemp()
    try:
        # Brute force approach: the MIME type is not consulted, every file is
        # simply offered to the extractor.
        patoolib.extract_archive(path, outdir=tmpdir)

        for entry in os.listdir(tmpdir):
            entry_path = os.path.join(tmpdir, entry)
            if not os.path.isfile(entry_path):
                continue
            child = dict()
            _analyze_compressed_file(parent=node,
                                     node=child,
                                     path=entry_path,
                                     nesting_level=nesting_level + 1)
            node['compressed_children'].append(child)
    except Exception:
        # Deliberate best effort: most inputs are not archives, so a failed
        # extraction (or a failure while analyzing a child) just ends the
        # recursion here. KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    finally:
        # Remove the temporary extraction directory
        shutil.rmtree(tmpdir)
def main():
    """Command-line entry point: alter an MP3 and report ssdeep similarity.

    Parses ``originalFile``, ``--newFile`` and ``-m`` from the command line,
    requires both file names to end in ``.mp3``, calls ``mp3()`` to produce
    the modified file and prints how similar the ssdeep hashes of the two
    files are.

    :return: 0 on success, 1 when either file name lacks the ``.mp3`` extension
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("originalFile", help="File to antifuzz")
    parser.add_argument("--newFile", help="Name of the antifuzzed file")
    parser.add_argument(
        "-m",
        action='store_true',
        default=False,
        help=
        "Change the metadata of the file instead, will still change the ssdeep hash"
    )

    args = parser.parse_args()

    # Without --newFile the original file name is reused for the output.
    if args.newFile is None:
        args.newFile = args.originalFile

    if not args.originalFile.endswith('.mp3'):
        print("Please use a file with the .mp3 extension for your original file")
        return 1

    if not args.newFile.endswith('.mp3'):
        print("Please use a file with the .mp3 extension for your newfile")
        return 1

    ogFile = args.originalFile
    nFile = args.newFile

    # Hash the original before mp3() runs; both names may refer to the same file.
    ogHash = ssdeep.hash_from_file(ogFile)

    # Make changes to given file
    mp3(ogFile, nFile, args)

    # Hash new file
    newHash = ssdeep.hash_from_file(nFile)

    # Compare the hashes
    diff = str(ssdeep.compare(ogHash, newHash))

    print("The files are " + diff + "% similar")

    return 0
Beispiel #3
0
    def test_hash_from_file(self):
        """hash_from_file raises IOError for unusable paths and hashes a real file."""
        # A directory and a missing file must both be rejected with IOError.
        for bad_path in ("tests/files/",
                         "tests/files/file-does-not-exist.txt"):
            with pytest.raises(IOError):
                ssdeep.hash_from_file(bad_path)

        expected = "3:AXGBicFlgVNhBGcL6wCrFQE3:AXGHsNhxLsr2s"
        assert ssdeep.hash_from_file("tests/files/file.txt") == expected
Beispiel #4
0
    def _on_diff_type_diff(self, sender, ctx):
        """Build a summary dict for one diff entry and append it to ``self.diffs``.

        The operation code is derived from ``sender`` (``a_path``, ``b_path``,
        ``new_file``, ``deleted_file``, ``diff``). For each side that exists,
        the MD5, MIME type, ssdeep hash and size of the file on disk are
        recorded; when both sides have an ssdeep hash the decoded diff and the
        ssdeep similarity are added as well.

        NOTE(review): ``sender`` looks like a GitPython ``Diff`` object and
        ``ctx['a_path']`` / ``ctx['b_path']`` like ``pathlib.Path`` values
        (they are used with ``.is_file()``, ``/`` and ``.stat()``) -- confirm.

        :param sender: object describing the change
        :param ctx: mapping providing the ``a_path`` and ``b_path`` roots
        :return: None
        """
        if sender.a_path is None or sender.new_file:
            operation = 'A'  # Added
        elif sender.b_path is None or sender.deleted_file:
            operation = 'D'  # Deleted
        elif not sender.diff:
            # Both sides exist but there is no diff content.
            operation = 'R'  # Renamed
        else:
            operation = 'M'  # Modified

        data = {
            'operation': operation,
            # Relative paths to the repository
            'a_rel_path': sender.a_path,
            'b_rel_path': sender.b_path,
        }

        if sender.a_path is not None and operation != 'A':
            # ctx['a_path'] is either the file itself or a directory that
            # sender.a_path is relative to.
            if ctx['a_path'].is_file():
                a_fs_path = ctx['a_path']
            else:
                a_fs_path = ctx['a_path'] / sender.a_path

            # NOTE(review): a_ref is built from sender.a_path while b_ref below
            # is built from the resolved b_fs_path -- confirm this is intended.
            data['a_ref'] = os.fspath(sender.a_path)
            data['a_md5'] = utils.md5(a_fs_path)
            data['a_mime'] = magic.from_file(os.fspath(a_fs_path), mime=True)
            data['a_ssdeep'] = ssdeep.hash_from_file(os.fspath(a_fs_path))
            data['a_size'] = a_fs_path.stat().st_size
        else:
            data['a_size'] = 0

        if sender.b_path is not None and operation != 'D':
            # Same resolution rule as for the "a" side.
            if ctx['b_path'].is_file():
                b_fs_path = ctx['b_path']
            else:
                b_fs_path = ctx['b_path'] / sender.b_path

            # FIXME: parent $  ref when unpacking data['b_ref'] = utils.construct_path(sender.b_path, parent=ctx.get('b_ref'))
            data['b_ref'] = os.fspath(b_fs_path)
            data['b_md5'] = utils.md5(b_fs_path)
            data['b_mime'] = magic.from_file(os.fspath(b_fs_path), mime=True)
            data['b_ssdeep'] = ssdeep.hash_from_file(os.fspath(b_fs_path))
            data['b_size'] = b_fs_path.stat().st_size
        else:
            data['b_size'] = 0

        # A similarity score needs a non-empty ssdeep hash on both sides;
        # otherwise it is reported as 0.0 and no 'diff' key is stored.
        if data.get('a_ssdeep') and data.get('b_ssdeep'):
            data['diff'] = sender.diff.decode()
            data['similarity'] = ssdeep.compare(data['a_ssdeep'], data['b_ssdeep'])
        else:
            data['similarity'] = 0.0

        self.diffs.append(data)
Beispiel #5
0
def main(known_file, comparison, output_type):
    """
    The main function handles the main operations of the script.

    The known file is hashed with ssdeep and compared against either a single
    comparison file or every file found while walking a comparison directory;
    each result is passed to ``output``.

    :param known_file: path to known file
    :param comparison: path to look for similar files
    :param output_type: type of output to provide
    :return: None (exits with status 2 for an unsupported output type and
        status 1 when a path does not exist)
    """

    # Check output formats
    if output_type not in OUTPUT_OPTS:
        logger.error("Unsupported output format '{}' selected. Please "
                     "use one of {}".format(output_type,
                                            ", ".join(OUTPUT_OPTS)))
        sys.exit(2)
    elif output_type == 'csv':
        # Special handling for CSV headers
        print('"similarity","known_file","known_hash",'
              '"comp_file","comp_hash"')

    # Check provided file paths
    known_file = os.path.abspath(known_file)
    comparison = os.path.abspath(comparison)

    # Generate ssdeep signature for known file
    if not os.path.exists(known_file):
        # Report the path that is actually missing (the known file).
        logger.error("Error - path {} not found".format(known_file))
        sys.exit(1)

    known_hash = ssdeep.hash_from_file(known_file)

    # Generate and test ssdeep signature for comparison file(s)
    if os.path.isdir(comparison):
        # Process files in folders
        for root, _, files in os.walk(comparison):
            for f in files:
                file_entry = os.path.join(root, f)
                comp_hash = ssdeep.hash_from_file(file_entry)
                comp_val = ssdeep.compare(known_hash, comp_hash)
                output(known_file, known_hash, file_entry, comp_hash, comp_val,
                       output_type)

    elif os.path.isfile(comparison):
        # Process a single file; the compared file is ``comparison`` itself
        # (``file_entry`` only exists in the directory branch).
        comp_hash = ssdeep.hash_from_file(comparison)
        comp_val = ssdeep.compare(known_hash, comp_hash)
        output(known_file, known_hash, comparison, comp_hash, comp_val,
               output_type)
    else:
        logger.error("Error - path {} not found".format(comparison))
        sys.exit(1)
Beispiel #6
0
def comparetrees(dir1, dir2, diffs):
    """
    Compare all subdirectories and files in two directory trees
    Same files have a matching score of 100
    Symlinks have a matching score of 100
    Different files have a matching score calculated using ssdeep (0 to 100)

    Scores are appended to ``diffs`` and a line per entry is printed.
    """
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    comparedirs(dir1, dir2, diffs, names1, names2)
    common = intersect(names1, names2)
    missed = common[:]

    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            # Path shown to the user: path1 without its first component.
            shown = '/'.join(path1.split('/')[1:])
            # Context managers make sure both handles are closed again.
            with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
                while True:
                    bytes1 = file1.read(blocksize)
                    bytes2 = file2.read(blocksize)
                    if (not bytes1) and (not bytes2):  # same file
                        print('  100 matches ' + shown)
                        diffs.append(100)
                        break
                    if bytes1 != bytes2:  # different content
                        score = ssdeep.compare(ssdeep.hash_from_file(path1),
                                               ssdeep.hash_from_file(path2))
                        print(str(score).rjust(5) + ' differs ' + shown)
                        diffs.append(score)
                        break

    # recur to compare directories in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs)

    # same name but not both files or dirs (symlinks)
    for name in missed:
        diffs.append(100)
        print('    - ignored ' + name + ' (symlink)')
def ssdeep_hash(filename):
    """Return the ssdeep hash of ``filename``, or ``None`` if hashing fails.

    Any exception raised while hashing is logged (with traceback) instead of
    being propagated.
    """
    try:
        return ssdeep.hash_from_file(filename)
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions; format the
        # exception itself instead.
        logger.error("Error while computing ssdeep hash of file {} - {}".format(filename, e), exc_info=True)
        return None
Beispiel #8
0
def upload(request):
    """Handle the upload page.

    POST: store the uploaded file under an id returned by ``get_hash_str``,
    compute its ssdeep hash, store that in ``UploadFileMeta`` and answer with
    a JSON body containing the id.

    Any other method: render ``upload.html`` with an empty ``UploadForm``.
    Previously only GET created the form, so e.g. a HEAD request failed with
    an unbound ``upload_form``.
    """
    if request.method == "POST":
        up_file = request.FILES['upload_file']
        up_file_md5 = get_hash_str(up_file)
        UploadFile_obj = UploadFile(id=up_file_md5, upload_file=up_file)
        UploadFile_obj.save()
        # Location of the stored upload on disk.
        up_file_url = os.path.join(settings.MEDIA_ROOT,
                                   UploadFile_obj.upload_file.name)
        up_file_ssdeep = ssdeep.hash_from_file(up_file_url)
        sys.stderr.write(up_file_ssdeep)
        UploadFileMeta_obj = UploadFileMeta(id=up_file_md5,
                                            ssdeep=up_file_ssdeep)
        UploadFileMeta_obj.save()

        response = {'status': 200, 'pk': up_file_md5}
        return HttpResponse(json.dumps(response),
                            content_type='application/json')

    ctx = {
        'upload_form': UploadForm(),
    }

    return render(request, 'upload.html', ctx)
def get_hash(pe, filename):
    """Compute the import hash and several digests of a file.

    :param pe: object providing ``get_imphash()``
    :param filename: path of the file to hash
    :return: tuple ``(md5, sha1, imphash, ssdeep, sha256, sha512)``
    """
    # Import Hash
    ih = pe.get_imphash()

    m = hashlib.md5()
    s = hashlib.sha1()
    s2 = hashlib.sha256()
    s5 = hashlib.sha512()

    # One pass over the file feeds all four digests; the context manager
    # closes the handle, which the previous code never did.
    with open(filename, 'rb') as fh:
        while True:
            data = fh.read(8192)
            if not data:
                break

            m.update(data)
            s.update(data)
            s2.update(data)
            s5.update(data)

    md5 = m.hexdigest()
    sha1 = s.hexdigest()
    sha2 = s2.hexdigest()
    sha5 = s5.hexdigest()

    hashdeep = ssdeep.hash_from_file(filename)
    return md5, sha1, ih, hashdeep, sha2, sha5
Beispiel #10
0
def hash_calc(malware_path):
    """Print the path and return ``(imphash, ssdeep hash, sha1 hex digest)``."""
    print(malware_path)
    pe = pefile.PE(malware_path)
    imp_hash = pe.get_imphash()
    ssdeep_hash = ssdeep.hash_from_file(malware_path)
    # Close the handle explicitly instead of leaking it.
    with open(malware_path, 'rb') as fh:
        sha = sha1(fh.read()).hexdigest()
    return imp_hash, ssdeep_hash, sha
Beispiel #11
0
def malwaresignature(input_malware):
    """Collect identifying data for one sample whose header matches ``filetypes``.

    The first 32 bytes of the file are checked against every entry of
    ``filetypes``. On the first match the creation time, PE details, ssdeep
    hash, SHA-256, import hash, parser warnings and entropy are gathered, the
    name and SHA-256 are written to ``nameandsha.json`` and a tuple is
    returned. Without a match the function returns ``None``.

    :param input_malware: path of the sample
    :return: ``(path, iso_time, sha256, machine, imported_dlls, imphash,
        ssdeep_hash, warnings, entropy)`` or ``None``
    """
    sample_name = os.path.basename(input_malware)
    with open(input_malware, 'rb') as handle:
        magic_bytes = handle.read(32)
        for signature in filetypes:
            if signature not in magic_bytes:
                continue

            print("Loading...")
            created = datetime.fromtimestamp(os.path.getctime(input_malware),
                                             timezone.utc)
            iso_stamp = created.astimezone().isoformat()
            pe_record = pefile.PE(input_malware)
            fuzzy = ssdeep.hash_from_file(input_malware)
            dll_names = [
                entry.dll.decode('utf-8')
                for entry in pe_record.DIRECTORY_ENTRY_IMPORT
            ]
            machine = pe_record.FILE_HEADER.Machine

            sha256 = hashlib.sha256()
            with open(input_malware, 'rb') as sample:
                sha256.update(sample.read())
                digest = sha256.hexdigest()

            import_hash = pe_record.get_imphash()
            pe_warnings = pe_record.get_warnings()

            with open('nameandsha.json', 'w') as out:
                out.write(json.dumps({"Name of sample: ": sample_name,
                                      "Hash Value: ": digest}))

            ent = entropy(input_malware)
            print("Done")

            return (input_malware, iso_stamp, digest, machine, dll_names,
                    import_hash, fuzzy, pe_warnings, ent)
Beispiel #12
0
def hashFileCreator():
    """Shrink a fixed screenshot to a 10-colour 100x100 GIF and print its ssdeep hash."""
    screenshot = Image.open('/root/Desktop/vnchash/ubuntu/ubuntu200.17.220.25%3A02.jpg')
    screenshot = screenshot.resize((100, 100))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=10)
    screenshot.save('/root/Desktop/vnchash/ubuntu/compressed.gif')
    # Named ``fuzzy_hash`` so the builtin ``hash`` is not shadowed.
    fuzzy_hash = ssdeep.hash_from_file('/root/Desktop/vnchash/ubuntu/compressed.gif')
    print(fuzzy_hash)
Beispiel #13
0
def scan(filelist):
    """Compute and split the ssdeep hash of every file in ``filelist``.

    Each file gets up to five attempts; a failed attempt is printed and
    followed by a three second pause. Files that never succeed are left out.

    :param filelist: iterable of file paths
    :return: ``(results, metadata)`` where ``results`` is a list of
        ``(path, doc)`` tuples and ``metadata`` carries NAME and TYPE
    """
    results = []
    for fname in filelist:
        # Ran into a weird issue with file locking, retrying fixes it
        for _attempt in range(5):
            try:
                fuzzy = ssdeep.hash_from_file(fname)
                block_size, chunk, double_chunk = fuzzy.split(':')
                doc = {
                    'ssdeep_hash': fuzzy,
                    'chunksize': int(block_size),
                    'chunk': chunk,
                    'double_chunk': double_chunk,
                    'analyzed': 'false',
                    'matches': {},
                }
                results.append((fname, doc))
            except Exception as e:
                print('ssdeeper:', e)
                time.sleep(3)
            else:
                break

    metadata = {"Name": NAME, "Type": TYPE, "Include": False}
    return (results, metadata)
Beispiel #14
0
    def gen_ssdeep_hash(self, filepath, exclude=False):
        """Hash every file directly inside ``filepath`` according to ``self.cluster_type``.

        Depending on the cluster type the ssdeep hash is taken over the output
        of the ``strings`` command, over the file itself, or over the value
        returned by ``imp_exp_functions``. Each non-empty hash is written to a
        numbered file under ``self.tmpdir`` and a record describing the source
        file is appended to ``self.ssdeep_stats[hash]``.

        :param filepath: directory to scan; it is concatenated with the entry
            names as-is, so it must end with a path separator
        :param exclude: stored in each record as 1 (true) or 0 (false)
        :return: None
        """
        for name in os.listdir(filepath):
            full_path = filepath + name
            if not os.path.isfile(full_path):
                print("[+] WARNING: %s is not a file and will not analysis it. " % full_path)
                continue

            tmp_ssdeep_hash = ''
            if self.cluster_type == 'strings_ssdeep':
                # NOTE(review): the path is interpolated into a shell command
                # unquoted; file names containing shell metacharacters are
                # executed by the shell. Consider subprocess with an argument
                # list.
                pipe = os.popen('strings %s' % full_path)
                try:
                    data = pipe.read()
                finally:
                    pipe.close()
                tmp_ssdeep_hash = ssdeep.hash(data)
            elif self.cluster_type == 'file_ssdeep':
                tmp_ssdeep_hash = ssdeep.hash_from_file(full_path)
            elif self.cluster_type == 'imp_exp_ssdeep':
                imp_exp_str = imp_exp_functions(full_path)
                if imp_exp_str:
                    tmp_ssdeep_hash = ssdeep.hash(imp_exp_str)

            if not tmp_ssdeep_hash:
                continue

            # Persist the hash in its own numbered file; the context manager
            # closes it even if the write fails.
            dst_file = self.tmpdir + str(self.count)
            with open(dst_file, 'w') as f:
                f.write(tmp_ssdeep_hash)
            self.count += 1

            if tmp_ssdeep_hash not in self.ssdeep_stats:
                self.ssdeep_stats[tmp_ssdeep_hash] = []
            self.ssdeep_stats[tmp_ssdeep_hash].append({
                'file_path': full_path,
                'file_md5': file_md5(full_path),
                'cluster_type': self.cluster_type,
                'exclude': 1 if exclude else 0,
            })
def getmalwaresignature(input_malware):
    """Print and record details for every matching file in a directory.

    For each entry of ``input_malware`` whose first 32 bytes contain one of
    the ``filetypes`` signatures, the file is parsed with ``pefile`` and its
    ssdeep hash, imported DLLs, machine type, timestamp, data directories,
    SHA-256, import hash and parser warnings are printed. Results are appended
    to ``fuzzyhashlist.csv``, ``hashes.json``, ``warnings.json`` and
    ``info.json`` in the current working directory.

    :param input_malware: path of the directory holding the samples
    :return: None
    """

    malwares_files = os.listdir(input_malware)
    for malware in malwares_files:
        malware_file = os.path.join(input_malware, malware)
        with open(malware_file, 'rb') as f:
            # Only the first 32 bytes are inspected for a known signature.
            header = f.read(32)
            # NOTE: a file matching several signatures is processed once per match.
            for call in filetypes:
                if call in header:
                    record = pefile.PE(malware_file)
                    fuzzyhash = ssdeep.hash_from_file(malware_file)
                    nameandfuzzy = {malware: fuzzyhash}
                    print("Fuzzy hash of file: " + malware, fuzzyhash)
                    # Append "name,hash" to the running CSV list.
                    with open('fuzzyhashlist.csv', 'a') as e:
                        for key in nameandfuzzy.keys():
                            e.write("%s,%s\n" % (key, nameandfuzzy[key]))

                    # ``access`` stays None when the import table is empty; it
                    # is used below to decide whether basic data is recorded.
                    access = None
                    importeddlls = []
                    for access in record.DIRECTORY_ENTRY_IMPORT:
                        dlls = access.dll.decode('utf-8')
                        print(dlls + "\n")
                        importeddlls.append(dlls)
                    # Any machine value other than 0x14c is reported as 64-bit.
                    if hex(record.FILE_HEADER.Machine) == '0x14c':
                        print("This is a 32-bit binary")
                    else:
                        print("This is a 64-bit binary")
                    # Takes the text between '[' and the final character of the
                    # dumped TimeDateStamp value.
                    timestamp = (record.FILE_HEADER.dump_dict()['TimeDateStamp']['Value'].split('[')[1][:-1])
                    print("Timestamp " + timestamp)
                    for optional in record.OPTIONAL_HEADER.DATA_DIRECTORY:
                        print(optional.name, str(optional.Size), str(optional.VirtualAddress) + '\n')

                    # SHA-256 over the complete file content.
                    hashmethod = hashlib.sha256()
                    with open(malware_file, 'rb') as malwarefile:
                        reader = malwarefile.read()
                        hashmethod.update(reader)
                        print("The file name is: ", malware + " The sha256 hash of the file is:", hashmethod.hexdigest())
                    hashes = {"Name of malware: ": malware, "Imphash:": record.get_imphash(), "SHA256:": hashmethod.hexdigest()}
                    warnings = record.get_warnings()
                    warnstring = {"Name of file :": malware, " Warning :": (str(warnings).strip('[]'))}
                    basicfiledata = None
                    if access:
                        basicfiledata = {"Name of Malware:": malware, "Imported DLLs": importeddlls, "Arch": record.FILE_HEADER.Machine, "Timestamp": timestamp}
                        print(basicfiledata)
                    cprint(figlet_format(
                        "Check the CSV and Json files.",
                        font='digital'), color='green')

                    # Each output file receives one JSON document per line.
                    # NOTE: ``f`` is rebound here, shadowing the header handle above.
                    with open('hashes.json', 'a') as f:
                        hashdata = json.dumps(hashes)
                        f.write(hashdata + "\n")

                    with open('warnings.json', 'a') as f:
                        warndata = json.dumps(warnstring)
                        f.write(warndata + "\n")

                    with open('info.json', 'a') as outfile:
                        if basicfiledata:
                            jdata = json.dumps(basicfiledata)
                            outfile.write(jdata + "\n")
Beispiel #16
0
def get_ssdeep(file_path):
    """
    Compute the ssdeep fuzzy hash of a file.

    :param file_path: absolute path of desired file
    :return: the value produced by ``ssdeep.hash_from_file`` for that path
    """
    fuzzy_hash = ssdeep.hash_from_file(file_path)
    return fuzzy_hash
def hash(fileSystemPosition):
    """Return the ssdeep hash of a file, or the string ``"(None)"`` on any error.

    The error is printed before the placeholder is returned.
    """
    try:
        return ssdeep.hash_from_file(fileSystemPosition)
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions, which made the
        # handler itself fail; print the exception text instead.
        print(str(e))
        return "(None)"
Beispiel #18
0
def comparetrees(dir1, dir2, diffs):
    """
    Compare all subdirectories and files in two directory trees
    Same files have a matching score of 100
    Symlinks have a matching score of 100
    Different files have a matching score calculated using ssdeep (0 to 100)

    Scores are appended to ``diffs`` and a line per entry is printed.
    """
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    comparedirs(dir1, dir2, diffs, names1, names2)
    common = intersect(names1, names2)
    missed = common[:]

    # compare contents of files in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            # Path shown to the user: path1 without its first component.
            shown = '/'.join(path1.split('/')[1:])
            # Context managers make sure both handles are closed again.
            with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
                while True:
                    bytes1 = file1.read(blocksize)
                    bytes2 = file2.read(blocksize)
                    if (not bytes1) and (not bytes2):   # same file
                        print('  100 matches ' + shown)
                        diffs.append(100)
                        break
                    if bytes1 != bytes2:    # different content
                        score = ssdeep.compare(ssdeep.hash_from_file(path1),
                                               ssdeep.hash_from_file(path2))
                        print(str(score).rjust(5) + ' differs ' + shown)
                        diffs.append(score)
                        break

    # recur to compare directories in common
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs)

    # same name but not both files or dirs (symlinks)
    for name in missed:
        diffs.append(100)
        print('    - ignored '+name+' (symlink)')
Beispiel #19
0
def get_info(filepath):
    """Return a dict with the size, MD5, SHA-1, ssdeep hash and ``file`` type of a file.

    :param filepath: path of the file to describe
    :return: dict with keys ``size``, ``md5``, ``sha1``, ``ssdeep``, ``type``
    """
    result = {}
    result['size'] = get_filesize(filepath)
    # Read the content once and close the handle; the previous code opened
    # the file twice and never closed either handle.
    with open(filepath, 'rb') as fh:
        content = fh.read()
    result['md5'] = hashlib.md5(content).hexdigest()
    result['sha1'] = hashlib.sha1(content).hexdigest()
    result['ssdeep'] = ssdeep.hash_from_file(filepath)
    # NOTE(review): the path is interpolated into a shell command unquoted;
    # paths with spaces or shell metacharacters will misbehave.
    result['type'] = (getoutput('file %s' % filepath).split('%s: ' % filepath)[1])
    return result
Beispiel #20
0
def hash(fileSystemPosition):
    """Return the ssdeep hash of a file, or the string ``"(None)"`` on any error.

    The error is printed before the placeholder is returned.
    """
    try:
        return ssdeep.hash_from_file(fileSystemPosition)
    except Exception as e:
        # ``e.message`` does not exist on Python 3 exceptions, which made the
        # handler itself fail; print the exception text instead.
        print(str(e))
        return "(None)"
def directoryController(known, comparison):
    """
    Hash every file below a directory and compare it with a known file.

    :param known: str path to the known file
    :param comparison: str path to the comparison directory
    :return: list of dictionaries (``file_path``, ``similarity``), one per
        file that could be hashed
    """

    logging.info('Processing Directory')

    known_hash = ssdeep.hash_from_file(known)

    # Collect all paths up front so the progress bar knows its maximum.
    files_to_process = [
        os.path.abspath(os.path.join(root, file_entry))
        for root, _dirs, files in os.walk(comparison)
        for file_entry in files
    ]

    pbar = progressbar.ProgressBar(
        widgets=[
            progressbar.Bar(), ' ',
            progressbar.SimpleProgress(), ' ',
            progressbar.ETA()
        ],
        maxval=len(files_to_process))
    pbar.start()

    compared_hashes = []
    for count, file_path in enumerate(files_to_process):
        try:
            comparison_hash = ssdeep.hash_from_file(file_path)
        except IOError as e:
            # Unreadable files are logged and left out of the result.
            logging.error('Could not open ' + file_path + ' | ' + str(e))
        else:
            similarity = ssdeep.compare(known_hash, comparison_hash)
            compared_hashes.append({
                'file_path': file_path,
                'similarity': similarity
            })
        pbar.update(count)

    pbar.finish()
    return compared_hashes
Beispiel #22
0
def hashgen(path, fileName):
    """Return the ssdeep hash of a 10x10, 3-colour GIF rendition of an image.

    The reduced image is written to ``path + 'compressed.gif'`` and removed
    again afterwards, also when hashing fails.

    :param path: directory prefix; concatenated as-is, so it must end with a
        path separator
    :param fileName: name of the image inside ``path``
    :return: ssdeep hash of the reduced image
    """
    tmp_path = path + 'compressed.gif'
    screenshot = Image.open(path + fileName)
    screenshot = screenshot.resize((10, 10))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=3)
    screenshot.save(tmp_path)
    try:
        return ssdeep.hash_from_file(tmp_path)
    finally:
        # Do not leave the temporary GIF behind if hashing raises.
        os.remove(tmp_path)
 def compute_fuzzy_hash(self, file):
     """Store or clear the ssdeep hash of ``file`` depending on its size.

     Files smaller than 4 KiB get their fuzzy hash entries deleted; all other
     files get the ssdeep hash of ``file.absolute_path`` added.
     """
     size_in_bytes = os.path.getsize(file.absolute_path)
     if size_in_bytes < 4 * 1024:
         file.delete_fuzzy_hash_entries(file)
     else:
         file.add_fuzzy_hash(file,
                             ssdeep.hash_from_file(file.absolute_path))
Beispiel #24
0
 def display_hashes(self, filepath, data, pe):
     """Print the md5, sha1 and sha256 of ``data``, then the imphash and ssdeep hash."""
     row = "%-14s %s"
     for algo in ("md5", "sha1", "sha256"):
         digest = getattr(hashlib, algo)(data).hexdigest()
         print(row % (algo.upper() + ":", digest))
     print(row % ("IMPHASH:", pe.get_imphash()))
     print(row % ("SSDEEP:", ssdeep.hash_from_file(filepath)))
Beispiel #25
0
    def fileinfo(self, path: str) -> Dict:
        """Return digests, ssdeep hash and a refined type for the file at ``path``.

        The dict from ``get_digests_for_file`` is extended with an ``ssdeep``
        entry and its ``type`` entry is refined through a series of
        type-specific checks (empty file, zip content, CaRT, DOS executables,
        yara rules, encrypted office documents, encrypted or portfolio PDFs).

        :param path: path of the file to identify
        :return: the digest dict with ``ssdeep`` added and ``type`` refined
        """
        path = safe_str(path)
        data = get_digests_for_file(path, on_first_block=self.ident)
        data["ssdeep"] = ssdeep.hash_from_file(path)

        # Check if file empty
        if not int(data.get("size", -1)):
            data["type"] = "empty"

        # Further identify zip files based on their content
        elif data["type"] in [
                "archive/zip", "java/jar", "document/office/unknown"
        ]:
            data["type"] = zip_ident(path, data["type"])

        # Further check CaRT files, they may have an explicit type set
        elif data["type"] == "archive/cart":
            data["type"] = cart_ident(path)

        # Further identify dos executables as this may be a PE that has been misidentified
        elif data["type"] == "executable/windows/dos":
            data["type"] = dos_ident(path)

        # If we have so far failed to identify the file, run the yara rules
        elif "unknown" in data["type"] or data["type"] == "text/plain":
            data["type"] = self.yara_ident(path, data, fallback=data["type"])

        # Extra checks for office documents
        #  - Check for encryption
        if data["type"] in [
                "document/office/word",
                "document/office/excel",
                "document/office/powerpoint",
                "document/office/unknown",
        ]:
            try:
                # Keep the handle open while msoffcrypto inspects the file,
                # then close it (it used to be leaked).
                with open(path, "rb") as office_fh:
                    msoffcrypto_obj = msoffcrypto.OfficeFile(office_fh)
                    if msoffcrypto_obj and msoffcrypto_obj.is_encrypted():
                        data["type"] = "document/office/passwordprotected"
            except Exception:
                # If msoffcrypto can't handle the file to confirm that it is/isn't password protected,
                # then it's not meant to be. Moving on!
                pass

        # Extra checks for PDF documents
        #  - Check for encryption
        #  - Check for PDF collection (portfolio)
        if data["type"] == "document/pdf":
            # Password protected documents typically contain '/Encrypt'
            with open(path, "rb") as pdf_fh:
                pdf_content = pdf_fh.read()
            if re.search(b"/Encrypt", pdf_content):
                data["type"] = "document/pdf/passwordprotected"
            # Portfolios typically contain '/Type/Catalog/Collection
            elif re.search(b"/Type/Catalog/Collection", pdf_content):
                data["type"] = "document/pdf/portfolio"

        return data
def fileController(known, comparison):
    """
    Fuzzy hash two files and compare them.

    :param known: path to known file to use for comparison
    :param comparison: path of the file to compare against the known file
    :return: dictionary of file_path and similarity for output
    """

    logging.info('Processing File')

    # Hash the known file first, then the comparison file.
    hashes = [ssdeep.hash_from_file(p) for p in (known, comparison)]
    similarity = ssdeep.compare(*hashes)

    result = {'file_path': os.path.abspath(comparison)}
    result['similarity'] = similarity
    return result
def getHashes(filename):
	"""Print the name, MD5, SHA1, SHA256, SHA512, ssdeep hash and size of a file.

	The content is read once, in binary mode, and every digest is computed
	from that same buffer. Previously ``file.read()`` was called once per
	digest, so every digest after the first was computed over empty data.
	"""
	with open(filename, "rb") as handle:
		data = handle.read()
	print("File Name:\t %s" % filename)
	print("MD5:\t\t %s" % hashlib.md5(data).hexdigest())
	print("SHA1:\t\t %s" % hashlib.sha1(data).hexdigest())
	print("SHA256:\t\t %s" % hashlib.sha256(data).hexdigest())
	print("SHA512:\t\t %s" % hashlib.sha512(data).hexdigest())
	print("SSDeep:\t\t %s" % str(ssdeep.hash_from_file(filename)))
	print("File Size:\t %s bytes" % os.path.getsize(filename))
def add_db_record(cursor, filename, quiet):
    """
    Insert one row describing ``filename`` into the ``hashes`` table.

    The row holds the host name, absolute path, size, permissions, owner,
    group, ssdeep hash, MD5, SHA1 and libmagic file type. FIFOs are not read;
    their hash columns contain the string "FIFO". Files that cannot be read
    get "PERMISSION DENIED" in the affected hash columns.

    :param cursor: database cursor used for the INSERT
    :param filename: path of the file to record
    :param quiet: when False, a progress line is printed
    :return: False if the file cannot be stat'ed, True otherwise
    """
    skip_hash = False

    absolute = os.path.abspath(filename)
    try:
        tempstat = os.stat(absolute)
        perms = oct(tempstat.st_mode)
        owner = tempstat.st_uid
        group = tempstat.st_gid
        size = tempstat.st_size
    except OSError as err:
        print("[-] Couldn't open %s: %s" % (absolute, err))
        return False

    # Skip hashing if the file is a FIFO, because the script will
    # just hang forever trying to read data to calculate a hash.
    # The stat result from above is reused instead of stat'ing again.
    if stat.S_ISFIFO(tempstat.st_mode):
        skip_hash = True
        fuzzy_hash = "FIFO"
        md5digest = "FIFO"
        sha1digest = "FIFO"

    # Determine file type with libmagic
    filetype = magic.detect_from_filename(absolute).name

    if quiet is False:
        print("[+] Adding %s -- %s" % (filename, filetype))

    if skip_hash is False:
        # Calculate ssdeep hash
        try:
            fuzzy_hash = ssdeep.hash_from_file(absolute)
        except IOError:
            fuzzy_hash = "PERMISSION DENIED"
        except UnicodeDecodeError:
            fuzzy_hash = "UNICODE DECODE ERROR"

        # Read the content once, in binary mode, and close the handle. An
        # unreadable file is recorded the same way as for the ssdeep hash
        # instead of aborting with an unhandled IOError.
        try:
            with open(absolute, 'rb') as handle:
                content = handle.read()
        except IOError:
            md5digest = "PERMISSION DENIED"
            sha1digest = "PERMISSION DENIED"
        else:
            md5digest = hashlib.md5(content).hexdigest()
            sha1digest = hashlib.sha1(content).hexdigest()

    cursor.execute(
        "INSERT INTO hashes VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, DATETIME())",
        (HOSTNAME, absolute, size, perms, owner, group, fuzzy_hash, md5digest,
         sha1digest, filetype))

    return True
Beispiel #29
0
 def get_ssdeep(self):
     """Return ``{"ssdeep": <hash of self.file>}``, or ``{}`` when disabled or on error.

     Hashing only happens when ``args.ssdeep`` is set and the ``ssdeep`` module
     has been loaded. An IOError or ``ssdeep.InternalError`` is recorded in
     ``self.errors`` instead of being raised.
     """
     if not (args.ssdeep and 'ssdeep' in sys.modules):
         return {}
     try:
         return {"ssdeep": ssdeep.hash_from_file(self.file)}
     except IOError as e:
         self.errors.append(f"SSDeepIOError[{e.strerror}]")
     except ssdeep.InternalError as e:
         self.errors.append(f"SSDeepError[{e}]")
     return {}
Beispiel #30
0
	def bulk_ssdeep(lst=None):
		"""Append the ssdeep hash of every path in ``lst`` to ``ssdeepList``.

		Paths that cannot be read are reported and skipped. Previously the
		append still ran after an IOError, which raised a NameError for the
		first path or stored the previous file's hash again for later ones.
		"""
		for path in (lst if lst is not None else []):
			try:
				fileSsdeep = ssdeep.hash_from_file(path)
			except IOError as ioe:
				print("Error:\t" + str(ioe))
				continue

			ssdeepList.append(fileSsdeep)
Beispiel #31
0
def file_info(filename):
    """Return a list of human-readable description lines for ``filename``.

    The lines are, in order: file name, size in bytes, MIME type (via
    ``magic``), MD5 and SHA1 of the full contents, and -- only when the
    module-level flag ``ssdeep_r`` is truthy -- the ssdeep fuzzy hash.
    The whole file is read into memory to compute the digests.
    """
    with open(filename, 'rb') as handle:
        contents = handle.read()
        lines = [
            "File: {}".format(filename),
            "Size: {} bytes".format(os.path.getsize(filename)),
            "Type: {}".format(magic.from_file(filename, mime=True)),
            "MD5:  {}".format(hashlib.md5(contents).hexdigest()),
            "SHA1: {}".format(hashlib.sha1(contents).hexdigest()),
        ]
        if ssdeep_r:
            lines += ["ssdeep: {}".format(ssdeep.hash_from_file(filename))]
    return lines
Beispiel #32
0
	def file(inFile):
		"""Print the digest of ``inFile`` and, optionally, its ssdeep hash.

		The file is fed to the externally defined hash object ``h`` in
		``BLOCKSIZE``-byte chunks, then one tab-separated line
		``<absolute path>  (<hash_name>)  <hex digest>`` is printed. When
		``args.s == 'true'`` a second line with the ssdeep hash is printed.

		:param inFile: path of the file to hash.
		"""
		# NOTE(review): `h` is not created or reset here; if the same object is
		# reused for several files the digest accumulates -- confirm in caller.
		with open(inFile, 'rb') as afile:  # binary mode so bytes are hashed unmodified
			for chunk in iter(lambda: afile.read(BLOCKSIZE), b''):
				h.update(chunk)

		full_path = os.path.abspath(inFile)
		print(full_path + '\t' + '(' + hash_name + ')\t' + h.hexdigest())

		if args.s == 'true':
			# Hash the file this line is labelled with, not args.f.
			print(full_path + '\t' + '(ssdeep)\t' + ssdeep.hash_from_file(inFile))
Beispiel #33
0
 def run(self, args, data):
     """Display md5, sha1 and sha256 of the data given, plus file details.

     After the three digests of ``data`` this prints placeholder IMPHASH
     and compile-time rows, the ssdeep hash of ``args.PEFILE``, the length
     and libmagic type of ``data``, and the absolute path and base name of
     ``args.PEFILE``.
     """
     row = "%-14s %s"
     for algo in ("md5", "sha1", "sha256"):
         digest = getattr(hashlib, algo)(data).hexdigest()
         print(row % (algo.upper() + ":", digest))
     print(row % ("IMPHASH:", "(unavailable)"))
     print(row % ("SSDEEP:", ssdeep.hash_from_file(args.PEFILE)))
     print("Size:          %d bytes" % len(data))
     print("Type:          %s" % magic.from_buffer(data))
     print("Compile Time:  %s" % "(unavailable)")
     print("Observered Path: %s" % os.path.abspath(args.PEFILE))
     print("Observered Filename: %s" % ntpath.basename(args.PEFILE))
Beispiel #34
0
 def mountCrawler(self, fileName):
     """Hash ``fileName`` with ssdeep and hand the result to the settings object.

     The string ``"<hash>,<fileName>"`` is passed to
     ``self.settings.setHashListComp``. An empty file name is logged as an
     error and ignored; any exception raised while hashing or storing is
     logged as an error and swallowed. Always returns ``None``.
     """
     if fileName == "":
         self.logger.error("Reading Worker received an empty filename")
         return

     try:
         self.logger.info("Hashing {}...".format(fileName))
         fuzzy = ssdeep.hash_from_file(fileName)
         self.settings.setHashListComp("{},{}".format(fuzzy, fileName))
     except Exception as err:
         # Best effort: report the failure and carry on.
         self.logger.error("Reading the file failed with error: {}".format(err))
Beispiel #35
0
    def get_page_data(response):
        """Build a ``WebPage`` item from ``response``.

        The item always gets ``uri`` and ``status_code``. When
        ``response.meta`` contains a ``'screenshot'`` entry, the item also
        gets ``screenshot`` (that entry), ``ssdeep_pagesource`` (fuzzy hash of
        ``response.body``) and, if the screenshot file can be hashed,
        ``ssdeep_screenshot``.

        :param response: object exposing ``url``, ``status``, ``meta`` and ``body``.
        :return: the populated ``WebPage``.
        """
        page = WebPage()
        page['uri'] = response.url
        page['status_code'] = response.status

        if 'screenshot' in response.meta:
            screenshot = response.meta['screenshot']
            page['screenshot'] = screenshot
            # NOTE(review): the page-source hash is only stored when a
            # screenshot exists -- confirm this nesting is intentional.
            page['ssdeep_pagesource'] = str(ssdeep.hash(response.body))

            try:
                page['ssdeep_screenshot'] = ssdeep.hash_from_file(screenshot)
            except Exception:
                # Was a bare `except:`, which also trapped KeyboardInterrupt
                # and SystemExit; only ordinary errors are best-effort here.
                log.msg("Could not create hash from screenshot: " + screenshot, level=log.DEBUG)

        return page
Beispiel #36
0
def calculatehashes(directory, oldhashes=None):
    """Return a ``{entry name: ssdeep hash}`` dict for every entry in ``directory``.

    :param directory: directory whose entries (as returned by ``os.listdir``)
        are hashed.
    :param oldhashes: optional mapping of previously computed hashes keyed by
        entry name; entries found here are reused instead of being re-hashed.
        Names in ``oldhashes`` that are no longer in the directory are dropped.
    :return: a new dict; ``oldhashes`` is not modified.
    """
    # None instead of a shared `{}` default, which would be one dict object
    # reused across every call.
    if oldhashes is None:
        oldhashes = {}

    ourhashes = {}
    for f in os.listdir(directory):
        if f in oldhashes:
            # use previously-calculated hash
            ourhashes[f] = oldhashes[f]
        else:
            # calculate hash and store
            ourhashes[f] = ssdeep.hash_from_file(os.path.join(directory, f))

    return ourhashes
Beispiel #37
0
    def get_page_info(self):
        """Build a ``WebPage`` item from ``self.response``.

        The item always gets ``uri``, ``status_code``, ``useragent`` and
        ``referer`` (the last two read from ``response.meta`` and ``None``
        when absent). When ``response.meta`` contains a ``'screenshot'``
        entry, the item also gets ``screenshot``, ``ssdeep_pagesource``
        (fuzzy hash of the response body) and, if the screenshot file can be
        hashed, ``ssdeep_screenshot``.

        :return: the populated ``WebPage``.
        """
        response = self.response
        page = WebPage()
        page['uri'] = response.url
        page['status_code'] = response.status
        page['useragent'] = response.meta.get('User-Agent')
        page['referer'] = response.meta.get('Referer')

        if 'screenshot' in response.meta:
            screenshot = response.meta['screenshot']
            page['screenshot'] = screenshot
            # NOTE(review): the page-source hash is only stored when a
            # screenshot exists -- confirm this nesting is intentional.
            page['ssdeep_pagesource'] = str(ssdeep.hash(response.body))

            try:
                page['ssdeep_screenshot'] = ssdeep.hash_from_file(screenshot)
            except Exception:
                # Was a bare `except:`, which also trapped KeyboardInterrupt
                # and SystemExit; only ordinary errors are best-effort here.
                log.msg("Could not create hash from screenshot: " + screenshot, level=log.DEBUG)
        return page
Beispiel #38
0
 def get_detailes(self, data, _path):
     '''
     get general details of file

     Stores a deep copy of ``self.datastruct`` under ``data["Details"]`` and
     fills its ``"Properties"`` entry with the lower-cased base name, md5 /
     sha1 / sha256 / ssdeep hashes, human-readable and raw size, MIME type,
     guessed type and entropy of the file at ``_path``.

     data: dict that is updated in place
     _path: path of the file to describe
     '''
     data["Details"] = deepcopy(self.datastruct)
     # Read the contents once and close the handle deterministically; the
     # original leaked this handle and a second one opened only to read four
     # bytes that were never used.
     with open(_path, "rb") as handle:
         temp_f = handle.read()
     size_in_bytes = path.getsize(_path)
     data["Details"]["Properties"] = {"Name": path.basename(_path).lower(),
                                      "md5": md5(temp_f).hexdigest(),
                                      "sha1": sha1(temp_f).hexdigest(),
                                      "sha256": sha256(temp_f).hexdigest(),
                                      "ssdeep": hash_from_file(_path),
                                      "size": convert_size(size_in_bytes),
                                      "bytes": size_in_bytes,
                                      "mime": from_file(_path, mime=True),
                                      "extension": guess_type(_path)[0],
                                      "Entropy": get_entropy(temp_f)}
Beispiel #39
0
def calculatehashes(directory, oldhashes=None):
    """Return a ``{entry name: ssdeep hash}`` dict for every entry in ``directory``.

    :param directory: directory whose entries (as returned by ``os.listdir``)
        are hashed.
    :param oldhashes: optional mapping of previously computed hashes keyed by
        entry name; entries found here are reused instead of being re-hashed.
        Names in ``oldhashes`` that are no longer in the directory are dropped.
    :return: a new dict; ``oldhashes`` is not modified.
    """
    # None instead of a shared `{}` default, which would be one dict object
    # reused across every call.
    if oldhashes is None:
        oldhashes = {}

    ourhashes = {}
    for f in os.listdir(directory):
        if f in oldhashes:
            # use previously-calculated hash
            ourhashes[f] = oldhashes[f]
        else:
            # calculate hash and store
            # No '/' component here: os.path.join restarts at an absolute
            # component, so join(directory, '/', f) resolved to '/<f>' and
            # hashed a file in the filesystem root instead.
            ourhashes[f] = ssdeep.hash_from_file(os.path.join(directory, f))

    return ourhashes
Beispiel #40
0
def ProcessFile(path):
    """Print the ssdeep hash of ``path`` and return a status code.

    When the module-level flag ``ssdeep_python`` is truthy the Python
    binding is used; otherwise the ``ssdeep -b`` command-line tool is run and
    its stdout is passed through ``ParseOutput`` before printing.

    :param path: file to hash.
    :return: ``0`` on success, ``1`` if hashing raised any exception,
        ``2`` if ``path`` is not a regular file.
    """
    if not os.path.isfile(path):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print('{0} not a file!'.format(path))
        return 2

    try:
        if ssdeep_python:
            print(ssdeep.hash_from_file(path))
        else:
            p = Popen(["ssdeep", "-b", path], stdout=PIPE, stderr=PIPE)
            # stderr is captured only so it does not leak to the terminal.
            output, _ = p.communicate()
            print(ParseOutput(output))
    except Exception:
        # Callers only get the status code; the error itself is not reported.
        return 1

    return 0
Beispiel #41
0
def scan(filelist):
    """Compute the ssdeep hash of every file in ``filelist``.

    Each file is tried up to five times, sleeping three seconds after every
    failed attempt; a file that never succeeds is left out of the results.

    :return: ``(results, metadata)`` where ``results`` is a list of
        ``(filename, hash)`` tuples and ``metadata`` carries the module's
        ``NAME`` and ``TYPE`` plus ``"Include": False``.
    """
    results = []
    for fname in filelist:
        # Retry to ride out the transient file-locking failures noted by the
        # original author.
        for _attempt in range(5):
            try:
                results.append((fname, ssdeep.hash_from_file(fname)))
            except Exception as e:
                print('ssdeeper:', e)
                time.sleep(3)
            else:
                break

    metadata = {"Name": NAME, "Type": TYPE, "Include": False}
    return (results, metadata)
Beispiel #42
0
        sys.exit(1)
    else:
        return (dir1, dir2)
                
if __name__ == '__main__':
    # Script entry point: compare two directory trees, or two single files,
    # by ssdeep similarity and print the match score(s).
    dir1, dir2 = getargs()
    diffs = []
    totalscore = 0

    # command line arguments are both dirs
    # (`and`, not bitwise `&`: this is a boolean test and should short-circuit)
    if os.path.isdir(dir1) and os.path.isdir(dir2):
        print('\nSCORE RESULT  PATH')
        comparetrees(dir1, dir2, diffs)
        if not diffs:
            print('No diffs found\n')
        else:
            totalscore = sum(diffs)
            # Single-string print(...) produces the same output on Python 2
            # and 3; the spacing mirrors the old comma-separated prints.
            print('\nTotal files compared: ' + str(len(diffs)))
            # NOTE(review): `/` floors for ints on Python 2 but is true
            # division on Python 3 -- switch to `//` if scores are ints.
            print('Overall match score:  ' + str(totalscore / len(diffs)) + '%\n')
    else:
        try:
            # command line arguments are both files
            score = ssdeep.compare(ssdeep.hash_from_file(dir1), ssdeep.hash_from_file(dir2))
            print('Overall match score:  ' + str(score) + '%\n')

        except Exception:
            # Was a bare `except:`, which also turned Ctrl-C into this message.
            print('Invalid Files/Folders: Aborting...')
            sys.exit(1)

Beispiel #43
0
 def testComputeHashFromFile(self):
     # Hashing the fixture "test-file.txt" must yield this known ssdeep value.
     # (Plain comments rather than a docstring, so the test's reported
     # description stays unchanged.)
     expected = "3:AXGBicFlgVNhBGcL6wCrFQE3:AXGHsNhxLsr2s"
     actual = ssdeep.hash_from_file("test-file.txt")
     self.assertEqual(actual, expected)
 def get_ssdeep(self):
     """Return the ssdeep fuzzy hash of ``self.filepath``.

     Any exception raised while hashing is logged through
     ``self.logger.exception`` and ``None`` is returned instead.
     """
     try:
         return ssdeep.hash_from_file(self.filepath)
     except Exception as e:
         # Name the exception actually raised; the original formatted the
         # `Exception` base class itself, so every message started the same.
         self.logger.exception('%s: %s' % (type(e).__name__, e))
         return None
Beispiel #45
0
# Print a dHash for every image in the screenshot directory, after shrinking
# each one to 10x10 pixels and a 20-colour adaptive palette.
screenshotPath = '/root/Desktop/vnchash/arena/'

# NOTE(review): `file` and `hash` shadow builtins; the names are kept because
# they are module-level and other code may read them.
for file in os.listdir(screenshotPath):
    # os.path.join keeps working if the trailing slash is ever dropped.
    screenshot = Image.open(os.path.join(screenshotPath, file))
    screenshot = screenshot.resize((10, 10))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=20)

    hash = imagehash.dhash(screenshot)

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(str(hash) + '            ' + file)

def hashFileCreator():
    """Write a reduced copy of a fixed screenshot and print its ssdeep hash.

    The source image is resized to 100x100, converted to a 10-colour
    adaptive palette, saved as ``compressed.gif`` next to it, and the saved
    file is then hashed. Both paths are hard-coded.
    """
    screenshot = Image.open('/root/Desktop/vnchash/ubuntu/ubuntu200.17.220.25%3A02.jpg')
    screenshot = screenshot.resize((100, 100))
    screenshot = screenshot.convert('P', palette=Image.ADAPTIVE, colors=10)
    screenshot.save('/root/Desktop/vnchash/ubuntu/compressed.gif')
    fuzzy = ssdeep.hash_from_file('/root/Desktop/vnchash/ubuntu/compressed.gif')
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(fuzzy)

def compareHashes():
    """Print the ssdeep hashes of two fixed screenshots and their similarity.

    Three lines are printed: the first hash, the second hash, and the result
    of ``ssdeep.compare`` on the pair. Both paths are hard-coded.
    """
    # The original `def` line lacked its trailing colon, a SyntaxError that
    # prevented the whole module from loading.
    hashone = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg')
    hashtwo = ssdeep.hash_from_file('/root/Desktop/vnchash/win7/win7hash.jpg-temp.jpg')

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(hashone)
    print(hashtwo)

    print(ssdeep.compare(hashone, hashtwo))

compareHashes()