def subfile(self, filePath):
    # hachoir-subfile is a tool based on hachoir-parser to find subfiles in any binary stream.
    # Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-subfile

    # bypass sys.stdout, sys.stderr
    oldStdOut = sys.stdout
    oldStdErr = sys.stderr
    outputStdErr = StringIO.StringIO()
    outputStdOut = StringIO.StringIO()
    sys.stdout = outputStdOut
    sys.stderr = outputStdErr

    stream = FileInputStream(unicodeFilename(filePath), real_filename=filePath)

    # Search for subfiles
    subfile = SearchSubfile(stream, 0, None)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.main()

    # reset sys.stdout, sys.stderr
    sys.stdout = oldStdOut
    sys.stderr = oldStdErr

    # parse stdout, stderr from SearchSubfile
    return self.parse(outputStdOut.getvalue(), outputStdErr.getvalue())
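# A minimal standalone sketch of the pattern the method above wraps: run
# hachoir-subfile's SearchSubfile over a file and carve every detected subfile
# into an output directory. Only calls already used in these snippets
# (FileInputStream, unicodeFilename, SearchSubfile, loadParsers, setOutput,
# main) appear here; the import paths are an assumption based on the legacy
# Python 2 hachoir packages (hachoir_core / hachoir_subfile) and differ in
# hachoir 3, where everything lives under the single "hachoir" package.
from hachoir_core.stream import FileInputStream
from hachoir_core.cmd_line import unicodeFilename
from hachoir_subfile.search import SearchSubfile


def carve_subfiles(file_path, output_dir):
    # Open the container file as a hachoir input stream
    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
    # Scan from offset 0 with no size limit, using every registered parser
    searcher = SearchSubfile(stream, 0, None)
    searcher.loadParsers(categories=None, parser_ids=None)
    # Write each discovered subfile to output_dir instead of only reporting it
    searcher.setOutput(output_dir)
    # main() runs the scan; the snippets above treat its return value as a success flag
    return searcher.main()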
def save_response_binaries(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # create the orig file, ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            if not os.path.exists(output):
                os.mkdir(output)
            subfile.setOutput(output)
            ok = subfile.main()
            # save the files info to the db as well
        return True
    except Exception, ex:
        return False
def convert_gzip_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # create the orig file, ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode, convert it to a regular string for the hachoir operation
            file_path = str(file_path)
            try:
                stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)
            http_details = filter(lambda x: x.flow_details.id == detail.id,
                                  HTTPDetails.objects.filter(http_type="response"))
            file_ext = ".txt"
            for http in http_details:
                if http.content_type:
                    filename = subfile.output.createFilename(file_ext)
                    if http.content_encoding == "gzip":
                        r = open("/".join([output, filename]), "r")
                        body = r.read()
                        r.close()
                        data = StringIO.StringIO(body)
                        gzipper = gzip.GzipFile(fileobj=data)
                        html = gzipper.read()
                        filename = filename.split(".")[0] + ".html"
                        w = open("/".join([output, filename]), "w")
                        w.write(html)
                        w.close()
        return True
    except Exception, ex:
        print ex
        return False
def EXTRACT_EMBEDDED(s, buff):
    EXTRACT_FILES = {}
    CHILD_BUFF = {}

    stream = StringInputStream(buff)
    subfile = SearchSubfile(stream)
    subfile.loadParsers(categories=None, parser_ids=None)
    subfile.stats = {}
    subfile.next_offset = None
    counter = 0
    last_start = 0
    last_end = 0

    while subfile.current_offset < subfile.size:
        subfile.datarate.update(subfile.current_offset)
        for offset, parser in subfile.findMagic(subfile.current_offset):
            # Don't care about extracting the base file, just what's within it.
            # False positives often return sizes exceeding the size of the file;
            # they may also lack a content size entirely, so weed them out.
            if offset != 0 and parser.content_size != subfile.size \
                    and parser.content_size < subfile.size and parser.content_size:
                start = offset // 8
                end = start + parser.content_size // 8
                # Make sure we aren't pulling subfiles out of ones we are already
                # extracting; that happens later anyway when the module is run
                # again on the returned 'Buffer' value.
                if start >= last_end:
                    EXTRACT_FILES['Object_%s' % counter] = OrderedDict([
                        ('Start', '%s bytes' % start),
                        ('End', '%s bytes' % end),
                        ('Description', parser.description),
                        ('Buffer', buff[start:end]),
                    ])
                    counter += 1
                    last_start = start
                    last_end = end
        subfile.current_offset += subfile.slice_size
        if subfile.next_offset:
            subfile.current_offset = max(subfile.current_offset, subfile.next_offset)
        subfile.current_offset = min(subfile.current_offset, subfile.size)

    return EXTRACT_FILES
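# A hypothetical invocation of EXTRACT_EMBEDDED as defined above: read a raw
# buffer and walk the returned dictionary. The first argument is not used by
# the function body, so None is passed here; "suspect.bin" is a placeholder
# path, and every key accessed below mirrors the OrderedDict built above.
# Note that findMagic() reports offsets and sizes in bits, which is why the
# function divides by 8 before slicing the byte buffer.
with open("suspect.bin", "rb") as fh:
    buff = fh.read()

for name, obj in EXTRACT_EMBEDDED(None, buff).items():
    print("%s: %s (%s -> %s)" % (name, obj['Description'], obj['Start'], obj['End']))
    # obj['Buffer'] holds the carved bytes; re-running the module on that
    # buffer finds subfiles nested inside this object.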
def save_response_files(self, path, hash_value):
    try:
        flow = Flow.objects.get(hash_value=hash_value)
        flow_details = flow.details
        for detail in flow_details:
            # create the orig file, ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
            source_str = ":".join([detail.src_ip, str(detail.sport)])
            destination_str = ":".join([detail.dst_ip, str(detail.dport)])
            flow_str = "-".join([source_str, destination_str])
            resp_file = "_".join(["contents", flow_str, "resp.dat"])
            file_path = "/".join([path, resp_file])
            # path is created as unicode, convert it to a regular string for the hachoir operation
            file_path = str(file_path)

            strings = ["Content-Type: text/html",
                       "Content-Type: application/x-javascript",
                       "Content-Type: text/css"]
            file_handler = FileHandler()
            responses = []
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                responses.append(item[0])

            empty_lines = []
            strings = ["\r\n\r\n"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                empty_lines.append(item[0])

            http_lines = []
            strings = ["HTTP/1.1"]
            search_li = file_handler.search(file_path, strings)
            if not search_li:
                continue
            for item in search_li:
                http_lines.append(item[0])

            try:
                stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
            except NullStreamError:
                continue
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers()
            root = "/".join([path, "html-files"])
            if not os.path.exists(root):
                os.makedirs(root)
            output = "/".join([root, flow_str])
            output = str(output)
            subfile.setOutput(output)

            for x in range(len(responses)):
                # here I have the request header
                data = file_handler.data
                #f = data[empty_lines[x]:http_lines[x+1]]
                file_ext = ".txt"
                #if ("html" in f or "body" in f):
                #    file_ext = ".html"
                #elif ("script" in f):
                #    file_ext = ".js"
                #else:
                # select the closest empty line
                empty_lines.append(responses[x])
                empty_lines.sort()
                index = empty_lines.index(responses[x])
                offset = empty_lines[index + 1]
                size = None
                try:
                    size = http_lines[x + 1] - 2
                except IndexError:
                    size = stream.size
                f = data[offset + 4:size]
                filename = subfile.output.createFilename(file_ext)
                w = open("/".join([output, filename]), "w")
                w.write(f)
                w.close()

            # saving the hachoir-saved binaries to the db along with the created txt files
            if detail.protocol == "http":
                http_files = os.listdir(output)
                #http_files = filter(lambda x: x.split(".")[-1] != 'txt', http_files) # no need to take the txt files
                if len(http_files) > 0:
                    http_li = filter(lambda x: x.flow_details.id == detail.id, HTTPDetails.objects.all())
                    for http in http_li:
                        http.files = http_files
                        http.save()
        return True
    except Exception, ex:
        print ex
        return False
def file_subfiles(filename):
    if filename and filename != "":
        offset, size, memorylimit, filemaxsize = 0, 999999, 50 * 1024 * 1024, 100 * 1024 * 1024
        stream = FileInputStream(unicodeFilename(filename), real_filename=filename)
        subfile = SearchSubfile(stream, offset, size)
        try:
            subfile.loadParsers()
            subfile.stats = {}
            subfile.verbose = False
            subfile.next_offset = None
            subfiles = []
            while subfile.current_offset < subfile.size:
                _ = subfile.datarate.update(subfile.current_offset)
                for offset, parser in subfile.findMagic(subfile.current_offset):
                    try:
                        size = parser.content_size // 8 if parser.content_size else None
                    except Exception as ex:
                        size = None
                    try:
                        description = parser.description if not (parser.content_size) or \
                            parser.content_size // 8 < filemaxsize else parser.__class__.__name__
                    except Exception as ex:
                        description = None
                    offset = offset // 8
                    # skip the first subfile
                    # as it's the original file itself
                    if offset == 0:
                        continue
                    with open(filename, "rb") as fo:
                        filedata = fo.read()
                    mimetype = data_mimetype(filedata[offset:offset + size]) \
                        if offset > 0 and size and size > 0 else None
                    md5 = data_hashes(filedata[offset:offset + size], "md5") \
                        if offset >= 0 and size > 0 else None
                    sha256 = data_hashes(filedata[offset:offset + size], "sha256") \
                        if (offset or offset == 0) and size else None
                    ssdeep = data_hashes(filedata[offset:offset + size], "ssdeep") \
                        if (offset or offset == 0) and size else None
                    subfiles.append({
                        "offset": offset,
                        "size": size,
                        "mimetype": mimetype,
                        "description": description,
                        "hashes": {
                            "md5": md5,
                            "sha256": sha256,
                            "ssdeep": ssdeep,
                        },
                    })
                subfile.current_offset += subfile.slice_size
                if subfile.next_offset:
                    subfile.current_offset = max(subfile.current_offset, subfile.next_offset)
                subfile.current_offset = min(subfile.current_offset, subfile.size)
        except MemoryError:
            error("[!] Memory error!")
        return subfiles
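# A hypothetical call to file_subfiles() as defined above, printing one line per
# carved object. "sample.exe" is a placeholder path, and a non-empty filename is
# assumed (the function returns None when given an empty one). Every key accessed
# below matches the dicts the function appends to its result list.
for entry in file_subfiles("sample.exe"):
    print("offset=%d size=%s type=%s md5=%s" % (
        entry["offset"], entry["size"], entry["mimetype"], entry["hashes"]["md5"]))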