Example #1
def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from
    the "streams" key.

    The parser for the main stream ((None, None, filename) in data['streams'])
    is cached for efficiency reasons in data['parser_cache'].

    """
    # must remake parser EVERY TIME because parsers can't be pickled
    # (they contain generators which are currently not pickleable)
    # best I can do here is cache the parser, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None: # original file
        stream = FileInputStream(data['filename'],
                                 real_filename=unicode(tmp_dir + sessid + '.file'))
        if 'parser_cache' in data:
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        prevstream, prevparser = get_parser(data, streamdata[0], sessid)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser
Example #2
    def search(self, file_path, strings=None):
        # nothing to look for
        if not strings:
            return False
        try:
            self.stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
        except NullStreamError:
            return False
        patterns = PatternMatching()
        for s in strings:
            patterns.addString(s)

        # stream.size is in bits; readBytes() takes a byte count,
        # so size // 8 reads the whole file into self.data
        start = 0
        size_bits = self.stream.size
        self.data = self.stream.readBytes(start, size_bits // 8)
        return patterns.search(self.data)
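For reference, the PatternMatching object used above can also be exercised on an in-memory string. A minimal sketch, assuming the hachoir-regex 1.x import path and only relying on the fact that each match exposes its start offset at index 0, as Example #12 below does:

from hachoir_regex import PatternMatching  # assumed hachoir-regex 1.x import path

patterns = PatternMatching()
patterns.addString("HTTP/1.1")
patterns.addString("\r\n\r\n")

data = "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\n\r\n<html></html>"
# each match carries its start offset at index 0 (as Example #12 uses it)
for match in patterns.search(data):
    print("match at offset %d" % match[0])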
Example #3
    def subfile(self, filePath):
        # hachoir-subfile is a tool based on hachoir-parser to find subfiles in any binary stream.
        # Website: http://bitbucket.org/haypo/hachoir/wiki/hachoir-subfile
        # SearchSubfile reports its findings on sys.stdout/sys.stderr, so
        # temporarily redirect both and restore them when done.
        oldStdOut = sys.stdout
        oldStdErr = sys.stderr
        outputStdOut = StringIO.StringIO()
        outputStdErr = StringIO.StringIO()
        sys.stdout = outputStdOut
        sys.stderr = outputStdErr

        try:
            stream = FileInputStream(unicodeFilename(filePath),
                                     real_filename=filePath)

            # Search for subfiles over the whole stream
            subfile = SearchSubfile(stream, 0, None)
            subfile.loadParsers(categories=None, parser_ids=None)
            subfile.main()
        finally:
            # always restore sys.stdout and sys.stderr
            sys.stdout = oldStdOut
            sys.stderr = oldStdErr

        # parse stdout, stderr captured from SearchSubfile
        return self.parse(outputStdOut.getvalue(), outputStdErr.getvalue())
Example #4
    def save_response_binaries(self, path, hash_value):
        try:
            flow = Flow.objects.get(hash_value=hash_value)
            flow_details = flow.details
            for detail in flow_details:
                # create the orig file ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
                source_str = ":".join([detail.src_ip, str(detail.sport)])
                destination_str = ":".join([detail.dst_ip, str(detail.dport)])
                flow_str = "-".join([source_str, destination_str])
                resp_file = "_".join(["contents", flow_str,"resp.dat"])
                file_path = "/".join([path, resp_file])
                file_path = str(file_path)

                try:
                    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
                except NullStreamError:
                    # empty capture file, nothing to extract
                    continue
                subfile = SearchSubfile(stream, 0, None)
                subfile.loadParsers()
                root = "/".join([path, "html-files"])
                if not os.path.exists(root):
                    os.makedirs(root)
                output = "/".join([root, flow_str])
                output = str(output)
                if not os.path.exists(output):
                    os.mkdir(output)
                subfile.setOutput(output)
                ok = subfile.main()

                # save the files info at the db also

            return True

        except Exception, ex:
            return False
Example #5
def createParser(filename, real_filename=None):
    """
    Create a parser from a file, or return None on error.

    Options:
    - filename (unicode): Input file name ;
    - real_filename (str|unicode): Real file name.
    """
    return guessParser(FileInputStream(filename, real_filename))
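A quick way to exercise this helper is to open a file and inspect the guessed parser. A minimal sketch (the sample.png file name is hypothetical; description and mime_type are the parser attributes used in the other examples):

filename = u"sample.png"  # hypothetical input file
parser = createParser(filename, real_filename="sample.png")
if parser is None:
    print("Unable to guess a parser for %s" % filename)
else:
    print("parser: %s" % parser.__class__.__name__)
    print("description: %s" % parser.description)
    print("mime type: %s" % parser.mime_type)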
Example #6
def createParser(filename, real_filename=None, tags=None):
    """
    Create a parser from a file, or return None on error.

    Options:
    - filename (unicode): Input file name ;
    - real_filename (str|unicode): Real file name ;
    - tags (list): Optional tags attached to the created stream.
    """
    if not tags:
        tags = []
    stream = FileInputStream(filename, real_filename, tags=tags)
    return guessParser(stream)
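The extra tags argument is simply attached to the stream before guessing. As an illustration only, a sketch that assumes hachoir's ("id", <parser id>) stream-tag convention for restricting the guess to one parser; both the tag form and the "gzip" parser id are assumptions on my part, not taken from this snippet:

# restrict the guess to a single parser via a stream tag (assumed convention)
parser = createParser(u"archive.bin", real_filename="archive.bin",
                      tags=[("id", "gzip")])
if parser is None:
    print("gzip parser did not match this file")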
Example #7
class Handler:
    def __init__(self):
        self.file_path = None
        self.file_name = None
        self.stream = None
        self.data = None
        self.log = Logger("File Handler", "DEBUG")

    def create_dir(self):
        now = datetime.datetime.now()
        self.log.message("Now is: %s:" % now)
        directory_name = now.strftime("%d-%m-%y")
        self.log.message("Directory name: %s:" % directory_name)
        directory_path = "/".join([settings.PROJECT_ROOT, "uploads", directory_name])
        self.log.message("Directory path: %s" % directory_path)
        if not os.path.exists(directory_path):
            os.mkdir(directory_path)
            self.log.message("Directory created")
        # also create a separate directory for each individual upload
        new_dir = generate_name_from_timestame()
        new_dir_path = "/".join([directory_path, new_dir])
        if not os.path.exists(new_dir_path):
            os.mkdir(new_dir_path)
            self.log.message("Directory created")
        self.upload_dir = new_dir_path

    def save_file(self, f):
        self.file_name = f.name
        self.file_path = "/".join([self.upload_dir, self.file_name])
        # write the uploaded file into the upload directory in chunks
        with open(self.file_path, 'wb+') as destination:
            for chunk in f.chunks():
                destination.write(chunk)

    def search(self, file_path, strings=None):
        # nothing to look for
        if not strings:
            return False
        try:
            self.stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
        except NullStreamError:
            return False
        patterns = PatternMatching()
        for s in strings:
            patterns.addString(s)

        # stream.size is in bits; readBytes() takes a byte count,
        # so size // 8 reads the whole file into self.data
        start = 0
        size_bits = self.stream.size
        self.data = self.stream.readBytes(start, size_bits // 8)
        return patterns.search(self.data)

    def reset_data(self):
        self.data = None
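For orientation, the intended call sequence for this handler (e.g. from a Django upload view) is roughly create_dir(), then save_file(), then search(). A minimal sketch, assuming request.FILES['file'] is a Django UploadedFile and the project's Logger and settings are importable:

def handle_upload(request):
    # hypothetical Django view driving the Handler above
    handler = Handler()
    handler.create_dir()                      # per-day and per-upload directories
    handler.save_file(request.FILES['file'])  # write the upload in chunks
    # look for HTML responses inside the uploaded capture
    return handler.search(handler.file_path,
                          strings=["Content-Type: text/html"])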
Example #8
def main():
    usage = "usage: %prog <file_name>"
    op = OptionParser(usage)

    (options, args) = op.parse_args()
    if len(args) != 1:
        op.print_help()
        sys.exit(1)

    inputFileName = unicode(args[0])
    try:
        stream = FileInputStream(inputFileName)
    except InputStreamError, err:
        sys.exit("Unable to open file: %s" % err)
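The snippet stops right after the stream is opened. A typical continuation (my sketch, not from the original source) would hand the stream to guessParser and report the result:

    # hypothetical continuation inside main()
    parser = guessParser(stream)
    if not parser:
        sys.exit("Unable to guess a parser for %s" % inputFileName)
    print("parser: %s (%s)" % (parser.__class__.__name__, parser.description))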
Example #9
    def convert_gzip_files(self, path, hash_value):
        try:
            flow = Flow.objects.get(hash_value=hash_value)
            flow_details = flow.details
            for detail in flow_details:
                # create the orig file ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
                source_str = ":".join([detail.src_ip, str(detail.sport)])
                destination_str = ":".join([detail.dst_ip, str(detail.dport)])
                flow_str = "-".join([source_str, destination_str])
                resp_file = "_".join(["contents", flow_str,"resp.dat"])
                file_path = "/".join([path, resp_file])
                # path is created as unicode, convert it a regular string for hachoir operation
                file_path = str(file_path)

                try:
                    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
                except NullStreamError:
                    continue
                subfile = SearchSubfile(stream, 0, None)
                subfile.loadParsers()
                root = "/".join([path, "html-files"])
                if not os.path.exists(root):
                    os.makedirs(root)
                output = "/".join([root, flow_str])
                output = str(output)
                subfile.setOutput(output)

                http_details = filter(lambda x: x.flow_details.id == detail.id, HTTPDetails.objects.filter(http_type="response"))
                file_ext = ".txt"
                for http in http_details:
                    if http.content_type:
                        filename = subfile.output.createFilename(file_ext)
                        if http.content_encoding == "gzip":
                            # decompress the gzip-encoded response body and save it as .html
                            r = open("/".join([output, filename]), "rb")
                            body = r.read()
                            r.close()
                            data = StringIO.StringIO(body)
                            gzipper = gzip.GzipFile(fileobj=data)
                            html = gzipper.read()
                            filename = filename.split(".")[0] + ".html"
                            w = open("/".join([output, filename]), "wb")
                            w.write(html)
                            w.close()

            return True

        except Exception, ex:
            print ex
            return False
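The gzip branch above boils down to decompressing an in-memory byte string; the same step as a small standalone helper (the function name is mine):

import gzip
import StringIO

def gunzip_bytes(body):
    # decompress a gzip-encoded byte string entirely in memory
    data = StringIO.StringIO(body)
    gzipper = gzip.GzipFile(fileobj=data)
    return gzipper.read()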
Example #10
def get_parser(data, streamdata, sessid):
    """Guess or retrieve the parser based on the stream.

    Streams are retrieved from the "data" persistent storage variable, from
    the "streams" key.

    The parser for the main stream ((None, None, filename) in data['streams'])
    is cached for efficiency reasons in data['parser_cache'].

    """
    # must remake parser EVERY TIME because parsers can't be pickled
    # (they contain generators which are currently not pickleable)
    # best I can do here is cache the parser, so at least we're not
    # taking time to re-guess the parser...
    if streamdata[0] is None:  # original file
        stream = FileInputStream(data['filename'],
                                 real_filename=unicode(tmp_dir + sessid +
                                                       '.file'))
        if 'parser_cache' in data:
            parser = data['parser_cache'](stream)
        else:
            parser = guessParser(stream)
            if not parser:
                print_parse_error()
                return (None, None)
            data['parser_cache'] = parser.__class__
            save_data(data, sessid)
    elif isinstance(streamdata[0], tuple):
        prevstream, prevparser = get_parser(data, streamdata[0], sessid)
        stream = prevparser[streamdata[1]].getSubIStream()
        parser = guessParser(stream)
    else:
        stream = StringInputStream(streamdata[1])
        stream.tags = streamdata[0]
        parser = guessParser(stream)
    return stream, parser
Example #11
    def _verify_download(self, file_name=None):
        """
        Check the saved file to see if it is actually valid; if not, consider the download a failure.
        """
        result = True
        # primitive verification of torrents, just make sure we didn't get a text file or something
        if GenericProvider.TORRENT == self.providerType:
            parser = stream = None
            try:
                stream = FileInputStream(file_name)
                parser = guessParser(stream)
            except:
                pass
            # the guessed parser must identify the download as a torrent
            result = bool(parser) and parser.mime_type == 'application/x-bittorrent'

            try:
                stream._input.close()
            except:
                pass

        return result
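The same check generalizes to any expected MIME type; a minimal sketch of a hypothetical helper (name and signature are mine) built on the same guessParser call:

def has_mime_type(file_name, expected_mime_type):
    # hypothetical helper: True if hachoir identifies the file as the given MIME type
    parser = stream = None
    try:
        stream = FileInputStream(file_name)
        parser = guessParser(stream)
    except Exception:
        return False
    return bool(parser) and parser.mime_type == expected_mime_type

# e.g. has_mime_type(u"result.torrent", "application/x-bittorrent")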
Example #12
    def save_response_files(self, path, hash_value):
        try:
            flow = Flow.objects.get(hash_value=hash_value)
            flow_details = flow.details
            for detail in flow_details:
                # create the orig file ex: contents_192.168.1.5:42825-62.212.84.227:80_resp.dat
                source_str = ":".join([detail.src_ip, str(detail.sport)])
                destination_str = ":".join([detail.dst_ip, str(detail.dport)])
                flow_str = "-".join([source_str, destination_str])
                resp_file = "_".join(["contents", flow_str,"resp.dat"])
                file_path = "/".join([path, resp_file])
                # path is created as unicode, convert it a regular string for hachoir operation
                file_path = str(file_path)

                strings = ["Content-Type: text/html", "Content-Type: application/x-javascript", "Content-Type: text/css"]
                file_handler = FileHandler()
                responses = []
                search_li = file_handler.search(file_path, strings)
                if not search_li: continue
                for item in search_li:
                    responses.append(item[0])

                empty_lines = []
                strings = ["\r\n\r\n"]
                search_li = file_handler.search(file_path, strings)
                if not search_li: continue
                for item in search_li:
                    empty_lines.append(item[0])

                http_lines = []
                strings = ["HTTP/1.1"]
                search_li = file_handler.search(file_path, strings)
                if not search_li: continue
                for item in search_li:
                    http_lines.append(item[0])

                try:
                    stream = FileInputStream(unicodeFilename(file_path), real_filename=file_path)
                except NullStreamError:
                    continue
                subfile = SearchSubfile(stream, 0, None)
                subfile.loadParsers()
                root = "/".join([path, "html-files"])
                if not os.path.exists(root):
                    os.makedirs(root)
                output = "/".join([root, flow_str])
                output = str(output)
                subfile.setOutput(output)

                for x in range(len(responses)):
                    # here I have the request header
                    data = file_handler.data
                    # f = data[empty_lines[x]:http_lines[x+1]]
                    file_ext = ".txt"
                    # if ("html" in f or "body" in f):
                    #     file_ext = ".html"
                    # elif ("script" in f):
                    #     file_ext = ".js"
                    # else:

                    # select the closest empty line
                    empty_lines.append(responses[x])
                    empty_lines.sort()
                    index = empty_lines.index(responses[x])
                    offset = empty_lines[index+1]

                    size = None
                    try:
                        size = http_lines[x+1]-2
                    except IndexError:
                        size = stream.size

                    f = data[offset+4:size]

                    filename = subfile.output.createFilename(file_ext)
                    w = open("/".join([output, filename]), "w")
                    w.write(f)
                    w.close()

                # saving the hachoir saved binaries to the db with the created txt files
                if detail.protocol == "http":
                    http_files = os.listdir(output)
                    #http_files = filter(lambda x: x.split(".")[-1] != 'txt', http_files) # no need to take the txt files
                    if len(http_files) > 0:
                        http_li = filter(lambda x: x.flow_details.id == detail.id, HTTPDetails.objects.all())
                        for http in http_li:
                            http.files = http_files
                            http.save()

            return True

        except Exception, ex:
            print ex
            return False
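The "select the closest empty line" step above (append, sort, index, then take the next element) can also be written with bisect on the already-sorted offsets; a small sketch of the same idea (the helper name is mine, and behaviour at duplicate offsets may differ slightly):

import bisect

def next_empty_line(empty_lines, response_offset):
    # first header/body separator located after the response start;
    # raises IndexError when none follows, like the original lookup
    index = bisect.bisect_right(empty_lines, response_offset)
    return empty_lines[index]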
Example #13
def file_subfiles(filename):
    if filename:
        offset, size, memorylimit, filemaxsize = 0, 999999, 50 * 1024 * 1024, 100 * 1024 * 1024
        stream = FileInputStream(unicodeFilename(filename),
                                 real_filename=filename)
        subfile = SearchSubfile(stream, offset, size)
        subfiles = []
        try:
            subfile.loadParsers()
            subfile.stats = {}
            subfile.verbose = False
            subfile.next_offset = None
            # read the whole file once so each detected subfile can be sliced out below
            with open(filename, "rb") as fo:
                filedata = fo.read()
            while subfile.current_offset < subfile.size:
                _ = subfile.datarate.update(subfile.current_offset)
                for offset, parser in subfile.findMagic(
                        subfile.current_offset):
                    try:
                        size = parser.content_size // 8 if parser.content_size else None
                    except Exception as ex:
                        size = None
                    try:
                        description = parser.description if not (
                            parser.content_size
                        ) or parser.content_size // 8 < filemaxsize else parser.__class__.__name__
                    except Exception as ex:
                        description = None
                    offset = offset // 8
                    # skip the first subfile
                    # as its the original file itself
                    if offset == 0:
                        continue
                    # slice the candidate subfile out of the buffer and fingerprint it
                    chunk = filedata[offset:offset + size] if size else None
                    mimetype = data_mimetype(chunk) if size else None
                    md5 = data_hashes(chunk, "md5") if size else None
                    sha256 = data_hashes(chunk, "sha256") if size else None
                    ssdeep = data_hashes(chunk, "ssdeep") if size else None
                    subfiles.append({
                        "offset": offset,
                        "size": size,
                        "mimetype": mimetype,
                        "description": description,
                        "hashes": {
                            "md5": md5,
                            "sha256": sha256,
                            "ssdeep": ssdeep
                        }
                    })
                subfile.current_offset += subfile.slice_size
                if subfile.next_offset:
                    subfile.current_offset = max(subfile.current_offset,
                                                 subfile.next_offset)
                subfile.current_offset = min(subfile.current_offset,
                                             subfile.size)
        except MemoryError:
            error("[!] Memory error!")
        return subfiles
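Calling the function is straightforward; a minimal usage sketch (the carved.bin file name is hypothetical):

import json

results = file_subfiles("carved.bin")  # hypothetical input file
for entry in results or []:
    print(json.dumps(entry, indent=2))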