Example #1
0
    def parse(self, forward_fd, reverse_fd, scanners):
        """ Pull HTTP request/response pairs off a stream pair.

        Each iteration reads one request from forward_fd and one
        response from reverse_fd, records the transaction in the http
        table and hands the response body to the distributed scanner.
        The loop ends as soon as neither direction yields a message.
        """
        while 1:
            ## Defaults used when the request line can not be parsed:
            request = { 'url':'/unknown_request_%s' % forward_fd.inode_id,
                        'method': 'GET' }
            response = {}
            seen_message = False
            request_body = None
            response_body = None

            ## Request side - dissect the packet first so the request
            ## can be stamped with its capture time.
            packet = NetworkScanner.dissect_packet(forward_fd)
            if self.read_request(request, forward_fd):
                seen_message = True
                try:
                    request['timestamp'] = packet.ts_sec
                except AttributeError:
                    ## dissect_packet gave us nothing usable
                    request['timestamp'] = 0

                request_body = self.skip_body(request, forward_fd)
                request_body.dirty = 0

            ## Response side - same dance on the reverse stream.
            packet = NetworkScanner.dissect_packet(reverse_fd)
            if self.read_response(response, reverse_fd):
                seen_message = True
                try:
                    response['timestamp'] = packet.ts_sec
                except AttributeError:
                    response['timestamp'] = 0

                response_body = self.skip_body(response, reverse_fd)

            ## We hang all the parameters on the response object
            ## (i.e. file attachment, post parameters, cookies)
            if response_body and request_body:
                self.process_cookies(request, response_body)
                self.process_post_body(request, request_body, response_body)
                if request_body.size > 0:
                    request_body.close()

            if response_body and response_body.size > 0:
                ## Store information about the object in the http table.
                ## We try to store the url in a normalized form so we
                ## can find it regardless of the various permutations
                ## it can go through.
                row = {
                    'method': request.get('method'),
                    'url': request.get('url','/'),
                    'status': response.get('HTTP_code'),
                    'content_type': response.get('content-type'),
                    'useragent': request.get('user-agent'),
                    'host': request.get('host'),
                    'tld': make_tld(request.get('host','')),
                    }
                response_body.insert_to_table("http", row)
                response_body.close()
                Scanner.scan_inode_distributed(forward_fd.case,
                                               response_body.inode_id,
                                               scanners, self.cookie)

            if not seen_message: break
Example #2
0
    def parse(self, forward_fd, reverse_fd, scanners):
        """ Pull HTTP request/response pairs off a stream pair.

        Reads one request from forward_fd and one response from
        reverse_fd per iteration, stores the transaction in the http
        table, and dispatches the response body to the distributed
        scanner framework.  Loops until neither direction produces a
        message.
        """
        while True:
            ## Defaults used when the request line can not be parsed:
            request = {
                'url': '/unknown_request_%s' % forward_fd.inode_id,
                'method': 'GET'
            }
            response = {}
            parse = False
            request_body = response_body = None

            ## First parse both request and response
            ## Get the current timestamp of the request
            packet = NetworkScanner.dissect_packet(forward_fd)
            if self.read_request(request, forward_fd):
                try:
                    request['timestamp'] = packet.ts_sec
                except AttributeError:
                    ## dissect_packet returned nothing usable
                    request['timestamp'] = 0

                parse = True
                request_body = self.skip_body(request, forward_fd)
                ## NOTE(review): meaning of dirty=0 is defined by the
                ## body object elsewhere - presumably "no flush needed";
                ## confirm against skip_body's return type.
                request_body.dirty = 0

            ## Same dance for the response on the reverse stream:
            packet = NetworkScanner.dissect_packet(reverse_fd)
            if self.read_response(response, reverse_fd):
                try:
                    response['timestamp'] = packet.ts_sec
                except AttributeError:
                    response['timestamp'] = 0

                parse = True
                response_body = self.skip_body(response, reverse_fd)

            ## We hang all the parameters on the response object
            ## (i.e. file attachment, post parameters, cookies)
            if response_body and request_body:
                self.process_cookies(request, response_body)
                self.process_post_body(request, request_body, response_body)
                ## NOTE(review): request_body is only closed here (when
                ## a response exists and its size > 0) - verify bodies
                ## do not need closing on the other paths.
                if request_body.size > 0:
                    request_body.close()

            if response_body and response_body.size > 0:
                ## Store information about the object in the http table:
                url = request.get('url', '/')

                ## We try to store the url in a normalized form so we
                ## can find it regardless of the various permutations
                ## it can go though
                response_body.insert_to_table(
                    "http",
                    dict(method=request.get('method'),
                         url=url,
                         status=response.get('HTTP_code'),
                         content_type=response.get('content-type'),
                         useragent=request.get('user-agent'),
                         host=request.get('host'),
                         tld=make_tld(request.get('host', ''))))
                response_body.close()
                Scanner.scan_inode_distributed(forward_fd.case,
                                               response_body.inode_id,
                                               scanners, self.cookie)

            if not parse: break
Example #3
0
File: HTTP.py Project: ntvis/pyflag
##                        host = m.group(2)
##                        dbh.insert("http", url=referer, host=host)
##                        parent = dbh.autoincrement()
##                else:
##                    parent = row['inode_id']

            args = dict(inode_id = inode_id,
                        request_packet = p.request.get("packet_id",0),
                        method         = p.request.get("method","-"),
                        url            = url,
                        response_packet= p.response.get("packet_id"),
                        status         = p.response.get("HTTP_code"),
                        content_type   = p.response.get("content-type","text/html"),
                        referrer       = referer[:500],
                        host           = host,
                        tld            = make_tld(host),
                        useragent      = p.request.get('user-agent', '-'),
                        )

            if date:
                args['date'] = date
            
            dbh.insert('http', **args)
#                       parent         = parent)                            

            ## Replicate the information about the subobjects in the
            ## connection_details table - this makes it easier to do
            ## some queries:
            dbh.insert("connection_details",
                       ts_sec = stream.ts_sec,
                       inode_id = inode_id,
Example #4
0
    ## Maintenance script fragment: backfill the http.tld column for
    ## rows where it is NULL, working in batches of 10000 until no
    ## unprocessed rows remain.  Prints "|" per batch and "." per 1000
    ## rows as a progress indicator (Python 2 print statements).
    import sys

    ## NOTE(review): s appears unused in this fragment - possibly
    ## instantiated for its import-time/registration side effects.
    s = HTTPScanner()

    ## Two handles on the same case DB: dbh iterates the select while
    ## dbh2 issues the updates, so the result cursor is not disturbed.
    dbh = DB.DBO(sys.argv[1])
    dbh2 = dbh.clone()
    while 1:
        count = 0
        print "|",
        sys.stdout.flush()
        dbh.execute("select * from http where isnull(tld) limit 10000")
        for row in dbh:
            count += 1
            if count % 1000 == 0:
                print ".",
                sys.stdout.flush()
            if row["host"]:
                tld = make_tld(row["host"])
            else:
                ## No host column - try to dig the host out of the url.
                ## The pattern requires a "/" after the authority, so a
                ## bare "scheme://host" url falls through to "unknown".
                url = row["url"]
                m = re.match("[^:]+://([^/]+)/", url)
                if m:
                    tld = make_tld(m.group(1))
                else:
                    tld = "unknown"

            dbh2.update("http", _fast=True, where="inode_id = %s" % row["inode_id"], tld=tld)

        ## An empty batch means every row now has a tld - we are done.
        if count == 0:
            break
Example #5
0
    def process_stream(self, stream, factories):
        """ We look for HTTP requests to identify the stream. This
        allows us to processes HTTP connections on unusual ports. This
        situation might arise if HTTP proxies are used for example.

        For each request/response pair found we create a VFS node for
        the response body, record the transaction in the http and
        connection_details tables, and rescan the new node.
        """
        if stream.reverse:
            combined_inode = "I%s|S%s/%s" % (stream.fd.name, stream.inode_id,
                                             stream.reverse)
            try:
                fd = self.fsfd.open(inode=combined_inode)
            ## If we cant open the combined stream, we quit (This could
            ## happen if we are trying to operate on a combined stream
            ## already
            except IOError:
                return
        else:
            fd = stream
            ## BUGFIX: this branch previously left combined_inode
            ## unassigned, so the logging and VFS lookup below raised
            ## NameError for one-sided streams.  Fall back to the
            ## forward-only inode name.  NOTE(review): assumes
            ## "I%s|S%s" is the naming convention for an uncombined
            ## stream - confirm against the VFS inode scheme.
            combined_inode = "I%s|S%s" % (stream.fd.name, stream.inode_id)

        p = HTTP(fd, self.fsfd)
        ## Check that this is really HTTP
        if not p.identify():
            return

        pyflaglog.log(pyflaglog.DEBUG, "Openning %s for HTTP" % combined_inode)
        ## Iterate over all the messages in this connection
        for f in p.parse():
            if not f: continue
            offset, size = f

            ## Create the VFS node.  Extra inode components tell the
            ## VFS how to decode the body (chunked / gzip / deflate):
            new_inode = "%s|H%s:%s" % (combined_inode, offset, size)

            try:
                if 'chunked' in p.response['transfer-encoding']:
                    new_inode += "|c0"
            except KeyError:
                pass

            try:
                if 'gzip' in p.response['content-encoding']:
                    new_inode += "|G1"
            except KeyError:
                pass

            try:
                if 'deflate' in p.response['content-encoding']:
                    new_inode += "|d1"
            except KeyError:
                pass

            ## stream.ts_sec is already formatted in DB format
            ## need to convert back to utc/gmt as paths are UTC
            timestamp = fd.get_packet_ts(offset)
            ds_timestamp = Time.convert(timestamp,
                                        case=self.case,
                                        evidence_tz="UTC")
            try:
                date_str = ds_timestamp.split(" ")[0]
            ## Was a bare except which also swallowed KeyboardInterrupt
            ## and SystemExit - narrowed to Exception.
            except Exception:
                date_str = stream.ts_sec.split(" ")[0]

            path, inode, inode_id = self.fsfd.lookup(inode=combined_inode)

            ## Try to put the HTTP inodes at the mount point. FIXME:
            ## This should not be needed when a http stats viewer is
            ## written.
            path = posixpath.normpath(path + "/../../../../../")

            inode_id = self.fsfd.VFSCreate(
                None,
                new_inode,
                "%s/HTTP/%s/%s" % (path, date_str, escape(p.request['url'])),
                mtime=timestamp,
                size=size)

            ## Store information about this request in the http table.
            ## Default the url to "/" so a request without a parsed url
            ## does not crash url_unquote/startswith below.
            host = p.request.get("host", IP2str(stream.dest_ip))
            url = HTML.url_unquote(p.request.get("url", "/"))
            try:
                date = p.response["date"]
                date = Time.parse(date, case=self.case, evidence_tz=None)
            except (KeyError, ValueError):
                date = 0

            ## Two forms for the referrer:
            referer = p.request.get('referer', p.request.get('referrer', ''))
            if not url.startswith("http://") and not url.startswith("ftp://"):
                url = "http://%s%s" % (host, url)

            dbh = DB.DBO(self.case)

            args = dict(
                inode_id=inode_id,
                request_packet=p.request.get("packet_id", 0),
                method=p.request.get("method", "-"),
                url=url,
                response_packet=p.response.get("packet_id"),
                status=p.response.get("HTTP_code"),
                content_type=p.response.get("content-type", "text/html"),
                referrer=referer[:500],
                host=host,
                tld=make_tld(host),
                useragent=p.request.get('user-agent', '-'),
            )

            ## Only store a date when one could actually be parsed:
            if date:
                args['date'] = date

            dbh.insert('http', **args)

            ## Replicate the information about the subobjects in the
            ## connection_details table - this makes it easier to do
            ## some queries:
            dbh.insert(
                "connection_details",
                ts_sec=stream.ts_sec,
                inode_id=inode_id,
                src_ip=stream.src_ip,
                src_port=stream.src_port,
                dest_ip=stream.dest_ip,
                dest_port=stream.dest_port,
            )
            ## handle the request's parameters:
            try:
                self.handle_parameters(p.request, inode_id)
            except (KeyError, TypeError):
                pass

            ## Only scan the new file using the scanner train if its
            ## size is bigger than 0:
            if size > 0:
                self.scan_as_file(new_inode, factories)
Example #6
0
    def process_stream(self, stream, factories):
        """ We look for HTTP requests to identify the stream. This
        allows us to processes HTTP connections on unusual ports. This
        situation might arise if HTTP proxies are used for example.

        For each request/response pair found we create a VFS node for
        the response body, record the transaction in the http and
        connection_details tables, and rescan the new node.
        """
        if stream.reverse:
            combined_inode = "I%s|S%s/%s" % (stream.fd.name, stream.inode_id,
                                             stream.reverse)
            try:
                fd = self.fsfd.open(inode=combined_inode)
            ## If we cant open the combined stream, we quit (This could
            ## happen if we are trying to operate on a combined stream
            ## already
            except IOError:
                return
        else:
            fd = stream
            ## BUGFIX: this branch previously left combined_inode
            ## unassigned, so the logging and VFS lookup below raised
            ## NameError for one-sided streams.  Fall back to the
            ## forward-only inode name.  NOTE(review): assumes
            ## "I%s|S%s" is the naming convention for an uncombined
            ## stream - confirm against the VFS inode scheme.
            combined_inode = "I%s|S%s" % (stream.fd.name, stream.inode_id)

        p = HTTP(fd, self.fsfd)
        ## Check that this is really HTTP
        if not p.identify():
            return

        pyflaglog.log(pyflaglog.DEBUG, "Openning %s for HTTP" % combined_inode)
        ## Iterate over all the messages in this connection
        for f in p.parse():
            if not f: continue
            offset, size = f

            ## Create the VFS node.  Extra inode components tell the
            ## VFS how to decode the body (chunked / gzip / deflate):
            new_inode = "%s|H%s:%s" % (combined_inode, offset, size)

            try:
                if 'chunked' in p.response['transfer-encoding']:
                    new_inode += "|c0"
            except KeyError:
                pass

            try:
                if 'gzip' in p.response['content-encoding']:
                    new_inode += "|G1"
            except KeyError:
                pass

            try:
                if 'deflate' in p.response['content-encoding']:
                    new_inode += "|d1"
            except KeyError:
                pass

            ## stream.ts_sec is already formatted in DB format
            ## need to convert back to utc/gmt as paths are UTC
            timestamp = fd.get_packet_ts(offset)
            ds_timestamp = Time.convert(timestamp,
                                        case=self.case,
                                        evidence_tz="UTC")
            try:
                date_str = ds_timestamp.split(" ")[0]
            ## Was a bare except which also swallowed KeyboardInterrupt
            ## and SystemExit - narrowed to Exception.
            except Exception:
                date_str = stream.ts_sec.split(" ")[0]

            path, inode, inode_id = self.fsfd.lookup(inode=combined_inode)

            ## Try to put the HTTP inodes at the mount point. FIXME:
            ## This should not be needed when a http stats viewer is
            ## written.
            path = posixpath.normpath(path + "/../../../../../")

            inode_id = self.fsfd.VFSCreate(
                None,
                new_inode,
                "%s/HTTP/%s/%s" % (path, date_str, escape(p.request['url'])),
                mtime=timestamp,
                size=size)

            ## Store information about this request in the http table.
            ## Default the url to "/" so a request without a parsed url
            ## does not crash url_unquote/startswith below.
            host = p.request.get("host", IP2str(stream.dest_ip))
            url = HTML.url_unquote(p.request.get("url", "/"))
            try:
                date = p.response["date"]
                date = Time.parse(date, case=self.case, evidence_tz=None)
            except (KeyError, ValueError):
                date = 0

            ## Two forms for the referrer:
            referer = p.request.get('referer', p.request.get('referrer', ''))
            if not url.startswith("http://") and not url.startswith("ftp://"):
                url = "http://%s%s" % (host, url)

            dbh = DB.DBO(self.case)

            args = dict(
                inode_id=inode_id,
                request_packet=p.request.get("packet_id", 0),
                method=p.request.get("method", "-"),
                url=url,
                response_packet=p.response.get("packet_id"),
                status=p.response.get("HTTP_code"),
                content_type=p.response.get("content-type", "text/html"),
                referrer=referer[:500],
                host=host,
                tld=make_tld(host),
                useragent=p.request.get('user-agent', '-'),
            )

            ## Only store a date when one could actually be parsed:
            if date:
                args['date'] = date

            dbh.insert('http', **args)

            ## Replicate the information about the subobjects in the
            ## connection_details table - this makes it easier to do
            ## some queries:
            dbh.insert(
                "connection_details",
                ts_sec=stream.ts_sec,
                inode_id=inode_id,
                src_ip=stream.src_ip,
                src_port=stream.src_port,
                dest_ip=stream.dest_ip,
                dest_port=stream.dest_port,
            )
            ## handle the request's parameters:
            try:
                self.handle_parameters(p.request, inode_id)
            except (KeyError, TypeError):
                pass

            ## Only scan the new file using the scanner train if its
            ## size is bigger than 0:
            if size > 0:
                self.scan_as_file(new_inode, factories)