def parse(self, forward_fd, reverse_fd, scanners):
    """ Walk a connection, pairing each request with its response.

    Requests are read from forward_fd and responses from reverse_fd.
    Every pass of the loop tries to read one request and one
    response; the loop ends after the first pass in which neither
    could be read. A response body whose size is bigger than 0 is
    recorded in the http table and passed, together with scanners,
    to Scanner.scan_inode_distributed.
    """
    while True:
        ## Defaults which stay in place when no request can be read
        request = {
            'url': '/unknown_request_%s' % forward_fd.inode_id,
            'method': 'GET',
        }
        response = {}
        request_body = None
        response_body = None

        ## The packet is dissected before the headers are read. Its
        ## ts_sec becomes the request timestamp, or 0 when the
        ## packet has no such attribute.
        packet = NetworkScanner.dissect_packet(forward_fd)
        got_request = self.read_request(request, forward_fd)
        if got_request:
            request['timestamp'] = getattr(packet, 'ts_sec', 0)
            request_body = self.skip_body(request, forward_fd)
            request_body.dirty = 0

        ## Same again for the response side of the connection
        packet = NetworkScanner.dissect_packet(reverse_fd)
        got_response = self.read_response(response, reverse_fd)
        if got_response:
            response['timestamp'] = getattr(packet, 'ts_sec', 0)
            response_body = self.skip_body(response, reverse_fd)

        ## We hang all the parameters on the response object
        ## (i.e. file attachment, post parameters, cookies)
        if response_body and request_body:
            self.process_cookies(request, response_body)
            self.process_post_body(request, request_body, response_body)
            if request_body.size > 0:
                request_body.close()

        if response_body and response_body.size > 0:
            ## Store information about the object in the http table.
            ## We try to store the url in a normalized form so we
            ## can find it regardless of the various permutations
            ## it can go through
            http_row = {
                'method': request.get('method'),
                'url': request.get('url', '/'),
                'status': response.get('HTTP_code'),
                'content_type': response.get('content-type'),
                'useragent': request.get('user-agent'),
                'host': request.get('host'),
                'tld': make_tld(request.get('host', '')),
            }
            response_body.insert_to_table("http", http_row)
            response_body.close()
            Scanner.scan_inode_distributed(forward_fd.case,
                                           response_body.inode_id,
                                           scanners, self.cookie)

        ## Neither side produced anything on this pass: we are done
        if not (got_request or got_response):
            break
def parse(self, forward_fd, reverse_fd, scanners):
    """ Walk a connection, pairing each request with its response.

    Requests are read from forward_fd and responses from reverse_fd.
    Every pass of the loop tries to read one request and one
    response; the loop ends after the first pass in which neither
    could be read. A response body whose size is bigger than 0 is
    recorded in the http table and passed, together with scanners,
    to Scanner.scan_inode_distributed.
    """
    while True:
        ## Defaults which stay in place when no request can be read
        request = {
            'url': '/unknown_request_%s' % forward_fd.inode_id,
            'method': 'GET',
        }
        response = {}
        request_body = None
        response_body = None

        ## The packet is dissected before the headers are read. Its
        ## ts_sec becomes the request timestamp, or 0 when the
        ## packet has no such attribute.
        packet = NetworkScanner.dissect_packet(forward_fd)
        got_request = self.read_request(request, forward_fd)
        if got_request:
            request['timestamp'] = getattr(packet, 'ts_sec', 0)
            request_body = self.skip_body(request, forward_fd)
            request_body.dirty = 0

        ## Same again for the response side of the connection
        packet = NetworkScanner.dissect_packet(reverse_fd)
        got_response = self.read_response(response, reverse_fd)
        if got_response:
            response['timestamp'] = getattr(packet, 'ts_sec', 0)
            response_body = self.skip_body(response, reverse_fd)

        ## We hang all the parameters on the response object
        ## (i.e. file attachment, post parameters, cookies)
        if response_body and request_body:
            self.process_cookies(request, response_body)
            self.process_post_body(request, request_body, response_body)
            if request_body.size > 0:
                request_body.close()

        if response_body and response_body.size > 0:
            ## Store information about the object in the http table.
            ## We try to store the url in a normalized form so we
            ## can find it regardless of the various permutations
            ## it can go through
            http_row = {
                'method': request.get('method'),
                'url': request.get('url', '/'),
                'status': response.get('HTTP_code'),
                'content_type': response.get('content-type'),
                'useragent': request.get('user-agent'),
                'host': request.get('host'),
                'tld': make_tld(request.get('host', '')),
            }
            response_body.insert_to_table("http", http_row)
            response_body.close()
            Scanner.scan_inode_distributed(forward_fd.case,
                                           response_body.inode_id,
                                           scanners, self.cookie)

        ## Neither side produced anything on this pass: we are done
        if not (got_request or got_response):
            break
## host = m.group(2) ## dbh.insert("http", url=referer, host=host) ## parent = dbh.autoincrement() ## else: ## parent = row['inode_id'] args = dict(inode_id = inode_id, request_packet = p.request.get("packet_id",0), method = p.request.get("method","-"), url = url, response_packet= p.response.get("packet_id"), status = p.response.get("HTTP_code"), content_type = p.response.get("content-type","text/html"), referrer = referer[:500], host = host, tld = make_tld(host), useragent = p.request.get('user-agent', '-'), ) if date: args['date'] = date dbh.insert('http', **args) # parent = parent) ## Replicate the information about the subobjects in the ## connection_details table - this makes it easier to do ## some queries: dbh.insert("connection_details", ts_sec = stream.ts_sec, inode_id = inode_id,
## Maintenance script: fill in the tld column of every http row where
## it is still NULL. The case to operate on is taken from the first
## command line argument.
import sys

## NOTE(review): s is not used anywhere below. It is kept in case
## constructing an HTTPScanner has side effects - TODO confirm.
s = HTTPScanner()
dbh = DB.DBO(sys.argv[1])
## A second handle is used for the updates so they do not disturb the
## result set which is being iterated on the first handle.
dbh2 = dbh.clone()

while True:
    count = 0
    ## One "|" per batch and one "." per 1000 rows as a progress
    ## indicator. sys.stdout.write is used rather than the print
    ## statement so this parses on both Python 2 and Python 3.
    sys.stdout.write("| ")
    sys.stdout.flush()

    ## Work in batches of at most 10000 rows. Rows updated below stop
    ## matching the where clause, so the next batch is a fresh set.
    dbh.execute("select * from http where isnull(tld) limit 10000")
    for row in dbh:
        count += 1
        if count % 1000 == 0:
            sys.stdout.write(". ")
            sys.stdout.flush()

        if row["host"]:
            tld = make_tld(row["host"])
        else:
            ## No host recorded: fall back to the host part of the
            ## url. The url may itself be NULL, in which case there
            ## is nothing to match against.
            m = re.match("[^:]+://([^/]+)/", row["url"] or "")
            if m:
                tld = make_tld(m.group(1))
            else:
                tld = "unknown"

        ## NOTE(review): make_tld is not visible here. Should it ever
        ## return None, the row would presumably stay NULL and be
        ## selected again on every pass, so the loop would never end.
        if tld is None:
            tld = "unknown"

        dbh2.update("http", _fast=True,
                    where="inode_id = %s" % row["inode_id"],
                    tld=tld)

    ## An empty batch means there is nothing left to fix up
    if count == 0:
        break
def process_stream(self, stream, factories):
    """ We look for HTTP requests to identify the stream. This
    allows us to process HTTP connections on unusual ports. This
    situation might arise if HTTP proxies are used for example.

    stream is the stream object being scanned. When it has a
    reverse stream, both directions are opened as one combined
    inode. For every message found, a VFS node is created, a row is
    inserted into the http and connection_details tables and the
    request parameters are handled. factories is passed unchanged
    to scan_as_file for every object whose size is bigger than 0.
    Nothing is returned.
    """
    if stream.reverse:
        combined_inode = "I%s|S%s/%s" % (stream.fd.name, stream.inode_id,
                                         stream.reverse)
        try:
            fd = self.fsfd.open(inode=combined_inode)
        except IOError:
            ## If we cant open the combined stream, we quit (This
            ## could happen if we are trying to operate on a
            ## combined stream already)
            return
    else:
        fd = stream
        ## combined_inode used to be left unset on this path, so any
        ## one way stream which was identified as HTTP died with an
        ## UnboundLocalError at the first use below.
        ## NOTE(review): the name is built with the same prefix as
        ## the combined form, minus the reverse part - confirm this
        ## is how a single stream inode is spelt.
        combined_inode = "I%s|S%s" % (stream.fd.name, stream.inode_id)

    p = HTTP(fd, self.fsfd)
    ## Check that this is really HTTP
    if not p.identify():
        return

    pyflaglog.log(pyflaglog.DEBUG, "Openning %s for HTTP" % combined_inode)

    ## Iterate over all the messages in this connection
    for f in p.parse():
        if not f:
            continue
        offset, size = f

        ## Create the VFS node. Suffixes are appended for the
        ## transfer/content encodings named in the response headers.
        ## A missing header simply adds no suffix.
        new_inode = "%s|H%s:%s" % (combined_inode, offset, size)
        transfer_encoding = p.response.get('transfer-encoding', '')
        content_encoding = p.response.get('content-encoding', '')
        if 'chunked' in transfer_encoding:
            new_inode += "|c0"
        if 'gzip' in content_encoding:
            new_inode += "|G1"
        if 'deflate' in content_encoding:
            new_inode += "|d1"

        ## stream.ts_sec is already formatted in DB format
        ## need to convert back to utc/gmt as paths are UTC
        timestamp = fd.get_packet_ts(offset)
        ds_timestamp = Time.convert(timestamp, case=self.case,
                                    evidence_tz="UTC")
        try:
            date_str = ds_timestamp.split(" ")[0]
        except (AttributeError, TypeError):
            ## The converted timestamp was not a string we can split.
            ## Fall back to the stream's own timestamp. (This used to
            ## be a bare except, which also swallowed
            ## KeyboardInterrupt and SystemExit.)
            date_str = stream.ts_sec.split(" ")[0]

        ## Only the path is needed from the lookup
        path, _inode, _inode_id = self.fsfd.lookup(inode=combined_inode)

        ## Try to put the HTTP inodes at the mount point. FIXME:
        ## This should not be needed when a http stats viewer is
        ## written.
        path = posixpath.normpath(path + "/../../../../../")

        inode_id = self.fsfd.VFSCreate(
            None, new_inode,
            "%s/HTTP/%s/%s" % (path, date_str, escape(p.request['url'])),
            mtime=timestamp, size=size)

        ## Store information about this request in the
        ## http table:
        host = p.request.get("host", IP2str(stream.dest_ip))
        url = HTML.url_unquote(p.request.get("url"))
        try:
            date = Time.parse(p.response["date"], case=self.case,
                              evidence_tz=None)
        except (KeyError, ValueError):
            ## No date header, or one which could not be parsed
            date = 0

        ## Two forms for the referrer:
        referer = p.request.get('referer', p.request.get('referrer', ''))

        ## Urls without a scheme are made absolute using the host
        if not url.startswith("http://") and not url.startswith("ftp://"):
            url = "http://%s%s" % (host, url)

        ## (Linking each row to the row of its referring page is
        ## not done - no parent column is written.)
        dbh = DB.DBO(self.case)
        args = dict(
            inode_id=inode_id,
            request_packet=p.request.get("packet_id", 0),
            method=p.request.get("method", "-"),
            url=url,
            response_packet=p.response.get("packet_id"),
            status=p.response.get("HTTP_code"),
            content_type=p.response.get("content-type", "text/html"),
            referrer=referer[:500],
            host=host,
            tld=make_tld(host),
            useragent=p.request.get('user-agent', '-'),
        )

        ## The date column is only written when a date was parsed
        if date:
            args['date'] = date

        dbh.insert('http', **args)

        ## Replicate the information about the subobjects in the
        ## connection_details table - this makes it easier to do
        ## some queries:
        dbh.insert(
            "connection_details",
            ts_sec=stream.ts_sec,
            inode_id=inode_id,
            src_ip=stream.src_ip,
            src_port=stream.src_port,
            dest_ip=stream.dest_ip,
            dest_port=stream.dest_port,
        )

        ## handle the request's parameters. This is best effort: a
        ## KeyError or TypeError here must not stop the scan.
        try:
            self.handle_parameters(p.request, inode_id)
        except (KeyError, TypeError):
            pass

        ## Only scan the new file using the scanner train if its
        ## size is bigger than 0:
        if size > 0:
            self.scan_as_file(new_inode, factories)
def process_stream(self, stream, factories):
    """ We look for HTTP requests to identify the stream. This
    allows us to process HTTP connections on unusual ports. This
    situation might arise if HTTP proxies are used for example.

    stream is the stream object being scanned. When it has a
    reverse stream, both directions are opened as one combined
    inode. For every message found, a VFS node is created, a row is
    inserted into the http and connection_details tables and the
    request parameters are handled. factories is passed unchanged
    to scan_as_file for every object whose size is bigger than 0.
    Nothing is returned.
    """
    if stream.reverse:
        combined_inode = "I%s|S%s/%s" % (stream.fd.name, stream.inode_id,
                                         stream.reverse)
        try:
            fd = self.fsfd.open(inode=combined_inode)
        except IOError:
            ## If we cant open the combined stream, we quit (This
            ## could happen if we are trying to operate on a
            ## combined stream already)
            return
    else:
        fd = stream
        ## combined_inode used to be left unset on this path, so any
        ## one way stream which was identified as HTTP died with an
        ## UnboundLocalError at the first use below.
        ## NOTE(review): the name is built with the same prefix as
        ## the combined form, minus the reverse part - confirm this
        ## is how a single stream inode is spelt.
        combined_inode = "I%s|S%s" % (stream.fd.name, stream.inode_id)

    p = HTTP(fd, self.fsfd)
    ## Check that this is really HTTP
    if not p.identify():
        return

    pyflaglog.log(pyflaglog.DEBUG, "Openning %s for HTTP" % combined_inode)

    ## Iterate over all the messages in this connection
    for f in p.parse():
        if not f:
            continue
        offset, size = f

        ## Create the VFS node. Suffixes are appended for the
        ## transfer/content encodings named in the response headers.
        ## A missing header simply adds no suffix.
        new_inode = "%s|H%s:%s" % (combined_inode, offset, size)
        transfer_encoding = p.response.get('transfer-encoding', '')
        content_encoding = p.response.get('content-encoding', '')
        if 'chunked' in transfer_encoding:
            new_inode += "|c0"
        if 'gzip' in content_encoding:
            new_inode += "|G1"
        if 'deflate' in content_encoding:
            new_inode += "|d1"

        ## stream.ts_sec is already formatted in DB format
        ## need to convert back to utc/gmt as paths are UTC
        timestamp = fd.get_packet_ts(offset)
        ds_timestamp = Time.convert(timestamp, case=self.case,
                                    evidence_tz="UTC")
        try:
            date_str = ds_timestamp.split(" ")[0]
        except (AttributeError, TypeError):
            ## The converted timestamp was not a string we can split.
            ## Fall back to the stream's own timestamp. (This used to
            ## be a bare except, which also swallowed
            ## KeyboardInterrupt and SystemExit.)
            date_str = stream.ts_sec.split(" ")[0]

        ## Only the path is needed from the lookup
        path, _inode, _inode_id = self.fsfd.lookup(inode=combined_inode)

        ## Try to put the HTTP inodes at the mount point. FIXME:
        ## This should not be needed when a http stats viewer is
        ## written.
        path = posixpath.normpath(path + "/../../../../../")

        inode_id = self.fsfd.VFSCreate(
            None, new_inode,
            "%s/HTTP/%s/%s" % (path, date_str, escape(p.request['url'])),
            mtime=timestamp, size=size)

        ## Store information about this request in the
        ## http table:
        host = p.request.get("host", IP2str(stream.dest_ip))
        url = HTML.url_unquote(p.request.get("url"))
        try:
            date = Time.parse(p.response["date"], case=self.case,
                              evidence_tz=None)
        except (KeyError, ValueError):
            ## No date header, or one which could not be parsed
            date = 0

        ## Two forms for the referrer:
        referer = p.request.get('referer', p.request.get('referrer', ''))

        ## Urls without a scheme are made absolute using the host
        if not url.startswith("http://") and not url.startswith("ftp://"):
            url = "http://%s%s" % (host, url)

        ## (Linking each row to the row of its referring page is
        ## not done - no parent column is written.)
        dbh = DB.DBO(self.case)
        args = dict(
            inode_id=inode_id,
            request_packet=p.request.get("packet_id", 0),
            method=p.request.get("method", "-"),
            url=url,
            response_packet=p.response.get("packet_id"),
            status=p.response.get("HTTP_code"),
            content_type=p.response.get("content-type", "text/html"),
            referrer=referer[:500],
            host=host,
            tld=make_tld(host),
            useragent=p.request.get('user-agent', '-'),
        )

        ## The date column is only written when a date was parsed
        if date:
            args['date'] = date

        dbh.insert('http', **args)

        ## Replicate the information about the subobjects in the
        ## connection_details table - this makes it easier to do
        ## some queries:
        dbh.insert(
            "connection_details",
            ts_sec=stream.ts_sec,
            inode_id=inode_id,
            src_ip=stream.src_ip,
            src_port=stream.src_port,
            dest_ip=stream.dest_ip,
            dest_port=stream.dest_port,
        )

        ## handle the request's parameters. This is best effort: a
        ## KeyError or TypeError here must not stop the scan.
        try:
            self.handle_parameters(p.request, inode_id)
        except (KeyError, TypeError):
            pass

        ## Only scan the new file using the scanner train if its
        ## size is bigger than 0:
        if size > 0:
            self.scan_as_file(new_inode, factories)