def external_process(self,fd): """ This is run on the extracted file """ #Get a TarFile object - We must access a complete file #here fd.cache() tar=tarfile.TarFile(fileobj=fd) ## List all the files in the tar file: inodes = [] dircount = 0 namelist = tar.getnames() for i in range(len(namelist)): ## If the entry corresponds to just a directory we ignore it. if not os.path.basename(namelist[i]): continue ## Add the file into the VFS self.ddfs.VFSCreate( self.inode,"T%s" % i,namelist[i], size=tar.getmember(namelist[i]).size, _mtime=tar.getmember(namelist[i]).mtime, uid=tar.getmember(namelist[i]).uid, gid=tar.getmember(namelist[i]).gid, mode=oct(tar.getmember(namelist[i]).mode), ) new_inode="%s|T%s" % (self.inode,i) inodes.append(new_inode) for inode in inodes: ## Scan the new file using the scanner train: fd=self.ddfs.open(inode=inode) Scanner.scanfile(self.ddfs,fd,self.factories)
def scanfs(self, scanners, action=None):
    ## Prepare the scanner factory for scanning:
    for s in scanners:
        s.prepare()

    dbh = DB.DBO(self.case)
    dbh.execute('select inode, concat(path,name) as filename from file '
                'where mode="r/r" and status="alloc"')
    count = 0
    for row in dbh:
        ## Open and scan the file:
        count += 1
        if not count % 100:
            pyflaglog.log(pyflaglog.INFO, "File (%s) is inode %s (%s)" %
                          (count, row['inode'], row['filename']))

        try:
            fd = self.open(inode=row['inode'])
            Scanner.scanfile(self, fd, scanners)
            fd.close()
        except Exception, e:
            pyflaglog.log(pyflaglog.ERRORS, "%r: %s" % (e, e))
            continue

def external_process(self,fd): """ This is run on the extracted file """ filename = CacheManager.MANAGER.provide_cache_filename(self.case, self.fd.inode) dirname = filename+"_" try: os.mkdir(dirname) except OSError: pass ## We spawn libpst rather than incorporate in process to ## protect ourselves against memory leaks and seg faults ## in libpst. readpst_binary = os.path.join(config.FLAG_BIN, "readpst") subprocess.call([readpst_binary, '-o', dirname, "-w", filename]) ## Create the required VFS Inodes for t in ['Inbox','Sent Items']: new_inode_id = self.ddfs.VFSCreate(self.fd.inode, "x"+t, t ) CacheManager.MANAGER.create_cache_from_file(self.case, self.fd.inode + "|x" + t, os.path.join(dirname,t), inode_id = new_inode_id) ## Scan the inbox: fd = self.ddfs.open(inode_id = new_inode_id) Scanner.scanfile(self.ddfs, fd, self.factories)
def external_process(self, fd): """ This is run on the extracted file """ filename = CacheManager.MANAGER.provide_cache_filename( self.case, self.fd.inode) dirname = filename + "_" try: os.mkdir(dirname) except OSError: pass ## We spawn libpst rather than incorporate in process to ## protect ourselves against memory leaks and seg faults ## in libpst. readpst_binary = os.path.join(config.FLAG_BIN, "readpst") subprocess.call([readpst_binary, '-o', dirname, "-w", filename]) ## Create the required VFS Inodes for t in ['Inbox', 'Sent Items']: new_inode_id = self.ddfs.VFSCreate(self.fd.inode, "x" + t, t) CacheManager.MANAGER.create_cache_from_file( self.case, self.fd.inode + "|x" + t, os.path.join(dirname, t), inode_id=new_inode_id) ## Scan the inbox: fd = self.ddfs.open(inode_id=new_inode_id) Scanner.scanfile(self.ddfs, fd, self.factories)
def finish(self):
    if self.filename:
        self.ddfs.VFSCreate(self.inode, "G0", self.filename)
        new_inode = "%s|G0" % self.inode

        ## Scan the new file using the scanner train:
        fd = self.ddfs.open(inode=new_inode)
        Scanner.scanfile(self.ddfs, fd, self.factories)

def add_inodes(path, root_item):
    for item in pst_file.listitems(root_item):
        properties = item.properties()
        item_inode = "%s|P%s" % (self.fd.inode, item.get_id())
        new_path = FlagFramework.normpath(
            "%s/%s" % (path, str(item).replace('/', '_')))

        ## This is a little optimization - we save the cache copy of
        ## the property list so the File driver does not need to do
        ## anything:
        property_data = format_properties(properties)

        ## These are the inode properties:
        args = dict(size=len(property_data))

        try:
            args['_ctime'] = properties.get('create_date',
                                            properties['arrival_date'])
        except KeyError:
            pass

        try:
            args['_mtime'] = properties.get('modify_date',
                                            properties['sent_date'])
        except KeyError:
            pass

        self.ddfs.VFSCreate(None, item_inode, new_path, **args)

        ## Make sure we can scan it:
        fd = self.ddfs.open(inode=item_inode)
        Scanner.scanfile(self.ddfs, fd, self.factories)

        ## If it's an email we create VFS nodes for its attachments:
        try:
            for i in range(len(properties['_attachments'])):
                att = properties['_attachments'][i]
                attachment_path = FlagFramework.normpath(
                    "%s/%s" % (new_path,
                               att['filename1'].replace('/', '_')))
                args['size'] = len(att['body'])

                attach_inode = "%s:%s" % (item_inode, i)
                self.ddfs.VFSCreate(None, attach_inode,
                                    attachment_path, **args)

                ## Make sure we scan it:
                fd = self.ddfs.open(inode=attach_inode)
                Scanner.scanfile(self.ddfs, fd, self.factories)
        except KeyError:
            pass

        ## Recursively add the next inode:
        add_inodes(new_path, item)

def scan_as_file(self, inode, factories):
    """ Scans inode as a file (i.e. without any Stream scanners). """
    fd = self.fsfd.open(inode=inode)
    ## It does not matter if we use stream scanners on files
    ## because they would ignore them anyway.
    #factories = [ x for x in factories if not isinstance(x, StreamScannerFactory) ]
    Scanner.scanfile(self.fsfd, fd, factories)
    fd.close()

def process_send_message(self, fd):
    ## Check to see if this is a POST request (i.e. mail is
    ## sent to the server):
    dbh = DB.DBO(fd.case)
    dbh.execute("select `inode_id`,`key`,`value` from http_parameters "
                "where inode_id=%r", fd.inode_id)
    query = {}
    key_map = {}
    for row in dbh:
        query[row['key'].lower()] = row['value']
        key_map[row['key'].lower()] = row['inode_id']

    result = {'type': 'Edit Sent'}
    for field, pattern in [('To', 'to'),
                           ('From', 'from'),
                           ('CC', 'cc'),
                           ('Bcc', 'bcc'),
                           ('Subject', 'subject'),
                           ('Message', 'body')]:
        if pattern in query:
            result[field] = query[pattern]

    if len(result.keys()) < 3:
        return False

    ## FIXME: Create VFS node for attachments
    message_id = self.insert_message(result, "webmail")

    ## Are there any attachments?
    for k in query.keys():
        if k.startswith("f_"):
            ## Create an inode for it:
            dbh.execute("select mtime from inode where inode_id = %r",
                        self.fd.inode_id)
            row = dbh.fetch()

            new_inode = "thttp_parameters:inode_id:%s:value" % key_map[k]
            inode_id = self.ddfs.VFSCreate(self.fd.inode, new_inode, k,
                                           mtime=row['mtime'],
                                           _fast=True)

            dbh.insert("webmail_attachments",
                       inode_id=message_id,
                       attachment=inode_id)

            fd = self.ddfs.open(inode="%s|%s" % (self.fd.inode,
                                                 new_inode))
            Scanner.scanfile(self.ddfs, fd, self.factories)

    return message_id

def external_process(self, fd): """ This is run on the extracted file """ pyflaglog.log(pyflaglog.VERBOSE_DEBUG, "Decompressing Zip File %s" % fd.inode) cache_key = "%s:%s" % (self.case, self.fd.inode) ## Try to read the fd as a zip file z = zipfile.ZipFile(fd) pathname, inode, inode_id = self.ddfs.lookup(inode=self.inode) ## retrieve evidence timezone, this is necessary because zip files ## store time in localtime evidence_tz = Time.get_evidence_tz_name(self.case, self.fd) ## List all the files in the zip file: dircount = 0 inodes = [] namelist = z.namelist() for i in range(len(namelist)): ## Add the file into the VFS try: ## Convert the time to case timezone t = Time.convert(z.infolist()[i].date_time, case=self.case, evidence_tz=evidence_tz) except: t = 0 ## If the entry corresponds to just a directory we ignore it. if not posixpath.basename(namelist[i]): continue info = z.infolist()[i] inode = "%s|Z%s:%s" % (self.inode, info.header_offset, info.compress_size) inodes.append(inode) inode_id = self.ddfs.VFSCreate(None, inode, DB.expand( "%s/%s", (pathname, namelist[i])), size=info.file_size, mtime=t, _fast=True) for inode in inodes: ## Now call the scanners on this new file (FIXME limit ## the recursion level here) fd = self.ddfs.open(inode_id=inode_id) Scanner.scanfile(self.ddfs, fd, self.factories)
class Scan(Farm.Task):
    """ A task to distribute scanning among all workers """
    def run(self, case, inode, scanners, *args):
        factories = Scanner.get_factories(case, scanners.split(","))
        if factories:
            ddfs = DBFS(case)
            fd = ddfs.open(inode=inode)
            Scanner.scanfile(ddfs, fd, factories)
            fd.close()

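## Nearly every snippet in this collection finishes the same way: a
## new VFS node is created, opened, and pushed through the scanner
## train with Scanner.scanfile(). A minimal sketch of that shared
## pattern follows. rescan_inodes is a hypothetical helper name, not
## part of PyFlag, but ddfs, factories and Scanner.scanfile are the
## same objects used in the snippets above:
def rescan_inodes(ddfs, inodes, factories):
    """ Run the scanner train over each newly created VFS inode. """
    for inode in inodes:
        fd = ddfs.open(inode=inode)
        try:
            Scanner.scanfile(ddfs, fd, factories)
        finally:
            fd.close()
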
def store_file(self, metadata):
    """ Creates the VFS node and scans it """
    try:
        data = metadata['Attachment data']
    except KeyError:
        return

    path, inode, inode_id = self.ddfs.lookup(inode=self.fd.inode)
    new_inode = "%s|O%s" % (self.fd.inode, self.count)
    self.count += 1
    filename = metadata.get('Attach filename',
                            metadata.get('Attach long filenm',
                                         'Attachment'))
    CacheManager.MANAGER.create_cache_from_data(self.case, new_inode,
                                                data)
    self.ddfs.VFSCreate(None, new_inode, "%s/%s" % (path, filename),
                        size=len(data))

    new_fd = self.ddfs.open(inode=new_inode)
    Scanner.scanfile(self.ddfs, new_fd, self.factories)

def process_message(self, fd):
    count = 0
    try:
        new_path, new_inode, new_inode_id = self.ddfs.lookup(
            inode=fd.inode)
        a = email.message_from_file(fd)

        ## Rename the VFS node to include the message subject:
        try:
            subject = a['subject']
            if len(subject) > 50:
                subject = subject[:50] + " ..."
            new_name = "%s: %s" % (new_path, subject)
            self.ddfs.VFSRename(new_inode_id, new_name)
        except KeyError:
            pass

        pyflaglog.log(pyflaglog.DEBUG,
                      "Found an email message in %s: %s" %
                      (new_inode, a['subject']))

        ## MySQL is really picky about the date formatting
        date = email.Utils.parsedate(a.get('Date'))
        if not date:
            raise Exception("No Date field in message - this is "
                            "probably not an RFC2822 message at all.")

        dbh = DB.DBO(self.case)
        dbh.insert('email',
                   inode=self.inode,
                   _date="from_unixtime(%r)" % int(time.mktime(date)),
                   to=a.get('To'),
                   _from="%r" % a.get('From'),
                   subject=a.get('Subject'))

        for part in a.walk():
            if part.get_content_maintype() == 'multipart':
                continue

            filename = part.get_filename()
            data = part.get_payload(decode=1)

            ## Sometimes the filename is specified in the
            ## content-type header:
            try:
                for x, y in part.get_params():
                    if x == "name":
                        filename = y
                        break
            except:
                pass

            if not filename:
                filename = "Attachment %s" % count

            ## Create the VFS node:
            new_inode_id = self.ddfs.VFSCreate(new_inode,
                                               "m%s" % count,
                                               filename,
                                               _mtime=time.mktime(date),
                                               size=len(data))

            ## Now call the scanners on the new file:
            new_fd = self.ddfs.open(inode_id=new_inode_id)
            Scanner.scanfile(self.ddfs, new_fd, self.factories)
            new_fd.close()

            count += 1
    except Exception, e:
        pyflaglog.log(pyflaglog.DEBUG,
                      "RFC2822 Scan: Unable to parse inode %s as an "
                      "RFC2822 message (%s)" % (self.inode, e))

def external_process(self, fd):
    dbh = DB.DBO(self.case)
    dbh._warnings = False
    dbh.mass_insert_start('ie_history')
    inode_id = self.fd.lookup_id()

    ## Find our path
    dbh.execute("select path from file where inode_id = %r", inode_id)
    row = dbh.fetch()
    path = row['path']

    history = IECache.IEHistoryFile(fd)
    for event in history:
        if not event:
            continue

        url = event['url'].get_value()
        url.inclusive = False
        url = url.get_value()

        ## How big is the entry?
        size = event['size'].get_value() * IECache.blocksize

        args = dict(inode_id=inode_id,
                    type=event['type'],
                    offset=event['offset'],
                    length=size,
                    url=url,
                    filename=event['filename'],
                    headers=event['data'].get_value())

        modified = event['modified_time'].get_value()
        if modified > 1000:
            args['_modified'] = 'from_unixtime(%d)' % modified
        else:
            modified = None

        accessed = event['accessed_time'].get_value()
        if accessed > 1000:
            args['_accessed'] = 'from_unixtime(%d)' % accessed
        else:
            accessed = None

        dbh.mass_insert(**args)

        ## Try to locate the actual inode
        try:
            index = event['directory_index'].get_value()
            tmp_path = FlagFramework.normpath(FlagFramework.joinpath(
                [path, history.directories[index]]))
        except:
            continue

        dbh.execute("select inode, inode_id from file where path='%s/' "
                    "and name=%r", tmp_path, args['filename'])
        row = dbh.fetch()
        if not row:
            continue

        inode_id = row['inode_id']
        headers = args['headers']

        ## We always create a new inode for cache entries to guarantee
        ## they get scanned by other scanners _after_ the http info is
        ## populated. This essentially means we get duplicated inodes
        ## for the same actual files, which is a bit of extra overhead
        ## (cache files are processed twice).
        encoding_driver = "|o0"

        m = content_encoding_re.search(headers)
        if m:
            ## Is it gzip encoding?
            if m.group(1) == 'gzip':
                encoding_driver = "|G1"
            elif m.group(1) == 'deflate':
                encoding_driver = '|d1'
            else:
                print "I have no idea what %s encoding is" % m.group(1)

        inode_id = self.ddfs.VFSCreate(None,
                                       "%s%s" % (row['inode'],
                                                 encoding_driver),
                                       "%s/%s" % (tmp_path,
                                                  args['filename']),
                                       size=size,
                                       _mtime=modified,
                                       _atime=accessed)

        http_args = dict(inode_id=inode_id,
                         url=url_unquote(url))

        ## Put in a dodgy pcap entry for the timestamp:
        if '_accessed' in args:
            dbh.insert('pcap', _fast=True,
                       _ts_sec=args['_accessed'],
                       ts_usec=0,
                       offset=0, length=0)
            packet_id = dbh.autoincrement()
            http_args['response_packet'] = packet_id
            http_args['request_packet'] = packet_id

        ## Populate the http table if possible
        m = content_type_re.search(headers)
        if m:
            http_args['content_type'] = m.group(1)

        host = FlagFramework.find_hostname(url)
        if host:
            http_args['host'] = host
            http_args['tld'] = FlagFramework.make_tld(host)

        dbh.insert('http', _fast=True, **http_args)

        ## Now populate the http parameters from the URL GET
        ## parameters:
        try:
            base, query = url.split("?", 1)
            qs = cgi.parse_qs(query)
            for k, values in qs.items():
                for v in values:
                    dbh.insert('http_parameters', _fast=True,
                               inode_id=inode_id,
                               key=k,
                               value=v)
        except ValueError:
            pass

        ## Scan the new file using the scanner train:
        fd = self.ddfs.open(inode_id=inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)

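## The content-encoding handling in the IE scanner above (and in the
## Mozilla cache scanner further below) reduces to a small mapping
## from the Content-Encoding header to a PyFlag VFS driver suffix:
## |G1 selects the gzip driver, |d1 deflate, and |o0 a plain offset
## driver. This helper is a hypothetical sketch for illustration, not
## a PyFlag function (note the Mozilla scanner uses "" rather than
## "|o0" as its fallback):
def encoding_driver_for(content_encoding):
    if not content_encoding:
        return "|o0"
    encoding = content_encoding.lower()
    if "gzip" in encoding:
        return "|G1"
    if "deflate" in encoding:
        return "|d1"
    return "|o0"
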
        metamtime = None

        # add a VFS entry using the stream VFS ('S')
        self.fsfd.VFSCreate(None, new_inode, new_path, size=out_fd_len,
                            mtime=metamtime, inode_id=new_inode_id)

        # record the new_id
        new_ids.append(new_inode_id)

        # now that we know both new_ids, add to the connection table
        dbh.execute("insert into connection_details (inode_id, reverse, "
                    "src_ip, src_port, dest_ip, dest_port, isn, ts_sec, "
                    "type) (select %r, %r, src_ip, src_port, dest_ip, "
                    "dest_port, isn, ts_sec, type from "
                    "connection_details where inode_id=%r)",
                    (new_ids[0], new_ids[1], forward_id))
        dbh.execute("insert into connection_details (inode_id, reverse, "
                    "src_ip, src_port, dest_ip, dest_port, isn, ts_sec, "
                    "type) (select %r, %r, src_ip, src_port, dest_ip, "
                    "dest_port, isn, ts_sec, type from "
                    "connection_details where inode_id=%r)",
                    (new_ids[1], new_ids[0], reverse_id))

        # scan both new inodes
        for inode in new_ids:
            fd = self.fsfd.open(inode_id=inode)
            Scanner.scanfile(self.fsfd, fd, factories)

    def process_stream(self, stream, factories):
        if stream.dest_port in (31337, 23456, 5350):
            for (packet_id, cache_offset, data) in stream.packet_data():
                dbh = DB.DBO(self.case)
                try:
                    # decrypt the key:
                    dec = RC4.new(ssl_packet_psk)

                    # see if there is already an entry for this stream
                    dbh.execute("select inode_id from sslkeys where "
                                "crypt_text=%r", data[:8])
                    row = dbh.fetch()
                    if row:
                        inode_id = row['inode_id']
                        dbh.execute("update sslkeys set packet_id=%r, "
                                    "key_data=%r where inode_id=%r",
                                    (packet_id, dec.decrypt(data[10:]),
                                     inode_id))

def external_process(self, fd):
    ## Find the other files we need in order to process the cache
    s = self.fd.stat()
    filename = "%s%s" % (s['path'], s['name'])
    data_fds = [
        self.ddfs.open("%s_CACHE_001_" % s['path']),
        self.ddfs.open("%s_CACHE_002_" % s['path']),
        self.ddfs.open("%s_CACHE_003_" % s['path'])
        ]

    mozcache = MozCache.MozCache(fd, data_fds)

    dbh = DB.DBO(self.case)

    ## Process each cache record
    for record in mozcache.records():
        meta = record.get_entry()
        (method, status, header) = parse_response(meta['MetaData'])

        ## Deal with content-encoding (gzip/deflate)
        encoding_driver = ""
        encoding = header.getheader("content-encoding")
        if encoding:
            if "gzip" in encoding.lower():
                encoding_driver = "|G1"
            elif "deflate" in encoding.lower():
                encoding_driver = "|d1"

        ## Locate embedded entries
        length = 0
        if record.record['DataLocation']['DataFile'] != 0:
            fileidx, offset, length = record.get_data_location()
            inode = '%s|o%s:%s' % (data_fds[fileidx].inode, offset,
                                   length)
        else:
            inode = self.ddfs.lookup(path="%s%08Xd01" % (
                s['path'],
                record.record['HashNumber'].get_value()))[1]
            ## Differentiate the inode from the existing one:
            inode += "|o0"

        ## Add a new entry to the VFS
        if encoding:
            length = 0
        inode_id = self.ddfs.VFSCreate(
            None, "%s%s" % (inode, encoding_driver),
            "%s/%08Xd01" % (filename,
                            record.record['HashNumber'].get_value()),
            _mtime=meta['LastModified'],
            _atime=meta['LastFetched'],
            size=length)

        ## Insert a dodgy pcap entry to represent the timestamp of
        ## this request
        dbh.insert('pcap', _fast=True,
                   _ts_sec='from_unixtime(%d)' % meta['LastModified'],
                   ts_usec=0,
                   offset=0, length=0)
        packet_id = dbh.autoincrement()

        ## Add to the http table. We parse the date; it is
        ## automatically returned in the case timezone. We do not
        ## need to supply an evidence timezone as http date strings
        ## contain a timezone specification.
        try:
            date = Time.parse(header.getheader("date"),
                              case=self.case, evidence_tz=None)
        except TypeError:
            date = 0

        ## Chomp the NULL from the end of the key
        url = str(meta['KeyData'])[:-1]
        if url.startswith("HTTP:"):
            url = url[len("HTTP:"):]

        args = dict(inode_id=inode_id,
                    ## URLs are always stored normalised in the db
                    url=url_unquote(url),
                    request_packet=packet_id,
                    response_packet=packet_id,
                    method=method,
                    status=status,
                    content_type=header.getheader("content-type"),
                    date=date)

        host = FlagFramework.find_hostname(url)
        if host:
            args['host'] = host
            args['tld'] = FlagFramework.make_tld(host)

        dbh.insert("http", _fast=True, **args)

        ## Now populate the http parameters from the URL GET
        ## parameters:
        try:
            base, query = url.split("?", 1)
            qs = cgi.parse_qs(query)
            for k, values in qs.items():
                for v in values:
                    dbh.insert('http_parameters', _fast=True,
                               inode_id=inode_id,
                               key=k,
                               value=v)
        except ValueError:
            pass

        ## Scan the new file using the scanner train:
        fd = self.ddfs.open(inode_id=inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)

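## Both HTTP cache scanners above populate the http_parameters table
## by splitting the GET query string out of the URL. Below is a
## self-contained sketch of that step using only the Python 2
## standard library; split_url_parameters is a hypothetical name, not
## a PyFlag function:
import cgi

def split_url_parameters(url):
    """ Yield (key, value) pairs from the GET parameters of a URL. """
    try:
        base, query = url.split("?", 1)
    except ValueError:
        return                  # no query string present
    qs = cgi.parse_qs(query)
    for k, values in qs.items():
        for v in values:
            yield k, v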