def scanfs(self, scanners, action=None): ## Prepare the scanner factory for scanning: for s in scanners: s.prepare() dbh2 = DB.DBO(self.case) dbh3 = DB.DBO(self.case) dbh3.execute( 'select inode, concat(path,name) as filename from file where mode="r/r" and status="alloc"' ) count = 0 for row in dbh3: # open file count += 1 if not count % 100: pyflaglog.log( pyflaglog.INFO, "File (%s) is inode %s (%s)" % (count, row['inode'], row['filename'])) try: fd = self.open(inode=row['inode']) Scanner.scanfile(self, fd, scanners) fd.close() except Exception, e: pyflaglog.log(pyflaglog.ERRORS, "%r: %s" % (e, e)) continue
def external_process(self, fd):
    """ This is run on the extracted file """
    filename = CacheManager.MANAGER.provide_cache_filename(
        self.case, self.fd.inode)
    dirname = filename + "_"
    try:
        os.mkdir(dirname)
    except OSError:
        ## Directory already exists - reuse it.
        pass

    ## We spawn libpst rather than incorporate in process to
    ## protect ourselves against memory leaks and seg faults
    ## in libpst.
    readpst_binary = os.path.join(config.FLAG_BIN, "readpst")
    subprocess.call([readpst_binary, '-o', dirname, "-w", filename])

    ## Create the required VFS Inodes
    for folder in ['Inbox', 'Sent Items']:
        new_inode_id = self.ddfs.VFSCreate(self.fd.inode,
                                           "x" + folder, folder)
        CacheManager.MANAGER.create_cache_from_file(
            self.case,
            self.fd.inode + "|x" + folder,
            os.path.join(dirname, folder),
            inode_id=new_inode_id)

        ## Scan the inbox:
        fd = self.ddfs.open(inode_id=new_inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)
def external_process(self, fd):
    """ This is run on the extracted file """
    ## We need random access over the whole archive, so force a
    ## complete cached copy first.
    fd.cache()
    tar = tarfile.TarFile(fileobj=fd)

    ## List all the files in the tar file:
    inodes = []
    dircount = 0
    namelist = tar.getnames()
    for i in range(len(namelist)):
        name = namelist[i]
        ## If the entry corresponds to just a directory we ignore it.
        if not os.path.basename(name):
            continue

        ## Add the file into the VFS (look the member up once rather
        ## than once per attribute):
        member = tar.getmember(name)
        self.ddfs.VFSCreate(
            self.inode, "T%s" % i, name,
            size=member.size,
            _mtime=member.mtime,
            uid=member.uid,
            gid=member.gid,
            mode=oct(member.mode),
            )

        inodes.append("%s|T%s" % (self.inode, i))

    for inode in inodes:
        ## Scan the new file using the scanner train:
        new_fd = self.ddfs.open(inode=inode)
        Scanner.scanfile(self.ddfs, new_fd, self.factories)
def execute(self):
    """ Glob an inode path pattern and schedule distributed scans
    for every matching inode. """
    if len(self.args) < 2:
        yield self.help()
        return

    ## Try to glob the inode list:
    dbh = DB.DBO(self.environment._CASE)
    dbh.execute("select inode_id from vfs where !isnull(inode_id) "
                "and path rlike %r",
                (fnmatch.translate(self.args[0])))
    pdbh = DB.DBO()
    pdbh.mass_insert_start('jobs')

    ## This is a cookie used to identify our requests so that we
    ## can check they have been done later.
    cookie = time.time()

    scanners = []
    for pattern in self.args[1:]:
        scanners.extend(fnmatch.filter(Registry.SCANNERS.scanners,
                                       pattern))
    scanners = ScannerUtils.fill_in_dependancies(scanners)

    for row in dbh:
        Scanner.scan_inode_distributed(dbh.case, row['inode_id'],
                                       scanners, cookie=cookie)

    self.wait_for_scan(cookie)
    yield "Scanning complete"
def external_process(self, fd):
    """ This is run on the extracted file """
    filename = CacheManager.MANAGER.provide_cache_filename(self.case,
                                                           self.fd.inode)
    dirname = filename + "_"
    try:
        os.mkdir(dirname)
    except OSError:
        ## Already extracted previously - the directory exists.
        pass

    ## We spawn libpst rather than incorporate in process to
    ## protect ourselves against memory leaks and seg faults
    ## in libpst.
    readpst_binary = os.path.join(config.FLAG_BIN, "readpst")
    subprocess.call([readpst_binary, '-o', dirname, "-w", filename])

    ## Create the required VFS Inodes
    for folder in ['Inbox', 'Sent Items']:
        new_inode_id = self.ddfs.VFSCreate(self.fd.inode,
                                           "x" + folder, folder)
        CacheManager.MANAGER.create_cache_from_file(
            self.case,
            self.fd.inode + "|x" + folder,
            os.path.join(dirname, folder),
            inode_id=new_inode_id)

        ## Scan the inbox:
        fd = self.ddfs.open(inode_id=new_inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)
def run(self, case, inode, scanners, *args):
    """ Worker entry point: scan one inode with the named scanners.

    scanners arrives as a comma separated string of scanner names.
    """
    factories = Scanner.get_factories(case, scanners.split(","))
    if not factories:
        return

    ddfs = DBFS(case)
    fd = ddfs.open(inode=inode)
    Scanner.scanfile(ddfs, fd, factories)
    fd.close()
def finish(self):
    """ Register the carved file in the VFS (if one was found) and
    scan it. """
    if not self.filename:
        return

    self.ddfs.VFSCreate(self.inode, "G0", self.filename)
    new_inode = "%s|G0" % (self.inode)

    ## Scan the new file using the scanner train:
    fd = self.ddfs.open(inode=new_inode)
    Scanner.scanfile(self.ddfs, fd, self.factories)
def add_inodes(path, root_item):
    """ Recursively create VFS nodes (and scan them) for every PST
    item below root_item. """
    for item in pst_file.listitems(root_item):
        properties = item.properties()
        item_inode = "%s|P%s" % (self.fd.inode, item.get_id())
        new_path = FlagFramework.normpath(
            "%s/%s" % (path, item.__str__().replace('/', '_')))

        ## This is a little optimization - we save the
        ## cache copy of the property list so the File
        ## driver does not need to do anything:
        property_data = format_properties(properties)

        ## These are the inode properties:
        args = dict(size=len(property_data))

        try:
            args['_ctime'] = properties.get('create_date',
                                            properties['arrival_date'])
        except:
            pass

        try:
            args['_mtime'] = properties.get('modify_date',
                                            properties['sent_date'])
        except:
            pass

        self.ddfs.VFSCreate(None, item_inode, new_path, **args)

        ## Make sure we can scan it:
        fd = self.ddfs.open(inode=item_inode)
        Scanner.scanfile(self.ddfs, fd, self.factories)

        ## If its an email we create VFS nodes for its
        ## attachments:
        try:
            for i in range(len(properties['_attachments'])):
                att = properties['_attachments'][i]

                attachment_path = FlagFramework.normpath(
                    "%s/%s" % (new_path,
                               att['filename1'].replace('/', '_')))
                args['size'] = len(att['body'])

                attach_inode = "%s:%s" % (item_inode, i)
                self.ddfs.VFSCreate(None, attach_inode,
                                    attachment_path, **args)

                ## Make sure we scan it:
                fd = self.ddfs.open(inode=attach_inode)
                Scanner.scanfile(self.ddfs, fd, self.factories)
        except KeyError:
            ## No _attachments key - not an email item.
            pass

        ## Recursively add the next inode:
        add_inodes(new_path, item)
def scan_as_file(self, inode, factories):
    """ Scans inode as a file (i.e. without any Stream scanners). """
    fd = self.fsfd.open(inode=inode)
    ## Stream scanners simply ignore plain files, so there is no need
    ## to filter them out of the factory list.
    Scanner.scanfile(self.fsfd, fd, factories)
    fd.close()
def scan_as_file(self, inode, factories):
    """ Scans inode as a file (i.e. without any Stream scanners). """
    fd = self.fsfd.open(inode=inode)
    ## No filtering of stream scanners is required here - they would
    ## ignore a plain file anyway.
    Scanner.scanfile(self.fsfd, fd, factories)
    fd.close()
def add_inodes(path, root_item):
    """ Walk the PST tree from root_item, registering each item (and
    any attachments) in the VFS and scanning them. """
    for item in pst_file.listitems(root_item):
        properties = item.properties()
        item_inode = "%s|P%s" % (self.fd.inode, item.get_id())
        new_path = FlagFramework.normpath(
            "%s/%s" % (path, item.__str__().replace('/', '_')))

        ## This is a little optimization - we save the
        ## cache copy of the property list so the File
        ## driver does not need to do anything:
        property_data = format_properties(properties)

        ## These are the inode properties:
        args = dict(size=len(property_data))

        try:
            args['_ctime'] = properties.get('create_date',
                                            properties['arrival_date'])
        except:
            pass

        try:
            args['_mtime'] = properties.get('modify_date',
                                            properties['sent_date'])
        except:
            pass

        self.ddfs.VFSCreate(None, item_inode, new_path, **args)

        ## Make sure we can scan it:
        fd = self.ddfs.open(inode=item_inode)
        Scanner.scanfile(self.ddfs, fd, self.factories)

        ## If its an email we create VFS nodes for its
        ## attachments:
        try:
            for i in range(len(properties['_attachments'])):
                att = properties['_attachments'][i]

                attachment_path = FlagFramework.normpath(
                    "%s/%s" % (new_path,
                               att['filename1'].replace('/', '_')))
                args['size'] = len(att['body'])

                attach_inode = "%s:%s" % (item_inode, i)
                self.ddfs.VFSCreate(None, attach_inode,
                                    attachment_path, **args)

                ## Make sure we scan it:
                fd = self.ddfs.open(inode=attach_inode)
                Scanner.scanfile(self.ddfs, fd, self.factories)
        except KeyError:
            ## Not an email - no attachments to process.
            pass

        ## Recursively add the next inode:
        add_inodes(new_path, item)
def process_send_message(self, fd):
    """ Detect a webmail 'send' POST, insert the message into the
    webmail tables and attach any uploaded files. Returns the new
    message id, or False if this does not look like a sent message. """
    ## Check to see if this is a POST request (i.e. mail is
    ## sent to the server):
    dbh = DB.DBO(fd.case)
    dbh.execute(
        "select `inode_id`,`key`,`value` from http_parameters where inode_id=%r",
        fd.inode_id)
    query = {}
    key_map = {}
    for row in dbh:
        lowered = row['key'].lower()
        query[lowered] = row['value']
        key_map[lowered] = row['inode_id']

    result = {'type': 'Edit Sent'}
    for field, pattern in [('To', 'to'), ('From', 'from'), ('CC', 'cc'),
                           ('Bcc', 'bcc'), ('Subject', 'subject'),
                           ('Message', 'body')]:
        if query.has_key(pattern):
            result[field] = query[pattern]

    ## Too few fields - probably not a sent message at all:
    if len(result.keys()) < 3:
        return False

    ## Fixme: Create VFS node for attachments
    message_id = self.insert_message(result, "webmail")

    ## Are there any attachments?
    for k in query.keys():
        if not k.startswith("f_"):
            continue

        ## Create an Inode for it:
        dbh.execute("select mtime from inode where inode_id = %r",
                    self.fd.inode_id)
        row = dbh.fetch()

        new_inode = "thttp_parameters:inode_id:%s:value" % key_map[k]
        inode_id = self.ddfs.VFSCreate(self.fd.inode, new_inode, k,
                                       mtime=row['mtime'], _fast=True)

        dbh.insert("webmail_attachments",
                   inode_id=message_id,
                   attachment=inode_id)

        fd = self.ddfs.open(inode="%s|%s" % (self.fd.inode, new_inode))
        Scanner.scanfile(self.ddfs, fd, self.factories)

    return message_id
def execute(self):
    """ Scan a single inode (args[0]) with the scanner patterns given
    in the remaining arguments. """
    if len(self.args) < 2:
        yield self.help()
        return

    case = self.environment._CASE
    scanners = []
    for pattern in self.args[1:]:
        scanners.extend(fnmatch.filter(Registry.SCANNERS.scanners,
                                       pattern))
    scanners = ScannerUtils.fill_in_dependancies(scanners)

    Scanner.scan_inode(case, self.args[0], scanners,
                       force=True, cookie=time.time())
def external_process(self, fd):
    """ This is run on the extracted file.

    Registers each member of the zip archive in the VFS, then scans
    every new node.

    Fix: the original final loop reopened the *last* created inode_id
    once per archive member, so only the last member was ever scanned.
    We now remember each created inode_id and scan each one.
    """
    pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                  "Decompressing Zip File %s" % fd.inode)
    cache_key = "%s:%s" % (self.case, self.fd.inode)

    ## Try to read the fd as a zip file
    z = zipfile.ZipFile(fd)

    pathname, inode, inode_id = self.ddfs.lookup(inode=self.inode)

    ## retrieve evidence timezone, this is necessary because zip files
    ## store time in localtime
    evidence_tz = Time.get_evidence_tz_name(self.case, self.fd)

    ## List all the files in the zip file:
    dircount = 0
    inode_ids = []
    namelist = z.namelist()
    for i in range(len(namelist)):
        ## Convert the time to case timezone
        try:
            t = Time.convert(z.infolist()[i].date_time, case=self.case,
                             evidence_tz=evidence_tz)
        except:
            t = 0

        ## If the entry corresponds to just a directory we ignore it.
        if not posixpath.basename(namelist[i]):
            continue

        info = z.infolist()[i]
        inode = "%s|Z%s:%s" % (self.inode, info.header_offset,
                               info.compress_size)
        inode_id = self.ddfs.VFSCreate(
            None, inode,
            DB.expand("%s/%s", (pathname, namelist[i])),
            size=info.file_size,
            mtime=t, _fast=True)
        inode_ids.append(inode_id)

    for inode_id in inode_ids:
        ## Now call the scanners on this new file (FIXME limit
        ## the recursion level here)
        fd = self.ddfs.open(inode_id=inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)
def scan(self, fd, scanners, type, mime, cookie, **args):
    """ Decompress a gzip-typed stream into a new cache object and
    rescan the decompressed result.

    Fix: the GzipFile was never closed; it is now closed via
    try/finally even if decompression fails part way.
    """
    if "gzip" not in type:
        return

    new_path = "%s/%s" % (fd.urn, self.find_gzipped_filename(fd, type))
    new_fd = CacheManager.AFF4_MANAGER.create_cache_fd(self.case,
                                                       new_path)

    gz = gzip.GzipFile(fileobj=fd, mode='r')
    try:
        ## Decompress in 1MB chunks so the whole stream is never held
        ## in memory at once.
        while 1:
            data = gz.read(1024 * 1024)
            if not data:
                break
            new_fd.write(data)
    finally:
        gz.close()

    new_fd.close()

    ## Now scan the new fd
    Scanner.scan_inode(self.case, new_fd.inode_id, scanners)
def process_send_message(self, fd):
    """ Recognise a webmail 'send' POST and record it. Returns the
    message id on success, False when the parameters do not look like
    a sent message. """
    ## Check to see if this is a POST request (i.e. mail is
    ## sent to the server):
    dbh = DB.DBO(self.case)
    dbh.execute(
        "select `inode_id`,`key`,`value` from http_parameters where inode_id=%r",
        self.fd.inode_id)
    query = {}
    key_map = {}
    for row in dbh:
        lowered = row['key'].lower()
        query[lowered] = row['value']
        key_map[lowered] = row['inode_id']

    result = {'type': 'Edit Sent'}
    for field, pattern in [('To', 'to'), ('From', 'from'), ('CC', 'cc'),
                           ('Bcc', 'bcc'), ('Subject', 'subject'),
                           ('Message', 'body')]:
        if query.has_key(pattern):
            result[field] = query[pattern]

    ## Need more than just the 'type' plus one field:
    if len(result.keys()) < 3:
        return False

    ## Fixme: Create VFS node for attachments
    message_id = self.insert_message(result, "webmail")

    ## Are there any attachments?
    for k in query.keys():
        if not k.startswith("f_"):
            continue

        ## Create an Inode for it:
        dbh.execute("select mtime from inode where inode_id = %r",
                    self.fd.inode_id)
        row = dbh.fetch()

        new_inode = "thttp_parameters:inode_id:%s:value" % key_map[k]
        inode_id = self.ddfs.VFSCreate(self.fd.inode, new_inode, k,
                                       mtime=row['mtime'], _fast=True)

        dbh.insert("webmail_attachments",
                   inode_id=message_id,
                   attachment=inode_id)

        fd = self.ddfs.open(inode="%s|%s" % (self.fd.inode, new_inode))
        Scanner.scanfile(self.ddfs, fd, self.factories)

    return message_id
class Scan(Farm.Task):
    """ A task to distribute scanning among all workers """
    def run(self, case, inode, scanners, *args):
        ## Scanner names arrive as a comma separated string:
        factories = Scanner.get_factories(case, scanners.split(","))
        if not factories:
            return

        ddfs = DBFS(case)
        fd = ddfs.open(inode=inode)
        Scanner.scanfile(ddfs, fd, factories)
        fd.close()
def store_file(self, metadata):
    """ Creates the VFS node and scans it """
    try:
        data = metadata['Attachment data']
    except KeyError:
        ## No attachment payload in this message - nothing to store.
        return

    path, inode, inode_id = self.ddfs.lookup(inode=self.fd.inode)
    new_inode = "%s|O%s" % (self.fd.inode, self.count)
    self.count += 1

    filename = metadata.get('Attach filename',
                            metadata.get('Attach long filenm',
                                         'Attachment'))

    CacheManager.MANAGER.create_cache_from_data(self.case, new_inode,
                                                data)
    self.ddfs.VFSCreate(None, new_inode, "%s/%s" % (path, filename),
                        size=len(data))

    new_fd = self.ddfs.open(inode=new_inode)
    Scanner.scanfile(self.ddfs, new_fd, self.factories)
def external_process(self, fd):
    """ This is run on the extracted file.

    Creates a VFS node for every zip archive member and scans each
    new node.

    Fix: the original scan loop used open(inode_id=inode_id) where
    inode_id still held the *last* VFSCreate result, so the same
    (last) member was scanned once per entry and the rest were never
    scanned. We now track every created inode_id.
    """
    pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                  "Decompressing Zip File %s" % fd.inode)
    cache_key = "%s:%s" % (self.case, self.fd.inode)

    ## Try to read the fd as a zip file
    z = zipfile.ZipFile(fd)

    pathname, inode, inode_id = self.ddfs.lookup(inode=self.inode)

    ## retrieve evidence timezone, this is necessary because zip files
    ## store time in localtime
    evidence_tz = Time.get_evidence_tz_name(self.case, self.fd)

    ## List all the files in the zip file:
    dircount = 0
    inode_ids = []
    namelist = z.namelist()
    for i in range(len(namelist)):
        ## Convert the time to case timezone
        try:
            t = Time.convert(z.infolist()[i].date_time, case=self.case,
                             evidence_tz=evidence_tz)
        except:
            t = 0

        ## If the entry corresponds to just a directory we ignore it.
        if not posixpath.basename(namelist[i]):
            continue

        info = z.infolist()[i]
        inode = "%s|Z%s:%s" % (self.inode, info.header_offset,
                               info.compress_size)
        inode_id = self.ddfs.VFSCreate(
            None, inode,
            DB.expand("%s/%s", (pathname, namelist[i])),
            size=info.file_size,
            mtime=t, _fast=True)
        inode_ids.append(inode_id)

    for inode_id in inode_ids:
        ## Now call the scanners on this new file (FIXME limit
        ## the recursion level here)
        fd = self.ddfs.open(inode_id=inode_id)
        Scanner.scanfile(self.ddfs, fd, self.factories)
def scan(self, fd, scanners, type, mime, cookie, scores=None, **args): if 'x86 boot sector' in type: try: parts = sk.mmls(fd) except IOError,e: print e return for part in parts: ## Make a unique and sensible name for this partition name = "%s @ 0x%X" % (part[2], part[0]) ## Add new maps for each partition map = CacheManager.AFF4_MANAGER.create_cache_map( fd.case, "%s/%s" % (fd.urn.parser.query, name)) map.write_from(fd.urn, SECTOR_SIZE * part[0], SECTOR_SIZE * part[1]) map.close() ## Now we recursively scan each object fsfd = FileSystem.DBFS(fd.case) new_fd = fsfd.open(inode_id = map.inode_id) try: fs = sk.skfs(new_fd) fs.close() ## Lets add a hint Magic.set_magic(fd.case, inode_id = map.inode_id, mime = "application/filesystem", magic = "Filesystem") except: pass Scanner.scan_inode_distributed(fd.case, map.inode_id, scanners, cookie)
def scanfs(self, scanners, action=None): ## Prepare the scanner factory for scanning: for s in scanners: s.prepare() dbh2 = DB.DBO(self.case) dbh3=DB.DBO(self.case) dbh3.execute('select inode, concat(path,name) as filename from file where mode="r/r" and status="alloc"') count=0 for row in dbh3: # open file count+=1 if not count % 100: pyflaglog.log(pyflaglog.INFO,"File (%s) is inode %s (%s)" % (count,row['inode'],row['filename'])) try: fd = self.open(inode=row['inode']) Scanner.scanfile(self,fd,scanners) fd.close() except Exception,e: pyflaglog.log(pyflaglog.ERRORS,"%r: %s" % (e,e)) continue
def execute(self):
    """ Reset the named scanners over the inodes matching args[0]. """
    if len(self.args) < 2:
        yield self.help()
        return

    scanners = []
    for pattern in self.args[1:]:
        scanners.extend(fnmatch.filter(Registry.SCANNERS.scanners,
                                       pattern))

    factories = Scanner.get_factories(self.environment._CASE, scanners)
    for f in factories:
        f.multiple_inode_reset(self.args[0])

    yield "Resetting complete"
def execute(self):
    """ Reset scanner results for inodes matching args[0], using the
    scanner name patterns in the remaining args. """
    if len(self.args) < 2:
        yield self.help()
        return

    scanners = []
    for pattern in self.args[1:]:
        scanners.extend(fnmatch.filter(Registry.SCANNERS.scanners,
                                       pattern))

    factories = Scanner.get_factories(self.environment._CASE, scanners)
    for f in factories:
        f.multiple_inode_reset(self.args[0])

    yield "Resetting complete"
def execute(self):
    """ Reset the given scanners on everything under the path args[0].

    args[1] may be an explicit list of scanner names, otherwise the
    remaining args are treated as fnmatch patterns over registered
    scanner names.

    Fix: removed leftover debug print statements ("GETTING
    FACTORIES", "OK NOW RESETING EM", "HOKAY") that polluted stdout.
    """
    if len(self.args) < 2:
        yield self.help()
        return

    scanners = []
    if type(self.args[1]) == types.ListType:
        scanners = self.args[1]
    else:
        for pattern in self.args[1:]:
            scanners.extend(fnmatch.filter(Registry.SCANNERS.scanners,
                                           pattern))

    factories = Scanner.get_factories(self.environment._CASE, scanners)
    for f in factories:
        f.reset_entire_path(self.args[0])

    yield "Reset Complete"
def execute(self):
    """ Reset the given scanners for every inode under path args[0].

    Accepts either an explicit scanner list in args[1] or fnmatch
    patterns in args[1:].

    Fix: dropped the stray debugging prints ("GETTING FACTORIES",
    "OK NOW RESETING EM", "HOKAY") left in the original.
    """
    if len(self.args) < 2:
        yield self.help()
        return

    scanners = []
    if type(self.args[1]) == types.ListType:
        scanners = self.args[1]
    else:
        for pattern in self.args[1:]:
            scanners.extend(fnmatch.filter(Registry.SCANNERS.scanners,
                                           pattern))

    factories = Scanner.get_factories(self.environment._CASE, scanners)
    for f in factories:
        f.reset_entire_path(self.args[0])

    yield "Reset Complete"
def process_message(self, fd): count = 0 try: new_path, new_inode, new_inode_id = self.ddfs.lookup(inode = fd.inode) a = email.message_from_file(fd) try: subject = a['subject'] if len(subject)>50: subject = subject[:50] + " ..." new_name = "%s: %s" % (new_path, subject) self.ddfs.VFSRename(new_inode_id, new_name) except KeyError: pass pyflaglog.log(pyflaglog.DEBUG,"Found an email message in %s: %s" % ( new_inode, a['subject'])) #Mysql is really picky about the date formatting date = email.Utils.parsedate(a.get('Date')) if not date: raise Exception("No Date field in message - this is probably not an RFC2822 message at all.") dbh=DB.DBO(self.case) dbh.insert('email', inode = self.inode, _date = "from_unixtime(%r)" % int(time.mktime(date)), to = a.get('To'), _from = "%r" % a.get('From'), subject = a.get('Subject')) for part in a.walk(): if part.get_content_maintype() == 'multipart': continue filename = part.get_filename() data = part.get_payload(decode=1) ## Sometimes the filename is specified in the ## content-type header: try: for x,y in part.get_params(): if x =="name": filename=y break except: pass if not filename: filename="Attachment %s" % count ## Create the VFSs node: new_inode_id = self.ddfs.VFSCreate( new_inode,"m%s" % count, filename, _mtime = time.mktime(date), size=len(data) ) ## Now call the scanners on new file: new_fd = self.ddfs.open(inode_id=new_inode_id) Scanner.scanfile(self.ddfs,new_fd,self.factories) new_fd.close() count+=1 except Exception,e: pyflaglog.log(pyflaglog.DEBUG,"RFC2822 Scan: Unable to parse inode %s as an RFC2822 message (%s)" % (self.inode,e))
def external_process(self,fd): dbh=DB.DBO(self.case) dbh._warnings = False dbh.mass_insert_start('ie_history') inode_id = self.fd.lookup_id() ## Find our path dbh.execute("select path from file where inode_id = %r", inode_id) row = dbh.fetch() path = row['path'] history = IECache.IEHistoryFile(fd) for event in history: if event: url = event['url'].get_value() url.inclusive = False url = url.get_value() ## How big is the entry size = event['size'].get_value() * IECache.blocksize args = dict(inode_id = inode_id, type = event['type'], offset = event['offset'], length = size, url = url, filename = event['filename'], headers = event['data'].get_value(),) modified = event['modified_time'].get_value() if modified>1000: args['_modified'] = 'from_unixtime(%d)' % modified else: modified = None accessed = event['accessed_time'].get_value() if accessed>1000: args['_accessed'] = 'from_unixtime(%d)' % accessed else: accessed = None dbh.mass_insert(**args) ## Try to locate the actual inode try: index = event['directory_index'].get_value() tmp_path = FlagFramework.normpath((FlagFramework.joinpath([ path, history.directories[index]]))) except: continue dbh.execute("select inode, inode_id from file where path='%s/' and name=%r", tmp_path, args['filename']) row = dbh.fetch() if row: inode_id = row['inode_id'] headers = args['headers'] ## We always create a new inode for cache ## entries to guarantee they get scanned by ## other scanners _after_ http info is ## populated. This essentially means we get ## duplicated inodes for the same actual files ## which is a bit of extra overhead (cache ## files are processed twice). encoding_driver = "|o0" m = content_encoding_re.search(headers) if m: ## Is it gzip encoding? 
if m.group(1) == 'gzip': encoding_driver = "|G1" elif m.group(1) == 'deflate': encoding_driver = '|d1' else: print "I have no idea what %s encoding is" % m.group(1) inode_id = self.ddfs.VFSCreate(None, "%s%s" % (row['inode'], encoding_driver), "%s/%s" % (tmp_path, args['filename']), size = size, _mtime = modified, _atime = accessed ) http_args = dict( inode_id = inode_id, url = url_unquote(url), ) ## Put in a dodgy pcap entry for the timestamp: if '_accessed' in args: dbh.insert('pcap', _fast=True, _ts_sec = args['_accessed'], ts_usec = 0, offset=0, length=0) packet_id = dbh.autoincrement() http_args['response_packet'] = packet_id http_args['request_packet'] = packet_id ## Populate http table if possible m = content_type_re.search(headers) if m: http_args['content_type'] = m.group(1) host = FlagFramework.find_hostname(url) if host: http_args['host'] = host http_args['tld'] = FlagFramework.make_tld(host) dbh.insert('http', _fast=True, **http_args ) ## Now populate the http parameters from the ## URL GET parameters: try: base, query = url.split("?",1) qs = cgi.parse_qs(query) for k,values in qs.items(): for v in values: dbh.insert('http_parameters', _fast=True, inode_id = inode_id, key = k, value = v) except ValueError: pass ## Scan new files using the scanner train: fd=self.ddfs.open(inode_id=inode_id) Scanner.scanfile(self.ddfs,fd,self.factories)
metamtime=None # add a VFS entry using the stream VFS ('S') self.fsfd.VFSCreate(None, new_inode, new_path, size=out_fd_len, mtime=metamtime, inode_id=new_inode_id) # record the new_id new_ids.append(new_inode_id) # now that we know both new_ids, add to the connection table dbh.execute("insert into connection_details (inode_id, reverse, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type) (select %r, %r, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type from connection_details where inode_id=%r)", (new_ids[0], new_ids[1], forward_id)) dbh.execute("insert into connection_details (inode_id, reverse, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type) (select %r, %r, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type from connection_details where inode_id=%r)", (new_ids[1], new_ids[0], reverse_id)) # scan both new inodes for inode in new_ids: fd = self.fsfd.open(inode_id = inode) Scanner.scanfile(self.fsfd, fd, factories) def process_stream(self, stream, factories): if stream.dest_port in(31337, 23456, 5350): for (packet_id, cache_offset, data) in stream.packet_data(): dbh = DB.DBO(self.case) try: # decrypt the key: dec = RC4.new(ssl_packet_psk) # see if there is already an entry for this stream dbh.execute("select inode_id from sslkeys where crypt_text=%r", data[:8]) row = dbh.fetch() if row: inode_id = row['inode_id'] dbh.execute("update sslkeys set packet_id=%r, key_data=%r where inode_id=%r", (packet_id, dec.decrypt(data[10:]), inode_id))
def Callback(mode, packet, connection):
    """ Stream reassembly callback.

    mode 'est'    : new connection - create forward/reverse AFF4 map
                    streams (payload maps plus .pkt header maps) and
                    record the connection details.
    mode 'data'   : append this packet's payload to the maps.
    mode 'destroy': finalise streams, cross-link forward/reverse, and
                    schedule distributed scans.

    Fix: removed a leftover pdb.set_trace() debugger breakpoint which
    fired whenever packet.offset == 0 in 'data' mode.
    """
    if mode == 'est':
        if 'map' not in connection:
            ## Lookup the urn this packet came from
            urn = urn_dispatcher[packet.pcap_file_id]
            ip = packet.find_type("IP")

            ## We can only get tcp or udp packets here
            try:
                tcp = packet.find_type("TCP")
            except AttributeError:
                tcp = packet.find_type("UDP")

            base_urn = "/%s-%s/%s-%s/" % (
                ip.source_addr, ip.dest_addr,
                tcp.source, tcp.dest)

            timestamp = pyaff4.XSDDatetime()
            timestamp.set(packet.ts_sec)

            map_stream = CacheManager.AFF4_MANAGER.create_cache_map(
                case, base_urn + "forward", timestamp=timestamp,
                target=urn)
            connection['map'] = map_stream

            ## These streams are used to point at the start of
            ## each packet header - this helps us get back to
            ## the packet information for each bit of data
            map_stream_pkt = CacheManager.AFF4_MANAGER.create_cache_map(
                case, base_urn + "forward.pkt", timestamp=timestamp,
                target=urn, inherited=map_stream.urn)
            connection['map.pkt'] = map_stream_pkt

            r_map_stream = CacheManager.AFF4_MANAGER.create_cache_map(
                case, base_urn + "reverse", timestamp=timestamp,
                target=urn, inherited=map_stream.urn)
            connection['reverse']['map'] = r_map_stream

            ## These streams are used to point at the start of
            ## each packet header - this helps us get back to
            ## the packet information for each bit of data
            r_map_stream_pkt = CacheManager.AFF4_MANAGER.create_cache_map(
                case, base_urn + "reverse.pkt", timestamp=timestamp,
                target=urn, inherited=r_map_stream.urn)
            connection['reverse']['map.pkt'] = r_map_stream_pkt

            ## Add to connection table
            map_stream.insert_to_table(
                "connection_details",
                dict(reverse=r_map_stream.inode_id,
                     src_ip=ip.src,
                     src_port=tcp.source,
                     dest_ip=ip.dest,
                     dest_port=tcp.dest,
                     _ts_sec="from_unixtime(%s)" % packet.ts_sec,
                     ))

    elif mode == 'data':
        try:
            tcp = packet.find_type("TCP")
        except AttributeError:
            tcp = packet.find_type("UDP")

        try:
            length = len(tcp.data)
        except:
            ## No payload on this packet - nothing to map.
            return

        urn = urn_dispatcher[packet.pcap_file_id]

        ## (A pdb.set_trace() breakpoint on packet.offset == 0 was
        ## removed from here.)
        connection['map'].write_from(urn,
                                     packet.offset + tcp.data_offset,
                                     length)
        connection['map.pkt'].write_from(urn, packet.offset, length)

    elif mode == 'destroy':
        if connection['map'].size > 0 or \
                connection['reverse']['map'].size > 0:
            map_stream = connection['map']
            r_map_stream = connection['reverse']['map']

            map_stream_pkt = connection['map.pkt']
            Magic.set_magic(case, map_stream_pkt.inode_id,
                            "Packet Map")

            r_map_stream_pkt = connection['reverse']['map.pkt']
            Magic.set_magic(case, r_map_stream_pkt.inode_id,
                            "Packet Map")

            r_map_stream.set_attribute(PYFLAG_REVERSE_STREAM,
                                       map_stream.urn)
            map_stream.set_attribute(PYFLAG_REVERSE_STREAM,
                                     r_map_stream.urn)

            ## Close all the streams
            r_map_stream_pkt.close()
            map_stream_pkt.close()
            r_map_stream.close()
            map_stream.close()

            ## FIXME - this needs to be done out of process using
            ## the distributed architecture!!!

            ## Open read only versions of these streams for
            ## scanning
            dbfs = FileSystem.DBFS(case)
            map_stream = dbfs.open(inode_id=map_stream.inode_id)
            r_map_stream = dbfs.open(inode_id=r_map_stream.inode_id)

            Scanner.scan_inode_distributed(case, map_stream.inode_id,
                                           scanners, cookie)
            Scanner.scan_inode_distributed(case, r_map_stream.inode_id,
                                           scanners, cookie)
def external_process(self, fd):
    """ Process a Mozilla cache map file: for every cache record,
    register the cached object in the VFS, populate the http and
    http_parameters tables, and scan the new node. """
    #find the other files we need in order to process cache
    s = self.fd.stat()
    filename = "%s%s" % (s['path'], s['name'])
    data_fds = [
        self.ddfs.open("%s_CACHE_001_" % s['path']),
        self.ddfs.open("%s_CACHE_002_" % s['path']),
        self.ddfs.open("%s_CACHE_003_" % s['path'])
    ]

    mozcache = MozCache.MozCache(fd, data_fds)
    #print mozcache

    dbh = DB.DBO(self.case)

    # process each cache record
    for record in mozcache.records():
        meta = record.get_entry()
        (method, status, header) = parse_response(meta['MetaData'])

        # deal with content-encoding (gzip/deflate)
        encoding_driver = ""
        encoding = header.getheader("content-encoding")
        if encoding:
            if "gzip" in encoding.lower():
                encoding_driver = "|G1"
            elif "deflate" in encoding.lower():
                encoding_driver = "|d1"

        # locate embedded entries
        length = 0
        if record.record['DataLocation']['DataFile'] != 0:
            fileidx, offset, length = record.get_data_location()
            inode = '%s|o%s:%s' % (data_fds[fileidx].inode,
                                   offset, length)
        else:
            inode = self.ddfs.lookup(path="%s%08Xd01" % (
                s['path'],
                record.record['HashNumber'].get_value()))[1]
            # differentiate the inode from the existing one
            inode += "|o0"

        # add new entry to the VFS
        if encoding:
            length = 0
        inode_id = self.ddfs.VFSCreate(
            None,
            "%s%s" % (inode, encoding_driver),
            "%s/%08Xd01" % (filename,
                            record.record['HashNumber'].get_value()),
            _mtime=meta['LastModified'],
            _atime=meta['LastFetched'],
            size=length)

        ## Insert a dodgy pcap entry to represent the
        ## timestamp of this request
        dbh.insert('pcap', _fast=True,
                   _ts_sec='from_unixtime(%d)' % meta['LastModified'],
                   ts_usec=0,
                   offset=0, length=0)
        packet_id = dbh.autoincrement()

        # add to http table
        # we parse the date, it is automatically returned in case
        # timezone. We do not need to supply an evidence timezone as
        # http date strings contain a timezone specification.
        try:
            date = Time.parse(header.getheader("date"),
                              case=self.case, evidence_tz=None)
        except TypeError:
            date = 0

        # chomp NULL from end
        url = str(meta['KeyData'])[:-1]
        if url.startswith("HTTP:"):
            url = url[len("HTTP:"):]

        args = dict(inode_id=inode_id,
                    ## urls are always stored normalised in the db
                    url=url_unquote(url),
                    request_packet=packet_id,
                    response_packet=packet_id,
                    method=method,
                    status=status,
                    content_type=header.getheader("content-type"),
                    date=date)

        host = FlagFramework.find_hostname(url)
        if host:
            args['host'] = host
            args['tld'] = FlagFramework.make_tld(host)

        dbh.insert("http", _fast=True, **args)

        ## Now populate the http parameters from the
        ## URL GET parameters:
        try:
            base, query = url.split("?", 1)
            qs = cgi.parse_qs(query)
            for k, values in qs.items():
                for v in values:
                    dbh.insert('http_parameters', _fast=True,
                               inode_id=inode_id,
                               key=k,
                               value=v)
        except ValueError:
            ## URL carries no query string.
            pass

        ## Scan the new file using the scanner train:
        new_fd = self.ddfs.open(inode_id=inode_id)
        Scanner.scanfile(self.ddfs, new_fd, self.factories)
def run(self, case=None, inode_id=0, scanners=None, cookie=1,
        *args, **kwargs):
    """ Distributed worker entry point: scan inode_id with the given
    scanners (no-op when no scanners were requested). """
    if not scanners:
        return
    Scanner.scan_inode(case, inode_id, scanners, cookie)
def process_message(self, fd): count = 0 try: new_path, new_inode, new_inode_id = self.ddfs.lookup( inode=fd.inode) a = email.message_from_file(fd) try: subject = a['subject'] if len(subject) > 50: subject = subject[:50] + " ..." new_name = "%s: %s" % (new_path, subject) self.ddfs.VFSRename(new_inode_id, new_name) except KeyError: pass pyflaglog.log( pyflaglog.DEBUG, "Found an email message in %s: %s" % (new_inode, a['subject'])) #Mysql is really picky about the date formatting date = email.Utils.parsedate(a.get('Date')) if not date: raise Exception( "No Date field in message - this is probably not an RFC2822 message at all." ) dbh = DB.DBO(self.case) dbh.insert('email', inode=self.inode, _date="from_unixtime(%r)" % int(time.mktime(date)), to=a.get('To'), _from="%r" % a.get('From'), subject=a.get('Subject')) for part in a.walk(): if part.get_content_maintype() == 'multipart': continue filename = part.get_filename() data = part.get_payload(decode=1) ## Sometimes the filename is specified in the ## content-type header: try: for x, y in part.get_params(): if x == "name": filename = y break except: pass if not filename: filename = "Attachment %s" % count ## Create the VFSs node: new_inode_id = self.ddfs.VFSCreate( new_inode, "m%s" % count, filename, _mtime=time.mktime(date), size=len(data)) ## Now call the scanners on new file: new_fd = self.ddfs.open(inode_id=new_inode_id) Scanner.scanfile(self.ddfs, new_fd, self.factories) new_fd.close() count += 1 except Exception, e: pyflaglog.log( pyflaglog.DEBUG, "RFC2822 Scan: Unable to parse inode %s as an RFC2822 message (%s)" % (self.inode, e))
def parse(self, forward_fd, reverse_fd, scanners):
    """ Walk a forward/reverse HTTP stream pair, consuming one
    request/response transaction per iteration until neither side
    parses any further. """
    while True:
        request = {'url': '/unknown_request_%s' % forward_fd.inode_id,
                   'method': 'GET'}
        response = {}
        parse = False
        request_body = response_body = None

        ## First parse both request and response

        ## Get the current timestamp of the request
        packet = NetworkScanner.dissect_packet(forward_fd)
        if self.read_request(request, forward_fd):
            try:
                request['timestamp'] = packet.ts_sec
            except AttributeError:
                request['timestamp'] = 0

            parse = True
            request_body = self.skip_body(request, forward_fd)
            request_body.dirty = 0

        packet = NetworkScanner.dissect_packet(reverse_fd)
        if self.read_response(response, reverse_fd):
            try:
                response['timestamp'] = packet.ts_sec
            except AttributeError:
                response['timestamp'] = 0

            parse = True
            response_body = self.skip_body(response, reverse_fd)

        ## We hang all the parameters on the response object
        ## (i.e. file attachment, post parameters, cookies)
        if response_body and request_body:
            self.process_cookies(request, response_body)
            self.process_post_body(request, request_body, response_body)
            if request_body.size > 0:
                request_body.close()

        if response_body and response_body.size > 0:
            ## Store information about the object in the http table:
            url = request.get('url', '/')

            ## We try to store the url in a normalized form so we
            ## can find it regardless of the various permutations
            ## it can go though
            response_body.insert_to_table(
                "http",
                dict(method=request.get('method'),
                     url=url,
                     status=response.get('HTTP_code'),
                     content_type=response.get('content-type'),
                     useragent=request.get('user-agent'),
                     host=request.get('host'),
                     tld=make_tld(request.get('host', ''))
                     ))
            response_body.close()

            Scanner.scan_inode_distributed(forward_fd.case,
                                           response_body.inode_id,
                                           scanners, self.cookie)

        ## Neither direction produced anything - stream exhausted.
        if not parse:
            break
# record the new_id new_ids.append(new_inode_id) # now that we know both new_ids, add to the connection table dbh.execute( "insert into connection_details (inode_id, reverse, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type) (select %r, %r, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type from connection_details where inode_id=%r)", (new_ids[0], new_ids[1], forward_id)) dbh.execute( "insert into connection_details (inode_id, reverse, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type) (select %r, %r, src_ip, src_port, dest_ip, dest_port, isn, ts_sec, type from connection_details where inode_id=%r)", (new_ids[1], new_ids[0], reverse_id)) # scan both new inodes for inode in new_ids: fd = self.fsfd.open(inode_id=inode) Scanner.scanfile(self.fsfd, fd, factories) def process_stream(self, stream, factories): if stream.dest_port in (31337, 23456, 5350): for (packet_id, cache_offset, data) in stream.packet_data(): dbh = DB.DBO(self.case) try: # decrypt the key: dec = RC4.new(ssl_packet_psk) # see if there is already an entry for this stream dbh.execute( "select inode_id from sslkeys where crypt_text=%r", data[:8]) row = dbh.fetch() if row: inode_id = row['inode_id']