def external_process(self, fd):
    ## Read all the events from the file:
    pyflaglog.log(pyflaglog.DEBUG, "Processing %s as mork" % self.fd.inode)
    dbh = DB.DBO(self.case)
    inode_id = self.fd.lookup_id()
    h = MozHist.MozHist(fd=fd)
    context = None
    while 1:
        token = h.next_token()
        if not token: break

        if token == 'EVENT_END':
            e = h.event
            if not context:
                if "formhistory" in h.types['80']:
                    context = 'form'
                else:
                    context = 'history'

            if context == 'form':
                try:
                    dbh.insert('mozilla_form_history',
                               inode_id = inode_id,
                               id = e['id'],
                               name = e['Name'],
                               value = e['Value'])
                except KeyError:
                    continue
            else:
                try:
                    result = dict(inode_id = inode_id,
                                  url = e['URL'],
                                  _LastVisitDate = "from_unixtime('%s')" % e['LastVisitDate'][:10],
                                  _FirstVisitDate = "from_unixtime('%s')" % e['FirstVisitDate'][:10],
                                  )
                except KeyError:
                    continue

                try: result['id'] = int(e['id'])
                except: pass

                try: result['Typed'] = e['Typed']
                except: pass

                try: result['Referrer'] = e['Referrer']
                except: pass

                try: result['VisitCount'] = e['VisitCount']
                except: pass

                try: result['name'] = e['Name']
                except: pass

                try: result['host'] = e['Hostname']
                except: pass

                dbh.insert('mozilla_history', **result)

def reset_all(**query):
    """ This searches for all executed reports with the provided
    parameters in them and resets them all.

    Callers need to provide at least a report name, case and a
    family or an exception is raised.
    """
    flag = GLOBAL_FLAG_OBJ
    report = Registry.REPORTS.dispatch(query['family'], query['report'])
    dbh = DB.DBO(query['case'])
    family = query['family'].replace(" ", "%20")
    dbh.execute("select value from meta where property='report_executed' and value like '%%family=%s%%'" % family)
    for row in dbh:
        q = query_type(string=row['value'], case=query['case'])
        try:
            for k in query.keys():
                if k == 'case': continue
                if q[k] != query[k]:
                    raise KeyError()

            ## This report should now be reset:
            pyflaglog.log(pyflaglog.DEBUG, "Will now reset %s" % row['value'])
            print "Resetting %s" % query

            try:
                report = report(flag)
            except:
                pass

            report.reset(q)
            dbh2 = DB.DBO(query['case'])
            dbh2.execute("delete from meta where property='report_executed' and value=%r", row['value'])
        except KeyError:
            pass

def authenticate(self, query, result):
    """ This method is called first by FlagFramework to evaluate
    authentication and determine if the user is allowed to execute
    this report.
    """
    ## By default everyone is authorised
    try:
        ## If it exists, config.AUTHORISED_USERS consists of a line looking like:
        ## PYFLAG_AUTHORISED_USERS="mic:password,joe:letmein"
        if not config.AUTHORISED_USERS:
            return True

        for token in config.AUTHORISED_USERS.split(","):
            try:
                username, password = token.split(':')
                if username.strip() == query.user and \
                       password.strip() == query.passwd:
                    pyflaglog.log(pyflaglog.DEBUG,
                                  "Accepted Authentication from %s" % query.user)
                    return True
            except ValueError:
                pyflaglog.log(pyflaglog.WARNINGS,
                              "Parse error in Authentication Token %r - ignoring this token" % token)
    except AttributeError, e:
        ## If config.AUTHORISED_USERS does not exist, we don't require authentication
        return True

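## A standalone sketch (not part of PyFlag) of the credential format
## authenticate() above expects: AUTHORISED_USERS is a comma separated
## list of colon separated user:password pairs. The helper name and
## sample value below are hypothetical, for illustration only:
def _demo_parse_authorised_users(value="mic:password,joe:letmein"):
    creds = {}
    for token in value.split(","):
        try:
            username, password = token.split(':')
            creds[username.strip()] = password.strip()
        except ValueError:
            ## Malformed tokens are skipped, mirroring the warning above
            pass
    return creds

## _demo_parse_authorised_users() returns {'mic': 'password', 'joe': 'letmein'}
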
def resetscanfs(self, scanners):
    for i in scanners:
        try:
            i.reset()
        except DB.DBError, e:
            pyflaglog.log(pyflaglog.ERRORS, "Could not reset Scanner %s: %s" % (i, e))

def external_process(self, fd): pyflaglog.log(pyflaglog.DEBUG,"Opening %s for Hotmail processing" % self.fd.inode) ## Now we should be able to parse the data out: self.process_send_message(fd) self.process_editread(fd) self.process_readmessage(fd) self.process_mail_listing()
def read(self, length=None):
    try:
        return File.read(self, length)
    except IOError:
        pass

    if not self.gz:
        self.fd.seek(0)
        self.gz = gzip.zlib.decompressobj(-15)

    count = 0
    step = 1024
    result = ''

    ## Copy ourself into the file - This is in case we have errors
    ## in the file, we try to read as much as possible:
    while 1:
        try:
            data = self.gz.decompress(self.fd.read(step))
        except IOError, e:
            step /= 2
            if step < 10:
                pyflaglog.log(pyflaglog.DEBUG,
                              "Error reading from %s, could only get %s bytes" % (
                    self.fd.inode, count))
                break
            else:
                continue
        except Exception, e:
            pyflaglog.log(pyflaglog.WARNING, "Unable to decompress inode %s" % e)
            break

        ## (Assumed completion - the source breaks off above.) Stop at
        ## end of input, otherwise accumulate what we managed to read:
        if not data:
            break
        result += data
        count += len(data)

    return result

def _warning_check(self): """ We need to override this because for some cases it issues a SHOW WARNINGS query. Which will raise an 'out of sync error' when we operate in SS. This is a most sane approach - when warnings are detected, we simply try to drain the resultsets and then read the warnings. """ if self.ignore_warnings: return ## We have warnings to show if self._warnings: last_executed = [x[:500] for x in self._last_executed_sequence] results = list(self._fetch_row(1000)) if len(results) < 1000: self.execute("SHOW WARNINGS") while 1: a = self.fetchone() if not a: break pyflaglog.log( pyflaglog.DEBUG, "Mysql warnings: query %r: %s" % (last_executed, a)) else: pyflaglog.log( pyflaglog.DEBUG, "Mysql issued warnings but we are unable to drain result queue" ) ## If we have strict SQL we abort on warnings: if config.STRICTSQL: raise DBError(a) self.py_row_cache.extend(results)
def check_index(self, table, key, idx_type='', length=None):
    """ This checks the database to ensure that the said table has
    an index on said key.

    If an index is missing, we create it here, so we always ensure
    an index exists once we return.
    """
    ## We implement a local cache to ensure that we don't hit the
    ## DB all the time:
    cache_key = "%s/%s" % (self.case, table)
    try:
        ## These should be the fields with the indexes on them:
        fields = DBIndex_Cache.get(cache_key)
    except KeyError:
        self.execute("show index from `%s`", table)
        fields = [ row['Key_name'] for row in self ]
        DBIndex_Cache.put(fields, key=cache_key)

    ## Now fields is an array stored in the Store - we can append to
    ## it directly because we also hold a reference here and it will
    ## affect the next value gotten from the Store:
    if key not in fields:
        if length:
            sql = "(`%s`(%s))" % (key, length)
        else:
            sql = "(`%s`)" % (key,)

        pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                      "No index found in table %s on field %s - generating index, this may take a while" % (table, key))
        ## Index not found, we make it here:
        self.execute("Alter table `%s` add index %s %s", (table, idx_type, sql))

        ## Add to cache:
        fields.append(key)

def __init__(self):
    """ We keep a record of all magic handlers and instantiate them all. """
    if not MagicResolver.indexer:
        MagicResolver.indexer = index.Index()

        for cls in Registry.MAGIC_HANDLERS.classes:
            cls = cls()
            MagicResolver.magic_handlers.append(cls)

            for rule in cls.regex_rules:
                MagicResolver.indexer.add_word(rule[0], MagicResolver.count, index.WORD_EXTENDED)
                MagicResolver.index_map[MagicResolver.count] = cls
                MagicResolver.rule_map[MagicResolver.count] = rule
                MagicResolver.count += 1

            for rule in cls.literal_rules:
                MagicResolver.indexer.add_word(rule[0], MagicResolver.count, index.WORD_ENGLISH)
                MagicResolver.index_map[MagicResolver.count] = cls
                MagicResolver.rule_map[MagicResolver.count] = rule
                MagicResolver.count += 1

        pyflaglog.log(pyflaglog.DEBUG,
                      "Loaded %s signatures into Magic engine" % MagicResolver.count)

def process_stream(self, stream, factories):
    combined_inode = "I%s|S%s/%s" % (stream.fd.name, stream.inode_id, stream.reverse)
    pyflaglog.log(pyflaglog.DEBUG, "Opening %s for SMTP" % combined_inode)

    ## We open the file and scan it for emails:
    fd = self.fsfd.open(inode=combined_inode)
    dbh = DB.DBO(self.case)
    p = SMTP(fd, dbh, self.fsfd)

    ## Iterate over all the messages in this connection
    for f in p.parse():
        if not f: continue

        ## message number and its offset:
        count, offset, length = f

        ## Create the VFS node:
        path, combined_inode, inode_id = self.fsfd.lookup(inode=combined_inode)
        path = posixpath.normpath(path + "/../../../../../")

        new_inode = "%s|o%s:%s" % (combined_inode, offset, length)
        ds_timestamp = Time.convert(stream.ts_sec, case=self.case, evidence_tz="UTC")
        date_str = ds_timestamp.split(" ")[0]

        self.fsfd.VFSCreate(None, new_inode,
                            "%s/SMTP/%s/Message_%s" % (path, date_str, count),
                            mtime = stream.ts_sec,
                            size = length)

        ## Scan the new file using the scanner train. If the user
        ## chose the RFC2822 scanner, we will be able to understand
        ## this:
        self.scan_as_file(new_inode, factories)

def run(self, *args, **kwargs):
    pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                  "Running Housekeeping tasks on %s" % time.ctime())
    try:
        FlagFramework.post_event('periodic', None)
    finally:
        self.schedule()

def put(self, object, prefix='', key=None):
    """ Stores an object in the Store. Returns the key for the
    object. If key is already supplied we use that instead - Note
    that we do not check that it doesn't already exist.
    """
    self.mutex.acquire()
    try:
        ## Ensure that we have enough space:
        self.check_full()

        ## Push the item in:
        now = time.time()
        if not key:
            key = "%s%s" % (prefix, self.id)

        self.creation_times.append([now, key, object])
        self.id += 1

    finally:
        self.mutex.release()

    pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                  "Stored key %s: %s" % (key, ("%r" % (object,))[:100]))
    return key

def get(self, key, remove=False):
    """ Retrieve the key from the store. If remove is specified we
    remove it from the Store altogether.
    """
    ## FIXME: This is slow for large stores... use a dict for
    ## quick reference:
    self.mutex.acquire()
    try:
        ## Find and remove the object from the store
        i = 0
        for t, k, obj in self.creation_times:
            if k == key:
                ## Remove the object from the store:
                t, k, obj = self.creation_times.pop(i)

                ## Reinsert it into the cache at the most recent time:
                if not remove:
                    self.creation_times.append([time.time(), k, obj])
                    self.check_full()

                pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                              "Got key %s: %s" % (key, ("%r" % (obj,))[:100]))
                return obj

            i += 1

        ## If we are here we could not find the key:
        pyflaglog.log(pyflaglog.VERBOSE_DEBUG, "Key %s not found" % (key,))
        raise KeyError("Key not found %s" % (key,))

    finally:
        self.mutex.release()

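## A minimal standalone sketch (not PyFlag code) of the recency
## mechanism used by put()/get() above: entries live in a list of
## [time, key, object] triples, and a successful get() pops the entry
## and re-appends it with a fresh timestamp, so the oldest entries
## drift to the front where expiry can trim them:
import time

def _demo_recency():
    creation_times = []
    creation_times.append([time.time(), "key0", "object A"])
    creation_times.append([time.time(), "key1", "object B"])

    ## Touch key0 - it is popped and re-appended, so key1 is now oldest:
    for i, (t, k, obj) in enumerate(creation_times):
        if k == "key0":
            t, k, obj = creation_times.pop(i)
            creation_times.append([time.time(), k, obj])
            break

    return [k for t, k, obj in creation_times]   ## ['key1', 'key0']
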
def check_index(self, table, key, length=None):
    """ This checks the database to ensure that the said table has
    an index on said key.

    If an index is missing, we create it here, so we always ensure
    an index exists once we return.
    """
    ## We implement a local cache to ensure that we don't hit the
    ## DB all the time:
    cache_key = "%s/%s" % (self.case, table)
    try:
        ## These should be the fields with the indexes on them:
        fields = DBIndex_Cache.get(cache_key)
    except KeyError:
        self.execute("show index from `%s`", table)
        fields = [row['Key_name'] for row in self]
        DBIndex_Cache.put(fields, key=cache_key)

    ## Now fields is an array stored in the Store - we can append to
    ## it directly because we also hold a reference here and it will
    ## affect the next value gotten from the Store:
    if key not in fields:
        if length:
            sql = "(`%s`(%s))" % (key, length)
        else:
            sql = "(`%s`)" % (key,)

        pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                      "No index found in table %s on field %s - generating index, this may take a while" % (table, key))
        ## Index not found, we make it here:
        self.execute("Alter table `%s` add index %s", (table, sql))

        ## Add to cache:
        fields.append(key)

def execute(self, string):
    self.py_row_cache = []
    self.py_cache_size = 10
    self._last_executed = string
    self._last_executed_sequence.append(string)
    ## Keep only the most recent queries in the history buffer (the
    ## original sliced [:-3], which empties the buffer - [-3:] keeps
    ## the last three as intended):
    self._last_executed_sequence = self._last_executed_sequence[-3:]

    def cancel():
        pyflaglog.log(pyflaglog.WARNINGS,
                      "Killing query in thread %s because it took too long" % self.connection.thread_id())
        self.kill_connection('query')

    if self.timeout:
        t = threading.Timer(self.timeout, cancel)
        t.start()
        try:
            pyflaglog.log(pyflaglog.VERBOSE_DEBUG, string)
            MySQLdb.cursors.SSDictCursor.execute(self, string)
        finally:
            t.cancel()
            t.join()
    else:
        if self.logged:
            pyflaglog.log(pyflaglog.VERBOSE_DEBUG, string)

        MySQLdb.cursors.SSDictCursor.execute(self, string)

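## A self-contained sketch (not PyFlag code) of the watchdog pattern
## execute() uses above: threading.Timer fires cancel() only if the
## work outruns the timeout, and t.cancel()/t.join() in the finally
## clause guarantees the timer thread is gone before we return. The
## helper name and the print placeholder are illustrative only:
import threading, time

def _demo_watchdog(work, timeout=2.0):
    def cancel():
        print "work took too long - would kill the query here"

    t = threading.Timer(timeout, cancel)
    t.start()
    try:
        work()
    finally:
        t.cancel()
        t.join()

## _demo_watchdog(lambda: time.sleep(0.1))  ## finishes, timer never fires
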
def read(self, length=None):
    try:
        return File.read(self, length)
    except IOError:
        pass

    if not self.gz:
        self.fd.seek(0)
        self.gz = gzip.GzipFile(fileobj=self.fd, mode='r')

    count = 0
    step = 1024
    result = ''

    ## Copy ourself into the file - This is in case we have errors
    ## in the file, we try to read as much as possible:
    while 1:
        try:
            data = self.gz.read(step)
        except IOError, e:
            step /= 2
            if step < 10:
                pyflaglog.log(pyflaglog.DEBUG,
                              "Error reading from %s(%s), could only get %s bytes (wanted %s/%s)" % (
                    self.fd.inode, e, count, length, self.size))
                break
            else:
                continue
        except Exception, e:
            #pyflaglog.log(pyflaglog.WARNING, "Unable to decompress inode (%s) %s" % (self.inode, e))
            break

        ## (Assumed completion - the source breaks off above.) Stop at
        ## end of file, otherwise accumulate the decompressed data:
        if not data:
            break
        result += data
        count += len(data)

    return result

def scan(self, fd, scanners, type, mime, cookie, scores=None, **args):
    if scores.get('GmailStreamMagic', 0) == 0:
        return

    pyflaglog.log(pyflaglog.DEBUG, "Opening %s for Gmail processing" % fd.inode_id)
    self.current_time = None
    self.current_box = 'Unknown'

    if "html" in mime:
        html_parser = HTML.HTMLParser()
        html_parser.parse_fd(fd)
        html_parser.close()

        ## Process all script segments
        for script_tag in html_parser.root.search("script"):
            script = script_tag.innerHTML()
            try:
                j = Javascript.JSParser()
                j.feed(script)
                j.close()
            except:
                continue

            self.process_js(j.root, fd)

    elif "javascript" in mime:
        ## Make a new parser
        j = Javascript.JSParser()
        j.parse_fd(fd)
        j.close()

        self.process_js(j.root, fd)

def add_inode(self, fd, offset, factories):
    """ We think we have a zip file here. """
    b = Zip.Buffer(fd=fd)[offset:]
    try:
        header = Zip.ZipFileHeader(b)
        size = int(header['uncompr_size'])
        compressed_length = int(header['compr_size'])

        ## Some zip programs seem to leave this at 0 - because its
        ## already in the central directory. Unfortunately the carver
        ## currently does not look at the central directory - so we
        ## just make it a reasonable value
        if compressed_length == 0:
            compressed_length = 100 * 1024

        name = header['zip_path'].get_value()
        if len(name) == 0 or invalid_filename.search(name):
            pyflaglog.log(pyflaglog.DEBUG,
                          "Thought the name %r is invalid - skipping file" % name[:10])
            return 10

        header_offset = header['data'].buffer.offset
    except:
        return 10

    new_inode = "%s|Z%s:%s" % (fd.inode, offset, compressed_length)
    self._add_inode(new_inode, size, name, fd, factories)

    return size

def external_process(self, fd):
    self.parser.close()
    if self.process_view_document():
        pyflaglog.log(pyflaglog.DEBUG,
                      "Opening %s for Google Document processing" % self.fd.inode)

def scanfs(self, scanners, action=None):
    ## Prepare the scanner factory for scanning:
    for s in scanners:
        s.prepare()

    dbh2 = DB.DBO(self.case)
    dbh3 = DB.DBO(self.case)
    dbh3.execute('select inode, concat(path,name) as filename from file where mode="r/r" and status="alloc"')
    count = 0
    for row in dbh3:
        # open file
        count += 1
        if not count % 100:
            pyflaglog.log(pyflaglog.INFO,
                          "File (%s) is inode %s (%s)" % (count, row['inode'], row['filename']))

        try:
            fd = self.open(inode=row['inode'])
            Scanner.scanfile(self, fd, scanners)
            fd.close()
        except Exception, e:
            pyflaglog.log(pyflaglog.ERRORS, "%r: %s" % (e, e))
            continue

def external_process(self, fd):
    if self.mime_type == "application/x-winnt-registry":
        print "Grabbing message sources from %s" % self.fd.inode

        ## populate the EventMessageSources table from the registry
        dbh = DB.DBO(self.case)
        pydbh = DB.DBO()
        inode_id = self.fd.lookup_id()
        dbh.execute("select * from reg where reg_key='EventMessageFile' and inode_id=%r", inode_id)
        for row in dbh:
            service = os.path.basename(os.path.normpath(row['path']))
            pydbh.execute("select * from EventMessageSources where source=%r limit 1", service)
            pyrow = pydbh.fetch()
            if not pyrow:
                filename = row['value'].split("\\")[-1].lower()
                pydbh.execute("insert ignore into EventMessageSources set filename=%r, source=%r",
                              (filename, service))

        return

    filename, inode, inode_id = self.ddfs.lookup(inode=self.inode)
    b = Buffer(fd=fd)
    pyflaglog.log(pyflaglog.VERBOSE_DEBUG, "Opening %s to extract messages" % self.inode)

    pydbh = DB.DBO()
    pydbh.mass_insert_start('EventMessages')
    try:
        m = PElib.get_messages(b)
        for k, v in m.messages.items():
            pydbh.mass_insert(filename = os.path.basename(filename),
                              message_id = k,
                              message = v['Message'],
                              offset = v.buffer.offset,
                              )
    except (IndexError, IOError, AttributeError):
        pyflaglog.log(pyflaglog.VERBOSE_DEBUG, "%s does not contain messages" % filename)

def start_workers(): if config.FLUSH: dbh = DB.DBO() pyflaglog.log(pyflaglog.WARNING, "Deleting job queue and killing workers") #dbh.execute("select max(id) as max from jobs") #row = dbh.fetch() #broadcast_id = row['max'] or 0 dbh.execute("delete from jobs") #dbh.insert("jobs", _fast=True, # command='Exit', state='broadcast', # ) if config.WORKERS == 0: return for i in range(config.WORKERS): try: r, w = os.pipe() pid = os.fork() except AttributeError: ## When running under windows we can not fork... We must ## launch this module by itself instead - this is very ## suboptimal because we will be performing all startup ## code (registry parsing etc) for each worker. If you want ## performance you would not choose windows anyway, ## though. The following is windows specific: ## First find the name of the interpreter: import ctypes, sys name = ctypes.create_string_buffer(255) length = ctypes.windll.kernel32.GetModuleFileNameA(None, name, 255) interpreter = name.raw[:length] ## This encloses at least the file path in quotes just in ## case we are installed to somewhere with spaces - It ## seems that on windows argvs are not processed correctly ## because the below array ends up as a single command line ## string WTF? This is very dodgy... os.spawnv( os.P_NOWAIT, interpreter, ['"%s"' % interpreter, '"%s"' % __file__] + sys.argv[1:]) pid = 1 ## Parents: if pid: os.close(r) children.append(pid) else: os.close(w) nanny(worker_run, keepalive=r) atexit.register(terminate_children) ## The parent now calls the startup method on each of the events: for event in Registry.EVENT_HANDLERS.classes: try: event().startup() except Exception, e: pyflaglog.log(pyflaglog.WARNING, "Error: %s" % e)
def drop_table(case, name):
    """ Drops the log table tablename """
    if not name: return

    dbh = DB.DBO(case)
    pyflaglog.log(pyflaglog.DEBUG, "Dropping log table %s in case %s" % (name, case))
    dbh.execute("select * from log_tables where table_name = %r limit 1", name)
    row = dbh.fetch()

    ## Table not found
    if not row:
        return

    preset = row['preset']

    ## Get the driver for this table:
    log = load_preset(case, preset)

    ## Ask the driver to remove its table:
    log.drop(name)

    dbh.delete("log_tables", where=DB.expand("table_name = %r", name))

    ## Make sure that the reports get all reset
    FlagFramework.reset_all(family='Load Data', report="Load Preset Log File",
                            table=name, case=case)

def external_process(self, fd): pyflaglog.log(pyflaglog.DEBUG, "Opening %s for MMS Processing" % self.fd.inode) try: message = mms.MMSMessage.fromFile(fd.name) except: pyflaglog.log(pyflaglog.DEBUG, "Error parsing %s" % self.fd.inode) return result = {'type': 'Sent', 'message': ''} for k,v in [ ('From', 'From'), ('To', 'To'), ('Data', 'sent'), ('Subject', 'subject') ]: try: result[v] = message.headers[k] except KeyError: pass ## Create a new webmail message: inode_id = self.insert_message(result) dbh = DB.DBO(self.fd.case) count = 0 for part in message.dataParts: count +=1 if part.contentType.startswith('text/'): result['message'] += part.data dbh.update('webmail_messages', where='inode_id="%s"' % inode_id, message = result['message']) elif not part.contentType.endswith('smil'): new_inode = self.fd.inode + "|m%s" % count filename = CacheManager.MANAGER.get_temp_path(self.fd.case, new_inode) fd = open(filename,"wb") fd.write(part.data) fd.close() ## Add Attachment path, inode, inode_id = self.ddfs.lookup(inode_id = inode_id) attachment_id = self.ddfs.VFSCreate(None, new_inode, "%s/Message %s" % (path,count), size = len(part.data)) parameters = {} for hdr in part.headers: value = part.headers[hdr] if type(value) == tuple: if len(value[1]) > 0: parameters = value[1] filename = parameters.get("Filename", parameters.get("Name","output.bin")) dbh.insert("webmail_attachments", inode_id = inode_id, attachment = attachment_id, url = filename)
def execute(self, query_str, *params):
    """ SQL execution method.

    This function executes the SQL in this object's cursor context.
    The query must be given as a string with %s or %r escape
    characters, and the correct number of strings in the params list.

    @note: Just as a reminder - using %r will escape the
    corresponding string in a manner that is adequate for mysql, and
    it will also automatically insert quotes around the string. On
    the other hand, using %s will not escape the strings.

    >>> a.execute('select * from %s where id=%r' , ('table','person'))

    @arg query_str: A format string with only %r and %s format sequences
    @arg params: A list of strings which will be formatted into
    query_str. If there is only one format string and the programmer
    is truly lazy, a string is ok.
    """
    try:
        params[0].__iter__
        params = params[0]
    except (AttributeError, IndexError):
        pass

    if params:
        string = db_expand(query_str, params)
    else:
        string = query_str

    try:
        self.cursor.execute(string)
    #If anything went wrong we raise it as a DBError
    except Exception, e:
        str = "%s" % e
        if 'cursor closed' in str or \
               'Commands out of sync' in str or \
               'server has gone away' in str or \
               'Lost connection' in str:
            pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                          "Got DB Error: %s" % (str,))

            ## We terminate the current connection and reconnect
            ## to the DB
            pyflaglog.log(pyflaglog.DEBUG,
                          "Killing connection because %s. Last query was %s" % (
                e, self.cursor._last_executed_sequence))
            try:
                self.cursor.kill_connection()
                del self.dbh
            except AttributeError:
                pass

            global db_connections
            db_connections -= 1
            self.get_dbh(self.case)
            #self.dbh.ignore_warnings = self.cursor.ignore_warnings
            self.cursor = self.dbh.cursor()

            ## Redo the query with the new connection - if we fail
            ## again, we just raise - otherwise we risk running
            ## into recursion issues:
            return self.cursor.execute(string)

        elif not str.startswith('Records'):
            raise DBError(e)

def _warning_check(self):
    last = self._last_executed
    if self._warnings and not self.ignore_warnings:
        self.execute("SHOW WARNINGS")
        while 1:
            a = self.fetchone()
            if not a: break

            pyflaglog.log(pyflaglog.WARNINGS,
                          "query %r: %s" % (last[:100], a['Message']))

def row(self, *columns, **options):
    """ Add a row to the table. If a table is not defined as yet, a
    new table is created. Column entries for the row should be given
    as a list of arguments. Options may be given as named pairs. Note
    that column objects may be strings or other UI entities.

    options is usually passed to the underlying implementation, but a
    number of keywords are understood by the UI:

       - type: heading - this row is the table's heading
       - colspan: The row has fewer elements than are needed, and the
         extra columns are to be filled with blanks.
    """
    pyflaglog.log(pyflaglog.DEBUG, "row not implemented")

def display(self):
    """ Main display method.

    Called when the framework is ready to display the UI object.
    Note that further operations on this UI are not defined once
    display is called. Note also that the specific type of object
    returned here really depends on the implementation - the
    front-end should handle the return type appropriately.

    This function, in combination with the front end, is expected to
    produce all the navigational aids required (e.g. nav bar or tool
    bars etc).
    """
    pyflaglog.log(pyflaglog.DEBUG, "display not implemented")

def external_process(self, fd):
    pyflaglog.log(pyflaglog.DEBUG,
                  "Opening %s for SquirrelMail processing" % self.fd.inode)

    if self.process_mail_listing() or \
           self.process_send_message(fd) or \
           self.process_readmessage(fd):
        pass

def external_process(self, fd): pyflaglog.log(pyflaglog.DEBUG, "Opening %s for Hotmail processing" % self.fd.inode) ## Now we should be able to parse the data out: self.process_send_message(fd) self.process_editread(fd) self.process_readmessage(fd) self.process_mail_listing()
def analyse(self, query):
    scanner_names = self.calculate_scanners(query)
    pyflaglog.log(pyflaglog.VERBOSE_DEBUG,
                  "Asking pyflash to scan the inode: %s with scanners %s" % (
        query['inode'], scanner_names))

    ## Use pyflash to do all the work
    env = pyflagsh.environment(case=query['case'])
    pyflagsh.shell_execv(env=env, command="scan",
                         argv=[query['inode'],] + scanner_names)

def start_workers(): if config.FLUSH: dbh = DB.DBO() pyflaglog.log(pyflaglog.WARNING,"Deleting job queue and killing workers") #dbh.execute("select max(id) as max from jobs") #row = dbh.fetch() #broadcast_id = row['max'] or 0 dbh.execute("delete from jobs") #dbh.insert("jobs", _fast=True, # command='Exit', state='broadcast', # ) if config.WORKERS == 0: return for i in range(config.WORKERS): try: r,w = os.pipe() pid = os.fork() except AttributeError: ## When running under windows we can not fork... We must ## launch this module by itself instead - this is very ## suboptimal because we will be performing all startup ## code (registry parsing etc) for each worker. If you want ## performance you would not choose windows anyway, ## though. The following is windows specific: ## First find the name of the interpreter: import ctypes, sys name = ctypes.create_string_buffer(255) length = ctypes.windll.kernel32.GetModuleFileNameA(None, name, 255) interpreter = name.raw[:length] ## This encloses at least the file path in quotes just in ## case we are installed to somewhere with spaces - It ## seems that on windows argvs are not processed correctly ## because the below array ends up as a single command line ## string WTF? This is very dodgy... os.spawnv(os.P_NOWAIT, interpreter, ['"%s"' % interpreter, '"%s"' % __file__] + sys.argv[1:]) pid = 1 ## Parents: if pid: os.close(r) children.append(pid) else: os.close(w) nanny(worker_run, keepalive=r) atexit.register(terminate_children) ## The parent now calls the startup method on each of the events: for event in Registry.EVENT_HANDLERS.classes: try: event().startup() except Exception,e: pyflaglog.log(pyflaglog.WARNING, "Error: %s" % e)
def check_mem(cb, *args, **kwargs):
    """ Checks for our current memory usage - if it exceeds the limit
    we exit and let the nanny restart us.
    """
    if config.MAXIMUM_WORKER_MEMORY > 0:
        mem = open("/proc/%s/statm" % os.getpid()).read().split()
        if int(mem[1]) * 4096 / 1024 / 1024 > config.MAXIMUM_WORKER_MEMORY:
            pyflaglog.log(pyflaglog.WARNING,
                          "Process resident memory exceeds threshold. Exiting")
            cb(*args, **kwargs)

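## The arithmetic above as a standalone sketch (Linux only, and it
## assumes the conventional 4096 byte page size rather than asking
## the kernel): field 1 of /proc/<pid>/statm is the resident set size
## in pages, so pages * 4096 / 1024 / 1024 yields megabytes. The
## helper name is hypothetical:
import os

def _demo_resident_mb(pid=None):
    pid = pid or os.getpid()
    mem = open("/proc/%s/statm" % pid).read().split()
    return int(mem[1]) * 4096 / 1024 / 1024
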
def drop_preset(preset):
    """ Drops the specified preset name """
    pyflaglog.log(pyflaglog.DEBUG, "Dropping preset %s" % preset)
    for case, table in find_tables(preset):
        drop_table(case, table)

    dbh = DB.DBO()
    if preset:
        dbh.delete("log_presets", where=DB.expand("name = %r", preset))

def lookup_whois(ip):
    """ This function searches the database for the most specific whois match.

    @arg ip: Either an unsigned int or a string IP in decimal notation.

    Returns a whois id. This id can be used to display the whois table.
    """
    ## Polymorphic code - if its numeric we use it as such - if its a
    ## string it must be an IP in dot notation.
    try:
        ip / 2
        sql_ip = ip
    except TypeError:
        if ip == None:
            pyflaglog.log(pyflaglog.WARNING,
                          "Was asked to perform a whois lookup on a blank IP address. "
                          "Will return the default route, but this might suggest an error")
            return 0
        sql_ip = "inet_aton(%r)" % ip.strip()

    ## First check the cache:
    id = 0
    try:
        return WHOIS_CACHE.get(ip)
    except KeyError:
        dbh = DB.DBO()
        dbh.check_index("whois_cache", "ip")
        dbh.execute("select id from whois_cache where ip=%s limit 1", sql_ip)
        row = dbh.fetch()
        if row:
            id = row['id']
            WHOIS_CACHE.put(id, key=ip)
            return id

    if config.PRECACHE_WHOIS:
        id = lookup_whois_id(dbh, ip)

    ## Cache it. We also may as well do a GEOIP lookup :)
    ipinfo = get_all_geoip_data(ip)

    ## For speed we try and do it all in one go
    try:
        insert_whois_cache(sql_ip, id, ipinfo)

    ## we can only assume that we got nothing back from the geoip stuff
    except DB.DBError, e:
        ##
        ## LOOKUP GEOIP COUNTRY
        ##
        try:
            dbh.insert("geoip_country", _fast=True,
                       country=ipinfo.get('country_code3', '---'),
                       country2=ipinfo.get('country_code', '00'))
        except DB.DBError, e:
            pass

    ## (Assumed - the source breaks off above, but the docstring says
    ## a whois id is returned:)
    return id

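## The `ip / 2` test above is duck typing: ints support division,
## strings raise TypeError. A tiny standalone illustration (the
## helper name is hypothetical):
def _demo_ip_form(ip):
    try:
        ip / 2
        return "numeric"
    except TypeError:
        return "dotted quad string"

## _demo_ip_form(3232235777) == 'numeric'
## _demo_ip_form("192.168.1.1") == 'dotted quad string'
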
def link(self, string, target=FlagFramework.query_type(()), **target_options):
    """ Create a link to somewhere else.

    A link is categorized by a list of named arguments, usually given
    as elements of query_type. Derived classes must make the link
    launch the correct part of the front end as specified by the link
    attributes.
    """
    pyflaglog.log(pyflaglog.DEBUG, "link not implemented")

def external_process(self, fd): pyflaglog.log(pyflaglog.DEBUG,"Opening %s for YahooMail2.0 processing" % self.fd.inode) if self.context=='GetDisplayMessageResponse': self.process_readmessage() elif self.context=='ListMessagesResponse': self.process_mail_listing() elif self.context=='SendMessageResponse': self.process_send_message()
def process(case, subsys, extension=None):
    """ A generator to produce all the recoverable files within the
    io object identified by identifier.

    @arg subsys: Either an IO object to use, or the string name of an
    io object that will be opened using IO.open().
    @arg extension: A list of extensions we would like to see
    """
    if type(subsys) == types.StringType:
        io = IO.open(case, subsys)
    else:
        io = subsys

    blocksize = 1024 * 1024 * 10
    windowsize = 100
    count = 0
    bytes_read = 0
    window = ''
    while(1):
        ## This implements a sliding window of window bytes to ensure
        ## we do not miss a signature that was split across blocksize:
        try:
            data = io.read(blocksize)
            if not len(data): break
        except IOError:
            break

        f = window + data
        bytes_read += len(data)
        pyflaglog.log(pyflaglog.INFO, "Processed %u Mb" % (bytes_read / 1024 / 1024))

        for cut in definitions:
            if extension and cut['Extension'] not in extension:
                continue

            pos = 0
            while pos < blocksize:
                match = cut['CStartRE'].search(f, pos)
                if match:
                    offset = match.start() + count - len(window)
                    length = cut['MaxLength']

                    ## If there is an end RE, we try to read the
                    ## entire length in, and then look for the end so
                    ## we can adjust the length accurately. This is
                    ## essential for certain file types which do not
                    ## tolerate garbage at the end of the file,
                    ## e.g. pdfs.
                    if cut.has_key('CEndRE'):
                        tell = io.tell()
                        io.seek(offset)
                        file_data = io.read(length)
                        io.seek(tell)

                        end_match = cut['CEndRE'].search(file_data, 0)
                        if end_match:
                            length = end_match.end()

                    yield({'offset': offset, 'length': length,
                           'type': cut['Extension']})
                    pos = match.start() + 1
                else:
                    pos = blocksize

        window = f[-windowsize:]
        count += blocksize

    io.close()

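## A minimal standalone demonstration (not PyFlag code) of the
## sliding-window technique process() relies on above: by prefixing
## each block with the tail of the previous one, a signature that
## straddles a block boundary is still found, and offsets are
## corrected by len(window) exactly as offset is computed above. All
## names here are hypothetical:
import re

def _demo_carve(data, signature="PDF", blocksize=8):
    regex = re.compile(re.escape(signature))
    windowsize = len(signature) - 1
    window, count, hits = '', 0, []
    while 1:
        block = data[count:count + blocksize]
        if not block: break

        f = window + block
        for m in regex.finditer(f):
            ## Correct the match position back to an absolute offset:
            offset = m.start() + count - len(window)
            if offset not in hits:
                hits.append(offset)

        window = f[-windowsize:]
        count += blocksize
    return hits

## _demo_carve("xxxxxxPDFxxx") == [6], even though "PDF" straddles
## the first 8-byte block boundary.
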
def display(self, query, result):
    ## Try to delete the old cases:
    try:
        dbh = DB.DBO()
        dbh.execute("select * from meta where property='flag_db'")
        for row in dbh:
            pyflaglog.log(pyflaglog.INFO,
                          "Deleting case %s due to an upgrade" % row['value'])
            FlagFramework.delete_case(row['value'])
    except DB.DBError, e:
        pass

def parse(self):
    while 1:
        line = self.fd.readline().strip()
        if len(line) == 0:
            break

        try:
            m = self.regex.match(line)
            ## Dispatch a command handler. (A line which does not
            ## match leaves m as None, so we also catch the resulting
            ## AttributeError):
            self.dispatch(m.group(1), m.group(2), m.group(3))
        except (IndexError, AttributeError), e:
            pyflaglog.log(pyflaglog.WARNINGS,
                          "unable to parse line %s (%s)" % (line, e))

def process_stream(self, stream, factories):
    combined_inode = "I%s|S%s/%s" % (stream.fd.name, stream.inode_id, stream.reverse)

    ## Check to see if this is an IRC stream at all:
    try:
        fd = self.fsfd.open(inode=combined_inode)
    except IOError:
        return

    irc = IRC(fd, self.case)
    pyflaglog.log(pyflaglog.DEBUG, "Opening %s for IRC" % combined_inode)
    irc.parse()

def check(self, dbh):
    """ Checks the table in dbh to ensure that all the columns
    defined are present.
    """
    columns = [ c for c in self.instantiate_columns() ]
    try:
        dbh.execute("desc %s", self.name)
    except DB.DBError, e:
        pyflaglog.log(pyflaglog.INFO,
                      "Table %s does not exist in case %s - Creating" % (self.name, dbh.case))
        self.create(dbh)
        return
