def processlog(ipd,lastlogid,conn,services=None):
    """
    Read the pending botlog entries for one host/ip pair and fold them
    into that ip's stats row.

    This should be considered a critical section; it needs to be called
    from readmsgs.

    Parameters:
      ipd       -- dict describing the client. 'http_host' and
                   'remote_addr' select the botlog rows, 'ip' identifies
                   the stats row; 'lastlogid' is written into it here.
      lastlogid -- only rows with a logid greater than this are read
                   (None is treated as 0).
      conn      -- database connection; the cursors are created from it
                   and it is committed / rolled back here.
      services  -- passed through unchanged to bt.updatestats.

    Returns:
      The number of html entries that contributed a timing diff.
      0 when there is nothing to process or the select fails.
      None when anything else fails (the error is logged and the
      transaction rolled back).
    """
    select_sql = """ select hour,REMOTE_ADDR, status_line, useragent, epoch, HTTP_HOST, content_type,logid from botlog where http_host=%(http_host)s and remote_addr=%(remote_addr)s and logid > %(lastlogid)s order by logid """
    delete_sql = """ delete from botlog where remote_addr=%(remote_addr)s and http_host=%(http_host)s and logid <= %(logid)s """

    try:
        cur = conn.cursor()
        lcur = conn.cursor()
        scur = conn.cursor()

        ipd['lastlogid'] = lastlogid if lastlogid is not None else 0

        try:
            cur.execute(select_sql, ipd)
            if cur.rowcount == 0:
                if verbose:
                    logging.debug("found nothing to process for %s %s" % (str(ipd),cur.query))
                return 0
        except Exception as e:
            # Still best-effort (report "nothing processed"), but say why
            # and roll back: a failed statement leaves the transaction
            # aborted and every later query on conn would fail too.
            conn.rollback()
            logging.error("processlog select failed: %s" % e)
            return 0

        # get our stats row or make one
        # (the statslock acquire/release around this lookup is currently
        # disabled)
        isrc = True
        stats = bt.getstats(scur, ipd)
        if stats is None:
            logging.debug("no stats yet")
            isrc = bt.initstats(scur, conn, ipd)
            stats = bt.getstats(scur, ipd)
        else:
            logging.debug("found stats")

        if not isrc:
            raise Exception("error initializing stats for %s" % (ipd['ip']))
        if stats is None:
            raise Exception("Stats should not be empty here!")

        # go through the list of entries for us and update diffs
        log = {}
        logcount = 0

        # keep a unique list of user agents
        if 'uas' in stats and stats['uas'] is not None:
            uas = set(stats['uas'])
        else:
            uas = set()

        latest = bt.getbotlatest(lcur, ipd)
        for loglist in cur:
            log = bt.botlog2dict(loglist)
            logging.debug("next entry ... \n%s" % log)

            # cap the user agent set; sets are unordered, so which entry
            # gets dropped to make room is arbitrary
            if len(uas) >= 10:
                uas.discard(next(iter(uas)))
            uas.add(log['useragent'])

            stats['reqs'] += 1
            if bt.iserr(log):
                stats['errs'] += 1

            # only html pages take part in the timing stats
            if not bt.ishtml(log):
                continue
            stats['pages'] += 1

            if latest is None:
                latest = log
                bt.insertbotlatest(latest, lcur, conn)

            diff = log['epoch'] - latest['epoch']
            if diff < 0:
                continue

            # Both diffs are relative to the previous page entry, so the
            # hour diff has to be taken BEFORE latest is advanced
            # (computing it afterwards always yields 0).
            hourdiff = abs(log['hour'] - latest['hour'])
            latest = log

            if len(stats['diffs']) > bt.MAXDIFFS:
                del stats['diffs'][0]
            stats['diffs'].append(diff)

            if len(stats['hourdiffs']) > bt.MAXDIFFS:
                del stats['hourdiffs'][0]
            stats['hourdiffs'].append(hourdiff)

            stats['hours'][log['hour']] += 1
            logcount += 1

        stats['uas'] = list(uas)

        logging.debug("saving stats")
        logging.debug(stats)
        bt.updatestatcounts(
            ipd['ip'],
            stats['reqs'],
            stats['pages'],
            stats['errs'],
            stats['uas'],
            scur, conn)

        if len(stats['diffs']) > 0 and logcount > 0:
            bt.updatestats(
                ipd['ip'],
                stats['diffs'],
                stats['hourdiffs'],
                stats['hours'],
                scur, conn, stats, services)

        # delete what we have seen
        # note that more entries may have been added
        # while we were processing
        logging.debug("last entry %s" % log)
        bt.updatebotlatest(log, lcur, conn)
        logging.debug(
            "deleting log entries for %(http_host)s %(remote_addr)s before %(logid)d" % log)
        try:
            cur.execute(delete_sql, log)
            conn.commit()
            logging.debug("deleted %d" % (cur.rowcount))
        except Exception as e:
            conn.rollback()
            logging.error("Failed to delete: %s" % e)

        return logcount

    except Exception as e:
        conn.rollback()
        # from http://stackoverflow.com/questions/14519177/python-exception-handling-line-number
        exc_type, exc_obj, tb = sys.exc_info()
        logging.error("processlog failed at %d: %s" % (tb.tb_lineno, str(e)))
from mldb import dbname, dbuser, dbpw conn = psycopg2.connect(dbname=dbname,user=dbuser,password=dbpw) cur = conn.cursor() ucur = conn.cursor() """ n | integer | sum | bigint | mean | double precision | var | double precision | skew | double precision | kurtosis | double precision | diffs | integer[] | """ cur.execute( """ select ip,diffs,hourdiffs,hours from botstats where ip like 'archive-%' and mean is not null order by mean desc """) for row in cur: ip, diffs, hourdiffs, hours = row scaleddiffs = [x/1000 for x in diffs] print ip," ",diffs," now ",scaleddiffs bt.updatestats(ip, scaleddiffs, hourdiffs, hours, ucur, conn)
# NOTE(review): this is the tail of a larger script; the loop header(s) that
# bind i, diff, hdiff, ip and hours, and the `try:` matching the final
# `except`, are not visible here.
#
# Per element (only once i > 2):
#   pdiff  = diff - prevprev   (current value minus the value two steps back)
#   ppdiff = prev - prevprev   (previous value minus the value two steps back)
#   When both are non-negative and pdiff >= ppdiff, the previous entry of
#   `fixed` is overwritten with ppdiff and (diff - prev) is appended; `hfixed`
#   gets the matching absolute hour differences.  Otherwise diff / hdiff are
#   appended unchanged.  Presumably this turns accidentally cumulative values
#   back into successive differences -- confirm against the code that wrote
#   the data.
#   prev/prevprev and hprev/hprevprev are then shifted along by one element.
#
# Per row: the repaired lists are printed and passed to
# bottiming.updatestats; the connection is committed once rowcount exceeds
# 100, after which the counter restarts at 0.
# On any exception the transaction is rolled back and the error printed
# (Python 2 print statement).
if i > 2: pdiff = diff - prevprev ppdiff = prev - prevprev # must be monotonically increasing if pdiff >= 0 and ppdiff >= 0 and pdiff >= ppdiff: fixed[i-1] = ppdiff fixed.append(diff - prev) hfixed[i-1] = abs(hprev-hprevprev) hfixed.append(abs(hdiff-hprev)) else: fixed.append(diff) hfixed.append(hdiff) else: fixed.append(diff) hfixed.append(hdiff) prevprev = prev prev = diff hprevprev = hprev hprev = hdiff print (ip, fixed, hfixed) bottiming.updatestats(ip,fixed,hfixed,hours,ucur,conn) rowcount += 1 if rowcount > 100: rowcount = 0 conn.commit() except Exception as e: conn.rollback() print str(e)