Ejemplo n.º 1
0
def processlog(ipd,lastlogid,conn,services=None):
    """ 
    read log entries for a specific ip and do stats 
    this should be considered a critical section
    needs to be called from readmsgs
    """
    global statslock
    try:
        cur = conn.cursor()
        lcur = conn.cursor()
        scur = conn.cursor()

        if lastlogid != None:
            ipd['lastlogid'] = lastlogid
        else:
            ipd['lastlogid'] = 0
        try:
            cur.execute(
                """
                select 
                hour,REMOTE_ADDR, status_line, useragent, epoch, HTTP_HOST, content_type,logid
                from botlog 
                where http_host=%(http_host)s 
                    and remote_addr=%(remote_addr)s 
                    and logid > %(lastlogid)s
                    order by logid
                """,ipd)
            if cur.rowcount == 0:
                if verbose:
                    logging.debug("found nothing to process for %s %s" % (str(ipd),cur.query))
                return 0
        except:
            return 0

        # get our stats row or make one
        isrc = True
        # statslock.acquire()
        stats = bt.getstats(scur,ipd)
        if stats == None:
            logging.debug("no stats yet")
            isrc = bt.initstats(scur, conn, ipd)
            stats = bt.getstats(scur, ipd)
        else:
            logging.debug("found stats")
        # statslock.release()
        if not isrc:
            raise Exception("error initializing stats for %s" % (ipd['ip']))
        if stats == None:
            raise Exception("Stats should not be empty here!")

        # go through the list of entries for us and update diffs
        update = False
        diff = -1
        log = {}
        logcount = 0

        # keep a unique list of user agents
        if 'uas' in stats and stats['uas'] != None:
            uas = set(stats['uas'])
        else:
            uas = set([])

        latest = bt.getbotlatest(lcur, ipd)

        for loglist in cur:
            log = bt.botlog2dict(loglist)
            logging.debug("next entry ... %s" % log)

            if len(uas) >= 10:
                tmpuas = list(uas)
                del(tmpuas[0])
                uas = set(tmpuas)
            uas.add(log['useragent'])

            stats['reqs'] += 1
            if bt.iserr(log):
                stats['errs'] += 1
            if bt.ishtml(log):
                stats['pages'] += 1
            else:
                continue

            if latest == None:
                latest = log
                bt.insertbotlatest(latest, lcur, conn)

            diff = log['epoch'] - latest['epoch']
            if diff < 0: continue

            latest = log

            if len(stats['diffs']) > bt.MAXDIFFS:
                del(stats['diffs'][0])
            stats['diffs'].append(diff)

            hourdiff = abs(log['hour'] - latest['hour'])
            if len(stats['hourdiffs']) > bt.MAXDIFFS: 
                del(stats['hourdiffs'][0])
            stats['hourdiffs'].append(hourdiff)

            stats['hours'][log['hour']] += 1
            logcount += 1

        stats['uas'] = list(uas);
        logging.debug("saving stats")
        logging.debug(stats)
        bt.updatestatcounts(
            ipd['ip'],
            stats['reqs'],
            stats['pages'],
            stats['errs'],
            stats['uas'],
            scur, conn)

        if len(stats['diffs']) > 0 and logcount > 0:
            bt.updatestats(
                ipd['ip'],
                stats['diffs'],
                stats['hourdiffs'],
                stats['hours'],
                scur, conn,
                stats,services)

        # delete what we have seen
        # note that more entries may have been added
        # while we were processing
        logging.debug("last entry %s" % log)
        bt.updatebotlatest(log, lcur, conn)
        logging.debug(
            "deleting log entries for %(http_host)s %(remote_addr)s before %(logid)d" 
            % log)
        try:
            cur.execute(
                """
                delete from botlog 
                where 
                remote_addr=%(remote_addr)s 
                and http_host=%(http_host)s
                and logid <= %(logid)s
                """,log);
            conn.commit()
            logging.debug("deleted %d" % (cur.rowcount))
        except Exception as e:
            conn.rollback()
            logging.error("Failed to delete: %s" % e)

        return logcount

    except Exception as e:
        conn.rollback()
        # from http://stackoverflow.com/questions/14519177/python-exception-handling-line-number
        exc_type, exc_obj, tb = sys.exc_info()
        logging.error("processlog failed at %d: %s" % (tb.tb_lineno, str(e)))
Ejemplo n.º 2
0
from mldb import dbname, dbuser, dbpw

conn = psycopg2.connect(dbname=dbname,user=dbuser,password=dbpw)
cur = conn.cursor()
ucur = conn.cursor()
"""
 n          | integer                  | 
  sum        | bigint                   | 
   mean       | double precision         | 
    var        | double precision         | 
     skew       | double precision         | 
      kurtosis   | double precision         | 
       diffs      | integer[]                | 
"""

cur.execute(
    """
    select ip,diffs,hourdiffs,hours 
    from botstats
    where ip like 'archive-%'
    and mean is not null
    order by mean desc
    """)

for row in cur:
    ip, diffs, hourdiffs, hours = row
    scaleddiffs = [x/1000 for x in diffs]
    print ip," ",diffs," now ",scaleddiffs
    bt.updatestats(ip, scaleddiffs, hourdiffs, hours, ucur, conn)

Ejemplo n.º 3
0
            if i > 2:
                pdiff = diff - prevprev
                ppdiff = prev - prevprev
                # must be monotonically increasing
                if pdiff >= 0 and ppdiff >= 0 and pdiff >= ppdiff:
                    fixed[i-1] = ppdiff
                    fixed.append(diff - prev)
                    hfixed[i-1] = abs(hprev-hprevprev)
                    hfixed.append(abs(hdiff-hprev))
                else:
                    fixed.append(diff)
                    hfixed.append(hdiff)
            else:
                fixed.append(diff)
                hfixed.append(hdiff)
                    
            prevprev = prev
            prev = diff
            hprevprev = hprev
            hprev = hdiff
        print (ip, fixed, hfixed)
        bottiming.updatestats(ip,fixed,hfixed,hours,ucur,conn)
        rowcount += 1
        if rowcount > 100:
            rowcount = 0
            conn.commit()

except Exception as e:
    conn.rollback()
    print str(e)