Example 1
class TomcatLogtailer(object):
    # only used in daemon mode
    period = 60
    def __init__(self):
        '''This function should initialize any data structures or variables
        needed for the internal state of the line parser.'''
        self.reset_state()
        self.lock = threading.RLock()
        # this is what will match the solr request log lines in the
        # tomcat log (the regex below parses solr's "webapp=... QTime=..."
        # request entries, not the tomcat access log format)
        # match keys: webapp, path, params, status, qtime
        self.reg = re.compile("^INFO: \[\] webapp=(?P<webapp>[^\s]+) path=(?P<path>[^\s]+) params=(?P<params>\{[^\}]*\}) status=(?P<status>[^\s]+) QTime=(?P<qtime>[0-9]+)$")
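        # hypothetical sample line the pattern above would match:
        # INFO: [] webapp=/solr path=/select params={q=foo&wt=json} status=0 QTime=5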
        # assume we're in daemon mode unless set_check_duration gets called
        self.dur_override = False


    # example function for parse line
    # takes one argument (text) line to be parsed
    # returns nothing
    def parse_line(self, line):
        '''This function should digest the contents of one line at a time,
        updating the internal state variables.'''
        self.lock.acquire()
        regMatch = self.reg.match(line)
        if regMatch:
            linebits = regMatch.groupdict()
            self.num_hits += 1
            # capture request duration
            dur = int(linebits['qtime'])
            self.req_time += dur
            # store for 90th % calculation
            self.ninetieth.append(dur)
        self.lock.release()
    # example function for deep copy
    # takes no arguments
    # returns one object
    def deep_copy(self):
        '''This function should return a copy of the data structure used to
        maintain state.  This copy should be different from the object that is
        currently being modified so that the other thread can deal with it
        without fear of it changing out from under it.  The format of this
        object is internal to the plugin.'''
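        # note: self.ninetieth is handed off by reference rather than
        # copied; this is safe only because reset_state() replaces it with
        # a fresh list instead of clearing it in place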
        myret = dict( num_hits=self.num_hits,
                    req_time=self.req_time,
                    ninetieth=self.ninetieth
                    )
        return myret
    # example function for reset_state
    # takes no arguments
    # returns nothing
    def reset_state(self):
        '''This function resets the internal data structure to 0 (saving
        whatever state it needs).  This function should be called
        immediately after deep copy with a lock in place so the internal
        data structures can't be modified in between the two calls.  If the
        time between calls to get_state is necessary to calculate metrics,
        reset_state should store now() each time it's called, and get_state
        will use the time since that now() to do its calculations'''
        self.num_hits = 0
        self.req_time = 0
        self.ninetieth = list()
        self.last_reset_time = time.time()
    # example for keeping track of runtimes
    # takes no arguments
    # returns float number of seconds for this run
    def set_check_duration(self, dur):
        '''This function only used if logtailer is in cron mode.  If it is
        invoked, get_check_duration should use this value instead of calculating
        it.'''
        self.duration = dur 
        self.dur_override = True
    def get_check_duration(self):
        '''This function should return the time since the last check.  If called
        from cron mode, this must be set using set_check_duration().  If in
        daemon mode, it should be calculated internally.'''
        if (self.dur_override):
            duration = self.duration
        else:
            cur_time = time.time()
            duration = cur_time - self.last_reset_time
            # the duration should be within 10% of period
            acceptable_duration_min = self.period - (self.period / 10.0)
            acceptable_duration_max = self.period + (self.period / 10.0)
            if (duration < acceptable_duration_min or duration > acceptable_duration_max):
                raise LogtailerStateException, "time calculation problem - duration (%s) > 10%% away from period (%s)" % (duration, self.period)
        return duration
    # example function for get_state
    # takes no arguments
    # returns a dictionary of (metric => metric_object) pairs
    def get_state(self):
        '''This function should acquire a lock, call deep copy, get the
        current time if necessary, call reset_state, then do its
        calculations.  It should return a list of metric objects.'''
        # get the data to work with
        self.lock.acquire()
        try:
            mydata = self.deep_copy()
            check_time = self.get_check_duration()
            self.reset_state()
            self.lock.release()
        except LogtailerStateException, e:
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # crunch data to how you want to report it
        hits_per_second = mydata['num_hits'] / check_time
        if (mydata['num_hits'] != 0):
             avg_req_time = mydata['req_time'] / mydata['num_hits']
        else:
             avg_req_time = 0

        # calculate 90th % request time
        ninetieth_list = mydata['ninetieth']
        ninetieth_list.sort()
        num_entries = len(ninetieth_list)
        if (num_entries != 0 ):
            slowest = ninetieth_list[-1]
            ninetieth_element = ninetieth_list[int(num_entries * 0.9)]
        else:
            slowest = 0
            ninetieth_element = 0

        # package up the data you want to submit
        hps_metric = GangliaMetricObject('solr_rps', hits_per_second, units='rps')
        avgdur_metric = GangliaMetricObject('solr_avg_dur', avg_req_time, units='ms')
        ninetieth_metric = GangliaMetricObject('solr_90th_dur', ninetieth_element, units='ms')
        slowest_metric   = GangliaMetricObject('solr_slowest_dur', slowest, units='ms')
        # return a list of metric objects
        return [ hps_metric, avgdur_metric, ninetieth_metric, slowest_metric ]
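The docstrings above spell out the contract a plugin follows in daemon mode: the framework feeds every new log line to parse_line() and, roughly every period seconds, calls get_state() for a list of metric objects. A minimal driver sketch of that loop, assuming LogtailerStateException and GangliaMetricObject are supplied by the ganglia-logtailer framework (run_daemon, line_source, and submit are illustrative names, not part of the framework):

import time

def run_daemon(tailer, line_source, submit):
    '''Hypothetical harness: feed log lines to the tailer and hand the
    resulting list of GangliaMetricObject instances to submit() every
    tailer.period seconds.'''
    next_check = time.time() + tailer.period
    for line in line_source:
        tailer.parse_line(line)
        if time.time() >= next_check:
            try:
                submit(tailer.get_state())
            except LogtailerStateException:
                pass  # get_state() already reset its state; skip this cycle
            next_check = time.time() + tailer.period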
Example 2
        three_per_second = mydata['num_three'] / check_time
        four_per_second = mydata['num_four'] / check_time
        five_per_second = mydata['num_five'] / check_time

        # calculate 90th % request time
        ninetieth_list = mydata['ninetieth']
        ninetieth_list.sort()
        num_entries = len(ninetieth_list)
        if (num_entries != 0):
            ninetieth_element = ninetieth_list[int(num_entries * 0.9)]
        else:
            ninetieth_element = 0

        # package up the data you want to submit
        hps_metric = GangliaMetricObject('apache_hits',
                                         hits_per_second,
                                         units='hps')
        gps_metric = GangliaMetricObject('apache_gets',
                                         gets_per_second,
                                         units='hps')
        avgdur_metric = GangliaMetricObject('apache_avg_dur',
                                            avg_req_time,
                                            units='sec')
        ninetieth_metric = GangliaMetricObject('apache_90th_dur',
                                               ninetieth_element,
                                               units='sec')
        twops_metric = GangliaMetricObject('apache_200',
                                           two_per_second,
                                           units='hps')
        threeps_metric = GangliaMetricObject('apache_300',
                                             three_per_second,
                                             units='hps')
Example 3
                self.add_metric('haproxy_%s_feconn_%s' % (name, 'max'),
                                feconn[-1])
                self.add_metric('haproxy_%s_feconn_%s' % (name, 'avg'),
                                float(sum(feconn)) / len(feconn))
                beconn = listener["beconn"]
                beconn.sort()
                self.add_metric('haproxy_%s_beconn_%s' % (name, 'min'),
                                beconn[0])
                self.add_metric('haproxy_%s_beconn_%s' % (name, 'max'),
                                beconn[-1])
                self.add_metric('haproxy_%s_beconn_%s' % (name, 'avg'),
                                float(sum(beconn)) / len(beconn))
                for code in self.response_codes:
                    self.add_metric(
                        'haproxy_%s_%s_hits' % (name, code),
                        float(listener["responses"][code]) / check_time)

        for (name, val) in self.metricshash.iteritems():
            if 'hits_p' in name:
                results.append(GangliaMetricObject(name, val, units='percent'))
            elif 'hits' in name:
                results.append(GangliaMetricObject(name, val, units='hps'))
            elif 'latency' in name:
                results.append(GangliaMetricObject(name, val, units='sec'))
            else:
                results.append(
                    GangliaMetricObject(name, val, units='connections'))

        # return a list of metric objects
        return results
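Example 3 calls self.add_metric() and reads self.metricshash, neither of which appears in the excerpt. A minimal sketch of what such a helper could look like, assuming metricshash is a plain dict created in reset_state() (the body here is a guess from how the method is used above, not the plugin's actual code):

    def add_metric(self, name, value):
        '''Record a computed value under its metric name; get_state() later
        wraps each entry in a GangliaMetricObject, picking units by
        substring ('hits_p', 'hits', 'latency', else connections).'''
        self.metricshash[name] = value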
Example 4
    # returns a dictionary of (metric => metric_object) pairs
    def get_state(self):
        '''This function should acquire a lock, call deep copy, get the
        current time if necessary, call reset_state, then do its
        calculations.  It should return a list of metric objects.'''
        # get the data to work with
        self.lock.acquire()
        try:
            mydata = self.deep_copy()
            check_time = self.get_check_duration()
            self.reset_state()
            self.lock.release()
        except LogtailerStateException, e:
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # normalize to queries per second
        slapdquery = mydata['num_slapdquery'] / check_time
        #print slapdquery

        # package up the data you want to submit
        slapdquery_metric = GangliaMetricObject('slapd_queries',
                                                slapdquery,
                                                units='qps')
        # return a list of metric objects
        return [
            slapdquery_metric,
        ]
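In cron mode the plugin cannot measure elapsed time itself, so the caller must invoke set_check_duration() before get_state(); otherwise get_check_duration() falls back to time-since-last-reset and raises LogtailerStateException when that drifts more than 10% from period. A hedged usage sketch (the 60.0 figure is an arbitrary example):

# cron mode: the caller supplies the elapsed time explicitly
tailer = TomcatLogtailer()
# ... feed the lines accumulated since the previous cron run ...
tailer.set_check_duration(60.0)   # seconds since the last invocation
metrics = tailer.get_state()      # uses 60.0 instead of wall-clock math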
Example 5
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # crunch data to how you want to report it
        hits_per_second = mydata['num_hits'] / check_time
        gets_per_second = mydata['num_gets'] / check_time
        two_per_second = mydata['num_two'] / check_time
        three_per_second = mydata['num_three'] / check_time
        four_per_second = mydata['num_four'] / check_time
        five_per_second = mydata['num_five'] / check_time

        # package up the data you want to submit
        hps_metric = GangliaMetricObject('varnish_hits',
                                         hits_per_second,
                                         units='hps')
        gps_metric = GangliaMetricObject('varnish_gets',
                                         gets_per_second,
                                         units='hps')
        twops_metric = GangliaMetricObject('varnish_200',
                                           two_per_second,
                                           units='hps')
        threeps_metric = GangliaMetricObject('varnish_300',
                                             three_per_second,
                                             units='hps')
        fourps_metric = GangliaMetricObject('varnish_400',
                                            four_per_second,
                                            units='hps')
        fiveps_metric = GangliaMetricObject('varnish_500',
                                            five_per_second,
                                            units='hps')
Example 6
        metrics = list()
        for role, percentloss_list in percentloss_dict.iteritems():

            percentloss_list.sort()
            num_entries = len(percentloss_list)
            if (num_entries != 0):
                packetloss_90th = percentloss_list[int(num_entries * 0.9)]
                packetloss_ave = sum(percentloss_list) / len(percentloss_list)
            else:
                # in this event, all data was thrown out in parse_line
                packetloss_90th = 99
                packetloss_ave = 99
            # package up the data you want to submit
            # setting tmax to 960 seconds as data may take as long as 15 minutes to be processed
            if (role == 'all_roles'):
                packetloss_ave_metric = GangliaMetricObject(
                    'packet_loss_average', packetloss_ave, units='%', tmax=960)
                packetloss_90th_metric = GangliaMetricObject(
                    'packet_loss_90th', packetloss_90th, units='%', tmax=960)
            else:
                packetloss_ave_metric = GangliaMetricObject(
                    'packet_loss_average:%s' % (role),
                    packetloss_ave,
                    units='%',
                    tmax=960)
                packetloss_90th_metric = GangliaMetricObject(
                    'packet_loss_90th:%s' % (role),
                    packetloss_90th,
                    units='%',
                    tmax=960)
            metrics.append(packetloss_ave_metric)
            metrics.append(packetloss_90th_metric)
Example 7
        # calculate number of querying IPs and maximum number of queries per IP
        clist = mydata['client_ip_list']

        cdict = dict()
        for elem in clist:
            cdict[elem] = cdict.get(elem, 0) + 1

        # number of unique clients connecting, normalized to per second
        num_client_ips = len(cdict) / check_time
        # request rate (per second) of the busiest client
        max_client_ip_count = max(cdict.values()) / check_time

        # package up the data you want to submit
        qps_metric = GangliaMetricObject('bind_queries',
                                         queries_per_second,
                                         units='qps')
        clients_metric = GangliaMetricObject('bind_num_clients',
                                             num_client_ips,
                                             units='cps')
        max_reqs_metric = GangliaMetricObject('bind_largest_volume_client',
                                              max_client_ip_count,
                                              units='qps')

        # return a list of metric objects
        return [
            qps_metric,
            clients_metric,
            max_reqs_metric,
        ]
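The per-client counting loop above (cdict[elem] = cdict.get(elem, 0) + 1) is a hand-rolled histogram; on Python 2.7 or later the same tally can be built with collections.Counter. An equivalent standalone sketch (the sample data and 60.0 check duration are stand-ins):

from collections import Counter

clist = ['10.0.0.1', '10.0.0.2', '10.0.0.1']   # stand-in client_ip_list
check_time = 60.0                               # stand-in check duration
cdict = Counter(clist)                          # ip -> query count
num_client_ips = len(cdict) / check_time        # unique clients per second
max_client_ip_count = max(cdict.values()) / check_time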
Example 8
                combined[key]['req_time_max'] = 0

        # The req_time_90th field for the "other" vhosts is now a sum. Need to
        # divide by the number of "other" vhosts
        if otherCount > 0:
            combined['other']['req_time_90th'] /= (otherCount * 1.0)
        else:
            combined['other']['req_time_90th'] = 0
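        # note: this yields the mean of the per-vhost 90th percentiles,
        # which approximates (but is not) a true 90th percentile across
        # the "other" vhosts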

        for vhost, stats in combined.iteritems():
            #print vhost
            #print "\t", stats

            # skip empty vhosts
            if stats['num_hits'] == 0:
                continue

            # package up the data you want to submit
            results.append(GangliaMetricObject('apache_%s_hits' % vhost, stats['num_hits'], units='hps'))
            results.append(GangliaMetricObject('apache_%s_gets' % vhost, stats['num_gets'], units='hps'))
            results.append(GangliaMetricObject('apache_%s_dur_avg' % vhost, stats['req_time_avg'], units='sec'))
            results.append(GangliaMetricObject('apache_%s_dur_90th' % vhost, stats['req_time_90th'], units='sec'))
            results.append(GangliaMetricObject('apache_%s_dur_max' % vhost, stats['req_time_max'], units='sec'))
            results.append(GangliaMetricObject('apache_%s_200' % vhost, stats['num_200'], units='hps'))
            results.append(GangliaMetricObject('apache_%s_300' % vhost, stats['num_300'], units='hps'))
            results.append(GangliaMetricObject('apache_%s_400' % vhost, stats['num_400'], units='hps'))
            results.append(GangliaMetricObject('apache_%s_500' % vhost, stats['num_500'], units='hps'))

        # return a list of metric objects
        return results
Example 9
        current time if necessary, call reset_state, then do its
        calculations.  It should return a list of metric objects.'''
        # get the data to work with
        self.lock.acquire()
        try:
            mydata = self.deep_copy()
            check_time = self.get_check_duration()
            self.reset_state()
            self.lock.release()
        except LogtailerStateException, e:
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # crunch data to how you want to report it
        connections_per_second = mydata['num_conns'] / check_time
        deliveries_per_second = mydata['num_deliv'] / check_time
        bounces_per_second = mydata['num_bounc'] / check_time

        # package up the data you want to submit
        cps_metric = GangliaMetricObject('postfix_connections', connections_per_second, units='cps')
        dps_metric = GangliaMetricObject('postfix_deliveries', deliveries_per_second, units='dps')
        bps_metric = GangliaMetricObject('postfix_bounces', bounces_per_second, units='bps')

        # return a list of metric objects
        return [ cps_metric, dps_metric, bps_metric, ]
Example 10
        hits_delete_ps = mydata['num_delete'] / check_time
        hits_proppatch_ps = mydata['num_proppatch'] / check_time
        hits_checkout_ps = mydata['num_checkout'] / check_time
        hits_merge_ps = mydata['num_merge'] / check_time
        hits_mkactivity_ps = mydata['num_mkactivity'] / check_time
        hits_copy_ps = mydata['num_copy'] / check_time

        #
        two_per_second = mydata['num_two'] / check_time
        three_per_second = mydata['num_three'] / check_time
        four_per_second = mydata['num_four'] / check_time
        five_per_second = mydata['num_five'] / check_time

        # package up the data you want to submit
        hps_metric = GangliaMetricObject('svn_total',
                                         hits_per_second,
                                         units='hps')
        gets_metric = GangliaMetricObject('svn_gets',
                                          hits_gets_ps,
                                          units='hps')
        posts_metric = GangliaMetricObject('svn_posts',
                                           hits_posts_ps,
                                           units='hps')
        propfind_metric = GangliaMetricObject('svn_propfind',
                                              hits_propfind_ps,
                                              units='hps')
        options_metric = GangliaMetricObject('svn_options',
                                             hits_options_ps,
                                             units='hps')
        put_metric = GangliaMetricObject('svn_put', hits_put_ps, units='hps')
        report_metric = GangliaMetricObject('svn_report',
Example 11
            mydata = self.deep_copy()
            check_time = self.get_check_duration()
            self.reset_state()
            self.lock.release()
        except LogtailerStateException, e:
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # crunch data to how you want to report it
        garbage = float(mydata['garbage']) * 1000

        # package up the data you want to submit
        full_gc_metric = GangliaMetricObject('gc_full',
                                             mydata['full_gc'],
                                             units='events')
        minor_gc_metric = GangliaMetricObject('gc_minor',
                                              mydata['minor_gc'],
                                              units='events')
        broken_gc_metric = GangliaMetricObject('gc_broken',
                                               mydata['broken_gc'],
                                               units='events')
        gc_time_metric = GangliaMetricObject('gc_time',
                                             mydata['gc_time'],
                                             units='seconds')
        garbage_metric = GangliaMetricObject('gc_garbage',
                                             garbage,
                                             units='bytes')

        # return a list of metric objects
        return [full_gc_metric, minor_gc_metric, broken_gc_metric,
                gc_time_metric, garbage_metric]
Example 12
                combined['swift_%s_hits' % (method)] = 0
            #print "method 90th index is %s, len is %s" % (int(len(durs) * 0.9), len(durs))
            combined['swift_%s_%s' % (method, '90th')] = durs[int(len(durs) * 0.9)]
            combined['swift_%s_%s' % (method, '50th')] = durs[int(len(durs) * 0.5)]
            combined['swift_%s_%s' % (method, 'max')] = durs[-1]
            #print durs
            #print ">> %s %s<<" % (sum(durs), len(durs))
            #combined['%s_%s' % (method, 'avg')] = sum(durs) / len(durs)
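        # swift_hits can legitimately be zero for an idle interval, in
        # which case the percentage calculation below raises
        # ZeroDivisionError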
        try:
            combined['swift_hits'] = totalhits / check_time
            for (key, val) in statuscounter.items():
                combined['swift_%s_hits' % key] = val / check_time
                combined['swift_%s_hits_%%' % key] = (val / check_time) / combined['swift_hits'] * 100 # percentage of hits that are 200s etc.
        except ZeroDivisionError:
            combined['swift_hits'] = 0
            for (key, val) in statuscounter.items():
                combined['swift_%s_hits' % key] = 0

        for metricname, metricval in combined.iteritems():
            # package up the data you want to submit
            if 'hits' in metricname:
                #print "metric info %s, %s, %s" % (metricname, metricval, 'hps')
                results.append(GangliaMetricObject(metricname, metricval, units='hps'))
            else:
                #print "metric info %s, %s, %s" % (metricname, metricval, 'sec')
                results.append(GangliaMetricObject(metricname, metricval, units='sec'))
        # return a list of metric objects
        return results
Example 13
class UnboundLogtailer(object):
    # period must be defined; it indicates how often the gmetric thread
    # should call get_state(), in seconds (daemon mode only).
    # note that if period is shorter than it takes to run get_state() (if
    # there's lots of complex calculation), the calling thread will
    # automatically double period.
    # period must be >15.  It should probably be >=60 (to avoid excessive
    # load).  120 to 300 is a good range (2-5 minutes).  Take into account
    # the need for time resolution, as well as the number of hosts
    # reporting (6000 hosts * 15s == lots of data).
    period = 5

    def __init__(self):
        '''This function should initialize any data structures or variables
        needed for the internal state of the line parser.'''
        self.dur_override = False
        self.reset_state()
        self.reg = re.compile(
            '^(?P<month>\S+)\s+(?P<day>\S+)\s+(?P<time>\S+)\s+(?P<hostname>\S+)\s+(?P<program>\S+):\s+\[(?P<pid>\d+):\d+\]\s+(?P<facility>\S+):\s+server\sstats\sfor\sthread\s(?P<thread>\d+):\s+(?P<queries>\d+)\s+\S+\s+(?P<caches>\d+)\s+\S+\s+\S+\s+\S+\s+(?P<recursions>\d+)'
        )
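        # hypothetical sample line the pattern above is meant to match:
        # Jul  8 12:44:19 ns1 unbound: [1234:0] info: server stats for thread 0: 105 queries, 66 answers from cache, 39 recursions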
        self.lock = threading.RLock()
        self.queries = [0, 0, 0, 0]
        self.caches = [0, 0, 0, 0]
        self.recursions = [0, 0, 0, 0]

    # example function for parse line
    # takes one argument (text) line to be parsed
    # returns nothing
    def parse_line(self, line):
        '''This function should digest the contents of one line at a time,
        updating the internal state variables.'''
        self.lock.acquire()
        regMatch = self.reg.match(line)
        if regMatch:
            self.num_lines += 1
            bitsdict = regMatch.groupdict()
            self.queries[int(bitsdict['thread'])] += int(bitsdict['queries'])
            self.caches[int(bitsdict['thread'])] += int(bitsdict['caches'])
            self.recursions[int(bitsdict['thread'])] += int(
                bitsdict['queries']) - int(bitsdict['caches'])
        self.lock.release()

    # example function for deep copy
    # takes no arguments
    # returns one object
    def deep_copy(self):
        '''This function should return a copy of the data structure used to
        maintain state.  This copy should be different from the object that is
        currently being modified so that the other thread can deal with it
        without fear of it changing out from under it.  The format of this
        object is internal to the plugin.'''
        return [self.num_lines, self.queries, self.caches, self.recursions]

    # example function for reset_state
    # takes no arguments
    # returns nothing
    def reset_state(self):
        '''This function resets the internal data structure to 0 (saving
        whatever state it needs).  This function should be called
        immediately after deep copy with a lock in place so the internal
        data structures can't be modified in between the two calls.  If the
        time between calls to get_state is necessary to calculate metrics,
        reset_state should store now() each time it's called, and get_state
        will use the time since that now() to do its calculations'''
        self.num_lines = 0
        self.queries = [0, 0, 0, 0]
        self.caches = [0, 0, 0, 0]
        self.recursions = [0, 0, 0, 0]
        self.last_reset_time = time.time()

    # example for keeping track of runtimes
    # takes no arguments
    # returns float number of seconds for this run
    def set_check_duration(self, dur):
        '''This function only used if logtailer is in cron mode.  If it is
        invoked, get_check_duration should use this value instead of calculating
        it.'''
        self.duration = dur
        self.dur_override = True

    def get_check_duration(self):
        '''This function should return the time since the last check.  If called
        from cron mode, this must be set using set_check_duration().  If in
        daemon mode, it should be calculated internally.'''
        if (self.dur_override):
            duration = self.duration
        else:
            cur_time = time.time()
            duration = cur_time - self.last_reset_time
            # the duration should be within 10% of period
            acceptable_duration_min = self.period - (self.period / 10.0)
            acceptable_duration_max = self.period + (self.period / 10.0)
            if (duration < acceptable_duration_min
                    or duration > acceptable_duration_max):
                raise LogtailerStateException, "time calculation problem - duration (%s) > 10%% away from period (%s)" % (
                    duration, self.period)
        return duration

    # example function for get_state
    # takes no arguments
    # returns a dictionary of (metric => metric_object) pairs
    def get_state(self):
        '''This function should acquire a lock, call deep copy, get the
        current time if necessary, call reset_state, then do its
        calculations.  It should return a list of metric objects.'''
        # get the data to work with
        self.lock.acquire()
        try:
            number_of_lines, queries, caches, recursions = self.deep_copy()
            check_time = self.get_check_duration()
            self.reset_state()
            self.lock.release()
        except LogtailerStateException, e:
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # crunch data to how you want to report it
        queries_per_second = sum(queries) / check_time
        recursions_per_second = sum(recursions) / check_time
        caches_per_second = sum(caches) / check_time

        # package up the data you want to submit
        qps_metric = GangliaMetricObject('unbound_queries',
                                         queries_per_second,
                                         units='qps')
        rps_metric = GangliaMetricObject('unbound_recursions',
                                         recursions_per_second,
                                         units='rps')
        cps_metric = GangliaMetricObject('unbound_cachehits',
                                         caches_per_second,
                                         units='cps')
        # return a list of metric objects
        return [qps_metric, rps_metric, cps_metric]
Example 14
class TomcatLogtailer(object):
    # only used in daemon mode
    period = 60

    def __init__(self):
        '''This function should initialize any data structures or variables
        needed for the internal state of the line parser.'''
        self.reset_state()
        self.lock = threading.RLock()
        # This is what will match the tomcat lines
        # Tomcat access log valve pattern string:
        #  %h %l %u %t &quot;%r&quot; %s %b %D
        # Sample Line:
        #  10.0.1.31 - - [08/Jul/2013:12:44:19 -0400] "OPTIONS /status HTTP/1.0" 200 - 0
        self.reg = re.compile(
            "(?P<host>[0-9]+(?:\.[0-9]+){3}) (?P<ident>[^\s]+) (?P<user>[^\s]+) (?P<date>\[([^\]]+)\]) (?P<req>\"([A-Z]+)[^\"]*\") (?P<status>\d+) (?P<bytes>[^\s]+) (?P<qtime>\d+)"
        )
        # assume we're in daemon mode unless set_check_duration gets called
        self.dur_override = False

    # example function for parse line
    # takes one argument (text) line to be parsed
    # returns nothing
    def parse_line(self, line):
        '''This function should digest the contents of one line at a time,
        updating the internal state variables.'''
        self.lock.acquire()
        regMatch = self.reg.match(line)
        if regMatch:
            linebits = regMatch.groupdict()
            self.num_hits += 1
            # capture request duration
            dur = int(linebits['qtime'])
            self.req_time += dur
            # store for 90th % calculation
            self.ninetieth.append(dur)
        self.lock.release()

    # example function for deep copy
    # takes no arguments
    # returns one object
    def deep_copy(self):
        '''This function should return a copy of the data structure used to
        maintain state.  This copy should be different from the object that is
        currently being modified so that the other thread can deal with it
        without fear of it changing out from under it.  The format of this
        object is internal to the plugin.'''
        myret = dict(num_hits=self.num_hits,
                     req_time=self.req_time,
                     ninetieth=self.ninetieth)
        return myret

    # example function for reset_state
    # takes no arguments
    # returns nothing
    def reset_state(self):
        '''This function resets the internal data structure to 0 (saving
        whatever state it needs).  This function should be called
        immediately after deep copy with a lock in place so the internal
        data structures can't be modified in between the two calls.  If the
        time between calls to get_state is necessary to calculate metrics,
        reset_state should store now() each time it's called, and get_state
        will use the time since that now() to do its calculations'''
        self.num_hits = 0
        self.req_time = 0
        self.ninetieth = list()
        self.last_reset_time = time.time()

    # example for keeping track of runtimes
    # takes no arguments
    # returns float number of seconds for this run
    def set_check_duration(self, dur):
        '''This function only used if logtailer is in cron mode.  If it is
        invoked, get_check_duration should use this value instead of calculating
        it.'''
        self.duration = dur
        self.dur_override = True

    def get_check_duration(self):
        '''This function should return the time since the last check.  If called
        from cron mode, this must be set using set_check_duration().  If in
        daemon mode, it should be calculated internally.'''
        if (self.dur_override):
            duration = self.duration
        else:
            cur_time = time.time()
            duration = cur_time - self.last_reset_time
            # the duration should be within 10% of period
            acceptable_duration_min = self.period - (self.period / 10.0)
            acceptable_duration_max = self.period + (self.period / 10.0)
            if (duration < acceptable_duration_min
                    or duration > acceptable_duration_max):
                raise LogtailerStateException, "time calculation problem - duration (%s) > 10%% away from period (%s)" % (
                    duration, self.period)
        return duration

    # example function for get_state
    # takes no arguments
    # returns a dictionary of (metric => metric_object) pairs
    def get_state(self):
        '''This function should acquire a lock, call deep copy, get the
        current time if necessary, call reset_state, then do its
        calculations.  It should return a list of metric objects.'''
        # get the data to work with
        self.lock.acquire()
        try:
            mydata = self.deep_copy()
            check_time = self.get_check_duration()
            self.reset_state()
            self.lock.release()
        except LogtailerStateException, e:
            # if something went wrong with deep_copy or the duration, reset and continue
            self.reset_state()
            self.lock.release()
            raise e

        # crunch data to how you want to report it
        hits_per_second = mydata['num_hits'] / check_time
        if (mydata['num_hits'] != 0):
            avg_req_time = mydata['req_time'] / mydata['num_hits']
        else:
            avg_req_time = 0

        # calculate 90th % request time
        ninetieth_list = mydata['ninetieth']
        ninetieth_list.sort()
        num_entries = len(ninetieth_list)
        if (num_entries != 0):
            slowest = ninetieth_list[-1]
            ninetieth_element = ninetieth_list[int(num_entries * 0.9)]
        else:
            slowest = 0
            ninetieth_element = 0

        # package up the data you want to submit
        hps_metric = GangliaMetricObject('solr_rps',
                                         hits_per_second,
                                         units='rps')
        avgdur_metric = GangliaMetricObject('solr_avg_dur',
                                            avg_req_time,
                                            units='ms')
        ninetieth_metric = GangliaMetricObject('solr_90th_dur',
                                               ninetieth_element,
                                               units='ms')
        slowest_metric = GangliaMetricObject('solr_slowest_dur',
                                             slowest,
                                             units='ms')
        # return a list of metric objects
        return [hps_metric, avgdur_metric, ninetieth_metric, slowest_metric]
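Several of the examples compute the 90th percentile the same way: sort the sample list and index it at int(len * 0.9), falling back to 0 when the list is empty. The same logic factored into a standalone helper (a sketch, not part of any plugin above):

def percentile(sorted_values, fraction):
    '''Return the value at the given fraction through a pre-sorted list,
    or 0 for an empty list, mirroring the examples above.'''
    if not sorted_values:
        return 0
    return sorted_values[int(len(sorted_values) * fraction)]

# e.g. percentile(sorted(durations), 0.9) reproduces ninetieth_element,
# and percentile(sorted(durations), 0.5) reproduces the swift 50th figure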