Example 1
    def sort_by_size(self, group_limit=None, discard_others=False, others_label='others'):
        """ sorts the groups by the number of elements they contain, descending. Also has option to 
            limit the number of groups. If this option is chosen, the remaining elements are placed
            into another group with the name specified with others_label. if discard_others is True,
            the others group is removed instead.
        """

        # sort groups by number of elements
        self.groups = OrderedDict( sorted(self.groups.iteritems(), key=lambda x: len(x[1]), reverse=True) )

        # if group-limit is provided, combine remaining groups
        if group_limit != None:

            # now group together all groups that did not make the limit
            if not discard_others:
                group_keys = self.groups.keys()[ group_limit-1: ]
                self.groups.setdefault(others_label, list())
            else:
                group_keys = self.groups.keys()[ group_limit: ]

            # merge each of these groups into 'others' (or drop it when discarding)
            for g in group_keys:
                if not discard_others:
                    self.groups[others_label].extend(self.groups[g])
                del self.groups[g]

            # remove if empty
            if others_label in self.groups and len(self.groups[others_label]) == 0:
                del self.groups[others_label]

        # remove others group regardless of limit if requested
        if discard_others and others_label in self.groups:
            del self.groups[others_label]
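
A minimal usage sketch of sort_by_size (hypothetical data; the full Grouping class this method belongs to appears in Example 32):

g = Grouping(['apple', 'avocado', 'banana', 'cherry'], group_by=lambda s: s[0])
# g.groups -> {'a': ['apple', 'avocado'], 'b': ['banana'], 'c': ['cherry']}

g.sort_by_size(group_limit=2)
# largest group 'a' is kept, the rest merged into 'others':
# {'a': ['apple', 'avocado'], 'others': ['banana', 'cherry']}

g.sort_by_size(group_limit=2, discard_others=True)
# the 'others' group is dropped: {'a': ['apple', 'avocado']}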
Example 2
class ChooseOperator(BaseOperator):

    dict_format = True
    names = ['$choose']
    defaults = OrderedDict([('from', []), ('weights', None)])

    def __call__(self, options=None):
        # options can be an arbitrarily long list; store it under 'from'
        if isinstance(options, list):
            options = {'from': options}

        options = self._parse_options(options)

        # decode weights
        weights = self._decode(options['weights'])
        if not weights:
            # pick one choice, uniformly distributed, but don't evaluate yet
            return choice(options['from'])
        else:
            assert len(weights) == len(options['from'])

            total_weight = 0
            acc_weight_items = []
            for item, weight in zip(options['from'], weights):
                total_weight += weight
                acc_weight_items.append((total_weight, item))

            pick = random() * total_weight
            for weight, item in acc_weight_items:
                if weight >= pick:
                    return item
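
The cumulative-weight scan above is a standard way to sample in proportion to weights. A self-contained sketch of the same technique (standalone, not part of the operator machinery):

from random import random

def weighted_choice(items, weights):
    """ pick one item with probability proportional to its weight. """
    total = 0.0
    cumulative = []
    for item, weight in zip(items, weights):
        total += weight
        cumulative.append((total, item))

    pick = random() * total             # uniform in [0, total)
    for threshold, item in cumulative:
        if threshold >= pick:           # first bucket covering the pick
            return item

weighted_choice(['a', 'b', 'c'], [1, 1, 2])   # 'c' about half the time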
Example 3
class DateTimeOperator(BaseOperator):

    dict_format = True
    string_format = True

    names = ['$datetime', '$date']
    defaults = OrderedDict([('min', 0), ('max', int(time.time()))])

    def _parse_dt(self, input):
        """ parse input, either int (epoch) or date string (use dateutil parser). """
        if isinstance(input, str):
            # string needs conversion, try parsing with dateutil's parser
            try:
                dt = parser.parse(input)
            except Exception as e:
                raise SystemExit("can't parse date/time format for %s." %
                                 input)

            td = dt - datetime.utcfromtimestamp(0)
            return int((td.microseconds +
                        (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6)
        else:
            return int(input)

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode min and max and convert time formats to epochs
        mintime = self._parse_dt(self._decode(options['min']))
        maxtime = self._parse_dt(self._decode(options['max']))

        # generate random epoch number
        epoch = randint(mintime, maxtime)
        return datetime.fromtimestamp(epoch)
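
The timedelta arithmetic in _parse_dt is the classic pre-Python-3.3 recipe for epoch seconds; it is equivalent to int(td.total_seconds()). A standalone check of the conversion:

from datetime import datetime

def to_epoch(dt):
    """ epoch seconds for a naive UTC datetime, as computed in _parse_dt. """
    td = dt - datetime.utcfromtimestamp(0)
    return int((td.microseconds +
                (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6)

to_epoch(datetime(2014, 1, 1))   # 1388534400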
Example 4
class ObjectIdOperator(DateTimeOperator):
    """ with no parameters, just generate a new ObjectId. If min and/or max
        are provided, handle like DateTimeOperator and replace the timestamp
        portion in the ObjectId with the random date and time.
    """

    names = ['$objectid', '$oid']
    defaults = OrderedDict([('min', None), ('max', None)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        mintime = self._decode(options['min'])
        maxtime = self._decode(options['max'])

        if (mintime == None and maxtime == None):
            return ObjectId()

        # decode min and max and convert time formats to epochs
        mintime = self._parse_dt(mintime or 0)
        maxtime = self._parse_dt(maxtime or time.time())
        assert mintime <= maxtime

        # generate random epoch number
        epoch = randint(mintime, maxtime)
        oid = struct.pack(">i", int(epoch)) + ObjectId().binary[4:]

        return ObjectId(oid)
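
The struct.pack trick works because the first 4 bytes of an ObjectId are a big-endian epoch-seconds timestamp; swapping them re-dates the id while keeping the machine/counter portion. A standalone sketch (requires pymongo's bson package; the function name is illustrative):

import struct
import time
from bson import ObjectId

def objectid_at(epoch):
    """ build an ObjectId whose leading 4 bytes encode the given epoch. """
    binary = struct.pack(">i", int(epoch)) + ObjectId().binary[4:]
    return ObjectId(binary)

oid = objectid_at(time.time() - 3600)
print(oid.generation_time)   # timezone-aware datetime, one hour in the past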
Example 5
    def run(self):
        """Run this section and print out information."""
        grouping = Grouping(
            group_by=lambda x: (x.datetime, x.cursorid, x.reapedtime))
        logfile = self.mloginfo.logfile

        if logfile.start and logfile.end:
            progress_start = self.mloginfo._datetime_to_epoch(logfile.start)
            progress_total = (self.mloginfo._datetime_to_epoch(logfile.end) -
                              progress_start)
        else:
            self.mloginfo.progress_bar_enabled = False

        for i, le in enumerate(logfile):
            # update progress bar every 1000 lines
            if self.mloginfo.progress_bar_enabled and (i % 1000 == 0):
                if le.datetime:
                    progress_curr = self.mloginfo._datetime_to_epoch(
                        le.datetime)
                    if progress_total:
                        self.mloginfo.update_progress(
                            float(progress_curr - progress_start) /
                            progress_total)

            if 'Cursor id' in le.line_str:
                lt = LogTuple(le.datetime, le.cursor, le._reapedtime)
                grouping.add(lt)

        grouping.sort_by_size()

        # clear progress bar again
        if self.mloginfo.progress_bar_enabled:
            self.mloginfo.update_progress(1.0)

        # no cursor information in the log file
        if not len(grouping):
            print('no cursor information found.')
            return

        titles = ['datetime', 'cursorid', 'reapedtime']

        table_rows = []
        # build one table row per group
        for g in grouping:
            # calculate statistics for this group
            datetime, cursorid, reapedtime = g
            stats = OrderedDict()
            stats['datetime'] = str(datetime)
            stats['cursorid'] = str(cursorid)
            stats['reapedtime'] = str(reapedtime)
            table_rows.append(stats)

        print_table(table_rows, titles, uppercase_headers=True)

        print('')
Example 6
    def __init__(self, args=None, unknown_args=None):
        self.args = args
        self.unknown_args = unknown_args
        self.groups = OrderedDict()
        self.empty = True
        self.limits = None

        if self.args["optime_start"]:
            self.xlabel = "time (start of ops)"
        else:
            self.xlabel = "time (end of ops)"
Example 7
    def __init__(self, args=None, unknown_args=None):
        self.args = args
        self.unknown_args = unknown_args
        self.groups = OrderedDict()
        self.empty = True
        self.limits = None

        if self.args['optime_start']:
            self.xlabel = 'time (start of ops)'
        else:
            self.xlabel = 'time (end of ops)'
Example 8
class ArrayOperator(BaseOperator):

    dict_format = True
    names = ['$array']
    defaults = OrderedDict([('of', None), ('number', 10)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # evaluate number
        number = self._decode(options['number'])

        # build array of 'of' elements, but don't evaluate them yet
        return [options['of']] * number
Example 9
    def group(self):
        """ (re-)group all loglines by the given group. """
        if hasattr(self, "group_by"):
            group_by = self.group_by
        else:
            group_by = self.default_group_by
            if self.args["group"] != None:
                group_by = self.args["group"]

        groups = OrderedDict()

        for logline in self.loglines:
            # if group_by is a function, call on logline
            if hasattr(group_by, "__call__"):
                key = group_by(logline)
            # if the logline has attribute of group_by, use that as key
            elif group_by and hasattr(logline, group_by):
                key = getattr(logline, group_by)
            # if the PlotType has a method with the name of group_by call that on logline
            elif group_by and hasattr(self, group_by):
                f = getattr(self, group_by)
                key = f(logline)
            # if a --label was given, use that as key
            elif self.args and self.args["label"]:
                key = self.args["label"]
            # else key is None
            else:
                key = None

            # special case: group together all connections
            if group_by == "thread" and key and key.startswith("conn"):
                key = "conn####"

            groups.setdefault(key, list()).append(logline)

        self.groups = groups
Example 10
class ZipfOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$zipf', '$zeta']
    defaults = OrderedDict([('alpha', 2.0)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode distribution parameter
        alpha = self._decode(options['alpha'])

        val = zipf(alpha) - 1
        return val
Example 11
    def group(self):
        """ (re-)group all loglines by the given group. """
        if hasattr(self, 'group_by'):
            group_by = self.group_by
        else:
            group_by = self.default_group_by
            if self.args['group'] != None:
                group_by = self.args['group']

        groups = OrderedDict()

        for logline in self.loglines:
            # if group_by is a function, call on logline
            if hasattr(group_by, '__call__'):
                key = group_by(logline)
            # if the logline has attribute of group_by, use that as key
            elif group_by and hasattr(logline, group_by):
                key = getattr(logline, group_by)
            # if the PlotType has a method with the name of group_by call that on logline
            elif group_by and hasattr(self, group_by):
                f = getattr(self, group_by)
                key = f(logline)
            # if a --label was given, use that as key
            # elif self.args and self.args['label']:
            #     key = self.args['label']
            # else key is None
            else:
                key = None

            # special case: group together all connections
            # if group_by == "thread" and key and key.startswith("conn"):
            #     key = "conn####"

            groups.setdefault(key, list()).append(logline)

        self.groups = groups
Example 12
class PointOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$point']
    defaults = OrderedDict([ ('long_lim', [-180, 180]), ('lat_lim', [-90, 90]) ])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # evaluate limits
        long_lim = self._decode(options['long_lim'])
        lat_lim = self._decode(options['lat_lim'])

        # return coordinate by using random numbers between limits
        return { "type": "Point", "coordinates": { "$coord": [long_lim, lat_lim] } }
Example 13
    def run(self):
        """Run this section and print out information."""
        titles = ['date', 'host', 'state/message']
        table_rows = []

        for host, state, logevent in self.mloginfo.logfile.rs_state:
            stats = OrderedDict()
            stats['date'] = logevent.datetime.strftime("%b %d %H:%M:%S")
            stats['host'] = host
            stats['state/message'] = state
            table_rows.append(stats)

        print_table(table_rows, titles, uppercase_headers=False)

        if len(self.mloginfo.logfile.rs_state) == 0:
            print("  no rs state changes found")
Example 14
class CoordinateOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$coordinates', '$coordinate', '$coord', '$geo']
    defaults = OrderedDict([('long_lim', [-180, 180]), ('lat_lim', [-90, 90])])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # evaluate limits
        long_lim = self._decode(options['long_lim'])
        lat_lim = self._decode(options['lat_lim'])

        # return coordinate by using random numbers between limits
        return [{"$float": long_lim}, {"$float": lat_lim}]
Example 15
class NumberOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$number', '$num']
    defaults = OrderedDict([('min', 0), ('max', 100)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode min and max first
        minval = self._decode(options['min'])
        maxval = self._decode(options['max'])
        assert minval <= maxval

        return randint(minval, maxval)
Example 16
class GaussOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$gauss', '$normal']
    defaults = OrderedDict([('mean', 0.0), ('std', 1.0)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode mean and standard deviation
        mu = self._decode(options['mean'])
        sigma = self._decode(options['std'])

        val = gauss(mu, sigma)
        return val
Example 17
class FloatOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$float']
    defaults = OrderedDict([('min', 0.0), ('max', 1.0)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode min and max first
        minval = self._decode(options['min'])
        maxval = self._decode(options['max'])
        assert minval <= maxval

        val = random() * (maxval - minval) + minval
        return val
Example 18
class PickOperator(BaseOperator):

    dict_format = True
    string_format = False
    names = ['$pick']
    defaults = OrderedDict([('array', []), ('element', 0)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode array and element index
        array = self._decode(options['array'])
        element = self._decode(options['element'])

        if len(array) <= element:
            return '$missing'

        return array[element]
Example 19
class IncOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$inc']
    defaults = OrderedDict([('start', 0), ('step', 1)])

    def __init__(self, decode_method):
        self.counter = None
        BaseOperator.__init__(self, decode_method)

    def __call__(self, options=None):
        options = self._parse_options(options)

        # initialize counter on first use (not threadsafe!)
        if self.counter == None:
            self.counter = itertools.count(options['start'], options['step'])

        return self.counter.next()
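
The comment above flags that the lazy counter initialization is not threadsafe: two threads could both see counter == None and create separate counters. A hedged sketch of a lock-guarded variant (standalone, not the original operator):

import itertools
import threading

class SafeCounter(object):
    """ monotonically increasing counter, safe for concurrent callers. """
    def __init__(self, start=0, step=1):
        self._lock = threading.Lock()
        self._counter = itertools.count(start, step)

    def next(self):
        with self._lock:               # serialize access to the iterator
            return next(self._counter)

c = SafeCounter(start=100, step=5)
c.next()   # 100
c.next()   # 105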
Example 20
class MissingOperator(BaseOperator):

    dict_format = True
    string_format = True

    names = ['$missing']
    defaults = OrderedDict([('percent', 100), ('ifnot', None)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # evaluate percent
        percent = self._decode(options['percent'])

        if randint(1, 100) <= percent:
            return '$missing'
        else:
            # ifnot is not yet evaluated, leave that up to another operator
            return options['ifnot']
Example 21
class ConcatOperator(BaseOperator):

    dict_format = True
    names = ['$concat']
    defaults = OrderedDict([('items', []), ('sep', '')])

    def __call__(self, options=None):

        # options can be an arbitrarily long list; store it under 'items'
        if isinstance(options, list):
            options = {'items': options}

        options = self._parse_options(options)

        # evaluate items
        items = self._decode(options['items'])
        # separator
        sep = self._decode(options['sep'])

        # return concatenated string
        return sep.join(str(i) for i in items)
Example 22
class BinaryOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$bin']
    defaults = OrderedDict([('length', 10), ('type', 0)])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # evaluate limits
        length = self._decode(options['length'])
        bintype = self._decode(options['type'])

        # generate random alphanumeric binary data of the given length
        assert length > 0
        bindata = ''.join(
            choice(string.ascii_letters + string.digits)
            for i in xrange(length))

        return Binary(bindata, bintype)
Example 23
class StringOperator(BaseOperator):

    dict_format = True
    string_format = True
    names = ['$string', '$str']
    defaults = OrderedDict([ ('length', 10), ('mask', None) ])

    def __call__(self, options=None):
        options = self._parse_options(options)

        # decode length and mask
        length = self._decode(options['length'])
        mask = self._decode(options['mask'])

        if mask == None:
            mask = '.' * length

        assert length > 0
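        # note: mask is decoded and defaulted above but never applied below;
        # as written, the result is always a fully random alphanumeric string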
        result = ''.join( choice(string.ascii_letters + string.digits) for i in xrange(length) )

        return result
Example 24
class BaseOperator(object):
    names = []
    dict_format = False
    string_format = False
    defaults = OrderedDict()

    def __init__(self, decode_method):
        self._decode = decode_method

    def _parse_options(self, options={}):
        parsed = self.defaults.copy()

        if isinstance(options, list):
            parsed.update(zip(self.defaults.keys(), options))

        elif isinstance(options, dict):
            parsed.update(options)

        for k, v in parsed.iteritems():
            if isinstance(v, unicode):
                parsed[k] = v.encode('utf-8')
        return parsed
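
_parse_options is what lets every operator accept either a positional list or a named dict: list values are zipped against the ordered default keys, dict values are merged directly. A quick illustration of the merge (plain Python, using the NumberOperator defaults from Example 15):

from collections import OrderedDict

defaults = OrderedDict([('min', 0), ('max', 100)])

# positional style, e.g. {"$number": [5]} -- only 'min' is overridden
parsed = defaults.copy()
parsed.update(zip(defaults.keys(), [5]))
# parsed -> {'min': 5, 'max': 100}

# named style, e.g. {"$number": {"max": 10}}
parsed = defaults.copy()
parsed.update({'max': 10})
# parsed -> {'min': 0, 'max': 10}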
Example 25
class DateTimeFilter(BaseFilter):
    """ This filter has two parser arguments: --from and --to, both are 
        optional. All possible values for --from and --to can be described as:

        [DATE] [TIME] [OFFSET] in that order, separated by a space.

        [DATE] can be any of
            - a 3-letter weekday (Mon, Tue, Wed, ...)
            - a date as 3-letter month, 1-2 digits day (Sep 5, Jan 31, Aug 08)
            - the words: today, now, start, end

        [TIME] can be any of
            - hours and minutes (20:15, 04:00, 3:00)
            - hours, minutes and seconds (13:30:01, 4:55:55)

        [OFFSET] consists of [OPERATOR][VALUE][UNIT]   (no spaces in between)

        [OPERATOR] can be + or - (note that - can only be used if the whole 
            "[DATE] [TIME] [OFFSET]" is in quotation marks, otherwise it would 
            be confused with a separate parameter)

        [VALUE] can be any number

        [UNIT] can be any of s, sec, m, min, h, hours, d, days, w, weeks, mo,
            months, y, years

        The [OFFSET] is added/subtracted to/from the specified [DATE] [TIME].

        For the --from parameter, the default is the same as 'start' 
            (0001-01-01 00:00:00). If _only_ an [OFFSET] is given, it is 
            added to 'start' (which is not very useful).

        For the --to parameter, the default is the same as 'end' 
            (9999-12-31 23:59:59). If _only_ an [OFFSET] is given, however,
            it is added to [FROM].

        Examples:  
            --from Sun 10:00 
                goes from last Sunday 10:00:00am to the end of the file

            --from Sep 29
                goes from Sep 29 00:00:00 to the end of the file

            --to today 15:00
                goes from the beginning of the file to today at 15:00:00

            --from today --to +1h
                goes from today's date 00:00:00 to today's date 01:00:00

            --from 20:15 --to +3m  
                goes from today's date at 20:15:00 to today's date at 20:18:00
    """

    filterArgs = [('--from', {
        'action': 'store',
        'type': custom_parse_dt,
        'nargs': '*',
        'default': 'start',
        'help': 'output starting at FROM',
        'dest': 'from'
    }),
                  ('--to', {
                      'action': 'store',
                      'type': custom_parse_dt,
                      'nargs': '*',
                      'default': 'end',
                      'help': 'output up to TO',
                      'dest': 'to'
                  })]

    timeunits = [
        's', 'sec', 'm', 'min', 'h', 'hours', 'd', 'days', 'w', 'weeks', 'mo',
        'months', 'y', 'years'
    ]
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    months = [
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
        'Nov', 'Dec'
    ]

    dtRegexes = OrderedDict([
        ('weekday', r'|'.join(weekdays)),  # weekdays: see above
        ('date', '(' + '|'.join(months) + ')' +
         r'\s+\d{1,2}'),  # month + day:  Jan 5, Oct 13, Sep 03, ...
        ('word', r'now|start|end|today'),
        ('time2', r'\d{1,2}:\d{2,2}'),  # 11:59, 1:13, 00:00, ...
        ('time3',
         r'\d{1,2}:\d{2,2}:\d{2,2}'),  # 11:59:00, 1:13:12, 00:00:59, ...
        ('offset', r'[\+-]\d+(' + '|'.join(timeunits) +
         ')'),  # offsets: +3min, -20s, +7days, ...                    
    ])

    def __init__(self, commandLineArgs):
        BaseFilter.__init__(self, commandLineArgs)
        self.fromReached = False
        self.toReached = False

        if 'from' in self.commandLineArgs or 'to' in self.commandLineArgs:
            self.active = True

    def setup(self):
        """ get start end end date of logfile before starting to parse. """
        logfile = self.commandLineArgs['logfile']
        seekable = False

        if logfile:
            seekable = logfile.name != "<stdin>"

        if not seekable:
            # assume this year (we have no other info)
            now = datetime.now()
            self.startDateTime = datetime(now.year, 1, 1)
            self.endDateTime = datetime(MAXYEAR, 12, 31)
            # self.fromDateTime = datetime(MINYEAR, 1, 1)
            # self.toDateTime = datetime(MAXYEAR, 12, 31)

        else:
            # get start datetime
            for line in logfile:
                logline = LogLine(line)
                date = logline.datetime
                if date:
                    break
            self.startDateTime = date

            # get end datetime (lines are at most 10k, go back 15k at most to make sure)
            logfile.seek(0, 2)
            file_size = logfile.tell()
            logfile.seek(-min(file_size, 15000), 2)

            for line in reversed(logfile.readlines()):
                logline = LogLine(line)
                date = logline.datetime
                if date:
                    break
            self.endDateTime = date

            # if there was a roll-over, subtract 1 year from start time
            if self.endDateTime < self.startDateTime:
                self.startDateTime = self.startDateTime.replace(
                    year=self.startDateTime.year - 1)

            # reset logfile
            logfile.seek(0)

        # now parse for further changes to from and to datetimes
        dtbound = DateTimeBoundaries(self.startDateTime, self.endDateTime)
        self.fromDateTime, self.toDateTime = dtbound(
            self.commandLineArgs['from'] or None,
            self.commandLineArgs['to'] or None)

    def accept(self, logline):
        dt = logline.datetime

        # if logline has no datetime, accept if between --from and --to
        if dt == None:
            return self.fromReached

        if self.fromDateTime <= dt <= self.toDateTime:
            self.toReached = False
            self.fromReached = True
            return True

        elif dt > self.toDateTime:
            self.toReached = True
            return False

        else:
            return False

    def skipRemaining(self):
        return self.toReached
Example 26
class BasePlotType(object):

    colors = ['k', 'b', 'g', 'r', 'c', 'm', 'y']
    color_index = 0
    markers = ['o', 's', '<', 'D']
    marker_index = 0

    sort_order = 0
    plot_type_str = 'base'
    default_group_by = None

    # set group_by in sub-classes to force a group_by as below
    # group_by = 'example'

    def __init__(self, args=None, unknown_args=None):
        self.args = args
        self.unknown_args = unknown_args
        self.groups = OrderedDict()
        self.empty = True
        self.limits = None

    def accept_line(self, logline):
        """ return True if this PlotType can plot this line. """
        return True

    def add_line(self, logline):
        """ append log line to this plot type. """
        key = None
        self.empty = False
        self.groups.setdefault(key, list()).append(logline)

    @property
    def loglines(self):
        """ iterator yielding all loglines from groups dictionary. """
        for key in self.groups:
            for logline in self.groups[key]:
                yield logline

    @classmethod
    def color_map(cls, group):
        color = cls.colors[cls.color_index]
        cls.color_index += 1

        marker = cls.markers[cls.marker_index]
        if cls.color_index >= len(cls.colors):
            cls.marker_index += 1
            cls.marker_index %= len(cls.markers)
            cls.color_index %= len(cls.colors)

        return color, marker

    def group(self):
        """ (re-)group all loglines by the given group. """
        if hasattr(self, 'group_by'):
            group_by = self.group_by
        else:
            group_by = self.default_group_by
            if self.args['group'] != None:
                group_by = self.args['group']

        groups = OrderedDict()

        for logline in self.loglines:
            # if group_by is a function, call on logline
            if hasattr(group_by, '__call__'):
                key = group_by(logline)
            # if the logline has attribute of group_by, use that as key
            elif group_by and hasattr(logline, group_by):
                key = getattr(logline, group_by)
            # if the PlotType has a method with the name of group_by call that on logline
            elif group_by and hasattr(self, group_by):
                f = getattr(self, group_by)
                key = f(logline)
            # if a --label was given, use that as key
            # elif self.args and self.args['label']:
            #     key = self.args['label']
            # else key is None
            else:
                key = None

            # special case: group together all connections
            # if group_by == "thread" and key and key.startswith("conn"):
            #     key = "conn####"

            groups.setdefault(key, list()).append(logline)

        self.groups = groups

    def plot_group(self, group, idx, axis):
        raise NotImplementedError(
            "BasePlotType can't plot. Use a derived class instead")

    def plot(self, axis, ith_plot, total_plots, limits):
        self.limits = limits

        artists = []
        print self.plot_type_str.upper(), "plot"
        print "%5s %9s  %s" % ("id", " #points", "group")

        for idx, group in enumerate(self.groups):
            print "%5s %9s  %s" % (idx + 1, len(self.groups[group]), group)
            group_artists = self.plot_group(group, idx + ith_plot, axis)
            if isinstance(group_artists, list):
                artists.extend(group_artists)
            else:
                artists.append(group_artists)

        print

        return artists
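
color_map hands out colors first and advances the marker only when the color list wraps, so successive groups receive len(colors) * len(markers) distinct style combinations before repeating. A tiny standalone illustration of the resulting order:

colors = ['k', 'b', 'g', 'r', 'c', 'm', 'y']
markers = ['o', 's', '<', 'D']

styles = [(c, m) for m in markers for c in colors]
# [('k', 'o'), ('b', 'o'), ..., ('y', 'o'), ('k', 's'), ...]
len(styles)   # 28 distinct combinations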
Example 27
    def __init__(self, args=None):
        self.args = args
        self.groups = OrderedDict()
        self.empty = True
Example 28
    def run(self):
        """ run this section and print out information. """
        grouping = Grouping(group_by=lambda x: (x.namespace, x.pattern))
        logfile = self.mloginfo.logfile

        if logfile.start and logfile.end:
            progress_start = self.mloginfo._datetime_to_epoch(logfile.start)
            progress_total = self.mloginfo._datetime_to_epoch(
                logfile.end) - progress_start
        else:
            self.mloginfo.progress_bar_enabled = False

        for i, le in enumerate(logfile):
            # update progress bar every 1000 lines
            if self.mloginfo.progress_bar_enabled and (i % 1000 == 0):
                if le.datetime:
                    progress_curr = self.mloginfo._datetime_to_epoch(
                        le.datetime)
                    if progress_total:
                        self.mloginfo.update_progress(
                            float(progress_curr - progress_start) /
                            progress_total)

            if le.operation in ['query', 'update', 'remove']:
                grouping.add(le)

        grouping.sort_by_size()

        # clear progress bar again
        self.mloginfo.update_progress(1.0)

        titles = [
            'namespace', 'pattern', 'count', 'min (ms)', 'max (ms)',
            'mean (ms)', 'sum (ms)'
        ]
        table_rows = []
        for g in grouping:
            # calculate statistics for this group
            namespace, pattern = g

            group_events = [
                le.duration for le in grouping[g] if le.duration != None
            ]

            stats = OrderedDict()
            stats['namespace'] = namespace
            stats['pattern'] = pattern
            stats['count'] = len(group_events)
            stats['min'] = min(group_events) if group_events else '-'
            stats['max'] = max(group_events) if group_events else '-'
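            # placeholder keeps the 'mean' column ordered before 'sum';
            # the actual value is filled in below once 'sum' is known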
            stats['mean'] = 0
            stats['sum'] = sum(group_events) if group_events else '-'
            stats['mean'] = (stats['sum'] / stats['count']
                             if group_events else '-')

            if self.mloginfo.args['verbose']:
                stats['example'] = grouping[g][0]
                if 'example' not in titles:
                    titles.append('example')

            table_rows.append(stats)

        table_rows = sorted(table_rows, key=itemgetter('sum'), reverse=True)
        print_table(table_rows, titles, uppercase_headers=False)
        print
Example 29
    def __init__(self, args=None, unknown_args=None):
        self.args = args
        self.unknown_args = unknown_args
        self.groups = OrderedDict()
        self.empty = True
        self.limits = None
Example 30
class BasePlotType(object):

    # 14 most distinguishable colors, according to 
    # http://stackoverflow.com/questions/309149/generate-distinctly-different-rgb-colors-in-graphs
    colors = ['#000000','#00FF00','#0000FF','#FF0000','#01FFFE','#FFA6FE','#FFDB66','#006401', \
              '#010067','#95003A','#007DB5','#FF00F6','#FFEEE8','#774D00']
    color_index = 0
    markers = ['o', 's', '<', 'D']
    marker_index = 0

    sort_order = 0
    plot_type_str = 'base'
    default_group_by = None
    date_range = (datetime(MAXYEAR, 12, 31), datetime(MINYEAR, 1, 1))

    # set group_by in sub-classes to force a group_by as below
    # group_by = 'example'

    def __init__(self, args=None, unknown_args=None):
        self.args = args
        self.unknown_args = unknown_args
        self.groups = OrderedDict()
        self.empty = True
        self.limits = None


    def accept_line(self, logline):
        """ return True if this PlotType can plot this line. """
        return True

    def add_line(self, logline):
        """ append log line to this plot type. """
        key = None
        self.empty = False
        self.groups.setdefault(key, list()).append(logline)

    @property 
    def loglines(self):
        """ iterator yielding all loglines from groups dictionary. """
        for key in self.groups:
            for logline in self.groups[key]:
                yield logline

    @classmethod
    def color_map(cls, group):
        color = cls.colors[cls.color_index]
        cls.color_index += 1

        marker = cls.markers[cls.marker_index]
        if cls.color_index >= len(cls.colors):
            cls.marker_index += 1
            cls.marker_index %= len(cls.markers)
            cls.color_index %= len(cls.colors)

        return color, marker


    def group(self):
        """ (re-)group all loglines by the given group. """
        if hasattr(self, 'group_by'):
            group_by = self.group_by
        else:
            group_by = self.default_group_by
            if self.args['group'] != None:
                group_by = self.args['group']
        
        groups = OrderedDict()

        # label the x-axis once, depending on which op timestamp is plotted
        if self.args['optime_start']:
            self.xlabel = 'time (start of ops)'
        else:
            self.xlabel = 'time (end of ops)'

        for logline in self.loglines:
            # if group_by is a function, call on logline
            if hasattr(group_by, '__call__'):
                key = group_by(logline)
            # if the logline has attribute of group_by, use that as key
            elif group_by and hasattr(logline, group_by):
                key = getattr(logline, group_by)
            # if the PlotType has a method with the name of group_by call that on logline
            elif group_by and hasattr(self, group_by):
                f = getattr(self, group_by)
                key = f(logline)
            # if a --label was given, use that as key
            # elif self.args and self.args['label']:
            #     key = self.args['label']
            # else key is None
            else:
                key = None
                # try to match as regular expression
                if type(group_by) == types.StringType:
                    match = re.search(group_by, logline.line_str)
                    if match:
                        if len(match.groups()) > 0:
                            key = match.group(1)
                        else:
                            key = match.group()

            # special case: group together all connections
            # if group_by == "thread" and key and key.startswith("conn"):
            #     key = "conn####"

            groups.setdefault(key, list()).append(logline)
        
        # sort groups by number of data points
        groups = OrderedDict( sorted(groups.iteritems(), key=lambda x: len(x[1]), reverse=True) )

        # if --group-limit is provided, combine remaining groups
        if self.args['group_limit']:
            group_label = 'all others combined'
            # now group together all groups that did not make the limit
            groups[group_label] = []
            # only go to second last (-1), since the 'other' group is now last
            for other_group in groups.keys()[ self.args['group_limit']:-1 ]:
                groups[group_label].extend(groups[other_group])
                del groups[other_group]

            # remove if empty
            if len(groups[group_label]) == 0:
                del groups[group_label]

        self.groups = groups

    def plot_group(self, group, idx, axis):
        raise NotImplementedError("BasePlotType can't plot. Use a derived class instead")

    def plot(self, axis, ith_plot, total_plots, limits):
        self.limits = limits

        artists = []
        print self.plot_type_str.upper(), "plot"
        print "%5s %9s  %s"%("id", " #points", "group")

        for idx, group in enumerate(self.groups):
            print "%5s %9s  %s"%(idx+1, len(self.groups[group]), group)
            group_artists = self.plot_group(group, idx+ith_plot, axis)
            if isinstance(group_artists, list):
                artists.extend(group_artists)
            else:
                artists.append(group_artists)

        print

        return artists
Example 31
class BasePlotType(object):

    colors = ["k", "b", "g", "r", "c", "m", "y"]
    color_index = 0
    markers = ["o", "s", "<", "D"]
    marker_index = 0

    sort_order = 0
    plot_type_str = "base"
    default_group_by = None

    # set group_by in sub-classes to force a group_by as below
    # group_by = 'example'

    def __init__(self, args=None):
        self.args = args
        self.groups = OrderedDict()
        self.empty = True

    def accept_line(self, logline):
        """ return True if this PlotType can plot this line. """
        return True

    def add_line(self, logline):
        """ append log line to this plot type. """
        key = None
        self.empty = False
        self.groups.setdefault(key, list()).append(logline)

    @property
    def loglines(self):
        """ iterator yielding all loglines from groups dictionary. """
        for key in self.groups:
            for logline in self.groups[key]:
                yield logline

    @classmethod
    def color_map(cls, group):
        color = cls.colors[cls.color_index]
        cls.color_index += 1

        marker = cls.markers[cls.marker_index]
        if cls.color_index >= len(cls.colors):
            cls.marker_index += 1
            cls.marker_index %= len(cls.markers)
            cls.color_index %= len(cls.colors)

        return color, marker

    def group(self):
        """ (re-)group all loglines by the given group. """
        if hasattr(self, "group_by"):
            group_by = self.group_by
        else:
            group_by = self.default_group_by
            if self.args["group"] != None:
                group_by = self.args["group"]

        groups = OrderedDict()

        for logline in self.loglines:
            # if group_by is a function, call on logline
            if hasattr(group_by, "__call__"):
                key = group_by(logline)
            # if the logline has attribute of group_by, use that as key
            elif group_by and hasattr(logline, group_by):
                key = getattr(logline, group_by)
            # if the PlotType has a method with the name of group_by call that on logline
            elif group_by and hasattr(self, group_by):
                f = getattr(self, group_by)
                key = f(logline)
            # if a --label was given, use that as key
            elif self.args and self.args["label"]:
                key = self.args["label"]
            # else key is None
            else:
                key = None

            # special case: group together all connections
            if group_by == "thread" and key and key.startswith("conn"):
                key = "conn####"

            groups.setdefault(key, list()).append(logline)

        self.groups = groups

    def plot_group(self, group, idx, axis):
        raise NotImplementedError("BasePlotType can't plot. Use a derived class instead")

    def plot(self, axis, i):
        artists = []
        print self.plot_type_str.upper(), "plot"
        print "%5s %9s  %s" % ("id", " #points", "group")

        for idx, group in enumerate(self.groups):
            print "%5s %9s  %s" % (idx + 1, len(self.groups[group]), group)
            group_artists = self.plot_group(group, idx + i, axis)
            if isinstance(group_artists, list):
                artists.extend(group_artists)
            else:
                artists.append(group_artists)

        print

        return artists
Example 32
class Grouping(object):
    """Grouping object and related functions."""

    def __init__(self, iterable=None, group_by=None):
        """Init object."""
        self.groups = {}
        self.group_by = group_by

        if iterable:
            for item in iterable:
                self.add(item, group_by)

    def add(self, item, group_by=None):
        """General purpose class to group items by certain criteria."""
        key = None

        if not group_by:
            group_by = self.group_by

        if group_by:
            # if group_by is a function, use it with item as argument
            if hasattr(group_by, '__call__'):
                key = group_by(item)

            # if the item has attribute of group_by as string, use that as key
            elif isinstance(group_by, str) and hasattr(item, group_by):
                key = getattr(item, group_by)

            else:
                key = None
                # try to match str(item) with regular expression
                if isinstance(group_by, str):
                    match = re.search(group_by, str(item))
                    if match:
                        if len(match.groups()) > 0:
                            key = match.group(1)
                        else:
                            key = match.group()

        self.groups.setdefault(key, list()).append(item)


    def __getitem__(self, key):
        """Return item corresponding to key."""
        return self.groups[key]

    def __iter__(self):
        """Iterate items in group."""
        for key in self.groups:
            yield key

    def __len__(self):
        """Return length of group."""
        return len(self.groups)

    def keys(self):
        """Return keys in group."""
        return self.groups.keys()

    def values(self):
        """Return values in group."""
        return self.groups.values()

    def items(self):
        """Return items in group."""
        return self.groups.items()

    def regroup(self, group_by=None):
        """Regroup items."""
        if not group_by:
            group_by = self.group_by

        groups = self.groups
        self.groups = {}

        for g in groups:
            for item in groups[g]:
                self.add(item, group_by)

    def move_items(self, from_group, to_group):
        """Take all elements from the from_group and add it to the to_group."""
        if from_group not in self.keys() or len(self.groups[from_group]) == 0:
            return

        self.groups.setdefault(to_group, list()).extend(
            self.groups.get(from_group, list()))
        if from_group in self.groups:
            del self.groups[from_group]

    def sort_by_size(self, group_limit=None, discard_others=False,
                     others_label='others'):
        """
        Sort the groups by the number of elements they contain, descending.

        Also has the option to limit the number of groups. If this option is
        chosen, the remaining elements are placed into another group with the
        name specified by others_label. If discard_others is True, the others
        group is removed instead.
        """
        # sort groups by number of elements
        self.groups = OrderedDict(sorted(six.iteritems(self.groups),
                                         key=lambda x: len(x[1]),
                                         reverse=True))

        # if group-limit is provided, combine remaining groups
        if group_limit is not None:

            # now group together all groups that did not make the limit
            if not discard_others:
                group_keys = list(self.groups.keys())[group_limit - 1:]
                self.groups.setdefault(others_label, list())
            else:
                group_keys = list(self.groups.keys())[group_limit:]

            # merge each of these groups into 'others' (or drop it when discarding)
            for g in group_keys:
                if not discard_others:
                    self.groups[others_label].extend(self.groups[g])
                del self.groups[g]

            # remove if empty
            if (others_label in self.groups and
                    len(self.groups[others_label]) == 0):
                del self.groups[others_label]

        # remove others group regardless of limit if requested
        if discard_others and others_label in self.groups:
            del self.groups[others_label]
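
A short usage sketch of Grouping's grouping modes (hypothetical data):

# group_by as a callable
g = Grouping([1, 2, 3, 4], group_by=lambda x: x % 2)
# g.groups -> {1: [1, 3], 0: [2, 4]}

# group_by as a regular expression with a capture group
g = Grouping(['conn12 query', 'conn34 insert', 'repl sync'],
             group_by=r'(conn\d+)')
# g.groups -> {'conn12': [...], 'conn34': [...], None: ['repl sync']}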
Example 33
    def __init__(self, args=None, unknown_args=None):
        self.args = args
        self.unknown_args = unknown_args
        self.groups = OrderedDict()
        self.empty = True
        self.limits = None
Example 34
    def group(self):
        """ (re-)group all loglines by the given group. """
        if hasattr(self, 'group_by'):
            group_by = self.group_by
        else:
            group_by = self.default_group_by
            if self.args['group'] != None:
                group_by = self.args['group']
        
        groups = OrderedDict()

        # label the x-axis once, depending on which op timestamp is plotted
        if self.args['optime_start']:
            self.xlabel = 'time (start of ops)'
        else:
            self.xlabel = 'time (end of ops)'

        for logline in self.loglines:
            # if group_by is a function, call on logline
            if hasattr(group_by, '__call__'):
                key = group_by(logline)
            # if the logline has attribute of group_by, use that as key
            elif group_by and hasattr(logline, group_by):
                key = getattr(logline, group_by)
            # if the PlotType has a method with the name of group_by call that on logline
            elif group_by and hasattr(self, group_by):
                f = getattr(self, group_by)
                key = f(logline)
            # if a --label was given, use that as key
            # elif self.args and self.args['label']:
            #     key = self.args['label']
            # else key is None
            else:
                key = None
                # try to match as regular expression
                if type(group_by) == types.StringType:
                    match = re.search(group_by, logline.line_str)
                    if match:
                        if len(match.groups()) > 0:
                            key = match.group(1)
                        else:
                            key = match.group()

            # special case: group together all connections
            # if group_by == "thread" and key and key.startswith("conn"):
            #     key = "conn####"

            groups.setdefault(key, list()).append(logline)
        
        # sort groups by number of data points
        groups = OrderedDict( sorted(groups.iteritems(), key=lambda x: len(x[1]), reverse=True) )

        # if --group-limit is provided, combine remaining groups
        if self.args['group_limit']:
            group_label = 'all others combined'
            # now group together all groups that did not make the limit
            groups[group_label] = []
            # only go to second last (-1), since the 'other' group is now last
            for other_group in groups.keys()[ self.args['group_limit']:-1 ]:
                groups[group_label].extend(groups[other_group])
                del groups[other_group]

            # remove if empty
            if len(groups[group_label]) == 0:
                del groups[group_label]

        self.groups = groups
Example 35
class DateTimeFilter(BaseFilter):
    """ This filter has two parser arguments: --from and --to, both are
        optional. All possible values for --from and --to can be described as:

        [DATE] [TIME] [OFFSET] in that order, separated by a space.

        [DATE] can be any of
            - a 3-letter weekday (Mon, Tue, Wed, ...)
            - a date as 3-letter month, 1-2 digits day (Sep 5, Jan 31, Aug 08)
            - the words: today, now, start, end

        [TIME] can be any of
            - hours and minutes (20:15, 04:00, 3:00)
            - hours, minutes and seconds (13:30:01, 4:55:55)

        [OFFSET] consists of [OPERATOR][VALUE][UNIT]   (no spaces in between)

        [OPERATOR] can be + or - (note that - can only be used if the whole
            "[DATE] [TIME] [OFFSET]" is in quotation marks, otherwise it would
            be confused with a separate parameter)

        [VALUE] can be any number

        [UNIT] can be any of s, sec, m, min, h, hours, d, days, w, weeks, mo,
            months, y, years

        The [OFFSET] is added/subtracted to/from the specified [DATE] [TIME].

        For the --from parameter, the default is the same as 'start'
            (0001-01-01 00:00:00). If _only_ an [OFFSET] is given, it is
            added to 'start' (which is not very useful).

        For the --to parameter, the default is the same as 'end'
            (9999-12-31 23:59:59). If _only_ an [OFFSET] is given, however,
            it is added to [FROM].

        Examples:
            --from Sun 10:00
                goes from last Sunday 10:00:00am to the end of the file

            --from Sep 29
                goes from Sep 29 00:00:00 to the end of the file

            --to today 15:00
                goes from the beginning of the file to today at 15:00:00

            --from today --to +1h
                goes from today's date 00:00:00 to today's date 01:00:00

            --from 20:15 --to +3m
                goes from today's date at 20:15:00 to today's date at 20:18:00
    """

    filterArgs = [
       ('--from', {'action':'store',  'type':custom_parse_dt, 'nargs':'*', 'default':'start', 'help':'output starting at FROM', 'dest':'from'}),
       ('--to',   {'action':'store',  'type':custom_parse_dt, 'nargs':'*', 'default':'end',   'help':'output up to TO',         'dest':'to'})
    ]

    timeunits = ['s', 'sec', 'm', 'min', 'h', 'hours', 'd', 'days', 'w', 'weeks', 'mo', 'months', 'y', 'years']
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    dtRegexes = OrderedDict([
        ('weekday', r'|'.join(weekdays)),                         # weekdays: see above
        ('date',    '('+ '|'.join(months) +')' + r'\s+\d{1,2}'),  # month + day:  Jan 5, Oct 13, Sep 03, ...
        ('word',    r'now|start|end|today'),
        ('time2',   r'\d{1,2}:\d{2,2}'),                          # 11:59, 1:13, 00:00, ...
        ('time3',   r'\d{1,2}:\d{2,2}:\d{2,2}'),                  # 11:59:00, 1:13:12, 00:00:59, ...
        ('offset',  r'[\+-]\d+(' + '|'.join(timeunits) + ')'),    # offsets: +3min, -20s, +7days, ...
    ])

    def __init__(self, mlogfilter):
        BaseFilter.__init__(self, mlogfilter)
        self.fromReached = False
        self.toReached = False

        self.active = ('from' in self.mlogfilter.args and self.mlogfilter.args['from'] != 'start') or \
                      ('to' in self.mlogfilter.args and self.mlogfilter.args['to'] != 'end')


    def setup(self):
        """ get start end end date of logfile before starting to parse. """

        if self.mlogfilter.is_stdin:
            # assume this year (we have no other info)
            now = datetime.now()
            self.startDateTime = datetime(now.year, 1, 1, tzinfo=tzutc())
            self.endDateTime = datetime(MAXYEAR, 12, 31, tzinfo=tzutc())

        else:
            logfiles = self.mlogfilter.args['logfile']
            self.startDateTime = min([lf.start+timedelta(hours=self.mlogfilter.args['timezone'][i]) for i, lf in enumerate(logfiles)])
            self.endDateTime = max([lf.end+timedelta(hours=self.mlogfilter.args['timezone'][i]) for i, lf in enumerate(logfiles)])

        # now parse for further changes to from and to datetimes
        dtbound = DateTimeBoundaries(self.startDateTime, self.endDateTime)
        self.fromDateTime, self.toDateTime = dtbound(self.mlogfilter.args['from'] or None,
                                                     self.mlogfilter.args['to'] or None)

        # define start_limit for mlogfilter's fast_forward method
        self.start_limit = self.fromDateTime

        # for single logfile, get file seek position of `to` datetime
        if len(self.mlogfilter.args['logfile']) == 1 and not self.mlogfilter.is_stdin:

            if self.mlogfilter.args['to'] != "end":
                # fast forward, get seek value, then reset file
                logfile = self.mlogfilter.args['logfile'][0]
                logfile.fast_forward(self.toDateTime)
                self.seek_to = logfile.filehandle.tell()
                logfile.filehandle.seek(0)
            else:
                self.seek_to = -1
        else:
            self.seek_to = False


    def accept(self, logevent):
        if self.fromReached and self.seek_to:
            if self.seek_to != -1:
                self.toReached = self.mlogfilter.args['logfile'][0].filehandle.tell() >= self.seek_to
            return True
        else:
            # slow version has to check each datetime
            dt = logevent.datetime

            # if logevent has no datetime, accept if between --from and --to
            if dt == None:
                return self.fromReached

            if self.fromDateTime <= dt <= self.toDateTime:
                self.toReached = False
                self.fromReached = True
                return True

            elif dt > self.toDateTime:
                self.toReached = True
                return False

            else:
                return False


    def skipRemaining(self):
        return self.toReached
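
The seek_to fast path in accept() is worth spelling out: setup() fast-forwards once to the --to boundary, remembers the byte offset, rewinds, and accept() then replaces per-line datetime parsing with a single integer comparison of file positions. A schematic sketch of the idea (hypothetical file name; the fast_forward step is elided):

fh = open('mongod.log', 'rb')

# ... fast_forward(toDateTime) would position fh just past the --to line ...
seek_to = fh.tell()     # byte offset of the --to boundary
fh.seek(0)              # rewind and stream normally

for line in iter(fh.readline, b''):
    if fh.tell() >= seek_to:    # cheap integer compare, no datetime parsing
        break
    # ... filter / print the line ...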
Example 36
    def __init__(self, args=None):
        self.args = args
        self.groups = OrderedDict()
        self.empty = True
Example 37
class DateTimeBoundaries(object):

    timeunits = ['secs', 'sec', 's', 'mins', 'min', 'm', 'months', 'month',
                 'mo', 'hours', 'hour', 'h', 'days', 'day', 'd', 'weeks',
                 'week', 'w', 'years', 'year', 'y']
    weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    dtRegexes = OrderedDict([
        # special constants
        ('constant', re.compile(r'(now|start|end|today|yesterday)($|\s+)')),
        # weekday: Mon, Wed, Sat
        ('weekday',  re.compile('(' + '|'.join(weekdays) + r')($|\s+)')),
        # 11:59:00.123, 1:13:12.004  (also match timezone postfix like Z or +0700 or -05:30)
        # ('time',     re.compile(r'(?P<hour>\d{1,2}):(?P<minute>\d{2,2})(?::(?P<second>\d{2,2})(?:.(?P<microsecond>\d{3,3}))?)?(?P<timezone>[0-9Z:\+\-]+)?($|\s+)')),
        # offsets: +3min, -20s, +7days  (see timeunits above)
        ('offset',   re.compile(r'(?P<operator>[\+-])(?P<value>\d+)(?P<unit>' +
                                '|'.join(timeunits) + r')($|\s+)'))
    ])

    def __init__(self, start, end):
        """ initialize the DateTimeBoundaries object with true start and end datetime objects. """

        if start > end:
            raise ValueError('Error in DateTimeBoundaries: end cannot be before start datetime.')

        # make sure all datetimes are timezone-aware
        self.start = start
        if not self.start.tzinfo:
            self.start = self.start.replace(tzinfo=tzutc())

        self.end = end
        if not self.end.tzinfo:
            self.end = self.end.replace(tzinfo=tzutc())


    def string2dt(self, s, lower_bound=None):
        original_s = s

        result = {}
        dt = None

        # if s is completely empty, return start or end, depending on what parameter is evaluated
        if s == '':
            return self.end if lower_bound else self.start

        # first try to match the defined regexes
        for idx in self.dtRegexes:
            regex = self.dtRegexes[idx]
            mo = regex.search(s)
            # if match was found, cut it out of original string and store in result
            if mo:
                result[idx] = mo
                s = s[:mo.start(0)] + s[mo.end(0):]

        # handle constants
        if 'constant' in result:
            constant = result['constant'].group(0).strip()
            if constant == 'end':
                dt = self.end
            elif constant == 'start':
                dt = self.start
            elif constant == 'today':
                dt = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            elif constant == 'yesterday':
                dt = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=1)
            elif constant == 'now':
                dt = datetime.now()

        elif 'weekday' in result:
            weekday = result['weekday'].group(0).strip()
            # assume the most recently occurred weekday in the logfile
            most_recent_date = self.end.replace(hour=0, minute=0, second=0,
                                                microsecond=0)
            offset = (most_recent_date.weekday() -
                      self.weekdays.index(weekday)) % 7
            dt = most_recent_date - timedelta(days=offset)

        # if anything remains unmatched, try parsing it with dateutil's parser
        if s.strip() != '':
            try:
                if dt:
                    dt = parser.parse(s, default=dt, tzinfos=tzutc)
                else:
                    # if s contains only a time, default the date to the start/end
                    # boundary; otherwise default to Jan 1 of the logfile's last year
                    if re.match(r'(?P<hour>\d{1,2}):(?P<minute>\d{2,2})(?::(?P<second>\d{2,2})(?:.(?P<microsecond>\d{3,3}))?)?(?P<timezone>[0-9Z:\+\-]+)?$', s):
                        default = self.end if lower_bound else self.start
                    else:
                        default = datetime(self.end.year, 1, 1, 0, 0, 0)
                    default = default.replace(second=0, microsecond=0)

                    dt = parser.parse(s, default=default)

            except ValueError:
                raise ValueError("Error in DateTimeBoundaries: can't parse datetime from %s" % s)

        if not dt:
            dt = lower_bound or self.end

        # if no timezone specified, use the one from the logfile
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=self.start.tzinfo)

        # times are applied separately (not through the parser) so that strings
        # containing only a time don't default to today's date (parser behavior)
        # if 'time' in result:
        #     dct = dict((k, int(v)) for k, v in result['time'].groupdict(0).iteritems())
        #     dct['microsecond'] *= 1000
        #     dt = dt.replace(**dct)

        # apply offset
        if 'offset' in result:

            # separate in operator, value, unit
            dct = result['offset'].groupdict()

            mult = 1
            if dct['unit'] in ['s', 'sec', 'secs']:
                dct['unit'] = 'seconds'
            elif dct['unit'] in ['m', 'min', 'mins']:
                dct['unit'] = 'minutes'
            elif dct['unit'] in ['h', 'hour', 'hours']:
                dct['unit'] = 'hours'
            elif dct['unit'] in ['d', 'day', 'days']:
                dct['unit'] = 'days'
            elif dct['unit'] in ['w', 'week', 'weeks']:
                dct['unit'] = 'days'
                mult = 7
            elif dct['unit'] in ['mo', 'month', 'months']:
                dct['unit'] = 'days'
                mult = 30.43
            elif dct['unit'] in ['y', 'year', 'years']:
                dct['unit'] = 'days'
                mult = 365.24

            if dct['operator'] == '-':
                mult *= -1

            # build the timedelta directly instead of going through eval()
            dt = dt + timedelta(**{dct['unit']: int(mult * int(dct['value']))})

        # if parsed datetime is out of bounds and no year specified, try to adjust year
        year_present = re.search(r'\d{4}', original_s)

        if not year_present and 'constant' not in result:
            if dt < self.start and self.start <= dt.replace(year=dt.year + 1) <= self.end:
                dt = dt.replace(year=dt.year + 1)
            elif dt > self.end and self.start <= dt.replace(year=dt.year - 1) <= self.end:
                dt = dt.replace(year=dt.year - 1)

        return dt


    def __call__(self, from_str=None, to_str=None):
        """ sets the boundaries based on `from` and `to` strings. """

        from_dt = self.string2dt(from_str, lower_bound=None)
        to_dt = self.string2dt(to_str, lower_bound=from_dt)

        if to_dt < from_dt:
            raise ValueError('Error in DateTimeBoundaries: lower bound is greater than upper bound.')

        # limit from and to at the real boundaries
        if to_dt > self.end:
            to_dt = self.end

        if from_dt < self.start:
            from_dt = self.start

        return from_dt, to_dt
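
A short usage sketch for the class above (the dates and strings are made up for illustration): calling the object with `from` and `to` strings combines the constants, weekdays, and offsets matched by dtRegexes and clamps the result to the real logfile boundaries.

from datetime import datetime
from dateutil.tz import tzutc

# boundaries of a hypothetical logfile
bounds = DateTimeBoundaries(datetime(2014, 1, 1, tzinfo=tzutc()),
                            datetime(2014, 6, 30, tzinfo=tzutc()))

# constants and offsets can be combined in one string
from_dt, to_dt = bounds('start +2weeks', 'end -3days')
# from_dt == 2014-01-15, to_dt == 2014-06-27 (both UTC)

# an empty 'from' string falls back to the true start of the logfile
from_dt, to_dt = bounds('', 'Mon')  # 'Mon' -> the most recent Monday in the log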
Example n. 38
0
    def run(self):
        """Run this section and print out information."""
        grouping = Grouping(
            group_by=lambda x: (x.namespace, x.operation, x.pattern))
        logfile = self.mloginfo.logfile

        if logfile.start and logfile.end:
            progress_start = self.mloginfo._datetime_to_epoch(logfile.start)
            progress_total = (self.mloginfo._datetime_to_epoch(logfile.end) -
                              progress_start)
        else:
            self.mloginfo.progress_bar_enabled = False

        for i, le in enumerate(logfile):
            # update progress bar every 1000 lines
            if self.mloginfo.progress_bar_enabled and (i % 1000 == 0):
                if le.datetime:
                    progress_curr = self.mloginfo._datetime_to_epoch(
                        le.datetime)
                    if progress_total:
                        (self.mloginfo.update_progress(
                            float(progress_curr - progress_start) /
                            progress_total))

            if (le.operation in ['query', 'getmore', 'update', 'remove']
                    or le.command
                    in ['count', 'findandmodify', 'geonear', 'find']):
                lt = LogTuple(namespace=le.namespace,
                              operation=op_or_cmd(le),
                              pattern=le.pattern,
                              duration=le.duration)
                grouping.add(lt)

        grouping.sort_by_size()

        # clear progress bar again
        if self.mloginfo.progress_bar_enabled:
            self.mloginfo.update_progress(1.0)

        # no queries in the log file
        if len(grouping) < 1:
            print('no queries found.')
            return

        titles = [
            'namespace', 'operation', 'pattern', 'count', 'min (ms)',
            'max (ms)', 'mean (ms)', '95%-ile (ms)', 'sum (ms)'
        ]
        table_rows = []

        for g in grouping:
            # calculate statistics for this group
            namespace, op, pattern = g

            group_events = [
                le.duration for le in grouping[g] if le.duration is not None
            ]

            stats = OrderedDict()
            stats['namespace'] = namespace
            stats['operation'] = op
            stats['pattern'] = pattern
            stats['count'] = len(group_events)
            stats['min'] = min(group_events) if group_events else '-'
            stats['max'] = max(group_events) if group_events else '-'
            # placeholder so 'mean' keeps its column position; computed below
            stats['mean'] = 0
            if np:
                stats['95%'] = (np.percentile(group_events, 95)
                                if group_events else '-')
            else:
                stats['95%'] = 'n/a'
            stats['sum'] = sum(group_events) if group_events else '-'
            stats['mean'] = (stats['sum'] /
                             stats['count'] if group_events else '-')

            if self.mloginfo.args['verbose']:
                stats['example'] = grouping[g][0]
                # add the extra column title only once, not per group
                if 'example' not in titles:
                    titles.append('example')

            table_rows.append(stats)

        # sort order depending on field names
        reverse = True
        if self.mloginfo.args['sort'] in ['namespace', 'pattern']:
            reverse = False

        table_rows = sorted(table_rows,
                            key=itemgetter(self.mloginfo.args['sort']),
                            reverse=reverse)
        print_table(table_rows, titles, uppercase_headers=False)
        print('')
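
When numpy is not available, the 95%-ile column above degrades to 'n/a'. A dependency-free fallback could use the nearest-rank definition of a percentile; the sketch below is hypothetical and slightly coarser than numpy's interpolating np.percentile, but illustrates the idea:

def percentile_nearest_rank(values, pct):
    """ return the pct-th percentile of a non-empty list, nearest-rank style. """
    ordered = sorted(values)
    # clamp the computed rank to the last valid index
    idx = min(len(ordered) - 1, int(len(ordered) * pct / 100.0))
    return ordered[idx]

durations = [12, 40, 3, 97, 55]
print(percentile_nearest_rank(durations, 95))  # -> 97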