Esempio n. 1
0
 def zranked(cls, pk, parent_dimension='_', metric='hits', period=None,
         at=None, start=0, size=10, sort_dir=None, tzoffset=None):
     """Return the ranked entries below `parent_dimension` for `metric`.

     The period and its timestamps come from `Period.get_days(period, at)`.
     When that call yields no timestamps, a single one is built from
     `cls.now()` converted with the offset `get_days` returned.  Every item
     produced by `_ranked` is passed through `try_loads`.

     NOTE(review): the `tzoffset` argument is never read -- it is replaced
     by the offset returned from `Period.get_days`.  Confirm whether it
     was meant to be forwarded.
     """
     resolved_period, timestamps, offset = Period.get_days(period, at)
     if not timestamps:
         # Nothing resolved for this period: fall back to "now".
         timestamps = [Period.convert(cls.now(), offset)]
     rows = _ranked(cls.whale_driver(), pk, parent_dimension, metric,
                    resolved_period, timestamps, start, size,
                    sort_dir=sort_dir)
     return map(try_loads, rows)
Esempio n. 2
0
 def total(cls, pk, metric, dimension='_', period=None, at=None, index=None,
         tzoffset=None):
     """Return one aggregated value for `metric` on `dimension`.

     Two modes are supported:

     * Indexed -- when `index` is an int, or when `Period.get_days` yields
       no timestamps and no index was given (the last point, -1, is then
       used): the value of the plotpoint at that position is returned.
     * Summed -- otherwise the plotpoints whose key appears in the
       timestamps from `Period.get_days` are added up.  If
       `parse_formula(metric)` reports a denominator, numerator and
       denominator are summed separately and their quotient is returned
       (0 when the denominator total is zero).
     """
     period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
     top, bot = parse_formula(metric)
     dimension = maybe_dumps(dimension)
     # Compare with None explicitly: a truthiness test would silently turn
     # an explicit index=0 (first point) into -1 (last point).
     if not ats and index is None:
         index = -1
     if isinstance(index, int):
         # Forward the offset here too, as the summed branches below do.
         pps = cls.plotpoints(pk, dimension, metric, period=period,
                              tzoffset=tzoffset, points_type=list)
         return pps[dimension][metric][index][1]

     def summed(name):
         # Sum the points of `name` that fall on one of the timestamps.
         series = cls.plotpoints(pk, dimension, name, period=period,
                                 tzoffset=tzoffset)[dimension][name]
         return sum(series[dt] for dt in ats if dt in series)

     if not bot:
         return summed(metric)
     top_tot = summed(top)
     bot_tot = summed(bot)
     # `and/or` keeps the original result exactly: 0 when the denominator
     # (or the quotient) is zero.
     return bot_tot and top_tot / bot_tot or 0
Esempio n. 3
0
 def totals(cls, pk, dimensions=None, metrics=None, periods=None, at=None):
     """Sum plotpoints per period, dimension and metric.

     `periods` defaults to `DEFAULT_PERIODS` and `metrics` to `['hits']`;
     a non-list value for either is wrapped in a list.  A metric that
     contains '/' is treated as a ratio: its '/'-separated parts are
     fetched as ordinary metrics and the ratio of their totals is stored
     under the original ratio name (0 when the denominator total is zero).

     Returns a dict shaped like
     ``{str(period): {dimension: {metric_or_ratio: total}}}``.  Only
     points whose key is among the timestamps returned by
     `Period.get_days(period, at)` are counted.
     """
     if not periods:
         periods = DEFAULT_PERIODS
     if not isinstance(periods, list):
         periods = [periods]
     metrics = metrics or ['hits']
     if not isinstance(metrics, list):
         metrics = [metrics]
     # Build new lists instead of removing from / appending to `metrics`
     # while looping over it: that skipped the element after each removed
     # ratio and also modified the caller's list in place.
     ratios = [m for m in metrics if '/' in m]
     plain = [m for m in metrics if '/' not in m]
     for rat in ratios:
         for part in rat.split('/'):
             if part not in plain:
                 plain.append(part)
     metrics = plain
     d = {}
     for p in periods:
         ats = Period.get_days(p, at)[1]
         p_data = cls.plotpoints(pk, dimensions, metrics, period=p, at=at)
         p_totals = {}
         for dim, mets in p_data.items():
             dim_totals = p_totals[dim] = {}
             for met, vals in mets.items():
                 dim_totals[met] = sum(
                     v for k, v in vals.items() if k in ats)
             for rat in ratios:
                 top, bot = parse_formula(rat)
                 topt, bott = dim_totals[top], dim_totals[bot]
                 dim_totals[rat] = bott and topt / bott or 0
         d[str(p)] = p_totals
     return d
Esempio n. 4
0
 def totals(cls, pk, dimensions=None, metrics=None, periods=None, at=None):
     """Return summed plotpoints for every period / dimension / metric.

     Defaults: `periods` falls back to `DEFAULT_PERIODS`, `metrics` to
     `['hits']`; either is wrapped in a list when it is not one already.
     Metrics containing '/' are ratios: their components are requested as
     plain metrics and the quotient of the component totals is stored
     under the ratio's own name (0 if the denominator total is zero).

     The result maps ``str(period)`` to ``{dimension: {name: total}}``.
     A point contributes only when its key is one of the timestamps that
     `Period.get_days(period, at)` returns.
     """
     if not periods:
         periods = DEFAULT_PERIODS
     if not isinstance(periods, list):
         periods = [periods]
     metrics = metrics or ['hits']
     if not isinstance(metrics, list):
         metrics = [metrics]

     # Partition into fresh lists.  Editing `metrics` during iteration
     # made the loop skip whatever followed a ratio, and leaked the edits
     # back into the list object the caller passed in.
     ratios = []
     requested = []
     for metric in metrics:
         (ratios if '/' in metric else requested).append(metric)
     for rat in ratios:
         for component in rat.split('/'):
             if component not in requested:
                 requested.append(component)
     metrics = requested

     d = {}
     for p in periods:
         ats = Period.get_days(p, at)[1]
         p_data = cls.plotpoints(pk, dimensions, metrics, period=p, at=at)
         p_totals = dict()
         for dim, by_metric in p_data.items():
             sums = dict()
             for met, vals in by_metric.items():
                 sums[met] = sum(v for k, v in vals.items() if k in ats)
             for rat in ratios:
                 top, bot = parse_formula(rat)
                 topt, bott = sums[top], sums[bot]
                 sums[rat] = bott and topt / bott or 0
             p_totals[dim] = sums
         d[str(p)] = p_totals
     return d
Esempio n. 5
0
    def rank_subdimensions_ratio(cls,
                                 pk,
                                 numerator,
                                 denominator='hits',
                                 dimension='_',
                                 period=None,
                                 recursive=True,
                                 points=False):
        """Describe each sub-dimension by its numerator/denominator ratio.

        The overall ratio for `dimension` is computed from `cls.totals`.
        For every sub-dimension (from `cls.all_subdimensions` when
        `recursive` is true, else `cls.get_subdimensions`) the numerator
        and denominator points that fall on the period's timestamps are
        summed and compared against that overall ratio.

        Returns a dict keyed by sub-dimension (after `maybe_dumps`); each
        value is a dict with the keys 'score', 'difference', 'effect',
        'value', 'count', 'important' and 'dimension', plus 'points' when
        `points` is true.
        """
        top, bottom = numerator, denominator
        p_obj, ats, tzoffset = Period.get_days(period)
        p_s = str(p_obj)
        d_k = keyify(dimension)

        def quotient(num, den):
            # Convert before dividing so integer operands are not
            # truncated under Python 2; yields 0 for a zero denominator.
            return den and float(num) / den or 0

        top_points = cls.totals(pk, dimension, top, periods=[p_s])
        top_total = p_s in top_points and top_points[p_s][d_k][top] or 0
        bottom_points = cls.totals(pk, dimension, bottom, periods=[p_s])
        bottom_total = (p_s in bottom_points
                        and bottom_points[p_s][d_k][bottom] or 0)
        ratio_total = quotient(top_total, bottom_total)
        ranked = dict()

        def info(sub):
            pps = cls.plotpoints(
                pk, sub, [top, bottom, '%s/%s' % (top, bottom)],
                period=period)[sub]

            # Only points on the period's timestamps count.
            sub_top_sum = sum(v for k, v in pps[top].items() if k in ats)
            sub_bottom_sum = sum(
                v for k, v in pps[bottom].items() if k in ats)

            ratio = quotient(sub_top_sum, sub_bottom_sum)

            # Relative deviation from the overall ratio.
            difference = ratio_total and (ratio -
                                          ratio_total) / ratio_total or 0
            important = sub_bottom_sum > 5 and math.fabs(difference) > .1

            data = {
                'points': pps,
                'score': ratio,
                'difference': difference,
                'effect': difference * sub_bottom_sum * ratio_total,
                'value': sub_top_sum,
                'count': sub_bottom_sum,
                'important': important,
                'dimension': sub
            }
            if not points:
                del data['points']
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions

        for sub in map(maybe_dumps, _subs(pk, dimension)):
            ranked[sub] = info(sub)

        return ranked
Esempio n. 6
0
 def total(cls,
           pk,
           metric,
           dimension='_',
           period=None,
           at=None,
           index=None,
           tzoffset=None):
     """Return one aggregated value for `metric` on `dimension`.

     * Indexed mode -- `index` is an int, or `Period.get_days` yields no
       timestamps and no index was given (then -1, the last point, is
       used): returns the value of the plotpoint at that position.
     * Summed mode -- otherwise: adds up the plotpoints whose key is one
       of the timestamps from `Period.get_days`.  When
       `parse_formula(metric)` reports a denominator, the numerator and
       denominator sums are divided (0 if the denominator sum is zero).

     NOTE(review): `tzoffset` is accepted but has no effect here --
     `Period.get_days` is called with `tzoffset=None` and the
     `plotpoints` calls take no offset.  Confirm this is intentional.
     """
     period, ats, tzoffset = Period.get_days(period, at, tzoffset=None)
     top, bot = parse_formula(metric)
     # `index is None` rather than `not index`: an explicit index=0 must
     # select the first point, not be rewritten to -1.
     if not ats and index is None:
         index = -1
     if isinstance(index, int):
         pps = cls.plotpoints(pk,
                              dimension,
                              metric,
                              period=period,
                              points_type=list)
         return pps[dimension][metric][index][1]

     def summed(name):
         # Sum the points of `name` that fall on one of the timestamps.
         series = cls.plotpoints(pk, dimension, name,
                                 period=period)[dimension][name]
         return sum(series[dt] for dt in ats if dt in series)

     if not bot:
         return summed(metric)
     top_tot = summed(top)
     bot_tot = summed(bot)
     return bot_tot and top_tot / bot_tot or 0
Esempio n. 7
0
    def render_divs(cls,
                    pk,
                    metric,
                    dimension='_',
                    period=None,
                    at=None,
                    tzoffset=None,
                    format=None,
                    hidden=False):
        """Render the plotpoints of `metric` as an HTML ``<table>`` string.

        One ``<tr>`` is emitted per plotpoint: the key (with a trailing
        ' 00:00:00' removed) and the formatted value.  `format` selects
        the value formatter: 'int', 'float', 'grouped', one of
        'pct'/'percent'/'%'/'ratio', or one of
        'cash'/'money'/'usd'/'$'/'dollars'/'cents'.  Any other value leaves
        the raw value untouched.  When `format` is not given it defaults
        to 'pct' if `parse_formula(metric)` reports a denominator, else
        'grouped'.  `hidden` adds ``style="display: none"`` to the table.

        NOTE(review): pk, metric and dimension are interpolated into HTML
        attributes without escaping -- confirm they are never
        user-controlled.
        """
        import locale

        period, ats, tzoffset = Period.get_days(period, at, tzoffset=tzoffset)
        top, bot = parse_formula(metric)
        pps = cls.plotpoints(pk, dimension, metric, period=period)
        ppsm = pps[dimension][metric]
        if not format:
            format = 'pct' if bot else 'grouped'

        def fmt(v):
            if v == 'None':
                v = None
            f = format
            if f == 'int':
                f = lambda s: int(float(s or 0))
            elif f == 'float':
                v = v or 0.0
                f = float
            elif f == 'grouped':
                v = v or 0
                # format_string is the non-deprecated spelling of
                # locale.format and gives the same output for '%d'.
                f = lambda s: locale.format_string('%d', int(float(s)), True)
            elif f in ['pct', 'percent', '%', 'ratio']:
                v = min(101, v and float(v) * 100 or 0)
                f = lambda s: '%.2f%%' % s
            elif f in ['cash', 'money', 'usd', '$', 'dollars', 'cents']:
                v = v and float(v) or 0.0
                if f == 'cents':
                    v = v / 100.0
                f = locale.currency
            # Return the formatter's result even when it is falsy (0 or
            # 0.0); `callable(f) and f(v) or v` fell back to the raw value
            # and could render "None" for the 'int' format.
            return f(v) if callable(f) else v

        hidden = 'style="display: none"' if hidden else ''
        opening = (
            '<table {hidden} data-hw-pk="{pk}" data-hw-name="{{name}}" \
                data-hw-dimension="{dimension}" data-metric="{metric}">'
        ).format(pk=pk, metric=metric, dimension=dimension, hidden=hidden)
        rows = [
            '<tr><td>%s</td><td>%s</td></tr>' %
            (dt.replace(' 00:00:00', ''), fmt(count))
            for dt, count in ppsm.items()
        ]
        return opening + '\n'.join(rows) + '</table>'
Esempio n. 8
0
    def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits',
            dimension='_', period=None, recursive=True, points=False):
        """Score every sub-dimension by its numerator/denominator ratio.

        The overall ratio of `dimension` is derived from `cls.totals`.
        Each sub-dimension (all descendants via `cls.all_subdimensions`
        when `recursive`, otherwise `cls.get_subdimensions`) gets its own
        ratio from the points lying on the period's timestamps, and is
        compared with the overall ratio.

        Returns ``{sub_dimension: info}`` where `info` carries 'score',
        'difference', 'effect', 'value', 'count', 'important' and
        'dimension', and also 'points' when `points` is true.
        """
        top, bottom = numerator, denominator
        p_obj, ats, tzoffset = Period.get_days(period)
        p_s = str(p_obj)
        d_k = keyify(dimension)

        def safe_ratio(num, den):
            # float() is applied before dividing: converting the quotient
            # afterwards truncates integer operands under Python 2.
            return den and float(num) / den or 0

        def sum_on_timestamps(series):
            # Add up only the points whose key is one of `ats`.
            return sum(v for k, v in series.items() if k in ats)

        top_points = cls.totals(pk, dimension, top,
                periods=[p_s])
        top_total = p_s in top_points and top_points[p_s][d_k][top] or 0
        bottom_points = cls.totals(pk, dimension, bottom,
                periods=[p_s])
        bottom_total = p_s in bottom_points and bottom_points[p_s][d_k][bottom] or 0
        ratio_total = safe_ratio(top_total, bottom_total)
        ranked = dict()

        def info(sub):
            formula = '%s/%s' % (top, bottom)
            pps = cls.plotpoints(pk, sub, [top, bottom, formula], period=period)[sub]

            sub_top_sum = sum_on_timestamps(pps[top])
            sub_bottom_sum = sum_on_timestamps(pps[bottom])

            ratio = safe_ratio(sub_top_sum, sub_bottom_sum)

            # Relative deviation from the overall ratio.
            difference = ratio_total and (ratio - ratio_total) / ratio_total or 0
            important = sub_bottom_sum > 5 and math.fabs(difference) > .1

            data = {
                'points': pps,
                'score': ratio,
                'difference': difference,
                'effect': difference * sub_bottom_sum * ratio_total,
                'value': sub_top_sum,
                'count': sub_bottom_sum,
                'important': important,
                'dimension': sub
            }
            if not points:
                del data['points']
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions

        for sub in map(maybe_dumps, _subs(pk, dimension)):
            ranked[sub] = info(sub)

        return ranked
Esempio n. 9
0
    def rank_subdimensions_scalar(cls,
                                  pk,
                                  dimension='_',
                                  metric='hits',
                                  period=None,
                                  recursive=True,
                                  prune_parents=True,
                                  points=False):
        """Describe each sub-dimension by its summed `metric` value.

        The total for `dimension` comes from `cls.totals`.  For every
        sub-dimension (from `cls.all_subdimensions` when `recursive`,
        else `cls.get_subdimensions`) all plotpoint values are summed; an
        entry is 'important' when its sum exceeds both 10 and a tenth of
        the total.  With `recursive` and `prune_parents`, an important
        entry is demoted when what remains after subtracting its direct
        children's scores is below a tenth of the total.

        Returns ``{sub_dimension: info}`` with the keys 'score',
        'important', 'effect', 'difference', 'value', 'count' and
        'dimension', plus 'points' when `points` is true.
        """
        p_obj, ats, tzoffset = Period.get_days(period)
        # Use the resolved period (as rank_subdimensions_ratio does);
        # str(period) would be the literal "None" when no period is given.
        p_s = str(p_obj)
        d_k = keyify(dimension)
        # sum of all values in metric
        total = cls.totals(pk, dimension, metric,
                           periods=[p_s])[p_s][d_k][metric]
        ranked = dict()

        def describe(sub):
            pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
            sub_total = sum(pps.values())
            data = {
                'points': pps,
                'score': sub_total,
                'important': sub_total > 10 and (sub_total > (total / 10)),
                'effect': total - sub_total,
                'difference': total - sub_total,
                'value': sub_total,
                'count': sub_total,
                'dimension': sub
            }
            if not points:
                del data['points']
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions
        for sub in map(maybe_dumps, _subs(pk, dimension)):
            ranked[sub] = describe(sub)

        # Prune parents
        if recursive and prune_parents:
            # `entry` (not `info`) so the loop does not shadow a function.
            for sub, entry in ranked.items():
                children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
                children_total = sum(
                    ranked[child]['score'] for child in children)
                if entry['important'] and (entry['score'] -
                                           children_total) < (total / 10):
                    entry['important'] = False
        return ranked
Esempio n. 10
0
 def render_hw_plotpoint_table(cls, pk, metric, dimension='_', period=None, at=None,
         tzoffset=None, format=None, hidden=False, graph_color=''):
     """Render the plotpoints of `metric` as an HTML ``<table>`` string.

     Each plotpoint becomes one ``<tr>`` holding its key (a trailing
     ' 00:00:00' is removed) and its formatted value.  `format` picks the
     formatter: 'int', 'float', 'grouped', one of
     'pct'/'percent'/'%'/'ratio', or one of
     'cash'/'money'/'usd'/'$'/'dollars'/'cents'; anything else leaves the
     raw value as is.  Without `format`, 'pct' is used when
     `parse_formula(metric)` reports a denominator and 'grouped'
     otherwise.  `hidden` adds ``style="display: none"``; `graph_color`
     is written to the ``data-hw-color`` attribute.

     NOTE(review): the `at` argument is not used by this method.  Also,
     pk, metric, dimension and graph_color are interpolated into HTML
     attributes without escaping -- confirm they are never
     user-controlled.
     """
     import locale

     period, ats, tzoffset = Period.get_days(period, tzoffset=tzoffset)
     top, bot = parse_formula(metric)
     pps = cls.plotpoints(pk, dimension, metric, period=period,
                          tzoffset=tzoffset)
     ppsm = pps[dimension][metric]
     if not format:
         format = 'pct' if bot else 'grouped'

     def fmt(v):
         if v == 'None':
             v = None
         f = format
         if f == 'int':
             f = lambda s: int(float(s or 0))
         elif f == 'float':
             v = v or 0.0
             f = float
         elif f == 'grouped':
             v = v or 0
             # format_string is the non-deprecated spelling of
             # locale.format and gives the same output for '%d'.
             f = lambda s: locale.format_string('%d', int(float(s)), True)
         elif f in ['pct', 'percent', '%', 'ratio']:
             v = min(101, v and float(v)*100 or 0)
             f = lambda s: '%.2f%%'%s
         elif f in ['cash', 'money', 'usd', '$', 'dollars', 'cents']:
             v = v and float(v) or 0.0
             if f == 'cents':
                 v = v/100.0
             f = locale.currency
         # Keep a falsy formatter result (0, 0.0): the old
         # `callable(f) and f(v) or v` fell back to the raw value and
         # could render "None" for the 'int' format.
         return f(v) if callable(f) else v

     hidden = 'style="display: none"' if hidden else ''
     opening = ('<table {hidden} data-hw-pk="{pk}" data-hw-name="{{name}}" \
             data-hw-dimension="{dimension}" data-metric="{metric}" \
             data-hw-color="{color}">').format(
         pk=pk, metric=metric, dimension=dimension, hidden=hidden,
         color=graph_color)
     rows = [
         '<tr><td>%s</td><td>%s</td></tr>'%(dt.replace(' 00:00:00', ''), fmt(count) )
         for dt, count in ppsm.items()]
     return opening + '\n'.join(rows) + '</table>'
Esempio n. 11
0
 def zranked(cls,
             pk,
             parent_dimension='_',
             metric='hits',
             period=None,
             at=None,
             start=0,
             size=10,
             sort_dir=None,
             tzoffset=None):
     """Return the ranked entries below `parent_dimension` for `metric`.

     `Period.get_days(period, at)` supplies the period and its
     timestamps.  If it returns no timestamps, `cls.now()` converted with
     the returned offset is used as the only timestamp.  The rows coming
     back from `_ranked` are each passed through `try_loads`.

     NOTE(review): the incoming `tzoffset` is discarded in favour of the
     one returned by `Period.get_days`; confirm whether it should be
     forwarded.
     """
     resolved = Period.get_days(period, at)
     period_obj, when, offset = resolved
     if not when:
         # No timestamps for this period: rank for the current moment.
         when = [Period.convert(cls.now(), offset)]
     driver = cls.whale_driver()
     ranked_rows = _ranked(driver, pk, parent_dimension, metric, period_obj,
                           when, start, size, sort_dir=sort_dir)
     return map(try_loads, ranked_rows)
Esempio n. 12
0
    def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits',
            period=None, recursive=True, prune_parents=True, points=False):
        """Score every sub-dimension by the sum of its `metric` points.

        `cls.totals` provides the total for `dimension`.  Each
        sub-dimension (all descendants via `cls.all_subdimensions` when
        `recursive`, otherwise `cls.get_subdimensions`) is summed over
        all of its plotpoints and flagged 'important' when the sum is
        above 10 and above a tenth of the total.  With `recursive` and
        `prune_parents`, an important entry loses the flag when its score
        minus its direct children's scores is under a tenth of the total.

        Returns ``{sub_dimension: info}``; `info` holds 'score',
        'important', 'effect', 'difference', 'value', 'count' and
        'dimension', and 'points' when `points` is true.
        """
        p_obj, ats, tzoffset = Period.get_days(period)
        # Stringify the resolved period, matching rank_subdimensions_ratio;
        # str(period) is the literal "None" when the argument is omitted.
        p_s = str(p_obj)
        d_k = keyify(dimension)
        # sum of all values in metric
        total = cls.totals(pk, dimension, metric, periods=[p_s])[p_s][d_k][metric]
        tenth = total / 10
        ranked = dict()

        def summarize(sub):
            pps = cls.plotpoints(pk, sub, metric, period=period)[sub][metric]
            sub_total = sum(pps.values())
            data = {
                'points': pps,
                'score': sub_total,
                'important': sub_total > 10 and (sub_total > tenth),
                'effect': total - sub_total,
                'difference': total - sub_total,
                'value': sub_total,
                'count': sub_total,
                'dimension': sub
            }
            if not points:
                del data['points']
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions
        for sub in map(maybe_dumps, _subs(pk, dimension)):
            ranked[sub] = summarize(sub)

        # Prune parents
        if recursive and prune_parents:
            # The loop variable used to be named `info`, shadowing the
            # nested helper of the same name.
            for sub, entry in ranked.items():
                children = map(maybe_dumps, cls.get_subdimensions(pk, sub))
                children_total = sum(ranked[c]['score'] for c in children)
                if entry['important'] and (entry['score'] - children_total) < tenth:
                    entry['important'] = False
        return ranked
Esempio n. 13
0
    def scalar_plotpoints(cls, pk, dimensions=None, metrics=None, at=None,
            depth=0, period=None, flot_time=False, points_type=OrderedDict):
        """Expand sparse stored points into one value per period timestamp.

        For each dimension/metric returned by `_retrieve`, a point is
        produced for every timestamp string from
        `p_obj.datetimes_strs(end=...)`; timestamps with no stored value
        get 0.  A metric written as ``name:method`` post-processes its
        values:

        * no method or 'count' -- values are used as stored;
        * '+', 'sum', 'add', 'cumulative' -- running total;
        * '_', 'set', 'last', 'level' -- a zero/missing value repeats the
          previous value.

        A metric whose name is in `TIME_MATRIX`, is '_count', is
        '_days_in_month' or parses as a number yields a constant instead
        of stored values.

        Each series is converted with `points_type` (forced to `list`
        when `flot_time` is true, in which case timestamps go through
        `to_flot_time`).  With `depth` > 0 the results of
        `cls.plotpoints` for each sub-dimension are merged in.

        Returns a mapping ``{dimension: {metric: points}}``.
        """
        from calendar import monthrange

        metrics = metrics or ['hits']
        at = at or times.now()
        if isinstance(metrics, basestring):
            metrics = [metrics]
        p_obj, ats, tzoffset = Period.get_days(period, at)
        dts = list(p_obj.datetimes_strs(end=Period.parse_dt_str(at)))

        sparse = _retrieve(cls.whale_driver(), pk, dimensions, metrics, period=p_obj)
        nonsparse = defaultdict(dict)
        if flot_time:
            points_type = list
        for dim, mets in sparse.items():
            for met, points in mets.items():
                series = []
                use_method = False
                met_name = met
                if ':' in met:
                    met_name, use_method = met.split(':')

                const_value = False
                if met_name in TIME_MATRIX:
                    # NOTE(review): float() wraps the finished division,
                    # so integer operands truncate under Python 2.
                    const_value = float(p_obj.getUnits()[0] /
                            TIME_MATRIX[met_name])
                # Try to parse static metrics too
                elif met_name == '_count':
                    const_value = len(dts)
                try:
                    const_value = float(met_name)
                except (TypeError, ValueError):
                    # Not a numeric literal; keep what was chosen above.
                    pass
                last_value = total = 0
                for dt in dts:
                    dt_obj = Period.parse_dt_str(dt)
                    if met_name == '_days_in_month':
                        const_value = monthrange(dt_obj.year, dt_obj.month)[1]
                    dt_t = to_flot_time(dt_obj) if flot_time else dt
                    if const_value:
                        value = const_value
                    else:
                        value = points[dt] if dt in points else 0
                    if use_method in ['+', 'sum', 'add', 'cumulative']:
                        total += value
                        value = total
                    elif use_method in ['_', 'set', 'last', 'level']:
                        if not last_value:
                            last_value = value
                        if not value:
                            value = last_value
                        last_value = value
                    series.append([dt_t, float(value)])
                nonsparse[dim][met] = points_type(series)

        if depth > 0:
            for sub in cls.get_subdimensions(pk, dimensions):
                # Copy-then-update replaces `items() + items()`, which only
                # works on Python 2 lists; later keys still win.
                merged = dict(nonsparse)
                merged.update(
                    cls.plotpoints(pk, sub, metrics, at=at, depth=depth - 1, period=period,
                        flot_time=flot_time, points_type=points_type))
                nonsparse = merged
        return nonsparse
Esempio n. 14
0
    def scalar_plotpoints(cls,
                          pk,
                          dimensions=None,
                          metrics=None,
                          at=None,
                          depth=0,
                          period=None,
                          flot_time=False,
                          points_type=OrderedDict):
        """Turn sparse stored points into a dense series per metric.

        Every dimension/metric pair returned by `_retrieve` gets one
        point for each timestamp string produced by
        `p_obj.datetimes_strs(end=...)`; a timestamp without a stored
        value contributes 0.  A ``name:method`` metric applies a
        post-processing method:

        * none or 'count' -- stored values unchanged;
        * '+', 'sum', 'add', 'cumulative' -- running total;
        * '_', 'set', 'last', 'level' -- a zero/missing value carries the
          previous value forward.

        Metric names found in `TIME_MATRIX`, '_count', '_days_in_month',
        or names that parse as a number produce a constant rather than
        stored values.

        Series are wrapped with `points_type`; `flot_time` forces `list`
        and converts timestamps with `to_flot_time`.  When `depth` > 0
        the `cls.plotpoints` results of each sub-dimension are merged in.

        Returns a mapping ``{dimension: {metric: points}}``.
        """
        from calendar import monthrange

        metrics = metrics or ['hits']
        at = at or times.now()
        if isinstance(metrics, basestring):
            metrics = [metrics]
        p_obj, ats, tzoffset = Period.get_days(period, at)
        dts = list(p_obj.datetimes_strs(end=Period.parse_dt_str(at)))

        sparse = _retrieve(cls.whale_driver(),
                           pk,
                           dimensions,
                           metrics,
                           period=p_obj)
        nonsparse = defaultdict(dict)
        if flot_time:
            points_type = list
        cumulative_methods = ['+', 'sum', 'add', 'cumulative']
        carry_methods = ['_', 'set', 'last', 'level']
        for dim, mets in sparse.items():
            for met, points in mets.items():
                dense = []
                use_method = False
                met_name = met
                if ':' in met:
                    met_name, use_method = met.split(':')

                const_value = False
                if met_name in TIME_MATRIX:
                    # NOTE(review): float() is applied after the division,
                    # so integer operands truncate under Python 2.
                    const_value = float(p_obj.getUnits()[0] /
                                        TIME_MATRIX[met_name])
                # Try to parse static metrics too
                elif met_name == '_count':
                    const_value = len(dts)
                try:
                    const_value = float(met_name)
                except (TypeError, ValueError):
                    # Metric name is not a number; nothing to override.
                    pass
                last_value = total = 0
                for dt in dts:
                    dt_obj = Period.parse_dt_str(dt)
                    if met_name == '_days_in_month':
                        const_value = monthrange(dt_obj.year, dt_obj.month)[1]
                    dt_t = to_flot_time(dt_obj) if flot_time else dt
                    if const_value:
                        value = const_value
                    else:
                        value = points[dt] if dt in points else 0
                    if use_method in cumulative_methods:
                        total += value
                        value = total
                    elif use_method in carry_methods:
                        if not last_value:
                            last_value = value
                        if not value:
                            value = last_value
                        last_value = value
                    dense.append([dt_t, float(value)])
                nonsparse[dim][met] = points_type(dense)

        if depth > 0:
            for sub in cls.get_subdimensions(pk, dimensions):
                # dict() + update() instead of `items() + items()`, which
                # relies on Python 2 returning lists; later keys still win.
                merged = dict(nonsparse)
                merged.update(
                    cls.plotpoints(pk,
                                   sub,
                                   metrics,
                                   at=at,
                                   depth=depth - 1,
                                   period=period,
                                   flot_time=flot_time,
                                   points_type=points_type))
                nonsparse = merged
        return nonsparse