Ejemplo n.º 1
0
    def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits', period=None):
        """Summarise `metric` for every sub-dimension found under `dimension`.

        Returns a dict keyed by sub-dimension (each key passed through
        ``maybe_dumps``). Every value is a dict with:

        - ``'points'``: the plotpoints fetched for that sub-dimension/metric,
        - ``'total'``: the sum of those plotpoint values,
        - ``'important'``: True when the total is above 10 and above a tenth
          of the total recorded for `dimension`.

        The entry keyed by `dimension` itself is deleted from the result, and
        an entry loses its ``'important'`` flag when its total minus the totals
        of its direct children is below a tenth of the overall total.

        `period` falls back to ``Period.default_size()`` when falsy.
        """
        period = period or Period.default_size()
        dimension_key = keyify(dimension)
        period_totals = cls.totals(pk, dimension, metric, periods=[period])
        total = period_totals[str(period)][dimension_key][metric]

        def summarise(name):
            # Build the result entry for one sub-dimension.
            series = cls.plotpoints(pk, name, metric, period=period)[name][metric]
            subtotal = sum(series.values())
            if subtotal > 10 and subtotal > (total / 10):
                is_important = True
            else:
                is_important = False
            return {
                'points': series,
                'total': subtotal,
                'important': is_important,
            }

        ranked = {
            name: summarise(name)
            for name in map(maybe_dumps, cls.all_subdimensions(pk, dimension))
        }
        del ranked[dimension]

        # Prune parents: a parent only stays important if what is left after
        # removing its direct children's totals is still a tenth of the total.
        for name, entry in ranked.items():
            child_keys = map(maybe_dumps, cls.get_subdimensions(pk, name))
            children_total = sum(ranked[child]['total'] for child in child_keys)
            if not entry['important']:
                continue
            if (entry['total'] - children_total) < (total / 10):
                entry['important'] = False
        return ranked
Ejemplo n.º 2
0
    def rank_subdimensions_scalar(cls, pk, dimension='_', metric='hits',
            period=None, recursive=True, prune_parents=True, points=False):
        """Summarise `metric` for the sub-dimensions found under `dimension`.

        Sub-dimensions come from ``cls.all_subdimensions`` when `recursive` is
        truthy, otherwise from ``cls.get_subdimensions``. The result is a dict
        keyed by sub-dimension (each key passed through ``maybe_dumps``) whose
        values hold:

        - ``'score'``, ``'value'``, ``'count'``: the summed plotpoint values,
        - ``'effect'``, ``'difference'``: overall total minus that sum,
        - ``'important'``: True when the sum is above 10 and above a tenth of
          the overall total,
        - ``'dimension'``: the sub-dimension key,
        - ``'points'``: the raw plotpoints, kept only when `points` is truthy.

        When both `recursive` and `prune_parents` are truthy, an entry loses
        its ``'important'`` flag if its score minus its direct children's
        scores is below a tenth of the overall total.

        `period` falls back to ``Period.default_size()`` when falsy.
        """
        period = period or Period.default_size()
        dimension_key = keyify(dimension)
        period_totals = cls.cached_totals(pk, dimension, metric, periods=[period])
        total = period_totals[period][dimension_key][metric]

        def describe(name):
            # Build the result entry for one sub-dimension.
            series = cls.plotpoints(pk, name, metric, period=period)[name][metric]
            subtotal = sum(series.values())
            flagged = True if (subtotal > 10 and subtotal > (total / 10)) else False
            remainder = total - subtotal
            entry = {
                'points': series,
                'score': subtotal,
                'important': flagged,
                'effect': remainder,
                'difference': remainder,
                'value': subtotal,
                'count': subtotal,
                'dimension': name
            }
            if points:
                return entry
            entry.pop('points')
            return entry

        list_subdimensions = cls.all_subdimensions if recursive else cls.get_subdimensions
        ranked = {
            name: describe(name)
            for name in map(maybe_dumps, list_subdimensions(pk, dimension))
        }

        if not (recursive and prune_parents):
            return ranked

        # Prune parents
        for name, entry in ranked.items():
            child_keys = map(maybe_dumps, cls.get_subdimensions(pk, name))
            children_total = sum(ranked[child]['score'] for child in child_keys)
            if not entry['important']:
                continue
            if (entry['score'] - children_total) < (total / 10):
                entry['important'] = False
        return ranked
Ejemplo n.º 3
0
 def plotpoints(cls, pk, dimensions=None, metrics=None,
         period=None, overall=True):
     """Return gap-free plot series for the data retrieved for `pk`.

     The driver returned by ``cls.whale_driver()`` is queried with the given
     `dimensions`, `metrics`, `period` and `overall`. Its result is iterated
     as ``{dimension: {metric: points}}``. For every dimension/metric pair a
     list of ``[flot_time, value]`` pairs is built, one per datetime string
     yielded by the Period constructed from ``period.split('x')``; datetimes
     absent from `points` get the value ``0.0``.

     `metrics` defaults to ``['hits']`` and `period` to
     ``Period.default_size()`` when falsy.
     """
     metrics = metrics or ['hits',]
     period = period or Period.default_size()
     driver = cls.whale_driver()
     sparse = driver.retrieve(pk, dimensions, metrics,
             period=period, overall=overall)
     dense = defaultdict(dict)
     for dimension_key, by_metric in sparse.items():
         for metric_name, recorded in by_metric.items():
             timestamps = Period(*period.split('x')).datetimes_strs()
             # Fill every slot of the period, substituting 0 where the
             # driver returned nothing for that datetime.
             dense[dimension_key][metric_name] = [
                 [to_flot_time(Period.parse_dt_str(stamp)),
                  float(recorded[stamp] if stamp in recorded else 0)]
                 for stamp in timestamps
             ]
     return dense
Ejemplo n.º 4
0
    def rank_subdimensions_ratio(
        cls, pk, numerator, denominator="hits", dimension="_", period=None, recursive=True, points=False
    ):
        """Rank sub-dimensions of `dimension` by their numerator/denominator ratio.

        Sub-dimensions come from ``cls.all_subdimensions`` when `recursive` is
        truthy, otherwise from ``cls.get_subdimensions``. The result is a dict
        keyed by sub-dimension (each key passed through ``maybe_dumps``) whose
        values hold:

        - ``"score"``: summed numerator / summed denominator (0 when the
          denominator sum is falsy),
        - ``"difference"``: relative deviation of that ratio from the overall
          ratio of `dimension` (0 when the overall ratio is falsy),
        - ``"effect"``: ``difference * denominator_sum * overall_ratio``,
        - ``"value"`` / ``"count"``: the numerator / denominator sums,
        - ``"important"``: denominator sum above 5 and ``|difference|`` above 0.1,
        - ``"dimension"``: the sub-dimension key,
        - ``"points"``: the raw plotpoints, kept only when `points` is truthy.

        `period` falls back to ``Period.default_size()`` when falsy.
        """
        top, bottom = numerator, denominator
        period = period or Period.default_size()
        d_k = keyify(dimension)
        top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
        bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]

        def safe_ratio(num, den):
            # Convert to float *before* dividing: float(num / den) truncates
            # first when both operands are integers under Python 2 division.
            return float(num) / float(den) if den else 0

        ratio_total = safe_ratio(top_total, bottom_total)

        def info(sub):
            # Build the result entry for one sub-dimension.
            pps = cls.plotpoints(pk, sub, [top, bottom, "%s/%s" % (top, bottom)], period=period)[sub]
            sub_top_sum = sum(pps[top].values())
            sub_bottom_sum = sum(pps[bottom].values())

            ratio = safe_ratio(sub_top_sum, sub_bottom_sum)

            # Relative deviation from the overall ratio; undefined (treated
            # as 0) when the overall ratio itself is zero.
            difference = (ratio - ratio_total) / ratio_total if ratio_total else 0
            important = sub_bottom_sum > 5 and math.fabs(difference) > 0.1

            data = {
                "points": pps,
                "score": ratio,
                "difference": difference,
                "effect": difference * sub_bottom_sum * ratio_total,
                "value": sub_top_sum,
                "count": sub_bottom_sum,
                "important": important,
                "dimension": sub,
            }
            if not points:
                del data["points"]
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions

        return {sub: info(sub) for sub in map(maybe_dumps, _subs(pk, dimension))}
Ejemplo n.º 5
0
    def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits',
            dimension='_', period=None, recursive=True, points=False):
        """Rank sub-dimensions of `dimension` by their numerator/denominator ratio.

        Sub-dimensions come from ``cls.all_subdimensions`` when `recursive` is
        truthy, otherwise from ``cls.get_subdimensions``. The result is a dict
        keyed by sub-dimension (each key passed through ``maybe_dumps``) whose
        values hold:

        - ``'score'``: summed numerator / summed denominator (0 when the
          denominator sum is falsy),
        - ``'difference'``: relative deviation of that ratio from the overall
          ratio of `dimension` (0 when the overall ratio is falsy),
        - ``'effect'``: ``difference * denominator_sum * overall_ratio``,
        - ``'value'`` / ``'count'``: the numerator / denominator sums,
        - ``'important'``: denominator sum above 5 and ``|difference|`` above 0.1,
        - ``'dimension'``: the sub-dimension key,
        - ``'points'``: the raw plotpoints, kept only when `points` is truthy.

        Overall totals are read through ``cls.cached_totals``. `period` falls
        back to ``Period.default_size()`` when falsy.
        """
        top, bottom = numerator, denominator
        period = period or Period.default_size()
        d_k = keyify(dimension)
        top_total = cls.cached_totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
        bottom_total = cls.cached_totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]

        def safe_ratio(num, den):
            # Convert to float *before* dividing: float(num / den) truncates
            # first when both operands are integers under Python 2 division.
            return float(num) / float(den) if den else 0

        ratio_total = safe_ratio(top_total, bottom_total)

        def info(sub):
            # Build the result entry for one sub-dimension.
            pps = cls.plotpoints(pk, sub, [top, bottom, '%s/%s' % (top, bottom)], period=period)[sub]
            sub_top_sum = sum(pps[top].values())
            sub_bottom_sum = sum(pps[bottom].values())

            ratio = safe_ratio(sub_top_sum, sub_bottom_sum)

            # Relative deviation from the overall ratio; undefined (treated
            # as 0) when the overall ratio itself is zero.
            difference = (ratio - ratio_total) / ratio_total if ratio_total else 0
            important = sub_bottom_sum > 5 and math.fabs(difference) > .1

            data = {
                'points': pps,
                'score': ratio,
                'difference': difference,
                'effect': difference * sub_bottom_sum * ratio_total,
                'value': sub_top_sum,
                'count': sub_bottom_sum,
                'important': important,
                'dimension': sub
            }
            if not points:
                del data['points']
            return data

        _subs = cls.all_subdimensions if recursive else cls.get_subdimensions

        return {sub: info(sub) for sub in map(maybe_dumps, _subs(pk, dimension))}
Ejemplo n.º 6
0
 def plotpoints(self, categories=None, dimensions=None, metrics=None,
         period=None, depth=0):
     """Return gap-free plot series for the requested categories.

     The driver returned by ``self.driver()`` is queried with `categories`,
     `dimensions`, `metrics`, `period` and `depth`. Its result is iterated as
     ``{dimension: {metric: points}}``. For every dimension/metric pair a
     list of ``[flot_time, value]`` pairs is built, one per datetime string
     yielded by the Period constructed from ``period.split('x')``; datetimes
     absent from `points` get the value ``0.0``.

     Defaults applied to falsy arguments: `categories` -> ``['']``,
     `dimensions` -> the JSON text of an empty list, `metrics` ->
     ``['hits']``, `period` -> ``Period.default_size()``.
     """
     categories = categories or ''
     dimensions = dimensions or json.dumps([])
     # Wrap a single category name in a list. isinstance() is used instead of
     # an exact type() match so str/unicode subclasses are wrapped too.
     if isinstance(categories, (str, unicode)):
         categories = [categories]
     metrics = metrics or ['hits']
     period = period or Period.default_size()
     sparse = self.driver().retrieve(categories, dimensions, metrics,
             period=period, depth=depth)
     nonsparse = defaultdict(dict)
     # Distinct loop names keep the `dimensions`/`metrics` arguments intact.
     for dimension_key, metric_points in sparse.items():
         for metric, points in metric_points.items():
             dts = Period(*period.split('x')).datetimes_strs()
             # Fill every slot of the period, substituting 0 where the
             # driver returned nothing for that datetime.
             nonsparse[dimension_key][metric] = [
                 [to_flot_time(Period.parse_dt_str(dt)),
                  float(points[dt] if dt in points else 0)]
                 for dt in dts
             ]
     return nonsparse
Ejemplo n.º 7
0
    def rank_subdimensions_scalar(
        cls, pk, dimension="_", metric="hits", period=None, recursive=True, prune_parents=True, points=False
    ):
        """Summarise `metric` for the sub-dimensions found under `dimension`.

        Sub-dimensions come from ``cls.all_subdimensions`` when `recursive` is
        truthy, otherwise from ``cls.get_subdimensions``. The result is a dict
        keyed by sub-dimension (each key passed through ``maybe_dumps``) whose
        values hold:

        - ``"score"``, ``"value"``, ``"count"``: the summed plotpoint values,
        - ``"effect"``, ``"difference"``: overall total minus that sum,
        - ``"important"``: True when the sum is above 10 and above a tenth of
          the overall total,
        - ``"dimension"``: the sub-dimension key,
        - ``"points"``: the raw plotpoints, kept only when `points` is truthy.

        When both `recursive` and `prune_parents` are truthy, an entry loses
        its ``"important"`` flag if its score minus its direct children's
        scores is below a tenth of the overall total.

        `period` falls back to ``Period.default_size()`` when falsy.
        """
        period = period or Period.default_size()
        dimension_key = keyify(dimension)
        totals_by_period = cls.totals(pk, dimension, metric, periods=[period])
        total = totals_by_period[period][dimension_key][metric]

        def build_entry(key):
            # Build the result entry for one sub-dimension.
            series = cls.plotpoints(pk, key, metric, period=period)[key][metric]
            amount = sum(series.values())
            if amount > 10 and amount > (total / 10):
                noteworthy = True
            else:
                noteworthy = False
            shortfall = total - amount
            entry = {
                "points": series,
                "score": amount,
                "important": noteworthy,
                "effect": shortfall,
                "difference": shortfall,
                "value": amount,
                "count": amount,
                "dimension": key,
            }
            if points:
                return entry
            entry.pop("points")
            return entry

        if recursive:
            fetch_subdimensions = cls.all_subdimensions
        else:
            fetch_subdimensions = cls.get_subdimensions

        ranked = {}
        for key in map(maybe_dumps, fetch_subdimensions(pk, dimension)):
            ranked[key] = build_entry(key)

        if not (recursive and prune_parents):
            return ranked

        # Prune parents
        for key, entry in ranked.items():
            child_keys = map(maybe_dumps, cls.get_subdimensions(pk, key))
            children_total = sum(ranked[child]["score"] for child in child_keys)
            if not entry["important"]:
                continue
            if (entry["score"] - children_total) < (total / 10):
                entry["important"] = False
        return ranked
Ejemplo n.º 8
0
    def rank_subdimensions_ratio(cls, pk, numerator, denominator='hits', dimension='_', period=None):
        """Rank every sub-dimension of `dimension` by its numerator/denominator ratio.

        Returns a dict keyed by sub-dimension (each key passed through
        ``maybe_dumps``); the entry keyed by `dimension` itself is deleted.
        Every value holds:

        - ``'points'``: plotpoints for the numerator and denominator metrics,
        - ``'ratio_points'``: the result of ``cls.ratio_plotpoints`` for the
          sub-dimension,
        - ``'difference'``: relative deviation of the sub-dimension's ratio
          from the overall ratio of `dimension` (0 when the overall ratio is
          zero),
        - ``'effect'``: ``difference * denominator_sum``,
        - ``'important'``: denominator sum above 5 and ``|difference|`` above 0.1.

        `period` falls back to ``Period.default_size()`` when falsy.
        """
        top, bottom = numerator, denominator
        period = period or Period.default_size()
        d_k = keyify(dimension)
        top_total = cls.totals(pk, dimension, top, periods=[period])[str(period)][d_k][top]
        bottom_total = cls.totals(pk, dimension, bottom, periods=[period])[str(period)][d_k][bottom]

        def safe_ratio(num, den):
            # Convert to float *before* dividing: float(num / den) truncates
            # first when both operands are integers under Python 2 division.
            return float(num) / float(den) if den else 0

        ratio_total = safe_ratio(top_total, bottom_total)

        def info(sub):
            # Build the result entry for one sub-dimension.
            pps = cls.plotpoints(pk, sub, [top, bottom], period=period)[sub]
            ratio_points = cls.ratio_plotpoints(pk, top, bottom, sub, period=period)[sub]

            sub_top_sum = sum(pps[top].values())
            sub_bottom_sum = sum(pps[bottom].values())

            ratio = safe_ratio(sub_top_sum, sub_bottom_sum)

            # The overall ratio is 0 whenever the denominator total is empty;
            # dividing by it unguarded raised ZeroDivisionError.
            difference = (ratio - ratio_total) / ratio_total if ratio_total else 0

            important = sub_bottom_sum > 5 and abs(difference) > .1

            return {
                'points': pps,
                'ratio_points': ratio_points,
                'difference': difference,
                'effect': difference * sub_bottom_sum,
                'important': important
            }

        ranked = {sub: info(sub) for sub in map(maybe_dumps, cls.all_subdimensions(pk, dimension))}
        del ranked[dimension]

        return ranked