Esempio n. 1
0
File: api.py Progetto: ktf/DAS
    def __call__(self, **kwargs):
        """Build the data for the API-call analytics report.

        Every analytics record that has an ``api.name`` field contributes its
        ``counter`` value to a nested tally keyed first by ``system`` and then
        by API name. ``kwargs`` is accepted but not used.

        Returns a tuple of the report name ``"analytics_report_api"`` and a
        dict holding the ``api_plot`` description.
        """
        analytics = get_analytics_interface()

        # calls_by_system[system][api_name] -> summed counter
        calls_by_system = collections.defaultdict(
            lambda: collections.defaultdict(int))

        has_api_name = {"api.name": {"$exists": True}}
        matching = list(analytics.col.find(has_api_name))
        for entry in matching:
            per_api = calls_by_system[entry['system']]
            per_api[entry['api']['name']] += entry['counter']

        api_plot = {
            'central_label': False,
            'data': nested_to_baobab(calls_by_system),
            'external': False,
            'title': "API Calls",
        }
        return ("analytics_report_api", {"api_plot": api_plot})
Esempio n. 2
0
    def __call__(self, **kwargs):
        """Build the data for the query analytics report.

        Recognised ``kwargs``:
            period: length of the look-back window in seconds
                (default ``7*86400``).
            key: optional key name; when given, only queries whose ``keys``
                contain it are counted.

        Each summary returned for the ``"query_analyzer"`` identifier is read
        as a dict with ``start``, ``finish`` and ``queries``, where every
        query is a ``(structure, count)`` pair and ``structure`` carries
        ``keys``, ``fields`` and ``instance``.

        Returns a tuple of the report name ``"analytics_report_query"`` and a
        dict with the totals and the plot descriptions.
        """
        period = int(kwargs.get('period', 7*86400))
        view_key = kwargs.get('key', None)
        now = time.time()
        window_start = now - period
        analytics = get_analytics_interface()
        analyzer_exists = any(
            task['classname'] == "QueryAnalyzer"
            for task in self.scheduler.get_registry().values())

        summaries = analytics.get_summary(identifier="query_analyzer",
                                          after=window_start)
        # each summary['queries'] is [ (query_structure, count) ]

        count_by_key = collections.defaultdict(int)
        field_count_by_key = collections.defaultdict(
            lambda: collections.defaultdict(int))
        instance_count = collections.defaultdict(int)
        constraint_by_key = collections.defaultdict(
            lambda: collections.defaultdict(int))

        seen_keys = set()

        # One bin per hour, capped at max_series_length. Floor division keeps
        # the count an integer on every Python version, and the lower bound
        # of 1 avoids dividing by zero when the period is under an hour.
        time_bins = max(1, min(period // 3600, self.max_series_length))
        time_interval = float(period) / time_bins
        time_series = collections.defaultdict(lambda: [0]*(time_bins+1))

        total_queries = 0
        for summary in summaries:
            midtime = 0.5*(summary['start']+summary['finish'])
            # A zero-length period gives a zero interval; everything then
            # lands in the first bin instead of raising.
            if time_interval:
                time_bin = int((midtime - window_start) / time_interval)
            else:
                time_bin = 0
            # Clamp so a midpoint outside the window cannot wrap around to
            # the end of the series (negative index) or raise IndexError.
            time_bin = min(max(time_bin, 0), time_bins)
            for query in summary['queries']:
                structure = query[0]
                count = query[1]
                query_keys = structure['keys']
                if view_key and view_key not in query_keys:
                    continue
                total_queries += count
                for key in query_keys:
                    seen_keys.add(key)
                    constraint_by_key[key][query_keys[key]] += count
                    count_by_key[key] += count
                    for field in structure['fields']:
                        field_count_by_key[key][field] += count
                    time_series[key][time_bin] += count
                instance_count[structure['instance']] += count

        time_plot = dict(legend="topleft",
                         series=[dict(label=key, values=time_series[key])
                                 for key in time_series],
                         title="Calls by time",
                         xaxis=dict(bins=time_bins+1,
                                    min=window_start,
                                    width=time_interval,
                                    label="Time",
                                    format="time"),
                         yaxis=dict(label="Queries"))

        constraint_plot = dict(central_label=False,
                               data=nested_to_baobab(constraint_by_key),
                               external=False,
                               title="Constraint by key")
        field_plot = dict(central_label=False,
                          data=nested_to_baobab(field_count_by_key),
                          external=False,
                          title="Field by key")
        instance_plot = dict(labels=True,
                             percentage=True,
                             series=[{'label': instance,
                                      'value': instance_count[instance]}
                                     for instance in instance_count],
                             title="DBS Instance")
        key_plot = dict(labels=True,
                        percentage=True,
                        series=[{'label': key, 'value': count_by_key[key]}
                                for key in count_by_key],
                        title="Key(s) used")

        # Key with the highest count; None when nothing was counted.
        popular_key = None
        if count_by_key:
            popular_key = sorted(count_by_key, key=count_by_key.get)[-1]

        return ("analytics_report_query", {"nsummaries": len(summaries),
                                           "nqueries": total_queries,
                                           "view_key": view_key,
                                           "seen_keys": seen_keys,
                                           "analyzer_exists": analyzer_exists,
                                           "constraint_plot": constraint_plot,
                                           "field_plot": field_plot,
                                           "instance_plot": instance_plot,
                                           "key_plot": key_plot,
                                           "time_plot": time_plot,
                                           "period": period,
                                           "popular_key": popular_key})
Esempio n. 3
0
    def hotspot_report(self, **kwargs):
        """Build the data for the report on one hotspot analyzer.

        Recognised ``kwargs``:
            identifier: required; selects the summaries to read and the
                registered task to take defaults from.
            period: look-back window in seconds; overrides the task's value.
            fraction: share of total calls the ``selected`` keys must reach;
                overrides the task's value.

        Defaults are 30 days, a 4 hour interval and a fraction of 0.15 unless
        a registered task whose classname contains ``Hotspot`` matches the
        identifier, in which case its settings are used.

        Returns a tuple of the report name ``"analytics_report_hotspot"`` and
        a dict with the totals, the three plot descriptions and the selected
        keys.

        Raises:
            KeyError: if ``identifier`` is not supplied.
        """
        analytics = get_analytics_interface()
        identifier = kwargs['identifier']

        # First registered Hotspot task with this identifier, if any.
        taskobj = None
        for task in self.scheduler.get_registry().values():
            if ('Hotspot' in task['classname']
                    and gen_identifier(task) == identifier):
                taskobj = task
                break

        period = 86400*30
        interval = 3600*4
        fraction = 0.15
        if taskobj:
            period = taskobj['kwargs'].get('period', 86400*30)
            interval = taskobj['interval']
            fraction = taskobj['kwargs'].get('fraction', 0.15)

        period = int(kwargs.get('period', period))
        fraction = float(kwargs.get('fraction', fraction))

        # Read the clock once so the window is exactly `period` seconds long.
        epoch_end = time.time()
        epoch_start = epoch_end - period

        summaries = analytics.get_summary(identifier,
                                          after=epoch_start,
                                          before=epoch_end)

        counter = collections.defaultdict(int)
        # Explicit loop: map() is lazy on Python 3, so using it only for its
        # side effects would leave the counter empty.
        # NOTE(review): dict.update overwrites a key's value instead of adding
        # to it, so a key present in several summaries keeps only the last
        # value seen - confirm whether summing was intended.
        for summary in summaries:
            counter.update(summary['keys'])

        sorted_keys = sorted(counter, key=lambda x: counter[x])
        total_calls = float(sum(counter.values()))

        # Running total of calls over the keys, least-called key first.
        key_series = []
        call_count = 0
        for key in sorted_keys:
            call_count += counter[key]
            key_series.append(call_count)

        binning = 1
        if len(key_series) > self.max_series_length:
            # Floor division keeps the slice step an integer.
            binning = len(key_series) // self.max_series_length
            # Take every binning-th value, ending on the last one. The start
            # must stay non-negative: `len % binning - 1` is -1 whenever the
            # length is a multiple of binning, which reduced the series to
            # its final element.
            first = (len(key_series) - 1) % binning
            key_series = key_series[first::binning]

        key_plot = dict(legend="null",
                        series=[dict(colour="#ff0000",
                                     label="Calls",
                                     values=key_series)],
                        title="Cumulative calls by key",
                        xaxis=dict(bins=len(key_series)-1,
                                   label="Keys",
                                   width=binning,
                                   min=0),
                        yaxis=dict(label="Cumulative calls"))

        summary_durations = [s['finish'] - s['start'] for s in summaries]
        summary_density = [len(s['keys']) for s in summaries]

        summary_plot = dict(legend="null",
                            series=[dict(colour="#ff0000",
                                         label="Summaries",
                                         marker="*",
                                         x=summary_durations,
                                         y=summary_density)],
                            title="Summary length and call count",
                            xaxis=dict(label="Summary length"),
                            yaxis=dict(label="Number of calls"))

        time_bins = int((epoch_end - epoch_start) / interval)
        time_interval = interval
        if time_bins > self.max_series_length:
            time_bins = self.max_series_length
            time_interval = (epoch_end - epoch_start) / time_bins
        time_series = [0]*(time_bins+1)

        for summary in summaries:
            # Use the summary being iterated; the old code read a stale
            # variable left over from the list comprehensions above.
            midpoint = 0.5*(summary['finish'] + summary['start'])
            slot = int((midpoint - epoch_start) / time_interval)
            # Clamp so a midpoint outside the window cannot wrap around
            # (negative index) or raise IndexError.
            slot = min(max(slot, 0), time_bins)
            time_series[slot] += len(summary['keys'])

        time_plot = dict(legend="null",
                         series=[dict(colour="#ff0000",
                                      label="Query density over time",
                                      values=time_series)],
                         title="Query density over time",
                         xaxis=dict(bins=time_bins+1,
                                    label="Time",
                                    format="time",
                                    min=epoch_start,
                                    width=time_interval),
                         yaxis=dict(label="Calls"))

        # Take keys from the most-called end until together they account for
        # at least `fraction` of all calls.
        selected = []
        cumulative = 0
        while sorted_keys and cumulative < fraction * total_calls:
            key = sorted_keys.pop()
            cumulative += counter[key]
            selected.append(key)

        return ("analytics_report_hotspot", {"list": False,
                                             "identifier": identifier,
                                             "task": taskobj,
                                             "nkeys": len(counter),
                                             "ncalls": total_calls,
                                             "nsummaries": len(summaries),
                                             "key_plot": key_plot,
                                             "summary_plot": summary_plot,
                                             "time_plot": time_plot,
                                             "selected": selected})