def _get_impala_result_size(self, notebook, snippet):
    """Extract the returned-row count of an Impala query from its profile.

    The query profile is fetched for the query ID derived from ``snippet``
    and searched for the ``RowsReturned`` counter that appears between the
    "Coordinator Fragment" and "Averaged Fragment" sections.

    Args:
        notebook: Not used by this method.
        snippet: Snippet passed to ``_get_impala_query_id`` and ``_get_api``.

    Returns:
        A ``(total_records, total_size, msg)`` tuple. ``total_records`` is
        an int when the counter was found, otherwise ``None``.
        ``total_size`` and ``msg`` are never assigned here and are always
        ``None``.
    """
    total_records_match = None
    total_records, total_size, msg = None, None, None

    query_id = self._get_impala_query_id(snippet)
    # Resolved before the query-ID check (as before), so a failure in
    # _get_api surfaces even when there is no query ID.
    server_url = _get_api(self.user, snippet)._url

    if query_id:
        LOG.debug(
            "Attempting to get Impala query profile at server_url %s for query ID: %s",
            server_url, query_id)

        fragment = self._get_impala_query_profile(server_url, query_id=query_id)
        # Raw strings keep the regex escapes (\d, \() out of Python's own
        # string-escape processing. The counter is matched in the form
        # "RowsReturned: 1.23K (1230)": the named group captures the exact
        # value in parentheses, and the decimal point of the abbreviated
        # value is matched literally.
        total_records_re = (
            r"Coordinator Fragment F\d\d.+?"
            r"RowsReturned: \d+(?:\.\d+[KMB])? \((?P<total_records>\d+)\)"
            r".*?(Averaged Fragment F\d\d)"
        )
        total_records_match = re.search(
            total_records_re, fragment, re.MULTILINE | re.DOTALL)

    # total_records_match can only be set when a query ID was available, so
    # query_id and fragment are always bound inside this branch.
    if total_records_match:
        total_records = int(total_records_match.group('total_records'))

        query_plan = self._get_impala_profile_plan(query_id, fragment)
        if query_plan:
            LOG.info('Query plan for Impala query %s: %s', query_id, query_plan)
        else:
            LOG.info('Query plan for Impala query %s not found.', query_id)

    return total_records, total_size, msg
def alanize(request):
    """Analyze the Impala profile of a saved query and return it as JSON.

    Reads two JSON-encoded POST fields: ``cluster`` (defaults to ``{}``)
    and ``query_id``. ``query_id`` is used as the ``id`` of a ``Document2``
    whose first snippet holds the handle secret of the executed query.

    Returns:
        A ``JsonResponse``. ``status`` is ``-1`` when ``query_id`` is
        falsy. Otherwise ``status`` is ``0`` and ``data['query']`` holds
        ``healthChecks``, ``summary``, ``heatmap`` and ``heatmapMetrics``.
    """
    response = {'status': -1}

    cluster = json.loads(request.POST.get('cluster', '{}'))
    query_id = json.loads(request.POST.get('query_id'))

    api = _get_api(request.user, cluster=cluster)

    if query_id:
        LOG.debug("Attempting to get Impala query profile for query ID: %s", query_id)

        doc = Document2.objects.get(id=query_id)
        snippets = doc.data_dict.get('snippets', [])
        secret = snippets[0]['result']['handle']['secret']
        # b64decode exists on Python 2 and 3 and accepts text input;
        # base64.decodestring was removed in Python 3.9.
        impala_query_id = unpack_guid(base64.b64decode(secret))

        query_profile = api.get_query_profile_encoded(impala_query_id)
        profile = analyzer.analyze(analyzer.parse_data(query_profile))
        ANALYZER.pre_process(profile)
        result = ANALYZER.run(profile)

        heatmap = {}
        summary = analyzer.summary(profile)
        heatmap_metrics = [
            'AverageThreadTokens',
            'BloomFilterBytes',
            'PeakMemoryUsage',
            'PerHostPeakMemUsage',
            'PrepareTime',
            'RowsProduced',
            'TotalCpuTime',
            'TotalNetworkReceiveTime',
            'TotalNetworkSendTime',
            'TotalStorageWaitTime',
            'TotalTime',
        ]
        # Keep only the metrics for which the analyzer produced data.
        for key in heatmap_metrics:
            metrics = analyzer.heatmap_by_host(profile, key)
            if metrics['data']:
                heatmap[key] = metrics

        response['data'] = {
            'query': {
                'healthChecks': result[0]['result'],
                'summary': summary,
                'heatmap': heatmap,
                # sorted() over the dict yields its keys on Python 2 and 3;
                # dict.iterkeys() exists only on Python 2.
                'heatmapMetrics': sorted(heatmap),
            }
        }
        response['status'] = 0

    return JsonResponse(response)
def alanize(request):
    """Analyze the Impala profile of a saved query and return it as JSON.

    Reads two JSON-encoded POST fields: ``cluster`` (defaults to ``{}``)
    and ``query_id``. ``query_id`` is used as the ``id`` of a ``Document2``
    whose first snippet holds the handle secret of the executed query.

    Returns:
        A ``JsonResponse``. ``status`` is ``-1`` when ``query_id`` is
        falsy. Otherwise ``status`` is ``0`` and ``data['query']`` holds
        ``healthChecks``, ``summary``, ``heatmap`` and ``heatmapMetrics``.
    """
    response = {'status': -1}

    cluster = json.loads(request.POST.get('cluster', '{}'))
    query_id = json.loads(request.POST.get('query_id'))

    api = _get_api(request.user, cluster=cluster)

    if query_id:
        LOG.debug("Attempting to get Impala query profile for query ID: %s", query_id)

        doc = Document2.objects.get(id=query_id)
        snippets = doc.data_dict.get('snippets', [])
        secret = snippets[0]['result']['handle']['secret']
        # base64.decodestring was removed in Python 3.9 and rejected str
        # input on Python 3; b64decode accepts the text secret directly.
        impala_query_id = unpack_guid(base64.b64decode(secret))

        query_profile = api.get_query_profile_encoded(impala_query_id)
        profile = analyzer.analyze(analyzer.parse_data(query_profile))
        ANALYZER.pre_process(profile)
        result = ANALYZER.run(profile)

        heatmap = {}
        summary = analyzer.summary(profile)
        heatmap_metrics = [
            'AverageThreadTokens',
            'BloomFilterBytes',
            'PeakMemoryUsage',
            'PerHostPeakMemUsage',
            'PrepareTime',
            'RowsProduced',
            'TotalCpuTime',
            'TotalNetworkReceiveTime',
            'TotalNetworkSendTime',
            'TotalStorageWaitTime',
            'TotalTime',
        ]
        # Keep only the metrics for which the analyzer produced data.
        for key in heatmap_metrics:
            metrics = analyzer.heatmap_by_host(profile, key)
            if metrics['data']:
                heatmap[key] = metrics

        response['data'] = {
            'query': {
                'healthChecks': result[0]['result'],
                'summary': summary,
                'heatmap': heatmap,
                # Sorting the dict directly yields its sorted keys.
                'heatmapMetrics': sorted(heatmap),
            }
        }
        response['status'] = 0

    return JsonResponse(response)
def alanize_metrics(request):
    """Return the analyzer metrics for an Impala query profile as JSON.

    Reads two JSON-encoded POST fields: ``cluster`` (defaults to ``{}``)
    and ``query_id``, which is passed as-is to the API when fetching the
    encoded query profile.

    Returns:
        A ``JsonResponse``. ``status`` is ``-1`` when ``query_id`` is
        falsy. Otherwise ``status`` is ``0`` and ``data`` holds the result
        of ``analyzer.metrics`` for the pre-processed profile.
    """
    payload = {'status': -1}

    cluster_config = json.loads(request.POST.get('cluster', '{}'))
    impala_query_id = json.loads(request.POST.get('query_id'))
    client = _get_api(request.user, cluster=cluster_config)

    # Nothing to analyze without a query ID: report the failure status.
    if not impala_query_id:
        return JsonResponse(payload)

    LOG.debug("Attempting to get Impala query profile for query ID: %s" % (impala_query_id))

    encoded_profile = client.get_query_profile_encoded(impala_query_id)
    parsed_profile = analyzer.analyze(analyzer.parse_data(encoded_profile))
    ANALYZER.pre_process(parsed_profile)

    payload['data'] = analyzer.metrics(parsed_profile)
    payload['status'] = 0
    return JsonResponse(payload)