Exemple #1
0
 def setUp(self):
   self.profile = a.analyze(
       a.parse(
           os.path.join(
               os.path.dirname(__file__),
               "../..",
               "testdata",
               "profile.thrift")))
   self.analyze = rules.TopDownAnalysis()
Exemple #2
0
from builtins import filter
from builtins import range
import itertools
import logging
import re
import time
from datetime import datetime

from django.utils.translation import ugettext as _

from jobbrowser.apis.base_api import Api
from libanalyze import analyze as analyzer, rules
from notebook.conf import ENABLE_QUERY_ANALYSIS

ANALYZER = rules.TopDownAnalysis(
)  # We need to parse some files so save as global
LOG = logging.getLogger(__name__)

try:
    from beeswax.models import Session
    from impala.server import get_api as get_impalad_api, _get_impala_server_url
except Exception as e:
    LOG.exception('Some application are not enabled: %s' % e)


def _get_api(user, cluster=None):
    if cluster and cluster.get('type') == 'altus-dw':
        server_url = 'http://impala-coordinator-%(name)s:25000' % cluster
    else:
        # TODO: multi computes if snippet.get('compute') or snippet['type'] has computes
        application = cluster.get('type', 'impala')
Exemple #3
0
 def test_get_top_reasons_json(self):
   p = rules.TopDownAnalysis()
   result = p.run(self.profile)
   assert_true(len(result[0]['result']) == 67)
   test = json.dumps([{"result": [{"reason": [{"impact": 16798499570, "message": "Slow Aggregate: Excess time (over expected time) spent in the aggregate; might be caused by complex group by"}, {"impact": 1841684634.666668, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}, {"impact": 1, "message": " Spilled Partitions: This operation has spilled to disk. Check if the ressource configuration of Impala can be changed to allow for a higher memory limit."}], "result_id": "30", "wall_clock_time": 20683095270, "contribution_factor_str": "SQLOperator 30:AGGREGATION_NODE"}, {"reason": [{"impact": 16137425107, "message": "Slow Aggregate: Excess time (over expected time) spent in the aggregate; might be caused by complex group by"}, {"impact": 1249201121.2222214, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}, {"impact": 1, "message": " Spilled Partitions: This operation has spilled to disk. Check if the ressource configuration of Impala can be changed to allow for a higher memory limit."}], "result_id": "7", "wall_clock_time": 20022020807, "contribution_factor_str": "SQLOperator 07:AGGREGATION_NODE"}, {"reason": [{"impact": 15991669185, "message": "Slow Aggregate: Excess time (over expected time) spent in the aggregate; might be caused by complex group by"}, {"impact": 1062368963.2222214, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}, {"impact": 1, "message": " Spilled Partitions: This operation has spilled to disk. Check if the ressource configuration of Impala can be changed to allow for a higher memory limit."}], "result_id": "20", "wall_clock_time": 19681122971, "contribution_factor_str": "SQLOperator 20:AGGREGATION_NODE"}, {"reason": [{"impact": 538561025.333333, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "17", "wall_clock_time": 6966953012, "contribution_factor_str": "SQLOperator 17:HASH_JOIN_NODE"}, {"reason": [{"impact": 874553885.333333, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "29", "wall_clock_time": 6705756207, "contribution_factor_str": "SQLOperator 29:HASH_JOIN_NODE"}, {"reason": [{"impact": 496170372, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "27", "wall_clock_time": 6663793736, "contribution_factor_str": "SQLOperator 27:HASH_JOIN_NODE"}, {"reason": [{"impact": 467446848.55555534, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "4", "wall_clock_time": 6641201075, "contribution_factor_str": "SQLOperator 04:HASH_JOIN_NODE"}, {"reason": [{"impact": 503890745.8888893, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "6", "wall_clock_time": 6611505627, "contribution_factor_str": "SQLOperator 06:HASH_JOIN_NODE"}, {"reason": [{"impact": 634909229.333333, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "19", "wall_clock_time": 6401734479, "contribution_factor_str": "SQLOperator 19:HASH_JOIN_NODE"}, {"reason": [], "result_id": "-1", "wall_clock_time": 2612825457, "contribution_factor_str": "RemoteFragmentsStarted -1:N/A"}, {"reason": [{"impact": 3672332795.524691, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}, {"impact": 1271091421, "message": "This is the time waiting for HDFS NN RPC."}, {"impact": 929179291.4444444, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}, {"impact": 45400713.888888806, "message": "Rows Read Skew: The skew (max-avg) in rows processed"}, {"impact": 0.006735614444444418, "message": "Bytes Read Skew: Addition IO time cost by the skew (assuming 5 disks)"}], "result_id": "14", "wall_clock_time": 2320876241, "contribution_factor_str": "SQLOperator 14:HDFS_SCAN_NODE"}, {"reason": [{"impact": 165377262.44444442, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "5", "wall_clock_time": 2258327578, "contribution_factor_str": "SQLOperator 05:HASH_JOIN_NODE"}, {"reason": [{"impact": 174711179.44444442, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "28", "wall_clock_time": 2231494483, "contribution_factor_str": "SQLOperator 28:HASH_JOIN_NODE"}, {"reason": [{"impact": 4598206116.796875, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}, {"impact": 1261948355, "message": "This is the time waiting for HDFS NN RPC."}, {"impact": 836163684.8888888, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}, {"impact": 49606693.93939389, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 45400713.888888806, "message": "Rows Read Skew: The skew (max-avg) in rows processed"}, {"impact": 0.006735614444444418, "message": "Bytes Read Skew: Addition IO time cost by the skew (assuming 5 disks)"}], "result_id": "1", "wall_clock_time": 2201407589, "contribution_factor_str": "SQLOperator 01:HDFS_SCAN_NODE"}, {"reason": [{"impact": 4407935855.252918, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}, {"impact": 1767671213, "message": "This is the time waiting for HDFS NN RPC."}, {"impact": 722860231, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}, {"impact": 45400713.888888806, "message": "Rows Read Skew: The skew (max-avg) in rows processed"}, {"impact": 0.006735614444444418, "message": "Bytes Read Skew: Addition IO time cost by the skew (assuming 5 disks)"}], "result_id": "24", "wall_clock_time": 2193866884, "contribution_factor_str": "SQLOperator 24:HDFS_SCAN_NODE"}, {"reason": [{"impact": 96606459.11111116, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "18", "wall_clock_time": 2180207014, "contribution_factor_str": "SQLOperator 18:HASH_JOIN_NODE"}, {"reason": [{"impact": 1111759224.8888888, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F04 0", "wall_clock_time": 1250729128, "contribution_factor_str": "SQLOperator F04 0:CodeGen"}, {"reason": [{"impact": 193415667.33333337, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F04 1", "wall_clock_time": 1201795461, "contribution_factor_str": "SQLOperator F04 1:CodeGen"}, {"reason": [{"impact": 92531774.55555558, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F00 0", "wall_clock_time": 1062080747, "contribution_factor_str": "SQLOperator F00 0:CodeGen"}, {"reason": [{"impact": 118700210.11111116, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F05 0", "wall_clock_time": 1009980856, "contribution_factor_str": "SQLOperator F05 0:CodeGen"}, {"reason": [{"impact": 132909682.88888884, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F09 0", "wall_clock_time": 950194410, "contribution_factor_str": "SQLOperator F09 0:CodeGen"}, {"reason": [{"impact": 95305427.33333337, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F10 0", "wall_clock_time": 878960263, "contribution_factor_str": "SQLOperator F10 0:CodeGen"}, {"reason": [{"impact": 46199805, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F14 0", "wall_clock_time": 769058113, "contribution_factor_str": "SQLOperator F14 0:CodeGen"}, {"reason": [], "result_id": "-1", "wall_clock_time": 613452579, "contribution_factor_str": "PlanningTime -1:N/A"}, {"reason": [{"impact": 306772810, "message": "Slow Aggregate: Excess time (over expected time) spent in the aggregate; might be caused by complex group by"}, {"impact": 42519756.55555558, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "45", "wall_clock_time": 319264610, "contribution_factor_str": "SQLOperator 45:AGGREGATION_NODE"}, {"reason": [{"impact": 297637309, "message": "Slow Aggregate: Excess time (over expected time) spent in the aggregate; might be caused by complex group by"}, {"impact": 29017600.555555582, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "51", "wall_clock_time": 309567409, "contribution_factor_str": "SQLOperator 51:AGGREGATION_NODE"}, {"reason": [], "result_id": "-1", "wall_clock_time": 107247619, "contribution_factor_str": "ClientFetchWaitTimer -1:N/A"}, {"reason": [{"impact": 97484030, "message": "Slow Sorting: Excess time (over expected time) spent in the sort; might be caused by too many sorting column"}, {"impact": 36347752, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "31", "wall_clock_time": 98861130, "contribution_factor_str": "SQLOperator 31:SORT_NODE"}, {"reason": [{"impact": 67982884, "message": "Slow Aggregate: Excess time (over expected time) spent in the aggregate; might be caused by complex group by"}, {"impact": 7664156.555555552, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "40", "wall_clock_time": 80474684, "contribution_factor_str": "SQLOperator 40:AGGREGATION_NODE"}, {"reason": [{"impact": 32130961.111111112, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "12", "wall_clock_time": 71088072, "contribution_factor_str": "SQLOperator 12:SELECT_NODE"}, {"reason": [{"impact": 58733676, "message": "Slow Sorting: Excess time (over expected time) spent in the sort; might be caused by too many sorting column"}, {"impact": 5766554.333333336, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "8", "wall_clock_time": 60080276, "contribution_factor_str": "SQLOperator 08:SORT_NODE"}, {"reason": [{"impact": 57966057, "message": "Slow Sorting: Excess time (over expected time) spent in the sort; might be caused by too many sorting column"}, {"impact": 4243951.444444448, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "21", "wall_clock_time": 59294857, "contribution_factor_str": "SQLOperator 21:SORT_NODE"}, {"reason": [{"impact": 47950535, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 37688100, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}], "result_id": "15", "wall_clock_time": 47950535, "contribution_factor_str": "SQLOperator 15:HDFS_SCAN_NODE"}, {"reason": [{"impact": 17818123.666666668, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "52", "wall_clock_time": 44603227, "contribution_factor_str": "SQLOperator 52:EXCHANGE_NODE"}, {"reason": [{"impact": 9621600, "message": "Wrong join strategy - RHS 121390; LHS 105174"}, {"impact": 4113826, "message": "Slow Hash Join: Excess time (over expected time) spent in the hash join"}, {"impact": 2924865.666666664, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "34", "wall_clock_time": 43779812, "contribution_factor_str": "SQLOperator 34:HASH_JOIN_NODE"}, {"reason": [{"impact": 14784147, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "32", "wall_clock_time": 42111797, "contribution_factor_str": "SQLOperator 32:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 39518015, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 29689100, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}], "result_id": "2", "wall_clock_time": 39518015, "contribution_factor_str": "SQLOperator 02:HDFS_SCAN_NODE"}, {"reason": [{"impact": 20851584.222222224, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "46", "wall_clock_time": 38647270, "contribution_factor_str": "SQLOperator 46:EXCHANGE_NODE"}, {"reason": [{"impact": 8035800, "message": "Wrong join strategy - RHS 105576; LHS 121383"}, {"impact": 3816722, "message": "Slow Hash Join: Excess time (over expected time) spent in the hash join"}, {"impact": 1904130.4444444478, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "33", "wall_clock_time": 37364443, "contribution_factor_str": "SQLOperator 33:HASH_JOIN_NODE"}, {"reason": [{"impact": 31174821, "message": "Slow Sorting: Excess time (over expected time) spent in the sort; might be caused by too many sorting column"}, {"impact": 1894590, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "10", "wall_clock_time": 32551921, "contribution_factor_str": "SQLOperator 10:SORT_NODE"}, {"reason": [{"impact": 26659473.75, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 20690100, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}], "result_id": "25", "wall_clock_time": 30467970, "contribution_factor_str": "SQLOperator 25:HDFS_SCAN_NODE"}, {"reason": [{"impact": 7084883.444444444, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "11", "wall_clock_time": 28336314, "contribution_factor_str": "SQLOperator 11:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2135688.222222224, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "9", "wall_clock_time": 22614443, "contribution_factor_str": "SQLOperator 09:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 1150084.666666668, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "22", "wall_clock_time": 22144125, "contribution_factor_str": "SQLOperator 22:ANALYTIC_EVAL_NODE"}, {"reason": [{"impact": 2047632, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "39", "wall_clock_time": 11957699, "contribution_factor_str": "SQLOperator 39:EXCHANGE_NODE"}, {"reason": [{"impact": 1332451, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "44", "wall_clock_time": 11506235, "contribution_factor_str": "SQLOperator 44:EXCHANGE_NODE"}, {"reason": [{"impact": 728588, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "50", "wall_clock_time": 10172630, "contribution_factor_str": "SQLOperator 50:EXCHANGE_NODE"}, {"reason": [{"impact": 3334413, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 1199000, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}], "result_id": "0", "wall_clock_time": 3334413, "contribution_factor_str": "SQLOperator 00:HDFS_SCAN_NODE"}, {"reason": [], "result_id": "53", "wall_clock_time": 3082111, "contribution_factor_str": "SQLOperator 53:EXCHANGE_NODE"}, {"reason": [{"impact": 2594847, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 1199000, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}], "result_id": "23", "wall_clock_time": 2594847, "contribution_factor_str": "SQLOperator 23:HDFS_SCAN_NODE"}, {"reason": [{"impact": 2452312, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 2198000, "message": "Slow HDFS Scan: Predicates might be expensive (expectes speed 10m rows per sec per core)"}], "result_id": "13", "wall_clock_time": 2452312, "contribution_factor_str": "SQLOperator 13:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1706125, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 287883, "message": "This is the time waiting for HDFS NN RPC."}], "result_id": "16", "wall_clock_time": 1706125, "contribution_factor_str": "SQLOperator 16:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1619889, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 601555, "message": "This is the time waiting for HDFS NN RPC."}], "result_id": "3", "wall_clock_time": 1619889, "contribution_factor_str": "SQLOperator 03:HDFS_SCAN_NODE"}, {"reason": [{"impact": 1385497, "message": "Lack of scanner thread parallelism: Speed can be improved by that much if there's 8 scanner threads"}, {"impact": 181359, "message": "This is the time waiting for HDFS NN RPC."}], "result_id": "26", "wall_clock_time": 1385497, "contribution_factor_str": "SQLOperator 26:HDFS_SCAN_NODE"}, {"reason": [{"impact": 559177.1111111111, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "43", "wall_clock_time": 1378341, "contribution_factor_str": "SQLOperator 43:EXCHANGE_NODE"}, {"reason": [{"impact": 362490.3333333334, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "38", "wall_clock_time": 1291643, "contribution_factor_str": "SQLOperator 38:EXCHANGE_NODE"}, {"reason": [{"impact": 265681, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "49", "wall_clock_time": 1177394, "contribution_factor_str": "SQLOperator 49:EXCHANGE_NODE"}, {"reason": [], "result_id": "-1", "wall_clock_time": 775849, "contribution_factor_str": "RowMaterializationTimer -1:N/A"}, {"reason": [{"impact": 235417.66666666666, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "48", "wall_clock_time": 279531, "contribution_factor_str": "SQLOperator 48:EXCHANGE_NODE"}, {"reason": [{"impact": 10539.11111111111, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "36", "wall_clock_time": 31603, "contribution_factor_str": "SQLOperator 36:EXCHANGE_NODE"}, {"reason": [{"impact": 8916.666666666668, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "47", "wall_clock_time": 29729, "contribution_factor_str": "SQLOperator 47:EXCHANGE_NODE"}, {"reason": [{"impact": 8002.1111111111095, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "41", "wall_clock_time": 29716, "contribution_factor_str": "SQLOperator 41:EXCHANGE_NODE"}, {"reason": [{"impact": 1725.1111111111113, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "42", "wall_clock_time": 16432, "contribution_factor_str": "SQLOperator 42:EXCHANGE_NODE"}, {"reason": [{"impact": 791.1111111111113, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "37", "wall_clock_time": 14808, "contribution_factor_str": "SQLOperator 37:EXCHANGE_NODE"}, {"reason": [{"impact": 1111759224.8888888, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "F04 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F04 0:BlockMgr"}, {"reason": [], "result_id": "F15 0", "wall_clock_time": 0, "contribution_factor_str": "SQLOperator F15 0:BlockMgr"}, {"reason": [{"impact": 2305122.555555556, "message": "TotalTime Skew: The skew (max-avg) contributed this amount of time to this SQL operator"}], "result_id": "35", "wall_clock_time": -5250104, "contribution_factor_str": "SQLOperator 35:SORT_NODE"}], "rule": {"message": "Top contributing factors and its reasons", "prio": 1, "label": "Top Down Analysis"}, "template": "alan-tpl"}])
   assert_true(json.dumps(result) == test)