Example 1
    def evaluate(self, profile, plan_node_id):
        """
        Check whether this join node "exploded" its input rows.

        The result is a dict with
        this format:
        {
            "impact": the amount of slow down (in ns),
            "message" : the displayed "explanation" string
        }
        :return:
        """
        self.metric_names = ["Hosts", "Broadcast", "BuildRows", "ProbeRows"]

        def _first_value(metric):
            # First aggregated value of the metric for this plan node.
            return models.query_node_by_id(profile, plan_node_id, metric,
                                           True)[0][0]

        hosts = _first_value("Hosts")
        probe_rows = _first_value("ProbeRows")
        probe_time = _first_value("ProbeTime")
        rows_returned = _first_value("RowsReturned")

        # Attribute the share of probe time spent producing rows beyond the
        # probe input to the explosion; zero if the join returned nothing.
        impact = 0
        if rows_returned > 0:
            impact = probe_time * (rows_returned - probe_rows) / rows_returned

        message = ("Exploding join: %d input rows are exploded to %d output rows"
                   % (probe_rows, rows_returned))
        return {"impact": impact, "message": message}
Example 2
    def evaluate(self, profile, plan_node_id):
        """
        Determine if the join order/strategy is correct and evaluate the impact of this cause
        to the query. The return is a json string with
        this format:
        {
            "impact": the amount of slow down (in ns),
            "message" : the displayed "explanation" string
        }
        :return:
        """
        self.metric_names = ["Hosts", "Broadcast", "BuildRows", "ProbeRows"]

        def _metric(name):
            # First aggregated value of the metric for this plan node.
            return models.query_node_by_id(profile, plan_node_id, name,
                                           True)[0][0]

        hosts = _metric("Hosts")
        is_broadcast = _metric("Broadcast")
        build_rows = _metric("BuildRows")
        probe_rows = _metric("ProbeRows")

        # Broadcast ships the build side to every host; a partitioned
        # (shuffle) join moves both sides across the cluster.
        if is_broadcast == 1:
            network_cost = build_rows * hosts
            rhs_rows = build_rows
        else:
            network_cost = (build_rows + probe_rows) * hosts
            rhs_rows = build_rows * hosts
        lhs_rows = probe_rows * hosts

        # Join-order check: the RHS (build side) should be the smaller input.
        impact = (rhs_rows - lhs_rows * 1.5) / hosts / 0.01
        if impact > 0:
            return {
                "impact": impact,
                "message":
                "Wrong join order - RHS %d; LHS %d" % (rhs_rows, lhs_rows)
            }

        # Strategy check: compare the actual network cost with the cheaper of
        # an ideal broadcast vs. shuffle distribution.
        broadcast_cost = rhs_rows * hosts
        shuffle_cost = lhs_rows + rhs_rows
        impact = (network_cost - min(broadcast_cost, shuffle_cost) - 1) / hosts / 0.01
        return {
            "impact": impact,
            "message":
            "Wrong join strategy - RHS %d; LHS %d" % (rhs_rows, lhs_rows)
        }
Example 3
 def evaluate(self, profile, plan_node_id):
     """
     Estimate the time lost to HDFS NameNode RPC latency. The result is a
     dict with
     this format:
     {
         "impact": the amount of slow down (in ns),
         "message" : the displayed "explanation" string
     }
     :return:
     """
     # Total storage wait, averaged per fragment, vs. time spent in raw reads.
     total_storage_wait = models.query_avg_fragment_metric_by_node_nid(
         profile, plan_node_id, "TotalStorageWaitTime")
     raw_hdfs_read_time = models.query_node_by_id(
         profile, plan_node_id, "TotalRawHdfsReadTime(*)", True)[0][0]
     read_concurrency = models.query_node_by_id(
         profile, plan_node_id, "AverageHdfsReadThreadConcurrency", True)[0][0]
     # Never divide by less than one reader thread.
     read_concurrency = max(1, to_double(read_concurrency))
     # Wait time not explained by raw reads is attributed to NN RPC,
     # amortized over the concurrent read threads; clamp at zero.
     impact = max(0, (total_storage_wait - raw_hdfs_read_time) / read_concurrency)
     return {
         "impact": impact,
         "message": "This is the time waiting for HDFS NN RPC."
     }
Example 4
    def evaluate(self, profile, plan_node_id):
        """
        Evaluate the impact of this cause to the query. The return is a json string with
        this format:
        {
            "impact": the amount of slow down (in ns),
            "message" : the displayed "explanation" string
        }
        :return:
        """
        # Sentinel meaning "no row/group produced a value".
        # NOTE(review): the later guards test `impact is None`, but impact is
        # initialized to -1, never None — so the max-tracking condition is
        # effectively just `impact < expr_val`. Confirm whether -1 or None is
        # the intended sentinel.
        impact = -1
        expr_data = ''
        if len(self.exprs):
            # Expression-based rule: exactly one metric name feeds the exprs.
            assert len(self.metric_names) == 1

            # metric_names can have multiple values create a dict for all of
            # them
            db_result = models.query_node_by_id(profile, plan_node_id,
                                                self.metric_names[0])
            # Group rows per fragment id; presumably db_result is already
            # ordered by fid (groupby requires sorted input) — TODO confirm.
            for k, g in groupby(db_result, lambda x: x.fid):
                grouped = list(g)
                # A list of pairs, with aggregated value and index at value for
                # max / min like exprs
                converted_exprs = self.check_exprs(grouped)
                # "vars" maps expr name -> aggregated value,
                # "idxs" maps expr name -> index where that value occurred.
                expr_vars = {
                    "vars":
                    dict(zip(self.exprs, map(lambda x: x[0],
                                             converted_exprs))),
                    "idxs":
                    dict(zip(self.exprs, map(lambda x: x[1],
                                             converted_exprs))),
                }

                # Track the maximum impact across fragments.
                expr_val = exprs.Expr.evaluate(self.rule["expr"], expr_vars)
                if (impact is None or impact < expr_val):
                    impact = expr_val
        else:
            # For each of the metrics get the result
            with Timer() as t:
                # Get the metric values from the db grouped by metric name
                db_result = [
                    models.query_node_by_id(profile, plan_node_id, m)
                    for m in self.metric_names
                ]
                # Assuming that for all metric names the same number of rows have been returned transpose the array
                all_metrics = zip(*db_result)

            for row in all_metrics:
                # Convert to double values if unit is 6(double)
                metric_values = map(
                    lambda x: x.value
                    if x.unit != 6 else to_double(x.value), row)

                # Use the first metric's node as the representative node for
                # the derived IOBound / InputRows variables.
                surrogate_node = row[0].node
                local_vars = {
                    "vars": dict(zip(self.metric_names, metric_values))
                }
                local_vars["vars"]["IOBound"] = self.isStorageBound(
                    surrogate_node)
                local_vars["vars"]['InputRows'] = self.getNumInputRows(
                    surrogate_node)
                # Optional per-rule gate; rows failing the condition are
                # skipped entirely.
                condition = True
                if ("condition" in self.rule):
                    condition = exprs.Expr.evaluate(self.rule["condition"],
                                                    local_vars)
                if (condition):
                    expr_val = exprs.Expr.evaluate(self.rule["expr"],
                                                   local_vars)
                    if (impact is None or impact < expr_val):
                        impact = expr_val

            # Optionally enrich the result with "fix" data built from info
            # elements (e.g. remediation hints declared on the rule).
            # NOTE(review): this reuses `local_vars` from the loop above — if
            # all_metrics yielded no rows this raises NameError; verify the
            # caller guarantees at least one row when info_names is set.
            if self.kwargs.get('info_names'):
                db_result = [
                    models.query_element_by_info(profile, plan_node_id, m)
                    for m in self.kwargs['info_names']
                ]
                all_metrics = zip(*db_result)
                for row in all_metrics:
                    metric_values = map(lambda x: x.value, row)
                    local_vars['vars'].update(
                        dict(zip(self.kwargs['info_names'], metric_values)))
                    expr_data = exprs.Expr.evaluate(self.kwargs['fix']['data'],
                                                    local_vars)

        # Final user-facing message: "<label>: <message>" from the rule spec.
        msg = self.rule["label"] + ": " + self.rule["message"]
        return {"impact": impact, "message": msg, "data": expr_data}