timeWrongJoinImpls += operator['avg_time'] elif isWrongJoinLeftRight(operator, leftChild, rightChild): numWrongJoinLeftRights += 1 timeWrongJoinLeftRights += operator['avg_time'] else: numCorrectJoins += 1 timeCorrectJoins += operator['avg_time'] plots.stacked_bar( [ numCorrectJoins, numWrongJoinImpls, numWrongJoinLeftRights, ], 'Number of Joins', [ 'Correct Join %s' % numCorrectJoins, 'Wrong Join Impl %s' % numWrongJoinImpls, 'Wrong Join Left Right %s' % numWrongJoinLeftRights, ], 'Join Correctness', 'stacked_num_join_correctness.png' ) plots.stacked_bar( [ timeCorrectJoins / 1000000, timeWrongJoinImpls / 1000000, timeWrongJoinLeftRights / 1000000, ], 'Time of Joins',
"_id": "Fragment Start", "sum_time": query["fragment_start_time"], "time_pct": query["fragment_start_time"] / sumTimeAllOperators, } ) operators.append({"_id": "CodeGen", "sum_time": codeGenTime, "time_pct": codeGenTime / sumTimeAllOperators}) operators.append( {"_id": "HdfsTableSink", "sum_time": hdfsTableSinkTime, "time_pct": hdfsTableSinkTime / sumTimeAllOperators} ) if not args.summary: operators.sort(key=lambda operator: operator["sum_time"], reverse=True) plots.stacked_bar( [operator["sum_time"] / 1000000 for operator in operators], "Time", ["%s %sms" % (operator["_id"], operator["sum_time"] / 1000000) for operator in operators], "Operator Sum Time (ms)", "%s_stacked_time.png" % query["_id"], ) for operator in operators: if operator["_id"] not in timePctPerOperator: timePctPerOperator[operator["_id"]] = [] timePctPerOperator[operator["_id"]].append(operator["time_pct"]) if operator["_id"] not in sumTimePerOperator: sumTimePerOperator[operator["_id"]] = [] sumTimePerOperator[operator["_id"]].append(operator["sum_time"]) numJoins.append(db.operators.find({"query_id": query["_id"], "name": {"$in": ["HASH JOIN", "CROSS JOIN"]}}).count())