def pushdownProject(self, op):
    """Push a Project operator as far down the plan tree as it can go.

    The sub-plan is optimized first via ``self.pushdownOperator``; the
    rewrite applied afterwards depends on the type of the (optimized) child:

    * ``GroupBy`` / ``TableScan`` / ``Sort``: the projection stays in place.
    * ``Project``: the child projection is removed when it is redundant.
    * ``Select``: the projection moves below the selection when it still
      outputs every attribute the selection predicate reads.
    * ``UnionAll``: a copy of the projection is placed on each input.
    * any ``*Join``: the project expressions are partitioned between the
      two join inputs as far as possible.

    Args:
        op: the Project operator to push down; it is mutated in place.

    Returns:
        The operator that is now the root of this sub-tree.

    Raises:
        NotImplementedError: the child operator type is not handled.
    """
    # Optimize everything below first so decisions use the final child.
    op.subPlan = self.pushdownOperator(op.subPlan)
    childType = op.subPlan.operatorType()

    if childType in ("GroupBy", "TableScan", "Sort"):
        return op

    if childType == "Project":
        # The child projection is considered redundant when every output of
        # `op` maps to a plain attribute in the child.  A key the child does
        # not define cannot be proven redundant (and must not raise KeyError).
        # NOTE(review): isAttribute() is called directly on the stored
        # expression, while the join branch wraps the same slot in
        # ExpressionInfo -- confirm which form projectExprs actually holds.
        childExprs = op.subPlan.projectExprs
        redundant = all(
            key in childExprs and childExprs[key][0].isAttribute()
            for key in op.projectExprs
        )
        if not redundant:
            # Nothing to rewrite; returning here avoids re-dispatching on an
            # unchanged tree.
            return op
        op.subPlan = op.subPlan.subPlan
        return self.pushdownOperator(op)

    if childType == "Select":
        # Swap only if the projection keeps all attributes the predicate uses.
        select = op.subPlan
        selectAttrs = ExpressionInfo(select.selectExpr).getAttributes()
        outputAttrs = set(op.projectExprs.keys())
        if not selectAttrs.issubset(outputAttrs):
            return op
        op.subPlan = select.subPlan
        select.subPlan = self.pushdownOperator(op)
        return select

    if childType == "UnionAll":
        # Place a copy of the projection on each side of the union.
        union = op.subPlan
        union.lhsPlan = self.pushdownOperator(
            Project(union.lhsPlan, op.projectExprs))
        union.rhsPlan = self.pushdownOperator(
            Project(union.rhsPlan, op.projectExprs))
        return union

    if "Join" in childType:
        join = op.subPlan
        lhsAttrs = set(join.lhsPlan.schema().fields)
        rhsAttrs = set(join.rhsPlan.schema().fields)
        lhsProjectExprs = {}
        rhsProjectExprs = {}
        remainingProjectExprs = False

        # Route each expression to the single input that supplies all of the
        # attributes it reads; anything else has to stay above the join.
        for attr, exprEntry in op.projectExprs.items():
            requiredAttrs = ExpressionInfo(exprEntry[0]).getAttributes()
            if requiredAttrs.issubset(lhsAttrs):
                lhsProjectExprs[attr] = exprEntry
            elif requiredAttrs.issubset(rhsAttrs):
                rhsProjectExprs[attr] = exprEntry
            else:
                remainingProjectExprs = True

        if lhsProjectExprs:
            join.lhsPlan = self.pushdownOperator(
                Project(join.lhsPlan, lhsProjectExprs))
        if rhsProjectExprs:
            join.rhsPlan = self.pushdownOperator(
                Project(join.rhsPlan, rhsProjectExprs))

        # Drop `op` only when nothing is left for it to compute and each
        # side of the join received a projection.
        if not remainingProjectExprs and lhsProjectExprs and rhsProjectExprs:
            return join
        return op

    # Report the child type: that is the value no branch matched.
    raise NotImplementedError(
        "Unmatched operatorType in pushdownProject(): " + childType)
def pushdownSelect(self, op):
    """Push a Select operator as far down the plan tree as it can go.

    The sub-plan is optimized first via ``self.pushdownOperator``; the
    rewrite applied afterwards depends on the type of the (optimized) child:

    * ``GroupBy`` / ``TableScan`` / ``Project``: the selection stays put.
    * ``Select``: the two selections are reordered by a score computed from
      each operator's own ``selectivity(True)`` and ``tupleCost``.
    * ``Sort``: the selection always moves below the sort.
    * ``UnionAll``: a copy of the selection is placed on each input.
    * any ``*Join``: the predicate is split into CNF conjuncts, which are
      routed to the join input that supplies all of their attributes.

    Args:
        op: the Select operator to push down; it is mutated in place.

    Returns:
        The operator that is now the root of this sub-tree.

    Raises:
        NotImplementedError: the child operator type is not handled.
    """
    # Optimize everything below first so decisions use the final child.
    op.subPlan = self.pushdownOperator(op.subPlan)
    child = op.subPlan
    childType = child.operatorType()

    if childType in ("GroupBy", "TableScan", "Project"):
        return op

    if childType == "Select":
        useEstimated = True
        opScore = (1 - op.selectivity(useEstimated)) / op.tupleCost
        # Each operator is scored with its OWN per-tuple cost.
        childScore = (1 - child.selectivity(useEstimated)) / child.tupleCost
        # NOTE(review): the swap direction is kept from the original code;
        # confirm that a higher-scoring child is meant to end up on top.
        if childScore > opScore:
            op.subPlan = child.subPlan
            child.subPlan = self.pushdownOperator(op)
            return child
        return op

    if childType == "Sort":
        # Always move the selection below the sort.
        op.subPlan = child.subPlan
        child.subPlan = self.pushdownOperator(op)
        return child

    if childType == "UnionAll":
        # Place a copy of the selection on each side of the union.
        child.lhsPlan = self.pushdownOperator(
            Select(child.lhsPlan, op.selectExpr))
        child.rhsPlan = self.pushdownOperator(
            Select(child.rhsPlan, op.selectExpr))
        return child

    if "Join" in childType:
        def conjoin(parts):
            # Parenthesize each conjunct so one containing `or` keeps its
            # meaning once it is and-ed together with the others.
            if len(parts) == 1:
                return parts[0]
            return ' and '.join('(' + part + ')' for part in parts)

        lhsAttrs = set(child.lhsPlan.schema().fields)
        rhsAttrs = set(child.rhsPlan.schema().fields)
        lhsExprs = []
        rhsExprs = []
        remainingExprs = []

        for conjunct in ExpressionInfo(op.selectExpr).decomposeCNF():
            attrs = ExpressionInfo(conjunct).getAttributes()
            if attrs.issubset(lhsAttrs):
                lhsExprs.append(conjunct)
            elif attrs.issubset(rhsAttrs):
                rhsExprs.append(conjunct)
            else:
                remainingExprs.append(conjunct)

        if lhsExprs:
            child.lhsPlan = self.pushdownOperator(
                Select(child.lhsPlan, conjoin(lhsExprs)))
        if rhsExprs:
            child.rhsPlan = self.pushdownOperator(
                Select(child.rhsPlan, conjoin(rhsExprs)))

        # Conjuncts that need attributes from both inputs stay above the join.
        if remainingExprs:
            return Select(child, conjoin(remainingExprs))
        return child

    # Report the child type: that is the value no branch matched.
    raise NotImplementedError(
        "Unmatched operatorType in pushdownSelect(): " + childType)
def pushdownSelect(self, op):
    """Push a Select operator as far down the plan tree as it can go.

    The sub-plan is optimized first via ``self.pushdownOperator``; the
    rewrite applied afterwards depends on the type of the (optimized) child:

    * ``GroupBy`` / ``TableScan`` / ``Project``: the selection stays put.
    * ``Select`` / ``Sort``: the selection always moves below the child and
      is then pushed further from its new position.
    * ``UnionAll``: a copy of the selection is placed on each input.
    * any ``*Join``: the predicate is split into CNF conjuncts, which are
      routed to the join input that supplies all of their attributes.

    Args:
        op: the Select operator to push down; it is mutated in place.

    Returns:
        The operator that is now the root of this sub-tree.

    Raises:
        NotImplementedError: the child operator type is not handled.
    """
    # Optimize everything below first so decisions use the final child.
    op.subPlan = self.pushdownOperator(op.subPlan)
    below = op.subPlan
    belowType = below.operatorType()

    if belowType in ("GroupBy", "TableScan", "Project"):
        return op

    if belowType in ("Select", "Sort"):
        # Unconditional swap: `op` goes underneath and keeps descending.
        op.subPlan = below.subPlan
        below.subPlan = self.pushdownOperator(op)
        return below

    if belowType == "UnionAll":
        # Place a copy of the selection on each side of the union.
        below.lhsPlan = self.pushdownOperator(
            Select(below.lhsPlan, op.selectExpr))
        below.rhsPlan = self.pushdownOperator(
            Select(below.rhsPlan, op.selectExpr))
        return below

    if "Join" in belowType:
        def conjoin(parts):
            # Parenthesize each conjunct so one containing `or` keeps its
            # meaning once it is and-ed together with the others.
            if len(parts) == 1:
                return parts[0]
            return ' and '.join('(' + part + ')' for part in parts)

        lhsAttrs = set(below.lhsPlan.schema().fields)
        rhsAttrs = set(below.rhsPlan.schema().fields)
        lhsExprs = []
        rhsExprs = []
        remainingExprs = []

        for conjunct in ExpressionInfo(op.selectExpr).decomposeCNF():
            attrs = ExpressionInfo(conjunct).getAttributes()
            if attrs.issubset(lhsAttrs):
                lhsExprs.append(conjunct)
            elif attrs.issubset(rhsAttrs):
                rhsExprs.append(conjunct)
            else:
                remainingExprs.append(conjunct)

        if lhsExprs:
            below.lhsPlan = self.pushdownOperator(
                Select(below.lhsPlan, conjoin(lhsExprs)))
        if rhsExprs:
            below.rhsPlan = self.pushdownOperator(
                Select(below.rhsPlan, conjoin(rhsExprs)))

        # Conjuncts that need attributes from both inputs stay above the join.
        if remainingExprs:
            return Select(below, conjoin(remainingExprs))
        return below

    # Report the child type: that is the value no branch matched.
    raise NotImplementedError(
        "Unmatched operatorType in pushdownSelect(): " + belowType)
def pushdownProject(self, op):
    """Push a Project operator as far down the plan tree as it can go.

    The sub-plan is optimized first via ``self.pushdownOperator``; the
    rewrite applied afterwards depends on the type of the (optimized) child:

    * ``GroupBy`` / ``TableScan`` / ``Sort``: the projection stays in place.
    * ``Project``: the child projection is removed when it is redundant.
    * ``Select``: the projection moves below the selection when it still
      outputs every attribute the selection predicate reads.
    * ``UnionAll``: a copy of the projection is placed on each input.
    * any ``*Join``: the project expressions are partitioned between the
      two join inputs as far as possible.

    Args:
        op: the Project operator to push down; it is mutated in place.

    Returns:
        The operator that is now the root of this sub-tree.

    Raises:
        NotImplementedError: the child operator type is not handled.
    """
    # Optimize everything below first so decisions use the final child.
    op.subPlan = self.pushdownOperator(op.subPlan)
    below = op.subPlan
    belowType = below.operatorType()

    # TODO: pushing a projection below a Sort is not implemented; it stays.
    if belowType in ("GroupBy", "TableScan", "Sort"):
        return op

    if belowType == "Project":
        # The lower projection is considered redundant when every output of
        # `op` maps to a plain attribute in it.  A key the lower projection
        # does not define cannot be proven redundant (and must not raise).
        # NOTE(review): isAttribute() is called directly on the stored
        # expression, while the join branch wraps the same slot in
        # ExpressionInfo -- confirm which form projectExprs actually holds.
        lowerExprs = below.projectExprs
        for key in op.projectExprs:
            if key not in lowerExprs or not lowerExprs[key][0].isAttribute():
                # Not redundant: leave the tree as it is instead of
                # re-dispatching on an unchanged plan.
                return op
        op.subPlan = below.subPlan
        return self.pushdownOperator(op)

    if belowType == "Select":
        # Swap only if the projection keeps all attributes the predicate uses.
        selectAttrs = ExpressionInfo(below.selectExpr).getAttributes()
        outputAttrs = set(op.projectExprs.keys())
        if selectAttrs.issubset(outputAttrs):
            op.subPlan = below.subPlan
            below.subPlan = self.pushdownOperator(op)
            return below
        return op

    if belowType == "UnionAll":
        # Place a copy of the projection on each side of the union.
        below.lhsPlan = self.pushdownOperator(
            Project(below.lhsPlan, op.projectExprs))
        below.rhsPlan = self.pushdownOperator(
            Project(below.rhsPlan, op.projectExprs))
        return below

    if "Join" in belowType:
        lhsAttrs = set(below.lhsPlan.schema().fields)
        rhsAttrs = set(below.rhsPlan.schema().fields)
        lhsProjectExprs = {}
        rhsProjectExprs = {}
        remainingProjectExprs = False

        # Route each expression to the single input that supplies all of the
        # attributes it reads; anything else has to stay above the join.
        for attr in op.projectExprs:
            entry = op.projectExprs[attr]
            requiredAttrs = ExpressionInfo(entry[0]).getAttributes()
            if requiredAttrs.issubset(lhsAttrs):
                lhsProjectExprs[attr] = entry
            elif requiredAttrs.issubset(rhsAttrs):
                rhsProjectExprs[attr] = entry
            else:
                remainingProjectExprs = True

        if lhsProjectExprs:
            below.lhsPlan = self.pushdownOperator(
                Project(below.lhsPlan, lhsProjectExprs))
        if rhsProjectExprs:
            below.rhsPlan = self.pushdownOperator(
                Project(below.rhsPlan, rhsProjectExprs))

        # Remove `op` from the tree only if there are no remaining project
        # expressions and each side of the join received a projection.
        if not remainingProjectExprs and lhsProjectExprs and rhsProjectExprs:
            return below
        return op

    # Report the child type: that is the value no branch matched.
    raise NotImplementedError(
        "Unmatched operatorType in pushdownProject(): " + belowType)
def pushdownHelper(self, operator):
    """Recursively push Project and Select operators down a plan tree.

    Dispatches on ``operator.operatorType()``:

    * ``Project``: moves below a ``Select`` child when the projection keeps
      every attribute the predicate reads; is partitioned across the inputs
      of a ``*Join`` child; is copied onto both inputs of a ``UnionAll``.
    * ``Select``: moves below a ``Sort`` child; is split into CNF conjuncts
      that are routed to the inputs of a ``*Join`` child.
    * ``UnionAll`` / ``*Join``: both inputs are optimized recursively.
    * ``GroupBy`` / ``Sort``: the single input is optimized recursively.
    * anything else is returned untouched.

    Args:
        operator: root of the (sub-)plan to optimize; mutated in place.

    Returns:
        The operator that is now the root of this sub-tree.
    """
    operatorType = operator.operatorType()

    if operatorType == "Project":
        operator.subPlan = self.pushdownHelper(operator.subPlan)
        child = operator.subPlan
        subplanType = child.operatorType()

        if subplanType == "Select":
            # The projection may only slide under the selection if it keeps
            # every attribute the predicate needs.
            keys = operator.projectExprs.keys()
            for attribute in ExpressionInfo(child.selectExpr).getAttributes():
                if attribute not in keys:
                    return operator
            # Swap via a saved reference so neither operator is lost.
            operator.subPlan = child.subPlan
            child.subPlan = self.pushdownHelper(operator)
            return child

        if subplanType[-4:] == "Join":
            left = set(child.lhsPlan.schema().fields)
            right = set(child.rhsPlan.schema().fields)
            leftProject = {}
            rightProject = {}

            # An expression goes to the side that supplies all of its
            # attributes; the left side is tried first.
            for attribute, (expr, _) in operator.projectExprs.items():
                needed = ExpressionInfo(expr).getAttributes()
                if all(e in left for e in needed):
                    leftProject[attribute] = operator.projectExprs[attribute]
                elif all(e in right for e in needed):
                    rightProject[attribute] = operator.projectExprs[attribute]

            if leftProject:
                child.lhsPlan = self.pushdownHelper(
                    Project(child.lhsPlan, leftProject))
            if rightProject:
                child.rhsPlan = self.pushdownHelper(
                    Project(child.rhsPlan, rightProject))

            # The top projection can only be dropped when every expression
            # was pushed AND both inputs are now projected; otherwise the
            # join would expose columns the projection was meant to remove.
            pushedAll = (len(operator.projectExprs)
                         == len(leftProject) + len(rightProject))
            if pushedAll and leftProject and rightProject:
                return child
            return operator

        if subplanType == "UnionAll":
            # Copy the projection onto each side of the union.
            child.lhsPlan = self.pushdownHelper(
                Project(child.lhsPlan, operator.projectExprs))
            child.rhsPlan = self.pushdownHelper(
                Project(child.rhsPlan, operator.projectExprs))
            return child

        return operator

    if operatorType == "Select":
        operator.subPlan = self.pushdownHelper(operator.subPlan)
        child = operator.subPlan
        subplanType = child.operatorType()

        if subplanType == "Sort":
            # Swap via a saved reference so neither operator is lost.
            operator.subPlan = child.subPlan
            child.subPlan = self.pushdownHelper(operator)
            return child

        if subplanType[-4:] == "Join":
            def conjoin(parts):
                # Parenthesize each conjunct so one containing `or` keeps
                # its meaning once it is and-ed together with the others.
                if len(parts) == 1:
                    return parts[0]
                return ' and '.join('(' + part + ')' for part in parts)

            leftAttributes = set(child.lhsPlan.schema().fields)
            rightAttributes = set(child.rhsPlan.schema().fields)
            leftExpress = []
            rightExpress = []
            unpushedExpress = []

            for expr in ExpressionInfo(operator.selectExpr).decomposeCNF():
                attrs = ExpressionInfo(expr).getAttributes()
                if attrs.issubset(leftAttributes):
                    leftExpress.append(expr)
                elif attrs.issubset(rightAttributes):
                    rightExpress.append(expr)
                else:
                    unpushedExpress.append(expr)

            if leftExpress:
                child.lhsPlan = self.pushdownHelper(
                    Select(child.lhsPlan, conjoin(leftExpress)))
            if rightExpress:
                child.rhsPlan = self.pushdownHelper(
                    Select(child.rhsPlan, conjoin(rightExpress)))

            # Conjuncts needing attributes from both inputs stay on top.
            if unpushedExpress:
                return Select(child, conjoin(unpushedExpress))
            return child

        return operator

    if operatorType == "UnionAll" or operatorType[-4:] == "Join":
        operator.lhsPlan = self.pushdownHelper(operator.lhsPlan)
        operator.rhsPlan = self.pushdownHelper(operator.rhsPlan)
        return operator

    if operatorType == "GroupBy" or operatorType == "Sort":
        operator.subPlan = self.pushdownHelper(operator.subPlan)
        return operator

    # Any other operator type (e.g. a leaf) is left untouched.
    return operator