def pickJoinOrder(self, plan):
    """Greedily choose a join order for ``plan`` and return the rebuilt plan.

    Each relation of ``plan`` starts as its own sub-plan: a ``TableScan``,
    wrapped in a ``Select`` when ``selectTablesDict`` has predicates keyed by
    that single relation.  While more than one sub-plan remains, every pair
    of sub-plans is joined in both operand orders with both the
    "block-nested-loops" and "nested-loops" methods.  The candidate with the
    lowest ``cost(True)`` replaces the two sub-plans it was built from.

    If the root of ``plan`` is a GroupBy, an equivalent GroupBy is rebuilt on
    top of the chosen join tree.  A ``Project`` onto the original output
    fields is added when the set of schema fields differs from the original.

    Side effects: resets ``self.reportPlanCount`` to 0 and then adds 4 per
    evaluated pair; calls ``self.clearSampleFiles()`` after each pair.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source.  Confirm that the plan-count update / sample cleanup belong to
    the per-pair loop and that the projection check is not nested under the
    GroupBy branch.

    :param plan: query plan whose joins are to be reordered.
    :return: the new ``Plan``.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Both dicts map a tuple of relations to the expressions involving that
    # tuple.  opt(A, B) JOIN C is then built from the join expressions
    # involving (A, C) and (B, C), topped by the select expressions that
    # involve the two tables (A, C) or (B, C).
    joinTablesDict, selectTablesDict = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # Seed the worklist with one prepared sub-plan per base relation.
    worklist = []
    for r in relations:
        table = TableScan(r, self.db.relationSchema(r))
        table.prepare(self.db)
        if (r,) in selectTablesDict:
            selectString = self.combineSelects(selectTablesDict[(r,)])
            select = Select(table, selectString)
            select.prepare(self.db)
            worklist.append(Plan(root=select))
        else:
            worklist.append(Plan(root=table))

    while len(worklist) > 1:
        bestJoin = None
        sourcePair = None

        for pair in itertools.combinations(worklist, 2):
            op1 = pair[0].root
            op2 = pair[1].root

            selectExpr = self.createExpression(
                pair[0].relations(), pair[1].relations(), selectTablesDict)
            joinExpr = self.createExpression(
                pair[0].relations(), pair[1].relations(), joinTablesDict)

            # Both operand orders for each of the two join methods.
            candidates = [
                Join(op1, op2, expr=joinExpr, method="block-nested-loops"),
                Join(op2, op1, expr=joinExpr, method="block-nested-loops"),
                Join(op1, op2, expr=joinExpr, method="nested-loops"),
                Join(op2, op1, expr=joinExpr, method="nested-loops"),
            ]
            # "True" is the value createExpression yields when no select
            # predicate applies; only wrap the joins otherwise.
            if selectExpr != "True":
                candidates = [Select(j, selectExpr) for j in candidates]

            for j in candidates:
                joinplan = Plan(root=j)
                joinplan.prepare(self.db)
                joinplan.sample(100)

                if (bestJoin is None
                        or joinplan.cost(True) < bestJoin.cost(True)):
                    bestJoin = joinplan
                    sourcePair = pair

            self.reportPlanCount += 4
            self.clearSampleFiles()

        # Replace the two joined sub-plans with the winning join.
        worklist.remove(sourcePair[0])
        worklist.remove(sourcePair[1])
        worklist.append(bestJoin)

    # After the join-ordering search: a single plan is left on the worklist.
    newPlan = worklist[0]

    if isGroupBy:
        oldGroupBy = plan.root
        newGroupBy = GroupBy(newPlan.root,
                             groupSchema=oldGroupBy.groupSchema,
                             aggSchema=oldGroupBy.aggSchema,
                             groupExpr=oldGroupBy.groupExpr,
                             aggExprs=oldGroupBy.aggExprs,
                             groupHashFn=oldGroupBy.groupHashFn)
        newGroupBy.prepare(self.db)
        newPlan = Plan(root=newGroupBy)

    # Restore the original output fields if reordering changed the field set.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
        projectDict = {f: (f, t) for f, t in outputSchema.schema()}
        project = Project(newPlan.root, projectDict)
        project.prepare(self.db)
        newPlan = Plan(root=project)

    return newPlan
def pickJoinOrder(self, plan):
    """Greedily choose a join order for ``plan`` and return the rebuilt plan.

    Each relation of ``plan`` starts as its own sub-plan: a ``TableScan``,
    wrapped in a ``Select`` when ``selectTablesDict`` has predicates keyed by
    that single relation.  While more than one sub-plan remains, every pair
    of sub-plans is joined in both operand orders with both the
    "block-nested-loops" and "nested-loops" methods.  The candidate with the
    lowest ``cost(True)`` replaces the two sub-plans it was built from.

    If the root of ``plan`` is a GroupBy, an equivalent GroupBy is rebuilt on
    top of the chosen join tree.  A ``Project`` onto the original output
    fields is added when the set of schema fields differs from the original.

    Side effects: resets ``self.reportPlanCount`` to 0 and then adds 4 per
    evaluated pair; calls ``self.clearSampleFiles()`` after each pair.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source.  Confirm that the plan-count update / sample cleanup belong to
    the per-pair loop and that the projection check is not nested under the
    GroupBy branch.

    :param plan: query plan whose joins are to be reordered.
    :return: the new ``Plan``.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Both dicts map a tuple of relations to the expressions involving that
    # tuple.  opt(A, B) JOIN C is then built from the join expressions
    # involving (A, C) and (B, C), topped by the select expressions that
    # involve the two tables (A, C) or (B, C).
    joinTablesDict, selectTablesDict = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # Seed the worklist with one prepared sub-plan per base relation.
    worklist = []
    for r in relations:
        table = TableScan(r, self.db.relationSchema(r))
        table.prepare(self.db)
        if (r,) in selectTablesDict:
            selectString = self.combineSelects(selectTablesDict[(r,)])
            select = Select(table, selectString)
            select.prepare(self.db)
            worklist.append(Plan(root=select))
        else:
            worklist.append(Plan(root=table))

    while len(worklist) > 1:
        bestJoin = None
        sourcePair = None

        for pair in itertools.combinations(worklist, 2):
            op1 = pair[0].root
            op2 = pair[1].root

            selectExpr = self.createExpression(
                pair[0].relations(), pair[1].relations(), selectTablesDict)
            joinExpr = self.createExpression(
                pair[0].relations(), pair[1].relations(), joinTablesDict)

            # Both operand orders for each of the two join methods.
            candidates = [
                Join(op1, op2, expr=joinExpr, method="block-nested-loops"),
                Join(op2, op1, expr=joinExpr, method="block-nested-loops"),
                Join(op1, op2, expr=joinExpr, method="nested-loops"),
                Join(op2, op1, expr=joinExpr, method="nested-loops"),
            ]
            # "True" is the value createExpression yields when no select
            # predicate applies; only wrap the joins otherwise.
            if selectExpr != "True":
                candidates = [Select(j, selectExpr) for j in candidates]

            for j in candidates:
                joinplan = Plan(root=j)
                joinplan.prepare(self.db)
                joinplan.sample(100)

                if (bestJoin is None
                        or joinplan.cost(True) < bestJoin.cost(True)):
                    bestJoin = joinplan
                    sourcePair = pair

            self.reportPlanCount += 4
            self.clearSampleFiles()

        # Replace the two joined sub-plans with the winning join.
        worklist.remove(sourcePair[0])
        worklist.remove(sourcePair[1])
        worklist.append(bestJoin)

    # After the join-ordering search: a single plan is left on the worklist.
    newPlan = worklist[0]

    if isGroupBy:
        oldGroupBy = plan.root
        newGroupBy = GroupBy(newPlan.root,
                             groupSchema=oldGroupBy.groupSchema,
                             aggSchema=oldGroupBy.aggSchema,
                             groupExpr=oldGroupBy.groupExpr,
                             aggExprs=oldGroupBy.aggExprs,
                             groupHashFn=oldGroupBy.groupHashFn)
        newGroupBy.prepare(self.db)
        newPlan = Plan(root=newGroupBy)

    # Restore the original output fields if reordering changed the field set.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
        projectDict = {f: (f, t) for f, t in outputSchema.schema()}
        project = Project(newPlan.root, projectDict)
        project.prepare(self.db)
        newPlan = Plan(root=project)

    return newPlan
def pickJoinOrder(self, plan):
    """Choose a join order for ``plan`` by dynamic programming.

    ``optDict`` maps a sorted tuple of relation names to the best plan found
    for that set of relations.  Pass 1 stores one plan per relation: a
    ``TableScan``, wrapped in a ``Select`` when ``selectTablesDict`` has
    predicates keyed by that single relation.  Pass ``n`` visits every
    ``n``-relation combination; for each split returned by
    ``self.getCombos`` it joins the stored plan of the complement (left) with
    the stored plan of the sub-combination (right), using both the
    "block-nested-loops" and "nested-loops" methods, and keeps the candidate
    with the lowest ``cost(True)``.

    If the root of ``plan`` is a GroupBy, an equivalent GroupBy is rebuilt on
    top of the chosen join tree.  A ``Project`` onto the original output
    fields is added when the set of schema fields differs from the original.

    Side effects: resets ``self.reportPlanCount`` to 0, then adds 1 per base
    relation and 2 per evaluated split; calls ``self.clearSampleFiles()``
    after each split.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source.  Confirm the placement of the two plan-count updates and that
    the projection check is not nested under the GroupBy branch.

    :param plan: query plan whose joins are to be reordered.
    :return: the new ``Plan``.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Both dicts map a tuple of relations to the expressions involving that
    # tuple.  opt(A, B) JOIN C is then built from the join expressions
    # involving (A, C) and (B, C), topped by the select expressions that
    # involve the two tables (A, C) or (B, C).
    joinTablesDict, selectTablesDict = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    optDict = {}
    self.reportPlanCount = 0

    for npass in range(1, len(relations) + 1):
        if npass == 1:
            # Base case: one access plan per relation.
            for r in relations:
                table = TableScan(r, self.db.relationSchema(r))
                if (r,) in selectTablesDict:
                    selectString = self.combineSelects(
                        selectTablesDict[(r,)])
                    optDict[(r,)] = Plan(root=Select(table, selectString))
                else:
                    optDict[(r,)] = Plan(root=table)
                self.reportPlanCount += 1
            continue

        for c in itertools.combinations(relations, npass):
            fullList = sorted(c)
            bestJoin = None

            # presumably getCombos yields the sub-combinations to split
            # fullList on, and getComplement the remaining relations —
            # verify against those helpers.
            for subcombo in self.getCombos(fullList):
                complement = self.getComplement(fullList, subcombo)
                leftOps = optDict[tuple(complement)].root
                rightOps = optDict[tuple(subcombo)].root

                selectExpr = self.createExpression(
                    complement, subcombo, selectTablesDict)
                joinExpr = self.createExpression(
                    complement, subcombo, joinTablesDict)

                # Build, prepare and sample the BNLJ plan, then the NLJ plan.
                # The Select wrapper is only created when a real predicate
                # exists ("True" means there is nothing to filter).
                sampled = []
                for method in ("block-nested-loops", "nested-loops"):
                    joinOp = Join(leftOps, rightOps, expr=joinExpr,
                                  method=method)
                    if selectExpr == "True":
                        candidate = Plan(root=joinOp)
                    else:
                        candidate = Plan(root=Select(joinOp, selectExpr))
                    candidate.prepare(self.db)
                    candidate.sample(100)
                    sampled.append(candidate)
                joinBnlj, joinNlj = sampled

                # Evaluate each candidate's cost once; NLJ wins ties, as in
                # the original strict '<' comparison.
                bnljCost = joinBnlj.cost(True)
                nljCost = joinNlj.cost(True)
                if bnljCost < nljCost:
                    challenger, challengerCost = joinBnlj, bnljCost
                else:
                    challenger, challengerCost = joinNlj, nljCost

                if (bestJoin is None
                        or challengerCost < bestJoin.cost(True)):
                    bestJoin = challenger

                self.reportPlanCount += 2
                self.clearSampleFiles()

            optDict[tuple(fullList)] = bestJoin

    # After the System R style search: the entry covering all relations.
    newPlan = optDict[tuple(sorted(relations))]

    if isGroupBy:
        oldGroupBy = plan.root
        newGroupBy = GroupBy(newPlan.root,
                             groupSchema=oldGroupBy.groupSchema,
                             aggSchema=oldGroupBy.aggSchema,
                             groupExpr=oldGroupBy.groupExpr,
                             aggExprs=oldGroupBy.aggExprs,
                             groupHashFn=oldGroupBy.groupHashFn)
        newGroupBy.prepare(self.db)
        newPlan = Plan(root=newGroupBy)

    # Restore the original output fields if reordering changed the field set.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
        projectDict = {f: (f, t) for f, t in outputSchema.schema()}
        project = Project(newPlan.root, projectDict)
        project.prepare(self.db)
        newPlan = Plan(root=project)

    return newPlan
def pickJoinOrder(self, plan):
    """Choose a join order for ``plan`` by dynamic programming.

    ``optDict`` maps a sorted tuple of relation names to the best plan found
    for that set of relations.  Pass 1 stores one plan per relation: a
    ``TableScan``, wrapped in a ``Select`` when ``selectTablesDict`` has
    predicates keyed by that single relation.  Pass ``n`` visits every
    ``n``-relation combination; for each split returned by
    ``self.getCombos`` it joins the stored plan of the complement (left) with
    the stored plan of the sub-combination (right), using both the
    "block-nested-loops" and "nested-loops" methods, and keeps the candidate
    with the lowest ``cost(True)``.

    If the root of ``plan`` is a GroupBy, an equivalent GroupBy is rebuilt on
    top of the chosen join tree.  A ``Project`` onto the original output
    fields is added when the set of schema fields differs from the original.

    Side effects: resets ``self.reportPlanCount`` to 0, then adds 1 per base
    relation and 2 per evaluated split; calls ``self.clearSampleFiles()``
    after each split.

    NOTE(review): this body was reconstructed from a whitespace-collapsed
    source.  Confirm the placement of the two plan-count updates and that
    the projection check is not nested under the GroupBy branch.

    :param plan: query plan whose joins are to be reordered.
    :return: the new ``Plan``.
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Both dicts map a tuple of relations to the expressions involving that
    # tuple.  opt(A, B) JOIN C is then built from the join expressions
    # involving (A, C) and (B, C), topped by the select expressions that
    # involve the two tables (A, C) or (B, C).
    joinTablesDict, selectTablesDict = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    optDict = {}
    self.reportPlanCount = 0

    for npass in range(1, len(relations) + 1):
        if npass == 1:
            # Base case: one access plan per relation.
            for r in relations:
                table = TableScan(r, self.db.relationSchema(r))
                if (r,) in selectTablesDict:
                    selectString = self.combineSelects(
                        selectTablesDict[(r,)])
                    optDict[(r,)] = Plan(root=Select(table, selectString))
                else:
                    optDict[(r,)] = Plan(root=table)
                self.reportPlanCount += 1
            continue

        for c in itertools.combinations(relations, npass):
            fullList = sorted(c)
            bestJoin = None

            # presumably getCombos yields the sub-combinations to split
            # fullList on, and getComplement the remaining relations —
            # verify against those helpers.
            for subcombo in self.getCombos(fullList):
                complement = self.getComplement(fullList, subcombo)
                leftOps = optDict[tuple(complement)].root
                rightOps = optDict[tuple(subcombo)].root

                selectExpr = self.createExpression(
                    complement, subcombo, selectTablesDict)
                joinExpr = self.createExpression(
                    complement, subcombo, joinTablesDict)

                # Build, prepare and sample the BNLJ plan, then the NLJ plan.
                # The Select wrapper is only created when a real predicate
                # exists ("True" means there is nothing to filter).
                sampled = []
                for method in ("block-nested-loops", "nested-loops"):
                    joinOp = Join(leftOps, rightOps, expr=joinExpr,
                                  method=method)
                    if selectExpr == "True":
                        candidate = Plan(root=joinOp)
                    else:
                        candidate = Plan(root=Select(joinOp, selectExpr))
                    candidate.prepare(self.db)
                    candidate.sample(100)
                    sampled.append(candidate)
                joinBnlj, joinNlj = sampled

                # Evaluate each candidate's cost once; NLJ wins ties, as in
                # the original strict '<' comparison.
                bnljCost = joinBnlj.cost(True)
                nljCost = joinNlj.cost(True)
                if bnljCost < nljCost:
                    challenger, challengerCost = joinBnlj, bnljCost
                else:
                    challenger, challengerCost = joinNlj, nljCost

                if (bestJoin is None
                        or challengerCost < bestJoin.cost(True)):
                    bestJoin = challenger

                self.reportPlanCount += 2
                self.clearSampleFiles()

            optDict[tuple(fullList)] = bestJoin

    # After the System R style search: the entry covering all relations.
    newPlan = optDict[tuple(sorted(relations))]

    if isGroupBy:
        oldGroupBy = plan.root
        newGroupBy = GroupBy(newPlan.root,
                             groupSchema=oldGroupBy.groupSchema,
                             aggSchema=oldGroupBy.aggSchema,
                             groupExpr=oldGroupBy.groupExpr,
                             aggExprs=oldGroupBy.aggExprs,
                             groupHashFn=oldGroupBy.groupHashFn)
        newGroupBy.prepare(self.db)
        newPlan = Plan(root=newGroupBy)

    # Restore the original output fields if reordering changed the field set.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
        projectDict = {f: (f, t) for f, t in outputSchema.schema()}
        project = Project(newPlan.root, projectDict)
        project.prepare(self.db)
        newPlan = Plan(root=project)

    return newPlan