コード例 #1
0
ファイル: Optimizer.py プロジェクト: elanas/DB_HW3
  def pickJoinOrder(self, plan):
    """
    Greedily pick a join order for `plan` and return the rebuilt plan.

    One access plan is built per base relation (a TableScan, wrapped in a
    Select when `selectTablesDict` has predicates keyed by that single
    relation). While more than one sub-plan remains, every unordered pair of
    sub-plans is joined in both operand orders with both the
    "block-nested-loops" and "nested-loops" methods; the candidate with the
    lowest `cost(True)` across all pairs replaces its two inputs in the
    worklist.

    If the original root is a GroupBy it is re-applied on top of the join
    tree, and a Project is added when the resulting fields differ from the
    original plan's schema.

    Side effect: `self.reportPlanCount` is reset to 0 and then incremented by
    the number of join candidates examined.

    :param plan: the query plan to reorder; must expose `relations()`,
      `schema()` and `root`.
    :return: a new Plan over the chosen join order.
    :raises IndexError: if `plan.relations()` is empty (no sub-plan exists).
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Per the original author's note: both dicts map a tuple of relation names
    # to the expressions that involve exactly those relations, so that joining
    # opt(A,B) with C can pick up the A,C and B,C predicates.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    self.reportPlanCount = 0

    # Seed the worklist with one prepared access plan per base relation,
    # applying single-relation selections directly above the scan.
    worklist = []
    for r in relations:
      accessOp = TableScan(r, self.db.relationSchema(r))
      accessOp.prepare(self.db)
      if (r,) in selectTablesDict:
        selectString = self.combineSelects(selectTablesDict[(r,)])
        accessOp = Select(accessOp, selectString)
        accessOp.prepare(self.db)
      worklist.append(Plan(root=accessOp))

    joinMethods = ("block-nested-loops", "nested-loops")

    # Each round merges the cheapest pair, shrinking the worklist by one.
    while len(worklist) > 1:
      bestJoin = None
      sourcePair = None

      for pair in itertools.combinations(worklist, 2):
        op1 = pair[0].root
        op2 = pair[1].root
        leftRels = pair[0].relations()
        rightRels = pair[1].relations()

        selectExpr = self.createExpression(leftRels, rightRels, selectTablesDict)
        joinExpr = self.createExpression(leftRels, rightRels, joinTablesDict)

        # Candidate order: BNLJ(op1,op2), BNLJ(op2,op1), NLJ(op1,op2),
        # NLJ(op2,op1). Ties on cost keep the earliest candidate.
        candidates = [
          Join(lhs, rhs, expr=joinExpr, method=method)
          for method in joinMethods
          for (lhs, rhs) in ((op1, op2), (op2, op1))
        ]
        # A select expression of "True" means there is no extra predicate to
        # apply over this pair, so the bare join is used.
        if selectExpr != "True":
          candidates = [Select(joinOp, selectExpr) for joinOp in candidates]

        for candidateOp in candidates:
          joinplan = Plan(root=candidateOp)
          joinplan.prepare(self.db)
          # NOTE(review): sampling presumably populates the estimates that
          # cost(True) reads -- confirm against Plan.sample / Plan.cost.
          joinplan.sample(100)

          if bestJoin is None or joinplan.cost(True) < bestJoin.cost(True):
            bestJoin = joinplan
            sourcePair = pair

        self.reportPlanCount += len(candidates)
        self.clearSampleFiles()

      worklist.remove(sourcePair[0])
      worklist.remove(sourcePair[1])
      worklist.append(bestJoin)

    # The single remaining entry covers every relation.
    newPlan = worklist[0]

    if isGroupBy:
      oldRoot = plan.root
      newGroupBy = GroupBy(
        newPlan.root,
        groupSchema=oldRoot.groupSchema,
        aggSchema=oldRoot.aggSchema,
        groupExpr=oldRoot.groupExpr,
        aggExprs=oldRoot.aggExprs,
        groupHashFn=oldRoot.groupHashFn)
      newGroupBy.prepare(self.db)
      newPlan = Plan(root=newGroupBy)

    # Restore the original output fields if the join tree exposes a different
    # set of (field, type) pairs.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
      projectDict = {f: (f, t) for f, t in outputSchema.schema()}
      project = Project(newPlan.root, projectDict)
      project.prepare(self.db)
      newPlan = Plan(root=project)

    return newPlan
コード例 #2
0
    def pickJoinOrder(self, plan):
        """
        Greedily pick a join order for `plan` and return the rebuilt plan.

        One access plan is built per base relation (a TableScan, wrapped in a
        Select when `selectTablesDict` has predicates keyed by that single
        relation). While more than one sub-plan remains, every unordered pair
        of sub-plans is joined in both operand orders with both the
        "block-nested-loops" and "nested-loops" methods; the candidate with
        the lowest `cost(True)` across all pairs replaces its two inputs in
        the worklist.

        If the original root is a GroupBy it is re-applied on top of the join
        tree, and a Project is added when the resulting fields differ from the
        original plan's schema.

        Side effect: `self.reportPlanCount` is reset to 0 and then incremented
        by the number of join candidates examined.

        :param plan: the query plan to reorder; must expose `relations()`,
            `schema()` and `root`.
        :return: a new Plan over the chosen join order.
        :raises IndexError: if `plan.relations()` is empty (no sub-plan
            exists).
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)
        # Per the original author's note: both dicts map a tuple of relation
        # names to the expressions that involve exactly those relations, so
        # that joining opt(A,B) with C can pick up the A,C and B,C predicates.
        (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        self.reportPlanCount = 0

        # Seed the worklist with one prepared access plan per base relation,
        # applying single-relation selections directly above the scan.
        worklist = []
        for r in relations:
            accessOp = TableScan(r, self.db.relationSchema(r))
            accessOp.prepare(self.db)
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                accessOp = Select(accessOp, selectString)
                accessOp.prepare(self.db)
            worklist.append(Plan(root=accessOp))

        joinMethods = ("block-nested-loops", "nested-loops")

        # Each round merges the cheapest pair, shrinking the worklist by one.
        while len(worklist) > 1:
            bestJoin = None
            sourcePair = None

            for pair in itertools.combinations(worklist, 2):
                op1 = pair[0].root
                op2 = pair[1].root
                leftRels = pair[0].relations()
                rightRels = pair[1].relations()

                selectExpr = self.createExpression(leftRels, rightRels,
                                                   selectTablesDict)
                joinExpr = self.createExpression(leftRels, rightRels,
                                                 joinTablesDict)

                # Candidate order: BNLJ(op1,op2), BNLJ(op2,op1),
                # NLJ(op1,op2), NLJ(op2,op1). Ties on cost keep the earliest
                # candidate.
                candidates = [
                    Join(lhs, rhs, expr=joinExpr, method=method)
                    for method in joinMethods
                    for (lhs, rhs) in ((op1, op2), (op2, op1))
                ]
                # A select expression of "True" means there is no extra
                # predicate to apply over this pair, so the bare join is used.
                if selectExpr != "True":
                    candidates = [
                        Select(joinOp, selectExpr) for joinOp in candidates
                    ]

                for candidateOp in candidates:
                    joinplan = Plan(root=candidateOp)
                    joinplan.prepare(self.db)
                    # NOTE(review): sampling presumably populates the
                    # estimates that cost(True) reads -- confirm against
                    # Plan.sample / Plan.cost.
                    joinplan.sample(100)

                    if (bestJoin is None
                            or joinplan.cost(True) < bestJoin.cost(True)):
                        bestJoin = joinplan
                        sourcePair = pair

                self.reportPlanCount += len(candidates)
                self.clearSampleFiles()

            worklist.remove(sourcePair[0])
            worklist.remove(sourcePair[1])
            worklist.append(bestJoin)

        # The single remaining entry covers every relation.
        newPlan = worklist[0]

        if isGroupBy:
            oldRoot = plan.root
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=oldRoot.groupSchema,
                                 aggSchema=oldRoot.aggSchema,
                                 groupExpr=oldRoot.groupExpr,
                                 aggExprs=oldRoot.aggExprs,
                                 groupHashFn=oldRoot.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        # Restore the original output fields if the join tree exposes a
        # different set of (field, type) pairs.
        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}
            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan
コード例 #3
0
ファイル: Optimizer.py プロジェクト: elanas/DB_HW3
  def pickJoinOrder(self, plan):
    """
    Choose a join order for `plan` by dynamic programming over relation subsets.

    `optDict` maps a sorted tuple of relation names to the best plan found for
    joining exactly those relations. Pass 1 fills in single-relation access
    plans (a TableScan, wrapped in a Select when `selectTablesDict` has
    predicates keyed by that relation). Each later pass handles subsets one
    relation larger: for every split produced by `getCombos` / `getComplement`
    it joins the two memoized sub-plans with both "block-nested-loops" and
    "nested-loops", and keeps the candidate with the lowest `cost(True)`.

    If the original root is a GroupBy it is re-applied on top of the join
    tree, and a Project is added when the resulting fields differ from the
    original plan's schema.

    Side effect: `self.reportPlanCount` is reset to 0, then incremented by 1
    per base relation and by 2 per split examined.

    :param plan: the query plan to reorder; must expose `relations()`,
      `schema()` and `root`.
    :return: a new Plan over the chosen join order.
    :raises KeyError: if `plan.relations()` is empty (nothing is memoized).
    """
    relations = plan.relations()
    fieldDict = self.obtainFieldDict(plan)
    # Per the original author's note: both dicts map a tuple of relation names
    # to the expressions that involve exactly those relations, so that joining
    # opt(A,B) with C can pick up the A,C and B,C predicates.
    (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

    isGroupBy = plan.root.operatorType() == "GroupBy"
    outputSchema = plan.schema()
    optDict = {}
    self.reportPlanCount = 0

    # Pass 1: single-relation access plans.
    # NOTE(review): these are not prepared here; the join plans built on top
    # of them are prepared below -- confirm that is sufficient for a
    # single-relation query.
    for r in relations:
      accessOp = TableScan(r, self.db.relationSchema(r))
      if (r,) in selectTablesDict:
        selectString = self.combineSelects(selectTablesDict[(r,)])
        accessOp = Select(accessOp, selectString)
      optDict[(r,)] = Plan(root=accessOp)
      self.reportPlanCount += 1

    joinMethods = ("block-nested-loops", "nested-loops")

    # Passes 2..n: best plan for every subset of size npass.
    for npass in range(2, len(relations) + 1):
      for c in itertools.combinations(relations, npass):
        fullList = sorted(c)
        bestJoin = None

        for subcombo in self.getCombos(fullList):
          complement = self.getComplement(fullList, subcombo)

          leftOps = optDict[tuple(complement)].root
          rightOps = optDict[tuple(subcombo)].root

          selectExpr = self.createExpression(complement, subcombo, selectTablesDict)
          joinExpr = self.createExpression(complement, subcombo, joinTablesDict)

          # Build, prepare and sample one plan per join method, in order.
          # The Select wrapper is only created when there is a real predicate
          # ("True" means nothing to filter).
          sampled = []
          for method in joinMethods:
            candidateOp = Join(leftOps, rightOps, expr=joinExpr, method=method)
            if selectExpr != "True":
              candidateOp = Select(candidateOp, selectExpr)
            candidate = Plan(root=candidateOp)
            candidate.prepare(self.db)
            # NOTE(review): sampling presumably populates the estimates that
            # cost(True) reads -- confirm against Plan.sample / Plan.cost.
            candidate.sample(100)
            sampled.append(candidate)
          joinBnlj, joinNlj = sampled

          # Costs are compared only after both plans were sampled; on a tie
          # the nested-loops plan is preferred, as before.
          if joinBnlj.cost(True) < joinNlj.cost(True):
            cheaper = joinBnlj
          else:
            cheaper = joinNlj

          if bestJoin is None or cheaper.cost(True) < bestJoin.cost(True):
            bestJoin = cheaper

          self.reportPlanCount += len(sampled)
          self.clearSampleFiles()

        optDict[tuple(fullList)] = bestJoin

    # The entry covering every relation is the chosen join tree.
    newPlan = optDict[tuple(sorted(relations))]

    if isGroupBy:
      oldRoot = plan.root
      newGroupBy = GroupBy(
        newPlan.root,
        groupSchema=oldRoot.groupSchema,
        aggSchema=oldRoot.aggSchema,
        groupExpr=oldRoot.groupExpr,
        aggExprs=oldRoot.aggExprs,
        groupHashFn=oldRoot.groupHashFn)
      newGroupBy.prepare(self.db)
      newPlan = Plan(root=newGroupBy)

    # Restore the original output fields if the join tree exposes a different
    # set of (field, type) pairs.
    if set(outputSchema.schema()) != set(newPlan.schema().schema()):
      projectDict = {f: (f, t) for f, t in outputSchema.schema()}
      project = Project(newPlan.root, projectDict)
      project.prepare(self.db)
      newPlan = Plan(root=project)

    return newPlan
コード例 #4
0
    def pickJoinOrder(self, plan):
        """
        Choose a join order for `plan` by dynamic programming over relation
        subsets.

        `optDict` maps a sorted tuple of relation names to the best plan found
        for joining exactly those relations. Pass 1 fills in single-relation
        access plans (a TableScan, wrapped in a Select when `selectTablesDict`
        has predicates keyed by that relation). Each later pass handles
        subsets one relation larger: for every split produced by `getCombos` /
        `getComplement` it joins the two memoized sub-plans with both
        "block-nested-loops" and "nested-loops", and keeps the candidate with
        the lowest `cost(True)`.

        If the original root is a GroupBy it is re-applied on top of the join
        tree, and a Project is added when the resulting fields differ from the
        original plan's schema.

        Side effect: `self.reportPlanCount` is reset to 0, then incremented by
        1 per base relation and by 2 per split examined.

        :param plan: the query plan to reorder; must expose `relations()`,
            `schema()` and `root`.
        :return: a new Plan over the chosen join order.
        :raises KeyError: if `plan.relations()` is empty (nothing is
            memoized).
        """
        relations = plan.relations()
        fieldDict = self.obtainFieldDict(plan)
        # Per the original author's note: both dicts map a tuple of relation
        # names to the expressions that involve exactly those relations, so
        # that joining opt(A,B) with C can pick up the A,C and B,C predicates.
        (joinTablesDict, selectTablesDict) = self.getExprDicts(plan, fieldDict)

        isGroupBy = plan.root.operatorType() == "GroupBy"
        outputSchema = plan.schema()
        optDict = {}
        self.reportPlanCount = 0

        # Pass 1: single-relation access plans.
        # NOTE(review): these are not prepared here; the join plans built on
        # top of them are prepared below -- confirm that is sufficient for a
        # single-relation query.
        for r in relations:
            accessOp = TableScan(r, self.db.relationSchema(r))
            if (r, ) in selectTablesDict:
                selectString = self.combineSelects(selectTablesDict[(r, )])
                accessOp = Select(accessOp, selectString)
            optDict[(r, )] = Plan(root=accessOp)
            self.reportPlanCount += 1

        joinMethods = ("block-nested-loops", "nested-loops")

        # Passes 2..n: best plan for every subset of size npass.
        for npass in range(2, len(relations) + 1):
            for c in itertools.combinations(relations, npass):
                fullList = sorted(c)
                bestJoin = None

                for subcombo in self.getCombos(fullList):
                    complement = self.getComplement(fullList, subcombo)

                    leftOps = optDict[tuple(complement)].root
                    rightOps = optDict[tuple(subcombo)].root

                    selectExpr = self.createExpression(
                        complement, subcombo, selectTablesDict)
                    joinExpr = self.createExpression(
                        complement, subcombo, joinTablesDict)

                    # Build, prepare and sample one plan per join method, in
                    # order. The Select wrapper is only created when there is
                    # a real predicate ("True" means nothing to filter).
                    sampled = []
                    for method in joinMethods:
                        candidateOp = Join(leftOps,
                                           rightOps,
                                           expr=joinExpr,
                                           method=method)
                        if selectExpr != "True":
                            candidateOp = Select(candidateOp, selectExpr)
                        candidate = Plan(root=candidateOp)
                        candidate.prepare(self.db)
                        # NOTE(review): sampling presumably populates the
                        # estimates that cost(True) reads -- confirm against
                        # Plan.sample / Plan.cost.
                        candidate.sample(100)
                        sampled.append(candidate)
                    joinBnlj, joinNlj = sampled

                    # Costs are compared only after both plans were sampled;
                    # on a tie the nested-loops plan is preferred, as before.
                    if joinBnlj.cost(True) < joinNlj.cost(True):
                        cheaper = joinBnlj
                    else:
                        cheaper = joinNlj

                    if (bestJoin is None
                            or cheaper.cost(True) < bestJoin.cost(True)):
                        bestJoin = cheaper

                    self.reportPlanCount += len(sampled)
                    self.clearSampleFiles()

                optDict[tuple(fullList)] = bestJoin

        # The entry covering every relation is the chosen join tree.
        newPlan = optDict[tuple(sorted(relations))]

        if isGroupBy:
            oldRoot = plan.root
            newGroupBy = GroupBy(newPlan.root,
                                 groupSchema=oldRoot.groupSchema,
                                 aggSchema=oldRoot.aggSchema,
                                 groupExpr=oldRoot.groupExpr,
                                 aggExprs=oldRoot.aggExprs,
                                 groupHashFn=oldRoot.groupHashFn)
            newGroupBy.prepare(self.db)
            newPlan = Plan(root=newGroupBy)

        # Restore the original output fields if the join tree exposes a
        # different set of (field, type) pairs.
        if set(outputSchema.schema()) != set(newPlan.schema().schema()):
            projectDict = {f: (f, t) for f, t in outputSchema.schema()}
            project = Project(newPlan.root, projectDict)
            project.prepare(self.db)
            newPlan = Plan(root=project)

        return newPlan