Example #1
0
    def getExprDicts(self, plan, fieldDict):
        q = []
        q.append(plan.root)
        selectTablesDict = {
        }  # mapping of relation list to list of exprs using them: [A,B] -> [a < b, etc]
        joinTablesDict = {}  # same thing but for joins, not selects

        while len(q) > 0:
            currNode = q.pop()

            if (currNode.operatorType() == "Select"):
                selectExprList = ExpressionInfo(
                    currNode.selectExpr).decomposeCNF()
                for selectExpr in selectExprList:
                    attrList = ExpressionInfo(selectExpr).getAttributes()
                    sourceList = []
                    for attr in attrList:
                        source = fieldDict[attr]
                        if source not in sourceList:
                            sourceList.append(source)

                    sourceTuple = tuple(sorted(sourceList))
                    if sourceTuple not in selectTablesDict:
                        selectTablesDict[sourceTuple] = []
                    selectTablesDict[sourceTuple].append(selectExpr)

            elif "Join" in currNode.operatorType():
                joinExprList = ExpressionInfo(currNode.joinExpr).decomposeCNF()
                for joinExpr in joinExprList:
                    attrList = ExpressionInfo(joinExpr).getAttributes()
                    sourceList = []
                    for attr in attrList:
                        source = fieldDict[attr]
                        if source not in sourceList:
                            sourceList.append(source)

                    sourceTuple = tuple(sorted(sourceList))
                    if sourceTuple not in joinTablesDict:
                        joinTablesDict[sourceTuple] = []
                    joinTablesDict[sourceTuple].append(joinExpr)

            if len(currNode.inputs()) > 1:
                q.append(currNode.lhsPlan)
                q.append(currNode.rhsPlan)
            elif len(currNode.inputs()) == 1:
                q.append(currNode.subPlan)

        return (joinTablesDict, selectTablesDict)
Example #2
0
    def reorderSelProj(self, operator):
        if operator.operatorType() == "TableScan":
            return operator
        elif (operator.operatorType() == "Project"
              or operator.operatorType() == "GroupBy"):
            operator.subPlan = self.reorderSelProj(operator.subPlan)
            return operator
        elif (operator.operatorType() == "UnionAll"
              or operator.operatorType()[-4:] == "Join"):
            operator.lhsPlan = self.reorderSelProj(operator.lhsPlan)
            operator.rhsPlan = self.reorderSelProj(operator.rhsPlan)
            return operator
        else:
            subPlan = operator.subPlan
            if subPlan.operatorType() == "Project":
                subSubPlan = subPlan.subPlan
                subSubOutput = [k for (k, v) in subSubPlan.schema().schema()]

                selectFields = [
                    v for v in ExpressionInfo(
                        operator.selectExpr).getAttributes()
                ]
                # we can't filter selectFields because of the getAttributes weakness
                # We assume that we can prohibit Math.sqrt and etc here.
                if self.isSubList(selectFields, subSubOutput):
                    operator.subPlan = subSubPlan
                    subPlan.subPlan = operator
                    return self.reorderSelProj(subPlan)
                else:
                    operator.subPlan = self.reorderSelProj(operator.subPlan)
                    return operator
            else:
                operator.subPlan = self.reorderSelProj(operator.subPlan)
                return operator
Example #3
0
    def decompSelects(self, selectList):
        decompList = []

        for s in selectList:
            exprList = ExpressionInfo(s.selectExpr).decomposeCNF()
            for e in exprList:
                select = Select(None, e)
                decompList.append(select)
            return decompList
Example #4
0
 def relativeRelations(self, relIds, op):
   fields = ExpressionInfo(op.joinExpr).getAttributes()
   relativeR = set()
   for f in fields:
     for r in relIds:
       if f in self.db.relationSchema(r).fields:
         relativeR.add(r)
         break
   return frozenset(relativeR)
Example #5
0
  def get_best_join(self, candidates, required_joins):
    best_plan_cost = None
    best_plan = None
    for left, right in candidates:

      relevant_expr = None

      # Find the joinExpr corresponding to the current join candidate. If there is none, it's a
      # cartesian product.
      for join in required_joins:
        names = ExpressionInfo(join.joinExpr).getAttributes()
        if set(join.rhsSchema.fields).intersection(names) and set(join.lhsSchema.fields).intersection(names):
          relevant_expr = join.joinExpr
          break
      else:
        relevant_expr = 'True'

      # Construct a join plan for the current candidate, for each possible join algorithm.
      # TODO: Evaluate more than just nested loop joins, and determine feasibility of those methods.
      for algorithm in ["nested-loops", "block-nested-loops"]:
        test_plan = Plan(root = Join(
          lhsPlan = left,
          rhsPlan = right,
          method = algorithm,
          expr = relevant_expr
        ))

        # Prepare and run the plan in sampling mode, and get the estimated cost.
        test_plan.prepare(self.db)
        test_plan.sample(1.0)
        cost = test_plan.cost(estimated = True)

        # Update running best.
        if best_plan_cost is None or cost < best_plan_cost:
          best_plan_cost = cost
          best_plan = test_plan

    # Need to return the root operator rather than the plan itself, since it's going back into the
    # table.
    return best_plan.root
Example #6
0
    def decodeJoinExprs(self, operator):
        lst = []
        if self.isUnaryPath(operator):
            return lst
        else:
            if operator.operatorType()[-4:] == "Join":
                # The Join type we support:
                # "nested-loops", "block-nested-loops", "hash"
                # indexed join cannot work.
                if operator.joinMethod == "nested-loops" or operator.joinMethod == "block-nested-loops":
                    lst.append(
                        tuple(
                            ExpressionInfo(operator.joinExpr).getAttributes()))
                elif operator.joinMethod == "hash":
                    lst.append((operator.lhsKeySchema.fields[0],
                                operator.rhsKeySchema.fields[0]))
                else:
                    raise ValueError(
                        "Join method not supported by the optimizer")
                lst += self.decodeJoinExprs(operator.lhsPlan)
                lst += self.decodeJoinExprs(operator.rhsPlan)

            return lst
Example #7
0
    def singlePushDown(self, operator):

        if operator.operatorType() == 'Select':
            selectOperator = operator
            selectOperator.subplan = self.singlePushDown(
                selectOperator.subPlan)

            subPlan = selectOperator.subPlan
            subplanType = subPlan.operatorType()

            if subplanType.endswith('Join'):
                lhsPlan = subPlan.lhsPlan
                rhsPlan = subPlan.rhsPlan

                lhsFields = lhsPlan.schema().fields
                rhsFields = rhsPlan.schema().fields

                sendToLeft = ''
                sendToRight = ''
                kept = ''

                selectExprs = ExpressionInfo(
                    selectOperator.selectExpr).decomposeCNF()

                for selectExpr in selectExprs:
                    attributes = ExpressionInfo(selectExpr).getAttributes()
                    for attr in attributes:
                        if attr in lhsFields:
                            sendToLeft += selectExpr
                            sendToLeft += ' and '
                        elif attr in rhsFields:
                            sendToRight += selectExpr
                            sendToRight += ' and '
                        else:
                            kept += selectExpr
                            kept += ' and '

                if len(sendToLeft) > 0:
                    sendToLeft = sendToLeft[:-5]
                    selectOperator.subPlan.lhsPlan = self.singlePushDown(
                        Select(selectOperator.subPlan.lhsPlan, sendToLeft))
                if len(sendToRight) > 0:
                    sendToRight = sendToRight[:-5]
                    selectOperator.subPlan.rhsPlan = self.singlePushDown(
                        Select(selectOperator.subPlan.rhsPlan, sendToRight))
                if len(kept) > 0:
                    kept = kept[:-5]
                    return Select(selectOperator.subplan, kept)

            elif subplanType == 'UnionAll':
                subPlan.lhsPlan = self.singlePushDown(
                    Select(subPlan.lhsPlan, selectOperator.selectExpr))
                subPlan.rhsPlan = self.singlePushDown(
                    Select(subPlan.rhsPlan, selectOperator.selectExpr))

            else:  # We only push down selects through joins and unions
                return selectOperator

            return selectOperator.subPlan  # This is the very last return statement

        elif operator.operatorType() == 'Project':
            projectOperator = operator
            projectOperator.subPlan = self.singlePushDown(
                projectOperator.subPlan)

            subPlan = projectOperator.subPlan
            subplanType = subPlan.operatorType()

            if subplanType == 'Select':
                selectCriteria = ExpressionInfo(
                    subPlan.selectExpr).getAttributes()

                for selection in selectCriteria:
                    if selection not in operator.projectExprs:
                        return operator

                operator.subPlan = operator.subPlan.subPlan
                operator.subPlan.subPlan = self.singlePushDown(operator)

            elif subplanType.endswith('Join'):
                lhsPlan = subPlan.lhsPlan
                rhsPlan = subPlan.rhsPlan

                lhsFields = lhsPlan.schema().fields
                rhsFields = rhsPlan.schema().fields

                sendToLeft = {}
                sendToRight = {}
                kept = {}

                projectExprs = projectOperator.projectExprs

                for key in projectExprs:
                    if key in lhsFields:
                        sendToLeft[key] = projectExprs[key]
                    elif key in rhsFields:
                        sendToRight[key] = projectExprs[key]
                    else:
                        kept[key] = projectExprs[key]

                if sendToLeft:
                    projectOperator.subPlan.lhsPlan = self.singlePushDown(
                        Project(projectOperator.subPlan.lhsPlan, sendToLeft))
                if sendToRight:
                    projectOperator.subPlan.rhsPlan = self.singlePushDown(
                        Project(projectOperator.subPlan.rhsPlan, sendToRight))
                if kept:
                    return projectOperator  # There are project Exprs that are not join predicates

            elif subplanType == 'UnionAll':
                subPlan.lhsPlan = self.singlePushDown(
                    Project(subPlan.lhsPlan, projectOperator.projectExprs))
                subPlan.rhsPlan = self.singlePushDown(
                    Project(subPlan.rhsPlan, projectOperator.projectExprs))

            else:
                return operator

            return projectOperator.subPlan

        elif operator.operatorType() == 'UnionAll' or operator.operatorType(
        ).endswith('Join'):
            operator.lhsPlan = self.singlePushDown(operator.lhsPlan)
            operator.rhsPlan = self.singlePushDown(operator.rhsPlan)
            return operator

        elif operator.operatorType() == 'GroupBy':
            operator.subPlan = self.singlePushDown(operator.subPlan)
            return operator

        else:
            return operator
Example #8
0
    def checkJoins(self, joins, allAttributes, cachedLHS, cachedRHS):
        for join in joins:
            if join.joinMethod == 'hash':
                lhsCheck = join.lhsKeySchema.fields  # This is a list
                rhsCheck = join.rhsKeySchema.fields  # This is a list
                joinAttr = lhsCheck + rhsCheck

                contains = all(x in allAttributes for x in joinAttr) and any(
                    y in cachedRHS.schema().fields for y in joinAttr)

                # print(lhsCheck, rhsCheck, contains)
                if contains:
                    keySchema1 = DBSchema('keyschema1', [(lhsCheck[0], 'int')])
                    keySchema2 = DBSchema('keyschema2', [(rhsCheck[0], 'int')])
                    hashFn1 = 'hash(' + lhsCheck[0] + ') % 7'
                    hashFn2 = 'hash(' + rhsCheck[0] + ') % 7'
                    joinExpr = lhsCheck[0] + ' == ' + rhsCheck[0]

                    if lhsCheck[0] in cachedLHS.schema().fields:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn1,
                            'lhsKeySchema': keySchema1,
                            'rhsHashFn': hashFn2,
                            'rhsKeySchema': keySchema2,
                            'joinExpr': joinExpr
                        }
                    else:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn2,
                            'lhsKeySchema': keySchema2,
                            'rhsHashFn': hashFn1,
                            'rhsKeySchema': keySchema1,
                            'joinExpr': joinExpr
                        }

            elif join.joinExpr:
                joinAttr = ExpressionInfo(join.joinExpr).getAttributes()
                contains = all(x in allAttributes for x in joinAttr)

                if contains:
                    joinAttr1 = joinAttr.pop()
                    joinAttr2 = joinAttr.pop()

                    try:
                        joinAttr.pop()
                    except KeyError:
                        pass
                        # print('There are only two attributes in this, as expected')

                    keySchema1 = DBSchema('keySchema1', [([joinAttr1, 'int'])])
                    keySchema2 = DBSchema('keySchema1', [([joinAttr2, 'int'])])
                    hashFn1 = 'hash(' + joinAttr1 + ') % 7'
                    hashFn2 = 'hash(' + joinAttr2 + ') % 7'
                    joinExpr = join.joinExpr

                    if joinAttr1 in cachedLHS.schema().fields:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn1,
                            'lhsKeySchema': keySchema1,
                            'rhsHashFn': hashFn2,
                            'rhsKeySchema': keySchema2,
                            'joinExpr': joinExpr
                        }
                    else:
                        return True, {
                            'joinMethod': 'hash',
                            'lhsHashFn': hashFn2,
                            'lhsKeySchema': keySchema2,
                            'rhsHashFn': hashFn1,
                            'rhsKeySchema': keySchema1,
                            'joinExpr': joinExpr
                        }
        else:
            return False, None
Example #9
0
    def pushdownProject(self, op):
        # First pushdown operators below:
        op.subPlan = self.pushdownOperator(op.subPlan)

        if op.subPlan.operatorType() in ["GroupBy", "TableScan"]:
            return op

        elif op.subPlan.operatorType() == "Project":
            # Attempt to remove redundant projections:
            bools = [
                op.subPlan.projectExprs[key][0].isAttribute()
                for key in op.projectExprs
            ]
            if False not in bools:
                op.subPlan = op.subPlan.subPlan
            return self.pushdownOperator(op)

        elif op.subPlan.operatorType() == "Select":
            # Move op below its subplan if op provides all attributes needed for the selectExpr
            selectAttrs = ExpressionInfo(op.subPlan.selectExpr).getAttributes()
            outputAttrs = set(op.projectExprs.keys())
            result = op
            if selectAttrs.issubset(outputAttrs):
                result = op.subPlan
                op.subPlan = result.subPlan
                result.subPlan = self.pushdownOperator(op)
            return result

        elif op.subPlan.operatorType() == "Sort":
            # TODO
            return op

        elif op.subPlan.operatorType() == "UnionAll":
            # Place a copy of op on each side of the union
            result = op.subPlan
            result.lhsPlan = self.pushdownOperator(
                Project(result.lhsPlan, op.projectExprs))
            result.rhsPlan = self.pushdownOperator(
                Project(result.rhsPlan, op.projectExprs))
            return result

        elif "Join" in op.subPlan.operatorType():
            # Partition the projections among the input relations, as much as possible
            lhsAttrs = set(op.subPlan.lhsPlan.schema().fields)
            rhsAttrs = set(op.subPlan.rhsPlan.schema().fields)
            lhsProjectExprs = {}
            rhsProjectExprs = {}
            remainingProjectExprs = False

            for attr in op.projectExprs:
                requiredAttrs = ExpressionInfo(
                    op.projectExprs[attr][0]).getAttributes()
                if requiredAttrs.issubset(lhsAttrs):
                    lhsProjectExprs[attr] = op.projectExprs[attr]
                elif requiredAttrs.issubset(rhsAttrs):
                    rhsProjectExprs[attr] = op.projectExprs[attr]
                else:
                    remainingProjectExprs = True

            if lhsProjectExprs:
                op.subPlan.lhsPlan = self.pushdownOperator(
                    Project(op.subPlan.lhsPlan, lhsProjectExprs))
            if rhsProjectExprs:
                op.subPlan.rhsPlan = self.pushdownOperator(
                    Project(op.subPlan.rhsPlan, rhsProjectExprs))

            result = op
            # Remove op from the tree if there are no remaining project expressions, and each side of the join recieved a projection
            if not remainingProjectExprs and lhsProjectExprs and rhsProjectExprs:
                result = op.subPlan
            return result
        else:
            print("Unmatched operatorType in pushdownOperator(): " +
                  op.operatorType())
            raise NotImplementedError
Example #10
0
    def pushdownSelect(self, op):
        # First pushdown operators below:
        op.subPlan = self.pushdownOperator(op.subPlan)

        if op.subPlan.operatorType() in ["GroupBy", "TableScan", "Project"]:
            return op

        elif op.subPlan.operatorType() == "Select":
            # Reorder two selects based on 'score'
            #useEstimated = True
            #opScore = (1.0 - op.selectivity(useEstimated)) / op.tupleCost
            #childScore = (1.0 - op.subPlan.selectivity(useEstimated)) / op.tupleCost

            result = op
            #if childScore > myScore:
            result = op.subPlan
            op.subPlan = result.subPlan
            result.subPlan = self.pushdownOperator(op)
            return result

        elif op.subPlan.operatorType() == "Sort":
            # Always move a select below a sort
            result = op.subPlan
            op.subPlan = result.subPlan
            result.subPlan = self.pushdownOperator(op)
            return result

        elif op.subPlan.operatorType() == "UnionAll":
            # Place a copy of op on each side of the union
            result = op.subPlan
            result.lhsPlan = self.pushdownOperator(
                Select(result.lhsPlan, op.selectExpr))
            result.rhsPlan = self.pushdownOperator(
                Select(result.rhsPlan, op.selectExpr))
            return result

        elif "Join" in op.subPlan.operatorType():
            # Partition the select expr as much as possible
            exprs = ExpressionInfo(op.selectExpr).decomposeCNF()
            lhsExprs = []
            rhsExprs = []
            remainingExprs = []

            lhsAttrs = set(op.subPlan.lhsPlan.schema().fields)
            rhsAttrs = set(op.subPlan.rhsPlan.schema().fields)

            for e in exprs:
                attrs = ExpressionInfo(e).getAttributes()
                if attrs.issubset(lhsAttrs):
                    lhsExprs.append(e)
                elif attrs.issubset(rhsAttrs):
                    rhsExprs.append(e)
                else:
                    remainingExprs.append(e)

            if lhsExprs:
                newLhsExpr = ' and '.join(lhsExprs)
                lhsSelect = Select(op.subPlan.lhsPlan, newLhsExpr)
                op.subPlan.lhsPlan = self.pushdownOperator(lhsSelect)

            if rhsExprs:
                newRhsExpr = ' and '.join(rhsExprs)
                rhsSelect = Select(op.subPlan.rhsPlan, newRhsExpr)
                op.subPlan.rhsPlan = self.pushdownOperator(rhsSelect)

            result = None
            if remainingExprs:
                newExpr = ' and '.join(remainingExprs)
                result = Select(op.subPlan, newExpr)
            else:
                result = op.subPlan

            return result
        else:
            print("Unmatched operatorType in pushdownOperator(): " +
                  op.operatorType())
            raise NotImplementedError
Example #11
0
    def pushdownProjections(self, operator):

        if operator.operatorType() == "TableScan":
            return operator
        elif (operator.operatorType() == "Select"
              or operator.operatorType() == "GroupBy"):
            newSubPlan = self.pushdownProjections(operator.subPlan)
            operator.subPlan = newSubPlan
            return operator
        elif (operator.operatorType() == "UnionAll"
              or operator.operatorType()[-4:] == "Join"):
            newlPlan = self.pushdownProjections(operator.lhsPlan)
            newrPlan = self.pushdownProjections(operator.rhsPlan)
            operator.lhsPlan = newlPlan
            operator.rhsPlan = newrPlan
            return operator
        else:
            subPlan = operator.subPlan
            if subPlan.operatorType() == "TableScan":
                return operator
            elif subPlan.operatorType() == "Select":
                subSubPlan = subPlan.subPlan
                operator.subPlan = subSubPlan
                subPlan.subPlan = operator
                return self.pushdownProjections(subPlan)
            elif subPlan.operatorType() == "GroupBy":
                newSubSubPlan = self.pushdownProjections(subPlan.subPlan)
                subPlan.subPlan = newSubSubPlan
                return operator
            elif subPlan.operatorType() == "Project":
                # Note that here we need to combine two projections
                # We assume that the upper projection must be based on the outputschema
                # of the lower one;
                subRepExp = {
                    k: v1
                    for (k, (v1, _)) in subPlan.projectExprs.items()
                }
                newExpr = dict()

                # Combine projections
                # TODO: Here we don't guarantee 100% success of replacement
                for (k, (v1, v2)) in operator.projectExprs.items():
                    newV1 = v1
                    for (key, value) in subRepExp.items():
                        newV1 = newV1.replace(key, value)
                    newExpr[k] = (newV1, v2)

                # Reorder the projection operator
                operator.projectExprs = newExpr
                operator.outputSchema  = DBSchema(operator.relationId(), \
                                  [(k, v[1]) for (k,v) in operator.projectExprs.items()])
                operator.subPlan = subPlan.subPlan
                return self.pushdownProjections(operator)
            elif subPlan.operatorType() == "UnionAll":
                # For Union operator, the push down is very simple
                subPlan.lhsPlan = Project(subPlan.lhsPlan,
                                          operator.projectExprs)
                subPlan.rhsPlan = Project(subPlan.rhsPlan,
                                          operator.projectExprs)
                subPlan.validateSchema()
                del operator
                return self.pushdownProjections(subPlan)
            else:
                # Here we deal with the Join Case
                # This is a lot harder than other cases
                # The first step is to collect input fields needed directly.
                # We grab out the fields in the projectExprs first
                # and then filter them with the project inputSchema
                fields = set()
                outputNames = [
                    k for (k, (v1, _)) in operator.projectExprs.items()
                ]
                inputNames = operator.inputSchemas()[0].fields
                lhsPlanNames = subPlan.lhsPlan.schema().fields
                rhsPlanNames = subPlan.rhsPlan.schema().fields

                for (k, (v1, _)) in operator.projectExprs.items():
                    attributes = ExpressionInfo(v1).getAttributes()
                    # filter attributes
                    for name in attributes:
                        if name not in inputNames:
                            attributes.remove(name)
                    fields = fields.union(attributes)

                # collecting join condition fields;
                if subPlan.joinMethod == "nested-loops" or subPlan.joinMethod == "block-nested-loops":
                    fields = fields.union(
                        ExpressionInfo(subPlan.joinExpr).getAttributes())
                elif subPlan.joinMethod == "hash":
                    fields = fields.union(
                        set(subPlan.lhsKeySchema.fields +
                            subPlan.rhsKeySchema.fields))
                else:
                    # We don't support indexed
                    raise NotImplementedError

                # constructing virtual l and r projections
                lprojectExpr = dict()
                rprojectExpr = dict()
                for (f, v) in subPlan.lhsPlan.schema().schema():
                    if f in fields:
                        lprojectExpr[f] = (f, v)
                for (f, v) in subPlan.rhsPlan.schema().schema():
                    if f in fields:
                        rprojectExpr[f] = (f, v)

                if len(lprojectExpr) != len(lhsPlanNames):
                    subPlan.lhsPlan = Project(subPlan.lhsPlan, lprojectExpr)
                    subPlan.lhsPlan.outputSchema  = DBSchema(subPlan.lhsPlan.relationId(), \
                                    [(k, v[1]) for (k,v) in subPlan.lhsPlan.projectExprs.items()])

                if len(rprojectExpr) != len(rhsPlanNames):
                    subPlan.rhsPlan = Project(subPlan.rhsPlan, rprojectExpr)
                    subPlan.rhsPlan.outputSchema  = DBSchema(subPlan.rhsPlan.relationId(), \
                                    [(k, v[1]) for (k,v) in subPlan.rhsPlan.projectExprs.items()])

                if subPlan.validateJoin():
                    subPlan.initializeSchema()
                # push down project through join
                operator.subPlan = self.pushdownProjections(subPlan)
                return operator
Example #12
0
  def selectPushDown(self, plan):
    root = plan.root
    selectResult = []

    #New a stack and put info about op into it in the form of
    # (current op, parent op, accumulateSelect)
    queue = deque([(root, None, None)])

    while queue:
      (curr, parent, accuSelect) = queue.popleft()
      children = curr.inputs()

      if children:
        #When dealing with Select, collect select expressions into accumulate select
        if isinstance(curr, Select):
          if not accuSelect:
            accuSelect = []
          for decomp in ExpressionInfo(curr.selectExpr).decomposeCNF():
            accuSelect.append(decomp)

          queue.extendleft([(children[0], curr, accuSelect)])

        #Do not pushdown project at this point, so put it into result.
        #Accumulate select can always pass project
        elif isinstance(curr, Project):
          selectResult.append((curr, parent))
          queue.extendleft([(children[0], curr, accuSelect)])

        #When encountering a join, seperate the accumulate select expressions into three parts,
        #one part goes to left, one goes to right, and the remaining place above the join operator
        elif isinstance(curr, Join):
          leftSelect = []
          rightSelect = []
          newSelect = None
          leftFields = curr.lhsSchema.fields
          rightFields = curr.rhsSchema.fields
          put = []
          if accuSelect:
            for a in accuSelect:
              f = ExpressionInfo(a).getAttributes()
              flag = False
              if set(f).issubset(set(leftFields)):
                leftSelect.append(a)
                flag = True
              if set(f).issubset(set(rightFields)):
                rightSelect.append(a)
                flag = True
              if not flag:
                put.append(a)
            if put:
              newSelect = self.placeSelect(put, curr, parent, selectResult)

          if newSelect:
            selectResult.append((curr, newSelect))
          else:
            selectResult.append((curr, parent))

          queue.extendleft([(curr.lhsPlan, curr, leftSelect)])
          queue.extendleft([(curr.rhsPlan, curr, rightSelect)])

        #When encounter groupby, place all the accumulate select
        elif isinstance(curr, GroupBy):
          newSelect = self.placeSelect(accuSelect, curr, parent, selectResult)

          if newSelect:
            selectResult.append((curr, newSelect))
          else:
            selectResult.append((curr, parent))

          queue.extendleft([(children[0], curr, None)])

        #Deal with union similarly to join
        else:
          leftSelect = []
          rightSelect = []
          newSelect = None
          attrs = curr.unionSchema.fields
          put = []

          if accuSelect:
            for a in accuSelect:
              f = ExpressionInfo(a).getAttributes()
              if set(f).issubset(set(attrs)):
                leftSelect.append(a)
                rightSelect.append(a)
              else:
                put.append(a)

            newSelect = self.placeSelect(accuSelect, curr, parent, selectResult)

          if newSelect:
            selectResult.append((curr, newSelect))
          else:
            selectResult.append((curr, parent))

          queue.extendleft([(curr.lhsPlan, curr, leftSelect)])
          queue.extendleft([(curr.rhsPlan, curr, rightSelect)])

      #Deal with tablescan, place all the accumulate select
      else:
        newSelect = self.placeSelect(accuSelect, curr, parent, selectResult)

        if newSelect:
          selectResult.append((curr, newSelect))
        else:
          selectResult.append((curr, parent))

    newRoot = selectResult[0][0]
    return Plan(root=newRoot)
Example #13
0
  def projectPushDown(self, plan):
    root = plan.root
    result = []

    #Keep info in the form (current op, parent, accumulate Porject)
    queue = deque([(root, None, None)])

    while queue:
      (curr, parent, accuProject) = queue.popleft()
      children = curr.inputs()

      if children:
        #Add current project into accumulate project
        if isinstance(curr, Project):
          if not accuProject:
            accuProject = curr.projectExprs
          else:
            accuProject.update({curr.projectExprs})

          queue.extendleft([(children[0], curr, accuProject)])

        elif isinstance(curr, Select):
          newProject = None
          if accuProject:
            selectAttrs = ExpressionInfo(curr.selectExpr).getAttributes()
            projectAttrs = self.getProjectAttrs(accuProject)
            newProject = Project(curr, accuProject)
            if set(selectAttrs).issubset(set(projectAttrs)):
              result.append((curr, parent))
              queue.extendleft([(children[0], curr, accuProject)])
              '''
              #If considering the order of select and project:
              #Project can go through select
              #but if the selectivity of select is smaller, we do not let project pass
              curr.useSampling(sampled=True, sampleFactor=10.0)
              newProject.useSampling(sampled=True, sampleFactor=10.0)
              if curr.selectivity(estimated=True) < newProject.selectivity(estimated=True):
                result.append((newProject, parent))
                result.append((curr, newProject))
                queue.extendleft([(children[0], curr, None)])
              else:
                result.append((curr, parent))
                queue.extendleft([(children[0], curr, accuProject)])
              '''
            #If select operation has attributes that don't belongs to project
            #project has to stop here
            else:
              result.append((newProject, parent))
              result.append((curr, newProject))
              queue.extendleft([(children[0], curr, None)])

          else:
            result.append((curr, parent))
            queue.extendleft([(children[0], curr, accuProject)])

        elif isinstance(curr, Join):
          #If we don't decompose project
          if accuProject:
            newProject = Project(curr, accuProject)
            result.append((newProject, parent))
            result.append((curr, newProject))
          else:
            result.append((curr, parent))
          queue.extendleft([(curr.lhsPlan, curr, None)])
          queue.extendleft([(curr.rhsPlan, curr, None)])
          '''
          #This part can be used to decompose project operation
          leftProject = {}
          rightProject = {}
          newProject = None
          leftFields = curr.lhsSchema.fields
          rightFields = curr.rhsSchema.fields
          put = {}

          if accuProject:
            projectAttrs = self.getProjectAttrs(accuProject)
            joinAttrs = ExpressionInfo(curr.joinExpr).getAttributes()
            if set(joinAttrs).issubset(set(projectAttrs)):
              for (k,v) in accuProject.items():
                flag = False
                f = ExpressionInfo(k).getAttributes()
                if set(f).issubset(set(leftFields)):
                  leftProject.update({k: v})
                  flag = True
                if set(f).issubset(set(rightFields)):
                  rightProject.update({k: v})
                  flag = True
                if not flag:
                  put.update({k: v})

              if put:
                newProject = Project(curr, put)
                result.append((newProject, parent))

            else:
              newProject = Project(curr, accuProject)
              result.append((newProject, parent))

          if newProject:
            result.append((curr, newProject))
          else:
            result.append((curr, parent))

          queue.extendleft([(curr.lhsPlan, curr, leftProject)])
          queue.extendleft([(curr.rhsPlan, curr, rightProject)])
          '''

        elif isinstance(curr, GroupBy):
          newProject = None

          if accuProject:
            newProject = Project(curr, accuProject)
            result.append((newProject, parent))


          if newProject:
            result.append((curr, newProject))
          else:
            result.append((curr, parent))

          queue.extendleft([(children[0], curr, None)])

        else:
          #If we don't decompose project
          if accuProject:
            newProject = Project(curr, accuProject)
            result.append((newProject, parent))
            result.append((curr, newProject))
          else:
            result.append((curr, parent))
          queue.extendleft([(curr.lhsPlan, curr, None)])
          queue.extendleft([(curr.rhsPlan, curr, None)])
          '''
          #This part can be used to decompose project
          leftProject = {}
          rightProject = {}
          newProject = None
          attrs = curr.unionSchema.fields
          put = {}

          if accuProject:
            projectAttrs = self.getProjectAttrs(accuProject)
            if set(attrs).issubset(set(projectAttrs)):
              leftProject = accuProject
              rightProject = accuProject
            else:
              newProject = Project(curr, accuProject)
              result.append((newProject, parent))

          if newProject:
            result.append((curr, newProject))
          else:
            result.append((curr, parent))

          queue.extendleft([(curr.lhsPlan, curr, leftProject)])
          queue.extendleft([(curr.rhsPlan, curr, rightProject)])
          '''

      else:
        newProject = None
        if accuProject:
          newProject = Project(curr, accuProject)
        if newProject:
          result.append((newProject, parent))
          result.append((curr, newProject))
        else:
          result.append((curr, parent))

    newRoot = result[0][0]
    return Plan(root=newRoot)
Example #14
0
	def pushdownSelect(self, op):
		# First pushdown operators below:
		op.subPlan = self.pushdownOperator(op.subPlan)

		if op.subPlan.operatorType() in ["GroupBy", "TableScan", "Project"]:
			return op

		elif op.subPlan.operatorType() == "Select":
			# Reorder two selects based on 'score'
			useEstimated = True
			opScore = (1 - op.selectivity(useEstimated)) / op.tupleCost
			childScore = (1 - op.subPlan.selectivity(useEstimated)) / op.tupleCost

			result = op
			if childScore > opScore:
				result = op.subPlan
				op.subPlan = result.subPlan
				result.subPlan = self.pushdownOperator(op)
			return result

		elif op.subPlan.operatorType() == "Sort":
			# Always move a select below a sort
			result = op.subPlan
			op.subPlan = result.subPlan
			result.subPlan = self.pushdownOperator(op)
			return result

		elif op.subPlan.operatorType() == "UnionAll":
			# Place a copy of op on each side of the union
			result = op.subPlan
			result.lhsPlan = self.pushdownOperator(Select(result.lhsPlan, op.selectExpr))
			result.rhsPlan = self.pushdownOperator(Select(result.rhsPlan, op.selectExpr))
			return result

		elif "Join" in op.subPlan.operatorType():
			# Partition the select expr as much as possible
			exprs = ExpressionInfo(op.selectExpr).decomposeCNF()
			lhsExprs = []
			rhsExprs = []
			remainingExprs = []

			lhsAttrs = set(op.subPlan.lhsPlan.schema().fields)
			rhsAttrs = set(op.subPlan.rhsPlan.schema().fields)

			for e in exprs:
				attrs = ExpressionInfo(e).getAttributes()
				if attrs.issubset(lhsAttrs):
					lhsExprs.append(e)
				elif attrs.issubset(rhsAttrs):
					rhsExprs.append(e)
				else:
					remainingExprs.append(e)

			if lhsExprs:
				newLhsExpr = ' and '.join(lhsExprs)
				lhsSelect = Select(op.subPlan.lhsPlan, newLhsExpr)
				op.subPlan.lhsPlan = self.pushdownOperator(lhsSelect)

			if rhsExprs:
				newRhsExpr = ' and '.join(rhsExprs)
				rhsSelect = Select(op.subPlan.rhsPlan, newRhsExpr)
				op.subPlan.rhsPlan = self.pushdownOperator(rhsSelect)

			result = None
			if remainingExprs:
				newExpr = ' and '.join(remainingExprs)
				result = Select(op.subPlan, newExpr)
			else:
				result = op.subPlan

			return result
		else:
			print("Unmatched operatorType in pushdownOperator(): " + op.operatorType())
			raise NotImplementedError
Example #15
0
    def pushdownSelections(self, operator):
        if operator.operatorType() == "TableScan":
            return operator
        elif (operator.operatorType() == "Project"
              or operator.operatorType() == "GroupBy"):
            newSubPlan = self.pushdownSelections(operator.subPlan)
            operator.subPlan = newSubPlan
            return operator
        elif (operator.operatorType() == "UnionAll"
              or operator.operatorType()[-4:] == "Join"):
            newlPlan = self.pushdownSelections(operator.lhsPlan)
            newrPlan = self.pushdownSelections(operator.rhsPlan)
            operator.lhsPlan = newlPlan
            operator.rhsPlan = newrPlan
            return operator
        else:
            # Here we deal with the Select Case
            # This is a lot harder than other cases
            subPlan = operator.subPlan
            # trivial case
            if subPlan.operatorType() == "TableScan":
                return operator
            # In this case we need to combine two selections
            elif subPlan.operatorType() == "Select":
                operator.selectExpr = "(" + operator.selectExpr + ")" + " and " + "(" + subPlan.selectExpr + ")"
                operator.subPlan = subPlan.subPlan
                del subPlan
                return self.pushdownSelections(operator)
            # We don't have to move selections through groupby since
            # groupby may create new field names
            elif subPlan.operatorType() == "GroupBy":
                newSubSubPlan = self.pushdownSelections(subPlan.subPlan)
                subPlan.subPlan = newSubSubPlan
                return operator
            elif subPlan.operatorType() == "UnionAll":
                subPlan.lhsPlan = Select(subPlan.lhsPlan, operator.selectExpr)
                subPlan.rhsPlan = Select(subPlan.rhsPlan, operator.selectExpr)
                subPlan.validateSchema()
                del operator
                return self.pushdownSelections(subPlan)

            # Some tricky behavior here.
            # We substitute all some tokens in selectExpr by the projectExpr.
            # However, here we only support some easy computations. We cannot
            # exhaustively test all the cases (all the math exprs)
            elif subPlan.operatorType() == "Project":
                selectExpr = operator.selectExpr
                for (k, (v1, _)) in subPlan.projectExprs.items():
                    selectExpr = selectExpr.replace(k, "(" + v1 + ")")
                operator.subPlan = subPlan.subPlan
                subPlan.subPlan = operator
                return self.pushdownSelections(subPlan)
            else:
                # Here we move the selections down to the Join Operator
                lhsPlanNames = subPlan.lhsPlan.schema().fields
                rhsPlanNames = subPlan.rhsPlan.schema().fields
                cnfExprList = ExpressionInfo(
                    operator.selectExpr).decomposeCNF()

                lhsSelectExpr = ""
                rhsSelectExpr = ""
                remSelectExpr = ""

                for expr in cnfExprList:
                    attributes = []
                    # filter attributes
                    for var in ExpressionInfo(expr).getAttributes():
                        if (var in lhsPlanNames):
                            attributes.append(var)
                        if (var in rhsPlanNames):
                            attributes.append(var)

                    if self.isSubList(attributes, lhsPlanNames):
                        if lhsSelectExpr == "":
                            lhsSelectExpr += "(" + expr + ")"
                        else:
                            lhsSelectExpr += " and " + "(" + expr + ")"

                    elif self.isSubList(attributes, rhsPlanNames):
                        if rhsSelectExpr == "":
                            rhsSelectExpr += "(" + expr + ")"
                        else:
                            rhsSelectExpr += " and " + "(" + expr + ")"

                    else:
                        if remSelectExpr == "":
                            remSelectExpr += "(" + expr + ")"
                        else:
                            remSelectExpr += " and " + "(" + expr + ")"

                # push down selections
                if remSelectExpr == "":
                    # A case that the selection all comes from lhsPlan
                    if (lhsSelectExpr != "" and rhsSelectExpr == ""):
                        operator.subPlan = subPlan.lhsPlan
                        operator.selectExpr = lhsSelectExpr
                        subPlan.lhsPlan = operator
                    elif (rhsSelectExpr != "" and lhsSelectExpr == ""):
                        operator.subPlan = subPlan.rhsPlan
                        operator.selectExpr = rhsSelectExpr
                        subPlan.rhsPlan = operator
                    else:
                        subPlan.lhsPlan = Select(subPlan.lhsPlan,
                                                 lhsSelectExpr)
                        subPlan.rhsPlan = Select(subPlan.rhsPlan,
                                                 rhsSelectExpr)
                        del operator

                    return self.pushdownSelections(subPlan)
                else:
                    operator.selectExpr = remSelectExpr
                    if (lhsSelectExpr != "" and rhsSelectExpr == ""):
                        subPlan.lhsPlan = Select(subPlan.lhsPlan,
                                                 lhsSelectExpr)
                    elif (rhsSelectExpr != "" and lhsSelectExpr == ""):
                        subPlan.rhsPlan = Select(subPlan.rhsPlan,
                                                 rhsSelectExpr)
                    else:
                        subPlan.lhsPlan = Select(subPlan.lhsPlan,
                                                 lhsSelectExpr)
                        subPlan.rhsPlan = Select(subPlan.rhsPlan,
                                                 rhsSelectExpr)

                    if subPlan.validateJoin():
                        subPlan.initializeSchema()
                    operator.subPlan = self.pushdownSelections(subPlan)
                    return operator
Example #16
0
	def pushdownProject(self, op):
		# First pushdown operators below:
		op.subPlan = self.pushdownOperator(op.subPlan)

		if op.subPlan.operatorType() in ["GroupBy", "TableScan"]:
			return op

		elif op.subPlan.operatorType() == "Project":
			# Attempt to remove redundant projections:
			bools = [op.subPlan.projectExprs[key][0].isAttribute() for key in op.projectExprs]
			if False not in bools:
				op.subPlan = op.subPlan.subPlan
			return self.pushdownOperator(op)

		elif op.subPlan.operatorType() == "Select":
			# Move op below its subplan if op provides all attributes needed for the selectExpr
			selectAttrs = ExpressionInfo(op.subPlan.selectExpr).getAttributes()
			outputAttrs = set(op.projectExprs.keys())
			result = op
			if selectAttrs.issubset(outputAttrs):
				result = op.subPlan
				op.subPlan = result.subPlan
				result.subPlan = self.pushdownOperator(op)
			return result

		elif op.subPlan.operatorType() == "Sort":
			return op

		elif op.subPlan.operatorType() == "UnionAll":
			# Place a copy of op on each side of the union
			result = op.subPlan
			result.lhsPlan = self.pushdownOperator(Project(result.lhsPlan, op.projectExprs))
			result.rhsPlan = self.pushdownOperator(Project(result.rhsPlan, op.projectExprs))
			return result

		elif "Join" in op.subPlan.operatorType():
			# Partition the projections among the input relations, as much as possible
			lhsAttrs = set(op.subPlan.lhsPlan.schema().fields)
			rhsAttrs = set(op.subPlan.rhsPlan.schema().fields)
			lhsProjectExprs = {}
			rhsProjectExprs = {}
			remainingProjectExprs = False

			for attr in op.projectExprs:
				requiredAttrs = ExpressionInfo(op.projectExprs[attr][0]).getAttributes()
				if requiredAttrs.issubset(lhsAttrs):
					lhsProjectExprs[attr] = op.projectExprs[attr]
				elif requiredAttrs.issubset(rhsAttrs):
					rhsProjectExprs[attr] = op.projectExprs[attr]
				else:
					remainingProjectExprs = True

			if lhsProjectExprs:
				op.subPlan.lhsPlan = self.pushdownOperator(Project(op.subPlan.lhsPlan, lhsProjectExprs))
			if rhsProjectExprs:
				op.subPlan.rhsPlan = self.pushdownOperator(Project(op.subPlan.rhsPlan, rhsProjectExprs))

			result = op
			# Remove op from the tree if there are no remaining project expressions, and each side of the join recieved a projection
			if not remainingProjectExprs and lhsProjectExprs and rhsProjectExprs:
				result = op.subPlan
			return result
		else:
			print("Unmatched operatorType in pushdownOperator(): " + op.operatorType())
			raise NotImplementedError
Example #17
0
  def pushdownHelper(self, operator):
    #first determine operator type
    opertorType = operator.operatorType()

    #first check if valid operatorType
    if operatorType != "Project" and operatorType != "Select" and operatorType != "GroupBy" and operatorType != "Sort" and operatorType != "UnionAll" and operatorType[-4:] != "Join":
      return operator

    elif operatorType == "Project":
      operator.subPlan = self.pushdownHelper(operator.subPlan)
      subplanType = operator.subPlan.operatorType()

      #call second helper
      if subplanType == "Select":

        '''
        Check keys - if not in keys, cannot pushdown anymore
        '''
        for select in ExpressionInfo(operator.subPlan.selectExpr).getAttributes():
          keys = operator.projectExprs.keys()
          if select not in keys:
            return operator

        operator.subPlan = operator.subPlan.subPlan
        operator.subPlan.subPlan = self.pushdownHelper(operator)

      elif subplanType[-4:] == "Join":

        items = operator.projectExprs.items()

        right = operator.subPlan.rhsPlan.schema().fields
        rightProject = {}

        left = operator.subPlan.lhsPlan.schema().fields
        leftProject = {}

        for (attribute, (expr, rand)) in items:
          pros = ExpressionInfo(expr)getAttributes()

          result = True
          #left
          for e in pros:
            if e not in left:
              result = False

          # if True
          if result:
            leftProject[attribute] = operator.projectExprs[attribute]
            continue

          #repeat with right now
          result = True
          for e in pros:
            if e not in right:
              result = False

          if result:
            rightProject[attribute] = operator.projectExprs[attribute]

        #end for

        #if left dictionary not empty
        #remember empty dic evaluates to false
        if leftProject:
          lPlan = operator.subPlan.lhsPlan
          operator.subPlan.lhsPlan = self.pushdownHelper(Project(lPlan, leftProject))

        if rightProject:
          rPlan = operator.subPlan.rhsPlan
          operator.subPlan.rhsPlan = self.pushdownHelper(Project(rPlan, rightProject))


        #length check - must be same size iIOT pushdown
        fullSize = len(operator.projectExprs)
        rightSize = len(rightProject)
        leftSize = len(leftProject)

        if fullSize != (rightSize + leftSize):
          return operator

      #end subPlan "Join"

      elif subplanType == "UnionAll":
        tempLeft = Project(operator.subPlan.lhsPlan)
        tempRight = Project(operator.subPlan.rhsPlan)

        operator.subPlan.lhsPlan = self.pushdownHelper(tempLeft, operator.projectExprs)
        operator.subPlan.rhsPlan = self.pushdownHelper(tempRight, operator.projectExprs)

      #else not Join or Union
      else:
        return operator

      return operator.subPlan

    #end "Project"

    #safety check above, so operatorType must be "Select"
    elif operatorType == "Select":

      #first part same as with "Project": subPlan pushdown
      operator.subPlan = self.pushdownHelper(operator.subPlan)
      subplanType = operator.subPlan.operatorType()

      if subplanType == "Sort" or "sort":
        operator.subPlan = operator.subPlan.subPlan
        operator.subPlan.subPlan = self.pushdownHelper(operator)
      elif subplanType[-4:] == "Join":

        selectExpress = ExpressionInfo(operator.selectExpr).decomposeCNF()



        left = operator.subPlan.lhsPlan.schema().fields
        right = operator.subPlan.rhsPlan.schema().fields
        leftExpress = []
        leftAttributes = set(operator.subPlan.lhsPlan.schema().fields)
        rightAttributes = set(operator.subPlan.rhsPlan.schema().fields)
        rightExpress = []
        unpushedExpress = []

        for expr in selectExpress:
          select = ExpressionInfo(selectExpr).getAttributes()
          if select.issubset(leftAttributes):
            left.append(select)
          elif select.issubset(rightAttributes):
            right.append(select)
          else:
            unpushedExpress.append(select)


        if leftExpress:
          newExpression = ' and '.join(leftExpress)
          #lSelect
          op.subPlan.lhsPlan = self.pushdownHelper(Select(operator.subPlan.lhsPlan, newExpression))

        if rightExpress:
          newExpression = ' and '.join(rightExpress)
          op.subPlan.rhsPlan = self.pushdownHelper(Select(operator.subPlan.rhsPlan, newExpression))

        if unpushedExpress:
          return Select(operator.subPlan, ' and '.join(unpushedExpress))

        else:
          return operator
        return operator.subPlan

    elif operatorType == "UnionAll" or operatorType[-4:] == "Join":
      operator.lhsPlan = self.pushdownHelper(operator.lhsPlan)
      operator.rhsPlan = self.pushdownHelper(operator.rhsPlan)
      return operator

    elif operatorType == "GroupBy" or operatorType == "Sort":
      operator.subPlan = self.pushdownHelper(operator.subPlan)
      return operator