コード例 #1
0
ファイル: Optimizer.py プロジェクト: Brinews/jacky
	def pushdownProject(self, op):
		"""Push the Project operator `op` down through its sub-plan.

		The sub-plan is optimized first with self.pushdownOperator(). Depending
		on the operatorType() of the resulting child, `op` is kept in place,
		moved below the child, copied onto both inputs of the child, or removed.

		Returns the operator that is the root of the rewritten sub-tree.
		Raises NotImplementedError when the child's operator type is not
		handled here.
		"""
		# First pushdown operators below:
		op.subPlan = self.pushdownOperator(op.subPlan)
		subPlanType = op.subPlan.operatorType()

		if subPlanType in ["GroupBy", "TableScan", "Sort"]:
			# The projection is left above these operators.
			return op

		elif subPlanType == "Project":
			# Attempt to remove redundant projections: the child Project is
			# dropped when every expression it stores under one of op's output
			# names reports isAttribute().
			# NOTE(review): this indexes the child's projectExprs with op's
			# output names and fails with KeyError if the child does not define
			# one of them -- confirm callers guarantee matching names.
			bools = [op.subPlan.projectExprs[key][0].isAttribute() for key in op.projectExprs]
			if False not in bools:
				op.subPlan = op.subPlan.subPlan
				# op has a new child now, so run the pushdown once more.
				return self.pushdownOperator(op)
			# The child stays where it is. Presumably pushdownOperator() sends
			# Project operators back to this method, so re-running it on an
			# unchanged tree would never terminate; op is already in place.
			return op

		elif subPlanType == "Select":
			# Move op below its subplan if op provides all attributes needed for the selectExpr
			selectAttrs = ExpressionInfo(op.subPlan.selectExpr).getAttributes()
			outputAttrs = set(op.projectExprs.keys())
			result = op
			if selectAttrs.issubset(outputAttrs):
				result = op.subPlan
				op.subPlan = result.subPlan
				result.subPlan = self.pushdownOperator(op)
			return result

		elif subPlanType == "UnionAll":
			# Place a copy of op on each side of the union
			result = op.subPlan
			result.lhsPlan = self.pushdownOperator(Project(result.lhsPlan, op.projectExprs))
			result.rhsPlan = self.pushdownOperator(Project(result.rhsPlan, op.projectExprs))
			return result

		elif "Join" in subPlanType:
			# Partition the projections among the input relations, as much as possible
			# NOTE(review): a projection pushed to one side keeps only the
			# outputs listed in it; confirm the join (and any expression that
			# stays in op) does not need other attributes of that side.
			join = op.subPlan
			lhsAttrs = set(join.lhsPlan.schema().fields)
			rhsAttrs = set(join.rhsPlan.schema().fields)
			lhsProjectExprs = {}
			rhsProjectExprs = {}
			remainingProjectExprs = False

			for attr in op.projectExprs:
				requiredAttrs = ExpressionInfo(op.projectExprs[attr][0]).getAttributes()
				if requiredAttrs.issubset(lhsAttrs):
					lhsProjectExprs[attr] = op.projectExprs[attr]
				elif requiredAttrs.issubset(rhsAttrs):
					rhsProjectExprs[attr] = op.projectExprs[attr]
				else:
					# Needs attributes from both inputs, so it cannot be pushed.
					remainingProjectExprs = True

			if lhsProjectExprs:
				join.lhsPlan = self.pushdownOperator(Project(join.lhsPlan, lhsProjectExprs))
			if rhsProjectExprs:
				join.rhsPlan = self.pushdownOperator(Project(join.rhsPlan, rhsProjectExprs))

			# Remove op from the tree if there are no remaining project
			# expressions, and each side of the join received a projection
			if not remainingProjectExprs and lhsProjectExprs and rhsProjectExprs:
				return join
			return op

		else:
			# Report the child type: that is the operator this method could
			# not match (op itself is always the Project being pushed).
			raise NotImplementedError(
				"Unmatched operatorType in pushdownProject(): " + subPlanType)
コード例 #2
0
ファイル: Optimizer.py プロジェクト: Brinews/jacky
	def pushdownSelect(self, op):
		"""Push the Select operator `op` down through its sub-plan.

		The sub-plan is optimized first with self.pushdownOperator(). Depending
		on the operatorType() of the resulting child, `op` is kept in place,
		swapped with the child, copied onto both inputs of a union, or split
		into conjuncts that are distributed over the inputs of a join.

		Returns the operator that is the root of the rewritten sub-tree.
		Raises NotImplementedError when the child's operator type is not
		handled here.
		"""
		# First pushdown operators below:
		op.subPlan = self.pushdownOperator(op.subPlan)
		subPlanType = op.subPlan.operatorType()

		if subPlanType in ["GroupBy", "TableScan", "Project"]:
			return op

		elif subPlanType == "Select":
			# Reorder two selects based on 'score'. Each select is scored with
			# its own selectivity and its own tupleCost.
			# NOTE(review): the higher-scoring select ends up on top of the
			# lower-scoring one -- confirm that is the intended order.
			useEstimated = True
			child = op.subPlan
			opScore = (1 - op.selectivity(useEstimated)) / op.tupleCost
			childScore = (1 - child.selectivity(useEstimated)) / child.tupleCost

			if childScore > opScore:
				op.subPlan = child.subPlan
				child.subPlan = self.pushdownOperator(op)
				return child
			return op

		elif subPlanType == "Sort":
			# Always move a select below a sort
			result = op.subPlan
			op.subPlan = result.subPlan
			result.subPlan = self.pushdownOperator(op)
			return result

		elif subPlanType == "UnionAll":
			# Place a copy of op on each side of the union
			result = op.subPlan
			result.lhsPlan = self.pushdownOperator(Select(result.lhsPlan, op.selectExpr))
			result.rhsPlan = self.pushdownOperator(Select(result.rhsPlan, op.selectExpr))
			return result

		elif "Join" in subPlanType:
			# Partition the select expr as much as possible
			join = op.subPlan
			lhsAttrs = set(join.lhsPlan.schema().fields)
			rhsAttrs = set(join.rhsPlan.schema().fields)
			lhsExprs = []
			rhsExprs = []
			remainingExprs = []

			for e in ExpressionInfo(op.selectExpr).decomposeCNF():
				attrs = ExpressionInfo(e).getAttributes()
				if attrs.issubset(lhsAttrs):
					lhsExprs.append(e)
				elif attrs.issubset(rhsAttrs):
					rhsExprs.append(e)
				else:
					remainingExprs.append(e)

			# NOTE(review): conjuncts are re-joined with ' and ' without
			# parentheses; confirm decomposeCNF() never returns a clause whose
			# meaning changes when it is concatenated this way.
			if lhsExprs:
				join.lhsPlan = self.pushdownOperator(Select(join.lhsPlan, ' and '.join(lhsExprs)))

			if rhsExprs:
				join.rhsPlan = self.pushdownOperator(Select(join.rhsPlan, ' and '.join(rhsExprs)))

			# Only the conjuncts that need both inputs stay above the join.
			if remainingExprs:
				return Select(join, ' and '.join(remainingExprs))
			return join

		else:
			# Report the child type: that is the operator this method could
			# not match (op itself is always the Select being pushed).
			raise NotImplementedError(
				"Unmatched operatorType in pushdownSelect(): " + subPlanType)
コード例 #3
0
ファイル: Optimizer2.py プロジェクト: SamBrayman/DB
    def pushdownSelect(self, op):
        """Push the Select operator `op` down through its sub-plan.

        The sub-plan is optimized first with self.pushdownOperator(). `op` is
        then kept above GroupBy/TableScan/Project children, moved below Select
        and Sort children, copied onto both inputs of a UnionAll, or split
        into conjuncts that are distributed over the inputs of a join.

        Returns the operator that is the root of the rewritten sub-tree.
        Raises NotImplementedError when the child's operator type is not
        handled here.
        """
        # First pushdown operators below:
        op.subPlan = self.pushdownOperator(op.subPlan)
        subPlanType = op.subPlan.operatorType()

        if subPlanType in ["GroupBy", "TableScan", "Project"]:
            return op

        elif subPlanType in ["Select", "Sort"]:
            # A select is always moved below a sort, and (no cost-based
            # ordering is applied here) below another select as well.
            result = op.subPlan
            op.subPlan = result.subPlan
            result.subPlan = self.pushdownOperator(op)
            return result

        elif subPlanType == "UnionAll":
            # Place a copy of op on each side of the union
            result = op.subPlan
            result.lhsPlan = self.pushdownOperator(
                Select(result.lhsPlan, op.selectExpr))
            result.rhsPlan = self.pushdownOperator(
                Select(result.rhsPlan, op.selectExpr))
            return result

        elif "Join" in subPlanType:
            # Partition the select expr as much as possible
            join = op.subPlan
            lhsAttrs = set(join.lhsPlan.schema().fields)
            rhsAttrs = set(join.rhsPlan.schema().fields)
            lhsExprs = []
            rhsExprs = []
            remainingExprs = []

            for e in ExpressionInfo(op.selectExpr).decomposeCNF():
                attrs = ExpressionInfo(e).getAttributes()
                if attrs.issubset(lhsAttrs):
                    lhsExprs.append(e)
                elif attrs.issubset(rhsAttrs):
                    rhsExprs.append(e)
                else:
                    remainingExprs.append(e)

            # NOTE(review): conjuncts are re-joined with ' and ' without
            # parentheses; confirm decomposeCNF() never returns a clause whose
            # meaning changes when it is concatenated this way.
            if lhsExprs:
                join.lhsPlan = self.pushdownOperator(
                    Select(join.lhsPlan, ' and '.join(lhsExprs)))

            if rhsExprs:
                join.rhsPlan = self.pushdownOperator(
                    Select(join.rhsPlan, ' and '.join(rhsExprs)))

            # Only the conjuncts that need both inputs stay above the join.
            if remainingExprs:
                return Select(join, ' and '.join(remainingExprs))
            return join

        else:
            # Report the child type: that is the operator this method could
            # not match (op itself is always the Select being pushed).
            raise NotImplementedError(
                "Unmatched operatorType in pushdownSelect(): " + subPlanType)
コード例 #4
0
ファイル: Optimizer2.py プロジェクト: SamBrayman/DB
    def pushdownProject(self, op):
        """Push the Project operator `op` down through its sub-plan.

        The sub-plan is optimized first with self.pushdownOperator(). `op` is
        then kept above GroupBy/TableScan/Sort children, merged with a
        redundant child Project, moved below a Select whose attributes it
        provides, copied onto both inputs of a UnionAll, or partitioned over
        the inputs of a join.

        Returns the operator that is the root of the rewritten sub-tree.
        Raises NotImplementedError when the child's operator type is not
        handled here.
        """
        # First pushdown operators below:
        op.subPlan = self.pushdownOperator(op.subPlan)
        subPlanType = op.subPlan.operatorType()

        if subPlanType in ["GroupBy", "TableScan", "Sort"]:
            # The projection is left above these operators.
            return op

        elif subPlanType == "Project":
            # Attempt to remove redundant projections: the child Project is
            # dropped when every expression it stores under one of op's
            # output names reports isAttribute().
            # NOTE(review): this indexes the child's projectExprs with op's
            # output names and fails with KeyError if the child does not
            # define one of them -- confirm callers guarantee matching names.
            bools = [
                op.subPlan.projectExprs[key][0].isAttribute()
                for key in op.projectExprs
            ]
            if False not in bools:
                op.subPlan = op.subPlan.subPlan
                # op has a new child now, so run the pushdown once more.
                return self.pushdownOperator(op)
            # The child stays where it is. Presumably pushdownOperator() sends
            # Project operators back to this method, so re-running it on an
            # unchanged tree would never terminate; op is already in place.
            return op

        elif subPlanType == "Select":
            # Move op below its subplan if op provides all attributes needed for the selectExpr
            selectAttrs = ExpressionInfo(op.subPlan.selectExpr).getAttributes()
            outputAttrs = set(op.projectExprs.keys())
            result = op
            if selectAttrs.issubset(outputAttrs):
                result = op.subPlan
                op.subPlan = result.subPlan
                result.subPlan = self.pushdownOperator(op)
            return result

        elif subPlanType == "UnionAll":
            # Place a copy of op on each side of the union
            result = op.subPlan
            result.lhsPlan = self.pushdownOperator(
                Project(result.lhsPlan, op.projectExprs))
            result.rhsPlan = self.pushdownOperator(
                Project(result.rhsPlan, op.projectExprs))
            return result

        elif "Join" in subPlanType:
            # Partition the projections among the input relations, as much as possible
            # NOTE(review): a projection pushed to one side keeps only the
            # outputs listed in it; confirm the join (and any expression that
            # stays in op) does not need other attributes of that side.
            join = op.subPlan
            lhsAttrs = set(join.lhsPlan.schema().fields)
            rhsAttrs = set(join.rhsPlan.schema().fields)
            lhsProjectExprs = {}
            rhsProjectExprs = {}
            remainingProjectExprs = False

            for attr in op.projectExprs:
                requiredAttrs = ExpressionInfo(
                    op.projectExprs[attr][0]).getAttributes()
                if requiredAttrs.issubset(lhsAttrs):
                    lhsProjectExprs[attr] = op.projectExprs[attr]
                elif requiredAttrs.issubset(rhsAttrs):
                    rhsProjectExprs[attr] = op.projectExprs[attr]
                else:
                    # Needs attributes from both inputs, so it cannot be pushed.
                    remainingProjectExprs = True

            if lhsProjectExprs:
                join.lhsPlan = self.pushdownOperator(
                    Project(join.lhsPlan, lhsProjectExprs))
            if rhsProjectExprs:
                join.rhsPlan = self.pushdownOperator(
                    Project(join.rhsPlan, rhsProjectExprs))

            # Remove op from the tree if there are no remaining project
            # expressions, and each side of the join received a projection
            if not remainingProjectExprs and lhsProjectExprs and rhsProjectExprs:
                return join
            return op

        else:
            # Report the child type: that is the operator this method could
            # not match (op itself is always the Project being pushed).
            raise NotImplementedError(
                "Unmatched operatorType in pushdownProject(): " + subPlanType)
コード例 #5
0
  def pushdownHelper(self, operator):
    """Recursively push Project and Select operators down the plan rooted at `operator`.

    GroupBy and Sort operators only have their sub-plan optimized, UnionAll
    and *Join operators only have both inputs optimized, and any other
    non-Project/Select operator is returned untouched. A Project is moved
    below a Select whose attributes it outputs, partitioned over the inputs
    of a join, or copied onto both inputs of a UnionAll. A Select is moved
    below a Sort, or split into conjuncts that are distributed over the
    inputs of a join.

    Returns the operator that should take the place of `operator` in its
    parent (possibly `operator` itself).
    """
    operatorType = operator.operatorType()

    # Unary operators that are never moved: only optimize what is below.
    if operatorType in ("GroupBy", "Sort"):
      operator.subPlan = self.pushdownHelper(operator.subPlan)
      return operator

    # Binary operators: optimize each input independently.
    if operatorType == "UnionAll" or operatorType.endswith("Join"):
      operator.lhsPlan = self.pushdownHelper(operator.lhsPlan)
      operator.rhsPlan = self.pushdownHelper(operator.rhsPlan)
      return operator

    # Anything that is not a Project or Select is left exactly as it is.
    if operatorType not in ("Project", "Select"):
      return operator

    # Optimize the sub-plan first, then decide what to do with `operator`.
    operator.subPlan = self.pushdownHelper(operator.subPlan)
    child = operator.subPlan
    childType = child.operatorType()

    if operatorType == "Project":
      if childType == "Select":
        # The project may only go below the select when it outputs every
        # attribute the select expression reads.
        needed = ExpressionInfo(child.selectExpr).getAttributes()
        if any(attr not in operator.projectExprs for attr in needed):
          return operator

        # Swap: keep a handle on the select before re-linking the project.
        operator.subPlan = child.subPlan
        child.subPlan = self.pushdownHelper(operator)
        return child

      if childType.endswith("Join"):
        # NOTE(review): a projection pushed to one side keeps only the
        # outputs listed in it; confirm the join (and any expression that
        # stays in `operator`) does not need other attributes of that side.
        leftFields = set(child.lhsPlan.schema().fields)
        rightFields = set(child.rhsPlan.schema().fields)
        leftProject = {}
        rightProject = {}

        for attribute, exprInfo in operator.projectExprs.items():
          required = ExpressionInfo(exprInfo[0]).getAttributes()
          if all(attr in leftFields for attr in required):
            leftProject[attribute] = exprInfo
          elif all(attr in rightFields for attr in required):
            rightProject[attribute] = exprInfo

        if leftProject:
          child.lhsPlan = self.pushdownHelper(Project(child.lhsPlan, leftProject))
        if rightProject:
          child.rhsPlan = self.pushdownHelper(Project(child.rhsPlan, rightProject))

        # The project can only be dropped when every expression was pushed
        # and each input received a projection; otherwise the join would
        # still expose all columns of the side that got none.
        everythingPushed = len(leftProject) + len(rightProject) == len(operator.projectExprs)
        if everythingPushed and leftProject and rightProject:
          return child
        return operator

      if childType == "UnionAll":
        # Place a copy of the projection on each side of the union.
        child.lhsPlan = self.pushdownHelper(Project(child.lhsPlan, operator.projectExprs))
        child.rhsPlan = self.pushdownHelper(Project(child.rhsPlan, operator.projectExprs))
        return child

      # No rule for this child: the project stays where it is.
      return operator

    # From here on operatorType == "Select".
    if childType == "Sort":
      # Swap: keep a handle on the sort before re-linking the select.
      operator.subPlan = child.subPlan
      child.subPlan = self.pushdownHelper(operator)
      return child

    if childType.endswith("Join"):
      leftAttributes = set(child.lhsPlan.schema().fields)
      rightAttributes = set(child.rhsPlan.schema().fields)
      leftExpress = []
      rightExpress = []
      unpushedExpress = []

      # Send each conjunct to the side that provides all of its attributes.
      for expr in ExpressionInfo(operator.selectExpr).decomposeCNF():
        attrs = ExpressionInfo(expr).getAttributes()
        if attrs.issubset(leftAttributes):
          leftExpress.append(expr)
        elif attrs.issubset(rightAttributes):
          rightExpress.append(expr)
        else:
          unpushedExpress.append(expr)

      # NOTE(review): conjuncts are re-joined with ' and ' without
      # parentheses; confirm decomposeCNF() never returns a clause whose
      # meaning changes when it is concatenated this way.
      if leftExpress:
        child.lhsPlan = self.pushdownHelper(Select(child.lhsPlan, ' and '.join(leftExpress)))

      if rightExpress:
        child.rhsPlan = self.pushdownHelper(Select(child.rhsPlan, ' and '.join(rightExpress)))

      # Only the conjuncts that need both inputs stay above the join.
      if unpushedExpress:
        return Select(child, ' and '.join(unpushedExpress))
      return child

    # No rule for this child: the select stays where it is.
    return operator