コード例 #1
0
ファイル: Optimizer.py プロジェクト: yliu120/dbsystem
    def pushdownProjections(self, operator):
        """Push projections in the plan rooted at `operator` towards the leaves.

        The plan is rewritten in place, case by case on the operator type
        string returned by ``operatorType()``:

        * ``TableScan``: returned unchanged.
        * ``Select`` / ``GroupBy``: recurse into ``subPlan``.
        * ``UnionAll`` / any type ending in ``Join``: recurse into
          ``lhsPlan`` and ``rhsPlan``.
        * anything else is handled as a projection (it must expose
          ``subPlan`` and ``projectExprs``) and is moved according to the
          type of its child:

          - ``TableScan``: nothing to do.
          - ``Select``: the projection and the selection swap places.
          - ``GroupBy``: recurse below the group-by, projection stays.
          - ``Project``: the two projections are merged into one.
          - ``UnionAll``: the projection is copied onto both inputs and the
            union becomes the root of this sub-tree.
          - otherwise (treated as a join): narrower projections keeping
            only the needed fields are inserted on the join inputs.

        Args:
            operator: root of the (sub-)plan to rewrite.

        Returns:
            The root operator of the rewritten (sub-)plan; this may be a
            different operator object than the one passed in.

        Raises:
            NotImplementedError: for a join whose ``joinMethod`` is not
                "nested-loops", "block-nested-loops" or "hash".
        """
        # Local import keeps this method self-contained.
        import re

        opType = operator.operatorType()

        if opType == "TableScan":
            return operator

        if opType in ("Select", "GroupBy"):
            operator.subPlan = self.pushdownProjections(operator.subPlan)
            return operator

        if opType == "UnionAll" or opType[-4:] == "Join":
            newlPlan = self.pushdownProjections(operator.lhsPlan)
            newrPlan = self.pushdownProjections(operator.rhsPlan)
            operator.lhsPlan = newlPlan
            operator.rhsPlan = newrPlan
            return operator

        # From here on `operator` is handled as a projection.
        subPlan = operator.subPlan
        subType = subPlan.operatorType()

        if subType == "TableScan":
            return operator

        if subType == "Select":
            # Project(Select(X)) -> Select(Project(X)).
            # NOTE(review): this presumes the selection only needs fields
            # that survive the projection -- confirm against callers.
            operator.subPlan = subPlan.subPlan
            subPlan.subPlan = operator
            return self.pushdownProjections(subPlan)

        if subType == "GroupBy":
            subPlan.subPlan = self.pushdownProjections(subPlan.subPlan)
            return operator

        if subType == "Project":
            # Merge the two projections. The upper projection is assumed to
            # be expressed over the output schema of the lower one, so each
            # lower output name is replaced by the expression producing it.
            lowerExprs = {
                name: expr
                for (name, (expr, _)) in subPlan.projectExprs.items()
            }

            # Match whole identifiers only (not substrings of longer names,
            # not attribute accesses such as `math.sqrt`) and substitute
            # them in a single pass, so that inserted text is never
            # rewritten again by a later key.
            # TODO: identifiers inside string literals are still replaced.
            identifier = re.compile(r"(?<![\w.])[A-Za-z_]\w*")

            def substitute(match):
                name = match.group(0)
                if name not in lowerExprs:
                    return name
                replacement = lowerExprs[name]
                if replacement.isidentifier():
                    return replacement
                # Parenthesize to preserve operator precedence.
                return "(" + replacement + ")"

            operator.projectExprs = {
                name: (identifier.sub(substitute, expr), fieldType)
                for (name, (expr, fieldType)) in operator.projectExprs.items()
            }
            operator.outputSchema = DBSchema(
                operator.relationId(),
                [(k, v[1]) for (k, v) in operator.projectExprs.items()])
            # Drop the lower projection and retry from the merged one.
            operator.subPlan = subPlan.subPlan
            return self.pushdownProjections(operator)

        if subType == "UnionAll":
            # Apply the same projection to both inputs of the union.
            subPlan.lhsPlan = Project(subPlan.lhsPlan, operator.projectExprs)
            subPlan.rhsPlan = Project(subPlan.rhsPlan, operator.projectExprs)
            subPlan.validateSchema()
            return self.pushdownProjections(subPlan)

        # Remaining case: the child is treated as a join.
        # Step 1: collect the input fields the projection reads directly,
        # keeping only names that belong to the projection's input schema.
        inputNames = operator.inputSchemas()[0].fields
        lhsPlanNames = subPlan.lhsPlan.schema().fields
        rhsPlanNames = subPlan.rhsPlan.schema().fields

        neededFields = set()
        for (expr, _) in operator.projectExprs.values():
            attributes = ExpressionInfo(expr).getAttributes()
            # Filter without mutating `attributes` while iterating over it.
            neededFields.update(
                name for name in attributes if name in inputNames)

        # Step 2: add the fields the join condition itself needs.
        if subPlan.joinMethod in ("nested-loops", "block-nested-loops"):
            neededFields.update(
                ExpressionInfo(subPlan.joinExpr).getAttributes())
        elif subPlan.joinMethod == "hash":
            neededFields.update(
                subPlan.lhsKeySchema.fields + subPlan.rhsKeySchema.fields)
        else:
            # Indexed joins are not supported.
            raise NotImplementedError

        # Step 3: build identity projections restricted to needed fields.
        lprojectExpr = {
            f: (f, v)
            for (f, v) in subPlan.lhsPlan.schema().schema()
            if f in neededFields
        }
        rprojectExpr = {
            f: (f, v)
            for (f, v) in subPlan.rhsPlan.schema().schema()
            if f in neededFields
        }

        # Only insert a projection when it actually drops some field.
        if len(lprojectExpr) != len(lhsPlanNames):
            subPlan.lhsPlan = Project(subPlan.lhsPlan, lprojectExpr)
            subPlan.lhsPlan.outputSchema = DBSchema(
                subPlan.lhsPlan.relationId(),
                [(k, v[1]) for (k, v) in subPlan.lhsPlan.projectExprs.items()])

        if len(rprojectExpr) != len(rhsPlanNames):
            subPlan.rhsPlan = Project(subPlan.rhsPlan, rprojectExpr)
            subPlan.rhsPlan.outputSchema = DBSchema(
                subPlan.rhsPlan.relationId(),
                [(k, v[1]) for (k, v) in subPlan.rhsPlan.projectExprs.items()])

        if subPlan.validateJoin():
            subPlan.initializeSchema()

        # Keep pushing the newly inserted projections below the join.
        operator.subPlan = self.pushdownProjections(subPlan)
        return operator