Beispiel #1
0
    def getSafeQueryPlan(self, init=True):
        # we are interested in whether the separator represents
        # a generic inequality constraint - if so, it doesn't matter which
        # component it occurs in, as it must have the same
        # constraint everywhere

        representativeSeparator = self.separator[0]
        if representativeSeparator.isInequality() and representativeSeparator.getInequalityConstraint().isGeneric():
            self.replacementVal = "generic_%s" % representativeSeparator.getInequalityConstraint().getConstant()
        else:
            self.replacementVal = algorithm.attCounter()

        # for webkb
        if representativeSeparator.domainSize:
            self.effectiveDomainSize = representativeSeparator.domainSize

        d = self.disjunctiveQuery

        self.usedSeparatorVars = d.getUsedSeparators()

        d.applySeparator(self.separator, self.replacementVal)
        if isinstance(d, query_exp.DisjunctiveQuery):
            cons = []
            for c in d.getComponents():
                cons.append(query_exp.ConjunctiveQuery(query_exp.decomposeComponent(c)))
            self.childDNF = query_exp.DNF(cons)
        else:
            self.childDNF = d

        if init:
            self.child = algorithm.getSafeOpenQueryPlanNaive(self.childDNF)
            symbolString = str(algorithm.dom) + "_" + str(representativeSeparator)
            self.lam = symbols(symbolString)* self.child.lam
        else:
            self.child = None
Beispiel #2
0
    def generateSQL_DNF(self, separatorSubs=None):
        #print separatorSubs
        if separatorSubs is None:
            separatorSubs = []
        replacementVal = algorithm.attCounter()

        groupBy = ["c%d" % i for (i, x) in separatorSubs]
        if len(groupBy):
            groupByString = 'group by ' + ', '.join(groupBy)
        else:
            groupByString = ''
        selectString = ', '.join(groupBy + ["l_ior_n(COALESCE(pUse,0))"])
        separatorSubs.append((self.replacementVal, self.separator))

        childSQL = self.child.generateSQL_DNF(separatorSubs[:])
        sql = "\n select %s as pUse from (%s\n) as q%d \n %s \n" % (selectString, childSQL, algorithm.counter(), groupByString)

        if self.child.hasGenericConstant():
            genericConstantStr = self.child.getGenericConstantStr()
            self.genericConstantStr = genericConstantStr
            groupBy.append(genericConstantStr)
            groupByString = 'group by ' + ', '.join(groupBy)
            sql = "\n select %s, %s as pUse from (%s\n) as q%d \n %s \n" % (genericConstantStr, selectString, childSQL, algorithm.counter(), groupByString)
        else:
            #Ground Tuple: self.child.groundTuple.name
            add_line = ','.join(groupBy + ["pUse + <%s> * (<%s_%s> - ct ) as pUse" % (self.child.lam, algorithm.dom, separatorSubs[-1][1][0])])
            sql = "\n select %s from (\n select %s as pUse, count(*) as ct from (%s\n) as q%d \n %s \n) as q%d \n" % (add_line, selectString, childSQL, algorithm.counter(), groupByString, algorithm.counter())

        return sql
Beispiel #3
0
    def generateSQL_DNF(self, separatorSubs=None):
        if separatorSubs is None:
            separatorSubs = []
        replacementVal = algorithm.attCounter()

        groupBy = ["c%d" % i for (i, x) in separatorSubs]
        if len(groupBy):
            groupByString = 'group by ' + ', '.join(groupBy)
        else:
            groupByString = ''
        selectString = ', '.join(groupBy + ['ior(COALESCE(pUse,0))'])
        separatorSubs.append((self.replacementVal, self.separator))

        childSQL = self.child.generateSQL_DNF(separatorSubs[:])
        sql = "\n -- independent project \n select %s as pUse from (%s) as q%d %s " % (
            selectString, childSQL, algorithm.counter(), groupByString)

        if self.child.hasGenericConstant():
            genericConstantStr = self.child.getGenericConstantStr()
            self.genericConstantStr = genericConstantStr
            groupBy.append(genericConstantStr)
            groupByString = 'group by ' + ', '.join(groupBy)
            sql = "\n -- independent project \n select %s, %s as pUse from (%s) as q%d %s " % (
                genericConstantStr, selectString, childSQL,
                algorithm.counter(), groupByString)
        else:
            sql = "\n -- independent project \n select %s as pUse from (%s) as q%d %s " % (
                selectString, childSQL, algorithm.counter(), groupByString)

        return sql
Beispiel #4
0
    def generateSQL_DNF(self, separatorSubs=None):
        if separatorSubs is None:
            separatorSubs = []
        replacementVal = algorithm.attCounter()

        groupBy = ["c%d" % i for (i, x) in separatorSubs]
        if len(groupBy):
            groupByString = 'group by ' + ', '.join(groupBy)
        else:
            groupByString = ''
        selectString = ', '.join(groupBy + ['ior(COALESCE(pUse,0))'])
        separatorSubs.append((self.replacementVal, self.separator))

        childSQL = self.child.generateSQL_DNF(separatorSubs[:])
        sql = "\n -- independent project \n select %s as pUse from (%s) as q%d %s " % (
            selectString, childSQL, algorithm.counter(), groupByString)

        if self.child.hasGenericConstant():
            genericConstantStr = self.child.getGenericConstantStr()
            self.genericConstantStr = genericConstantStr
            groupBy.append(genericConstantStr)
            groupByString = 'group by ' + ', '.join(groupBy)
            sql = "\n -- independent project \n select %s, %s as pUse from (%s) as q%d %s " % (
                genericConstantStr, selectString, childSQL, algorithm.counter(), groupByString)
        else:
            sql = "\n -- independent project \n select %s as pUse from (%s) as q%d %s " % (
                selectString, childSQL, algorithm.counter(), groupByString)

        return sql
Beispiel #5
0
    def getSafeQueryPlan(self, init=True):
        # we are interested in whether the separator represents
        # a generic inequality constraint - if so, it doesn't matter which
        # component it occurs in, as it must have the same
        # constraint everywhere

        representativeSeparator = self.separator[0]
        if representativeSeparator.isInequality(
        ) and representativeSeparator.getInequalityConstraint().isGeneric():
            self.replacementVal = "generic_%s" % representativeSeparator.getInequalityConstraint(
            ).getConstant()
        else:
            self.replacementVal = algorithm.attCounter()

        # for webkb
        if representativeSeparator.domainSize:
            self.effectiveDomainSize = representativeSeparator.domainSize

        d = self.disjunctiveQuery

        self.usedSeparatorVars = d.getUsedSeparators()

        d.applySeparator(self.separator, self.replacementVal)
        if isinstance(d, query_exp.DisjunctiveQuery):
            cons = []
            for c in d.getComponents():
                cons.append(
                    query_exp.ConjunctiveQuery(
                        query_exp.decomposeComponent(c)))
            self.childDNF = query_exp.DNF(cons)
        else:
            self.childDNF = d

        if init:
            self.child = algorithm.getSafeQueryPlan(self.childDNF)
        else:
            self.child = None
Beispiel #6
0
    def generateSQL_CNF(self, params):
        replacementVal = algorithm.attCounter()

        childSQL = self.child.generateSQL_CNF(params)
        self.trueOnMissing = self.child.trueOnMissing

        self.genericIdentifiers = self.child.genericIdentifiers.copy()
        subqueryAlias = 'q%d' % algorithm.counter()

        # this steps replaces a universally (\forall) quantified variable
        # with a product - if some tuples can be missing, we need to count
        # and invalidate any products with too few terms (missing terms =
        # false terms)
        if params['missingTuples']:

            # for webkb
            if self.effectiveDomainSize:
                effectiveDomainSize = self.effectiveDomainSize
            else:
                effectiveDomainSize = params['domainSize']
            if self.isInequalityVar(self.separator):
                effectiveDomainSize = effectiveDomainSize - 1

        groupByAttributes = []
        selectAttributes = []
        joinClause = ''

        # take care of generic constants, which have been projected out but
        # must still be "bubbled up" to the previous level
        for genericIdentifier in self.genericIdentifiers:
            groupByAttributes.append(genericIdentifier)
            selectAttributes.append(genericIdentifier)

        if self.isGenericInequalityVar(self.separator):
            genericConstantIdentifier = "sep_var_%s" % str(self.replacementVal)
            selectAttributes.append('A.v0 as %s' % genericConstantIdentifier)
            joinClause = ', A WHERE A.v0 != %s.%s' % (subqueryAlias, genericConstantIdentifier)
            groupByAttributes.append('A.v0')
            self.genericIdentifiers.add(genericConstantIdentifier)

        for storedReplacementVal in self.usedSeparatorVars:
            selectAttributes.append("sep_var_%s" % str(storedReplacementVal))
            groupByAttributes.append("sep_var_%s" % str(storedReplacementVal))

        if len(groupByAttributes):
            groupByClause = 'group by ' + (', ' . join(groupByAttributes))
        else:
            groupByClause = ''

        havingClause = ''
        if params['useLog']:
            if self.trueOnMissing:
                if params['useNull']:
                    selectAttributes.append('CASE WHEN COUNT(*) = COUNT(pUse) THEN SUM(pUse) ELSE NULL END AS pUse')
                    # if the result is empty and trueOnMissing=True, we should
                    # return empty set (true)
                    havingClause = ' HAVING COUNT(*) > 0'
                else:
                    selectAttributes.append('SUM(pUse) AS pUse')
            else:
                if params['useNull']:
                    if params['missingTuples']:
                        selectAttributes.append('CASE WHEN COUNT(*) = COUNT(pUse) and COUNT(*) = %d THEN SUM(pUse) ELSE NULL END AS pUse' % effectiveDomainSize)
                    else:
                        selectAttributes.append('CASE WHEN COUNT(*) = COUNT(pUse) THEN SUM(pUse) ELSE NULL END AS pUse')
                else:
                    if params['missingTuples']:
                        selectAttributes.append("CASE WHEN COUNT(*) = %d THEN SUM(pUse) ELSE '-Infinity' END AS pUse" % effectiveDomainSize)
                    else:
                        selectAttributes.append('SUM(pUse) AS pUse')
        else:
            if not self.trueOnMissing and params['missingTuples']:
                selectAttributes.append("CASE WHEN COUNT(*) = %d THEN prod_double(pUse) ELSE 0 END AS pUse" % effectiveDomainSize)
            else:
                selectAttributes.append('prod_double(pUse) AS pUse')
        selectClause = ', '.join(selectAttributes)

        sql = "\n select %s from (%s) as %s%s %s %s" % (selectClause, childSQL, subqueryAlias, joinClause, groupByClause, havingClause)
        return sql
Beispiel #7
0
    def generateSQL_CNF(self, params):
        replacementVal = algorithm.attCounter()

        childSQL = self.child.generateSQL_CNF(params)
        self.trueOnMissing = self.child.trueOnMissing

        self.genericIdentifiers = self.child.genericIdentifiers.copy()
        subqueryAlias = 'q%d' % algorithm.counter()

        # this steps replaces a universally (\forall) quantified variable
        # with a product - if some tuples can be missing, we need to count
        # and invalidate any products with too few terms (missing terms =
        # false terms)
        if params['missingTuples']:

            # for webkb
            if self.effectiveDomainSize:
                effectiveDomainSize = self.effectiveDomainSize
            else:
                effectiveDomainSize = params['domainSize']
            if self.isInequalityVar(self.separator):
                effectiveDomainSize = effectiveDomainSize - 1

        groupByAttributes = []
        selectAttributes = []
        joinClause = ''

        # take care of generic constants, which have been projected out but
        # must still be "bubbled up" to the previous level
        for genericIdentifier in self.genericIdentifiers:
            groupByAttributes.append(genericIdentifier)
            selectAttributes.append(genericIdentifier)

        if self.isGenericInequalityVar(self.separator):
            genericConstantIdentifier = "sep_var_%s" % str(self.replacementVal)
            selectAttributes.append('A.v0 as %s' % genericConstantIdentifier)
            joinClause = ', A WHERE A.v0 != %s.%s' % (
                subqueryAlias, genericConstantIdentifier)
            groupByAttributes.append('A.v0')
            self.genericIdentifiers.add(genericConstantIdentifier)

        for storedReplacementVal in self.usedSeparatorVars:
            selectAttributes.append("sep_var_%s" % str(storedReplacementVal))
            groupByAttributes.append("sep_var_%s" % str(storedReplacementVal))

        if len(groupByAttributes):
            groupByClause = 'group by ' + (', ' . join(groupByAttributes))
        else:
            groupByClause = ''

        havingClause = ''
        if params['useLog']:
            if self.trueOnMissing:
                if params['useNull']:
                    selectAttributes.append(
                        'CASE WHEN COUNT(*) = COUNT(pUse) THEN SUM(pUse) ELSE NULL END AS pUse')
                    # if the result is empty and trueOnMissing=True, we should
                    # return empty set (true)
                    havingClause = ' HAVING COUNT(*) > 0'
                else:
                    selectAttributes.append('SUM(pUse) AS pUse')
            else:
                if params['useNull']:
                    if params['missingTuples']:
                        selectAttributes.append(
                            'CASE WHEN COUNT(*) = COUNT(pUse) and COUNT(*) = %d THEN SUM(pUse) ELSE NULL END AS pUse' %
                            effectiveDomainSize)
                    else:
                        selectAttributes.append(
                            'CASE WHEN COUNT(*) = COUNT(pUse) THEN SUM(pUse) ELSE NULL END AS pUse')
                else:
                    if params['missingTuples']:
                        selectAttributes.append(
                            "CASE WHEN COUNT(*) = %d THEN SUM(pUse) ELSE '-Infinity' END AS pUse" %
                            effectiveDomainSize)
                    else:
                        selectAttributes.append('SUM(pUse) AS pUse')
        else:
            if not self.trueOnMissing and params['missingTuples']:
                selectAttributes.append(
                    "CASE WHEN COUNT(*) = %d THEN prod_double(pUse) ELSE 0 END AS pUse" %
                    effectiveDomainSize)
            else:
                selectAttributes.append('prod_double(pUse) AS pUse')
        selectClause = ', '.join(selectAttributes)

        sql = "\n -- independent project \n select %s from (%s) as %s%s %s %s" % (
            selectClause, childSQL, subqueryAlias, joinClause, groupByClause, havingClause)
        return sql