Ejemplo n.º 1
0
    def default(self, line):
        """Evaluate ``line`` as a query and print the results.

        NOTE(review): presumably this overrides ``cmd.Cmd.default`` (the
        handler for input that matches no command) -- confirm against the
        enclosing class.

        The line is parsed into a DNF query and planned with ``getPlan``;
        the generated SQL is printed and, when ``self.execSQL`` is set,
        handed to ``printProbability``. If ``algorithm.UnsafeException`` is
        raised on that path and ``self.sample`` is set, a safe residual
        query is looked up and (with ``self.execSQL``) estimated through
        ``safe.SafeSample``. Independently of that outcome, ``self.naive``
        and ``self.karpluby`` each run their own sampler on the parsed
        query.

        Every ``print`` below takes exactly one parenthesised expression, so
        the output is the same whether ``print`` is the Python 2 statement
        or the Python 3 function.

        Args:
            line: Query text passed to ``parse``.

        Returns:
            None. Results are written to stdout; a ``ParseError`` raised
            anywhere in the body is reported as "Failed to parse".
        """
        def meanSeconds(sampleTimes):
            # An executor that recorded no samples has no meaningful mean;
            # report 0.0 rather than raising ZeroDivisionError.
            if not sampleTimes:
                return 0.0
            return sum(sampleTimes) / float(len(sampleTimes))

        try:
            queryDNF = parse(line)
            print("Query:  %s" % (queryDNF,))
            try:
                plan = getPlan(queryDNF)
                querySQL = plan.generateSQL_DNF()
                print("%s \n" % (algorithm.getPrettySQL(querySQL),))
                if self.graphQueryPlan:
                    drawTree(plan, self.graphQueryPlanFile)
                    print("\nQuery plan saved to %s" % self.graphQueryPlanFile)
                if self.execSQL:
                    printProbability(querySQL)
            except algorithm.UnsafeException:
                if self.sample:
                    try:
                        startTime = time.time()
                        # These two messages used to be written as
                        # `print (a, b)`, which under the print statement
                        # emits the repr of a tuple instead of the text.
                        print("Query unsafe, "
                              "trying to find safe residual query")
                        (relationsToSample,
                         residualDNF,
                         querySQL,
                         relsObjects) = algorithm.findSafeResidualQuery(
                             queryDNF)
                        print("Relations to sample: %s" %
                              ', '.join(relationsToSample))
                        print("Residual Query:  %s" % (residualDNF,))
                        print("%s \n" % (algorithm.getPrettySQL(querySQL),))
                        if self.execSQL:
                            ssExecutor = safe.SafeSample(conn)
                            estimate = ssExecutor.safeSample(
                                relationsToSample,
                                relsObjects,
                                querySQL,
                                self.numSamples,
                                self.epsilon,
                                self.delta)
                            print("SafeSample Estimate: %s" % (estimate,))
                            safeSampleTimes = ssExecutor.sampleTimes
                            print("SafeSample Total Time: %f seconds" % (
                                time.time() - startTime))
                            print("SafeSample Mean Sample Time: %f seconds" % (
                                meanSeconds(safeSampleTimes)))
                            numEstimates = len(ssExecutor.getEstimates())
                            print("SafeSample Number of Samples: "
                                  "%d (%d + %d to estimate variance)" % (
                                      numEstimates +
                                      ssExecutor.step2NumSamples,
                                      numEstimates,
                                      ssExecutor.step2NumSamples))
                    except algorithm.UnsafeException:
                        print("Error: No safe residual query found")
                else:
                    print("Query is unsafe")

            if self.naive:
                print("")
                startTime = time.time()
                naiveExecutor = naive.NaiveSampler(conn)
                estimate = naiveExecutor.naiveSample(queryDNF, self.numSamples)
                print("Naive Sampler Estimate: %f (%d samples)" % (
                    estimate, self.numSamples))
                naiveTimes = naiveExecutor.sampleTimes
                print("Naive Sampler Total Time: %f seconds" % (
                    time.time() - startTime))
                print("Naive Sampler Mean Sample Time: %f seconds" % (
                    meanSeconds(naiveTimes)))

            if self.karpluby:
                print("")
                startTime = time.time()
                klExecutor = karp_luby.KarpLuby(conn)
                estimate = klExecutor.karpLuby(
                    queryDNF, self.numSamples, self.epsilon, self.delta)
                print("Karp-Luby Estimate: %s" % (estimate,))
                karpLubyTimes = klExecutor.sampleTimes
                print("Karp-Luby Total Time: %f seconds" % (
                    time.time() - startTime))
                print("Karp-Luby Mean Sample Time: %f seconds" % (
                    meanSeconds(karpLubyTimes)))
                numEstimates = len(klExecutor.getEstimates())
                print("Karp-Luby Number of Samples: "
                      "%d (%d + %d to estimate variance)" % (
                          numEstimates + klExecutor.step2NumSamples,
                          numEstimates,
                          klExecutor.step2NumSamples))
                print("Karp-Luby Stopping Rule Samples "
                      "(included in total above): %d" % (
                          klExecutor.step1NumSamples))

        except ParseError:
            print("Failed to parse")
Ejemplo n.º 2
0
    def prepareQuery(self, query):
        """Build the lineage SQL for ``query`` as a UNION of per-conjunct SELECTs.

        For each conjunct one ``SELECT ... FROM ... [WHERE ...]`` is built:

        * every relation occurrence is aliased ``<name><k>``, where ``k``
          counts occurrences of that relation name inside the conjunct;
        * the select list holds, per relation, the triple
          ``'<name>', <alias>.id, <alias>.p`` (the id is negated for negated
          relations) and is padded with ``'', 0, 0`` triples up to the
          relation count of the largest conjunct, so every branch of the
          UNION has the same number of columns;
        * the WHERE clause combines constant constraints with equalities
          that chain together the relations sharing a variable.

        The pretty-printed SQL is also written to stdout.

        Args:
            query: Object exposing ``getConjuncts()``; each conjunct exposes
                ``getRelations()`` and ``getComponents()``.

        Returns:
            The lineage query as a single SQL string.

        Raises:
            ValueError: If ``query`` has no conjuncts (``max`` of an empty
                sequence).
        """
        conjuncts = list(query.getConjuncts())
        maxComponentRels = max(len(c.getRelations()) for c in conjuncts)
        conjunctSQL = []
        for conjunct in conjuncts:
            relations = conjunct.getRelations()

            # First pass: give every occurrence of a relation name its own
            # numbered alias. Kept separate from the FROM-list pass so the
            # alias recorded last for a relation key is the one used there.
            occurrences = {}
            relationSQLIds = {}
            for rel in relations:
                relName = rel.getName()
                occurrences[relName] = occurrences.get(relName, 0) + 1
                relationSQLIds[rel] = relName + str(occurrences[relName])

            relationsSQL = ', '.join(
                "%s as %s" % (rel.getName(), relationSQLIds[rel])
                for rel in relations)

            selectAtts = []
            equalityConstraints = []
            for rel in relations:
                alias = relationSQLIds[rel]
                idSign = "-" if rel.isNegated() else ""
                selectAtts.append("'%s', %s%s.id, %s.p" %
                                  (rel.getName(), idSign, alias, alias))
                # NOTE(review): the constraint index is used directly as the
                # column number here, while the join conditions below go
                # through getTableColumn() -- confirm this is intended.
                for column, constant in enumerate(rel.getConstraints()):
                    if not constant:
                        continue
                    if constant > 0:
                        equalityConstraints.append(
                            "%s.v%d = %d" % (alias, column, constant))
                    else:
                        # Non-positive non-zero constants encode "!=".
                        equalityConstraints.append(
                            "%s.v%d != %d" % (alias, column, -1 * constant))

            # Pad shorter conjuncts so all UNION branches align.
            selectAtts.extend(
                ["'', 0, 0"] * (maxComponentRels - len(relations)))
            selectSQL = ', '.join(selectAtts)

            for component in conjunct.getComponents():
                componentVarPositions = component.getVarPositions()
                for v in componentVarPositions:
                    positionsByRel = componentVarPositions[v]
                    # list() keeps this indexable on Python 3, where keys()
                    # is a view rather than a list.
                    relsWithVar = list(positionsByRel)
                    # Chain each relation to the next one holding the same
                    # variable.
                    for relA, relB in zip(relsWithVar, relsWithVar[1:]):
                        for pos1 in positionsByRel[relA]:
                            for pos2 in positionsByRel[relB]:
                                equalityConstraints.append(
                                    "%s.v%d = %s.v%d" %
                                    (relationSQLIds[relA],
                                     relA.getTableColumn(pos1),
                                     relationSQLIds[relB],
                                     relB.getTableColumn(pos2)))

            if equalityConstraints:
                whereSQL = "WHERE %s" % ' AND '.join(equalityConstraints)
            else:
                whereSQL = ""

            conjunctSQL.append("SELECT %s FROM %s %s" %
                               (selectSQL, relationsSQL, whereSQL))

        lineageQuery = ' UNION '.join(conjunctSQL)
        print(algorithm.getPrettySQL(lineageQuery))
        return lineageQuery
Ejemplo n.º 3
0
    def prepareQuery(self, query):
        """Build the lineage SQL for ``query`` as a UNION of per-conjunct SELECTs.

        For each conjunct one ``SELECT ... FROM ... [WHERE ...]`` is built:

        * every relation occurrence is aliased ``<name><k>``, where ``k``
          counts occurrences of that relation name inside the conjunct;
        * the select list holds, per relation, the triple
          ``'<name>', <alias>.id, <alias>.p`` (the id is negated for negated
          relations) and is padded with ``'', 0, 0`` triples up to the
          relation count of the largest conjunct, so every branch of the
          UNION has the same number of columns;
        * the WHERE clause combines constant constraints with equalities
          that chain together the relations sharing a variable.

        The pretty-printed SQL is also written to stdout.

        Args:
            query: Object exposing ``getConjuncts()``; each conjunct exposes
                ``getRelations()`` and ``getComponents()``.

        Returns:
            The lineage query as a single SQL string.

        Raises:
            ValueError: If ``query`` has no conjuncts (``max`` of an empty
                sequence).
        """
        conjuncts = list(query.getConjuncts())
        maxComponentRels = max(len(c.getRelations()) for c in conjuncts)
        conjunctSQL = []
        for conjunct in conjuncts:
            relations = conjunct.getRelations()

            # First pass: give every occurrence of a relation name its own
            # numbered alias. Kept separate from the FROM-list pass so the
            # alias recorded last for a relation key is the one used there.
            occurrences = {}
            relationSQLIds = {}
            for rel in relations:
                relName = rel.getName()
                occurrences[relName] = occurrences.get(relName, 0) + 1
                relationSQLIds[rel] = relName + str(occurrences[relName])

            relationsSQL = ', '.join(
                "%s as %s" % (rel.getName(), relationSQLIds[rel])
                for rel in relations)

            selectAtts = []
            equalityConstraints = []
            for rel in relations:
                alias = relationSQLIds[rel]
                idSign = "-" if rel.isNegated() else ""
                selectAtts.append("'%s', %s%s.id, %s.p" %
                                  (rel.getName(), idSign, alias, alias))
                # NOTE(review): the constraint index is used directly as the
                # column number here, while the join conditions below go
                # through getTableColumn() -- confirm this is intended.
                for column, constant in enumerate(rel.getConstraints()):
                    if not constant:
                        continue
                    if constant > 0:
                        equalityConstraints.append(
                            "%s.v%d = %d" % (alias, column, constant))
                    else:
                        # Non-positive non-zero constants encode "!=".
                        equalityConstraints.append(
                            "%s.v%d != %d" % (alias, column, -1 * constant))

            # Pad shorter conjuncts so all UNION branches align.
            selectAtts.extend(
                ["'', 0, 0"] * (maxComponentRels - len(relations)))
            selectSQL = ', '.join(selectAtts)

            for component in conjunct.getComponents():
                componentVarPositions = component.getVarPositions()
                for v in componentVarPositions:
                    positionsByRel = componentVarPositions[v]
                    # list() keeps this indexable on Python 3, where keys()
                    # is a view rather than a list.
                    relsWithVar = list(positionsByRel)
                    # Chain each relation to the next one holding the same
                    # variable.
                    for relA, relB in zip(relsWithVar, relsWithVar[1:]):
                        for pos1 in positionsByRel[relA]:
                            for pos2 in positionsByRel[relB]:
                                equalityConstraints.append(
                                    "%s.v%d = %s.v%d" %
                                    (relationSQLIds[relA],
                                     relA.getTableColumn(pos1),
                                     relationSQLIds[relB],
                                     relB.getTableColumn(pos2)))

            if equalityConstraints:
                whereSQL = "WHERE %s" % ' AND '.join(equalityConstraints)
            else:
                whereSQL = ""

            conjunctSQL.append("SELECT %s FROM %s %s" %
                               (selectSQL, relationsSQL, whereSQL))

        lineageQuery = ' UNION '.join(conjunctSQL)
        print(algorithm.getPrettySQL(lineageQuery))
        return lineageQuery
Ejemplo n.º 4
0
    def default(self, line):
        """Evaluate ``line`` as a query and print the results.

        NOTE(review): presumably this overrides ``cmd.Cmd.default`` (the
        handler for input that matches no command) -- confirm against the
        enclosing class.

        The line is parsed into a DNF query and planned with ``getPlan``;
        the generated SQL is printed and, when ``self.execSQL`` is set,
        handed to ``printProbability``. If ``algorithm.UnsafeException`` is
        raised on that path and ``self.sample`` is set, a safe residual
        query is looked up and (with ``self.execSQL``) estimated through
        ``safe.SafeSample``. Independently of that outcome, ``self.naive``
        and ``self.karpluby`` each run their own sampler on the parsed
        query.

        Every ``print`` below takes exactly one parenthesised expression, so
        the output is the same whether ``print`` is the Python 2 statement
        or the Python 3 function.

        Args:
            line: Query text passed to ``parse``.

        Returns:
            None. Results are written to stdout; a ``ParseError`` raised
            anywhere in the body is reported as "Failed to parse".
        """
        def meanSeconds(sampleTimes):
            # An executor that recorded no samples has no meaningful mean;
            # report 0.0 rather than raising ZeroDivisionError.
            if not sampleTimes:
                return 0.0
            return sum(sampleTimes) / float(len(sampleTimes))

        try:
            queryDNF = parse(line)
            print("Query:  %s" % (queryDNF,))
            try:
                plan = getPlan(queryDNF)
                querySQL = plan.generateSQL_DNF()
                print("%s \n" % (algorithm.getPrettySQL(querySQL),))
                if self.graphQueryPlan:
                    drawTree(plan, self.graphQueryPlanFile)
                    print("\nQuery plan saved to %s" % self.graphQueryPlanFile)
                if self.execSQL:
                    printProbability(querySQL)
            except algorithm.UnsafeException:
                if self.sample:
                    try:
                        startTime = time.time()
                        # These two messages used to be written as
                        # `print(a, b)`, which under the print statement
                        # emits the repr of a tuple instead of the text.
                        print("Query unsafe, "
                              "trying to find safe residual query")
                        (relationsToSample,
                         residualDNF,
                         querySQL,
                         relsObjects) = algorithm.findSafeResidualQuery(
                             queryDNF)
                        print("Relations to sample: %s" %
                              ', '.join(relationsToSample))
                        print("Residual Query:  %s" % (residualDNF,))
                        print("%s \n" % (algorithm.getPrettySQL(querySQL),))
                        if self.execSQL:
                            ssExecutor = safe.SafeSample(conn)
                            estimate = ssExecutor.safeSample(
                                relationsToSample, relsObjects, querySQL,
                                self.numSamples, self.epsilon, self.delta)
                            print("SafeSample Estimate: %s" % (estimate,))
                            safeSampleTimes = ssExecutor.sampleTimes
                            print("SafeSample Total Time: %f seconds" % (
                                time.time() - startTime))
                            print("SafeSample Mean Sample Time: %f seconds" % (
                                meanSeconds(safeSampleTimes)))
                            numEstimates = len(ssExecutor.getEstimates())
                            print("SafeSample Number of Samples: "
                                  "%d (%d + %d to estimate variance)" % (
                                      numEstimates +
                                      ssExecutor.step2NumSamples,
                                      numEstimates,
                                      ssExecutor.step2NumSamples))
                    except algorithm.UnsafeException:
                        print("Error: No safe residual query found")
                else:
                    print("Query is unsafe")

            if self.naive:
                print("")
                startTime = time.time()
                naiveExecutor = naive.NaiveSampler(conn)
                estimate = naiveExecutor.naiveSample(queryDNF, self.numSamples)
                print("Naive Sampler Estimate: %f (%d samples)" % (
                    estimate, self.numSamples))
                naiveTimes = naiveExecutor.sampleTimes
                print("Naive Sampler Total Time: %f seconds" % (
                    time.time() - startTime))
                print("Naive Sampler Mean Sample Time: %f seconds" % (
                    meanSeconds(naiveTimes)))

            if self.karpluby:
                print("")
                startTime = time.time()
                klExecutor = karp_luby.KarpLuby(conn)
                estimate = klExecutor.karpLuby(queryDNF, self.numSamples,
                                               self.epsilon, self.delta)
                print("Karp-Luby Estimate: %s" % (estimate,))
                karpLubyTimes = klExecutor.sampleTimes
                print("Karp-Luby Total Time: %f seconds" % (
                    time.time() - startTime))
                print("Karp-Luby Mean Sample Time: %f seconds" % (
                    meanSeconds(karpLubyTimes)))
                numEstimates = len(klExecutor.getEstimates())
                print("Karp-Luby Number of Samples: "
                      "%d (%d + %d to estimate variance)" % (
                          numEstimates + klExecutor.step2NumSamples,
                          numEstimates,
                          klExecutor.step2NumSamples))
                print("Karp-Luby Stopping Rule Samples "
                      "(included in total above): %d" % (
                          klExecutor.step1NumSamples))

        except ParseError:
            print("Failed to parse")
Ejemplo n.º 5
0
    def prepareQuery(self, query):
        """Build SQL that draws one random sample of the tables and tests ``query`` on it.

        Every relation name referenced by the query is wrapped in a CTE of
        the same name that adds an ``InSample`` column, set to 1 when
        ``random() < p`` and 0 otherwise. Each conjunct becomes
        ``SELECT EXISTS (SELECT * FROM ... WHERE ...)`` over those CTEs:
        positive relations require ``InSample = 1``, negated relations
        ``InSample = 0``, plus the constant constraints and the equalities
        that chain together relations sharing a variable. The final
        statement is ``WITH <ctes> SELECT true IN (<conjunct UNION>) Q``.

        NOTE(review): the CTE shadowing a base table of the same name and
        the ``random()`` call look PostgreSQL-specific -- confirm the target
        database.

        The pretty-printed SQL is also written to stdout.

        Args:
            query: Object exposing ``getConjuncts()``; each conjunct exposes
                ``getRelations()`` and ``getComponents()``.

        Returns:
            The sampling query as a single SQL string.
        """
        allRelationsUsed = set()
        conjunctSQL = []
        for conjunct in query.getConjuncts():
            relations = conjunct.getRelations()

            # First pass: give every occurrence of a relation name its own
            # numbered alias and remember the name for the CTE list. Kept
            # separate from the FROM-list pass so the alias recorded last
            # for a relation key is the one used there.
            occurrences = {}
            relationSQLIds = {}
            for rel in relations:
                relName = rel.getName()
                occurrences[relName] = occurrences.get(relName, 0) + 1
                relationSQLIds[rel] = relName + str(occurrences[relName])
                # set.add is already a no-op for names seen before.
                allRelationsUsed.add(relName)

            relationsSQL = ', '.join(
                "%s as %s" % (rel.getName(), relationSQLIds[rel])
                for rel in relations)

            equalityConstraints = []
            for rel in relations:
                alias = relationSQLIds[rel]
                # Negated relations must be absent from the sampled world.
                equalityConstraints.append(
                    "%s.InSample = %d" % (alias, 0 if rel.isNegated() else 1))
                # NOTE(review): the constraint index is used directly as the
                # column number here, while the join conditions below go
                # through getTableColumn() -- confirm this is intended.
                for column, constant in enumerate(rel.getConstraints()):
                    if not constant:
                        continue
                    if constant > 0:
                        equalityConstraints.append(
                            "%s.v%d = %d" % (alias, column, constant))
                    else:
                        # Non-positive non-zero constants encode "!=".
                        equalityConstraints.append(
                            "%s.v%d != %d" % (alias, column, -1 * constant))

            for component in conjunct.getComponents():
                componentVarPositions = component.getVarPositions()
                for v in componentVarPositions:
                    positionsByRel = componentVarPositions[v]
                    # list() keeps this indexable on Python 3, where keys()
                    # is a view rather than a list.
                    relsWithVar = list(positionsByRel)
                    # Chain each relation to the next one holding the same
                    # variable.
                    for relA, relB in zip(relsWithVar, relsWithVar[1:]):
                        for pos1 in positionsByRel[relA]:
                            for pos2 in positionsByRel[relB]:
                                equalityConstraints.append(
                                    "%s.v%d = %s.v%d" %
                                    (relationSQLIds[relA],
                                     relA.getTableColumn(pos1),
                                     relationSQLIds[relB],
                                     relB.getTableColumn(pos2)))

            if equalityConstraints:
                whereSQL = "WHERE %s" % ' AND '.join(equalityConstraints)
            else:
                whereSQL = ""

            conjunctSQL.append("SELECT EXISTS (SELECT * FROM %s %s)" %
                               (relationsSQL, whereSQL))

        sampleTableSQL = ', '.join(
            "%s as (select *, CASE WHEN random() < p THEN 1 ELSE 0 END as InSample FROM %s)" % (relName, relName)
            for relName in allRelationsUsed)

        querySQL = 'WITH %s SELECT true IN (%s) Q' % (
            sampleTableSQL, ' UNION '.join(conjunctSQL))
        print(algorithm.getPrettySQL(querySQL))
        return querySQL