def test_smokes_friends_cancer_gamma():
    """Compare the safe plan's probability for the gamma query to a closed form.

    Loads the symmetric smokes/friends/cancer fixture, builds the SQL for the
    query once, and then five times:

    * rebuilds ``Rel`` from ``smokes`` with a 0/1-valued ``p`` column drawn
      with ``random() < p``,
    * counts the rows with ``p = 1`` (``cardinality``),
    * runs the generated SQL and asserts that the returned probability is
      within 1e-6 of the closed-form value computed from the constants below.
    """
    conn = psycopg2.connect(dbname="sampling_test")
    try:
        conn.autocommit = True
        cur = conn.cursor()
        # Read the fixture through a context manager so the file handle is
        # closed deterministically instead of being leaked.
        with open("test/test_data/smokesfriendscancer-symmetric-20.sql", "r") as fixture:
            cur.execute(fixture.read())

        domain = 20
        # NOTE(review): presumably the uniform tuple probabilities the fixture
        # assigns to P1, P2, Friends and Cancer -- confirm against the SQL file.
        P1 = 0.667129
        P2 = 0.776870
        F = 0.009952
        C = 0.091123

        dnf = safesample.query_parser.parse("P1(x,y),Rel*!(x),Friends(x,y),~Rel*!(y) v P2(x),Rel*!(x),~Cancer(x)")
        plan = algorithm.getSafeQueryPlan(dnf)
        sql = plan.generateSQL_DNF()

        for r in range(5):
            # Resample Rel on every round; the plan's SQL is reused unchanged.
            cur.execute("drop table if exists Rel;")
            cur.execute("create table Rel as (select id, v0, CASE WHEN random() < p THEN 1 ELSE 0 END as p from smokes);")
            cur.execute("select count(*) from Rel where p = 1")
            cardinality = cur.fetchone()[0]

            cur.execute(sql)
            prob = cur.fetchone()[0]

            # Closed form: 1 - (1 - q_cancer) * (1 - q_friends), where each q
            # is itself "1 - (1 - per-grounding probability) ** #groundings".
            expected = (1 - (1-(1 - (1-P2*(1-C))**cardinality))*(1-(1-(1-P1*F)**(cardinality*(domain-cardinality)))))
            assert abs(prob - expected) < 0.000001
    finally:
        # Always release the connection, even when an assertion fails.
        conn.close()
def test_smokes_friends_cancer_query1_generic():
    """Check the per-constant probabilities of the generic (non-Boolean) query.

    After loading the symmetric fixture and sampling ``Rel`` from ``smokes``,
    the generated SQL is expected to yield ``(constant, probability)`` rows.
    For every constant ``c`` in ``1..domain``:

    * if ``Rel`` contains a row with ``p = 1`` and ``v0 = c``, the probability
      must be exactly 1;
    * otherwise it must be within 1e-6 of ``probGamma``, the closed-form value
      computed from the constants below.
    """
    conn = psycopg2.connect(dbname="sampling_test")
    try:
        conn.autocommit = True
        cur = conn.cursor()
        # Context manager closes the fixture file instead of leaking the handle.
        with open("test/test_data/smokesfriendscancer-symmetric-20.sql", "r") as fixture:
            cur.execute(fixture.read())

        domain = 20
        # NOTE(review): presumably the uniform tuple probabilities the fixture
        # assigns to P1, P2, Friends and Cancer -- confirm against the SQL file.
        P1 = 0.667129
        P2 = 0.776870
        F = 0.009952
        C = 0.091123

        # Single sampling round; raise the count to resample Rel more often.
        for r in range(1):
            cur.execute("drop table if exists Rel;")
            cur.execute("create table Rel as (select id, v0, CASE WHEN random() < p THEN 1 ELSE 0 END as p from smokes);")
            cur.execute("select count(*) from Rel where p = 1")
            cardinality = cur.fetchone()[0]

            probGamma = (1 - (1-(1 - (1-P2*(1-C))**cardinality))*(1-(1-(1-P1*F)**(cardinality*(domain-cardinality)))))

            # Indexed directly by constant value; slot 0 is unused and
            # constants missing from the result keep the default of 0.
            probConstants = [0]*(domain+1)
            dnf = safesample.query_parser.parse("Rel*![c]() v P1(x,y),Rel*!(x),Friends(x,y),~Rel*(y) v P2(x),Rel*!(x),~Cancer(x)" )
            plan = algorithm.getSafeQueryPlan(dnf)
            sql = plan.generateSQL_DNF()
            cur.execute(sql)
            for (const, prob) in cur:
                probConstants[const] = prob

            for c in range(1, domain+1):
                cur.execute("select count(*) from Rel where p = 1 and v0 = %d" % (c))
                inRelSample = (cur.fetchone()[0] > 0)
                if inRelSample:
                    assert probConstants[c] == 1
                else:
                    assert abs(probConstants[c] - probGamma) < 0.000001
    finally:
        # Always release the connection, even when an assertion fails.
        conn.close()
def exactProb(queryDNF):
    """Run the safe query plan for *queryDNF* and return its first result value.

    Opens a connection to the ``sampling_test`` database, executes the SQL
    produced by ``plan.generateSQL_DNF()`` and returns column 0 of the first
    row fetched.  The connection is closed before returning so that repeated
    calls do not accumulate open connections.
    """
    conn = psycopg2.connect(dbname="sampling_test")
    try:
        conn.autocommit = True
        cur = conn.cursor()
        plan = algorithm.getSafeQueryPlan(queryDNF)
        querySQL = plan.generateSQL_DNF()
        cur.execute(querySQL)
        return cur.fetchone()[0]
    finally:
        # Runs on both the normal return and on any exception.
        conn.close()
def test_q3_generic():
    """Check the generic query ``Smokes*![c](),Friends[-c](x)``.

    Every ``(constant, probability)`` row returned by the generated SQL must
    carry the probability 0.59349810824172, and the set of returned constants
    must be exactly ``{3, 4, 6}``.
    """
    conn = psycopg2.connect(dbname="sampling_test")
    try:
        conn.autocommit = True
        cur = conn.cursor()
        # Context manager closes the fixture file instead of leaking the handle.
        with open("test/test_data/non_boolean_sampling.sql", "r") as fixture:
            cur.execute(fixture.read())
        dnf = safesample.query_parser.parse("Smokes*![c](),Friends[-c](x)")
        plan = algorithm.getSafeQueryPlan(dnf)
        sql = plan.generateSQL_DNF()
        cur.execute(sql)
        constants = set()
        for (c, p) in cur:
            constants.add(c)
            # Compare with a tight tolerance rather than ``==``: exact float
            # equality breaks on last-digit rounding/formatting differences.
            assert abs(p - 0.59349810824172) < 1e-9
        assert constants == set((3,4,6))
    finally:
        # Always release the connection, even when an assertion fails.
        conn.close()
def test_q2_generic():
    """Check the generic query ``Smokes*![-c](x),Friends[-c,c](x)``.

    The generated SQL must return exactly ten ``(constant, probability)``
    rows.  Constants 3, 4 and 6 are expected to have probability
    0.0198049576959999; every other constant 0.0295598587570093.
    """
    conn = psycopg2.connect(dbname="sampling_test")
    try:
        conn.autocommit = True
        cur = conn.cursor()
        # Context manager closes the fixture file instead of leaking the handle.
        with open("test/test_data/non_boolean_sampling.sql", "r") as fixture:
            cur.execute(fixture.read())
        dnf = safesample.query_parser.parse("Smokes*![-c](x),Friends[-c,c](x)")
        plan = algorithm.getSafeQueryPlan(dnf)
        sql = plan.generateSQL_DNF()
        cur.execute(sql)
        constants = set((3,4,6))
        count = 0
        for (c, p) in cur:
            count += 1
            # Compare with a tight tolerance rather than ``==``: exact float
            # equality breaks on last-digit rounding/formatting differences.
            if c in constants:
                assert abs(p - 0.0198049576959999) < 1e-9
            else:
                assert abs(p - 0.0295598587570093) < 1e-9
        assert count == 10
    finally:
        # Always release the connection, even when an assertion fails.
        conn.close()
def test_smokes_friends_cancer_query2():
    """Check the shattered query for every constant against a closed form.

    After loading the symmetric fixture and sampling ``Rel`` from ``smokes``,
    a query specialised to each constant ``c`` in ``1..domain`` is planned and
    executed.  Its probability must be within 1e-6 of
    ``1 - (1-q1)*(1-q2)*(1-q3)*(1-q4)``, where ``q1..q4`` are computed below
    from the constants and the sampled cardinalities.  The query text and both
    probabilities are printed for each constant.
    """
    conn = psycopg2.connect(dbname="sampling_test")
    try:
        conn.autocommit = True
        cur = conn.cursor()
        # Context manager closes the fixture file instead of leaking the handle.
        with open("test/test_data/smokesfriendscancer-symmetric-20.sql", "r") as fixture:
            cur.execute(fixture.read())

        domain = 20
        # NOTE(review): presumably the uniform tuple probabilities the fixture
        # assigns to P1, P2, Friends and Cancer -- confirm against the SQL file.
        P1 = 0.667129
        P2 = 0.776870
        F = 0.009952
        C = 0.091123

        # Single sampling round; raise the count to resample Rel more often.
        for r in range(1):
            cur.execute("drop table if exists Rel;")
            cur.execute("create table Rel as (select id, v0, CASE WHEN random() < p THEN 1 ELSE 0 END as p from smokes);")
            cur.execute("select count(*) from Rel where p = 1")
            cardinality = cur.fetchone()[0]

            for c in range(1, domain+1):
                # Format the query once and reuse it for parsing and printing.
                query = "Cancer[%d]() v P1(x,y),Rel*(x),Friends(x,y),~Rel*(y) v P2[-%d](x),Rel*[-%d](x),~Cancer[-%d](x) v P2[%d](),Rel*[%d]()" % (c,c,c,c,c,c)
                dnf = safesample.query_parser.parse(query)
                plan = algorithm.getSafeQueryPlan(dnf)
                sql = plan.generateSQL_DNF()
                cur.execute(sql)
                probQ1 = cur.fetchone()[0]

                cur.execute("select count(*) from Rel where p = 1 and v0 = %d" % (c))
                cardinalityJustC = cur.fetchone()[0] # because we shattered, need to see if c is in Rel* or not

                q1 = C
                q2 = 1-(1-P1*F)**(cardinality*(domain-cardinality))
                # c itself is excluded from the exponent; it is covered by q4.
                q3 = 1-(1-P2*(1-C))**(cardinality - cardinalityJustC)
                q4 = P2*cardinalityJustC
                probQuery = 1 - (1-q1)*(1-q2)*(1-q3)*(1-q4)

                # Single-argument print(...) emits the same text on Python 2
                # and Python 3.
                print(query)
                print(probQ1)
                print(probQuery)
                assert abs(probQ1 - probQuery) < 0.000001
    finally:
        # Always release the connection, even when an assertion fails.
        conn.close()
# Example #7
# 0
def exactProb(queryDNF):
    """Evaluate *queryDNF* through its safe query plan and return the result.

    The SQL generated by the plan is handed to
    ``safesample.query_parser.executeSQL``; whatever that call returns is
    passed back to the caller unchanged.
    """
    safePlan = algorithm.getSafeQueryPlan(queryDNF)
    return safesample.query_parser.executeSQL(safePlan.generateSQL_DNF())
# Example #8
# 0
def exactProb(queryDNF):
    """Return the result of executing the safe plan's SQL for *queryDNF*.

    Builds the safe query plan, asks it for its DNF SQL, and returns the value
    produced by ``safesample.query_parser.executeSQL`` for that SQL.
    """
    generatedSQL = algorithm.getSafeQueryPlan(queryDNF).generateSQL_DNF()
    result = safesample.query_parser.executeSQL(generatedSQL)
    return result