def test_simulate_models():
    """Check the ASTs parsed from ``SIMULATE ... FROM MODELS OF`` queries.

    Covers a single unaliased column, two aliased columns with a
    ``MODELED BY`` clause, a ``PROBABILITY OF ... GIVEN ...`` column, and
    the two-column query wrapped in every ``CREATE [TEMP] TABLE
    [IF NOT EXISTS] ... AS`` combination.
    """
    # One unaliased column, no MODELED BY clause.
    single_column = ast.SimulateModels(
        [ast.SimCol(ast.ExpBQLDepProb('a', 'b'), None)],
        't',
        None,
    )
    assert parse_bql_string(
        'simulate dependence probability of a with b from models of t;'
    ) == [single_column]

    # Two aliased columns; the bare constraint `e` is expected to parse
    # as a null literal, `r=2.7` as a float literal.
    mutinf_constraints = [
        ('e', ast.ExpLit(ast.LitNull(0))),
        ('r', ast.ExpLit(ast.LitFloat(2.7))),
    ]
    mutinf = ast.ExpBQLMutInf(
        'c', 'd', mutinf_constraints, ast.ExpLit(ast.LitInt(100)))
    two_columns = ast.SimulateModels(
        [
            ast.SimCol(ast.ExpBQLDepProb('a', 'b'), 'q'),
            ast.SimCol(mutinf, 'g'),
        ],
        'p',
        'z',
    )
    assert parse_bql_string(
        'simulate dependence probability of a with b AS q, '
        'mutual information of c with d given (e, r=2.7) '
        'using 100 samples as g '
        'from models of p modeled by z'
    ) == [two_columns]

    # PROBABILITY OF (targets) GIVEN (constraints).
    targets = [
        ('a', ast.ExpLit(ast.LitInt(2))),
        ('c', ast.ExpLit(ast.LitFloat(1.1))),
    ]
    givens = [('b', ast.ExpLit(ast.LitFloat(0.5)))]
    probability = ast.SimulateModels(
        [ast.SimCol(ast.ExpBQLProb(targets, givens), None)],
        'p',
        None,
    )
    assert parse_bql_string(
        'simulate probability of (a=2, c=1.1) given (b=0.5) '
        'from models of p'
    ) == [probability]

    # CREATE TABLE ... AS SIMULATE: the flags on the resulting node are
    # the truthiness of the optional keywords spliced into the query.
    for temp in ('temp', ''):
        for ifnotexists in ('if not exists', ''):
            created = ast.CreateTabSimModels(
                bool(temp), bool(ifnotexists), 'f', two_columns)
            assert parse_bql_string(
                'create %s table %s f as '
                'simulate dependence probability of a with b AS q, '
                'mutual information of c with d '
                'given (e, r=2.7) using 100 samples as g '
                'from models of p modeled by z' % (temp, ifnotexists)
            ) == [created]
def test_is_bql():
    """Check ``ast.is_bql`` on plain expressions and on BQL expressions."""
    plain_expressions = (
        ast.ExpLit(ast.LitInt(0)),
        ast.ExpNumpar(0),
        ast.ExpNampar(0, 'x'),
        ast.ExpCol('t', 'c'),
    )
    for expression in plain_expressions:
        # Deliberately `== False` rather than `not ...`: a None result
        # must fail this check.
        assert ast.is_bql(expression) == False
    # ...
    bql_expressions = (
        ast.ExpBQLPredProb('c'),
        ast.ExpBQLProb([('c', ast.ExpLit(ast.LitInt(0)))], []),
        ast.ExpBQLProbFn(ast.ExpLit(ast.LitInt(0)), []),
        ast.ExpBQLSim(ast.ExpLit(ast.LitInt(0)), []),
        ast.ExpBQLDepProb('c0', 'c1'),
        ast.ExpBQLMutInf('c0', 'c1', None, 100),
        ast.ExpBQLCorrel('c0', 'c1'),
        ast.ExpBQLPredict('c', ast.ExpLit(ast.LitInt(.5)), None),
        ast.ExpBQLPredictConf('c', None),
    )
    for expression in bql_expressions:
        assert ast.is_bql(expression)
def p_bqlfn_depprob(self, cols):
    """Build the ``ast.ExpBQLDepProb`` node for a parsed BQL function.

    ``cols`` is unpacked positionally into the node constructor, so it
    must supply exactly the arguments that constructor accepts.
    """
    depprob = ast.ExpBQLDepProb(*cols)
    return depprob
def test_select_bql():
    """Check the ASTs parsed from SELECT queries that use BQL functions.

    Every case expects a one-element parse result holding an
    ``ast.Select`` with quantifier ``ast.SELQUANT_ALL`` over a single
    unaliased table.  The cases differ in the selected columns; only the
    final case also fills the ordering field.
    """

    def selects(columns, table='t', order=None):
        # Expected parse result for a query over one unaliased table.
        return [ast.Select(
            ast.SELQUANT_ALL,
            columns,
            [ast.SelTab(table, None)],
            None, None, order, None,
        )]

    def unaliased(expression):
        # A select column holding an expression with no alias.
        return ast.SelColExp(expression, None)

    def int_lit(value):
        return ast.ExpLit(ast.LitInt(value))

    def null_lit():
        return ast.ExpLit(ast.LitNull(0))

    def rowid_equals(rowid):
        # Expected AST for the condition `rowid=<rowid>`.
        return ast.ExpOp(
            ast.OP_EQ, (ast.ExpCol(None, 'rowid'), int_lit(rowid)))

    # PREDICTIVE PROBABILITY, alone and next to other select columns.
    assert parse_bql_string(
        'select predictive probability of c from t;'
    ) == selects([unaliased(ast.ExpBQLPredProb('c'))])
    assert parse_bql_string(
        'select predictive probability of c, * from t;'
    ) == selects([
        unaliased(ast.ExpBQLPredProb('c')),
        ast.SelColAll(None),
    ])
    assert parse_bql_string(
        'select c, predictive probability of d from t;'
    ) == selects([
        unaliased(ast.ExpCol(None, 'c')),
        unaliased(ast.ExpBQLPredProb('d')),
    ])
    assert parse_bql_string(
        'select predictive probability of c, d from t;'
    ) == selects([
        unaliased(ast.ExpBQLPredProb('c')),
        unaliased(ast.ExpCol(None, 'd')),
    ])

    # PROBABILITY OF <column> = <value>.
    assert parse_bql_string(
        'select probability of c = 42 from t;'
    ) == selects([unaliased(ast.ExpBQLProb([('c', int_lit(42))], []))])

    # SIMILARITY, with and without a target row and WITH RESPECT TO.
    assert parse_bql_string(
        'select similarity from t;'
    ) == selects([unaliased(ast.ExpBQLSim(None, [ast.ColListAll()]))])
    assert parse_bql_string(
        'select similarity to (rowid=8) from t;'
    ) == selects([
        unaliased(ast.ExpBQLSim(rowid_equals(8), [ast.ColListAll()])),
    ])
    assert parse_bql_string(
        'select similarity with respect to c from t;'
    ) == selects([unaliased(ast.ExpBQLSim(None, [ast.ColListLit(['c'])]))])
    assert parse_bql_string(
        'select similarity to (rowid=8) with respect to c from t;'
    ) == selects([
        unaliased(ast.ExpBQLSim(rowid_equals(8), [ast.ColListLit(['c'])])),
    ])
    assert parse_bql_string(
        'select similarity to (rowid=5) with respect to age from t1;'
    ) == selects(
        [unaliased(
            ast.ExpBQLSim(rowid_equals(5), [ast.ColListLit(['age'])]))],
        table='t1',
    )
    # Without parentheses, only `c` belongs to WITH RESPECT TO; `d` is
    # expected to be a separate select column.
    assert parse_bql_string(
        'select similarity to (rowid=8) with respect to c, d from t;'
    ) == selects([
        unaliased(ast.ExpBQLSim(rowid_equals(8), [ast.ColListLit(['c'])])),
        unaliased(ast.ExpCol(None, 'd')),
    ])
    assert parse_bql_string(
        'select similarity to (rowid=8)'
        ' with respect to (c, d) from t;'
    ) == selects([
        unaliased(ast.ExpBQLSim(
            rowid_equals(8),
            [ast.ColListLit(['c']), ast.ColListLit(['d'])])),
    ])
    likeliest_column = ast.EstCols(
        [ast.SelColAll(None)], 't', None, None,
        [ast.Ord(ast.ExpBQLProbFn(int_lit(4), []), ast.ORD_ASC)],
        ast.Lim(int_lit(1), None),
    )
    assert parse_bql_string(
        'select similarity to (rowid=8) with respect to'
        ' (estimate * from columns of t order by '
        ' probability of value 4 limit 1)'
        ' from t;'
    ) == selects([
        unaliased(ast.ExpBQLSim(
            rowid_equals(8), [ast.ColListSub(likeliest_column)])),
    ])

    # DEPENDENCE PROBABILITY, one-column and two-column forms.
    assert parse_bql_string(
        'select dependence probability with c from t;'
    ) == selects([unaliased(ast.ExpBQLDepProb('c', None))])
    assert parse_bql_string(
        'select dependence probability of c with d from t;'
    ) == selects([unaliased(ast.ExpBQLDepProb('c', 'd'))])

    # MUTUAL INFORMATION, with optional GIVEN and USING ... SAMPLES.
    assert parse_bql_string(
        'select mutual information with c from t;'
    ) == selects([unaliased(ast.ExpBQLMutInf('c', None, None, None))])
    assert parse_bql_string(
        'select mutual information of c with d from t;'
    ) == selects([unaliased(ast.ExpBQLMutInf('c', 'd', None, None))])
    assert parse_bql_string(
        'select mutual information of c with d'
        ' using (1+2) samples from t;'
    ) == selects([
        unaliased(ast.ExpBQLMutInf(
            'c', 'd', None,
            ast.op(ast.OP_ADD, int_lit(1), int_lit(2)))),
    ])
    assert parse_bql_string(
        ''' select mutual information with c given (d, a=1) using 10 samples from t; '''
    ) == selects([
        unaliased(ast.ExpBQLMutInf(
            'c', None,
            [('d', null_lit()), ('a', int_lit(1))],
            int_lit(10))),
    ])
    assert parse_bql_string(
        ''' select mutual information of b with c given (d, a=1, e, r=2) from t;'''
    ) == selects([
        unaliased(ast.ExpBQLMutInf(
            'b', 'c',
            [
                ('d', null_lit()),
                ('a', int_lit(1)),
                ('e', null_lit()),
                ('r', int_lit(2)),
            ],
            None)),
    ])

    # CORRELATION, one-column and two-column forms.
    assert parse_bql_string(
        'select correlation with c from t;'
    ) == selects([unaliased(ast.ExpBQLCorrel('c', None))])
    assert parse_bql_string(
        'select correlation of c with d from t;'
    ) == selects([unaliased(ast.ExpBQLCorrel('c', 'd'))])

    # XXX This got broken a while ago: parenthesization in PROBABILITY
    # OF X = E is too permissive.  I didn't notice because before I
    # introduced BQLParseError, this simply caught Exception -- which
    # covered the AssertionError that this turned into.
    #
    # with pytest.raises(parse.BQLParseError):
    #     parse_bql_string('select probability of x = 1 -' +
    #         ' probability of y = 0 from t;')
    #     # XXX Should really be this test, but getting the grammar to
    #     # admit this unambiguously is too much of a pain at the
    #     # moment.
    #     assert parse_bql_string('select probability of x = 1 -' +
    #             ' probability of y = 0 from t;') == \
    #         [ast.Select(ast.SELQUANT_ALL,
    #             [ast.SelColExp(ast.ExpBQLProb([('x',
    #                         ast.ExpOp(ast.OP_SUB, (
    #                             ast.ExpLit(ast.LitInt(1)),
    #                             ast.ExpBQLProb([('y',
    #                                     ast.ExpLit(ast.LitInt(0)))],
    #                                 []),
    #                         )))],
    #                     []),
    #                 None)],
    #             [ast.SelTab('t', None)], None, None, None, None)]

    # The value in PROBABILITY OF <column> = <value> may be a function
    # application.
    assert parse_bql_string(
        'select probability of c1 = f(c2) from t;'
    ) == selects([
        unaliased(ast.ExpBQLProb(
            [('c1', ast.ExpApp(False, 'f', [ast.ExpCol(None, 'c2')]))],
            [])),
    ])

    # A table-qualified ESTIMATE subquery as a select column, plus an
    # ORDER BY on the outer query.
    most_dependent_columns = ast.EstCols(
        [ast.SelColAll(None)], 't', None, None,
        [ast.Ord(ast.ExpBQLDepProb('c', None), ast.ORD_DESC)],
        ast.Lim(int_lit(4), None),
    )
    assert parse_bql_string(
        'select key, t.(estimate * from columns of t'
        ' order by dependence probability with c desc limit 4)'
        ' from t order by key asc'
    ) == selects(
        [
            unaliased(ast.ExpCol(None, 'key')),
            ast.SelColSub('t', most_dependent_columns),
        ],
        order=[ast.Ord(ast.ExpCol(None, 'key'), ast.ORD_ASC)],
    )
def p_bqlfn_depprob(self, cols): return ast.ExpBQLDepProb(*cols) def p_bqlfn_mutinf(self, cols, constraints, nsamp):