def test_simulate_models_nontrivial():
    """Expand a SIMULATE MODELS expression mixing BQL terms and operators.

    The expected result selects from an inner ``SimulateModels`` in which
    each BQL subexpression is bound to a generated name (``v0``, ``v1``,
    ``v2``); the outer ``Select`` keeps the operators and refers to those
    names as plain columns.
    """
    # XXX Descent into the following node types is still untested:
    # ExpLit, ExpNumpar, ExpNampar, ExpCol, ExpSub, ExpCollate, ExpIn,
    # ExpCast, ExpExists, ExpApp, ExpAppStar, ExpCase.

    # The three BQL subexpressions that should be hoisted.
    given_c3 = [('c3', ast.ExpLit(ast.LitInt(3)))]
    mi_first = ast.ExpBQLMutInf(['c0'], ['c1', 'c2'], given_c3, None)
    given_c7 = [('c7', ast.ExpLit(ast.LitString('ergodic')))]
    mi_second = ast.ExpBQLMutInf(['c4', 'c5'], ['c6'], given_c7, 100)
    # No conditions for now -- that changes the weighting of the average.
    density = ast.ExpBQLProbDensity(
        [('x', ast.ExpLit(ast.LitFloat(1.2)))], [])

    # Input: mi_first < 0.1 * mi_second AS quagga, density AS eland.
    scaled_input = ast.ExpOp(
        ast.OP_MUL, [ast.ExpLit(ast.LitFloat(0.1)), mi_second])
    comparison_input = ast.ExpOp(ast.OP_LT, [mi_first, scaled_input])
    query = ast.SimulateModelsExp(
        [
            ast.SelColExp(comparison_input, 'quagga'),
            ast.SelColExp(density, 'eland'),
        ],
        'p', 'g')
    actual = macro.expand_simulate_models(query)

    # Expected outer SELECT: the same operators over the generated names.
    scaled_output = ast.ExpOp(
        ast.OP_MUL,
        [ast.ExpLit(ast.LitFloat(0.1)), ast.ExpCol(None, 'v1')])
    comparison_output = ast.ExpOp(
        ast.OP_LT, [ast.ExpCol(None, 'v0'), scaled_output])
    outer_columns = [
        ast.SelColExp(comparison_output, 'quagga'),
        ast.SelColExp(ast.ExpCol(None, 'v2'), 'eland'),
    ]
    # Expected inner query: the hoisted BQL expressions under those names.
    inner = ast.SimulateModels(
        [
            ast.SelColExp(mi_first, 'v0'),
            ast.SelColExp(mi_second, 'v1'),
            ast.SelColExp(density, 'v2'),
        ],
        'p', 'g')
    expected = ast.Select(
        ast.SELQUANT_ALL, outer_columns, [ast.SelTab(inner, None)],
        None, None, None, None)

    assert actual == expected
def test_using_model():
    """Check how the parser handles a USING MODEL clause.

    Covers SIMULATE, ESTIMATE (plain, FROM COLUMNS OF, FROM PAIRWISE
    COLUMNS OF, FROM PAIRWISE), INFER and INFER EXPLICIT.  In each
    accepted query the model expression shows up as one argument of the
    resulting AST node.
    """
    modelno = ast.ExpCol(None, 'modelno')

    assert parse_bql_string('simulate x from t using model 42'
            ' limit 10') == \
        [ast.Simulate(['x'], 't', ast.ExpLit(ast.LitInt(42)), [],
            ast.ExpLit(ast.LitInt(10)))]

    # The parser must reject this query.  Only the call itself belongs in
    # the ``raises`` block: a comparison placed after it can never run
    # when the test passes.
    # NOTE(review): presumably rejected because of the parenthesized model
    # and/or the missing LIMIT -- confirm against the grammar.
    with pytest.raises(parse.BQLParseError):
        parse_bql_string('simulate x from t'
            ' using model (87)')

    assert parse_bql_string('estimate x from t using model (1+2)') == \
        [ast.Estimate(ast.SELQUANT_ALL,
            [ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't',
            ast.ExpOp(ast.OP_ADD, (
                ast.ExpLit(ast.LitInt(1)),
                ast.ExpLit(ast.LitInt(2)),
            )),
            None, None, None, None)]

    assert parse_bql_string('estimate * from columns of t'
            ' using model modelno') == \
        [ast.EstCols([ast.SelColAll(None)], 't', modelno,
            None, None, None)]

    assert parse_bql_string('estimate 42 from columns of t'
            ' using model modelno') == \
        [ast.EstCols([(ast.ExpLit(ast.LitInt(42)), None)], 't', modelno,
            None, None, None)]

    assert parse_bql_string('estimate 42 from pairwise columns of t'
            ' using model modelno') == \
        [ast.EstPairCols([(ast.ExpLit(ast.LitInt(42)), None)], 't', None,
            modelno, None, None, None)]

    assert parse_bql_string('estimate similarity from pairwise t'
            ' using model modelno') == \
        [ast.EstPairRow(
            [ast.SelColExp(ast.ExpBQLSim(None, [ast.ColListAll()]), None)],
            't', modelno, None, None, None)]

    assert parse_bql_string('infer x from t using model modelno') == \
        [ast.InferAuto([ast.InfColOne('x', None)],
            ast.ExpLit(ast.LitInt(0)), 't', modelno,
            None, None, None, None)]

    assert parse_bql_string(
            'infer explicit x from t using model modelno') == \
        [ast.InferExplicit(
            [ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't', modelno, None, None, None, None)]
def test_expand_probability_estimate():
    """Expand PROBABILITY OF (<expression>) into an averaging subquery.

    The expected result is a subquery that selects ``AVG(x)`` from a
    ``SimulateModelsExp`` binding the original expression to ``x``.
    """
    mutual_info = ast.ExpBQLMutInf(
        ['c0'], ['c1', 'c2'], [('c3', ast.ExpLit(ast.LitInt(3)))], None)
    condition = ast.ExpOp(
        ast.OP_LT, [mutual_info, ast.ExpLit(ast.LitFloat(0.1))])
    actual = macro.expand_probability_estimate(
        ast.ExpBQLProbEst(condition), 'p', 'g')

    average = ast.ExpApp(False, 'AVG', [ast.ExpCol(None, 'x')])
    simulation = ast.SimulateModelsExp(
        [ast.SelColExp(condition, 'x')], 'p', 'g')
    expected = ast.ExpSub(
        ast.Select(
            ast.SELQUANT_ALL,
            [ast.SelColExp(average, None)],
            [ast.SelTab(simulation, None)],
            None, None, None, None))

    assert actual == expected
def _expand_simmodel_exp(exp, simcols):
    """Hoist column references and BQL terms out of ``exp`` into ``simcols``.

    Walks the expression tree.  Every column reference (``ast.ExpCol``)
    and every node for which ``ast.is_bql`` is true is appended to
    ``simcols`` as ``ast.SelColExp(node, 'vN')``, where ``N`` is the
    length of ``simcols`` at that moment, and is replaced in the result
    by ``ast.ExpCol(None, 'vN')``.

    :param exp: expression AST node to rewrite.
    :param simcols: list of ``ast.SelColExp``; mutated in place.
    :return: the rewritten expression.  Literals, parameters, subqueries,
        EXISTS and ``ExpAppStar`` nodes are returned unchanged.
    :raises NotImplementedError: for ``ast.ExpCase``.
    :raises AssertionError: for any node type not handled here.
    """
    if isinstance(exp, ast.ExpCol) or ast.is_bql(exp):
        tmpname = 'v%d' % (len(simcols),)
        simcols.append(ast.SelColExp(exp, tmpname))
        return ast.ExpCol(None, tmpname)
    if isinstance(exp, (ast.ExpLit, ast.ExpNumpar, ast.ExpNampar)):
        return exp
    if isinstance(exp, (ast.ExpSub, ast.ExpExists)):
        # XXX Not really right -- need to provide correct scoping.
        return exp              # XXX subquery scoping
    if isinstance(exp, ast.ExpCollate):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        return ast.ExpCollate(subexp, exp.collation)
    # NOTE(review): the two IN branches test for ExpInQuery / ExpInExp but
    # build ast.ExpIn -- confirm that these names match the ast module.
    if isinstance(exp, ast.ExpInQuery):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        subquery = exp.subquery     # XXX subquery scoping
        return ast.ExpIn(subexp, exp.positive, subquery)
    if isinstance(exp, ast.ExpInExp):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        subexps = [_expand_simmodel_exp(se, simcols)
            for se in exp.expressions]
        return ast.ExpIn(subexp, exp.positive, subexps)
    if isinstance(exp, ast.ExpCast):
        subexp = _expand_simmodel_exp(exp.expression, simcols)
        return ast.ExpCast(subexp, exp.type)
    if isinstance(exp, ast.ExpApp):
        operands = [_expand_simmodel_exp(operand, simcols)
            for operand in exp.operands]
        return ast.ExpApp(exp.distinct, exp.operator, operands)
    if isinstance(exp, ast.ExpAppStar):
        return exp
    if isinstance(exp, ast.ExpCase):
        raise NotImplementedError("I'm too lazy to do CASE right now.")
    if isinstance(exp, ast.ExpOp):
        operands = [_expand_simmodel_exp(operand, simcols)
            for operand in exp.operands]
        return ast.ExpOp(exp.operator, operands)
    # Raise explicitly rather than ``assert False``: assert statements are
    # removed under ``python -O``, which would make this fall through and
    # return None for an unknown node.
    raise AssertionError('Invalid expression: %s' % (repr(exp),))
def test_parametrized():
    """Check parsing of ``?``, ``?NNN``, ``:name``, ``@name`` and ``$name``.

    Each query is ``select * from t where <condition>``.  The expected
    parse wraps the ``Select`` in ``ast.Parametrized`` along with an
    integer and a dict from parameter names to their numbers.
    """
    def equals(column, parameter):
        # <column> = <parameter>
        return ast.ExpOp(ast.OP_EQ, (ast.ExpCol(None, column), parameter))

    def conjunction(left, right):
        # <left> AND <right>
        return ast.ExpOp(ast.OP_BOOLAND, (left, right))

    def parametrized(condition, count, names):
        select = ast.Select(ast.SELQUANT_ALL, [ast.SelColAll(None)],
            [ast.SelTab('t', None)], condition, None, None, None)
        return [ast.Parametrized(select, count, names)]

    cases = [
        (
            'select * from t where id = ?;',
            equals('id', ast.ExpNumpar(1)),
            1, {},
        ),
        (
            'select * from t where id = ?123;',
            equals('id', ast.ExpNumpar(123)),
            123, {},
        ),
        (
            'select * from t where id = :foo;',
            equals('id', ast.ExpNampar(1, ':foo')),
            1, {':foo': 1},
        ),
        (
            'select * from t where a = :foo and b = @foo;',
            conjunction(
                equals('a', ast.ExpNampar(1, ':foo')),
                equals('b', ast.ExpNampar(2, '@foo'))),
            2, {':foo': 1, '@foo': 2},
        ),
        (
            'select * from t where a = $foo and b = ?1;',
            conjunction(
                equals('a', ast.ExpNampar(1, '$foo')),
                equals('b', ast.ExpNumpar(1))),
            1, {'$foo': 1},
        ),
        (
            'select * from t'
            ' where a = ?123 and b = :foo and c = ?124;',
            conjunction(
                conjunction(
                    equals('a', ast.ExpNumpar(123)),
                    equals('b', ast.ExpNampar(124, ':foo'))),
                equals('c', ast.ExpNumpar(124))),
            124, {':foo': 124},
        ),
    ]
    for query, condition, count, names in cases:
        assert parse_bql_string(query) == \
            parametrized(condition, count, names)
def test_select_bql():
    """Check parsing of BQL functions used as SELECT columns.

    Covers PREDICTIVE PROBABILITY, PROBABILITY OF, SIMILARITY (with TO
    and WITH RESPECT TO), DEPENDENCE PROBABILITY, MUTUAL INFORMATION
    (with GIVEN and USING ... SAMPLES), CORRELATION, and a
    ``t.(<subquery>)`` column.
    """
    def int_lit(value):
        return ast.ExpLit(ast.LitInt(value))

    def null_lit():
        return ast.ExpLit(ast.LitNull(0))

    def column(expression):
        # Unnamed output column.
        return ast.SelColExp(expression, None)

    def rowid_equals(value):
        return ast.ExpOp(ast.OP_EQ, (
            ast.ExpCol(None, 'rowid'),
            int_lit(value),
        ))

    def select(columns, table='t', order=None):
        # Expected parse of a single ``select <columns> from <table>``.
        return [ast.Select(ast.SELQUANT_ALL, columns,
            [ast.SelTab(table, None)], None, None, order, None)]

    # PREDICTIVE PROBABILITY, alone and mixed with other columns.
    assert parse_bql_string('select predictive probability of c from t;') == \
        select([column(ast.ExpBQLPredProb('c'))])
    assert parse_bql_string(
            'select predictive probability of c, * from t;') == \
        select([
            column(ast.ExpBQLPredProb('c')),
            ast.SelColAll(None),
        ])
    assert parse_bql_string(
            'select c, predictive probability of d from t;') == \
        select([
            column(ast.ExpCol(None, 'c')),
            column(ast.ExpBQLPredProb('d')),
        ])
    assert parse_bql_string(
            'select predictive probability of c, d from t;') == \
        select([
            column(ast.ExpBQLPredProb('c')),
            column(ast.ExpCol(None, 'd')),
        ])

    # PROBABILITY OF <column> = <value>.
    assert parse_bql_string('select probability of c = 42 from t;') == \
        select([column(ast.ExpBQLProb([('c', int_lit(42))], []))])

    # SIMILARITY, optionally TO a row and WITH RESPECT TO columns.
    assert parse_bql_string('select similarity from t;') == \
        select([column(ast.ExpBQLSim(None, [ast.ColListAll()]))])
    assert parse_bql_string('select similarity to (rowid=8) from t;') == \
        select([column(
            ast.ExpBQLSim(rowid_equals(8), [ast.ColListAll()]))])
    assert parse_bql_string(
            'select similarity with respect to c from t;') == \
        select([column(ast.ExpBQLSim(None, [ast.ColListLit(['c'])]))])
    assert parse_bql_string(
            'select similarity to (rowid=8) with respect to c from t;') == \
        select([column(
            ast.ExpBQLSim(rowid_equals(8), [ast.ColListLit(['c'])]))])
    assert parse_bql_string(
            'select similarity to (rowid=5) with respect to age from t1;') \
        == \
        select(
            [column(
                ast.ExpBQLSim(rowid_equals(5), [ast.ColListLit(['age'])]))],
            table='t1')
    # Without parentheses, only ``c`` belongs to WITH RESPECT TO; ``d`` is
    # a second output column.
    assert parse_bql_string(
            'select similarity to (rowid=8) with respect to c, d from t;') \
        == \
        select([
            column(
                ast.ExpBQLSim(rowid_equals(8), [ast.ColListLit(['c'])])),
            column(ast.ExpCol(None, 'd')),
        ])
    assert parse_bql_string('select similarity to (rowid=8)'
            ' with respect to (c, d) from t;') == \
        select([column(
            ast.ExpBQLSim(
                rowid_equals(8),
                [ast.ColListLit(['c']), ast.ColListLit(['d'])]))])
    # WITH RESPECT TO a column-list subquery.
    best_column = ast.EstCols([ast.SelColAll(None)], 't', None, None,
        [ast.Ord(ast.ExpBQLProbFn(int_lit(4), []), ast.ORD_ASC)],
        ast.Lim(int_lit(1), None))
    assert parse_bql_string('select similarity to (rowid=8) with respect to'
            ' (estimate * from columns of t order by '
            ' probability of value 4 limit 1)'
            ' from t;') == \
        select([column(
            ast.ExpBQLSim(rowid_equals(8), [ast.ColListSub(best_column)]))])

    # DEPENDENCE PROBABILITY.
    assert parse_bql_string(
            'select dependence probability with c from t;') == \
        select([column(ast.ExpBQLDepProb('c', None))])
    assert parse_bql_string(
            'select dependence probability of c with d from t;') == \
        select([column(ast.ExpBQLDepProb('c', 'd'))])

    # MUTUAL INFORMATION, with optional GIVEN constraints and sample count.
    assert parse_bql_string('select mutual information with c from t;') == \
        select([column(ast.ExpBQLMutInf('c', None, None, None))])
    assert parse_bql_string(
            'select mutual information of c with d from t;') == \
        select([column(ast.ExpBQLMutInf('c', 'd', None, None))])
    assert parse_bql_string('select mutual information of c with d'
            ' using (1+2) samples from t;') == \
        select([column(
            ast.ExpBQLMutInf('c', 'd', None,
                ast.op(ast.OP_ADD, int_lit(1), int_lit(2))))])
    # A GIVEN column without a value is paired with LitNull(0).
    assert parse_bql_string(''' select mutual information with c given (d, a=1) using 10 samples from t; ''') == \
        select([column(
            ast.ExpBQLMutInf('c', None,
                [('d', null_lit()), ('a', int_lit(1))],
                int_lit(10)))])
    assert parse_bql_string(''' select mutual information of b with c given (d, a=1, e, r=2) from t;''') == \
        select([column(
            ast.ExpBQLMutInf('b', 'c',
                [
                    ('d', null_lit()),
                    ('a', int_lit(1)),
                    ('e', null_lit()),
                    ('r', int_lit(2)),
                ],
                None))])

    # CORRELATION.
    assert parse_bql_string('select correlation with c from t;') == \
        select([column(ast.ExpBQLCorrel('c', None))])
    assert parse_bql_string('select correlation of c with d from t;') == \
        select([column(ast.ExpBQLCorrel('c', 'd'))])

    # XXX A test for
    #     select probability of x = 1 - probability of y = 0 from t;
    # used to live here, expecting parse.BQLParseError.  It got broken a
    # while ago: parenthesization in PROBABILITY OF X = E is too
    # permissive.  Nobody noticed, because before BQLParseError existed
    # the test caught Exception, which covered the AssertionError this
    # turned into.  It should really parse as PROBABILITY OF x = (1 -
    # PROBABILITY OF y = 0), but getting the grammar to admit that
    # unambiguously is too much of a pain at the moment.

    assert parse_bql_string('select probability of c1 = f(c2) from t;') == \
        select([column(
            ast.ExpBQLProb(
                [('c1', ast.ExpApp(False, 'f', [ast.ExpCol(None, 'c2')]))],
                []))])

    # A ``t.(<subquery>)`` column, combined with ORDER BY on the outer
    # query.
    top_dependent = ast.EstCols([ast.SelColAll(None)], 't', None, None,
        [ast.Ord(ast.ExpBQLDepProb('c', None), ast.ORD_DESC)],
        ast.Lim(int_lit(4), None))
    assert parse_bql_string('select key, t.(estimate * from columns of t'
            ' order by dependence probability with c desc limit 4)'
            ' from t order by key asc') == \
        select(
            [
                column(ast.ExpCol(None, 'key')),
                ast.SelColSub('t', top_dependent),
            ],
            order=[ast.Ord(ast.ExpCol(None, 'key'), ast.ORD_ASC)])