def test_using_model():
    """Check how ``USING MODEL <expr>`` parses across the query kinds.

    Each positive case compares the parser output against the exact AST
    expected for one statement; the model expression is a literal, a
    parenthesized arithmetic expression, or a column-like name
    (``modelno``) depending on the case.
    """
    assert parse_bql_string('simulate x from t using model 42'
            ' limit 10') == \
        [ast.Simulate(['x'], 't', ast.ExpLit(ast.LitInt(42)), [],
            ast.ExpLit(ast.LitInt(10)))]

    # This statement must be rejected by the parser.  Only the call
    # belongs inside the `raises` block: a comparison against an expected
    # AST there could never execute on the passing path and would only
    # suggest that a result is expected.
    # NOTE(review): unlike the case above, this query has no LIMIT
    # clause -- presumably that is what makes it invalid; confirm
    # against the grammar.
    with pytest.raises(parse.BQLParseError):
        parse_bql_string('simulate x from t'
            ' using model (87)')

    assert parse_bql_string('estimate x from t using model (1+2)') == \
        [ast.Estimate(ast.SELQUANT_ALL,
            [ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't',
            ast.ExpOp(ast.OP_ADD, (
                ast.ExpLit(ast.LitInt(1)),
                ast.ExpLit(ast.LitInt(2)),
            )),
            None, None, None, None)]
    assert parse_bql_string('estimate * from columns of t'
            ' using model modelno') == \
        [ast.EstCols([ast.SelColAll(None)], 't',
            ast.ExpCol(None, 'modelno'), None, None, None)]
    assert parse_bql_string('estimate 42 from columns of t'
            ' using model modelno') == \
        [ast.EstCols([(ast.ExpLit(ast.LitInt(42)), None)], 't',
            ast.ExpCol(None, 'modelno'), None, None, None)]
    assert parse_bql_string('estimate 42 from pairwise columns of t'
            ' using model modelno') == \
        [ast.EstPairCols([(ast.ExpLit(ast.LitInt(42)), None)], 't', None,
            ast.ExpCol(None, 'modelno'), None, None, None)]
    assert parse_bql_string('estimate similarity from pairwise t'
            ' using model modelno') == \
        [ast.EstPairRow(
            [ast.SelColExp(ast.ExpBQLSim(None, [ast.ColListAll()]), None)],
            't', ast.ExpCol(None, 'modelno'), None, None, None)]
    assert parse_bql_string('infer x from t using model modelno') == \
        [ast.InferAuto([ast.InfColOne('x', None)],
            ast.ExpLit(ast.LitInt(0)), 't',
            ast.ExpCol(None, 'modelno'), None, None, None, None)]
    assert parse_bql_string('infer explicit x from t using model modelno') == \
        [ast.InferExplicit([ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't', ast.ExpCol(None, 'modelno'), None, None, None, None)]
def test_infer_explicit_samples():
    """INFER EXPLICIT with a mix of plain columns and PREDICT variants.

    The single query exercises a bare column, ``PREDICT ... WITH
    CONFIDENCE`` (with and without an alias), ``PREDICT ... CONFIDENCE``
    (with and without an alias), and a ``USING 42 SAMPLES`` suffix, and
    checks the complete resulting AST.
    """
    query = (
        'infer explicit x, predict y with confidence 0.9,'
        ' predict p with confidence 0.8 as q, predict u confidence v,'
        ' predict a as b confidence c,'
        ' predict h confidence k using 42 samples'
        ' from p'
    )
    # Parse before building the expectation, as the original comparison
    # evaluated its left-hand side first.
    parsed = parse_bql_string(query)

    columns = [
        ast.SelColExp(ast.ExpCol(None, 'x'), None),
        ast.SelColExp(
            ast.ExpBQLPredict('y', ast.ExpLit(ast.LitFloat(.9)), None),
            None,
        ),
        ast.SelColExp(
            ast.ExpBQLPredict('p', ast.ExpLit(ast.LitFloat(.8)), None),
            'q',
        ),
        ast.PredCol('u', None, 'v', None),
        ast.PredCol('a', 'b', 'c', None),
        # Only this column carries the sample count from the query.
        ast.PredCol('h', None, 'k', ast.ExpLit(ast.LitInt(42))),
    ]
    expected = [
        ast.InferExplicit(columns, 'p', None, None, None, None, None),
    ]
    assert parsed == expected
def test_trivial_commands():
    """Parse a series of simple statements and compare the exact ASTs.

    Covers CREATE/DROP POPULATION, the CREATE GENERATOR spellings
    (USING, WITH BASELINE, both IF NOT EXISTS positions, empty and
    absent schema), INITIALIZE MODELS, and CREATE TEMPORARY TABLE AS
    with an INFER EXPLICIT query.
    """
    def schema():
        # A fresh list per expectation: the three-column schema shared
        # by most of the CREATE GENERATOR cases below.
        return [
            ['xyz', 'numerical'],
            ['pqr', 'categorical'],
            ['lmn', 'cyclic'],
        ]

    parsed = parse_bql_string(
        ' create population satellites for satellites_ucs ('
        ' MODEL country_of_operator, orbit_type AS categorical;'
        ' MODEL launch_mass AS numerical;'
        ' MODEL perigee AS numerical;'
        ' MODEL apogee, period AS numerical'
        ' ) '
    )
    assert parsed == [
        ast.CreatePop(False, 'satellites', 'satellites_ucs', [
            ast.PopModelVars(
                ['country_of_operator', 'orbit_type'], 'categorical'),
            ast.PopModelVars(['launch_mass'], 'numerical'),
            ast.PopModelVars(['perigee'], 'numerical'),
            ast.PopModelVars(['apogee', 'period'], 'numerical'),
        ]),
    ]

    parsed = parse_bql_string('drop population satellites')
    assert parsed == [ast.DropPop(False, 'satellites')]

    parsed = parse_bql_string(
        'create generator t_cc for t using crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen('t_cc', False, 't', None, 'crosscat', schema()),
    ]

    parsed = parse_bql_string(
        'create generator t_cc for t with baseline crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen(
            't_cc',
            False,
            't',
            ast.Baseline('crosscat', []),
            None,       # Defaults to cgpm.
            schema(),
        ),
    ]

    # IF NOT EXISTS is accepted both after and before the generator
    # name; the two spellings yield the same AST.
    parsed = parse_bql_string(
        'create generator t_cc if not exists'
        ' for t using crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen('t_cc', True, 't', None, 'crosscat', schema()),
    ]

    parsed = parse_bql_string(
        'create generator if not exists t_cc'
        ' for t using crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen('t_cc', True, 't', None, 'crosscat', schema()),
    ]

    # XXX Schema of [[]] instead of [] is kinda wacky.  Fix?  (But
    # make sure the empty-parens and no-parens cases are equivalent.)
    parsed = parse_bql_string(
        'create generator t_cc'
        ' for t using crosscat()')
    assert parsed == [
        ast.CreateGen('t_cc', False, 't', None, 'crosscat', [[]]),
    ]

    parsed = parse_bql_string(
        'create generator t_cc'
        ' for t using crosscat')
    assert parsed == [
        ast.CreateGen('t_cc', False, 't', None, 'crosscat', [[]]),
    ]

    parsed = parse_bql_string('initialize 1 model for t;')
    assert parsed == [ast.InitModels(False, 't', 1)]

    parsed = parse_bql_string('initialize 1 model if not exists for t;')
    assert parsed == [ast.InitModels(True, 't', 1)]

    parsed = parse_bql_string('initialize 2 models for t;')
    assert parsed == [ast.InitModels(False, 't', 2)]

    parsed = parse_bql_string('initialize 2 models if not exists for t;')
    assert parsed == [ast.InitModels(True, 't', 2)]

    parsed = parse_bql_string(
        'create temporary table tx as'
        ' infer explicit x, predict x as xi confidence xc from t_cc')
    inferred = ast.InferExplicit(
        [
            ast.SelColExp(ast.ExpCol(None, 'x'), None),
            ast.PredCol('x', 'xi', 'xc', None),
        ],
        't_cc', None, None, None, None, None,
    )
    assert parsed == [ast.CreateTabAs(True, False, 'tx', inferred)]
def p_infer_explicit(self, cols, population, generator, cond, grouping,
        ord, lim):
    """Build the ``ast.InferExplicit`` node for an INFER EXPLICIT query.

    All seven arguments are forwarded positionally, in order, to
    ``ast.InferExplicit``; nothing is validated or transformed here.
    NOTE(review): presumably invoked by the parser as a grammar action
    (``p_`` prefix) -- confirm against the parser driver.

    ``ord`` shadows the builtin of the same name; it is kept because the
    parameter name is part of this method's signature.
    """
    node = ast.InferExplicit(
        cols, population, generator, cond, grouping, ord, lim)
    return node
def test_trivial_commands():
    """Parse a series of simple statements and compare the exact ASTs.

    Covers CREATE [DEFAULT] GENERATOR, INITIALIZE MODELS, the ANALYZE
    variants (iteration and time budgets, WAIT, explicit model lists,
    checkpoints), and CREATE TEMPORARY TABLE AS with INFER EXPLICIT.
    """
    def schema():
        # A fresh list per expectation: the three-column schema used by
        # every CREATE GENERATOR case below.
        return [
            ['xyz', 'numerical'],
            ['pqr', 'categorical'],
            ['lmn', 'cyclic'],
        ]

    parsed = parse_bql_string(
        'create generator t_cc for t using crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen(False, 't_cc', False, 't', 'crosscat', schema()),
    ]

    parsed = parse_bql_string(
        'create default generator t_cc for t using crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen(True, 't_cc', False, 't', 'crosscat', schema()),
    ]

    parsed = parse_bql_string(
        'create generator t_cc if not exists'
        ' for t using crosscat'
        '(xyz numerical, pqr categorical, lmn cyclic)')
    assert parsed == [
        ast.CreateGen(False, 't_cc', True, 't', 'crosscat', schema()),
    ]

    # (query, positional fields of the expected ast.InitModels)
    init_cases = (
        ('initialize 1 model for t;', (False, 't', 1, None)),
        ('initialize 1 model if not exists for t;', (True, 't', 1, None)),
        ('initialize 2 models for t;', (False, 't', 2, None)),
        ('initialize 2 models if not exists for t;', (True, 't', 2, None)),
    )
    for query, fields in init_cases:
        assert parse_bql_string(query) == [ast.InitModels(*fields)]

    # (query, positional fields of the expected ast.AnalyzeModels).
    # Per these expectations, time budgets come out in seconds
    # (1 minute -> 60) and a trailing WAIT flips the last field to True.
    analyze_cases = (
        ('analyze t for 1 iteration;',
            ('t', None, 1, None, None, None, False)),
        ('analyze t for 1 iteration wait;',
            ('t', None, 1, None, None, None, True)),
        ('analyze t for 1 minute;',
            ('t', None, None, 60, None, None, False)),
        ('analyze t for 1 minute wait;',
            ('t', None, None, 60, None, None, True)),
        ('analyze t for 2 minutes;',
            ('t', None, None, 120, None, None, False)),
        ('analyze t for 2 minutes wait;',
            ('t', None, None, 120, None, None, True)),
        ('analyze t for 1 second;',
            ('t', None, None, 1, None, None, False)),
        ('analyze t for 1 second wait;',
            ('t', None, None, 1, None, None, True)),
        ('analyze t for 2 seconds;',
            ('t', None, None, 2, None, None, False)),
        ('analyze t for 2 seconds wait;',
            ('t', None, None, 2, None, None, True)),
        ('analyze t model 1 for 1 iteration;',
            ('t', [1], 1, None, None, None, False)),
        ('analyze t models 1,2,3 for 1 iteration;',
            ('t', [1, 2, 3], 1, None, None, None, False)),
        # A range such as 1-3 is expanded into the individual numbers.
        ('analyze t models 1-3,5 for 1 iteration;',
            ('t', [1, 2, 3, 5], 1, None, None, None, False)),
        ('analyze t for 10 iterations'
            ' checkpoint 3 iterations',
            ('t', None, 10, None, 3, None, False)),
        ('analyze t for 10 seconds'
            ' checkpoint 3 seconds',
            ('t', None, None, 10, None, 3, False)),
    )
    for query, fields in analyze_cases:
        assert parse_bql_string(query) == [ast.AnalyzeModels(*fields)]

    parsed = parse_bql_string(
        'create temporary table tx as'
        ' infer explicit x, predict x as xi confidence xc from t_cc')
    inferred = ast.InferExplicit(
        [
            ast.SelColExp(ast.ExpCol(None, 'x'), None),
            ast.PredCol('x', 'xi', 'xc'),
        ],
        't_cc', ast.ExpLit(ast.LitNull(None)), None, None, None, None,
    )
    assert parsed == [ast.CreateTabAs(True, False, 'tx', inferred)]
def p_infer_explicit(self, cols, generator, modelno, cond, grouping,
        ord, lim):
    """Build the ``ast.InferExplicit`` node for an INFER EXPLICIT query.

    All seven arguments are forwarded positionally, in order, to
    ``ast.InferExplicit``; nothing is validated or transformed here.
    NOTE(review): presumably invoked by the parser as a grammar action
    (``p_`` prefix) -- confirm against the parser driver.

    ``ord`` shadows the builtin of the same name; it is kept because the
    parameter name is part of this method's signature.
    """
    node = ast.InferExplicit(
        cols, generator, modelno, cond, grouping, ord, lim)
    return node