Esempio n. 1
0
def test_simulate_models_nontrivial():
    """Check how SIMULATE ... FROM MODELS OF expands compound expressions.

    Each BQL subexpression of the selected columns is expected to move into
    an inner ``ast.SimulateModels`` under a temporary name (v0, v1, v2),
    while the outer ``ast.Select`` keeps the surrounding operators and the
    user-visible column names and refers to the temporaries by name.
    """
    # XXX test descent into ExpLit
    # XXX test descent into ExpNumpar
    # XXX test descent into ExpNampar
    # XXX test descent into ExpCol
    # XXX test descent into ExpSub
    # XXX test descent into ExpCollate
    # XXX test descent into ExpIn
    # XXX test descent into ExpCast
    # XXX test descent into ExpExists
    # XXX test descent into ExpApp
    # XXX test descent into ExpAppStar
    # XXX test descent into ExpCase

    def less_than_tenth_of(lhs, rhs):
        # lhs < 0.1 * rhs -- the same shape is used for input and output.
        scaled = ast.ExpOp(ast.OP_MUL, [ast.ExpLit(ast.LitFloat(0.1)), rhs])
        return ast.ExpOp(ast.OP_LT, [lhs, scaled])

    mi_plain = ast.ExpBQLMutInf(
        ['c0'], ['c1', 'c2'], [('c3', ast.ExpLit(ast.LitInt(3)))], None)
    mi_sampled = ast.ExpBQLMutInf(
        ['c4', 'c5'], ['c6'],
        [('c7', ast.ExpLit(ast.LitString('ergodic')))], 100)
    # No conditions for now -- that changes the weighting of the average.
    density = ast.ExpBQLProbDensity(
        [('x', ast.ExpLit(ast.LitFloat(1.2)))], [])

    query = ast.SimulateModelsExp(
        [
            ast.SelColExp(less_than_tenth_of(mi_plain, mi_sampled), 'quagga'),
            ast.SelColExp(density, 'eland'),
        ],
        'p', 'g')
    actual = macro.expand_simulate_models(query)

    # Inner query: one temporary column per hoisted BQL term.
    hoisted = ast.SimulateModels(
        [
            ast.SelColExp(mi_plain, 'v0'),
            ast.SelColExp(mi_sampled, 'v1'),
            ast.SelColExp(density, 'v2'),
        ],
        'p', 'g')
    # Outer query: original operators applied to the temporaries.
    outer_columns = [
        ast.SelColExp(
            less_than_tenth_of(
                ast.ExpCol(None, 'v0'), ast.ExpCol(None, 'v1')),
            'quagga'),
        ast.SelColExp(ast.ExpCol(None, 'v2'), 'eland'),
    ]
    expected = ast.Select(
        ast.SELQUANT_ALL, outer_columns, [ast.SelTab(hoisted, None)],
        None, None, None, None)
    assert actual == expected
Esempio n. 2
0
def test_using_model():
    """Exercise the ``USING MODEL <expression>`` clause of BQL statements.

    Covers SIMULATE, ESTIMATE (rows, columns, pairwise columns, pairwise
    rows), INFER and INFER EXPLICIT, and checks that the query
    ``simulate x from t using model (87)`` is rejected with
    ``parse.BQLParseError``.
    """
    assert parse_bql_string('simulate x from t using model 42'
            ' limit 10') == \
        [ast.Simulate(['x'], 't', ast.ExpLit(ast.LitInt(42)), [],
            ast.ExpLit(ast.LitInt(10)))]
    # Only the call belongs inside the raises block: the parse is expected
    # to fail, so any comparison against a result could never be evaluated.
    with pytest.raises(parse.BQLParseError):
        parse_bql_string('simulate x from t'
                ' using model (87)')
    assert parse_bql_string('estimate x from t using model (1+2)') == \
        [ast.Estimate(ast.SELQUANT_ALL,
            [ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't',
            ast.ExpOp(ast.OP_ADD, (
                ast.ExpLit(ast.LitInt(1)),
                ast.ExpLit(ast.LitInt(2)),
            )),
            None, None, None, None)]
    assert parse_bql_string('estimate * from columns of t'
            ' using model modelno') == \
        [ast.EstCols([ast.SelColAll(None)], 't', ast.ExpCol(None, 'modelno'),
            None, None, None)]
    assert parse_bql_string('estimate 42 from columns of t'
            ' using model modelno') == \
        [ast.EstCols([(ast.ExpLit(ast.LitInt(42)), None)], 't',
            ast.ExpCol(None, 'modelno'),
            None, None, None)]
    assert parse_bql_string('estimate 42 from pairwise columns of t'
            ' using model modelno') == \
        [ast.EstPairCols([(ast.ExpLit(ast.LitInt(42)), None)], 't', None,
            ast.ExpCol(None, 'modelno'),
            None, None, None)]
    assert parse_bql_string('estimate similarity from pairwise t'
            ' using model modelno') == \
        [ast.EstPairRow([ast.SelColExp(ast.ExpBQLSim(None, [ast.ColListAll()]),
                None)],
            't', ast.ExpCol(None, 'modelno'),
            None, None, None)]
    assert parse_bql_string('infer x from t using model modelno') == \
        [ast.InferAuto([ast.InfColOne('x', None)], ast.ExpLit(ast.LitInt(0)),
            't', ast.ExpCol(None, 'modelno'),
            None, None, None, None)]
    assert parse_bql_string('infer explicit x from t using model modelno') == \
        [ast.InferExplicit([ast.SelColExp(ast.ExpCol(None, 'x'), None)],
            't', ast.ExpCol(None, 'modelno'),
            None, None, None, None)]
Esempio n. 3
0
def test_expand_probability_estimate():
    """Check the expansion of a PROBABILITY OF (<expression>) estimate.

    The expected result is a scalar subquery that selects ``AVG(x)`` from a
    ``SimulateModelsExp`` in which the original expression is named ``x``.
    """
    condition = ast.ExpOp(
        ast.OP_LT,
        [
            ast.ExpBQLMutInf(
                ['c0'], ['c1', 'c2'],
                [('c3', ast.ExpLit(ast.LitInt(3)))], None),
            ast.ExpLit(ast.LitFloat(0.1)),
        ])
    actual = macro.expand_probability_estimate(
        ast.ExpBQLProbEst(condition), 'p', 'g')

    average = ast.SelColExp(
        ast.ExpApp(False, 'AVG', [ast.ExpCol(None, 'x')]), None)
    simulation = ast.SimulateModelsExp(
        [ast.SelColExp(condition, 'x')], 'p', 'g')
    expected = ast.ExpSub(
        ast.Select(
            ast.SELQUANT_ALL, [average], [ast.SelTab(simulation, None)],
            None, None, None, None))
    assert actual == expected
Esempio n. 4
0
def _expand_simmodel_exp(exp, simcols):
    """Hoist BQL terms and column references out of an expression.

    Walks ``exp`` recursively.  Every ``ast.ExpCol`` node, and every node
    for which ``ast.is_bql`` is true, is appended to ``simcols`` as an
    ``ast.SelColExp`` under the name ``'v<N>'`` (``N`` being the length of
    ``simcols`` at that moment) and is replaced in the result by
    ``ast.ExpCol(None, 'v<N>')``.  Literals, parameters, subqueries, EXISTS
    and ``ExpAppStar`` nodes are returned unchanged.  Other compound nodes
    are rebuilt, with the same node type, around their rewritten children.

    Arguments:
        exp: expression node to rewrite.
        simcols: list that is extended in place with the hoisted columns.
            Children are visited left to right, so names follow that order.

    Returns:
        The rewritten expression.

    Raises:
        NotImplementedError: for ``ast.ExpCase``, which is not handled yet.
        AssertionError: for any node type this function does not know.
    """
    def descend(child):
        return _expand_simmodel_exp(child, simcols)

    if isinstance(exp, ast.ExpCol) or ast.is_bql(exp):
        tmpname = 'v%d' % (len(simcols), )
        simcols.append(ast.SelColExp(exp, tmpname))
        return ast.ExpCol(None, tmpname)
    elif isinstance(exp, (ast.ExpLit, ast.ExpNumpar, ast.ExpNampar)):
        return exp
    elif isinstance(exp, (ast.ExpSub, ast.ExpExists)):
        # XXX Not really right -- need to provide correct scoping.
        return exp  # XXX subquery scoping
    elif isinstance(exp, ast.ExpCollate):
        return ast.ExpCollate(descend(exp.expression), exp.collation)
    elif isinstance(exp, ast.ExpInQuery):
        subexp = descend(exp.expression)
        subquery = exp.subquery  # XXX subquery scoping
        # Rebuild the node type that was matched, as every other branch
        # does, so later passes still see an ExpInQuery.
        return ast.ExpInQuery(subexp, exp.positive, subquery)
    elif isinstance(exp, ast.ExpInExp):
        subexp = descend(exp.expression)
        subexps = [descend(se) for se in exp.expressions]
        # Same as above: keep the node an ExpInExp.
        return ast.ExpInExp(subexp, exp.positive, subexps)
    elif isinstance(exp, ast.ExpCast):
        return ast.ExpCast(descend(exp.expression), exp.type)
    elif isinstance(exp, ast.ExpApp):
        operands = [descend(operand) for operand in exp.operands]
        return ast.ExpApp(exp.distinct, exp.operator, operands)
    elif isinstance(exp, ast.ExpAppStar):
        return exp
    elif isinstance(exp, ast.ExpCase):
        raise NotImplementedError("I'm too lazy to do CASE right now.")
    elif isinstance(exp, ast.ExpOp):
        operands = [descend(operand) for operand in exp.operands]
        return ast.ExpOp(exp.operator, operands)
    else:
        # Raise explicitly: a bare `assert False` is removed under
        # `python -O`, which would let this fall through and return None.
        raise AssertionError('Invalid expression: %s' % (repr(exp), ))
Esempio n. 5
0
def test_parametrized():
    """Check parsing of numbered and named parameter markers.

    Queries using ``?``, ``?NNN``, ``:name``, ``@name`` and ``$name`` are
    expected to parse into ``ast.ExpNumpar`` / ``ast.ExpNampar`` nodes, with
    the whole statement wrapped in ``ast.Parametrized``.
    """
    def equals(column, parameter):
        # <column> = <parameter>
        return ast.ExpOp(ast.OP_EQ, (ast.ExpCol(None, column), parameter))

    def both(left, right):
        # <left> AND <right>
        return ast.ExpOp(ast.OP_BOOLAND, (left, right))

    def parametrized(where, n, named):
        # SELECT * FROM t WHERE <where>, wrapped with the expected
        # trailing arguments of ast.Parametrized.
        query = ast.Select(
            ast.SELQUANT_ALL, [ast.SelColAll(None)],
            [ast.SelTab('t', None)], where, None, None, None)
        return [ast.Parametrized(query, n, named)]

    assert parse_bql_string('select * from t where id = ?;') == \
        parametrized(equals('id', ast.ExpNumpar(1)), 1, {})

    assert parse_bql_string('select * from t where id = ?123;') == \
        parametrized(equals('id', ast.ExpNumpar(123)), 123, {})

    assert parse_bql_string('select * from t where id = :foo;') == \
        parametrized(
            equals('id', ast.ExpNampar(1, ':foo')), 1, {':foo': 1})

    assert parse_bql_string(
            'select * from t where a = :foo and b = @foo;') == \
        parametrized(
            both(
                equals('a', ast.ExpNampar(1, ':foo')),
                equals('b', ast.ExpNampar(2, '@foo'))),
            2, {':foo': 1, '@foo': 2})

    assert parse_bql_string(
            'select * from t where a = $foo and b = ?1;') == \
        parametrized(
            both(
                equals('a', ast.ExpNampar(1, '$foo')),
                equals('b', ast.ExpNumpar(1))),
            1, {'$foo': 1})

    assert parse_bql_string('select * from t'
            ' where a = ?123 and b = :foo and c = ?124;') == \
        parametrized(
            both(
                both(
                    equals('a', ast.ExpNumpar(123)),
                    equals('b', ast.ExpNampar(124, ':foo'))),
                equals('c', ast.ExpNumpar(124))),
            124, {':foo': 124})
Esempio n. 6
0
def test_select_bql():
    """Check parsing of BQL functions used as SELECT columns.

    Covers PREDICTIVE PROBABILITY, PROBABILITY OF, SIMILARITY (with TO and
    WITH RESPECT TO variants), DEPENDENCE PROBABILITY, MUTUAL INFORMATION
    (with GIVEN and USING ... SAMPLES), CORRELATION, and a ``t.(<query>)``
    column subquery.
    """
    def lit_int(value):
        return ast.ExpLit(ast.LitInt(value))

    def null():
        return ast.ExpLit(ast.LitNull(0))

    def column(name):
        return ast.ExpCol(None, name)

    def unnamed(expression):
        # A selected expression without an AS name.
        return ast.SelColExp(expression, None)

    def rowid_is(value):
        # (rowid = <value>)
        return ast.ExpOp(ast.OP_EQ, (column('rowid'), lit_int(value)))

    def select(columns, table='t', order=None):
        # SELECT <columns> FROM <table> [ORDER BY <order>], as the
        # one-element phrase list that parse_bql_string returns.
        return [ast.Select(
            ast.SELQUANT_ALL, columns, [ast.SelTab(table, None)],
            None, None, order, None)]

    def ranked_columns(order_expression, direction, limit):
        # ESTIMATE * FROM COLUMNS OF t ORDER BY <...> LIMIT <limit>
        return ast.EstCols(
            [ast.SelColAll(None)], 't', None, None,
            [ast.Ord(order_expression, direction)],
            ast.Lim(lit_int(limit), None))

    # PREDICTIVE PROBABILITY
    assert parse_bql_string('select predictive probability of c from t;') == \
        select([unnamed(ast.ExpBQLPredProb('c'))])
    assert parse_bql_string(
            'select predictive probability of c, * from t;') == \
        select([unnamed(ast.ExpBQLPredProb('c')), ast.SelColAll(None)])
    assert parse_bql_string(
            'select c, predictive probability of d from t;') == \
        select([unnamed(column('c')), unnamed(ast.ExpBQLPredProb('d'))])
    assert parse_bql_string(
            'select predictive probability of c, d from t;') == \
        select([unnamed(ast.ExpBQLPredProb('c')), unnamed(column('d'))])

    # PROBABILITY OF
    assert parse_bql_string('select probability of c = 42 from t;') == \
        select([unnamed(ast.ExpBQLProb([('c', lit_int(42))], []))])

    # SIMILARITY
    assert parse_bql_string('select similarity from t;') == \
        select([unnamed(ast.ExpBQLSim(None, [ast.ColListAll()]))])
    assert parse_bql_string('select similarity to (rowid=8) from t;') == \
        select([unnamed(ast.ExpBQLSim(rowid_is(8), [ast.ColListAll()]))])
    assert parse_bql_string('select similarity with respect to c from t;') == \
        select([unnamed(ast.ExpBQLSim(None, [ast.ColListLit(['c'])]))])
    assert parse_bql_string(
            'select similarity to (rowid=8) with respect to c from t;') == \
        select([
            unnamed(ast.ExpBQLSim(rowid_is(8), [ast.ColListLit(['c'])])),
        ])
    assert parse_bql_string(
            'select similarity to (rowid=5) with respect to age from t1;') == \
        select(
            [unnamed(ast.ExpBQLSim(rowid_is(5), [ast.ColListLit(['age'])]))],
            table='t1')
    assert parse_bql_string(
            'select similarity to (rowid=8) with respect to c, d from t;') == \
        select([
            unnamed(ast.ExpBQLSim(rowid_is(8), [ast.ColListLit(['c'])])),
            unnamed(column('d')),
        ])
    assert parse_bql_string('select similarity to (rowid=8)'
            ' with respect to (c, d) from t;') == \
        select([
            unnamed(ast.ExpBQLSim(
                rowid_is(8),
                [ast.ColListLit(['c']), ast.ColListLit(['d'])])),
        ])
    assert parse_bql_string('select similarity to (rowid=8) with respect to' +
            ' (estimate * from columns of t order by ' +
            '  probability of value 4 limit 1)' +
            ' from t;') == \
        select([
            unnamed(ast.ExpBQLSim(
                rowid_is(8),
                [ast.ColListSub(
                    ranked_columns(
                        ast.ExpBQLProbFn(lit_int(4), []),
                        ast.ORD_ASC, 1))])),
        ])

    # DEPENDENCE PROBABILITY
    assert parse_bql_string(
            'select dependence probability with c from t;') == \
        select([unnamed(ast.ExpBQLDepProb('c', None))])
    assert parse_bql_string(
            'select dependence probability of c with d from t;') == \
        select([unnamed(ast.ExpBQLDepProb('c', 'd'))])

    # MUTUAL INFORMATION
    assert parse_bql_string('select mutual information with c from t;') == \
        select([unnamed(ast.ExpBQLMutInf('c', None, None, None))])
    assert parse_bql_string(
            'select mutual information of c with d from t;') == \
        select([unnamed(ast.ExpBQLMutInf('c', 'd', None, None))])
    assert parse_bql_string('select mutual information of c with d' +
            ' using (1+2) samples from t;') == \
        select([
            unnamed(ast.ExpBQLMutInf(
                'c', 'd', None,
                ast.op(ast.OP_ADD, lit_int(1), lit_int(2)))),
        ])
    assert parse_bql_string('''
            select mutual information with c given (d, a=1) using
            10 samples from t;
            ''') == \
        select([
            unnamed(ast.ExpBQLMutInf(
                'c', None,
                [('d', null()), ('a', lit_int(1))],
                lit_int(10))),
        ])
    assert parse_bql_string('''
            select mutual information of b with c
            given (d, a=1, e, r=2) from t;''') == \
        select([
            unnamed(ast.ExpBQLMutInf(
                'b', 'c',
                [
                    ('d', null()),
                    ('a', lit_int(1)),
                    ('e', null()),
                    ('r', lit_int(2)),
                ],
                None)),
        ])

    # CORRELATION
    assert parse_bql_string('select correlation with c from t;') == \
        select([unnamed(ast.ExpBQLCorrel('c', None))])
    assert parse_bql_string('select correlation of c with d from t;') == \
        select([unnamed(ast.ExpBQLCorrel('c', 'd'))])

    # XXX This got broken a while ago: parenthesization in PROBABILITY
    # OF X = E is too permissive.  I didn't notice because before I
    # introduced BQLParseError, this simply caught Exception -- which
    # covered the AssertionError that this turned into.
    #
    # with pytest.raises(parse.BQLParseError):
    #     parse_bql_string('select probability of x = 1 -' +
    #         ' probability of y = 0 from t;')
    #     # XXX Should really be this test, but getting the grammar to
    #     # admit this unambiguously is too much of a pain at the
    #     # moment.
    #     assert parse_bql_string('select probability of x = 1 -' +
    #             ' probability of y = 0 from t;') == \
    #         [ast.Select(ast.SELQUANT_ALL,
    #             [ast.SelColExp(ast.ExpBQLProb([('x',
    #                         ast.ExpOp(ast.OP_SUB, (
    #                             ast.ExpLit(ast.LitInt(1)),
    #                             ast.ExpBQLProb([('y',
    #                                     ast.ExpLit(ast.LitInt(0)))],
    #                                 []),
    #                         )))],
    #                     []),
    #                 None)],
    #             [ast.SelTab('t', None)], None, None, None, None)]
    assert parse_bql_string('select probability of c1 = f(c2) from t;') == \
        select([
            unnamed(ast.ExpBQLProb(
                [('c1', ast.ExpApp(False, 'f', [column('c2')]))],
                [])),
        ])

    # Column subquery combined with an outer ORDER BY.
    assert parse_bql_string('select key, t.(estimate * from columns of t'
            ' order by dependence probability with c desc limit 4)'
            ' from t order by key asc') == \
        select(
            [
                unnamed(column('key')),
                ast.SelColSub(
                    't',
                    ranked_columns(
                        ast.ExpBQLDepProb('c', None), ast.ORD_DESC, 4)),
            ],
            order=[ast.Ord(column('key'), ast.ORD_ASC)])