def test_term_new_data_numeric():
    """Check that a numeric variable evaluates its original data and new data."""
    training = pd.DataFrame({"x": [10, 10, 10]})

    # Build the variable term from the parsed expression "x".
    tokens = Scanner("x").scan(False)
    parsed = Parser(tokens).parse()
    term = Variable(parsed.name.lexeme, parsed.level)
    term.set_type(training)
    term.set_data()

    # NOTE(review): no transpose here, unlike the new-data check below; the
    # shape of ``value`` is not visible from this test -- confirm it is intended.
    matches_training = term.value == [10, 10, 10]
    assert matches_training.all()

    # The same term evaluated on a different data frame yields the new values.
    unseen = pd.DataFrame({"x": [1, 2, 3]})
    evaluated = term.eval_new_data(unseen)
    assert (evaluated.T == [1, 2, 3]).all()
def test_term_new_data_categoric():
    """Check categoric variables on new data with full-rank and reduced encodings.

    For each encoding the test verifies the matrix produced from the original
    data, the matrix produced from new data containing only known levels, and
    that new data containing an unknown level raises ``ValueError``.
    """
    unseen_level_msg = "The levels D in 'x' are not present in the original data set."

    def fresh_term(frame):
        """Parse "x" and return a Variable whose type was set from ``frame``."""
        parsed = Parser(Scanner("x").scan(False)).parse()
        term = Variable(parsed.name.lexeme, parsed.level)
        term.set_type(frame)
        return term

    # Full rank encoding
    full_rank = fresh_term(pd.DataFrame({"x": ["A", "B", "C"]}))
    full_rank.set_data(spans_intercept=True)
    expected_full = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
    assert (expected_full == full_rank.value).all()

    known_levels = pd.DataFrame({"x": ["B", "C"]})
    expected_known = np.array([[0, 1, 0], [0, 0, 1]])
    assert (full_rank.eval_new_data(known_levels) == expected_known).all()

    # It remembers it saw "A", "B", and "C", but not "D".
    # So when you pass a new level, it raises a ValueError.
    with_unknown = pd.DataFrame({"x": ["B", "C", "D"]})
    with pytest.raises(ValueError, match=unseen_level_msg):
        full_rank.eval_new_data(with_unknown)

    # The same with reduced encoding
    reduced = fresh_term(pd.DataFrame({"x": ["A", "B", "C"]}))
    reduced.set_data()
    expected_reduced = np.array([[0, 0], [1, 0], [0, 1]])
    assert (expected_reduced == reduced.value).all()

    known_levels = pd.DataFrame({"x": ["A", "C"]})
    expected_known = np.array([[0, 0], [0, 1]])
    assert (reduced.eval_new_data(known_levels) == expected_known).all()

    # It remembers it saw "A", "B", and "C", but not "D".
    # So when you pass a new level, it raises a ValueError.
    with_unknown = pd.DataFrame({"x": ["B", "C", "D"]})
    with pytest.raises(ValueError, match=unseen_level_msg):
        reduced.eval_new_data(with_unknown)
# Example #3
# 0
def test_variable_set_data_errors():
    """Check that ``Variable.set_data`` raises when the variable is not ready.

    Three cases are exercised on the same ``Variable`` instance, in order:
    no ``kind`` assigned by the test, ``kind`` set to ``"hello"``, and ``kind``
    set to ``"categoric"`` without ``set_type`` having been called.

    Only the ``set_data`` call sits inside each ``pytest.raises`` block, so the
    setup assignments cannot satisfy the expectation by raising themselves.
    """
    x = Variable("x")

    # Case 1: the test has not assigned any kind to the variable yet.
    with pytest.raises(ValueError):
        x.set_data(True)

    # Case 2: a kind value that set_data is expected to reject.
    x.kind = "hello"
    with pytest.raises(ValueError):
        x.set_data(True)

    # Case 3: "categoric" kind, but set_type was never called with any data.
    # NOTE(review): the broad ``Exception`` is kept from the original test;
    # narrow it once the concrete exception type is confirmed.
    x.kind = "categoric"
    with pytest.raises(Exception):
        x.set_data(True)