Ejemplo n.º 1
0
def test_two_vars():
    """Run a two-variable simulation and check both resulting series.

    ``a`` is defined from its own two previous values (third argument to
    ``v`` is presumably the initial value) and ``b`` is ``a + 1``.
    """
    variables = [
        v("a", "a[t-1] + a[t-2]", "1"),
        v("b", "a + 1"),
    ]
    sim = run_sim(variables)

    expected = {
        "a": [1, 2, 3, 5],
        "b": [2, 3, 4, 6],
    }
    for name, series in expected.items():
        sim.assert_values_match(name, series)
Ejemplo n.º 2
0
def test_duplicate_variable_names():
    """Two user variables that share the name ``x`` are reported as an error.

    The expected error list holds the single entry ``['x', 'short_name']``.
    """
    duplicated = [v("x", formula) for formula in ("1", "2")]
    sim = run_sim(duplicated)

    expected_errors = [["x", "short_name"]]
    sim.assert_errors_match(expected_errors)
Ejemplo n.º 3
0
def test_missing_initial_value_in_other_variable_formula():
    """A missing initial value for ``y`` is reported exactly once.

    Both ``x`` and ``y`` read ``y[t-1]``, so evaluating either of them runs
    into the missing initial value of ``y``. Only one ``['y', 'initial']``
    error is expected, not one per affected variable.
    """
    lagged_y = "y[t-1] + 1"
    sim = run_sim([v("x", lagged_y), v("y", lagged_y)])
    sim.assert_errors_match([["y", "initial"]])
Ejemplo n.º 4
0
def test_sim_with_no_initial_vals():
    """Simulate variables that have no initial values for two steps.

    Checks the value series of ``a`` (= ``t``) and ``b`` (= ``2*a``) and the
    dependency mapping exposed as ``sim.deps``.
    """
    variables = [v("a", "t"), v("b", "2*a")]
    sim = run_sim(variables, num_steps=2)

    for name, series in (("a", [0, 1, 2]), ("b", [0, 2, 4])):
        sim.assert_values_match(name, series)

    expected_deps = {"a": set(), "b": {"a"}}
    assert sim.deps == expected_deps
Ejemplo n.º 5
0
def test_handling_circular_dep_in_dataset():
    """Mutually dependent dataset variables produce an error for each one.

    ``b`` is defined as ``a`` and ``a`` as ``b`` on a copy of the assets
    dataset; an ``'equation'`` error is expected for both.
    """
    # Work on a copy so the shared dataset definition is left untouched.
    dataset = deepcopy(ASSETS_DATASET_DEF)
    for var_args in (("b", "a"), ("a", "b", 0)):
        dataset._add_var(v(*var_args))

    sim = run_sim(datasets=[dataset])

    expected_errors = [
        [f"EXAMPLE_LABEL.{name}", "equation"] for name in ("a", "b")
    ]
    sim.assert_errors_match(expected_errors)
Ejemplo n.º 6
0
def test_np_function_wrong_number_of_args():
    """Calling ``ceil`` with two arguments yields an equation error on ``z``.

    The third element of the expected entry (``'arguments'``) is presumably
    matched against the error message text -- confirm against
    ``assert_errors_match``.
    """
    variables = [
        v("x", "1"),
        v("y", "2"),
        v("z", "ceil(x, y)"),
    ]
    sim = run_sim(variables, allow_errs=True)
    sim.assert_errors_match([["z", "equation", "arguments"]])
Ejemplo n.º 7
0
def test_with_policy_formulas():
    """Policy attribute ``x`` given as formulas (``a*2`` and ``a*3``).

    Both ``a`` and ``b`` read the lagged attribute ``x[t-1]``; their value
    series are checked separately for policy 0 and policy 1.
    """
    variables = [
        v("a", "where(t>1, x[t-1], 1)", "1"),
        v("b", "x[t-1]", "0"),
    ]
    sim = run_sim(variables, policies=pols(x=["a*2", "a*3"]))

    # (variable name, expected series, policy index)
    expectations = [
        ("a", [1, 1, 2, 4], 0),
        ("a", [1, 1, 3, 9], 1),
        ("b", [0, 2, 2, 4], 0),
        ("b", [0, 3, 3, 9], 1),
    ]
    for name, series, policy_idx in expectations:
        sim.assert_values_match(name, series, policy=policy_idx)
Ejemplo n.º 8
0
def test_join_fn_multiple_policies():
    """``join`` against the assets dataset with two policies and two sims.

    Every row of the results is expected to hold ``b_var == 20``.
    """
    join_formula = (
        "join(EXAMPLE_LABEL.factor_4 == a_var, EXAMPLE_LABEL.factor_2)"
    )
    variables = [
        v("a_var", "3"),
        v("b_var", join_formula),
    ]
    sim = run_sim(
        variables,
        policies=pols(attribute1=["1", "3"]),
        dataset=ASSETS_DATASET_DEF,
        num_sims=2,
    )
    b_values = sim.results.b_var
    assert (b_values == 20).all()
Ejemplo n.º 9
0
def test_multi_arg_max():
    """``max`` accepts three arguments: constants 1 and 2 and the time ``t``.

    Over four steps the expected series is 2 until ``t`` exceeds it.
    """
    variables = [
        v("x", "1"),
        v("y", "2"),
        v("z", "t"),
        v("m", "max(x, y, z)"),
    ]
    sim = run_sim(variables, num_steps=4)

    expected_max = [2, 2, 2, 3, 4]
    sim.assert_values_match("m", expected_max)
Ejemplo n.º 10
0
def test_not_actually_missing_initial_value():
    """A nan at t=0 must not be reported as a missing initial value.

    The test has no explicit assertion: it only requires ``run_sim`` to
    complete with ``allow_errs=False`` (presumably the helper fails the test
    when the simulation reports any error -- confirm against ``run_sim``).
    """
    # Previously, this would have raised an error message about x needing an
    # initial value, because we identified that error state with looking up
    # a variable value of nan at t=0. x at 0 is nan (0/0) but it's not right
    # to say that it needs an initial value set.
    # NB: we could still create a false-positive error message by defining y
    # as x[t-1]. To address such situations fully, we'd need to be doing more
    # complex dependency tracking, incorporating time offsets.
    run_sim(
        [
            v('x', 't/0'),
            v('y', 'x[t]'),
        ],
        allow_errs=False,
    )
Ejemplo n.º 11
0
def test_join_fn_simple():
    """Basic ``join`` lookup against the assets dataset, run with two sims.

    The values of ``b_var`` for ``sim_id=1`` are expected to be 20 at every
    timestep.
    """
    join_formula = (
        "join(EXAMPLE_LABEL.factor_4 == a_var, EXAMPLE_LABEL.factor_2)"
    )
    variables = [
        v("a_var", "3"),
        v("b_var", join_formula),
    ]
    sim = run_sim(variables, dataset=ASSETS_DATASET_DEF, num_sims=2)

    expected = [20, 20, 20, 20]
    sim.assert_values_match("b_var", expected, sim_id=1)
Ejemplo n.º 12
0
def test_dummy_classifier_multiple_policies():
    """Dummy classifier predictions when the simulation has two policies."""
    variables = [
        v("pred", "CLF.predict(t)"),
        v("prob", "CLF.predict_proba(t)"),
    ]
    sim = run_sim(
        variables,
        policies=pols(x=["1", "2"]),
        models=clf_factory("dummy.pickle"),
    )
    results = sim.results

    # Every predicted label is one of the two classes.
    valid_labels = [0, 1]
    assert results.pred.isin(valid_labels).all()
    # Every probability is exactly one half.
    assert (results.prob == .5).all()
    # One row per (timestep, policy) pair: 4 timesteps, 2 policies, 1 sim.
    assert len(results) == 4 * 2
Ejemplo n.º 13
0
def test_dummy_classifier():
    """Predictions from a dummy binary classifier loaded from a pickle.

    The classifier was trained with strat=uniform, so it should always assign
    each class a probability of .5.
    """
    variables = [
        v("pred", "CLF.predict(t)"),
        v("prob", "CLF.predict_proba(t)"),
    ]
    sim = run_sim(variables, models=clf_factory("dummy.pickle"))
    results = sim.results

    # Every predicted label is one of the two classes.
    valid_labels = [0, 1]
    assert results.pred.isin(valid_labels).all()
    # Every probability is exactly one half.
    assert (results.prob == .5).all()
Ejemplo n.º 14
0
def test_model_predict_dataset_deps():
    """Dependencies of a dataset variable computed by a model prediction.

    ``churn`` is added to a copy of the insurance dataset and calls
    ``CLF.predict`` with, among others, the added variable ``foo``. The test
    checks the predicted labels and the recorded dependency mapping.
    """
    # Work on a copy so the shared dataset definition is left untouched.
    dataset = deepcopy(INSURANCE_DATASET_DEF)
    dataset._add_var(v("foo", "10"))
    churn_formula = "CLF.predict(t, t, foo, initial_total_claims, location)"
    dataset._add_var(v("churn", churn_formula))

    sim = run_sim(
        [],
        models=clf_factory("insurance_churn_model.pkl"),
        dataset=dataset,
    )

    frame = sim.dataset_df()
    assert frame.churn.isin([0, 1]).all()

    expected_deps = {
        "INS.churn": {"INS.foo"},
        "INS.foo": set(),
    }
    assert sim.deps == expected_deps
Ejemplo n.º 15
0
def test_sum_constant_user_dataset_var():
    """Sum a constant-valued DatasetAdditionVar across sims and policies.

    The constant variable ``seven`` is added to a copy of the workers
    dataset; its sum is expected to be 14 in every result row.
    """
    dataset = deepcopy(WORKERS_DATASET_DEF)
    dataset._add_var(v("seven", "7"))

    variables = [v("x", "sum(EXAMPLE_LABEL.seven)")]
    sim = run_sim(
        variables,
        policies=pols(attribute1=["1", "2"]),
        dataset=dataset,
        num_sims=5,
    )

    sums = sim.results.x
    assert (sums == 14).all()
    # 5 sims * 2 policies * 4 timesteps
    assert len(sums) == 40
Ejemplo n.º 16
0
def test_sum_user_dataset_var():
    """Sum a "DatasetAdditionVar" instead of an original dataset column.

    ``workertime`` (``workers * t``) is added to a copy of the workers
    dataset; the test checks both the dependency mapping and the summed
    series.
    """
    dataset = deepcopy(WORKERS_DATASET_DEF)
    dataset._add_var(v("workertime", "EXAMPLE_LABEL.workers * t"))

    variables = [v("x", "sum(EXAMPLE_LABEL.workertime)")]
    sim = run_sim(variables, dataset=dataset)

    expected_deps = {
        "x": {"EXAMPLE_LABEL.workertime"},
        "EXAMPLE_LABEL.workertime": set(),
    }
    assert sim.deps == expected_deps

    expected_sums = [0, 31, 62, 93]
    sim.assert_values_match("x", expected_sums)
Ejemplo n.º 17
0
def test_binomial():
    """Values drawn via ``binomial(100, .5)`` stay within ``[0, 100]``."""
    lower, upper = 0, 100
    variables = [v("x", "binomial(100, .5)")]
    draws = run_sim(variables).get_values("x")

    assert (lower <= draws).all()
    assert (draws <= upper).all()
Ejemplo n.º 18
0
def test_join_fn_many_matching_rows():
    """An unaggregated join that matches several rows must raise an error.

    The error is expected on ``b_var`` whenever more than one row satisfies
    the join condition and no aggregator wraps the join.
    """
    # For timesteps 0-2 the join is 1:1, but two rows have factor_1 == 3:
    # one with factor_2=20 and the other with factor_2=25.
    join_formula = (
        "join(EXAMPLE_LABEL.factor_1 == a_var, EXAMPLE_LABEL.factor_2)"
    )
    variables = [
        v("a_var", "t"),
        v("b_var", join_formula),
    ]
    sim = run_sim(variables, dataset=ASSETS_DATASET_DEF, allow_errs=True)
    sim.assert_errors_match([["b_var", "equation", "Join"]])
Ejemplo n.º 19
0
def test_sum_join_composite_mask_with_different_shapes():
    """Sum over a join whose mask combines two conditions across datasets.

    The joined variable lives on the (renamed) WORKERS dataset and its mask
    mixes a cross-dataset comparison with a comparison against ``t``.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = "WORKERS"
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)

    joined_formula = (
        "sum(join(workers == MOREWORKERS.borkers and MOREWORKERS.widgets==t, "
        "MOREWORKERS.borkers))"
    )
    workers_ds._add_var(v("joined", joined_formula))

    sim = run_sim([], datasets=[more_ds, workers_ds], num_sims=2)

    # We only get any matches when time is 2
    expected_by_timestep = {
        1: [0, 0],
        2: [0, 30],
    }
    for timestep, expected in expected_by_timestep.items():
        sim.assert_values_match("WORKERS.joined", expected, t=timestep)
Ejemplo n.º 20
0
def test_sum_non_dataset_dimension_input():
    """``sum`` of a literal or a plain user variable is an equation error.

    Neither ``sum(1)`` nor ``sum(one)`` references a dataset variable; an
    ``'equation'`` error is expected for both ``x`` and ``y``.
    """
    variables = [
        v("x", "sum(1)"),
        v("one", "1"),
        v("y", "sum(one)"),
    ]
    sim = run_sim(variables, dataset=WORKERS_DATASET_DEF, allow_errs=True)

    expected_errors = [[name, "equation"] for name in ("x", "y")]
    sim.assert_errors_match(expected_errors)
Ejemplo n.º 21
0
def test_time_indexing_dataset_vars():
    """Time-indexed (``[t-1]``) references to added dataset variables.

    ``ds_var2`` lags ``ds_var1`` inside the dataset, and the user variable
    sums a lagged ``ds_var1`` together with ``ds_var2``.
    """
    dataset = deepcopy(ASSETS_DATASET_DEF)
    dataset._add_var(v("ds_var1", "EXAMPLE_LABEL.factor_1"))
    dataset._add_var(v("ds_var2", "ds_var1[t-1]", "0"))

    lagged_formula = "sum(EXAMPLE_LABEL.ds_var1[t-1] + EXAMPLE_LABEL.ds_var2)"
    variables = [v("lagged_var", lagged_formula, "0")]
    sim = run_sim(
        variables,
        policies=pols(attribute1=["1"]),
        dataset=dataset,
    )

    expected = [0, 48, 48, 48]
    sim.assert_values_match("lagged_var", expected)
Ejemplo n.º 22
0
def test_broadcasting_dataset_var_in_policy_expression():
    """A policy expression that references a dataset variable is an error.

    The expected error is attached to the attribute ``attr`` and carries
    ``'extra dimension'`` as its third element.
    """
    policies = pols(attr=["EXAMPLE_LABEL.workers == 1", "5"])
    sim = run_sim(
        [v("x", "attr")],
        policies=policies,
        dataset=WORKERS_DATASET_DEF,
        allow_errs=True,
    )

    expected_errors = [["attr", "equation", "extra dimension"]]
    sim.assert_errors_match(expected_errors)
Ejemplo n.º 23
0
def test_join_fn_no_matching_rows():
    """An unaggregated join that matches no rows must raise an error.

    The error is expected on ``b_var`` when the join condition selects no
    row and no aggregator wraps the join.
    """
    # Only a_var=5 (at t=1) has a matching value (factor_2=30); no other
    # timestep finds a match.
    join_formula = (
        "join(EXAMPLE_LABEL.factor_1 == a_var, EXAMPLE_LABEL.factor_2)"
    )
    variables = [
        v("a_var", "t+4"),
        v("b_var", join_formula),
    ]
    sim = run_sim(
        variables,
        dataset=ASSETS_DATASET_DEF,
        allow_errs=True,
        num_steps=3,
    )
    sim.assert_errors_match([["b_var", "equation", "Join"]])
Ejemplo n.º 24
0
def test_with_numeric_policies():
    """Policy attribute ``x`` given as numeric constants (100 and 1000).

    ``a`` is ``t*x + 1``; its series is checked for each policy over two
    steps.
    """
    sim = run_sim(
        [v("a", "t*x + 1")],
        policies=pols(x=["100", "1000"]),
        num_steps=2,
    )

    expected_by_policy = {
        0: [1, 101, 201],
        1: [1, 1001, 2001],
    }
    for policy_idx, series in expected_by_policy.items():
        sim.assert_values_match("a", series, policy=policy_idx)
Ejemplo n.º 25
0
def test_external_model_dataset_variable():
    """A dataset variable computed by calling an external model's ``predict``.

    ``pred`` is added to a copy of the workers dataset and mixes a dataset
    column, a constant and ``t`` as model inputs.
    """
    dataset = deepcopy(WORKERS_DATASET_DEF)
    pred_formula = "CLF.predict(EXAMPLE_LABEL.max_production, 25, t)"
    dataset._add_var(v("pred", pred_formula))

    sim = run_sim(models=clf_factory("dummy.pickle"), dataset=dataset)
    frame = sim.dataset_df()

    # 3 timesteps * 2 rows
    assert len(frame) == 3 * 2
    assert frame.pred.isin([0, 1]).all()
Ejemplo n.º 26
0
def test_randomness_spread_over_dataset_rows():
    """Random draws for a dataset variable differ between dataset rows."""
    dataset = deepcopy(MOREWORKERS_DATASET_DEF)
    dataset._add_var(v("x", "uniform(0, 100)"))

    sim = run_sim(dataset=dataset)
    draws = sim.get_values("MOREWORKERS.x", t=1)

    # Randomness must not be reused across rows, so the draws cannot all be
    # equal (there is an astronomically small chance of a spurious failure
    # if every row happens to draw the same value).
    smallest, largest = draws.min(), draws.max()
    assert smallest != largest
Ejemplo n.º 27
0
def test_var_attribute_name_duplication():
    """A user variable and a policy attribute sharing a name is an error.

    Variable ``x`` collides with the attribute ``x`` of policy ``p1``; the
    expected error entry is ``['x', 'short_name']``.
    """
    user_variables = [
        v("x", "1"),
    ]
    # Named ``policies`` rather than ``pols`` so the local does not shadow
    # the ``pols(...)`` helper used by the other tests.
    policies = [
        {"policy_name": "p1", "x": "2"},
    ]
    sim = run_sim(user_variables, policies=policies)
    sim.assert_errors_match([['x', 'short_name']])
Ejemplo n.º 28
0
def test_external_model_basic():
    """Predict ``demand`` with an external regression model of three inputs.

    Two inputs are random ``uniform`` draws and the third is ``t``. The
    predictions must contain no nulls and must not all be identical.
    """
    # A regression model taking three inputs.
    demand_model = {
        "label": "DEMAND_MODEL",
        "localPath": "tests/sample_models/three_input_model.pkl",
    }
    variables = [
        v("competitor_price", "uniform(100, 200)"),
        v("temperature", "uniform(20, 30)"),
        v("demand", "DEMAND_MODEL.predict(competitor_price, temperature, t)"),
    ]
    # NOTE(review): this passes ``model=`` (singular) whereas sibling tests
    # pass ``models=`` -- confirm ``run_sim`` accepts both keywords.
    sim = run_sim(variables, model=demand_model)

    demand = sim.results.demand
    assert demand.isnull().sum() == 0
    assert demand.std() > 0
Ejemplo n.º 29
0
def test_join_fn_reduced_dim_boolean_expr():
    """Join whose mask (first argument) lacks dataset dimensionality.

    The mask ``1==1`` does not naturally have the shape of a dataset
    variable; the join must still be handled and reported on ``b_var``.
    """
    # Another overmatching join, unusual in that its first argument has no
    # dataset dimensionality. The point is to get a user-visible join
    # exception rather than a hard crash.
    variables = [
        v("a_var", "t+4"),
        v("b_var", "join(1==1, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(
        variables,
        dataset=ASSETS_DATASET_DEF,
        num_sims=2,
        num_steps=3,
        allow_errs=True,
    )
    sim.assert_errors_match([["b_var", "equation", "Join"]])
Ejemplo n.º 30
0
def test_sum_fn():
    """``sum`` over an original dataset column of the workers dataset.

    Checks the recorded dependencies of ``x`` and its constant summed value.
    """
    dataset = deepcopy(WORKERS_DATASET_DEF)
    variables = [v("x", "sum(EXAMPLE_LABEL.workers)")]
    sim = run_sim(variables, dataset=dataset)

    assert sim.deps == {"x": set()}

    # expected sum = 1 + 30
    expected_sums = [31, 31, 31, 31]
    sim.assert_values_match("x", expected_sums)