def test_two_vars():
    """Check a variable built from its own two previous values, plus one derived from it."""
    variables = [
        v('a', 'a[t-1] + a[t-2]', '1'),
        v('b', 'a + 1'),
    ]
    sim = run_sim(variables)

    # Expected series per variable, checked in declaration order.
    expected_series = {
        'a': [1, 2, 3, 5],
        'b': [2, 3, 4, 6],
    }
    for var_name, series in expected_series.items():
        sim.assert_values_match(var_name, series)
def test_duplicate_variable_names():
    """Two variables that share the name "x" should yield a short_name error on x."""
    duplicated = [v("x", "1"), v("x", "2")]
    sim = run_sim(duplicated)

    expected_errors = [['x', 'short_name']]
    sim.assert_errors_match(expected_errors)
def test_missing_initial_value_in_other_variable_formula():
    """A missing initial value for y is reported once, even though x reads y too."""
    variables = [
        v('x', 'y[t-1] + 1'),
        v('y', 'y[t-1] + 1'),
    ]
    sim = run_sim(variables)

    # Computing either x or y trips over y's missing initial value, but the
    # problem must be reported a single time (against y), not once per variable.
    expected_errors = [['y', 'initial']]
    sim.assert_errors_match(expected_errors)
def test_sim_with_no_initial_vals():
    """Check values and recorded dependencies when no variable has an initial value."""
    variables = [v('a', 't'), v('b', '2*a')]
    sim = run_sim(variables, num_steps=2)

    sim.assert_values_match('a', [0, 1, 2])
    sim.assert_values_match('b', [0, 2, 4])

    # b reads a; a reads no other variable.
    expected_deps = {'a': set(), 'b': {'a'}}
    assert sim.deps == expected_deps
def test_handling_circular_dep_in_dataset():
    """Mutually dependent dataset variables should each get an equation error."""
    dataset = deepcopy(ASSETS_DATASET_DEF)
    # b is defined via a, and a via b, forming a cycle.
    dataset._add_var(v("b", "a"))
    dataset._add_var(v("a", "b", 0))

    sim = run_sim(datasets=[dataset])

    expected_errors = [
        ['EXAMPLE_LABEL.a', 'equation'],
        ['EXAMPLE_LABEL.b', 'equation'],
    ]
    sim.assert_errors_match(expected_errors)
def test_np_function_wrong_number_of_args():
    """Calling ceil with two arguments should yield an 'arguments' equation error on z."""
    variables = [
        v('x', '1'),
        v('y', '2'),
        v('z', 'ceil(x, y)'),
    ]
    sim = run_sim(variables, allow_errs=True)

    expected_errors = [['z', 'equation', 'arguments']]
    sim.assert_errors_match(expected_errors)
def test_with_policy_formulas():
    """Check per-policy values when the policy attribute x is itself a formula of a."""
    variables = [
        v('a', 'where(t>1, x[t-1], 1)', '1'),
        v('b', 'x[t-1]', '0'),
    ]
    sim = run_sim(variables, policies=pols(x=['a*2', 'a*3']))

    # (variable, expected series, policy index), checked in this order.
    expectations = [
        ('a', [1, 1, 2, 4], 0),
        ('a', [1, 1, 3, 9], 1),
        ('b', [0, 2, 2, 4], 0),
        ('b', [0, 3, 3, 9], 1),
    ]
    for var_name, series, policy_idx in expectations:
        sim.assert_values_match(var_name, series, policy=policy_idx)
def test_join_fn_multiple_policies():
    """With two policies and two sims, every b_var result from the join should be 20."""
    variables = [
        v('a_var', '3'),
        v('b_var', "join(EXAMPLE_LABEL.factor_4 == a_var, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(
        variables,
        policies=pols(attribute1=['1', '3']),
        dataset=ASSETS_DATASET_DEF,
        num_sims=2,
    )

    b_var = sim.results.b_var
    assert (b_var == 20).all()
def test_multi_arg_max():
    """max() called with three arguments should yield the expected series for m."""
    variables = [
        v('x', '1'),
        v('y', '2'),
        v('z', 't'),
        v('m', 'max(x, y, z)'),
    ]
    sim = run_sim(variables, num_steps=4)

    expected_m = [2, 2, 2, 3, 4]
    sim.assert_values_match('m', expected_m)
def test_not_actually_missing_initial_value():
    """Run a sim where x is nan at t=0 and errors are not allowed.

    The test has no explicit assertions; it relies on ``run_sim`` being
    called with ``allow_errs=False``.
    """
    # Previously, this would have raised an error message about x needing an
    # initial value, because we identified that error state with looking up
    # a variable value of nan at t=0. x at 0 is nan (0/0) but it's not right
    # to say that it needs an initial value set.
    # NB: we could still create a false-positive error message by defining y
    # as x[t-1]. To address such situations fully, we'd need to be doing more
    # complex dependency tracking, incorporating time offsets.
    #
    # The return value is intentionally not bound: nothing is inspected
    # after the run.
    run_sim(
        [
            v('x', 't/0'),
            v('y', 'x[t]'),
        ],
        allow_errs=False,
    )
def test_join_fn_simple():
    """A join that resolves to one value should give b_var == 20 at every step (sim 1)."""
    variables = [
        v('a_var', '3'),
        v('b_var', "join(EXAMPLE_LABEL.factor_4 == a_var, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(variables, dataset=ASSETS_DATASET_DEF, num_sims=2)

    expected_b = [20, 20, 20, 20]
    sim.assert_values_match('b_var', expected_b, sim_id=1)
def test_dummy_classifier_multiple_policies():
    """Check dummy-classifier outputs and result row count when two policies are run."""
    variables = [
        v('pred', 'CLF.predict(t)'),
        v('prob', 'CLF.predict_proba(t)'),
    ]
    sim = run_sim(
        variables,
        policies=pols(x=['1', '2']),
        models=clf_factory('dummy.pickle'),
    )
    results = sim.results

    # Every predicted label is one of the two classes.
    assert results.pred.isin([0, 1]).all()
    # Every probability is exactly .5.
    assert (results.prob == .5).all()
    # 4 timesteps, 2 policies, 1 sim
    assert len(results) == 4 * 2
def test_dummy_classifier():
    """Check predict/predict_proba outputs of the dummy binary classifier."""
    # The pickled model is a dummy binary classifier trained with
    # strat=uniform, so each class should always get a probability of .5.
    variables = [
        v('pred', 'CLF.predict(t)'),
        v('prob', 'CLF.predict_proba(t)'),
    ]
    sim = run_sim(variables, models=clf_factory('dummy.pickle'))
    results = sim.results

    # Every predicted label is one of the two classes.
    assert results.pred.isin([0, 1]).all()
    # Every probability is exactly .5.
    assert (results.prob == .5).all()
def test_model_predict_dataset_deps():
    """Check predictions and recorded deps for a dataset variable calling CLF.predict."""
    dataset = deepcopy(INSURANCE_DATASET_DEF)
    dataset._add_var(v('foo', '10'))
    churn_var = v('churn', 'CLF.predict(t, t, foo, initial_total_claims, location)')
    dataset._add_var(churn_var)

    sim = run_sim(
        [],
        models=clf_factory('insurance_churn_model.pkl'),
        dataset=dataset,
    )

    dataset_rows = sim.dataset_df()
    assert dataset_rows.churn.isin([0, 1]).all()

    # Of churn's inputs, only the added variable foo is expected among its deps.
    expected_deps = {
        'INS.churn': {'INS.foo'},
        'INS.foo': set(),
    }
    assert sim.deps == expected_deps
def test_sum_constant_user_dataset_var():
    """Sum a constant-valued DatasetAdditionVar across multiple sims and policies."""
    dataset = deepcopy(WORKERS_DATASET_DEF)
    dataset._add_var(v('seven', '7'))

    variables = [v('x', 'sum(EXAMPLE_LABEL.seven)')]
    sim = run_sim(
        variables,
        policies=pols(attribute1=['1', '2']),
        dataset=dataset,
        num_sims=5,
    )

    x_values = sim.results.x
    assert (x_values == 14).all()
    # 5 sims * 2 policies * 4 timesteps
    assert len(x_values) == 40
def test_sum_user_dataset_var():
    """Sum a "DatasetAdditionVar" instead of a column from the original dataset."""
    dataset = deepcopy(WORKERS_DATASET_DEF)
    dataset._add_var(v('workertime', 'EXAMPLE_LABEL.workers * t'))

    variables = [v('x', 'sum(EXAMPLE_LABEL.workertime)')]
    sim = run_sim(variables, dataset=dataset)

    expected_deps = {
        'x': {'EXAMPLE_LABEL.workertime'},
        'EXAMPLE_LABEL.workertime': set(),
    }
    assert sim.deps == expected_deps

    expected_x = [0, 31, 62, 93]
    sim.assert_values_match('x', expected_x)
def test_binomial():
    """Values of binomial(100, .5) should all lie within [0, 100]."""
    variables = [v('x', 'binomial(100, .5)')]
    sim = run_sim(variables)

    draws = sim.get_values('x')
    # Lower bound first, then upper bound.
    assert (0 <= draws).all()
    assert (draws <= 100).all()
def test_join_fn_many_matching_rows():
    """An un-aggregated join matching more than one row should produce a Join error."""
    # For timesteps 0-2 the join is 1:1, but two rows have factor_1 == 3:
    # one with factor_2=20 and another with factor_2=25.
    variables = [
        v('a_var', 't'),
        v('b_var', "join(EXAMPLE_LABEL.factor_1 == a_var, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(variables, dataset=ASSETS_DATASET_DEF, allow_errs=True)

    expected_errors = [['b_var', 'equation', 'Join']]
    sim.assert_errors_match(expected_errors)
def test_sum_join_composite_mask_with_different_shapes():
    """Sum over a join whose mask combines conditions on two datasets and on t."""
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_workers_ds = deepcopy(MOREWORKERS_DATASET_DEF)

    joined_var = v(
        'joined',
        'sum(join(workers == MOREWORKERS.borkers and MOREWORKERS.widgets==t, MOREWORKERS.borkers))'
    )
    workers_ds._add_var(joined_var)

    sim = run_sim([], datasets=[more_workers_ds, workers_ds], num_sims=2)

    # Matches only occur when time is 2.
    sim.assert_values_match('WORKERS.joined', [0, 0], t=1)
    sim.assert_values_match('WORKERS.joined', [0, 30], t=2)
def test_sum_non_dataset_dimension_input():
    """sum() over a literal or a plain variable should yield equation errors."""
    variables = [
        v('x', 'sum(1)'),
        v('one', '1'),
        v('y', 'sum(one)'),
    ]
    sim = run_sim(variables, dataset=WORKERS_DATASET_DEF, allow_errs=True)

    expected_errors = [
        ['x', 'equation'],
        ['y', 'equation'],
    ]
    sim.assert_errors_match(expected_errors)
def test_time_indexing_dataset_vars():
    """Check time-indexed dataset variables, both inside the dataset and from a user var."""
    dataset = deepcopy(ASSETS_DATASET_DEF)
    dataset._add_var(v('ds_var1', 'EXAMPLE_LABEL.factor_1'))
    dataset._add_var(v('ds_var2', 'ds_var1[t-1]', '0'))

    lagged = v(
        'lagged_var',
        'sum(EXAMPLE_LABEL.ds_var1[t-1] + EXAMPLE_LABEL.ds_var2)',
        '0',
    )
    sim = run_sim(
        [lagged],
        policies=pols(attribute1=['1']),
        dataset=dataset,
    )

    expected_lagged = [0, 48, 48, 48]
    sim.assert_values_match('lagged_var', expected_lagged)
def test_broadcasting_dataset_var_in_policy_expression():
    """A dataset variable in a policy expression should yield an 'extra dimension' error."""
    variables = [v('x', 'attr')]
    sim = run_sim(
        variables,
        policies=pols(attr=['EXAMPLE_LABEL.workers == 1', '5']),
        dataset=WORKERS_DATASET_DEF,
        allow_errs=True,
    )

    expected_errors = [['attr', 'equation', 'extra dimension']]
    sim.assert_errors_match(expected_errors)
def test_join_fn_no_matching_rows():
    """An un-aggregated join that matches no rows should produce a Join error."""
    # Only a_var=5 (at t=1) has a matching value (factor_2=30); no other
    # timestep matches any row.
    variables = [
        v('a_var', 't+4'),
        v('b_var', "join(EXAMPLE_LABEL.factor_1 == a_var, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(
        variables,
        dataset=ASSETS_DATASET_DEF,
        allow_errs=True,
        num_steps=3,
    )

    expected_errors = [['b_var', 'equation', 'Join']]
    sim.assert_errors_match(expected_errors)
def test_with_numeric_policies():
    """Check values of a under two policies that give x different numeric values."""
    variables = [v('a', 't*x + 1')]
    sim = run_sim(
        variables,
        policies=pols(x=['100', '1000']),
        num_steps=2,
    )

    # Expected series for a, listed by policy index.
    expected_by_policy = [
        [1, 101, 201],
        [1, 1001, 2001],
    ]
    for policy_idx, series in enumerate(expected_by_policy):
        sim.assert_values_match('a', series, policy=policy_idx)
def test_external_model_dataset_variable():
    """A dataset variable calling CLF.predict should give 0/1 labels on every dataset row."""
    dataset = deepcopy(WORKERS_DATASET_DEF)
    dataset._add_var(v('pred', 'CLF.predict(EXAMPLE_LABEL.max_production, 25, t)'))
    classifier_models = clf_factory('dummy.pickle')

    sim = run_sim(models=classifier_models, dataset=dataset)
    dataset_rows = sim.dataset_df()

    # 3 timesteps * 2 rows
    assert len(dataset_rows) == 3 * 2
    assert dataset_rows.pred.isin([0, 1]).all()
def test_randomness_spread_over_dataset_rows():
    """Random draws for a dataset variable should differ across rows at one timestep."""
    dataset = deepcopy(MOREWORKERS_DATASET_DEF)
    dataset._add_var(v('x', 'uniform(0, 100)'))

    sim = run_sim(dataset=dataset)
    draws = sim.get_values('MOREWORKERS.x', t=1)

    # Randomness must not be reused across rows, so the values should not all
    # be equal (there is an astronomically small chance of a false positive).
    assert draws.min() != draws.max()
def test_var_attribute_name_duplication():
    """A policy attribute named like a user variable should yield a short_name error.

    The variable list defines ``x`` and the single policy also sets an
    ``x`` key; the expected error is reported against ``x``.
    """
    user_variables = [
        v("x", "1"),
    ]
    # Named `policies` rather than `pols` so that this local does not shadow
    # the `pols` helper that other tests in this file call.
    policies = [
        {"policy_name": "p1", "x": "2"},
    ]
    sim = run_sim(user_variables, policies=policies)
    sim.assert_errors_match([
        ['x', 'short_name'],
    ])
def test_external_model_basic():
    """Predictions from a three-input regression model should be non-null and varied."""
    # A regression model taking three inputs.
    demand_model = {
        "label": "DEMAND_MODEL",
        "localPath": "tests/sample_models/three_input_model.pkl",
    }
    variables = [
        v('competitor_price', 'uniform(100, 200)'),
        v('temperature', 'uniform(20, 30)'),
        v('demand', 'DEMAND_MODEL.predict(competitor_price, temperature, t)'),
    ]
    sim = run_sim(variables, model=demand_model)

    results = sim.results
    # No missing predictions, and the predictions are not all identical.
    assert results.demand.isnull().sum() == 0
    assert results.demand.std() > 0
def test_join_fn_reduced_dim_boolean_expr():
    """A join whose mask lacks dataset dimensionality should still give a Join error."""
    # Another overmatching join, with the twist that the first argument does
    # not have the shape of a dataset variable. The aim is a user-visible
    # join exception rather than a hard crash.
    variables = [
        v('a_var', 't+4'),
        v('b_var', "join(1==1, EXAMPLE_LABEL.factor_2)"),
    ]
    sim = run_sim(
        variables,
        dataset=ASSETS_DATASET_DEF,
        num_sims=2,
        num_steps=3,
        allow_errs=True,
    )

    expected_errors = [['b_var', 'equation', 'Join']]
    sim.assert_errors_match(expected_errors)
def test_sum_fn():
    """sum() over an original dataset column gives a constant total and no recorded deps."""
    dataset = deepcopy(WORKERS_DATASET_DEF)
    variables = [v('x', 'sum(EXAMPLE_LABEL.workers)')]
    sim = run_sim(variables, dataset=dataset)

    expected_deps = {'x': set()}
    assert sim.deps == expected_deps

    # expected sum = 1 + 30
    expected_x = [31, 31, 31, 31]
    sim.assert_values_match('x', expected_x)