def test_sum_join_multiple_datasets_undermatching():
    """A sum(join(...)) where one row on the joining side matches nothing."""
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)
    more_ds._add_var(
        v('joined', 'sum(join(name == WORKERS.name, WORKERS.max_production))'))
    sim = run_sim(
        [],
        datasets=[more_ds, workers_ds],
    )
    # Second row of MOREWORKERS doesn't match any rows in WORKERS, hence sum is 0
    sim.assert_values_match(
        'MOREWORKERS.joined',
        [200, 0, 100],
        t=1,
    )
def test_time_indexing_dataset_vars():
    """Dataset variables support time-indexing from other variables."""
    ds = deepcopy(ASSETS_DATASET_DEF)
    ds._add_var(v('ds_var1', 'EXAMPLE_LABEL.factor_1'))
    ds._add_var(v('ds_var2', 'ds_var1[t-1]', '0'))
    sim = run_sim(
        [
            v('lagged_var',
              'sum(EXAMPLE_LABEL.ds_var1[t-1] + EXAMPLE_LABEL.ds_var2)', '0')
        ],
        policies=pols(attribute1=['1']),
        dataset=ds,
    )
    sim.assert_values_match(
        'lagged_var',
        [0, 48, 48, 48],
    )
def test_multiclass_classifier():
    """A dummy classifier with 3 classes is not supported: the sim should
    surface a user-visible error mentioning 'binary' and null out the
    dependent variable.
    """
    # NOTE: the original built an unused `vars` list (shadowing the builtin)
    # duplicating the v(...) below; it has been removed as dead code.
    sim = run_sim(
        [
            v('prob', 'CLF.predict_proba(t)'),
        ],
        models=clf_factory('dummy_multiclass.pickle'),
        allow_errs=True,
    )
    sim.assert_errors_match([['CLF', '', 'binary']])
    sim.assert_null_values('prob')
def test_find_threshold_iterative():
    """Exercise find_threshold_iterative() over a sweep of capacities."""
    data = setUp()
    precision = .001
    for capacity in range(-50, 10**3, 50):
        threshold = find_threshold_iterative(data, capacity, precision)
        threshold = round_decimals_up(threshold, 3)
        print(capacity, threshold)
        # A negative capacity yields the sentinel threshold of -1.
        if capacity < 0:
            assert threshold == -1
            continue
        assert run_sim(data, threshold, capacity)
def test_sklearn_pipeline():
    """insurance_churn_model.pkl is an sklearn pipeline, with a classifier on
    the tail end. It takes as input [number, number, number, number, string].
    """
    ds = deepcopy(INSURANCE_DATASET_DEF)
    ds._add_var(
        v(
            'churn',
            'CLF.predict(t, t, initial_premium, initial_total_claims, location)'
        ))
    sim = run_sim(
        [],
        models=clf_factory('insurance_churn_model.pkl'),
        dataset=ds,
    )
    # Classifier output must be a valid binary label for every row.
    df = sim.dataset_df()
    assert df.churn.isin([0, 1]).all()
def test_join_fn_reduced_dim_boolean_expr():
    """Verify that joins work even when the mask (the first arg) doesn't
    naturally have the shape of a dataset variable.

    This is another overmatching join, but it has the unusual feature that
    the first arg does not have dataset dimensionality. Want to make sure
    we just raise a user-visible join exception, rather than a hard crash.
    """
    sim = run_sim(
        [
            v('a_var', 't+4'),
            v('b_var', "join(1==1, EXAMPLE_LABEL.factor_2)"),
        ],
        dataset=ASSETS_DATASET_DEF,
        num_sims=2,
        num_steps=3,
        allow_errs=True,
    )
    sim.assert_errors_match([['b_var', 'equation', 'Join']])
def test_sum_join_multiple_datasets_overmatching():
    """A sum(join(...)) where one row matches multiple rows on the far side."""
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)
    workers_ds._add_var(
        v('joined',
          'sum(join(workers == MOREWORKERS.borkers, MOREWORKERS.widgets))'))
    sim = run_sim(
        [],
        datasets=[more_ds, workers_ds],
        num_sims=2,
    )
    # Second row of WORKERS matches two rows of MOREWORKERS having values of 20 and 2
    sim.assert_values_match(
        'WORKERS.joined',
        [10, 20 + 2],
        t=1,
    )
def test_join_multiple_datasets_bijection():
    """A join involving two datasets which happen to have a perfect 1-to-1
    correspondence.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    para_ds = PARAWORKERS_DATASET_DEF
    workers_ds._add_var(
        v('joined', 'join(name == PARAWORKERS.name, PARAWORKERS.widgets)'))
    sim = run_sim(
        [],
        datasets=[para_ds, workers_ds],
    )
    sim.assert_values_match(
        'WORKERS.joined',
        [10, 20],
        t=1,
    )
def test_external_model_basic():
    """A regression model taking three inputs."""
    model = {
        "label": "DEMAND_MODEL",
        "localPath": "tests/sample_models/three_input_model.pkl",
    }
    # NOTE(review): this uses the singular `model=` kwarg where other tests
    # pass `models=` — confirm run_sim accepts both.
    sim = run_sim(
        [
            v('competitor_price', 'uniform(100, 200)'),
            v('temperature', 'uniform(20, 30)'),
            v('demand',
              'DEMAND_MODEL.predict(competitor_price, temperature, t)'),
        ],
        model=model,
    )
    df = sim.results
    # Predictions should be present and non-constant.
    assert df.demand.isnull().sum() == 0
    assert df.demand.std() > 0
def test_sum_join_multiple_datasets_bijection():
    """sum() over a perfectly 1-to-1 join should leave values unchanged."""
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    para_ds = deepcopy(PARAWORKERS_DATASET_DEF)
    para_ds.name = 'PARAWORKERS'
    workers_ds._add_var(
        v('joined', 'sum(join(name == PARAWORKERS.name, PARAWORKERS.widgets))'))
    sim = run_sim(
        [],
        datasets=[para_ds, workers_ds],
    )
    # Because this join is 1-1 the sum() should be a no-op
    sim.assert_values_match(
        'WORKERS.joined',
        [10, 20],
        t=1,
    )
def test_join_multiple_datasets_injection():
    """Similar to above scenario, except the dataset we're joining to has an
    extra row which doesn't participate in the join results
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)
    workers_ds._add_var(
        v('joined', 'join(name == MOREWORKERS.name, MOREWORKERS.widgets)'))
    sim = run_sim(
        [],
        datasets=[more_ds, workers_ds],
    )
    sim.assert_values_match(
        'WORKERS.joined',
        [10, 20],
        t=1,
    )
def test_with_dataset_formulas_accessing_raw_data_and_attributes_and_vars():
    """Dataset formulas can combine raw columns, policy attributes, and
    simulation variables in one equation.
    """
    ds = deepcopy(WORKERS_DATASET_DEF)
    ds._add_var(
        v(
            'rev_per_worker',
            "EXAMPLE_LABEL.max_production / EXAMPLE_LABEL.workers * price_per_unit * a_var",
        ))
    sim = run_sim(
        [v('a_var', 't/10')],
        policies=pols(price_per_unit=['10', '5']),
        dataset=ds,
        num_sims=4,
    )
    sim.assert_values_match(
        'EXAMPLE_LABEL.rev_per_worker',
        [50, 10 / 3],
        t=1,
        policy=1,
    )
def test_with_dataset_formulas_accessing_raw_data():
    """A dataset formula may reference sim builtins such as t directly."""
    ds = deepcopy(WORKERS_DATASET_DEF)
    ds._add_var(v('t_again', 't'))
    sim = run_sim(
        policies=pols(price_per_unit=['10', '1']),
        dataset=ds,
    )
    # Two dataset rows per step, each carrying the current value of t.
    sim.assert_values_match(
        'EXAMPLE_LABEL.t_again',
        [1, 1, 2, 2, 3, 3],
        policy=0,
    )
def test_probabilistic_sampling():
    """Ensure that .predict for a classifier samples rather than returning
    the argmax.
    """
    sim_vars = [
        v('x', 'uniform(0, 1)'),
        v('prob', 'CLF.predict_proba(x[t])'),
        v('pred', 'CLF.predict(x[t])'),
    ]
    sim = run_sim(
        sim_vars,
        models=clf_factory('usually_positive_classifier.pickle'),
        num_steps=1000,
    )
    # We trained a logistic classifier on noise in (0, 1), and with 90% positive
    # labels. So all our predicted probabilities should be around .9, but we
    # should still almost always get at least one negative prediction.
    # (NB: There's a ~1e-46 of this test spuriously failing.)
    df = sim.results
    assert (df.prob > .5).all()
    # Bug fix: `0 in df.pred` tests membership in the Series *index*, not the
    # values, so with 1000 steps it passed vacuously. Test the values instead.
    assert (df.pred == 0).any()
    assert (df.pred == 1).any()
def test_sum_join_multiple_datasets_undermatching_multiple_policies():
    """NB: because dimensions of size 1 get a uniquely lax treatment under
    numpy broadcasting rules, it's important to include some tests with >1
    policy/sim, even if the quantity being tested has no dependence on policy
    attributes or inter-simulation randomness.
    """
    workers_ds = deepcopy(WORKERS_DATASET_DEF)
    workers_ds.name = 'WORKERS'
    more_ds = deepcopy(MOREWORKERS_DATASET_DEF)
    more_ds._add_var(
        v('joined', 'sum(join(name == WORKERS.name, WORKERS.max_production))'))
    sim = run_sim(
        [],
        policies=DUMMY_POLS,
        datasets=[more_ds, workers_ds],
        num_sims=5,
    )
    # Second row of MOREWORKERS doesn't match any rows in WORKERS, hence sum is 0
    sim.assert_values_match(
        'MOREWORKERS.joined',
        [200, 0, 100],
        t=1,
        policy=0,
    )
def test_with_dataset_formulas_accessing_attributes():
    """Dataset formulas can reference per-policy attributes."""
    ds = deepcopy(WORKERS_DATASET_DEF)
    ds._add_var(
        v(
            'rev_per_worker',
            "EXAMPLE_LABEL.max_production / EXAMPLE_LABEL.workers * price_per_unit"
        ))
    sim = run_sim(
        [],
        policies=pols(price_per_unit=['10', '5']),
        dataset=ds,
        num_sims=4,
    )
    # Values scale with each policy's price_per_unit attribute.
    sim.assert_values_match_across_time(
        'EXAMPLE_LABEL.rev_per_worker',
        [1000, 200 / 3],
        policy=0,
    )
    sim.assert_values_match_across_time(
        'EXAMPLE_LABEL.rev_per_worker',
        [500, 100 / 3],
        policy=1,
    )
def test_arithmetic_index_expr():
    """An index expression may be arithmetic, not just a literal."""
    sim = run_sim([
        v('x', 't', '0'),
        v('y', 'x[5-5]'),
    ])
    # 5-5 reduces to index 0, i.e. x's initial value at every step.
    sim.assert_values_match('y', [0, 0, 0, 0])
def test_constant_index_expr():
    """A constant index pins the variable to a single time step's value."""
    sim = run_sim([
        v('x', 't', '0'),
        v('y', 'x[0]'),
    ])
    sim.assert_values_match('y', [0, 0, 0, 0])
def test_multiline_formula():
    """An equation containing a newline still parses and evaluates."""
    sim = run_sim([v('x', '1 +\n1')])
    # broadcasting comparison - should be 2 everywhere
    sim.assert_values_match('x', 2)
def test_fibonacci():
    """Self-referencing lags produce the Fibonacci recurrence."""
    fib = v('a', 'a[t-1] + a[t-2]', initial=1)
    sim = run_sim([fib], num_steps=5)
    sim.assert_values_match('a', [1, 2, 3, 5, 8, 13])