Exemple #1
0
    def test_read_db_gz(self):
        """Open the bundled gzipped road-test database and verify its contents.

        Checks that a missing scope file and a missing database file both
        raise ``FileNotFoundError``, that the scope stored in
        ``roadtest.db.gz`` equals the scope loaded from ``road_test.yaml``,
        and that the stored ``'lhs'`` design has the expected shape and
        column order.
        """
        road_test_scope_file = emat.package_file('model', 'tests',
                                                 'road_test.yaml')
        with pytest.raises(FileNotFoundError):
            emat.Scope(emat.package_file('nope.yaml'))
        s = emat.Scope(road_test_scope_file)
        with pytest.raises(FileNotFoundError):
            emat.SQLiteDB(emat.package_file('nope.db.gz'))
        db = emat.SQLiteDB(emat.package_file("examples", "roadtest.db.gz"))

        assert repr(db) == '<emat.SQLiteDB with scope "EMAT Road Test">'
        # Query the info string once; it is expected to end in 'roadtest.db'
        # (without the '.gz' suffix of the file that was opened).
        db_info = db.get_db_info()
        assert db_info.startswith('SQLite @ ')
        assert db_info.endswith('roadtest.db')

        assert db.read_scope_names() == ['EMAT Road Test']

        s1 = db.read_scope('EMAT Road Test')

        # Exact type identity is intended here, not an isinstance() check.
        assert type(s1) is type(s)

        # Compare piecewise first so a failure names the attribute that differs.
        for k in ('_x_list', '_l_list', '_c_list', '_m_list', 'name', 'desc'):
            assert getattr(s, k) == getattr(s1, k), k

        assert s == s1

        experiments = db.read_experiment_all('EMAT Road Test', 'lhs')
        assert experiments.shape == (110, 20)
        assert list(experiments.columns) == [
            'free_flow_time',
            'initial_capacity',
            'alpha',
            'beta',
            'input_flow',
            'value_of_time',
            'unit_cost_expansion',
            'interest_rate',
            'yield_curve',
            'expand_capacity',
            'amortization_period',
            'debt_type',
            'interest_rate_lock',
            'no_build_travel_time',
            'build_travel_time',
            'time_savings',
            'value_of_time_savings',
            'net_benefits',
            'cost_of_capacity_expansion',
            'present_cost_expansion',
        ]

        from emat.model.core_python import Road_Capacity_Investment
        m = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db)
        # A model built directly from the core function carries no metamodel id.
        assert m.metamodel_id is None
Exemple #2
0
def test_feature_scoring_with_nan():
	"""Score features for the road-test variant with a bogus output.

	Runs a 5000-sample LHS design through a model built on
	``_Road_Capacity_Investment_with_Bogus_Output`` and checks that
	``feature_scores`` still returns a pandas ``Styler``. The underlying
	data is then handed to ``stable_df`` together with a reference file name.
	"""
	scope_path = emat.package_file('model','tests','road_test_bogus.yaml')
	bogus_scope = emat.Scope(scope_path)
	model = PythonCoreModel(
		_Road_Capacity_Investment_with_Bogus_Output,
		scope=bogus_scope,
	)
	lhs_design = model.design_experiments(n_samples=5000, sampler='lhs')
	outcomes = model.run_experiments(design=lhs_design)

	styled_scores = feature_scores(bogus_scope, outcomes, random_state=234)
	assert isinstance(styled_scores, pd.io.formats.style.Styler)
	stable_df("./road_test_feature_scores_bogus_1.pkl.gz", styled_scores.data)
Exemple #3
0
def test_database_scope_updating():
    """Measures added to a scope must round-trip through the database.

    Covers both update paths: an explicit ``db.update_scope`` call after
    ``add_measure``, and passing ``db=`` directly to ``add_measure``.
    """
    working_scope = emat.Scope("fake_filename.yaml", scope_yaml)
    store = emat.SQLiteDB()
    store.store_scope(working_scope)

    def stored_matches():
        # Re-read by name on every call so each check sees the current state.
        return store.read_scope(working_scope.name) == working_scope

    assert stored_matches()

    # Path 1: mutate the scope, then push the change explicitly.
    working_scope.add_measure("plus1")
    store.update_scope(working_scope)
    assert stored_matches()
    assert len(working_scope.get_measures()) == 5

    # Path 2: let add_measure write through to the database itself.
    working_scope.add_measure("plus2", db=store)
    assert stored_matches()
    assert len(working_scope.get_measures()) == 6
Exemple #4
0
def test_multiple_connections():
    """Two SQLiteDB objects on one file must see each other's experiments.

    A scope is stored through the first connection; storing it again through
    a second connection to the same file must raise ``KeyError``. Each
    connection then designs and runs its own experiments, and the results
    are read back through the *other* connection and compared.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as workdir:
        db_path = os.path.join(workdir, "test_db_file.db")
        first_db = SQLiteDB(db_path, initialize=True)

        scope_path = emat.package_file("model", "tests", "road_test.yaml")
        road_scope = emat.Scope(scope_path)
        first_db.store_scope(road_scope)

        assert first_db.read_scope_names() == ["EMAT Road Test"]

        # The scope already exists in the file, so a second store must fail.
        second_db = SQLiteDB(db_path, initialize=False)
        with pytest.raises(KeyError):
            second_db.store_scope(road_scope)

        # Neither connection may be left inside an open transaction.
        for handle in (first_db, second_db):
            assert not handle.conn.in_transaction

        from emat.model.core_python import Road_Capacity_Investment

        first_model = emat.PythonCoreModel(
            Road_Capacity_Investment, scope=road_scope, db=first_db,
        )
        second_model = emat.PythonCoreModel(
            Road_Capacity_Investment, scope=road_scope, db=second_db,
        )
        design_one = first_model.design_experiments(
            n_samples=3, random_seed=1, design_name="d1",
        )
        design_two = second_model.design_experiments(
            n_samples=3, random_seed=2, design_name="d2",
        )
        results_one = first_model.run_experiments(design_name="d1")
        results_two = second_model.run_experiments(design_name="d2")

        # Each model reads the design that the other model produced.
        cross_checks = (
            (results_one, second_model, "d1"),
            (results_two, first_model, "d2"),
        )
        for own_results, other_model, design in cross_checks:
            reloaded = other_model.db.read_experiment_all(
                scope_name=road_scope.name,
                design_name=design,
                ensure_dtypes=True,
            )
            pd.testing.assert_frame_equal(
                own_results,
                reloaded[own_results.columns],
            )
Exemple #5
0
def test_feature_scoring_and_prim():
	"""Exercise feature scoring and PRIM box finding on the road-test model.

	Runs a 5000-sample LHS design, checks that feature scores come back as a
	pandas ``Styler``, then finds a PRIM box for ``net_benefits >= 0`` and
	compares the tradeoff-selector trace, the selected box's statistics and
	its thresholds against pinned values.
	"""
	scope_path = emat.package_file('model','tests','road_test.yaml')
	scope = emat.Scope(scope_path)
	model = PythonCoreModel(Road_Capacity_Investment, scope=scope)
	lhs_design = model.design_experiments(n_samples=5000, sampler='lhs')
	outcomes = model.run_experiments(design=lhs_design)

	styled_scores = feature_scores(scope, outcomes, random_state=123)
	assert isinstance(styled_scores, pd.io.formats.style.Styler)
	stable_df("./road_test_feature_scores_1.pkl.gz", styled_scores.data)

	prim_run = outcomes.prim(target="net_benefits >= 0")
	box = prim_run.find_box()

	# Immediately after find_box the current box index is 64.
	assert box._cur_box == 64
	selector = prim_run.tradeoff_selector()
	assert len(selector.data) == 1

	expected_x = np.asarray([
		1., 1., 1., 1., 1.,
		0.99928315, 0.99856631, 0.99784946, 0.99569892, 0.99283154,
		0.98924731, 0.98351254, 0.97921147, 0.97491039, 0.96702509,
		0.95555556, 0.94982079, 0.94336918, 0.92903226, 0.91182796,
		0.89749104, 0.87598566, 0.85304659, 0.83942652, 0.83225806,
		0.82078853, 0.79713262, 0.77706093, 0.76415771, 0.75483871,
		0.74480287, 0.73261649, 0.71827957, 0.70394265, 0.68100358,
		0.65663082, 0.63225806, 0.61003584, 0.59569892, 0.57992832,
		0.55770609, 0.54193548, 0.52759857, 0.51111111, 0.49892473,
		0.48960573, 0.4781362, 0.45878136, 0.44229391, 0.42365591,
		0.409319, 0.39498208, 0.38064516, 0.36487455, 0.34767025,
		0.33261649, 0.31756272, 0.30322581, 0.28888889, 0.27741935,
		0.26379928, 0.25089606, 0.23942652, 0.22795699, 0.2172043])
	assert selector.data[0]['x'] == approx(expected_x)

	# Selecting box 40 must move the highlighted marker in the selector trace.
	box.select(40)
	assert box._cur_box == 40
	expected_symbols = np.asarray([
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
	assert selector.data[0]['marker']['symbol'] == approx(expected_symbols)

	emat_box = box.to_emat_box()
	assert emat_box.coverage == approx(0.5577060931899641)
	assert emat_box.density == approx(0.8356605800214822)
	assert emat_box.mass == approx(0.1862)

	from emat import Bounds
	expected_thresholds = {
		'beta': Bounds(lowerbound=3.597806324946271, upperbound=None),
		'input_flow': Bounds(lowerbound=125, upperbound=None),
		'value_of_time': Bounds(lowerbound=0.07705746291056698, upperbound=None),
		'expand_capacity': Bounds(lowerbound=None, upperbound=95.01870815358643),
	}
	assert emat_box.thresholds == expected_thresholds

	# Smoke checks only: these calls just have to complete without raising.
	box.splom()
	box.hmm()
Exemple #6
0
def db_setup():
    """Yield a populated test database and the names used to build it.

    Creates a SQLiteDB (file name taken from ``config["test_db_filename"]``,
    defaulting to ``":memory:"``), registers the experiment variables and
    performance measures, writes a scope called ``"test"``, and yields a
    ``Bunch`` holding the database plus the inputs used. The scope is deleted
    again once the generator is resumed.

    NOTE(review): presumably used as a pytest fixture; the decorator is not
    visible in this excerpt.
    """
    scope_name = "test"
    sheet = "emat_scope1.yaml"

    # (name, type) pairs for experiment variables, (name, transform) for measures.
    scp_xl = [
        ("constant", "constant"),
        ("exp_var1", "risk"),
        ("exp_var2", "strategy"),
    ]
    scp_m = [
        ("pm_1", "none"),
        ("pm_2", "ln"),
    ]
    # Bare name lists, in the same order as the pairs above.
    ex_xl = [var_name for var_name, _ in scp_xl]
    ex_m = [measure_name for measure_name, _ in scp_m]

    db_filename = config.get("test_db_filename", ":memory:")
    db_test = SQLiteDB(db_filename, initialize=True)
    db_test.init_xlm(scp_xl, scp_m)

    # Clear any previous scope of this name before writing a fresh one.
    db_test.delete_scope(scope_name)
    scope = emat.Scope(sheet, scope_yaml)
    db_test._write_scope(scope_name, sheet, ex_xl, ex_m, scope)

    bundle = Bunch(
        db_test=db_test,
        scope_name=scope_name,
        sheet=sheet,
        scp_xl=scp_xl,
        scp_m=scp_m,
        ex_m=ex_m,
        ex_xl=ex_xl,
    )
    yield bundle

    # Teardown: remove the scope written above.
    db_test.delete_scope(scope_name)
Exemple #7
0
# %% [markdown]
# # Interactive Explorer
#
# TMIP-EMAT includes an interactive visualizer, inspired by a
# [similar tool](https://htmlpreview.github.io/?https://github.com/VisionEval/VisionEval/blob/master/sources/VEScenarioViewer/verpat.html)
# provided with the [VisionEval](https://visioneval.org) package.
# To demonstrate the interactive visualizer, we will use the Road Test example model.
# First, we need to develop and run a design of experiments to have some
# data to explore.  We'll run 5,000 experiments to get a good-sized sample of
# data points to visualize.

# %%
import emat.examples

# Load the exploratory scope and a table of experiment results from files
# under notebooks/.
# NOTE(review): `pd` is not imported in this cell -- confirm that pandas is
# imported elsewhere before this code runs.
scope = emat.Scope("notebooks/scope.yaml")
results = pd.read_csv("notebooks/results.csv")

# %% [markdown]
# One feature of the visualizer is the ability to display not only a number of results,
# but also to contrast those results against a given "reference" model that represents
# a more traditional single-point forecast of inputs and results.  We'll prepare a
# reference point here using the `run_reference_experiment` method of the `CoreModel`
# class, which reads the input parameter defaults (as defined in the scope),
# and returns both inputs and outputs in a DataFrame (essentially, an experimental
# design with only a single experiment), suitable for use as the reference point marker in our
# visualizations.

# %%

# %% [markdown]
Exemple #8
0
# To demonstrate the feature scoring, we can define a scope to explore this 
# demo model:

# %%
# Build the scope from an inline YAML definition rather than a file on disk
# (scope_file is left empty).  The definition declares three float inputs
# (A, B, C), each an exogenous uncertainty ranging from 0 to 1, and a single
# output Y of kind "info".
demo_scope = emat.Scope(scope_file='', scope_def="""---
scope:
    name: demo
inputs:
    A:
        ptype: exogenous uncertainty
        dtype: float
        min: 0
        max: 1
    B:
        ptype: exogenous uncertainty
        dtype: float
        min: 0
        max: 1
    C:
        ptype: exogenous uncertainty
        dtype: float
        min: 0
        max: 1
outputs:
    Y:  
        kind: info
""")

# %% [markdown]
# And then we will design and run some experiments to generate data used for
# feature scoring.
Exemple #9
0
# Write the scope `s` out to a YAML file in the working directory.
s.dump(filename="road_test_scope.yaml")

# %%
# List the working directory contents.
show_dir('.')

# %% [markdown]
# ### Reading In Raw Data
#
# Now, we're ready to begin anew, constructing a fresh database from scratch,
# using only the raw formatted files.
#
# First, let's load our scope from the yaml file, and initialize a clean database
# using that scope.

# %%
# Re-create the scope from the YAML file dumped earlier.
s2 = emat.Scope("road_test_scope.yaml")

# %%
# Open a separate database file for the rebuilt data.
db2 = emat.SQLiteDB("road_test_2.sqldb")

# %%
# Register the reloaded scope in the new database.
db2.store_scope(s2)

# %% [markdown]
# Just as we used pandas to save out our consolidated DataFrame of experimental results,
# we can use it to read in a consolidated table of experiments.

# %%
# The 'experiment' column of the gzipped CSV becomes the DataFrame index.
df2 = pd.read_csv("road_test_1.csv.gz", index_col='experiment')
df2
# %% [markdown]
# ## Defining the Exploratory Scope

# %% [raw] {"raw_mimetype": "text/restructuredtext"}
# The model scope is defined in a YAML file.  For this Road Test example, the scope file is named 
# :ref:`road_test.yaml <road_test_scope_file>` and is included in the model/tests directory.

# %%
# Build the path to road_test.yaml from the 'model' and 'tests' path components.
road_test_scope_file = emat.package_file('model','tests','road_test.yaml')

# %% [raw] {"raw_mimetype": "text/restructuredtext"}
# The filename for the YAML file is the first argument when creating a :class:`Scope`
# object, which will load and process the content of the file.

# %%
# The bare name on the last line displays the scope in the notebook.
road_scope = emat.Scope(road_test_scope_file)
road_scope

# %% [markdown]
# A short summary of the scope can be reviewed using the `info` method.

# %%
road_scope.info()

# %% [markdown]
# Alternatively, more detailed information about each part of the scope can be
# accessed in four list attributes:

# %%
# First of these: the constants defined in the scope.
road_scope.get_constants()
Exemple #11
0
    def test_read_db_gz(self):
        """Verify the gzipped road-test database, building it first if absent.

        Checks that a missing scope file and a missing database file both
        raise ``FileNotFoundError``. If ``examples/roadtest.db.gz`` does not
        exist yet, it is generated: a 110-sample ``'lhs'`` design is created
        and run into a temporary SQLite file, which is then gzip-compressed.
        Finally the database is opened and its scope, design shape and column
        order are compared against expected values.
        """
        from emat.model.core_python import Road_Capacity_Investment

        road_test_scope_file = emat.package_file('model', 'tests',
                                                 'road_test.yaml')
        with pytest.raises(FileNotFoundError):
            emat.Scope(emat.package_file('nope.yaml'))
        s = emat.Scope(road_test_scope_file)
        with pytest.raises(FileNotFoundError):
            emat.SQLiteDB(emat.package_file('nope.db.gz'))

        gz_path = emat.package_file("examples", "roadtest.db.gz")
        if not os.path.exists(gz_path):
            import gzip
            import shutil

            # Populate an uncompressed database file first, then compress it.
            # NOTE(review): the '.tmp' file is left in place afterwards, as in
            # the original; confirm whether it should be removed.
            tmp_path = emat.package_file("examples", "roadtest.db.tmp")
            db_w = emat.SQLiteDB(tmp_path, initialize=True)
            s.store_scope(db_w)
            s.design_experiments(n_samples=110,
                                 random_seed=1234,
                                 db=db_w,
                                 design_name='lhs')
            m_w = emat.PythonCoreModel(Road_Capacity_Investment,
                                       scope=s,
                                       db=db_w)
            m_w.run_experiments(design_name='lhs', db=db_w)
            # Close the connection before the file is read for compression.
            db_w.conn.close()
            with open(tmp_path, 'rb') as f_in, \
                    gzip.open(gz_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

        db = emat.SQLiteDB(gz_path)

        assert repr(db) == '<emat.SQLiteDB with scope "EMAT Road Test">'
        # Query the info string once; it is expected to end in 'roadtest.db'
        # (without the '.gz' suffix of the file that was opened).
        db_info = db.get_db_info()
        assert db_info.startswith('SQLite @ ')
        assert db_info.endswith('roadtest.db')

        assert db.read_scope_names() == ['EMAT Road Test']

        s1 = db.read_scope('EMAT Road Test')

        # Exact type identity is intended here, not an isinstance() check.
        assert type(s1) is type(s)

        # Compare piecewise first so a failure names the attribute that differs.
        for k in ('_x_list', '_l_list', '_c_list', '_m_list', 'name', 'desc'):
            assert getattr(s, k) == getattr(s1, k), k

        assert s == s1

        experiments = db.read_experiment_all('EMAT Road Test', 'lhs')
        assert experiments.shape == (110, 20)
        assert list(experiments.columns) == [
            'free_flow_time',
            'initial_capacity',
            'alpha',
            'beta',
            'input_flow',
            'value_of_time',
            'unit_cost_expansion',
            'interest_rate',
            'yield_curve',
            'expand_capacity',
            'amortization_period',
            'debt_type',
            'interest_rate_lock',
            'no_build_travel_time',
            'build_travel_time',
            'time_savings',
            'value_of_time_savings',
            'net_benefits',
            'cost_of_capacity_expansion',
            'present_cost_expansion',
        ]

        m = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db)
        # A model built directly from the core function carries no metamodel id.
        assert m.metamodel_id is None
Exemple #12
0
	def test_road_test(self):
		"""End-to-end walkthrough of the Road Test model.

		Covers, in order: loading and storing the scope, creating two LHS
		designs, running both through a ``PythonCoreModel``, feature scoring,
		PRIM and CART analysis, a short robust optimization with constraints
		and convergence metrics, and finally building a metamodel from the
		``'lhs'`` design and running a new design through it.

		Many assertions pin exact numeric results and experiment ids, so the
		order of the statements below matters.
		"""
		import os
		# Run from this file's directory; the stable_df calls below are given
		# "./"-relative file names.
		# NOTE(review): the working directory is never restored, so the change
		# is visible to any code that runs later in the same process.
		test_dir = os.path.dirname(__file__)
		os.chdir(test_dir)

		road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

		road_scope = emat.Scope(road_test_scope_file)

		# <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures>
		assert len(road_scope.get_measures()) == 7
		assert len(road_scope.get_levers()) == 4
		assert len(road_scope.get_uncertainties()) == 7
		assert len(road_scope.get_constants()) == 2

		emat_db = emat.SQLiteDB()

		road_scope.store_scope(emat_db)

		# Storing the same scope a second time must be rejected.
		with pytest.raises(KeyError):
			road_scope.store_scope(emat_db)

		assert emat_db.read_scope_names() == ['EMAT Road Test']

		# First design; no design_name is given (it is later listed as 'lhs').
		design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
		design.head()

		large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs',
										  design_name='lhs_large')
		large_design.head()

		assert list(large_design.columns) == [
			'alpha',
			'amortization_period',
			'beta',
			'debt_type',
			'expand_capacity',
			'input_flow',
			'interest_rate',
			'interest_rate_lock',
			'unit_cost_expansion',
			'value_of_time',
			'yield_curve',
			'free_flow_time',
			'initial_capacity',
		]

		# The large design's experiment ids start at 111, i.e. after those
		# already taken by the first design.
		assert list(large_design.head().index) == [111, 112, 113, 114, 115]

		assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large']

		m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

		with SequentialEvaluator(m) as eval_seq:
			lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq)

		lhs_results.head()

		# Pinned regression values for the first and last five experiments.
		assert lhs_results.head()['present_cost_expansion'].values == approx(
			[2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514])

		assert lhs_results.head()['net_benefits'].values == approx(
			[ -22.29090499,  -16.84301382, -113.98841188,   11.53956058,        78.03661612])

		assert lhs_results.tail()['present_cost_expansion'].values == approx(
			[2720.51645703, 4000.91232689, 6887.83193063, 3739.47839941, 1582.52899124])

		assert lhs_results.tail()['net_benefits'].values == approx(
			[841.46278175, -146.71279267, -112.5681036, 25.48055303, 127.31154155])

		with SequentialEvaluator(m) as eval_seq:
			lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq)
		lhs_large_results.head()

		assert lhs_large_results.head()['net_benefits'].values == approx(
			[-522.45283083, -355.1599307 , -178.6623215 ,   23.46263498,       -301.17700968])

		# Measures read back through the model must match what was just run.
		lhs_outcomes = m.read_experiment_measures(design_name='lhs')
		assert lhs_outcomes.head()['time_savings'].values == approx(
			[13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139])

		# NOTE(review): stable_df is defined elsewhere; presumably it compares
		# the frame with the named reference file -- confirm.
		scores = m.get_feature_scores('lhs', random_state=123)
		stable_df("./road_test_feature_scores.pkl.gz", scores.data)

		from emat.workbench.analysis import prim

		# NOTE(review): `x` is not used again in this test.
		x = m.read_experiment_parameters(design_name='lhs_large')

		# PRIM on the large design, targeting experiments with positive net benefits.
		prim_alg = prim.Prim(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
			threshold=0.4,
		)

		box1 = prim_alg.find_box()

		stable_df("./road_test_box1_peeling_trajectory.pkl.gz", box1.peeling_trajectory)

		from emat.util.xmle import Show
		from emat.util.xmle.elem import Elem

		assert isinstance(Show(box1.show_tradeoff()), Elem)

		from emat.workbench.analysis import cart

		# CART on the same inputs and target as the PRIM run above.
		cart_alg = cart.CART(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
		)
		cart_alg.build_tree()

		stable_df("./road_test_cart_box0.pkl.gz", cart_alg.boxes[0])

		cart_dict = dict(cart_alg.boxes[0].iloc[0])
		assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'}
		#assert cart_dict['interest_rate_lock'] == {False, True}

		assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem)

		from emat import Measure

		# NOTE(review): MAXIMIZE is assigned but not used below.
		MAXIMIZE = Measure.MAXIMIZE
		MINIMIZE = Measure.MINIMIZE

		# Robustness measures: each applies `function` to the values of the
		# named model output (`variable_name`).
		robustness_functions = [
			Measure(
				'Expected Net Benefit',
				kind=Measure.INFO,
				variable_name='net_benefits',
				function=numpy.mean,
				#         min=-150,
				#         max=50,
			),

			Measure(
				'Probability of Net Loss',
				kind=MINIMIZE,
				variable_name='net_benefits',
				function=lambda x: numpy.mean(x < 0),
				min=0,
				max=1,
			),

			Measure(
				'95%ile Travel Time',
				kind=MINIMIZE,
				variable_name='build_travel_time',
				function=functools.partial(numpy.percentile, q=95),
				min=60,
				max=150,
			),

			Measure(
				'99%ile Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=functools.partial(numpy.percentile, q=99),
				#         min=0,
				#         max=10,
			),

			Measure(
				'Expected Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=numpy.mean,
				#         min=0,
				#         max=10,
			),

		]

		from emat import Constraint

		# Constraint on a robustness outcome.
		constraint_1 = Constraint(
			"Maximum Log Expected Present Cost",
			outcome_names="Expected Present Cost",
			function=Constraint.must_be_less_than(4000),
		)

		# Constraint on an input parameter.
		constraint_2 = Constraint(
			"Minimum Capacity Expansion",
			parameter_names="expand_capacity",
			function=Constraint.must_be_greater_than(10),
		)

		# Constraint on a parameter and an outcome together: the function
		# returns the amount above 1500 when debt_type is 'Paygo', else 0.
		constraint_3 = Constraint(
			"Maximum Paygo",
			parameter_names='debt_type',
			outcome_names='99%ile Present Cost',
			function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0,
		)

		from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics

		convergence_metrics = ConvergenceMetrics(
			HyperVolume.from_outcomes(robustness_functions),
			EpsilonProgress(),
			SolutionViewer.from_model_and_outcomes(m, robustness_functions),
		)

		# nfe=5 with 20 scenarios keeps the optimization run short.
		with SequentialEvaluator(m) as eval_seq:
			robust = m.robust_optimize(
				robustness_functions,
				scenarios=20,
				nfe=5,
				constraints=[
					constraint_1,
					constraint_2,
					constraint_3,
				],
				epsilons=[0.05, ] * len(robustness_functions),
				convergence=convergence_metrics,
				evaluator=eval_seq,
			)
		# Here robust_optimize returns an object with .result and .convergence.
		robust_results, convergence = robust.result, robust.convergence

		assert isinstance(robust_results, pandas.DataFrame)

		# Build a metamodel from the 'lhs' design and run a fresh design
		# through it; only successful completion is checked.
		mm = m.create_metamodel_from_design('lhs')

		design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2)

		design2_results = mm.run_experiments(design2)
Exemple #13
0
	def test_road_test(self):
		"""End-to-end walkthrough of the Road Test model with inline expectations.

		Covers, in order: loading and storing the scope, creating two LHS
		designs, running both through a ``PythonCoreModel``, feature scoring,
		PRIM and CART analysis, a short robust optimization with constraints
		and convergence metrics, and finally building a metamodel from the
		``'lhs'`` design and running a new design through it.

		Expected values are written inline in this test, and many of them
		depend on the order of the statements below.
		"""
		road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

		road_scope = emat.Scope(road_test_scope_file)

		# <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures>
		assert len(road_scope.get_measures()) == 7
		assert len(road_scope.get_levers()) == 4
		assert len(road_scope.get_uncertainties()) == 7
		assert len(road_scope.get_constants()) == 2

		emat_db = emat.SQLiteDB()

		road_scope.store_scope(emat_db)

		# Storing the same scope a second time must be rejected.
		with pytest.raises(KeyError):
			road_scope.store_scope(emat_db)

		assert emat_db.read_scope_names() == ['EMAT Road Test']

		# First design; no design_name is given (it is later listed as 'lhs').
		design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
		design.head()

		large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs',
										  design_name='lhs_large')
		large_design.head()

		assert list(large_design.columns) == [
			'alpha',
			'amortization_period',
			'beta',
			'debt_type',
			'expand_capacity',
			'input_flow',
			'interest_rate',
			'interest_rate_lock',
			'unit_cost_expansion',
			'value_of_time',
			'yield_curve',
			'free_flow_time',
			'initial_capacity',
		]

		# The large design's experiment ids start at 111, i.e. after those
		# already taken by the first design.
		assert list(large_design.head().index) == [111, 112, 113, 114, 115]

		assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large']

		m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

		with SequentialEvaluator(m) as eval_seq:
			lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq)

		lhs_results.head()

		# Pinned regression values for the first five experiments.
		assert lhs_results.head()['present_cost_expansion'].values == approx(
			[2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514])

		assert lhs_results.head()['net_benefits'].values == approx(
			[-79.51551505, -205.32148044, -151.94431822, -167.62487134, -3.97293985])

		with SequentialEvaluator(m) as eval_seq:
			lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq)
		lhs_large_results.head()

		assert lhs_large_results.head()['net_benefits'].values == approx(
			[-584.36098322, -541.5458395, -185.16661464, -135.85689709, -357.36106457])

		# Measures read back through the model must match what was just run.
		lhs_outcomes = m.read_experiment_measures(design_name='lhs')
		assert lhs_outcomes.head()['time_savings'].values == approx(
			[13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139])

		# Reference feature scores, as a 7 x 13 array.
		# NOTE(review): presumably one row per measure and one column per
		# input parameter -- confirm against get_feature_scores.
		correct_scores = numpy.array(
			[[0.06603461, 0.04858595, 0.06458574, 0.03298163, 0.05018515, 0., 0., 0.53156587, 0.05060416, 0.02558088,
			  0.04676956, 0.04131266, 0.04179378],
			 [0.06003223, 0.04836434, 0.06059554, 0.03593644, 0.27734396, 0., 0., 0.28235419, 0.05303979, 0.03985181,
			  0.04303371, 0.05004349, 0.04940448],
			 [0.08760605, 0.04630414, 0.0795043, 0.03892201, 0.10182534, 0., 0., 0.42508457, 0.04634321, 0.03216387,
			  0.0497183, 0.04953772, 0.0429905],
			 [0.08365598, 0.04118732, 0.06716887, 0.03789444, 0.06509519, 0., 0., 0.31494171, 0.06517462, 0.02895742,
			  0.04731707, 0.17515158, 0.07345581],
			 [0.06789382, 0.07852257, 0.05066944, 0.04807088, 0.32054735, 0., 0., 0.15953055, 0.05320201, 0.02890069,
			  0.07033928, 0.06372418, 0.05859923],
			 [0.05105435, 0.09460353, 0.04614178, 0.04296901, 0.45179611, 0., 0., 0.04909801, 0.05478798, 0.023099,
			  0.08160785, 0.05642169, 0.04842069],
			 [0.04685703, 0.03490931, 0.03214081, 0.03191602, 0.56130318, 0., 0., 0.04011044, 0.04812986, 0.02228924,
			  0.09753361, 0.04273004, 0.04208045], ])

		scores = m.get_feature_scores('lhs', random_state=123)

		# Element-wise comparison with a loose 10% relative tolerance.
		for _i in range(scores.metadata.values.shape[0]):
			for _j in range(scores.metadata.values.shape[1]):
				assert scores.metadata.values[_i,_j] == approx(correct_scores[_i,_j], rel=.1)

		# NOTE(review): prim is imported from the `ema_workbench` package here.
		from ema_workbench.analysis import prim

		# NOTE(review): `x` is not used again in this test.
		x = m.read_experiment_parameters(design_name='lhs_large')

		# PRIM on the large design, targeting experiments with positive net benefits.
		prim_alg = prim.Prim(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
			threshold=0.4,
		)

		box1 = prim_alg.find_box()

		# Pin one row (index 45) of the peeling trajectory.
		assert dict(box1.peeling_trajectory.iloc[45]) == approx({
			'coverage': 0.8014705882352942,
			'density': 0.582109479305741,
			'id': 45,
			'mass': 0.1498,
			'mean': 0.582109479305741,
			'res_dim': 4,
		})

		from emat.util.xmle import Show
		from emat.util.xmle.elem import Elem

		assert isinstance(Show(box1.show_tradeoff()), Elem)

		from ema_workbench.analysis import cart

		# CART on the same inputs and target as the PRIM run above.
		cart_alg = cart.CART(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
		)
		cart_alg.build_tree()

		cart_dict = dict(cart_alg.boxes[0].iloc[0])
		# Set-valued entries are checked exactly and then removed, so that the
		# remaining numeric entries can be compared with approx().
		assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'}
		assert cart_dict['interest_rate_lock'] == {False, True}
		del cart_dict['debt_type']
		del cart_dict['interest_rate_lock']
		assert cart_dict == approx({
			'free_flow_time': 60,
			'initial_capacity': 100,
			'alpha': 0.10001988547129116,
			'beta': 3.500215589924521,
			'input_flow': 80.0,
			'value_of_time': 0.00100690634109406,
			'unit_cost_expansion': 95.00570832093116,
			'interest_rate': 0.0250022738169142,
			'yield_curve': -0.0024960505548531774,
			'expand_capacity': 0.0006718732232418368,
			'amortization_period': 15,
		})

		assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem)

		from emat import Measure

		# NOTE(review): MAXIMIZE is assigned but not used below.
		MAXIMIZE = Measure.MAXIMIZE
		MINIMIZE = Measure.MINIMIZE

		# Robustness measures: each applies `function` to the values of the
		# named model output (`variable_name`).
		robustness_functions = [
			Measure(
				'Expected Net Benefit',
				kind=Measure.INFO,
				variable_name='net_benefits',
				function=numpy.mean,
				#         min=-150,
				#         max=50,
			),

			Measure(
				'Probability of Net Loss',
				kind=MINIMIZE,
				variable_name='net_benefits',
				function=lambda x: numpy.mean(x < 0),
				min=0,
				max=1,
			),

			Measure(
				'95%ile Travel Time',
				kind=MINIMIZE,
				variable_name='build_travel_time',
				function=functools.partial(numpy.percentile, q=95),
				min=60,
				max=150,
			),

			Measure(
				'99%ile Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=functools.partial(numpy.percentile, q=99),
				#         min=0,
				#         max=10,
			),

			Measure(
				'Expected Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=numpy.mean,
				#         min=0,
				#         max=10,
			),

		]

		from emat import Constraint

		# Constraint on a robustness outcome.
		constraint_1 = Constraint(
			"Maximum Log Expected Present Cost",
			outcome_names="Expected Present Cost",
			function=Constraint.must_be_less_than(4000),
		)

		# Constraint on an input parameter.
		constraint_2 = Constraint(
			"Minimum Capacity Expansion",
			parameter_names="expand_capacity",
			function=Constraint.must_be_greater_than(10),
		)

		# Constraint on a parameter and an outcome together: the function
		# returns the amount above 1500 when debt_type is 'Paygo', else 0.
		constraint_3 = Constraint(
			"Maximum Paygo",
			parameter_names='debt_type',
			outcome_names='99%ile Present Cost',
			function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0,
		)

		from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics

		convergence_metrics = ConvergenceMetrics(
			HyperVolume.from_outcomes(robustness_functions),
			EpsilonProgress(),
			SolutionViewer.from_model_and_outcomes(m, robustness_functions),
		)

		# nfe=5 with 20 scenarios keeps the optimization run short.
		# Here robust_optimize is unpacked directly into (results, convergence).
		with SequentialEvaluator(m) as eval_seq:
			robust_results, convergence = m.robust_optimize(
				robustness_functions,
				scenarios=20,
				nfe=5,
				constraints=[
					constraint_1,
					constraint_2,
					constraint_3,
				],
				epsilons=[0.05, ] * len(robustness_functions),
				convergence=convergence_metrics,
				evaluator=eval_seq,
			)

		assert isinstance(robust_results, pandas.DataFrame)

		# Build a metamodel from the 'lhs' design and run a fresh design
		# through it; only successful completion is checked.
		mm = m.create_metamodel_from_design('lhs')

		design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2)

		design2_results = mm.run_experiments(design2)
Exemple #14
0
def test_database_walkthrough(data_regression, dataframe_regression):
    """Walk through the main EMAT database features, checking regression data.

    The test populates a Road Test database, exports and re-imports
    experiments (for a single design and then for multiple designs), and
    finally exercises invalidating and re-running experiment runs.
    Intermediate results are compared against stored baselines through the
    ``data_regression`` and ``dataframe_regression`` fixtures (presumably
    the ones provided by pytest-regressions).

    All files are written relative to a temporary working directory.  The
    original working directory is restored when the test finishes, so that
    later tests in the same process are not left inside a directory that
    is removed once the ``TemporaryDirectory`` object is finalized.
    """

    # For this walkthrough of database features, we'll work in a temporary directory.
    # (In real projects you'll likely want to save your data somewhere less ephemeral,
    # so don't just copy this tempfile code into your work.)

    original_cwd = os.getcwd()
    tempdir = tempfile.TemporaryDirectory()
    os.chdir(tempdir.name)

    try:
        # We begin our example by populating a database with some experimental
        # data, by creating and running a single design of experiments for the
        # Road Test model.

        import emat.examples
        scope, db, model = emat.examples.road_test()
        design = model.design_experiments()
        model.run_experiments(design)

        # ## Single-Design Datasets

        # ### Writing Out Raw Data
        #
        # When the database has only a single design of experiments, or if we
        # don't care about any differentiation between multiple designs that we
        # may have created and run, we can dump the entire set of model runs,
        # including uncertainties, policy levers, and performance measures, all
        # consolidated into a single pandas DataFrame using the
        # `read_experiment_all` function.  The constants even appear in this
        # DataFrame too, for good measure.

        df = db.read_experiment_all(scope.name)
        dataframe_regression.check(pd.DataFrame(df), basename='test_database__df')

        # Exporting this data is simply a matter of using the usual pandas
        # methods to save the dataframe to a format of your choosing.  We'll
        # save our data into a gzipped CSV file, which is somewhat compressed
        # (we're not monsters here) but still widely compatible for a variety
        # of uses.

        df.to_csv("road_test_1.csv.gz")

        # This table contains most of the information we want to export from
        # our database, but not everything.  We also probably want to have
        # access to all of the information in the exploratory scope as well.
        # Our example generator gives us a `Scope` reference directly, but if
        # we didn't have that we can still extract it from the database, using
        # the `read_scope` method.

        s = db.read_scope()
        s.dump(filename="road_test_scope.yaml")

        # ### Reading In Raw Data
        #
        # Now, we're ready to begin anew, constructing a fresh database from
        # scratch, using only the raw formatted files.
        #
        # First, let's load our scope from the yaml file, and initialize a
        # clean database using that scope.

        s2 = emat.Scope("road_test_scope.yaml")
        db2 = emat.SQLiteDB("road_test_2.sqldb")
        db2.store_scope(s2)

        # Just as we used pandas to save out our consolidated DataFrame of
        # experimental results, we can use it to read in a consolidated table
        # of experiments.

        df2 = pd.read_csv("road_test_1.csv.gz", index_col='experiment')
        # dataframe_regression.check(df2, basename='test_database__df2')

        # Writing experiments to a database is not quite as simple as reading
        # them.  There is a parallel `write_experiment_all` method for the
        # `Database` class, but to use it we need to provide not only the
        # DataFrame of actual results, but also a name for the design of
        # experiments we are writing (all experiments exist within designs) and
        # the source of the performance measure results (zero means actual
        # results from a core model run, and non-zero values are ID numbers for
        # metamodels). This allows many different possible sets of performance
        # measures to be stored for the same set of input parameters.

        db2.write_experiment_all(
            scope_name=s2.name,
            design_name='general',
            source=0,
            xlm_df=df2,
        )
        # NOTE(review): this reads back from `db` (the original database), not
        # from `db2`, so it does not exercise the data just written to `db2`.
        # Confirm whether `db2` was intended before changing it, since the
        # stored regression baseline depends on the result.
        df2b = db.read_experiment_all(scope.name)
        dataframe_regression.check(pd.DataFrame(df2b),
                                   basename='test_database__df2b')

        # ## Multiple-Design Datasets
        #
        # The EMAT database is not limited to storing a single design of
        # experiments.  Multiple designs can be stored for the same scope.
        # We'll add a set of univariate sensitivity tests to our database, and
        # a "ref" design that contains a single experiment with all inputs set
        # to their default values.

        design_uni = model.design_experiments(sampler='uni')
        model.run_experiments(design_uni)
        model.run_reference_experiment()

        # We now have three designs stored in our database. We can confirm this
        # by reading out the set of design names.

        assert sorted(db.read_design_names(s.name)) == sorted(
            ['lhs', 'ref', 'uni'])

        # Note that there
        # can be some experiments that are in more than one design.  This is
        # not merely duplicating the experiment and results, but actually
        # assigning the same experiment to both designs.  We can see this
        # for the 'uni' and 'ref' designs -- both contain the all-default
        # parameters experiment, and when we read these designs out of the
        # database, the same experiment number is reported out in both
        # designs.

        uni = db.read_experiment_all(scope.name, design_name='uni')
        ref = db.read_experiment_all(scope.name, design_name='ref')
        dataframe_regression.check(pd.DataFrame(uni),
                                   basename='test_database__uni')
        dataframe_regression.check(pd.DataFrame(ref),
                                   basename='test_database__ref')

        # ### Writing Out Raw Data
        #
        # We can read a single dataframe containing all the experiments
        # associated with this scope by omitting the `design_name` argument,
        # just as if there was only one design.

        df = db.read_experiment_all(scope.name)
        df.to_csv("road_test_2.csv.gz")

        # If we want to be able to reconstruct the various designs of
        # experiments later, we'll also need to write out instructions for
        # that.  The `read_all_experiment_ids` method can give us a dictionary
        # of all the relevant information.

        design_experiments = db.read_all_experiment_ids(scope.name,
                                                        design_name='*',
                                                        grouped=True)
        data_regression.check(design_experiments)

        # We can write this dictionary to a file in 'yaml' format.

        with open("road_test_design_experiments.yaml", 'wt') as f:
            yaml.dump(design_experiments, f)

        # ### Reading In Raw Data

        # To construct a new emat Database with multiple designs of
        # experiments, we store the scope, read the consolidated table of
        # experiments, and pass the design-to-experiment mapping loaded from
        # the yaml file as the `design_name` argument.

        db3 = emat.SQLiteDB("road_test_3.sqldb")
        db3.store_scope(s2)
        df3 = pd.read_csv("road_test_2.csv.gz", index_col='experiment')

        with open("road_test_design_experiments.yaml", 'rt') as f:
            design_experiments2 = yaml.safe_load(f)
        data_regression.check(design_experiments2)

        db3.write_experiment_all(
            scope_name=s2.name,
            design_name=design_experiments2,
            source=0,
            xlm_df=df3,
        )

        assert sorted(db3.read_design_names(s.name)) == sorted(
            ['lhs', 'ref', 'uni'])

        dx = db3.read_all_experiment_ids(scope.name, design_name='*', grouped=True)
        assert dx == {'lhs': '1-110', 'ref': '111', 'uni': '111-132'}

        uni3 = db3.read_experiment_all(scope.name, design_name='uni')
        dataframe_regression.check(pd.DataFrame(uni3),
                                   basename='test_database__uni')

        # ## Re-running Experiments

        # This section provides a short walkthrough of how to handle mistakes
        # in an EMAT database.  By "mistakes" we are referring to incorrect
        # values that have been written into the database by accident, generally
        # arising from core model runs that were misconfigured or suffered
        # non-fatal errors that caused the results to be invalid.
        #
        # One approach to handling such problems is to simply start over with a
        # brand new clean database file.  However, this may be inconvenient if
        # the database already includes a number of valid results, especially if
        # those valid results were expensive to generate.  It may also be
        # desirable to keep prior invalid results on hand, so as to easily
        # recognize when errors recur.
        #
        # We begin this example by populating our database with some more
        # experimental data, by creating and running a single design of
        # experiments for the Road Test model, except these experiments will be
        # created with a misconfigured model (lane_width = 10.3, it should be
        # 10), so the results will be bad.

        model.lane_width = 10.3
        oops = model.design_experiments(design_name='oops', random_seed=12345)
        model.run_experiments(oops)

        # We can review a dataframe of results as before, using the
        # `read_experiment_all` method. This time we will add
        # `with_run_ids=True`, which will add an extra column to the index,
        # showing a universally unique id attached to each row of results.
        # (The index is dropped before the regression check below.)

        oops_result1 = db.read_experiment_all(scope.name,
                                              'oops',
                                              with_run_ids=True)
        dataframe_regression.check(
            pd.DataFrame(oops_result1).reset_index(drop=True),
            basename='test_database__oops_result1')

        # Some of these results are obviously problematic.  Increasing capacity
        # cannot possibly result in a negative travel time savings. (Braess
        # paradox doesn't apply here because it's just one link, not a
        # network.)  So those negative values are clearly wrong.  We can fix
        # the model so they won't be wrong, but by default the
        # `run_experiments` method won't actually re-run models when the
        # results are already available in the database.  To solve this
        # conundrum, we can mark the incorrect results as invalid, using a
        # query to pull out the rows that can be flagged as wrong.

        db.invalidate_experiment_runs(queries=['time_savings < 0'])

        # The `[73]` returned here indicates that 73 sets of results were
        # invalidated by this command.  Now we can fix our model, and then use
        # the `run_experiments` method to get new model runs for the
        # invalidated results.

        model.lane_width = 10
        oops_result2 = model.run_experiments(oops)
        dataframe_regression.check(
            pd.DataFrame(oops_result2).reset_index(drop=True),
            basename='test_database__oops_result2')

        # The re-run fixed the negative values, although it left in place the
        # other experimental runs in the database. By the way we constructed
        # this example, we know those are wrong too, and it's evident in the
        # apparent discontinuity in the input flow graph, which we can zoom in
        # on.

        # ax = oops_result2.plot.scatter(x='input_flow', y='time_savings', color='r')
        # ax.plot([109, 135], [0, 35], '--', color='y');

        # Those original results are bad too, and we want to invalidate them as
        # well.  In addition to giving conditional queries to the
        # `invalidate_experiment_runs` method, we can also give a dataframe of
        # results that have run ids attached, and those unique ids will be used
        # to find and invalidate results in the database.  Here, we pass in the
        # dataframe of all the results, which contains all 110 runs, but only
        # 37 runs are newly invalidated (73 were invalidated previously).

        db.invalidate_experiment_runs(oops_result1)

        # Now when we run the experiments again, those 37 experiments are re-run.

        oops_result3 = model.run_experiments(oops)
        dataframe_regression.check(
            pd.DataFrame(oops_result3).reset_index(drop=True),
            basename='test_database__oops_result3')

        # ### Writing Out All Runs
        #
        # By default, the `read_experiment_all` method returns the most recent
        # valid set of performance measures for each experiment, but we can
        # override this behavior to ask for all run results, or all valid or
        # invalid results.  This allows us to easily write out data files
        # containing all the results stored in the database.

        oops_all = db.read_experiment_all(scope.name,
                                          with_run_ids=True,
                                          runs='all')
        dataframe_regression.check(pd.DataFrame(oops_all).reset_index(drop=True),
                                   basename='test_database__oops_all')

        # If we want to mark the valid and invalid runs, we can read them
        # separately and attach a tag to the two dataframes.

        runs_1 = db.read_experiment_all(scope.name,
                                        with_run_ids=True,
                                        runs='valid')
        runs_1['is_valid'] = True
        runs_0 = db.read_experiment_all(scope.name,
                                        with_run_ids=True,
                                        runs='invalid')
        runs_0['is_valid'] = False
        all_runs = pd.concat([runs_1, runs_0])
        dataframe_regression.check(pd.DataFrame(all_runs).reset_index(drop=True),
                                   basename='test_database__all_runs')
    finally:
        # Always leave the process in the directory it started in; the
        # temporary directory itself is removed when `tempdir` is finalized.
        os.chdir(original_cwd)
Exemple #15
0
def test_database_merging():
    """Check multi-source measures and merging one database into another.

    A core model and a metamodel both write measures for the "lhs_2"
    design in one database; the same design is then copied to and run in a
    second database, which is merged back into the first.  The row counts
    read for "lhs_2" are asserted before and after the merge.
    """
    import emat
    import emat.examples
    from emat.experiment.experimental_design import design_experiments
    from emat.model.core_python import PythonCoreModel, Road_Capacity_Investment

    scope_name = "EMAT Road Test"

    def dims(frame):
        # (number of rows, number of columns) of a tabular result.
        return len(frame), len(frame.columns)

    scope = emat.Scope(emat.package_file("model", "tests", "road_test.yaml"))
    primary_db = emat.SQLiteDB()
    scope.store_scope(primary_db)
    assert primary_db.read_scope_names() == [scope_name]

    # Two LHS designs are stored; only their presence in the database matters.
    design_experiments(
        scope, db=primary_db, n_samples_per_factor=10, sampler="lhs",
    )
    design_experiments(
        scope, db=primary_db, n_samples=500, sampler="lhs",
        design_name="lhs_large",
    )
    assert primary_db.read_design_names(scope_name) == ["lhs", "lhs_large"]

    core_model = PythonCoreModel(
        Road_Capacity_Investment, scope=scope, db=primary_db,
    )
    lhs_run = core_model.run_experiments(design_name="lhs")
    core_model.run_experiments(design_name="lhs_large")

    # What is read back from the database must match what the run returned.
    pd.testing.assert_frame_equal(
        core_model.read_experiments(design_name="lhs"),
        lhs_run,
        check_like=True,
    )

    assert dims(core_model.read_experiment_parameters(design_name="lhs")) == (110, 13)
    assert dims(core_model.read_experiment_measures(design_name="lhs")) == (110, 7)

    meta = core_model.create_metamodel_from_design("lhs")
    assert meta.metamodel_id == 1
    assert isinstance(meta.function, emat.MetaModel)

    second_design = design_experiments(
        scope, db=primary_db, n_samples_per_factor=10, sampler="lhs",
        random_seed=2,
    )
    meta_run = meta.run_experiments(second_design)
    assert dims(meta_run) == (110, 20)

    assert primary_db.read_design_names(None) == ["lhs", "lhs_2", "lhs_large"]
    assert dims(primary_db.read_experiment_measures(None, "lhs_2")) == (110, 7)
    assert primary_db.read_experiment_measure_sources(None, "lhs_2") == [1]

    # Run the first five experiments of the same design through the core model.
    core_model.allow_short_circuit = False
    core_run = core_model.run_experiments(second_design.iloc[:5])
    assert dims(core_run) == (5, 20)

    with pytest.raises(ValueError):
        # now there are two sources of some measures
        primary_db.read_experiment_measures(None, "lhs_2")

    assert set(
        primary_db.read_experiment_measure_sources(None, "lhs_2")
    ) == {0, 1}
    assert len(primary_db.read_experiment_measures(None, "lhs_2", source=0)) == 5
    assert len(primary_db.read_experiment_measures(None, "lhs_2", source=1)) == 110

    _, other_db, other_model = emat.examples.road_test()

    # write the design for lhs_2 into a different database.
    # it ends up giving different experiment id's to these, which is fine.
    other_db.write_experiment_parameters(
        None,
        "lhs_2",
        primary_db.read_experiment_parameters(None, "lhs_2"),
    )

    copied = other_db.read_experiment_parameters(None, "lhs_2")
    assert dims(copied) == (110, 13)
    pd.testing.assert_frame_equal(
        second_design.reset_index(drop=True),
        copied.reset_index(drop=True),
        check_like=True,
    )

    other_model.run_experiments("lhs_2")

    # Before merging: five core-model rows, 115 valid rows in total.
    assert dims(
        primary_db.read_experiment_measures(None, "lhs_2", source=0)
    ) == (5, 7)
    assert len(
        primary_db.read_experiment_measures(None, "lhs_2", runs="valid")
    ) == 115

    primary_db.merge_database(other_db)

    # After merging: 110 core-model rows, 225 valid rows in total.
    assert dims(
        primary_db.read_experiment_measures(None, "lhs_2", source=0)
    ) == (110, 7)
    assert len(
        primary_db.read_experiment_measures(None, "lhs_2", runs="valid")
    ) == 225
Exemple #16
0
def test_read_db_gz():
    """Check that a gzipped SQLite database can be opened and read back.

    Opening a missing scope file or a missing ``.db.gz`` file must raise
    ``FileNotFoundError``.  If the example ``roadtest.db.gz`` file does not
    exist yet, it is generated first from a 110-sample "lhs" design run
    through ``Road_Capacity_Investment`` and then gzipped.  The scope and
    the experiments read from the gzipped database are compared against
    the scope loaded from the yaml file and the expected column layout.
    """
    from emat.model.core_python import Road_Capacity_Investment

    road_test_scope_file = emat.package_file("model", "tests",
                                             "road_test.yaml")
    with pytest.raises(FileNotFoundError):
        emat.Scope(emat.package_file("nope.yaml"))
    s = emat.Scope(road_test_scope_file)
    with pytest.raises(FileNotFoundError):
        emat.SQLiteDB(emat.package_file("nope.db.gz"))

    gz_path = emat.package_file("examples", "roadtest.db.gz")

    if not os.path.exists(gz_path):
        # The example database is missing: build it in an uncompressed
        # temporary file, then gzip that file into place.
        import gzip
        import shutil

        tmp_path = emat.package_file("examples", "roadtest.db.tmp")
        db_w = emat.SQLiteDB(tmp_path, initialize=True)
        s.store_scope(db_w)
        s.design_experiments(n_samples=110,
                             random_seed=1234,
                             db=db_w,
                             design_name="lhs")

        m_w = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db_w)
        m_w.run_experiments(design_name="lhs", db=db_w)
        # Close the connection before the file is read for compression.
        db_w.conn.close()

        with open(tmp_path, "rb") as f_in, gzip.open(gz_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

    db = emat.SQLiteDB(gz_path)

    assert repr(db) == '<emat.SQLiteDB with scope "EMAT Road Test">'
    db_info = db.get_db_info()
    assert db_info.startswith("SQLite @ ")
    assert db_info.endswith("roadtest.db")

    assert db.read_scope_names() == ["EMAT Road Test"]

    s1 = db.read_scope("EMAT Road Test")

    # Exact type match is intended here, so compare the types by identity
    # rather than using isinstance().
    assert type(s1) is type(s)

    for k in ("_x_list", "_l_list", "_c_list", "_m_list", "name", "desc"):
        assert getattr(s, k) == getattr(s1, k), k

    assert s == s1

    experiments = db.read_experiment_all("EMAT Road Test", "lhs")
    assert experiments.shape == (110, 20)
    assert list(experiments.columns) == [
        "free_flow_time",
        "initial_capacity",
        "alpha",
        "beta",
        "input_flow",
        "value_of_time",
        "unit_cost_expansion",
        "interest_rate",
        "yield_curve",
        "expand_capacity",
        "amortization_period",
        "debt_type",
        "interest_rate_lock",
        "no_build_travel_time",
        "build_travel_time",
        "time_savings",
        "value_of_time_savings",
        "net_benefits",
        "cost_of_capacity_expansion",
        "present_cost_expansion",
    ]

    m = emat.PythonCoreModel(Road_Capacity_Investment, scope=s, db=db)
    assert m.metamodel_id is None