Example No. 1
# Actually running the model can be done by the user on an *ad hoc* basis (i.e., manually defining every
# combination of inputs that will be evaluated), but the real power of EMAT comes from running the model
# using algorithm-created experimental designs.
#
# An important experimental design used in exploratory modeling is the Latin Hypercube.  This design selects
# a random set of experiments across multiple input dimensions, to ensure "good" coverage of the 
# multi-dimensional modeling space.
#
# The `design_experiments` function (used below with `sampler='lhs'`) creates such a design based on a `Scope`,
# and optionally stores the design of experiments in a database.

# %%
from emat.experiment.experimental_design import design_experiments

# %%
design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
design.head()

# %%
large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs', design_name='lhs_large')
large_design.head()

# %% [markdown]
# We can review what experimental designs have already been stored in the database using the 
# `read_design_names` method of the `Database` object.

# %%
emat_db.read_design_names('EMAT Road Test')

# %% [markdown]
# ## Core Model in Python
Example No. 2
	def test_road_test(self):
		import os
		test_dir = os.path.dirname(__file__)
		os.chdir(test_dir)

		road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

		road_scope = emat.Scope(road_test_scope_file)

		# <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures>
		assert len(road_scope.get_measures()) == 7
		assert len(road_scope.get_levers()) == 4
		assert len(road_scope.get_uncertainties()) == 7
		assert len(road_scope.get_constants()) == 2

		emat_db = emat.SQLiteDB()

		road_scope.store_scope(emat_db)

		with pytest.raises(KeyError):
			road_scope.store_scope(emat_db)

		assert emat_db.read_scope_names() == ['EMAT Road Test']

		design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
		design.head()

		large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs',
										  design_name='lhs_large')
		large_design.head()

		assert list(large_design.columns) == [
			'alpha',
			'amortization_period',
			'beta',
			'debt_type',
			'expand_capacity',
			'input_flow',
			'interest_rate',
			'interest_rate_lock',
			'unit_cost_expansion',
			'value_of_time',
			'yield_curve',
			'free_flow_time',
			'initial_capacity',
		]

		assert list(large_design.head().index) == [111, 112, 113, 114, 115]

		assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large']
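
		# Wrap the Road_Capacity_Investment function as an EMAT core model,
		# connected to the scope and the experiment database.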

		m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

		with SequentialEvaluator(m) as eval_seq:
			lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq)

		lhs_results.head()

		assert lhs_results.head()['present_cost_expansion'].values == approx(
			[2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514])

		assert lhs_results.head()['net_benefits'].values == approx(
			[-22.29090499, -16.84301382, -113.98841188, 11.53956058, 78.03661612])

		assert lhs_results.tail()['present_cost_expansion'].values == approx(
			[2720.51645703, 4000.91232689, 6887.83193063, 3739.47839941, 1582.52899124])

		assert lhs_results.tail()['net_benefits'].values == approx(
			[841.46278175, -146.71279267, -112.5681036, 25.48055303, 127.31154155])

		with SequentialEvaluator(m) as eval_seq:
			lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq)
		lhs_large_results.head()

		assert lhs_large_results.head()['net_benefits'].values == approx(
			[-522.45283083, -355.1599307, -178.6623215, 23.46263498, -301.17700968])

		lhs_outcomes = m.read_experiment_measures(design_name='lhs')
		assert lhs_outcomes.head()['time_savings'].values == approx(
			[13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139])
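
		# Feature scoring estimates how strongly each input drives each
		# performance measure.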

		scores = m.get_feature_scores('lhs', random_state=123)
		stable_df("./road_test_feature_scores.pkl.gz", scores.data)

		from emat.workbench.analysis import prim

		x = m.read_experiment_parameters(design_name='lhs_large')

		prim_alg = prim.Prim(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
			threshold=0.4,
		)

		box1 = prim_alg.find_box()

		stable_df("./road_test_box1_peeling_trajectory.pkl.gz", box1.peeling_trajectory)

		from emat.util.xmle import Show
		from emat.util.xmle.elem import Elem

		assert isinstance(Show(box1.show_tradeoff()), Elem)
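
		# CART offers an alternative, tree-based scenario discovery method.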

		from emat.workbench.analysis import cart

		cart_alg = cart.CART(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
		)
		cart_alg.build_tree()

		stable_df("./road_test_cart_box0.pkl.gz", cart_alg.boxes[0])

		cart_dict = dict(cart_alg.boxes[0].iloc[0])
		assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'}
		#assert cart_dict['interest_rate_lock'] == {False, True}

		assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem)

		from emat import Measure

		MAXIMIZE = Measure.MAXIMIZE
		MINIMIZE = Measure.MINIMIZE
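
		# Robustness measures summarize each performance measure across
		# many sampled scenarios.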

		robustness_functions = [
			Measure(
				'Expected Net Benefit',
				kind=Measure.INFO,
				variable_name='net_benefits',
				function=numpy.mean,
				#         min=-150,
				#         max=50,
			),

			Measure(
				'Probability of Net Loss',
				kind=MINIMIZE,
				variable_name='net_benefits',
				function=lambda x: numpy.mean(x < 0),
				min=0,
				max=1,
			),

			Measure(
				'95%ile Travel Time',
				kind=MINIMIZE,
				variable_name='build_travel_time',
				function=functools.partial(numpy.percentile, q=95),
				min=60,
				max=150,
			),

			Measure(
				'99%ile Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=functools.partial(numpy.percentile, q=99),
				#         min=0,
				#         max=10,
			),

			Measure(
				'Expected Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=numpy.mean,
				#         min=0,
				#         max=10,
			),

		]

		from emat import Constraint

		constraint_1 = Constraint(
			"Maximum Log Expected Present Cost",
			outcome_names="Expected Present Cost",
			function=Constraint.must_be_less_than(4000),
		)

		constraint_2 = Constraint(
			"Minimum Capacity Expansion",
			parameter_names="expand_capacity",
			function=Constraint.must_be_greater_than(10),
		)

		constraint_3 = Constraint(
			"Maximum Paygo",
			parameter_names='debt_type',
			outcome_names='99%ile Present Cost',
			function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0,
		)

		from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics

		convergence_metrics = ConvergenceMetrics(
			HyperVolume.from_outcomes(robustness_functions),
			EpsilonProgress(),
			SolutionViewer.from_model_and_outcomes(m, robustness_functions),
		)
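
		# Search the lever space for solutions that hold up across 20 sampled
		# scenarios, subject to the three constraints defined above.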

		with SequentialEvaluator(m) as eval_seq:
			robust = m.robust_optimize(
				robustness_functions,
				scenarios=20,
				nfe=5,
				constraints=[
					constraint_1,
					constraint_2,
					constraint_3,
				],
				epsilons=[0.05, ] * len(robustness_functions),
				convergence=convergence_metrics,
				evaluator=eval_seq,
			)
		robust_results, convergence = robust.result, robust.convergence

		assert isinstance(robust_results, pandas.DataFrame)
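
		# Fit a meta-model to the stored 'lhs' experiments and evaluate a
		# fresh design against it.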

		mm = m.create_metamodel_from_design('lhs')

		design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2)

		design2_results = mm.run_experiments(design2)
Example No. 3
# ## Efficient Designs

# %% [markdown]
# The univariate tests, while useful for model debugging and testing, are not the best choice for exploratory modeling, where we want to consider changing many input factors at once. To support multivariate exploration, TMIP-EMAT includes a `design_experiments` function that offers several different multivariate samplers.

# %%
from emat.experiment.experimental_design import design_experiments

# %% [markdown]
# The default multivariate sampler is a basic Latin Hypercube sampler. Meta-models for deterministic simulation experiments, such as a TDM, are best supported by a “space filling” design of experiments, such as Latin Hypercube draws (Sacks, Welch, Mitchell, & Wynn, 1989).
#
# A Latin Hypercube sample for one dimension is constructed by subdividing the distribution of each input factor into N equally probable ranges, and drawing one random sample within each range. For example, if an input factor is assumed to be uniformly distributed between 0.1 and 0.2, that distribution can be divided into four regions (0.1-0.125, 0.125-0.15, 0.15-0.175, and 0.175-0.2), and one random draw can be made in each region. In the example below, we illustrate exactly this, for the "alpha" parameter in the demo scope.

# %%
import numpy as np

n = 4
# `scope` is the demo scope constructed earlier in this notebook
design = design_experiments(scope, n_samples=n, random_seed=0)
ax = design.plot.scatter(x='alpha',
                         y='initial_capacity',
                         grid=True,
                         figsize=(6, 1))
xlim = design.scope['alpha'].range
ax.set_xlim(xlim)
ax.set_xticks(np.linspace(*xlim, n + 1))
ax.set_ylabel('')
ax.set_yticks([])

# %% [markdown]
# This ensures better coverage of the entire input range than making four random draws from the full space, which could easily result in a cluster of observations in one part of the space and a large void elsewhere.
#
# Generating a multi-dimensional Latin Hypercube sample for use with multiple input variables follows this same basic technique, although the various draws in each dimension are randomly reordered before being joined with draws in other dimensions, to avoid unintended correlation. Various techniques exist to conduct this random reordering, although there is no agreement in the literature about the “best” orthogonal Latin hypercube design. In most instances, a simple random reordering is sufficient to get pretty good coverage of the space; this is the default in TMIP-EMAT. If a greater degree of orthogonality is desired, one simple method to achieve it is to generate numerous possible sets of random draws, and greedily select the set that minimizes the maximum correlation.
#
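
# %% [markdown]
# As a rough illustration of these two ideas (a minimal sketch, not TMIP-EMAT's
# internal implementation), the cell below builds a basic Latin Hypercube with
# numpy, taking one stratified draw per equally probable range in each dimension
# and shuffling each column independently. It then applies the greedy approach of
# generating several candidate designs and keeping the one with the smallest
# maximum absolute correlation. The names `simple_lhs` and `lhs_min_correlation`
# are illustrative only.

# %%
import numpy as np

def simple_lhs(n_samples, n_dims, rng=None):
    """Minimal Latin Hypercube sketch on the unit hypercube."""
    rng = np.random.default_rng(rng)
    # one uniform draw inside each of n_samples equally probable strata
    strata = np.arange(n_samples)[:, None]
    u = (strata + rng.random((n_samples, n_dims))) / n_samples
    # shuffle each column independently to break cross-column correlation
    for j in range(n_dims):
        rng.shuffle(u[:, j])
    return u

def lhs_min_correlation(n_samples, n_dims, n_candidates=20, rng=None):
    """Greedily keep the candidate design whose largest absolute
    off-diagonal correlation is smallest."""
    rng = np.random.default_rng(rng)
    best, best_corr = None, np.inf
    for _ in range(n_candidates):
        cand = simple_lhs(n_samples, n_dims, rng)
        off_diag = np.corrcoef(cand, rowvar=False) - np.eye(n_dims)
        max_corr = np.abs(off_diag).max()
        if max_corr < best_corr:
            best, best_corr = cand, max_corr
    return best

lhs_min_correlation(4, 2, rng=0)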
Example No. 4
	def test_road_test(self):
		road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml')

		road_scope = emat.Scope(road_test_scope_file)

		# <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures>
		assert len(road_scope.get_measures()) == 7
		assert len(road_scope.get_levers()) == 4
		assert len(road_scope.get_uncertainties()) == 7
		assert len(road_scope.get_constants()) == 2

		emat_db = emat.SQLiteDB()

		road_scope.store_scope(emat_db)

		with pytest.raises(KeyError):
			road_scope.store_scope(emat_db)

		assert emat_db.read_scope_names() == ['EMAT Road Test']

		design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs')
		design.head()

		large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs',
										  design_name='lhs_large')
		large_design.head()

		assert list(large_design.columns) == [
			'alpha',
			'amortization_period',
			'beta',
			'debt_type',
			'expand_capacity',
			'input_flow',
			'interest_rate',
			'interest_rate_lock',
			'unit_cost_expansion',
			'value_of_time',
			'yield_curve',
			'free_flow_time',
			'initial_capacity',
		]

		assert list(large_design.head().index) == [111, 112, 113, 114, 115]

		assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large']
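
		# Instantiate the core model from the Python function, scope, and database.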

		m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

		with SequentialEvaluator(m) as eval_seq:
			lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq)

		lhs_results.head()

		assert lhs_results.head()['present_cost_expansion'].values == approx(
			[2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514])

		assert lhs_results.head()['net_benefits'].values == approx(
			[-79.51551505, -205.32148044, -151.94431822, -167.62487134, -3.97293985])

		with SequentialEvaluator(m) as eval_seq:
			lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq)
		lhs_large_results.head()

		assert lhs_large_results.head()['net_benefits'].values == approx(
			[-584.36098322, -541.5458395, -185.16661464, -135.85689709, -357.36106457])

		lhs_outcomes = m.read_experiment_measures(design_name='lhs')
		assert lhs_outcomes.head()['time_savings'].values == approx(
			[13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139])
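
		# Compare computed feature scores against reference values, allowing
		# 10% relative tolerance.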

		correct_scores = numpy.array(
			[[0.06603461, 0.04858595, 0.06458574, 0.03298163, 0.05018515, 0., 0., 0.53156587, 0.05060416, 0.02558088,
			  0.04676956, 0.04131266, 0.04179378],
			 [0.06003223, 0.04836434, 0.06059554, 0.03593644, 0.27734396, 0., 0., 0.28235419, 0.05303979, 0.03985181,
			  0.04303371, 0.05004349, 0.04940448],
			 [0.08760605, 0.04630414, 0.0795043, 0.03892201, 0.10182534, 0., 0., 0.42508457, 0.04634321, 0.03216387,
			  0.0497183, 0.04953772, 0.0429905],
			 [0.08365598, 0.04118732, 0.06716887, 0.03789444, 0.06509519, 0., 0., 0.31494171, 0.06517462, 0.02895742,
			  0.04731707, 0.17515158, 0.07345581],
			 [0.06789382, 0.07852257, 0.05066944, 0.04807088, 0.32054735, 0., 0., 0.15953055, 0.05320201, 0.02890069,
			  0.07033928, 0.06372418, 0.05859923],
			 [0.05105435, 0.09460353, 0.04614178, 0.04296901, 0.45179611, 0., 0., 0.04909801, 0.05478798, 0.023099,
			  0.08160785, 0.05642169, 0.04842069],
			 [0.04685703, 0.03490931, 0.03214081, 0.03191602, 0.56130318, 0., 0., 0.04011044, 0.04812986, 0.02228924,
			  0.09753361, 0.04273004, 0.04208045], ])

		scores = m.get_feature_scores('lhs', random_state=123)

		for _i in range(scores.metadata.values.shape[0]):
			for _j in range(scores.metadata.values.shape[1]):
				assert scores.metadata.values[_i,_j] == approx(correct_scores[_i,_j], rel=.1)
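
		# PRIM scenario discovery on the large design, targeting positive net benefits.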

		from ema_workbench.analysis import prim

		x = m.read_experiment_parameters(design_name='lhs_large')

		prim_alg = prim.Prim(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
			threshold=0.4,
		)

		box1 = prim_alg.find_box()

		assert dict(box1.peeling_trajectory.iloc[45]) == approx({
			'coverage': 0.8014705882352942,
			'density': 0.582109479305741,
			'id': 45,
			'mass': 0.1498,
			'mean': 0.582109479305741,
			'res_dim': 4,
		})

		from emat.util.xmle import Show
		from emat.util.xmle.elem import Elem

		assert isinstance(Show(box1.show_tradeoff()), Elem)
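
		# Repeat the discovery exercise with CART.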

		from ema_workbench.analysis import cart

		cart_alg = cart.CART(
			m.read_experiment_parameters(design_name='lhs_large'),
			m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0,
		)
		cart_alg.build_tree()

		cart_dict = dict(cart_alg.boxes[0].iloc[0])
		assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'}
		assert cart_dict['interest_rate_lock'] == {False, True}
		del cart_dict['debt_type']
		del cart_dict['interest_rate_lock']
		assert cart_dict == approx({
			'free_flow_time': 60,
			'initial_capacity': 100,
			'alpha': 0.10001988547129116,
			'beta': 3.500215589924521,
			'input_flow': 80.0,
			'value_of_time': 0.00100690634109406,
			'unit_cost_expansion': 95.00570832093116,
			'interest_rate': 0.0250022738169142,
			'yield_curve': -0.0024960505548531774,
			'expand_capacity': 0.0006718732232418368,
			'amortization_period': 15,
		})

		assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem)

		from emat import Measure

		MAXIMIZE = Measure.MAXIMIZE
		MINIMIZE = Measure.MINIMIZE
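
		# Define the robustness measures used by the robust optimization below.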

		robustness_functions = [
			Measure(
				'Expected Net Benefit',
				kind=Measure.INFO,
				variable_name='net_benefits',
				function=numpy.mean,
				#         min=-150,
				#         max=50,
			),

			Measure(
				'Probability of Net Loss',
				kind=MINIMIZE,
				variable_name='net_benefits',
				function=lambda x: numpy.mean(x < 0),
				min=0,
				max=1,
			),

			Measure(
				'95%ile Travel Time',
				kind=MINIMIZE,
				variable_name='build_travel_time',
				function=functools.partial(numpy.percentile, q=95),
				min=60,
				max=150,
			),

			Measure(
				'99%ile Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=functools.partial(numpy.percentile, q=99),
				#         min=0,
				#         max=10,
			),

			Measure(
				'Expected Present Cost',
				kind=Measure.INFO,
				variable_name='present_cost_expansion',
				function=numpy.mean,
				#         min=0,
				#         max=10,
			),

		]

		from emat import Constraint

		constraint_1 = Constraint(
			"Maximum Log Expected Present Cost",
			outcome_names="Expected Present Cost",
			function=Constraint.must_be_less_than(4000),
		)

		constraint_2 = Constraint(
			"Minimum Capacity Expansion",
			parameter_names="expand_capacity",
			function=Constraint.must_be_greater_than(10),
		)

		constraint_3 = Constraint(
			"Maximum Paygo",
			parameter_names='debt_type',
			outcome_names='99%ile Present Cost',
			function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0,
		)

		from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics

		convergence_metrics = ConvergenceMetrics(
			HyperVolume.from_outcomes(robustness_functions),
			EpsilonProgress(),
			SolutionViewer.from_model_and_outcomes(m, robustness_functions),
		)
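
		# Run the robust optimization itself: a small search (nfe=5) across
		# 20 scenarios, honoring the constraints above.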

		with SequentialEvaluator(m) as eval_seq:
			robust_results, convergence = m.robust_optimize(
				robustness_functions,
				scenarios=20,
				nfe=5,
				constraints=[
					constraint_1,
					constraint_2,
					constraint_3,
				],
				epsilons=[0.05, ] * len(robustness_functions),
				convergence=convergence_metrics,
				evaluator=eval_seq,
			)

		assert isinstance(robust_results, pandas.DataFrame)
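
		# Build a meta-model from the 'lhs' experiments and run a new design through it.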

		mm = m.create_metamodel_from_design('lhs')

		design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2)

		design2_results = mm.run_experiments(design2)
Example No. 5
def test_database_merging():
    import emat
    import pandas as pd
    import pytest

    road_test_scope_file = emat.package_file("model", "tests",
                                             "road_test.yaml")

    road_scope = emat.Scope(road_test_scope_file)
    emat_db = emat.SQLiteDB()
    road_scope.store_scope(emat_db)
    assert emat_db.read_scope_names() == ["EMAT Road Test"]

    from emat.experiment.experimental_design import design_experiments

    design = design_experiments(road_scope,
                                db=emat_db,
                                n_samples_per_factor=10,
                                sampler="lhs")
    large_design = design_experiments(road_scope,
                                      db=emat_db,
                                      n_samples=500,
                                      sampler="lhs",
                                      design_name="lhs_large")

    assert emat_db.read_design_names("EMAT Road Test") == ["lhs", "lhs_large"]

    from emat.model.core_python import PythonCoreModel, Road_Capacity_Investment

    m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db)

    lhs_results = m.run_experiments(design_name="lhs")

    lhs_large_results = m.run_experiments(design_name="lhs_large")

    reload_results = m.read_experiments(design_name="lhs")

    pd.testing.assert_frame_equal(
        reload_results,
        lhs_results,
        check_like=True,
    )

    lhs_params = m.read_experiment_parameters(design_name="lhs")
    assert len(lhs_params) == 110
    assert len(lhs_params.columns) == 13

    lhs_outcomes = m.read_experiment_measures(design_name="lhs")
    assert len(lhs_outcomes) == 110
    assert len(lhs_outcomes.columns) == 7
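
    # Create a meta-model from the completed 'lhs' experiments.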

    mm = m.create_metamodel_from_design("lhs")

    assert mm.metamodel_id == 1

    assert isinstance(mm.function, emat.MetaModel)

    design2 = design_experiments(road_scope,
                                 db=emat_db,
                                 n_samples_per_factor=10,
                                 sampler="lhs",
                                 random_seed=2)

    design2_results = mm.run_experiments(design2)

    assert len(design2_results) == 110

    assert len(design2_results.columns) == 20

    assert emat_db.read_design_names(None) == ["lhs", "lhs_2", "lhs_large"]

    check = emat_db.read_experiment_measures(None, "lhs_2")
    assert len(check) == 110
    assert len(check.columns) == 7

    assert emat_db.read_experiment_measure_sources(None, "lhs_2") == [1]
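
    # Disable short-circuiting so these experiments are actually re-run by
    # the core model, creating a second source (0) for their measures.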

    m.allow_short_circuit = False
    design2_results0 = m.run_experiments(design2.iloc[:5])

    assert len(design2_results0) == 5
    assert len(design2_results0.columns) == 20

    with pytest.raises(ValueError):
        # now there are two sources of some measures
        emat_db.read_experiment_measures(None, "lhs_2")

    assert set(emat_db.read_experiment_measure_sources(None,
                                                       "lhs_2")) == {0, 1}

    check = emat_db.read_experiment_measures(None, "lhs_2", source=0)
    assert len(check) == 5

    check = emat_db.read_experiment_measures(None, "lhs_2", source=1)
    assert len(check) == 110
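
    # Spin up a second, independent copy of the road test example, to use as
    # a separate database for merging.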

    import emat.examples

    s2, db2, m2 = emat.examples.road_test()

    # write the design for lhs_2 into a different database.
    # it ends up giving different experiment IDs to these, which is fine.
    db2.write_experiment_parameters(
        None, "lhs_2", emat_db.read_experiment_parameters(None, "lhs_2"))

    check = db2.read_experiment_parameters(
        None,
        "lhs_2",
    )
    assert len(check) == 110
    assert len(check.columns) == 13

    pd.testing.assert_frame_equal(
        design2.reset_index(drop=True),
        check.reset_index(drop=True),
        check_like=True,
    )

    design2_results2 = m2.run_experiments("lhs_2")

    check = emat_db.read_experiment_measures(None, "lhs_2", source=0)
    assert len(check) == 5
    assert len(check.columns) == 7

    check = emat_db.read_experiment_measures(None, "lhs_2", runs="valid")
    assert len(check) == 115
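
    # Merge db2 into the primary database; the core-model results recorded
    # there fill in the remaining source-0 measures.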

    emat_db.merge_database(db2)

    check = emat_db.read_experiment_measures(None, "lhs_2", source=0)
    assert len(check) == 110
    assert len(check.columns) == 7

    check = emat_db.read_experiment_measures(None, "lhs_2", runs="valid")
    assert len(check) == 225