def test_stats_to_dataframe(self): x, outcomes = utilities.load_flu_data() y = flu_classify(outcomes) alg = cart.CART(x, y, mode=RuleInductionType.BINARY) alg.build_tree() stats = alg.stats_to_dataframe() y = outcomes['deceased population region 1'][:, -1] alg = cart.CART(x, y, mode=RuleInductionType.REGRESSION) alg.build_tree() stats = alg.stats_to_dataframe() y = np.random.randint(1, 5, y.shape[0]) alg = cart.CART(x, y, mode=RuleInductionType.CLASSIFICATION) alg.build_tree() stats = alg.stats_to_dataframe() print(stats)
def test_stats(self): x = pd.DataFrame([(0,1,2), (2,5,6), (3,2,1)], columns=['a', 'b', 'c']) box = pd.DataFrame([(0,1,1), (3,5,3)], columns=['a', 'b', 'c']) y = np.array([0,1,1]) alg = cart.CART(x,y, mode=BINARY) alg._boxes = [box] alg.clf = "something" stats = alg.stats[0] self.assertEqual(stats['coverage'], 0.5) self.assertEqual(stats['density'], 0.5) self.assertEqual(stats['res dim'], 1) self.assertEqual(stats['mass'], 2/3) y = np.array([0,1,2]) alg = cart.CART(x,y, mode=REGRESSION) alg._boxes = [box] alg.clf = "something" stats = alg.stats[0] self.assertEqual(stats['mean'], 1) self.assertEqual(stats['res dim'], 1) self.assertEqual(stats['mass'], 2/3) y = np.array([0,1,2]) alg = cart.CART(x,y, mode=CLASSIFICATION) alg._boxes = [box] alg.clf = "something" stats = alg.stats[0] self.assertEqual(stats['gini'], 0.5) self.assertEqual(stats['box_composition'], [1, 0, 1]) self.assertEqual(stats['res dim'], 1) self.assertEqual(stats['mass'], 2/3) self.assertEqual(stats, alg.stats[0])
def test_boxes(self): np.random.seed(42) x = pd.DataFrame(np.random.rand(1000,2), columns=['a', 'b']) y = (x.a>0.5) & (x.b <0.5) alg = cart.CART(x,y, mode=BINARY) alg.build_tree() boxes = alg.boxes self.assertEqual(len(boxes), 3)
''' import matplotlib.pyplot as plt import ema_workbench.analysis.cart as cart from ema_workbench import ema_logging, load_results ema_logging.log_to_stderr(level=ema_logging.INFO) default_flow = 2.178849944502783e7 # load data fn = './data/5000 runs WCM.tar.gz' results = load_results(fn) x, outcomes = results ooi = 'throughput Rotterdam' outcome = outcomes[ooi] / default_flow y = outcome < 1 cart_alg = cart.CART(x, y) cart_alg.build_tree() # print cart to std_out print(cart_alg.stats_to_dataframe()) print(cart_alg.boxes_to_dataframe()) # visualize cart_alg.show_boxes(together=False) cart_alg.show_tree() plt.show()
def test_road_test(self): road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml') road_scope = emat.Scope(road_test_scope_file) # <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures> assert len(road_scope.get_measures()) == 7 assert len(road_scope.get_levers()) == 4 assert len(road_scope.get_uncertainties()) == 7 assert len(road_scope.get_constants()) == 2 emat_db = emat.SQLiteDB() road_scope.store_scope(emat_db) with pytest.raises(KeyError): road_scope.store_scope(emat_db) assert emat_db.read_scope_names() == ['EMAT Road Test'] design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs') design.head() large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs', design_name='lhs_large') large_design.head() assert list(large_design.columns) == [ 'alpha', 'amortization_period', 'beta', 'debt_type', 'expand_capacity', 'input_flow', 'interest_rate', 'interest_rate_lock', 'unit_cost_expansion', 'value_of_time', 'yield_curve', 'free_flow_time', 'initial_capacity', ] assert list(large_design.head().index) == [111, 112, 113, 114, 115] assert emat_db.read_design_names('EMAT Road Test') == ['lhs', 'lhs_large'] m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db) with SequentialEvaluator(m) as eval_seq: lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq) lhs_results.head() assert lhs_results.head()['present_cost_expansion'].values == approx( [2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514]) assert lhs_results.head()['net_benefits'].values == approx( [-79.51551505, -205.32148044, -151.94431822, -167.62487134, -3.97293985]) with SequentialEvaluator(m) as eval_seq: lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq) lhs_large_results.head() assert lhs_large_results.head()['net_benefits'].values == approx( [-584.36098322, -541.5458395, -185.16661464, -135.85689709, -357.36106457]) lhs_outcomes = m.read_experiment_measures(design_name='lhs') assert lhs_outcomes.head()['time_savings'].values == approx( [13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139]) correct_scores = numpy.array( [[0.06603461, 0.04858595, 0.06458574, 0.03298163, 0.05018515, 0., 0., 0.53156587, 0.05060416, 0.02558088, 0.04676956, 0.04131266, 0.04179378], [0.06003223, 0.04836434, 0.06059554, 0.03593644, 0.27734396, 0., 0., 0.28235419, 0.05303979, 0.03985181, 0.04303371, 0.05004349, 0.04940448], [0.08760605, 0.04630414, 0.0795043, 0.03892201, 0.10182534, 0., 0., 0.42508457, 0.04634321, 0.03216387, 0.0497183, 0.04953772, 0.0429905], [0.08365598, 0.04118732, 0.06716887, 0.03789444, 0.06509519, 0., 0., 0.31494171, 0.06517462, 0.02895742, 0.04731707, 0.17515158, 0.07345581], [0.06789382, 0.07852257, 0.05066944, 0.04807088, 0.32054735, 0., 0., 0.15953055, 0.05320201, 0.02890069, 0.07033928, 0.06372418, 0.05859923], [0.05105435, 0.09460353, 0.04614178, 0.04296901, 0.45179611, 0., 0., 0.04909801, 0.05478798, 0.023099, 0.08160785, 0.05642169, 0.04842069], [0.04685703, 0.03490931, 0.03214081, 0.03191602, 0.56130318, 0., 0., 0.04011044, 0.04812986, 0.02228924, 0.09753361, 0.04273004, 0.04208045], ]) scores = m.get_feature_scores('lhs', random_state=123) for _i in range(scores.metadata.values.shape[0]): for _j in range(scores.metadata.values.shape[1]): assert scores.metadata.values[_i,_j] == approx(correct_scores[_i,_j], rel=.1) from ema_workbench.analysis import prim x = m.read_experiment_parameters(design_name='lhs_large') prim_alg = prim.Prim( m.read_experiment_parameters(design_name='lhs_large'), m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0, threshold=0.4, ) box1 = prim_alg.find_box() assert dict(box1.peeling_trajectory.iloc[45]) == approx({ 'coverage': 0.8014705882352942, 'density': 0.582109479305741, 'id': 45, 'mass': 0.1498, 'mean': 0.582109479305741, 'res_dim': 4, }) from emat.util.xmle import Show from emat.util.xmle.elem import Elem assert isinstance(Show(box1.show_tradeoff()), Elem) from ema_workbench.analysis import cart cart_alg = cart.CART( m.read_experiment_parameters(design_name='lhs_large'), m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0, ) cart_alg.build_tree() cart_dict = dict(cart_alg.boxes[0].iloc[0]) assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'} assert cart_dict['interest_rate_lock'] == {False, True} del cart_dict['debt_type'] del cart_dict['interest_rate_lock'] assert cart_dict == approx({ 'free_flow_time': 60, 'initial_capacity': 100, 'alpha': 0.10001988547129116, 'beta': 3.500215589924521, 'input_flow': 80.0, 'value_of_time': 0.00100690634109406, 'unit_cost_expansion': 95.00570832093116, 'interest_rate': 0.0250022738169142, 'yield_curve': -0.0024960505548531774, 'expand_capacity': 0.0006718732232418368, 'amortization_period': 15, }) assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem) from emat import Measure MAXIMIZE = Measure.MAXIMIZE MINIMIZE = Measure.MINIMIZE robustness_functions = [ Measure( 'Expected Net Benefit', kind=Measure.INFO, variable_name='net_benefits', function=numpy.mean, # min=-150, # max=50, ), Measure( 'Probability of Net Loss', kind=MINIMIZE, variable_name='net_benefits', function=lambda x: numpy.mean(x < 0), min=0, max=1, ), Measure( '95%ile Travel Time', kind=MINIMIZE, variable_name='build_travel_time', function=functools.partial(numpy.percentile, q=95), min=60, max=150, ), Measure( '99%ile Present Cost', kind=Measure.INFO, variable_name='present_cost_expansion', function=functools.partial(numpy.percentile, q=99), # min=0, # max=10, ), Measure( 'Expected Present Cost', kind=Measure.INFO, variable_name='present_cost_expansion', function=numpy.mean, # min=0, # max=10, ), ] from emat import Constraint constraint_1 = Constraint( "Maximum Log Expected Present Cost", outcome_names="Expected Present Cost", function=Constraint.must_be_less_than(4000), ) constraint_2 = Constraint( "Minimum Capacity Expansion", parameter_names="expand_capacity", function=Constraint.must_be_greater_than(10), ) constraint_3 = Constraint( "Maximum Paygo", parameter_names='debt_type', outcome_names='99%ile Present Cost', function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0, ) from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics convergence_metrics = ConvergenceMetrics( HyperVolume.from_outcomes(robustness_functions), EpsilonProgress(), SolutionViewer.from_model_and_outcomes(m, robustness_functions), ) with SequentialEvaluator(m) as eval_seq: robust_results, convergence = m.robust_optimize( robustness_functions, scenarios=20, nfe=5, constraints=[ constraint_1, constraint_2, constraint_3, ], epsilons=[0.05, ] * len(robustness_functions), convergence=convergence_metrics, evaluator=eval_seq, ) assert isinstance(robust_results, pandas.DataFrame) mm = m.create_metamodel_from_design('lhs') design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2) design2_results = mm.run_experiments(design2)
fig = plt.gcf() fig.set_size_inches(12, 12) plt.show() # %% # boxes = prim_alg.show_boxes() prim.Prim.show_boxes(prim_alg) # #visualize # prim.show_boxes_individually(boxes, results) # prim.show_boxes_together(boxes, results) # plt.show() # %% # CART from ema_workbench.analysis import cart cart_alg = cart.CART(x, y_welfare10, 0.05) cart_alg.build_tree() print(cart_alg.stats_to_dataframe()) print(cart_alg.boxes_to_dataframe()) cart_uWel_df = cart_alg.boxes_to_dataframe() # %% # dice_sm.constants # ema_workbench.analysis.kde_over_time() # ema_workbench.analysis.lines() # ema_workbench.analysis.plotting_util # perform_experiments()
# ============================================================================= ################ Cluster-based Subspace Partitioning with CART ################ # ============================================================================= import matplotlib.pyplot as plt from ema_workbench.analysis import (cart, RuleInductionType) # Choose the aggregation level of the clustering --> cluster-indices for ... #... total design. clustering = clustering_total # Load the uncertainty input per experiment. x = experiments x = experiments.drop(['policy', 'model'], axis=1) # Load the cluster-indices per experiment. y = clustering # Perform CLASSIFICATION based clustering, due to the cluster-indices. cart_alg = cart.CART(x, y, mode=RuleInductionType.CLASSIFICATION) cart_alg.build_tree() # visualize cart_alg.show_boxes(together=False) pp = cart_alg.show_tree(mplfig=True) fig = plt.gcf() fig.set_size_inches(36, 36) pp.savefig('./CARTTree9.png') plt.show()
def test_road_test(self): road_test_scope_file = emat.package_file('model', 'tests', 'road_test.yaml') road_scope = emat.Scope(road_test_scope_file) # <emat.Scope with 2 constants, 7 uncertainties, 4 levers, 7 measures> assert len(road_scope.get_measures()) == 7 assert len(road_scope.get_levers()) == 4 assert len(road_scope.get_uncertainties()) == 7 assert len(road_scope.get_constants()) == 2 emat_db = emat.SQLiteDB() road_scope.store_scope(emat_db) with pytest.raises(KeyError): road_scope.store_scope(emat_db) assert emat_db.read_scope_names() == ['EMAT Road Test'] design = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs') design.head() large_design = design_experiments(road_scope, db=emat_db, n_samples=5000, sampler='lhs', design_name='lhs_large') large_design.head() assert list(large_design.columns) == [ 'alpha', 'amortization_period', 'beta', 'debt_type', 'expand_capacity', 'input_flow', 'interest_rate', 'interest_rate_lock', 'unit_cost_expansion', 'value_of_time', 'yield_curve', 'free_flow_time', 'initial_capacity', ] assert list(large_design.head().index) == [111, 112, 113, 114, 115] assert emat_db.read_design_names('EMAT Road Test') == [ 'lhs', 'lhs_large' ] m = PythonCoreModel(Road_Capacity_Investment, scope=road_scope, db=emat_db) with SequentialEvaluator(m) as eval_seq: lhs_results = m.run_experiments(design_name='lhs', evaluator=eval_seq) lhs_results.head() assert lhs_results.head()['present_cost_expansion'].values == approx([ 2154.41598475, 12369.38053473, 4468.50683924, 6526.32517089, 2460.91070514 ]) assert lhs_results.head()['net_benefits'].values == approx([ -22.29090499, -16.84301382, -113.98841188, 11.53956058, 78.03661612 ]) with SequentialEvaluator(m) as eval_seq: lhs_large_results = m.run_experiments(design_name='lhs_large', evaluator=eval_seq) lhs_large_results.head() assert lhs_large_results.head()['net_benefits'].values == approx([ -522.45283083, -355.1599307, -178.6623215, 23.46263498, -301.17700968 ]) lhs_outcomes = m.read_experiment_measures(design_name='lhs') assert lhs_outcomes.head()['time_savings'].values == approx( [13.4519273, 26.34172999, 12.48385198, 15.10165981, 15.48056139]) scores = m.get_feature_scores('lhs', random_state=123) stable_df("./road_test_feature_scores.pkl.gz", scores.data) from ema_workbench.analysis import prim x = m.read_experiment_parameters(design_name='lhs_large') prim_alg = prim.Prim( m.read_experiment_parameters(design_name='lhs_large'), m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0, threshold=0.4, ) box1 = prim_alg.find_box() stable_df("./road_test_box1_peeling_trajectory.pkl.gz", box1.peeling_trajectory) from emat.util.xmle import Show from emat.util.xmle.elem import Elem assert isinstance(Show(box1.show_tradeoff()), Elem) from ema_workbench.analysis import cart cart_alg = cart.CART( m.read_experiment_parameters(design_name='lhs_large'), m.read_experiment_measures(design_name='lhs_large')['net_benefits'] > 0, ) cart_alg.build_tree() stable_df("./road_test_cart_box0.pkl.gz", cart_alg.boxes[0]) cart_dict = dict(cart_alg.boxes[0].iloc[0]) assert cart_dict['debt_type'] == {'GO Bond', 'Paygo', 'Rev Bond'} assert cart_dict['interest_rate_lock'] == {False, True} assert isinstance(Show(cart_alg.show_tree(format='svg')), Elem) from emat import Measure MAXIMIZE = Measure.MAXIMIZE MINIMIZE = Measure.MINIMIZE robustness_functions = [ Measure( 'Expected Net Benefit', kind=Measure.INFO, variable_name='net_benefits', function=numpy.mean, # min=-150, # max=50, ), Measure( 'Probability of Net Loss', kind=MINIMIZE, variable_name='net_benefits', function=lambda x: numpy.mean(x < 0), min=0, max=1, ), Measure( '95%ile Travel Time', kind=MINIMIZE, variable_name='build_travel_time', function=functools.partial(numpy.percentile, q=95), min=60, max=150, ), Measure( '99%ile Present Cost', kind=Measure.INFO, variable_name='present_cost_expansion', function=functools.partial(numpy.percentile, q=99), # min=0, # max=10, ), Measure( 'Expected Present Cost', kind=Measure.INFO, variable_name='present_cost_expansion', function=numpy.mean, # min=0, # max=10, ), ] from emat import Constraint constraint_1 = Constraint( "Maximum Log Expected Present Cost", outcome_names="Expected Present Cost", function=Constraint.must_be_less_than(4000), ) constraint_2 = Constraint( "Minimum Capacity Expansion", parameter_names="expand_capacity", function=Constraint.must_be_greater_than(10), ) constraint_3 = Constraint( "Maximum Paygo", parameter_names='debt_type', outcome_names='99%ile Present Cost', function=lambda i, j: max(0, j - 1500) if i == 'Paygo' else 0, ) from emat.optimization import HyperVolume, EpsilonProgress, SolutionViewer, ConvergenceMetrics convergence_metrics = ConvergenceMetrics( HyperVolume.from_outcomes(robustness_functions), EpsilonProgress(), SolutionViewer.from_model_and_outcomes(m, robustness_functions), ) with SequentialEvaluator(m) as eval_seq: robust = m.robust_optimize( robustness_functions, scenarios=20, nfe=5, constraints=[ constraint_1, constraint_2, constraint_3, ], epsilons=[ 0.05, ] * len(robustness_functions), convergence=convergence_metrics, evaluator=eval_seq, ) robust_results, convergence = robust.result, robust.convergence assert isinstance(robust_results, pandas.DataFrame) mm = m.create_metamodel_from_design('lhs') design2 = design_experiments(road_scope, db=emat_db, n_samples_per_factor=10, sampler='lhs', random_seed=2) design2_results = mm.run_experiments(design2)