def test_plot__individual_cdf(self):
    """Smoke-test ``regional_sa.plot_individual_cdf`` for two uncertainties.

    The boolean target marks the cases whose final value of
    'deceased population region 1' exceeds 1,000,000. The plot function
    is called once with ``discrete=False`` and once with ``discrete=True``;
    the test passes when neither call raises.
    """
    x, outcomes = utilities.load_flu_data()
    y = outcomes['deceased population region 1'][:, -1] > 1000000

    # (name of the uncertainty, value passed as ``discrete``)
    cases = (('fatality ratio region 1', False), ('model', True))
    for unc, discrete in cases:
        fig, ax = plt.subplots()
        try:
            regional_sa.plot_individual_cdf(ax, unc, x[unc], y,
                                            discrete=discrete, legend=True,
                                            xticklabels_on=True,
                                            yticklabels_on=True)
        finally:
            # close the figure even on failure so that open pyplot figures
            # do not pile up over a test run
            plt.close(fig)
def test_get_univariate_feature_scores(self):
    """Check the score count of ``fs.get_univariate_feature_scores``.

    The function is called with ``F_CLASSIFICATION`` and ``CHI2`` on a 0/1
    target, and with ``F_REGRESSION`` on the final value of
    'deceased population region 1'. Each call is expected to yield
    ``len(x.columns) - 2`` scores.
    """
    x, outcomes = utilities.load_flu_data()

    def classify(data):
        # outcome array for the deceased population
        deaths = data['deceased population region 1']
        # one label per case, initially 0
        labels = np.zeros(deaths.shape[0])
        # cases ending above 1.000.000 deceased are labelled 1
        labels[deaths[:, -1] > 1000000] = 1
        return labels

    target = classify(outcomes)

    # f classify, followed by chi2
    for score_func in (F_CLASSIFICATION, CHI2):
        scores = fs.get_univariate_feature_scores(x, target,
                                                  score_func=score_func)
        self.assertEqual(len(scores), len(x.columns) - 2)

    # f regression
    target = outcomes['deceased population region 1'][:, -1]
    scores = fs.get_univariate_feature_scores(x, target,
                                              score_func=F_REGRESSION)
    self.assertEqual(len(scores), len(x.columns) - 2)
def test_prepare_outcomes(self):
    """Exercise ``fs._prepare_outcomes`` with valid and invalid targets.

    Covered cases:

    * an existing outcome name -> not categorical, one-dimensional ``y``
    * an unknown outcome name -> ``KeyError``
    * a classify callable -> categorical, one-dimensional ``y``
    * a value that is neither a string nor a callable -> ``TypeError``
    """
    results = utilities.load_flu_data()

    # string type correct
    ooi = 'nr deaths'
    results[1][ooi] = results[1]['deceased population region 1'][:, -1]
    y, categorical = fs._prepare_outcomes(results[1], ooi)

    self.assertFalse(categorical)
    # assertEqual reports the actual number of dimensions on failure
    self.assertEqual(len(y.shape), 1)

    # string type not correct --> KeyError
    with self.assertRaises(KeyError):
        fs._prepare_outcomes(results[1], "non existing key")

    # classify function correct
    def classify(data):
        result = data['deceased population region 1']
        classes = np.zeros(result.shape[0])
        classes[result[:, -1] > 1000000] = 1
        return classes

    y, categorical = fs._prepare_outcomes(results[1], classify)

    self.assertTrue(categorical)
    self.assertEqual(len(y.shape), 1)

    # neither string nor classify function --> TypeError
    with self.assertRaises(TypeError):
        fs._prepare_outcomes(results[1], 1)
def test_setup_prim(self):
    """Check ``prim.setup_prim`` and the ``t_coi`` it computes.

    A scalar 'death toll' outcome (final value of
    'deceased population region 1') is added to the outcomes. ``t_coi``
    of the returned object is compared with the number of cases at or
    above the threshold for ``prim.ABOVE`` and at or below it for
    ``prim.BELOW``. A last call with a classify function only has to run
    without raising.
    """
    self.results = utilities.load_flu_data()
    self.classify = flu_classify

    experiments, outcomes = self.results

    # test initialization, including t_coi calculation in case of searching
    # for results equal to or higher than the threshold
    outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
    results = experiments, outcomes
    threshold = 10000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.ABOVE,
                               threshold=threshold)

    # count the cases of interest by masking an array with one entry per case
    value = np.ones((experiments.shape[0], ))
    value = value[outcomes['death toll'] >= threshold].shape[0]
    # assertEqual shows both counts when they differ
    self.assertEqual(prim_obj.t_coi, value)

    # test initialization, including t_coi calculation in case of searching
    # for results equal to or lower than the threshold
    threshold = 1000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.BELOW,
                               threshold=threshold)

    value = np.ones((experiments.shape[0], ))
    value = value[outcomes['death toll'] <= threshold].shape[0]
    self.assertEqual(prim_obj.t_coi, value)

    # NOTE(review): prim.ABOVE is passed as ``threshold`` here, whereas the
    # calls above pass it as ``threshold_type`` -- confirm this is intended
    prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
def test_setup_cart(self):
    """Check the mode and inputs selected by ``cart.setup_cart``.

    * a classify function -> ``BINARY`` mode
    * an outcome name on scalar outcomes -> ``REGRESSION`` mode
    * ``incl_unc`` with five column names -> ``alg.x`` has five columns
    * a classify argument of 10 -> ``TypeError``
    """
    results = utilities.load_flu_data()

    alg = cart.setup_cart(results, flu_classify, mass_min=0.05)
    # assertEqual reports the actual mode on failure
    self.assertEqual(alg.mode, BINARY)

    # reduce every outcome to its last column so the named outcome is scalar
    x, outcomes = results
    y = {k: v[:, -1] for k, v in outcomes.items()}
    temp_results = (x, y)
    alg = cart.setup_cart(temp_results, 'deceased population region 1',
                          mass_min=0.05)
    self.assertEqual(alg.mode, REGRESSION)

    # restrict the analysis to the first n_cols columns of x
    n_cols = 5
    unc = x.columns.values[0:n_cols]
    alg = cart.setup_cart(results, flu_classify, mass_min=0.05,
                          incl_unc=unc)
    self.assertEqual(alg.mode, BINARY)
    self.assertEqual(alg.x.shape[1], n_cols)

    with self.assertRaises(TypeError):
        alg = cart.setup_cart(results, 10, mass_min=0.05)
def test_get_lasso_feature_scores(self):
    """Check ``fs.get_lasso_feature_scores`` in both supported modes.

    In ``CLASSIFICATION`` and ``REGRESSION`` mode the number of scores is
    expected to equal ``len(x.dtype.fields)``; an unknown mode must raise
    ``ValueError``.
    """
    x, outcomes = utilities.load_flu_data()

    def classify(data):
        # outcome array for the deceased population
        deaths = data['deceased population region 1']
        # one label per case, initially 0
        labels = np.zeros(deaths.shape[0])
        # cases ending above 1.000.000 deceased are labelled 1
        labels[deaths[:, -1] > 1000000] = 1
        return labels

    target = classify(outcomes)

    # classification based
    scores = fs.get_lasso_feature_scores(x, target, mode=CLASSIFICATION,
                                         random_state=42)
    self.assertEqual(len(scores), len(x.dtype.fields))

    with self.assertRaises(ValueError):
        fs.get_lasso_feature_scores(x, target, mode='illegal argument')

    # regression based
    target = outcomes['deceased population region 1'][:, -1]
    scores = fs.get_lasso_feature_scores(x, target, mode=REGRESSION,
                                         random_state=42)
    self.assertEqual(len(scores), len(x.dtype.fields))
def test_get_rf_feature_scores(self):
    """Check ``fs.get_rf_feature_scores`` in both supported modes.

    For ``RuleInductionType.CLASSIFICATION`` the returned forest must be a
    ``RandomForestClassifier``, for ``RuleInductionType.REGRESSION`` a
    ``RandomForestRegressor``. Both are expected to yield
    ``len(x.columns) - 2`` scores. An unknown mode must raise
    ``ValueError``.
    """
    x, outcomes = utilities.load_flu_data()

    def classify(data):
        # get the output for deceased population
        result = data['deceased population region 1']

        # make an empty array of length equal to number of cases
        classes = np.zeros(result.shape[0])

        # if deceased population is higher than 1.000.000 people,
        # classify as 1
        classes[result[:, -1] > 1000000] = 1

        return classes

    y = classify(outcomes)

    scores, forest = fs.get_rf_feature_scores(
        x, y, mode=RuleInductionType.CLASSIFICATION, random_state=10)

    self.assertEqual(len(scores), len(x.columns) - 2)
    # assertIsInstance names the actual type on failure
    self.assertIsInstance(forest, RandomForestClassifier)

    self.assertRaises(ValueError, fs.get_rf_feature_scores, x, y,
                      mode='illegal argument')

    y = outcomes['deceased population region 1'][:, -1]
    scores, forest = fs.get_rf_feature_scores(
        x, y, mode=RuleInductionType.REGRESSION, random_state=10)

    self.assertEqual(len(scores), len(x.columns) - 2)
    self.assertIsInstance(forest, RandomForestRegressor)
def test_prim_init_select(self):
    """Check ``prim.setup_prim`` with an explicit ``incl_unc`` selection.

    A scalar 'death toll' outcome (final value of
    'deceased population region 1') is added to the outcomes. With
    ``prim.ABOVE`` and ``incl_unc`` set to all column names of the
    experiments, ``t_coi`` must equal the number of cases at or above the
    threshold; with ``prim.BELOW`` (no ``incl_unc``) the number at or
    below it. A last call with a classify function only has to run
    without raising.
    """
    self.results = utilities.load_flu_data()
    self.classify = flu_classify

    experiments, outcomes = self.results

    unc = experiments.columns.values.tolist()

    # test initialization, including t_coi calculation in case of searching
    # for results equal to or higher than the threshold
    outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
    results = experiments, outcomes
    threshold = 10000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.ABOVE,
                               threshold=threshold, incl_unc=unc)

    # count the cases of interest by masking an array with one entry per case
    value = np.ones((experiments.shape[0],))
    value = value[outcomes['death toll'] >= threshold].shape[0]
    # assertEqual shows both counts when they differ
    self.assertEqual(prim_obj.t_coi, value)

    # test initialization, including t_coi calculation in case of searching
    # for results equal to or lower than the threshold
    threshold = 1000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.BELOW,
                               threshold=threshold)

    value = np.ones((experiments.shape[0],))
    value = value[outcomes['death toll'] <= threshold].shape[0]
    self.assertEqual(prim_obj.t_coi, value)

    # NOTE(review): prim.ABOVE is passed as ``threshold`` here, whereas the
    # calls above pass it as ``threshold_type`` -- confirm this is intended
    prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
def test_prim_exceptions(self):
    """``prim.Prim`` must raise ``PrimException`` for this regression input.

    ``y`` is the full 'deceased population region 1' outcome, i.e. it is
    not reduced to its last column before being handed to ``prim.Prim``.
    """
    experiments, outcomes = utilities.load_flu_data()
    deaths = outcomes['deceased population region 1']

    with self.assertRaises(prim.PrimException):
        prim.Prim(experiments, deaths, threshold=0.8, mode='regression')
def test_get_rf_feature_scores(self):
    """Check ``fs.get_rf_feature_scores`` in both supported modes.

    For ``RuleInductionType.CLASSIFICATION`` the returned forest must be a
    ``RandomForestClassifier``, for ``RuleInductionType.REGRESSION`` a
    ``RandomForestRegressor``. Both are expected to yield
    ``len(x.columns) - 3`` scores. An unknown mode must raise
    ``ValueError``.
    """
    x, outcomes = utilities.load_flu_data()

    def classify(data):
        # get the output for deceased population
        result = data['deceased population region 1']

        # make an empty array of length equal to number of cases
        classes = np.zeros(result.shape[0])

        # if deceased population is higher than 1.000.000 people,
        # classify as 1
        classes[result[:, -1] > 1000000] = 1

        return classes

    y = classify(outcomes)

    scores, forest = fs.get_rf_feature_scores(
        x, y, mode=RuleInductionType.CLASSIFICATION, random_state=10)

    self.assertEqual(len(scores), len(x.columns) - 3)
    # assertIsInstance names the actual type on failure
    self.assertIsInstance(forest, RandomForestClassifier)

    self.assertRaises(ValueError, fs.get_rf_feature_scores, x, y,
                      mode='illegal argument')

    y = outcomes['deceased population region 1'][:, -1]
    scores, forest = fs.get_rf_feature_scores(
        x, y, mode=RuleInductionType.REGRESSION, random_state=10)

    self.assertEqual(len(scores), len(x.columns) - 3)
    self.assertIsInstance(forest, RandomForestRegressor)
def test_prepare_outcomes(self):
    """Exercise ``fs._prepare_outcomes`` with valid and invalid targets.

    Covered cases:

    * an existing outcome name -> not categorical, one-dimensional ``y``
    * an unknown outcome name -> ``KeyError``
    * a classify callable -> categorical, one-dimensional ``y``
    * a value that is neither a string nor a callable -> ``TypeError``
    """
    results = utilities.load_flu_data()

    # string type correct
    ooi = 'nr deaths'
    results[1][ooi] = results[1]['deceased population region 1'][:, -1]
    y, categorical = fs._prepare_outcomes(results[1], ooi)

    self.assertFalse(categorical)
    # assertEqual reports the actual number of dimensions on failure
    self.assertEqual(len(y.shape), 1)

    # string type not correct --> KeyError
    with self.assertRaises(KeyError):
        fs._prepare_outcomes(results[1], "non existing key")

    # classify function correct
    def classify(data):
        result = data['deceased population region 1']
        classes = np.zeros(result.shape[0])
        classes[result[:, -1] > 1000000] = 1
        return classes

    y, categorical = fs._prepare_outcomes(results[1], classify)

    self.assertTrue(categorical)
    self.assertEqual(len(y.shape), 1)

    # neither string nor classify function --> TypeError
    with self.assertRaises(TypeError):
        fs._prepare_outcomes(results[1], 1)
def test_get_univariate_feature_scores(self):
    """Check the score count of ``fs.get_univariate_feature_scores``.

    The function is called with ``F_CLASSIFICATION`` and ``CHI2`` on a 0/1
    target, and with ``F_REGRESSION`` on the final value of
    'deceased population region 1'. Each call is expected to yield
    ``len(x.columns) - 3`` scores.
    """
    x, outcomes = utilities.load_flu_data()

    def classify(data):
        # outcome array for the deceased population
        deaths = data['deceased population region 1']
        # one label per case, initially 0
        labels = np.zeros(deaths.shape[0])
        # cases ending above 1.000.000 deceased are labelled 1
        labels[deaths[:, -1] > 1000000] = 1
        return labels

    binary_target = classify(outcomes)

    # f classify
    scores = fs.get_univariate_feature_scores(
        x, binary_target, score_func=F_CLASSIFICATION)
    self.assertEqual(len(scores), len(x.columns) - 3)

    # chi2
    scores = fs.get_univariate_feature_scores(
        x, binary_target, score_func=CHI2)
    self.assertEqual(len(scores), len(x.columns) - 3)

    # f regression
    final_deaths = outcomes['deceased population region 1'][:, -1]
    scores = fs.get_univariate_feature_scores(
        x, final_deaths, score_func=F_REGRESSION)
    self.assertEqual(len(scores), len(x.columns) - 3)
def test_setup_cart(self):
    """Check the mode and inputs selected by ``cart.setup_cart``.

    * a classify function -> ``RuleInductionType.BINARY``
    * an outcome name on scalar outcomes -> ``RuleInductionType.REGRESSION``
    * ``incl_unc`` with five column names -> ``alg.x`` has five columns
    * a classify argument of 10 -> ``TypeError``
    """
    results = utilities.load_flu_data()

    alg = cart.setup_cart(results, flu_classify, mass_min=0.05)
    # assertEqual reports the actual mode on failure
    self.assertEqual(alg.mode, RuleInductionType.BINARY)

    # reduce every outcome to its last column so the named outcome is scalar
    x, outcomes = results
    y = {k: v[:, -1] for k, v in outcomes.items()}
    temp_results = (x, y)
    alg = cart.setup_cart(temp_results, 'deceased population region 1',
                          mass_min=0.05)
    self.assertEqual(alg.mode, RuleInductionType.REGRESSION)

    # restrict the analysis to the first n_cols columns of x
    n_cols = 5
    unc = x.columns.values[0:n_cols]
    alg = cart.setup_cart(results, flu_classify, mass_min=0.05,
                          incl_unc=unc)
    self.assertEqual(alg.mode, RuleInductionType.BINARY)
    self.assertEqual(alg.x.shape[1], n_cols)

    with self.assertRaises(TypeError):
        alg = cart.setup_cart(results, 10, mass_min=0.05)
def test_prim_exceptions(self):
    """``prim.Prim`` must raise ``PrimException`` for this regression input.

    ``y`` is the full 'deceased population region 1' outcome, i.e. it is
    not reduced to its last column before being handed to ``prim.Prim``.
    """
    experiments, outcomes = utilities.load_flu_data()
    deaths = outcomes['deceased population region 1']

    with self.assertRaises(prim.PrimException):
        prim.Prim(experiments, deaths, threshold=0.8,
                  mode=RuleInductionType.REGRESSION)
def test_plot_cdfs(self):
    """Smoke-test ``regional_sa.plot_cdfs``; passes when no call raises.

    The function is called with its defaults, with ``ccdf=True``, and with
    ``ccdf=True`` again after the 'scenario' column has been dropped.
    """
    experiments, outcomes = utilities.load_flu_data()
    final_deaths = outcomes['deceased population region 1'][:, -1]
    of_interest = final_deaths > 1000000

    regional_sa.plot_cdfs(experiments, of_interest)
    regional_sa.plot_cdfs(experiments, of_interest, ccdf=True)

    without_scenario = experiments.drop('scenario', axis=1)
    regional_sa.plot_cdfs(without_scenario, of_interest, ccdf=True)
def test_plot_cdfs(self):
    """Smoke-test ``regional_sa.plot_cdfs``; passes when no call raises.

    The function is called with its defaults, with ``ccdf=True``, and with
    ``ccdf=True`` again after the 'scenario' column has been dropped.
    """
    experiments, outcomes = utilities.load_flu_data()
    of_interest = outcomes['deceased population region 1'][:, -1] > 1000000

    # full set of columns: default call first, then ccdf=True
    regional_sa.plot_cdfs(experiments, of_interest)
    regional_sa.plot_cdfs(experiments, of_interest, ccdf=True)

    # same plot once more without the 'scenario' column
    experiments = experiments.drop('scenario', axis=1)
    regional_sa.plot_cdfs(experiments, of_interest, ccdf=True)
def test_get_feature_scores_all(self):
    """Check the shape of the result of ``fs.get_feature_scores_all``.

    Two scalar outcomes are derived from the loaded outcomes. The result
    is expected to have ``len(x.columns) - 2`` entries along its first
    axis and to be two-dimensional.
    """
    x, outcomes = utilities.load_flu_data()

    # we have timeseries so we need scalars
    y = {'deceased population':
             outcomes['deceased population region 1'][:, -1],
         'max. infected fraction':
             np.max(outcomes['infected fraction R1'], axis=1)}
    scores = fs.get_feature_scores_all(x, y)

    self.assertEqual(len(scores), len(x.columns) - 2)
    # assertEqual reports the actual number of dimensions on failure
    self.assertEqual(scores.ndim, 2)
def test_get_feature_scores_all(self):
    """Check the shape of the result of ``fs.get_feature_scores_all``.

    Two scalar outcomes are derived from the loaded outcomes. The result
    is expected to have ``len(x.columns)`` entries along its first axis
    and to be two-dimensional.
    """
    x, outcomes = utilities.load_flu_data()

    # we have timeseries so we need scalars
    y = {'deceased population':
             outcomes['deceased population region 1'][:, -1],
         'max. infected fraction':
             np.max(outcomes['infected fraction R1'], axis=1)}
    scores = fs.get_feature_scores_all(x, y)

    self.assertEqual(len(scores), len(x.columns))
    # assertEqual reports the actual number of dimensions on failure
    self.assertEqual(scores.ndim, 2)
def test_create_pivot_plot(self):
    """Smoke-test ``dimensional_stacking.create_pivot_plot``.

    Three argument combinations are tried; the test passes when none of
    them raises. All figures are closed at the end.
    """
    experiments, outcomes = utilities.load_flu_data()
    of_interest = outcomes['deceased population region 1'][:, -1] > 1000000

    # (third positional argument, keyword arguments) for each call
    variants = (
        (2, {}),
        (2, {'labels': False, 'bin_labels': True}),
        (1, {'labels': False}),
    )
    for third_arg, options in variants:
        dimensional_stacking.create_pivot_plot(experiments, of_interest,
                                               third_arg, **options)

    plt.draw()
    plt.close('all')
def test_show_tree(self):
    """Check the return types of ``show_tree`` for both ``mplfig`` values.

    With ``mplfig=True`` a ``mpl.figure.Figure`` is expected, with
    ``mplfig=False`` a ``bytes`` object.
    """
    results = utilities.load_flu_data()

    alg = cart.setup_cart(results, flu_classify, mass_min=0.05)
    alg.build_tree()

    fig = alg.show_tree(mplfig=True)
    bytestream = alg.show_tree(mplfig=False)

    # assertIsInstance names the actual type on failure
    self.assertIsInstance(fig, mpl.figure.Figure)
    self.assertIsInstance(bytestream, bytes)
def test_setup_prim_exceptions(self):
    """``prim.setup_prim`` must raise ``PrimException`` in two cases.

    The first case names the 'deceased population region 1' outcome
    directly; the second uses a classify function that returns the first
    ten columns of that outcome.
    """
    results = utilities.load_flu_data()

    with self.assertRaises(prim.PrimException):
        prim.setup_prim(results, 'deceased population region 1',
                        threshold=0.8)

    def faulty_classify(outcomes):
        # returns a two-dimensional slice
        return outcomes['deceased population region 1'][:, :10]

    with self.assertRaises(prim.PrimException):
        prim.setup_prim(results, faulty_classify, threshold=0.8)
def test_build_tree(self):
    """Check the estimator type stored in ``alg.clf`` after ``build_tree``.

    A classify function is expected to give a
    ``cart.tree.DecisionTreeClassifier``; an outcome name on scalar
    outcomes a ``cart.tree.DecisionTreeRegressor``.
    """
    results = utilities.load_flu_data()

    alg = cart.setup_cart(results, flu_classify, mass_min=0.05)
    alg.build_tree()

    # assertIsInstance names the actual type on failure
    self.assertIsInstance(alg.clf, cart.tree.DecisionTreeClassifier)

    # reduce every outcome to its last column so the named outcome is scalar
    x, outcomes = results
    y = {k: v[:, -1] for k, v in outcomes.items()}
    temp_results = (x, y)
    alg = cart.setup_cart(temp_results, 'deceased population region 1',
                          mass_min=0.05)
    alg.build_tree()
    self.assertIsInstance(alg.clf, cart.tree.DecisionTreeRegressor)
def test_stats_to_dataframe(self):
    """Smoke-test ``CART.stats_to_dataframe`` in all three modes.

    A tree is built for ``BINARY``, ``REGRESSION`` and ``CLASSIFICATION``
    mode and ``stats_to_dataframe`` is called on each. There are no
    assertions; the test passes when nothing raises.
    """
    x, outcomes = utilities.load_flu_data()

    y = flu_classify(outcomes)
    alg = cart.CART(x, y, mode=RuleInductionType.BINARY)
    alg.build_tree()
    stats = alg.stats_to_dataframe()

    y = outcomes['deceased population region 1'][:, -1]
    alg = cart.CART(x, y, mode=RuleInductionType.REGRESSION)
    alg.build_tree()
    stats = alg.stats_to_dataframe()

    # Draw the random class labels (integers 1..4) from a seeded local
    # generator so every run sees the same labels and a failure can be
    # reproduced.
    rng = np.random.RandomState(42)
    y = rng.randint(1, 5, y.shape[0])
    alg = cart.CART(x, y, mode=RuleInductionType.CLASSIFICATION)
    alg.build_tree()
    stats = alg.stats_to_dataframe()
    print(stats)
def test_find_box(self):
    """Check the case bookkeeping of ``find_box``.

    After each ``find_box`` call followed by ``_update_yi_remaining``, the
    sizes of ``yi`` of the boxes found so far plus the size of
    ``yi_remaining`` must add up to the size of ``prim_obj.y``.
    """
    results = utilities.load_flu_data()
    prim_obj = prim.setup_prim(results, flu_classify, threshold=0.8)

    found = []
    for _ in range(2):
        found.append(prim_obj.find_box())
        prim_obj._update_yi_remaining()

        in_boxes = sum(box.yi.shape[0] for box in found)
        accounted_for = in_boxes + prim_obj.yi_remaining.shape[0]
        self.assertEqual(accounted_for, prim_obj.y.shape[0])
def test_boxes(self):
    """Check the length of ``prim_obj.boxes``.

    ``boxes`` must contain exactly one box right after setup on a small
    hand-made structured array, and also after a single ``find_box`` call
    on the data from ``utilities.load_flu_data()``.
    """
    # The builtin ``float`` is used for the field dtypes: the ``np.float``
    # alias is deprecated since NumPy 1.20 and removed in 1.24, and it
    # referred to the same type.
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)],
                 dtype=[('a', float),
                        ('b', float),
                        ('c', float)])
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    boxes = prim_obj.boxes

    self.assertEqual(len(boxes), 1, 'box length not correct')

    # real data test case
    prim_obj = prim.setup_prim(utilities.load_flu_data(), flu_classify,
                               threshold=0.8)
    prim_obj.find_box()
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 1, 'box length not correct')
def test_build_tree(self):
    """Check the estimator type stored in ``alg.clf`` after ``build_tree``.

    A classify function is expected to give a
    ``cart.tree.DecisionTreeClassifier``; an outcome name on scalar
    outcomes a ``cart.tree.DecisionTreeRegressor``.
    """
    results = utilities.load_flu_data()

    alg = cart.setup_cart(results, flu_classify, mass_min=0.05)
    alg.build_tree()

    # assertIsInstance names the actual type on failure
    self.assertIsInstance(alg.clf, cart.tree.DecisionTreeClassifier)

    # reduce every outcome to its last column so the named outcome is scalar
    x, outcomes = results
    y = {k: v[:, -1] for k, v in outcomes.items()}
    temp_results = (x, y)
    alg = cart.setup_cart(temp_results, 'deceased population region 1',
                          mass_min=0.05)
    alg.build_tree()
    self.assertIsInstance(alg.clf, cart.tree.DecisionTreeRegressor)
def test_find_box(self):
    """Check the case bookkeeping of ``find_box``.

    After each ``find_box`` call followed by
    ``_update_yi_remaining(prim_obj)``, the sizes of ``yi`` of the boxes
    found so far plus the size of ``yi_remaining`` must add up to the size
    of ``prim_obj.y``.
    """
    results = utilities.load_flu_data()
    prim_obj = prim.setup_prim(results, flu_classify, threshold=0.8)

    found = []
    for _ in range(2):
        found.append(prim_obj.find_box())
        # the updater is called with the prim object as explicit argument
        prim_obj._update_yi_remaining(prim_obj)

        in_boxes = sum(box.yi.shape[0] for box in found)
        accounted_for = in_boxes + prim_obj.yi_remaining.shape[0]
        self.assertEqual(accounted_for, prim_obj.y.shape[0])
def test_boxes(self):
    """Check the length of ``prim_obj.boxes``.

    ``boxes`` must contain exactly one box right after setup on a small
    hand-made data frame, and also after a single ``find_box`` call on the
    data from ``utilities.load_flu_data()``.
    """
    rows = [(0, 1, 2),
            (2, 5, 6),
            (3, 2, 1)]
    experiments = pd.DataFrame(rows, columns=['a', 'b', 'c'])
    outcomes = {'y': np.array([0, 1, 2])}

    toy_prim = prim.setup_prim((experiments, outcomes), 'y', threshold=0.8)
    self.assertEqual(len(toy_prim.boxes), 1, 'box length not correct')

    # real data test case
    flu_prim = prim.setup_prim(utilities.load_flu_data(), flu_classify,
                               threshold=0.8)
    flu_prim.find_box()
    self.assertEqual(len(flu_prim.boxes), 1, 'box length not correct')
def test_plot__individual_cdf(self):
    """Smoke-test ``regional_sa.plot_individual_cdf`` for two uncertainties.

    The boolean target marks the cases whose final value of
    'deceased population region 1' exceeds 1,000,000. The plot function
    is called once with ``discrete=False`` and once with ``discrete=True``;
    the test passes when neither call raises.
    """
    x, outcomes = utilities.load_flu_data()
    y = outcomes['deceased population region 1'][:, -1] > 1000000

    # (name of the uncertainty, value passed as ``discrete``)
    cases = (('fatality ratio region 1', False), ('model', True))
    for unc, discrete in cases:
        fig, ax = plt.subplots()
        try:
            regional_sa.plot_individual_cdf(ax, unc, x[unc], y,
                                            discrete=discrete, legend=True,
                                            xticklabels_on=True,
                                            yticklabels_on=True)
        finally:
            # close the figure even on failure so that open pyplot figures
            # do not pile up over a test run
            plt.close(fig)
def test_get_ex_feature_scores(self):
    """Check ``fs.get_ex_feature_scores`` in both supported modes.

    For ``CLASSIFICATION`` the returned forest must be an
    ``ExtraTreesClassifier``, for ``REGRESSION`` an
    ``ExtraTreesRegressor``. Both are expected to yield
    ``len(x.columns)`` scores. An unknown mode must raise ``ValueError``.
    """
    x, outcomes = utilities.load_flu_data()
    y = outcomes['deceased population region 1'][:, -1] > 1000000

    scores, forest = fs.get_ex_feature_scores(x, y, mode=CLASSIFICATION,
                                              random_state=10)

    self.assertEqual(len(scores), len(x.columns))
    # assertIsInstance names the actual type on failure
    self.assertIsInstance(forest, ExtraTreesClassifier)

    self.assertRaises(ValueError, fs.get_ex_feature_scores, x, y,
                      mode='illegal argument')

    y = outcomes['deceased population region 1'][:, -1]
    scores, forest = fs.get_ex_feature_scores(x, y, mode=REGRESSION,
                                              random_state=10)

    self.assertEqual(len(scores), len(x.columns))
    self.assertIsInstance(forest, ExtraTreesRegressor)
def test_get_ex_feature_scores(self):
    """Check ``fs.get_ex_feature_scores`` in both supported modes.

    For ``RuleInductionType.CLASSIFICATION`` the returned forest must be
    an ``ExtraTreesClassifier``, for ``RuleInductionType.REGRESSION`` an
    ``ExtraTreesRegressor``. Both are expected to yield
    ``len(x.columns) - 2`` scores. An unknown mode must raise
    ``ValueError``.
    """
    x, outcomes = utilities.load_flu_data()
    y = outcomes['deceased population region 1'][:, -1] > 1000000

    scores, forest = fs.get_ex_feature_scores(
        x, y, mode=RuleInductionType.CLASSIFICATION, random_state=10)

    self.assertEqual(len(scores), len(x.columns) - 2)
    # assertIsInstance names the actual type on failure
    self.assertIsInstance(forest, ExtraTreesClassifier)

    self.assertRaises(ValueError, fs.get_ex_feature_scores, x, y,
                      mode='illegal argument')

    y = outcomes['deceased population region 1'][:, -1]
    scores, forest = fs.get_ex_feature_scores(
        x, y, mode=RuleInductionType.REGRESSION, random_state=10)

    self.assertEqual(len(scores), len(x.columns) - 2)
    self.assertIsInstance(forest, ExtraTreesRegressor)
def test_get_lasso_feature_scores(self):
    """Check ``fs.get_lasso_feature_scores`` in both supported modes.

    In ``CLASSIFICATION`` and ``REGRESSION`` mode the number of scores is
    expected to equal ``len(x.dtype.fields)``; an unknown mode must raise
    ``ValueError``.
    """
    x, outcomes = utilities.load_flu_data()

    def classify(data):
        # outcome array for the deceased population
        deaths = data['deceased population region 1']
        # one label per case, initially 0
        labels = np.zeros(deaths.shape[0])
        # cases ending above 1.000.000 deceased are labelled 1
        labels[deaths[:, -1] > 1000000] = 1
        return labels

    binary_target = classify(outcomes)

    # classification based
    scores = fs.get_lasso_feature_scores(x, binary_target,
                                         mode=CLASSIFICATION,
                                         random_state=42)
    self.assertEqual(len(scores), len(x.dtype.fields))

    with self.assertRaises(ValueError):
        fs.get_lasso_feature_scores(x, binary_target,
                                    mode='illegal argument')

    # regression based
    final_deaths = outcomes['deceased population region 1'][:, -1]
    scores = fs.get_lasso_feature_scores(x, final_deaths, mode=REGRESSION,
                                         random_state=42)
    self.assertEqual(len(scores), len(x.dtype.fields))