def test_prepare_outcomes(self):
    """Exercise ``fs._prepare_outcomes`` with each kind of ``classify`` argument.

    Cases covered:

    * a string naming an existing outcome: expected to yield a 1-D ``y``
      flagged as not categorical;
    * a string naming a missing outcome: expected to raise ``KeyError``;
    * a classification callable: expected to yield a 1-D ``y`` flagged as
      categorical;
    * anything else (here an int): expected to raise ``TypeError``.
    """
    results = test_utilities.load_flu_data()
    outcomes = results[1]

    # string type correct
    ooi = 'nr deaths'
    # last column of the 2-D outcome gives one value per experiment
    outcomes[ooi] = outcomes['deceased population region 1'][:, -1]
    y, categorical = fs._prepare_outcomes(outcomes, ooi)
    self.assertFalse(categorical)
    # assertEqual reports the actual dimensionality when the check fails
    self.assertEqual(len(y.shape), 1)

    # string type not correct --> KeyError
    with self.assertRaises(KeyError):
        fs._prepare_outcomes(outcomes, "non existing key")

    # classify function correct
    def classify(data):
        result = data['deceased population region 1']
        classes = np.zeros(result.shape[0])
        classes[result[:, -1] > 1000000] = 1
        return classes

    y, categorical = fs._prepare_outcomes(outcomes, classify)
    self.assertTrue(categorical)
    self.assertEqual(len(y.shape), 1)

    # neither string nor classify function --> TypeError
    with self.assertRaises(TypeError):
        fs._prepare_outcomes(outcomes, 1)
def test_prim_init_select(self):
    """Check the ``t_coi`` value computed by ``prim.setup_prim``.

    ``t_coi`` is expected to equal the number of experiments whose
    'death toll' is at or above the threshold (``prim.ABOVE``), or at or
    below it (``prim.BELOW``). The method also stores the loaded results
    and the classify function on the test instance.
    """
    self.results = test_utilities.load_flu_data()
    self.classify = flu_classify

    experiments, outcomes = self.results
    unc = recfunctions.get_names(experiments.dtype)

    # reduce the 2-D outcome to one value per experiment (its last column)
    outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
    death_toll = outcomes['death toll']
    results = experiments, outcomes

    # test initialization, including t_coi calculation in case of searching
    # for results equal to or higher than the threshold
    threshold = 10000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.ABOVE,
                               threshold=threshold,
                               incl_unc=unc)
    expected = np.count_nonzero(death_toll >= threshold)
    # assertEqual shows both values when the counts disagree
    self.assertEqual(prim_obj.t_coi, expected)

    # test initialization, including t_coi calculation in case of searching
    # for results equal to or lower than the threshold
    threshold = 1000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.BELOW,
                               threshold=threshold)
    expected = np.count_nonzero(death_toll <= threshold)
    self.assertEqual(prim_obj.t_coi, expected)

    # smoke check: setup with a classification function must not raise
    # NOTE(review): ``threshold=prim.ABOVE`` passes a threshold-type
    # constant as the threshold value; possibly ``threshold_type`` was
    # intended -- confirm before changing.
    prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
def test_get_univariate_feature_scores(self):
    """Check that univariate scoring returns one score per experiment field.

    Runs ``fs.get_univariate_feature_scores`` three times: with a
    classification function and the default score function, with the
    same function and ``score_func='chi2'``, and with a string outcome
    name.
    """
    results = test_utilities.load_flu_data()
    experiments, outcomes = results

    def classify(data):
        # outcome array for the deceased population
        deceased = data['deceased population region 1']

        # start with every case labelled 0
        labels = np.zeros(deceased.shape[0])

        # label a case 1 when the last column exceeds 1.000.000 people
        exceeds = deceased[:, -1] > 1000000
        labels[exceeds] = 1
        return labels

    # f classify
    scores = fs.get_univariate_feature_scores(results, classify)
    self.assertEqual(len(scores), len(experiments.dtype.fields))

    # chi2
    scores = fs.get_univariate_feature_scores(results, classify,
                                              score_func='chi2')
    self.assertEqual(len(scores), len(experiments.dtype.fields))

    # f regression
    ooi = 'nr deaths'
    outcomes[ooi] = outcomes['deceased population region 1'][:, -1]
    scores = fs.get_univariate_feature_scores(results, ooi)
    self.assertEqual(len(scores), len(experiments.dtype.fields))
def test_get_rf_feature_scores(self):
    """Check random-forest feature scoring for both outcome kinds.

    With a classification function the returned estimator is expected to
    be a ``RandomForestClassifier``; with a string outcome name it is
    expected to be a ``RandomForestRegressor``. In both cases one score
    per experiment field is expected.
    """
    results = test_utilities.load_flu_data()

    def classify(data):
        # get the output for deceased population
        result = data['deceased population region 1']

        # make an empty array of length equal to number of cases
        classes = np.zeros(result.shape[0])

        # if deceased population is higher than 1.000.000 people,
        # classify as 1
        classes[result[:, -1] > 1000000] = 1
        return classes

    scores, forest = fs.get_rf_feature_scores(results, classify,
                                              random_state=10)
    self.assertEqual(len(scores), len(results[0].dtype.fields))
    # assertIsInstance names the actual type when the check fails
    self.assertIsInstance(forest, RandomForestClassifier)

    ooi = 'nr deaths'
    results[1][ooi] = results[1]['deceased population region 1'][:, -1]
    scores, forest = fs.get_rf_feature_scores(results, ooi,
                                              random_state=10)
    self.assertEqual(len(scores), len(results[0].dtype.fields))
    self.assertIsInstance(forest, RandomForestRegressor)
def test_setup_prim_exceptions(self):
    """Check that ``prim.setup_prim`` raises ``PrimException`` on bad input.

    Two inputs are expected to be rejected: the name of an outcome that
    is used here without reducing it to one value per experiment, and a
    classify function that returns a multi-column slice.
    """
    flu_data = test_utilities.load_flu_data()

    # outcome referenced by name, used as-is
    with self.assertRaises(prim.PrimException):
        prim.setup_prim(flu_data, 'deceased population region 1',
                        threshold=0.8)

    def faulty_classify(outcomes):
        # returns ten columns per row instead of a single label
        return outcomes['deceased population region 1'][:, 0:10]

    with self.assertRaises(prim.PrimException):
        prim.setup_prim(flu_data, faulty_classify, threshold=0.8)
def test_find_box(self):
    """Check that found boxes plus the remainder account for every case.

    After each of two ``find_box`` calls (followed by
    ``_update_yi_remaining``), the sizes of ``yi`` over all boxes found
    so far plus the size of ``yi_remaining`` must equal the size of ``y``.
    """
    flu_data = test_utilities.load_flu_data()
    prim_obj = prim.setup_prim(flu_data, flu_classify, threshold=0.8)

    found_boxes = []
    for _ in range(2):
        found_boxes.append(prim_obj.find_box())
        prim_obj._update_yi_remaining()

        in_boxes = sum(box.yi.shape[0] for box in found_boxes)
        accounted_for = in_boxes + prim_obj.yi_remaining.shape[0]
        self.assertEqual(accounted_for, prim_obj.y.shape[0])
def test_boxes(self):
    """Check the length of ``prim_obj.boxes`` before and after a search.

    On a freshly set up object built from a three-row array, ``boxes``
    is expected to hold one entry. On the flu data, after a single
    ``find_box`` call, it is expected to hold two.
    """
    # builtin ``float`` replaces ``np.float``, which was only an alias for
    # it and is no longer available in current NumPy releases
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)],
                 dtype=[('a', float),
                        ('b', float),
                        ('c', float)])
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 1, 'box length not correct')

    # real data test case
    prim_obj = prim.setup_prim(test_utilities.load_flu_data(),
                               flu_classify,
                               threshold=0.8)
    prim_obj.find_box()
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 2, 'box length not correct')
def test_boxes(self):
    """Check the length of ``prim_obj.boxes`` before and after a search.

    On a freshly set up object built from a three-row array, ``boxes``
    is expected to hold one entry. On the flu data, after a single
    ``find_box`` call, it is expected to hold two.
    """
    # NOTE(review): a method with this same name and body appears nearby;
    # if both are in the same class, the later definition shadows the
    # earlier one -- confirm and remove the duplicate if so.

    # builtin ``float`` replaces ``np.float``, which was only an alias for
    # it and is no longer available in current NumPy releases
    field_names = ('a', 'b', 'c')
    x = np.array([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                 dtype=[(name, float) for name in field_names])
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 1, 'box length not correct')

    # real data test case
    prim_obj = prim.setup_prim(test_utilities.load_flu_data(),
                               flu_classify,
                               threshold=0.8)
    prim_obj.find_box()
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 2, 'box length not correct')
def test_setup_cart(self):
    """Smoke test: ``cart.setup_cart`` runs on the flu data without raising.

    The returned object is not inspected; the test passes as long as the
    call completes.
    """
    flu_data = test_utilities.load_flu_data()
    cart.setup_cart(flu_data, flu_classify, mass_min=0.05)