def test_prim_init_select(self):
    """Check ``t_coi`` after setup for both threshold directions.

    The first setup passes every experiment column explicitly through
    ``incl_unc``; the second relies on the default. In both cases the
    number of cases of interest stored on the prim object must equal the
    number of experiments whose 'death toll' lies on the requested side of
    the threshold.
    """
    self.results = utilities.load_flu_data()
    self.classify = flu_classify

    experiments, outcomes = self.results
    unc = experiments.columns.values.tolist()

    # derive a scalar outcome from the last column of the deceased population
    outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
    results = experiments, outcomes

    # searching for results equal to or higher than the threshold
    threshold = 10000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.ABOVE,
                               threshold=threshold, incl_unc=unc)
    expected = int((outcomes['death toll'] >= threshold).sum())
    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(prim_obj.t_coi, expected)

    # searching for results equal to or lower than the threshold
    threshold = 1000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.BELOW,
                               threshold=threshold)
    expected = int((outcomes['death toll'] <= threshold).sum())
    self.assertEqual(prim_obj.t_coi, expected)

    # setup with a classification function only has to run without raising
    # NOTE(review): `threshold` receives prim.ABOVE here; confirm whether
    # `threshold_type` was intended.
    prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
def test_setup_prim(self):
    """Check the ``t_coi`` computed by ``prim.setup_prim``.

    For a threshold searched from above and from below, the number of cases
    of interest on the prim object must match a direct count on the
    'death toll' outcome.
    """
    self.results = utilities.load_flu_data()
    self.classify = flu_classify

    experiments, outcomes = self.results

    # derive a scalar outcome from the last column of the deceased population
    outcomes['death toll'] = outcomes['deceased population region 1'][:, -1]
    results = experiments, outcomes

    # searching for results equal to or higher than the threshold
    threshold = 10000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.ABOVE,
                               threshold=threshold)
    n_above = int((outcomes['death toll'] >= threshold).sum())
    # assertEqual shows the mismatching values when the check fails
    self.assertEqual(prim_obj.t_coi, n_above)

    # searching for results equal to or lower than the threshold
    threshold = 1000
    prim_obj = prim.setup_prim(results, classify='death toll',
                               threshold_type=prim.BELOW,
                               threshold=threshold)
    n_below = int((outcomes['death toll'] <= threshold).sum())
    self.assertEqual(prim_obj.t_coi, n_below)

    # setup with a classification function only has to run without raising
    # NOTE(review): `threshold` receives prim.ABOVE here; confirm whether
    # `threshold_type` was intended.
    prim.setup_prim(self.results, self.classify, threshold=prim.ABOVE)
def test_categorical_peel(self):
    """Peel a two-category column and check the candidate boxes.

    ``_categorical_peel`` must return two candidates, and in each candidate
    both limit rows of the peeled column must hold a single category. The
    check runs once with string categories and hand-built box limits, and
    once with tuple-valued categories and the initial box of the prim
    object.
    """
    # --- scenario 1: plain string categories -----------------------------
    x = pd.DataFrame(list(zip(np.random.rand(10,),
                              ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a'])),
                     columns=['a', 'b'])

    y = np.random.randint(0, 2, (10,))
    # builtin int replaces the np.int alias, which NumPy 1.24 removed
    y = y.astype(int)
    y = {'y': y}
    results = x, y
    classify = 'y'

    prim_obj = prim.setup_prim(results, classify, threshold=0.8)
    box_lims = pd.DataFrame([(0, {'a', 'b'}),
                             (1, {'a', 'b'})],
                            columns=['a', 'b'])
    box = prim.PrimBox(prim_obj, box_lims, prim_obj.yi)

    u = 'b'
    # only the non-numeric columns are handed to the peel routine
    x = x.select_dtypes(exclude=np.number).values
    j = 0
    peels = prim_obj._categorical_peel(box, u, j, x)

    self.assertEqual(len(peels), 2)
    for peel in peels:
        pl = peel[1][u]
        self.assertEqual(len(pl[0]), 1)
        self.assertEqual(len(pl[1]), 1)

    # --- scenario 2: tuple-valued categories -----------------------------
    a = ('a',)
    b = ('b',)
    x = pd.DataFrame(list(zip(np.random.rand(10,),
                              [a, b, a, b, a, a, b, a, b, a])),
                     columns=['a', 'b'])

    y = np.random.randint(0, 2, (10,))
    y = y.astype(int)
    y = {'y': y}
    results = x, y
    classify = 'y'

    prim_obj = prim.setup_prim(results, classify, threshold=0.8)
    box_lims = prim_obj.box_init
    box = prim.PrimBox(prim_obj, box_lims, prim_obj.yi)

    u = 'b'
    x = x.select_dtypes(exclude=np.number).values
    j = 0
    peels = prim_obj._categorical_peel(box, u, j, x)

    self.assertEqual(len(peels), 2)
    for peel in peels:
        pl = peel[1][u]
        self.assertEqual(len(pl[0]), 1)
        self.assertEqual(len(pl[1]), 1)
def test_categorical_peel(self):
    """Peel a two-category field of a structured array.

    ``_categorical_peel`` must return two candidates, and in each candidate
    both limit rows of the peeled field must hold a single category.
    """
    # builtin float/object replace the np.float/np.object aliases, which
    # NumPy 1.24 removed
    dtype = [('a', float), ('b', object)]
    x = np.empty((10, ), dtype=dtype)

    x['a'] = np.random.rand(10,)
    x['b'] = ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a']

    y = np.random.randint(0, 2, (10,))
    y = y.astype(int)
    y = {'y': y}
    results = x, y
    classify = 'y'

    prim_obj = prim.setup_prim(results, classify, threshold=0.8)
    box_lims = np.array([(0, {'a', 'b'}),
                         (1, {'a', 'b'})], dtype=dtype)
    box = prim.PrimBox(prim_obj, box_lims, prim_obj.yi)

    u = 'b'
    peels = prim_obj._categorical_peel(box, u, x)

    self.assertEqual(len(peels), 2)
    for peel in peels:
        pl = peel[1][u]
        self.assertEqual(len(pl[0]), 1)
        self.assertEqual(len(pl[1]), 1)
def test_update(self):
    """Update a box once and check the second peeling-trajectory entry.

    After ``box.update`` with indices ``[0, 1]``, entry 1 of the trajectory
    must report 1 for mean, coverage, density and 'res dim', and 2/3 for
    mass.
    """
    # builtin float/int replace the np.float/np.int aliases, which
    # NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = np.array([(0, 1, 1),
                            (2, 5, 6)], dtype=dtype)
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    self.assertEqual(box.peeling_trajectory['mean'][1], 1)
    self.assertEqual(box.peeling_trajectory['coverage'][1], 1)
    self.assertEqual(box.peeling_trajectory['density'][1], 1)
    self.assertEqual(box.peeling_trajectory['res dim'][1], 1)
    self.assertEqual(box.peeling_trajectory['mass'][1], 2/3)
def test_categorical_paste(self):
    """Paste on a categorical field of a structured array.

    Starting from a box restricted to category 'a', ``_categorical_paste``
    must return one candidate that covers all ten rows and whose limit for
    the pasted field is ``{'a', 'b'}``.
    """
    # builtin float/object replace the np.float/np.object aliases, which
    # NumPy 1.24 removed
    dtype = [('a', float), ('b', object)]
    x = np.empty((10, ), dtype=dtype)

    x['a'] = np.random.rand(10,)
    x['b'] = ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a']

    y = np.random.randint(0, 2, (10,))
    y = y.astype(int)
    y = {'y': y}
    results = x, y
    classify = 'y'

    prim_obj = prim.setup_prim(results, classify, threshold=0.8)
    box_lims = np.array([(0, {'a'}),
                         (1, {'a'})], dtype=dtype)

    # np.where with a single argument returns a tuple of index arrays
    yi = np.where(x['b'] == 'a')

    box = prim.PrimBox(prim_obj, box_lims, yi)

    u = 'b'
    pastes = prim_obj._categorical_paste(box, u)

    self.assertEqual(len(pastes), 1)

    # separate names so the loop does not shadow the input box limits
    for indices, pasted_lims in pastes:
        self.assertEqual(indices.shape[0], 10)
        self.assertEqual(pasted_lims[u][0], {'a', 'b'})
def test_categorical_peel(self):
    """Peel a two-category DataFrame column.

    ``_categorical_peel`` must return two candidates, and in each candidate
    both limit rows of the peeled column must hold a single category.
    """
    x = pd.DataFrame(list(zip(np.random.rand(10,),
                              ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a'])),
                     columns=['a', 'b'])

    y = np.random.randint(0, 2, (10,))
    # builtin int replaces the np.int alias, which NumPy 1.24 removed
    y = y.astype(int)
    y = {'y': y}
    results = x, y
    classify = 'y'

    prim_obj = prim.setup_prim(results, classify, threshold=0.8)
    box_lims = pd.DataFrame([(0, {'a', 'b'}),
                             (1, {'a', 'b'})],
                            columns=['a', 'b'])
    box = prim.PrimBox(prim_obj, box_lims, prim_obj.yi)

    u = 'b'
    # only the non-numeric columns are handed to the peel routine
    x = x.select_dtypes(exclude=np.number).values
    j = 0
    peels = prim_obj._categorical_peel(box, u, j, x)

    self.assertEqual(len(peels), 2)
    for peel in peels:
        pl = peel[1][u]
        self.assertEqual(len(pl[0]), 1)
        self.assertEqual(len(pl[1]), 1)
def test_categorical_paste(self):
    """Paste on a categorical DataFrame column.

    Starting from a box restricted to category 'a', ``_categorical_paste``
    must return one candidate that covers all ten rows and whose limit for
    the pasted column is ``{'a', 'b'}``.
    """
    a = np.random.rand(10,)
    b = ['a', 'b', 'a', 'b', 'a', 'a', 'b', 'a', 'b', 'a']
    x = pd.DataFrame(list(zip(a, b)), columns=['a', 'b'])
    x['b'] = x['b'].astype('category')

    y = np.random.randint(0, 2, (10,))
    # builtin int replaces the np.int alias, which NumPy 1.24 removed
    y = y.astype(int)
    y = {'y': y}
    results = x, y
    classify = 'y'

    prim_obj = prim.setup_prim(results, classify, threshold=0.8)
    box_lims = pd.DataFrame([(0, {'a'}),
                             (1, {'a'})], columns=x.columns)

    # np.where with a single argument returns a tuple of index arrays
    yi = np.where(x.loc[:, 'b'] == 'a')

    box = prim.PrimBox(prim_obj, box_lims, yi)

    u = 'b'
    pastes = prim_obj._categorical_paste(box, u, x, ['b'])

    self.assertEqual(len(pastes), 1)

    # separate names so the loop does not shadow the input box limits
    for indices, pasted_lims in pastes:
        self.assertEqual(indices.shape[0], 10)
        self.assertEqual(pasted_lims[u][0], {'a', 'b'})
def test_drop_restriction(self):
    """Drop the restriction on 'b' and check limits and trajectory.

    The box is first updated with a tighter upper limit on 'b' (2 instead
    of 5). After ``drop_restriction('b')`` the latest box limits must equal
    the expected limits field by field, and entry 2 of the peeling
    trajectory must report 1 for mean, coverage, density and 'res dim', and
    2/3 for mass.
    """
    # builtin float/int replace the np.float/np.int aliases, which
    # NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = np.array([(0, 1, 1),
                            (2, 2, 6)], dtype=dtype)
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    box.drop_restriction('b')

    correct_box_lims = np.array([(0, 1, 1),
                                 (2, 5, 6)], dtype=dtype)
    box_lims = box.box_lims[-1]
    names = recfunctions.get_names(correct_box_lims.dtype)
    for entry in names:
        lim_box = box_lims[entry]
        # still index lim_box so a too-short limit array raises
        for i, expected in enumerate(correct_box_lims[entry]):
            self.assertEqual(expected, lim_box[i])

    self.assertEqual(box.peeling_trajectory['mean'][2], 1)
    self.assertEqual(box.peeling_trajectory['coverage'][2], 1)
    self.assertEqual(box.peeling_trajectory['density'][2], 1)
    self.assertEqual(box.peeling_trajectory['res dim'][2], 1)
    self.assertEqual(box.peeling_trajectory['mass'][2], 2 / 3)
def test_drop_restriction(self):
    """Drop the restriction on column 'b' and check limits and trajectory.

    The box is first updated with a tighter upper limit on 'b' (2 instead
    of 5). After ``drop_restriction('b')`` the latest box limits must equal
    the expected limits column by column, and entry 2 of the peeling
    trajectory must report 1 for mean, coverage, density and 'res_dim', and
    2/3 for mass.
    """
    x = pd.DataFrame([(0, 1, 2),
                      (2, 5, 6),
                      (3, 2, 1)],
                     columns=['a', 'b', 'c'])
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = pd.DataFrame([(0, 1, 1),
                                (2, 2, 6)],
                               columns=['a', 'b', 'c'])
    # builtin int replaces the np.int alias, which NumPy 1.24 removed
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    box.drop_restriction('b')

    correct_box_lims = pd.DataFrame([(0, 1, 1),
                                     (2, 5, 6)],
                                    columns=['a', 'b', 'c'])
    box_lims = box.box_lims[-1]
    names = box_lims.columns
    for entry in names:
        lim_box = box_lims[entry]
        # still index lim_box so a missing row label raises
        for i, expected in enumerate(correct_box_lims[entry]):
            self.assertEqual(expected, lim_box[i])

    self.assertEqual(box.peeling_trajectory['mean'][2], 1)
    self.assertEqual(box.peeling_trajectory['coverage'][2], 1)
    self.assertEqual(box.peeling_trajectory['density'][2], 1)
    self.assertEqual(box.peeling_trajectory['res_dim'][2], 1)
    self.assertEqual(box.peeling_trajectory['mass'][2], 2/3)
def test_boxes(self):
    """Check ``prim_obj.boxes`` on toy data and on the flu data set.

    In both cases exactly one box must be reported: directly after setup for
    the toy data, and after one ``find_box`` call for the flu data.
    """
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    boxes = prim_obj.boxes

    self.assertEqual(len(boxes), 1, 'box length not correct')

    # real data test case
    prim_obj = prim.setup_prim(utilities.load_flu_data(), flu_classify,
                               threshold=0.8)
    prim_obj.find_box()
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 1, 'box length not correct')
def test_init(self):
    """A freshly built PrimBox has a peeling trajectory of shape (1, 6)."""
    rows = [(0, 1, 2), (2, 5, 6), (3, 2, 1)]
    experiments = pd.DataFrame(rows, columns=['a', 'b', 'c'])
    outcomes = {'y': np.array([0, 1, 2])}

    prim_obj = prim.setup_prim((experiments, outcomes), 'y', threshold=0.8)
    fresh_box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    trajectory_shape = fresh_box.peeling_trajectory.shape
    self.assertEqual(trajectory_shape, (1, 6))
def test_boxes(self):
    """Exactly one box is reported for the toy data and for the flu data."""
    frame = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                         columns=['a', 'b', 'c'])
    outcomes = {'y': np.array([0, 1, 2])}

    # toy data: inspect the boxes directly after setup
    toy_prim = prim.setup_prim((frame, outcomes), 'y', threshold=0.8)
    toy_boxes = toy_prim.boxes
    self.assertEqual(len(toy_boxes), 1, 'box length not correct')

    # real data test case: inspect the boxes after a single search
    flu_prim = prim.setup_prim(utilities.load_flu_data(), flu_classify,
                               threshold=0.8)
    flu_prim.find_box()
    flu_boxes = flu_prim.boxes
    self.assertEqual(len(flu_boxes), 1, 'box length not correct')
def test_init(self):
    """A freshly built PrimBox has a peeling trajectory of shape (1, 5)."""
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    self.assertEqual(box.peeling_trajectory.shape, (1, 5))
def test_init(self):
    """Constructing a PrimBox yields a (1, 6) peeling trajectory."""
    column_names = ['a', 'b', 'c']
    data = pd.DataFrame([(0, 1, 2), (2, 5, 6), (3, 2, 1)],
                        columns=column_names)
    target = {'y': np.array([0, 1, 2])}

    prim_obj = prim.setup_prim((data, target), 'y', threshold=0.8)
    new_box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    self.assertEqual(new_box.peeling_trajectory.shape, (1, 6))
def test_boxes(self):
    """Check ``prim_obj.boxes`` on toy data and on the flu data set.

    In both cases exactly one box must be reported: directly after setup for
    the toy data, and after one ``find_box`` call for the flu data.
    """
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    boxes = prim_obj.boxes

    self.assertEqual(len(boxes), 1, 'box length not correct')

    # real data test case
    prim_obj = prim.setup_prim(test_utilities.load_flu_data(), flu_classify,
                               threshold=0.8)
    prim_obj.find_box()
    boxes = prim_obj.boxes
    self.assertEqual(len(boxes), 1, 'box length not correct')
def test_init(self):
    """Constructing a PrimBox yields a (1, 5) peeling trajectory."""
    # builtin float replaces the np.float alias, which NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([0, 1, 2])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    self.assertEqual(box.peeling_trajectory.shape, (1, 5))
def test_inspect(self):
    """Smoke test: ``box.inspect(1)`` runs after a single update.

    Nothing is asserted; the test only fails if setup, update or inspect
    raises.
    """
    # builtin float/int replace the np.float/np.int aliases, which
    # NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = np.array([(0, 1, 1),
                            (2, 5, 6)], dtype=dtype)
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    box.inspect(1)
def test_select(self):
    """Selecting entry 0 after an update restores the initial indices.

    After one ``update`` and ``select(0)``, ``box.yi`` must equal
    ``prim_obj.yi`` element-wise.
    """
    x = pd.DataFrame([(0, 1, 2),
                      (2, 5, 6),
                      (3, 2, 1)],
                     columns=['a', 'b', 'c'])
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = pd.DataFrame([(0, 1, 1),
                                (2, 5, 6)],
                               columns=['a', 'b', 'c'])
    # builtin int replaces the np.int alias, which NumPy 1.24 removed
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    box.select(0)
    self.assertTrue(np.all(box.yi == prim_obj.yi))
def test_find_box(self):
    """Box indices plus remaining indices always add up to all cases.

    The invariant is checked after the first and after the second
    ``find_box`` call on the flu data.
    """
    prim_obj = prim.setup_prim(utilities.load_flu_data(), flu_classify,
                               threshold=0.8)

    found_boxes = []
    for _ in range(2):
        found_boxes.append(prim_obj.find_box())
        prim_obj._update_yi_remaining()

        # cases captured by the boxes found so far plus the cases left over
        in_boxes = sum(found.yi.shape[0] for found in found_boxes)
        accounted_for = in_boxes + prim_obj.yi_remaining.shape[0]
        self.assertEqual(accounted_for, prim_obj.y.shape[0])
def test_find_box(self):
    """Box indices plus remaining indices always add up to all cases.

    The invariant is checked after the first and after the second
    ``find_box`` call on the flu data.
    """
    prim_obj = prim.setup_prim(utilities.load_flu_data(), flu_classify,
                               threshold=0.8)

    boxes_so_far = []
    for _ in range(2):
        boxes_so_far.append(prim_obj.find_box())
        # in this variant the update routine takes the prim object explicitly
        prim_obj._update_yi_remaining(prim_obj)

        n_in_boxes = sum(b.yi.shape[0] for b in boxes_so_far)
        n_accounted = n_in_boxes + prim_obj.yi_remaining.shape[0]
        self.assertEqual(n_accounted, prim_obj.y.shape[0])
def test_select(self):
    """``select(0)`` after an update must bring back the initial indices.

    After one ``update`` and ``select(0)``, ``box.yi`` must equal
    ``prim_obj.yi`` element-wise.
    """
    x = pd.DataFrame([(0, 1, 2),
                      (2, 5, 6),
                      (3, 2, 1)],
                     columns=['a', 'b', 'c'])
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = pd.DataFrame([(0, 1, 1),
                                (2, 5, 6)],
                               columns=['a', 'b', 'c'])
    # builtin int replaces the np.int alias, which NumPy 1.24 removed
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    box.select(0)
    self.assertTrue(np.all(box.yi == prim_obj.yi))
def test_inspect(self):
    """Smoke test: ``box.inspect(1)`` must not raise after one update.

    Nothing is asserted; the test only fails if setup, update or inspect
    raises.
    """
    # builtin float/int replace the np.float/np.int aliases, which
    # NumPy 1.24 removed
    dtype = [('a', float), ('b', float), ('c', float)]
    x = np.array([(0, 1, 2),
                  (2, 5, 6),
                  (3, 2, 1)], dtype=dtype)
    y = {'y': np.array([1, 1, 0])}
    results = (x, y)

    prim_obj = prim.setup_prim(results, 'y', threshold=0.8)
    box = PrimBox(prim_obj, prim_obj.box_init, prim_obj.yi)

    new_box_lim = np.array([(0, 1, 1),
                            (2, 5, 6)], dtype=dtype)
    indices = np.array([0, 1], dtype=int)
    box.update(new_box_lim, indices)

    box.inspect(1)
# make an empty array of length equal to number of cases classes = np.zeros(result.shape[0]) # if deceased population is higher then 1.000.000 people, classify as 1 classes[result[:, -1] > 1000000] = 1 return classes # load data fn = r"./data/1000 flu cases no policy.tar.gz" results = load_results(fn) # perform prim on modified results tuple prim_obj = prim.setup_prim(results, classify, threshold=0.8, threshold_type=1) box_1 = prim_obj.find_box() box_1.show_ppt() box_1.show_tradeoff() box_1.inspect(5) box_1.select(5) box_1.write_ppt_to_stdout() box_1.show_pairs_scatter() # print prim to std_out print(prim_obj.stats_to_dataframe()) print(prim_obj.boxes_to_dataframe()) # visualize prim_obj.display_boxes()
# make an empty array of length equal to number of cases classes = np.zeros(result.shape[0]) # if deceased population is higher then 1.000.000 people, classify as 1 classes[result[:, -1] > 1000000] = 1 return classes # load data fn = r'./data/1000 flu cases no policy.tar.gz' results = load_results(fn) # perform prim on modified results tuple prim_obj = prim.setup_prim(results, classify, threshold=0.8, threshold_type=1) box_1 = prim_obj.find_box() box_1.show_ppt() box_1.show_tradeoff() box_1.inspect(5, style='graph', boxlim_formatter="{: .2f}") box_1.inspect(5) box_1.select(5) box_1.write_ppt_to_stdout() box_1.show_pairs_scatter() # print prim to std_out print(prim_obj.stats_to_dataframe()) print(prim_obj.boxes_to_dataframe()) # visualize
def classify(outcomes):
    """Flag the cases whose relative Rotterdam throughput is below 1.

    Parameters
    ----------
    outcomes : mapping
        Must contain the key 'throughput Rotterdam'.

    Returns
    -------
    numpy.ndarray
        Array with one entry per case: 1 where the throughput divided by
        ``default_flow`` is smaller than 1, 0 elsewhere.
    """
    relative_flow = outcomes['throughput Rotterdam'] / default_flow

    labels = np.zeros(relative_flow.shape[0])
    labels[relative_flow < 1] = 1
    return labels


# load the stored runs and set up prim on them
fn = r'./data/5000 runs WCM.tar.gz'
results = load_results(fn)
prim_obj = prim.setup_prim(results, classify, mass_min=0.05, threshold=0.75)

# search for the first box
box1 = prim_obj.find_box()

# inspect the peeling trajectory of that box
box1.show_ppt()
box1.show_tradeoff()
box1.write_ppt_to_stdout()

# entry number 44 of the peeling trajectory is the one picked here
box1.select(44)

# display the resulting box
prim_obj.show_boxes()
def classify(outcomes):
    """Mark cases with a relative Rotterdam throughput below 1.

    Parameters
    ----------
    outcomes : mapping
        Must contain the key 'throughput Rotterdam'.

    Returns
    -------
    numpy.ndarray
        Array with one entry per case: 1 where the throughput divided by
        ``default_flow`` is smaller than 1, 0 elsewhere.
    """
    throughput = outcomes['throughput Rotterdam']
    ratio = throughput / default_flow

    flags = np.zeros(ratio.shape[0])
    below_default = ratio < 1
    flags[below_default] = 1
    return flags


# read the stored runs and prepare prim
fn = r'./data/5000 runs WCM.tar.gz'
results = load_results(fn)
prim_obj = prim.setup_prim(results, classify, mass_min=0.05, threshold=0.75)

# find a first box
box1 = prim_obj.find_box()

# analyze its peeling trajectory
box1.show_ppt()
box1.show_tradeoff()
box1.write_ppt_to_stdout()

# pick entry number 44 from the peeling trajectory
box1.select(44)

# show the resulting box
prim_obj.show_boxes()