def test_synthetic_EQF_5(self):
    """Check the 4 cutpoints produced for two values, -75 and 25, with 5 bins."""
    discretizer = EqualFrequency(5, 0)
    # min = -75, max = 25
    property_to_timestamps = {
        1: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)],
    }
    expected_cutpoints = {1: [-55, -35, -15, 5]}

    discretizer.discretize_property_without_abstracting(
        {}, {}, property_to_timestamps, 1)

    res, msg = assert_almost_equality(
        expected_cutpoints, discretizer.bins_cutpoints)
    self.assertTrue(res, msg)
def set_bin_ranges(self, property_to_entities: Dict[int, Set[Entity]], class_to_entities: Dict[int, Set[Entity]], property_to_timestamps: Dict[int, List[TimeStamp]]):
    """Compute the cutpoints of every property in ``property_to_timestamps``.

    An ``EqualFrequency`` discretizer built with ``self.ACCURACY_MEASURE`` is
    run first; its ``bins_cutpoints`` are stored on ``self.candidate_cutpoints``
    and its three outputs are forwarded, per property, to
    ``self.set_bin_ranges_for_property``.

    :param property_to_entities: mapping of property id to entities.
    :param class_to_entities: mapping of class id to entities.
    :param property_to_timestamps: mapping of property id to timestamps; its
        keys decide which properties get cutpoints.
    :return: dict mapping each property id to the value returned by
        ``set_bin_ranges_for_property`` for that property.
    """
    candidate_source = EqualFrequency(self.ACCURACY_MEASURE, -1)
    # The three outputs are passed on positionally as the
    # property_to_entities / class_to_entities / property_to_timestamps
    # arguments of set_bin_ranges_for_property.
    eqf_p2e, eqf_c2e, eqf_p2t = candidate_source.discretize(
        property_to_entities, class_to_entities, property_to_timestamps)
    self.candidate_cutpoints = candidate_source.bins_cutpoints

    # Properties are handled one after another (a parallel variant,
    # parallel_cutpoint_set, was left disabled here in the original code).
    return {
        property_id: self.set_bin_ranges_for_property(
            eqf_p2e, eqf_c2e, eqf_p2t, property_id)
        for property_id in property_to_timestamps
    }
def test_synthetic_EQF_Stress_Big_Request_4(self):
    """Stress test: STRESS_VALUE_COUNT consecutive integer values, 4 bins."""
    BIN_COUNT = 4
    property_to_timestamps = {
        1: [TimeStamp(value, 1, 1, 0) for value in range(STRESS_VALUE_COUNT)],
    }
    last_value = STRESS_VALUE_COUNT - 1

    discretizer = EqualFrequency(BIN_COUNT, 0)
    discretizer.discretize_property_without_abstracting(
        {}, {}, property_to_timestamps, 1)

    # The k-th cutpoint is expected k/BIN_COUNT of the way up to the largest value.
    expected_cutpoints = {
        1: [k * last_value / BIN_COUNT for k in range(1, BIN_COUNT)],
    }
    passed, msg = assert_almost_equality(
        expected_cutpoints, discretizer.bins_cutpoints)
    self.assertTrue(passed, msg)
def test_real_EQF_FAAgeGroup_F3_Property_44(self):
    """Check the 5-bin cutpoints of property 44 against known values.

    All three input mappings are empty and ``property_folder`` points at
    PARTITIONS_PATH, so the data is presumably read from that folder
    (TODO confirm against EqualFrequency).
    """
    PROPERTY_ID = 44
    EXPECTED_RES = [0.71, 0.84, 0.95, 1.1]

    discretizer = EqualFrequency(5, 0)
    discretizer.property_folder = PARTITIONS_PATH
    discretizer.discretize_property_without_abstracting({}, {}, {}, PROPERTY_ID)

    res, msg = assert_almost_equality(
        {PROPERTY_ID: EXPECTED_RES}, discretizer.bins_cutpoints)
    self.assertTrue(res, msg)
def test_synthetic_EQF_Stress_Many_Requests(self):
    """Run the discretizer on the values 0 and 1 for every bin count in 2..9999.

    For each bin count only the sum of the produced cutpoints is compared,
    against ``(bin_count - 1) / 2``; failures are accumulated and asserted
    once at the end.
    """
    property_to_timestamps = {1: [TimeStamp(0, 1, 1, 0), TimeStamp(1, 1, 1, 0)]}
    res = True
    messages = []

    for bin_count in range(2, 10000):
        discretizer = EqualFrequency(bin_count, 0)
        discretizer.discretize_property_without_abstracting(
            {}, {}, property_to_timestamps, 1)

        actual_sum = sum(discretizer.bins_cutpoints[1])
        expected_sum = (bin_count - 1) / 2
        round_ok, round_msg = assert_almost_equality(
            {1: [expected_sum]}, {1: [actual_sum]})

        res &= round_ok
        messages.append(round_msg)

    self.assertTrue(res, "".join(messages))
def set_bin_ranges_for_property(self, property_to_entities: Dict[int, Set[Entity]],
                                class_to_entities: Dict[int, Set[Entity]],
                                property_to_timestamps: Dict[int, List[TimeStamp]],
                                property_id: int) -> List[float]:
    """Greedily choose up to ``self.bin_count - 1`` cutoffs for one property.

    Candidate cutoffs come from ``EqualFrequency.load_candidate_cuts``. In
    each round every not-yet-chosen candidate is tried together with the
    cutoffs chosen so far: the state matrix is collapsed with
    ``self.collapse_matrix`` and scored with ``self.distance_measure``. The
    candidate with the highest score is kept (the first one wins ties).

    Side effects: ``self.candidate_cutpoints`` is replaced by the loaded
    candidates, and ``self.cutoffs_according_to_order[property_id]`` /
    ``self.chosen_scores[property_id]`` receive the cutoffs and their scores
    in the order they were chosen.

    :param property_to_entities: forwarded to ``load_candidate_cuts``.
    :param class_to_entities: forwarded to ``load_candidate_cuts``.
    :param property_to_timestamps: forwarded to ``load_candidate_cuts``.
    :param property_id: id of the property whose cutoffs are selected.
    :return: the chosen cutoffs in ascending order. Fewer than
        ``self.bin_count - 1`` values are returned when a round finds no
        selectable candidate (candidates exhausted, or no score above -inf).
    """
    # NOTE(review): 100 is presumably the number of equal-frequency candidate
    # bins requested from load_candidate_cuts - confirm against its signature.
    loaded, candidates = EqualFrequency.load_candidate_cuts(
        property_to_entities, class_to_entities, property_to_timestamps,
        property_id, 100, self.property_folder)
    # The first returned value is converted into the property -> entities
    # mapping consumed by load_state_information below.
    property_to_entities = self.property_to_timestamps_to_property_to_entity(loaded)
    self.candidate_cutpoints = candidates
    candidate_cutoffs: List[float] = sorted(self.candidate_cutpoints[property_id])
    debug_print("%s: %s" % (property_id, candidate_cutoffs))

    # n candidate cutoffs delimit n + 1 states.
    state_count = len(candidate_cutoffs) + 1
    A = np.zeros(shape=(state_count, state_count))
    state_vector = [0] * state_count
    chosen_cutoffs = SortedList()
    chosen_cutoffs_indices = SortedList()
    # A and state_vector are handed over zero-initialised; presumably
    # load_state_information fills them in place - TODO confirm.
    self.load_state_information(A, property_id, property_to_entities, state_vector)

    cutoffs_according_to_order = []
    chosen_scores = []
    for _ in range(self.bin_count - 1):
        max_distance = float('-inf')
        best_cutoff = None
        best_index = None
        for j, cutoff in enumerate(candidate_cutoffs):
            if j in chosen_cutoffs_indices:
                continue
            temp_cutoff_indices = chosen_cutoffs_indices.copy()
            temp_cutoff_indices.add(j)
            new_A = self.collapse_matrix(A, temp_cutoff_indices)
            distance_of_series = self.distance_measure(new_A, state_vector)
            # Strict comparison: the earliest candidate wins a tie.
            if distance_of_series > max_distance:
                max_distance = distance_of_series
                best_cutoff = cutoff
                best_index = j

        if best_index is None:
            # Nothing could be selected this round, and later rounds would
            # see the same candidates. Stop rather than record the -inf
            # search placeholder as if it were a real cutoff/score.
            break

        chosen_cutoffs.add(best_cutoff)
        chosen_cutoffs_indices.add(best_index)
        cutoffs_according_to_order.append(best_cutoff)
        chosen_scores.append(max_distance)

    self.cutoffs_according_to_order.update({property_id: cutoffs_according_to_order})
    self.chosen_scores.update({property_id: chosen_scores})
    return list(chosen_cutoffs)
def set_bin_ranges_for_property(self, property_to_entities: Dict[int, Set[Entity]],
                                class_to_entities: Dict[int, Set[Entity]],
                                property_to_timestamps: Dict[int, List[TimeStamp]],
                                property_id: int) -> List[float]:
    """Greedily choose up to ``self.bin_count - 1`` cutoffs for one property.

    Candidate cutoffs come from ``EqualFrequency.load_candidate_cuts``. In
    each round every not-yet-chosen candidate is tried together with the
    cutoffs chosen so far: a probability vector is built with
    ``self.calculate_probability_vector`` and scored with
    ``self.distance_measure``. The candidate with the highest score is kept
    (the first one wins ties).

    Side effects: ``self.candidate_cutpoints`` is replaced by the loaded
    candidates, and ``self.cutoffs_according_to_order[property_id]`` /
    ``self.chosen_scores[property_id]`` receive the cutoffs and their scores
    in the order they were chosen.

    :param property_to_entities: forwarded to ``load_candidate_cuts`` and
        ``TD4C.populate_state_vector``.
    :param class_to_entities: forwarded to ``load_candidate_cuts``.
    :param property_to_timestamps: forwarded to ``load_candidate_cuts`` and
        ``TD4C.populate_state_vector``.
    :param property_id: id of the property whose cutoffs are selected.
    :return: the chosen cutoffs in ascending order. Fewer than
        ``self.bin_count - 1`` values are returned when a round finds no
        selectable candidate (candidates exhausted, or no score above -inf).
    """
    loaded, candidates = EqualFrequency.load_candidate_cuts(
        property_to_entities, class_to_entities, property_to_timestamps,
        property_id, self.ACCURACY_MEASURE, self.property_folder)
    # The first returned value is converted into the class -> entities
    # mapping used to populate the per-class state vectors below.
    class_to_entities = self.property_to_timestamps_to_class_to_entities(loaded)
    self.candidate_cutpoints = candidates
    candidate_cutoffs: List[float] = sorted(self.candidate_cutpoints[property_id])

    chosen_cutoffs = SortedList()
    chosen_cutoffs_indices = SortedList()
    cutoffs_according_to_order = []
    class_to_state_vector = TD4C.populate_state_vector(
        property_to_entities, class_to_entities, property_to_timestamps,
        len(candidate_cutoffs), property_id)
    chosen_scores = []

    debug_print("\n---------------------%s----------------------" % property_id)
    for _ in range(self.bin_count - 1):
        # Kept only for the per-round debug output.
        scores_and_cutoffs = []
        max_distance = float('-inf')
        best_cutoff = None
        best_index = None
        for j, cutoff in enumerate(candidate_cutoffs):
            if j in chosen_cutoffs_indices:
                continue
            temp_cutoff_indices = chosen_cutoffs_indices.copy()
            temp_cutoff_indices.add(j)
            probability_vector = self.calculate_probability_vector(
                class_to_state_vector, temp_cutoff_indices)
            distance_of_series = self.distance_measure(probability_vector)
            scores_and_cutoffs.append((cutoff, distance_of_series))
            # Strict comparison: the earliest candidate wins a tie.
            if distance_of_series > max_distance:
                max_distance = distance_of_series
                best_cutoff = cutoff
                best_index = j

        if best_index is None:
            # Nothing could be selected this round, and later rounds would
            # see the same candidates. Stop rather than record the -inf
            # search placeholder as if it were a real cutoff/score.
            break

        debug_print("%s: %s" % (best_cutoff, scores_and_cutoffs))
        chosen_cutoffs.add(best_cutoff)
        chosen_cutoffs_indices.add(best_index)
        cutoffs_according_to_order.append(best_cutoff)
        chosen_scores.append(max_distance)

    self.cutoffs_according_to_order.update({property_id: cutoffs_according_to_order})
    self.chosen_scores.update({property_id: chosen_scores})
    return list(chosen_cutoffs)
def test_synthetic_EQF_array_partial_index_2(self):
    """A fractional index just below 2 into [-1, 0, 1] should give almost 1."""
    values = [-1, 0, 1]
    cutpoint = EqualFrequency.get_cutpoint(2 - EPSILON, values)
    self.assertAlmostEqual(1, cutpoint)
def test_synthetic_EQF_array_partial_index_1_half(self):
    """A fractional index of 1.5 into [-1, 0, 1] should give a cutpoint of 0.5."""
    values = [-1, 0, 1]
    cutpoint = EqualFrequency.get_cutpoint(3 / 2, values)
    self.assertAlmostEqual(1 / 2, cutpoint)