def test_synthetic_EQW_5(self):
    """Check the cutpoints EqualWidth(5, 0) produces for a two-value series.

    The series spans -75..25, so the four expected cutpoints are spaced
    20 apart: -55, -35, -15 and 5.
    """
    property_id = 1
    discretizer = EqualWidth(5, 0)
    # min = -75, max = 25
    property_to_timestamps = {
        property_id: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)],
    }
    expected_cutpoints = {property_id: [-55, -35, -15, 5]}

    discretizer.discretize_property_without_abstracting(
        {}, {}, property_to_timestamps, property_id
    )

    matches, failure_message = assert_almost_equality(
        expected_cutpoints, discretizer.bins_cutpoints
    )
    self.assertTrue(matches, failure_message)
def test_synthetic_stress_EQW_2(self):
    """Check EqualWidth(2, 0) on a long series padded with repeated values.

    The extremes (-75 and 25) are buried between three runs of
    STRESS_VALUE_COUNT entries with value 3; the single expected cutpoint
    is still the midpoint of the range, -25.
    """
    def filler_run():
        # One TimeStamp object repeated STRESS_VALUE_COUNT times.
        return [TimeStamp(3, 1, 1, 0)] * STRESS_VALUE_COUNT

    property_id = 1
    discretizer = EqualWidth(2, 0)
    # min = -75, max = 25
    series = (
        filler_run()
        + [TimeStamp(-75, 1, 1, 0)]
        + filler_run()
        + [TimeStamp(25, 1, 1, 0)]
        + filler_run()
    )
    property_to_timestamps = {property_id: series}
    expected_cutpoints = {property_id: [-25]}

    discretizer.discretize_property_without_abstracting(
        {}, {}, property_to_timestamps, property_id
    )

    matches, failure_message = assert_almost_equality(
        expected_cutpoints, discretizer.bins_cutpoints
    )
    self.assertTrue(matches, failure_message)
def test_write_output_c2e_no_information_loss(self):
    """Load and re-write the class-to-entity map of every property, then compare files.

    For each entry of ``properties`` the map is loaded through an
    EqualWidth instance pointed at PARTITIONS_PATH, written out with
    ``write_partition_float`` into a scratch folder, and checked with
    ``check_file_similiarity``. The test passes only when the accumulated
    message is still empty.
    """
    out_folder = "test_files_folder"
    if not exists(out_folder):
        mkdir(out_folder)

    msg = ""
    for property_id in properties:
        class_to_entity = {}
        discretizer = EqualWidth(3, 0)
        discretizer.property_folder = PARTITIONS_PATH
        discretizer.load_class_to_entity(class_to_entity, property_id)
        write_partition_float({}, class_to_entity, {}, out_folder, property_id)
        # The checker receives a fresh list and the running message on every call.
        msg = check_file_similiarity([], msg, out_folder, property_id)

    nothing_reported = msg == ""
    self.assertTrue(nothing_reported, msg)
def test_syntetic_cutpoint_generation(self):
    """Sweep bin counts 2..999 over a fixed two-value series and check cutpoints.

    For ``bin_count`` bins over the range -75..25 the expected cutpoints
    are ``-75 + (100 / bin_count) * i`` for ``i`` in ``1..bin_count - 1``.
    Failures are accumulated across the sweep and asserted once at the end.
    """
    property_id = 1
    all_passed = True
    failures = ""

    for bin_count in range(2, 1000):
        discretizer = EqualWidth(bin_count, 0)
        # Rebuilt on every iteration so each discretizer gets fresh input.
        # min = -75, max = 25
        property_to_timestamps = {
            property_id: [TimeStamp(-75, 1, 1, 0), TimeStamp(25, 1, 1, 0)],
        }
        interval = 100 / bin_count
        expected_cutpoints = {
            property_id: [-75 + interval * i for i in range(1, bin_count)],
        }

        discretizer.discretize_property_without_abstracting(
            {}, {}, property_to_timestamps, property_id
        )

        passed, failure = assert_almost_equality(
            expected_cutpoints, discretizer.bins_cutpoints
        )
        all_passed &= passed
        failures += failure

    self.assertTrue(all_passed, failures)
def test_cutpoints(MAX, MIN, PROPERTY_ID):
    """Compare EqualWidth cutpoints with ``generate_cutpoints`` for bin counts 2..999.

    Each discretizer has ``property_folder`` set to PARTITIONS_PATH and is
    run on PROPERTY_ID with three initially empty maps that are shared
    across all iterations.
    NOTE(review): presumably the property data is read from that folder,
    since the maps start out empty; confirm against EqualWidth.

    Returns:
        A ``(msg, res)`` tuple: the concatenated failure messages and the
        combined pass/fail flag.
    """
    all_passed = True
    failures = ""
    # Created once on purpose: the same three maps are passed to every run.
    map_a, map_b, map_c = {}, {}, {}

    for bin_count in range(2, 1000):
        discretizer = EqualWidth(bin_count, 0)
        discretizer.property_folder = PARTITIONS_PATH
        expected_cutpoints = {
            PROPERTY_ID: generate_cutpoints(MIN, MAX, bin_count),
        }

        discretizer.discretize_property_without_abstracting(
            map_a, map_b, map_c, PROPERTY_ID
        )

        passed, failure = assert_almost_equality(
            expected_cutpoints, discretizer.bins_cutpoints
        )
        all_passed &= passed
        failures += failure

    return failures, all_passed
def test_PAA_Discretization_Difference(self):
    """Check that window_size=1 and window_size=2 give different cutpoints.

    The same four-value series is discretized twice with EqualWidth(2, 0):
    once with ``window_size=1`` (expected cutpoint -25) and once with
    ``window_size=2`` (expected cutpoint ``-50 + 63 / 2``). Each result is
    checked against its expectation, and the two results must not be
    almost-equal to each other.
    """
    def build_series():
        # A fresh list of fresh TimeStamp objects for each run.
        return [
            TimeStamp(-75, 1, 1, 0),
            TimeStamp(-25, 2, 2, 0),
            TimeStamp(1, 3, 3, 0),
            TimeStamp(25, 4, 4, 0),
        ]

    property_id = 1
    failures = ""
    all_passed = True

    # --- window_size=1 ---
    discretizer = EqualWidth(2, 0, window_size=1)
    property_to_timestamps = {property_id: build_series()}
    # min = -75, max = 25
    expected_cutpoints = {property_id: [-25]}
    discretizer.discretize_property({}, {}, property_to_timestamps, property_id)
    no_paa_cutpoints = discretizer.bins_cutpoints
    passed, failure = assert_almost_equality(expected_cutpoints, no_paa_cutpoints)
    failures += failure
    all_passed &= passed

    # --- window_size=2 ---
    discretizer = EqualWidth(2, 0, window_size=2)
    property_to_timestamps = {property_id: build_series()}
    # min = -50 max = 13 (these match the means of consecutive value pairs)
    expected_cutpoints = {property_id: [-50 + 63 / 2]}
    discretizer.discretize_property({}, {}, property_to_timestamps, property_id)
    paa_cutpoints = discretizer.bins_cutpoints
    passed, failure = assert_almost_equality(expected_cutpoints, paa_cutpoints)
    if failure != "":
        # Keep the second report on its own line.
        failure = "\n" + failure
    failures += failure
    all_passed &= passed

    # --- the two runs must disagree ---
    identical, _ = assert_almost_equality(
        {1: no_paa_cutpoints}, {1: paa_cutpoints}
    )
    if identical:
        failures += "\nExpected different cutpoints with PAA! Got %s" % no_paa_cutpoints
        all_passed = False

    self.assertTrue(all_passed, failures)
_entity_element = entity_elements[i] f.write( str(_entity_element[0]) + ',' + str(_entity_element[1]) + ',' + str(_entity_element[2]) + ',' + str(_entity_element[3])) karma_output.write( str(_entity_element[0]) + ',' + str(_entity_element[1]) + ',' + str(property_to_base[_entity_element[3]] + _entity_element[2]) + ',' + str(_entity_element[3])) if i + 1 != len(entity_elements): f.write(';') karma_output.write(';') f.write('\n') karma_output.write('\n') if __name__ == '__main__': test_path = r'D:\test_stuff.txt' dataset_path = r'..\..\datasets\SAGender/SAGender.csv' m1, m2, m3 = get_maps_from_file(dataset_path, 55) d = EqualWidth(4) # _m1, _m2, _m3 = d.get_copy_of_maps(m1, m2, m3) _m1, _m2, _m3 = d.discretize(m1, m2, m3) convert_cutpoints_to_output(_m2, "D:\\", 'SAGender', d.get_discretization_name())