def test_PAA_Window_Stress(self):
    """Stress-test PAA aggregation for every window size from 2 to 9999.

    For each window size, builds runs of WINDOW_SIZE consecutive time
    stamps and checks that paa_p2t collapses each run into one stamp
    whose value is the run mean and whose span is [first, last].
    Failures for all window sizes are accumulated and reported together.
    """
    cutpoints = []
    TOTAL_POINTS = STRESS_VALUE_COUNT
    message = ""
    for WINDOW_SIZE in range(2, 10000):
        p2t = {1: []}
        expected = {1: []}
        # BUG FIX: the original loop was range(TOTAL_POINTS + 1, WINDOW_SIZE),
        # which is empty for every WINDOW_SIZE <= TOTAL_POINTS, so the test
        # silently verified nothing.  The intended iteration strides over the
        # data in steps of WINDOW_SIZE, matching the windows built below.
        for i in range(1, TOTAL_POINTS + 1, WINDOW_SIZE):
            p2t[1] += [
                TimeStamp(i + x, i + x, i + x, 0) for x in range(WINDOW_SIZE)
            ]
            # One aggregated stamp per window: mean value, first start, last end.
            expected[1].append(
                TimeStamp(i + (WINDOW_SIZE - 1) / 2, i, i + WINDOW_SIZE - 1, 0))
        d: Discretization = Expert({1: cutpoints}, -1, window_size=WINDOW_SIZE)
        d_p2t = d.paa_p2t(p2t)
        t_msg = compare_time_stamps(expected, d_p2t)
        if t_msg != "":
            message += "WINDOW SIZE: %s\n%s\n" % (WINDOW_SIZE, t_msg)
    self.assertTrue(message == "", message)
def test_PAA_Window_1(self):
    """PAA with window size 1 must be the identity transform."""
    cutpoints = []
    TOTAL_POINTS = 1000
    p2t = {1: []}
    WINDOW_SIZE = 1
    # BUG FIX: the original loop was range(TOTAL_POINTS + 1, WINDOW_SIZE),
    # i.e. range(1001, 1), which is empty -- the test ran on an empty map
    # and passed vacuously.  Stride by WINDOW_SIZE over 1..TOTAL_POINTS.
    for i in range(1, TOTAL_POINTS + 1, WINDOW_SIZE):
        p2t[1].append(TimeStamp(i, i, i, 0))
    d: Discretization = Expert({1: cutpoints}, -1, window_size=WINDOW_SIZE)
    d_p2t = d.paa_p2t(p2t)
    message = ""
    # With a window of 1 the output must equal the input exactly.
    message += compare_time_stamps(p2t, d_p2t)
    self.assertTrue(message == "", message)
def test_Abstraction_No_Impact_p2t(self):
    """Discretized p2t output must differ from the raw input map.

    If the two maps compare equal, the abstraction step mutated (or
    returned) the original data instead of producing new stamps.
    """
    raw_stamps = [
        TimeStamp(0, 1, 1, 0, 0),
        TimeStamp(0, 1, 1, 1, 0),
        TimeStamp(1, 1, 1, 2, 1),
        TimeStamp(1, 1, 1, 3, 1),
    ]
    p2t = {1: raw_stamps}
    d: Discretization = Expert({1: [1.5]}, 0)
    d_p2e, d_c2e, d_p2t = d.discretize_property({}, {}, p2t, 1)
    # Empty diff means raw and discretized maps are identical -> failure.
    diff = compare_time_stamps(p2t, d_p2t)
    self.assertTrue(diff != "", "Original data was changed during abstraction")
def test_Abstraction_Most_Bins(self):
    """With a cutpoint between every pair of values, each value gets its own bin."""
    TOTAL_POINTS = 1000
    cutpoints = []
    p2t = {1: []}
    expected = {1: []}
    c2e = {}
    p2e = {}
    for value in range(1, TOTAL_POINTS + 1):
        p2t[1].append(TimeStamp(value, value, value, value))
        # Cutpoints 1.5, 2.5, ... put value v into bin v - 1.
        expected[1].append(TimeStamp(value - 1, value, value, value))
        cutpoints.append(value + 0.5)
    d: Discretization = Expert({1: cutpoints}, -1)
    d_p2e, d_c2e, d_p2t = d.discretize_property(p2e, c2e, p2t, 1)
    diff = compare_time_stamps(expected, d_p2t)
    self.assertTrue(diff == "", diff)
def test_Abstraction_No_Impact_c2e(self):
    """Discretized c2e output must differ from the raw class-to-entity map.

    If the maps compare equal, the abstraction step mutated (or returned)
    the original entities instead of producing new ones.
    """
    # Four entities: ids 0..3, classes 0,0,1,1; stamp values track the class.
    entities = []
    for idx in range(4):
        cls = idx // 2
        ent: Entity = Entity(idx, cls, -1)
        ent.properties = {1: [TimeStamp(cls, 1, 1, idx, cls)]}
        entities.append(ent)
    c2e = {0: {entities[0], entities[1]}, 1: {entities[2], entities[3]}}
    d: Discretization = Expert({1: [1.5]}, 0)
    d_p2e, d_c2e, d_p2t = d.discretize_property({}, c2e, {}, 1)
    # Empty diff means raw and discretized maps are identical -> failure.
    diff = compare_time_stamps_c2e(c2e, d_c2e)
    self.assertTrue(diff != "", "Original data was changed during abstraction")
def test_Abstraction_Ignore_Properties(self):
    """Discretizing property 2 must leave other properties' stamps untouched.

    The comparison is against the raw map: property 1 is not discretized
    at all, and property 2's zero values fall below every cutpoint, so
    the output should match the input exactly.
    """
    TOTAL_POINTS = 1000
    cutpoints = []
    p2t = {1: [], 2: []}
    c2e = {}
    p2e = {}
    for value in range(1, TOTAL_POINTS + 1):
        p2t[1].append(TimeStamp(value, value, value, value))
        p2t[2].append(TimeStamp(0, 0, 0, 0))
        cutpoints.append(5)
    d: Discretization = Expert({2: cutpoints}, -1)
    d_p2e, d_c2e, d_p2t = d.discretize_property(p2e, c2e, p2t, 2)
    diff = compare_time_stamps(p2t, d_p2t)
    self.assertTrue(diff == "", diff)
def test_Abstraction_MaxGap_0_Bins_2(self):
    """One cutpoint, max_gap 0: adjacent same-bin stamps merge into one interval."""
    TOTAL_POINTS = 1000
    # Values 1..1000 for a single entity, one stamp per time unit.
    p2t = {1: [TimeStamp(v, v, v, 0) for v in range(1, TOTAL_POINTS + 1)]}
    c2e = {}
    p2e = {}
    d: Discretization = Expert({1: [500]}, 0)
    d_p2e, d_c2e, d_p2t = d.discretize_property(p2e, c2e, p2t, 1)
    expected = {
        1: [
            TimeStamp(0, 1, 499, 0),     # values 1..499  -> bin 0
            TimeStamp(1, 500, 1000, 0),  # values 500..1000 -> bin 1
        ]
    }
    diff = compare_time_stamps(expected, d_p2t)
    self.assertTrue(diff == "", diff)
def test_Abstraction_MaxGap_1_Bins_2_Different_Entities(self):
    """max_gap 1 merges alternating stamps, but never across entities.

    Even values belong to entity 0 and odd values to entity 1, so each
    entity contributes one merged interval per bin.
    """
    TOTAL_POINTS = 1000
    cutpoints = [500]
    p2t = {1: []}
    for value in range(1, TOTAL_POINTS + 1):
        owner = 0 if value % 2 == 0 else 1
        p2t[1].append(TimeStamp(value, value, value, owner))
    expected = {
        1: [
            TimeStamp(0, 1, 499, 1),     # entity 1, bin 0
            TimeStamp(0, 2, 498, 0),     # entity 0, bin 0
            TimeStamp(1, 500, 1000, 0),  # entity 0, bin 1
            TimeStamp(1, 501, 999, 1),   # entity 1, bin 1
        ]
    }
    c2e = {}
    p2e = {}
    d: Discretization = Expert({1: cutpoints}, 1)
    d_p2e, d_c2e, d_p2t = d.discretize_property(p2e, c2e, p2t, 1)
    diff = compare_time_stamps(expected, d_p2t)
    self.assertTrue(diff == "", diff)
def first_method(running_configurations, root_folder, file_id):
    """Drive discretization of one dataset under several configurations.

    Reads partition metadata from <root_folder>\\<file_id>\\partitions,
    builds one Discretization object per running configuration, runs each
    configuration over every property id, writes per-property partitions,
    and finally merges the partitions into the method output folders.

    Args:
        running_configurations: iterable of (method_name, args_string) pairs.
        root_folder: base directory containing one folder per dataset.
        file_id: dataset folder name; also the base name of the input CSV.

    Raises:
        Re-raises any exception thrown during discretization or output
        merging, after logging it to error.log and SAD_LOG_PATH.
    """
    # Windows-style paths are built with escaped backslashes throughout.
    input_path = "%s\\%s\\%s.csv" % (root_folder, file_id, file_id)
    partitions_path = "%s\\%s\\%s" % (root_folder, file_id, "partitions")
    entities_path = "%s\\%s\\%s" % (root_folder, file_id, "entities.csv")
    discretizable = True
    p2e = {}
    c2e = {}
    p2t = {}
    property_ids = []
    class_to_entity_count = {}
    entity_count = 0
    print("Checking partitions...")
    try:
        # Side effect only: validates/creates the partition files.  A format
        # error disables the discretization branch below (best-effort mode).
        get_maps_from_file(input_path, entities_path, CLASS_SEPARATOR)
    except FileFormatNotCorrect as e:
        discretizable = False
    # properties.csv: each row is a list of property ids; only the LAST row
    # survives because property_ids is reassigned every iteration.
    with open(partitions_path + "\\properties.csv") as f:
        in_f = csv.reader(f)
        for line in in_f:
            property_ids = [int(x) for x in list(line)]
    # class_to_entity_count.csv: rows of (class_id, entity_count).
    with open(partitions_path + "\\class_to_entity_count.csv") as f:
        in_f = csv.reader(f)
        for line in in_f:
            class_to_entity_count[int(line[0])] = int(line[1])
    # Count entities by counting raw lines (in_f is created but unused here);
    # the -1 presumably skips a header row -- TODO confirm file format.
    with open(entities_path) as f:
        in_f = csv.reader(f)
        for line in f:
            entity_count += 1
        entity_count -= 1
    print("Partitions done")
    # NOTE(review): Tuple(str, str, Discretization) calls typing.Tuple instead
    # of subscripting it (Tuple[str, str, Discretization]).  Harmless at
    # runtime since local annotations are not evaluated, but not a valid hint.
    discretization_methods: List[Tuple(str, str, Discretization)] = []
    for running_configuration in running_configurations:
        method_name = running_configuration[0]
        args = running_configuration[1]
        if method_name == "EXPERT":
            # EXPERT args encode "<md5>_<max_gap>".
            md5 = args.split("_")[0]
            max_gap = args.split("_")[1]
            discretization_methods.append(
                (method_name, args,
                 Expert(
                     "%s\\%s\\%s\\%s" %
                     (root_folder, file_id, method_name, md5), max_gap)))
        else:
            # All other methods are built from an underscore-separated arg list.
            discretization_methods.append(
                (method_name, args,
                 methods_names_to_functions[method_name](*args.split("_"))))
    # Sort so that configurations sharing the same input map run back to back,
    # allowing the map-cache cleanup below to work per group.
    discretization_methods = sorted(discretization_methods,
                                    key=lambda x: x[2].get_map_used())
    property_count = 0
    total_properties = len(property_ids)
    discretization_count = 0
    total_configurations = len(discretization_methods)
    for pid in property_ids:
        last_map_used = ""
        p2e = {}
        c2e = {}
        p2t = {}
        for running_configuration in discretization_methods:
            discretization_count += 1
            method_name = running_configuration[0]
            args = running_configuration[1]
            print(
                " Discretizing property id %s in method %s, total: %s/%s" %
                (pid, method_name, discretization_count,
                 total_configurations * total_properties))
            print(" ------------------------------------------------------")
            output_path_folder = "%s\\%s\\%s\\%s" % (root_folder, file_id,
                                                     method_name, args)
            vmap_path = "%s\\%s\\%s" % (root_folder, file_id, "vmap.csv")
            try:
                if not exists(output_path_folder):
                    makedirs(output_path_folder)
                if method_name == "KARMALEGO":
                    # KARMALEGO runs once per dataset, not per property.
                    # NOTE(review): removing from the list being iterated can
                    # skip the next configuration -- verify this is intended.
                    discretization_methods.remove(running_configuration)
                    use_karma_lego(input_path, output_path_folder, "TIRPS.csv",
                                   args)
                    run_KL(input_path, output_path_folder, *args)
                    continue
                elif discretizable:
                    d: Discretization = running_configuration[-1]
                    d.property_folder = "%s\\%s\\%s" % (root_folder, file_id,
                                                        "partitions")
                    # Skip configurations whose output already exists
                    # (unless in debug mode); advance the progress counter
                    # as if the remaining properties had been processed.
                    if not DEBUG_MODE and (
                            exists(output_path_folder + "\\states.csv") or
                            exists(output_path_folder +
                                   "\\property%s_cutpoints.temp" % pid)):
                        #discretization_methods.remove(running_configuration)
                        discretization_count += total_properties - 1
                        print(
                            "Output files found! Canceling discretization method for this dataset... %s/%s is the new count."
                            % (discretization_count,
                               total_configurations * total_properties))
                        continue
                    # New input-map group: drop the cached maps so the next
                    # discretization reloads them from disk.
                    if d.get_map_used() != last_map_used:
                        print("***CLEANING MAPS FROM MEMORY***")
                        last_map_used = d.get_map_used()
                        del p2e
                        del c2e
                        del p2t
                        p2e = {}
                        c2e = {}
                        p2t = {}
                    d1, d2, d3 = d.discretize_property(p2e, c2e, p2t, pid)
                    write_partition(d1, d2, d3, d.bins_cutpoints[pid],
                                    output_path_folder, pid)
            except Exception as e:
                # Log to the method folder and the global log, then re-raise.
                print("\n*************EXCPETION THROWN!!!!*************")
                exception_text = "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\nError: %s\n" % (
                    datetime.datetime.now(), input_path, output_path_folder,
                    method_name, args, e)
                print(exception_text)
                print("***********************************************\n")
                with open(output_path_folder + "\\error.log", 'w') as f:
                    f.write(exception_text)
                with open(SAD_LOG_PATH, 'a') as f:
                    f.write(exception_text)
                raise
    print("Writing output...")
    # Second pass: merge the per-property partitions written above into the
    # final per-method output (KARMALEGO writes its own output and is skipped).
    configuration_count = 0
    for running_configuration in discretization_methods:
        method_name = running_configuration[0]
        args = running_configuration[1]
        configuration_count += 1
        method_name = running_configuration[0]
        if method_name == "KARMALEGO":
            continue
        args = running_configuration[1]
        print("Outputting method %s, total: %s/%s" %
              (method_name, configuration_count, total_configurations))
        output_path_folder = "%s\\%s\\%s\\%s" % (root_folder, file_id,
                                                 method_name, args)
        vmap_path = "%s\\%s\\%s" % (root_folder, file_id, "vmap.csv")
        try:
            merge_partitions(output_path_folder, vmap_path, method_name,
                             property_ids,
                             list(class_to_entity_count.keys()),
                             class_to_entity_count, entity_count)
        except Exception as e:
            # Same logging scheme as the discretization loop, then re-raise.
            print(
                "\n*************EXCPETION THROWN WHILE OUTPUTTING!!!!*************"
            )
            exception_text = "--------------------\nDate: %s\nInput file: %s\nOutput path: %s\nMethod: %s\nArgs: %s\nError: %s\n" % (
                datetime.datetime.now(), input_path, output_path_folder,
                method_name, args, e)
            print(exception_text)
            print("***********************************************\n")
            with open(output_path_folder + "\\error.log", 'w') as f:
                f.write(exception_text)
            with open(SAD_LOG_PATH, 'a') as f:
                f.write(exception_text)
            raise
def discretize():
    """Discretize the module-level maps m1/m2/m3 with fixed expert cutpoints.

    Builds an Expert discretizer with hand-picked cutpoints for properties
    44, 3, 4 and 5 (max_gap=1), runs it over the global maps, and prints
    the resulting maps.
    """
    global m1, m2, m3
    expert = Expert({44: [0.74], 3: [26], 4: [21], 5: [70]}, max_gap=1)
    out_p2e, out_c2e, out_p2t = expert.discretize(m1, m2, m3)
    print_maps(out_p2e, out_c2e, out_p2t)