def marge_trees(data_set_path, dir_path_class_0, dir_path_class_1, states_by_name, min_ver_support):
    """
    This function finds TIRPs that exist only in class 1 and inserts them into the right place in the tree.
    :param data_set_path:
    :param dir_path_class_0:
    :param dir_path_class_1:
    :param states_by_name:
    :param min_ver_support:
    :return:
    """
    root_elements_class_1 = list()
    root_elements_class_0 = list()
    temp_list = list()  # for class 1 TIRPs that have no children

    # load roots from class 0
    with open(data_set_path + dir_path_class_0 + 'root.txt', "r") as fr:
        lines = fr.readlines()
        for line in lines:
            tirp_obj_class_0 = TIRP()
            tirp_obj_class_0.__dict__.clear()
            tirp_obj_class_0.__dict__.update(json.loads(line))
            root_elements_class_0.append(tirp_obj_class_0)

    # load roots from class 1
    with open(data_set_path + dir_path_class_1 + 'root.txt', "r") as fr:
        lines = fr.readlines()
        for line in lines:
            tirp_obj_class_1 = TIRP()
            tirp_obj_class_1.__dict__.clear()
            tirp_obj_class_1.__dict__.update(json.loads(line))
            root_elements_class_1.append(tirp_obj_class_1)

    for root_element_class_1 in root_elements_class_1:
        file_name = states_by_name[root_element_class_1.get_symbols()[0]] + '.txt'
        if not root_element_class_1.get_exist_in_class_0():  # the root does not exist in class 0
            with open(data_set_path + dir_path_class_0 + 'root.txt', "a") as fr:
                r = json.dumps(root_element_class_1, default=lambda x: x.__dict__)
                root_element_class_1.set_class_0_properties(min_ver_support)
                fr.write("%s\n" % r)
            temp_list.append(root_element_class_1)  # insert it into the temp list
            if os.path.isfile(data_set_path + dir_path_class_1 + file_name):  # the root has children
                with open(data_set_path + dir_path_class_1 + file_name, "r") as fr:
                    tirp = fr.readlines()
                with open(data_set_path + dir_path_class_0 + file_name, "a") as fs:
                    fs.writelines(tirp)  # insert the whole branch into the tree
        else:  # the root exists in class 0
            for root_element_class_0 in root_elements_class_0:
                if root_element_class_0.get_symbols()[0] == root_element_class_1.get_symbols()[0]:  # find the matching root in class 0
                    root_element_class_0.set_exist_in_class_0()  # mark root elements that were found
                    if len(root_element_class_0.get_childes()) == 0:
                        if os.path.isfile(data_set_path + dir_path_class_1 + file_name):
                            root_element_class_0.set_childes(has_childs=True)
                    break
            if os.path.isfile(data_set_path + dir_path_class_1 + file_name):  # the root has children
                with open(data_set_path + dir_path_class_1 + file_name, "r") as fr:  # load the class 1 branch
                    tirp_class_1 = fr.readline()
                    tirp_dict_class_1 = json.loads(tirp_class_1)
                    tirp_obj_class_1 = TIRP()
                    tirp_obj_class_1.__dict__.clear()
                    tirp_obj_class_1.__dict__.update(tirp_dict_class_1)
            if os.path.isfile(data_set_path + dir_path_class_0 + file_name):  # the branch exists in class 0
                with open(data_set_path + dir_path_class_0 + file_name, "r") as fr:  # load the class 0 branch
                    tirp_class_0 = fr.readline()
                    tirp_dict_class_0 = json.loads(tirp_class_0)
                    tirp_obj_class_0 = TIRP()
                    tirp_obj_class_0.__dict__.clear()
                    tirp_obj_class_0.__dict__.update(tirp_dict_class_0)
                    tirp_obj_class_0.set_exist_in_class_0()
                # if os.path.isfile(data_set_path + '/tempChunks1/' + file_name):  # if the branch exists in class 1
                tirp_obj_class_0 = insert_tirp_to_tree(tirp_obj_class_1, tirp_obj_class_0, min_ver_support)  # combine the branches
            else:  # the branch does not exist in class 0 but the root does
                if root_element_class_0.get_symbols()[0] == root_element_class_1.get_symbols()[0]:
                    if len(root_element_class_0.get_childes()) > 0 and type(root_element_class_0.get_childes()[0]) == bool:
                        root_element_class_0.update_childs(list())
                    tirp_obj_class_0 = insert_tirp_to_tree(tirp_obj_class_1, root_element_class_0, min_ver_support)
                else:
                    if os.path.isfile(data_set_path + dir_path_class_1 + file_name):
                        tirp_obj_class_0 = tirp_obj_class_1
                    else:
                        tirp_obj_class_0 = root_element_class_1
            tirp_obj_class_0_json = json.dumps(tirp_obj_class_0, default=lambda x: x.__dict__)
            with open(data_set_path + dir_path_class_0 + file_name, "w") as fs:
                fs.write(tirp_obj_class_0_json)

    # rewrite the class 0 root file with the updated roots plus the roots copied from class 1
    os.remove(data_set_path + dir_path_class_0 + 'root.txt')
    with open(data_set_path + dir_path_class_0 + 'root.txt', "a") as fr:
        for root_element_class_0 in root_elements_class_0:
            r = json.dumps(root_element_class_0, default=lambda x: x.__dict__)
            fr.write("%s\n" % r)
        for root_element_class_1 in temp_list:
            r = json.dumps(root_element_class_1, default=lambda x: x.__dict__)
            fr.write("%s\n" % r)
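# Hedged illustration (not part of the original pipeline): marge_trees persists tree
# nodes by dumping each object's __dict__ as one JSON line in root.txt and rebuilds
# them by updating an empty __dict__. The _ExampleNode class below is a hypothetical
# stand-in for TIRP, used only to show that round-trip pattern.
def _example_root_line_roundtrip():
    import json

    class _ExampleNode(object):
        def __init__(self, symbols=None):
            self.symbols = symbols if symbols is not None else []

    node = _ExampleNode(symbols=['A'])
    line = json.dumps(node, default=lambda x: x.__dict__)  # one line in root.txt
    restored = _ExampleNode()
    restored.__dict__.clear()
    restored.__dict__.update(json.loads(line))  # same rebuild pattern as above
    return restored.symbols == node.symbols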
def find_tirp_in_class_1(path, class_0_tirp, class_1_tirp_file_name, to_add_entities):
    if to_add_entities:
        dir_path = path + '/tempChunks1_with_entities/'
    else:
        dir_path = path + '/tempChunks1/'
    path_to_class_1_tirp = dir_path + class_1_tirp_file_name
    if os.path.isfile(path_to_class_1_tirp):
        with open(path_to_class_1_tirp, "r") as fr:
            tirp_dict = json.load(fr)
        class_1_tirp = TIRP()
        class_1_tirp.__dict__.clear()
        class_1_tirp.__dict__.update(tirp_dict)
        class_0_tirp_size = class_0_tirp.get_tirp_size()
        class_1_tirp_size = class_1_tirp.get_tirp_size()
        found = True
        if class_0_tirp_size == 1:  # root element
            class_1_tirp.set_exist_in_class_0()
            with open(path_to_class_1_tirp, "w") as fw:
                class_1_tirp_json = json.dumps(class_1_tirp, default=lambda x: x.__dict__)
                fw.write(class_1_tirp_json)
            # mark the matching root in the class 1 root file as existing in class 0
            root_elements_class_1 = list()
            with open(dir_path + 'root.txt', "r") as fr:
                lines = fr.readlines()
                for line in lines:
                    tirp_obj_class_1 = TIRP()
                    tirp_obj_class_1.__dict__.clear()
                    tirp_obj_class_1.__dict__.update(json.loads(line))
                    root_elements_class_1.append(tirp_obj_class_1)
            for root_element in root_elements_class_1:
                if root_element.get_symbols()[0] == class_1_tirp.get_symbols()[0]:
                    root_element.set_exist_in_class_0()
                    break
            os.remove(dir_path + 'root.txt')
            with open(dir_path + 'root.txt', "a") as fr:
                for root_element in root_elements_class_1:
                    r = json.dumps(root_element, default=lambda x: x.__dict__)
                    fr.write("%s\n" % r)
            return class_1_tirp
        else:
            # descend the class 1 tree, following the child whose symbols and relations
            # are a prefix of class_0_tirp, until a node of the same size is found
            father = class_1_tirp
            childs = class_1_tirp.get_childes()
            while class_1_tirp_size < class_0_tirp_size and len(childs) > 0 and found:
                found = False
                for index, child in enumerate(childs):
                    curr_tirp = TIRP()
                    curr_tirp.__dict__.clear()
                    curr_tirp.__dict__.update(child)
                    curr_size = curr_tirp.get_tirp_size()
                    new_symbols = class_0_tirp.get_symbols()[:curr_size]
                    num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                    new_rels = class_0_tirp.get_rels()[:num_of_new_rels]
                    if curr_tirp.get_symbols() == new_symbols:
                        if curr_tirp.get_rels() == new_rels:
                            if curr_tirp.get_tirp_size() == class_0_tirp.get_tirp_size():
                                curr_tirp.set_exist_in_class_0()
                                childs[index] = curr_tirp
                                father.update_childs(childs)
                                with open(path_to_class_1_tirp, "w") as fw:
                                    class_1_tirp_json = json.dumps(class_1_tirp, default=lambda x: x.__dict__)
                                    fw.write(class_1_tirp_json)
                                return curr_tirp
                            else:
                                father = curr_tirp
                                childs = curr_tirp.get_childes()
                                class_1_tirp_size = curr_tirp.get_tirp_size()
                                found = True
                                break
            return None
    else:
        if class_0_tirp.get_tirp_size() == 1:
            root_elements_class_1 = list()
            with open(dir_path + 'root.txt', "r") as fr:
                lines = fr.readlines()
                for line in lines:
                    tirp_obj_class_1 = TIRP()
                    tirp_obj_class_1.__dict__.clear()
                    tirp_obj_class_1.__dict__.update(json.loads(line))
                    root_elements_class_1.append(tirp_obj_class_1)
            for root_element in root_elements_class_1:
                if root_element.get_symbols()[0] == class_0_tirp.get_symbols()[0]:
                    root_element.set_exist_in_class_0()
                    os.remove(dir_path + 'root.txt')
                    with open(dir_path + 'root.txt', "a") as fr:
                        for element in root_elements_class_1:
                            r = json.dumps(element, default=lambda x: x.__dict__)
                            fr.write("%s\n" % r)
                    return root_element
            return None
        else:
            return None
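# Hedged sketch (illustrative values only, not called by the pipeline) of the prefix
# test used in find_tirp_in_class_1: a child TIRP of size k lies on the path to
# class_0_tirp if it matches the first k symbols and the first k*(k-1)/2 Allen
# relations (the half-matrix size) of class_0_tirp.
def _example_prefix_match(parent_symbols, parent_rels, child_symbols, child_rels):
    k = len(child_symbols)
    num_of_rels = int(k * (k - 1) / 2)  # same half-matrix count as in the function above
    return (child_symbols == parent_symbols[:k]
            and child_rels == parent_rels[:num_of_rels])

# e.g. _example_prefix_match(['A', 'B', 'C'], ['before', 'meets', 'before'],
#                            ['A', 'B'], ['before']) -> True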
def parse_output_file(filename, rel_number, states, path, class_name, min_ver_support,
                      class_1_tirp_file_name, to_add_entities):
    """
    This function creates a TIRP list from a KarmaLego output file.
    Output file structure:
    [0] TIRP_size
    [1] symbolNumber-symbolNumber-sym...-
    [2] rel.rel.rel...
    [3] mean_duration
    [4] mean_offset_from_start
    [5] mean_offset_from_end
    [6] vertical_support
    [7] mean_horizontal_support
    [8] entity_id
    [9] [start_time-end_time]
    [10] duration
    [11] offset_from_start
    [12] offset_from_end
    :param filename:
    :param rel_number:
    :return: TIRPs list
    """
    if not input_validation(filename):
        return
    if rel_number == rel_allen_seven:
        relations_dict = {
            "<": "before",
            "m": "meets",
            "o": "overlaps",
            "f": "finished by",
            "c": "contains",
            "=": "equals",
            "s": "starts",
            "-": 7
        }
    else:
        print("Wrong number of relations")
        return
    TIRP_list = []
    lines = [line.rstrip('\n') for line in open(filename)]
    # TIRPs come in pairs of lines: the header line and the supporting-instances line
    for i in range(0, len(lines) - 1):
        if i % 2 == 1:
            continue
        line_vector = lines[i].split()
        next_line = lines[i + 1]
        instances = []
        entities = list()
        TIRP_size = int(line_vector[0])
        symbols = list(filter(None, line_vector[1].split('-')))
        if states:
            for j in range(0, len(symbols)):
                symbols[j] = states[symbols[j]]
        if TIRP_size > 1:
            index = 0
            relations = list(filter(None, line_vector[index + 2].split('.')))
            for r in range(0, len(relations)):
                relations[r] = relations_dict[relations[r]]
        else:
            # a single-symbol TIRP has no relations field, so the statistics shift one token left
            relations = list()
            index = -1
        mean_duration = float(line_vector[index + 3])
        mean_offset_from_start = float(line_vector[index + 4])
        mean_offset_from_end = float(line_vector[index + 5])
        vertical_support = int(line_vector[index + 6])
        mean_horizontal_support = float(line_vector[index + 7])
        get_supporting_instances(entities, instances, line_vector, symbols, index=index, next_line=next_line)
        TIRP_obj = TIRP(tirp_size=TIRP_size,
                        symbols=symbols,
                        relation=relations,
                        supporting_instances=instances,
                        supporting_entities=entities,
                        vertical_support=vertical_support,
                        mean_horizontal_support=mean_horizontal_support,
                        mean_duration=mean_duration,
                        mean_offset_from_start=mean_offset_from_start,
                        mean_offset_from_end=mean_offset_from_end,
                        path=path,
                        min_vertical_support=min_ver_support)
        if class_name == 'class_0':
            class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, to_add_entities)
            TIRP_obj.set_exist_in_class_0()
            if class_1_tirp:
                if not to_add_entities:
                    class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, True)
                TIRP_obj.set_class_1_properties(class_1_tirp)
        if not to_add_entities:
            TIRP_obj.set_supporting_instances(list())
            TIRP_obj.set_supporting_entitie(list())
        TIRP_list.append(TIRP_obj)
    return TIRP_list
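# Hedged sketch of the field-index shift handled in parse_output_file: when
# TIRP_size == 1 there is no relations token, so every statistic moves one position
# to the left (index = -1 instead of 0). The sample tokens in the usage comments are
# invented purely to illustrate the layout described in the docstring above.
def _example_field_offsets(line_vector):
    tirp_size = int(line_vector[0])
    index = 0 if tirp_size > 1 else -1
    return {
        'mean_duration': float(line_vector[index + 3]),
        'vertical_support': int(line_vector[index + 6]),
    }

# e.g. _example_field_offsets("2 7-12- <. 3.5 0.0 1.0 18 1.2".split())
#      _example_field_offsets("1 7- 3.5 0.0 1.0 18 1.2".split())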
def parse_TIRP(line1, line2, states, path, class_name, min_ver_support,
               class_1_tirp_file_name, second_class_output_file_name, to_add_entities):
    relations_dict = {
        "<": "before",
        "m": "meets",
        "o": "overlaps",
        "f": "finished by",
        "c": "contains",
        "=": "equals",
        "s": "starts",
        "-": 7
    }
    line_vector = line1.split()
    instances = []
    entities = list()
    TIRP_size = int(line_vector[0])
    symbols = list(filter(None, line_vector[1].split('-')))
    if states:
        for i in range(0, len(symbols)):
            symbols[i] = states[symbols[i]]
    if TIRP_size > 1:
        index = 0
        relations = list(filter(None, line_vector[index + 2].split('.')))
        for r in range(0, len(relations)):
            relations[r] = relations_dict[relations[r]]
    else:
        relations = list()
        index = -1
    mean_duration = float(line_vector[index + 3])
    mean_offset_from_start = float(line_vector[index + 4])
    mean_offset_from_end = float(line_vector[index + 5])
    vertical_support = int(line_vector[index + 6])
    mean_horizontal_support = float(line_vector[index + 7])
    get_supporting_instances(entities=entities,
                             instances=instances,
                             line_vector=line_vector,
                             symbols=symbols,
                             index=index,
                             next_line=line2)
    TIRP_obj = TIRP(tirp_size=TIRP_size,
                    symbols=symbols,
                    relation=relations,
                    supporting_instances=instances,
                    supporting_entities=entities,
                    vertical_support=vertical_support,
                    mean_horizontal_support=mean_horizontal_support,
                    mean_duration=mean_duration,
                    mean_offset_from_start=mean_offset_from_start,
                    mean_offset_from_end=mean_offset_from_end,
                    path=path,
                    min_vertical_support=min_ver_support)
    if class_name == 'class_0' and second_class_output_file_name != 'File does not exist':
        TIRP_obj.set_exist_in_class_0()
        class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, to_add_entities)
        if class_1_tirp:
            if not to_add_entities:
                class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, True)
            TIRP_obj.set_class_1_properties(class_1_tirp)
    if not to_add_entities:
        TIRP_obj.set_supporting_instances(list())
        TIRP_obj.set_supporting_entitie(list())
    return TIRP_obj
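# Hedged sketch of the relation decoding performed in parse_TIRP and parse_output_file:
# the dotted relation string from the header line is split on '.' and each one-letter
# code is mapped to its Allen-relation name (only the seven named codes are shown here).
def _example_decode_relations(rel_field):
    relations_dict = {"<": "before", "m": "meets", "o": "overlaps",
                      "f": "finished by", "c": "contains", "=": "equals",
                      "s": "starts"}
    return [relations_dict[r] for r in filter(None, rel_field.split('.'))]

# e.g. _example_decode_relations("<.m.") == ["before", "meets"]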