def get_sub_tree(TIRP, states, states_by_name, path, class_name, min_ver_support,
                 class_1_tirp_file_name, to_add_entities):
    # Resolve the chunk file that holds the sub-tree of this TIRP's first symbol.
    if states_by_name:
        TIRP_name = states_by_name[TIRP.get_symbols()[0]] + '.txt'
    else:
        TIRP_name = TIRP.get_symbols()[0] + '.txt'
    if class_name == 'class_0':
        file_name = path + '/chunks' + '/' + TIRP_name
    else:
        file_name = path + '/chunks1' + '/' + TIRP_name
    TIRPs = ParseOutputFile.parse_output_file(file_name, 7, states, path, class_name,
                                              min_ver_support, class_1_tirp_file_name,
                                              to_add_entities)
    TIRP.set_childes(TIRPs_in_output_file=TIRPs)
    return TIRP
def insert_tirp_to_tree(tirp_obj_class_1, tirp_obj_class_0, min_ver_support):
    """
    Recursive function that gets two branches and combines them.
    :param tirp_obj_class_1:
    :param tirp_obj_class_0:
    :param min_ver_support:
    :return: combined branch
    """
    childs = tirp_obj_class_1.get_childes().copy()
    for child in childs:
        # Children loaded from JSON may still be plain dicts; rebuild them as TIRP objects.
        if type(child) != TIRP:
            child_tirp = TIRP()
            child_tirp.__dict__.clear()
            child_tirp.__dict__.update(child)
        else:
            child_tirp = child
        if not child_tirp.get_exist_in_class_0():
            curr_tirp = tirp_obj_class_0
            childs_class_0 = tirp_obj_class_0.get_childes()
            while curr_tirp.get_tirp_size() + 1 < child_tirp.get_tirp_size():  # go down the tree until reaching the right level to insert
                childes_new = list()
                for child_class_0 in childs_class_0:
                    if type(child_class_0) != TIRP:
                        curr_tirp = TIRP()
                        curr_tirp.__dict__.clear()
                        curr_tirp.__dict__.update(child_class_0)
                        childes_new.append(curr_tirp)
                    else:
                        childes_new.append(child_class_0)
                childs_class_0 = childes_new
                for curr_tirp in childs_class_0:
                    # Descend into the class 0 child whose symbols/relations are a prefix of child_tirp.
                    curr_size = curr_tirp.get_tirp_size()
                    new_symbols = child_tirp.get_symbols()[:curr_size]
                    num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                    new_rels = child_tirp.get_rels()[:num_of_new_rels]
                    if curr_tirp.get_symbols() == new_symbols:
                        if curr_tirp.get_rels() == new_rels:
                            childs_class_0 = curr_tirp.get_childes()
                            break  # the right level to insert was found
            if not curr_tirp.get_exist_in_class_0():
                # Remove an existing copy of this child before appending the processed one.
                for i in range(0, len(childs_class_0)):
                    if type(childs_class_0[i]) != TIRP:
                        tirp = TIRP()
                        tirp.__dict__.clear()
                        tirp.__dict__.update(childs_class_0[i])
                    else:
                        tirp = childs_class_0[i]
                    if tirp.get_symbols() == child_tirp.get_symbols() and tirp.get_rels() == child_tirp.get_rels():
                        del childs_class_0[i]
                        break
            child_tirp.set_class_0_properties(min_ver_support)
            childs_class_0.append(child_tirp)
        tirp_obj_class_0 = insert_tirp_to_tree(child_tirp, tirp_obj_class_0, min_ver_support)
    return tirp_obj_class_0
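# A small worked example of the prefix test used above (illustrative values only, not taken
# from a real run): a TIRP of size k has k*(k-1)/2 temporal relations, so a size-3 TIRP such as
#
#     symbols = ['A', 'B', 'C'],  rels = ['before', 'before', 'overlaps']   # 3*2/2 = 3 relations
#
# matches a size-2 candidate parent exactly when the parent's symbols equal symbols[:2]
# (['A', 'B']) and its relations equal rels[:1] (['before']). That is how the while loop walks
# down the class 0 tree one level at a time until the insertion level is reached.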
def marge_trees(data_set_path, dir_path_class_0, dir_path_class_1, states_by_name, min_ver_support):
    """
    This function finds TIRPs that exist only in class 1 and inserts them into the right place in the tree.
    :param data_set_path:
    :param states_by_name:
    :param min_ver_support:
    :return:
    """
    root_elements_class_1 = list()
    root_elements_class_0 = list()
    temp_list = list()  # class 1 TIRPs whose roots do not exist in class 0
    with open(data_set_path + dir_path_class_0 + 'root.txt', "r") as fr:  # load roots from class 0
        lines = fr.readlines()
        for line in lines:
            tirp_obj_class_0 = TIRP()
            tirp_obj_class_0.__dict__.clear()
            tirp_obj_class_0.__dict__.update(json.loads(line))
            root_elements_class_0.append(tirp_obj_class_0)
    with open(data_set_path + dir_path_class_1 + 'root.txt', "r") as fr:  # load roots from class 1
        lines = fr.readlines()
        for line in lines:
            tirp_obj_class_1 = TIRP()
            tirp_obj_class_1.__dict__.clear()
            tirp_obj_class_1.__dict__.update(json.loads(line))
            root_elements_class_1.append(tirp_obj_class_1)
    for root_element_class_1 in root_elements_class_1:
        file_name = states_by_name[root_element_class_1.get_symbols()[0]] + '.txt'
        if not root_element_class_1.get_exist_in_class_0():  # the root does not exist in class 0
            with open(data_set_path + dir_path_class_0 + 'root.txt', "a") as fr:
                r = json.dumps(root_element_class_1, default=lambda x: x.__dict__)
                root_element_class_1.set_class_0_properties(min_ver_support)
                fr.write("%s\n" % r)
            temp_list.append(root_element_class_1)  # insert it into the temp list
            if os.path.isfile(data_set_path + dir_path_class_1 + file_name):  # the root has children
                with open(data_set_path + dir_path_class_1 + file_name, "r") as fr:
                    tirp = fr.readlines()
                with open(data_set_path + dir_path_class_0 + file_name, "a") as fs:
                    fs.writelines(tirp)  # insert the whole branch into the tree
        else:  # the root exists in class 0
            for root_element_class_0 in root_elements_class_0:
                if root_element_class_0.get_symbols()[0] == root_element_class_1.get_symbols()[0]:  # find the matching root in class 0
                    root_element_class_0.set_exist_in_class_0()  # mark the root element that was found
                    if len(root_element_class_0.get_childes()) == 0:
                        if os.path.isfile(data_set_path + dir_path_class_1 + file_name):
                            root_element_class_0.set_childes(has_childs=True)
                    break
            if os.path.isfile(data_set_path + dir_path_class_1 + file_name):  # the root has children
                with open(data_set_path + dir_path_class_1 + file_name, "r") as fr:  # load the class 1 branch
                    tirp_class_1 = fr.readline()
                    tirp_dict_class_1 = json.loads(tirp_class_1)
                    tirp_obj_class_1 = TIRP()
                    tirp_obj_class_1.__dict__.clear()
                    tirp_obj_class_1.__dict__.update(tirp_dict_class_1)
                if os.path.isfile(data_set_path + dir_path_class_0 + file_name):  # the branch exists in class 0
                    with open(data_set_path + dir_path_class_0 + file_name, "r") as fr:  # load the class 0 branch
                        tirp_class_0 = fr.readline()
                        tirp_dict_class_0 = json.loads(tirp_class_0)
                        tirp_obj_class_0 = TIRP()
                        tirp_obj_class_0.__dict__.clear()
                        tirp_obj_class_0.__dict__.update(tirp_dict_class_0)
                        tirp_obj_class_0.set_exist_in_class_0()
                    tirp_obj_class_0 = insert_tirp_to_tree(tirp_obj_class_1, tirp_obj_class_0,
                                                           min_ver_support)  # combine the branches
                else:  # the branch does not exist in class 0, but the root does
                    if root_element_class_0.get_symbols()[0] == root_element_class_1.get_symbols()[0]:
                        if len(root_element_class_0.get_childes()) > 0 and \
                                type(root_element_class_0.get_childes()[0]) == bool:
                            root_element_class_0.update_childs(list())
                        tirp_obj_class_0 = insert_tirp_to_tree(tirp_obj_class_1, root_element_class_0,
                                                               min_ver_support)
                    else:
                        if os.path.isfile(data_set_path + dir_path_class_1 + file_name):
                            tirp_obj_class_0 = tirp_obj_class_1
                        else:
                            tirp_obj_class_0 = root_element_class_1
                # Write the combined branch back to the class 0 chunk file.
                tirp_obj_class_0_json = json.dumps(tirp_obj_class_0, default=lambda x: x.__dict__)
                with open(data_set_path + dir_path_class_0 + file_name, "w") as fs:
                    fs.write(tirp_obj_class_0_json)
    # Rewrite the class 0 root file with the updated roots plus the roots found only in class 1.
    os.remove(data_set_path + dir_path_class_0 + 'root.txt')
    with open(data_set_path + dir_path_class_0 + 'root.txt', "a") as fr:
        for root_element_class_0 in root_elements_class_0:
            r = json.dumps(root_element_class_0, default=lambda x: x.__dict__)
            fr.write("%s\n" % r)
        for root_element_class_1 in temp_list:
            r = json.dumps(root_element_class_1, default=lambda x: x.__dict__)
            fr.write("%s\n" % r)
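# For orientation, marge_trees works against the on-disk layout read and written above
# (the concrete paths and values below are hypothetical examples, not fixed by this module):
#
#     <data_set_path><dir_path_class_0>root.txt            # one JSON-encoded root TIRP per line
#     <data_set_path><dir_path_class_0><state name>.txt    # a root's whole branch as one JSON object
#     <data_set_path><dir_path_class_1>root.txt            # same structure for class 1
#     <data_set_path><dir_path_class_1><state name>.txt
#
# so a hedged call might look like:
#
#     marge_trees(data_set_path='/some/dataset', dir_path_class_0='/tempChunks/',
#                 dir_path_class_1='/tempChunks1/', states_by_name=states_by_name,
#                 min_ver_support=50)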
def parse_main_index(path, dir_path_class_0, dir_path_class_1, states, states_by_name, class_name,
                     min_ver_support, second_class_output_file_name, to_add_entities):
    TIRP_per_file.clear()
    root_elements = list()
    if class_name == 'class_0':
        path_to_chuncks = path + '/chunks/'
    else:
        path_to_chuncks = path + '/chunks1/'
    with open(path_to_chuncks + 'main_Index.txt') as fp:
        # Every entry in main_Index.txt spans three lines: the TIRP line, the instances line
        # and the name of the chunk file that holds its sub-tree (if any).
        line1 = fp.readline()
        while line1:
            line2 = fp.readline()
            file_name = fp.readline().rstrip()
            tirp = ParseOutputFile.parse_TIRP(line1, line2, states, path, class_name, min_ver_support,
                                              file_name, second_class_output_file_name, to_add_entities)
            if os.path.isfile(path_to_chuncks + file_name):
                tirp.set_childes(has_childs=True)
                tirp_with_childs = copy.deepcopy(tirp)
                tirp_with_childs = get_sub_tree(tirp_with_childs, states, states_by_name, path,
                                                class_name, min_ver_support, file_name, to_add_entities)
                tirp_with_childs_json = json.dumps(tirp_with_childs, default=lambda x: x.__dict__)
                if class_name == 'class_0':
                    path_to_file = path + dir_path_class_0 + file_name
                else:
                    path_to_file = path + dir_path_class_1 + file_name
                with open(path_to_file, "w") as fs:
                    fs.write(tirp_with_childs_json)
            else:
                if class_name == 'class_0' and second_class_output_file_name != 'File does not exist':
                    if not to_add_entities:  # take the TIRP that carries the entities
                        dir_path_class_1_new = '/tempChunks1_with_entities/'
                    else:
                        dir_path_class_1_new = dir_path_class_1
                    if os.path.isfile(path + dir_path_class_1_new + file_name):
                        with open(path + dir_path_class_1_new + file_name, "r") as fr:
                            tirp_dict = json.load(fr)
                            tirp_obj = TIRP()
                            tirp_obj.__dict__.clear()
                            tirp_obj.__dict__.update(tirp_dict)
                            tirp.set_class_1_properties(tirp_obj)
                            if not to_add_entities:
                                tirp.set_supporting_instances(list())
                                tirp.set_supporting_entitie(list())
                    else:
                        with open(path + dir_path_class_1_new + 'root.txt', "r") as fr:
                            lines = fr.readlines()
                            for line in lines:
                                tirp_obj_class_1 = TIRP()
                                tirp_obj_class_1.__dict__.clear()
                                tirp_obj_class_1.__dict__.update(json.loads(line))
                                if tirp_obj_class_1.get_symbols() == tirp.get_symbols():
                                    # Bug fix: get_rels must be called, not compared as a bound method.
                                    if tirp_obj_class_1.get_rels() == tirp.get_rels():
                                        tirp.set_class_1_properties(tirp_obj_class_1)
                                        if not to_add_entities:
                                            tirp.set_supporting_instances(list())
                                            tirp.set_supporting_entitie(list())
                                        break
            s = json.dumps(tirp, default=lambda x: x.__dict__)
            root_elements.append(s)
            TIRP_name = tirp.get_unique_name()
            TIRP_per_file[TIRP_name] = file_name
            line1 = fp.readline()
    return root_elements
def find_Path_of_tirps(symbols, rels, data_set_path, states, states_by_name=None, to_add_entities=None):
    try:
        relations_dict = {"<": "before", "m": "meets", "o": "overlaps", "f": "finished by",
                          "c": "contains", "=": "equals", "s": "starts", "-": 7}
        rels = list(filter(None, rels.split('.')))
        tirps_path = []
        symbols = list(filter(None, symbols.split('-')))
        if not to_add_entities:
            file_name = symbols[0] + '.txt'
            dir_path = '/tempChunks/'
            for r in range(0, len(rels)):
                rels[r] = relations_dict[rels[r]]
            for i in range(0, len(symbols)):
                symbols[i] = states[symbols[i]]
        else:
            file_name = states_by_name[symbols[0]] + '.txt'
            dir_path = '/tempChunks_with_entities/'
        tirp_size = len(symbols)
        if os.path.isfile(data_set_path + dir_path + file_name):
            with open(data_set_path + dir_path + file_name, "r") as fr:
                tirp_dict = json.load(fr)
                tirp_obj = TIRP()
                tirp_obj.__dict__.clear()
                tirp_obj.__dict__.update(tirp_dict)
                tirps_path.append(tirp_obj)
                if tirp_size > 1:
                    childs = tirp_obj.get_childes()
                    while len(tirps_path) < tirp_size:
                        for child in childs:
                            curr_tirp = TIRP()
                            curr_tirp.__dict__.clear()
                            curr_tirp.__dict__.update(child)
                            curr_size = curr_tirp.get_tirp_size()
                            new_symbols = symbols[:curr_size]
                            num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                            new_rels = rels[:num_of_new_rels]
                            if curr_tirp.get_symbols() == new_symbols:
                                if curr_tirp.get_rels() == new_rels:
                                    tirps_path.append(curr_tirp)
                                    childs = curr_tirp.get_childes()
                                    break
                return tirps_path
        else:
            with open(data_set_path + dir_path + 'root.txt', "r") as fr:
                roots_from_file = fr.readlines()
                for line in roots_from_file:
                    tirp_dict = json.loads(line)
                    tirp_obj = TIRP()
                    tirp_obj.__dict__.clear()
                    tirp_obj.__dict__.update(tirp_dict)
                    if tirp_obj.get_symbols() == symbols:
                        if tirp_obj.get_rels() == rels:
                            return [tirp_obj]
    except Exception as e:
        print(e)
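# A minimal usage sketch for find_Path_of_tirps (values are illustrative assumptions, not taken
# from a real dataset): symbols is the dash-separated symbol-number string and rels the
# dot-separated relation string, in the same form KarmaLego prints them:
#
#     states = {'17': 'Glucose-High', '42': 'BP-Low'}      # hypothetical state-number -> name map
#     path_of_tirps = find_Path_of_tirps(symbols='17-42-', rels='<.',
#                                        data_set_path='/some/dataset', states=states)
#     # expected: [size-1 TIRP for 'Glucose-High', size-2 TIRP 'Glucose-High' before 'BP-Low']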
def parse_output_file(filename, rel_number, states, path, class_name, min_ver_support,
                      class_1_tirp_file_name, to_add_entities):
    """
    This function creates a TIRP list from a KarmaLego output file.
    Output file structure:
    [0] TIRP_size
    [1] symbolNumber-symbolNumber-sym...-
    [2] rel.rel.rel...
    [3] mean_duration
    [4] mean_offset_from_start
    [5] mean_offset_from_end
    [6] vertical_support
    [7] mean_horizontal_support
    [8] entity_id
    [9] [start_time-end_time]
    [10] duration
    [11] offset_from_start
    [12] offset_from_end
    :param filename:
    :param rel_number:
    :return: TIRPs list
    """
    if not input_validation(filename):
        return
    if rel_allen_seven == rel_number:
        relations_dict = {"<": "before", "m": "meets", "o": "overlaps", "f": "finished by",
                          "c": "contains", "=": "equals", "s": "starts", "-": 7}
    else:
        print("Wrong number of relations")
        return
    TIRP_list = []
    with open(filename) as f:
        lines = [line.rstrip('\n') for line in f]
    for i in range(0, len(lines) - 1):
        if i % 2 == 1:  # every TIRP spans two lines; process them in pairs
            continue
        line_vector = lines[i].split()
        next_line = lines[i + 1]
        instances = []
        entities = list()
        TIRP_size = int(line_vector[0])
        symbols = list(filter(None, line_vector[1].split('-')))
        if states:
            for j in range(0, len(symbols)):  # map state numbers to state names
                symbols[j] = states[symbols[j]]
        if TIRP_size > 1:
            index = 0
            relations = list(filter(None, line_vector[index + 2].split('.')))
            for r in range(0, len(relations)):
                relations[r] = relations_dict[relations[r]]
        else:
            relations = list()
            index = -1
        mean_duration = float(line_vector[index + 3])
        mean_offset_from_start = float(line_vector[index + 4])
        mean_offset_from_end = float(line_vector[index + 5])
        vertical_support = int(line_vector[index + 6])
        mean_horizontal_support = float(line_vector[index + 7])
        get_supporting_instances(entities, instances, line_vector, symbols, index=index, next_line=next_line)
        TIRP_obj = TIRP(tirp_size=TIRP_size, symbols=symbols, relation=relations,
                        supporting_instances=instances, supporting_entities=entities,
                        vertical_support=vertical_support, mean_horizontal_support=mean_horizontal_support,
                        mean_duration=mean_duration, mean_offset_from_start=mean_offset_from_start,
                        mean_offset_from_end=mean_offset_from_end, path=path,
                        min_vertical_support=min_ver_support)
        if class_name == 'class_0':
            class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, to_add_entities)
            TIRP_obj.set_exist_in_class_0()
            if class_1_tirp:
                if not to_add_entities:
                    class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, True)
                TIRP_obj.set_class_1_properties(class_1_tirp)
                if not to_add_entities:
                    TIRP_obj.set_supporting_instances(list())
                    TIRP_obj.set_supporting_entitie(list())
        TIRP_list.append(TIRP_obj)
    return TIRP_list
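# For reference, a hypothetical pair of lines for a size-2 TIRP as parse_output_file expects them
# (numeric values are placeholders; the trailing instance fields are whatever
# get_supporting_instances consumes from the first and second lines):
#
#     2 17-42- <. 12.5 3.0 1.5 4 1.25 ...
#     <second line: the supporting instances / entities of this TIRP>
#
# i.e. TIRP_size, dash-separated symbol numbers, dot-separated relations, mean_duration,
# mean_offset_from_start, mean_offset_from_end, vertical_support and mean_horizontal_support,
# matching the field list in the docstring above.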
def find_tirp_in_class_1(path, class_0_tirp, class_1_tirp_file_name, to_add_entities):
    if to_add_entities:
        dir_path = path + '/tempChunks1_with_entities/'
    else:
        dir_path = path + '/tempChunks1/'
    path_to_class_1_tirp = dir_path + class_1_tirp_file_name
    if os.path.isfile(path_to_class_1_tirp):
        with open(path_to_class_1_tirp, "r") as fr:
            tirp_dict = json.load(fr)
        class_1_tirp = TIRP()
        class_1_tirp.__dict__.clear()
        class_1_tirp.__dict__.update(tirp_dict)
        class_0_tirp_size = class_0_tirp.get_tirp_size()
        class_1_tirp_size = class_1_tirp.get_tirp_size()
        found = True
        if class_0_tirp_size == 1:  # root element
            class_1_tirp.set_exist_in_class_0()
            with open(path_to_class_1_tirp, "w") as fw:
                class_1_tirp_json = json.dumps(class_1_tirp, default=lambda x: x.__dict__)
                fw.write(class_1_tirp_json)
            root_elements_class_1 = list()
            with open(dir_path + 'root.txt', "r") as fr:
                lines = fr.readlines()
                for line in lines:
                    tirp_obj_class_1 = TIRP()
                    tirp_obj_class_1.__dict__.clear()
                    tirp_obj_class_1.__dict__.update(json.loads(line))
                    root_elements_class_1.append(tirp_obj_class_1)
            for root_element in root_elements_class_1:
                if root_element.get_symbols()[0] == class_1_tirp.get_symbols()[0]:
                    root_element.set_exist_in_class_0()
                    break
            os.remove(dir_path + 'root.txt')
            with open(dir_path + 'root.txt', "a") as fr:
                for root_element in root_elements_class_1:
                    r = json.dumps(root_element, default=lambda x: x.__dict__)
                    fr.write("%s\n" % r)
            return class_1_tirp
        else:
            father = class_1_tirp
            childs = class_1_tirp.get_childes()
            while class_1_tirp_size < class_0_tirp_size and len(childs) > 0 and found:
                found = False
                for index, child in enumerate(childs):
                    curr_tirp = TIRP()
                    curr_tirp.__dict__.clear()
                    curr_tirp.__dict__.update(child)
                    curr_size = curr_tirp.get_tirp_size()
                    new_symbols = class_0_tirp.get_symbols()[:curr_size]
                    num_of_new_rels = int(curr_size * (curr_size - 1) / 2)
                    new_rels = class_0_tirp.get_rels()[:num_of_new_rels]
                    if curr_tirp.get_symbols() == new_symbols:
                        if curr_tirp.get_rels() == new_rels:
                            if curr_tirp.get_tirp_size() == class_0_tirp.get_tirp_size():
                                curr_tirp.set_exist_in_class_0()
                                childs[index] = curr_tirp
                                father.update_childs(childs)
                                with open(path_to_class_1_tirp, "w") as fw:
                                    class_1_tirp_json = json.dumps(class_1_tirp, default=lambda x: x.__dict__)
                                    fw.write(class_1_tirp_json)
                                return curr_tirp
                            else:
                                father = curr_tirp
                                childs = curr_tirp.get_childes()
                                class_1_tirp_size = curr_tirp.get_tirp_size()
                                found = True
                                break
            return None
    else:
        if class_0_tirp.get_tirp_size() == 1:
            root_elements_class_1 = list()
            with open(dir_path + 'root.txt', "r") as fr:
                lines = fr.readlines()
                for line in lines:
                    tirp_obj_class_1 = TIRP()
                    tirp_obj_class_1.__dict__.clear()
                    tirp_obj_class_1.__dict__.update(json.loads(line))
                    root_elements_class_1.append(tirp_obj_class_1)
            for root_element in root_elements_class_1:
                if root_element.get_symbols()[0] == class_0_tirp.get_symbols()[0]:
                    root_element.set_exist_in_class_0()
                    os.remove(dir_path + 'root.txt')
                    with open(dir_path + 'root.txt', "a") as fr:
                        for element in root_elements_class_1:
                            r = json.dumps(element, default=lambda x: x.__dict__)
                            fr.write("%s\n" % r)
                    return root_element
            return None
        else:
            return None
def parse_TIRP(line1, line2, states, path, class_name, min_ver_support, class_1_tirp_file_name,
               second_class_output_file_name, to_add_entities):
    relations_dict = {"<": "before", "m": "meets", "o": "overlaps", "f": "finished by",
                      "c": "contains", "=": "equals", "s": "starts", "-": 7}
    line_vector = line1.split()
    instances = []
    entities = list()
    TIRP_size = int(line_vector[0])
    symbols = list(filter(None, line_vector[1].split('-')))
    if states:
        for i in range(0, len(symbols)):  # map state numbers to state names
            symbols[i] = states[symbols[i]]
    if TIRP_size > 1:
        index = 0
        relations = list(filter(None, line_vector[index + 2].split('.')))
        for r in range(0, len(relations)):
            relations[r] = relations_dict[relations[r]]
    else:
        relations = list()
        index = -1
    mean_duration = float(line_vector[index + 3])
    mean_offset_from_start = float(line_vector[index + 4])
    mean_offset_from_end = float(line_vector[index + 5])
    vertical_support = int(line_vector[index + 6])
    mean_horizontal_support = float(line_vector[index + 7])
    get_supporting_instances(entities=entities, instances=instances, line_vector=line_vector,
                             symbols=symbols, index=index, next_line=line2)
    TIRP_obj = TIRP(tirp_size=TIRP_size, symbols=symbols, relation=relations,
                    supporting_instances=instances, supporting_entities=entities,
                    vertical_support=vertical_support, mean_horizontal_support=mean_horizontal_support,
                    mean_duration=mean_duration, mean_offset_from_start=mean_offset_from_start,
                    mean_offset_from_end=mean_offset_from_end, path=path,
                    min_vertical_support=min_ver_support)
    if class_name == 'class_0' and second_class_output_file_name != 'File does not exist':
        TIRP_obj.set_exist_in_class_0()
        class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, to_add_entities)
        if class_1_tirp:
            if not to_add_entities:
                class_1_tirp = find_tirp_in_class_1(path, TIRP_obj, class_1_tirp_file_name, True)
            TIRP_obj.set_class_1_properties(class_1_tirp)
            if not to_add_entities:
                TIRP_obj.set_supporting_instances(list())
                TIRP_obj.set_supporting_entitie(list())
    return TIRP_obj