def gain_txt(path):
    """
    Read the text file at ``path`` and return its content as a list of
    single characters.

    Every ``"\\n"`` in the text is expanded to ``"\\r\\n"`` before the text
    is split into characters, so each line break contributes two list items.

    :param path: path of the txt file, passed to ``normal_util.read_txt``
    :return: list of one-character strings
    """
    text = normal_util.read_txt(path)
    # Expand line feeds first so each break occupies two positions.
    expanded = text.replace("\n", "\r\n")
    return list(expanded)
def gain_relation_contact_entity(entity_T, path):
    """
    Walk through an ann file and dispatch every record to the entity or the
    relation handler.

    Lines of length 0 or 1 are ignored. For each remaining line the first
    tab-separated field is given to ``filter_relation``; a truthy result
    sends the line to ``gain_relation``, otherwise to ``gain_entity``.

    :param entity_T: object handed unchanged to ``gain_entity`` /
        ``gain_relation`` (presumably the entity table they fill in --
        confirm against those helpers)
    :param path: path of the ann file
    :return: None
    """
    for record in normal_util.read_txt(path).split("\n"):
        if len(record) < 2:
            continue
        record_id = record.split("\t")[0]
        handler = gain_relation if filter_relation(record_id) else gain_entity
        handler(entity_T, record)
def clean_txt(path):
    """
    Rewrite the file at ``path`` in place with all spaces removed.

    Both the ASCII space and the ideographic space (U+3000) are deleted from
    every line, and lines that end up empty are dropped.

    :param path: path of the file to clean; it is overwritten as UTF-8
    :return: None
    """
    raw_lines = normal_util.read_txt(path).split("\n")
    last_position = len(raw_lines) - 1

    def _chunks():
        # Yield the pieces to write, in order: each kept line, followed by
        # a newline unless that line was the final element of the split.
        for position, raw_line in enumerate(raw_lines):
            squeezed = raw_line.replace(" ", "").replace("\u3000", "")
            if not squeezed:
                continue
            yield squeezed
            if position < last_position:
                yield "\n"

    # The content is fully read above, so truncating the file here is safe.
    with open(path, "w", encoding="utf-8") as out:
        out.writelines(_chunks())
def read_file(paths):
    """
    Collect the last tab-separated field of every line of a file.

    Lines whose last field is empty (including blank lines) are skipped.

    :param paths: value passed straight to ``normal_util.read_txt``
        (used here as a single path)
    :return: list of the non-empty last fields, in file order
    """
    text = normal_util.read_txt(paths)
    last_fields = (row.split("\t")[-1] for row in text.split("\n"))
    return [field for field in last_fields if field]
def gain_label(path, word_count):
    """
    Build the per-character label list described by an ann file.

    The result starts as ``word_count`` copies of ``"O"``. Every ann line
    that has at least two tab-separated fields and whose first field
    contains ``"T"`` is read as ``<id>\\t<name> <start> <end>...``; the
    positions ``start`` (inclusive) to ``end`` (exclusive) are overwritten
    with the record id (the first field).

    :param path: path of the ann file
    :param word_count: number of characters in the txt the labels belong to
    :return: list of ``word_count`` labels
    :raises ValueError: if a start/end token is not an integer
    :raises IndexError: if a record lacks start/end tokens or its span
        reaches past ``word_count``
    """
    labels = ["O"] * word_count
    for record in normal_util.read_txt(path).split("\n"):
        # Split once and reuse the fields instead of re-splitting per access.
        fields = record.split("\t")
        if len(fields) <= 1 or "T" not in fields[0]:
            continue
        label_no = fields[0]
        # Second field layout: "<name> <start> <end>"; the name is unused.
        span_tokens = fields[1].split(" ")
        start_index = int(span_tokens[1])
        end_index = int(span_tokens[2])
        # Assign index by index (not by slice) so an out-of-range span still
        # raises IndexError rather than silently growing the list.
        for position in range(start_index, end_index):
            labels[position] = label_no
    return labels
def read_txt(path):
    """
    Read the file at ``path``, split it into lines and hand the lines to
    ``read_content``.

    :param path: path of the file, passed to ``normal_util.read_txt``
    :return: tuple ``(labels, contents, length)`` as produced by
        ``read_content``
    """
    lines = normal_util.read_txt(path).split("\n")
    # Unpack explicitly: read_content must yield exactly three values.
    labels, contents, length = read_content(lines)
    return (labels, contents, length)
def copy(path, path_name):
    """
    Read the content at ``path`` and pass it to ``normal_util.write_content``
    together with ``path_name``.

    :param path: source path, read with ``normal_util.read_txt``
    :param path_name: second argument of ``normal_util.write_content``
        (presumably the destination path -- confirm against that helper)
    :return: None
    """
    normal_util.write_content(normal_util.read_txt(path), path_name)