def __init__(self, parent, mode=None, entity=None):
    """Create the add/alter-entity dialog as a ``Toplevel`` of ``parent``.

    :param parent: widget that owns the new ``Toplevel`` window.
    :param mode: ``'add'`` sets the "add entity" window title, ``'alter'``
        sets the "alter entity" title; any other value leaves the title
        untouched.
    :param entity: stored unchanged on ``self.entity``.
    """
    self.entity = entity
    super().__init__()
    self.top = Toplevel(parent)
    # top.attributes('-alpha',0.9)  # fun attribute "transparent"
    if mode == 'add':
        self.top.title("添加实体")
    elif mode == 'alter':
        self.top.title("修改实体")
    utils.setCenteredDisply(self.top, 400, 300)

    self.ft_1Title = tkFont.Font(family="宋体", size=11, weight=tkFont.BOLD)
    self.ft_2Title = tkFont.Font(family="宋体", size=9, weight=tkFont.BOLD)

    # l_class is read from the configuration file.
    self.conf = Config()
    ent_dict = self.conf.getConf('ENTITY')
    # Other dialogs in this code base treat a str result from getConf as
    # "no entity categories configured"; do the same here instead of
    # failing on ``str.keys()``.
    if isinstance(ent_dict, str):
        ent_dict = {}
    self.l_class = list(ent_dict)
    self.v_class = StringVar()
    self.v_class.set(self.l_class)

    self.status = IntVar()  # flags whether the check box is ticked
    self.checkEntity = "certain"  # whether the entity is confirmed
    self.entity_category = ''
    self.ui(self.top)
def val(embed_dim, lstm_dim, optimizer, lr, gpu, gpu_no, text):
    """Rebuild the BiLSTM-CRF model from the saved config and predict on ``text``.

    The pickled ``(word_vocab, tags)`` pair is read from
    ``./model/weights/config.pkl``, the model is constructed with the given
    hyper-parameters, and the result is passed to ``predict``; afterwards
    ``get_label_text`` is called with the B-tag -> I-tag mapping derived
    from the ``ENTITY`` configuration.

    :param embed_dim: forwarded to ``BiLSTM_CRF_Model``.
    :param lstm_dim: forwarded to ``BiLSTM_CRF_Model``.
    :param optimizer: forwarded to ``BiLSTM_CRF_Model``.
    :param lr: forwarded to ``BiLSTM_CRF_Model``.
    :param gpu: ``'ON'`` enables the GPU session set-up below.
    :param gpu_no: value written to ``CUDA_VISIBLE_DEVICES`` when ``gpu`` is
        ``'ON'``.
    :param text: forwarded to ``predict``.
    """
    if gpu == 'ON':
        # The CUDA environment variables must be in place before the first
        # session is created; setting them afterwards has no effect on
        # which devices are visible.
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_no)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # Bound to a local so the session stays alive for the whole call.
        sess = tf.Session(config=config)

    # NOTE: pickle.load can execute arbitrary code; only load a config.pkl
    # that was produced locally.
    with open('./model/weights/config.pkl', 'rb') as input_:
        (word_vocab, tags) = pickle.load(input_)

    # Index -> tag lookup, in the order the tags were saved.
    TAGS = dict(enumerate(tags))

    conf = Config()
    ent_dict = conf.getConf('ENTITY')
    # Map every 'B-<Entity>' tag to its matching 'I-<Entity>' tag.
    B_I_dict = {}
    for w in ent_dict.keys():
        label = w.capitalize()
        B_I_dict['B-' + label] = 'I-' + label

    num_classes = len(tags)
    num_steps = utils.maxlen
    word_dict_size = len(word_vocab)
    model = bilstm_crf_model.BiLSTM_CRF_Model(
        num_classes, num_steps, word_dict_size, embed_dim, lstm_dim,
        optimizer, lr).model()
    predict(model, TAGS, word_vocab, text)
    get_label_text(B_I_dict)
def __init__(self, parent):
    """Create the 'Label&ShortcutKey Setting' dialog as a ``Toplevel`` of ``parent``.

    :param parent: widget that owns the dialog; also kept on ``self.parent``.
    """
    super().__init__()
    self.top = Toplevel(parent)
    self.parent = parent
    self.top.title('Label&ShortcutKey Setting')
    utils.setCenteredDisply(self.top, 500, 400)
    self.row = 10
    self.column = 20
    # A list can be turned into a tuple with tuple(list).
    self.conf = Config()
    self.entdict = self.conf.getConf('ENTITY')
    # getConf may hand back a str instead of a mapping; fall back to an
    # empty mapping in that case.
    if isinstance(self.entdict, str):
        self.entdict = {}
    self.ui(self.top)
def sckSet(self):
    """Open the entity/shortcut dialog, then rebind shortcuts from the config.

    Blocks until the dialog window is closed, re-reads the ``ENTITY``
    configuration into ``self.ent_dict`` and binds, for every entry,
    ``<Control-Alt-key>`` ('uncertain') and ``<Control-key>`` ('certain') on
    both the text widget and the table. Finally refreshes the shortcut-key
    panel through ``loadShortKey``.
    """
    d = ConfigureEntRel_Dialog(self)
    self.wait_window(d.top)

    conf = Config()
    ent_dict = conf.getConf('ENTITY')
    # The constructor-side code treats a str result as "nothing configured";
    # mirror that so closing the dialog without entities does not crash.
    if isinstance(ent_dict, str):
        ent_dict = {}
    self.ent_dict = ent_dict

    for key, shortcut in self.ent_dict.items():
        uncertain_seq = "<Control-Alt-" + shortcut + ">"
        certain_seq = "<Control-" + shortcut + ">"
        self.text.bind(
            uncertain_seq, self.runSckAdaptor(self.runSCK, 'uncertain', key))
        self.text.bind(
            certain_seq, self.runSckAdaptor(self.runSCK, 'certain', key))
        self.table.bind(
            uncertain_seq,
            self.runSckAdaptor(self.tableAlter, 'uncertain', key))
        self.table.bind(
            certain_seq, self.runSckAdaptor(self.tableAlter, 'certain', key))

    self.loadShortKey()  # refresh the shortcut-key panel
def load_data():
    """Load the training data and persist the vocabulary/tag configuration.

    Parses ``./model/train_all_tag.txt``, builds the vocabulary from the
    lower-cased first field of every row, derives the tag list (``'O'`` plus
    a ``B-``/``I-`` pair per configured entity) and pickles
    ``(word_vocab, tags)`` to ``./model/weights/config.pkl``.

    :return: ``(train_word, train_y, word_vocab, tags)`` where the first two
        items are the output of ``_process_data``.
    """
    # Close the training file deterministically instead of leaking the handle.
    with open('./model/train_all_tag.txt', 'rb') as train_file:
        _, train_word = _parse_data(train_file)

    # Vocabulary: every distinct lower-cased token, in first-seen order.
    word_counts = Counter(
        row[0].lower() for sample in train_word for row in sample)
    word_vocab = list(word_counts)

    # Build the tag set from the configured entity categories.
    conf = Config()
    ent_dict = conf.getConf('ENTITY')
    tags = ['O']
    for w in ent_dict.keys():
        label = w.capitalize()
        tags.append('B-' + label)
        tags.append('I-' + label)

    # Save the initial configuration data.
    with open('./model/weights/config.pkl', 'wb') as output:
        pickle.dump((word_vocab, tags), output)

    train_word, train_y = _process_data(train_word, word_vocab, tags, maxlen)
    return train_word, train_y, word_vocab, tags
def __init__(self, parent_frame):
    """Initialise the annotation frame's parameters and build its UI.

    :param parent_frame: parent widget handed to ``Frame.__init__`` (the
        original note describes it as the window).
    """
    Frame.__init__(self, parent_frame)
    self.Version = "EPAD V1.0"
    self.parent_frame = parent_frame
    self.currentPath = os.getcwd()
    self.fileName = ""
    self.textColumn = 7
    self.fntSize = 13
    self.allKey = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    self.pressCommand = {'a': "a1", 'b': "a2", 'c': "a3"}
    self.textFontSytle = "Times"
    self.textRow = 20
    # Entity serial number; also the insertion position in the treeview.
    self.no = 0
    conf = Config()
    self.recommendFlag1 = "ON"  # forward maximum matching
    self.recommendFlag2 = "OFF"  # deep learning
    self.recoEntity = ''  # the word currently being recommended
    self.entity_list = []  # collection of entities
    self.ent_dict = conf.getConf('ENTITY')  # entity categories
    self.pre_ent_list = []
    # A str result means no usable ENTITY mapping; fall back to an empty
    # one. ent_list is always defined afterwards (previously it was left
    # unset on the str path).
    if isinstance(self.ent_dict, str):
        self.ent_dict = {}
    self.ent_list = list(self.ent_dict)  # list of category names
    self.selectColor = 'light salmon'  # colour of the selection
    self.tagColor = 'lightgreen'  # colour of tagged text
    self.remColor = 'pink'  # colour of recommendations
    self.preColor = 'purple'  # colour of pre-annotations
    self.onlylabel = ''
    self.onlyhow = ''
    self.initUI()
def __init__(self, parent_frame, dir_name, file_path):
    """Initialise the annotation-modification window and build its UI.

    :param parent_frame: widget that owns the new ``Toplevel`` window.
    :param dir_name: the annotator whose work is being modified.
    :param file_path: the annotation file to modify.
    """
    super().__init__()
    self.file_path = file_path  # annotation file to modify
    self.dir_name = dir_name  # annotator to modify
    self.top = Toplevel(parent_frame)
    utils.setCenteredDisply(self.top, 1000, 700)
    self.Version = "EPAD-V1.0 Modify"
    self.currentPath = os.getcwd()
    self.fileName = ""
    self.textColumn = 7
    self.fntSize = 13
    self.allKey = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    self.pressCommand = {'a': "a1", 'b': "a2", 'c': "a3"}
    self.textFontSytle = "Times"
    self.textRow = 20
    # Entity serial number; also the insertion position in the treeview.
    self.no = 0
    self.conf = Config()
    self.recommendFlag1 = "ON"  # forward maximum matching
    self.recommendFlag2 = "OFF"  # deep learning
    self.recoEntity = ''  # the word currently being recommended
    self.entity_list = []  # entity list

    # Entity categories.
    self.ent_dict = self.conf.getConf('ENTITY')
    # The main annotation frame treats a str result as "nothing
    # configured"; do the same here rather than failing on ``str.keys()``.
    if isinstance(self.ent_dict, str):
        self.ent_dict = {}
    # Category names only; per the original note this is not a list of
    # {'entity': ..., 'pos': ...} dicts.
    self.ent_list = list(self.ent_dict)

    self.selectColor = 'light salmon'  # colour of the selection
    self.tagColor = 'lightgreen'  # colour of tagged text
    self.remColor = 'pink'  # colour of recommendations
    self.preColor = 'purple'  # colour of pre-annotations
    self.onlylabel = ''
    self.onlyhow = ''
    self.initUI(self.top)
import os from model.model_utils import * from configparse import Config train_new_path='./model/train_data/' # label_dict need be modified , because the entity is Chinese. conf = Config() label_dict = conf.getConf('ENTITY_DESC') # label_dict = {'身体部位':'Body','症状和体征':'Symptom','检查和检验':'Test','治疗':'Treatment','疾病和诊断':'Disease'} def BIOtag(data_path): document_file = os.listdir(data_path) tag_all_file = open('./model/train_all_tag.txt', 'w', encoding='utf-8') document_file = [file for file in document_file if 'txt'in file] for file in document_file: ori_file = open(data_path + file, 'r', encoding='UTF-8') # ori text label_file= open(data_path + file[:-3]+'ann', 'r', encoding='UTF-8') # label text ori_data2=ori_file.readline() label_list = ['O'] * len(ori_data2) entity_list = label_file.read().split('\n') for entity in entity_list: if entity is not '': ent_index = entity.split('\t') ent_s = int(ent_index[1]) ent_e = int(ent_index[2])+1 ent_type=ent_index[3] ###——————————BIOS打标———————————— # if ent_e-ent_s>1: # 多字符实体 # label_list[ent_s]='B-'+ent_type # for i in range(ent_s+1,ent_e):