コード例 #1
0
ファイル: AddAltEnt_Dialog.py プロジェクト: cloudXia777/EPAD
 def __init__(self, parent, mode=None, entity=None):
     '''
     Build the add/alter-entity dialog as a ``Toplevel`` child of *parent*.

     :param parent: widget that owns the new ``Toplevel`` window.
     :param mode: ``'add'`` or ``'alter'``; selects the window title. Any
         other value leaves the title untouched.
     :param entity: stored on ``self.entity``; not used further in this
         constructor.
     '''
     self.entity = entity
     super().__init__()
     self.top = Toplevel(parent)
     # The two modes are mutually exclusive, so test them as one chain.
     if mode == 'add':
         self.top.title("添加实体")
     elif mode == 'alter':
         self.top.title("修改实体")
     utils.setCenteredDisply(self.top, 400, 300)
     self.ft_1Title = tkFont.Font(family="宋体", size=11, weight=tkFont.BOLD)
     self.ft_2Title = tkFont.Font(family="宋体", size=9, weight=tkFont.BOLD)

     # The entity classes (l_class) come from the ENTITY config section.
     self.conf = Config()
     ent_dict = self.conf.getConf('ENTITY')
     # Other constructors in this project treat a str result from getConf
     # as "no entities configured"; do the same here instead of failing on
     # ``str.keys``.
     if isinstance(ent_dict, str):
         ent_dict = {}
     self.l_class = list(ent_dict.keys())
     self.v_class = StringVar()
     self.v_class.set(self.l_class)
     self.status = IntVar()  # flags whether the check box is ticked
     self.checkEntity = "certain"  # whether the entity is confirmed
     self.entity_category = ''
     self.ui(self.top)
コード例 #2
0
def val(embed_dim, lstm_dim, optimizer, lr, gpu, gpu_no, text):
    """Rebuild the BiLSTM-CRF model from the saved config and label *text*.

    Loads ``(word_vocab, tags)`` from ``./model/weights/config.pkl``, builds
    the model, then calls ``predict`` followed by ``get_label_text``.

    :param embed_dim: forwarded to ``BiLSTM_CRF_Model``.
    :param lstm_dim: forwarded to ``BiLSTM_CRF_Model``.
    :param optimizer: forwarded to ``BiLSTM_CRF_Model``.
    :param lr: forwarded to ``BiLSTM_CRF_Model``.
    :param gpu: the string ``'ON'`` enables the GPU session setup; any other
        value skips it.
    :param gpu_no: value exported as ``CUDA_VISIBLE_DEVICES`` (converted
        with ``str`` so an int is accepted too).
    :param text: forwarded to ``predict``.
    """
    if gpu == 'ON':
        # The CUDA_* variables are only honoured if they are set before the
        # GPU runtime is initialised, i.e. before the session is created.
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
        os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu_no)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # NOTE(review): the session is never handed to the model explicitly;
        # it is kept referenced for the duration of the call as before.
        # Confirm whether it should be registered with the backend.
        sess = tf.Session(config=config)  # noqa: F841
    # NOTE: pickle.load can execute arbitrary code; this file must only ever
    # be one written by this project's own training step.
    with open('./model/weights/config.pkl', 'rb') as input_:
        (word_vocab, tags) = pickle.load(input_)
    # Index -> tag lookup, in the order the tags were saved.
    TAGS = dict(enumerate(tags))
    conf = Config()
    ent_dict = conf.getConf('ENTITY')
    # Map every "B-<Entity>" tag to its matching "I-<Entity>" tag.
    B_I_dict = {
        'B-' + w.capitalize(): 'I-' + w.capitalize()
        for w in ent_dict.keys()
    }
    num_classes = len(tags)
    num_steps = utils.maxlen
    word_dict_size = len(word_vocab)
    model = bilstm_crf_model.BiLSTM_CRF_Model(num_classes, num_steps,
                                              word_dict_size, embed_dim,
                                              lstm_dim, optimizer, lr).model()
    predict(model, TAGS, word_vocab, text)
    get_label_text(B_I_dict)
コード例 #3
0
ファイル: ConfigureEntRel.py プロジェクト: cloudXia777/EPAD
 def __init__(self, parent):
     '''
     Build the label / shortcut-key settings window as a ``Toplevel``.

     :param parent: widget that owns the new ``Toplevel``; also kept on
         ``self.parent``.
     '''
     super().__init__()
     self.top = Toplevel(parent)
     self.parent = parent
     self.top.title('Label&ShortcutKey Setting')
     utils.setCenteredDisply(self.top, 500, 400)
     self.row = 10
     self.column = 20
     # NOTE: a list can be converted to a tuple with tuple(list).
     self.conf = Config()
     self.entdict = self.conf.getConf('ENTITY')
     # getConf may hand back a str instead of a mapping; fall back to an
     # empty mapping so the UI code can iterate it safely.
     if isinstance(self.entdict, str):
         self.entdict = {}
     self.ui(self.top)
コード例 #4
0
 def sckSet(self):
     '''
     Open the entity/shortcut settings dialog, wait for it to close, then
     re-read the ENTITY config section and rebind the keyboard shortcuts on
     both the text and the table widgets.
     '''
     dialog = ConfigureEntRel_Dialog(self)
     self.wait_window(dialog.top)
     self.ent_dict = Config().getConf('ENTITY')
     for label, shortcut in self.ent_dict.items():
         uncertain_seq = "<Control-Alt-" + shortcut + ">"
         certain_seq = "<Control-" + shortcut + ">"
         # Text widget first, then the table, each with both variants.
         for widget, handler in ((self.text, self.runSCK),
                                 (self.table, self.tableAlter)):
             widget.bind(uncertain_seq,
                         self.runSckAdaptor(handler, 'uncertain', label))
             widget.bind(certain_seq,
                         self.runSckAdaptor(handler, 'certain', label))
     self.loadShortKey()  # refresh the shortcut-key panel
コード例 #5
0
ファイル: model_utils.py プロジェクト: cloudXia777/EPAD
def load_data():
    """Load the tagged training corpus and prepare the model inputs.

    Reads ``./model/train_all_tag.txt``, builds the vocabulary from the
    lower-cased first column of every row, derives the tag list from the
    ``ENTITY`` config section, saves ``(word_vocab, tags)`` to
    ``./model/weights/config.pkl`` and runs ``_process_data``.

    :return: ``(train_word, train_y, word_vocab, tags)``; the first two
        items are whatever ``_process_data`` returns.
    """
    # Use a context manager so the corpus handle is always closed; the
    # parsed data is iterated twice below, so it is already materialised.
    with open('./model/train_all_tag.txt', 'rb') as corpus:
        _train, train_word = _parse_data(corpus)
    word_counts = Counter(row[0].lower() for sample in train_word
                          for row in sample)
    word_vocab = list(word_counts)  # distinct tokens, first-seen order

    # Build the tag set: 'O' plus a B-/I- pair per configured entity.
    conf = Config()
    ent_dict = conf.getConf('ENTITY')
    tags = ['O']
    for name in ent_dict.keys():
        label = name.capitalize()
        tags.extend(('B-' + label, 'I-' + label))

    # Persist the vocabulary and tags so prediction can rebuild the model.
    with open('./model/weights/config.pkl', 'wb') as output:
        pickle.dump((word_vocab, tags), output)
    train_word, train_y = _process_data(train_word, word_vocab, tags, maxlen)
    return train_word, train_y, word_vocab, tags
コード例 #6
0
    def __init__(self, parent_frame):
        '''
        Initialise the application state and build the UI.

        :param parent_frame: parent widget passed to ``Frame.__init__``.
        '''
        Frame.__init__(self, parent_frame)
        self.Version = "EPAD V1.0"
        self.parent_frame = parent_frame
        self.currentPath = os.getcwd()
        self.fileName = ""
        self.textColumn = 7
        self.fntSize = 13
        self.allKey = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        self.pressCommand = {'a': "a1", 'b': "a2", 'c': "a3"}
        self.textFontSytle = "Times"
        self.textRow = 20
        # Running entity index; also the insert position in the treeview.
        self.no = 0

        conf = Config()
        self.recommendFlag1 = "ON"  # forward maximum matching
        self.recommendFlag2 = "OFF"  # deep learning
        self.recoEntity = ''  # word currently being recommended
        self.entity_list = []  # collected entities
        self.ent_dict = conf.getConf('ENTITY')  # entity categories
        self.pre_ent_list = []
        # getConf may return a str instead of a mapping; treat that as
        # "no categories".
        if isinstance(self.ent_dict, str):
            self.ent_dict = {}
        # Always define ent_list (the category names) so later code can rely
        # on the attribute even when no categories are configured.
        self.ent_list = list(self.ent_dict.keys())
        self.selectColor = 'light salmon'  # colour of the selection
        self.tagColor = 'lightgreen'  # colour of tagged text
        self.remColor = 'pink'  # colour of recommended text
        self.preColor = 'purple'  # colour of pre-annotated text
        self.onlylabel = ''
        self.onlyhow = ''
        self.initUI()
コード例 #7
0
ファイル: Modify.py プロジェクト: cloudXia777/EPAD
    def __init__(self, parent_frame, dir_name, file_path):
        '''
        Initialise the modify-annotation window state and build its UI.

        :param parent_frame: widget that owns the new ``Toplevel`` window.
        :param dir_name: the annotator whose work is being modified.
        :param file_path: the annotation file to modify.
        '''
        super().__init__()
        self.file_path = file_path
        self.dir_name = dir_name
        self.top = Toplevel(parent_frame)
        utils.setCenteredDisply(self.top, 1000, 700)
        self.Version = "EPAD-V1.0 Modify"
        self.currentPath = os.getcwd()
        self.fileName = ""
        self.textColumn = 7
        self.fntSize = 13
        self.allKey = "1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
        self.pressCommand = {'a': "a1", 'b': "a2", 'c': "a3"}
        self.textFontSytle = "Times"
        self.textRow = 20
        # Running entity index; also the insert position in the treeview.
        self.no = 0

        self.conf = Config()
        self.recommendFlag1 = "ON"  # forward maximum matching
        self.recommendFlag2 = "OFF"  # deep learning
        self.recoEntity = ''  # word currently being recommended
        self.entity_list = []  # collected entities
        self.ent_dict = self.conf.getConf('ENTITY')  # entity categories
        # The main window's constructor treats a str result from getConf as
        # "no categories"; apply the same guard here instead of failing on
        # ``str.keys``.
        if isinstance(self.ent_dict, str):
            self.ent_dict = {}
        # Category names only (not the per-entity dicts).
        self.ent_list = list(self.ent_dict.keys())
        self.selectColor = 'light salmon'  # colour of the selection
        self.tagColor = 'lightgreen'  # colour of tagged text
        self.remColor = 'pink'  # colour of recommended text
        self.preColor = 'purple'  # colour of pre-annotated text
        self.onlylabel = ''
        self.onlyhow = ''
        self.initUI(self.top)
コード例 #8
0
import os
from model.model_utils import *
from configparse import Config

train_new_path='./model/train_data/'
# label_dict is read from the ENTITY_DESC config section at import time.
# NOTE(review): the entity labels in this project are Chinese, so this
# presumably maps each label to an English tag name (see the commented-out
# sample right below) -- confirm against the config file.
conf = Config()
label_dict = conf.getConf('ENTITY_DESC')
# label_dict = {'身体部位':'Body','症状和体征':'Symptom','检查和检验':'Test','治疗':'Treatment','疾病和诊断':'Disease'}
def BIOtag(data_path):
    document_file = os.listdir(data_path)
    tag_all_file = open('./model/train_all_tag.txt', 'w', encoding='utf-8')
    document_file = [file for file in document_file if 'txt'in file]

    for file in document_file:
        ori_file = open(data_path + file, 'r', encoding='UTF-8')  # ori text
        label_file= open(data_path + file[:-3]+'ann', 'r', encoding='UTF-8')  # label text
        ori_data2=ori_file.readline()

        label_list = ['O'] * len(ori_data2)
        entity_list = label_file.read().split('\n')
        for entity in entity_list:
            if entity is not '':
                ent_index = entity.split('\t')
                ent_s = int(ent_index[1])
                ent_e = int(ent_index[2])+1
                ent_type=ent_index[3]
                ###——————————BIOS打标————————————
                # if ent_e-ent_s>1:  # 多字符实体
                #     label_list[ent_s]='B-'+ent_type
                #     for i in range(ent_s+1,ent_e):