def __init__(self):
    """Compile the frequency/route patterns and load the matcher dictionaries.

    Reads, relative to the directory containing this source file:

    * ``data/brandname.csv`` (first column, no header row) and the
      ``brandname_dict`` sheet of ``data/dict.xlsx`` -> ``self.aca``
    * the ``route_dict`` sheet of ``data/dict.xlsx`` -> ``self.aca_route``
    * the ``frequency_dict`` sheet of ``data/dict.xlsx`` ->
      ``self.aca_frequency``

    Only the first column of each sheet is used, and sheets are read
    without a header row.

    Raises:
        Whatever pandas raises when a data file or worksheet is missing.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    dict_path = os.path.join(dir_path, 'data', 'dict.xlsx')

    def first_column(sheet):
        # First-column values of a header-less worksheet in dict.xlsx.
        # ``sheet_name`` is the supported keyword; the older ``sheetname``
        # spelling is rejected by current pandas releases.
        return list(
            pd.read_excel(dict_path, sheet_name=sheet,
                          header=None).iloc[:, 0])

    # A run of CJK characters containing at least one keyword character.
    # The keyword class is written without quotes/commas: inside ``[...]``
    # those are literal members, so the previous form also accepted an
    # ASCII "'" or "," in place of a keyword character.
    self.rep_frequency = re.compile(
        r"([\u4e00-\u9fa5]*[时天日前后早晚午餐饭][\u4e00-\u9fa5]*)"
    )
    self.rep_route = re.compile(
        r"([\u4e00-\u9fa5]*[入射服眼耳外内疗用]+[\u4e00-\u9fa5]*)"
    )

    # Brand names: de-duplicated union of the CSV list and the extra sheet.
    self.aca: ACA = ACA()
    brandname_list_cfda = list(
        set(
            pd.read_csv(os.path.join(dir_path, 'data', 'brandname.csv'),
                        header=None).iloc[:, 0]))
    brandname_list_addition = first_column('brandname_dict')
    brandname_list = list(
        set(brandname_list_cfda + brandname_list_addition))
    self.aca.add_words(brandname_list)

    # Administration-route vocabulary.
    self.aca_route: ACA = ACA()
    self.aca_route.add_words(first_column('route_dict'))

    # Dosing-frequency vocabulary.
    self.aca_frequency: ACA = ACA()
    self.aca_frequency.add_words(first_column('frequency_dict'))
def __init__(self, doc_path, true_path, predict_path):
    """Store the input paths and build the tokenizer and term matcher.

    Args:
        doc_path: stored unchanged as ``self.doc_path``.
        true_path: stored unchanged as ``self.true_path``.
        predict_path: stored unchanged as ``self.predict_path``.

    Side effects:
        * Initializes a private jieba tokenizer from ``data/our_dict1.txt``
          (relative to this file's directory) and a POS tokenizer on top
          of it.
        * For every entity type, reads
          ``analysis/res/term_frequency/<type>_term_frequency.csv`` from
          the parent of this file's directory and collects its ``term``
          column into ``self.term_list`` / ``self.term_label_dict``.
        * Feeds all collected terms to ``self.aca``.
    """
    self.dir_path = os.path.dirname(os.path.realpath(__file__))
    self.doc_path = doc_path
    self.true_path = true_path
    self.predict_path = predict_path
    self.label_list = ['疾病和诊断', '解剖部位', '影像检查', '实验室检验',
                       '药物', '手术', '@']

    # Dedicated tokenizer instance that uses only the project dictionary.
    self._jieba = jieba.Tokenizer(dictionary=None)
    self._jieba.set_dictionary(
        os.path.join(self.dir_path, 'data/our_dict1.txt'))
    self._jieba.initialize()
    self._jieba_posseg = jieba.posseg.POSTokenizer(tokenizer=self._jieba)

    self.aca: ACA = ACA()
    type_list = ['疾病和诊断', '解剖部位', '影像检查', '实验室检验',
                 '药物', '手术']
    self.term_list = []
    self.term_label_dict = dict()
    term_dir = os.path.dirname(self.dir_path)
    for typee in type_list:
        csv_path = os.path.join(
            term_dir,
            'analysis/res/term_frequency/' + typee + '_term_frequency.csv')
        # Open through a context manager so the handle is closed once
        # pandas has parsed the file; it was previously left open.
        with open(csv_path, encoding='utf-8-sig') as csv_file:
            file_i = pd.read_csv(csv_file, header=0)
        terms = list(file_i['term'])
        self.term_list.extend(terms)
        # A term listed under several types keeps the last type seen.
        for term_i in terms:
            self.term_label_dict[term_i] = typee
    self.aca.add_words(self.term_list)
def __init__(self):
    """Load the drug-type vocabulary into ``self.aca``.

    Reads the first column of the ``drugtype_dict`` sheet of
    ``data/dict.xlsx`` (relative to this file's directory, no header row)
    and registers every value with a fresh ``ACA`` instance.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # ``sheet_name`` is the supported keyword; the older ``sheetname``
    # spelling is rejected by current pandas releases.
    drugtype_sheet = pd.read_excel(
        os.path.join(dir_path, 'data', 'dict.xlsx'),
        sheet_name='drugtype_dict',
        header=None)
    brandtype_list = list(drugtype_sheet.iloc[:, 0])
    self.aca: ACA = ACA()
    self.aca.add_words(brandtype_list)
def __init__(self):
    """Load the brand-name vocabulary into ``self.aca``.

    The vocabulary is the de-duplicated union of:

    * the first column of the ``brandname_dict`` sheet of
      ``data/dict.xlsx`` and
    * the first column of ``data/brandname.csv``,

    both read without a header row from this file's directory.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    data_dir = os.path.join(dir_path, 'data')

    # ``sheet_name`` is the supported keyword; the older ``sheetname``
    # spelling is rejected by current pandas releases.
    extra_sheet = pd.read_excel(os.path.join(data_dir, 'dict.xlsx'),
                                sheet_name='brandname_dict',
                                header=None)
    brandname_list_addition = list(extra_sheet.iloc[:, 0])

    cfda_frame = pd.read_csv(os.path.join(data_dir, 'brandname.csv'),
                             header=None)
    brandname_list_cfda = list(set(cfda_frame.iloc[:, 0]))

    brandname_list = list(
        set(brandname_list_cfda + brandname_list_addition))
    self.aca: ACA = ACA()
    self.aca.add_words(brandname_list)
def __init__(self):
    """Load the route vocabulary, its lookup table, and the route pattern.

    Reads the ``route_dict`` sheet of ``data/dict.xlsx`` (relative to this
    file's directory, no header row) and sets:

    * ``self.aca``        - ``ACA`` instance fed with the first column.
    * ``self.route_dict`` - maps each first-column value to the value in
      the second column of the same row (a later duplicate key overwrites
      an earlier one).
    * ``self.rep_route``  - regex for a run of CJK characters containing
      at least one route keyword character.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # The sheet is parsed once and reused for both the word list and the
    # mapping. ``sheet_name`` is the supported keyword; the older
    # ``sheetname`` spelling is rejected by current pandas releases.
    route_df = pd.read_excel(os.path.join(dir_path, 'data', 'dict.xlsx'),
                             sheet_name='route_dict',
                             header=None)

    route_list = list(route_df.iloc[:, 0])
    self.aca: ACA = ACA()
    self.aca.add_words(route_list)

    # Column 0 -> column 1, row by row.
    self.route_dict = dict(
        zip(route_df.iloc[:, 0].values, route_df.iloc[:, 1].values))

    # The keyword class is written without quotes/commas: inside ``[...]``
    # those are literal members, so the previous form also accepted an
    # ASCII "'" or "," in place of a keyword character.
    self.rep_route = re.compile(
        r"([\u4e00-\u9fa5]*[入射服眼耳外内疗用]+[\u4e00-\u9fa5]*)"
    )
def __init__(self):
    """Load the frequency vocabulary, its lookup table, and the pattern.

    Reads the ``frequency_dict`` sheet of ``data/dict.xlsx`` (relative to
    this file's directory, no header row) and sets:

    * ``self.aca``           - ``ACA`` instance fed with the first column.
    * ``self.fre_dict``      - maps each first-column value to the value
      in the second column of the same row (a later duplicate key
      overwrites an earlier one).
    * ``self.rep_frequency`` - regex for a run of CJK characters
      containing one frequency keyword character.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    # The sheet is parsed once and reused for both the word list and the
    # mapping. ``sheet_name`` is the supported keyword; the older
    # ``sheetname`` spelling is rejected by current pandas releases.
    fre_df = pd.read_excel(os.path.join(dir_path, 'data', 'dict.xlsx'),
                           sheet_name='frequency_dict',
                           header=None)

    frequency_list = list(fre_df.iloc[:, 0])
    self.aca: ACA = ACA()
    self.aca.add_words(frequency_list)

    # Column 0 -> column 1, row by row.
    self.fre_dict = dict(
        zip(fre_df.iloc[:, 0].values, fre_df.iloc[:, 1].values))

    # The keyword class is written without quotes/commas: inside ``[...]``
    # those are literal members, so the previous form also accepted an
    # ASCII "'" or "," in place of a keyword character.
    self.rep_frequency = re.compile(
        r"([\u4e00-\u9fa5]*[时天日前后早晚午餐饭][\u4e00-\u9fa5]*)"
    )