Ejemplo n.º 1
0
    def __init__(self):
        """Compile the frequency/route regexes and load the three word lists.

        Reads ``data/brandname.csv`` and the ``brandname_dict``,
        ``route_dict`` and ``frequency_dict`` sheets of ``data/dict.xlsx``
        (both located next to this module), and feeds the first column of
        each into its own ``ACA`` instance via ``add_words``:

        * ``self.aca`` -- union of the CSV and ``brandname_dict`` entries
        * ``self.aca_route`` -- ``route_dict`` entries
        * ``self.aca_frequency`` -- ``frequency_dict`` entries
        """
        dir_path = os.path.dirname(os.path.realpath(__file__))
        data_dir = os.path.join(dir_path, 'data')

        # Members of a regex character class are listed bare. The previous
        # patterns wrote them as a quoted, comma-separated list, which made
        # "'" and "," members of the class too, so bare punctuation matched.
        self.rep_frequency = re.compile(
            r"([\u4e00-\u9fa5]*[时天日前后早晚午餐饭][\u4e00-\u9fa5]*)"
        )
        self.rep_route = re.compile(
            r"([\u4e00-\u9fa5]*[入射服眼耳外内疗用]+[\u4e00-\u9fa5]*)"
        )

        cfda_names = pd.read_csv(os.path.join(data_dir, 'brandname.csv'),
                                 header=None).iloc[:, 0]

        # One pass over the workbook: a list for ``sheet_name`` returns a
        # dict of DataFrames keyed by sheet name. ``sheet_name`` replaces the
        # removed ``sheetname`` keyword.
        sheets = pd.read_excel(
            os.path.join(data_dir, 'dict.xlsx'),
            sheet_name=['brandname_dict', 'route_dict', 'frequency_dict'],
            header=None)

        # Brand names: de-duplicated union of both sources.
        extra_names = sheets['brandname_dict'].iloc[:, 0]
        self.aca: ACA = ACA()
        self.aca.add_words(list(set(cfda_names) | set(extra_names)))

        self.aca_route: ACA = ACA()
        self.aca_route.add_words(list(sheets['route_dict'].iloc[:, 0]))

        self.aca_frequency: ACA = ACA()
        self.aca_frequency.add_words(
            list(sheets['frequency_dict'].iloc[:, 0]))
Ejemplo n.º 2
0
 def __init__(self, doc_path, true_path, predict_path):
     """Store the given paths, set up a jieba tokenizer and load the terms.

     Args:
         doc_path: Stored unchanged on ``self.doc_path``.
         true_path: Stored unchanged on ``self.true_path``.
         predict_path: Stored unchanged on ``self.predict_path``.

     For each label type, ``<type>_term_frequency.csv`` is read from
     ``analysis/res/term_frequency`` under the parent of this module's
     directory. Its ``term`` column is appended to ``self.term_list`` and
     each term is mapped to its type in ``self.term_label_dict`` (a term
     present in several files keeps the last type read). All terms are then
     added to ``self.aca``.
     """
     self.dir_path = os.path.dirname(os.path.realpath(__file__))
     self.doc_path = doc_path
     self.true_path = true_path
     self.predict_path = predict_path
     # Same entries as ``type_list`` below plus a trailing '@'.
     self.label_list = ['疾病和诊断', '解剖部位', '影像检查', '实验室检验', '药物', '手术', '@']

     # Dedicated tokenizer backed only by the project dictionary file.
     self._jieba = jieba.Tokenizer(dictionary=None)
     self._jieba.set_dictionary(
         os.path.join(self.dir_path, 'data/our_dict1.txt'))
     self._jieba.initialize()
     self._jieba_posseg = jieba.posseg.POSTokenizer(tokenizer=self._jieba)

     self.aca: ACA = ACA()
     type_list = ['疾病和诊断', '解剖部位', '影像检查', '实验室检验', '药物', '手术']
     term_dir = os.path.join(os.path.dirname(self.dir_path),
                             'analysis/res/term_frequency')
     self.term_list = []
     self.term_label_dict = dict()
     for typee in type_list:
         csv_path = os.path.join(term_dir, typee + '_term_frequency.csv')
         # Close the handle deterministically; it used to be opened inline
         # in the read_csv call and never closed.
         with open(csv_path, encoding='utf-8-sig') as csv_file:
             terms = pd.read_csv(csv_file, header=0)['term']
         self.term_list.extend(terms)
         for term_i in terms:
             self.term_label_dict[term_i] = typee
     self.aca.add_words(self.term_list)
Ejemplo n.º 3
0
 def __init__(self):
     """Load the ``drugtype_dict`` sheet into ``self.aca``.

     The first column of the ``drugtype_dict`` sheet in ``data/dict.xlsx``
     (located next to this module) is passed to ``ACA.add_words``.
     """
     dir_path = os.path.dirname(os.path.realpath(__file__))
     # ``sheet_name`` replaces the ``sheetname`` keyword, which current
     # pandas releases reject with a TypeError.
     drugtype_df = pd.read_excel(os.path.join(dir_path, 'data', 'dict.xlsx'),
                                 sheet_name='drugtype_dict',
                                 header=None)
     self.aca: ACA = ACA()
     self.aca.add_words(list(drugtype_df.iloc[:, 0]))
 def __init__(self):
     """Load brand names from the workbook and the CSV into ``self.aca``.

     The first column of the ``brandname_dict`` sheet in ``data/dict.xlsx``
     and the first column of ``data/brandname.csv`` (both located next to
     this module) are merged, de-duplicated and passed to
     ``ACA.add_words``.
     """
     dir_path = os.path.dirname(os.path.realpath(__file__))
     data_dir = os.path.join(dir_path, 'data')
     # ``sheet_name`` replaces the ``sheetname`` keyword, which current
     # pandas releases reject with a TypeError.
     extra_names = pd.read_excel(os.path.join(data_dir, 'dict.xlsx'),
                                 sheet_name='brandname_dict',
                                 header=None).iloc[:, 0]
     cfda_names = pd.read_csv(os.path.join(data_dir, 'brandname.csv'),
                              header=None).iloc[:, 0]
     self.aca: ACA = ACA()
     # Set union removes duplicates within and across the two sources.
     self.aca.add_words(list(set(cfda_names) | set(extra_names)))
Ejemplo n.º 5
0
 def __init__(self):
     """Load the ``route_dict`` sheet and compile the route regex.

     Reads the ``route_dict`` sheet of ``data/dict.xlsx`` (located next to
     this module) once and derives from it:

     * ``self.aca`` -- an ``ACA`` fed with the first column
     * ``self.route_dict`` -- mapping of first-column value to
       second-column value (later rows win on duplicate keys)
     * ``self.rep_route`` -- compiled route pattern
     """
     dir_path = os.path.dirname(os.path.realpath(__file__))
     # Parse the sheet a single time; it used to be read twice.
     # ``sheet_name`` replaces the ``sheetname`` keyword, which current
     # pandas releases reject with a TypeError.
     route_df = pd.read_excel(os.path.join(dir_path, 'data', 'dict.xlsx'),
                              sheet_name='route_dict',
                              header=None)
     route_names = route_df.iloc[:, 0]

     self.aca: ACA = ACA()
     self.aca.add_words(list(route_names))

     self.route_dict = dict(zip(route_names, route_df.iloc[:, 1]))

     # Members of a regex character class are listed bare. The previous
     # pattern wrote them as a quoted, comma-separated list, which made
     # "'" and "," members of the class too, so bare punctuation matched.
     self.rep_route = re.compile(
         r"([\u4e00-\u9fa5]*[入射服眼耳外内疗用]+[\u4e00-\u9fa5]*)"
     )
 def __init__(self):
     """Load the ``frequency_dict`` sheet and compile the frequency regex.

     Reads the ``frequency_dict`` sheet of ``data/dict.xlsx`` (located next
     to this module) once and derives from it:

     * ``self.aca`` -- an ``ACA`` fed with the first column
     * ``self.fre_dict`` -- mapping of first-column value to second-column
       value (later rows win on duplicate keys)
     * ``self.rep_frequency`` -- compiled frequency pattern
     """
     dir_path = os.path.dirname(os.path.realpath(__file__))
     # Parse the sheet a single time; it used to be read twice.
     # ``sheet_name`` replaces the ``sheetname`` keyword, which current
     # pandas releases reject with a TypeError.
     fre_df = pd.read_excel(os.path.join(dir_path, 'data', 'dict.xlsx'),
                            sheet_name='frequency_dict',
                            header=None)
     frequency_names = fre_df.iloc[:, 0]

     self.aca: ACA = ACA()
     self.aca.add_words(list(frequency_names))

     self.fre_dict = dict(zip(frequency_names, fre_df.iloc[:, 1]))

     # Members of a regex character class are listed bare. The previous
     # pattern wrote them as a quoted, comma-separated list, which made
     # "'" and "," members of the class too, so bare punctuation matched.
     self.rep_frequency = re.compile(
         r"([\u4e00-\u9fa5]*[时天日前后早晚午餐饭][\u4e00-\u9fa5]*)"
     )