コード例 #1
0
ファイル: cui.py プロジェクト: weilin510/Thesis
def extract_sldi(txt_file, output=True):
    """Map the terms of a text file to UMLS CUIs with MetaMap.

    The original author's note warns that part of this function was not
    brought up to date, so use it with care.  The input file is expected to
    be in MetaMap's ``sldi`` format.

    Args:
        txt_file: Path of the text file handed to ``mm.extract_concepts``.
        output: When true, print the details of every concept.

    Returns:
        list: The CUI of every returned concept, in the order received.
    """
    # The second element of the result (error output) is not inspected.
    concepts, _error = mm.extract_concepts(filename=txt_file,
                                           word_sense_disambiguation=True)
    collected = []
    for item in concepts:
        # item.index carries the line information, item.semtypes the
        # semantic type information.
        if output:
            print("Index:" + item.index)
            details = (
                "CUI:" + item.cui,
                "SMT:" + item.semtypes,
                "POS:" + item.pos_info,
                "PREFER_N:" + item.preferred_name,
                "NEGATED:" + mmip.trigger_parser(item.trigger),
                "TRIGGER:" + item.trigger,
            )
            print(*details)
        collected.append(item.cui)
    return collected
コード例 #2
0
ファイル: cui.py プロジェクト: weilin510/Thesis
def debug_extract_cui(patient_id, target_date, input_path=''):
    """Run MetaMap on one clinical note and print every mapped concept.

    The note is ``input_path`` when that is given, otherwise
    ``Clinical_Note/<patient_id>/output/<target_date>_o.txt``.  The file is
    processed in ``sldiID`` format with word-sense disambiguation enabled and
    user-defined acronyms disabled.

    Args:
        patient_id: Patient identifier used to build the default path.
        target_date: Note identifier used to build the default path.
        input_path: Explicit note path; overrides the default when not empty.

    Returns:
        list: The CUI of every concept that has an ``mm`` attribute, in the
        order MetaMap returned them.
    """
    if input_path == '':
        input_txt_path = ('Clinical_Note/' + str(patient_id) + '/output/' +
                          str(target_date) + '_o.txt')
    else:
        input_txt_path = input_path
    print("-----Processing file : " + input_txt_path + "-----")

    # MetaMap reads the file itself, so the parsed lines are not used here.
    # NOTE(review): the call is kept because read_line is defined elsewhere
    # and presumably fails early on an unreadable path -- confirm, then drop.
    read_line(input_txt_path)

    # The second element of the result (error output) is not inspected.
    concepts, _error = mm.extract_concepts(
        filename=input_txt_path,
        word_sense_disambiguation=True,
        user_define_acronyms=False,
        file_format='sldiID',
    )

    # Line number whose "Index:" header was printed last.  It is set to the
    # current line number (not incremented by one) so that lines without any
    # concept cannot make the tracker lag and repeat the header.
    last_line_no = None
    cui_list = []
    for concept in concepts:
        if hasattr(concept, 'mm'):
            # Characters 3..5 of the index are parsed as the line number.
            line_no = int(concept.index[3:6])
            if line_no != last_line_no:
                print("Index:" + str(line_no))
                last_line_no = line_no

            print("CUI:" + concept.cui, "SMT:" + concept.semtypes,
                  "POS:" + concept.pos_info,
                  "PREFER_N:" + concept.preferred_name,
                  "NEGATED:" + mmip.trigger_parser(concept.trigger),
                  "TRIGGER:" + concept.trigger)
            cui_list.append(concept.cui)

        elif hasattr(concept, 'ua'):
            # User-defined acronym record.
            print("----UDA----")
            print(" index :" + concept.index,
                  "short_form :" + concept.short_form,
                  "long_form :" + concept.long_form,
                  "POS :" + concept.pos_info)
            print("-----------")

    return cui_list
コード例 #3
0
ファイル: cui.py プロジェクト: weilin510/Thesis
def cui_compare(patient_id, ndays, startday, method="smt", mode="and"):
    """Print target concepts for a span of days and a semantic-type summary.

    Notes ``<patient_id>/<patient_id>-<day>_o.txt`` are processed from day
    ``ndays + 1`` down to day 1.  For every note the concepts accepted by
    ``is_target_smt`` and all user-defined acronym records are printed.

    NOTE(review): the function looks unfinished.  The lists that should hold
    the semantic types of the current and the earlier days are never filled,
    so the closing summary always reports empty lists, and ``startday``,
    ``method`` and ``mode`` are not read anywhere in the body.

    Args:
        patient_id: ID of the patient whose notes are compared.
        ndays: How many days to look back.
        startday: Day to start from (currently unused).
        method: Comparison method, semantic type or similarity (currently
            unused).
        mode: Comparison mode, ``and``/``or`` (currently unused).

    Returns:
        None.
    """
    # Per-semantic-type counters, accessed as sm_type['[clnd]'].
    sm_type = dict.fromkeys((
        "[clnd]", "[dsyn]", "[acab]", "[anab]", "[fndg]", "[inpo]", "[mobd]",
        "[neop]", "[patf]", "[sosy]", "[aapp]", "[antb]", "[bacs]", "[chem]",
        "[enzy]", "[hops]", "[horm]", "[imft]", "[inch]", "[lbpr]", "[medd]",
        "[nnon]", "[orch]", "[phsu]", "[topp]", "[vita]",
    ), 0)
    smt_appeared_cur = []
    smt_appeared_before_ndays = []

    # ``date`` is the current (latest) day; the loop walks back to day 1.
    date = ndays + 1
    for cnt in range(date, 0, -1):
        print("***** 這是第", cnt, "天*****")
        prefix = str(patient_id)
        txt_file = prefix + '/' + prefix + '-' + str(cnt) + '_o.txt'
        sents, lines = read_line(txt_file)
        # Hand the lines just read, with 1-based ids, to MetaMap.
        concepts, _error = mm.extract_concepts(sents,
                                               range(1, lines + 1),
                                               word_sense_disambiguation=True,
                                               derivational_variants=True,
                                               user_define_acronyms=True,
                                               file_format='sldiID')

        rows_seen = 0
        for entry in concepts:
            if hasattr(entry, 'mm'):
                # The counter is not read afterwards; the int() conversion is
                # kept so a non-numeric index still fails at this point.
                if rows_seen != int(entry.index):
                    rows_seen += 1
                if not is_target_smt(entry):
                    continue
                # Only concepts with a target semantic type are reported.
                print("Index:" + entry.index)
                print("CUI:" + entry.cui, "SMT:" + entry.semtypes,
                      "POS:" + entry.pos_info,
                      "NEGATED:" + mmip.trigger_parser(entry.trigger)[5],
                      "TRIGGER:" + entry.trigger)
            elif hasattr(entry, 'ua'):
                # User-defined acronym record.
                print("----UDA----")
                print(" index :" + entry.index,
                      "short_form :" + entry.short_form,
                      "long_form :" + entry.long_form,
                      "POS :" + entry.pos_info)
                print("-----------")

        if cnt == date:
            # Reset the counters once the current day has been handled.
            sm_type = dict.fromkeys(sm_type, 0)
            print("dict rest.")

    current = set(smt_appeared_cur)
    earlier = set(smt_appeared_before_ndays)
    smt_appeared_all = list(current | earlier)
    print("比較:第", date, "天 往前追朔", ndays, "天的Semantic type")
    print("全部出現的 semantic type:", smt_appeared_all)
    print("第", date, "天的semantic type:", smt_appeared_cur)
    print(ndays, "天之間出現的 semantic type:", smt_appeared_before_ndays)
    # New information: present on the current day, absent from earlier days.
    print("第", date, "天 跟前", ndays, "天相比,沒出現過的 SMT (新資訊) : ",
          list(current - earlier))
    # Old information: present on the current day and on earlier days.
    print("第", date, "天 跟前", ndays, "天相比,已出現過的 SMT (舊資訊) : ",
          list(current & earlier))
コード例 #4
0
ファイル: cui.py プロジェクト: weilin510/Thesis
# Semantic types whose concepts are exported by get_all_cui_list.
_TARGET_SEMTYPES = frozenset((
    "[clnd]", "[dsyn]", "[acab]", "[anab]", "[fndg]", "[inpo]", "[mobd]",
    "[neop]", "[patf]", "[sosy]", "[aapp]", "[antb]", "[bacs]", "[chem]",
    "[enzy]", "[hops]", "[horm]", "[imft]", "[inch]", "[lbpr]", "[medd]",
    "[nnon]", "[orch]", "[phsu]", "[topp]", "[vita]",
))


def get_all_cui_list(patient_id, date, output=True):
    """Export the target CUIs of a patient's notes for days ``1..date``.

    Each note ``<patient_id>/<patient_id>-<day>_o.txt`` is processed with
    MetaMap.  For every concept whose semantic type is in
    ``_TARGET_SEMTYPES`` one line ``cui, index, pos_info, negation`` is
    appended to ``<patient_id>/<patient_id>_all_<date>_cui.txt``; for every
    user-defined acronym record one line ``long_form, index, pos_info`` is
    appended.  The file is opened in append mode, so running the function
    again adds to an existing file.

    Args:
        patient_id (str): Patient whose notes are processed.
        date (int): Number of days (notes) to process, starting at day 1.
        output (bool): Whether to print the matched concepts.  Acronym
            records and the progress line are printed regardless.

    Returns:
        None.  The results are written to the output file only.
    """
    file_out = '{0}/{0}_all_{1}_cui.txt'.format(patient_id, date)
    for cnt in range(1, date + 1):
        txt_file = '{0}/{0}-{1}_o.txt'.format(patient_id, cnt)

        print("-----Processing file : " + txt_file + "-----")
        sents, lines = read_line(txt_file)
        index_list = range(1, lines + 1)
        # Hand the lines just read, with 1-based ids, to MetaMap.  The
        # second element of the result (error output) is not inspected.
        concepts, _error = mm.extract_concepts(sents,
                                               index_list,
                                               word_sense_disambiguation=True,
                                               derivational_variants=True,
                                               user_define_acronyms=True,
                                               file_format='sldiID')

        # Lines for this note are collected first and appended in a single
        # write, so the file is opened once per note and always closed.
        rows = []
        # Index whose "Index:" header was emitted last; comparing against
        # the index itself keeps the header to one per line even when some
        # lines yield no concept.
        header_index = None
        for concept in concepts:
            if hasattr(concept, 'mm'):
                if concept.semtypes not in _TARGET_SEMTYPES:
                    continue
                if concept.index != header_index:
                    if output:
                        print("Index:" + concept.index)
                    header_index = concept.index

                # Element 5 of the parsed trigger is the negation field.
                negated = mmip.trigger_parser(concept.trigger)[5]
                if output:
                    print(concept.cui, "POS:" + concept.pos_info,
                          "NEGATED:" + negated)
                rows.append(concept.cui + ", " + concept.index + ", " +
                            concept.pos_info + ", " + negated + '\n')

            elif hasattr(concept, 'ua'):
                # User-defined acronym record.
                print("----UDA----")
                print(" index :" + concept.index,
                      "short_form :" + concept.short_form,
                      "long_form :" + concept.long_form,
                      "POS :" + concept.pos_info)
                print("-----------")
                rows.append(concept.long_form + ", " + concept.index + ", " +
                            concept.pos_info + '\n')

        if rows:
            with open(file_out, 'a') as f:
                f.writelines(rows)
コード例 #5
0
ファイル: cui.py プロジェクト: weilin510/Thesis
def extract_sldiID(patient_id, number_of_note, output=True, no_filterd=False):
    """Map every note of a patient to MetaMap concepts and export them as CSV.

    For each note ``n`` in ``1..number_of_note`` the file
    ``Clinical_Note/<patient_id>/output/<n>_o.txt`` is processed in
    ``sldiID`` format.  One CSV per note is written to
    ``Clinical_Note/<patient_id>/csv/<n>.csv``, or to ``csv_all/`` when
    ``no_filterd`` is true.  The columns are CUI, ROW, POSITION, NEGATION,
    TEXT_TRIGGER, SMT and TRIGGER.

    Args:
        patient_id: ID of the patient whose notes are mapped.
        number_of_note: How many notes the patient has.
        output: Whether to print each exported concept.  The CSV rows are
            written regardless of this flag.
        no_filterd: When true, export every concept; otherwise only those
            accepted by ``is_target_smt``.

    Returns:
        list: CUIs exported for the last note processed (the list is reset
        for every note); an empty list when ``number_of_note`` is below 1.
        NOTE(review): the original docstring speaks of returning all mapped
        information -- confirm whether accumulating over notes was intended.
    """
    base_dir = 'Clinical_Note/' + str(patient_id)
    csv_dir = '/csv_all/' if no_filterd else '/csv/'
    # Defined up front so the final return works even without any note.
    cui_list = []

    for cnt in range(1, number_of_note + 1):
        txt_file = base_dir + '/output/' + str(cnt) + '_o.txt'
        output_file_path = base_dir + csv_dir + str(cnt) + '.csv'
        cui_list = []

        # The context manager closes the CSV even if MetaMap or a row fails.
        with open(output_file_path, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                'CUI', 'ROW', 'POSITION', 'NEGATION', 'TEXT_TRIGGER', 'SMT',
                'TRIGGER'
            ])
            print("-----Processing file : " + txt_file + "-----")
            # The second element of the result (error output) is not
            # inspected.
            concepts, _error = mm.extract_concepts(
                filename=txt_file,
                word_sense_disambiguation=True,
                user_define_acronyms=True,
                file_format='sldiID',
                composite_phrase=4,
                ignore_word_order=True,
                prefer_multiple_concepts=True,
                negated_setting=True,
                term_processing=False,
                allow_concept_gap=False,
            )

            # Row whose "Index:" header was printed last; it is set to the
            # row number itself so rows without concepts cannot cause the
            # header to repeat.
            header_row = None
            for concept in concepts:
                if hasattr(concept, 'mm'):
                    # Characters 3..5 of the index are parsed as the row.
                    row_no = int(concept.index[3:6])
                    if not (is_target_smt(concept) or no_filterd):
                        continue
                    if row_no != header_row:
                        if output:
                            print("Index:" + str(row_no))
                        header_row = row_no

                    negation = mmip.trigger_parser(concept.trigger)
                    text_trigger = mmip.trigger_parser_getinfo(
                        concept.trigger, 3)
                    if output:
                        print("CUI:" + concept.cui,
                              "SMT:" + concept.semtypes,
                              "POS:" + concept.pos_info,
                              "PREFER_N:" + concept.preferred_name,
                              "NEGATED:" + negation,
                              "TEXT_TRIGGER:" + text_trigger,
                              "TRIGGER:" + concept.trigger)
                    # Written outside the ``output`` check: printing is
                    # optional, the export is not.
                    writer.writerow([
                        concept.cui, row_no, concept.pos_info, negation,
                        text_trigger, concept.semtypes, concept.trigger
                    ])
                    cui_list.append(concept.cui)

                elif hasattr(concept, 'ua'):
                    # User-defined acronym record.
                    print("----UDA----")
                    print(" Index :" + concept.index,
                          "short_form :" + concept.short_form,
                          "long_form :" + concept.long_form,
                          "POS :" + concept.pos_info)
                    print("-----------")
    print("---Process complete !---")
    return cui_list
コード例 #6
0
from pymetamap import MetaMap
from mytool import mmi_parser as mmip
mm = MetaMap.get_instance('/home/feng/public_mm/bin/metamap20')

txt_file = "clinical_txt/2.txt"

# Map the note to concepts; the error output is not inspected afterwards.
concepts, error = mm.extract_concepts(
    filename=txt_file,
    word_sense_disambiguation=True,
)

# Print and count only the concepts whose semantic type is exactly
# "[clnd]" or "[dsyn]".  concept.index carries the line information and
# concept.semtypes the semantic type information.
counter = 0
for concept in concepts:
    if concept.semtypes not in ("[clnd]", "[dsyn]"):
        continue
    print("Index:" + concept.index)
    print(
        "CUI:" + concept.cui,
        "SMT:" + concept.semtypes,
        "POS:" + concept.pos_info,
        "PREFER_N:" + concept.preferred_name,
        "NEGATED:" + mmip.trigger_parser(concept.trigger)[5],
        "TRIGGER:" + concept.trigger,
    )
    counter += 1
print(counter)