Example 1
def all_dx_to_file():

    orig_stdout = sys.stdout
    newfile = open(f'R:/groups/seeley/Mack/NP report parser/NACC backlog dx sections_{date_today}.txt', 'w')
    sys.stdout = newfile

    for f in pNum_files.values():
        pNum = filepath_dict[f]

        dx_sxn = parser_functions.get_dx_sxn(f)
        all_content = parser_functions.open_file(f)

        print(f'\n --- {pNum} --- \n')

        primDx_list = parser_functions.get_PrimDx(dx_sxn)
        print(f'Number of primary diagnoses: {len(primDx_list)}')
        for counter, dx in enumerate(primDx_list, 1):
            print(f'{counter}: {dx}')

        contributingDx_list = parser_functions.get_ContributingDx(dx_sxn)
        print(f'\nNumber of contributing diagnoses: {len(contributingDx_list)}')
        for counter, dx in enumerate(contributingDx_list, 1):
            print(f'{counter}: {dx}')

        incidentalDx_list = parser_functions.get_IncidentalDx(dx_sxn)
        print(f'\nNumber of incidental diagnoses: {len(incidentalDx_list)}')
        for counter, dx in enumerate(incidentalDx_list, 1):
            print(f'{counter}: {dx}')

    sys.stdout = orig_stdout
    newfile.close()
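A note on the stdout swap above: if a parser call raises mid-loop, sys.stdout stays pointed at the file. A minimal sketch of the same capture using the standard library's contextlib.redirect_stdout, which restores stdout and closes the file even on error (same module-level names as above; the loop body is elided):

import contextlib

def all_dx_to_file_safe():
    # Sketch only: same output file as all_dx_to_file(), but the context
    # managers undo the redirection even if a parser_functions call raises.
    outpath = f'R:/groups/seeley/Mack/NP report parser/NACC backlog dx sections_{date_today}.txt'
    with open(outpath, 'w') as newfile, contextlib.redirect_stdout(newfile):
        for f in pNum_files.values():
            print(f'\n --- {filepath_dict[f]} --- \n')
            # ...same three diagnosis blocks as in all_dx_to_file()...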
Example 2
def check_microinfarcts(pNum):
    dict_entry = pf.get_files([pNum])
    filename = dict_entry[pNum]
    #print(f'Filename: {filename}')
    lines = pf.open_file(filename)

    microinfarct_sxn = case_finder.get_microinfarct_section(lines)
    found_lines = [line for line in microinfarct_sxn if 'FOUND' in line]

    distinct_sentences_list = []

    for line in found_lines:
        distinct_sentences = line.split('.')
        for sentence in distinct_sentences:
            sentence = sentence.replace('FINDINGS:', '')
            sentence = sentence.replace(r'\t', '')
            sentence = sentence.strip()
            if sentence == '':
                continue
            distinct_sentences_list.append(sentence)
    #print(f'{pNum} sentences: {distinct_sentences_list}')

    values = add_microinfarct_score(distinct_sentences_list)
    microinfarct_values = dict(zip(microinfarct_columns, values))
    #for key, value in microinfarct_values.items():
    #    print(f'{key}: {value}')
    return list(microinfarct_values.values())
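The cleanup loop above strips the 'FINDINGS:' label and the literal backslash-t artifacts, then splits on periods. A compact one-pass equivalent, as a sketch (split_findings_sentences is a hypothetical helper, not in the original):

def split_findings_sentences(line):
    # Hypothetical helper: one-pass version of the cleanup loop in
    # check_microinfarcts() -- strip the label and the literal backslash-t
    # artifacts, split on periods, drop empty fragments.
    text = line.replace('FINDINGS:', '').replace(r'\t', '')
    return [s.strip() for s in text.split('.') if s.strip()]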
Example 3
def examine_CTE():
    file_dict = parser_functions.get_files()
    for pNum, filename in file_dict.items():
        lines = parser_functions.open_file(filename)
        for line in lines:
            if any(x in line for x in ('hronic traumatic', 'CTE')):
                print(f'{pNum}: {line}')
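The lowercase 'hronic traumatic' trick catches both 'Chronic' and 'chronic', but the bare 'CTE' substring also hits longer tokens that merely contain those letters. A hedged regex variant (hypothetical; the scan itself is unchanged):

import re

# \bCTE\b avoids substring hits; IGNORECASE covers any capitalization of
# 'chronic traumatic'.
CTE_PATTERN = re.compile(r'\bCTE\b|chronic traumatic', re.IGNORECASE)

def examine_CTE_regex():
    for pNum, filename in parser_functions.get_files().items():
        for line in parser_functions.open_file(filename):
            if CTE_PATTERN.search(line):
                print(f'{pNum}: {line}')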
Example 4
def examine_grossObs():

    cases_without_grossObs = []
    for pNum, filename in pNum_files.items():
        all_content = parser_functions.open_file(filename)

        i = 0
        for line in all_content:
            line = line.upper()
            if 'GROSS OBSERVATIONS' in line:
                i += 1
        if i == 0:
            cases_without_grossObs.append(pNum)

    for pNum in cases_without_grossObs:
        print(pNum)
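The counter above only matters as a zero/nonzero flag, so the check collapses to any(), which also short-circuits on the first hit. A sketch against the same module-level names:

def examine_grossObs_any():
    # Hypothetical rewrite: a case is flagged when no line contains the
    # 'GROSS OBSERVATIONS' header.
    for pNum, filename in pNum_files.items():
        content = parser_functions.open_file(filename)
        if not any('GROSS OBSERVATIONS' in line.upper() for line in content):
            print(pNum)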
Example 5
def get_all_dx():
    for f in pNum_files.values():
        pNum = filepath_dict[f]

        dx_sxn = parser_functions.get_dx_sxn(f)
        all_content = parser_functions.open_file(f)

        print(f'\n --- {pNum} --- \n')

        primDx_list = parser_functions.get_PrimDx(dx_sxn)
        print(f'Number of primary diagnoses: {len(primDx_list)}')
        for counter, dx in enumerate(primDx_list, 1):
            print(f'{counter}: {dx}')

        contributingDx_list = parser_functions.get_ContributingDx(dx_sxn)
        print(f'\nNumber of contributing diagnoses: {len(contributingDx_list)}')
        for counter, dx in enumerate(contributingDx_list, 1):
            print(f'{counter}: {dx}')

        incidentalDx_list = parser_functions.get_IncidentalDx(dx_sxn)
        print(f'\nNumber of incidental diagnoses: {len(incidentalDx_list)}')
        for counter, dx in enumerate(incidentalDx_list, 1):
            print(f'{counter}: {dx}')
Example 6
def examine_vbi():
    pNum_list = []

    for f in pNum_files.values():
        print(f'Working on {f}...')
        pNum = filepath_dict[f]

        dx_sxn = parser_functions.get_dx_sxn(f)
        all_content = parser_functions.open_file(f)

        for line in all_content:
            line = line.upper()
            ## Note: needs to account for 'MICROINFARCTION(S), AGE, WAS/WERE FOUND'
            ## -- see the regex sketch after this function.
            if any(phrase in line for phrase in (
                    'MICROINFARCTION WAS FOUND',
                    'MICROINFARCT WAS FOUND',
                    'MICROINFARCTS WERE FOUND',
                    'MICROINFARCTIONS WERE FOUND',
            )):
                pNum_list.append(pNum)

    pNum_list = sorted(set(pNum_list))
    return pNum_list
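The inline note flags phrasings such as 'MICROINFARCTION(S), AGE, WAS/WERE FOUND' that the literal matches miss. One regex can cover the singular/plural and WAS/WERE variants plus short interposed qualifiers; a hypothetical pattern, to be validated against real reports:

import re

# MICROINFARCT / MICROINFARCTS / MICROINFARCTION / MICROINFARCTIONS,
# optionally followed by up to 40 characters of qualifiers (e.g. ', ACUTE,'),
# then WAS or WERE FOUND.
MICROINFARCT_FOUND = re.compile(
    r'MICROINFARCT(?:ION)?S?\b.{0,40}?\b(?:WAS|WERE)\s+FOUND')

def line_reports_microinfarct(line):
    return bool(MICROINFARCT_FOUND.search(line.upper()))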
Example 7
def parser_rows():
    file_dict = pf.get_files(pNums_to_check)

    # Set working fields to column names spanning entire DDS

    # Create empty dataframe with parser fields as columns
    data = []

    for pNum in file_dict.keys():
        print(f' --- {pNum} ---')
        filename = file_dict[pNum]
        all_contents = pf.open_file(filename)
        dx_sxn = pf.get_dx_sxn(filename)
        grossObs = pf.get_grossObs(filename)
        site = 'UCSF NDBB'
        author = pf.get_author(pNum)
        ADNC_dict = pf.get_ADNC(dx_sxn)
        Thal_phase = ADNC_dict['Thal Phase']
        AD_Braak = ADNC_dict['Braak Stage']
        AD_CERAD_NP = ADNC_dict['CERAD NP Score']
        AD_CERAD_DP = ADNC_dict['CERAD DP Score']
        NIAReag = ADNC_dict['NIA-Reagan']
        CAA = pf.get_CAA(dx_sxn)
        ADNC_level = ADNC_dict['ADNC level']
        LBD = pf.get_lbd_stage(dx_sxn)
        PD_Braak = pf.get_PDBraak(dx_sxn)
        ATAC = pf.get_ATAC(dx_sxn)
        #CTE = pf.get_CTE(dx_sxn)
        #HS = 'NA'
        #HS_laterality = 'NA'
        Arterio = pf.get_arterio(dx_sxn)
        Athero = pf.get_athero(grossObs)
        #TDP_proteinopathy = 'NA'
        AGD = pf.get_AGD(dx_sxn)
        HD = pf.get_huntington(dx_sxn)
        microinfarcts = check_microinfarcts(pNum)

        parser_values = [
            pNum, site, author, Thal_phase, AD_Braak, AD_CERAD_NP, AD_CERAD_DP,
            NIAReag, CAA, ADNC_level, LBD, PD_Braak, ATAC, Arterio, Athero,
            AGD, HD
        ] + microinfarcts
        data.append(parser_values)

        working_data = dict(zip(working_fields, parser_values))
        print(working_data)
        """
        primDx_list = pf.get_PrimDx(dx_sxn)
        print(f'\nNumber of primary diagnoses: {len(primDx_list)}')
        for counter, dx in enumerate(primDx_list, 1):
            print(f'{counter}: {dx}')

        contributingDx_list = pf.get_ContributingDx(dx_sxn)
        print(f'\nNumber of contributing diagnoses: {len(contributingDx_list)}')
        for counter, dx in enumerate(contributingDx_list, 1):
            print(f'{counter}: {dx}')

        incidentalDx_list = pf.get_IncidentalDx(dx_sxn)
        print(f'\nNumber of incidental diagnoses: {len(incidentalDx_list)}')
        for counter, dx in enumerate(incidentalDx_list, 1):
            print(f'{counter}: {dx}')
        """
    working_df = pd.DataFrame(data, columns=working_fields)
    #print(working_df)
    # Append to the existing workbook; assigning writer.book by hand (the old
    # load_workbook pattern) is unsupported on pandas >= 1.5.
    with pd.ExcelWriter(error_checking_sheet, engine='openpyxl', mode='a',
                        if_sheet_exists='replace') as writer:
        working_df.to_excel(writer, sheet_name='parser values')
    return working_df
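Because parser_rows() builds each row positionally, a length mismatch between parser_values and working_fields silently shifts every column to the right of the gap. A hypothetical per-row guard (not in the original) that names the offending case:

def check_row_alignment(parser_values, working_fields, pNum):
    # Hypothetical guard: pd.DataFrame(data, columns=...) only fails at the
    # very end, and without saying which case; checking per row does both.
    if len(parser_values) != len(working_fields):
        raise ValueError(f'{pNum}: {len(parser_values)} values for '
                         f'{len(working_fields)} columns')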
Example 8
def examine_vbi_regions():

    all_vbi_terms = []
    distinct_sentences_list = []

    for pNum in examine_vbi():
        dict_entry = parser_functions.get_files([pNum])
        filename = dict_entry[pNum]
        print(f'Filename: {filename}')
        lines = parser_functions.open_file(filename)

        lines = get_microinfarct_section(lines)
        print(lines)

        for line in lines:
            line = line.upper()
            if any(x in line for x in microinfarct_phrases):
                print(line)
                distinct_sentences = line.split('.')
                for sentence in distinct_sentences:
                    distinct_sentences_list.append(sentence)
                    print(f'Sentence: {sentence}')
                    distinct_clauses = sentence.split('AND')
                    for clause in distinct_clauses:
                        print(f'Clause: {clause}')
                        distinct_terms = clause.split(',')
                        for term in distinct_terms:
                            print(f'Term: {term}')
                            all_vbi_terms.append(term)

    clean_vbi_terms = []
    before_vbi_terms = []
    gray_matter_terms = []
    white_matter_terms = []
    before_region_terms = []
    after_region_terms = []

    for term in all_vbi_terms:
        term = term.strip()
        if any(x in term for x in ['GRAY', 'CORTEX OF', '(CORTEX)', '(GRAY)', '(GRAY']):
            gray_matter_terms.append(term)
        if any(x in term for x in ['SUBCORTICAL', '(SUBCORTICAL', '(SUBCORTICAL)', 'WHITE MATTER', 'MATTER)']):
            white_matter_terms.append(term)
        if 'NO MICROINFARCTION' in term:
            continue
        elif r'FINDINGS:\t' in term:
            terms = term.split(r'FINDINGS:\t')
            if 'FOUND IN' in terms[1]:
                terms = terms[1].split('FOUND IN')
                before_region_terms.append(terms[0])
                for article in ['THE ', 'A ']:
                    if article in terms[1]:
                        terms = terms[1].split(article)
                clean_vbi_terms.append(terms[1].strip())
            else:
                clean_vbi_terms.append(terms[1].strip())
        elif 'FOUND IN' in term:
            terms = term.split('FOUND IN')
            before_vbi_terms.append(terms[0])
            has_article = False
            for article in ['THE ', 'A ']:
                if article in terms[1]:
                    has_article = True
                    terms = terms[1].split(article)
                    clean_vbi_terms.append(terms[1].strip())
            if not has_article:
                clean_vbi_terms.append(terms[1].strip())
        elif term == '':
            continue
        else:
            clean_vbi_terms.append(term.strip())
    clean_vbi_terms = set(clean_vbi_terms)
    before_vbi_terms = set(before_vbi_terms)
    gray_matter_terms = sorted(set(gray_matter_terms))
    white_matter_terms = sorted(set(white_matter_terms))
    #for term in sorted(clean_vbi_terms):
    #    print(term)
    #print(len(clean_vbi_terms))
    #for term in sorted(before_vbi_terms):
    #    print(term)
    #print(len(before_vbi_terms))
    print('Gray matter terms:')
    for term in gray_matter_terms:
        print(term)
    print('White matter terms:')
    for term in white_matter_terms:
        print(term)
    for sentence in set(distinct_sentences_list):
        print(sentence)
    print(len(set(distinct_sentences_list)))
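Most of the branching above peels 'FINDINGS:', 'FOUND IN', and a leading article off each term. A hedged regex sketch of that extraction (extract_region is hypothetical and does not reproduce the original branch order exactly):

import re

def extract_region(term):
    # Hypothetical helper: drop an optional literal 'FINDINGS:' + backslash-t
    # prefix, then return whatever follows 'FOUND IN', minus a leading THE/A.
    term = re.sub(r'FINDINGS:\\t', '', term).strip()
    match = re.search(r'FOUND IN (?:THE |A )?(.+)', term)
    return match.group(1).strip() if match else term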
Example 9
def weird_vbi_to_file():
    current_time = time.strftime('%d-%m-%Y %H:%M:%S')

    orig_stdout = sys.stdout
    newfile = open(f'R:/groups/seeley/Mack/NP report parser/Abnormal VBI sxns_{date_today}.txt', 'w')
    sys.stdout = newfile

    print(f'| Cases with Abnormal VBI |\nRun from working_pNums file on {current_time}\n\n')
    vbi_list = []
    gross_infarct_list = []

    check_grossObs_list = []
    check_dx_list = []

    for f in pNum_files.values():
        pNum = filepath_dict[f]

        try:
            dx_sxn = parser_functions.get_dx_sxn(f)
            all_content = parser_functions.open_file(f)
        except UnboundLocalError:
            print(f'{pNum}: Diagnostic sxn index referenced before assignment')
            continue  # dx_sxn/all_content are undefined past this point

        hx_sxn = {}
        gross_obs = []
        try:
            gross_obs = parser_functions.get_grossObs(f)
            gross_obs = [item.upper() for item in gross_obs]
        except TypeError:
            # Leave gross_obs empty so the checks below are no-ops.
            print(f'{pNum}: No gross obs')

        for line in all_content:
            if '(H & E)' in line:
                if 'Other significant pathology' not in line:
                    hx_sxn[line] = all_content[all_content.index(line) + 2]
                else:
                    hx_sxn[line] = all_content[all_content.index(line) + 1]

        gross_infarcts = {}

        if gross_obs:
            for line in gross_obs:
                if any(x in line for x in ['TERRITORIAL INFARCTS', 'LACUNAR INFARCTS']):
                    if any(x in line for x in ['ABSENT', 'NONE']):
                        continue
                    else:
                        line = line.split(':')
                        gross_infarcts[line[0]] = line[1]

        if gross_infarcts:
            vbi_list.append(pNum)
            gross_infarct_list.append(pNum)

            print(f'--- {pNum} ---\n')
            for key, value in gross_infarcts.items():
                print(f'{key}: {value}\n')

        vbi_lineItem = ''

        for line in dx_sxn:
            if 'VASCULAR BRAIN INJURY' in line:
                vbi_lineItem = line

        if vbi_lineItem != '':
            for key in hx_sxn:
                if 'Microinfarcts' in key:
                    if all(x not in hx_sxn[key].upper() for x in ['SCARCE', 'MODERATE', 'SEVERE']):
                        #print(f'{key}:\n{hx_sxn[key]}\n')
                        vbi_list.append(pNum)

    vbi_list = sorted(set(vbi_list))
    gross_infarct_list = sorted(set(gross_infarct_list))

    print('Weird VBI P# List:')
    for pNum in vbi_list:
        print(f'{pNum}')
    print(len(vbi_list))

    print('Gross Infarct List:')
    for pNum in gross_infarct_list:
        print(f'{pNum}')
    print(len(gross_infarct_list))

    sys.stdout = orig_stdout
    newfile.close()
Example 10
def vbi_dx_to_file():
    current_time = time.strftime('%d-%m-%Y %H:%M:%S')

    orig_stdout = sys.stdout
    newfile = open(f'R:/groups/seeley/Mack/NP report parser/VBI sxns_{date_today}.txt', 'w')
    sys.stdout = newfile

    print(f'| Cases with VBI |\nRun from all files {current_time}\n\n')
    vbi_list = []

    for f in pNum_files.values():
        print(f'Working on {f}...')
        pNum = filepath_dict[f]

        dx_sxn = parser_functions.get_dx_sxn(f)
        all_content = parser_functions.open_file(f)

        hx_sxn = {}
        gross_obs = []

        try:
            gross_obs = parser_functions.get_grossObs(f)
            gross_obs = [item.upper() for item in gross_obs]
        except TypeError:
            # Leave gross_obs empty so the 'if gross_obs:' check is a no-op
            # rather than a NameError.
            print(f'No gross obs found for {pNum}')

        for line in all_content:
            if '(H & E)' in line:
                if 'Other significant pathology' not in line:
                    hx_sxn[line] = all_content[all_content.index(line) + 2]
                else:
                    hx_sxn[line] = all_content[all_content.index(line) + 1]

        gross_infarcts = {}

        if gross_obs:
            for line in gross_obs:
                if any(x in line for x in ['TERRITORIAL INFARCTS', 'LACUNAR INFARCTS']):
                    if any(x in line for x in ['ABSENT', 'NONE']):
                        continue
                    else:
                        line = line.split(':')
                        gross_infarcts[line[0]] = line[1]

        vbi_lineItem = ''

        for line in dx_sxn:
            if 'VASCULAR BRAIN INJURY' in line:
                vbi_lineItem = line
                vbi_list.append(pNum)

        if vbi_lineItem != '':
            print(f'--- {pNum} ---\n')
            for key in hx_sxn:
                if 'Microinfarcts' in key:
                    print(f'{key}:\n{hx_sxn[key]}\n')
                if 'Other significant pathology' in key:
                    print(f'{key}:\n{hx_sxn[key]}\n')

            if gross_infarcts:
                for key, value in gross_infarcts.items():
                    print(f'{key}: {value}')

    print('P# List:')
    for pNum in vbi_list:
        print(f'{pNum}')
    print(len(vbi_list))

    sys.stdout = orig_stdout
    newfile.close()
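One shared caveat for weird_vbi_to_file() and vbi_dx_to_file(): both pair each '(H & E)' header with its findings via all_content.index(line), which always returns the first occurrence, so a duplicated header line pairs with the wrong findings. A sketch using enumerate() instead:

def build_hx_sxn(all_content):
    # Hypothetical refactor of the '(H & E)' pairing in the two functions
    # above: enumerate() keeps each header's true position, where
    # list.index() returns the first match for duplicated lines.
    hx_sxn = {}
    for i, line in enumerate(all_content):
        if '(H & E)' in line:
            offset = 1 if 'Other significant pathology' in line else 2
            hx_sxn[line] = all_content[i + offset]
    return hx_sxn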