def patients_attempt_suicide(d_icd="data/DIAGNOSES_ICD.csv"):
    """
    Collect the diagnosis rows whose code (column 4) is a suicidal ICD code.

    The first CSV row (the header) followed by every matching row, each one
    extended with the matching entry returned by ``suicial_icd_codes()``, is
    written to ``suicide_patient.tsv`` inside ``cache_folder``.

    :param d_icd: path of the diagnoses CSV, relative to ``mimic``.
    :return: the matching CSV rows, without the header row and without the
        appended ICD entry.
    """
    d_icd_path = os.path.join(mimic, d_icd)

    # The code list could be extended; it used to be loaded from
    # "data/processed/suicide_icd_9".
    suicide_icds = suicial_icd_codes()

    # code -> first entry carrying that code.  One dict lookup per CSV row
    # replaces a list scan followed by list.index(); setdefault keeps the
    # "first match wins" behaviour of list.index().
    # Assumes the codes (element 1 of each entry) are hashable, e.g. strings.
    entry_by_code = {}
    for suicide_icd in suicide_icds:
        entry_by_code.setdefault(suicide_icd[1], suicide_icd)

    dataset_diagnosis = []
    dataset_diagnosis_with_header = []
    # newline='' is what the csv module requires so that line breaks inside
    # quoted fields are handed to the parser untouched.
    with open(d_icd_path, 'r', newline='') as mycsvfile:
        for idx, row in enumerate(csv.reader(mycsvfile, delimiter=',')):
            if idx == 0:
                dataset_diagnosis_with_header.append(row)
            # Does this diagnosis carry one of the suicidal codes?
            code = row[4]
            if code in entry_by_code:
                dataset_diagnosis.append(row)
                dataset_diagnosis_with_header.append(row + entry_by_code[code])

    # suicide_patient.tsv starts with the table header.
    read.save_in_tsv(os.path.join(cache_folder, "suicide_patient.tsv"),
                     dataset_diagnosis_with_header)
    # NOTE(review): this counts matching diagnosis rows, not distinct patients.
    print("No. of patients that have attempted suicide: ", len(dataset_diagnosis))
    return dataset_diagnosis
def process_ontology():
    """
    Flatten ``data/ontology.tsv`` into a synonym list plus per-concept ranges.

    Every row of the ontology file must be a ``(synonym, concept)`` pair.
    Three files are written under ``data/ontology/``:

    * ``ontology_synonyms.tsv`` - one synonym per row, grouped by concept;
    * ``ontology_concept`` - the list of concepts;
    * ``ontology_concept_synonyms_idx`` - concept -> ``(start, end)``
      half-open range of that concept's rows in the synonym file.

    :return: None
    """
    ontology = read.read_from_tsv("data/ontology.tsv")

    # Presumably read.add_dict appends `synonym` to the list stored under
    # `concept` - verify against the helper.
    concept_mentions = {}
    for synonym, concept in ontology:
        read.add_dict(concept_mentions, concept, synonym)

    concepts = list(concept_mentions.keys())
    synonyms = []
    concept_mention_idx = {}
    start = 0
    for concept in concepts:
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # synonym file and the index ranges are identical from run to run
        # (a plain set() would yield an arbitrary order).
        concept_synonyms = list(dict.fromkeys(concept_mentions[concept]))
        synonyms += concept_synonyms
        end = start + len(concept_synonyms)
        # A concept without synonyms gets no range entry.
        if end > start:
            concept_mention_idx[concept] = (start, end)
        start = end

    # save_in_tsv is given one single-column row per synonym.
    synonyms = [[item] for item in synonyms]
    read.save_in_tsv("data/ontology/ontology_synonyms.tsv", synonyms)
    read.save_in_json("data/ontology/ontology_concept", concepts)
    read.save_in_json("data/ontology/ontology_concept_synonyms_idx",
                      concept_mention_idx)
def get_documents():
    """
    Export discharge summaries for the ids from ``patient_admission_duration()``.

    ``data/NOTEEVENTS.csv`` is scanned once.  A row is kept when column 0 is
    in one of the two collections returned by ``patient_admission_duration()``
    (the "before" collection is checked first) and column 6 equals
    ``"Discharge summary"``.  For a kept row the last column (the note text)
    is written to ``<col0>_<col1>_<col2>.txt`` in the matching folder under
    ``output_folder``, and the remaining columns are collected into a
    per-group TSV.

    :return: None
    """
    patient_admission_before, patient_admission_meanwhile = patient_admission_duration()
    file_notes_title_before = []
    file_notes_title_meanwhile = []

    # newline='' is what the csv module requires so that line breaks inside
    # the quoted note text reach the parser (and the saved .txt) untouched.
    with open("data/NOTEEVENTS.csv", 'r', newline='') as mycsvfile:
        for row in csv.reader(mycsvfile, delimiter=','):
            if row[0] in patient_admission_before:
                folder = "discharge_summaries/mental_not_suicide_patient_before/"
                metadata = file_notes_title_before
            elif row[0] in patient_admission_meanwhile:
                # NOTE(review): the folder name has a double underscore while
                # the TSV written below does not - confirm this is intended.
                folder = "discharge_summaries/mental_not_suicide__patient_during/"
                metadata = file_notes_title_meanwhile
            else:
                continue

            if row[6] != "Discharge summary":
                continue

            # Everything except the note text goes into the metadata table.
            metadata.append(row[:-1])
            read.save_in_txt_string(
                os.path.join(
                    output_folder,
                    folder + row[0] + "_" + row[1] + "_" + row[2] + ".txt"),
                row[-1])

    read.save_in_tsv(
        os.path.join(output_folder,
                     "discharge_summaries/mental_not_suicide_patient_before.tsv"),
        file_notes_title_before)
    read.save_in_tsv(
        os.path.join(output_folder,
                     "discharge_summaries/mental_not_suicide_patient_during.tsv"),
        file_notes_title_meanwhile)
def add_oov_processed():
    """
    Build the complete code -> synonyms dictionary for the AskAPatient codes.

    For every code in ``ask``:

    * if ``codes_single_synonyms_tsv.tsv`` maps the code to a CUI (rows whose
      column 3 is longer than two characters), the code gets that CUI's
      de-duplicated synonyms, and the TSV row is extended with the CUI, a
      preview of the joined synonyms cut to 100 characters, and the
      ``cui_st_dict`` entry of the CUI;
    * otherwise the code falls back to its own label entry.

    Writes ``data/AskAPatient/code_dict_complete`` (JSON) and
    ``data/AskAPatient/codes_st_tsv.tsv``.

    :return: None
    """
    code_cuis = read.read_from_tsv(
        "data/AskAPatient/codes_single_synonyms_tsv.tsv")
    code_cuis_dict = {
        line[0]: line[2]
        for line in code_cuis if len(line[3]) > 2
    }
    cui_synonyms = read.read_from_json("data/AskAPatient/cui_dict")
    cui_st = read.read_from_json("data/AskAPatient/cui_st_dict")
    code_labels = read.read_from_json(
        "data/AskAPatient/label_texts_dict_AskAPatient")

    codes_synonyms_tsv = {}
    codes_st_tsv = []
    # Every code comes from `ask` itself, so no extra `code in ask` test is
    # needed inside the loop.
    for code in ask:
        code_st_tsv = [code, code_labels[code]]
        if code in code_cuis_dict:
            cui = code_cuis_dict[code]
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the saved JSON and the truncated preview are reproducible
            # (a plain set() would yield an arbitrary order).
            synonym = list(dict.fromkeys(cui_synonyms[cui]))
            code_st_tsv += [cui, " [SEP] ".join(synonym)[:100], cui_st[cui]]
        else:
            # NOTE(review): here the stored value is the raw label entry, not
            # a de-duplicated list - confirm consumers accept both shapes.
            synonym = code_labels[code]
        codes_synonyms_tsv[code] = synonym
        codes_st_tsv.append(code_st_tsv)

    read.save_in_json("data/AskAPatient/code_dict_complete",
                      codes_synonyms_tsv)
    read.save_in_tsv("data/AskAPatient/codes_st_tsv.tsv", codes_st_tsv)
def add_oov():
    """
    Write per-code CUI synonym tables for the AskAPatient codes.

    Each code in ``ask`` yields one row starting with the code and its label.
    For every CUI listed for the code in ``code_cuis.tsv`` the row is extended
    with the CUI and a preview of its synonyms (joined with " [SEP] " and cut
    to 100 characters); the "st" variant additionally carries the
    ``cui_st_dict`` entry of the CUI.

    Writes ``data/AskAPatient/codes_synonyms_tsv.tsv`` and
    ``data/AskAPatient/codes_st_tsv.tsv``.

    :return: None
    """
    cui_rows = read.read_from_tsv("data/AskAPatient/code_cuis.tsv")

    # Keep only rows that list at least one CUI after the code column.
    code_cuis_dict = {}
    for line in cui_rows:
        if line[:-1]:
            code_cuis_dict[line[0]] = line[1:]

    cui_synonyms = read.read_from_json("data/AskAPatient/cui_dict")
    cui_st = read.read_from_json("data/AskAPatient/cui_st_dict")
    code_labels = read.read_from_json(
        "data/AskAPatient/label_texts_dict_AskAPatient")

    codes_synonyms_tsv = []
    codes_st_tsv = []
    for code in ask:
        label = code_labels[code]
        synonyms_row = [code, label]
        st_row = [code, label]
        if code not in ask:
            continue

        for cui in code_cuis_dict.get(code, ()):
            preview = " [SEP] ".join(cui_synonyms[cui])[:100]
            synonyms_row.extend([cui, preview])
            st_row.extend([cui, preview, cui_st[cui]])

        codes_synonyms_tsv.append(synonyms_row)
        codes_st_tsv.append(st_row)

    read.save_in_tsv("data/AskAPatient/codes_synonyms_tsv.tsv",
                     codes_synonyms_tsv)
    read.save_in_tsv("data/AskAPatient/codes_st_tsv.tsv", codes_st_tsv)
def get_documents():
    """
    Export discharge summaries for the rows listed in the two admission TSVs.

    The first row of ``suicide_before_patient_admission.tsv`` and of
    ``suicide_meanwhile_patient_admission.tsv`` is kept as table header; the
    first column of the remaining rows selects rows of
    ``data/NOTEEVENTS.csv`` by their column 0 (the "before" ids are checked
    first).  For every selected row whose column 6 equals
    ``"Discharge summary"`` the last column (the note text) is written to
    ``<col0>_<col1>_<col2>.txt`` in the matching folder under
    ``output_folder``, and the remaining columns are appended to the matching
    table, which is saved as a TSV at the end.

    :return: None
    """
    suicide_meanwhile_patient_admission = read.read_from_tsv(
        os.path.join(
            output_folder,
            "suicide_patient_id/suicide_meanwhile_patient_admission.tsv"))
    file_notes_title_meanwhile = suicide_meanwhile_patient_admission[:1]
    # Sets give a constant-time test for each of the many NOTEEVENTS rows.
    # Assumes the ids read from the TSV are hashable strings.
    row_meanwhile = {row[0] for row in suicide_meanwhile_patient_admission[1:]}

    suicide_before_patient_admission = read.read_from_tsv(
        os.path.join(
            output_folder,
            "suicide_patient_id/suicide_before_patient_admission.tsv"))
    file_notes_title_before = suicide_before_patient_admission[:1]
    row_before = {row[0] for row in suicide_before_patient_admission[1:]}

    # newline='' is what the csv module requires so that line breaks inside
    # the quoted note text reach the parser (and the saved .txt) untouched.
    with open("data/NOTEEVENTS.csv", 'r', newline='') as mycsvfile:
        for row in csv.reader(mycsvfile, delimiter=','):
            if row[0] in row_before:
                folder = "discharge_summaries/suicide_patient_before/"
                metadata = file_notes_title_before
            elif row[0] in row_meanwhile:
                folder = "discharge_summaries/suicide_patient_during/"
                metadata = file_notes_title_meanwhile
            else:
                continue

            if row[6] != "Discharge summary":
                continue

            # Everything except the note text goes into the metadata table.
            metadata.append(row[:-1])
            read.save_in_txt_string(
                os.path.join(
                    output_folder,
                    folder + row[0] + "_" + row[1] + "_" + row[2] + ".txt"),
                row[-1])

    read.save_in_tsv(
        os.path.join(output_folder,
                     "discharge_summaries/suicide_patient_before.tsv"),
        file_notes_title_before)
    read.save_in_tsv(
        os.path.join(output_folder,
                     "discharge_summaries/suicide_patient_during.tsv"),
        file_notes_title_meanwhile)
def suicide_meanwhile_notes(file_name):
    """
    Select the notes referenced by a per-patient id file and save them as TSV.

    ``suicide_patient_id/<file_name>`` (JSON in ``cache_folder``) maps a value
    found in column 1 of ``suicide_patient_notes_all.tsv`` to a collection of
    column-0 values.  A note row is selected when both match; it is written
    to the output only if its column 6 is one of the target descriptions.
    Summary counts are printed, and the header row plus the kept notes are
    saved to ``suicide_patient_id/<file_name>.tsv`` under ``output_folder``.

    :param file_name: name of the JSON id file, also used for the output TSV.
    :return: None
    """
    target_description = frozenset([
        "Nursing/other", "Nursing", "Physician", "Discharge summary",
        "Social Work", "General", "Nutrition", "Rehab Services",
        "Case Management", "Consult"
    ])

    suicide_meanwhile = read.read_from_json(
        os.path.join(cache_folder, "suicide_patient_id/" + file_name))

    # Parse the notes table once and split it into header and data rows.
    all_rows = read.read_from_tsv(
        os.path.join(cache_folder, "suicide_patient_notes_all.tsv"))
    selected_notes = all_rows[:1]
    patient_notes_all = all_rows[1:]

    missing_admission_count = 0  # selected notes whose column 2 is empty
    admission_ids = set()        # distinct non-empty column-2 values
    selected_count = 0           # selected notes, whatever their description

    for row in patient_notes_all:
        if row[1] not in suicide_meanwhile:
            continue
        if row[0] not in suicide_meanwhile[row[1]]:
            continue

        if row[2] == "":
            missing_admission_count += 1
        else:
            admission_ids.add(row[2])

        if row[6] in target_description:
            selected_notes.append(row)
        selected_count += 1

    print("patients: ", len(suicide_meanwhile))
    print("admission with id: ", len(admission_ids))
    print("admission without id: ", missing_admission_count)
    print("all notes: ", selected_count)

    read.save_in_tsv(
        os.path.join(output_folder,
                     "suicide_patient_id/" + file_name + ".tsv"),
        selected_notes)
def notes_suicidal_patients():
    """
    Extract the note metadata of the patients from ``patients_attempt_suicide``.

    Column 1 and column 2 of the rows returned by
    ``patients_attempt_suicide()`` are used as the patient ids and the
    hospital admission ids.  ``NOTEEVENTS.csv`` (under ``mimic``) is scanned
    once; for every row whose column 1 is a selected patient id, all columns
    but the last are kept, and the rows whose column 2 is also a selected
    admission id are additionally collected separately.

    Writes ``note_events_all.tsv`` and ``note_events_suicidal.tsv`` to
    ``output_folder``.

    :return: None
    """
    dataset = patients_attempt_suicide()

    # How many matching diagnosis rows each patient has.
    patient_s = Counter([item[1] for item in dataset])
    print(patient_s)

    # Sets give a constant-time test for each of the many NOTEEVENTS rows.
    patient_ids = {item[1] for item in dataset}
    print(len(patient_ids))
    hosp_adm_ids = {item[2] for item in dataset}
    print(len(hosp_adm_ids))

    note_events_all = []
    note_events_suicidal = []
    # newline='' is what the csv module requires so that line breaks inside
    # the quoted note text are handed to the parser untouched.
    with open(os.path.join(mimic, "NOTEEVENTS.csv"), 'r', newline='') as mycsvfile:
        for row in csv.reader(mycsvfile, delimiter=','):
            if row[1] not in patient_ids:
                continue
            metadata = row[:-1]  # drop the trailing note text
            note_events_all.append(metadata)
            # Both id sets come from the same diagnosis rows, so an admission
            # id is only looked up for an already selected patient.
            if row[2] in hosp_adm_ids:
                note_events_suicidal.append(metadata)

    read.save_in_tsv(
        os.path.join(output_folder, "note_events_suicidal.tsv"),
        note_events_suicidal)
    read.save_in_tsv(
        os.path.join(output_folder, "note_events_all.tsv"),
        note_events_all)
def read_event_info_from_noteevents():
    """
    Report cohort sizes and export notes of mental-disorder, non-suicide patients.

    Suicide and mental-disorder ICD codes are read from
    ``D_ICD_DIAGNOSES.csv`` and resolved to diagnosis rows through
    ``read_patient_admission_from_diagnosesicd``; column 1 of those rows is
    used as patient id and column 2 as admission id.  The sizes of the two
    cohorts, their overlap and their differences are printed.

    ``NOTEEVENTS.csv`` is then scanned once.  For patients with a
    mental-disorder diagnosis and no suicide diagnosis, all columns but the
    last of each of their notes are saved to ``mental_patient_all_notes.tsv``;
    the notes that also belong to one of their mental-disorder admissions are
    saved to ``mental_patient_admission_notes.tsv`` (both in
    ``output_folder``).

    :return: None
    """
    d_icd_diagnoses = os.path.join(mimic, "D_ICD_DIAGNOSES.csv")
    suicide_icds = read_suicide_icd_code_from_dicddiagnoses(d_icd_diagnoses)
    mental_icds = read_mental_disorder_icd_code_from_dicddiagnoses(d_icd_diagnoses)
    patient_suicide = read_patient_admission_from_diagnosesicd(suicide_icds)
    patient_mental = read_patient_admission_from_diagnosesicd(mental_icds)

    # Sets make every membership test below constant-time; the printed counts
    # are the same as those of the equivalent de-duplicated lists.
    suicide_patient_id = {item[1] for item in patient_suicide}
    mental_patient_id = {item[1] for item in patient_mental}
    patient_mental_not_suicide = [
        item for item in patient_mental if item[1] not in suicide_patient_id
    ]

    # no. of patient with suicidal events
    print("no. of patient with suicidal events: ", len(suicide_patient_id))

    # no. of patient with mental disorders
    print("no. of patient with mental disorders: ", len(mental_patient_id))

    # no. of patient with mental disorders attempted suicide
    dataset_mental_suicide = mental_patient_id & suicide_patient_id
    print("no. of patient with mental disorders attempted suicide: ",
          len(dataset_mental_suicide))

    # no. of patient with mental disorders that did not attempt suicide
    dataset_mental_not_suicide = mental_patient_id - suicide_patient_id
    print("no. of patient with mental disorders do not attempte suicide: ",
          len(dataset_mental_not_suicide))

    # no. of patient with suicidal events with mental disorder
    dataset_suicide_mental = suicide_patient_id & mental_patient_id
    print("no. of patient with suicidal events with mental disorder: ",
          len(dataset_suicide_mental))

    # no. of patient with suicidal events but without mental disorder
    dataset_suicide_not_mental = suicide_patient_id - mental_patient_id
    print(
        "no. of patient with suicidal events but do not have mental disorder: ",
        len(dataset_suicide_not_mental))

    hadm = {item[2] for item in patient_mental_not_suicide}
    patient_mental_not_suicide_id = {
        item[1] for item in patient_mental_not_suicide
    }

    mental_not_suicide_patient_admission = []
    mental_not_suicida_patient = []
    # newline='' is what the csv module requires so that line breaks inside
    # the quoted note text are handed to the parser untouched.
    with open(os.path.join(mimic, "NOTEEVENTS.csv"), 'r', newline='') as mycsvfile:
        for row in csv.reader(mycsvfile, delimiter=','):
            if row[1] not in patient_mental_not_suicide_id:
                continue
            metadata = row[:-1]  # drop the trailing note text
            mental_not_suicida_patient.append(metadata)
            # hadm and the patient ids come from the same diagnosis rows, so
            # an admission id is only looked up for an already selected
            # patient.
            if row[2] in hadm:
                mental_not_suicide_patient_admission.append(metadata)

    read.save_in_tsv(
        os.path.join(output_folder, "mental_patient_admission_notes.tsv"),
        mental_not_suicide_patient_admission)
    read.save_in_tsv(
        os.path.join(output_folder, "mental_patient_all_notes.tsv"),
        mental_not_suicida_patient)
if code not in code_cuis: code_cuis[code] = [cui] else: code_cuis[code] += [cui] print(len(code_cuis)) for code, cuis in code_cuis.items(): if len(cuis) >= 2: print(code, cuis) codes = read.read_from_json("data/AskAPatient/codes") print(len(codes)) tsv_lines = [] for code in ask: line = [code] if code in code_cuis: line += code_cuis[code] tsv_lines.append(line) read.save_in_tsv("data/AskAPatient/code_cuis.tsv", tsv_lines) # tsv_lines = [] # cuis = list(set(read.read_from_json("data/TwADR-L/cuis"))) # for code in twa: # line = [code] # if code in cuis: # line +=[code] # tsv_lines.append(line) # read.save_in_tsv("data/TwADR-L/cui_cuis.tsv",tsv_lines)