Beispiel #1
0
def get_diagnosis_events_distress(patients: PatientDB, row, date_str,
                                  patient_id):
    """Add a "Distress" diagnosis event if the row names emotional distress.

    The row's ``PT_text`` and ``concept_text`` attributes are compared
    (exact, case-sensitive equality) against fixed term lists.

    Args:
        patients: Patient database that receives the new event.
        row: Record exposing ``PT_text`` and ``concept_text`` attributes; it
            is also passed to the event's ``add_meddra_roles``.
        date_str: Used as both the chart date and the visit ID of the event.
        patient_id: Identifier of the patient the event belongs to.

    Returns:
        True if a distress event was added to ``patients``, otherwise False.
    """
    pt_terms = ("Emotional distress",)
    concept_terms = ("Emotional distress", "emotional distress")

    pt_value = row.PT_text
    concept_value = row.concept_text

    pt_hit = pt_value in pt_terms
    concept_hit = concept_value in concept_terms
    if not (pt_hit or concept_hit):
        return False

    # When both fields match, the concept text is used as the long name.
    long_name = concept_value if concept_hit else pt_value

    event = Event(chartdate=date_str,
                  visit_id=date_str,
                  patient_id=patient_id)
    event.diagnosis_role(diagnosis_name="Distress",
                         diagnosis_long_name=long_name)
    event.add_meddra_roles(row)
    patients.add_event(event)
    return True
Beispiel #2
0
def get_diagnosis_events_depression(patients: PatientDB, row, date_str,
                                    patient_id):
    """Add a "Depression" diagnosis event if the row names a depression term.

    The row's ``PT_text`` and ``concept_text`` attributes are compared
    (exact, case-sensitive equality) against fixed term lists.  When both
    match, ``concept_text`` is used as the long diagnosis name.

    Args:
        patients: Patient database that receives the new event.
        row: Record exposing ``PT_text`` and ``concept_text`` attributes; it
            is also passed to the event's ``add_meddra_roles``.
        date_str: Used as both the chart date and the visit ID of the event.
        patient_id: Identifier of the patient the event belongs to.

    Returns:
        True if a depression event was added to ``patients``, otherwise
        False.
    """
    # "ST segment depression" is intentionally NOT listed: it is an
    # electrocardiogram finding, not a mood disorder, and must not be
    # recorded under the "Depression" diagnosis.
    concept_text_depression = [
        "Anxious depression",
        "Bipolar depression",
        "Chronic depression",
        "Major depression",
        "Post stroke depression",
        "Postpartum depression",
        "Reactive depression",
        "bipolar depression",
        "chronic depression",
        "depression",
        "depression nos",
        "major depression",
        "manic depression",
        "mood depression",
        "post stroke depression",
        "postpartum depression",
        "reactive depression",
        "suicidal depression",
    ]

    PT_text_depression = [
        "Major depression",
        "Perinatal depression",
        "Post stroke depression",
    ]
    PT_text = row.PT_text
    concept_text = row.concept_text

    # Check for 'depression'; a concept_text match overrides a PT_text match
    # for the long name.
    depression_diagnosis_event_found = False
    diagnosis_long_name = ""
    if PT_text in PT_text_depression:
        depression_diagnosis_event_found = True
        diagnosis_long_name = PT_text
    if concept_text in concept_text_depression:
        depression_diagnosis_event_found = True
        diagnosis_long_name = concept_text

    # If we find a depression diagnosis event, create it
    if depression_diagnosis_event_found:
        # Create a basic event
        depression_diagnosis_event = Event(chartdate=date_str,
                                           visit_id=date_str,
                                           patient_id=patient_id)
        # Add diagnosis attributes
        depression_diagnosis_event.diagnosis_role(
            diagnosis_name="Depression",
            diagnosis_long_name=diagnosis_long_name)
        # Add MEDDRA attributes
        depression_diagnosis_event.add_meddra_roles(row)
        patients.add_event(depression_diagnosis_event)

    return depression_diagnosis_event_found
Beispiel #3
0
def get_diagnosis_events_anxiety(patients: PatientDB, row, date_str,
                                 patient_id):
    """Add an "Anxiety" diagnosis event if the row names an anxiety term.

    The row's ``PT_text`` and ``concept_text`` attributes are compared
    (exact, case-sensitive equality) against fixed term lists.

    Args:
        patients: Patient database that receives the new event.
        row: Record exposing ``PT_text`` and ``concept_text`` attributes; it
            is also passed to the event's ``add_meddra_roles``.
        date_str: Used as both the chart date and the visit ID of the event.
        patient_id: Identifier of the patient the event belongs to.

    Returns:
        True if an anxiety event was added to ``patients``, otherwise False.
    """
    pt_terms = (
        "Adjustment disorder with anxiety",
        "Generalised anxiety disorder",
        "Illness anxiety disorder",
        "Separation anxiety disorder",
        "Social anxiety disorder",
    )
    concept_terms = (
        "Adjustment disorder with anxiety",
        "Chronic anxiety",
        "Generalized anxiety disorder",
        "Situational anxiety",
        "Social anxiety disorder",
        "adjustment disorder with anxiety",
        "anxiety",
        "anxiety attack",
        "anxiety disorder",
        "anxiety symptoms",
        "chronic anxiety",
        "generalized anxiety disorder",
        "separation anxiety",
        "situational anxiety",
        "social anxiety disorder",
    )

    pt_value = row.PT_text
    concept_value = row.concept_text

    pt_hit = pt_value in pt_terms
    concept_hit = concept_value in concept_terms
    if not (pt_hit or concept_hit):
        return False

    # When both fields match, the concept text is used as the long name.
    long_name = concept_value if concept_hit else pt_value

    event = Event(chartdate=date_str,
                  visit_id=date_str,
                  patient_id=patient_id)
    event.diagnosis_role(diagnosis_name="Anxiety",
                         diagnosis_long_name=long_name)
    event.add_meddra_roles(row)
    patients.add_event(event)
    return True
Beispiel #4
0
def get_diagnosis_events(patients: PatientDB, df, max_rows=10000):
    """Turn rows of a MedDRA extraction dataframe into patient events.

    Each row is offered to the depression, anxiety, insomnia and distress
    matchers, which add their own diagnosis events.  A row that none of them
    recognises is stored as a generic MedDRA event instead.

    Args:
        patients: Patient database that receives the events.
        df: Dataframe whose rows expose ``date`` and ``patid`` (plus the
            fields the individual matchers read).
        max_rows: Maximum number of rows to process (integer).  Defaults to
            10000, the cap that was previously hard-coded.
    """
    print("Getting diagnosis events...")
    print(df.head())

    # FIXME: the row cap and the row-wise itertuples() loop are stop-gaps;
    # a generalizable, non-iterative approach is still to be found.
    # Integer step, never zero, so small caps do not divide by zero.
    progress_step = max(1, max_rows // 10)
    print(f"Limiting iteration of dataframe to a maximum of {max_rows} rows")
    for i, row in enumerate(df.itertuples()):
        if i % progress_step == 0:
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"{now_str} Tuple: {i}/{max_rows}")
        if i >= max_rows:
            break
        date_str = row.date
        patient_id = str(row.patid)

        # Check for different types of diagnosis events.  A list (not a
        # generator) is used on purpose: every matcher must run for every
        # row, because each one adds its own event as a side effect.
        found_any_events = any([
            get_diagnosis_events_depression(patients, row, date_str,
                                            patient_id),
            get_diagnosis_events_anxiety(patients, row, date_str,
                                         patient_id),
            get_diagnosis_events_insomnia(patients, row, date_str,
                                          patient_id),
            get_diagnosis_events_distress(patients, row, date_str,
                                          patient_id),
        ])
        if found_any_events:
            continue

        # No mental health symptom matched: record a plain MedDRA event.
        meddra_event = Event(chartdate=date_str,
                             visit_id=date_str,
                             patient_id=patient_id)
        meddra_event.meddra_role(row)
        patients.add_event(meddra_event)
Beispiel #5
0
def get_diagnosis_events_insomnia(patients: PatientDB, row, date_str,
                                  patient_id):
    """Add an "Insomnia" diagnosis event if the row names an insomnia term.

    The row's ``PT_text`` and ``concept_text`` attributes are compared
    (exact, case-sensitive equality) against fixed term lists.  When both
    match, ``concept_text`` is used as the long diagnosis name.

    Args:
        patients: Patient database that receives the new event.
        row: Record exposing ``PT_text`` and ``concept_text`` attributes; it
            is also passed to the event's ``add_meddra_roles``.
        date_str: Used as both the chart date and the visit ID of the event.
        patient_id: Identifier of the patient the event belongs to.

    Returns:
        True if an insomnia event was added to ``patients``, otherwise False.
    """
    concept_text_insomnia = [
        # NOTE(review): "Behavorial" looks misspelled; kept verbatim in case
        # it mirrors the source data -- confirm against the concept table.
        # The comma after this entry was missing, which silently merged it
        # with "Chronic insomnia" into one string that could never match.
        "Behavorial insomnia of childhood",
        "Chronic insomnia",
        "Initial insomnia",
        "Primary insomnia",
        "chronic insomnia",
        "insomnia",
        "primary insomnia",
        "psychological insomnia",
    ]

    PT_text_insomnia = [
        "Behavioural insomnia of childhood",
        "Initial insomnia",
        "Middle insomnia",
        # NOTE(review): possibly a truncation of "Psychophysiologic
        # insomnia" -- verify against the MedDRA preferred terms.
        "Psychophysiologi insomnia",
        "Terminal insomnia",
    ]

    PT_text = row.PT_text
    concept_text = row.concept_text

    # Check for insomnia; a concept_text match overrides a PT_text match
    # for the long name.
    insomnia_diagnosis_event_found = False
    diagnosis_long_name = ""
    if PT_text in PT_text_insomnia:
        insomnia_diagnosis_event_found = True
        diagnosis_long_name = PT_text
    if concept_text in concept_text_insomnia:
        insomnia_diagnosis_event_found = True
        diagnosis_long_name = concept_text

    # If we find an insomnia diagnosis event, create it
    if insomnia_diagnosis_event_found:
        insomnia_diagnosis_event = Event(chartdate=date_str,
                                         visit_id=date_str,
                                         patient_id=patient_id)
        insomnia_diagnosis_event.diagnosis_role(
            diagnosis_name="Insomnia", diagnosis_long_name=diagnosis_long_name)
        insomnia_diagnosis_event.add_meddra_roles(row)
        patients.add_event(insomnia_diagnosis_event)

    return insomnia_diagnosis_event_found
Beispiel #6
0
def generate_patient_db(
    demographics_path,
    meddra_extractions_dir,
    drug_exposure_dir,
    concept_dir,
    output_dir,
    debug,
    use_dask,
):
    """Build a patient database from the input tables and dump it to disk.

    Loads the demographics, MedDRA extraction, OMOP drug exposure and OMOP
    concept tables, collects events from them, keeps only patients that have
    events, attaches events and demographic information, and finally writes
    the patients to ``output_dir`` as JSON lines.  The process exits with
    status 0 if no events were found.

    Args:
        demographics_path: Location of the demographics table.
        meddra_extractions_dir: Directory holding the MedDRA extraction
            parquet batches.
        drug_exposure_dir: Directory holding the OMOP drug exposure CSVs.
        concept_dir: Directory holding the OMOP concept table.
        output_dir: Directory the patient dump is written to.
        debug: Forwarded to the table loaders.
        use_dask: Forwarded to the table loaders and patient helpers.
    """

    def report_columns(label, frame):
        # Show the sorted column names of a freshly loaded table.
        names = sorted(frame.columns.tolist())
        print(f"{label} column names:\n\t{names}", flush=True)

    # Container for everything gathered below.
    patient_db = PatientDB(name="all")

    demographics_df = get_df(demographics_path,
                             use_dask=use_dask,
                             debug=debug)

    # --- NLP tables: MedDRA extractions ---
    meddra_df = get_table(
        meddra_extractions_dir,
        prefix="all_POS_batch",
        pattern="*_*",
        pattern_re=".*_.*",
        extension=".parquet",
        use_dask=use_dask,
        debug=debug,
    )
    report_columns("meddra extractions", meddra_df)

    # --- OMOP tables: DRUG_EXPOSURE and CONCEPT ---
    drug_exposure_df = omop_drug_exposure(
        drug_exposure_dir,
        prefix="drug_exposure",
        pattern="0000000000*",
        pattern_re="0000000000.*",
        extension=".csv",
        use_dask=use_dask,
        debug=debug,
    )
    report_columns("drug exposure", drug_exposure_df)

    concept_df = omop_concept(concept_dir, use_dask=use_dask, debug=debug)
    report_columns("concept", concept_df)

    candidate_ids = get_all_patient_ids(demographics_df,
                                        meddra_df,
                                        drug_exposure_df,
                                        use_dask=use_dask)

    # Event extraction is always invoked with use_dask=False, regardless of
    # the flag passed to this function.
    get_events(patient_db,
               concept_df,
               meddra_df,
               drug_exposure_df,
               use_dask=False)
    if not patient_db.data["events"]:
        print("Empty events dict! Exiting...", flush=True)
        sys.exit(0)
    print(f"Found {patient_db.num_events()} events", flush=True)

    print("Filter out patient IDs that don't have any events", flush=True)
    candidate_ids = patient_db.select_non_empty_patients(candidate_ids)

    print("Generate patients from IDs", flush=True)
    patient_db.generate_patients_from_ids(candidate_ids)

    # NOTE: explicit creation of patient visits from visit dates is not
    # performed here; events are attached to visits directly.
    # FIXME
    print("Attach events to visits...", flush=True)
    patient_db.attach_events_to_visits()

    print("Attach demographic information to patients", flush=True)
    patient_db.add_demographic_info(demographics_df, use_dask)

    print("Dump patients to a file", flush=True)
    patient_db.dump(output_dir, "patients", "jsonl", unique=True)
Beispiel #7
0
def get_medication_events(patients: PatientDB, concept_df, df, use_dask=False,
                          max_rows=10000):
    """Turn rows of the drug exposure table into patient events.

    ``df`` is left-joined with ``concept_df`` on
    ``drug_concept_id == concept_id``.  Every joined row whose
    ``concept_class_id`` is an accepted class (currently only
    ``"Clinical Drug"``) becomes a drug exposure event.  The distinct class
    IDs seen while iterating are printed at the end as a diagnostic.

    Args:
        patients: Patient database that receives the events.
        concept_df: Concept table providing ``concept_id``,
            ``concept_class_id`` and ``concept_name``.
        df: Drug exposure table providing ``drug_concept_id``, ``person_id``
            and ``drug_exposure_start_DATE``.
        use_dask: When true, ``concept_df`` is repartitioned to a single
            partition before the merge.
        max_rows: Maximum number of joined rows to visit (integer); rows that
            are skipped by the class filter count towards it.  Defaults to
            10000, the cap that was previously hard-coded.
    """
    print("Getting medication events...", flush=True)
    print(df.head())

    # Large-to-small join: keep the smaller concept table in one partition.
    # (Assumes the concept table fits in memory -- TODO confirm.)
    if use_dask:
        concept_df = concept_df.repartition(npartitions=1)
    new_df = df.merge(
        concept_df,
        how="left",
        left_on="drug_concept_id",
        right_on="concept_id",
        suffixes=("", "_right"),
    )

    # Concept classes observed in the visited rows (reported at the end).
    drug_concept_class_ids: Set[str] = set()
    # Only clinical drugs are treated as (approximate) prescriptions.  Other
    # classes that were considered but are not enabled: 'Prescription Drug',
    # 'Ingredient', 'CVX', 'Undefined', 'Drug Product', 'Branded Drug',
    # 'Branded Drug Form', 'Clinical Drug Comp', 'Quant Clinical Drug'.
    accepted_drug_concept_class_ids = {"Clinical Drug"}

    # FIXME: the row cap is a temporary measure.
    # Integer step, never zero, so small caps do not divide by zero.
    progress_step = max(1, max_rows // 10)
    print(f"Limiting iteration of dataframe to a maximum of {max_rows} rows")
    for i, row in enumerate(new_df.itertuples()):
        if i % progress_step == 0:
            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            print(f"{now_str} Tuple: {i}/{max_rows}")
        if i >= max_rows:
            break
        # FIXME, should events be required to have a single date if
        # they are more of an event range?
        patient_id = str(row.person_id)
        date_str = row.drug_exposure_start_DATE

        # Record every class seen, then skip rows outside the accepted set.
        drug_concept_class_id = row.concept_class_id
        drug_concept_class_ids.add(drug_concept_class_id)
        if drug_concept_class_id not in accepted_drug_concept_class_ids:
            continue

        drug_concept_name = row.concept_name

        drug_exposure_event = Event(chartdate=date_str,
                                    visit_id=date_str,
                                    patient_id=patient_id)
        drug_exposure_event.drug_exposure_role(row, drug_concept_name)
        # TODO, convert drug_exposure events to medication events?
        patients.add_event(drug_exposure_event)
    print(f"drug_concept_class_ids: {drug_concept_class_ids}")
Beispiel #8
0
def main(args):
    """Answer the enabled analysis questions against a stored patient DB.

    Loads the OMOP concept table and a previously dumped patient database,
    prepares the per-question output directories, then runs question 1
    (co-morbidities associated with mental health) and question 9 (top
    medications for patients with mental health related issues).

    Args:
        args: Parsed options providing ``concept_dir``, ``use_dask``,
            ``debug``, ``patient_db_path`` and ``output_dir``.
    """
    print("START OF PROGRAM\n")
    # FIXME

    concept_table = omop_concept(
        args.concept_dir,
        prefix="concept",
        pattern="*",
        pattern_re=".*",
        extension=".csv",
        use_dask=args.use_dask,
        debug=args.debug,
    )

    # Restore the patient database produced by an earlier run.
    patient_db = PatientDB(name="all")
    patient_db.load(args.patient_db_path)

    # One output directory per question (q1 .. q9).
    out_dir = args.output_dir
    prepare_output_dirs(out_dir, num_questions=9, prefix="q")

    # Q1 - What are the co-morbidities associated with mental health?
    mental_health_terms = ["depression", "anxiety", "insomnia", "distress"]
    q1_matches, q1_event_type_roles, _q1_counts = run_q1(
        patient_db, mental_health_terms, f"{out_dir}/q1/top_k.jsonl")

    # Questions that are planned but not run here:
    # Q2 - Distribution of age groups for patients with major depression,
    #      anxiety, insomnia or distress.
    # Q3 - Distribution of age groups and gender groups for the same
    #      patients.
    # Q4 - Trend associated with anxiety, loneliness and depression in both
    #      Dx codes and clinical notes.
    # Q5 - Trend associated with impaired cognitive function (Alzheimer's,
    #      dementia, mild cognitive impairment) in both Dx codes and clinical
    #      notes.  (Open question: where are the Dx codes?)
    # Q6 - Mental health trend associated with older adults with
    #      multi-morbidity conditions.
    # Q7 - Top reported causes (anger, anxiety, confusion, fear, guilt,
    #      sadness) for mental health related issues.
    # Q8 - Distribution of sentiment for mental health related issues.

    # Q9 - What are the top medications prescribed for patients with mental
    #      health related issues?
    _q9_top_k, _q9_counts = run_q9(
        patient_db,
        q1_matches,
        q1_event_type_roles,
        concept_table,
        f"{out_dir}/q9/top_k.jsonl",
    )

    print("END OF PROGRAM")