def get_death_outcome(data, death_time_frame):
    """Flags, for every (subject, date) row, whether the subject's date of death
    falls on or before the end of a window that starts at the row's date.

    Args:
        data: A dataframe containing the following columns:
            subject_id: The ID of the subject.
            date: The reference date the date of death is compared against.

        death_time_frame: Length of the window in days. A row is flagged as died
            when the date of death is no later than date + death_time_frame days.

    Returns:
        A dataframe with the following columns:
            subject_id: The ID of the subject.
            date: The reference date taken from the input.
            died: Whether the date of death is within the window for that row.
    """
    patients = get_patients()
    # Lookup table: subject ID -> date of death as stored in the patients table.
    death_date_lookup = dict(zip(patients.subject_id, patients.dod))
    window = Day(death_time_frame)

    death_dates = data.subject_id.map(death_date_lookup)
    window_ends = data.date.map(lambda start: start + window)

    # Rows without a date of death get a sentinel that lies beyond every window
    # end, so the comparison below evaluates to False for them.
    sentinel = window_ends.max() + window
    comparable_death_dates = death_dates.fillna(sentinel).astype(window_ends.dtype)
    died = window_ends >= comparable_death_dates

    result = pd.concat([data.subject_id, data.date, died], axis=1)
    result.columns = ['subject_id', 'date', 'died']
    return result
def get_processed_patient_info(use_cache=False):
    """Returns non medical information about a patient for each ICU stay.

    Args:
        use_cache: Skip computation and load results from previous computation.
            NOTE: this flag is currently not read by the function; the result is
            always recomputed. It is kept so existing callers keep working.

    Returns:
        A DataFrame without duplicate rows and with the following columns:
        icustay_id: ICU stay identifier taken from the ICU stay details.
        subject_id: Subject identifier taken from the patients table.
        sex: Taken from the patients table.
        marital_status_descr: Taken from the demographic details.
        ethnicity_descr: Taken from the demographic details.
        overall_payor_group_descr: Taken from the demographic details.
        religion_descr: Taken from the demographic details.
        age: Time between dob and icustay_intime of the same joined row,
            converted to years.
    """
    modify_dates_fn = get_modify_dates_fn()
    patients = get_patients()
    icu_details = get_icustay_details()
    demographic_details = get_demographic_details()

    target_patient_data = patients[["subject_id", "hadm_id", "sex", "dob"]]

    target_demographic_fields = [
        "subject_id",
        "hadm_id",
        "marital_status_descr",
        "ethnicity_descr",
        "overall_payor_group_descr",
        "religion_descr",
    ]

    target_demographic_details = demographic_details[target_demographic_fields]

    patients_info = target_patient_data.merge(target_demographic_details, on=["subject_id", "hadm_id"])

    icu_details = modify_dates_fn(icu_details, ["icustay_intime"])
    patients_info = modify_dates_fn(patients_info, ["dob"])

    # Replace hadm_id with icustay_id. The join happens BEFORE the age is
    # computed: subtracting columns that live in two different frames aligns
    # them by row index rather than by admission, which pairs a dob with the
    # admission time of an unrelated row.
    target_icu_fields = icu_details[["hadm_id", "icustay_id", "icustay_intime"]]
    patients_info = target_icu_fields.merge(patients_info, on="hadm_id")

    # NOTE(review): recent pandas releases appear to reject year-resolution
    # timedelta casts -- confirm against the pandas version this project pins.
    time_alive = patients_info.icustay_intime - patients_info.dob
    patients_info["age"] = time_alive.astype("timedelta64[Y]")

    helper_columns = ["hadm_id", "icustay_intime", "dob"]
    return patients_info.drop(helper_columns, axis=1).drop_duplicates()
# Example no. 3
# 0
def get_processed_patient_info(use_cache=False):
    """Returns non medical information about a patient for each ICU stay.

    Args:
        use_cache: Skip computation and load results from previous computation.
            NOTE: this flag is currently not read by the function; the result is
            always recomputed. It is kept so existing callers keep working.

    Returns:
        A DataFrame without duplicate rows and with the following columns:
        icustay_id: ICU stay identifier taken from the ICU stay details.
        subject_id: Subject identifier taken from the patients table.
        sex: Taken from the patients table.
        marital_status_descr: Taken from the demographic details.
        ethnicity_descr: Taken from the demographic details.
        overall_payor_group_descr: Taken from the demographic details.
        religion_descr: Taken from the demographic details.
        age: Time between dob and icustay_intime of the same joined row,
            converted to years.
    """
    modify_dates_fn = get_modify_dates_fn()
    patients = get_patients()
    icu_details = get_icustay_details()
    demographic_details = get_demographic_details()

    target_patient_data = patients[["subject_id", "hadm_id", "sex", "dob"]]

    target_demographic_fields = [
        "subject_id", "hadm_id", "marital_status_descr", "ethnicity_descr",
        "overall_payor_group_descr", "religion_descr"
    ]

    target_demographic_details = demographic_details[target_demographic_fields]

    patients_info = target_patient_data.merge(target_demographic_details,
                                              on=["subject_id", "hadm_id"])

    icu_details = modify_dates_fn(icu_details, ["icustay_intime"])
    patients_info = modify_dates_fn(patients_info, ["dob"])

    # Replace hadm_id with icustay_id. The join happens BEFORE the age is
    # computed: subtracting columns that live in two different frames aligns
    # them by row index rather than by admission, which pairs a dob with the
    # admission time of an unrelated row.
    target_icu_fields = icu_details[['hadm_id', 'icustay_id',
                                     'icustay_intime']]
    patients_info = target_icu_fields.merge(patients_info, on='hadm_id')

    # NOTE(review): recent pandas releases appear to reject year-resolution
    # timedelta casts -- confirm against the pandas version this project pins.
    time_alive = patients_info.icustay_intime - patients_info.dob
    patients_info["age"] = time_alive.astype("timedelta64[Y]")

    helper_columns = ['hadm_id', 'icustay_intime', 'dob']
    return patients_info.drop(helper_columns, axis=1).drop_duplicates()
# Example no. 4
# 0
def __get_offset_by_subject_id():
    """Builds the per-subject year offset used to shift every date of a subject.

    Dates in MIMIC2 are obfuscated for anonymity, and the obfuscated ranges often
    fall outside the range pandas datetimes can represent. Shifting all dates of
    a subject by a fixed number of years makes pandas datetime functionality
    usable. The offset is derived from the year of the subject's dob relative to
    __TARGET_YEAR.

    Returns:
        A dict mapping subject ID to the number of years in the offset. Example:
        {123478: 1249, 898281: -1000}
    """
    patients = get_patients()
    birth_years = patients.dob.apply(lambda dob: dob.year)

    # Offsets are restricted to multiples of 4 so that special dates such as
    # leap days survive the shift: divide by 4, truncate to an integer, then
    # scale back up.
    four_year_steps = ((birth_years - __TARGET_YEAR) / 4).astype(int)
    offsets = four_year_steps * 4

    return dict(zip(patients.subject_id, offsets))