def getfeaturesFromEventsTables(config):
    """
    Build a single feature table from the lab and chart measurement tables.

    Opens a ``SQLConnection`` with *config*, reads the full contents of
    LABMEASURMENTS, labslastmsmts, CHARTSMEASURMENTS and CHARTSLASTMSMTS,
    hands each pair of frames to its feature extractor, and left-joins the
    chart features onto the lab features using ``icustay_id``.

    :param config: passed unchanged to ``SQLConnection``
    :return: lab features left-merged with chart features on ``icustay_id``
    """
    db = SQLConnection(config)

    # Lab events: all measurements, then the last measurements
    # (described by the original author as observations at discharge time).
    lab_all = db.executeQuery('''
            select * from LABMEASURMENTS;
            ''')
    lab_last = db.executeQuery('''
        select * from labslastmsmts;
        ''')
    lab_features = get_features_from_labevents(lab_all, lab_last)

    # Chart events: same two-table layout as the lab events above.
    chart_all = db.executeQuery('''
            select * from CHARTSMEASURMENTS;
            ''')
    chart_last = db.executeQuery('''
            select * from CHARTSLASTMSMTS;
            ''')
    chart_features = get_features_from_chartevents(chart_all, chart_last)

    # Left join: every row of the lab features is kept, matched or not.
    return pd.merge(lab_features,
                    chart_features,
                    on='icustay_id',
                    how='left')
# Example #2
# 0
def _load_severity_table(conn, table_name):
    """
    Read one severity-score table and normalise its ``icustay_id`` key.

    :param conn: object exposing ``executeQuery(sql)`` that returns a DataFrame
    :param table_name: name of the table to read (hard-coded by the caller)
    :return: new DataFrame without rows lacking ``icustay_id``, with
        ``icustay_id`` cast to int and ``subject_id`` / ``hadm_id`` removed
    """
    table = conn.executeQuery('select * from {};'.format(table_name))
    # Assigning into the result of a boolean-mask selection is what triggers
    # pandas' SettingWithCopyWarning, so take an explicit copy first.
    table = table[table['icustay_id'].notnull()].copy()
    table['icustay_id'] = table['icustay_id'].astype('int')
    return table.drop(['subject_id', 'hadm_id'], axis=1)


def getfeaturesFromSeverityScoreConcepts(config):
    """
    Generate features from the MIMIC-III concepts tables for severity scores.

    Reads the sapsii, sofa, sirs, lods, apsiii and oasis tables, inner-joins
    them on ``icustay_id``, fills missing values in every column whose name
    contains ``score`` with 0, passes the result through
    ``clean_nan_columns(..., thres=60)`` and finally removes the
    ``icustay_age_group`` and ``preiculos`` columns.

    :param config: passed unchanged to ``SQLConnection``
    :return: DataFrame of merged severity-score features
    """
    print(
        "\n Extracting features from MIMIC-III concepts tables for severity scores"
    )
    conn = SQLConnection(config)

    # Tables are queried in this fixed order; each gets identical clean-up.
    sapsii, sofa, sirs, lods, apsiii, oasis = [
        _load_severity_table(conn, name)
        for name in ('sapsii', 'sofa', 'sirs', 'lods', 'apsiii', 'oasis')
    ]

    # pd.merge defaults to an inner join, so only stays present in every
    # table survive the chain.
    merged = pd.merge(sapsii, sofa, on='icustay_id')
    # Dropped as a "repeated column" by the original author -- presumably to
    # avoid a name clash in a later merge; verify against the table schemas.
    merged = merged.drop(['temp_score'], axis=1)
    merged = pd.merge(merged, sirs, on='icustay_id')
    # Same reasoning as for temp_score above.
    merged = merged.drop(['cardiovascular'], axis=1)
    for table in (lods, apsiii, oasis):
        merged = pd.merge(merged, table, on='icustay_id')

    # Missing score components are filled with 0 (the original author notes
    # that the MIMIC db treats a missing score as 0).
    score_columns = merged.filter(regex='score').columns
    merged[score_columns] = merged[score_columns].fillna(0)

    # Remove columns judged too sparse; the threshold handed to
    # clean_nan_columns is 60 (presumably percent of NaN values -- confirm
    # against that helper's definition).
    merged = clean_nan_columns(merged, thres=60)

    # Remaining non-numeric columns are not used as features.
    return merged.drop(['icustay_age_group', 'preiculos'], axis=1)