Example #1
import h5py


def write_notes(notes_df, outfile):
    """
    write_notes(notes_df, outfile)

    Write notes from notes_df to outfile.

    Parameters
    ----------

    notes_df: DataFrame
        DataFrame containing notes, times, etc.

    outfile: string
        filename of the output HDF5 file for a record from the MIMIC-III
        matched dataset
    """

    arr, saType = df_to_sarray(notes_df)

    with h5py.File(outfile, 'a') as f:
        clin = f.require_group('/clinical')
        clin.create_dataset('notes',
                            maxshape=(None, ),
                            data=arr,
                            dtype=saType,
                            compression="gzip",
                            compression_opts=9,
                            shuffle=True)
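
All of these examples lean on a df_to_sarray helper that is not shown on this page. A minimal sketch of what it is assumed to do - turn a DataFrame into a NumPy structured array plus a matching dtype, with object columns mapped to h5py's variable-length string type - could look like this (the field-building logic is an assumption, not the project's actual implementation):

import h5py
import numpy as np

def df_to_sarray(df):
    # Assumed behavior: object columns become variable-length strings so
    # h5py can store them; datetime columns are expected to have been
    # converted to strings beforehand.
    str_dt = h5py.special_dtype(vlen=str)
    fields = [(col, str_dt if dt == np.dtype('O') else dt)
              for col, dt in zip(df.columns, df.dtypes)]
    sa_type = np.dtype(fields)
    arr = np.empty(len(df), dtype=sa_type)
    for col in df.columns:
        arr[col] = df[col].values
    return arr, sa_type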
Example #2
import json

import h5py


def write_labs(labs_df, outfile):
    """
    Write labs from labs_df to outfile, storing per-test metadata as JSON
    in the dataset's .test_info attribute.
    """
    # TODO: convert flag to category and encode in .flag_info
    arr, saType = df_to_sarray(labs_df)

    #   dt = h5py.special_dtype(vlen=str)
    #   comp_type = np.dtype([('time', dt), ('testid', 'i8'), ('value', dt), ('valuenum', 'f8'), ('flag', dt)])
    # define array for writing to dataset
    #    arr_data = np.empty((0,), dtype=comp_type)
    #    for idx, row in labs_df.iterrows():
    #        arr = np.array([(str(row['charttime']), row['itemid'], row['value'], row['valuenum'], row['flag'])],
    #                  dtype = comp_type)
    #        arr_data = np.append(arr_data, arr)

    # create metadata: the first occurrence of each test's descriptive
    # fields, keyed by itemid (the groupby key is already the index)
    labs_grouped = labs_df.groupby('itemid')[['label', 'category',
                                              'fluid', 'valueuom',
                                              'loinc_code']].first()
    test_info = labs_grouped.T.to_dict('dict')

    with h5py.File(outfile, 'a') as f:
        clin = f.require_group('/clinical')
        lab_ds = clin.create_dataset('labs',
                                     maxshape=(None, ),
                                     data=arr,
                                     dtype=saType,
                                     compression="gzip",
                                     compression_opts=9,
                                     shuffle=True)
        lab_ds.attrs['.test_info'] = json.dumps(test_info)
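
The .test_info attribute round-trips through JSON, so reading the metadata back is symmetric. Note that json.dumps turns the integer itemid keys into strings:

import json

import h5py

with h5py.File(outfile, 'r') as f:
    test_info = json.loads(f['clinical/labs'].attrs['.test_info'])

# each value is a dict of label / category / fluid / valueuom / loinc_code
for itemid, info in test_info.items():
    print(itemid, info['label'])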
Example #3
import audata
import pandas as pd


def make_mapping(filename, mapper=None, overwrite=True):
    """
    Build a signal mapping table for an audata file and store it as a
    top-level 'mapping' dataset.  (LoincMapper, build_col_dict,
    std_signals and df_to_sarray are helpers defined elsewhere in this
    project.)
    """
    if mapper is None:
        mapper = LoincMapper(external_mapping_table="MIMICIII")

    # open the file read-write
    f = audata.File.open(filename, readonly=False)
    
    # for each dataset in each group, build a column dict with the mapper,
    # convert it to a DataFrame, and collect the frames into one table

    # numerics
    signals = []

    for dset_name in f['numerics'].list()['datasets']:
        dset = f['numerics/' + dset_name]
        columns = build_col_dict(dset, 'numerics', std_signals, mapper)
        df = pd.DataFrame.from_dict(columns, orient='index')
        signals.append(df)
    """Waveforms - if present"""
    
    for dset_name in f['waveforms'].list()['datasets']:
        dset = f['waveforms/'+dset_name]
        columns = build_col_dict(dset, 'waveforms', std_signals, mapper)
        df=pd.DataFrame.from_dict(columns, orient='index')
        signals.append(df)
    
    
    """write to file"""
    
    """ test for existance of mapping table first - option to overwrite  """ 
    signals = pd.concat(signals)
    sarray, saType =  df_to_sarray(signals)
    # test for existance
    if 'mapping' in f['/'].list()['datasets']:
        print('Mapping table exists')
        if overwrite:
            del f.hdf['mapping']
            f.hdf.create_dataset('mapping', data=sarray, dtype=saType)
        else:
            print('Not overwritten')
    else:
        f.hdf.create_dataset('mapping', data=sarray, dtype=saType)
            
    f.close()
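
For illustration, two hypothetical calls (the filenames are invented) showing both entry points:

# build the mapping table with the default MIMIC-III LOINC mapper,
# overwriting any existing table
make_mapping('p000033.h5')

# or share one preconfigured mapper across many files
mapper = LoincMapper(external_mapping_table="MIMICIII")
for fname in ['p000033.h5', 'p000045.h5']:
    make_mapping(fname, mapper=mapper, overwrite=False)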
Example #4
    def write_labs(df, filename, test_metadata=False):
        """
        Parameters
        ----------
        df: DataFrame
            Labs with a datetime index
        filename: String
            Path of the HDF5 file to append to
        test_metadata: bool, optional
            If True, strip repeated elements (loinc, etc.) and store them
            in the .test_info attribute (not yet implemented)
        """
        with h5py.File(filename, 'a') as f:
            origin = pd.to_datetime(json.loads(f['/'].attrs['.meta'])['time_origin'])
            # assume a naive datetime index and localize it to UTC
            df.index = df.index.tz_localize('UTC')
            # seconds elapsed since the file's time origin
            df['time'] = (df.index - origin).total_seconds()

            del df['HADM_ID']

            # TODO: if test_metadata is True, strip repeated elements
            # (loinc, etc.) and store in .test_info
            df.columns = df.columns.str.strip().str.lower()
            arr, saType = df_to_sarray(df)
            clin = f.require_group('/clinical')
            lab_ds = clin.create_dataset('labs', maxshape=(None, ),
                                         data=arr, dtype=saType,
                                         compression="gzip",
                                         compression_opts=9, shuffle=True)
            lab_ds.attrs['.test_info'] = 'none'
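
Because the labs land in the file as a structured array, they round-trip back into pandas. A sketch (the filename is a placeholder; depending on the h5py version, variable-length string columns may come back as bytes):

import h5py
import pandas as pd

with h5py.File('subject.h5', 'r') as f:
    labs = pd.DataFrame(f['clinical/labs'][:])

# decode byte strings if this h5py version returns bytes
for col in labs.columns:
    if labs[col].dtype == object and isinstance(labs[col].iloc[0], bytes):
        labs[col] = labs[col].str.decode('utf-8')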