Esempio n. 1
0
 def __init__(self,
              variable_ranges=read_variable_ranges(
                  "preprocessing/resources/variable_ranges.csv"),
              columns=None,
              reader=None,
              records=None,
              num_workers=24,
              num_hours=24):
     '''
     Store the configuration used for cleaning records.

     variable_ranges is the DataFrame for variables, with OUTLIER_HIGH and OUTLIER_LOW values
     columns, when not None, is assigned to reader.columnsToUse
     reader is an object which provides the record info; if None a new WaveformReader() is built
             It is expected that reader has columnsToUse instantiated, with variable_ranges including those columns along with outlier vals
     records is a list of record names to use
     num_workers is stored on the instance as num_workers
     num_hours is the number of hours after admission to keep (helps with efficiency to specify here), if none keep all
     '''
     # A default written as `reader=WaveformReader()` is evaluated once, when the
     # function is defined, so every instance would share that single reader and
     # the columnsToUse assignment below would leak between instances.
     if reader is None:
         reader = WaveformReader()
     self.reader = reader
     self.variable_ranges = variable_ranges
     self.manager = Manager()
     self.records = records
     self.num_workers = num_workers
     self.num_hours = num_hours
     # NOTE: a caller-supplied reader is still modified in place here.
     if columns is not None:
         reader.columnsToUse = columns
Esempio n. 2
0
 def __init__(
         self,
         hadm_dir,
         file_name="episode_timeseries.csv",
         variable_ranges="preprocessing/resources/variable_ranges.csv"):
     '''
     Index the hadm_id folders found under hadm_dir and load the variable ranges.

     :param hadm_dir the directory where each folder holding hadm_id data is located
     :param file_name the name of the file inside each directory that holds data
     :param variable_ranges the file which holds key info on variables and the mapping to mean values for imputing completely
                             missing data
     '''
     self.hadm_dir = hadm_dir
     self.file_name = file_name

     # Results are also written into the hadm directory, so only entries that
     # actually contain the data file are treated as hadm ids.
     found = []
     for entry in os.listdir(os.path.join(hadm_dir)):
         data_path = os.path.join(self.hadm_dir, entry, self.file_name)
         if os.path.exists(data_path):
             found.append(entry)
     self.hadms = found

     # State used when the reader is consumed like an iterator.
     # Raises IndexError when no entry under hadm_dir holds the data file.
     self.__current_hadm = self.hadms[0]
     self.__index = 0

     # Start out keeping every variable listed in the ranges index.
     ranges = read_variable_ranges(variable_ranges)
     self.__ranges = ranges
     self.__vars_to_keep = ranges.index
     self.__n_workers = 1
     self.manager = Manager()
Esempio n. 3
0
 def __init__(self, variable_ranges=read_variable_ranges("preprocessing/resources/variable_ranges.csv"), columns=None, reader=None, num_workers=24, num_missing_most=15):
     '''
     Store the configuration used for cleaning record segments.

     variable_ranges is the DataFrame for variables, with OUTLIER_HIGH and OUTLIER_LOW values
     columns, when not None, is assigned to reader.columnsToUse
     reader is an object which provides the record info; if None a new WaveformReader() is built
     num_workers is stored on the instance as num_workers
     num_missing_most is the most number of CONSECUTIVE data points that can be missing in a segment
     '''
     # A default written as `reader=WaveformReader()` is evaluated once, when the
     # function is defined, so every instance would share that single reader and
     # the columnsToUse assignment below would leak between instances.
     if reader is None:
         reader = WaveformReader()
     self.reader = reader
     self.variable_ranges = variable_ranges
     self.manager = Manager()
     self.num_workers = num_workers
     self.num_missing_most = num_missing_most
     # NOTE: a caller-supplied reader is still modified in place here.
     if columns is not None:
         reader.columnsToUse = columns
Esempio n. 4
0
 def __init__(self,
              traverser=None,
              numericMapping=None,
              columnsToUse=None,
              variable_ranges=read_variable_ranges(
                  "preprocessing/resources/variable_ranges.csv")):
     '''
     @param traverser is the object which provides paths and info about files
             if None, a new WaveformFileTraverser() is created for this instance
     @param numericMapping is the dataframe which maps the signal names to high level variables
             if None, don't use; otherwise an upper-cased copy is stored and
             the caller's dataframe is left untouched
     @param columnsToUse instead of all columns, only output dfs with the selected columnsToUse
             if None then return the columns from the file
     @param variable_ranges - used primarily to remove outliers
             if columnsToUse is set, then this must be set
     '''
     # A default written as `traverser=WaveformFileTraverser()` is evaluated once,
     # at definition time, so all readers would share one traverser and a change
     # such as `reader.traverser.numeric = True` would affect every reader.
     if traverser is None:
         traverser = WaveformFileTraverser()
     self.traverser = traverser
     if numericMapping is not None:
         # Normalise a copy so the caller's dataframe is not modified in place.
         numericMapping = numericMapping.copy()
         numericMapping["numeric"] = numericMapping["numeric"].str.upper()
         numericMapping["high_level_var"] = numericMapping[
             "high_level_var"].str.upper()
     self.numericMapping = numericMapping
     self.columnsToUse = columnsToUse
     self.variable_ranges = variable_ranges
Esempio n. 5
0
from matplotlib.pyplot import plot, show, savefig, xlim, figure, \
                hold, ylim, legend, boxplot, setp, axes

# --- Plain constants -------------------------------------------------------
__n_workers = 24
# threshold is which section of the subjects to consider for a single variable
threshold = 1
# Hours after admission
hoursAfterAdmit = [12, 24, 36, 48]
columnsToAnalyze = [
    "RESPIRATORY RATE",
    "HEART RATE",
    "DIASTOLIC BLOOD PRESSURE",
    "SYSTOLIC BLOOD PRESSURE",
    "OXYGEN SATURATION",
]

# --- Resources loaded from disk ---------------------------------------------
# Both columns of the signal-name mapping are upper-cased after loading.
numericMapping = pd.read_csv(
    "preprocessing/resources/numeric_waveform_to_variable_map.csv")
numericMapping = numericMapping.assign(
    numeric=numericMapping["numeric"].str.upper(),
    high_level_var=numericMapping["high_level_var"].str.upper(),
)
variable_ranges = read_variable_ranges(
    "preprocessing/resources/variable_ranges.csv")

# --- Shared reader -----------------------------------------------------------
reader = WaveformReader(
    numericMapping=numericMapping,
    columnsToUse=columnsToAnalyze,
    variable_ranges=variable_ranges,
)
# Set the `numeric` flag on the reader's traverser.
reader.traverser.numeric = True


def helperWaveformRunner(toRunQueue, toReturnQueue, numericMapping):
    '''
    Uses queues to analyze waveforms for key prelim stats
    '''
    for subject_id in iter(toRunQueue.get, None):
        print(toRunQueue.qsize())
        toReturn = processSubjectID(subject_id,
                                    numHours=hoursAfterAdmit,
                                    numericMapping=numericMapping)