Example #1
def reload_data(LOGFILE=None, PICKLE_DATA=True,
                root_folder='Shared Sepsis Data', csv_filename='Sepsis_JCM.csv'):
    '''
    Reloads raw_data from folders.
    IN:
        LOGFILE - fileobj - an open text file where logs are written
        PICKLE_DATA - bool - whether to pickle data once loaded
        root_folder - str - relative path to the top-level folder for all data and the csv file
        csv_filename - str - name of the csv file containing the trial labels and locations
    '''
    csv_file = os.path.join(root_folder, csv_filename)

    X, y, used_column_headers, df, df_raw = load_data(root_folder, csv_file,
        verbose=False, LOGFILE=LOGFILE)

    # pickle data for later loading efficiency
    if PICKLE_DATA:
        start = time.time()
        ptf('\n>> Pickling data ...\n', LOGFILE)
        for z, zname in izip([X, y, used_column_headers], PICKLE_NAMES):
            my_pickle(z, zname)
        end = time.time()
        ptf('Data pickled in %d seconds (%d total trials)' % ((end - start),
            len(X)), LOGFILE)

    return X, y, used_column_headers, df, df_raw
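Both examples lean on module-level context that the excerpts do not show: the standard-library imports the function needs, plus the project helpers load_data, ptf, my_pickle and PICKLE_NAMES. Purely as an assumption-laden sketch (the project's real definitions may differ), that context could look roughly like this:

# Rough sketch of the assumed module context, not the project's actual code.
# The function also calls load_data, the project's CSV/trial loader, which is
# not reproduced here.
import os
import pickle
import time
from itertools import izip  # Python 2; on Python 3 use the built-in zip

# Hypothetical file names for the three pickled objects (X, y, used_column_headers).
PICKLE_NAMES = ['X.pkl', 'y.pkl', 'used_column_headers.pkl']


def my_pickle(obj, filename):
    # Serialize one object to disk.
    with open(filename, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)


def ptf(message, logfile=None):
    # "Print to file": echo the message and append it to the log file if one is open.
    print(message)
    if logfile is not None:
        logfile.write(message + '\n')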
Example #2
def reload_data(LOGFILE=None,
                PICKLE_DATA=True,
                root_folder='Shared Sepsis Data',
                csv_filename='Sepsis_JCM.csv'):
    '''
    Reloads raw_data from folders.
    IN:
        LOGFILE - fileobj - an open text file where logs are written
        PICKLE_DATA - bool - whether to pickle data once loaded
        root_folder - str - relative path to the top-level folder for all data
            and the csv file
        csv_filename - str - name of the csv file containing the trial labels
            and locations
    OUT:
        X - pd Series - Series of features.  Each row is a trial (index) whose
            data is a (number of features + 1) x (number of times) numpy array
        y - pd DataFrame - labels DataFrame.  Each row is a trial (index) and
            the columns hold the labels for each class
        used_column_headers - list of str - names of the data columns that were used
        df - pd DataFrame - DataFrame containing all trial data after elimination
            of extraneous spots and trials
        df_raw - pd DataFrame - DataFrame containing all trial data (before pruning)
    '''
    csv_file = os.path.join(root_folder, csv_filename)

    X, y, used_column_headers, df, df_raw = load_data(root_folder,
                                                      csv_file,
                                                      verbose=False,
                                                      LOGFILE=LOGFILE)

    # pickle data for later loading efficiency
    if PICKLE_DATA:
        start = time.time()
        ptf('\n>> Pickling data ...\n', LOGFILE)
        for z, zname in izip([X, y, used_column_headers], PICKLE_NAMES):
            my_pickle(z, zname)
        end = time.time()
        ptf(
            'Data pickled in %d seconds (%d total trials)' %
            ((end - start), len(X)), LOGFILE)

    return X, y, used_column_headers, df, df_raw
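For completeness, a hypothetical call site might look like the following; the log-file path is invented for the sketch, and on Python 3 the izip loop above would use the built-in zip instead.

# Hypothetical usage (not from the original project): reload everything,
# logging progress, then report how much was loaded.
if __name__ == '__main__':
    with open('reload_data.log', 'w') as logfile:  # invented log path
        X, y, used_column_headers, df, df_raw = reload_data(
            LOGFILE=logfile,
            PICKLE_DATA=True,
            root_folder='Shared Sepsis Data',
            csv_filename='Sepsis_JCM.csv')
    print('Loaded %d trials with %d label columns' % (len(X), y.shape[1]))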