def format_source():
    """
    Timestamp every source record of every study.

    Walks the studies listed in the reference data, reads each study's
    metadata, and calls timestamp_source() once per (record, sensor) pair
    with the 'source' format type and the 'All' segment.
    """
    print("begin format source")

    studies = retrieve_ref('study_list')
    # the next two lookups are kept from the original flow; their values
    # are not used below
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensors = retrieve_ref('sensor_list')

    format_type, segment = 'source', 'All'

    for study in studies:
        meta = retrieve_meta(study)
        for record in list(meta['source_path']):
            for sensor in sensors:
                # the timestamped dataframe that comes back is not used here
                timestamp_source(study, format_type, segment, record, sensor)
def segment_df(segment, df):
    """
    Trim a dataframe to the time window of the named segment.

    The first, second and third entries of the reference segment_list are
    treated as the window up to timePreStudy, the window between
    timePreStudy and timePostStudy, and the window from timePostStudy on.
    Rows are removed by index label according to their 'timeMinutes' value.
    A segment name that matches none of the three is returned untrimmed.
    """
    segments = retrieve_ref('segment_list')
    pre_study = retrieve_ref('timePreStudy')
    post_study = retrieve_ref('timePostStudy')

    def drop_before(frame, limit):
        # remove rows strictly earlier than limit
        return frame.drop(frame[frame['timeMinutes'] < limit].index)

    def drop_after(frame, limit):
        # remove rows strictly later than limit
        return frame.drop(frame[frame['timeMinutes'] > limit].index)

    # three independent tests (not an elif chain), evaluated in turn
    if segment == segments[0]:
        df = drop_after(df, pre_study)
    if segment == segments[1]:
        df = drop_after(drop_before(df, pre_study), post_study)
    if segment == segments[2]:
        df = drop_before(df, post_study)
    return df
def count_inflections():
    """
    Find, summarise, plot and segment inflections for every clean record.

    The ACC and BVP sensors are skipped. Records and search ranges are both
    processed in reverse of the order in which they are listed. An
    inflection file that already exists on disk is not recomputed.
    """
    print('count inflections')

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segments = retrieve_ref('segment_list')
    windows = [int(value) for value in retrieve_ref('searchRange')]
    windows.reverse()

    whole_record = 'All'

    for study in studies:
        format_type = 'clean'
        clean_dir = os.path.join('studies', study, 'formatted', format_type)
        records = os.listdir(clean_dir)[::-1]

        for sensor in sensors:
            if sensor in ('ACC', 'BVP'):
                continue

            for record in records:
                # inflections over the whole record, one file per window
                for window in windows:
                    parts = [
                        'studies', study, 'analyzed', 'inflections',
                        'all_times', str(window), record, whole_record
                    ]
                    csv_file = os.path.join(os.path.join(*parts),
                                            sensor + ".csv")
                    print('inflection file = ' + csv_file)
                    if os.path.isfile(csv_file):
                        print('file already found')
                        continue
                    find_inflections(parts, csv_file, study, format_type,
                                     record, sensor, whole_record, window)

                unique_inflections(study, format_type, record, sensor,
                                   whole_record)
                plot_inflections(study, record, sensor, whole_record)

                # split the whole-record results into each named segment
                for segment in segments:
                    for window in windows:
                        segment_inflections(study, record, sensor, segment,
                                            window)

                unique_inflections(study, format_type, record, sensor,
                                   whole_record)
                plot_inflections(study, record, sensor, whole_record)
def segment_data():
    """
    Cut the truncated data of every record and sensor into segments.

    For each segment the 'truncate' analysis is loaded afresh, trimmed on
    'timeMinutes' against timePreStudy / timePostStudy, and saved as
    <study>/segment/<segment>/<record>/<sensor>.csv. Missing folders below
    the study folder are created one level at a time.
    """
    print("segment data")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segments = retrieve_ref('segment_list')
    pre_study = retrieve_ref('timePreStudy')
    post_study = retrieve_ref('timePostStudy')

    for study in studies:
        meta = retrieve_meta(study)
        for record in list(meta['source_path']):
            for sensor in sensors:
                for segment in segments:
                    analysis_type = 'truncate'
                    df = retrieve_analyzed(study, analysis_type, record,
                                           sensor)

                    # independent tests, evaluated in turn
                    if segment == segments[0]:
                        df = df.drop(df[df['timeMinutes'] > pre_study].index)
                    if segment == segments[1]:
                        df = df.drop(df[df['timeMinutes'] < pre_study].index)
                        df = df.drop(df[df['timeMinutes'] > post_study].index)
                    if segment == segments[2]:
                        df = df.drop(df[df['timeMinutes'] < post_study].index)

                    # create segment/, segment/<segment>/ and
                    # segment/<segment>/<record>/ in that order
                    folder_parts = (study, 'segment', str(segment), record)
                    for depth in range(2, len(folder_parts) + 1):
                        folder = os.path.join(*folder_parts[:depth])
                        if not os.path.isdir(folder):
                            os.mkdir(folder)

                    csv_path = os.path.join(*folder_parts, sensor + ".csv")
                    df.to_csv(csv_path)
                    print('segments file saved: ' + str(csv_path))
def plot_regression():
    """
    Plot the regression of every clean record, one PNG per sensor and degree.

    Each figure is a single column of len(segment_list) + 2 panels:
    one panel per segment, one panel overlaying every segment except the
    last, and one panel with the coefficient bar chart. The figure is saved
    as <sensor>.png in the folder returned by build_path() for
    studies/<study>/plotted/regression/<degree>/<record>.

    The figure is closed after saving; the original left every figure open,
    so they accumulated for the whole run.
    """
    print('plotting regression')

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    degree_list = [int(x) for x in retrieve_ref('degree_list')]

    for study in study_list:
        format_type = 'clean'
        clean_path = os.path.join('studies', study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for degree in degree_list:
                for record in recordNames:
                    row_num, col_num, plot_num = len(segment_list) + 2, 1, 0
                    row_width_mulp, col_width_mulp = 14, 5
                    plot_width = col_num * row_width_mulp
                    plot_height = row_num * col_width_mulp
                    fig = plt.figure(figsize=(plot_width, plot_height))

                    # one panel per segment
                    for segment in segment_list:
                        plot_num += 1
                        plt.subplot(row_num, col_num, plot_num)
                        plot_regression_segment(study, record, segment,
                                                sensor, degree)

                    # overlay panel: every segment except the last
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    for segment in segment_list[:-1]:
                        plot_regression_segment(study, record, segment,
                                                sensor, degree)
                    plt.title(' ')

                    # coefficient bar chart
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    plot_coefficient_bar(study, record, sensor, degree)
                    plt.title(' ')

                    path = build_path([
                        'studies', study, 'plotted', 'regression',
                        str(degree), record
                    ])
                    file = os.path.join(path, sensor + ".png")
                    plt.savefig(file, bbox_inches='tight')
                    # release the figure so figures do not pile up in memory
                    plt.close(fig)
                    print('plotted regression for ' + file)
def find_record_end_using_temp(df):
    """
    Find the unix time at which a record ends from a drop in temperature.

    Reads the 'timeUnix', 'timeMinutes' and 'measurement' columns of df.
    Scanning starts counting only after min_record_time minutes have passed
    since the first sample. A sample i is a candidate end when its
    measurement is more than 2 above the measurement 12 samples later.
      - If more than 5 minutes of record remain after the candidate, it is
        accepted only when the measurement is still more than 2 lower at
        i + 100 and i + 200, and more than 3 lower at i + 300.
      - Otherwise the candidate is accepted immediately.
    The end is placed 12 samples before the accepted candidate. If no
    candidate is accepted the end defaults to the 12th sample from the end.
    The result is capped at max_record_time minutes after the first sample.

    Returns int(timeEndUnix - 1).

    NOTE(review): the pairing of the `else` with the "more than 5 minutes
    left" test is reconstructed from the docstring intent - confirm.
    NOTE(review): a dataframe with fewer than 12 rows raises IndexError on
    the [-12] lookups.
    """
    max_record_time = float(retrieve_ref('max_record_time'))
    min_record_time = float(retrieve_ref('min_record_time'))
    timeUnix = list(df['timeUnix'])
    timeMinutes = list(df['timeMinutes'])
    measurements = list(df['measurement'])
    # default end: 12 samples before the last one
    # (time_end is tracked alongside timeEndUnix but is not returned)
    time_end = timeMinutes[-12]
    timeEndUnix = timeUnix[-12]
    for i in range(len(measurements) - 12):
        # the record has to be a minimum length
        if float(timeMinutes[i]) > float(min_record_time) + timeMinutes[0]:
            # look for a drop of at least 2 deg C over 3 seconds
            # the TEMP sensor takes 4 measurements each second,
            # which means the measurement 12 steps ahead is 3 seconds later
            if float(measurements[i]) - 2 > float(measurements[i + 12]):
                if timeMinutes[-1] - timeMinutes[i] > 5:
                    # plenty of record left: require the temperature to
                    # stay low at three later check points
                    if float(measurements[i]) - 2 > float(measurements[i + 100]):
                        if float(measurements[i]) - 2 > float(measurements[i + 200]):
                            if float(measurements[i]) - 3 > float(measurements[i + 300]):
                                # NOTE(review): i - 12 wraps to the end of
                                # the list if i < 12
                                time_end = timeMinutes[i - 12]
                                timeEndUnix = timeUnix[i - 12]
                                break
                else:
                    # near the end of the record: accept the drop as is
                    time_end = timeMinutes[i - 12]
                    timeEndUnix = timeUnix[i - 12]
                    break
    # trim the record back to a maximum
    if timeEndUnix - timeUnix[0] > 60 * max_record_time:
        timeEndUnix = timeUnix[0] + 60 * max_record_time
    return (int(timeEndUnix - 1))
def format_source():
    """
    Timestamp the raw sensor files of every record and save them untruncated.

    For each record named in a study's metadata, every sensor CSV under
    studies/<study>/source/<source_path>/ is read, passed through
    build_timestamps(), and written as <sensor>.csv into the folder that
    build_path() returns for
    studies/<study>/formatted/source/<recordName>/All.
    """
    print("begin format_source")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    stage, window = 'source', 'All'

    for study in studies:
        meta = retrieve_meta(study)
        print(meta)

        for record in list(meta['recordName']):
            row = meta[meta['recordName'] == record].index.values[0]
            record_source = meta.loc[row, 'source_path']
            # read from the metadata but not applied: this step does not
            # truncate the record
            record_begin = meta.loc[row, 'recordBegin']
            record_end = meta.loc[row, 'recordEnd']
            print('i = ' + str(row))
            print('record = ' + str(record))
            print('recordSource = ' + str(record_source))

            for sensor in sensors:
                raw_csv = os.path.join('studies', study, stage,
                                       record_source, sensor + '.csv')
                stamped = build_timestamps(pd.read_csv(raw_csv), sensor)

                out_dir = build_path(
                    ['studies', study, 'formatted', stage, record, window])
                out_csv = os.path.join(out_dir, sensor + ".csv")
                stamped.to_csv(out_csv)
                print('formatted source file = ' + str(out_csv))
def segment_inflections():
    """
    Split the whole-record inflection files into one file per segment.

    For every study, sensor, clean record and search range, the inflection
    CSV stored under the 'All' segment is read, any 'Unnamed' index columns
    are removed, the rows are trimmed with segment_df(), and the result is
    saved under the folder of that segment. The 'All' segment itself is
    skipped.

    The original wrote the trimmed frame back to the 'All' folder, which
    overwrote the reference file so that every later segment was cut from
    already-trimmed data. The output now goes to the segment's own folder.
    """
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    searchRange = retrieve_ref('searchRange')

    segmentRef = 'All'

    for study in study_list:
        format_type = 'clean'
        clean_path = os.path.join(study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for record in recordNames:
                for search_range in searchRange:
                    for segment in segment_list:
                        if segment == segmentRef:
                            continue

                        # whole-record inflections are the input
                        source_dir = build_path([
                            study, 'analyzed', 'inflections', 'all_times',
                            str(search_range), record, segmentRef
                        ])
                        source_file = os.path.join(source_dir,
                                                   sensor + ".csv")
                        if not os.path.isfile(source_file):
                            continue

                        df = pd.read_csv(source_file)
                        # drop index columns left behind by earlier to_csv
                        for colName in df.columns:
                            if 'Unnamed' in str(colName):
                                del df[colName]

                        df = segment_df(segment, df)

                        # save under the segment, not the 'All' reference
                        target_dir = build_path([
                            study, 'analyzed', 'inflections', 'all_times',
                            str(search_range), record, segment
                        ])
                        target_file = os.path.join(target_dir,
                                                   sensor + ".csv")
                        df.to_csv(target_file)
def find_record_end_from_temp(df_timestamped):
    """
    Find the end of a record, in minutes, from the temperature trace.

    Reads the 'timeUnix', 'timeMinutes' and 'measurement' columns. The end
    defaults to the largest 'timeMinutes' value. Scanning stops at the first
    sample that lies more than min_record_time minutes after the first one.

    Returns the end time in minutes as a float.

    The original raised UnboundLocalError at the final print whenever no
    sample passed the minimum-length test, because timeEndUnix was only
    assigned inside the loop. It now has a default.

    NOTE(review): both branches of the dip test do the same thing, so the
    "drops more than 2 degrees in 3 seconds" check does not currently
    influence the result. This is kept as found - confirm the intent.
    """
    min_record_time = float(retrieve_ref('min_record_time'))

    print('df_timestamped = ')
    print(df_timestamped)

    timeUnix = list(df_timestamped['timeUnix'])
    timeMinutes = list(df_timestamped['timeMinutes'])
    measurements = list(df_timestamped['measurement'])

    # defaults used when the scan below never stops early
    time_end = max(timeMinutes)
    timeEndUnix = max(timeUnix)

    earliest_end = min_record_time + float(timeMinutes[0])

    for i in range(len(measurements) - 12):
        if timeMinutes[i] > earliest_end:
            # if the temperature drops more than 2 degrees in 3 seconds
            # end the record
            if measurements[i] - 2 > measurements[i + 12]:
                time_end = timeMinutes[i]
                timeEndUnix = timeUnix[i]
                break
            else:
                time_end = timeMinutes[i]
                timeEndUnix = timeUnix[i]
                break

    time_end = float(time_end)
    print('time_end = ' + str(time_end))
    print('timeEndUnix = ' + str(timeEndUnix))
    return time_end
def find_record_end():
    """
    Record the begin, end and full length of every source record.

    For each study the TEMP sensor of every record is timestamped with
    timestamp_source(). The begin time is the integer before the first
    underscore of the record's source_path; the end is that value plus 60
    times the largest 'timeMinutes'; the full length is the largest
    'timeMinutes' rounded to 4 places. The three values are written to the
    'recordBegin', 'recordEnd' and 'fullLength' metadata columns and the
    metadata is saved. find_temp_end() is called once afterwards.

    A stray triple quote that followed the last statement has been removed:
    it opened a string literal that was never closed.
    """
    print("begin timestamp source")

    study_list = retrieve_ref('study_list')

    # only the temperature sensor is used to measure the record
    format_type = 'source'
    segment = 'All'
    sensor = 'TEMP'

    for study in study_list:
        print('study = ' + str(study))
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        df_meta['recordBegin'] = [None] * len(source_path)
        df_meta['recordEnd'] = [None] * len(source_path)
        df_meta['fullLength'] = [None] * len(source_path)

        # summarize what has been found so far
        record_to_summary(study, 'Records found', len(source_path))

        for record in source_path:
            df_timestamped = timestamp_source(study, format_type, segment,
                                              record, sensor)
            # length of the unedited record, in minutes
            full_minutes = max(df_timestamped['timeMinutes'])

            i = df_meta[df_meta['source_path'] == record].index.values[0]
            begin_unix = int(record.split('_')[0])
            df_meta.loc[i, 'recordBegin'] = begin_unix
            df_meta.loc[i, 'recordEnd'] = begin_unix + 60 * full_minutes
            df_meta.loc[i, 'fullLength'] = round(full_minutes, 4)

        save_meta(study, df_meta)

    find_temp_end()
def format_truncate():
    """
    Truncate every formatted source file to its record begin and end.

    Reads studies/<study>/formatted/source/<record>/All/<sensor>.csv,
    keeps the rows whose 'timeUnix' lies strictly between the record's
    'recordBegin' and 'recordEnd' metadata values, and writes the result as
    <sensor>.csv into the folder that build_path() returns for
    studies/<study>/formatted/truncate/<record>/All. An assertion fails if
    no rows remain.
    """
    print("begin format_truncate")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    window = 'All'

    for study in studies:
        meta = retrieve_meta(study)

        for record in list(meta['recordName']):
            row = meta[meta['recordName'] == record].index.values[0]
            begin_unix = meta.loc[row, 'recordBegin']
            end_unix = meta.loc[row, 'recordEnd']
            print('i = ' + str(row))

            for sensor in sensors:
                in_csv = os.path.join('studies', study, 'formatted',
                                      'source', record, window,
                                      sensor + '.csv')
                frame = pd.read_csv(in_csv)
                frame = frame[frame['timeUnix'] > begin_unix]
                frame = frame[frame['timeUnix'] < end_unix]
                assert len(list(frame['timeUnix'])) > 0, 'during format truncate, dataframe empty'

                out_dir = build_path(['studies', study, 'formatted',
                                      'truncate', record, window])
                out_csv = os.path.join(out_dir, sensor + ".csv")
                frame.to_csv(out_csv)
                print('formatted truncated file = ' + str(out_csv))
def define_record():
    """
    Write the record begin, end and length into each study's metadata.

    The begin is the record's 'originalBegin'. The end is the unix time
    returned by find_record_end_using_temp() for the formatted TEMP source
    file. The length is their difference in minutes, rounded to 4 places.
    No records are removed here.
    """
    studies = retrieve_ref('study_list')
    # looked up as in the original flow; not used below
    min_record_time = retrieve_ref('min_record_time')
    max_record_time = retrieve_ref('max_record_time')

    for study in studies:
        # the list of records comes from the metadata file
        meta = retrieve_meta(study)
        records = list(meta['source_path'])

        for column in ('recordBegin', 'recordEnd', 'recordLength'):
            meta[column] = [None] * len(records)

        for record in records:
            row = meta[meta['source_path'] == record].index.values[0]
            original_begin = meta.loc[row, 'originalBegin']
            # read alongside the begin time; not used in the calculation
            original_end = meta.loc[row, 'originalEnd']
            original_length = meta.loc[row, 'originalLength']

            temp_csv = os.path.join('studies', study, 'formatted', 'source',
                                    record, 'All', 'TEMP' + '.csv')
            end_unix = find_record_end_using_temp(pd.read_csv(temp_csv))

            length_minutes = (end_unix - original_begin) / 60

            meta.loc[row, 'recordBegin'] = original_begin
            meta.loc[row, 'recordEnd'] = end_unix
            meta.loc[row, 'recordLength'] = round(length_minutes, 4)

        # save the metadata file
        save_meta(study, meta)
def pair_records():
    """
    Mark records that were captured together on different wearables.

    Two different records of one study are treated as a pair when their
    'recordBegin' values differ by less than 300 and their 'wearableName'
    values differ. The 'coregisterRecords' column starts out as the record's
    own name and is replaced by '<recordA> <recordB>' when a pair is found.
    The metadata is saved once per study.
    """
    studies = retrieve_ref('study_list')
    # looked up as in the original flow; not used below
    sensor_list = retrieve_ref('sensor_list')

    for study in studies:
        meta = retrieve_meta(study)
        names = list(meta['recordName'])

        # add the wearable name column and the default pairing column
        meta = add_wearableName(meta)
        meta['coregisterRecords'] = names

        for first in names:
            row_a = meta[meta['recordName'] == first].index.values[0]
            begin_a = meta.loc[row_a, 'recordBegin']
            wearable_a = meta.loc[row_a, 'wearableName']

            for second in names:
                row_b = meta[meta['recordName'] == second].index.values[0]
                begin_b = meta.loc[row_b, 'recordBegin']
                wearable_b = meta.loc[row_b, 'wearableName']

                started_together = abs(begin_a - begin_b) < 300
                if started_together and first != second and wearable_a != wearable_b:
                    print('coregister record found for ' + first + ' + ' + second)
                    meta.loc[row_a, 'coregisterRecords'] = str(first + ' ' + second)

        save_meta(study, meta)
def define_original():
    """
    Record the original begin, end and length of every source record.

    The TEMP sensor of each record is timestamped with timestamp_source().
    The smallest and largest 'timeUnix' values become 'originalBegin' and
    'originalEnd', and their difference in minutes (rounded to 4 places)
    becomes 'originalLength'. 'recordName' is set to the source path. The
    metadata is saved, records with an original length below
    min_record_time are dropped, and the metadata is saved again.
    """
    studies = retrieve_ref('study_list')
    shortest_allowed = retrieve_ref('min_record_time')

    for study in studies:
        # the list of records comes from the metadata file
        meta = retrieve_meta(study)
        records = list(meta['source_path'])

        # columns that describe the original record
        meta['recordName'] = records
        for column in ('originalBegin', 'originalEnd', 'originalLength'):
            meta[column] = [None] * len(records)

        for record in records:
            stamped = timestamp_source(study, 'source', 'All', record, 'TEMP')

            unix_times = list(stamped['timeUnix'])
            begin_unix = int(min(unix_times))
            end_unix = int(max(unix_times))
            length_minutes = (end_unix - begin_unix) / 60

            row = meta[meta['source_path'] == record].index.values[0]
            meta.loc[row, 'originalBegin'] = begin_unix
            meta.loc[row, 'originalEnd'] = end_unix
            meta.loc[row, 'originalLength'] = round(length_minutes, 4)

        # save the full metadata, then again without the short records
        save_meta(study, meta)
        too_short = meta[meta['originalLength'] < shortest_allowed].index
        meta = meta.drop(too_short)
        save_meta(study, meta)
def clean_save():
    """
    Save a clean copy of every coregistered sensor file.

    Reads studies/<study>/formatted/coregister/<record>/All/<sensor>.csv,
    passes it through reset_minutes(), removes the 'Unnamed' columns, and
    writes it as <sensor>.csv into the folder that build_path() returns for
    studies/<study>/formatted/clean/<record>/All.
    """
    print("begin clean_save")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # looked up as in the original flow; not used below
    segment_list = retrieve_ref('segment_list')

    window = 'All'

    for study in studies:
        meta = retrieve_meta(study)

        for record in list(meta['recordName']):
            row = meta[meta['recordName'] == record].index.values[0]
            print('i = ' + str(row))

            for sensor in sensors:
                in_csv = os.path.join('studies', study, 'formatted',
                                      'coregister', record, window,
                                      sensor + '.csv')
                frame = reset_minutes(window, pd.read_csv(in_csv))

                # index columns left behind by earlier saves
                leftovers = [name for name in list(frame.head())
                             if 'Unnamed' in name]
                for name in leftovers:
                    del frame[name]

                out_dir = build_path(['studies', study, 'formatted',
                                      'clean', record, window])
                out_csv = os.path.join(out_dir, sensor + ".csv")
                frame.to_csv(out_csv)
                print('formatted clean file = ' + str(out_csv))
def plot_coefficient_bar(study, record, sensor, degree):
    """
    Draw one bar per segment showing the first regression coefficient.

    For each segment the regression table from retrieve_regression() is
    searched for the row of `record`, and the value in the table's last
    column is taken as the coefficient. The cell is either a single number
    or a space-separated string of numbers; only the first number is
    plotted. Segments whose cell is empty are skipped. Bars are drawn on
    the current matplotlib axes and the x ticks are labelled with the
    segment names.

    The original compared the boolean result of pd.isnull() with the string
    'True', so the empty-cell branch never ran for missing values. The test
    now uses the boolean directly.
    """
    segment_list = retrieve_ref('segment_list')

    for segment in segment_list:
        print('bar chart for segment = ' + str(segment))
        df_coef = retrieve_regression(study, segment, sensor, degree)
        print('df_coef = ')
        print(df_coef)

        i = df_coef[df_coef['recordName'] == record].index.values[0]
        print('i = ' + str(i))

        colNames = list(df_coef.head())
        coeff = df_coef.loc[i, colNames[-1]]
        print('coeff = ' + str(coeff))

        is_empty = pd.isnull(coeff)
        print('a = ' + str(is_empty))

        if is_empty or str(coeff) == 'None':
            print('cell empty a = ' + str(is_empty) + ' coeff = ')
            print(coeff)
            continue

        print('cell not empty a = ' + str(is_empty) + ' coeff = ')
        print(coeff)

        try:
            # a single number
            coeff = [float(coeff)]
            print('try found coeff = ')
            print(coeff)
        except (TypeError, ValueError):
            # several numbers stored as one space-separated string
            coeff = [float(x) for x in coeff.split(' ')]
            print('except found coeff = ')
            print(coeff)

        xx = [segment_list.index(segment)]
        yy = [coeff[0]]
        wearable_num = 1
        colorSegment = retrieve_ref_color_wearable_segment(
            wearable_num, segment)
        plt.bar(xx, yy, color=colorSegment)

    # the tick labels do not depend on the segment, so set them once
    plt.xticks(range(len(segment_list)), segment_list)
def reset_minutes(segment, df):
    """
    Shift the 'timeMinutes' column so that it starts from 0.

    The first value of the column is subtracted from every value. The
    dataframe is modified in place and also returned. An empty dataframe is
    returned unchanged.

    `segment` is accepted for compatibility with existing callers and is
    not used. Three reference lookups whose results were never used have
    been removed.
    """
    original = list(df['timeMinutes'])
    # original[0] is only evaluated when there is at least one row
    df['timeMinutes'] = [minute - original[0] for minute in original]
    return df
def latex_report(comprehensive_report):
    """
    Append the manuscript text to the comprehensive report file.

    The contents of code/latex/manuscript.txt are appended to the file at
    `comprehensive_report`, which is created if it does not exist.

    Both files are now opened with context managers: the original never
    closed the manuscript file and left the report open if the read failed.
    Three reference lookups whose results were never used have been removed.
    """
    src = os.path.join('code', 'latex', 'manuscript' + '.txt')

    # copy the text of the latex document to the report file;
    # the report keeps whatever file extension the caller chose
    with open(comprehensive_report, 'a') as dst:
        with open(src, 'r') as manuscript:
            dst.write(manuscript.read())
def decide_inclusion():
    """
    Mark each record as included or excluded by its length, then drop the
    excluded ones.

    A record is 'excluded' when its 'recordLength' is below min_record_time
    and 'included' otherwise. The metadata is saved once with the new
    'included' column (sorted by record length) and once more after the
    excluded rows are removed (sorted by source path).
    """
    print("begin decide inclusion")

    studies = retrieve_ref('study_list')
    # looked up as in the original flow; not used below
    sensor_list = retrieve_ref('sensor_list')
    max_record_time = retrieve_ref('max_record_time')
    shortest_allowed = retrieve_ref('min_record_time')

    for study in studies:
        meta = retrieve_meta(study).sort_values(by=['recordLength'])
        records_found = list(meta['source_path'])

        meta['included'] = [
            'excluded' if length < shortest_allowed else 'included'
            for length in list(meta['recordLength'])
        ]
        # first save keeps every record, with its decision
        save_meta(study, meta)

        excluded_rows = meta[meta['included'] == 'excluded'].index
        meta = meta.drop(excluded_rows).sort_values(by=['source_path'])
        save_meta(study, meta)

    print("completed decide inclusion")
def trim_record_to_max():
    """
    Store the length of every record, capped at max_record_time.

    The length is the largest 'timeMinutes' value of the formatted TEMP
    source file, <study>/formatted/source/<record>/All/TEMP.csv. Values
    above max_record_time are replaced by max_record_time. The result,
    rounded to 4 places, is written to the 'recordLength' metadata column
    and the metadata is saved once per study.
    """
    print("finding the end of the record")

    studies = retrieve_ref('study_list')
    # looked up as in the original flow; not used below
    sensor_list = retrieve_ref('sensor_list')
    longest_allowed = retrieve_ref('max_record_time')

    sensor = 'TEMP'

    for study in studies:
        meta = retrieve_meta(study)
        records = list(meta['source_path'])
        meta['recordLength'] = [None] * len(records)

        for record in records:
            temp_csv = os.path.join(study, 'formatted', 'source', record,
                                    'All', sensor + ".csv")
            stamped = pd.read_csv(temp_csv)

            length = max(list(stamped['timeMinutes']))
            length = longest_allowed if length > longest_allowed else length
            length = round(length, 4)

            row = meta[meta['source_path'] == record].index.values[0]
            meta.loc[row, 'recordLength'] = length

        # save the record length to meta file
        save_meta(study, meta)
def count_inflections():
    """
    Find, plot and segment inflections for every truncated record.

    For the 'All' segment the inflections are found and plotted for each
    search range. segment_inflections() is then called for every segment
    and search range.
    """
    print('begin counting inflections')

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segments = retrieve_ref('segment_list')
    windows = [int(value) for value in retrieve_ref('searchRange')]

    for study in studies:
        format_type = 'truncate'
        records = os.listdir(os.path.join(study, 'formatted', format_type))

        for sensor in sensors:
            for record in records:
                for segment in segments:
                    whole_record = segment == 'All'
                    for window in windows:
                        if whole_record:
                            find_inflections(study, format_type, record,
                                             sensor, segment, int(window))
                            plot_inflections(study, record, sensor, segment)
                        segment_inflections(study, record, sensor, segment,
                                            window)

    print('completed counting inflections')
def machineLearningBasic():
    """
    Load and print the statistics table of every study.

    Reads <study>/analyzed/statistics/statistics.csv, removes the columns
    whose name contains 'Unnamed', and prints the remaining dataframe.
    """
    print("begin machine learning basic")

    studies = retrieve_ref('study_list')
    # looked up as in the original flow; not used below
    sensor_list = retrieve_ref('sensor_list')

    for study in studies:
        stats_dir = os.path.join(study, 'analyzed', 'statistics')
        stats = pd.read_csv(os.path.join(stats_dir, 'statistics.csv'))

        # index columns left behind by earlier saves
        leftovers = [column for column in list(stats.columns)
                     if 'Unnamed' in column]
        for column in leftovers:
            del stats[column]

        print(stats)

    print("end machine learning basic")
def segment_records():
    """
    Cut every formatted record into segments and save each one.

    For each study, record, format type and sensor, the 'All' data is
    loaded with retrieve_analyzed() and trimmed on 'timeMinutes' against
    timePreStudy / timePostStudy according to the segment's position in
    segment_list. Each segment is saved as
    <study>/formatted/<format>/<record>/<segment>/<sensor>.csv.

    The original reassigned the loaded dataframe while looping over the
    segments, so each segment was cut from the previous segment's result
    and the later ones came out (almost) empty. Every segment is now cut
    from the untrimmed data.
    """
    print("begin segmenting records")

    study_list = retrieve_ref('study_list')
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])

        for record in source_path:
            for format_type in format_types:
                for sensor in sensor_list:
                    df_all = retrieve_analyzed(study, format_type, record,
                                               'All', sensor)

                    for segment in segment_list:
                        # always start from the untrimmed data;
                        # drop() returns a new frame, so df_all is untouched
                        df = df_all

                        if segment == segment_list[0]:
                            df = df.drop(
                                df[df['timeMinutes'] > timePreStudy].index)
                        if segment == segment_list[1]:
                            df = df.drop(
                                df[df['timeMinutes'] < timePreStudy].index)
                            df = df.drop(
                                df[df['timeMinutes'] > timePostStudy].index)
                        if segment == segment_list[2]:
                            df = df.drop(
                                df[df['timeMinutes'] < timePostStudy].index)

                        path = os.path.join(study, 'formatted', format_type,
                                            record, segment)
                        if not os.path.isdir(path):
                            os.mkdir(path)

                        file_path = os.path.join(path, sensor + ".csv")
                        df.to_csv(file_path)
                        print('dataframe saved for segments: ' +
                              str(file_path))
def count_inflections():
    """
    Run the inflection steps in order: find, list unique, segment, plot.

    Before the steps run, the list returned for 'searchRange' is reversed
    in place. The reversed list is not passed to any of the steps.
    """
    print('begin counting inflections')

    # NOTE(review): this only has an effect on the steps below if
    # retrieve_ref hands out a shared list object - confirm
    retrieve_ref('searchRange').reverse()

    find_inflections()
    list_unique_inflections()
    segment_inflections()
    plot_inflections()

    print('completed counting inflections')
def segment_formatted(format_type):
    """
    Split every clean sensor file into one CSV per segment.

    For each record, sensor and segment,
    studies/<study>/formatted/clean/<record>/All/<sensor>.csv is read,
    trimmed with segment_df(), and written as <sensor>.csv into the folder
    that build_path() returns for
    studies/<study>/formatted/clean/<record>/<segment>.

    The format_type argument is currently ignored: 'clean' is always used.
    """
    print("begin segment_formatted")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # the next lookups are kept from the original flow; not used below
    format_types = retrieve_ref('format_types')
    segments = retrieve_ref('segment_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    stage, reference_segment = 'clean', 'All'

    for study in studies:
        meta = retrieve_meta(study)

        for record in list(meta['recordName']):
            row = meta[meta['recordName'] == record].index.values[0]
            print('i = ' + str(row))

            for sensor in sensors:
                for segment in segments:
                    # the reference file is read again for every segment
                    in_csv = os.path.join('studies', study, 'formatted',
                                          stage, record, reference_segment,
                                          sensor + '.csv')
                    trimmed = segment_df(segment, pd.read_csv(in_csv))

                    out_dir = build_path([
                        'studies', study, 'formatted', stage, record, segment
                    ])
                    out_csv = os.path.join(out_dir, sensor + ".csv")
                    trimmed.to_csv(out_csv)
                    print('segmented clean file = ' + str(out_csv))
def plot_cleaned():
    """
    Plot each study at every cleaning stage so the stages can be compared.

    Calls plot_source(), plot_acc(), plot_coregister() and plot_segment(),
    in that order, for every study in the reference study list.
    """
    print("begin plotting the clean data ")

    for study_name in retrieve_ref('study_list'):
        plot_source(study_name)
        plot_acc(study_name)
        plot_coregister(study_name)
        plot_segment(study_name)

    print("completed plotting the clean data")
def find_paired_duration():
    """
    Store the duration of every record in the metadata.

    The duration is ('recordEnd' - 'recordBegin') / 60, with both values
    converted to int first, rounded to 4 places and written to the
    'recordDuration' column. The metadata is saved and printed per study.
    """
    print("begin find_paired_duration")

    for study in retrieve_ref('study_list'):
        meta = retrieve_meta(study)
        records = list(meta['source_path'])

        # start with an empty column
        meta['recordDuration'] = [None] * len(records)

        for record in records:
            row = meta[meta['source_path'] == record].index.values[0]
            print('i = ' + str(row))

            begin_unix = int(meta.loc[row, 'recordBegin'])
            print('recordBegin = ' + str(begin_unix))

            end_unix = int(meta.loc[row, 'recordEnd'])
            print('recordEnd = ' + str(end_unix))

            duration = round((end_unix - begin_unix) / 60, 4)
            meta.loc[row, 'recordDuration'] = duration
            print('recordDuration = ' + str(duration))

        save_meta(study, meta)
        print('df_meta = ')
        print(meta)
def compile_formatted(format_type):
    """
    Split every clean sensor file into one CSV per segment.

    For each record, sensor and segment,
    studies/<study>/formatted/clean/<record>/All/<sensor>.csv is read,
    trimmed with segment_df(), and written as <sensor>.csv into the folder
    that build_path() returns for
    studies/<study>/formatted/clean/<record>/<segment>.

    NOTE(review): the name suggests collapsing the separate CSVs into a
    single file, but the body only segments them - confirm the intent.
    The format_type argument is currently ignored: 'clean' is always used.
    """
    print("begin compile_formatted")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # the next lookups are kept from the original flow; not used below
    format_types = retrieve_ref('format_types')
    segments = retrieve_ref('segment_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    stage, reference_segment = 'clean', 'All'

    for study in studies:
        meta = retrieve_meta(study)

        for record in list(meta['recordName']):
            row = meta[meta['recordName'] == record].index.values[0]
            print('i = ' + str(row))

            for sensor in sensors:
                for segment in segments:
                    # the reference file is read again for every segment
                    in_csv = os.path.join('studies', study, 'formatted',
                                          stage, record, reference_segment,
                                          sensor + '.csv')
                    trimmed = segment_df(segment, pd.read_csv(in_csv))

                    out_dir = build_path(
                        ['studies', study, 'formatted', stage, record,
                         segment])
                    out_csv = os.path.join(out_dir, sensor + ".csv")
                    trimmed.to_csv(out_csv)
                    print('segmented clean file = ' + str(out_csv))
def study_figures(comprehensive_report):
    """
    Append a LaTeX figure block for every plot found on disk.

    For every record folder under studies/<study>/formatted/truncate (in
    sorted order), every format type and every sensor (in reverse of the
    reference order), the matching PNG is looked up under
    studies/<study>/plotted/. The 'regression0/1/2' types map to
    plotted/regression/<degree>/. When the PNG exists, a figure environment
    with a caption followed by a clearpage is appended to
    `comprehensive_report`.

    Changes from the original, none of which alter the text written:
      - '\\includegraphics' and '\\centering' were written with invalid
        escape sequences ('\\i', '\\c'); they are now escaped properly.
      - the report is opened with a context manager so it is closed even
        if a write fails.
      - two reference lookups whose results were never used were removed.
    """
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    sensor_list.reverse()

    format_types = ['source', 'truncate', 'coregister', 'clean']
    format_types.append('regression0')
    format_types.append('regression1')
    format_types.append('regression2')
    print('format_types = ')
    print(format_types)

    # regression plots live under plotted/regression/<degree>/
    regression_degree = {'regression0': 0, 'regression1': 1, 'regression2': 2}

    for study in study_list:
        source_path = os.path.join('studies', study, 'formatted', 'truncate')
        format_folders = os.listdir(source_path)
        format_folders.sort()

        # NOTE(review): the caption's record number is 0-based
        # ("record 0 of N"); kept as found
        for i, record in enumerate(format_folders):
            for format_type in format_types:
                for sensor in sensor_list:
                    if format_type in regression_degree:
                        degree = regression_degree[format_type]
                        source = os.path.join('studies', study, 'plotted',
                                              'regression', str(degree),
                                              record, sensor + '.png')
                    else:
                        source = os.path.join('studies', study, 'plotted',
                                              format_type, record,
                                              sensor + '.png')

                    if not os.path.isfile(source):
                        continue

                    print('path found: plot_file = ' + str(source))

                    figure_text = (
                        '\n'
                        '\n'
                        '\\begin{figure}[ht]'
                        '\n'
                        '\\includegraphics'
                        '[width=\\textwidth]'
                        '{' + source + '}'
                        '\n'
                        '\\caption{' + str(study) + ' (record ' + str(i) +
                        ' of ' + str(len(format_folders)) + ') ' + sensor +
                        ' ' + format_type + '}' + '\n'
                        '\\centering' + ' \n'
                        '\\end{figure}' + ' \n'
                        '\\clearpage' + ' \n' + '\n'
                    )
                    with open(comprehensive_report, "a") as file:
                        file.write(figure_text)
def multiple_record_check():
    """
    Look for a second record hidden inside a source file and add it to the
    metadata.

    A record is examined when its 'fullLength' exceeds its 'truncatedLength'
    by more than 30. The TEMP data later than truncatedLength + 5 minutes is
    scanned for a measurement that is more than 3 lower than the measurement
    28 samples later. At the first such rise one extra row is appended:
    the same source path, a begin time of int(timeUnix[0]) and an end time
    of min(timeUnix) + 60.

    The metadata saved for each study is a NEW dataframe holding only the
    'source_path', 'recordBegin' and 'recordEnd' columns.

    NOTE(review): timeUnix is captured before the dataframe is trimmed to
    the new record, so the begin/end values describe the start of the
    scanned tail rather than the detected rise - confirm the intent.
    NOTE(review): time_end from find_record_end_from_temp() is used to trim
    df but is not used for the saved end time.
    """
    print("begin multiple record check")
    study_list = retrieve_ref('study_list')
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')
    for study in study_list:
        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        # working copies that grow when an extra record is found
        source_path_new = list(df_meta['source_path'])
        timeBegin_list = list(df_meta['recordBegin'])
        timeEnd_list = list(df_meta['recordEnd'])
        for record in source_path:
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            fullLength = float(df_meta.loc[i, 'fullLength'])
            truncatedLength = float(df_meta.loc[i, 'truncatedLength'])
            format_type = 'source'
            segment = 'All'
            sensor = 'TEMP'
            df = retrieve_analyzed(study, format_type, record, segment, sensor)
            # collected but not used after the loop
            new_record_list = []
            # only files much longer than their truncated record are scanned
            if fullLength > truncatedLength + 30:
                # keep the tail that starts 5 minutes after the first record
                df = df.drop(df[df['timeMinutes'] < truncatedLength + 5].index)
                timeUnix = list(df['timeUnix'])
                timeMinutes = list(df['timeMinutes'])
                measurements = list(df['measurement'])
                # note: i is reused here as the sample index
                for i in range(len(measurements)):
                    if i < len(measurements) - 30:
                        # a rise of more than 3 across 28 samples
                        if measurements[i] + 3 < measurements[i + 28]:
                            print('new record found')
                            df = df.drop(df[df['timeMinutes'] < timeMinutes[i + 28]].index)
                            time_end = find_record_end_from_temp(df)
                            print('time_end = ' + str(time_end))
                            df = df.drop(df[df['timeMinutes'] > time_end].index)
                            # the second '_'-separated field of the source path
                            wearable_name = record.split('_')
                            wearable_name = wearable_name[1]
                            recordName = str(str(int(timeUnix[0])) + '_' + str(wearable_name))
                            print('recordName = ' + str(recordName))
                            new_record_list.append(recordName)
                            source_path_new.append(record)
                            timeBegin_list.append(int(timeUnix[0]))
                            print('timeUnix[0:20] = ')
                            print(timeUnix[0:20])
                            timeEnd = min(timeUnix)
                            print('timeEnd = ' + str(timeEnd))
                            timeEnd = min(timeUnix) + 60
                            print('timeEnd = ' + str(timeEnd))
                            timeEnd_list.append(int(timeEnd))
                            # at most one extra record per source file
                            break
        # the saved metadata contains only these three columns
        df_meta_new = pd.DataFrame()
        df_meta_new['source_path'] = source_path_new
        df_meta_new['recordBegin'] = timeBegin_list
        df_meta_new['recordEnd'] = timeEnd_list
        save_meta(study, df_meta_new)