def find_paired_duration():
    """
    Compute the duration of every record and store it in the meta file.

    For each study in the 'study_list' reference, a 'recordDuration' column
    is added to the study's meta table. Each value is
    (recordEnd - recordBegin) / 60 rounded to four decimals, i.e. minutes
    if the two columns hold Unix timestamps in seconds (not verifiable
    from this function alone). The updated table is saved and printed.
    """

    print("begin find_paired_duration")

    for study in retrieve_ref('study_list'):

        df_meta = retrieve_meta(study)
        records = list(df_meta['source_path'])

        # start with an empty column; it is filled one row at a time below
        df_meta['recordDuration'] = [None] * len(records)

        for record in records:

            # index label of the first meta row describing this record
            row = df_meta[df_meta['source_path'] == record].index.values[0]
            print('i = ' + str(row))

            begin = int(df_meta.loc[row, 'recordBegin'])
            print('recordBegin = ' + str(begin))

            end = int(df_meta.loc[row, 'recordEnd'])
            print('recordEnd = ' + str(end))

            duration = round((end - begin) / 60, 4)
            df_meta.loc[row, 'recordDuration'] = duration
            print('recordDuration = ' + str(duration))

        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
def trim_record_to_max():
    """
    Store the length of every record, capped at the maximum record time.

    For each study in the 'study_list' reference, the TEMP csv of every
    record (<study>/formatted/source/<record>/All/TEMP.csv) is read and the
    largest value of its 'timeMinutes' column is taken as the record
    length. Lengths above the 'max_record_time' reference are clamped to
    that value. The result, rounded to four decimals, is written to the
    'recordLength' column of the study's meta table, which is then saved.

    Takes no arguments and returns nothing; all inputs come from
    retrieve_ref / retrieve_meta and the output goes through save_meta.
    """

    print("finding the end of the record")

    study_list = retrieve_ref('study_list')
    max_record_time = retrieve_ref('max_record_time')
    sensor = 'TEMP'

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        df_meta['recordLength'] = [None] * len(source_path)

        for record in source_path:

            timestamped_file = os.path.join(
                study, 'formatted', 'source', record, 'All', sensor + ".csv")
            df_timestamped = pd.read_csv(timestamped_file)

            # longest timestamp in the file, clamped to the allowed maximum
            record_length = min(
                max(df_timestamped['timeMinutes']), max_record_time)
            record_length = round(record_length, 4)

            # index label of the first meta row describing this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'recordLength'] = record_length

        # save the record length to meta file
        save_meta(study, df_meta)
def decide_inclusion():
    """
    Determine inclusion based on length of the record.

    For each study in the 'study_list' reference, every row of the meta
    table is labelled in a new 'included' column: 'excluded' when its
    'recordLength' is below the 'min_record_time' reference, 'included'
    otherwise. The labelled table is saved once in full (sorted by
    'recordLength'), then the excluded rows are removed and the remaining
    rows are saved again sorted by 'source_path'.

    Takes no arguments and returns nothing.
    """

    print("begin decide inclusion")

    study_list = retrieve_ref('study_list')
    min_record_time = retrieve_ref('min_record_time')

    for study in study_list:

        df_meta = retrieve_meta(study)
        df_meta = df_meta.sort_values(by=['recordLength'])

        # labels are built in the (sorted) row order, so the positional
        # column assignment below lines up with the rows
        df_meta['included'] = [
            'excluded' if length < min_record_time else 'included'
            for length in df_meta['recordLength']
        ]

        # first save keeps the excluded rows, with their label, on record
        save_meta(study, df_meta)

        # keep only the included rows; a boolean mask selects by row, so
        # rows sharing an index label are not removed together
        df_meta = df_meta[df_meta['included'] != 'excluded']
        df_meta = df_meta.sort_values(by=['source_path'])
        save_meta(study, df_meta)

    print("completed decide inclusion")
def multiple_record_check():
    """
    check the record for multiple records

    For each study in the 'study_list' reference, the source/All/TEMP data
    of every record is loaded. When the record's 'fullLength' exceeds its
    'truncatedLength' by more than 30, the samples after the truncated
    part are scanned for a rise of more than 3 between a measurement and
    the one 28 samples later. The first such rise is reported as a new
    record and one extra entry is appended to the path/begin/end lists.

    The table saved for the study is rebuilt from those three lists, so it
    holds only the 'source_path', 'recordBegin' and 'recordEnd' columns.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below is reconstructed from the statements
    themselves and should be confirmed against the original file.
    """

    print("begin multiple record check")

    study_list = retrieve_ref('study_list')
    # NOTE(review): the next five references are read but never used below
    format_types = retrieve_ref('format_types')
    segment_list = retrieve_ref('segment_list')
    sensor_list = retrieve_ref('sensor_list')
    timePreStudy = retrieve_ref('timePreStudy')
    timePostStudy = retrieve_ref('timePostStudy')

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        # copies of the existing columns; detected records are appended
        source_path_new = list(df_meta['source_path'])
        timeBegin_list = list(df_meta['recordBegin'])
        timeEnd_list = list(df_meta['recordEnd'])

        for record in source_path:

            # index label of the first meta row describing this record
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            fullLength = float(df_meta.loc[i, 'fullLength'])
            truncatedLength = float(df_meta.loc[i, 'truncatedLength'])

            format_type = 'source'
            segment = 'All'
            sensor = 'TEMP'
            df = retrieve_analyzed(study, format_type, record, segment, sensor)

            # NOTE(review): filled below but never read afterwards
            new_record_list = []

            # only look further when the file runs well past the truncated part
            if fullLength > truncatedLength + 30:

                # discard everything up to 5 past the truncated length
                df = df.drop(df[df['timeMinutes'] < truncatedLength + 5].index)
                # print('df = ')
                # print(df)

                # snapshots of the remaining samples; they are NOT refreshed
                # when df is trimmed again inside the loop below
                timeUnix = list(df['timeUnix'])
                timeMinutes = list(df['timeMinutes'])
                measurements = list(df['measurement'])

                # NOTE(review): this loop variable rebinds the row label `i`
                # taken above; the label is not used again after this point
                for i in range(len(measurements)):

                    # stay far enough from the end for the i + 28 look-ahead
                    if i < len(measurements) - 30:

                        # rise of more than 3 over the next 28 samples
                        if measurements[i] + 3 < measurements[i + 28]:

                            print('new record found')
                            # keep only samples from the look-ahead point on
                            df = df.drop(
                                df[df['timeMinutes'] < timeMinutes[i + 28]].index)
                            time_end = find_record_end_from_temp(df)
                            print('time_end = ' + str(time_end))
                            df = df.drop(
                                df[df['timeMinutes'] > time_end].index)
                            # print('df = ')
                            # print(df)

                            # second '_'-separated field of the record name
                            wearable_name = record.split('_')
                            wearable_name = wearable_name[1]
                            # NOTE(review): timeUnix[0] comes from the snapshot
                            # taken before the two drops above, i.e. the first
                            # sample after the truncated part, not the first
                            # sample of the trimmed df - confirm intent
                            recordName = str(
                                str(int(timeUnix[0])) + '_' + str(wearable_name))
                            print('recordName = ' + str(recordName))

                            new_record_list.append(recordName)
                            # NOTE(review): the original record name is
                            # appended here, not recordName - confirm intent
                            source_path_new.append(record)
                            timeBegin_list.append(int(timeUnix[0]))
                            print('timeUnix[0:20] = ')
                            print(timeUnix[0:20])
                            timeEnd = min(timeUnix)
                            print('timeEnd = ' + str(timeEnd))
                            # NOTE(review): the stored end is the smallest
                            # snapshot timestamp plus 60; time_end computed
                            # above is not used for it - confirm intent
                            timeEnd = min(timeUnix) + 60
                            print('timeEnd = ' +
                                  str(timeEnd))
                            timeEnd_list.append(int(timeEnd))
                            # at most one new record is reported per record
                            break

        # rebuild the meta table from the three lists only
        df_meta_new = pd.DataFrame()
        df_meta_new['source_path'] = source_path_new
        df_meta_new['recordBegin'] = timeBegin_list
        df_meta_new['recordEnd'] = timeEnd_list
        save_meta(study, df_meta_new)
def find_paired_end():
    """
    Find the end of every record and store it in the meta file.

    For each study in the 'study_list' reference, the 'recordEnd' column of
    the study's meta table is filled with the largest 'timeUnix' value of
    the record's truncate/All/TEMP data.

    A participant may have worn one or two wearables, and one of them may
    have been turned off before the other. When the row's
    'recordCoregistered' entry is longer than the record name plus three
    characters, it is treated as a space-separated list of records and the
    earliest of their end times is stored instead. A null entry prints
    'no pair found' and keeps the record's own end time.

    Takes no arguments and returns nothing; the updated table is saved
    with save_meta and printed.
    """

    print("begin find_paired_end")

    study_list = retrieve_ref('study_list')

    format_type = 'truncate'
    sensor = 'TEMP'
    segment = 'All'

    for study in study_list:

        df_meta = retrieve_meta(study)
        source_path = list(df_meta['source_path'])
        df_meta['recordEnd'] = [None] * len(source_path)

        for record in source_path:

            # last timestamp of this record's own data
            df = retrieve_analyzed(study, format_type, record, segment, sensor)
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            df_meta.loc[i, 'recordEnd'] = int(max(df['timeUnix']))

            coregistered = df_meta.loc[i, 'recordCoregistered']

            if pd.isnull(coregistered):
                print('no pair found')
                continue

            # an entry no longer than the record's own name (plus slack)
            # cannot hold a second record name
            if len(coregistered) <= 3 + len(record):
                continue

            # use the same five-argument lookup as for the record itself;
            # the earliest end among the paired records wins
            end_times = []
            for paired_record in str(coregistered).split(' '):
                df_paired = retrieve_analyzed(
                    study, format_type, paired_record, segment, sensor)
                end_times.append(max(df_paired['timeUnix']))

            df_meta.loc[i, 'recordEnd'] = int(min(end_times))

        save_meta(study, df_meta)
        print('df_meta = ')
        print(df_meta)
def find_pairs():
    """
    Pair up records and note the pairs in the meta file.

    Record names are read as '<begin>_<wearable>...'. Two records are a
    pair when their begin values differ by less than 300 and their
    wearable names differ. For each record the meta table receives:

    - 'pairedRecord': name of the paired record (None when unpaired)
    - 'recordCoregistered': the record name, or 'recordA recordB' for a pair
    - 'recordBegin': the record's begin, or the later begin of the pair

    The table is saved, then rows sharing a 'recordBegin' are collapsed to
    the last one, the table is sorted by 'recordBegin', and saved again.
    """

    print("begin find_pairs")

    for study in retrieve_ref('study_list'):

        df_meta = retrieve_meta(study)
        print(df_meta)

        names = list(df_meta['source_path'])
        count = len(names)

        df_meta['pairedRecord'] = [None] * count
        df_meta['recordCoregistered'] = names
        df_meta['recordBegin'] = [None] * count
        df_meta['wearableName'] = [None] * count

        # tag every row with its wearable, then sort the table by it
        for name in names:
            wearable = str(name.split('_')[1])
            row = df_meta[df_meta['source_path'] == name].index.values[0]
            df_meta.loc[row, 'wearableName'] = wearable

        df_meta = df_meta.sort_values(by='wearableName')

        for name_a in names:

            fields_a = name_a.split('_')
            begin_a = int(fields_a[0])
            wearable_a = str(fields_a[1])

            row = df_meta[df_meta['source_path'] == name_a].index.values[0]

            # default: an unpaired record begins at its own timestamp
            df_meta.loc[row, 'recordBegin'] = begin_a

            for name_b in names:

                fields_b = name_b.split('_')
                begin_b = int(fields_b[0])
                wearable_b = str(fields_b[1])

                # not a pair: too far apart in time, or the same wearable
                if abs(begin_a - begin_b) >= 300 or wearable_a == wearable_b:
                    continue

                # when several candidates match, the last one is kept
                df_meta.loc[row, 'pairedRecord'] = str(name_b)
                df_meta.loc[row, 'recordCoregistered'] = str(
                    str(name_a) + ' ' + str(name_b))
                df_meta.loc[row, 'recordBegin'] = max([begin_a, begin_b])

        save_meta(study, df_meta)

        # drop duplicated entries
        df_meta = df_meta.drop_duplicates('recordBegin', keep='last')
        df_meta = df_meta.sort_values(by='recordBegin')
        del df_meta['wearableName']
        save_meta(study, df_meta)

        print('df_meta = ')
        print(df_meta)