def segment_inflections():
    """Split every 'All' inflection table into per-segment tables.

    For each study / sensor / record / search range, the inflection csv
    stored under the 'All' segment folder is read, the index columns left
    behind by an earlier ``to_csv`` ('Unnamed...') are dropped, the table is
    reduced with ``segment_df`` and the result is saved.

    The segmented table is written to the folder of ``segment``. Writing it
    back onto the 'All' file would replace the source table, so every later
    segment would be cut from already-segmented data.
    """
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    searchRange = retrieve_ref('searchRange')
    segmentRef = 'All'

    for study in study_list:
        clean_path = os.path.join(study, 'formatted', 'clean')
        recordNames = os.listdir(clean_path)
        for sensor in sensor_list:
            for record in recordNames:
                # named search_range so the builtin range() is not shadowed
                for search_range in searchRange:
                    for segment in segment_list:
                        # the 'All' table is the source, never a target
                        if segment == segmentRef:
                            continue

                        base = [
                            study, 'analyzed', 'inflections', 'all_times',
                            str(search_range), record
                        ]
                        # build_path is defined elsewhere; it is kept on the
                        # read path exactly as before in case it has side
                        # effects (presumably creating the folder - confirm)
                        source_dir = build_path(base + [segmentRef])
                        source_file = os.path.join(source_dir, sensor + ".csv")
                        if not os.path.isfile(source_file):
                            continue

                        df = pd.read_csv(source_file)
                        unnamed = [c for c in df.columns if 'Unnamed' in str(c)]
                        df = df.drop(columns=unnamed)
                        df = segment_df(segment, df)

                        target_dir = build_path(base + [segment])
                        df.to_csv(os.path.join(target_dir, sensor + ".csv"))
def save_meta(study, df):
    """Save the study metadata and add a numbered copy to the archive.

    The table is written to ``studies/<study>/meta/metadata.csv`` and again
    to ``studies/<study>/meta/archive/metadata_<n>.csv`` where ``n`` is the
    number of files already in the archive plus one.

    When the table holds a single column, every file in the archive is
    removed first, so the copy written by this call becomes ``metadata_1``.

    Args:
        study: name of the study folder under ``studies``.
        df: metadata DataFrame. 'Unnamed...' columns are deleted from it in
            place before saving.
    """
    print("begin saving metadata")

    # remove unnamed columns created from reading in the csv; str() keeps the
    # check from failing on non-string column labels
    for name in list(df.columns):
        if 'Unnamed' in str(name):
            del df[name]

    metadata_path = build_path(['studies', study, 'meta'])
    metadata_file = os.path.join(metadata_path, 'metadata.csv')
    df.to_csv(metadata_file)

    metadata_path = build_path(['studies', study, 'meta', 'archive'])
    # the folder has to exist before it is listed below
    os.makedirs(metadata_path, exist_ok=True)

    if len(df.columns) == 1:
        print('metadata archive deleted. ')
        for archived in os.listdir(metadata_path):
            os.remove(os.path.join(metadata_path, archived))

    iteration = len(os.listdir(metadata_path)) + 1
    metadata_file = os.path.join(metadata_path,
                                 'metadata' + '_' + str(iteration) + '.csv')
    df.to_csv(metadata_file)

    print("completed saving metadata")
def segment_inflections(study, record, sensor, segment, range):
    """Cut the 'All' inflection table of one record down to one segment.

    Reads ``<study>/analyzed/inflections/all_times/<range>/<record>/All/
    <sensor>.csv``, drops the 'Unnamed...' index columns, applies
    ``segment_df`` and saves the result in the folder of ``segment``.
    Nothing is done when the 'All' file does not exist.

    The result is saved under ``segment`` instead of being written back onto
    the 'All' file, which would destroy the source table for later segments.

    Args:
        study: study folder.
        record: record folder name.
        sensor: sensor name, used as the csv file name.
        segment: segment to extract; also the output folder name.
        range: search range of the inflection analysis (folder name). The
            name shadows the builtin but is part of the call interface.
    """
    segmentRef = 'All'
    base = [study, 'analyzed', 'inflections', 'all_times', str(range), record]

    source_file = os.path.join(*base, segmentRef, sensor + ".csv")
    if not os.path.isfile(source_file):
        return

    df = pd.read_csv(source_file)
    unnamed = [c for c in df.columns if 'Unnamed' in str(c)]
    df = df.drop(columns=unnamed)
    df = segment_df(segment, df)

    path = build_path(base + [segment])
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
    print('segmented inflection file saved - ' + file)
def timestamp_source(study, format_type, segment, record, sensor):
    """Timestamp one raw sensor file and save it in the formatted tree.

    Reads ``studies/<study>/source/<record>/<sensor>.csv``, passes the table
    through ``build_timestamps`` and writes the result to
    ``studies/<study>/formatted/<format_type>/<record>/<segment>/<sensor>.csv``.

    Returns:
        The table returned by ``build_timestamps``.
    """
    raw_file = os.path.join('studies', study, 'source', record, sensor + '.csv')
    stamped = build_timestamps(pd.read_csv(raw_file), sensor)

    target_dir = build_path([
        'studies', study, 'formatted',
        str(format_type), str(record), str(segment)
    ])
    stamped.to_csv(os.path.join(target_dir, sensor + ".csv"))
    return stamped
def plot_regression():
    """Plot the polynomial regressions, one figure per record.

    For each study / sensor / degree / record a single-column figure is
    built with: one panel per segment, one panel overlaying every segment
    except the last entry of ``segment_list``, and a final panel drawn by
    ``plot_coefficient_bar``. The figure is saved as
    ``studies/<study>/plotted/regression/<degree>/<record>/<sensor>.png``.

    Each figure is closed once it has been saved. pyplot keeps every figure
    alive until it is closed explicitly, so without this the memory use
    grows with every record plotted.
    """
    print('plotting regression')

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    degree_list = [int(x) for x in retrieve_ref('degree_list')]

    row_width_mulp, col_width_mulp = 14, 5

    for study in study_list:
        clean_path = os.path.join('studies', study, 'formatted', 'clean')
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for degree in degree_list:
                for record in recordNames:
                    # segments + overlay panel + coefficient panel
                    row_num, col_num, plot_num = len(segment_list) + 2, 1, 0
                    plot_width = col_num * row_width_mulp
                    plot_height = row_num * col_width_mulp
                    fig = plt.figure(figsize=(plot_width, plot_height))

                    # one panel per segment
                    for segment in segment_list:
                        plot_num += 1
                        plt.subplot(row_num, col_num, plot_num)
                        plot_regression_segment(study, record, segment,
                                                sensor, degree)

                    # overlay of every segment but the last one
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    for segment in segment_list[:-1]:
                        plot_regression_segment(study, record, segment,
                                                sensor, degree)
                        plt.title(' ')

                    # coefficient bar chart
                    plot_num += 1
                    plt.subplot(row_num, col_num, plot_num)
                    plot_coefficient_bar(study, record, sensor, degree)
                    plt.title(' ')

                    path = build_path([
                        'studies', study, 'plotted', 'regression',
                        str(degree), record
                    ])
                    file = os.path.join(path, sensor + ".png")
                    plt.savefig(file, bbox_inches='tight')
                    plt.close(fig)
                    print('plotted regression for ' + file)
def format_source():
    """Timestamp the source measurements of every record, without truncation.

    For each study the metadata table is loaded; for each record listed in
    it and each sensor, ``studies/<study>/source/<source_path>/<sensor>.csv``
    is read, passed through ``build_timestamps`` and saved as
    ``studies/<study>/formatted/source/<recordName>/All/<sensor>.csv``.
    """
    print("begin format_source")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    format_type, segment = 'source', 'All'

    for study in studies:
        df_meta = retrieve_meta(study)
        print(df_meta)

        for record in list(df_meta['recordName']):
            # first metadata row that belongs to this record
            i = df_meta.loc[df_meta['recordName'] == record].index.values[0]
            recordSource = df_meta.loc[i, 'source_path']
            # read from the metadata but not applied: this step keeps the
            # whole recording
            recordBegin = df_meta.loc[i, 'recordBegin']
            recordEnd = df_meta.loc[i, 'recordEnd']

            print('i = ' + str(i))
            print('record = ' + str(record))
            print('recordSource = ' + str(recordSource))

            for sensor in sensors:
                raw_file = os.path.join('studies', study, format_type,
                                        recordSource, sensor + '.csv')
                stamped = build_timestamps(pd.read_csv(raw_file), sensor)

                target_dir = build_path([
                    'studies', study, 'formatted', format_type, record,
                    segment
                ])
                file = os.path.join(target_dir, sensor + ".csv")
                stamped.to_csv(file)
                print('formatted source file = ' + str(file))
def segment_formatted(format_type):
    """Split the clean 'All' table of every record into its segments.

    For each study, record (from the metadata), sensor and segment, the file
    ``studies/<study>/formatted/clean/<record>/All/<sensor>.csv`` is read,
    reduced with ``segment_df`` and saved in the folder of that segment.

    Args:
        format_type: accepted for the call interface only; the body always
            works on the 'clean' format.
    """
    print("begin segment_formatted")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # the next three references are looked up but not used further down
    retrieve_ref('format_types')
    segments = retrieve_ref('segment_list')
    retrieve_ref('timePreStudy')
    retrieve_ref('timePostStudy')

    clean_type, segmentRef = 'clean', 'All'

    for study in studies:
        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):
            i = df_meta.loc[df_meta['recordName'] == record].index.values[0]
            print('i = ' + str(i))

            for sensor in sensors:
                for segment in segments:
                    source = os.path.join('studies', study, 'formatted',
                                          clean_type, record, segmentRef,
                                          sensor + '.csv')
                    segmented = segment_df(segment, pd.read_csv(source))

                    target_dir = build_path([
                        'studies', study, 'formatted', clean_type, record,
                        segment
                    ])
                    file = os.path.join(target_dir, sensor + ".csv")
                    segmented.to_csv(file)
                    print('segmented clean file = ' + str(file))
def format_truncate():
    """Trim every timestamped source table to the bounds of its record.

    For each study, record (from the metadata) and sensor, the file
    ``studies/<study>/formatted/source/<record>/All/<sensor>.csv`` is read,
    only the rows with ``recordBegin < timeUnix < recordEnd`` are kept, and
    the result is saved under ``formatted/truncate/<record>/All``.

    Raises:
        AssertionError: when no row is left after trimming.
    """
    print("begin format_truncate")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segment = 'All'

    for study in studies:
        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):
            i = df_meta.loc[df_meta['recordName'] == record].index.values[0]
            recordBegin = df_meta.loc[i, 'recordBegin']
            recordEnd = df_meta.loc[i, 'recordEnd']
            print('i = ' + str(i))

            for sensor in sensors:
                source = os.path.join('studies', study, 'formatted', 'source',
                                      record, segment, sensor + '.csv')
                df = pd.read_csv(source)

                # both bounds are exclusive
                inside = (df['timeUnix'] > recordBegin) & (df['timeUnix'] < recordEnd)
                df = df[inside]
                assert len(df['timeUnix']) > 0, 'during format truncate, dataframe empty'

                target_dir = build_path([
                    'studies', study, 'formatted', 'truncate', record, segment
                ])
                file = os.path.join(target_dir, sensor + ".csv")
                df.to_csv(file)
                print('formatted truncated file = ' + str(file))
def retrieve_regression(study, segment, sensor, degree):
    """Load the saved regression coefficients of one sensor and segment.

    Reads ``studies/<study>/analyzed/regression/<degree>/<segment>/
    <sensor>.csv`` and drops its 'Unnamed...' columns.

    When the row labelled 1 has a null value in either of the last two
    columns, the table is collapsed: a 'coefficients' column is added, each
    record receives the value of its last non-null column whose name does
    not contain "record", and the last two original columns are deleted.

    Returns:
        The resulting DataFrame (it is also printed).
    """
    folder = build_path(
        ['studies', study, 'analyzed', 'regression', str(degree), segment])
    df = pd.read_csv(os.path.join(folder, sensor + '.csv'))

    for header in list(df.head()):
        if 'Unnamed' in header:
            del df[header]

    colNames = list(df.head())

    if pd.isnull(df.loc[1, colNames[-1]]) is True or pd.isnull(df.loc[1, colNames[-2]]):
        record_names = list(df['recordName'])
        df['coefficients'] = [None] * len(record_names)

        for record in record_names:
            row = df[df['recordName'] == record].index.values[0]
            for colName in colNames:
                if "record" in colName:
                    continue
                # later non-null columns overwrite earlier ones
                if pd.isnull(df.loc[row, colName]) is False:
                    df.loc[row, 'coefficients'] = df.loc[row, colName]

        del df[colNames[-1]]
        del df[colNames[-2]]

    print('retrieve_regression df = ')
    print(df)
    return df
def compile_formatted(format_type):
    """Write the per-segment tables of every clean record.

    NOTE(review): the name suggests that separate csv files are collapsed
    into one, but the body does not do that. It reads
    ``studies/<study>/formatted/clean/<record>/All/<sensor>.csv``, applies
    ``segment_df`` for each segment and saves one csv per segment.

    Args:
        format_type: accepted for the call interface only; the body always
            works on the 'clean' format.
    """
    print("begin compile_formatted")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # the next three references are looked up but not used further down
    retrieve_ref('format_types')
    segments = retrieve_ref('segment_list')
    retrieve_ref('timePreStudy')
    retrieve_ref('timePostStudy')

    clean_type, segmentRef = 'clean', 'All'

    for study in studies:
        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):
            i = df_meta.loc[df_meta['recordName'] == record].index.values[0]
            print('i = ' + str(i))

            for sensor in sensors:
                for segment in segments:
                    source = os.path.join('studies', study, 'formatted',
                                          clean_type, record, segmentRef,
                                          sensor + '.csv')
                    segmented = segment_df(segment, pd.read_csv(source))

                    target_dir = build_path([
                        'studies', study, 'formatted', clean_type, record,
                        segment
                    ])
                    file = os.path.join(target_dir, sensor + ".csv")
                    segmented.to_csv(file)
                    print('segmented clean file = ' + str(file))
def clean_save():
    """Turn every coregistered 'All' table into a clean table.

    For each study, record (from the metadata) and sensor, the file
    ``studies/<study>/formatted/coregister/<record>/All/<sensor>.csv`` is
    read, passed through ``reset_minutes``, stripped of its 'Unnamed...'
    columns and saved under ``formatted/clean/<record>/All``.
    """
    print("begin clean_save")

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    # looked up but not used further down
    retrieve_ref('segment_list')

    segment = 'All'

    for study in studies:
        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):
            i = df_meta.loc[df_meta['recordName'] == record].index.values[0]
            print('i = ' + str(i))

            for sensor in sensors:
                source = os.path.join('studies', study, 'formatted',
                                      'coregister', record, segment,
                                      sensor + '.csv')
                df = reset_minutes(segment, pd.read_csv(source))

                # index columns left over from an earlier to_csv
                leftovers = [c for c in list(df.head()) if 'Unnamed' in c]
                for leftover in leftovers:
                    del df[leftover]

                target_dir = build_path(
                    ['studies', study, 'formatted', 'clean', record, segment])
                file = os.path.join(target_dir, sensor + ".csv")
                df.to_csv(file)
                print('formatted clean file = ' + str(file))
def segment_inflections(study, record, sensor, segment, range):
    """Save the inflection table of one segment, cut from the 'All' table.

    Reads ``<study>/analyzed/inflections/all_times/<range>/<record>/All/
    <sensor>.csv``, applies ``segment_df`` and writes the result to the same
    location with ``All`` replaced by ``segment``. Returns without doing
    anything when the 'All' file is missing.
    """
    root = [study, 'analyzed', 'inflections', 'all_times', str(range), record]

    all_file = os.path.join(*(root + ['All']), sensor + ".csv")
    if os.path.isfile(all_file):
        segmented = segment_df(segment, pd.read_csv(all_file))
        target_dir = build_path(root + [segment])
        segmented.to_csv(os.path.join(target_dir, sensor + ".csv"))
def plot_source(study):
    """Scatter the source and truncated measurements of every record.

    One figure is made per record folder found in
    ``studies/<study>/formatted/source``, with one panel per sensor. In each
    panel every column whose name contains 'measurement' is plotted against
    'timeUnix' for both the 'source' and the 'truncate' format, coloured by
    ``retrieve_ref_color``. The figure is saved after each panel as
    ``<sensor>.png``.

    The figure is closed when the record is done; pyplot keeps figures alive
    until they are closed explicitly, so they would otherwise accumulate.

    Args:
        study: name of the study folder under ``studies``.
    """
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    format_type = 'source'
    source_path = os.path.join('studies', study, 'formatted', format_type)
    format_folders = os.listdir(source_path)
    format_types = ['source', 'truncate']
    row_width_mulp, col_width_mulp = 14, 5

    for record in format_folders:
        row_num, col_num = len(sensor_list), 1
        plot_width = col_num * row_width_mulp
        plot_height = row_num * col_width_mulp
        fig = plt.figure(figsize=(plot_width, plot_height))

        for plot_num, sensor in enumerate(sensor_list, start=1):
            plt.subplot(row_num, col_num, plot_num)

            for segment in segment_list:
                for format_type in format_types:
                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segment,
                                          sensor + '.csv')
                    if not os.path.isfile(source):
                        continue

                    print('source = ' + source)
                    df = pd.read_csv(source)
                    colNames = list(df.head())
                    print('colNames = ')
                    print(colNames)

                    for colName in colNames:
                        if 'measurement' not in str(colName):
                            continue
                        labelName = format_type
                        print('labelName = ' + labelName)
                        valueColor = retrieve_ref_color(labelName)
                        plt.scatter(df['timeUnix'], df[colName],
                                    color=valueColor, label=labelName)

            plt.xlabel('time Unix')
            sensor_unit = retrieve_sensor_unit(sensor)
            plt.ylabel(sensor + ' ' + sensor_unit)
            plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')

            # NOTE(review): format_type is whatever the loops above left
            # behind ('truncate' once any segment was visited), which decides
            # the output folder. Kept as is so files stay where they were.
            path = build_path(['studies', study, 'plotted', format_type, record])
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')

        plt.close(fig)
def plot_coregister(study):
    """Scatter the clean, coregistered measurements of every record.

    One figure is made per record folder found in
    ``studies/<study>/formatted/clean``, with one panel per sensor. In each
    panel every column whose name contains 'measurement' is plotted against
    'timeMinutes' for each segment, taken in reverse ``segment_list`` order,
    with the colour chosen by ``retrieve_ref_color_wearable_segment``. The
    figure is saved after each panel as
    ``studies/<study>/plotted/clean/<record>/<sensor>.png``.

    The segment list is reversed on a copy: reversing the list returned by
    ``retrieve_ref`` in place would flip the order for every other user of
    that list if it is shared. Figures are closed after use so they do not
    accumulate in pyplot.

    Args:
        study: name of the study folder under ``studies``.
    """
    sensor_list = retrieve_ref('sensor_list')
    segment_list = list(reversed(retrieve_ref('segment_list')))

    format_type = 'clean'
    source_path = os.path.join('studies', study, 'formatted', format_type)
    format_folders = os.listdir(source_path)
    row_width_mulp, col_width_mulp = 14, 5

    for record in format_folders:
        row_num, col_num = len(sensor_list), 1
        plot_width = col_num * row_width_mulp
        plot_height = row_num * col_width_mulp
        fig = plt.figure(figsize=(plot_width, plot_height))

        for plot_num, sensor in enumerate(sensor_list, start=1):
            plt.subplot(row_num, col_num, plot_num)

            for segment in segment_list:
                source = os.path.join('studies', study, 'formatted',
                                      format_type, record, segment,
                                      sensor + '.csv')
                if not os.path.isfile(source):
                    continue

                print('source = ' + source)
                df = pd.read_csv(source)
                colNames = list(df.head())
                print('colNames = ')
                print(colNames)

                for colName in colNames:
                    if 'measurement' not in str(colName):
                        continue

                    colNameSplit = colName.split('_')
                    labelName = str(format_type + ' ' + colNameSplit[0])
                    print('labelName = ' + labelName)

                    # wearables are numbered from the last column backwards
                    index_col = df.columns.get_loc(colName)
                    wearable_num = len(colNames) - index_col
                    print('wearable_num = ' + str(wearable_num))

                    colorWearableSegment = retrieve_ref_color_wearable_segment(
                        wearable_num, segment)
                    plt.scatter(df['timeMinutes'], df[colName],
                                color=colorWearableSegment, label=labelName)

            plt.xlabel('Record Time (minutes)')
            sensor_unit = retrieve_sensor_unit(sensor)
            plt.ylabel(sensor + ' ' + sensor_unit)
            plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')

            path = build_path(['studies', study, 'plotted', 'clean', record])
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')

        plt.close(fig)
def find_inflections(study, record, sensor, segment, range):
    """
    Search one sensor table for inflections with a sliding quadratic fit.

    Break each set of measurements into a subset - a range of ~30-120 seconds
    Use polyfit to find the best fit second order polynomial
    Find the point where the derivative of the best fit polynomial is zero
    If that point is within half a sample step of the median time point of
    the subset, an inflection is found

    The result is written to
    <study>/analyzed/inflections/all_times/<range>/<record>/<segment>/<sensor>.csv.
    If that file already exists nothing is recalculated.

    Parameters:
        study, record, sensor, segment: pieces of the input and output paths
        range: width of the sliding window; it is divided by 60 before being
            added to 'timeMinutes' (shadows the builtin of the same name)

    Returns:
        the path of the saved csv, or None when the file already existed
    """
    # check if the inflections have already been found
    path = [study, 'analyzed', 'inflections', 'all_times', str(range), record, segment]
    pathJoined = os.path.join(*path)
    file = os.path.join(pathJoined, sensor + ".csv")
    if os.path.isfile(file):
        print('file found, not recalculated.')
        return
    print('finding inflections to build : ' + file)
    # retrieve the timestamped measurements for the study - record - sensor - segment
    format_type = 'truncate'
    source = os.path.join(study, 'formatted', format_type, record, segment, sensor + '.csv')
    print('source = ' + source)
    df = pd.read_csv(source)
    for colName in df.columns:
        # remove extra columns because the dataframe will be saved
        if 'Unnamed' in str(colName):
            del df[colName]
        # save the timestamps as a list
        # NOTE(review): timeMinutes must be set before the first measurement
        # column is reached - assumes the time column comes first; confirm
        elif 'Minutes' in str(colName):
            timeMinutes = list(df[colName])
        # find the measurement
        elif 'meas' in colName:
            # add new columns to the dataframe to save the new variables
            # each is prefixed with the part of the column name before the first '_'
            newColNames = ['inflectionDecision', 'inflectionLocation', 'polyfitCoefficients', 'polyfitEquation', 'polyfitSolution', 'derivativeEquation', 'derivativeSolution']
            colNameSplit = colName.split('_')
            print('colNameSplit[0] = ' + colNameSplit[0])
            for suffix in newColNames:
                label = str(colNameSplit[0] + '_' + suffix)
                print('label = ' + label)
                if label not in df.columns:
                    df[label] = [None]*len((list(df['timeMinutes'])))
            # timeBegin / timeEnd are reset for every measurement column
            df['timeBegin'] = [None]*len((list(df['timeMinutes'])))
            df['timeEnd'] = [None]*len((list(df['timeMinutes'])))
            # slide the window: one fit per time stamp
            for timeMinute in timeMinutes:
                # label of the first row carrying this time stamp
                i = df[ df['timeMinutes']== timeMinute].index.values[0]
                # sample step taken from the rows labelled 1 and 2
                # (assumes evenly spaced samples and a 0..n-1 index - TODO confirm)
                timeDif = (float(df.loc[2,'timeMinutes']) - float(df.loc[1,'timeMinutes']))
                timeTolerance = timeDif/2
                # window width expressed in rows
                iRange = int(range/60*1/(timeDif))
                # skip the windows that would run past the end of the table
                if len(list(df['timeMinutes'])) - i <= iRange+2:
                    continue
                timeMedian = df.loc[int(i+iRange/2), 'timeMinutes']
                timeBegin = df.loc[int(i), 'timeMinutes']
                timeEnd = df.loc[int(i+iRange), 'timeMinutes']
                # rows with timeMinute <= timeMinutes <= timeMinute + range/60
                df_truncate = df[df['timeMinutes'] >= timeMinute]
                df_truncate = df_truncate[df_truncate['timeMinutes'] <= timeMinute + range/60]
                timeTruncate = list(df_truncate['timeMinutes'])
                # every result of this window is stored on its middle row
                df.loc[int(i+iRange/2), 'timeBegin'] = min(timeTruncate)
                df.loc[int(i+iRange/2), 'timeEnd'] = max(timeTruncate)
                measTruncate = list(df_truncate[colName])
                # second order fit; coef[0] is the x*x coefficient
                coef = np.polyfit(timeTruncate, measTruncate, 2)
                x = sym.Symbol('x')
                f = coef[0]*x*x+coef[1]*x+coef[2]
                dff = sym.diff(f,x)
                # roots of the fit and of its derivative
                solf = sym.solve(f)
                soldf = sym.solve(dff)
                # NOTE(review): raises IndexError when the derivative has no root
                soldf = soldf[0]
                # default decision, overwritten below when the test passes
                label = str(colNameSplit[0] + '_' + 'inflectionDecision')
                df.loc[int(i+iRange/2), label] = 'No'
                # the location saved is the start of the window, not the root
                label = str(colNameSplit[0] + '_' + 'inflectionLocation')
                df.loc[int(i+iRange/2), label] = timeMinute
                # NOTE(review): the coefficients are joined without a separator
                label = str(colNameSplit[0] + '_' + 'polyfitCoefficients')
                df.loc[int(i+iRange/2), label] = str(''.join([str(x) for x in coef]))
                label = str(colNameSplit[0] + '_' + 'polyfitEquation')
                df.loc[int(i+iRange/2), label] = str(f)
                label = str(colNameSplit[0] + '_' + 'polyfitSolution')
                df.loc[int(i+iRange/2), label] = str(''.join([str(x) for x in solf]))
                label = str(colNameSplit[0] + '_' + 'derivativeEquation')
                df.loc[int(i+iRange/2), label] = str(dff)
                label = str(colNameSplit[0] + '_' + 'derivativeSolution')
                df.loc[int(i+iRange/2), label] = str(soldf)
                # inflection when the root of the derivative lies strictly
                # within timeTolerance of the middle of the window
                if soldf < timeMedian + timeTolerance:
                    if soldf > timeMedian - timeTolerance:
                        print('inflection found at time = ' + str(soldf))
                        label = str(colNameSplit[0] + '_' + 'inflectionDecision')
                        df.loc[int(i+iRange/2), label] = 'Yes'
    # build_path is defined elsewhere; it returns the folder used for saving
    path = build_path(path)
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
    print('inflection list saved : ' + file)
    return(file)
def plot_inflections():
    """Plot the measurements of every record with its inflections marked.

    For each study / sensor / record one figure is made with one panel per
    search range. When the inflection table of that range exists for the
    'All' segment, the clean measurements are scattered against
    'timeMinutes' and a vertical line is drawn at every row whose
    ``*inflectionDecision`` column says 'Yes'. The figure is saved as
    ``<study>/plotted/inflection/each_record/<record>/<sensor>.png``.

    Changes compared with the earlier behaviour:
      * the sensor loop used to be nested inside itself, so every figure was
        rebuilt and saved once per sensor;
      * rows are filtered on the decision columns only - matching every
        column containing 'inflection' also tested ``*inflectionLocation``,
        which never equals 'Yes' and so removed every row;
      * the x axis is labelled as minutes, which is what is plotted;
      * figures are closed after saving so they do not pile up in pyplot.
    """
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    searchRange = retrieve_ref('searchRange')

    format_type, segment = 'clean', 'All'
    row_width_mulp, col_width_mulp = 7, 5

    for study in study_list:
        clean_path = os.path.join(study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for record in recordNames:
                row_num, col_num = len(searchRange), 2
                plot_width = col_num * row_width_mulp
                plot_height = row_num * col_width_mulp
                fig = plt.figure(figsize=(plot_width, plot_height))

                # named search_range so the builtin range() is not shadowed
                for plot_num, search_range in enumerate(searchRange, start=1):
                    plt.subplot(row_num, col_num, plot_num)

                    path = build_path([
                        study, 'analyzed', 'inflections', 'all_times',
                        str(search_range), record, segment
                    ])
                    file = os.path.join(path, sensor + ".csv")

                    if os.path.isfile(file):
                        source = os.path.join(study, 'formatted', format_type,
                                              record, segment, sensor + '.csv')
                        print('source = ' + source)
                        df = pd.read_csv(source)

                        # vertical extent of the inflection markers
                        measMin, measMax = None, None
                        for colName in df.columns:
                            if 'timeMinutes' in str(colName):
                                timeMinutes = list(df[colName])
                            if 'meas' in str(colName):
                                measList = list(df[colName])
                                measMin = min(measList)
                                measMax = max(measList)
                                plt.scatter(timeMinutes, measList,
                                            label=str(colName))

                        df = pd.read_csv(file)
                        for colName in df.columns:
                            if 'inflectionDecision' in str(colName):
                                df = df.drop(df[(df[colName] != 'Yes')].index)

                        # nothing to span when no measurement was plotted
                        if measMin is not None:
                            for inflection_time in list(df['timeMinutes']):
                                plt.plot([inflection_time, inflection_time],
                                         [measMin, measMax],
                                         color=[0, .9, .6])

                    plt.xlabel('Record Time (minutes)')
                    sensor_unit = retrieve_sensor_unit(sensor)
                    plt.ylabel(sensor + ' ' + sensor_unit)
                    plt.title('Record = ' + str(record) + ' Range = ' +
                              str(search_range) + ' seconds')

                path = build_path(
                    [study, 'plotted', 'inflection', 'each_record', record])
                file = os.path.join(path, sensor + ".png")
                plt.savefig(file, bbox_inches='tight')
                plt.close(fig)
                print('inflection plot saved ' + file)
def analyze_mean():
    """Save the mean measurement of every record, per sensor and segment.

    For each study, sensor and segment a table with one row per record
    folder in ``studies/<study>/formatted/clean`` is built and saved as
    ``studies/<study>/analyzed/statistics/mean/<segment>/<sensor>.csv``.

    A record counts as coregistered when its 'coregisterRecords' metadata
    entry is more than twice as long as the record name. For those records
    the mean of each ``<wearable>_measurement`` column is stored in a
    ``<wearable>_mean`` column; otherwise it goes to a single 'mean' column.
    Means are rounded to 4 decimals.

    The result columns are now prepared only from the columns of the first
    record's file. The earlier set-up read ``colName`` before any column had
    been visited, which raised a NameError (or reused a name left over from
    a previous sensor/segment).
    """
    print("begin statistical analysis of records")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    format_type = 'clean'

    for study in study_list:
        df_meta = retrieve_meta(study)
        clean_path = os.path.join('studies', study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        # nothing to analyse, and recordNames[0] below would fail
        if not recordNames:
            continue

        for sensor in sensor_list:
            for segment in segment_list:
                df_mean = pd.DataFrame()
                df_mean['recordName'] = recordNames
                empty_col = [None] * len(recordNames)

                # prepare the result columns from the first record
                recordRef = recordNames[0]
                i = df_meta[df_meta['recordName'] == recordRef].index.values[0]
                coregisterRecords = df_meta.loc[i, 'coregisterRecords']

                if len(coregisterRecords) > 2 * len(recordRef):
                    source = os.path.join('studies', study, 'formatted',
                                          format_type, recordRef, segment,
                                          sensor + '.csv')
                    df = pd.read_csv(source)
                    for colName in df.columns:
                        if 'meas' in str(colName):
                            wearableName = colName.split('_')[0]
                            df_mean[wearableName + '_mean'] = list(empty_col)
                else:
                    df_mean['mean'] = list(empty_col)

                # fill in the mean of every record
                for record in recordNames:
                    i = df_meta[df_meta['recordName'] == record].index.values[0]
                    coregisterRecords = df_meta.loc[i, 'coregisterRecords']

                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segment,
                                          sensor + '.csv')
                    print('source = ' + str(source))
                    df = pd.read_csv(source)

                    for colName in df.columns:
                        if 'measurement' not in str(colName):
                            continue

                        avg = np.mean(list(df[colName]))

                        if len(coregisterRecords) > 2 * len(record):
                            newColName = colName.split('_')[0] + '_mean'
                        else:
                            newColName = 'mean'

                        j = df_mean[df_mean['recordName'] == record].index.values[0]
                        df_mean.loc[j, newColName] = round(avg, 4)
                        print('j = ' + str(j) + ' mean = ' + str(avg))

                path = build_path([
                    'studies', study, 'analyzed', 'statistics', 'mean', segment
                ])
                file = os.path.join(path, sensor + ".csv")
                df_mean.to_csv(file)
                print('mean file saved: ' + file)

    print("completed statistical analysis of records")
def find_inflections(path, file, study, format_type, record, sensor, segment, range):
    """
    Search one sensor table for inflections with a sliding quadratic fit.

    The table <study>/formatted/<format_type>/<record>/<segment>/<sensor>.csv
    is read. For every time stamp, the rows within the next range/60 minutes
    are fitted with a second order polynomial; when the root of the
    derivative of that fit lies within half a sample step of the middle of
    the window, the middle row is marked 'Yes' in <prefix>_inflectionDecision.

    Parameters:
        path: list of path pieces handed to build_path for the output folder
        file: not read; it is overwritten with the output path before use
        study, format_type, record, sensor, segment: pieces of the input path
        range: width of the sliding window; it is divided by 60 before being
            added to 'timeMinutes' (shadows the builtin of the same name)

    Returns:
        the path of the saved csv
    """
    source = os.path.join(study, 'formatted', format_type, record, segment, sensor + '.csv')
    print('source = ' + source)
    df = pd.read_csv(source)
    print('df = ')
    print(df)
    for colName in df.columns:
        # remove extra columns because the dataframe will be saved
        if 'Unnamed' in str(colName):
            del df[colName]
        # save the timestamps as a list
        # NOTE(review): timeMinutes must be set before the first measurement
        # column is reached - assumes the time column comes first; confirm
        elif 'Minutes' in str(colName):
            timeMinutes = list(df[colName])
        elif 'meas' in colName:
            # add new columns to the dataframe to save the new variables
            # each is prefixed with the part of the column name before the first '_'
            newColNames = [ 'inflectionDecision', 'inflectionLocation', 'polyfitCoefficients', 'polyfitEquation', 'polyfitSolution', 'derivativeEquation', 'derivativeSolution' ]
            colNameSplit = colName.split('_')
            print('colNameSplit[0] = ' + colNameSplit[0])
            for suffix in newColNames:
                label = str(colNameSplit[0] + '_' + suffix)
                print('label = ' + label)
                if label not in df.columns:
                    df[label] = [None] * len((list(df['timeMinutes'])))
            # timeBegin / timeEnd are reset for every measurement column
            df['timeBegin'] = [None] * len((list(df['timeMinutes'])))
            df['timeEnd'] = [None] * len((list(df['timeMinutes'])))
            # slide the window: one fit per time stamp
            for timeMinute in timeMinutes:
                # label of the first row carrying this time stamp
                i = df[df['timeMinutes'] == timeMinute].index.values[0]
                # sample step taken from the rows labelled 1 and 2
                # (assumes evenly spaced samples and a 0..n-1 index - TODO confirm)
                timeDif = (float(df.loc[2, 'timeMinutes']) - float(df.loc[1, 'timeMinutes']))
                timeTolerance = timeDif / 2
                # window width expressed in rows
                iRange = int(range / 60 * 1 / (timeDif))
                # skip the windows that would run past the end of the table
                if len(list(df['timeMinutes'])) - i <= iRange + 2:
                    continue
                timeMedian = df.loc[int(i + iRange / 2), 'timeMinutes']
                timeBegin = df.loc[int(i), 'timeMinutes']
                timeEnd = df.loc[int(i + iRange), 'timeMinutes']
                # rows with timeMinute <= timeMinutes <= timeMinute + range/60
                df_truncate = df[df['timeMinutes'] >= timeMinute]
                df_truncate = df_truncate[ df_truncate['timeMinutes'] <= timeMinute + range / 60]
                timeTruncate = list(df_truncate['timeMinutes'])
                # every result of this window is stored on its middle row
                df.loc[int(i + iRange / 2), 'timeBegin'] = min(timeTruncate)
                df.loc[int(i + iRange / 2), 'timeEnd'] = max(timeTruncate)
                measTruncate = list(df_truncate[colName])
                # second order fit; coef[0] is the x*x coefficient
                coef = np.polyfit(timeTruncate, measTruncate, 2)
                x = sym.Symbol('x')
                f = coef[0] * x * x + coef[1] * x + coef[2]
                dff = sym.diff(f, x)
                # roots of the fit and of its derivative
                solf = sym.solve(f)
                soldf = sym.solve(dff)
                # NOTE(review): raises IndexError when the derivative has no root
                soldf = soldf[0]
                # default decision, overwritten below when the test passes
                label = str(colNameSplit[0] + '_' + 'inflectionDecision')
                df.loc[int(i + iRange / 2), label] = 'No'
                # the location saved is the start of the window, not the root
                label = str(colNameSplit[0] + '_' + 'inflectionLocation')
                df.loc[int(i + iRange / 2), label] = timeMinute
                # NOTE(review): the coefficients are joined without a separator
                label = str(colNameSplit[0] + '_' + 'polyfitCoefficients')
                df.loc[int(i + iRange / 2), label] = str(''.join([str(x) for x in coef]))
                label = str(colNameSplit[0] + '_' + 'polyfitEquation')
                df.loc[int(i + iRange / 2), label] = str(f)
                label = str(colNameSplit[0] + '_' + 'polyfitSolution')
                df.loc[int(i + iRange / 2), label] = str(''.join([str(x) for x in solf]))
                label = str(colNameSplit[0] + '_' + 'derivativeEquation')
                df.loc[int(i + iRange / 2), label] = str(dff)
                label = str(colNameSplit[0] + '_' + 'derivativeSolution')
                df.loc[int(i + iRange / 2), label] = str(soldf)
                # inflection when the root of the derivative lies strictly
                # within timeTolerance of the middle of the window
                if soldf < timeMedian + timeTolerance:
                    if soldf > timeMedian - timeTolerance:
                        print('inflection found at time = ' + str(soldf))
                        label = str(colNameSplit[0] + '_' + 'inflectionDecision')
                        df.loc[int(i + iRange / 2), label] = 'Yes'
    # build_path is defined elsewhere; it returns the folder used for saving
    path = build_path(path)
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
    print('inflection list saved : ' + file)
    return (file)
def unique_inflections(study, format_type, record, sensor, segment):
    """Merge the inflections found with every search range into one list.

    The inflection table of each search range is read (missing files are
    skipped) and the 'timeMinutes' of every row marked 'Yes' in a
    ``*Decision*`` column is collected. Duplicate times are removed, the
    times are sorted, and a time is kept only when it lies more than 10
    seconds (10/60 minutes) after the previously kept one.

    The kept times and the inflection rate (number of kept times divided by
    the largest 'timeMinutes' of the last table read) are saved to
    ``<study>/analyzed/inflections/all_times/unique/<record>/<segment>/
    <sensor>.csv``. Nothing is saved when no inflection was found.

    Duplicates are tracked in a set and the thinning step compares against
    the last kept value; because the times are sorted this gives the same
    result as scanning the whole list each time, without the quadratic cost.

    Args:
        study, record, sensor, segment: pieces of the input/output paths.
        format_type: not used by this function.
    """
    searchRange = [int(x) for x in retrieve_ref('searchRange')]
    inflection_times = set()

    # named search_range so the builtin range() is not shadowed
    for search_range in searchRange:
        path = [
            study, 'analyzed', 'inflections', 'all_times', str(search_range),
            record, segment
        ]
        file = os.path.join(*path, sensor + ".csv")
        if not os.path.isfile(file):
            continue

        df = pd.read_csv(file)
        recordLength = max(list(df['timeMinutes']))

        for colName in df.columns:
            if 'Decision' in colName:
                flagged = df.loc[df[colName] == 'Yes', 'timeMinutes']
                inflection_times.update(float(t) for t in flagged)

    if not inflection_times:
        return

    # thin out inflections that follow the previous one too closely
    uniqueList = []
    for inflection_time in sorted(inflection_times):
        if not uniqueList or inflection_time > uniqueList[-1] + 10 / 60:
            uniqueList.append(inflection_time)

    print('uniqueList = ')
    print(uniqueList)
    print('length of uniqueList = ' + str(len(uniqueList)))

    inflectionRate = len(uniqueList) / recordLength

    df = pd.DataFrame()
    df['uniqueList'] = uniqueList
    df['inflectionRate'] = [inflectionRate] * len(uniqueList)

    path = build_path([
        study, 'analyzed', 'inflections', 'all_times', 'unique', record,
        segment
    ])
    file = os.path.join(path, sensor + ".csv")
    df.to_csv(file)
def analyze_records():
    """Save the 0.1 ... 0.9 quantiles of every clean record.

    For each study a table with one row per record folder in
    ``studies/<study>/formatted/clean`` is prepared from the columns of the
    first record's ``All/TEMP.csv``. For each sensor and segment the
    quantiles of every column containing 'measurement' are stored in
    ``quan<q>_<wearable>`` columns, and the table is written to
    ``studies/<study>/analyzed/statistics/quantiles/<sensor>.csv`` after
    each segment.

    The wearable name is now taken from the column being processed, using
    the part before the first '_'. Before, the fill loop reused the name
    left over from the set-up loop, and that name was the part after the
    '_', so a ``<wearable>_measurement`` column always produced
    'measurement' and several wearables overwrote each other.

    NOTE(review): the output path has no segment level, so the file of one
    sensor is rewritten for each segment and ends up holding the last one.
    """
    print("begin statistical analysis of records")

    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')
    quanList = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    format_type = 'clean'

    def quantile_column(quan, colName):
        # name of the result column for one quantile of one measurement
        wearableName = str(colName).split('_')[0]
        return 'quan' + str(quan) + '_' + wearableName

    for study in study_list:
        source_path = os.path.join('studies', study, 'formatted', format_type)
        format_folders = os.listdir(source_path)

        # the first record's temperature file defines the result columns
        record, segment, sensor = str(format_folders[0]), 'All', 'TEMP'
        source = os.path.join('studies', study, 'formatted', format_type,
                              record, segment, sensor + '.csv')
        df = pd.read_csv(source)

        df_quans = pd.DataFrame()
        df_quans['recordName'] = format_folders
        for colName in df.columns:
            if 'measurement' in str(colName):
                print('colNameSplit = ')
                print(colName.split('_'))
                for quan in quanList:
                    df_quans[quantile_column(quan, colName)] = (
                        [None] * len(format_folders))

        print('df_quans = ')
        print(df_quans)

        for sensor in sensor_list:
            for segment in segment_list:
                for record in format_folders:
                    source = os.path.join('studies', study, 'formatted',
                                          format_type, record, segment,
                                          sensor + '.csv')
                    df = pd.read_csv(source)
                    print('clean file retrieved: ' + source)

                    i = df_quans[df_quans['recordName'] == record].index.values[0]
                    print('i = ' + str(i))

                    for colName in df.columns:
                        if 'measurement' not in str(colName):
                            continue
                        measurement = list(df[colName])
                        print('measurement = ')
                        print(measurement[0:100])
                        for quan in quanList:
                            df_quans.loc[i, quantile_column(quan, colName)] = (
                                np.quantile(measurement, quan))

                path = build_path(
                    ['studies', study, 'analyzed', 'statistics', 'quantiles'])
                file = os.path.join(path, sensor + ".csv")
                df_quans.to_csv(file)
                print('quantile file saved: ' + file)

    print("completed statistical analysis of records")
def calculate_regression():
    """Fit a polynomial to every clean record and save the coefficients.

    For each study, sensor and segment one table with a row per record
    folder is created. For each degree, every column whose name contains
    'meas' is fitted against 'timeMinutes' with ``np.polyfit`` and the
    coefficients are stored, joined by spaces, in the column of the same
    name. Records without rows are skipped. The table is saved after each
    degree as ``studies/<study>/analyzed/regression/<degree>/<segment>/
    <sensor>.csv``; the same table object is reused for every degree.
    """
    print('analyzing regression. ')

    studies = retrieve_ref('study_list')
    sensors = retrieve_ref('sensor_list')
    segments = retrieve_ref('segment_list')
    degrees = [int(d) for d in retrieve_ref('degree_list')]

    format_type = 'clean'

    for study in studies:
        clean_dir = os.path.join('studies', study, 'formatted', format_type)
        recordNames = os.listdir(clean_dir)

        for sensor in sensors:
            for segment in segments:
                df_coef = pd.DataFrame()
                df_coef['recordName'] = recordNames

                for degree in degrees:
                    for record in recordNames:
                        source = os.path.join('studies', study, 'formatted',
                                              format_type, record, segment,
                                              sensor + '.csv')
                        print('source = ' + source)
                        df = pd.read_csv(source)

                        # nothing to fit in an empty record
                        if len(df['timeUnix']) == 0:
                            continue

                        for colName in list(df.head()):
                            if 'meas' not in colName:
                                continue

                            if colName not in list(df_coef.head()):
                                df_coef[colName] = [None] * len(recordNames)

                            row = df_coef[df_coef['recordName'] == record].index.values[0]
                            coef = np.polyfit(list(df['timeMinutes']),
                                              list(df[colName]), degree)
                            print('coef = ')
                            print(coef)

                            joined = ' '.join([str(c) for c in coef])
                            print(joined)
                            df_coef.loc[row, colName] = joined

                    target_dir = build_path([
                        'studies', study, 'analyzed', 'regression',
                        str(degree), segment
                    ])
                    file = os.path.join(target_dir, sensor + ".csv")
                    df_coef.to_csv(file)
                    print('regression file saved: ' + file)
def format_coregister():
    """
    Combine the paired records of each record into a single csv.

    For every record listed in a study's metadata, the record names stored in
    its ``coregisterRecords`` entry (whitespace separated) are read from
    ``studies/<study>/formatted/truncate/<item>/All/<sensor>.csv``, restricted
    to rows with ``coregisterBegin < timeUnix < coregisterEnd`` and merged
    column-wise.  The measurement of each item is stored under
    ``<wearable>_measurement`` where ``<wearable>`` is the second
    ``_``-separated token of the item name.  The result is saved to
    ``studies/<study>/formatted/coregister/<record>/All/<sensor>.csv``.

    Raises:
        AssertionError: when the coregister window is shorter than 100 time
            units, lies outside the item's time span, or leaves no rows.
    """
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    format_type, segment = 'truncate', 'All'

    for study in study_list:
        df_meta = retrieve_meta(study)

        for record in list(df_meta['recordName']):
            i = df_meta[df_meta['recordName'] == record].index.values[0]
            print('i = ' + str(i))
            coregisterBegin = df_meta.loc[i, 'coregisterBegin']
            coregisterEnd = df_meta.loc[i, 'coregisterEnd']

            # Parse the record list once per record.  It used to be re-parsed
            # for every sensor by comparing string lengths, which only worked
            # by accident after the first sensor and iterated a short string
            # character by character.
            coregisterRecords = df_meta.loc[i, 'coregisterRecords']
            if isinstance(coregisterRecords, str):
                coregisterRecords = coregisterRecords.split()

            for sensor in sensor_list:
                df_coregister = pd.DataFrame()
                print('coregisterRecords = ')
                print(coregisterRecords)

                for item in coregisterRecords:
                    source = os.path.join('studies', study, 'formatted',
                                          format_type, item, segment,
                                          sensor + '.csv')
                    df = pd.read_csv(source)
                    timeUnixAll = list(df['timeUnix'])
                    timeUnixBegin, timeUnixEnd = min(timeUnixAll), max(timeUnixAll)

                    assert coregisterEnd > coregisterBegin + 100, 'during coregister format, coregisterBegin >= coregisterEnd'
                    assert coregisterEnd < timeUnixEnd, 'possible error with time'
                    print('coregisterEnd = ' + str(coregisterEnd) +
                          ' timeUnixEnd = ' + str(timeUnixEnd))
                    print('timeUnixEnd - coregisterEnd = ' +
                          str((timeUnixEnd - coregisterEnd) / 60))
                    print('coregisterEnd - timeUnixBegin = ' +
                          str((coregisterEnd - timeUnixBegin) / 60))
                    assert coregisterEnd > timeUnixBegin

                    df = df[df['timeUnix'] > coregisterBegin]
                    df = df[df['timeUnix'] < coregisterEnd]
                    assert len(list(df['timeUnix'])) > 0, 'coregistered df removed'

                    wearableName = item.split('_')[1]
                    # NOTE(review): the time columns are overwritten by each
                    # item, so all items must have the same number of rows in
                    # the window or pandas raises a length mismatch.
                    df_coregister['timeUnix'] = list(df['timeUnix'])
                    df_coregister['timeMinutes'] = list(df['timeMinutes'])
                    colName = str(wearableName + '_' + 'measurement')
                    print('colName = ' + colName)
                    df_coregister[colName] = list(df['measurement'])

                path = build_path([
                    'studies', study, 'formatted', 'coregister', record,
                    segment
                ])
                file = os.path.join(path, sensor + ".csv")
                df_coregister.to_csv(file)
                print('formatted coregister file = ' + str(file))
def find_inflections(range, buffer):
    """
    Fit a sliding quadratic to every clean record and flag local extrema.

    For each study / sensor / record the file
    ``<study>/formatted/clean/<record>/All/<sensor>.csv`` is read.  For every
    column whose name contains ``meas`` a degree-2 polynomial is fitted to the
    samples in ``[t, t + range / 60]`` minutes for each sample time ``t``.  The
    fit is stored on the row in the middle of the window, and that row is
    marked ``'Yes'`` when the zero of the derivative lies inside the window and
    within half a sample period of the middle time.

    Two files are written per record and sensor:
    ``<study>/analyzed/inflections/all_times/<range>/<record>/All/<sensor>.csv``
    with every row, and the same path under ``inflection_only`` with only the
    rows marked ``'Yes'``.  A record whose ``all_times`` file already exists is
    skipped entirely.

    Args:
        range: width of the sliding window in seconds.  The name shadows the
            builtin and is kept for backward compatibility.
        buffer: unused; kept for backward compatibility.
    """
    window_seconds = range  # alias: the parameter name shadows the builtin
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')

    for study in study_list:
        format_type = 'clean'
        clean_path = os.path.join(study, 'formatted', format_type)
        recordNames = os.listdir(clean_path)

        for sensor in sensor_list:
            for record in recordNames:
                segment = "All"
                path = build_path([
                    study, 'analyzed', 'inflections', 'all_times',
                    str(window_seconds), record, segment
                ])
                file = os.path.join(path, sensor + ".csv")
                # results are cached on disk; never recompute an existing file
                if os.path.isfile(file):
                    continue

                source = os.path.join(study, 'formatted', format_type, record,
                                      segment, sensor + '.csv')
                print('source = ' + source)
                df = pd.read_csv(source)

                for colName in df.columns:
                    if 'Unnamed' in str(colName):
                        del df[colName]
                        print('deleted ' + colName)
                        continue
                    if 'meas' in colName:
                        _flag_inflections(df, colName, window_seconds)

                df.to_csv(file)
                print('inflection list saved : ' + file)

                # keep only the rows flagged as inflections
                for colName in df.columns:
                    if 'inflection' in colName:
                        colNameSplit = colName.split('_')
                        label = str(colNameSplit[0] + '_' + 'inflection')
                        df = df.drop(df[(df[label] != 'Yes')].index)

                path = build_path([
                    study, 'analyzed', 'inflections', 'inflection_only',
                    str(window_seconds), record, segment
                ])
                file = os.path.join(path, sensor + ".csv")
                df.to_csv(file)


def _flag_inflections(df, colName, window_seconds):
    """Fit a quadratic in each sliding window of *colName* and record it in *df* in place."""
    colNameSplit = colName.split('_')
    print('colNameSplit = ')
    print(colNameSplit)
    print('colNameSplit[0] = ')
    print(colNameSplit[0])
    prefix = colNameSplit[0]

    rowCount = len(df['timeMinutes'])
    for suffix in ('inflection', 'coefficients', 'derivativeZero',
                   'equation', 'derivative'):
        label = str(prefix + '_' + suffix)
        print('label = ' + label)
        if label not in df.columns:
            df[label] = [None] * rowCount
    df['timeBegin'] = [None] * rowCount
    df['timeEnd'] = [None] * rowCount

    # The sample period is read from rows 1 and 2; shorter records cannot be
    # analysed (they used to fail with a KeyError).
    if rowCount < 3:
        return

    # Loop invariants: half the sample period and the window width in rows.
    timeTolerance = (float(df.loc[2, 'timeMinutes']) -
                     float(df.loc[1, 'timeMinutes'])) / 2
    iRange = int(window_seconds / 60 * 1 / (timeTolerance * 2))
    x = sym.Symbol('x')

    for timeMinute in list(df['timeMinutes']):
        i = df[df['timeMinutes'] == timeMinute].index.values[0]
        # not enough samples left for a full window
        if rowCount - i <= iRange + 2:
            continue

        center = int(i + iRange / 2)
        timeMedian = df.loc[center, 'timeMinutes']

        df_truncate = df[df['timeMinutes'] >= timeMinute]
        df_truncate = df_truncate[
            df_truncate['timeMinutes'] <= timeMinute + window_seconds / 60]
        timeTruncate = list(df_truncate['timeMinutes'])
        timeLow, timeHigh = min(timeTruncate), max(timeTruncate)
        df.loc[center, 'timeBegin'] = timeLow
        df.loc[center, 'timeEnd'] = timeHigh

        measTruncate = list(df_truncate[colName])
        coef = np.polyfit(timeTruncate, measTruncate, 2)
        f = coef[0] * x * x + coef[1] * x + coef[2]
        dff = sym.diff(f, x)
        # A flat or linear fit has no stationary point and solve() returns an
        # empty list; record the fit and leave the row marked 'No'.
        stationary = sym.solve(dff)
        soldf = stationary[0] if stationary else None

        df.loc[center, prefix + '_inflection'] = 'No'
        # space-separated so the coefficients can be parsed back
        df.loc[center, prefix + '_coefficients'] = ' '.join(
            [str(c) for c in coef])
        df.loc[center, prefix + '_derivativeZero'] = soldf
        df.loc[center, prefix + '_equation'] = str(f)
        df.loc[center, prefix + '_derivative'] = str(dff)

        if soldf is None:
            continue
        inWindow = soldf > timeLow and soldf < timeHigh
        nearMiddle = (soldf < timeMedian + timeTolerance
                      and soldf > timeMedian - timeTolerance)
        if inWindow and nearMiddle:
            print('inflection found at time = ' + str(soldf))
            df.loc[center, prefix + '_inflection'] = 'Yes'
def plot_inflections(study, record, sensor, segment):
    """
    Plot a record's measurements and inflections for every search range.

    One subplot is drawn per entry of ``retrieve_ref('searchRange')`` plus a
    final ``'unique'`` entry, read from
    ``<study>/analyzed/inflections/all_times/<range>/<record>/<segment>/<sensor>.csv``.
    A second subplot with the leading polynomial coefficient is added for each
    ``polyfitEquation`` column.  The figure is saved to
    ``<study>/plotted/inflection/each_record/<record>/<sensor>.png``.

    NOTE(review): this file defines ``plot_inflections`` a second time further
    down with the same signature; when both are at module level the later
    definition replaces this one.

    NOTE(review): the indentation of this function was reconstructed; the
    placement of the title/label calls inside the inflection branch should be
    confirmed against the original layout.
    """
    searchRange = retrieve_ref('searchRange')
    searchRange = [int(x) for x in searchRange]
    # 'unique' is treated as one more range; it becomes a folder name via str(range)
    searchRange.append('unique')
    row_num, col_num, plot_num = len(searchRange) + 2, 2, 0
    row_width_mulp, col_width_mulp = 12, 5
    plot_width, plot_height = col_num * row_width_mulp, row_num * col_width_mulp
    plt.figure(figsize=(plot_width, plot_height))
    for range in searchRange:
        plot_num += 1
        plt.subplot(row_num, col_num, plot_num)
        path = [
            study, 'analyzed', 'inflections', 'all_times',
            str(range), record, segment
        ]
        file = os.path.join(*path, sensor + ".csv")
        # NOTE(review): returns (instead of skipping this range) on the first
        # missing file, so nothing is saved for the record in that case.
        if not os.path.isfile(file):
            return
        df = pd.read_csv(file)
        for colName in df.columns:
            if 'Minutes' in colName:
                timeMinutes = list(df[colName])
            if 'measurement' in colName:
                measList = list(df[colName])
                measMin = min(measList)
                measMax = max(measList)
                plt.scatter(timeMinutes, measList, label=str(colName))
            if 'inflectionDecision' in colName or 'unique' in colName:
                if 'inflectionDecision' in colName:
                    # rows not marked 'Yes' are dropped; the rest are inflections
                    dfInflections = df.drop(df[(df[colName] != 'Yes')].index)
                    timeInflections = list(dfInflections['timeMinutes'])
                if 'unique' in colName:
                    plt.scatter(timeMinutes, measList)
                    timeInflections = list(df[colName])
                for time in timeInflections:
                    # multp = searchRange.index(range)/len(searchRange)
                    # colorScatter = [multp*x for x in [0,1,.5]]
                    colorScatter = [0, .9, .6]
                    # vertical dashed line at the inflection time
                    xx = np.linspace(time, time, 100)
                    yy = np.linspace(measMin, measMax, 100)
                    plt.plot(xx, yy, color=colorScatter, linestyle='--')
                plt.title('Record = ' + str(record) + ' Range = ' + str(range) + ' seconds ' + ' Inflections Found = ' + str(len(timeInflections)))
                plt.xlabel('Time (Minutes)')
                sensor_unit = retrieve_sensor_unit(sensor)
                plt.ylabel(sensor + ' ' + sensor_unit)
                # plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')
            if 'polyfitEquation' in colName:
                polyfitCoeff = list(dfInflections[colName])
                coeffPolyList = []
                for coeff in polyfitCoeff:
                    # print('coeff = ' + str(coeff))
                    # Strip the equation text down to comma-separated numbers
                    # and keep the first one.
                    coeff = str(coeff)
                    coeff = coeff.replace("x", ",")
                    coeff = coeff.replace("*", "")
                    # NOTE(review): every '*' is already removed above, so the
                    # next two replacements can no longer match anything.
                    coeff = coeff.replace("**2", "")
                    coeff = coeff.replace("**", "")
                    coeff = coeff.replace(" ", "")
                    coeff = coeff.replace(" ", "")
                    coeff = coeff.replace("+", "")
                    # print('coeff = ' + str(coeff))
                    coeffList = coeff.split(',')
                    coeffPoly = float(coeffList[0])
                    # print('coeffPoly = ' + str(coeffPoly))
                    coeffPolyList.append(coeffPoly)
                plot_num += 1
                plt.subplot(row_num, col_num, plot_num)
                plt.scatter(timeInflections, coeffPolyList)
                plt.title('Time Infletions vs Coefficients')
                plt.xlabel('Time (Minutes)')
                sensor_unit = retrieve_sensor_unit(sensor)
                plt.ylabel(sensor + ' ' + sensor_unit)
                # plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')
    path = [study, 'plotted', 'inflection', 'each_record', record]
    path = build_path(path)
    file = os.path.join(path, sensor + ".png")
    plt.savefig(file, bbox_inches='tight')
    print('inflection plot saved ' + file)
def plot_acc(study):
    """
    Plot the accelerometer (ACC) columns of every truncated record of *study*.

    For each record folder under ``studies/<study>/formatted/truncate`` the
    file ``<record>/All/ACC.csv`` is read.  Every column whose name contains
    ``eas`` is scattered against ``timeUnix`` in its own subplot, followed by
    one subplot with all of them overlaid.  The figure is saved to
    ``studies/<study>/plotted/truncate/<record>/ACC.png``.

    Records without an ACC file are skipped without allocating a figure, and
    each figure is closed once saved so that figures do not accumulate across
    records.

    Args:
        study: name of the study folder under ``studies``.
    """
    print('compare ACC sensors')
    format_type, sensor, segment = 'truncate', 'ACC', 'All'
    source_path = os.path.join('studies', study, 'formatted', format_type)

    def scatter_column(df, colName, xlabel):
        """Scatter one measurement column on the current axes and label them."""
        labelName = colName
        print('labelName = ' + labelName)
        valueColor = retrieve_ref_color(
            str('color_' + str(sensor) + '_' + str(colName)))
        plt.scatter(df['timeUnix'], df[colName], color=valueColor,
                    label=labelName)
        plt.xlabel(xlabel)
        sensor_unit = retrieve_sensor_unit(sensor)
        plt.ylabel(sensor + ' ' + sensor_unit)
        plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2), loc='upper left')

    for record in os.listdir(source_path):
        source = os.path.join(source_path, record, segment, sensor + '.csv')
        if not os.path.isfile(source):
            continue

        print('source = ' + source)
        df = pd.read_csv(source)
        colNames = list(df.head())
        print('colNames = ')
        print(colNames)
        measNames = [name for name in colNames if 'eas' in str(name)]

        # 5 rows as before; grow only when there are more columns than the
        # fixed grid could hold (one row each plus the overlay).
        row_num, col_num = max(5, len(measNames) + 1), 1
        row_width_mulp, col_width_mulp = 14, 5
        fig = plt.figure(figsize=(col_num * row_width_mulp,
                                  row_num * col_width_mulp))
        try:
            for plot_num, colName in enumerate(measNames, start=1):
                plt.subplot(row_num, col_num, plot_num)
                scatter_column(df, colName, 'time Unix')

            # overlay of all measurement columns
            plt.subplot(row_num, col_num, len(measNames) + 1)
            for colName in measNames:
                scatter_column(df, colName, 'time (Unix)')

            path = build_path(['studies', study, 'plotted', format_type, record])
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')
            print('acc saved: ' + file)
        finally:
            # release the figure even when plotting or saving fails
            plt.close(fig)
def add_embedded_to_meta():
    """
    Add a metadata row for every record that contains an embedded record.

    For each study, every ``source_path`` whose ``embeddedRecord`` value is
    greater than zero gets an additional metadata row.  The embedded record
    starts at ``int(embeddedRecord)``; its end is found by
    ``find_record_end_using_temp`` on the rows of
    ``studies/<study>/formatted/source/<record>/All/TEMP.csv`` after that time.
    The new row carries ``recordName`` (``<begin>_<second token of the source
    name>``), ``recordBegin``, ``recordEnd`` and ``recordLength`` (minutes,
    rounded to 4 decimals).

    The raw ``studies/<study>/source/<record>/TEMP.csv`` file is timestamped
    with ``build_timestamps`` and saved under the new record name in
    ``studies/<study>/formatted/source/<recordName>/All/TEMP.csv``.  Finally
    the metadata is sorted by ``recordName`` and stored with ``save_meta``.
    """
    study_list = retrieve_ref('study_list')

    for study in study_list:
        df_meta = retrieve_meta(study)

        # iterate over a snapshot: rows are appended to df_meta in the loop
        for record in list(df_meta['source_path']):
            print('record = ' + str(record))
            print('df_meta = ')
            print(df_meta)
            i = df_meta[df_meta['source_path'] == record].index.values[0]
            embeddedRecord = float(df_meta.loc[i, 'embeddedRecord'])
            if not embeddedRecord > 0:
                continue

            format_type, segment, sensor = 'source', 'All', 'TEMP'
            source = os.path.join('studies', study, 'formatted', format_type,
                                  record, segment, sensor + '.csv')
            df = pd.read_csv(source)
            recordBegin = int(embeddedRecord)
            df = df[df['timeUnix'] > recordBegin]
            timeEndUnix = find_record_end_using_temp(df)
            recordLength = (timeEndUnix - recordBegin) / 60

            # Work on a copy so the edits below do not touch (or warn about)
            # the original metadata rows.
            df_row = df_meta[df_meta['source_path'] == record].copy()
            record_split = record.split('_')
            recordName = str(str(recordBegin) + '_' + str(record_split[1]))
            print('embedded recordName = ' + recordName)
            df_row.loc[i, 'recordName'] = recordName
            df_row.loc[i, 'recordBegin'] = recordBegin
            df_row.loc[i, 'recordEnd'] = int(timeEndUnix)
            df_row.loc[i, 'recordLength'] = round(recordLength, 4)
            print('df_row = ')
            print(df_row)
            # DataFrame.append was removed in pandas 2.0; concat is the
            # supported equivalent.  Re-indexing keeps the row labels unique
            # so later ``df_meta.loc[i, ...]`` lookups return scalars.
            df_meta = pd.concat([df_meta, df_row], ignore_index=True)

            source = os.path.join('studies', study, format_type, record,
                                  sensor + '.csv')
            df_source = pd.read_csv(source)
            df_timestamped = build_timestamps(df_source, sensor)
            path = build_path([
                'studies', study, 'formatted', format_type, recordName,
                segment
            ])
            file = os.path.join(path, sensor + ".csv")
            df_timestamped.to_csv(file)
            print('formatted source file = ' + str(file))

        df_meta = df_meta.sort_values(by='recordName')
        save_meta(study, df_meta)
def plot_inflections(study, record, sensor, segment):
    """
    Plot a record's measurement with the inflections found for each search range.

    Only the ``'All'`` segment is plotted; any other segment returns
    immediately.  One subplot is drawn per value of
    ``retrieve_ref('searchRange')`` from
    ``<study>/analyzed/inflections/all_times/<range>/<record>/All/<sensor>.csv``:
    the measurement column (a name containing ``measurement`` and no ``_``)
    against the ``Minutes`` column, with one vertical line per row flagged
    ``'Yes'``.  Flags are read from a column containing ``inflectionDecision``
    or ending in ``_inflection`` (the name written by ``find_inflections``).
    The figure is saved to
    ``<study>/plotted/inflection/each_record/<record>/<sensor>.png`` and then
    closed.

    Args:
        study: study folder.
        record: record name.
        sensor: sensor name, used as the csv / png file stem.
        segment: segment name; anything other than ``'All'`` is ignored.
    """
    if segment != 'All':
        return

    searchRange = [int(x) for x in retrieve_ref('searchRange')]

    def inflection_file(window):
        """Return the all_times csv path for one search range."""
        return os.path.join(study, 'analyzed', 'inflections', 'all_times',
                            str(window), record, segment, sensor + ".csv")

    # nothing has been analysed for this record yet
    if not os.path.isfile(inflection_file(searchRange[0])):
        return

    row_num, col_num = len(searchRange) + 2, 1
    row_width_mulp, col_width_mulp = 12, 5
    fig = plt.figure(figsize=(col_num * row_width_mulp,
                              row_num * col_width_mulp))
    try:
        for plot_num, window in enumerate(searchRange, start=1):
            plt.subplot(row_num, col_num, plot_num)
            file = inflection_file(window)
            if not os.path.isfile(file):
                continue

            df = pd.read_csv(file)
            timeMinutes, measList, timeInflections = [], [], []
            for colName in list(df.columns):
                if 'Unnamed' in colName:
                    del df[colName]
                elif 'Minutes' in colName:
                    timeMinutes = list(df[colName])
                elif 'measurement' in colName and '_' not in colName:
                    measList = list(df[colName])
                    plt.scatter(timeMinutes, measList, label=str(colName))
                elif ('inflectionDecision' in colName
                      or colName.endswith('_inflection')):
                    dfInflections = df[df[colName] == 'Yes']
                    timeInflections = list(dfInflections['timeMinutes'])
                    print('timeInflections = ')
                    print(timeInflections)
                    # the lines span the measurement range, so they need data
                    if measList:
                        measMin, measMax = min(measList), max(measList)
                        for time in timeInflections:
                            xx = np.linspace(time, time, 100)
                            yy = np.linspace(measMin, measMax, 100)
                            plt.plot(xx, yy, color=[0, .9, .6])

            # The x data are minutes, not unix time.  Labels and title are set
            # for every subplot, also when no inflection column was found.
            plt.xlabel('Time (Minutes)')
            sensor_unit = retrieve_sensor_unit(sensor)
            plt.ylabel(sensor + ' ' + sensor_unit)
            plt.title('Record = ' + str(record) + ' Range = ' + str(window) + ' seconds ' + ' Inflections Found = ' + str(len(timeInflections)))

        path = build_path([study, 'plotted', 'inflection', 'each_record', record])
        file = os.path.join(path, sensor + ".png")
        plt.savefig(file, bbox_inches='tight')
        print('inflection plot saved ' + file)
    finally:
        # release the figure even when plotting or saving fails
        plt.close(fig)
def modeling_test():
    """
    Write an OpenSCAD file that models the record lengths of each study.

    For every study the metadata in ``studies/<study>/meta/metadata.csv`` is
    read and sorted by ``recordLength``.  The file
    ``code/openSCAD/<study>/<study>_cleaning_data.scad`` is (re)written with a
    header comment followed by one sphere-plus-cylinder union per record,
    scaled by the record length and laid out on a grid whose side is the
    square root of the number of records.
    """
    print("openSCAD modeling begin")
    study_list = retrieve_ref('study_list')

    for study in study_list:
        metadata_file = os.path.join('studies', study, 'meta', 'metadata.csv')
        df_meta = pd.read_csv(metadata_file)
        df_meta = df_meta.sort_values(by=['recordLength'])
        records_found = list(df_meta['source_path'])
        recordLength = list(df_meta['recordLength'])

        openscad_path = os.path.join('code', 'openSCAD', study)
        if not os.path.isdir(openscad_path):
            build_path(['code', 'openSCAD', study])
        openscad_file = os.path.join(
            openscad_path, str(study) + '_' + 'cleaning_data.scad')

        current_time = datetime.now().strftime("%H:%M:%S")
        count_xaxis = math.sqrt(len(records_found))
        # default=0 keeps an empty metadata table from raising in max()
        spacing = round(max(recordLength, default=0) * 2, 3)

        parts = [
            '// File made on ' + str(date.today()) + ' ' + str(current_time),
            '\n' + '// records found = ' + str(len(records_found)),
            '\n' + '// spacing = ' + str(spacing),
        ]
        for i, rawLength in enumerate(recordLength):
            # grid cell of the record; counting starts at 1 as before
            x_num = int((i + 1) / count_xaxis)
            y_num = int((i + 1) % count_xaxis)
            z_num = 0
            length = round(rawLength, 3)
            parts.extend([
                '\n',
                '\n' + 'translate([ ' + str(spacing * x_num) + ' , ' + str(spacing * y_num) + ' , ' + str(spacing * z_num) + '])',
                '\n' + 'union() {',
                ' ' + 'color([ ' + str(1) + ' , ' + str(0) + ' , ' + str(1) + ' ])',
                ' ' + 'sphere(' + str(length) + ' , $fn=60);',
                ' ' + 'color([ ' + str(0.5) + ' , ' + str(0.5) + ' , ' + str(1) + ' ])',
                ' ' + 'cylinder( r= ' + str(length / 2) + ', h= ' + str(2 * length) + ' , $fn=60);',
                ' } ',
                '\n',
                '\n',
            ])

        # Open once and let the context manager close the handle; the file
        # used to be opened twice, leaking the first handle.
        with open(openscad_file, "w") as file:
            file.write(''.join(parts))

    print("openSCAD modeling complete")
def plot_mean():
    """
    Plot the per-record mean statistics of every sensor, one figure per study.

    For each study a figure with one subplot per sensor is created.  For every
    segment the table
    ``studies/<study>/analyzed/statistics/mean/<segment>/<sensor>.csv`` is
    read, rows with missing values and ``Unnamed`` columns are dropped, and
    the second remaining column is scattered.  The x values are the record
    number, or the third remaining column when the file has more than three
    columns.  For the ``'All'`` segment a grey diagonal from the minimum to
    the maximum is drawn as a reference.  The figure is saved to
    ``studies/<study>/plotted/analysis/mean/<last sensor>.png`` and closed.

    Raises:
        AssertionError: when x and y differ in length or a sum is not above
            -1e9.
    """
    study_list = retrieve_ref('study_list')
    sensor_list = retrieve_ref('sensor_list')
    segment_list = retrieve_ref('segment_list')

    for study in study_list:
        row_num, col_num = len(sensor_list), 3
        row_width_mulp, col_width_mulp = 6, 6
        fig = plt.figure(figsize=(col_num * row_width_mulp,
                                  row_num * col_width_mulp))
        try:
            for plot_num, sensor in enumerate(sensor_list, start=1):
                plt.subplot(row_num, col_num, plot_num)

                for segment in segment_list:
                    file = os.path.join('studies', study, 'analyzed',
                                        'statistics', 'mean', segment,
                                        sensor + '.csv')
                    print('file = ' + file)
                    df_mean = pd.read_csv(file).dropna()
                    colNames = list(df_mean.columns)
                    for colName in colNames:
                        if 'Unnamed' in colName:
                            del df_mean[colName]
                    print('df_mean = ')
                    print(df_mean)

                    yy = df_mean.iloc[:, 1]
                    print('yy = ')
                    print(yy)
                    # Label each axis with the column that is plotted on it;
                    # the y label used to be the last column of the file.
                    ylabel = str(df_mean.columns[1])
                    xx = list(range(1, len(yy) + 1))
                    xlabel = 'Records Num'
                    if len(colNames) > 3:
                        xx = df_mean.iloc[:, 2]
                        xlabel = str(df_mean.columns[2])
                    print('xx = ')
                    print(xx)

                    assert len(xx) == len(yy)
                    assert sum(xx) > -1000000000
                    assert sum(yy) > -1000000000

                    if segment == 'All':
                        # grey reference diagonal across the data range
                        xxsym = np.linspace(min(xx), max(xx), 200)
                        yysym = np.linspace(min(yy), max(yy), 200)
                        plt.scatter(xxsym, yysym, color=[.8, .8, .8])

                    wearable_num = 1
                    colorWearableSegment = retrieve_ref_color_wearable_segment(
                        wearable_num, segment)
                    plt.scatter(xx, yy, color=colorWearableSegment,
                                label=str(segment))
                    plt.xlabel(xlabel + ' ' + sensor)
                    plt.ylabel(ylabel + ' ' + sensor)
                    print('xlabel / ylabel = ' + xlabel + ' ' + ylabel)

                if sensor == sensor_list[-1]:
                    plt.legend(bbox_to_anchor=(1, 0.5, 0.3, 0.2),
                               loc='upper left')

            # NOTE(review): the file is named after the last sensor although
            # the figure holds every sensor; kept so existing paths still work.
            path = build_path(['studies', study, 'plotted', 'analysis', 'mean'])
            file = os.path.join(path, sensor + ".png")
            plt.savefig(file, bbox_inches='tight')
            print('plotted mean = ' + file)
        finally:
            # release the figure even when plotting or saving fails
            plt.close(fig)