def find_all_slopes(self, filename_or_data, plotfun=None):
    """Run one plain linear regression per gas and per side.

    This is the simple example of a custom regression function: all points
    that ``divide_left_and_right.group_all`` assigns to a side are used.
    ``self.options`` and the wind data are available and could be used to
    refine the fit; here they are only reported on stdout.

    filename_or_data: either a file name (str), which is loaded with
        ``get_data.get_file_data``, or an already loaded data dict. The
        dict has keys such as 'CO2', 'N2O' and 'Wind'; ``data[key][0]`` is
        a list of seconds and ``data[key][1]`` the matching values (ppmv
        for the gases, m/s for 'Wind').
    plotfun: accepted for interface compatibility, not used here.

    Returns a dict ``{'left': {'CO2': r, 'N2O': r}, 'right': {...}}`` where
    each ``r`` is what ``regression.regression2`` returns: a regression
    object with the float slots intercept, slope, se_intercept, se_slope,
    mse, start and stop (se = standard error, mse = mean squared error).
    """
    # regression.regression2 is the function that builds a regression object.
    fit = regression.regression2

    data = (
        get_data.get_file_data(filename_or_data)
        if isinstance(filename_or_data, str)
        else filename_or_data
    )

    print('this is my own regression function')
    print('self.options is:', self.options)
    print('wind was on average:', np.mean(data['Wind'][1]), 'm/s')

    grouped = divide_left_and_right.group_all(data)
    # Look both gases up before fitting anything, so a missing gas is
    # reported before any regression is computed.
    by_gas = {'CO2': grouped['CO2'], 'N2O': grouped['N2O']}

    regressions = {}
    for side in ('left', 'right'):
        regressions[side] = {}
    for side in ('left', 'right'):
        for gas in ('CO2', 'N2O'):
            regressions[side][gas] = fit(by_gas[gas][side][0],
                                         by_gas[gas][side][1])
    return regressions
def plot_gas(filename, gas='CO2'):
    """Scatter-plot one signal from a single raw data file.

    filename: path of the file to load. If it is not an existing file it is
        joined onto ``resdir.raw_data_path`` first.
    gas: key of the series to plot (the original comment mentions N2O or
        CO2); element 0 of the series is plotted on x, element 1 on y.
    """
    # A bare file name is resolved relative to the raw data directory.
    if os.path.isfile(filename):
        path = filename
    else:
        path = os.path.join(resdir.raw_data_path, filename)

    series = get_data.get_file_data(path)[gas]
    plt.plot(series[0], series[1], '.')
def plot_raw(examplefilename, key='N2O'):
    """Plot one raw series against time and return the loaded data.

    examplefilename: path of the file to load; when it is not an existing
        file it is passed through ``with_raw_dir`` first.
    key: series to plot; may be 'N2O', 'CO2', 'Wind'.

    The x axis is labelled 'seconds'; the y axis is labelled 'ppm' only for
    the keys 'N2O', 'CO2' and 'CO'. Returns the dict produced by
    ``get_data.get_file_data``.
    """
    path = examplefilename
    if not os.path.isfile(path):
        path = with_raw_dir(path)

    data = get_data.get_file_data(path)
    times, values = data[key][0], data[key][1]
    plt.plot(times, values, '.')

    axes = plt.gca()
    axes.set_xlabel('seconds')
    if key in ('N2O', 'CO2', 'CO'):
        axes.set_ylabel('ppm')
    return data
def _write_raw(raw_filename, worksheet, column_start=0):
    """Write one raw file and its regressions into a worksheet.

    Layout, starting at ``column_start``:
      * row 0: the file name;
      * per series in the data: a 'time' and a 'signal' column (title in
        row 1, sub-title in row 2, values from row 3 down), followed by one
        '<series>_<side>' column per side that has a segment for the series,
        holding the signal where the time stamp belongs to the segment and
        None elsewhere, followed by one empty spacer column;
      * per non-None regression: a 'reg:<side>_<gas>' column listing the
        attribute names and values in alternating rows.

    worksheet: object with a ``write(row, column, value)`` method.
    Returns the index of the last column position plus 2, i.e. where a
    following file may start.
    """
    data = get_data.get_file_data(raw_filename)
    reg = regr.find_all_slopes(raw_filename, do_plot=False)
    segments = find_regressions.get_regression_segments(data, reg)

    def is_series_pair(candidate):
        # Only [times, values] pairs of equally long lists are written out.
        if not isinstance(candidate, list) or len(candidate) != 2:
            return False
        first, second = candidate
        return (isinstance(first, list) and isinstance(second, list)
                and len(first) == len(second))

    def put_block(title, vectors, first_col, headers):
        # Title in row 1, then one column per vector; returns the next free
        # column.
        worksheet.write(1, first_col, title)
        for offset, vector in enumerate(vectors):
            target = first_col + offset
            worksheet.write(2, target, headers[offset])
            for row, value in enumerate(vector):
                worksheet.write(row + 3, target, value)
        return first_col + len(vectors)

    worksheet.write(0, column_start, raw_filename)
    column = column_start

    for subst, vals in data.items():
        if not is_series_pair(vals):
            continue
        column = put_block(subst, vals, column, ['time', 'signal'])
        t_orig, y_orig = vals
        for side in ('right', 'left'):
            if side not in segments or subst not in segments[side]:
                continue
            # Items 2.. of a segment entry must be exactly two values;
            # presumably (times, values) -- only the first is used here.
            seg_times, _seg_values = segments[side][subst][2:]
            masked = [y if t in seg_times else None
                      for t, y in zip(t_orig, y_orig)]
            # Module-level debugging hook kept from the original.
            deb.append([segments, side, subst, t_orig])
            column = put_block('%s_%s' % (subst, side), [masked],
                               column, ['signal'])
        column += 1  # spacer column after each series

    reg_attrs = ['slope', 'intercept', 'se_slope', 'se_intercept', 'mse']
    for side, regs in reg.items():
        for gas, fitted in regs.items():
            if fitted is None:
                continue
            worksheet.write(1, column, 'reg:%s_%s' % (side, gas))
            for idx, attr in enumerate(reg_attrs):
                name_row = 2 + 2 * idx
                worksheet.write(name_row, column, attr)
                worksheet.write(name_row + 1, column, getattr(fitted, attr))
            column += 1
    return column + 2
#OR change y limits (apparently can't use ymin or ymax alone) plot_gas(filename) plt.gca().set_ylim(ymin=200, ymax=800) #custom filename = '2018-01-03-13-06-39-x599263_542108-y6615221_28657-z0_0-h-2_39905751122_right_Plot_1_' plot_gas(filename, gas='CO2') plt.gca().set_ylim(ymin=200, ymax=800) #%% """ Standard deviation of CO2 """ for i in range(18691, 18875): print(filenames[i]) a = get_data.get_file_data(filenames[i]) print(np.std(a['CO2'][1])) #print(a['CO2'][1]) #%% """ A more extensive shart showing N2O, CO2, points used, and regression lines """ #%% #Display charts for all files, or a subset: for filename in filenames[0:10] #To run for only one file, don't highlight the for loop beginning line. will run file currently in "filename" for filename in filenames[18645:18648]: filename = '2018-01-04-14-17-31-x599263_5599-y6615221_32418-z0_0-h-2_40099505776_right_Plot_1_' if not os.path.isfile(filename): filename = os.path.join(resdir.raw_data_path, filename) reg = regr.find_all_slopes(filename, do_plot=True)
def do_regressions(self, files, write_mode='w'):
    """Run the regressions for every file and write the results.

    For each name in ``files`` the data is loaded, ``self.find_all_slopes``
    is applied and the result is written to the slopes file with
    ``self.write_result_to_file``. Depending on ``self.save_options`` the
    current matplotlib figure is titled, saved and/or shown, and a detailed
    Excel file is written.

    files: sequence of file names; an empty or None value does nothing.
    write_mode: mode used to open ``self.slopes_file_name`` ('w' or 'a').

    Returns ``(resdict, errors)``. ``resdict`` maps the base name of each
    successfully processed file to its regressions. ``errors`` is a list of
    ``[name, traceback_text]`` for files that raised; processing continues
    with the next file after an error. A non-empty ``errors`` list is also
    appended to the module-level ``regression_errors``.
    """
    # Imported once here instead of inside the exception handler.
    import traceback

    def print_info_maybe(i, n, t0, n_on_line):
        # Print "i/n" at most every 10 seconds, five entries per line.
        t = time.time()
        if t - t0 > 10:
            print('%d/%d ' % (i+1, n), end='')
            t0 = t
            n_on_line += 1
            if n_on_line > 4:
                n_on_line = 0
                print('')
        return t0, n_on_line

    def quality_check_of(reg, side):
        # The side or the N2O entry may be missing, or the entry may be
        # None; only those lookup failures mean "no quality check".
        # (Previously a bare ``except:`` which also swallowed
        # KeyboardInterrupt and SystemExit.)
        try:
            return reg[side]['N2O'].quality_check
        except (KeyError, TypeError, AttributeError):
            return None

    if not files:
        print('\nNo regressions to do\n')
        return {}, []

    print('Doing regressions')
    n = len(files)
    t0 = time.time()
    resdict = {}
    n_on_line = 0
    errors = []
    with open(self.slopes_file_name, write_mode) as f:
        for i, name in enumerate(files):
            # do regression for each file
            t0, n_on_line = print_info_maybe(i, n, t0, n_on_line)
            try:
                data = get_data.get_file_data(name)
                reg = self.find_all_slopes(data)
                self.write_result_to_file(reg, name, f)
                resdict[os.path.split(name)[1]] = reg

                # Save images of each regression if wanted
                if self.save_options['show_images'] or self.save_options['save_images']:
                    title_filename = data['filename']
                    title_options = 'options: ' + self.options.get_options_string(data['filename'])
                    left_QC = quality_check_of(reg, 'left')
                    right_QC = quality_check_of(reg, 'right')
                    title_left_QC = 'Left: '+left_QC if left_QC else ""
                    title_right_QC = 'Right: '+right_QC if right_QC else ""
                    plt.title(title_filename
                              + '\n' + title_options
                              + '\n' + title_left_QC
                              + '\n' + title_right_QC)
                    if self.save_options['save_images']:
                        # NOTE(review): the backslash separators below are
                        # Windows-specific; kept unchanged so the paths
                        # match directories created elsewhere.
                        plt.savefig(os.path.join(self.detailed_output_path+"\\images",
                                                 title_filename +'.png'))
                        # A flagged file is additionally saved under
                        # Check\<flag>; the left flag takes precedence.
                        if left_QC:
                            plt.savefig(os.path.join(self.detailed_output_path+"\\Check\\"+left_QC,
                                                     "LEFT " + left_QC +" "+ title_filename +'.png'))
                        elif right_QC:
                            plt.savefig(os.path.join(self.detailed_output_path+"\\Check\\"+right_QC,
                                                     "RIGHT "+ right_QC +" "+ title_filename +'.png'))
                    if self.save_options['show_images']:
                        plt.show()
                    plt.clf()

                # Save detailed raw excel if wanted
                if self.save_options['save_detailed_excel']:
                    filename = data['filename']
                    xls_write_raw_data_file(filename,
                                            self.detailed_output_path+'\\Values\\'+ 'DetailedRawData_'+ filename+'.xls',
                                            data, reg, False)
            except Exception:
                # Deliberate best effort: record the failure and go on with
                # the next file.
                errors.append([name, traceback.format_exc()])

    # t0=0 and a large line count force a final "i/n" line and a newline.
    print_info_maybe(i, n, 0, 100000)
    print('Regression done on %d files with %d errors' % (len(files), len(errors)))
    if errors:
        regression_errors.append(errors)
        print('See find_regressions.regression_errors[-1]')
    return resdict, errors
def find_all_slopes(self, filename_or_data, do_plot=False, given_specific_options=False):
    """Find the regressions for every gas on the left and the right side.

    filename_or_data: a file name (str), loaded with
        ``get_data.get_file_data``, or an already loaded data dict. The
        dict must contain 'filename'.
    do_plot: when true, ``self.plot_fun(data, regressions)`` is called.
        It is also called when ``self.do_plot`` or the 'save_images' /
        'show_images' save options are set.
    given_specific_options: options to use for this file; when False they
        are looked up with ``self.options.get_specific_options`` from the
        file name.

    Returns ``{'left': {key: reg, ...}, 'right': {...}}`` with the keys
    'CO2', 'N2O', 'CO', 'H2O' and 'licor_H2O'. Each ``reg`` is the first
    item returned by ``self._regress1``: an object with a ``slope``
    attribute, or None.

    When the 'correct_negatives' option is on and the N2O slope is negative,
    the N2O regression is retried, first with 'co2_guides' switched off and
    then with a fixed 1-190 window for that side. A retry with a positive
    slope replaces the original result. In that case the options object is
    modified in place and stored in ``self.options.specific_options_dict``
    under the file name.
    """
    if isinstance(filename_or_data, str):
        data = get_data.get_file_data(filename_or_data)
    else:
        data = filename_or_data

    if given_specific_options is False:
        specific_options = self.options.get_specific_options(data['filename'])
    else:
        specific_options = given_specific_options

    cut_param = self._get_divide_cut_param(specific_options)
    rawdict = divide_left_and_right.group_all(data, **cut_param)

    keys = ['CO2', 'N2O', 'CO', 'H2O', 'licor_H2O']
    regressions = {'left': {}, 'right': {}}

    # Iterate over a copy of the keys: a side may be popped below.
    for side in list(regressions):
        tbest = None
        for key in keys:
            tbest_orig = tbest
            fit, tbest = self._regress1(rawdict, side, key, specific_options, tbest)
            regressions[side][key] = fit

            # ``== True`` is kept as an equality test so that values such as
            # 1 still enable the correction exactly as before.
            if fit is not None and self.options.options["correct_negatives"] == True:
                if key == "N2O" and fit.slope < 0:
                    # NOTE(review): this binds a second name to the SAME
                    # object; it is not a copy. The "restore" assignments
                    # below therefore do not undo the in-place changes
                    # ('co2_guides' and the per-side window stay set). Left
                    # unchanged because fixing it alters results -- confirm
                    # the intended behaviour before copying here.
                    specific_options_bcp = specific_options

                    # First retry: without the CO2 guide.
                    specific_options["co2_guides"] = False
                    retry, tbest_retry = self._regress1(
                        rawdict, side, key, specific_options, tbest_orig)

                    # Second retry: fixed window. A retry that produced no
                    # regression (None) counts as failed instead of raising
                    # AttributeError on ``.slope``.
                    if retry is None or retry.slope < 0:
                        specific_options = specific_options_bcp
                        specific_options[side] = {'N2O': {'start': 1, 'stop': 190}}
                        retry, tbest_retry = self._regress1(
                            rawdict, side, key, specific_options, tbest_orig)

                    if retry is not None and retry.slope > 0:
                        regressions[side][key] = retry
                        tbest = tbest_retry
                    else:
                        # Positive N2O slope not found; keep the first fit.
                        specific_options = specific_options_bcp

                    # The original author notes that this is not updating
                    # properly.
                    self.options.specific_options_dict[data['filename']] = specific_options

        if not regressions[side]:
            regressions.pop(side)

    # TODO: move the image / detailed excel output into their own functions.
    if (self.do_plot or do_plot
            or self.save_options['save_images']
            or self.save_options['show_images']):
        # plot_fun draws the regressions on the current figure.
        self.plot_fun(data, regressions)
    return regressions