def display():
    """Plot the point-wise average of the spectra in the entered ID range.

    Reads the lower and upper spectrum IDs from the ``entry_l`` / ``entry_h``
    widgets and stores the raw strings on ``Data.spec_id_l`` /
    ``Data.spec_id_h``. After validation it fetches the spectra with
    ``dba.getSpectrumFromRange``, stores the x-series and the averaged
    y-series on ``Data.x_series`` / ``Data.y_ave``, and draws them on ``ax``.
    Every validation or lookup failure is reported through ``msg_window``
    and ends the call without plotting.
    """
    Data.spec_id_l = entry_l.get()
    Data.spec_id_h = entry_h.get()

    # Compare by value: `is ""` tests object identity, which is not
    # guaranteed to hold for an empty string returned by a widget.
    if Data.spec_id_l == "":
        msg_window("Error", "Please provide min ID")
        return
    if Data.spec_id_h == "":
        msg_window("Error", "Please provide max ID")
        return

    try:
        id_low = int(Data.spec_id_l)
        id_high = int(Data.spec_id_h)
    except ValueError:
        # Non-numeric input used to escape as an unhandled exception.
        msg_window("Error", "IDs must be integers")
        return

    if id_low > id_high:
        msg_window("Error", "Lower bound > upper bound")
        return

    spec_data = dba.getSpectrumFromRange(id_low, id_high)
    Data.num_ave = id_high - id_low + 1
    # An empty result would otherwise fail on spec_data[0] below.
    if spec_data is None or len(spec_data) == 0:
        msg_window("No spectrum found", "Spectrum of this ID is not found")
        return

    # The x-series is taken from the first returned row only.
    # NOTE(review): this presumes all rows share one x-series - confirm.
    Data.x_series = dbapi.textToSeries(spec_data[0][1])
    yseries = [dbapi.textToSeries(row[2]) for row in spec_data]
    Data.y_ave = [
        mfn.mean([col[x] for col in yseries])
        for x in range(len(Data.x_series))
    ]

    # Lengths are integers and must be compared with !=, not `is not`.
    if len(Data.x_series) != len(Data.y_ave):
        msg_window("Error", "x and y have different dimension")

    # use '-o' for dots
    ax.plot(Data.x_series, Data.y_ave, '-')
    canvas.show()
    canvas.get_tk_widget().pack(side=tkc.BOTTOM, fill=tkc.BOTH, expand=1)
    toolbar.update()
    canvas._tkcanvas.pack(side=tkc.TOP, fill=tkc.BOTH, expand=1)
def eliminate_std_dev(xs, y_series, std_dev_multi):
    """Flag the y-series that stray too far from the point-wise mean.

    Args:
        xs ([float]): the x-series data
        y_series ([[float]]): one or more y-series sampled on ``xs``; every
            entry is replaced in place by the result of ``mfn.normalize``
        std_dev_multi (int): how many standard deviations a point may
            deviate from the mean before its series is rejected

    Returns:
        A pair ``(rejected, kept)``: the indexes of the rejected series and
        the list of the remaining (normalized) series.
    """
    point_count = len(xs)

    # Gather, for every x position, the y values of all series ...
    columns = [[series[pos] for series in y_series]
               for pos in range(point_count)]

    # ... and derive the per-position statistics from the raw input.
    spread_at_x, mean_at_x = [], []
    for column in columns:
        spread_at_x.append(mfn.std_dev(column))
        mean_at_x.append(mfn.mean(column))

    # Normalize each series against the sum of the point-wise means.
    mean_total = sum(mean_at_x)
    for idx, series in enumerate(y_series):
        y_series[idx] = mfn.normalize(series, mean_total)

    # A series is rejected as soon as a single point breaks the threshold.
    rejected = []
    for idx, series in enumerate(y_series):
        if any(
            abs(series[pos] - mean_at_x[pos])
            > std_dev_multi * spread_at_x[pos]
            for pos in range(point_count)
        ):
            rejected.append(idx)

    # Whatever was not rejected forms the cleaned-up result.
    kept = [series for idx, series in enumerate(y_series)
            if idx not in rejected]
    return rejected, kept
def group_average(xs, boxed, gap_min, gap_max, x_step):
    """Bin spectra by gap size and average the spectra inside each bin.

    Args:
        xs: the x-series shared by all spectra (a list)
        boxed: list of y-series, one per spectrum
        gap_min: lower bound passed to ``mfn.poly_gap`` and start of the bins
        gap_max: upper bound passed to ``mfn.poly_gap`` and end of the bins
        x_step: width of one gap-size bin

    Returns:
        A pair ``(gap_stat, av_box_out)``. ``gap_stat`` holds one single-item
        list per spectrum with its gap formatted to five decimals.
        ``av_box_out`` is a table stored row by row: the first column is
        ``0`` followed by ``xs``, and every further column is a bin's lower
        bound followed by the averaged y values of that bin.
    """
    # Gap size of every spectrum, fitted on the first 20 points only and
    # stored as text with five decimal places.
    gap_stat = [
        ["{0:.5f}".format(
            mfn.poly_gap(xs[0:20], col[0:20], gap_min, gap_max).real)]
        for col in boxed
    ]
    # Parse the rounded values once instead of once per bin.
    gap_values = [float(entry[0]) for entry in gap_stat]

    # Each row of average_box is one output column: header, then values.
    average_box = [[0] + xs]
    for lower in f_range(gap_min, gap_max, x_step):
        # Both bounds are exclusive, so a gap exactly on a bin edge lands in
        # no bin. NOTE(review): confirm this is intended.
        ys_of_gap = [
            col for gap, col in zip(gap_values, boxed)
            if lower < gap < lower + x_step
        ]
        this_y_ave = [lower]
        for x in range(len(xs)):
            this_y_ave.append(mfn.mean([col[x] for col in ys_of_gap]))
        average_box.append(this_y_ave)

    # Transpose. All rows have len(xs) + 1 items, and zip also copes with the
    # case where no bin was produced (indexing average_box[1] raised then).
    av_box_out = [list(row) for row in zip(*average_box)]
    return gap_stat, av_box_out
def testMean(self) -> None:
    """mfn.mean averages a populated list and yields 0 for an empty one."""
    expectations = (
        ([1, 2, 3], 2),
        ([], 0),
    )
    for values, expected in expectations:
        self.assertEqual(mfn.mean(values), expected)
def main(argv):
    """Filter, boxcar and gap-bin the spectra stored in delimited text files.

    For each input file the first column is read as the x-series and every
    second column starting at index 1 as a y-series. Series whose first value
    is 0.0 are dropped. The rest are normalized, checked against a
    ``stdev_multi`` * sigma threshold, and passed through ``mfn.boxcar``.
    The gap size of every resulting series and the average spectrum per
    gap-size bin are written to ``gap_<width>.csv`` and ``ave_<width>.csv``,
    with a log in ``log_<width>.txt``. All three go in an ``Out`` directory
    next to the first input file.

    Args:
        argv: a sequence in one of three shapes:
            ``[abs_paths]``: absolute input paths;
            ``[_, rel_paths]``: relative input paths, resolved through
            ``gen_path`` (the first item is ignored);
            ``[abs_paths, rel_paths, boxcar_width, csv_delim]``:
            ``rel_paths`` is only used when ``abs_paths`` is None.
            Any other length, or no usable path, prints
            ``Invalid arguments`` and exits.
    """
    # predefined vars
    stdev_multi = 2
    boxcar_width = 5
    gap_size_min = 0.025
    gap_size_max = 0.425
    csv_delim = ';'
    xstep = 0.025

    abs_read_path = rel_read_path = None
    if len(argv) == 1:
        abs_read_path = argv[0]
    elif len(argv) == 2:
        rel_read_path = argv[1]
    elif len(argv) == 4:
        abs_read_path, rel_read_path = argv[0], argv[1]
        # A width from the command line arrives as text; the comparison with
        # 0 below needs a number.
        boxcar_width = int(argv[2])
        csv_delim = argv[3]
    else:
        print('Invalid arguments')
        sys.exit()

    def as_list(paths):
        # A bare string would otherwise be iterated character by character.
        return [paths] if isinstance(paths, str) else list(paths)

    # Resolve the inputs for every accepted argv shape. This used to happen
    # in the four-argument branch only, leaving the list empty otherwise.
    if abs_read_path is not None:
        path_read = as_list(abs_read_path)
    elif rel_read_path is not None:
        path_read = [gen_path(path, None) for path in as_list(rel_read_path)]
    else:
        path_read = []
    if not path_read:
        print('Invalid arguments')
        sys.exit()

    # An empty dirname would turn '{}/Out' into the root-level '/Out'.
    direname = os.path.dirname(path_read[0]) or '.'
    os.makedirs('{}/Out'.format(direname), exist_ok=True)
    path_gap = '{}/Out/gap_{}.csv'.format(direname, boxcar_width)
    path_log = '{}/Out/log_{}.txt'.format(direname, boxcar_width)
    path_ave = '{}/Out/ave_{}.csv'.format(direname, boxcar_width)

    xs, boxed = [], []
    # Text mode: the log receives str, which a file opened with 'wb' rejects
    # on Python 3. The with-block also closes the log on errors.
    with open(path_log, 'w') as txt_file:

        def report(message):
            # Same line on the console and in the log file.
            print(message)
            txt_file.write(message + '\n')

        for path in path_read:
            txt_file.write('-----{}-----\n'.format(os.path.basename(path)))
            report('Data read from file {}.'.format(path))

            # parse csv file with custom delimiter; newline='' is what the
            # csv module expects ('rU' no longer exists since Python 3.11).
            # Blank lines come back as empty rows and are skipped.
            with open(path, newline='') as csv_file:
                reader = csv.reader(csv_file, delimiter=csv_delim)
                opened_file = [row for row in reader if row]
            if not opened_file:
                report('File contains no data, skipped.')
                continue

            # obtain x values from the first column
            xs = [round(float(row[0]), 7) for row in opened_file]
            # obtain y values, by picking out each odd
            # column starting at index 1
            yseries = []
            for i in range(1, len(opened_file[0]), 2):
                new_row = [float(row[i]) for row in opened_file]
                # remove zero spectra
                if new_row[0] != 0.0:
                    yseries.append(new_row)
            report('File contains x series with {} points.'.format(len(xs)))
            report('File contains {} y series.'.format(len(yseries)))

            # mean and standard deviation of all y values at each x value
            ys_at_x = [[col[i] for col in yseries] for i in range(len(xs))]
            y_stdev_at_x, y_mean_at_x = [], []
            for nums in ys_at_x:
                y_stdev_at_x.append(mfn.std_dev(nums))
                y_mean_at_x.append(mfn.mean(nums))

            # normalize each spectrum with the average of all spectrum
            ysum = sum(y_mean_at_x)
            yseries = [mfn.normalize(col, ysum) for col in yseries]

            # pick out abnormal ys by comparing with
            # specified stdev threshold
            exclusions = [
                i for i, col in enumerate(yseries)
                if any(
                    abs(col[j] - y_mean_at_x[j])
                    > stdev_multi * y_stdev_at_x[j]
                    for j in range(len(xs))
                )
            ]
            report(
                'For defined {} * sigma threshold, {} y series are excluded.'.
                format(stdev_multi, len(exclusions)))

            # boxcar before gap determination; the excluded indexes are
            # handed to mfn.boxcar rather than filtered out here
            if boxcar_width == 0:
                # Accumulate like the boxcar branch does. Rebinding `boxed`
                # dropped everything collected from earlier files.
                boxed.extend(yseries)
            else:
                boxing = mfn.boxcar(yseries, boxcar_width, exclusions)
                report('Boxcar width {}.'.format(boxcar_width))
                boxed.extend(boxing)

        txt_file.write('-----summary-----\n')

        # count and export gap sizes for boxcared data, fitted on the first
        # 20 points and saved with five decimal places
        gap_stat = [
            ["{0:.5f}".format(
                mfn.poly_gap(xs[0:20], col[0:20], gap_size_min,
                             gap_size_max).real)]
            for col in boxed
        ]
        csv_writer(gap_stat, path_gap)
        print('Gap stat written to file {}, containing {} numbers'.format(
            path_gap, len(gap_stat)))
        txt_file.write(
            'Gap stat after boxcar written to file {}, containing {} '
            'numbers.\n'.format(path_gap, len(gap_stat)))

        # export averaged spectra for each gap size group
        gap_values = [float(entry[0]) for entry in gap_stat]
        average_box = [[0] + xs]
        for i in drange(gap_size_min, gap_size_max, xstep):
            ys_of_gap = [
                col for gap, col in zip(gap_values, boxed)
                if i < gap < i + xstep
            ]
            this_y_ave = [i]
            for x in range(len(xs)):
                this_y_ave.append(mfn.mean([col[x] for col in ys_of_gap]))
            average_box.append(this_y_ave)

        # transpose so that every series becomes a column of the csv
        avbox_out = [list(row) for row in zip(*average_box)]
        csv_writer(avbox_out, path_ave)
        report('Average in gap size group'
               ' written to file {}, containing {} series'.format(
                   path_ave, len(avbox_out[0])))