Esempio n. 1
0
        def display():
            """Plot the point-wise average of all spectra in an ID range.

            Reads the lower and upper spectrum IDs from ``entry_l`` and
            ``entry_h``, stores them (as entered) on ``Data``, fetches the
            matching rows through ``dba.getSpectrumFromRange`` and draws the
            averaged y-series against the x-series of the first row.
            Invalid input is reported through ``msg_window`` and nothing is
            plotted in that case.
            """
            Data.spec_id_l = entry_l.get()
            Data.spec_id_h = entry_h.get()

            # Compare by value: `is ""` tests object identity, which is not
            # guaranteed to hold for an empty string returned by a widget.
            if Data.spec_id_l == "":
                msg_window("Error", "Please provide min ID")
                return
            if Data.spec_id_h == "":
                msg_window("Error", "Please provide max ID")
                return

            try:
                id_low = int(Data.spec_id_l)
                id_high = int(Data.spec_id_h)
            except ValueError:
                # Non-numeric text would otherwise raise inside the callback.
                msg_window("Error", "IDs must be integers")
                return

            if id_low > id_high:
                msg_window("Error", "Lower bound > upper bound")
                return

            spec_data = dba.getSpectrumFromRange(id_low, id_high)
            Data.num_ave = id_high - id_low + 1
            # An empty result is treated like None; indexing spec_data[0]
            # below would fail on it.
            if spec_data is None or len(spec_data) == 0:
                msg_window("No spectrum found",
                           "Spectrum of this ID is not found")
                return

            # Column 1 of a row holds the x-series text, column 2 the
            # y-series text (as used by the indexing below).
            Data.x_series = dbapi.textToSeries(spec_data[0][1])
            yseries = [dbapi.textToSeries(row[2]) for row in spec_data]
            Data.y_ave = [mfn.mean([col[x] for col in yseries])
                          for x in range(len(Data.x_series))]

            # `!=` rather than `is not`: identity comparison of ints is only
            # reliable for small cached values and would misfire on long
            # series.
            if len(Data.x_series) != len(Data.y_ave):
                msg_window("Error", "x and y have different dimension")
                return

            # use '-o' for dots
            ax.plot(Data.x_series, Data.y_ave, '-')
            # NOTE(review): if `canvas` is a matplotlib FigureCanvasTkAgg,
            # show() was replaced by draw() in newer releases - confirm.
            canvas.show()
            canvas.get_tk_widget().pack(side=tkc.BOTTOM,
                                        fill=tkc.BOTH,
                                        expand=1)

            toolbar.update()
            canvas._tkcanvas.pack(side=tkc.TOP,
                                  fill=tkc.BOTH,
                                  expand=1)
Esempio n. 2
0
def eliminate_std_dev(xs, y_series, std_dev_multi):
    """Flag and drop y-series that stray too far from the group mean.

    The mean and standard deviation are computed at every x position
    across all series. Each series is then normalized with the sum of
    those means (``y_series`` is updated in place), and a series is flagged
    as soon as one of its normalized points differs from the mean at that
    position by more than ``std_dev_multi`` standard deviations.

    Args:
        xs ([float]): the x-series data
        y_series ([[float]]): one or more y-series; replaced element by
            element with their normalized versions
        std_dev_multi (int): multiple of the standard deviation beyond
            which a series is excluded
    Returns:
        Tuple of (indexes of the excluded series, list of the remaining
        normalized series).
    """
    point_count = len(xs)

    # Gather, for every x position, the y values of all series.
    columns = [[series[pos] for series in y_series]
               for pos in range(point_count)]

    # Statistics are taken before the series are normalized.
    sigma_at_x, mean_at_x = [], []
    for column in columns:
        sigma_at_x.append(mfn.std_dev(column))
        mean_at_x.append(mfn.mean(column))

    # Normalize every series with the summed means, writing back in place.
    total_of_means = sum(mean_at_x)
    for idx, series in enumerate(y_series):
        y_series[idx] = mfn.normalize(series, total_of_means)

    # A single out-of-threshold point is enough to exclude a series.
    exclusions = [
        idx for idx, series in enumerate(y_series)
        if any(abs(series[pos] - mean_at_x[pos])
               > std_dev_multi * sigma_at_x[pos]
               for pos in range(point_count))
    ]

    kept = [series for idx, series in enumerate(y_series)
            if idx not in exclusions]
    return exclusions, kept
Esempio n. 3
0
def group_average(xs, boxed, gap_min, gap_max, x_step):
    """Bin series by their gap value and average each bin point-wise.

    For every series in ``boxed`` a gap value is obtained from
    ``mfn.poly_gap`` using only the first 20 points; its real part is kept
    as a string with five decimals. Bins start at each value produced by
    ``f_range(gap_min, gap_max, x_step)`` and are open on both sides:
    a series belongs to a bin when ``start < gap < start + x_step``.

    Returns:
        Tuple ``(gap_stat, av_box_out)``. ``gap_stat`` holds one
        single-element list (the formatted gap) per series. ``av_box_out``
        is the transposed table whose first column is ``[0] + xs`` and
        whose following columns are ``[bin_start] + averaged y values``.
    """
    head = slice(0, 20)

    # Only five decimal places of the gap are kept.
    gap_stat = [
        ["{0:.5f}".format(
            mfn.poly_gap(xs[head], col[head], gap_min, gap_max).real)]
        for col in boxed
    ]
    gap_values = [float(entry[0]) for entry in gap_stat]

    # First row is the x axis, prefixed with 0 to line up with bin labels.
    table = [[0] + xs]
    for bin_start in f_range(gap_min, gap_max, x_step):
        bin_end = bin_start + x_step
        members = [boxed[idx] for idx, gap in enumerate(gap_values)
                   if bin_start < gap < bin_end]
        row = [bin_start]
        for pos in range(len(xs)):
            row.append(mfn.mean([col[pos] for col in members]))
        table.append(row)

    # Transpose so every output row is one x position across all bins.
    av_box_out = [[row[pos] for row in table]
                  for pos in range(len(table[1]))]
    return gap_stat, av_box_out
Esempio n. 4
0
 def testMean(self) -> None:
     """Check mfn.mean on a non-empty list and on an empty list."""
     # (input values, expected mean); the empty list is expected to give 0.
     cases = (
         ([1, 2, 3], 2),
         ([], 0),
     )
     for values, expected in cases:
         self.assertEqual(mfn.mean(values), expected)
Esempio n. 5
0
def main(argv):
    """Process spectrum CSV files and export gap statistics and averages.

    For every input file the x column and every second y column are read,
    per-x mean and standard deviation are computed, the series are
    normalized, outliers are identified and the series are boxcar-smoothed.
    The gap value of every resulting series is written to ``gap_<w>.csv``,
    the per-gap-bin averages to ``ave_<w>.csv`` and a text log to
    ``log_<w>.txt``, all inside an ``Out`` directory next to the first
    input file (``<w>`` is the boxcar width).

    Args:
        argv: one of
            ``[abs_paths]``,
            ``[ignored, rel_paths]`` (paths go through ``gen_path``), or
            ``[abs_paths, rel_paths, boxcar_width, csv_delim]`` where
            ``rel_paths`` is only used when ``abs_paths`` is None.
            A single path string is accepted in place of a list.

    Raises:
        SystemExit: with status 1 when the argument count is unsupported
            or no input path is given.
    """
    # predefined vars
    stdev_multi = 2
    boxcar_width = 5
    gap_size_min = 0.025
    gap_size_max = 0.425
    csv_delim = ';'
    xstep = 0.025

    absReadPath, relReadPath = None, None
    if len(argv) == 1:
        absReadPath = argv[0]
    elif len(argv) == 2:
        relReadPath = argv[1]
    elif len(argv) == 4:
        absReadPath = argv[0]
        relReadPath = argv[1]
        boxcar_width = argv[2]
        csv_delim = argv[3]
    else:
        print('Invalid arguments')
        # Non-zero status so callers can tell the run did not happen.
        sys.exit(1)

    # Iterating a bare string would yield single characters as "paths".
    if isinstance(absReadPath, str):
        absReadPath = [absReadPath]
    if isinstance(relReadPath, str):
        relReadPath = [relReadPath]

    # Collect the input paths for every accepted argument form; doing this
    # only for the four-argument form left the list empty otherwise.
    if absReadPath is not None:
        path_read = list(absReadPath)
    else:
        path_read = [gen_path(path, None) for path in relReadPath or []]
    if not path_read:
        print('Invalid arguments')
        sys.exit(1)

    direname = os.path.dirname(path_read[0])
    path_gap = '{}/Out/gap_{}.csv'.format(direname, boxcar_width)
    path_log = '{}/Out/log_{}.txt'.format(direname, boxcar_width)
    path_ave = '{}/Out/ave_{}.csv'.format(direname, boxcar_width)

    # Text mode: everything written to the log is str. The context manager
    # closes the log even when processing fails part-way.
    with open(path_log, 'w') as txt_file:

        def log(message):
            """Print a status line and append it to the log file."""
            print(message)
            txt_file.write(message + '\n')

        xs, boxed = [], []
        for path in path_read:
            txt_file.write('-----{}-----\n'.format(os.path.basename(path)))
            log('Data read from file {}.'.format(path))

            # parse csv file with custom delimiter; newline='' lets the
            # csv module handle line endings itself
            with open(path, newline='') as csv_file:
                openedFile = list(csv.reader(csv_file, delimiter=csv_delim))

            # obtain x values from the first column
            xs = [round(float(row[0]), 7) for row in openedFile]

            # obtain y values, by picking out each odd
            # column starting at index 1
            yseries = []
            for i in range(1, len(openedFile[0]), 2):
                newRow = [float(row[i]) for row in openedFile]
                # drop series whose first value is zero
                if newRow[0] != 0.0:
                    yseries.append(newRow)

            log('File contains x series with {} points.'.format(len(xs)))
            log('File contains {} y series.'.format(len(yseries)))

            # mean and standard deviation of all y values at each x value
            ysAtx = [[col[i] for col in yseries] for i in range(len(xs))]
            yStdevAtx = [mfn.std_dev(nums) for nums in ysAtx]
            yMeanAtx = [mfn.mean(nums) for nums in ysAtx]

            # normalize each spectrum with the average of all spectrum
            ysum = sum(yMeanAtx)
            yseries = [mfn.normalize(ys, ysum) for ys in yseries]

            # pick out abnormal ys by comparing with
            # specified stdev threshold; one offending point is enough
            exclusions = [
                i for i, ys in enumerate(yseries)
                if any(abs(ys[j] - yMeanAtx[j]) > stdev_multi * yStdevAtx[j]
                       for j in range(len(xs)))
            ]
            # The series are not filtered here: the full list and the
            # excluded indexes are handed to mfn.boxcar below.

            log('For defined {} * sigma threshold, {} y series are excluded.'.
                format(stdev_multi, len(exclusions)))

            # boxcar before gap determination
            if boxcar_width == 0:
                # NOTE(review): this replaces (rather than extends) the
                # series collected from earlier files and ignores
                # `exclusions` - confirm this is intended.
                boxed = yseries
            else:
                boxing = mfn.boxcar(yseries, boxcar_width, exclusions)
                log('Boxcar width {}.'.format(boxcar_width))
                boxed.extend(boxing)

        txt_file.write('-----summary-----\n')
        # count and export gap sizes for boxcared data,
        # saving only five decimal places
        gap_stat = [
            ["{0:.5f}".format(
                mfn.poly_gap(xs[0:20], col[0:20], gap_size_min,
                             gap_size_max).real)]
            for col in boxed
        ]
        csv_writer(gap_stat, path_gap)
        print('Gap stat written to file {}, containing {} numbers'.format(
            path_gap, len(gap_stat)))
        txt_file.write(
            'Gap stat after boxcar written to file {}, '
            'containing {} numbers.\n'.format(path_gap, len(gap_stat)))

        # export averaged spectra for each gap size group
        average_box = [[0] + xs]
        for i in drange(gap_size_min, gap_size_max, xstep):
            ysOfGap = [boxed[j] for j, gap in enumerate(gap_stat)
                       if i < float(gap[0]) < i + xstep]
            this_y_ave = [i]
            for x in range(len(xs)):
                this_y_ave.append(mfn.mean([col[x] for col in ysOfGap]))
            average_box.append(this_y_ave)
        # transpose so each output row is one x position across all groups
        avbox_out = [[row[i] for row in average_box]
                     for i in range(len(average_box[1]))]
        csv_writer(avbox_out, path_ave)
        log('Average in gap size group'
            ' written to file {}, containing {} series'.format(
                path_ave, len(avbox_out[0])))