# Imports assumed for this example (module paths follow current PyMassSpec; older PyMS releases differ):
from pyms.BillerBiemann import BillerBiemann, rel_threshold, num_ions_threshold
from pyms.Peak.Function import peak_sum_area

def Peak_detector(pp_im):
    """
    Peak detection, filtering and selection.

    :param pp_im: iterable of preprocessed IntensityMatrix objects
    :return: list of filtered peak lists (one per intensity matrix) and their peak UIDs
    """

    peakz = []
    UID_list = []
    counter = 1

    for im in list(pp_im):

        poss_peaks = BillerBiemann(im, points=9, scans=2)                   #increase scan #
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=5, cutoff=10000)
        for peak in nin:
            area = peak_sum_area(im, peak)
            peak.set_area(area)

        peakz.append(nin)
        print("...", counter)
        counter += 1

    for pkz in peakz:
        print("Peaks detected: ", len(pkz))
        for peak in pkz:
            UID_list.append(peak.get_UID())

    return peakz, UID_list
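The pp_im argument is expected to be an iterable of preprocessed intensity matrices. A minimal sketch of building one such matrix and calling the detector, assuming the current PyMassSpec module layout and a placeholder file name:

# Sketch only: module paths assume current PyMassSpec (older PyMS releases differ)
# and "sample.cdf" is a placeholder file name.
from pyms.GCMS.IO.ANDI import ANDI_reader
from pyms.IntensityMatrix import build_intensity_matrix_i
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.TopHat import tophat

data = ANDI_reader("sample.cdf")         # raw GC-MS run
im = build_intensity_matrix_i(data)      # intensity matrix with integer mass bins

# smooth and baseline-correct every ion chromatogram, as the later examples do
n_scan, n_mz = im.get_size()
for ii in range(n_mz):
    ic = savitzky_golay(im.get_ic_at_index(ii))
    im.set_ic_at_index(ii, tophat(ic, struct="1.5m"))

peakz, uids = Peak_detector([im])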
Example #2
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window,
                                       degree=4)  #JT: changed to 4 from 2
            #print "savitky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."
        # noise level calc
        tic1 = savitzky_golay(tic)
        tic2 = tophat(tic1, struct="1.5m")  #JT: How does struct size work?
        noise_level = window_analyzer(tic2)
        print "Noise level in TIC: ", noise_level

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)

    # filter down the peaks.
    #   - First: remove any masses from each peak that have intensity less than r percent of the max intensity in that peak
    #   - Second: remove any peak where there are less than n ions with intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(
        pl2, n=args.minions, cutoff=100000
    )  #100000 for pegBT  #200 for peg3 #minions maybe 3 instead of 4?

    #JT: Was getting very different noise cutoff values so just made it 10^5
    # Which was decided on by looking at chromatograms to find baseline noise lvl
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        #peak.null_mass(73)
        #peak.null_mass(207)     # column bleed
        #peak.null_mass(84)      # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(
            im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
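call_peaks pulls its tuning parameters off an args object, normally built by argparse in the surrounding script. A minimal sketch of driving it directly; the parameter values are hypothetical and the reader imports assume the current PyMassSpec layout:

# Sketch only: hypothetical parameter values; "sample.cdf" is a placeholder.
from types import SimpleNamespace
from pyms.GCMS.IO.ANDI import ANDI_reader
from pyms.IntensityMatrix import build_intensity_matrix_i

args = SimpleNamespace(
    lowmass=50, highmass=540,   # mass range kept by crop_mass
    window=9, scans=2,          # Savitzky-Golay window / BillerBiemann points and scans
    minintensity=2,             # rel_threshold percent
    minions=4,                  # num_ions_threshold n
    topions=5,                  # number of ions quantified by peak_top_ion_areas
)

data = ANDI_reader("sample.cdf")
im = build_intensity_matrix_i(data)
tic = data.tic                  # total ion chromatogram (data.get_tic() in older PyMS)

peak_list = call_peaks(im, tic, smooth=True, args=args)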
Example #3
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=2)
            #print "savitky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."
        # noise level calc
        tic1 = savitzky_golay(tic)
        tic2 = tophat(tic1, struct="1.5m")
        noise_level = window_analyzer(tic2)
        print "Noise level in TIC: ", noise_level


    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)


    # filter down the peaks.
    #   - First: remove any masses from each peak that have intensity less than r percent of the max intensity in that peak
    #   - Second: remove any peak where there are less than n ions with intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=noise_level * args.noisemult)
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        # peak.null_mass(73)
        peak.null_mass(207)     # column bleed
        peak.null_mass(84)      # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
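This variant differs from Example #2 chiefly in how the num_ions_threshold cutoff is chosen and in the active null_mass calls for column bleed (m/z 207) and solvent tailing (m/z 84). A sketch of the two cutoff strategies side by side, using names already defined in these functions:

# Example #2: hard-coded cutoff chosen by inspecting baseline noise in the chromatograms.
pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=100000)

# Example #3: cutoff scaled from the measured TIC noise level.
noise_level = window_analyzer(tic2)
pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=noise_level * args.noisemult)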
Example #4
def Peak_detector(pp_im):
    # Peak detection, filtering and selection
    peakz = []
    counter = 1

    for im in list(pp_im):

        poss_peaks = BillerBiemann(im, points=9, scans=2)
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=5, cutoff=10000)
        for peak in nin:
            area = peak_sum_area(im, peak)
            peak.set_area(area)

        peakz.append(nin)
        print("...", counter)
        counter += 1

    for pkz in peakz:
        print("Peaks detected: ", len(pkz))

    return peakz
Example #5
# smooth data
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic1 = savitzky_golay(ic)
    ic_smooth = savitzky_golay(ic1)
    ic_base = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_base)

# do peak detection on pre-trimmed data

# get the list of Peak objects
pl = BillerBiemann(im, points, scans)

# trim by relative intensity
apl = rel_threshold(pl, r)

# trim by threshold
peak_list = num_ions_threshold(apl, n, t)

print "Number of Peaks found:", len(peak_list)

# ignore TMS ions and set mass range
for peak in peak_list:
    peak.crop_mass(50,540)
    peak.null_mass(73)
    peak.null_mass(147)
    # find area
    area = peak_sum_area(im, peak)
    peak.set_area(area)
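This fragment relies on names defined elsewhere in its original script (im, n_mz, points, scans, r, n, t). Hypothetical values for them, echoing the parameters used in the other examples here:

# Hypothetical parameter values, not taken from the original script.
n_scan, n_mz = im.get_size()   # im is the smoothed IntensityMatrix being processed
points = 9                     # BillerBiemann: width (in scans) of the local-maximum window
scans = 2                      # BillerBiemann: adjacent scans combined per peak
r = 2                          # rel_threshold: percent of the base peak an ion must exceed
n = 3                          # num_ions_threshold: minimum number of qualifying ions
t = 10000                      # num_ions_threshold: intensity cutoff for a qualifying ion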
Example #6
# first by removing all intensities in a peak less than a given relative
# threshold,
# then by removing all peaks that have less than a given number of ions above
# a given value

# Parameters
# percentage ratio of ion intensity to max ion intensity
r = 1

# minimum number of ions, n
n = 3
# greater than or equal to threshold, t
t = 10000

# trim by relative intensity
pl = rel_threshold(peak_list, r)

# trim by threshold
real_peak_list = num_ions_threshold(pl, n, t)
print "Number of filtered peaks in real data: ", len(real_peak_list)

# Set the peak areas
for peak in real_peak_list:
    area = peak_sum_area(real_im, peak)
    peak.set_area(area)

# real_peak_list is PyMS' best guess at the true peak list

################## Run Simulator ######################
# Simulator takes a peak list, time_list and mass_list
# and returns an IntensityMatrix object.
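The comment above refers to PyMS's chromatogram simulator. A sketch of feeding it the filtered peak list; the gcms_sim entry point and the time_list/mass_list accessors are assumptions based on the PyMassSpec Simulator module and may be named differently in other PyMS versions:

# Sketch only: gcms_sim and the attribute names below are assumed, not taken from this script.
from pyms.Simulator import gcms_sim

time_list = real_im.time_list   # retention times of the real IntensityMatrix
mass_list = real_im.mass_list   # m/z channels of the real IntensityMatrix

sim_im = gcms_sim(time_list, mass_list, real_peak_list)   # simulated IntensityMatrix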
Example #7
def Peak_detector(pp_im, noise, name):
    # Peak detection and filtering and selection
    peakz = []
    counter = 1
    savePath = '/home/juicebox/utils/easyGC/MS_peak_data'

    for im, n, na in itertools.izip(list(pp_im), noise, name):

        ms_data = []

        #print(na)
        poss_peaks = BillerBiemann(im, points=140, scans=20)  #increase scan #
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=3, cutoff=n)

        completeName = os.path.join(savePath, na + "2y.csv")
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            head = ['RTs'] + [float(m) for m in range(35, 221)]  # m/z 35.0-220.0
            w.writerow(head)
            for peak in nin:

                area = peak_sum_area(im, peak)
                print('area=', area)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                #print("Peaks rt: ", peak.get_rt())
                #print("Peaks ms_list: ", ms.mass_list)
                print("Peaks ms_spec: ", list(ms.mass_spec))
                p_rt = peak.get_rt()
                its = [float(i) for i in ms.mass_spec]
                ms_d = [p_rt] + its
                print(ms_d)
                w.writerow(ms_d)

        peakz.append(nin)
        #print("...", counter)
        counter += 1


    return peakz
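itertools.izip exists only on Python 2. If this function ever needs to run under Python 3, where the built-in zip is already lazy, a small compatibility shim keeps the loop unchanged:

# Python 2/3 compatible lazy zip for the loop above.
try:
    from itertools import izip as zip_   # Python 2
except ImportError:                       # Python 3: built-in zip already returns an iterator
    zip_ = zip

for im, n, na in zip_(list(pp_im), noise, name):
    pass  # ...peak detection body as in Peak_detector above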
Example #9
def Peak_detector(pp_im, noise, name, points, scans, percent, ni, name_tag, sdir):
    """
    Intake cleansed intensity matrices and CMD args
    Produces list of peaks and corresponding mass spectrum of each sample

    @param pp_im: Cleansed intensity matrices from the Preprocess_IntensityMatrices method
    @param noise: Noise level approximation produced by the matrix_from_cdf method
    @param name: Sample name use from creating mass spectrum .csv files
    @param points: Size of window use for peak detection in BillerBiemann method
    @param scans: Number of adjacent windows to compare for peak detection in BillerBiemann method
    @param percent: Percentile threshold a peak must exceed to be considered an informative peak
    @param ni: Number of ions required per peak to be considered an informative peak
    @param name_tag: String consisting of CMD args for identification, ie. 'p140s25%3n3'
    @param sdir: Directory to save the mass spectrum .csv files
    @return: List of peaks per sample
    @return: csv files containing mass spectrum corresponding to each peak
    """

    peakz = []
    savePath = sdir
    ms_data_files = []

    print("len pp_im", len(list(pp_im)))
    print("len noise", len(noise))
    print("len name", len(name), name)

    for im, n, na in itertools.izip(list(pp_im), noise, name):

        poss_peaks = BillerBiemann(im, points=points, scans=scans)
        pi = rel_threshold(poss_peaks, percent=percent)
        nin = num_ions_threshold(pi, n=ni, cutoff=n)

        completeName = os.path.join(savePath, na + name_tag + "ms_data.csv")
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            head = ['Area', 'RTs'] + [float(i) for i in range(35, 221)]  # m/z 35.0-220.0

            w.writerow(head)
            for peak in nin:

                area = peak_sum_area(im, peak)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                p_rt = peak.get_rt()
                its = [float(spec) for spec in ms.mass_spec]

                ms_d = [area, p_rt] + its

                w.writerow(ms_d)

        peakz.append(nin)
        ms_data_files.append(completeName)
    print('ms_data_files:', ms_data_files)

    return [peakz, ms_data_files]
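A sketch of driving this version and inspecting its output; the parameter values, name_tag and save directory are hypothetical, and pp_im, noise and name are assumed to come from the preprocessing steps mentioned in the docstring:

# Sketch only: placeholder inputs and values, not from the original pipeline.
import csv

peakz, ms_data_files = Peak_detector(
    pp_im, noise, name,
    points=140, scans=20, percent=2, ni=3,
    name_tag="p140s20%2n3", sdir="/tmp/MS_peak_data",
)

# Peek at the first rows of the first per-sample spectrum file.
with open(ms_data_files[0]) as f:
    for row in list(csv.reader(f))[:3]:
        print(row[:5])   # Area, RT and the first few m/z 35+ intensity values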
Example #10
    # smooth data
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic)
        ic_smooth = savitzky_golay(ic1)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # do peak detection on pre-trimmed data

    # get the list of Peak objects
    pl = BillerBiemann(im, points, scans)

    # trim by relative intensity
    apl = rel_threshold(pl, r)

    # trim by threshold
    peak_list = num_ions_threshold(apl, n, t)

    print "\t -> Number of Peaks found:", len(peak_list)

    print "\t -> Executing peak post-procesing and quantification..."

    # ignore TMS ions and use same mass range for all experiments
    for peak in peak_list:
        peak.crop_mass(50, 540)
        peak.null_mass(73)
        peak.null_mass(147)
        # find peak areas
        area = peak_sum_area(im, peak)
Example #13
def Peak_detector(pp_im, noise, name, points, scans, percent, ni, name_tag,
                  sdir):
    # Peak detection, filtering and selection
    peakz = []
    # counter = 1
    savePath = sdir
    ms_data_files = []

    print("len pp_im", len(list(pp_im)))
    print("len noise", len(noise))
    print("len name", len(name), name)

    for im, n, na in itertools.izip(list(pp_im), noise, name):

        ms_data = []

        # print(na)
        poss_peaks = BillerBiemann(im, points=points,
                                   scans=scans)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=percent)
        nin = num_ions_threshold(pi, n=ni, cutoff=n)

        completeName = os.path.join(savePath, na + name_tag + "ms_data.csv")
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            head = ['Area', 'RTs'] + [float(m) for m in range(35, 221)]  # m/z 35.0-220.0

            w.writerow(head)
            for peak in nin:

                area = peak_sum_area(im, peak)
                # print('area:', area)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                # print("Peaks rt: ", peak.get_rt())
                # print("Peaks ms_list: ", ms.mass_list)
                # print("Peaks ms_spec: ", list(ms.mass_spec))
                p_rt = peak.get_rt()
                its = [float(i) for i in ms.mass_spec]

                ms_d = [area, p_rt] + its
                # ms_d = its
                # print('ms_d', ms_d)
                w.writerow(ms_d)

        peakz.append(nin)
        # #print("...", counter)
        # counter += 1
        ms_data_files.append(completeName)
    print('ms_data_files:', ms_data_files)

    return [peakz, ms_data_files]