def Peak_detector(pp_im):
    """
    Peak detection, filtering and selection.

    :param pp_im: Cleansed (pre-processed) intensity matrices, one per sample
    :return: List of filtered peak lists (one per intensity matrix) and the collected peak UIDs
    """
    peakz = []
    UID_list = []
    counter = 1
    for im in list(pp_im):
        poss_peaks = BillerBiemann(im, points=9, scans=2)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=5, cutoff=10000)
        for peak in nin:
            area = peak_sum_area(im, peak)
            peak.set_area(area)
        peakz.append(nin)
        print("...", counter)
        counter += 1
    for pkz in peakz:
        print("Peaks detected: ", len(pkz))
        # collect the UID of each peak in this sample's peak list
        for peak in pkz:
            UID_list.append(peak.get_UID())
    return peakz, UID_list
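# All of the Peak_detector / call_peaks variants in this section rely on the same
# PyMS helpers but never show their imports. A plausible import block for the
# classic Python 2 PyMS layout is sketched below; module paths moved in the later
# PyMassSpec fork (e.g. pyms.BillerBiemann, pyms.TopHat), so treat these paths as
# an assumption and adjust them to the installed version.
import os
import csv
import itertools

from pyms.Deconvolution.BillerBiemann.Function import (
    BillerBiemann, rel_threshold, num_ions_threshold)
from pyms.Noise.SavitzkyGolay import savitzky_golay
from pyms.Baseline.TopHat import tophat
from pyms.Noise.Analysis import window_analyzer
from pyms.Peak.Function import peak_sum_area, peak_top_ion_areas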
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=4)  #JT: changed to 4 from 2
            #print "savitzky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."

    # noise level calc
    tic1 = savitzky_golay(tic)
    tic2 = tophat(tic1, struct="1.5m")  #JT: How does struct size work?
    noise_level = window_analyzer(tic2)
    print "Noise level in TIC: ", noise_level

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)

    # filter down the peaks:
    #  - First: remove any masses from each peak that have intensity less than
    #    r percent of the max intensity in that peak
    #  - Second: remove any peak where there are less than n ions with
    #    intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=100000)
    # 100000 for pegBT; 200 for peg3; minions maybe 3 instead of 4?
    # JT: Was getting very different noise cutoff values so just made it 10^5,
    # which was decided on by looking at chromatograms to find the baseline noise level.
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        #peak.null_mass(73)
        #peak.null_mass(207)  # column bleed
        #peak.null_mass(84)   # solvent tailing
        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
def call_peaks(im, tic, smooth, args):
    print "calling peaks"
    if smooth:
        print "Smoothing IM first..."
        im.crop_mass(args.lowmass, args.highmass)
        print "cropped masses..."
        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print "# masses in intensity matrix: ", n_mz
        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            #print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=2)
            #print "savitzky golay ran "
            ic_base = tophat(ic_smooth, struct="1.5m")
            #print "tophat ran "
            im.set_ic_at_index(ii, ic_base)
            #print "smoothed mass ", ii
        print "smoothed IM..."

    # noise level calc
    tic1 = savitzky_golay(tic)
    tic2 = tophat(tic1, struct="1.5m")
    noise_level = window_analyzer(tic2)
    print "Noise level in TIC: ", noise_level

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print "Initial number of Peaks found:", len(pl)

    # filter down the peaks:
    #  - First: remove any masses from each peak that have intensity less than
    #    r percent of the max intensity in that peak
    #  - Second: remove any peak where there are less than n ions with
    #    intensity above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=noise_level * args.noisemult)
    print "Peaks remaining after filtering:", len(pl3)

    for peak in pl3:
        # peak.null_mass(73)
        peak.null_mass(207)  # column bleed
        peak.null_mass(84)   # solvent tailing
        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
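# The two call_peaks() variants above expect "args" to carry the command-line
# parameters. A minimal usage sketch, assuming "im" (IntensityMatrix) and "tic"
# (total ion chromatogram) have already been built from the raw GC-MS data; the
# parameter values below are illustrative, not recommendations.
import argparse

args = argparse.Namespace(
    lowmass=50, highmass=540,   # mass range kept by im.crop_mass()
    window=9, scans=2,          # Savitzky-Golay window / BillerBiemann scans
    minintensity=2,             # rel_threshold percent
    minions=3,                  # num_ions_threshold n
    noisemult=4,                # cutoff = noise_level * noisemult (second variant)
    topions=5,                  # ions kept by peak_top_ion_areas()
)
peak_list = call_peaks(im, tic, smooth=True, args=args)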
def Peak_detector(pp_im):
    # Peak detection and filtering and selection
    peakz = []
    counter = 1
    for im in list(pp_im):
        poss_peaks = BillerBiemann(im, points=9, scans=2)
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=5, cutoff=10000)
        for peak in nin:
            area = peak_sum_area(im, peak)
            peak.set_area(area)
        peakz.append(nin)
        print("...", counter)
        counter += 1
    for pkz in peakz:
        print("Peaks detected: ", len(pkz))
    return peakz
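# Usage sketch for the simple Peak_detector() above. "pp_ims" is assumed to be a
# list of smoothed, baseline-corrected IntensityMatrix objects (as produced by
# the smoothing loops shown later in this section); im1 and im2 are placeholders.
pp_ims = [im1, im2]
peak_lists = Peak_detector(pp_ims)
for sample_peaks in peak_lists:
    for peak in sample_peaks:
        print(peak.get_rt(), peak.get_area())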
# smooth data
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic1 = savitzky_golay(ic)
    ic_smooth = savitzky_golay(ic1)
    ic_base = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_base)

# do peak detection on pre-trimmed data

# get the list of Peak objects
pl = BillerBiemann(im, points, scans)

# trim by relative intensity
apl = rel_threshold(pl, r)

# trim by threshold
peak_list = num_ions_threshold(apl, n, t)

print "Number of Peaks found:", len(peak_list)

# ignore TMS ions and set mass range
for peak in peak_list:
    peak.crop_mass(50, 540)
    peak.null_mass(73)
    peak.null_mass(147)

    # find area
    area = peak_sum_area(im, peak)
    peak.set_area(area)
# first by removing all intensities in a peak less than a given relative
# threshold,
# then by removing all peaks that have less than a given number of ions above
# a given value

# Parameters
# percentage ratio of ion intensity to max ion intensity
r = 1

# minimum number of ions, n
n = 3

# greater than or equal to threshold, t
t = 10000

# trim by relative intensity
pl = rel_threshold(peak_list, r)

# trim by threshold
real_peak_list = num_ions_threshold(pl, n, t)

print "Number of filtered peaks in real data: ", len(real_peak_list)

# Set the peak areas
for peak in real_peak_list:
    area = peak_sum_area(real_im, peak)
    peak.set_area(area)

# real_peak_list is PyMS' best guess at the true peak list

################## Run Simulator ######################
# Simulator takes a peak list, time_list and mass_list
# and returns an IntensityMatrix object.
def Peak_detector(pp_im, noise, name):
    # Peak detection and filtering and selection
    peakz = []
    counter = 1
    savePath = '/home/juicebox/utils/easyGC/MS_peak_data'
    for im, n, na in itertools.izip(list(pp_im), noise, name):
        ms_data = []
        #print(na)
        poss_peaks = BillerBiemann(im, points=140, scans=20)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=2)
        nin = num_ions_threshold(pi, n=3, cutoff=n)
        completeName = os.path.join(savePath, na + "2y.csv")
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            # header row: retention time followed by the m/z channels 35..220
            head = ['RTs'] + [float(mz) for mz in range(35, 221)]
            w.writerow(head)
            for peak in nin:
                area = peak_sum_area(im, peak)
                print('area=', area)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                #print("Peaks rt: ", peak.get_rt())
                #print("Peaks ms_list: ", ms.mass_list)
                print("Peaks ms_spec: ", list(ms.mass_spec))
                p_rt = peak.get_rt()
                its = [float(i) for i in list(ms.mass_spec)]
                ms_d = [p_rt] + its
                print(ms_d)
                w.writerow(ms_d)
        peakz.append(nin)
        #print("...", counter)
        counter += 1
    #for pkz in peakz:
    #    print("Peaks detected: ", len(pkz))
    #    print("Peaks rt: ", pkz.get_rt())
    #    print("Peaks ms: ", pkz.get_mass_spectrum())
    return peakz
def Peak_detector(pp_im, noise, name, points, scans, percent, ni, name_tag, sdir):
    """
    Intake cleansed intensity matrices and CMD args.
    Produces a list of peaks and the corresponding mass spectrum for each sample.

    @param pp_im: Cleansed intensity matrices from the Preprocess_IntensityMatrices method
    @param noise: Noise level approximation produced by the matrix_from_cdf method
    @param name: Sample name used for creating the mass spectrum .csv files
    @param points: Size of the window used for peak detection in the BillerBiemann method
    @param scans: Number of adjacent windows to compare for peak detection in the BillerBiemann method
    @param percent: Percentage of the peak's maximum ion intensity an ion must exceed to be retained
    @param ni: Number of ions required per peak for it to be considered an informative peak
    @param name_tag: String consisting of CMD args for identification, e.g. 'p140s25%3n3'
    @param sdir: Directory to save the mass spectrum .csv files
    @return: List of peaks per sample
    @return: .csv files containing the mass spectrum corresponding to each peak
    """
    peakz = []
    savePath = sdir
    ms_data_files = []
    print("len pp_im", len(list(pp_im)))
    print("len noise", len(noise))
    print("len name", len(name), name)
    for im, n, na in itertools.izip(list(pp_im), noise, name):
        poss_peaks = BillerBiemann(im, points=points, scans=scans)
        pi = rel_threshold(poss_peaks, percent=percent)
        nin = num_ions_threshold(pi, n=ni, cutoff=n)
        completeName = os.path.join(savePath, na + name_tag + "ms_data.csv")
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            head = ['Area', 'RTs'] + [float(i) for i in range(35, 221)]
            w.writerow(head)
            for peak in nin:
                area = peak_sum_area(im, peak)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                p_rt = peak.get_rt()
                its = [float(spec) for spec in list(ms.mass_spec)]
                ms_d = [area] + [p_rt] + its
                w.writerow(ms_d)
        peakz.append(nin)
        ms_data_files.append(completeName)
    print('ms_data_files:', ms_data_files)
    return [peakz, ms_data_files]
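# Minimal usage sketch for the documented Peak_detector() above. The three input
# lists are assumed to come from the (not shown) Preprocess_IntensityMatrices and
# matrix_from_cdf steps; the parameter values and output directory are
# illustrative only.
peak_lists, ms_csvs = Peak_detector(
    preprocessed_ims, noise_estimates, sample_names,
    points=140, scans=20, percent=2, ni=3,
    name_tag='p140s20r2n3',
    sdir='/tmp/MS_peak_data')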
# smooth data
for ii in range(n_mz):
    ic = im.get_ic_at_index(ii)
    ic1 = savitzky_golay(ic)
    ic_smooth = savitzky_golay(ic1)
    ic_base = tophat(ic_smooth, struct="1.5m")
    im.set_ic_at_index(ii, ic_base)

# do peak detection on pre-trimmed data

# get the list of Peak objects
pl = BillerBiemann(im, points, scans)

# trim by relative intensity
apl = rel_threshold(pl, r)

# trim by threshold
peak_list = num_ions_threshold(apl, n, t)

print "\t -> Number of Peaks found:", len(peak_list)
print "\t -> Executing peak post-processing and quantification..."

# ignore TMS ions and use same mass range for all experiments
for peak in peak_list:
    peak.crop_mass(50, 540)
    peak.null_mass(73)
    peak.null_mass(147)

    # find peak areas
    area = peak_sum_area(im, peak)
def Peak_detector(pp_im, noise, name, points, scans, percent, ni, name_tag, sdir):
    # Peak detection and filtering and selection
    peakz = []
    # counter = 1
    savePath = sdir
    ms_data_files = []
    print("len pp_im", len(list(pp_im)))
    print("len noise", len(noise))
    print("len name", len(name), name)
    for im, n, na in itertools.izip(list(pp_im), noise, name):
        ms_data = []
        # print(na)
        poss_peaks = BillerBiemann(im, points=points, scans=scans)  # increase scan #
        pi = rel_threshold(poss_peaks, percent=percent)
        nin = num_ions_threshold(pi, n=ni, cutoff=n)
        completeName = os.path.join(savePath, na + name_tag + "ms_data.csv")
        with open(completeName, 'w') as f:
            w = csv.writer(f)
            # header row: peak area, retention time, then the m/z channels 35..220
            head = ['Area', 'RTs'] + [float(mz) for mz in range(35, 221)]
            w.writerow(head)
            for peak in nin:
                area = peak_sum_area(im, peak)
                # print('area:', area)
                peak.set_area(area)
                ms = peak.get_mass_spectrum()
                # print("Peaks rt: ", peak.get_rt())
                # print("Peaks ms_list: ", ms.mass_list)
                # print("Peaks ms_spec: ", list(ms.mass_spec))
                p_rt = peak.get_rt()
                its = [float(i) for i in list(ms.mass_spec)]
                ms_d = [area] + [p_rt] + its
                # ms_d = its
                # print('ms_d', ms_d)
                w.writerow(ms_d)
        peakz.append(nin)
        # print("...", counter)
        # counter += 1
        ms_data_files.append(completeName)
    print('ms_data_files:', ms_data_files)
    return [peakz, ms_data_files]
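# The CSV files written above hold one row per detected peak:
#   [Area, RT, intensity at m/z 35, ..., intensity at m/z 220]
# A small read-back sketch for downstream processing (the file name here is
# illustrative):
import csv

with open('/tmp/MS_peak_data/sample1p140s20r2n3ms_data.csv') as f:
    reader = csv.reader(f)
    header = next(reader)                       # 'Area', 'RTs', 35.0 ... 220.0
    for row in reader:
        area, rt = float(row[0]), float(row[1])
        spectrum = [float(x) for x in row[2:]]  # 186 intensities, m/z 35-220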