def test_main(self, peak, im_i):
    areas = peak_top_ion_areas(im_i, peak, 5)
    assert isinstance(areas, dict)
    assert len(areas) == 5

    assert areas[100] == 4534.0
    assert isinstance(areas[100], float)
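# Hedged usage sketch (not part of the original test suite): it shows how
# peak_top_ion_areas can be called with explicit keyword arguments. The names
# n_top_ions and max_bound come from the error tests further down; the default
# of five top ions matches the assertions above, and max_bound=0 is an
# assumption. The import path assumed here is PyMassSpec's pyms.Peak.Function.
from pyms.Peak.Function import peak_top_ion_areas

def example_top_ion_areas(im_i, peak):
    # dict mapping each of the five most intense ion masses to its area
    areas = peak_top_ion_areas(im_i, peak, n_top_ions=5, max_bound=0)
    for mass, area in sorted(areas.items()):
        print(f"m/z {mass}: area {area}")
    return areas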
def expr_list(pyms_datadir):
    with tempfile.TemporaryDirectory() as tmpdir:
        outputdir = pathlib.Path(tmpdir)

        # Create experiment files
        for jcamp_file in eley_codes:

            im = build_intensity_matrix_i(JCAMP_reader(pyms_datadir / f"{jcamp_file}.JDX"))

            # Intensity matrix size (scans, masses)
            n_scan, n_mz = im.size

            # noise filter and baseline correct
            for ii in range(n_mz):
                ic = im.get_ic_at_index(ii)
                ic_smooth = savitzky_golay(ic)
                ic_bc = tophat(ic_smooth, struct="1.5m")
                im.set_ic_at_index(ii, ic_bc)

            peak_list = BillerBiemann(im, points=9, scans=2)

            print('#')
            apl = rel_threshold(peak_list, 2)
            new_peak_list = num_ions_threshold(apl, 3, 3000)
            print('#')

            # ignore TMS ions and set mass range
            for peak in new_peak_list:
                peak.crop_mass(50, 400)
                peak.null_mass(73)
                peak.null_mass(147)

                # find area
                area = peak_sum_area(im, peak)
                peak.area = area
                area_dict = peak_top_ion_areas(im, peak)
                peak.ion_areas = area_dict

            expr = Experiment(jcamp_file, new_peak_list)

            # set time range for all experiments
            expr.sele_rt_range(["6.5m", "21m"])
            print('#')

            expr.dump(outputdir / f"{jcamp_file}.expr")
            print('#')

        # Load experiments
        expr_list = []
        for expr_code in eley_codes:
            expr = load_expr(outputdir / f"{expr_code}.expr")
            assert isinstance(expr, Experiment)
            expr_list.append(expr)

        yield expr_list
def call_peaks(im, tic, smooth, args):
    print("calling peaks")

    if smooth:
        print("Smoothing IM first...")
        im.crop_mass(args.lowmass, args.highmass)
        print("cropped masses...")

        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print("# masses in intensity matrix: ", n_mz)

        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            # print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=4)  # JT: changed to 4 from 2
            # print "savitzky golay ran"
            ic_base = tophat(ic_smooth, struct="1.5m")
            # print "tophat ran"
            im.set_ic_at_index(ii, ic_base)
            # print "smoothed mass ", ii
        print("smoothed IM...")

    # noise level calc
    tic1 = savitzky_golay(tic)
    tic2 = tophat(tic1, struct="1.5m")  # JT: How does struct size work?
    noise_level = window_analyzer(tic2)
    print("Noise level in TIC: ", noise_level)

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print("Initial number of Peaks found:", len(pl))

    # filter down the peaks:
    # - First: remove any masses from each peak that have intensity less than
    #   r percent of the max intensity in that peak
    # - Second: remove any peak where there are less than n ions with intensity
    #   above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=100000)
    # 100000 for pegBT, 200 for peg3; minions maybe 3 instead of 4?
    # JT: Was getting very different noise cutoff values so just made it 10^5,
    # which was decided on by looking at chromatograms to find the baseline noise level
    print("Peaks remaining after filtering:", len(pl3))

    for peak in pl3:
        # peak.null_mass(73)
        # peak.null_mass(207)  # column bleed
        # peak.null_mass(84)   # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
def call_peaks(im, tic, smooth, args):
    print("calling peaks")

    if smooth:
        print("Smoothing IM first...")
        im.crop_mass(args.lowmass, args.highmass)
        print("cropped masses...")

        # get the size of the intensity matrix
        n_scan, n_mz = im.get_size()
        print("# masses in intensity matrix: ", n_mz)

        # smooth data
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            # print "got ic for mass ", ii
            # ic1 = savitzky_golay(ic)
            ic_smooth = savitzky_golay(ic, window=args.window, degree=2)
            # print "savitzky golay ran"
            ic_base = tophat(ic_smooth, struct="1.5m")
            # print "tophat ran"
            im.set_ic_at_index(ii, ic_base)
            # print "smoothed mass ", ii
        print("smoothed IM...")

    # noise level calc
    tic1 = savitzky_golay(tic)
    tic2 = tophat(tic1, struct="1.5m")
    noise_level = window_analyzer(tic2)
    print("Noise level in TIC: ", noise_level)

    # get the list of Peak objects using BB peak detection / deconv
    pl = BillerBiemann(im, args.window, args.scans)
    print("Initial number of Peaks found:", len(pl))

    # filter down the peaks:
    # - First: remove any masses from each peak that have intensity less than
    #   r percent of the max intensity in that peak
    # - Second: remove any peak where there are less than n ions with intensity
    #   above the cutoff
    pl2 = rel_threshold(pl, percent=args.minintensity)
    pl3 = num_ions_threshold(pl2, n=args.minions, cutoff=noise_level * args.noisemult)
    print("Peaks remaining after filtering:", len(pl3))

    for peak in pl3:
        # peak.null_mass(73)
        peak.null_mass(207)  # column bleed
        peak.null_mass(84)  # solvent tailing

        area = peak_sum_area(im, peak)  # get the TIC area for this peak
        peak.set_area(area)
        area_dict = peak_top_ion_areas(im, peak, args.topions)  # get top n ion areas for this peak
        peak.set_ion_areas(area_dict)

    return pl3
def _filtered_peak_list(im_i, _peak_list):
    peak_list = copy.deepcopy(_peak_list)
    # do peak detection on pre-trimmed data

    # trim by relative intensity
    apl = rel_threshold(peak_list, 2, copy_peaks=False)

    # trim by threshold
    new_peak_list = num_ions_threshold(apl, 3, 3000, copy_peaks=False)

    # ignore TMS ions and set mass range
    for peak in new_peak_list:
        peak.crop_mass(50, 400)
        peak.null_mass(73)
        peak.null_mass(147)

        # find area
        area = peak_sum_area(im_i, peak)
        peak.area = area
        area_dict = peak_top_ion_areas(im_i, peak)
        peak.ion_areas = area_dict

    return new_peak_list
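# Hedged usage sketch for the helper above: im_i is assumed to be a smoothed,
# baseline-corrected IntensityMatrix, and the BillerBiemann parameters
# (points=9, scans=2) are illustrative, mirroring the other snippets here.
from pyms.BillerBiemann import BillerBiemann

def example_filtered_peaks(im_i):
    raw_peaks = BillerBiemann(im_i, points=9, scans=2)  # initial peak detection
    return _filtered_peak_list(im_i, raw_peaks)  # threshold, crop and quantify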
# trim by threshold
peak_list = num_ions_threshold(apl, n, t)
print("\t -> Number of Peaks found:", len(peak_list))

print("\t -> Executing peak post-processing and quantification...")

# ignore TMS ions and use same mass range for all experiments
for peak in peak_list:
    peak.crop_mass(50, 540)
    peak.null_mass(73)
    peak.null_mass(147)

    # find peak areas
    area = peak_sum_area(im, peak)
    peak.area = area
    area_dict = peak_top_ion_areas(im, peak)
    peak.set_ion_areas(area_dict)

# create an experiment
expr = Experiment(expr_code, peak_list)

# use same retention time range for all experiments
lo_rt_limit = "6.5m"
hi_rt_limit = "21m"

print(f"\t -> Selecting retention time range between '{lo_rt_limit}' and '{hi_rt_limit}'")

expr.sele_rt_range([lo_rt_limit, hi_rt_limit])

# store processed data as experiment object
output_file = "output/" + expr_code + ".expr"
def test_max_bound_errors(self, im_i, peak, obj):
    with pytest.raises(TypeError):
        peak_top_ion_areas(im_i, peak, max_bound=obj)
def test_n_top_ions_errors(self, im_i, peak, obj):
    with pytest.raises(TypeError):
        peak_top_ion_areas(im_i, peak, n_top_ions=obj)
def test_peak_errors(self, im_i, obj):
    with pytest.raises(TypeError):
        peak_top_ion_areas(im_i, obj)
def test_im_errors(self, peak, obj):
    with pytest.raises(TypeError):
        peak_top_ion_areas(obj, peak)
def test_align_2_alignments(A1, pyms_datadir, tmp_pathplus):
    expr_list = []

    for jcamp_file in geco_codes:
        im = build_intensity_matrix_i(JCAMP_reader(pyms_datadir / f"{jcamp_file}.JDX"))

        # Intensity matrix size (scans, masses)
        n_scan, n_mz = im.size

        # noise filter and baseline correct
        for ii in range(n_mz):
            ic = im.get_ic_at_index(ii)
            ic_smooth = savitzky_golay(ic)
            ic_bc = tophat(ic_smooth, struct="1.5m")
            im.set_ic_at_index(ii, ic_bc)

        peak_list = BillerBiemann(im, points=9, scans=2)
        apl = rel_threshold(peak_list, 2)
        new_peak_list = num_ions_threshold(apl, 3, 3000)

        # ignore TMS ions and set mass range
        for peak in new_peak_list:
            peak.crop_mass(50, 400)
            peak.null_mass(73)
            peak.null_mass(147)

            # find area
            area = peak_sum_area(im, peak)
            peak.area = area
            area_dict = peak_top_ion_areas(im, peak)
            peak.ion_areas = area_dict

        expr = Experiment(jcamp_file, new_peak_list)

        # set time range for all experiments
        expr.sele_rt_range(["6.5m", "21m"])
        expr_list.append(expr)

    F2 = exprl2alignment(expr_list)
    T2 = PairwiseAlignment(F2, Dw, Gw)
    A2 = align_with_tree(T2, min_peaks=2)

    # top_ion_list = A2.common_ion()
    # A2.write_common_ion_csv(tmp_pathplus/'area1.csv', top_ion_list)

    # between replicates alignment parameters
    Db = 10.0  # rt modulation
    Gb = 0.30  # gap penalty

    print("Aligning input {1,2}")
    T9 = PairwiseAlignment([A1, A2], Db, Gb)
    A9 = align_with_tree(T9)

    A9.write_csv(tmp_pathplus / "rt.csv", tmp_pathplus / "area.csv")

    aligned_peaks = list(filter(None, A9.aligned_peaks()))
    store_peaks(aligned_peaks, tmp_pathplus / "peaks.bin")

    top_ion_list = A9.common_ion()
    A9.write_common_ion_csv(tmp_pathplus / "area.csv", top_ion_list)
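# Note: Dw and Gw are referenced but not defined in the test above; they are
# assumed to be module-level within-replicate alignment parameters. The values
# below are illustrative only, following the pattern of Db and Gb in the test.
Dw = 2.5  # within-replicate rt modulation (assumed value)
Gw = 0.30  # within-replicate gap penalty (assumed value)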
# trim by threshold
peak_list = num_ions_threshold(apl, n, t)
print("\t -> Number of Peaks found:", len(peak_list))

print("\t -> Executing peak post-processing and quantification...")

# ignore TMS ions and use same mass range for all experiments
for peak in peak_list:
    peak.crop_mass(50, 540)
    peak.null_mass(73)
    peak.null_mass(147)

    # find peak areas
    area = peak_sum_area(im, peak)
    peak.set_area(area)
    area_dict = peak_top_ion_areas(im, peak)
    peak.set_ion_areas(area_dict)

# create an experiment
expr = Experiment(expr_code, peak_list)

# use same retention time range for all experiments
lo_rt_limit = "6.5m"
hi_rt_limit = "21m"

print("\t -> Selecting retention time range between '%s' and '%s'" % (lo_rt_limit, hi_rt_limit))

expr.sele_rt_range([lo_rt_limit, hi_rt_limit])

# store processed data as experiment object