def test_num_ions_threshold(self, peak_list, tic):
    """
    Filter the peak list, first by removing all intensities in a peak less
    than a given relative threshold, then by removing all peaks that have
    fewer than a given number of ions above a given value.

    The filtering is exercised twice: once with a fixed cutoff of 10000 and
    once with a cutoff taken from ``window_analyzer(tic)``.

    :param peak_list: the unfiltered peak list (fixture).
    :param tic: passed to ``window_analyzer`` to obtain the noise level (fixture).
    """

    # --- Fixed cutoff -----------------------------------------------------
    # trim by relative intensity
    pl = rel_threshold(peak_list, 2)

    # trim by threshold
    new_peak_list = num_ions_threshold(pl, 3, 10000)

    assert isinstance(new_peak_list, list)
    assert isinstance(new_peak_list[0], Peak)

    assert len(new_peak_list) == 215
    assert len(new_peak_list) <= len(peak_list)
    assert len(new_peak_list) <= len(pl)

    # --- With window_analyzer ---------------------------------------------
    # estimate noise level from the TIC, used later to
    # discern true signal peaks
    noise_level = window_analyzer(tic)

    # trim by relative intensity
    apl = rel_threshold(peak_list, 1)

    # trim by number of ions above threshold.
    # The result is kept in its own name rather than rebinding ``peak_list``,
    # so that the size comparisons below are made against the real inputs
    # instead of comparing the filtered list with itself.
    noise_filtered = num_ions_threshold(apl, 3, noise_level)

    assert isinstance(noise_filtered, list)
    assert isinstance(noise_filtered[0], Peak)

    assert len(noise_filtered) in (87, 88)
    assert len(noise_filtered) <= len(peak_list)
    assert len(noise_filtered) <= len(apl)
def expr_list(pyms_datadir):
    """
    Build, dump and reload one ``Experiment`` per entry of ``eley_codes``.

    For every code the matching ``<code>.JDX`` file under ``pyms_datadir`` is
    read, each ion chromatogram is smoothed and baseline corrected, peaks are
    detected and filtered, and the resulting experiment is dumped into a
    temporary directory. The dumped files are then loaded back and the list
    of loaded experiments is yielded.

    :param pyms_datadir: directory containing the ``.JDX`` input files.
    """

    with tempfile.TemporaryDirectory() as scratch:
        dump_dir = pathlib.Path(scratch)

        # Create experiment files
        for code in eley_codes:
            matrix = build_intensity_matrix_i(JCAMP_reader(pyms_datadir / f"{code}.JDX"))

            # Intensity matrix size is (scans, masses); only the mass count is needed
            _, mass_count = matrix.size

            # noise filter and baseline correct each ion chromatogram in place
            for idx in range(mass_count):
                smoothed = savitzky_golay(matrix.get_ic_at_index(idx))
                matrix.set_ic_at_index(idx, tophat(smoothed, struct="1.5m"))

            detected = BillerBiemann(matrix, points=9, scans=2)

            print('#')
            kept = num_ions_threshold(rel_threshold(detected, 2), 3, 3000)
            print('#')

            for pk in kept:
                # ignore TMS ions and set mass range
                pk.crop_mass(50, 400)
                for tms_mass in (73, 147):
                    pk.null_mass(tms_mass)

                # find area
                pk.area = peak_sum_area(matrix, pk)
                pk.ion_areas = peak_top_ion_areas(matrix, pk)

            experiment = Experiment(code, kept)

            # set time range for all experiments
            experiment.sele_rt_range(["6.5m", "21m"])

            print('#')
            experiment.dump(dump_dir / f"{code}.expr")
            print('#')

        # Load experiments back from the dumped files
        loaded = []
        for code in eley_codes:
            reloaded = load_expr(dump_dir / f"{code}.expr")
            assert isinstance(reloaded, Experiment)
            loaded.append(reloaded)

        yield loaded
def _filtered_peak_list(im_i, _peak_list):
    """
    Return a filtered, area-annotated copy of ``_peak_list``.

    The input list is deep-copied first, so the filtering (called with
    ``copy_peaks=False``) and the per-peak edits below act on the copy.

    :param im_i: intensity matrix passed to the area functions.
    :param _peak_list: the peak list to copy and filter.

    :return: the peaks remaining after both threshold filters, each with
        ``area`` and ``ion_areas`` set.
    """

    working_copy = copy.deepcopy(_peak_list)

    # trim by relative intensity, then by number of ions above the threshold
    survivors = num_ions_threshold(
        rel_threshold(working_copy, 2, copy_peaks=False),
        3,
        3000,
        copy_peaks=False,
    )

    for candidate in survivors:
        # ignore TMS ions and set mass range
        candidate.crop_mass(50, 400)
        for tms_mass in (73, 147):
            candidate.null_mass(tms_mass)

        # find area
        candidate.area = peak_sum_area(im_i, candidate)
        candidate.ion_areas = peak_top_ion_areas(im_i, candidate)

    return survivors
def test_percent_errors(self, obj, peak_list):
    """Check that ``rel_threshold`` raises ``TypeError`` when ``obj`` is passed as ``percent``."""

    expect_type_error = pytest.raises(TypeError)

    with expect_type_error:
        rel_threshold(peak_list, percent=obj)
def test_peak_list_errors(self, obj):
    """Check that ``rel_threshold`` raises ``TypeError`` when ``obj`` is passed as the peak list."""

    expect_type_error = pytest.raises(TypeError)

    with expect_type_error:
        rel_threshold(obj)
# Use Biller and Biemann technique to find apexing ions at a scan. # Find apex oven 9 points and combine with neighbouring peak if two scans apex # next to each other. peak_list = BillerBiemann(im, points=9, scans=2) print("Number of peaks found: ", len(peak_list)) # Filter the peak list, # first by removing all intensities in a peak less than a given relative # threshold, # then by removing all peaks that have less than a given number of ions above # a given value # Parameters # percentage ratio of ion intensity to max ion intensity r = 2 # minimum number of ions, n n = 3 # greater than or equal to threshold, t t = 10000 # trim by relative intensity pl = rel_threshold(peak_list, r) # trim by threshold new_peak_list = num_ions_threshold(pl, n, t) print("Number of filtered peaks: ", len(new_peak_list))
# smooth data for ii in range(n_mz): ic = im.get_ic_at_index(ii) ic1 = savitzky_golay(ic) ic_smooth = savitzky_golay(ic1) ic_base = tophat(ic_smooth, struct="1.5m") im.set_ic_at_index(ii, ic_base) # do peak detection on pre-trimmed data # get the list of Peak objects pl = BillerBiemann(im, points, scans) # trim by relative intensity apl = rel_threshold(pl, r) # trim by threshold peak_list = num_ions_threshold(apl, n, t) print("\t -> Number of Peaks found:", len(peak_list)) print("\t -> Executing peak post-procesing and quantification...") # ignore TMS ions and use same mass range for all experiments for peak in peak_list: peak.crop_mass(50,540) peak.null_mass(73) peak.null_mass(147) # find peak areas area = peak_sum_area(im, peak)
def test_align_2_alignments(A1, pyms_datadir, tmp_pathplus):
    """
    Align a freshly built alignment with ``A1`` and write the results out.

    One ``Experiment`` is built per entry of ``geco_codes``; these are aligned
    into ``A2``, which is then aligned with ``A1``. The combined alignment is
    written to CSV files and its aligned peaks are stored under
    ``tmp_pathplus``.

    :param A1: an existing alignment (fixture).
    :param pyms_datadir: directory containing the ``.JDX`` input files.
    :param tmp_pathplus: directory the output files are written to.
    """

    experiments = []

    for code in geco_codes:
        matrix = build_intensity_matrix_i(JCAMP_reader(pyms_datadir / f"{code}.JDX"))

        # Intensity matrix size is (scans, masses); only the mass count is needed
        _, mass_count = matrix.size

        # noise filter and baseline correct each ion chromatogram in place
        for idx in range(mass_count):
            smoothed = savitzky_golay(matrix.get_ic_at_index(idx))
            matrix.set_ic_at_index(idx, tophat(smoothed, struct="1.5m"))

        detected = BillerBiemann(matrix, points=9, scans=2)
        kept = num_ions_threshold(rel_threshold(detected, 2), 3, 3000)

        for pk in kept:
            # ignore TMS ions and set mass range
            pk.crop_mass(50, 400)
            for tms_mass in (73, 147):
                pk.null_mass(tms_mass)

            # find area
            pk.area = peak_sum_area(matrix, pk)
            pk.ion_areas = peak_top_ion_areas(matrix, pk)

        experiment = Experiment(code, kept)

        # set time range for all experiments
        experiment.sele_rt_range(["6.5m", "21m"])

        experiments.append(experiment)

    # within-replicate alignment (Dw and Gw come from module scope)
    within_tree = PairwiseAlignment(exprl2alignment(experiments), Dw, Gw)
    A2 = align_with_tree(within_tree, min_peaks=2)

    # between replicates alignment parameters
    between_rt_modulation = 10.0
    between_gap_penalty = 0.30

    print("Aligning input {1,2}")
    between_tree = PairwiseAlignment([A1, A2], between_rt_modulation, between_gap_penalty)
    combined = align_with_tree(between_tree)

    rt_csv = tmp_pathplus / "rt.csv"
    area_csv = tmp_pathplus / "area.csv"
    combined.write_csv(rt_csv, area_csv)

    # drop falsy entries before storing the aligned peaks
    store_peaks(
        [entry for entry in combined.aligned_peaks() if entry],
        tmp_pathplus / "peaks.bin",
    )

    # NOTE(review): this targets the same "area.csv" path as write_csv above --
    # confirm that replacing that file is intended
    combined.write_common_ion_csv(tmp_pathplus / "area.csv", combined.common_ion())
# Now the Biller and Biemann based technique can be applied to detect peaks. # In[4]: from pyms.BillerBiemann import BillerBiemann pl = BillerBiemann(im, points=9, scans=2) len(pl) # Trim the peak list by relative intensity # In[5]: from pyms.BillerBiemann import num_ions_threshold, rel_threshold apl = rel_threshold(pl, percent=2) len(apl) # Trim the peak list by noise threshold # In[6]: peak_list = num_ions_threshold(apl, n=3, cutoff=3000) len(peak_list) # Set the mass range, remove unwanted ions and estimate the peak area # In[7]: from pyms.Peak.Function import peak_sum_area
# ## Example: Peak List Filtering # # There are two functions to filter the list of Peak objects. # # The first, |rel_threshold()| modifies the mass spectrum stored in each peak so # any intensity that is less than a given percentage of the maximum intensity for the peak is removed. # # The second, |num_ions_threshold()|, removes any peak that has less than a given # number of ions above a given threshold. # # Once the peak list has been constructed, the filters can be applied as follows: # In[8]: from pyms.BillerBiemann import num_ions_threshold, rel_threshold pl = rel_threshold(peak_list, percent=2) print(pl[:10]) # In[9]: new_peak_list = num_ions_threshold(pl, n=3, cutoff=10000) print(new_peak_list[:10]) # In[10]: len(new_peak_list) # The number of detected peaks is now more realistic of what would be expected in # the test sample.
# first by removing all intensities in a peak less than a given relative # threshold, # then by removing all peaks that have less than a given number of ions above # a given value # Parameters # percentage ratio of ion intensity to max ion intensity r = 1 # minimum number of ions, n n = 3 # greater than or equal to threshold, t t = 10000 # trim by relative intensity pl = rel_threshold(peak_list, r) # trim by threshold real_peak_list = num_ions_threshold(pl, n, t) print("Number of filtered peaks in real data: ", len(real_peak_list)) # Set the peak areas for peak in real_peak_list: area = peak_sum_area(real_im, peak) peak.area = area # real_peak_list is PyMassSpec' best guess at the true peak list ################## Run Simulator ###################### # Simulator takes a peak list, time_list and mass_list # and returns an IntensityMatrix object.