def test_main(self, peak, im_i):
    """
    Check the shape and values returned by ``ion_area`` for ``list(range(100))``
    with the apex at index 20.

    ``peak`` and ``im_i`` are accepted but not used in the body.
    """
    result = ion_area(list(range(100)), 20)

    # Shape checks come first so the unpacking below cannot be what fails.
    assert isinstance(result, tuple)
    assert len(result) == 5

    area, left, right, l_share, r_share = result
    assert area == 231
    assert left == 19
    assert right == 1
    assert l_share is False
    assert r_share is True
def missing_peak_finder(
    sample: Sample,
    file_name: str,
    points: int = 3,
    null_ions: Optional[List] = None,
    crop_ions: Optional[List] = None,
    threshold: int = 1000,
    rt_window: float = 1,
    filetype: MissingPeakFiletype = MZML,
):
    r"""
    Integrates raw data around missing peak locations to fill ``NA``\s in the data matrix.

    For every missing peak of ``sample``, candidate maxima of the common ion's
    chromatogram are taken from ``get_maxima_list_reduced``. A candidate is kept
    when its intensity exceeds ``threshold`` and both qualifying ions exceed
    ``threshold / 2`` at the same retention time. The largest ``ion_area`` of the
    kept candidates is stored in ``mp.common_ion_area`` and ``mp.exact_rt`` is set
    to the peak's retention time divided by 60. When no candidate qualifies,
    ``mp.common_ion_area`` is set to :py:obj:`None`.

    The missing peak objects are updated in place; nothing is returned.

    :param sample: The sample object containing missing peaks
    :param file_name: Name of the raw data file
    :param points: Peak finding - Peak if maxima over 'points' number of scans.
        Passed to ``savitzky_golay`` as its second argument.
    :param null_ions: Ions to be deleted in the matrix.
        An empty or missing value falls back to the default.
    :default null_ions: ``[73, 147]``
    :param crop_ions: Range of Ions to be considered.
        An empty or missing value falls back to the default.
    :default crop_ions: ``[50, 540]``
    :param threshold: Minimum intensity of IonChromatogram allowable to fill.
    :param rt_window: Window in seconds around average RT to look for.
    :param filetype: Format of the raw data file; compared against ``NETCDF`` and ``MZML``.

    :raises ValueError: If ``filetype`` is neither ``NETCDF`` nor ``MZML``.

    :author: Sean O'Callaghan
    """

    if not null_ions:
        null_ions = [73, 147]
    if not crop_ions:
        crop_ions = [50, 540]

    # TODO: some error checks on null and crop ions
    # TODO: a for root,files,dirs in os.path.walk(): loop

    print("Sample:", sample.name, "File:", file_name)

    if filetype == NETCDF:
        # this package
        from pyms.GCMS.IO.ANDI import ANDI_reader
        data = ANDI_reader(file_name)

    elif filetype == MZML:
        # this package
        from pyms.GCMS.IO.MZML import mzML_reader
        data = mzML_reader(file_name)

    else:
        # Previously this branch only printed a message, after which the
        # function crashed with UnboundLocalError on ``data``.
        raise ValueError(f"file type not valid: {filetype!r}")

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix (only the m/z count is needed)
    _, n_mz = im.size

    # smooth data: two Savitzky-Golay passes, then top-hat with struct="1.5m"
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    half_threshold = threshold / 2

    for mp in sample.missing_peaks:
        mp_rt = mp.rt
        common_ion = mp.common_ion
        qual_ion_1 = float(mp.qual_ion1)
        qual_ion_2 = float(mp.qual_ion2)

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print("ci = ", common_ion)
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print("qi1 = ", qual_ion_1)
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print("qi2 = ", qual_ion_2)

        ######
        # Integrate the CI around that particular RT
        #######

        # Convert the time window to a number of scans by looking up the scan
        # index at the RT and at (RT - rt_window).
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        print("rt_window = ", points_1 - points_2)

        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt, rt_window_points)

        # Retention times of maxima where the common ion and both qualifying
        # ions are intense enough.
        large_peak_rts = []

        for rt, intens in maxima_list:
            if not intens > threshold:
                continue

            q1_index = qi1_ion_chrom.get_index_at_time(rt)
            q2_index = qi2_ion_chrom.get_index_at_time(rt)

            q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
            q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

            if q1_intensity > half_threshold and q2_intensity > half_threshold:
                large_peak_rts.append(rt)

        print(f"found {len(large_peak_rts):d} peaks above threshold")

        areas = []

        for peak_rt in large_peak_rts:
            apex = ci_ion_chrom.get_index_at_time(peak_rt)
            # ion_area is handed a fresh list on every call, as before.
            ia = ci_ion_chrom.intensity_array.tolist()
            # Only the area is used; the other four fields are ignored.
            area, _left, _right, _l_share, _r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################

        if areas:
            biggest_area = max(areas)
            mp.common_ion_area = biggest_area
            # presumably seconds -> minutes (rt_window is documented in seconds)
            mp.exact_rt = float(mp_rt) / 60.0
            print("found area:", biggest_area, "at rt:", mp_rt)
        else:
            print("Missing peak at rt = ", mp_rt)
            mp.common_ion_area = None
def missing_peak_finder(sample, andi_file, points=7, null_ions=None,
                        crop_ions=None, threshold=100000, rt_window=10):
    """
    @summary: Integrates raw data around missing peak locations
        to fill in NAs in the data matrix

        For every missing peak of the sample, maxima of the common ion's
        chromatogram are taken from get_maxima_list_reduced. A maximum is
        kept when its intensity exceeds 'threshold' and both qualifying
        ions exceed threshold / 2 at the same retention time. The largest
        ion_area of the kept maxima is passed to mp.set_ci_area(); when
        none qualifies, mp.set_ci_area('NA') is called instead.
        Nothing is returned.

    @param sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample
    @param andi_file: Name of the raw data file
    @type andi_file: stringType
    @param points: Peak finding - Peak if maxima over 'points'
        number of scans (Default 7). Passed to savitzky_golay as its
        second argument.
    @type points: intType
    @param null_ions: Ions to be deleted in the matrix
        (Default [73, 207], used when None is given)
    @type null_ions: listType
    @param crop_ions: Range of Ions to be considered
        (Default [45, 300], used when None is given)
    @type crop_ions: listType
    @param threshold: Minimum intensity of IonChromatogram allowable to fill
        missing peak
    @type threshold: intType
    @param rt_window: Window in seconds around average RT to look for
        missing peak
    @type rt_window: floatType

    @author: Sean O'Callaghan
    """

    # None sentinels replace the former mutable list defaults; an explicitly
    # passed list (even an empty one) is used unchanged, as before.
    if null_ions is None:
        null_ions = [73, 207]
    if crop_ions is None:
        crop_ions = [45, 300]

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop

    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Sample: %s andi_file: %s" % (sample.get_name(), andi_file))

    data = ANDI_reader(andi_file)

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix (only the m/z count is needed)
    _, n_mz = im.get_size()

    # smooth data: two Savitzky-Golay passes, then top-hat with struct="1.5m"
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    for mp in sample.get_missing_peaks():
        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)

        ######
        # Integrate the CI around that particular RT
        #######

        # Convert the time window to a number of scans by looking up the scan
        # index at the RT and at (RT - rt_window).
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)

        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt,
                                              rt_window_points)

        # Retention times of maxima where the common ion and both qualifying
        # ions are intense enough.
        large_peak_rts = []

        for rt, intens in maxima_list:
            if not intens > threshold:
                continue

            q1_index = qi1_ion_chrom.get_index_at_time(rt)
            q2_index = qi2_ion_chrom.get_index_at_time(rt)

            q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
            q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

            # NOTE: under Python 2 with an int threshold, "/" floors here.
            if q1_intensity > threshold / 2 and q2_intensity > threshold / 2:
                large_peak_rts.append(rt)

        areas = []

        for peak_rt in large_peak_rts:
            apex = ci_ion_chrom.get_index_at_time(peak_rt)
            # ion_area is handed a fresh list on every call, as before.
            ia = ci_ion_chrom.get_intensity_array().tolist()
            # Only the area is used; the other four fields are ignored.
            area, _left, _right, _l_share, _r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################

        if areas:
            mp.set_ci_area(max(areas))
        else:
            mp.set_ci_area('NA')
def test_tol_errors(self, obj):
    """``ion_area`` must raise :exc:`TypeError` when ``obj`` is passed as ``tol``."""
    intensities = list(range(100))
    apex_index = 20

    with pytest.raises(TypeError):
        ion_area(intensities, apex_index, tol=obj)
def test_max_bound_errors(self, obj):
    """``ion_area`` must raise :exc:`TypeError` when ``obj`` is passed as ``max_bound``."""
    intensities = list(range(100))
    apex_index = 20

    with pytest.raises(TypeError):
        ion_area(intensities, apex_index, max_bound=obj)
def test_apex_errors(self, obj):
    """``ion_area`` must raise :exc:`TypeError` when ``obj`` is passed as the apex (second argument)."""
    intensities = list(range(100))

    with pytest.raises(TypeError):
        ion_area(intensities, obj)
def test_ia_errors(self, obj):
    """``ion_area`` must raise :exc:`TypeError` when ``obj`` is passed as the intensity list (first argument)."""
    apex_index = 20

    with pytest.raises(TypeError):
        ion_area(obj, apex_index)
def missing_peak_finder(sample, filename, points=13, null_ions=None,
                        crop_ions=None, threshold=1000, rt_window=1,
                        filetype='cdf'):
    """
    @summary: Integrates raw data around missing peak locations
        to fill in NAs in the data matrix

        For every missing peak of the sample, maxima of the common ion's
        chromatogram are taken from get_maxima_list_reduced. A maximum is
        kept when its intensity exceeds 'threshold' and both qualifying
        ions exceed threshold / 2 at the same retention time. The largest
        ion_area of the kept maxima is passed to mp.set_ci_area(); when
        none qualifies, mp.set_ci_area('na') is called instead.
        Nothing is returned.

    @param sample: The sample object containing missing peaks
    @type sample: pyms.MissingPeak.Class.Sample
    @param filename: Name of the raw data file
    @type filename: stringType
    @param points: Peak finding - Peak if maxima over 'points'
        number of scans (Default 13). Passed to savitzky_golay as its
        second argument.
    @type points: intType
    @param null_ions: Ions to be deleted in the matrix
        (Default [73, 147], used when None is given)
    @type null_ions: listType
    @param crop_ions: Range of Ions to be considered
        (Default [50, 540], used when None is given)
    @type crop_ions: listType
    @param threshold: Minimum intensity of IonChromatogram allowable to fill
        missing peak
    @type threshold: intType
    @param rt_window: Window in seconds around average RT to look for
        missing peak
    @type rt_window: floatType
    @param filetype: Raw data format: 'cdf' (read with ANDI_reader) or
        'mzml' (read with mzML_reader)
    @type filetype: stringType

    @raise ValueError: if filetype is neither 'cdf' nor 'mzml'

    @author: Sean O'Callaghan
    """

    # None sentinels replace the former mutable list defaults; an explicitly
    # passed list (even an empty one) is used unchanged, as before.
    if null_ions is None:
        null_ions = [73, 147]
    if crop_ions is None:
        crop_ions = [50, 540]

    ### some error checks on null and crop ions

    ### a for root,files,dirs in os.path.walk(): loop

    # Every print below passes one pre-formatted string so the output is the
    # same under the Python 2 print statement and the Python 3 print function.
    print("Sample: %s File: %s" % (sample.get_name(), filename))

    if filetype == 'cdf':
        data = ANDI_reader(filename)
    elif filetype == 'mzml':
        data = mzML_reader(filename)
    else:
        # Previously this branch only printed "file type not valid", after
        # which the function crashed with UnboundLocalError on ``data``.
        raise ValueError("file type not valid: %r" % (filetype,))

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix (only the m/z count is needed)
    _, n_mz = im.get_size()

    # smooth data: two Savitzky-Golay passes, then top-hat with struct="1.5m"
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    for mp in sample.get_missing_peaks():
        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        # The double space after "=" reproduces the original output, where a
        # separator followed a label that already ended in a space.
        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print("ci =  %s" % (common_ion,))
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print("qi1 =  %s" % (qual_ion_1,))
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print("qi2 =  %s" % (qual_ion_2,))

        ######
        # Integrate the CI around that particular RT
        #######

        # Convert the time window to a number of scans by looking up the scan
        # index at the RT and at (RT - rt_window).
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt) - rt_window)
        print("rt_window =  %s" % (points_1 - points_2,))

        rt_window_points = points_1 - points_2

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt,
                                              rt_window_points)

        # Retention times of maxima where the common ion and both qualifying
        # ions are intense enough.
        large_peak_rts = []

        for rt, intens in maxima_list:
            if not intens > threshold:
                continue

            q1_index = qi1_ion_chrom.get_index_at_time(rt)
            q2_index = qi2_ion_chrom.get_index_at_time(rt)

            q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
            q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

            # NOTE: under Python 2 with an int threshold, "/" floors here.
            if q1_intensity > threshold / 2 and q2_intensity > threshold / 2:
                large_peak_rts.append(rt)

        print('found %d peaks above threshold' % len(large_peak_rts))

        areas = []

        for peak_rt in large_peak_rts:
            apex = ci_ion_chrom.get_index_at_time(peak_rt)
            # ion_area is handed a fresh list on every call, as before.
            ia = ci_ion_chrom.get_intensity_array().tolist()
            # Only the area is used; the other four fields are ignored.
            area, _left, _right, _l_share, _r_share = ion_area(ia, apex, 0)
            areas.append(area)

        ########################

        if areas:
            biggest_area = max(areas)
            mp.set_ci_area(biggest_area)
            print("found area: %s at rt: %s" % (biggest_area, mp_rt))
        else:
            print("Missing peak at rt =  %s" % (mp_rt,))
            mp.set_ci_area('na')