def test_get_third_highest_mz(peak):
    """Check ``get_third_highest_mz`` on the fixture peak and on peaks built from floats only."""
    # The fixture peak is expected to report m/z 59 as an integer.
    assert peak.get_third_highest_mz() == 59
    assert isinstance(peak.get_third_highest_mz(), int)

    # For these constructor arguments the method is expected to return ``None``
    # (no exception is expected here).
    for ctor_args in ((test_float, test_float), (test_float,)):
        assert Peak(*ctor_args).get_third_highest_mz() is None
def test_another_peak(im_i, peak):
    """Build a second peak at 31.44 minutes and check it differs from the fixture peak."""
    rt_minutes = 31.44

    # Index of the scan at the retention time (converted to seconds), then its spectrum
    scan_index = im_i.get_index_at_time(rt_minutes * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    other_peak = Peak(rt_minutes, spectrum, minutes=True)

    assert other_peak.rt == 1886.4
    assert other_peak.UID == "207-68-42-1886.40"
    assert peak.UID != other_peak.UID
def test_UID(peak):
    """
    Check the peak's unique ID.

    The UID consists of the two most abundant ions and their ratio,
    and the retention time (in the format set by ``minutes=True`` or ``False``).
    """
    assert isinstance(peak.UID, str)
    assert peak.UID == "131-73-42-12.34"

    # A peak created from a retention time alone must still expose a string UID
    rt_only_peak = Peak(test_float)
    assert isinstance(rt_only_peak.UID, str)
def test_ic_mass():
    """Exercise the ``ic_mass`` property of a peak constructed with a single mass (55)."""
    peak = Peak(12.34, 55)
    initial_uid = peak.UID

    assert isinstance(peak.ic_mass, Number)
    assert peak.ic_mass == 55

    # After reassigning the mass, the spectrum is still unset and the UID has changed
    peak.ic_mass = 12
    assert peak.mass_spectrum is None
    assert peak.UID != initial_uid
    assert peak.ic_mass == 12

    peak.ic_mass = 1234
    assert peak.ic_mass == 1234

    # Errors: each of these values must be rejected with a TypeError
    rejected_values = (*test_sequences, test_string, test_dict)
    for bad_value in rejected_values:
        with pytest.raises(TypeError):
            peak.ic_mass = bad_value
def test_Peak(im_i, peak):
    """Check which argument combinations the ``Peak`` and ``ICPeak`` constructors accept."""
    assert isinstance(peak, Peak)

    # Known TIC peak at RT 31.17 minutes: find the scan nearest to that time
    # (converted to seconds) and fetch its MassSpectrum object.
    rt_minutes = 31.17
    scan_index = im_i.get_index_at_time(rt_minutes * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    # Valid constructions; these only need to complete without raising
    Peak(rt_minutes)
    Peak(rt_minutes, spectrum, outlier=True)
    Peak(rt_minutes, spectrum, minutes=True)

    # Errors: the same values are rejected as retention time and as spectrum
    for bad_value in (test_string, *test_lists, test_dict):
        with pytest.raises(TypeError):
            Peak(bad_value, spectrum, minutes=True)  # type: ignore

        with pytest.raises(TypeError):
            Peak(test_float, bad_value, minutes=False)  # type: ignore

    # A plain number is accepted as the second argument by both classes
    for second_arg in (test_int, test_float):
        Peak(test_float, second_arg)
        ICPeak(test_float, second_arg)
def test_area(im_i, peak):
    """Check the ``area`` property: a computed area round-trips and invalid values are rejected."""
    # Work on a deep copy so the shared fixture is left untouched
    working_peak = copy.deepcopy(peak)

    # determine and set area
    computed_area = peak_sum_area(im_i, working_peak)
    assert isinstance(computed_area, float)

    working_peak.area = computed_area
    assert working_peak.area == computed_area
    assert isinstance(working_peak.area, float)

    scan_index = im_i.get_index_at_time(31.17 * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    # Values of the wrong type raise TypeError
    for bad_value in (test_string, test_dict, test_list_strs, test_list_ints):
        with pytest.raises(TypeError):
            Peak(test_float, spectrum).area = bad_value

    # A negative area raises ValueError
    with pytest.raises(ValueError):
        Peak(test_float, spectrum).area = -1
def peak_top_ion_areas(
    im: IntensityMatrix,
    peak: Peak,
    n_top_ions: int = 5,
    max_bound: int = 0,
) -> Dict[float, float]:
    """
    Calculate and return the ion areas of the ``n_top_ions`` most abundant ions in the peak.

    The top ions are taken from ``peak.top_ions(n_top_ions)``; for each one the ion
    chromatogram is read from ``im`` and integrated with ``ion_area`` around the scan
    index of the peak's retention time.

    :param im: The originating IntensityMatrix object.
    :param peak: The peak whose top ions are integrated.
    :param n_top_ions: Number of top ions to return areas for.
    :param max_bound: Optional value to limit size of detected bound.

    :return: Dictionary of ``ion : ion_area`` pairs.

    :raises TypeError: If any argument is not of the expected type.

    :authors: Sean O'Callaghan, Dominic Davis-Foster (type assertions)
    """
    # Validate arguments in declaration order; the first failing check raises.
    argument_checks = (
        (im, IntensityMatrix, "'im' must be an IntensityMatrix object"),
        (peak, Peak, "'peak' must be a Peak object"),
        (n_top_ions, int, "'n_top_ions' must be an integer"),
        (max_bound, int, "'max_bound' must be an integer"),
    )
    for value, expected_type, message in argument_checks:
        if not isinstance(value, expected_type):
            raise TypeError(message)

    # Scan index corresponding to the peak's retention time
    apex_index = im.get_index_at_time(peak.rt)

    areas = {}
    for ion in peak.top_ions(n_top_ions):
        # ion_area needs a plain list, not a numpy array, hence .tolist()
        intensities = im.get_ic_at_mass(ion).intensity_array.tolist()
        # Only the area is kept; the bounds and shares are not needed here
        area, _left, _right, _l_share, _r_share = ion_area(intensities, apex_index, max_bound)
        # keyed by the actual ion mass
        areas[ion] = area

    return areas
def test_null_mass(peak):
    """Check that ``null_mass`` zeroes the requested masses, changes the UID and validates input."""
    # Work on a deep copy so the shared fixture is left untouched
    peak = copy.deepcopy(peak)
    uid_before = peak.UID

    peak.null_mass(73)
    peak.null_mass(147.0)

    # Both masses must now have zero intensity in the spectrum
    for mass in (73, 147):
        position = peak.mass_spectrum.mass_list.index(mass)
        assert peak.mass_spectrum.mass_spec[position] == 0

    assert peak.UID != uid_before

    # Errors
    with pytest.raises(ValueError, match="Mass spectrum is unset."):
        Peak(test_float).null_mass(1)

    for bad_value in (test_string, *test_lists, test_dict):
        with pytest.raises(TypeError):
            Peak(test_float, peak.mass_spectrum).null_mass(bad_value)  # type: ignore

    # These two masses are expected to raise IndexError
    for rejected_mass in (1, 10000):
        with pytest.raises(IndexError):
            Peak(test_float, peak.mass_spectrum).null_mass(rejected_mass)
def test_mass_spectrum(peak, im_i):
    """Check the ``mass_spectrum`` property: getter, setter and type validation."""
    scan_index = im_i.get_index_at_time(31.17 * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    # The fixture peak holds the spectrum of that scan and has no single-ion mass
    assert isinstance(peak.mass_spectrum, MassSpectrum)
    assert peak.mass_spectrum == spectrum
    assert peak.ic_mass is None

    # Assigning a spectrum to a peak created without one
    first_peak = Peak(test_float)
    assert first_peak.mass_spectrum is None
    first_peak.mass_spectrum = spectrum
    assert first_peak.mass_spectrum == spectrum
    assert first_peak.ic_mass is None

    # Same again, this time checking the types of what is stored
    second_peak = Peak(test_float)
    assert second_peak.mass_spectrum is None
    second_peak.mass_spectrum = spectrum
    assert isinstance(second_peak.mass_spectrum, MassSpectrum)
    assert isinstance(second_peak.mass_spectrum.mass_spec, list)

    # Errors: each of these values must be rejected with a TypeError
    for bad_value in (test_string, *test_numbers, test_dict, *test_lists):
        with pytest.raises(TypeError):
            second_peak.mass_spectrum = bad_value
def composite_peak(peak_list: List, ignore_outliers: bool = False) -> Peak:
    """
    Create a peak that consists of a composite spectrum from all spectra in the list of peaks.

    Each contributing spectrum is rescaled so that its largest intensity is 100
    (an all-zero spectrum stays all-zero); the composite spectrum and retention
    time are the plain averages over the contributing peaks.

    :param peak_list: A list of peak objects
    :type peak_list: list
    :param ignore_outliers: If ``True``, peaks whose ``is_outlier`` is true are left out of
        the average. When the list holds more than 3 peaks, peaks flagged by
        ``median_outliers`` on the retention times are left out as well.
    :type ignore_outliers: bool, optional

    :return: The composite peak, or ``None`` if no peak contributed to the average.
    :rtype: pyms.Peak.Class.Peak

    :raises TypeError: If ``peak_list`` is not a list of Peak objects.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (type assertions)
    """
    if not is_peak_list(peak_list):
        raise TypeError("'peak_list' must be a list of Peak objects")

    # DK: first mark peaks in the list that are outliers by RT,
    # but only if there are more than 3 peaks in the list
    rt_outlier_indices = set()
    if ignore_outliers and len(peak_list) > 3:
        rts = [peak.rt for peak in peak_list]
        for idx, flagged in enumerate(median_outliers(rts)):
            if flagged:
                # Remember the index locally: the averaging loop below reads
                # ``is_outlier``, which the legacy ``isoutlier`` marker never
                # influenced, so RT outliers used to be averaged in regardless.
                rt_outlier_indices.add(idx)
                # Legacy marker attribute, kept for backward compatibility.
                peak_list[idx].isoutlier = True

    count = 0
    avg_rt = 0
    avg_spec = None
    mass_list = None

    # DK: the average RT and average mass spec for the compound peak is now
    # calculated from peaks that are NOT outliers. This should improve the
    # ability to order peaks and figure out badly aligned entries
    for idx, peak in enumerate(peak_list):
        if peak is None:
            continue
        if ignore_outliers and (peak.is_outlier or idx in rt_outlier_indices):
            continue

        ms = peak.mass_spectrum
        spec = numpy.array(ms.mass_spec, dtype='d')

        if avg_spec is None:
            # The first contributing peak fixes the spectrum length and mass list
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list

        # scale all intensities to [0,100]
        max_spec = max(spec) / 100.0
        if max_spec > 0:
            spec = spec / max_spec
        else:
            spec = spec * 0

        avg_rt += peak.rt
        avg_spec += spec
        count += 1

    if count == 0:
        # Nothing contributed (empty list, or every peak was excluded)
        return None

    new_ms = MassSpectrum(mass_list, avg_spec / count)
    return Peak(avg_rt / count, new_ms)
def fill_peaks(data, peak_list: List, D: float, minutes: bool = False) -> List[Peak]:
    """
    Gets the best matching Retention Time and spectra from 'data' for each peak in the peak list.

    For every peak, the scans of ``data`` within a retention-time window around the
    peak are scored by the cosine similarity between the scan and the peak's spectrum,
    multiplied by a Gaussian retention-time weight. The best scoring scan becomes the
    new peak.

    The width of the Gaussian weight is ``D``, reduced when an adjacent peak in
    ``peak_list`` is close enough that its weight would otherwise exceed 0.5.
    Only neighbours that actually exist are considered: the first peak has no left
    neighbour and the last peak has no right neighbour.

    :param data: A data IntensityMatrix that has the same mass range as the peaks in the peak list
    :type data: pyms.IntensityMatrix.IntensityMatrix
    :param peak_list: A list of peak objects
    :type peak_list: list
    :param D: Peak width standard deviation in seconds. Determines search window width.
    :type D: float
    :param minutes: Return retention time as minutes
    :type minutes: bool, optional

    :return: List of Peak Objects
    :rtype: list of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``peak_list`` is not a list of Peak objects or ``D`` is not a float.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (type assertions)
    """
    if not is_peak_list(peak_list):
        raise TypeError("'peak_list' must be a list of Peak objects")
    if not isinstance(D, float):
        raise TypeError("'D' must be a float")

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0 * math.log(_PEN))  # loop-invariant, computed once

    datamat = data.intensity_array
    mass_list = data.mass_list
    datatimes = data.time_list
    minrt = min(datatimes)
    maxrt = max(datatimes)

    # Distance used for a neighbour that does not exist. Previously the neighbour
    # RTs were seeded with 0 and carried over between iterations, so the first
    # peak measured its gap from t=0 and the last peak saw its own RT as its
    # right neighbour (gap 0, which silently disabled the adjustment).
    no_neighbour = float("inf")
    last_index = len(peak_list) - 1

    new_peak_list = []

    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.mass_spectrum.mass_spec, dtype='d')
        rt = peak.rt
        sum_spec_squared = numpy.sum(spec**2, axis=0)

        # get distance to neighbouring RTs
        left_gap = abs(rt - peak_list[ii - 1].rt) if ii > 0 else no_neighbour
        right_gap = abs(rt - peak_list[ii + 1].rt) if ii < last_index else no_neighbour

        # adjust weighting for neighbours
        rtclose = min(left_gap, right_gap)
        Dclose = rtclose / pen_scale
        if Dclose > 0:
            Dclose = min(D, Dclose)
        else:
            # a neighbour at exactly the same RT: fall back to the unadjusted width
            Dclose = D

        # Get bounds, clamped to the time range of the data
        rtlow = max(rt - cutoff, minrt)
        lowii = data.get_index_at_time(rtlow)

        rtup = min(rt + cutoff, maxrt)
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii + 1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii + 1], dtype='d')

        sum_summat_squared = numpy.sum(submat**2, axis=1)

        # transpose spec (as matrix) for dot product
        spec = numpy.transpose([spec])
        # dot product on rows
        toparr = numpy.dot(submat, spec)
        botarr = numpy.sqrt(sum_spec_squared * sum_summat_squared)

        # convert back to 1-D array
        toparr = toparr.ravel()

        # scaled dot product of each scan
        cosarr = toparr / botarr

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
def test_bounds(peak):
    """Check the ``bounds`` property and ``set_bounds``; bounds are always read back as a tuple."""
    peak = copy.copy(peak)
    expected_bounds = (11, 12, 13)

    # Setter
    peak.bounds = (11, 12, 13)

    wrong_type_values = (test_string, *test_numbers, test_dict, ['a', 'b', 'c'], test_tuple)
    for bad_value in wrong_type_values:
        with pytest.raises(TypeError):
            peak.bounds = bad_value

    wrong_length_values = (*test_lists, (1, 2), [1, 2, 3, 4])
    for bad_value in wrong_length_values:
        with pytest.raises(ValueError, match="'Peak.bounds' must have exactly 3 elements"):
            peak.bounds = bad_value

    # Getter: the rejected assignments above must not have changed the value
    assert peak.bounds == expected_bounds
    assert isinstance(peak.bounds, tuple)

    # A list is accepted by the setter and read back as a tuple
    list_assigned_peak = Peak(test_float)
    list_assigned_peak.bounds = [11, 12, 13]  # type: ignore
    assert list_assigned_peak.bounds == expected_bounds
    assert isinstance(list_assigned_peak.bounds, tuple)

    # set_bounds
    method_assigned_peak = Peak(test_float)
    method_assigned_peak.set_bounds(11, 12, 13)
    assert method_assigned_peak.bounds == expected_bounds
    assert isinstance(method_assigned_peak.bounds, tuple)

    for bad_value in (*test_sequences, test_string, test_dict, test_float):
        # Printed so the offending value appears in pytest's captured output on failure
        print(bad_value)

        # Substitute the bad value into each of the three positions in turn
        for position in range(3):
            call_args = [11, 12, 13]
            call_args[position] = bad_value
            with pytest.raises(TypeError):
                method_assigned_peak.set_bounds(*call_args)
def test_equality(peak):
    """Check ``==`` and ``!=`` between the fixture peak and freshly constructed peaks."""
    # Same retention time and spectrum: compares equal
    same_peak = Peak(peak.rt, peak.mass_spectrum)
    assert peak == same_peak

    # Same spectrum but retention time 50: compares unequal
    other_rt_peak = Peak(50, peak.mass_spectrum)
    assert peak != other_rt_peak
def test_outlier(peak):
    """Check the ``is_outlier`` flag on the fixture peak and on a peak created with ``outlier=True``."""
    # The fixture peak is not an outlier, and the flag is a real bool
    assert isinstance(peak.is_outlier, bool)
    assert peak.is_outlier is False

    # The constructor argument is reflected by the property
    flagged_peak = Peak(12.34, outlier=True)
    assert flagged_peak.is_outlier is True
def test_get_third_highest_mz(peak):
    """Check ``get_third_highest_mz`` on the fixture peak and on a peak with no mass spectrum."""
    # The fixture peak is expected to report m/z 59 as an integer.
    assert peak.get_third_highest_mz() == 59
    assert isinstance(peak.get_third_highest_mz(), int)

    # A peak created from a retention time alone must raise
    expected_message = "Mass spectrum is unset."
    with pytest.raises(ValueError, match=expected_message):
        Peak(test_float).get_third_highest_mz()
def test_bounds(peak):
    """Check the ``bounds`` property and ``set_bounds``; here a list assigned to ``bounds`` stays a list."""
    peak = copy.copy(peak)

    # Setter
    peak.bounds = (11, 12, 13)

    wrong_type_values = (test_string, *test_numbers, test_dict, ["a", "b", "c"], test_tuple)
    for bad_value in wrong_type_values:
        with pytest.raises(TypeError):
            peak.bounds = bad_value

    wrong_length_values = (*test_lists, (1, 2), [1, 2, 3, 4])
    for bad_value in wrong_length_values:
        with pytest.raises(ValueError):
            peak.bounds = bad_value

    # Getter: the rejected assignments above must not have changed the value
    assert peak.bounds == (11, 12, 13)
    assert isinstance(peak.bounds, tuple)

    # A list assigned through the property is read back unchanged, as a list
    list_assigned_peak = Peak(test_float)
    list_assigned_peak.bounds = [11, 12, 13]
    assert list_assigned_peak.bounds == [11, 12, 13]
    assert isinstance(list_assigned_peak.bounds, list)

    # set_bounds
    method_assigned_peak = Peak(test_float)
    method_assigned_peak.set_bounds(11, 12, 13)
    assert method_assigned_peak.bounds == (11, 12, 13)
    assert isinstance(method_assigned_peak.bounds, tuple)

    for bad_value in (*test_sequences, test_string, test_dict, test_float):
        with pytest.raises(TypeError):
            # Printed so the offending value appears in pytest's captured output on failure
            print(bad_value)
            method_assigned_peak.set_bounds(bad_value, 12, 13)

        with pytest.raises(TypeError):
            method_assigned_peak.set_bounds(11, bad_value, 13)

        with pytest.raises(TypeError):
            method_assigned_peak.set_bounds(11, 12, bad_value)