Example #1
0
def test_get_third_highest_mz(peak):
    """Check ``get_third_highest_mz`` on the fixture peak and on peaks built without a mass spectrum object."""
    # The fixture peak reports an integer m/z of 59.
    assert peak.get_third_highest_mz() == 59
    assert isinstance(peak.get_third_highest_mz(), int)

    # Peaks constructed from a float second argument, or from a retention
    # time alone, are expected to yield None here.
    for ctor_args in ((test_float, test_float), (test_float, )):
        assert Peak(*ctor_args).get_third_highest_mz() is None
Example #2
0
def test_another_peak(im_i, peak):
    """Build a second peak at 31.44 minutes and check its retention time and UID."""
    rt_minutes = 31.44

    # Look up the scan at that time (converted to seconds) and take its spectrum.
    scan_index = im_i.get_index_at_time(rt_minutes * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    other = Peak(rt_minutes, spectrum, minutes=True)

    assert other.rt == 1886.4
    assert other.UID == "207-68-42-1886.40"
    # The new peak must not share a UID with the fixture peak.
    assert peak.UID != other.UID
Example #3
0
def test_UID(peak):
    """
    Check the peak's unique ID.

    The UID consists of the two most abundant ions and their ratio,
    followed by the retention time (in the format set by ``minutes=True`` or ``False``).
    """
    expected_uid = "131-73-42-12.34"

    assert isinstance(peak.UID, str)
    assert peak.UID == expected_uid

    # A peak created from a retention time alone still exposes a string UID.
    rt_only_peak = Peak(test_float)
    assert isinstance(rt_only_peak.UID, str)
Example #4
0
def test_ic_mass():
    """Check the ``ic_mass`` getter and setter, including the resulting UID change and type validation."""
    ic_peak = Peak(12.34, 55)
    initial_uid = ic_peak.UID

    assert isinstance(ic_peak.ic_mass, Number)
    assert ic_peak.ic_mass == 55

    # Changing the mass keeps the spectrum unset and yields a different UID.
    ic_peak.ic_mass = 12
    assert ic_peak.mass_spectrum is None
    assert ic_peak.UID != initial_uid
    assert ic_peak.ic_mass == 12

    ic_peak.ic_mass = 1234
    assert ic_peak.ic_mass == 1234

    # Non-numeric values must be rejected.
    rejected_values = [*test_sequences, test_string, test_dict]
    for bad_value in rejected_values:
        with pytest.raises(TypeError):
            ic_peak.ic_mass = bad_value
Example #5
0
def test_Peak(im_i, peak):
    """Check that ``Peak`` / ``ICPeak`` can be constructed from valid arguments and reject invalid ones."""
    assert isinstance(peak, Peak)

    # Use the scan of a known TIC peak at RT 31.17 minutes: find the index of
    # the scan nearest to that time (converted to seconds), then fetch the
    # MassSpectrum object at that index.
    rt_minutes = 31.17
    scan_index = im_i.get_index_at_time(rt_minutes * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    # Valid constructions
    Peak(rt_minutes)
    for extra in ({"outlier": True}, {"minutes": True}):
        Peak(rt_minutes, spectrum, **extra)

    # Invalid retention times and invalid second arguments
    invalid_values = [test_string, *test_lists, test_dict]
    for bad_value in invalid_values:

        with pytest.raises(TypeError):
            Peak(bad_value, spectrum, minutes=True)  # type: ignore
        with pytest.raises(TypeError):
            Peak(test_float, bad_value, minutes=False)  # type: ignore

    # A plain number is accepted as the second argument by both classes.
    for second_arg in (test_int, test_float):
        for peak_cls in (Peak, ICPeak):
            peak_cls(test_float, second_arg)
Example #6
0
def test_area(im_i, peak):
    """Check the ``area`` property: round-trip of a computed area plus type and value validation."""
    # Work on a deep copy so the shared fixture is not modified.
    working_peak = copy.deepcopy(peak)

    # Determine the area and store it on the peak.
    summed_area = peak_sum_area(im_i, working_peak)
    assert isinstance(summed_area, float)
    working_peak.area = summed_area

    assert working_peak.area == summed_area
    assert isinstance(working_peak.area, float)

    scan_index = im_i.get_index_at_time(31.17 * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    # Non-numeric areas raise TypeError ...
    rejected_types = [test_string, test_dict, test_list_strs, test_list_ints]
    for bad_value in rejected_types:
        with pytest.raises(TypeError):
            Peak(test_float, spectrum).area = bad_value

    # ... and a negative area raises ValueError.
    with pytest.raises(ValueError):
        Peak(test_float, spectrum).area = -1
Example #7
0
def peak_top_ion_areas(
    im: IntensityMatrix,
    peak: Peak,
    n_top_ions: int = 5,
    max_bound: int = 0,
) -> Dict[float, float]:
    """
    Calculate and return the ion areas of the most abundant ions in the peak.

    The ions are those returned by ``peak.top_ions(n_top_ions)``; each one is
    integrated around the scan index found for the peak's retention time.

    :param im: The originating IntensityMatrix object.
    :param peak: The peak whose top ions are integrated.
    :param n_top_ions: Number of top ions to return areas for.
    :param max_bound: Optional value to limit size of detected bound.

    :return: Dictionary of ``ion : ion_area`` pairs.

    :raises TypeError: If any argument is not of the expected type.

    :authors: Sean O'Callaghan,  Dominic Davis-Foster (type assertions)
    """

    # Validate every argument, in signature order, before doing any work.
    expectations = (
        (im, IntensityMatrix, "'im' must be an IntensityMatrix object"),
        (peak, Peak, "'peak' must be a Peak object"),
        (n_top_ions, int, "'n_top_ions' must be an integer"),
        (max_bound, int, "'max_bound' must be an integer"),
    )
    for value, required_type, message in expectations:
        if not isinstance(value, required_type):
            raise TypeError(message)

    # Scan index corresponding to the peak's retention time.
    apex_index = im.get_index_at_time(peak.rt)

    areas = {}  # ion -> ion_area

    for mass in peak.top_ions(n_top_ions):
        # ion_area is handed a plain list rather than a numpy array.
        intensities = im.get_ic_at_mass(mass).intensity_array.tolist()
        # Only the area is kept; the other four returned values are unused.
        area, _left, _right, _l_share, _r_share = ion_area(
            intensities, apex_index, max_bound)
        areas[mass] = area

    return areas
Example #8
0
def test_null_mass(peak):
    """Check that ``null_mass`` zeroes the requested masses, changes the UID and validates its input."""
    # Work on a deep copy so the shared fixture is not modified.
    working_peak = copy.deepcopy(peak)
    initial_uid = working_peak.UID

    working_peak.null_mass(73)
    working_peak.null_mass(147.0)

    # Both masses now have zero intensity in the spectrum.
    for nulled_mass in (73, 147):
        position = working_peak.mass_spectrum.mass_list.index(nulled_mass)
        assert working_peak.mass_spectrum.mass_spec[position] == 0

    assert working_peak.UID != initial_uid

    # Errors
    with pytest.raises(ValueError, match="Mass spectrum is unset."):
        Peak(test_float).null_mass(1)

    rejected_types = [test_string, *test_lists, test_dict]
    for bad_value in rejected_types:
        with pytest.raises(TypeError):
            Peak(test_float, working_peak.mass_spectrum).null_mass(bad_value)  # type: ignore

    # Masses expected to fall outside the spectrum raise IndexError.
    for out_of_range in (1, 10000):
        with pytest.raises(IndexError):
            Peak(test_float, working_peak.mass_spectrum).null_mass(out_of_range)
Example #9
0
def test_mass_spectrum(peak, im_i):
    """Check the ``mass_spectrum`` getter and setter and its type validation."""
    scan_index = im_i.get_index_at_time(31.17 * 60.0)
    spectrum = im_i.get_ms_at_index(scan_index)

    # The fixture peak carries the spectrum of that scan and no ic_mass.
    assert isinstance(peak.mass_spectrum, MassSpectrum)
    assert peak.mass_spectrum == spectrum
    assert peak.ic_mass is None

    # Assigning a spectrum to a peak that was created without one.
    first_blank = Peak(test_float)
    assert first_blank.mass_spectrum is None
    first_blank.mass_spectrum = spectrum
    assert first_blank.mass_spectrum == spectrum
    assert first_blank.ic_mass is None

    # Same again, this time checking the types of what is stored.
    second_blank = Peak(test_float)
    assert second_blank.mass_spectrum is None
    second_blank.mass_spectrum = spectrum
    assert isinstance(second_blank.mass_spectrum, MassSpectrum)
    assert isinstance(second_blank.mass_spectrum.mass_spec, list)

    # Anything that is not a mass spectrum must be rejected.
    rejected_values = [test_string, *test_numbers, test_dict, *test_lists]
    for bad_value in rejected_values:
        with pytest.raises(TypeError):
            second_blank.mass_spectrum = bad_value
Example #10
0
def composite_peak(peak_list: List, ignore_outliers: bool = False) -> Peak:
    """
    Create a peak that consists of a composite spectrum from all spectra in the list of peaks.

    Each contributing spectrum is scaled so that its largest intensity is 100
    before averaging; the composite retention time is the mean retention time
    of the contributing peaks.

    :param peak_list: A list of peak objects
    :type peak_list: list
    :param ignore_outliers: If ``True`` and the list holds more than 3 peaks,
        peaks whose retention time is a median outlier are flagged
        (``is_outlier`` is set to ``True`` on the peak object itself) and
        excluded from the composite. Peaks already flagged as outliers are
        excluded regardless of the list length.
    :type ignore_outliers: bool, optional

    :return: The composite peak, or ``None`` if no peak contributed
        (e.g. an empty list, or every peak was excluded).
    :type: pyms.Peak.Class.Peak

    :raises TypeError: If ``peak_list`` is not a list of Peak objects.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (type assertions)
    """

    if not is_peak_list(peak_list):
        raise TypeError("'peak_list' must be a list of Peak objects")

    # DK: first mark peaks in the list that are outliers by RT, but only if there are more than 3 peaks in the list
    if ignore_outliers and len(peak_list) > 3:
        rts = [peak.rt for peak in peak_list]

        for peak, flagged in zip(peak_list, median_outliers(rts)):
            if flagged:
                # Must be the attribute that is read in the loop below.
                # Previously this wrote to ``isoutlier``, which nothing reads,
                # so flagged peaks were never actually excluded.
                # NOTE(review): assumes Peak.is_outlier is settable - confirm.
                peak.is_outlier = True

    count = 0
    avg_rt = 0
    avg_spec = None  # accumulator, created from the first contributing peak
    mass_list = None

    # DK: the average RT and average mass spec for the compound peak is now calculated from peaks that are NOT outliers.
    # This should improve the ability to order peaks and figure out badly aligned entries
    for peak in peak_list:
        if peak is None or (ignore_outliers and peak.is_outlier):
            continue

        ms = peak.mass_spectrum
        spec = numpy.array(ms.mass_spec, dtype='d')

        if avg_spec is None:
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list

        # scale all intensities to [0,100]
        max_spec = max(spec) / 100.0
        if max_spec > 0:
            spec = spec / max_spec
        else:
            spec = spec * 0

        avg_rt += peak.rt
        avg_spec += spec
        count += 1

    if count == 0:
        return None

    avg_rt = avg_rt / count
    avg_spec = avg_spec / count
    new_ms = MassSpectrum(mass_list, avg_spec)

    return Peak(avg_rt, new_ms)
Example #11
0
def fill_peaks(data, peak_list: List, D: float, minutes: bool = False) -> List[Peak]:
    """
    Gets the best matching Retention Time and spectra from 'data' for each peak
    in the peak list.

    For every peak, the scans of ``data`` within a retention time window
    around the peak are scored by the cosine similarity between the scan and
    the peak's spectrum, weighted by a Gaussian of the retention time
    difference. The best scoring scan becomes the new peak.

    :param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    :type data: pyms.IntensityMatrix.IntensityMatrix
    :param peak_list: A list of peak objects
    :type peak_list: list
    :param D: Peak width standard deviation in seconds.
        Determines search window width.
    :type D: float
    :param minutes: Passed through as the third argument of each new ``Peak``.
    :type minutes: bool, optional

    :return: List of Peak Objects, one per entry of ``peak_list``
    :type: list of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``peak_list`` is not a list of Peak objects or
        ``D`` is not a float.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (type assertions)
    """

    if not is_peak_list(peak_list):
        raise TypeError("'peak_list' must be a list of Peak objects")

    if not isinstance(D, float):
        raise TypeError("'D' must be a float")

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0 * math.log(_PEN))

    datamat = data.intensity_array
    mass_list = data.mass_list
    datatimes = data.time_list
    minrt = min(datatimes)
    maxrt = max(datatimes)

    last_index = len(peak_list) - 1
    new_peak_list = []

    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.mass_spectrum.mass_spec, dtype='d')
        rt = peak.rt
        sum_spec_squared = numpy.sum(spec**2, axis=0)

        # Distance to the neighbouring peaks that actually exist.
        # Previously the neighbour RTs were carried over between iterations
        # (and started at 0), so the first peak saw a phantom neighbour at
        # t=0 and the last peak compared against its own RT, which silently
        # disabled its neighbour penalty.
        neighbour_gaps = []
        if ii > 0:
            neighbour_gaps.append(abs(rt - peak_list[ii - 1].rt))
        if ii < last_index:
            neighbour_gaps.append(abs(rt - peak_list[ii + 1].rt))
        rtclose = min(neighbour_gaps, default=0.0)

        # adjust weighting for neighbours; with no neighbour (or a neighbour
        # at the identical RT) fall back to the unpenalised width D
        Dclose = rtclose / pen_scale
        if Dclose > 0:
            Dclose = min(D, Dclose)
        else:
            Dclose = D

        # Get bounds, clipped to the time range covered by the data
        rtlow = max(rt - cutoff, minrt)
        lowii = data.get_index_at_time(rtlow)

        rtup = min(rt + cutoff, maxrt)
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii + 1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii + 1], dtype='d')

        sum_summat_squared = numpy.sum(submat**2, axis=1)

        # transpose spec (as matrix) for dot product on rows,
        # then convert the result back to a 1-D array
        toparr = numpy.dot(submat, numpy.transpose([spec])).ravel()
        botarr = numpy.sqrt(sum_spec_squared * sum_summat_squared)

        # scaled dot product of each scan. A scan (or peak spectrum) with
        # zero norm scores 0 rather than NaN; argmax treats NaN as the
        # maximum, so an empty scan would otherwise win the match.
        cosarr = numpy.divide(
            toparr,
            botarr,
            out=numpy.zeros_like(toparr),
            where=botarr > 0,
        )

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
Example #12
0
def test_bounds(peak):
    """Check the ``bounds`` property and ``set_bounds``; a list assigned to ``bounds`` reads back as a tuple."""
    working_peak = copy.copy(peak)
    expected_bounds = (11, 12, 13)

    # Setter
    working_peak.bounds = expected_bounds

    wrong_types = [
            test_string, *test_numbers, test_dict, ['a', 'b', 'c'], test_tuple
    ]
    for bad_value in wrong_types:
        with pytest.raises(TypeError):
            working_peak.bounds = bad_value

    wrong_lengths = [*test_lists, (1, 2), [1, 2, 3, 4]]
    for bad_value in wrong_lengths:
        with pytest.raises(ValueError,
                           match="'Peak.bounds' must have exactly 3 elements"):
            working_peak.bounds = bad_value

    # Getter: the rejected assignments above left the bounds untouched.
    assert working_peak.bounds == expected_bounds
    assert isinstance(working_peak.bounds, tuple)

    list_assigned = Peak(test_float)
    list_assigned.bounds = [11, 12, 13]  # type: ignore
    assert list_assigned.bounds == expected_bounds
    assert isinstance(list_assigned.bounds, tuple)

    # set_bounds
    via_method = Peak(test_float)
    via_method.set_bounds(11, 12, 13)
    assert via_method.bounds == expected_bounds
    assert isinstance(via_method.bounds, tuple)

    # Each of the three positions must reject a non-integer value.
    for bad_value in [*test_sequences, test_string, test_dict, test_float]:
        print(bad_value)

        for position in range(3):
            call_args = [11, 12, 13]
            call_args[position] = bad_value
            with pytest.raises(TypeError):
                via_method.set_bounds(*call_args)  # type: ignore
Example #13
0
def test_equality(peak):
    """Peaks compare equal with the same retention time and spectrum, and unequal when the retention time differs."""
    same_rt = Peak(peak.rt, peak.mass_spectrum)
    assert peak == same_rt

    different_rt = Peak(50, peak.mass_spectrum)
    assert peak != different_rt
Example #14
0
def test_outlier(peak):
    """Check the ``is_outlier`` flag on the fixture peak and on a peak created with ``outlier=True``."""
    # The fixture peak is not an outlier.
    assert isinstance(peak.is_outlier, bool)
    assert peak.is_outlier is False

    flagged_peak = Peak(12.34, outlier=True)
    assert flagged_peak.is_outlier is True
Example #15
0
def test_get_third_highest_mz(peak):
    """Check ``get_third_highest_mz`` on the fixture peak and its error when no mass spectrum is set."""
    assert peak.get_third_highest_mz() == 59
    assert isinstance(peak.get_third_highest_mz(), int)

    # A peak created from a retention time alone has no spectrum to inspect.
    rt_only_peak = Peak(test_float)
    with pytest.raises(ValueError, match="Mass spectrum is unset."):
        rt_only_peak.get_third_highest_mz()
Example #16
0
def test_bounds(peak):
    """Check the ``bounds`` property and ``set_bounds``; a list assigned to ``bounds`` reads back as a list."""
    working_peak = copy.copy(peak)
    expected_bounds = (11, 12, 13)

    # Setter
    working_peak.bounds = expected_bounds

    wrong_types = [
            test_string, *test_numbers, test_dict, ["a", "b", "c"], test_tuple
    ]
    for bad_value in wrong_types:
        with pytest.raises(TypeError):
            working_peak.bounds = bad_value

    wrong_lengths = [*test_lists, (1, 2), [1, 2, 3, 4]]
    for bad_value in wrong_lengths:
        with pytest.raises(ValueError):
            working_peak.bounds = bad_value

    # Getter: the rejected assignments above left the bounds untouched.
    assert working_peak.bounds == expected_bounds
    assert isinstance(working_peak.bounds, tuple)

    list_assigned = Peak(test_float)
    list_assigned.bounds = [11, 12, 13]
    assert list_assigned.bounds == [11, 12, 13]
    assert isinstance(list_assigned.bounds, list)

    # set_bounds
    via_method = Peak(test_float)
    via_method.set_bounds(11, 12, 13)
    assert via_method.bounds == expected_bounds
    assert isinstance(via_method.bounds, tuple)

    # Each of the three positions must reject a non-integer value.
    for bad_value in [*test_sequences, test_string, test_dict, test_float]:
        print(bad_value)
        for position in range(3):
            call_args = [11, 12, 13]
            call_args[position] = bad_value
            with pytest.raises(TypeError):
                via_method.set_bounds(*call_args)