Esempio n. 1
0
 def test_get_maxima_list_reduced(self, tic):
     """
     Check the structure of the result of ``get_maxima_list_reduced(tic, 12.34)``.

     The result must be a list of 2-tuples of floats, and the first element
     of the first tuple must equal ``10.5559998751``.
     """
     result = get_maxima_list_reduced(tic, 12.34)
     assert isinstance(result, list)

     for entry in result:
         # Each entry is expected to be a (retention time, intensity) pair.
         assert isinstance(entry, tuple)
         assert len(entry) == 2
         assert isinstance(entry[0], float)
         assert isinstance(entry[1], float)

     first_rt = result[0][0]
     assert first_rt == 10.5559998751
Esempio n. 2
0
def missing_peak_finder(
    sample: Sample,
    file_name: str,
    points: int = 3,
    null_ions: Optional[List] = None,
    crop_ions: Optional[List] = None,
    threshold: int = 1000,
    rt_window: float = 1,
    filetype: MissingPeakFiletype = MZML,
) -> None:
    r"""
    Integrates raw data around missing peak locations to fill ``NA``\s in the data matrix.

    The raw data file is read, converted to an integer intensity matrix,
    masked/cropped and smoothed. Then, for every entry of
    ``sample.missing_peaks``, the common-ion chromatogram is searched for
    maxima near the peak's retention time. The largest integrated area is
    stored in ``common_ion_area`` (and ``exact_rt`` is set to the retention
    time divided by 60); if no suitable maximum is found ``common_ion_area``
    is set to :py:obj:`None`.

    :param sample: The sample object containing missing peaks
    :param file_name: Name of the raw data file
    :param points: Value passed as the second argument to ``savitzky_golay``
        in both smoothing passes.
    :param null_ions: Ions to be deleted in the matrix.
    :default null_ions: ``[73, 147]``
    :param crop_ions: Range of Ions to be considered.
    :default crop_ions: ``[50, 540]``
    :param threshold: Minimum intensity of IonChromatogram allowable to fill.
        Both qualifier ions must additionally exceed half of this value.
    :param rt_window: Window in seconds around average RT to look for.
    :param filetype: Format of the raw data file; either ``NETCDF`` or ``MZML``.

    :raises ValueError: If ``filetype`` is neither ``NETCDF`` nor ``MZML``.

    :author: Sean O'Callaghan
    """

    if not null_ions:
        null_ions = [73, 147]
    if not crop_ions:
        crop_ions = [50, 540]

    # TODO: some error checks on null and crop ions

    # TODO: a for root,files,dirs in os.path.walk(): loop
    print("Sample:", sample.name, "File:", file_name)

    if filetype == NETCDF:
        # this package
        from pyms.GCMS.IO.ANDI import ANDI_reader
        data = ANDI_reader(file_name)

    elif filetype == MZML:
        # this package
        from pyms.GCMS.IO.MZML import mzML_reader
        data = mzML_reader(file_name)

    else:
        # Fail loudly here; continuing would only crash later on the
        # unbound ``data`` variable with a far less helpful error.
        raise ValueError(f"file type not valid: {filetype!r}")

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix
    n_scan, n_mz = im.size

    # smooth data: two Savitzky-Golay passes followed by a top-hat filter
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # Qualifier ions only need to reach half of the common-ion threshold.
    qualifier_threshold = threshold / 2

    for mp in sample.missing_peaks:

        mp_rt = mp.rt
        # Convert once so every lookup below uses the same numeric value.
        rt_seconds = float(mp_rt)
        common_ion = mp.common_ion
        qual_ion_1 = float(mp.qual_ion1)
        qual_ion_2 = float(mp.qual_ion2)

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print("ci = ", common_ion)
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print("qi1 = ", qual_ion_1)
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print("qi2 = ", qual_ion_2)

        ######
        # Integrate the CI around that particular RT
        #######

        # Convert the time window into a number of scans by comparing the
        # scan index at the peak RT with the index ``rt_window`` earlier.
        points_1 = ci_ion_chrom.get_index_at_time(rt_seconds)
        points_2 = ci_ion_chrom.get_index_at_time(rt_seconds - rt_window)
        rt_window_points = points_1 - points_2
        print("rt_window = ", rt_window_points)

        # NOTE(review): ``rt_window_points`` is passed positionally; confirm
        # that the third positional parameter of ``get_maxima_list_reduced``
        # is the window size rather than the scan count.
        maxima_list = get_maxima_list_reduced(
            ci_ion_chrom,
            rt_seconds,
            rt_window_points,
        )

        # Retention times of maxima where the common ion and both qualifier
        # ions are sufficiently intense.
        large_peaks = []

        for rt, intens in maxima_list:
            if intens <= threshold:
                continue

            q1_index = qi1_ion_chrom.get_index_at_time(rt)
            q2_index = qi2_ion_chrom.get_index_at_time(rt)

            q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
            q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

            if q1_intensity > qualifier_threshold and q2_intensity > qualifier_threshold:
                large_peaks.append(rt)

        print(f"found {len(large_peaks):d} peaks above threshold")

        ########################

        if large_peaks:
            # The intensity list is the same for every candidate, so build it once.
            intensities = ci_ion_chrom.intensity_array.tolist()

            # ``ion_area`` returns the area as its first element; the
            # remaining elements are not needed here.
            biggest_area = max(
                ion_area(intensities, ci_ion_chrom.get_index_at_time(peak_rt), 0)[0]
                for peak_rt in large_peaks
            )

            mp.common_ion_area = biggest_area
            mp.exact_rt = rt_seconds / 60.0
            print("found area:", biggest_area, "at rt:", mp_rt)
        else:
            print("Missing peak at rt = ", mp_rt)
            mp.common_ion_area = None
Esempio n. 3
0
 def test_window_errors(self, obj, tic):
     """
     ``get_maxima_list_reduced`` must raise :exc:`TypeError` when ``obj`` is given as ``window``.
     """
     invalid_window = obj

     with pytest.raises(TypeError):
         get_maxima_list_reduced(tic, test_float, window=invalid_window)
Esempio n. 4
0
 def test_points_errors(self, obj, tic):
     """
     ``get_maxima_list_reduced`` must raise :exc:`TypeError` when ``obj`` is given as ``points``.
     """
     invalid_points = obj

     with pytest.raises(TypeError):
         get_maxima_list_reduced(tic, test_float, points=invalid_points)
Esempio n. 5
0
 def test_mp_rt_errors(self, obj, tic):
     """
     ``get_maxima_list_reduced`` must raise :exc:`TypeError` when ``obj`` is given as ``mp_rt``.
     """
     invalid_rt = obj

     with pytest.raises(TypeError):
         get_maxima_list_reduced(tic, mp_rt=invalid_rt)
Esempio n. 6
0
 def test__errors(self, obj):
     """
     ``get_maxima_list_reduced`` must raise :exc:`TypeError` when ``obj`` is given as its first argument.
     """
     invalid_ic = obj
     retention_time = 0

     with pytest.raises(TypeError):
         get_maxima_list_reduced(invalid_ic, retention_time)