Beispiel #1
0
def BillerBiemann(im, points=3, scans=1):
    """
    @summary: BillerBiemann Deconvolution

        Deconvolution based on the algorithm of Biller and Biemann (1974).
        One Peak is created for every row of the maxima matrix whose
        summed intensities are greater than zero.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing,
        peaks from 'scans' scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    retention_times = im.get_time_list()
    masses = im.get_mass_list()
    maxima_rows = get_maxima_matrix(im, points, scans)

    detected = []
    for scan_idx, intensities in enumerate(maxima_rows):
        # rows without any positive total intensity hold no maxima
        if not sum(intensities) > 0:
            continue
        spectrum = MassSpectrum(masses, intensities)
        candidate = Peak(retention_times[scan_idx], spectrum)
        # store IM index for convenience
        candidate.set_pt_bounds([0, scan_idx, 0])
        detected.append(candidate)

    return detected
Beispiel #2
0
def BillerBiemann(im, points=3, scans=1):
    """
    @summary: BillerBiemann Deconvolution

        Deconvolution based on the algorithm of Biller and Biemann (1974).
        Each row of the maxima matrix with a positive intensity sum is
        turned into a Peak at the retention time of that row.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing,
        peaks from 'scans' scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    times = im.get_time_list()
    mass_axis = im.get_mass_list()
    found_peaks = []

    for idx, maxima_row in enumerate(get_maxima_matrix(im, points, scans)):
        if sum(maxima_row) > 0:
            new_peak = Peak(times[idx], MassSpectrum(mass_axis, maxima_row))
            new_peak.set_pt_bounds([0, idx, 0])  # keep the IM row index on the peak
            found_peaks.append(new_peak)

    return found_peaks
Beispiel #3
0
def BillerBiemann(im: IntensityMatrix,
                  points: int = 3,
                  scans: int = 1) -> List[Peak]:
    """
    Deconvolution based on the algorithm of Biller and Biemann (1974)

    Every row of the maxima matrix whose intensities sum to more than zero
    becomes a peak at the retention time of that row.

    :param im: An :class:`~pyms.IntensityMatrix.IntensityMatrix` object
    :type im: ~pyms.IntensityMatrix.IntensityMatrix
    :param points: Number of scans over which to consider a maxima to be a peak. Default ``3``
    :type points: int, optional
    :param scans: Number of scans to combine peaks from to compensate for spectra skewing. Default ``1``
    :type scans: int, optional

    :return: List of detected peaks
    :rtype: List[:class:`pyms.Peak.Class.Peak`]

    :raises TypeError: if ``im`` is not an IntensityMatrix, or ``points`` /
        ``scans`` is not an integer

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    if not isinstance(im, IntensityMatrix):
        raise TypeError("'im' must be an IntensityMatrix object")

    # the two integer arguments are validated in signature order
    for value, message in (
        (points, "'points' must be an integer"),
        (scans, "'scans' must be an integer"),
    ):
        if not isinstance(value, int):
            raise TypeError(message)

    retention_times = im.time_list
    masses = im.mass_list
    detected = []

    for scan_idx, intensities in enumerate(get_maxima_matrix(im, points, scans)):
        if not sum(intensities) > 0:
            continue
        candidate = Peak(retention_times[scan_idx], MassSpectrum(masses, intensities))
        candidate.bounds = [0, scan_idx, 0]  # store IM index for convenience
        detected.append(candidate)

    return detected
Beispiel #4
0
def BillerBiemann(im: BaseIntensityMatrix,
                  points: int = 3,
                  scans: int = 1) -> List[Peak]:
    """
    Deconvolution based on the algorithm of Biller and Biemann (1974).

    A peak is emitted for each row of the maxima matrix whose intensities
    sum to more than zero; the row index is recorded in the peak's bounds.

    :param im: The intensity matrix to search for peaks.
    :param points: Number of scans over which to consider a maxima to be a peak.
    :param scans: Number of scans to combine peaks from to compensate for spectra skewing.

    :return: List of detected peaks

    :raises TypeError: if ``im`` is not a BaseIntensityMatrix, or ``points`` /
        ``scans`` is not an integer

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    if not isinstance(im, BaseIntensityMatrix):
        raise TypeError("'im' must be an IntensityMatrix object")
    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")
    if not isinstance(scans, int):
        raise TypeError("'scans' must be an integer")

    times = im.time_list
    mass_axis = im.mass_list
    found: list = []

    for scan_idx, maxima_row in enumerate(get_maxima_matrix(im, points, scans)):
        if not sum(maxima_row) > 0:
            # nothing detected in this scan
            continue

        spectrum = MassSpectrum(mass_axis, maxima_row)
        new_peak = Peak(times[scan_idx], spectrum)
        # store IM index for convenience
        # TODO: can the bounds be determined from the intensity matrix?
        new_peak.bounds = (0, scan_idx, 0)
        found.append(new_peak)

    return found
Beispiel #5
0
def composite_peak(peak_list, minutes=False):
    """
    @summary: Build one peak whose retention time and mass spectrum are the
        averages over all non-None peaks in the list. Each spectrum is
        rescaled so its largest intensity is 100 before averaging.

    @param peak_list: A list of peak objects (None entries are skipped)
    @type peak_list: ListType
    @param minutes: Passed through unchanged to the Peak constructor
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None when the list holds no usable peak
    @type: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    """

    rt_total = 0
    spec_total = None  # allocated when the first usable peak is seen
    mass_list = None
    n_used = 0

    for candidate in peak_list:
        if candidate is None:
            continue

        spectrum = candidate.get_mass_spectrum()
        intensities = numpy.array(spectrum.mass_spec, dtype='d')
        if spec_total is None:
            spec_total = numpy.zeros(len(spectrum.mass_spec), dtype='d')
            mass_list = spectrum.mass_list

        # scale all intensities to [0,100]; a spectrum without a positive
        # maximum is zeroed instead of divided
        scale = max(intensities) / 100.0
        intensities = intensities / scale if scale > 0 else intensities * 0

        rt_total += candidate.get_rt()
        spec_total += intensities
        n_used += 1

    if n_used == 0:
        return None

    mean_rt = rt_total / n_used
    # NOTE(review): the averaged RT is not rescaled before being handed to
    # Peak together with 'minutes' -- confirm this is intended.
    mean_spec = (spec_total / n_used).tolist()  # list more compact than ndarray
    return Peak(mean_rt, MassSpectrum(mass_list, mean_spec), minutes)
Beispiel #6
0
def fill_peaks(data, peak_list, D, minutes=False):
    """
    @summary: Gets the best matching Retention Time and spectra from 'data' for
        each peak in the peak list.

        For every peak, the scans of 'data' within a retention-time window
        around the peak are scored by the cosine similarity between the scan
        and the peak spectrum, weighted by a Gaussian of the retention-time
        difference. The best scoring scan becomes the new peak.

    @param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    @type data: pyms.GCMS.Class.IntensityMatrix
    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param D: Peak width standard deviation in seconds.  Determines search
        window width.
    @type D: FloatType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: List of Peak Objects (empty when 'peak_list' is empty)
    @type: ListType

    @author: Andrew Isaac
    """

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0 * math.log(_PEN))  # loop invariant

    datamat = data.get_matrix_list()
    mass_list = data.get_mass_list()
    datatimes = data.get_time_list()
    minrt = min(datatimes)
    maxrt = max(datatimes)

    # Neighbour RTs deliberately persist between iterations: the first peak
    # sees a left neighbour at 0 and the last peak keeps the right neighbour
    # value assigned on the previous iteration.
    rtl = 0
    rtr = 0
    last_ii = len(peak_list) - 1
    new_peak_list = []

    # enumerate instead of xrange: works on both Python 2 and Python 3
    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.get_mass_spectrum().mass_spec, dtype='d')
        rt = peak.get_rt()
        spec_SS = numpy.sum(spec**2, axis=0)

        # get neighbour RT's
        if ii > 0:
            rtl = peak_list[ii - 1].rt
        if ii < last_ii:
            rtr = peak_list[ii + 1].rt

        # adjust weighting for neighbours
        rtclose = min(abs(rt - rtl), abs(rt - rtr))
        Dclose = rtclose / pen_scale
        # never widen beyond D; fall back to D when the neighbour distance is 0
        Dclose = min(D, Dclose) if Dclose > 0 else D

        # Get bounds, clamped to the time range covered by the data
        lowii = data.get_index_at_time(max(rt - cutoff, minrt))
        upii = data.get_index_at_time(min(rt + cutoff, maxrt))

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii + 1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii + 1], dtype='d')

        submat_SS = numpy.sum(submat**2, axis=1)

        # dot product of every scan (row) with the peak spectrum (as a column),
        # flattened back to a 1-D array
        toparr = numpy.dot(submat, numpy.transpose([spec])).ravel()
        botarr = numpy.sqrt(spec_SS * submat_SS)

        # scaled dot product of each scan; scans (or a peak spectrum) with zero
        # norm score 0 rather than NaN, so an empty scan is never picked as the
        # best match by argmax
        cosarr = numpy.zeros_like(toparr)
        nonzero = botarr > 0
        cosarr[nonzero] = toparr[nonzero] / botarr[nonzero]

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
Beispiel #7
0
# stdlib
import os
from copy import copy, deepcopy
from timeit import timeit

# pyms
from pyms.GCMS.IO.JCAMP import JCAMP_reader
from pyms.IntensityMatrix import build_intensity_matrix_i
from pyms.Peak.Class import Peak

# Build the fixture once at module level: read the JCAMP file, build the
# intensity matrix, and take the mass spectrum at the scan found for
# 31.17 * 60.0 (31.17 minutes expressed in seconds).
data = JCAMP_reader(os.path.join("data", "ELEY_1_SUBTRACT.JDX"))
im_i = build_intensity_matrix_i(data)
scan_i = im_i.get_index_at_time(31.17 * 60.0)
ms = im_i.get_ms_at_index(scan_i)
# The peak being copied in the benchmarks below.
peak = Peak(12.34, ms)


def copy_peak():
    """Return a shallow copy of the module-level ``peak``."""
    return copy(peak)


def deepcopy_peak():
    """Return a deep copy of the module-level ``peak``."""
    return deepcopy(peak)


# Print the time reported by timeit for the shallow and the deep copy.
print(timeit(copy_peak))
print(timeit(deepcopy_peak))


def copy_ms():
Beispiel #8
0
def composite_peak(peak_list, minutes=False):

    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks

        None entries in the list are ignored. When more than 3 peaks are
        present, peaks whose retention time is reported as an outlier by
        median_outliers() get their 'isoutlier' attribute set to True (a
        side effect on the caller's peak objects) and are left out of the
        averages.

    @param peak_list: A list of peak objects (entries may be None)
    @type peak_list: ListType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None when no peak contributes to the average
    @type: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    @author: David Kainer
    """

    # Drop missing entries up front so the outlier pass and the averaging
    # pass agree on which peaks exist (the outlier pass used to call
    # get_rt() on None entries).
    real_peaks = [peak for peak in peak_list if peak is not None]

    # DK: first mark peaks in the list that are outliers by RT, but only if
    # there are more than 3 peaks in the list
    if len(real_peaks) > 3:
        rts = [peak.get_rt() for peak in real_peaks]
        for peak, flagged in zip(real_peaks, median_outliers(rts)):
            if flagged:
                peak.isoutlier = True

    # DK: the average RT and average mass spec for the compo peak is now
    # calculated from peaks that are NOT outliers.
    # This should improve the ability to order peaks and figure out badly
    # aligned entries
    count = 0
    avg_rt = 0
    avg_spec = None  # allocated when the first contributing peak is seen
    mass_list = None

    for peak in real_peaks:
        # check_outlier() is defined outside this view; the explicit
        # comparison with False is kept on purpose.
        if peak.check_outlier() == False:
            ms = peak.get_mass_spectrum()
            spec = numpy.array(ms.mass_spec, dtype='d')
            if avg_spec is None:
                avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
                mass_list = ms.mass_list
            # scale all intensities to [0,100]
            max_spec = max(spec) / 100.0
            if max_spec > 0:
                spec = spec / max_spec
            else:
                spec = spec * 0
            avg_rt += peak.get_rt()
            avg_spec += spec
            count += 1

    if count == 0:
        return None

    avg_rt = avg_rt / count
    if minutes:
        # express the averaged RT in minutes before handing it to Peak
        # together with the 'minutes' flag
        avg_rt = avg_rt / 60.0
    avg_spec = (avg_spec / count).tolist()  # list more compact than ndarray
    new_ms = MassSpectrum(mass_list, avg_spec)
    return Peak(avg_rt, new_ms, minutes)
Beispiel #9
0
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# Read the ANDI file and convert it to an intensity matrix.
# NOTE(review): build_intensity_matrix_i is not imported in the visible part
# of this script -- confirm it is brought into scope elsewhere.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17*60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object (retention time given in minutes)
peak = Peak(31.17, ms, minutes=True)

# UID before modification; print() with a single argument behaves the same
# on Python 2 and Python 3
print(peak.get_UID())

# modify the range and null TMS ions
peak.crop_mass(60, 450)
peak.null_mass(73)
peak.null_mass(147)

# New UID after modification
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44*60.0)
ms = im.get_ms_at_index(scan_i)
Beispiel #10
0
# Extract the |MassSpectrum| at 31.17 minutes in this example.

# In[4]:


# 31.17 minutes is converted to seconds (31.17*60.0) for the index lookup;
# the resulting scan index is then used to fetch the mass spectrum.
index = im.get_index_at_time(31.17*60.0)
ms = im.get_ms_at_index(index)


# Create a |Peak| object for the given retention time.

# In[5]:


from pyms.Peak.Class import Peak
# minutes=True marks the retention time 31.17 as being given in minutes.
peak = Peak(31.17, ms, minutes=True)


# By default the retention time is assumed to be in seconds. The parameter 
# ``minutes`` can be set to ``True`` if the retention time is given in minutes.
# Internally, PyMassSpec stores retention times in seconds, so the ``minutes``
# parameter ensures the input and output of the retention time are in the same
# units.
# 
# ## Peak Object properties
# 
# The retention time of the peak, in seconds, can be returned with |pyms.Peak.Class.Peak.rt|. 
# The mass spectrum can be returned with |pyms.Peak.Class.Peak.mass_spectrum|.
# 
# The |Peak| object constructs a unique identification (UID) based on the spectrum
# and retention time. This helps in managing lists of peaks (covered in the next
Beispiel #11
0
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# Read the ANDI file and convert it to an intensity matrix.
# NOTE(review): build_intensity_matrix_i is not imported in the visible part
# of this script -- confirm it is brought into scope elsewhere.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17 * 60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object (retention time given in minutes)
peak = Peak(31.17, ms, minutes=True)

# Get the retention time (in seconds).
# print() with a single argument behaves the same on Python 2 and Python 3.
print(peak.get_rt())

# Get the peak's unique ID
# Consists of the two most abundant ions and their ratio,
# and the retention time (in the format set by minutes=True or False)
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44 * 60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.get_UID())
Beispiel #12
0
def peak(im_i):
    """
    Build a Peak from the spectrum of ``im_i`` at the scan found for
    31.17 * 60.0, using 12.34 as the peak's first constructor argument.
    """
    lookup_time = 31.17 * 60.0
    spectrum = im_i.get_ms_at_index(im_i.get_index_at_time(lookup_time))
    return Peak(12.34, spectrum)
Beispiel #13
0
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# Read the ANDI file and convert it to an intensity matrix.
# NOTE(review): build_intensity_matrix_i is not imported in the visible part
# of this script -- confirm it is brought into scope elsewhere.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17*60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object (retention time given in minutes)
peak = Peak(31.17, ms, minutes=True)

# Get the retention time (in seconds).
# print() with a single argument behaves the same on Python 2 and Python 3.
print(peak.get_rt())

# Get the peak's unique ID
# Consists of the two most abundant ions and their ratio,
# and the retention time (in the format set by minutes=True or False)
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44*60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.get_UID())
Beispiel #14
0
from pyms.GCMS.IO.ANDI import ANDI_reader
from pyms.Peak.Class import Peak

# Read the ANDI file and convert it to an intensity matrix.
# NOTE(review): build_intensity_matrix_i is not imported in the visible part
# of this script -- confirm it is brought into scope elsewhere.
andi_file = "data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17*60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object (retention time given in minutes)
peak = Peak(31.17, ms, minutes=True)

# Get the retention time (in seconds)
print(peak.rt)

# Get the peak's unique ID
# Consists of the two most abundant ions and their ratio,
# and the retention time (in the format set by minutes=True or False)
print(peak.UID)

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44*60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.UID)
Beispiel #15
0
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# Read the ANDI file and convert it to an intensity matrix.
# NOTE(review): build_intensity_matrix_i is not imported in the visible part
# of this script -- confirm it is brought into scope elsewhere.
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17 * 60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object (retention time given in minutes)
peak = Peak(31.17, ms, minutes=True)

# UID before modification; print() with a single argument behaves the same
# on Python 2 and Python 3
print(peak.get_UID())

# modify the range and null TMS ions
peak.crop_mass(60, 450)
peak.null_mass(73)
peak.null_mass(147)

# New UID after modification
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44 * 60.0)
ms = im.get_ms_at_index(scan_i)