Beispiel #1
0
def fill_peaks(data, peak_list, D, minutes=False):

    """
    @summary: Gets the best matching Retention Time and spectra from 'data' for
        each peak in the peak list.

        For every peak, the scans of 'data' inside a retention time window
        around the peak are scored by the cosine similarity between the scan
        and the peak's mass spectrum, multiplied by a Gaussian weight on the
        retention time difference.  The best scoring scan supplies the
        retention time and the spectrum of the replacement peak.

    @param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    @type data: pyms.GCMS.Class.IntensityMatrix
    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param D: Peak width standard deviation in seconds.  Determines search
        window width.
    @type D: FloatType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: List of Peak Objects, one per entry of 'peak_list'
    @type: ListType

    @author: Andrew Isaac
    """

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D*math.sqrt(-2.0*math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0*math.log(_PEN))

    datamat = data.get_matrix_list()
    mass_list = data.get_mass_list()
    datatimes = data.get_time_list()
    minrt = min(datatimes)
    maxrt = max(datatimes)
    last_ii = len(peak_list) - 1
    new_peak_list = []
    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.get_mass_spectrum().mass_spec, dtype='d')
        rt = peak.get_rt()
        spec_SS = numpy.sum(spec**2, axis=0)

        # RT gaps to the neighbours that really exist.  Only real neighbours
        # are considered: the first peak has no left neighbour and the last
        # peak has no right one, so no stale or placeholder RT is used there.
        gaps = []
        if ii > 0:
            gaps.append(abs(rt - peak_list[ii-1].rt))
        if ii < last_ii:
            gaps.append(abs(rt - peak_list[ii+1].rt))

        # adjust weighting for neighbours: narrow the RT weight so that it
        # drops to _PEN at the nearest neighbour, but never widen beyond D
        Dclose = D
        if gaps:
            nearest = min(gaps)/pen_scale
            if nearest > 0:
                Dclose = min(D, nearest)

        # Get bounds, clipped to the time range covered by the data
        rtlow = max(rt - cutoff, minrt)
        lowii = data.get_index_at_time(rtlow)

        rtup = min(rt + cutoff, maxrt)
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii+1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii+1], dtype='d')

        submat_SS = numpy.sum(submat**2, axis=1)

        # dot product of every scan (row) with the peak spectrum
        toparr = numpy.dot(submat, spec)
        botarr = numpy.sqrt(spec_SS*submat_SS)

        # scaled dot product of each scan.  A scan (or spectrum) with no
        # intensity has a zero norm; score it 0 instead of dividing 0/0,
        # because the resulting NaN would be picked by argmax() below.
        cosarr = numpy.zeros(len(botarr), dtype='d')
        nonzero = botarr > 0
        cosarr[nonzero] = toparr[nonzero]/botarr[nonzero]

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts-rt)/float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr*rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak.  Scan times are in seconds; when 'minutes' is set the
        # Peak constructor expects its RT argument in minutes, so convert.
        bestrt = subrts[best_ii]
        if minutes:
            bestrt = bestrt/60.0
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
Beispiel #2
0
# Extract the |MassSpectrum| at 31.17 minutes in this example.

# In[4]:


# The time is passed in seconds, hence the conversion ``31.17*60.0``.
index = im.get_index_at_time(31.17*60.0)
ms = im.get_ms_at_index(index)


# Create a |Peak| object for the given retention time.

# In[5]:


from pyms.Peak.Class import Peak
# 31.17 is given in minutes here, so ``minutes=True`` is passed.
peak = Peak(31.17, ms, minutes=True)


# By default the retention time is assumed to be in seconds. The parameter 
# ``minutes`` can be set to ``True`` if the retention time is given in minutes.
# Internally, PyMassSpec stores retention times in seconds, so the ``minutes``
# parameter ensures the input and output of the retention time are in the same
# units.
# 
# ## Peak Object properties
# 
# The retention time of the peak, in seconds, can be returned with |pyms.Peak.Class.Peak.rt|. 
# The mass spectrum can be returned with |pyms.Peak.Class.Peak.mass_spectrum|.
# 
# The |Peak| object constructs a unique identification (UID) based on the spectrum
# and retention time. This helps in managing lists of peaks (covered in the next
Beispiel #3
0
def peak(im_i):
    """
    Build a Peak from the mass spectrum of the scan that 'im_i' reports for
    the time 31.17 * 60.0, using 12.34 as the peak's retention time (no
    'minutes' flag is passed to Peak).
    """
    target_time = 31.17 * 60.0
    spectrum = im_i.get_ms_at_index(im_i.get_index_at_time(target_time))
    return Peak(12.34, spectrum)
Beispiel #4
0
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
# build_intensity_matrix_i is used below; without this import the script
# stops with a NameError right after reading the file.
from pyms.GCMS.Function import build_intensity_matrix_i
from pyms.Peak.Class import Peak

# read file and convert to intensity matrix
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17 * 60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object
peak = Peak(31.17, ms, minutes=True)

# Get the retention time (in seconds)
# (single-argument print(...) behaves the same on Python 2 and Python 3)
print(peak.get_rt())

# Get the peak's unique ID
# Consists of the two most abundant ions and their ratio,
# and the retention time (in the format set by minutes=True or False)
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44 * 60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.get_UID())
Beispiel #5
0
def composite_peak(peak_list, minutes=False):
    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks

        Each contributing spectrum is scaled so its largest intensity is 100
        before averaging.  None entries in 'peak_list' are skipped.  When
        more than 3 peaks are present, peaks flagged by median_outliers()
        on their retention times get 'isoutlier' set to True (this mutates
        the peaks in 'peak_list') and are left out of the averages.

    @param peak_list: A list of peak objects; entries may be None
    @type peak_list: ListType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or None
        when no peak contributed (all entries None or outliers)
    @type: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    @author: David Kainer
    """

    # None entries are tolerated by the averaging loop below, so the outlier
    # pass must tolerate them too instead of failing on None.get_rt().
    real_peaks = [peak for peak in peak_list if peak is not None]

    # DK: first mark peaks in the list that are outliers by RT, but only if
    # there are more than 3 peaks in the list
    if len(real_peaks) > 3:
        is_outlier = median_outliers([peak.get_rt() for peak in real_peaks])

        # JT: numpy.nditer is kept for walking the result of
        #     median_outliers(), which could not be enumerated like a list
        for i, flag in enumerate(numpy.nditer(is_outlier)):
            if flag:
                real_peaks[i].isoutlier = True

    # DK: the average RT and average mass spec for the compo peak is now
    # calculated from peaks that are NOT outliers.  This should improve the
    # ability to order peaks and figure out badly aligned entries
    count = 0
    rt_sum = 0.0
    avg_spec = None
    mass_list = None
    for peak in real_peaks:
        if peak.check_outlier():
            continue

        ms = peak.get_mass_spectrum()
        spec = numpy.array(ms.mass_spec, dtype='d')
        if avg_spec is None:
            # the first contributing peak fixes the length and mass axis
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list

        # scale all intensities to [0,100]
        max_spec = spec.max() / 100.0
        if max_spec > 0:
            spec = spec / max_spec
        else:
            spec = spec * 0

        rt_sum += peak.get_rt()
        avg_spec += spec
        count += 1

    if count == 0:
        return None

    avg_rt = rt_sum / count
    if minutes:
        # the summed RTs come from get_rt(); Peak expects minutes here
        avg_rt = avg_rt / 60.0
    avg_spec = (avg_spec / count).tolist()  # list more compact than ndarray
    new_ms = MassSpectrum(mass_list, avg_spec)
    return Peak(avg_rt, new_ms, minutes)