Beispiel #1
0
def fragmentgains(fragments,
                  gains=[],
                  filterIn={
                      'H2O': ['b'],
                      'CO': ['b', 'c', 'break']
                  },
                  filterOut={}):
    """Make copies of fragments, each carrying one additional neutral gain.
        fragments: (list) sequence fragments to be extended
        gains: (list) neutral gains tried on every fragment
        filterIn: (dic) gain -> only series the gain may be applied to; if the
            list contains 'break', the fragment history must contain a
            'break' entry as well
        filterOut: (dic) gain -> series the gain must not be applied to
    Returns a new list of duplicated fragments; input fragments are untouched.
    """

    result = []
    for fragment in fragments:

        CHECK_FORCE_QUIT()

        # any 'break' step in the history marks a cyclic parent
        fromCyclic = any('break' in step for step in fragment.history)

        for gain in gains:

            # the same group cannot be lost and gained at once
            if gain in fragment.fragmentLosses:
                continue

            # series explicitly excluded for this gain
            if gain in filterOut and fragment.fragmentSerie in filterOut[gain]:
                continue

            if gain in filterIn:
                allowed = filterIn[gain]

                # series not listed for this gain
                if fragment.fragmentSerie not in allowed:
                    continue

                # gain reserved for fragments of cyclic parents
                if 'break' in allowed and not fromCyclic:
                    continue

            # duplicate, apply gain and keep valid compositions only
            candidate = fragment.duplicate()
            candidate.fragmentGains.append(gain)
            if candidate.isvalid():
                result.append(candidate)

    return result
Beispiel #2
0
def savgol(signal, window, cycles=1, order=3):
    """Smooth signal by Savitzky-Golay filter. New array is returned.
        signal (numpy array) - signal data points, one [x, y] row per point
        window (float) - m/z window size for smoothing
        cycles (int) - number of repeating cycles
        order (int) - order of polynom used
    An unmodified copy of signal is returned if it has fewer than two
    points, spans no x-range, or if the window covers no more points than
    the polynom order.
    """

    # nothing to smooth; also protects the point-density division below
    if len(signal) < 2:
        return signal.copy()
    span = signal[-1][0] - signal[0][0]
    if span == 0:
        return signal.copy()

    # approximate number of points within window
    window = int(window * len(signal) / span)
    if window <= order:
        return signal.copy()

    # unpack axes
    xAxis, yAxis = numpy.hsplit(signal, 2)
    yAxis = yAxis.flatten()

    # smoothing weights are the first row of the pseudo-inverse of the
    # polynomial design matrix built over a symmetric (odd-sized) window
    halfWindow = (window - 1) // 2
    design = numpy.array([[k**i for i in range(order + 1)]
                          for k in range(-halfWindow, halfWindow + 1)],
                         dtype=float)
    weights = numpy.linalg.pinv(design)[0]

    # smooth the data ('> 0' so that a negative count cannot loop forever)
    while cycles > 0:

        # pad both ends with the edge values so every point has a full window
        padded = numpy.concatenate((numpy.zeros(halfWindow) + yAxis[0], yAxis,
                                    numpy.zeros(halfWindow) + yAxis[-1]))

        smoothData = []
        for i in range(halfWindow, len(padded) - halfWindow):

            CHECK_FORCE_QUIT()

            # weights are indexed directly, so they are reusable for every
            # point (a shared zip() iterator is exhausted after the first one)
            smoothData.append(
                numpy.dot(weights, padded[i - halfWindow:i + halfWindow + 1]))

        yAxis = numpy.array(smoothData)
        cycles -= 1

    # return smoothed data (concatenate always builds a new array)
    yAxis = numpy.asarray(yAxis).reshape(-1, 1)
    return numpy.concatenate((xAxis, yAxis), axis=1)
Beispiel #3
0
def deconvolute(peaklist, massType=0):
    """Convert all charged peaks to singly charged ones. New peaklist is returned.
        peaklist (mspy.peaklist) - peak list to deconvolute
        massType (0 or 1) - mass type used for m/z re-calculation, 0 = monoisotopic, 1 = average
    Peaks without a charge are dropped. Work is done on a deep copy, so the
    given peaklist is not modified.
    """
    
    singles = []
    for peak in copy.deepcopy(peaklist):
        
        CHECK_FORCE_QUIT()
        
        # peaks with unknown charge cannot be recalculated
        if not peak.charge:
            continue
        
        # multiply charged peak - rescale width and move to charge +1/-1
        if abs(peak.charge) != 1:
            
            # fwhm must be set before the charge is changed
            if peak.fwhm:
                peak.setfwhm(abs(peak.fwhm*peak.charge))
            
            # keep polarity of the original charge
            polarity = -1 if peak.charge < 0 else 1
            peak.setmz(mod_basics.mz(mass=peak.mz, charge=polarity, currentCharge=peak.charge, massType=massType))
            peak.setcharge(polarity)
        
        singles.append(peak)
    
    # reset s/n and baseline, absolute intensity becomes current intensity
    for peak in singles:
        peak.setsn(None)
        peak.setai(peak.intensity)
        peak.setbase(0.)
    
    return obj_peaklist.peaklist(singles)
Beispiel #4
0
    def _makeModels(self, raster, reset=True):
        """Calculate pattern for every model."""

        models = []
        exchanged = []

        # get raster
        rasterMin = raster[0] - self.fwhm
        rasterMax = raster[-1] + self.fwhm

        for x in sorted(self.models.keys()):

            CHECK_FORCE_QUIT()

            # get compound
            compound = self.models[x][0]

            # check if mz is within raster
            mz = compound.mz(self.charge)
            if mz[0] > rasterMax or mz[1] < rasterMin:
                continue

            # calculate isotopic pattern
            pattern = self.models[x][1]
            if reset or pattern == []:
                pattern = compound.pattern(fwhm=self.fwhm,
                                           charge=self.charge,
                                           real=False)
                self.models[x][1] = pattern

            # calculate model profile
            profile = mod_pattern.profile(pattern,
                                          fwhm=self.fwhm,
                                          raster=raster,
                                          model=self.peakShape)
            model = profile[:, 1].flatten()

            # check model profile
            if model.any():
                models.append(model)
                exchanged.append(x)

        # make models matrix
        models = numpy.array(models)

        return models, exchanged
Beispiel #5
0
def movaver(signal, window, cycles=1, style='flat'):
    """Smooth signal by moving average filter. New array is returned.
        signal (numpy array) - signal data points, one [x, y] row per point
        window (float) - m/z window size for smoothing
        cycles (int) - number of repeating cycles
        style (str) - shape of the averaging window: 'flat', 'gaussian' or
            name of a numpy function called with the window length
            (e.g. 'hanning', 'hamming', 'blackman')
    An unmodified copy of signal is returned if it has fewer than two
    points, spans no x-range, or if the window covers fewer than 3 points.
    Unknown style raises AttributeError.
    """

    # nothing to smooth; also protects the point-density division below
    if len(signal) < 2:
        return signal.copy()
    span = signal[-1][0] - signal[0][0]
    if span == 0:
        return signal.copy()

    # approximate number of points within window
    window = int(window * len(signal) / span)
    window = min(window, len(signal))
    if window < 3:
        return signal.copy()

    # window must be odd to be centered on a point
    if not window % 2:
        window -= 1

    # unpack mz and intensity
    xAxis, yAxis = numpy.hsplit(signal, 2)
    xAxis = xAxis.flatten()
    yAxis = yAxis.flatten()

    # smooth the points ('> 0' so that a negative count cannot loop forever)
    weights = None
    while cycles > 0:

        CHECK_FORCE_QUIT()

        # weights are the same for all cycles, build them only once
        if weights is None:
            weights = _movaverWeights(window, style)
            weights = weights / weights.sum()

        # mirror the signal at both ends so the edges get a full window
        mirrored = numpy.r_[yAxis[window - 1:0:-1], yAxis,
                            yAxis[-2:-window - 1:-1]]
        smoothed = numpy.convolve(weights, mirrored, mode='same')

        # cut off the mirrored parts
        yAxis = smoothed[window - 1:-window + 1]
        cycles -= 1

    # return smoothed data (concatenate always builds a new array)
    return numpy.concatenate((xAxis.reshape(-1, 1), yAxis.reshape(-1, 1)),
                             axis=1)


def _movaverWeights(window, style):
    """Return unnormalized averaging weights of given length and shape."""

    if style == 'flat':
        return numpy.ones(window, 'f')

    if style == 'gaussian':
        r = numpy.arange(window) - (window - 1) / 2.
        return numpy.exp(-(r**2 / (window / 4.)**2))

    # resolve the function by name inside numpy only; the style string is
    # never evaluated as code
    func = numpy
    for name in style.split('.'):
        func = getattr(func, name)
    return func(window)
Beispiel #6
0
    def _leastSquare(self, data, models, iterLimit=None, chiLimit=1e-3):
        """Least-square fitting. Adapted from the original code by Konrad Hinsen.

        data - numpy array to be fitted; it is rescaled IN PLACE so that its
            maximum is 100
        models - models to fit, one parameter is fitted per model
        iterLimit (int or None) - maximum number of iterations, no limit if
            not set
        chiLimit (float) - iteration stops when an accepted step improves
            chi-square by less than this value

        Returns numpy array of fitted parameters (clamped to be non-negative)
        scaled back to the original data intensity.
        """

        # normalize data to have maximum of 100
        # NOTE(review): max of 0 makes normf infinite - confirm that callers
        # never pass all-zero data
        normf = 100. / numpy.max(data)
        data *= normf

        # initial guess and corresponding chi-square
        # NOTE(review): chisq[0] is used as the chi-square value and chisq[1]
        # as its gradient - confirm against _chiSquare
        params = [50.] * len(models)
        identity = numpy.identity(len(params))
        chisq, alpha = self._chiSquare(data, models, params)
        damping = 0.001

        niter = 0
        while True:

            CHECK_FORCE_QUIT()

            niter += 1
            delta = solveLinEq(
                alpha + damping * numpy.diagonal(alpha) * identity,
                -0.5 * numpy.array(chisq[1]))

            # a real list is needed here (map() is a one-shot iterator on
            # Python 3); negative parameters are clamped to zero
            next_params = []
            for current, step in zip(params, delta):
                value = current + step
                next_params.append(0. if value < 0. else value)

            next_chisq, next_alpha = self._chiSquare(data, models, next_params)
            if next_chisq[0] > chisq[0]:
                # step rejected - increase damping and try again
                damping = 5. * damping
            elif chisq[0] - next_chisq[0] < chiLimit:
                # converged
                break
            else:
                # step accepted - decrease damping and continue from there
                damping = 0.5 * damping
                params = next_params
                chisq = next_chisq
                alpha = next_alpha

            if iterLimit and niter == iterLimit:
                break

        # NOTE(review): as in the original code the last tried parameters are
        # returned, even if the iteration limit was hit on a rejected step
        return numpy.array(next_params) / normf
Beispiel #7
0
    def _initModels(self, scales):
        """Init theoretical envelope models."""

        self.models = {}

        # generate possible models to fit
        for x in scales:

            CHECK_FORCE_QUIT()

            # make compound
            item = "%s(%s)%d(%s)%d" % (self.formula, self._lossFormula, x,
                                       self._gainFormula, x)
            compound = obj_compound.compound(item)

            # check compound
            if not compound.isvalid(charge=self.charge):
                continue

            # append model [0-compound, 1-pattern, 2-abs abundance, 3-rel abundance]
            self.models[x] = [compound, [], 0.0, 0.0]
Beispiel #8
0
    def variations(self, maxMods=1, position=True, enzyme=None):
        """Generate peptides for all allowed combinations of variable modifications.
            maxMods: (int) maximum modifications allowed per one residue
            position: (bool) retain modifications positions (much slower)
            enzyme: (str) enzyme name to ensure that modifications are not presented in cleavage site
        Returns list of duplicates of this peptide, each having its fixed
        modifications plus one combination of variable ones (stored as fixed).
        """

        # split modifications into fixed ones and variable candidates
        fixed = []
        candidates = []
        for mod in self.modifications:

            # fixed modifications
            if mod[2] == 'f':
                fixed.append(mod)

            # modifications bound to a position or to a terminus
            elif type(mod[1]) == int or mod[1] in ('nTerm', 'cTerm'):
                candidates.append(mod)

            # global modifications - one candidate per matching position
            elif position:
                candidates.extend([mod[0], index, 'v']
                                  for index, symbol in enumerate(self.chain)
                                  if symbol == mod[1])

            # global modifications - one candidate per symbol occurrence
            else:
                candidates.extend([mod] * self.chain.count(mod[1]))

        # all combinations of counted unique modifications
        candidates = self._countUniqueModifications(candidates)
        combinations = list(self._uniqueCombinations(candidates))

        # positions taken by fixed modifications
        blocked = []
        for mod in fixed:
            blocked.extend([mod[1]] * max(1, self.chain.count(str(mod[1]))))

        # ends at cleavage sites where the enzyme allows no modification
        if enzyme:
            enz = blocks.enzymes[enzyme]
            if not enz.modsBefore and self.itemAfter:
                blocked += [len(self) - 1] * maxMods
            if not enz.modsAfter and self.itemBefore:
                blocked += [0] * maxMods

        CHECK_FORCE_QUIT()

        # keep combinations fitting into the free positions only
        # (each combination item is [modification, count])
        allowed = []
        for combination in combinations:
            positions = list(blocked)
            for entry in combination:
                positions += [entry[0][1]] * entry[1]
            if self._checkModifications(positions, self.chain, maxMods):
                allowed.append(combination)

        CHECK_FORCE_QUIT()

        # convert to fixed modifications and drop duplicates
        unique = []
        for combination in allowed:
            mods = []
            for entry in combination:
                if position:
                    mods += [[entry[0][0], entry[0][1], 'f']] * entry[1]
                elif entry[0][1] in ('nTerm', 'cTerm'):
                    mods += [[entry[0][0], entry[0][1], 'f']]
                else:
                    mods += [[entry[0][0], '', 'f']] * entry[1]
            mods.sort()
            if mods not in unique:
                unique.append(mods)

        # make one peptide per combination, keep valid compositions only
        peptides = []
        for combination in unique:

            CHECK_FORCE_QUIT()

            peptide = self.duplicate()
            peptide.modifications[:] = fixed + combination
            if peptide.isvalid():
                peptides.append(peptide)

        return peptides
Beispiel #9
0
    def search(self,
               mass,
               charge,
               tolerance,
               enzyme=None,
               semiSpecific=True,
               tolUnits='Da',
               massType=0,
               maxMods=1,
               position=False):
        """Search sequence for specified ion.
            mass: (float) m/z value to search for
            charge: (int) charge of the m/z value
            tolerance: (float) mass tolerance
            tolUnits: ('Da', 'ppm') tolerance units
            enzyme: (str) enzyme used for peptides endings, if None H/OH is used
            semiSpecific: (bool) semispecific cleavage is checked (enzyme must be set)
            massType: (0 or 1) mass type of the mass value, 0 = monoisotopic, 1 = average
            maxMods: (int) maximum number of modifications at one residue
            position: (bool) retain position for variable modifications (much slower)
        Returns list of matching peptides (all variants of modifications are
        checked). Raises TypeError for cyclic sequences.
        """

        # check cyclic peptides
        if self.cyclic:
            raise TypeError(
                'Search function is not supported for cyclic peptides!')

        matches = []

        # set terminal modifications
        if enzyme:
            enzyme = blocks.enzymes[enzyme]
            expression = re.compile(enzyme.expression + '$')
            nTerm = enzyme.nTermFormula
            cTerm = enzyme.cTermFormula
        else:
            semiSpecific = False
            nTerm = 'H'
            cTerm = 'OH'

        # set mass limits
        if tolUnits == 'ppm':
            delta = tolerance * mass / 1e6
        else:
            delta = tolerance
        lowMass = mass - delta
        highMass = mass + delta

        # search sequence
        length = len(self)
        for i in range(length):
            for j in range(i + 1, length + 1):

                CHECK_FORCE_QUIT()

                # get peptide; newly created termini get the enzyme formulae
                # (same attributes as used by digest and fragmentserie)
                peptide = self[i:j]
                if i != 0:
                    peptide.nTermFormula = nTerm
                if j != length:
                    peptide.cTermFormula = cTerm

                # check enzyme specifity - at least one end of an internal
                # peptide must be a cleavage site
                if semiSpecific and peptide.itemBefore and peptide.itemAfter:
                    if not expression.search(
                            peptide.itemBefore +
                            peptide.chain[0]) and not expression.search(
                                peptide.chain[-1] + peptide.itemAfter):
                        continue

                # variate modifications
                variants = peptide.variations(maxMods=maxMods,
                                              position=position)

                # no valid variant for this peptide (min/max would fail)
                if not variants:
                    continue

                # check mass limits
                peptides = [(pep.mz(charge)[massType], pep)
                            for pep in variants]
                masses = [item[0] for item in peptides]
                if max(masses) < lowMass:
                    continue

                # longer peptides starting at i would be even heavier
                elif min(masses) > highMass:
                    break

                # search for matches
                for pepMZ, pep in peptides:
                    if lowMass <= pepMZ <= highMass:
                        matches.append(pep)

        return matches
Beispiel #10
0
def pattern(compound,
            fwhm=0.1,
            threshold=0.01,
            charge=0,
            agentFormula='H',
            agentCharge=1,
            real=True,
            model='gaussian'):
    """Calculate isotopic pattern for given compound.
        compound (str or mspy.compound) - compound
        fwhm (float) - gaussian peak width
        threshold (float) - relative intensity threshold for isotopes (in %/100)
        charge (int) - charge to be calculated
        agentFormula (str or mspy.compound) - charging agent formula
        agentCharge (int) - charging agent unit charge
        real (bool) - get real peaks from calculated profile
        model (gaussian, lorentzian, gausslorentzian) - peak shape function
    Returns list of [m/z, abundance] peaks normalized by _normalize, peaks
    with abundance below threshold are discarded. Raises ValueError if the
    (charged) formula contains a negative atom count.
    """

    # check compound
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)

    # check agent formula ('e' stands for electron and is kept as a string)
    if agentFormula != 'e' and not isinstance(agentFormula,
                                              obj_compound.compound):
        agentFormula = obj_compound.compound(agentFormula)

    # add charging agent to compound
    if charge and agentFormula != 'e':
        formula = compound.formula()
        for atom, count in agentFormula.composition().items():
            formula += '%s%d' % (atom, count * (charge / agentCharge))
        compound = obj_compound.compound(formula)

    # get composition and check for negative atom counts
    # (call form of raise is valid on both Python 2 and Python 3)
    composition = compound.composition()
    for atom in composition:
        if composition[atom] < 0:
            raise ValueError(
                'Pattern cannot be calculated for this formula! --> ' +
                compound.formula())

    # set internal thresholds: intermediate peaks are pruned by a limit
    # 100x lower than the final one, masses closer than fwhm/4 are merged
    internalThreshold = threshold / 100.
    groupingWindow = fwhm / 4.

    # calculate pattern
    finalPattern = []
    for atom in composition:

        # get isotopic profile for current atom or specified isotope only
        atomCount = composition[atom]
        atomPattern = []
        match = mod_basics.ELEMENT_PATTERN.match(atom)
        symbol, massNumber, tmp = match.groups()
        if massNumber:
            isotope = blocks.elements[symbol].isotopes[int(massNumber)]
            atomPattern.append([isotope[0], 1.])  # [mass, abundance]
        else:
            for isotope in blocks.elements[atom].isotopes.values():
                if isotope[1] > 0.:
                    atomPattern.append(list(isotope))  # [mass, abundance]

        # add atoms one by one
        for i in range(atomCount):

            CHECK_FORCE_QUIT()

            # if pattern is empty (first atom) add current atom pattern
            if len(finalPattern) == 0:
                finalPattern = _normalize(atomPattern)
                continue

            # add atom to each peak of final pattern
            currentPattern = []
            for patternIsotope in finalPattern:

                # skip peak under relevant abundance threshold
                if patternIsotope[1] < internalThreshold:
                    continue

                # add each isotope of current atom to peak
                for atomIsotope in atomPattern:
                    mass = patternIsotope[0] + atomIsotope[0]
                    abundance = patternIsotope[1] * atomIsotope[1]
                    currentPattern.append([mass, abundance])

            # group isotopes and normalize pattern
            finalPattern = _consolidate(currentPattern, groupingWindow)
            finalPattern = _normalize(finalPattern)

    # correct charge - remove/add electrons and convert mass to m/z
    if charge:
        for isotope in finalPattern:
            isotope[0] = (isotope[0] -
                          mod_basics.ELECTRON_MASS * charge) / abs(charge)

    # group isotopes
    finalPattern = _consolidate(finalPattern, groupingWindow)

    # get real peaks from profile
    if real:
        prof = profile(finalPattern, fwhm=fwhm, points=100, model=model)
        finalPattern = []
        for isotope in mod_signal.maxima(prof):
            finalPattern.append(isotope)

            # use centroid at 99% of peak height if it stays close to maximum
            centroid = mod_signal.centroid(prof, isotope[0], isotope[1] * 0.99)
            if abs(centroid - isotope[0]) < fwhm / 100.:
                finalPattern[-1][0] = centroid

    # normalize pattern
    finalPattern = _normalize(finalPattern)

    # discard peaks below threshold
    return [list(peak) for peak in finalPattern if peak[1] >= threshold]
Beispiel #11
0
def formulator(mz,
               charge=0,
               tolerance=1.,
               units='ppm',
               composition={},
               agentFormula='H',
               agentCharge=1,
               limit=1000):
    """Find formulae matching given mass within tolerance and composition limits.
        mz (float) - searched m/z value
        charge (int) - current charge
        tolerance (float) - mass tolerance
        units (ppm or Da) - mass tolerance units
        composition (dict of 'element':[min count, max count]) - composition limits
        agentFormula (str) - charging agent formula
        agentCharge (int) - charging agent unit charge
        limit (int) - maximum formulae allowed to be calculated
    Returns list of formula strings, empty list if neutral mass is not positive.
    """

    # convert m/z to neutral mass if ion is charged
    mass = mz
    if charge != 0 and agentFormula:
        mass = mod_basics.mz(mz,
                             0,
                             currentCharge=charge,
                             agentFormula=agentFormula,
                             agentCharge=agentCharge)

    # nothing can match a non-positive mass
    if mass <= 0:
        return []

    # absolute tolerance in neutral mass (Da tolerance is given in m/z)
    if units == 'ppm':
        delta = (mass / 1e6) * tolerance
    elif charge != 0:
        delta = abs(charge) * tolerance
    else:
        delta = tolerance
    loMass = mass - delta
    hiMass = mass + delta

    # heaviest elements first to speed up processing
    ranked = sorted(
        ([obj_compound.compound(el).mass(0), el] for el in composition),
        reverse=True)

    # element names, masses and count limits in the same order
    elementMasses = [item[0] for item in ranked]
    elements = [item[1] for item in ranked]
    minComposition = [composition[el][0] for el in elements]

    # upper count cannot exceed what fits into the highest mass
    maxComposition = [
        min(composition[el][1], int(hiMass / elMass))
        for el, elMass in zip(elements, elementMasses)
    ]

    # generate compositions and format them as formulae
    formulae = []
    for comp in _compositions(minComposition, maxComposition, elementMasses,
                              loMass, hiMass, limit):

        CHECK_FORCE_QUIT()

        formulae.append(''.join('%s%d' % (elements[i], count)
                                for i, count in enumerate(comp)))

    return formulae
Beispiel #12
0
def digest(sequence, enzyme, miscleavage=0, allowMods=False, strict=True):
    """Digest sequence by specified enzyme.
        sequence: (sequence) mspy sequence object
        enzyme: (str) enzyme name - must be defined in mspy.enzymes
        miscleavage: (int) number of allowed misscleavages
        allowMods: (bool) do not care about modifications in cleavage site
        strict: (bool) do not cleave even if variable modification is in cleavage site
    Returns list of peptides: fully cleaved ones first (in sequence order),
    followed by peptides with missed cleavages. Raises TypeError for
    non-sequence, non-aminoacid or cyclic input and KeyError for unknown
    enzyme.
    """

    # check sequence object
    # (call form of raise is valid on both Python 2 and Python 3)
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError("Cannot digest non-sequence object!")

    # check chain type
    if sequence.chainType != 'aminoacids':
        raise TypeError(
            'Digest function is not supported for non-amino sequences!')

    # check cyclic peptides
    if sequence.cyclic:
        raise TypeError(
            'Digest function is not supported for cyclic peptides!')

    # check sequence
    if not sequence.chain:
        return []

    # get enzyme
    if enzyme not in blocks.enzymes:
        raise KeyError('Unknown enzyme! -> ' + enzyme)
    enzyme = blocks.enzymes[enzyme]
    expression = re.compile(enzyme.expression + '$')

    # get digest indices
    slices = []  # from | to | miscl
    lastIndex = 0
    peptide = ''
    for x, aa in enumerate(sequence):

        # enzyme expression is anchored to the end of the growing chain,
        # a match means cleavage right before current residue
        peptide += aa
        if not expression.search(peptide):
            continue

        # skip not allowed modifications around the cleavage site
        if not allowMods:
            if sequence.ismodified(x - 1, strict) and not enzyme.modsBefore:
                continue
            if sequence.ismodified(x, strict) and not enzyme.modsAfter:
                continue

        slices.append((lastIndex, x, 0))
        lastIndex = x

    # add last peptide
    slices.append((lastIndex, x + 1, 0))

    # add indices for partials - join each peptide with up to
    # 'miscleavage' following ones
    cleaved = len(slices)
    for first in range(cleaved):
        for missed in range(1, miscleavage + 1):
            if first + missed >= cleaved:
                break
            slices.append(
                (slices[first][0], slices[first + missed][1], missed))

    # get peptides slices from protein
    peptides = []
    for start, stop, missed in slices:

        CHECK_FORCE_QUIT()

        # get peptide
        peptide = sequence[start:stop]
        peptide.miscleavages = missed

        # add terminal groups to ends created by the enzyme
        if start != 0:
            peptide.nTermFormula = enzyme.nTermFormula
        if stop != len(sequence):
            peptide.cTermFormula = enzyme.cTermFormula

        peptides.append(peptide)

    return peptides
Beispiel #13
0
def fragmentlosses(fragments,
                   losses=[],
                   defined=False,
                   limit=1,
                   filterIn={},
                   filterOut={}):
    """Make copies of fragments with combinations of neutral losses applied.
        fragments: (list) list of sequence fragments
        losses: (list) list of neutral losses
        defined: (bool) use monomer-defined neutral losses
        limit: (int) max length of loss combination
        filterIn: (dic) allowed series for specified losses
        filterOut: (dic) not allowed series for specified losses
    Returns a new list of duplicated fragments; a fragment is flagged as
    filtered if any applied loss is not defined by one of its monomers.
    """

    # all combinations of given losses up to the limit
    combinations = [
        list(combo)
        for size in range(1, min(len(losses), limit) + 1)
        for combo in itertools.combinations(losses, size)
    ]

    result = []
    for frag in fragments:

        CHECK_FORCE_QUIT()

        # losses defined by monomers of this fragment (specificity check)
        definedLosses = []
        for monomer in frag:
            definedLosses += blocks.monomers[monomer].losses

        # extend combinations by monomer-defined losses
        lossesToApply = combinations[:]
        if defined:
            for monomer in frag:
                monomerLosses = blocks.monomers[monomer].losses

                # iterate a snapshot, the list is growing inside
                for base in [[]] + lossesToApply[:]:
                    for loss in monomerLosses:
                        extended = sorted(base + [loss])

                        if [loss] not in lossesToApply:
                            lossesToApply.append([loss])
                        if (len(extended) <= limit
                                and extended not in lossesToApply):
                            lossesToApply.append(extended)

        # one new fragment per combination
        for combination in lossesToApply:
            candidate = frag.duplicate()

            for loss in combination:
                candidate.fragmentLosses.append(loss)

                # the same group cannot be gained and lost at once
                if loss in frag.fragmentGains:
                    break

                # check fragment type filter
                if (loss in filterOut and frag.fragmentSerie in filterOut[loss]) \
                    or (loss in filterIn and frag.fragmentSerie not in filterIn[loss]):
                    break

                # check fragment composition
                if not candidate.isvalid():
                    break

                # mark non-specific losses
                if loss not in definedLosses:
                    candidate.fragmentFiltered = True

            # store fragment only if no loss was rejected
            else:
                result.append(candidate)

    return result
Beispiel #14
0
def fragmentserie(sequence, serie, cyclicParent=False):
    """Generate list of neutral peptide fragments from given peptide.
        sequence: (sequence) mspy sequence object
        serie: (str) fragment serie name - must be defined in mspy.fragments
        cyclicParent: (bool) sequence is a linearized cyclic peptide, termini
            of M, N and C fragments are corrected accordingly
    Raises TypeError for non-sequence or cyclic input.
    """

    # check sequence object
    # (call form of raise is valid on both Python 2 and Python 3)
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError("Cannot fragment non-sequence object!")

    # check cyclic peptides
    if sequence.cyclic:
        raise TypeError(
            'Direct fragmentation of cyclic peptides is not supported!')

    frags = []
    length = len(sequence)

    # get serie definition
    serie = blocks.fragments[serie]

    # molecular ion
    if serie.terminus == 'M':
        frag = sequence[:]
        frag.fragmentSerie = serie.name
        frags.append(frag)

    # N-terminal fragments (growing from the N-terminus)
    elif serie.terminus == 'N':
        for x in range(length):
            frag = sequence[:x + 1]
            frag.fragmentSerie = serie.name
            frag.fragmentIndex = (x + 1)
            frag.cTermFormula = serie.cTermFormula
            frags.append(frag)

            CHECK_FORCE_QUIT()

    # C-terminal fragments (growing from the C-terminus)
    elif serie.terminus == 'C':
        for x in range(length):
            frag = sequence[length - (x + 1):]
            frag.fragmentSerie = serie.name
            frag.fragmentIndex = (x + 1)
            frag.nTermFormula = serie.nTermFormula
            frags.append(frag)

            CHECK_FORCE_QUIT()

    # singlet fragments (one per residue)
    elif serie.terminus == 'S':
        for x in range(length):
            frag = sequence[x:x + 1]
            frag.fragmentSerie = serie.name
            frag.fragmentIndex = (x + 1)
            frag.nTermFormula = serie.nTermFormula
            frag.cTermFormula = serie.cTermFormula
            frags.append(frag)

            CHECK_FORCE_QUIT()

    # internal fragments (at least two residues, none of the terminal ones)
    elif serie.terminus == 'I':
        for x in range(1, length - 1):
            for y in range(2, length - x):
                frag = sequence[x:x + y]
                frag.fragmentSerie = serie.name
                frag.nTermFormula = serie.nTermFormula
                frag.cTermFormula = serie.cTermFormula
                frags.append(frag)

                CHECK_FORCE_QUIT()

    # correct termini for cyclic peptides
    if cyclicParent:
        for frag in frags:
            if serie.terminus == 'M':
                frag.nTermFormula = ''
                frag.cTermFormula = ''
            elif serie.terminus == 'N':
                frag.nTermFormula = 'H'
            elif serie.terminus == 'C':
                frag.cTermFormula = 'H-1'

    # remove nonsense terminal fragments; N and S series are ordered from
    # the N-terminus, C serie from the C-terminus
    if serie.terminus == 'N':
        if frags and serie.nTermFilter:
            del frags[0]
        if frags and serie.cTermFilter:
            del frags[-1]
    elif serie.terminus == 'C':
        if frags and serie.nTermFilter:
            del frags[-1]
        if frags and serie.cTermFilter:
            del frags[0]
    elif serie.terminus == 'S':
        if frags and serie.nTermFilter:
            del frags[0]
        if frags and serie.cTermFilter:
            del frags[-1]

    return frags
Beispiel #15
0
def deisotope(peaklist, maxCharge=1, mzTolerance=0.15, intTolerance=0.5, isotopeShift=0.0):
    """Isotopes determination and calculation of peaks charge.

    Charge and isotope of every peak are reset first, then each still
    unassigned peak is tried as the monoisotopic parent of a cluster, for
    every charge from the highest absolute value down to 1. The peaklist is
    modified in place through peak.setcharge() and peak.setisotope(); nothing
    is returned.

        peaklist (mspy.peaklist) - peaklist to process
        maxCharge (int) - max charge to be searched, negative value searches negative charges
        mzTolerance (float) - absolute m/z tolerance for isotopes distance
        intTolerance (float) - relative intensity tolerance for isotopes and model (in %/100)
        isotopeShift (float) - isotope distance correction (neutral mass) (for HDX etc.)

    Raises TypeError if peaklist is not an obj_peaklist.peaklist instance.
    """

    # check peaklist
    if not isinstance(peaklist, obj_peaklist.peaklist):
        raise TypeError("Peak list must be mspy.peaklist object!")

    # clear previous results
    for peak in peaklist:
        peak.setcharge(None)
        peak.setisotope(None)

    # get charges, highest absolute charge first, keeping the sign of maxCharge
    sign = -1 if maxCharge < 0 else 1
    charges = [sign * z for z in range(abs(maxCharge), 0, -1)]

    # walk in a peaklist
    maxIndex = len(peaklist)
    for x, parent in enumerate(peaklist):

        CHECK_FORCE_QUIT()

        # skip assigned peaks
        if parent.isotope is not None:
            continue

        # try all charge states
        for z in charges:
            cluster = [parent]

            # search for next isotope within m/z tolerance
            # (distance is always measured from the last peak accepted into the cluster)
            difference = (ISOTOPE_DISTANCE + isotopeShift) / abs(z)
            y = 1
            while x + y < maxIndex:
                mzError = peaklist[x + y].mz - cluster[-1].mz - difference
                if abs(mzError) <= mzTolerance:
                    cluster.append(peaklist[x + y])
                elif mzError > mzTolerance:
                    # peak is already too far to be the next isotope, stop searching
                    break
                y += 1

            # no isotope found
            if len(cluster) == 1:
                continue

            # get theoretical isotopic pattern
            # (floor division keeps the lookup index an integer under true division as well)
            mass = min(15000, int(mod_basics.mz(parent.mz, 0, z))) // 200
            pattern = patternLookupTable[mass]

            # check minimal number of isotopes in the cluster
            # (only enforced for multiply charged clusters)
            limit = sum(1 for p in pattern if p >= 0.33)
            if len(cluster) < limit and abs(z) > 1:
                continue

            # check peak intensities in cluster
            valid = True
            isotope = 1
            limit = min(len(pattern), len(cluster))
            while isotope < limit:

                # calc theoretical intensity from previous peak and current error
                intTheoretical = (cluster[isotope - 1].intensity / pattern[isotope - 1]) * pattern[isotope]
                intError = cluster[isotope].intensity - intTheoretical

                # intensity in tolerance
                if abs(intError) <= (intTheoretical * intTolerance):
                    cluster[isotope].setisotope(isotope)
                    cluster[isotope].setcharge(z)

                # intensity is higher (overlap), peak is left unassigned
                elif intError > 0:
                    pass

                # intensity is lower and first isotope is checked (nonsense)
                elif intError < 0 and isotope == 1:
                    valid = False
                    break

                # try next peak
                isotope += 1

            # cluster is OK, set parent peak and skip other charges
            if valid:
                parent.setisotope(0)
                parent.setcharge(z)
                break
Beispiel #16
0
def _labelscan_baseline(peaks, baseline):
    """Fill baseline and s/n into peak buffers, return the highest baseline-corrected intensity."""

    basepeak = 0.0
    if baseline is None:
        return basepeak

    for peak in peaks:
        idx = mod_signal.locate(baseline, peak[0])

        # peaks located outside of the baseline points keep their current values
        if 0 < idx < len(baseline):
            p1 = baseline[idx-1]
            p2 = baseline[idx]
            peak[2] = mod_signal.interpolate((p1[0], p1[1]), (p2[0], p2[1]), x=peak[0])
            noise = mod_signal.interpolate((p1[0], p1[2]), (p2[0], p2[2]), x=peak[0])
            intens = peak[1] - peak[2]
            if noise:
                peak[3] = intens / noise
            if intens > basepeak:
                basepeak = intens

    return basepeak


def _labelscan_accept(peak, threshold, snThreshold):
    """Check peak buffer position, baseline-corrected intensity and s/n (unknown s/n passes)."""

    return peak[0] > 0 and (peak[1] - peak[2]) >= threshold and (not peak[3] or peak[3] >= snThreshold)


def labelscan(signal, minX=None, maxX=None, pickingHeight=0.75, absThreshold=0., relThreshold=0., snThreshold=0., baseline=None):
    """Return centroided peaklist for given data points.

    Local maxima above the threshold are collected, optionally corrected by
    baseline, centroided at the picking height (overlapping centroids are
    merged, keeping the more intense one), thresholded again and returned
    with their width measured at half of the baseline-corrected height.

        signal (numpy array) - signal data points, indexed as [point][0] for x and [point][1] for y
        minX (float) - x-range start (signal is cropped only if both minX and maxX are given)
        maxX (float) - x-range end
        pickingHeight (float) - centroiding height, no centroiding is done for values >= 1
        absThreshold (float) - absolute intensity threshold
        relThreshold (float) - relative intensity threshold
        snThreshold (float) - signal to noise threshold
        baseline (numpy array) - signal baseline, columns used as x, baseline level and noise

    Returns obj_peaklist.peaklist of obj_peak.peak objects.
    Raises TypeError if signal or baseline is not a NumPy array.
    """

    # check signal type
    if not isinstance(signal, numpy.ndarray):
        raise TypeError("Signal must be NumPy array!")

    # check baseline type
    # (identity test is needed, comparing an array with None is element-wise in current NumPy)
    if baseline is not None and not isinstance(baseline, numpy.ndarray):
        raise TypeError("Baseline must be NumPy array!")

    # crop data
    if minX is not None and maxX is not None:
        i1 = mod_signal.locate(signal, minX)
        i2 = mod_signal.locate(signal, maxX)
        signal = signal[i1:i2]

    # check data points
    if len(signal) == 0:
        return obj_peaklist.peaklist([])

    # get local maxima above threshold derived from the raw signal basepeak
    basepeak = mod_signal.basepeak(signal)
    threshold = max(signal[basepeak][1] * relThreshold, absThreshold)
    buff = [
        [peak[0], peak[1], 0., None, None]  # mz, ai, base, sn, fwhm
        for peak in mod_signal.maxima(signal)
        if peak[1] >= threshold
    ]

    CHECK_FORCE_QUIT()

    # get peaks baseline and s/n
    basepeak = _labelscan_baseline(buff, baseline)

    CHECK_FORCE_QUIT()

    # remove peaks below threshold
    threshold = max(basepeak * relThreshold, absThreshold)
    candidates = [peak for peak in buff if _labelscan_accept(peak, threshold, snThreshold)]

    # make centroids
    if pickingHeight < 1.:
        buff = []
        previous = None
        for peak in candidates:

            CHECK_FORCE_QUIT()

            # calc peak height
            h = ((peak[1]-peak[2]) * pickingHeight) + peak[2]

            # get centroid indexes
            idx = mod_signal.locate(signal, peak[0])
            if (idx == 0) or (idx == len(signal)):
                continue

            ileft = idx-1
            while (ileft > 0) and (signal[ileft][1] > h):
                ileft -= 1

            iright = idx
            while (iright < len(signal)-1) and (signal[iright][1] > h):
                iright += 1

            # calculate peak mz as the middle of both crossings of the picking height
            leftMZ = mod_signal.interpolate(signal[ileft], signal[ileft+1], y=h)
            rightMZ = mod_signal.interpolate(signal[iright-1], signal[iright], y=h)
            peak[0] = (leftMZ + rightMZ)/2.

            # get peak intensity, skip peak if it is missing or above the original maximum
            intens = mod_signal.intensity(signal, peak[0])
            if not (intens and intens <= peak[1]):
                continue
            peak[1] = intens

            # try to group with previous peak, the more intense one is kept
            if previous is not None and leftMZ < previous:
                if peak[1] > buff[-1][1]:
                    buff[-1] = peak
                    previous = rightMZ
            else:
                buff.append(peak)
                previous = rightMZ

        # store as candidates
        candidates = buff

    CHECK_FORCE_QUIT()

    # get peaks baseline and s/n again, peak positions and intensities may have changed
    basepeak = _labelscan_baseline(candidates, baseline)

    CHECK_FORCE_QUIT()

    # remove peaks below threshold and calculate fwhm
    threshold = max(basepeak * relThreshold, absThreshold)
    centroides = []
    for peak in candidates:
        if _labelscan_accept(peak, threshold, snThreshold):
            peak[4] = mod_signal.width(signal, peak[0], (peak[2] + ((peak[1] - peak[2]) * 0.5)))
            centroides.append(obj_peak.peak(mz=peak[0], ai=peak[1], base=peak[2], sn=peak[3], fwhm=peak[4]))

    # return peaklist object
    return obj_peaklist.peaklist(centroides)