def fragmentgains(fragments, gains=[], filterIn={'H2O': ['b'], 'CO': ['b', 'c', 'break']}, filterOut={}):
    """Apply specified neutral gains to fragments.
        fragments: (list) list of sequence fragments
        gains: (list) list of neutral gains
        filterIn: (dic) allowed series for specified gains
        filterOut: (dic) not allowed series for specified gains
    """
    
    # generate fragments
    buff = []
    for frag in fragments:
        
        CHECK_FORCE_QUIT()
        
        # is parent cyclic?
        cyclicParent = False
        for item in frag.history:
            if 'break' in item:
                cyclicParent = True
                break
        
        # apply gains
        for gain in gains:
            
            # check neutral losses
            if gain in frag.fragmentLosses:
                continue
            
            # check fragment type filters
            if (gain in filterOut and frag.fragmentSerie in filterOut[gain]) \
                or (gain in filterIn and not frag.fragmentSerie in filterIn[gain]):
                continue
            
            # check break (cyclic parent)
            if gain in filterIn and 'break' in filterIn[gain] and not cyclicParent:
                continue
            
            # make fragment
            newFrag = frag.duplicate()
            newFrag.fragmentGains.append(gain)
            
            # check fragment composition
            if not newFrag.isvalid():
                continue
            
            # store fragment
            buff.append(newFrag)
    
    return buff
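# Usage sketch (illustrative only, not part of the module): 'frags' stands
# for a list of fragments produced by fragmentserie() below; apply a water
# gain to the series allowed by the default filterIn.
#
#   frags = fragmentgains(frags, gains=['H2O'])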
def savgol(signal, window, cycles=1, order=3):
    """Smooth signal by Savitzky-Golay filter. New array is returned.
        signal (numpy array) - signal data points
        window (float) - m/z window size for smoothing
        cycles (int) - number of repeating cycles
        order (int) - order of the polynomial used
    """
    
    # approximate number of points within window
    window = int(window * len(signal) / (signal[-1][0] - signal[0][0]))
    if window <= order:
        return signal.copy()
    
    # unpack axes
    xAxis, yAxis = numpy.hsplit(signal, 2)
    yAxis = yAxis.flatten()
    
    # coefficients
    orderRange = range(order + 1)
    halfWindow = (window - 1) // 2
    b = numpy.mat([[k**i for i in orderRange] for k in range(-halfWindow, halfWindow + 1)])
    m = numpy.linalg.pinv(b).A[0]
    window = len(m)
    halfWindow = (window - 1) // 2
    
    # precompute the offset values for better performance
    offsets = range(-halfWindow, halfWindow + 1)
    offsetData = zip(offsets, m)
    
    # smooth the data
    while cycles:
        
        smoothData = list()
        yAxis = numpy.concatenate((numpy.zeros(halfWindow) + yAxis[0], yAxis, numpy.zeros(halfWindow) + yAxis[-1]))
        for i in range(halfWindow, len(yAxis) - halfWindow):
            
            CHECK_FORCE_QUIT()
            
            value = 0.0
            for offset, weight in offsetData:
                value += weight * yAxis[i + offset]
            smoothData.append(value)
        
        yAxis = smoothData
        cycles -= 1
    
    # return smoothed data
    yAxis = numpy.array(yAxis)
    yAxis.shape = (-1, 1)
    data = numpy.concatenate((xAxis, yAxis), axis=1)
    
    return data.copy()
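# Usage sketch (illustrative only, not part of the module): 'points' stands
# for any (N, 2) numpy array of [m/z, intensity] rows, e.g. a scan profile.
#
#   smoothed = savgol(points, window=0.05, cycles=2, order=3)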
def deconvolute(peaklist, massType=0):
    """Recalculate peaklist to singly charged.
        peaklist (mspy.peaklist) - peak list to deconvolute
        massType (0 or 1) - mass type used for m/z re-calculation, 0 = monoisotopic, 1 = average
    """
    
    # recalculate peaks
    buff = []
    for peak in copy.deepcopy(peaklist):
        
        CHECK_FORCE_QUIT()
        
        # uncharged peak
        if not peak.charge:
            continue
        
        # charge is correct
        elif abs(peak.charge) == 1:
            buff.append(peak)
        
        # recalculate peak
        else:
            
            # set fwhm
            if peak.fwhm:
                newFwhm = abs(peak.fwhm * peak.charge)
                peak.setfwhm(newFwhm)
            
            # set m/z and charge
            if peak.charge < 0:
                newMz = mod_basics.mz(mass=peak.mz, charge=-1, currentCharge=peak.charge, massType=massType)
                peak.setmz(newMz)
                peak.setcharge(-1)
            else:
                newMz = mod_basics.mz(mass=peak.mz, charge=1, currentCharge=peak.charge, massType=massType)
                peak.setmz(newMz)
                peak.setcharge(1)
            
            # store peak
            buff.append(peak)
    
    # remove baseline
    if buff:
        for peak in buff:
            peak.setsn(None)
            peak.setai(peak.intensity)
            peak.setbase(0.)
    
    # update peaklist
    peaklist = obj_peaklist.peaklist(buff)
    
    return peaklist
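# Usage sketch (illustrative only, not part of the module): 'peaks' stands
# for an existing mspy.peaklist with charges already assigned (e.g. by
# deisotope() below).
#
#   singlyCharged = deconvolute(peaks, massType=0)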
def _makeModels(self, raster, reset=True):
    """Calculate pattern for every model."""
    
    models = []
    exchanged = []
    
    # get raster
    rasterMin = raster[0] - self.fwhm
    rasterMax = raster[-1] + self.fwhm
    
    for x in sorted(self.models.keys()):
        
        CHECK_FORCE_QUIT()
        
        # get compound
        compound = self.models[x][0]
        
        # check if mz is within raster
        mz = compound.mz(self.charge)
        if mz[0] > rasterMax or mz[1] < rasterMin:
            continue
        
        # calculate isotopic pattern
        pattern = self.models[x][1]
        if reset or pattern == []:
            pattern = compound.pattern(fwhm=self.fwhm, charge=self.charge, real=False)
            self.models[x][1] = pattern
        
        # calculate model profile
        profile = mod_pattern.profile(pattern, fwhm=self.fwhm, raster=raster, model=self.peakShape)
        model = profile[:, 1].flatten()
        
        # check model profile
        if model.any():
            models.append(model)
            exchanged.append(x)
    
    # make models matrix
    models = numpy.array(models)
    
    return models, exchanged
def movaver(signal, window, cycles=1, style='flat'):
    """Smooth signal by moving average filter. New array is returned.
        signal (numpy array) - signal data points
        window (float) - m/z window size for smoothing
        cycles (int) - number of repeating cycles
        style ('flat', 'gaussian' or numpy window name) - smoothing window type
    """
    
    # approximate number of points within window
    window = int(window * len(signal) / (signal[-1][0] - signal[0][0]))
    window = min(window, len(signal))
    if window < 3:
        return signal.copy()
    if not window % 2:
        window -= 1
    
    # unpack mz and intensity
    xAxis, yAxis = numpy.hsplit(signal, 2)
    xAxis = xAxis.flatten()
    yAxis = yAxis.flatten()
    
    # smooth the points
    while cycles:
        
        CHECK_FORCE_QUIT()
        
        if style == 'flat':
            w = numpy.ones(window, 'f')
        elif style == 'gaussian':
            r = numpy.array([(i - (window - 1) / 2.) for i in range(window)])
            w = numpy.exp(-(r**2 / (window / 4.)**2))
        else:
            w = eval('numpy.' + style + '(window)')
        
        s = numpy.r_[yAxis[window - 1:0:-1], yAxis, yAxis[-2:-window - 1:-1]]
        y = numpy.convolve(w / w.sum(), s, mode='same')
        yAxis = y[window - 1:-window + 1]
        cycles -= 1
    
    # return smoothed data
    xAxis.shape = (-1, 1)
    yAxis.shape = (-1, 1)
    data = numpy.concatenate((xAxis, yAxis), axis=1)
    
    return data.copy()
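# Usage sketch (illustrative only, not part of the module): 'points' stands
# for any (N, 2) numpy array of [m/z, intensity] rows.
#
#   smoothed = movaver(points, window=0.1, cycles=1, style='flat')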
def _leastSquare(self, data, models, iterLimit=None, chiLimit=1e-3):
    """Least-square fitting. Adapted from the original code by Konrad Hinsen."""
    
    normf = 100. / numpy.max(data)
    data *= normf
    
    params = [50.] * len(models)
    id = numpy.identity(len(params))
    chisq, alpha = self._chiSquare(data, models, params)
    l = 0.001
    
    niter = 0
    while True:
        
        CHECK_FORCE_QUIT()
        
        niter += 1
        delta = solveLinEq(alpha + l * numpy.diagonal(alpha) * id, -0.5 * numpy.array(chisq[1]))
        next_params = map(lambda a, b: a + b, params, delta)
        
        for x in range(len(next_params)):
            if next_params[x] < 0.:
                next_params[x] = 0.
        
        next_chisq, next_alpha = self._chiSquare(data, models, next_params)
        if next_chisq[0] > chisq[0]:
            l = 5. * l
        elif chisq[0] - next_chisq[0] < chiLimit:
            break
        else:
            l = 0.5 * l
            params = next_params
            chisq = next_chisq
            alpha = next_alpha
        
        if iterLimit and niter == iterLimit:
            break
    
    # convert to array so the normalization below applies to the whole vector
    next_params = numpy.array(next_params)
    next_params /= normf
    
    return next_params
def _initModels(self, scales):
    """Init theoretical envelope models."""
    
    self.models = {}
    
    # generate possible models to fit
    for x in scales:
        
        CHECK_FORCE_QUIT()
        
        # make compound
        item = "%s(%s)%d(%s)%d" % (self.formula, self._lossFormula, x, self._gainFormula, x)
        compound = obj_compound.compound(item)
        
        # check compound
        if not compound.isvalid(charge=self.charge):
            continue
        
        # append model [0-compound, 1-pattern, 2-abs abundance, 3-rel abundance]
        self.models[x] = [compound, [], 0.0, 0.0]
def variations(self, maxMods=1, position=True, enzyme=None):
    """Calculate all possible combinations of variable modifications.
        maxMods: (int) maximum modifications allowed per one residue
        position: (bool) retain modifications positions (much slower)
        enzyme: (str) enzyme name to ensure that modifications are not presented in cleavage site
    """
    
    variablePeptides = []
    
    # get modifications
    fixedMods = []
    variableMods = []
    for mod in self.modifications:
        
        # fixed modifications
        if mod[2] == 'f':
            fixedMods.append(mod)
        
        # positioned modifications
        elif type(mod[1]) == int:
            variableMods.append(mod)
        
        # terminal modifications
        elif mod[1] in ('nTerm', 'cTerm'):
            variableMods.append(mod)
        
        # retain position of global modifications
        elif position:
            for x, symbol in enumerate(self.chain):
                if symbol == mod[1]:
                    variableMods.append([mod[0], x, 'v'])
        else:
            variableMods += [mod] * self.chain.count(mod[1])
    
    # make combinations of variable modifications
    variableMods = self._countUniqueModifications(variableMods)
    combinations = []
    for x in self._uniqueCombinations(variableMods):
        combinations.append(x)
    
    # disable positions occupied by fixed modifications
    occupied = []
    for mod in fixedMods:
        count = max(1, self.chain.count(str(mod[1])))
        occupied += [mod[1]] * count
    
    # disable modifications at cleavage sites
    if enzyme:
        enz = blocks.enzymes[enzyme]
        if not enz.modsBefore and self.itemAfter:
            occupied += [len(self) - 1] * maxMods
        if not enz.modsAfter and self.itemBefore:
            occupied += [0] * maxMods
    
    CHECK_FORCE_QUIT()
    
    # filter modifications
    buff = []
    for combination in combinations:
        positions = occupied[:]
        for mod in combination:
            positions += [mod[0][1]] * mod[1]
        if self._checkModifications(positions, self.chain, maxMods):
            buff.append(combination)
    combinations = buff
    
    CHECK_FORCE_QUIT()
    
    # format modifications and filter same
    buff = []
    for combination in combinations:
        mods = []
        for mod in combination:
            if position:
                mods += [[mod[0][0], mod[0][1], 'f']] * mod[1]
            elif mod[0][1] in ('nTerm', 'cTerm'):
                mods += [[mod[0][0], mod[0][1], 'f']]
            else:
                mods += [[mod[0][0], '', 'f']] * mod[1]
        mods.sort()
        if not mods in buff:
            buff.append(mods)
    combinations = buff
    
    # make new peptides
    for combination in combinations:
        
        CHECK_FORCE_QUIT()
        
        variablePeptide = self.duplicate()
        variablePeptide.modifications[:] = fixedMods + combination
        
        # check composition
        if variablePeptide.isvalid():
            variablePeptides.append(variablePeptide)
    
    return variablePeptides
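# Usage sketch (illustrative only, not part of the module): 'peptide' stands
# for an mspy sequence object carrying variable modifications; expand them
# to all allowed combinations.
#
#   variants = peptide.variations(maxMods=1, position=True)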
def search(self, mass, charge, tolerance, enzyme=None, semiSpecific=True, tolUnits='Da', massType=0, maxMods=1, position=False):
    """Search sequence for specified ion.
        mass: (float) m/z value to search for
        charge: (int) charge of the m/z value
        tolerance: (float) mass tolerance
        tolUnits: ('Da', 'ppm') tolerance units
        enzyme: (str) enzyme used for peptides endings, if None H/OH is used
        semiSpecific: (bool) semispecific cleavage is checked (enzyme must be set)
        massType: (0 or 1) mass type of the mass value, 0 = monoisotopic, 1 = average
        maxMods: (int) maximum number of modifications at one residue
        position: (bool) retain position for variable modifications (much slower)
    """
    
    # check cyclic peptides
    if self.cyclic:
        raise TypeError('Search function is not supported for cyclic peptides!')
    
    matches = []
    
    # set terminal modifications
    if enzyme:
        enzyme = blocks.enzymes[enzyme]
        expression = re.compile(enzyme.expression + '$')
        nTerm = enzyme.nTermFormula
        cTerm = enzyme.cTermFormula
    else:
        semiSpecific = False
        nTerm = 'H'
        cTerm = 'OH'
    
    # set mass limits
    if tolUnits == 'ppm':
        lowMass = mass - (tolerance * mass / 1000000)
        highMass = mass + (tolerance * mass / 1000000)
    else:
        lowMass = mass - tolerance
        highMass = mass + tolerance
    
    # search sequence
    length = len(self)
    for i in range(length):
        for j in range(i + 1, length + 1):
            
            CHECK_FORCE_QUIT()
            
            # get peptide
            peptide = self[i:j]
            if i != 0:
                peptide.nTermFormula = nTerm
            if j != length:
                peptide.cTermFormula = cTerm
            
            # check enzyme specificity
            if semiSpecific and peptide.itemBefore and peptide.itemAfter:
                if not expression.search(peptide.itemBefore + peptide.chain[0]) and not expression.search(peptide.chain[-1] + peptide.itemAfter):
                    continue
            
            # variate modifications
            variants = peptide.variations(maxMods=maxMods, position=position)
            
            # check mass limits
            peptides = []
            masses = []
            for pep in variants:
                pepMZ = pep.mz(charge)[massType]
                peptides.append((pepMZ, pep))
                masses.append(pepMZ)
            
            if max(masses) < lowMass:
                continue
            elif min(masses) > highMass:
                break
            
            # search for matches
            for pep in peptides:
                if lowMass <= pep[0] <= highMass:
                    matches.append(pep[1])
    
    return matches
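# Usage sketch (illustrative only, not part of the module): 'protein' stands
# for an mspy sequence object; the enzyme name is assumed to be defined in
# mspy.enzymes, and the m/z value is an arbitrary example.
#
#   hits = protein.search(mass=1265.62, charge=1, tolerance=10.,
#       tolUnits='ppm', enzyme='Trypsin', semiSpecific=True)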
def pattern(compound, fwhm=0.1, threshold=0.01, charge=0, agentFormula='H', agentCharge=1, real=True, model='gaussian'):
    """Calculate isotopic pattern for given compound.
        compound (str or mspy.compound) - compound
        fwhm (float) - gaussian peak width
        threshold (float) - relative intensity threshold for isotopes (in %/100)
        charge (int) - charge to be calculated
        agentFormula (str or mspy.compound) - charging agent formula
        agentCharge (int) - charging agent unit charge
        real (bool) - get real peaks from calculated profile
        model (gaussian, lorentzian, gausslorentzian) - peak shape function
    """
    
    # check compound
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)
    
    # check agent formula
    if agentFormula != 'e' and not isinstance(agentFormula, obj_compound.compound):
        agentFormula = obj_compound.compound(agentFormula)
    
    # add charging agent to compound
    if charge and agentFormula != 'e':
        formula = compound.formula()
        for atom, count in agentFormula.composition().items():
            formula += '%s%d' % (atom, count * (charge / agentCharge))
        compound = obj_compound.compound(formula)
    
    # get composition and check for negative atom counts
    composition = compound.composition()
    for atom in composition:
        if composition[atom] < 0:
            raise ValueError, 'Pattern cannot be calculated for this formula! --> ' + compound.formula()
    
    # set internal thresholds
    internalThreshold = threshold / 100.
    groupingWindow = fwhm / 4.
    
    # calculate pattern
    finalPattern = []
    for atom in composition:
        
        # get isotopic profile for current atom or specified isotope only
        atomCount = composition[atom]
        atomPattern = []
        match = mod_basics.ELEMENT_PATTERN.match(atom)
        symbol, massNumber, tmp = match.groups()
        if massNumber:
            isotope = blocks.elements[symbol].isotopes[int(massNumber)]
            atomPattern.append([isotope[0], 1.])  # [mass, abundance]
        else:
            for massNumber, isotope in blocks.elements[atom].isotopes.items():
                if isotope[1] > 0.:
                    atomPattern.append(list(isotope))  # [mass, abundance]
        
        # add atoms
        for i in range(atomCount):
            
            CHECK_FORCE_QUIT()
            
            # if pattern is empty (first atom) add current atom pattern
            if len(finalPattern) == 0:
                finalPattern = _normalize(atomPattern)
                continue
            
            # add atom to each peak of final pattern
            currentPattern = []
            for patternIsotope in finalPattern:
                
                # skip peak under relevant abundance threshold
                if patternIsotope[1] < internalThreshold:
                    continue
                
                # add each isotope of current atom to peak
                for atomIsotope in atomPattern:
                    mass = patternIsotope[0] + atomIsotope[0]
                    abundance = patternIsotope[1] * atomIsotope[1]
                    currentPattern.append([mass, abundance])
            
            # group isotopes and normalize pattern
            finalPattern = _consolidate(currentPattern, groupingWindow)
            finalPattern = _normalize(finalPattern)
    
    # correct charge
    if charge:
        for i in range(len(finalPattern)):
            finalPattern[i][0] = (finalPattern[i][0] - mod_basics.ELECTRON_MASS * charge) / abs(charge)
    
    # group isotopes
    finalPattern = _consolidate(finalPattern, groupingWindow)
    
    # get real peaks from profile
    if real:
        prof = profile(finalPattern, fwhm=fwhm, points=100, model=model)
        finalPattern = []
        for isotope in mod_signal.maxima(prof):
            finalPattern.append(isotope)
            centroid = mod_signal.centroid(prof, isotope[0], isotope[1] * 0.99)
            if abs(centroid - isotope[0]) < fwhm / 100.:
                finalPattern[-1][0] = centroid
    
    # normalize pattern
    finalPattern = _normalize(finalPattern)
    
    # discard peaks below threshold
    filteredPeaks = []
    for peak in finalPattern:
        if peak[1] >= threshold:
            filteredPeaks.append(list(peak))
    finalPattern = filteredPeaks
    
    return finalPattern
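# Usage sketch (illustrative only, not part of the module): calculate the
# isotopic pattern of a neutral formula given as a plain string, protonated
# to 1+; the formula below is an arbitrary example.
#
#   isotopes = pattern('C34H63N11O9', fwhm=0.05, charge=1, agentFormula='H')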
def formulator(mz, charge=0, tolerance=1., units='ppm', composition={}, agentFormula='H', agentCharge=1, limit=1000):
    """Generate formulae for given mass, tolerance and composition limits.
        mz (float) - searched m/z value
        charge (int) - current charge
        tolerance (float) - mass tolerance
        units (ppm or Da) - mass tolerance units
        composition (dict of 'element':[min count, max count]) - composition limits
        agentFormula (str) - charging agent formula
        agentCharge (int) - charging agent unit charge
        limit (int) - maximum formulae allowed to be calculated
    """
    
    # get neutral mass
    if charge != 0 and agentFormula:
        mass = mod_basics.mz(mz, 0, currentCharge=charge, agentFormula=agentFormula, agentCharge=agentCharge)
    else:
        mass = mz
    
    # check neutral mass
    if mass <= 0:
        return []
    
    # get mass limits
    if units == 'ppm':
        loMass = mass - (mass / 1e6) * tolerance
        hiMass = mass + (mass / 1e6) * tolerance
    elif charge != 0:
        loMass = mass - abs(charge) * tolerance
        hiMass = mass + abs(charge) * tolerance
    else:
        loMass = mass - tolerance
        hiMass = mass + tolerance
    
    # sort elements by masses to speed up processing
    buff = []
    for el in composition:
        elMass = obj_compound.compound(el).mass(0)
        buff.append([elMass, el])
    buff.sort(reverse=True)
    
    # compile elements and counts
    elementMasses = []
    elements = []
    minComposition = []
    maxComposition = []
    for el in buff:
        elementMasses.append(el[0])
        elements.append(el[1])
        minComposition.append(composition[el[1]][0])
        maxComposition.append(composition[el[1]][1])
    
    # check max composition
    for i in range(len(maxComposition)):
        maxComposition[i] = min(maxComposition[i], int(hiMass / elementMasses[i]))
    
    # generate compositions
    formulae = []
    comps = _compositions(minComposition, maxComposition, elementMasses, loMass, hiMass, limit)
    for comp in comps:
        
        CHECK_FORCE_QUIT()
        
        formula = ''
        for i in range(len(comp)):
            formula += '%s%d' % (elements[i], comp[i])
        formulae.append(formula)
    
    return formulae
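# Usage sketch (illustrative only, not part of the module): the elemental
# limits below are arbitrary example values, not recommended defaults.
#
#   formulae = formulator(mz=301.1234, charge=1, tolerance=5., units='ppm',
#       composition={'C': [0, 20], 'H': [0, 40], 'N': [0, 5], 'O': [0, 10]})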
def digest(sequence, enzyme, miscleavage=0, allowMods=False, strict=True):
    """Digest sequence by specified enzyme.
        sequence: (sequence) mspy sequence object
        enzyme: (str) enzyme name - must be defined in mspy.enzymes
        miscleavage: (int) number of allowed miscleavages
        allowMods: (bool) do not care about modifications in cleavage site
        strict: (bool) do not cleave even if variable modification is in cleavage site
    """
    
    # check sequence object
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError, "Cannot digest non-sequence object!"
    
    # check chain type
    if sequence.chainType != 'aminoacids':
        raise TypeError, 'Digest function is not supported for non-amino sequences!'
    
    # check cyclic peptides
    if sequence.cyclic:
        raise TypeError, 'Digest function is not supported for cyclic peptides!'
    
    # check sequence
    if not sequence.chain:
        return []
    
    # get enzyme
    if enzyme in blocks.enzymes:
        enzyme = blocks.enzymes[enzyme]
        expression = re.compile(enzyme.expression + '$')
    else:
        raise KeyError, 'Unknown enzyme! -> ' + enzyme
    
    # get digest indices
    slices = []  # from | to | miscl
    lastIndex = 0
    peptide = ''
    for x, aa in enumerate(sequence):
        
        # check expression
        peptide += aa
        if expression.search(peptide):
            
            # skip not allowed modifications
            if not allowMods and sequence.ismodified(x - 1, strict) and not enzyme.modsBefore:
                continue
            elif not allowMods and sequence.ismodified(x, strict) and not enzyme.modsAfter:
                continue
            else:
                slices.append((lastIndex, x, 0))
                lastIndex = x
    
    # add last peptide
    slices.append((lastIndex, x + 1, 0))
    
    # add indices for partials
    indices = len(slices)
    for x in range(indices):
        for y in range(1, miscleavage + 1):
            if x + y < indices:
                slices.append((slices[x][0], slices[x + y][1], y))
            else:
                break
    
    # get peptides slices from protein
    peptides = []
    for indices in slices:
        
        CHECK_FORCE_QUIT()
        
        # get peptide
        peptide = sequence[indices[0]:indices[1]]
        peptide.miscleavages = indices[2]
        
        # add terminal groups
        if indices[0] != 0:
            peptide.nTermFormula = enzyme.nTermFormula
        if indices[1] != len(sequence):
            peptide.cTermFormula = enzyme.cTermFormula
        
        peptides.append(peptide)
    
    return peptides
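# Usage sketch (illustrative only, not part of the module): 'protein' stands
# for an mspy sequence object; the enzyme name is assumed to be defined in
# mspy.enzymes.
#
#   peptides = digest(protein, 'Trypsin', miscleavage=1)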
def fragmentlosses(fragments, losses=[], defined=False, limit=1, filterIn={}, filterOut={}):
    """Apply specified neutral losses to fragments.
        fragments: (list) list of sequence fragments
        losses: (list) list of neutral losses
        defined: (bool) use monomer-defined neutral losses
        limit: (int) max length of loss combination
        filterIn: (dic) allowed series for specified losses
        filterOut: (dic) not allowed series for specified losses
    """
    
    # make losses combinations
    combinations = []
    for x in range(1, min(len(losses), limit) + 1):
        for c in itertools.combinations(losses, x):
            combinations.append(list(c))
    
    # generate fragments
    buff = []
    for frag in fragments:
        
        CHECK_FORCE_QUIT()
        
        # get monomer-defined losses to check specificity
        definedLosses = []
        for monomer in frag:
            definedLosses += blocks.monomers[monomer].losses
        
        # append new combinations with monomer-defined losses
        lossesToApply = combinations[:]
        if defined:
            for monomer in frag:
                for item in ([[]] + lossesToApply[:]):
                    for loss in blocks.monomers[monomer].losses:
                        newItem = item + [loss]
                        newItem.sort()
                        if not [loss] in lossesToApply:
                            lossesToApply.append([loss])
                        if len(newItem) <= limit and not newItem in lossesToApply:
                            lossesToApply.append(newItem)
        
        # make fragment
        for combination in lossesToApply:
            newFrag = frag.duplicate()
            skip = False
            
            # apply losses
            for loss in combination:
                newFrag.fragmentLosses.append(loss)
                
                # check neutral gains
                if loss in frag.fragmentGains:
                    skip = True
                    break
                
                # check fragment type filter
                if (loss in filterOut and frag.fragmentSerie in filterOut[loss]) \
                    or (loss in filterIn and not frag.fragmentSerie in filterIn[loss]):
                    skip = True
                    break
                
                # check fragment composition
                if not newFrag.isvalid():
                    skip = True
                    break
                
                # filter non-specific losses
                if not loss in definedLosses:
                    newFrag.fragmentFiltered = True
            
            # store fragment
            if not skip:
                buff.append(newFrag)
    
    return buff
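# Usage sketch (illustrative only, not part of the module): 'frags' stands
# for a list of fragments produced by fragmentserie() below; apply common
# neutral losses and keep monomer-defined ones.
#
#   frags = fragmentlosses(frags, losses=['H2O', 'NH3'], defined=True, limit=1)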
def fragmentserie(sequence, serie, cyclicParent=False):
    """Generate list of neutral peptide fragments from given peptide.
        sequence: (sequence) mspy sequence object
        serie: (str) fragment serie name - must be defined in mspy.fragments
        cyclicParent: (bool) sequence comes from a cyclic parent (termini are corrected)
    """
    
    # check sequence object
    if not isinstance(sequence, obj_sequence.sequence):
        raise TypeError, "Cannot fragment non-sequence object!"
    
    # check cyclic peptides
    if sequence.cyclic:
        raise TypeError, 'Direct fragmentation of cyclic peptides is not supported!'
    
    frags = []
    length = len(sequence)
    
    # get serie definition
    serie = blocks.fragments[serie]
    
    # molecular ion
    if serie.terminus == 'M':
        frag = sequence[:]
        frag.fragmentSerie = serie.name
        frags.append(frag)
    
    # N-terminal fragments
    elif serie.terminus == 'N':
        for x in range(length):
            frag = sequence[:x + 1]
            frag.fragmentSerie = serie.name
            frag.fragmentIndex = (x + 1)
            frag.cTermFormula = serie.cTermFormula
            frags.append(frag)
            
            CHECK_FORCE_QUIT()
    
    # C-terminal fragments
    elif serie.terminus == 'C':
        for x in range(length):
            frag = sequence[length - (x + 1):]
            frag.fragmentSerie = serie.name
            frag.fragmentIndex = (x + 1)
            frag.nTermFormula = serie.nTermFormula
            frags.append(frag)
            
            CHECK_FORCE_QUIT()
    
    # singlet fragments
    elif serie.terminus == 'S':
        for x in range(length):
            frag = sequence[x:x + 1]
            frag.fragmentSerie = serie.name
            frag.fragmentIndex = (x + 1)
            frag.nTermFormula = serie.nTermFormula
            frag.cTermFormula = serie.cTermFormula
            frags.append(frag)
            
            CHECK_FORCE_QUIT()
    
    # internal fragments
    elif serie.terminus == 'I':
        for x in range(1, length - 1):
            for y in range(2, length - x):
                frag = sequence[x:x + y]
                frag.fragmentSerie = serie.name
                frag.nTermFormula = serie.nTermFormula
                frag.cTermFormula = serie.cTermFormula
                frags.append(frag)
                
                CHECK_FORCE_QUIT()
    
    # correct termini for cyclic peptides
    if cyclicParent:
        for frag in frags:
            if serie.terminus == 'M':
                frag.nTermFormula = ''
                frag.cTermFormula = ''
            elif serie.terminus == 'N':
                frag.nTermFormula = 'H'
            elif serie.terminus == 'C':
                frag.cTermFormula = 'H-1'
    
    # remove nonsense terminal fragments
    if serie.terminus == 'N':
        if frags and serie.nTermFilter:
            del frags[0]
        if frags and serie.cTermFilter:
            del frags[-1]
    elif serie.terminus == 'C':
        if frags and serie.nTermFilter:
            del frags[-1]
        if frags and serie.cTermFilter:
            del frags[0]
    elif serie.terminus == 'S':
        if frags and serie.nTermFilter:
            del frags[0]
        if frags and serie.cTermFilter:
            del frags[-1]
    
    return frags
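# Usage sketch (illustrative only, not part of the module): 'peptide' stands
# for an mspy sequence object; the serie name is assumed to be defined in
# mspy.fragments.
#
#   bIons = fragmentserie(peptide, 'b')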
def deisotope(peaklist, maxCharge=1, mzTolerance=0.15, intTolerance=0.5, isotopeShift=0.0):
    """Isotopes determination and calculation of peaks charge.
        peaklist (mspy.peaklist) - peaklist to process
        maxCharge (float) - max charge to be searched
        mzTolerance (float) - absolute m/z tolerance for isotopes distance
        intTolerance (float) - relative intensity tolerance for isotopes and model (in %/100)
        isotopeShift (float) - isotope distance correction (neutral mass) (for HDX etc.)
    """
    
    # check peaklist
    if not isinstance(peaklist, obj_peaklist.peaklist):
        raise TypeError, "Peak list must be mspy.peaklist object!"
    
    # clear previous results
    for peak in peaklist:
        peak.setcharge(None)
        peak.setisotope(None)
    
    # get charges
    if maxCharge < 0:
        charges = [-x for x in range(1, abs(maxCharge) + 1)]
    else:
        charges = [x for x in range(1, maxCharge + 1)]
    charges.reverse()
    
    # walk in a peaklist
    maxIndex = len(peaklist)
    for x, parent in enumerate(peaklist):
        
        CHECK_FORCE_QUIT()
        
        # skip assigned peaks
        if parent.isotope != None:
            continue
        
        # try all charge states
        for z in charges:
            cluster = [parent]
            
            # search for next isotope within m/z tolerance
            difference = (ISOTOPE_DISTANCE + isotopeShift) / abs(z)
            y = 1
            while x + y < maxIndex:
                mzError = (peaklist[x + y].mz - cluster[-1].mz - difference)
                if abs(mzError) <= mzTolerance:
                    cluster.append(peaklist[x + y])
                elif mzError > mzTolerance:
                    break
                y += 1
            
            # no isotope found
            if len(cluster) == 1:
                continue
            
            # get theoretical isotopic pattern
            mass = min(15000, int(mod_basics.mz(parent.mz, 0, z))) / 200
            pattern = patternLookupTable[mass]
            
            # check minimal number of isotopes in the cluster
            limit = 0
            for p in pattern:
                if p >= 0.33:
                    limit += 1
            if len(cluster) < limit and abs(z) > 1:
                continue
            
            # check peak intensities in cluster
            valid = True
            isotope = 1
            limit = min(len(pattern), len(cluster))
            while isotope < limit:
                
                # calc theoretical intensity from previous peak and current error
                intTheoretical = (cluster[isotope - 1].intensity / pattern[isotope - 1]) * pattern[isotope]
                intError = cluster[isotope].intensity - intTheoretical
                
                # intensity in tolerance
                if abs(intError) <= (intTheoretical * intTolerance):
                    cluster[isotope].setisotope(isotope)
                    cluster[isotope].setcharge(z)
                
                # intensity is higher (overlap)
                elif intError > 0:
                    pass
                
                # intensity is lower and first isotope is checked (nonsense)
                elif (intError < 0 and isotope == 1):
                    valid = False
                    break
                
                # try next peak
                isotope += 1
            
            # cluster is OK, set parent peak and skip other charges
            if valid:
                parent.setisotope(0)
                parent.setcharge(z)
                break
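# Usage sketch (illustrative only, not part of the module): 'peaks' stands
# for an existing mspy.peaklist; charges and isotope ranks are written back
# onto the peaks in place.
#
#   deisotope(peaks, maxCharge=3, mzTolerance=0.1, intTolerance=0.5)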
def labelscan(signal, minX=None, maxX=None, pickingHeight=0.75, absThreshold=0., relThreshold=0., snThreshold=0., baseline=None):
    """Return centroided peaklist for given data points.
        signal (numpy array) - signal data points
        minX (float) - x-range start
        maxX (float) - x-range end
        pickingHeight (float) - centroiding height
        absThreshold (float) - absolute intensity threshold
        relThreshold (float) - relative intensity threshold
        snThreshold (float) - signal to noise threshold
        baseline (numpy array) - signal baseline
    """
    
    # check signal type
    if not isinstance(signal, numpy.ndarray):
        raise TypeError, "Signal must be NumPy array!"
    
    # check baseline type
    if baseline != None and not isinstance(baseline, numpy.ndarray):
        raise TypeError, "Baseline must be NumPy array!"
    
    # crop data
    if minX != None and maxX != None:
        i1 = mod_signal.locate(signal, minX)
        i2 = mod_signal.locate(signal, maxX)
        signal = signal[i1:i2]
    
    # check data points
    if len(signal) == 0:
        return obj_peaklist.peaklist([])
    
    # get local maxima
    buff = []
    basepeak = mod_signal.basepeak(signal)
    threshold = max(signal[basepeak][1] * relThreshold, absThreshold)
    for peak in mod_signal.maxima(signal):
        if peak[1] >= threshold:
            buff.append([peak[0], peak[1], 0., None, None])  # mz, ai, base, sn, fwhm
    
    CHECK_FORCE_QUIT()
    
    # get peaks baseline and s/n
    basepeak = 0.0
    if baseline != None:
        for peak in buff:
            idx = mod_signal.locate(baseline, peak[0])
            if (idx > 0) and (idx < len(baseline)):
                p1 = baseline[idx - 1]
                p2 = baseline[idx]
                peak[2] = mod_signal.interpolate((p1[0], p1[1]), (p2[0], p2[1]), x=peak[0])
                noise = mod_signal.interpolate((p1[0], p1[2]), (p2[0], p2[2]), x=peak[0])
                intens = peak[1] - peak[2]
                if noise:
                    peak[3] = intens / noise
                if intens > basepeak:
                    basepeak = intens
    
    CHECK_FORCE_QUIT()
    
    # remove peaks below threshold
    threshold = max(basepeak * relThreshold, absThreshold)
    candidates = []
    for peak in buff:
        if peak[0] > 0 and (peak[1] - peak[2]) >= threshold and (not peak[3] or peak[3] >= snThreshold):
            candidates.append(peak)
    
    # make centroides
    if pickingHeight < 1.:
        buff = []
        previous = None
        for peak in candidates:
            
            CHECK_FORCE_QUIT()
            
            # calc peak height
            h = ((peak[1] - peak[2]) * pickingHeight) + peak[2]
            
            # get centroid indexes
            idx = mod_signal.locate(signal, peak[0])
            if (idx == 0) or (idx == len(signal)):
                continue
            
            ileft = idx - 1
            while (ileft > 0) and (signal[ileft][1] > h):
                ileft -= 1
            
            iright = idx
            while (iright < len(signal) - 1) and (signal[iright][1] > h):
                iright += 1
            
            # calculate peak mz
            leftMZ = mod_signal.interpolate(signal[ileft], signal[ileft + 1], y=h)
            rightMZ = mod_signal.interpolate(signal[iright - 1], signal[iright], y=h)
            peak[0] = (leftMZ + rightMZ) / 2.
            
            # get peak intensity
            intens = mod_signal.intensity(signal, peak[0])
            if intens and intens <= peak[1]:
                peak[1] = intens
            else:
                continue
            
            # try to group with previous peak
            if previous != None and leftMZ < previous:
                if peak[1] > buff[-1][1]:
                    buff[-1] = peak
                    previous = rightMZ
            else:
                buff.append(peak)
                previous = rightMZ
        
        # store as candidates
        candidates = buff
    
    CHECK_FORCE_QUIT()
    
    # get peaks baseline and s/n
    basepeak = 0.0
    if baseline != None:
        for peak in candidates:
            idx = mod_signal.locate(baseline, peak[0])
            if (idx > 0) and (idx < len(baseline)):
                p1 = baseline[idx - 1]
                p2 = baseline[idx]
                peak[2] = mod_signal.interpolate((p1[0], p1[1]), (p2[0], p2[1]), x=peak[0])
                noise = mod_signal.interpolate((p1[0], p1[2]), (p2[0], p2[2]), x=peak[0])
                intens = peak[1] - peak[2]
                if noise:
                    peak[3] = intens / noise
                if intens > basepeak:
                    basepeak = intens
    
    CHECK_FORCE_QUIT()
    
    # remove peaks below threshold and calculate fwhm
    threshold = max(basepeak * relThreshold, absThreshold)
    centroides = []
    for peak in candidates:
        if peak[0] > 0 and (peak[1] - peak[2]) >= threshold and (not peak[3] or peak[3] >= snThreshold):
            peak[4] = mod_signal.width(signal, peak[0], (peak[2] + ((peak[1] - peak[2]) * 0.5)))
            centroides.append(obj_peak.peak(mz=peak[0], ai=peak[1], base=peak[2], sn=peak[3], fwhm=peak[4]))
    
    # return peaklist object
    return obj_peaklist.peaklist(centroides)
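# Usage sketch (illustrative only, not part of the module): 'points' stands
# for an (N, 2) numpy array of profile data; a baseline array in the format
# expected by mod_signal may be passed instead of None.
#
#   peaks = labelscan(points, pickingHeight=0.75, relThreshold=0.001, baseline=None)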