def md(mass, mdType='standard', kendrickFormula='CH2', rounding='floor'):
    """Calculate mass defect for given monoisotopic mass.
        mass (float) - monoisotopic mass
        mdType (fraction | standard | relative | kendrick) - mass defect type
        kendrickFormula (str or mspy.compound) - kendrick group formula
        rounding (floor | ceil | round) - nominal mass rounding function
    Returns the mass defect as a number; the relative defect is scaled by 1e6.
    Raises ValueError if mdType is not one of the supported types.
    """
    
    # return fractional part
    if mdType == 'fraction':
        return mass - math.floor(mass)
    
    # return standard mass defect
    elif mdType == 'standard':
        return mass - nominalmass(mass, rounding)
    
    # return relative mass defect
    elif mdType == 'relative':
        return 1e6 * (mass - nominalmass(mass, rounding)) / mass
    
    # return Kendrick mass defect
    elif mdType == 'kendrick':
        if not isinstance(kendrickFormula, obj_compound.compound):
            kendrickFormula = obj_compound.compound(kendrickFormula)
        
        # scaling factor that maps the group's exact mass onto its nominal mass
        kendrickF = kendrickFormula.nominalmass() / kendrickFormula.mass(0)
        kendrickMass = mass * kendrickF
        
        # note the sign: nominal minus scaled mass, opposite to the standard defect
        return nominalmass(kendrickMass, rounding) - kendrickMass
    
    # unknown mass defect type
    else:
        # call form of raise is valid on both Python 2 and 3; %s keeps the
        # error a ValueError even when mdType is not a string
        raise ValueError('Unknown mass defect type! --> %s' % (mdType,))
def __init__(self, formula, charge, scales, loss='H', gain='H{2}', peakShape='gaussian'):
    """Set up fitting state and prepare theoretical models.
        formula - base formula, stored as self.formula
        charge - charge, stored as self.charge
        scales - iterable handed over to self._initModels()
        loss (str) - formula of the lost group, kept negated in self._lossFormula
        gain (str) - formula of the gained group, kept in self._gainFormula
        peakShape (str) - peak shape name, stored as self.peakShape
    """
    
    # keep the loss as a negated formula
    lossCompound = obj_compound.compound(loss)
    lossCompound.negate()
    self._lossFormula = lossCompound.formula()
    self._gainFormula = gain
    
    # input parameters
    self.formula, self.charge = formula, charge
    self.peakShape = peakShape
    
    # defaults
    self.fwhm = 0.1
    self.mzrange = [0.0, float('inf')]
    
    # data containers
    self.spectrum = []
    self.data = []
    self.model = []
    self.models = {}
    
    # results, not available yet
    self.composition = None
    self.ncomposition = None
    self.average = None
    
    # init models and range
    self._initModels(scales)
    self._initRange()
def averagine(mz, charge=0, composition=AVERAGE_AMINO):
    """Make an average formula for given m/z out of repeated building blocks.
        mz (float) - peak m/z
        charge (int) - peak charge
        composition (dict) - building block composition
    Returns the formula as a compound object, topped up with hydrogens.
    """
    
    elements = blocks.elements
    
    # sum up average mass of one block
    blockMass = 0.
    for symbol, perBlock in composition.items():
        blockMass += elements[symbol].mass[1] * perBlock
    
    # how many blocks fit into the neutral mass, at least one
    neutralMass = mod_basics.mz(mz, charge=0, currentCharge=charge, massType=1)
    count = max(1, neutralMass / blockMass)
    
    # scale block composition, element counts are truncated to integers
    parts = ['%s%d' % (symbol, int(composition[symbol]*count)) for symbol in composition]
    formula = obj_compound.compound(''.join(parts))
    
    # hydrogens needed to reach the mass, never removing more than available
    missing = (neutralMass - formula.mass(1)) / elements['H'].mass[1]
    hydrogens = max(int(round(missing)), -1*formula.count('H'))
    formula += 'H%d' % hydrogens
    
    return formula
def md(mass, mdType='standard', kendrickFormula='CH2', rounding='floor'):
    """Calculate mass defect for given monoisotopic mass.
        mass (float) - monoisotopic mass
        mdType (fraction | standard | relative | kendrick) - mass defect type
        kendrickFormula (str or mspy.compound) - kendrick group formula
        rounding (floor | ceil | round) - nominal mass rounding function
    Returns the mass defect as a number; the relative defect is scaled by 1e6.
    Raises ValueError if mdType is not one of the supported types.
    """
    
    # return fractional part
    if mdType == 'fraction':
        return mass - math.floor(mass)
    
    # return standard mass defect
    elif mdType == 'standard':
        return mass - nominalmass(mass, rounding)
    
    # return relative mass defect
    elif mdType == 'relative':
        return 1e6 * (mass - nominalmass(mass, rounding)) / mass
    
    # return Kendrick mass defect
    elif mdType == 'kendrick':
        if not isinstance(kendrickFormula, obj_compound.compound):
            kendrickFormula = obj_compound.compound(kendrickFormula)
        
        # scaling factor that maps the group's exact mass onto its nominal mass
        kendrickF = kendrickFormula.nominalmass() / kendrickFormula.mass(0)
        kendrickMass = mass * kendrickF
        
        # note the sign: nominal minus scaled mass, opposite to the standard defect
        return nominalmass(kendrickMass, rounding) - kendrickMass
    
    # unknown mass defect type
    else:
        # call form of raise is valid on both Python 2 and 3; %s keeps the
        # error a ValueError even when mdType is not a string
        raise ValueError('Unknown mass defect type! --> %s' % (mdType,))
def rdbe(compound):
    """Get RDBE (Rings and Double Bonds Equivalents) of a given compound.
        compound (str or mspy.compound) - compound
    Returns float: half of the sum of (valence - 2) * count over all elements
    with a defined valence, plus one.
    """
    
    # make compound if needed
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)
    
    # collect unique element symbols in order of first appearance
    symbols = []
    for item in compound.composition():
        found = ELEMENT_PATTERN.match(item)
        if not found:
            continue
        symbol = found.group(1)
        if symbol not in symbols:
            symbols.append(symbol)
    
    # sum up contributions, elements without valence are ignored
    total = 0.
    for symbol in symbols:
        valence = blocks.elements[symbol].valence
        if valence:
            total += (valence - 2) * compound.count(symbol, groupIsotopes=True)
    
    return total / 2. + 1.
def __init__(self, formula, charge, scales, loss='H', gain='H{2}', peakShape='gaussian'):
    """Set up fitting state and prepare theoretical models.
        formula - base formula, stored as self.formula
        charge - charge, stored as self.charge
        scales - iterable handed over to self._initModels()
        loss (str) - formula of the lost group, kept negated in self._lossFormula
        gain (str) - formula of the gained group, kept in self._gainFormula
        peakShape (str) - peak shape name, stored as self.peakShape
    """
    
    # keep the loss as a negated formula
    lossCompound = obj_compound.compound(loss)
    lossCompound.negate()
    self._lossFormula = lossCompound.formula()
    self._gainFormula = gain
    
    # input parameters
    self.formula, self.charge = formula, charge
    self.peakShape = peakShape
    
    # defaults
    self.fwhm = 0.1
    self.mzrange = [0.0, float('inf')]
    
    # data containers
    self.spectrum = []
    self.data = []
    self.model = []
    self.models = {}
    
    # results, not available yet
    self.composition = None
    self.ncomposition = None
    self.average = None
    self.intensity = None
    
    # init models and range
    self._initModels(scales)
    self._initRange()
def isvalid(self, charge=0, agentFormula='H', agentCharge=1):
    """Check ion composition by delegating to the compound built from self.formula().
        charge - forwarded to compound.isvalid()
        agentFormula - forwarded to compound.isvalid()
        agentCharge - forwarded to compound.isvalid()
    Returns whatever compound.isvalid() returns.
    """
    
    # arguments are passed through untouched
    checkArgs = {
        'charge': charge,
        'agentFormula': agentFormula,
        'agentCharge': agentCharge,
    }
    
    return obj_compound.compound(self.formula()).isvalid(**checkArgs)
def isvalid(self, charge=0, agentFormula='H', agentCharge=1):
    """Check ion composition by delegating to the compound built from self.formula().
        charge - forwarded to compound.isvalid()
        agentFormula - forwarded to compound.isvalid()
        agentCharge - forwarded to compound.isvalid()
    Returns whatever compound.isvalid() returns.
    """
    
    # arguments are passed through untouched
    checkArgs = {
        'charge': charge,
        'agentFormula': agentFormula,
        'agentCharge': agentCharge,
    }
    
    return obj_compound.compound(self.formula()).isvalid(**checkArgs)
def mass(self, massType=None):
    """Get mass.
        massType (0, 1 or None) - 0 returns self._mass[0], 1 returns self._mass[1],
            any other value returns the whole buffered mass
    The mass is calculated from self.formula() on first use and buffered in self._mass.
    """
    
    # fill the buffer on first use; identity test, so no __eq__ is involved
    if self._mass is None:
        self._mass = obj_compound.compound(self.formula()).mass()
    
    # return selected mass
    if massType == 0:
        return self._mass[0]
    elif massType == 1:
        return self._mass[1]
    else:
        return self._mass
def mz(mass, charge, currentCharge=0, agentFormula='H', agentCharge=1, massType=0):
    """Recalculate mass or m/z from current charge state to the final one.
        mass (tuple of (Mo, Av) or float) - current mass
        charge (int) - final charge of ion
        currentCharge (int) - current mass charge
        agentFormula (str or mspy.compound) - charging agent formula, 'e' for electron
        agentCharge (int) - charging agent unit charge
        massType (0 or 1) - used mass type if mass value is float, 0 = monoisotopic, 1 = average
    Returns (Mo, Av) tuple if mass is tuple or list, single value otherwise.
    """
    
    # make sure that a non-electron agent is a compound
    if agentFormula != 'e' and not isinstance(agentFormula, obj_compound.compound):
        agentFormula = obj_compound.compound(agentFormula)
    
    # get mass of one charging agent unit
    if agentFormula == 'e':
        agentMass = [ELECTRON_MASS, ELECTRON_MASS]
    else:
        neutralAgent = agentFormula.mass()
        electrons = agentCharge * ELECTRON_MASS
        agentMass = (neutralAgent[0] - electrons, neutralAgent[1] - electrons)
    
    # tuple/list input stays a pair all the way through
    isPair = type(mass) in (tuple, list)
    
    # remove current charge
    # NOTE(review): '/' on int charges floors on Python 2 only - confirm target interpreter
    agentCount = currentCharge / agentCharge
    if currentCharge != 0:
        absCurrent = abs(currentCharge)
        if isPair:
            mass = (
                mass[0] * absCurrent - agentMass[0] * agentCount,
                mass[1] * absCurrent - agentMass[1] * agentCount,
            )
        else:
            mass = mass * absCurrent - agentMass[massType] * agentCount
    
    # neutral mass requested
    if charge == 0:
        return mass
    
    # apply final charge
    agentCount = charge / agentCharge
    absCharge = abs(charge)
    if isPair:
        return (
            (mass[0] + agentMass[0] * agentCount) / absCharge,
            (mass[1] + agentMass[1] * agentCount) / absCharge,
        )
    
    return (mass + agentMass[massType] * agentCount) / absCharge
def _initModels(self, scales):
    """Rebuild self.models with one entry per valid scale.
        scales - iterable of integer scale values
    Every scale x applies the loss and gain formulae x times to self.formula;
    compounds not valid for self.charge are left out.
    """
    
    # start from scratch
    models = self.models = {}
    
    for scale in scales:
        
        CHECK_FORCE_QUIT()
        
        # make compound for current scale
        item = "%s(%s)%d(%s)%d" % (self.formula, self._lossFormula, scale, self._gainFormula, scale)
        candidate = obj_compound.compound(item)
        
        # store model [0-compound, 1-pattern, 2-abs abundance, 3-rel abundance]
        if candidate.isvalid(charge=self.charge):
            models[scale] = [candidate, [], 0.0, 0.0]
def mz(mass, charge, currentCharge=0, agentFormula='H', agentCharge=1, massType=0):
    """Recalculate mass or m/z from current charge state to the final one.
        mass (tuple of (Mo, Av) or float) - current mass
        charge (int) - final charge of ion
        currentCharge (int) - current mass charge
        agentFormula (str or mspy.compound) - charging agent formula, 'e' for electron
        agentCharge (int) - charging agent unit charge
        massType (0 or 1) - used mass type if mass value is float, 0 = monoisotopic, 1 = average
    Returns (Mo, Av) tuple if mass is tuple or list, single value otherwise.
    """
    
    # make sure that a non-electron agent is a compound
    if agentFormula != 'e' and not isinstance(agentFormula, obj_compound.compound):
        agentFormula = obj_compound.compound(agentFormula)
    
    # get mass of one charging agent unit
    if agentFormula == 'e':
        agentMass = [ELECTRON_MASS, ELECTRON_MASS]
    else:
        neutralAgent = agentFormula.mass()
        electrons = agentCharge*ELECTRON_MASS
        agentMass = (neutralAgent[0]-electrons, neutralAgent[1]-electrons)
    
    # tuple/list input stays a pair all the way through
    isPair = type(mass) in (tuple, list)
    
    # remove current charge
    # NOTE(review): '/' on int charges floors on Python 2 only - confirm target interpreter
    agentCount = currentCharge/agentCharge
    if currentCharge != 0:
        absCurrent = abs(currentCharge)
        if isPair:
            mass = (
                mass[0]*absCurrent - agentMass[0]*agentCount,
                mass[1]*absCurrent - agentMass[1]*agentCount,
            )
        else:
            mass = mass*absCurrent - agentMass[massType]*agentCount
    
    # neutral mass requested
    if charge == 0:
        return mass
    
    # apply final charge
    agentCount = charge/agentCharge
    absCharge = abs(charge)
    if isPair:
        return (
            (mass[0] + agentMass[0]*agentCount)/absCharge,
            (mass[1] + agentMass[1]*agentCount)/absCharge,
        )
    
    return (mass + agentMass[massType]*agentCount)/absCharge
def composition(self):
    """Get elemental composition.
    
    Sums monomers of self.chain, modifications and labels, terminal formulae
    (skipped for cyclic sequences) and fragment gains, and subtracts fragment
    losses. The result is buffered in self._composition and the same dict is
    returned by later calls.
    Returns dict of element -> count; zero counts are removed, negative kept.
    """
    
    # check composition buffer
    if self._composition is not None:
        return self._composition
    
    # build locally, so a failure below cannot leave a half-filled buffer behind
    total = {}
    
    def accumulate(items, factor=1):
        """Add factor * count of each (element, count) pair to the running total."""
        for el, count in items:
            total[el] = total.get(el, 0) + factor * count
    
    # add monomers to formula
    for monomer in self.chain:
        accumulate(blocks.monomers[monomer].composition.items())
    
    # add modifications and labels
    for name, position, state in self.modifications + self.labels:
        
        # a monomer symbol as position applies to each of its occurrences in chain
        # NOTE(review): 'unicode' exists on Python 2 only
        multi = 1
        if isinstance(position, (str, unicode)) and position not in ('', 'nTerm', 'cTerm'):
            multi = self.chain.count(position)
        
        accumulate(blocks.modifications[name].composition.items(), multi)
    
    # add terminal formulae
    if not self.cyclic:
        termCmpd = obj_compound.compound(self.nTermFormula + self.cTermFormula)
        accumulate(termCmpd.composition().items())
    
    # subtract neutral losses for fragments
    for loss in self.fragmentLosses:
        accumulate(obj_compound.compound(loss).composition().items(), -1)
    
    # add neutral gains for fragments
    for gain in self.fragmentGains:
        accumulate(obj_compound.compound(gain).composition().items())
    
    # remove zeros; iterate over a snapshot since the dict is modified in the loop
    for atom in list(total):
        if total[atom] == 0:
            del total[atom]
    
    self._composition = total
    return self._composition
def pattern(compound, fwhm=0.1, threshold=0.01, charge=0, agentFormula='H', agentCharge=1, real=True, model='gaussian'):
    """Calculate isotopic pattern for given compound.
        compound (str or mspy.compound) - compound
        fwhm (float) - gaussian peak width
        threshold (float) - relative intensity threshold for isotopes (in %/100)
        charge (int) - charge to be calculated
        agentFormula (str or mspy.compound) - charging agent formula
        agentCharge (int) - charging agent unit charge
        real (bool) - get real peaks from calculated profile
        model (gaussian, lorentzian, gausslorentzian) - peak shape function
    Returns list of [m/z, abundance] peaks having abundance >= threshold.
    Raises ValueError if any atom count is negative once the charging agent is added.
    """
    
    # check compound
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)
    
    # check agent formula
    if agentFormula != 'e' and not isinstance(agentFormula, obj_compound.compound):
        agentFormula = obj_compound.compound(agentFormula)
    
    # add charging agent to compound
    if charge and agentFormula != 'e':
        formula = compound.formula()
        for atom, count in agentFormula.composition().items():
            formula += '%s%d' % (atom, count * (charge / agentCharge))
        compound = obj_compound.compound(formula)
    
    # get composition and check for negative atom counts
    composition = compound.composition()
    for atom in composition:
        if composition[atom] < 0:
            # call form of raise is valid on both Python 2 and 3
            raise ValueError('Pattern cannot be calculated for this formula! --> ' + compound.formula())
    
    # set internal thresholds
    # intermediate peaks are pruned 100x more gently than the final ones
    internalThreshold = threshold / 100.
    groupingWindow = fwhm / 4.
    
    # calculate pattern
    finalPattern = []
    for atom in composition:
        
        # get isotopic profile for current atom or specified isotope only
        atomCount = composition[atom]
        atomPattern = []
        match = mod_basics.ELEMENT_PATTERN.match(atom)
        symbol, massNumber, _ = match.groups()
        if massNumber:
            isotope = blocks.elements[symbol].isotopes[int(massNumber)]
            atomPattern.append([isotope[0], 1.]) # [mass, abundance]
        else:
            for massNumber, isotope in blocks.elements[atom].isotopes.items():
                if isotope[1] > 0.:
                    atomPattern.append(list(isotope)) # [mass, abundance]
        
        # add atoms one by one
        for _ in range(atomCount):
            
            CHECK_FORCE_QUIT()
            
            # if pattern is empty (first atom) add current atom pattern
            if len(finalPattern) == 0:
                finalPattern = _normalize(atomPattern)
                continue
            
            # add atom to each peak of final pattern
            currentPattern = []
            for patternIsotope in finalPattern:
                
                # skip peak under relevant abundance threshold
                if patternIsotope[1] < internalThreshold:
                    continue
                
                # add each isotope of current atom to peak
                for atomIsotope in atomPattern:
                    mass = patternIsotope[0] + atomIsotope[0]
                    abundance = patternIsotope[1] * atomIsotope[1]
                    currentPattern.append([mass, abundance])
            
            # group isotopes and normalize pattern
            finalPattern = _consolidate(currentPattern, groupingWindow)
            finalPattern = _normalize(finalPattern)
    
    # correct charge
    # the agent atoms are already part of the compound, only electrons remain
    if charge:
        for i in range(len(finalPattern)):
            finalPattern[i][0] = (finalPattern[i][0] - mod_basics.ELECTRON_MASS * charge) / abs(charge)
    
    # group isotopes
    finalPattern = _consolidate(finalPattern, groupingWindow)
    
    # get real peaks from profile
    if real:
        prof = profile(finalPattern, fwhm=fwhm, points=100, model=model)
        finalPattern = []
        for isotope in mod_signal.maxima(prof):
            finalPattern.append(isotope)
            
            # use centroid instead of maximum, if the two are close enough
            centroid = mod_signal.centroid(prof, isotope[0], isotope[1] * 0.99)
            if abs(centroid - isotope[0]) < fwhm / 100.:
                finalPattern[-1][0] = centroid
    
    # normalize pattern
    finalPattern = _normalize(finalPattern)
    
    # discard peaks below threshold, peaks are copied into new lists
    finalPattern = [list(peak) for peak in finalPattern if peak[1] >= threshold]
    
    return finalPattern
def frules(compound, rules=['HC', 'NOPSC', 'NOPS', 'RDBE', 'RDBEInt'], HC=(0.1, 3.0), NOPSC=(4, 3, 2, 3), RDBE=(-1, 40)):
    """Test a compound against the selected formula rules.
        compound (str or mspy.compound) - compound
        rules (list of str) - rules to be checked
        HC (tuple) - H/C limits
        NOPSC (tuple) - NOPS/C max values
        RDBE (tuple) - RDBE limits
    Returns False as soon as any selected rule fails, True otherwise.
    """
    
    # make compound if needed
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)
    
    # get element counts as floats, isotopes are grouped
    countC, countH, countN, countO, countP, countS = [
        float(compound.count(symbol, groupIsotopes=True))
        for symbol in ('C', 'H', 'N', 'O', 'P', 'S')
    ]
    
    # get RDBE
    rdbeValue = rdbe(compound)
    
    # check H/C ratio, needs some carbon
    if 'HC' in rules and countC:
        ratioHC = countH / countC
        if ratioHC < HC[0] or ratioHC > HC[1]:
            return False
    
    # check N, O, P, S to carbon ratios, needs some carbon
    if 'NOPSC' in rules and countC:
        if (countN / countC > NOPSC[0]
                or countO / countC > NOPSC[1]
                or countP / countC > NOPSC[2]
                or countS / countC > NOPSC[3]):
            return False
    
    # check heteroatom combinations
    if 'NOPS' in rules:
        
        # (counts, value all counts must exceed, per-count limits)
        combinations = (
            ((countN, countO, countP, countS), 1, (10, 20, 4, 3)),
            ((countN, countO, countP), 3, (11, 22, 6)),
            ((countN, countO, countS), 1, (19, 14, 8)),
            ((countN, countP, countS), 1, (3, 3, 3)),
            ((countO, countP, countS), 1, (14, 3, 3)),
        )
        for counts, minimum, limits in combinations:
            if not all(value > minimum for value in counts):
                continue
            if any(value >= limit for value, limit in zip(counts, limits)):
                return False
    
    # check RDBE range
    if 'RDBE' in rules and (rdbeValue < RDBE[0] or rdbeValue > RDBE[1]):
        return False
    
    # check integer RDBE
    if 'RDBEInt' in rules and rdbeValue % 1:
        return False
    
    # all ok
    return True
def formulator(mz, charge=0, tolerance=1., units='ppm', composition={}, agentFormula='H', agentCharge=1, limit=1000):
    """Find formulae matching given m/z within tolerance and composition limits.
        mz (float) - searched m/z value
        charge (int) - current charge
        tolerance (float) - mass tolerance
        units (ppm or Da) - mass tolerance units
        composition (dict of 'element':[min count, max count]) - composition limits
        agentFormula (str) - charging agent formula
        agentCharge (int) - charging agent unit charge
        limit (int) - maximum formulae allowed to be calculated
    Returns list of formula strings, empty if the neutral mass is not positive.
    """
    
    # get neutral mass
    mass = mz
    if charge != 0 and agentFormula:
        mass = mod_basics.mz(mz, 0, currentCharge=charge, agentFormula=agentFormula, agentCharge=agentCharge)
    
    # nothing to search for
    if mass <= 0:
        return []
    
    # get mass window, Da tolerance is scaled by charge
    if units == 'ppm':
        delta = (mass/1e6) * tolerance
    elif charge != 0:
        delta = abs(charge)*tolerance
    else:
        delta = tolerance
    loMass = mass - delta
    hiMass = mass + delta
    
    # heaviest elements first to speed up processing
    ranked = sorted(
        ([obj_compound.compound(el).mass(0), el] for el in composition),
        reverse=True
    )
    
    # compile elements and counts, max counts are capped by the mass window
    elementMasses = [item[0] for item in ranked]
    elements = [item[1] for item in ranked]
    minComposition = [composition[el][0] for el in elements]
    maxComposition = [
        min(composition[el][1], int(hiMass/elMass))
        for elMass, el in ranked
    ]
    
    # generate compositions and convert them to formulae
    formulae = []
    for comp in _compositions(minComposition, maxComposition, elementMasses, loMass, hiMass, limit):
        
        CHECK_FORCE_QUIT()
        
        formulae.append(''.join('%s%d' % (elements[i], comp[i]) for i in range(len(comp))))
    
    return formulae
def pattern(compound, fwhm=0.1, threshold=0.01, charge=0, agentFormula='H', agentCharge=1, real=True, model='gaussian'):
    """Calculate isotopic pattern for given compound.
        compound (str or mspy.compound) - compound
        fwhm (float) - gaussian peak width
        threshold (float) - relative intensity threshold for isotopes (in %/100)
        charge (int) - charge to be calculated
        agentFormula (str or mspy.compound) - charging agent formula
        agentCharge (int) - charging agent unit charge
        real (bool) - get real peaks from calculated profile
        model (gaussian, lorentzian, gausslorentzian) - peak shape function
    Returns list of [m/z, abundance] peaks having abundance >= threshold.
    Raises ValueError if any atom count is negative once the charging agent is added.
    """
    
    # check compound
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)
    
    # check agent formula
    if agentFormula != 'e' and not isinstance(agentFormula, obj_compound.compound):
        agentFormula = obj_compound.compound(agentFormula)
    
    # add charging agent to compound
    if charge and agentFormula != 'e':
        formula = compound.formula()
        for atom, count in agentFormula.composition().items():
            formula += '%s%d' % (atom, count*(charge/agentCharge))
        compound = obj_compound.compound(formula)
    
    # get composition and check for negative atom counts
    composition = compound.composition()
    for atom in composition:
        if composition[atom] < 0:
            # call form of raise is valid on both Python 2 and 3
            raise ValueError('Pattern cannot be calculated for this formula! --> ' + compound.formula())
    
    # set internal thresholds
    # intermediate peaks are pruned 100x more gently than the final ones
    internalThreshold = threshold/100.
    groupingWindow = fwhm/4.
    
    # calculate pattern
    finalPattern = []
    for atom in composition:
        
        # get isotopic profile for current atom or specified isotope only
        atomCount = composition[atom]
        atomPattern = []
        match = mod_basics.ELEMENT_PATTERN.match(atom)
        symbol, massNumber, _ = match.groups()
        if massNumber:
            isotope = blocks.elements[symbol].isotopes[int(massNumber)]
            atomPattern.append([isotope[0], 1.]) # [mass, abundance]
        else:
            for massNumber, isotope in blocks.elements[atom].isotopes.items():
                if isotope[1] > 0.:
                    atomPattern.append(list(isotope)) # [mass, abundance]
        
        # add atoms one by one
        for _ in range(atomCount):
            
            CHECK_FORCE_QUIT()
            
            # if pattern is empty (first atom) add current atom pattern
            if len(finalPattern) == 0:
                finalPattern = _normalize(atomPattern)
                continue
            
            # add atom to each peak of final pattern
            currentPattern = []
            for patternIsotope in finalPattern:
                
                # skip peak under relevant abundance threshold
                if patternIsotope[1] < internalThreshold:
                    continue
                
                # add each isotope of current atom to peak
                for atomIsotope in atomPattern:
                    mass = patternIsotope[0] + atomIsotope[0]
                    abundance = patternIsotope[1] * atomIsotope[1]
                    currentPattern.append([mass, abundance])
            
            # group isotopes and normalize pattern
            finalPattern = _consolidate(currentPattern, groupingWindow)
            finalPattern = _normalize(finalPattern)
    
    # correct charge
    # the agent atoms are already part of the compound, only electrons remain
    if charge:
        for i in range(len(finalPattern)):
            finalPattern[i][0] = (finalPattern[i][0] - mod_basics.ELECTRON_MASS*charge) / abs(charge)
    
    # group isotopes
    finalPattern = _consolidate(finalPattern, groupingWindow)
    
    # get real peaks from profile
    if real:
        prof = profile(finalPattern, fwhm=fwhm, points=100, model=model)
        finalPattern = []
        for isotope in mod_signal.maxima(prof):
            finalPattern.append(isotope)
            
            # use centroid instead of maximum, if the two are close enough
            centroid = mod_signal.centroid(prof, isotope[0], isotope[1]*0.99)
            if abs(centroid-isotope[0]) < fwhm/100.:
                finalPattern[-1][0] = centroid
    
    # normalize pattern
    finalPattern = _normalize(finalPattern)
    
    # discard peaks below threshold, peaks are copied into new lists
    finalPattern = [list(peak) for peak in finalPattern if peak[1] >= threshold]
    
    return finalPattern
def formulator(mz, charge=0, tolerance=1., units='ppm', composition={}, agentFormula='H', agentCharge=1, limit=1000):
    """Find formulae matching given m/z within tolerance and composition limits.
        mz (float) - searched m/z value
        charge (int) - current charge
        tolerance (float) - mass tolerance
        units (ppm or Da) - mass tolerance units
        composition (dict of 'element':[min count, max count]) - composition limits
        agentFormula (str) - charging agent formula
        agentCharge (int) - charging agent unit charge
        limit (int) - maximum formulae allowed to be calculated
    Returns list of formula strings, empty if the neutral mass is not positive.
    """
    
    # get neutral mass
    mass = mz
    if charge != 0 and agentFormula:
        mass = mod_basics.mz(mz, 0, currentCharge=charge, agentFormula=agentFormula, agentCharge=agentCharge)
    
    # nothing to search for
    if mass <= 0:
        return []
    
    # get mass window, Da tolerance is scaled by charge
    if units == 'ppm':
        delta = (mass / 1e6) * tolerance
    elif charge != 0:
        delta = abs(charge) * tolerance
    else:
        delta = tolerance
    loMass = mass - delta
    hiMass = mass + delta
    
    # heaviest elements first to speed up processing
    ranked = sorted(
        ([obj_compound.compound(el).mass(0), el] for el in composition),
        reverse=True
    )
    
    # compile elements and counts, max counts are capped by the mass window
    elementMasses = [item[0] for item in ranked]
    elements = [item[1] for item in ranked]
    minComposition = [composition[el][0] for el in elements]
    maxComposition = [
        min(composition[el][1], int(hiMass / elMass))
        for elMass, el in ranked
    ]
    
    # generate compositions and convert them to formulae
    formulae = []
    for comp in _compositions(minComposition, maxComposition, elementMasses, loMass, hiMass, limit):
        
        CHECK_FORCE_QUIT()
        
        formulae.append(''.join('%s%d' % (elements[i], comp[i]) for i in range(len(comp))))
    
    return formulae
def composition(self):
    """Get elemental composition.
    
    Sums monomers of self.chain, modifications and labels, terminal formulae
    (skipped for cyclic sequences) and fragment gains, and subtracts fragment
    losses. The result is buffered in self._composition and the same dict is
    returned by later calls.
    Returns dict of element -> count; zero counts are removed, negative kept.
    """
    
    # check composition buffer
    if self._composition is not None:
        return self._composition
    
    # build locally, so a failure below cannot leave a half-filled buffer behind
    total = {}
    
    def accumulate(items, factor=1):
        """Add factor * count of each (element, count) pair to the running total."""
        for el, count in items:
            total[el] = total.get(el, 0) + factor*count
    
    # add monomers to formula
    for monomer in self.chain:
        accumulate(blocks.monomers[monomer].composition.items())
    
    # add modifications and labels
    for name, position, state in self.modifications + self.labels:
        
        # a monomer symbol as position applies to each of its occurrences in chain
        # NOTE(review): 'unicode' exists on Python 2 only
        multi = 1
        if isinstance(position, (str, unicode)) and position not in ('', 'nTerm', 'cTerm'):
            multi = self.chain.count(position)
        
        accumulate(blocks.modifications[name].composition.items(), multi)
    
    # add terminal formulae
    if not self.cyclic:
        termCmpd = obj_compound.compound(self.nTermFormula + self.cTermFormula)
        accumulate(termCmpd.composition().items())
    
    # subtract neutral losses for fragments
    for loss in self.fragmentLosses:
        accumulate(obj_compound.compound(loss).composition().items(), -1)
    
    # add neutral gains for fragments
    for gain in self.fragmentGains:
        accumulate(obj_compound.compound(gain).composition().items())
    
    # remove zeros; iterate over a snapshot since the dict is modified in the loop
    for atom in list(total):
        if total[atom] == 0:
            del total[atom]
    
    self._composition = total
    return self._composition
def frules(compound, rules=['HC','NOPSC','NOPS','RDBE','RDBEInt'], HC=(0.1, 3.0), NOPSC=(4,3,2,3), RDBE=(-1,40)):
    """Test a compound against the selected formula rules.
        compound (str or mspy.compound) - compound
        rules (list of str) - rules to be checked
        HC (tuple) - H/C limits
        NOPSC (tuple) - NOPS/C max values
        RDBE (tuple) - RDBE limits
    Returns False as soon as any selected rule fails, True otherwise.
    """
    
    # make compound if needed
    if not isinstance(compound, obj_compound.compound):
        compound = obj_compound.compound(compound)
    
    # get element counts as floats, isotopes are grouped
    countC, countH, countN, countO, countP, countS = [
        float(compound.count(symbol, groupIsotopes=True))
        for symbol in ('C', 'H', 'N', 'O', 'P', 'S')
    ]
    
    # get RDBE
    rdbeValue = rdbe(compound)
    
    # check H/C ratio, needs some carbon
    if 'HC' in rules and countC:
        ratioHC = countH / countC
        if ratioHC < HC[0] or ratioHC > HC[1]:
            return False
    
    # check N, O, P, S to carbon ratios, needs some carbon
    if 'NOPSC' in rules and countC:
        if (countN / countC > NOPSC[0]
                or countO / countC > NOPSC[1]
                or countP / countC > NOPSC[2]
                or countS / countC > NOPSC[3]):
            return False
    
    # check heteroatom combinations
    if 'NOPS' in rules:
        
        # (counts, value all counts must exceed, per-count limits)
        combinations = (
            ((countN, countO, countP, countS), 1, (10, 20, 4, 3)),
            ((countN, countO, countP), 3, (11, 22, 6)),
            ((countN, countO, countS), 1, (19, 14, 8)),
            ((countN, countP, countS), 1, (3, 3, 3)),
            ((countO, countP, countS), 1, (14, 3, 3)),
        )
        for counts, minimum, limits in combinations:
            if not all(value > minimum for value in counts):
                continue
            if any(value >= limit for value, limit in zip(counts, limits)):
                return False
    
    # check RDBE range
    if 'RDBE' in rules and (rdbeValue < RDBE[0] or rdbeValue > RDBE[1]):
        return False
    
    # check integer RDBE
    if 'RDBEInt' in rules and rdbeValue % 1:
        return False
    
    # all ok
    return True