def main():
    """Command-line entry point.

    Configures logging, parses the command line through ``app``, turns
    every requested loss into a :class:`MolecularComposition`, and hands
    the collected settings to :func:`run`.
    """
    logging.basicConfig(level='INFO', format="%(name)s: %(message)s")
    args = app.parse_args()

    # The leading None entry stands for the loss-less case.
    molecular_composition_losses = [None]
    for loss in args.losses:
        logger.info("Converting loss %s -> %s", loss, Composition(loss))
        molecular_composition_losses.append(
            MolecularComposition(loss, Composition(loss)))

    length_range = sorted(map(int, args.chain_length_range))
    # The maximum charge is always handed on as a non-positive number.
    max_charge = -abs(args.max_charge)
    has_anhydromanose = bool(args.has_anhydromanose)
    gag_type = args.gag_type
    logger.info("GAG Chain Range: %d-%d" % tuple(length_range))
    mass_error_tolerance = args.mass_error_tolerance

    output_path = args.output_path
    # Fall back to CSV when no output format was requested.
    output_format = args.output_format or ['csv']
    pick_peaks = args.pick_peaks

    # The sixth positional argument is always None here; no command-line
    # option feeds it.
    run(
        args.peaklist_path,
        gag_type,
        length_range,
        has_anhydromanose,
        molecular_composition_losses,
        None,
        max_charge,
        output_path,
        output_format,
        pick_peaks=pick_peaks,
        mass_error_tolerance=mass_error_tolerance)
def _compute_composition(self):
    """Recompute the aggregate properties from ``self.counts``.

    Each key of ``self.counts`` contributes its ``composition``,
    ``tandem_composition`` and ``charge_carrier`` multiplied by the count
    stored for it. The totals, and the masses of the two compositions,
    are stored on ``self``.
    """
    total = Composition()
    tandem_total = Composition()
    carrier_total = 0
    for shift, multiplicity in self.counts.items():
        total += shift.composition * multiplicity
        tandem_total += shift.tandem_composition * multiplicity
        carrier_total += shift.charge_carrier * multiplicity

    self.composition = total
    self.mass = total.mass
    self.tandem_composition = tandem_total
    self.tandem_mass = tandem_total.mass
    self.charge_carrier = carrier_total
def _make_glycan_composition_proxy(self):
    """Build a :class:`GlycanCompositionProxy` summarising every glycan here.

    Starts from a clone of ``self.aggregate`` when one is set, otherwise
    from an empty composition, and adds the glycan of every non-core
    entry of ``self``.
    """
    aggregate = self.aggregate
    if aggregate is None:
        base = HashableGlycanComposition()
        # Represent the initial amide bond between the peptide and the
        # first glycan. Subsequent glycans do not need further chemical
        # losses because the dehydration is built directly into the
        # Residue abstraction.
        base.composition_offset -= Composition({"H": 2, "O": 1})
    else:
        base = aggregate.clone()

    for _, value in self.items():
        rule = value.rule
        if rule.is_core:
            continue
        if rule.is_composition:
            base += rule.glycan
            continue
        # Convert the Glycan object into a composition, using the original
        # detached topology to omit the "aglycone" group which represents
        # the connection between the glycan and the peptide and penalizes
        # the composition by H2O. That H2O is lost when the bond is formed,
        # but does not need to be included explicitly because the loss is
        # already tracked when `base` is initialized above.
        base += HashableGlycanComposition.from_glycan(rule._original)
    return GlycanCompositionProxy(base)
def __init__(self, *args, **kwargs):
    """Initialise an empty mapping, then populate it through ``update``.

    A ``reducing_end`` keyword, if present, is removed from ``kwargs`` and
    stored instead of being treated as a residue count.
    """
    reducing_end = kwargs.pop("reducing_end", None)
    self._reducing_end = reducing_end
    dict.__init__(self)
    # Cached mass and the charge state it was computed for.
    self._mass = self._charge = None
    self._composition_offset = Composition("H2O")
    self.update(*args, **kwargs)
def total_composition(self):
    """Sum the compositions of the entries of ``self``.

    When ``self.aggregate`` is set, entries whose rule is flagged
    ``is_core`` are skipped and the aggregate's own total composition is
    added instead.
    """
    total = Composition()
    aggregate = self.aggregate
    for _, value in self.items():
        if aggregate is not None and value.rule.is_core:
            continue
        total += value.composition
    if aggregate is not None:
        total += aggregate.total_composition()
    return total
def validate_reduction(context, reduction_string):
    """Resolve a reduction given on the command line.

    Returns ``None`` when no reduction was supplied, the entry of
    ``named_reductions`` when the lower-cased text names one, or the text
    itself when it parses into a non-empty :class:`Composition`. Anything
    else is reported with ``click.secho`` and aborts via ``click.Abort``.
    ``context`` is not used.
    """
    if reduction_string is None:
        return None
    try:
        text = str(reduction_string)
        lowered = text.lower()
        if lowered in named_reductions:
            return named_reductions[lowered]
        if len(Composition(text)) > 0:
            return text
        # An empty composition is handled like any other parsing failure.
        raise Exception("Invalid")
    except Exception:
        click.secho("Could not validate reduction '%s'" % reduction_string)
        raise click.Abort("Could not validate reduction '%s'" % reduction_string)
def __init__(self, *args, **kwargs):
    """Initialise the mapping and copy state from a template, if given.

    Counts are loaded through ``update``. When the first positional
    argument is itself a :class:`GlycanComposition`, its reducing end (if
    any) and its composition offset are cloned onto the new instance.
    """
    self._reducing_end = None
    dict.__init__(self)
    # Cached mass and the charge state it was computed for.
    self._mass = None
    self._charge = None
    self._composition_offset = Composition("H2O")
    self.update(*args, **kwargs)

    template = args[0] if args else None
    if isinstance(template, GlycanComposition):
        reduced = template.reducing_end
        if reduced is not None:
            self.reducing_end = reduced.clone()
        self._composition_offset = template._composition_offset.clone()
def validate_mass_shift(mass_shift_string, multiplicity=1):
    """Resolve a mass shift name or formula into ``(shift, multiplicity)``.

    A name found (case-insensitively) in ``mass_shifts`` is returned
    directly. Otherwise the text is parsed as a :class:`Composition` and
    wrapped in a new :class:`MassShift`. On failure the error is echoed
    and ``click.Abort`` is raised.
    """
    multiplicity = int(multiplicity)
    lowered = mass_shift_string.lower()
    if lowered in mass_shifts:
        return (mass_shifts[lowered], multiplicity)
    try:
        # Rebound on purpose: the error messages below report this value.
        mass_shift_string = str(mass_shift_string)
        parsed = Composition(mass_shift_string)
        return (MassShift(mass_shift_string, parsed), multiplicity)
    except Exception as e:
        click.secho("%r" % (e, ))
        click.secho("Could not validate mass_shift %r" % (mass_shift_string, ), fg='yellow')
        raise click.Abort("Could not validate mass_shift %r" % (mass_shift_string, ))
class GlycanComposition(dict, SaccharideCollection):
    """A glycan described as a mapping from residue to count.

    String keys are parsed with :func:`from_iupac_lite` before use and
    absent keys read as ``0``. ``_composition_offset`` starts out as one
    water molecule and ``_reducing_end`` optionally holds a reducing end;
    both contribute to :meth:`mass` and :meth:`total_composition`.
    """

    @classmethod
    def from_glycan(cls, glycan):
        """Build a composition from the residues of `glycan`.

        The glycan's reducing end is carried over, and when the root is
        derivatized the composition offset is adjusted for it.
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss()) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """Create the mapping; a ``reducing_end`` keyword is stored, not counted."""
        self._reducing_end = kwargs.pop("reducing_end", None)
        dict.__init__(self)
        # Cached mass and the charge state it was computed for.
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        """Store `value` as the count of `key`.

        A monosaccharide key carrying a reducing end hands that reducing
        end over to the composition; the stored key has none.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Strip the reducing end from a copy so the caller's object is
            # not modified behind its back.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, value)
        self._mass = None

    def __getitem__(self, key):
        """Return the count of `key`, parsing string keys first."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__getitem__(self, key)

    def __delitem__(self, key):
        """Remove `key`, parsing string keys first."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """Return the mass of the composition at the given `charge`.

        The result is cached only for the default calculation
        (``average=False`` and ``mass_data=None``), because the cache
        records nothing but the charge it was computed for.
        """
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            # NOTE(review): the offset's `.mass` is used as-is even when
            # `average` or `mass_data` are supplied -- confirm this is intended.
            mass = self._composition_offset.mass
            for residue_type, count in list(self.items()):
                mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(
                average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """Set counts from a mapping, a ``(name, count)`` pair or keywords.

        Zero counts are skipped.
        """
        # NOTE(review): SOURCE's indentation was lost; the `else` below is
        # assumed to pair with the isinstance check -- confirm.
        if len(args) == 1:
            if isinstance(args[0], dict):
                for name, count in args[0].items():
                    if count != 0:
                        self[name] = count
            else:
                for name, count in args:
                    if count != 0:
                        self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """Increment the count of each residue given.

        Monosaccharides, or the monosaccharide nodes of a single Glycan,
        are converted to residues first. Raises TypeError for other types.
        """
        if not isinstance(args[0], MonosaccharideResidue):
            if isinstance(args[0], (Monosaccharide)):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(args[0], Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in args[0]
                     if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(args[0])))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        for elem, cnt in (other.items()):
            self[elem] += cnt
        return self

    def __add__(self, other):
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        return self + other

    def __isub__(self, other):
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        return (self - other) * (-1)

    def __mul__(self, other):
        """Multiply every count by the integer `other`."""
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer',
                other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other
        return GlycanComposition(prod)

    def __rmul__(self, other):
        return self * other

    def __eq__(self, other):
        """Compare non-zero counts only; non-dict objects never compare equal."""
        if not isinstance(other, dict):
            return False
        self_items = set([i for i in self.items() if i[1]])
        other_items = set([i for i in other.items() if i[1]])
        return self_items == other_items

    def __neg__(self):
        return -1 * self

    def __missing__(self, key):
        # Absent residues count as zero.
        return 0

    def __contains__(self, key):
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply :func:`drop_stem` to every key and return `self`."""
        for t in self:
            drop_stem(t)
        return self

    def drop_positions(self):
        """Apply :func:`drop_positions` to every key and return `self`."""
        for t in self:
            drop_positions(t)
        return self

    def drop_configurations(self):
        """Apply :func:`drop_configuration` to every key and return `self`."""
        for t in self:
            drop_configuration(t)
        # Returned for consistency with drop_stems and drop_positions.
        return self

    def total_composition(self):
        """Return the offset plus all residues and the reducing end, if any."""
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or :meth:`drop_stems`,
        monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self):
        """Return a copy with the same counts, reducing end and offset."""
        dup = self.__class__(self)
        # Constructing from `self` copies only the counts. Carry the
        # reducing end and the composition offset over as well so the
        # copy has the same total composition and mass as the original.
        if self._reducing_end is not None:
            dup.reducing_end = self._reducing_end.clone()
        dup.composition_offset = self._composition_offset.clone()
        return dup

    def serialize(self):
        """Return ``{name:count; ...}`` for positive counts, ordered by residue mass."""
        return "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
            self.items(), key=lambda x: x[0].mass()) if v > 0)

    __str__ = serialize

    @classmethod
    def parse(cls, string):
        """Rebuild a composition from the text produced by :meth:`serialize`."""
        inst = cls()
        body = string[1:-1]
        # An empty composition serializes as "{}", which holds no tokens.
        if not body:
            return inst
        for token in body.split('; '):
            residue, count = token.split(":")
            inst[from_iupac_lite(residue)] = int(count)
        return inst

    def _derivatized(self, substituent, id_base):
        """Adjust the offset, and the reducing end if any, for `substituent`."""
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self._mass = None

    def _strip_derivatization(self):
        """Reset the offset to water and strip the reducing end's derivatization."""
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self._mass = None
class GlycanComposition(dict, SaccharideCollection):
    """
    Describe a glycan as a collection of :class:`MonosaccharideResidue` counts without
    explicit linkage information relating how each monosaccharide is connected to its
    neighbors.

    This class subclasses |dict|, and assumes that keys will either be :class:`MonosaccharideResidue`
    instances, :class:`SubstituentResidue` instances, or strings in `iupac_lite` format which will
    be parsed into one of these types. While other types may be used, this is not recommended. All
    standard |dict| methods are supported.

    |GlycanComposition| objects may be derivatized just as |Glycan| objects are, with
    :func:`glypy.composition.composition_transform.derivatize` and
    :func:`glypy.composition.composition_transform.strip_derivatization`.

    GlycanComposition objects also support composition arithmetic, and can be added or subtracted
    from each other or multiplied by an integer.

    As GlycanComposition is not a complete structure, they cannot be translated into text formats
    as full |Glycan| objects are. They may instead be converted to and from a short-form
    text notation using :meth:`GlycanComposition.serialize` and reconstructed from this format
    using :meth:`GlycanComposition.parse`.

    Attributes
    ----------
    reducing_end : |ReducingEnd|
        Describe the reducing end of the aggregate without binding it to a specific monosaccharide.
        This will contribute to composition and mass calculations.
    _composition_offset: |Composition|
        Account for the one water molecule's worth of composition left over from applying the "residue"
        transformation to each monosaccharide in the aggregate.
    """

    @classmethod
    def from_glycan(cls, glycan):
        """
        Convert a |Glycan| into a |GlycanComposition|.

        Parameters
        ----------
        glycan : Glycan
            The instance to be converted

        Returns
        -------
        GlycanComposition
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss()) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Populate the mapping through :meth:`update`.

        When the first positional argument is a |GlycanComposition|, its
        reducing end (if any) and its composition offset are cloned too.
        """
        self._reducing_end = None
        dict.__init__(self)
        # Cached mass and the charge state it was computed for.
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)
        try:
            template = args[0]
        except IndexError:
            template = None
        if template is not None and isinstance(template, GlycanComposition):
            reduced = template.reducing_end
            if reduced is not None:
                self.reducing_end = reduced.clone()
            self._composition_offset = template._composition_offset.clone()

    def __setitem__(self, key, value):
        """
        Set the quantity of `key` to `value`

        If `key` is a string, it will be passed through :func:`from_iupac_lite`

        If `key` has a reducing end value, that reducing end will be set on `self`

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to store
        value : int
            The value to store
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Strip the reducing end from a copy, not from the caller's object.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, int(value))
        self._mass = None

    def __getitem__(self, key):
        """
        Get the quantity of `key`

        If `key` is a string, it will be passed through :func:`from_iupac_lite`

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to look up

        Returns
        -------
        int
            The stored quantity, or ``0`` when `key` is absent
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return 0

    def __delitem__(self, key):
        """Remove `key`, parsing string keys first."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Calculate the mass of the composition at the given `charge`.

        The result is cached only for the default calculation
        (``average=False`` and ``mass_data=None``), because the cache
        records nothing but the charge it was computed for.
        """
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            # NOTE(review): the offset's `.mass` is used as-is even when
            # `average` or `mass_data` are supplied -- confirm this is intended.
            mass = self._composition_offset.mass
            for residue_type, count in list(self.items()):
                mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(
                average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Set counts from a mapping, a ``(name, count)`` pair or keywords.

        Zero counts are skipped.
        """
        # NOTE(review): SOURCE's indentation was lost; the `else` below is
        # assumed to pair with the isinstance check -- confirm.
        if len(args) == 1:
            if isinstance(args[0], Mapping):
                for name, count in args[0].items():
                    if count != 0:
                        self[name] = count
            else:
                for name, count in args:
                    if count != 0:
                        self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each residue given.

        Monosaccharides, or the monosaccharide nodes of a single Glycan,
        are converted to residues first. Raises TypeError for other types.
        """
        if not isinstance(args[0], MonosaccharideResidue):
            if isinstance(args[0], (Monosaccharide)):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(args[0], Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in args[0]
                     if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(args[0])))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        for elem, cnt in (other.items()):
            self[elem] += cnt
        return self

    def __add__(self, other):
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        return self + other

    def __isub__(self, other):
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        return (self - other) * (-1)

    def __mul__(self, other):
        """Multiply every count by the integer `other`."""
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer',
                other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other
        return GlycanComposition(prod)

    def __rmul__(self, other):
        return self * other

    def __eq__(self, other):
        """Compare non-zero counts only; non-dict objects never compare equal."""
        if not isinstance(other, dict):
            return False
        self_items = set([i for i in self.items() if i[1]])
        other_items = set([i for i in other.items() if i[1]])
        return self_items == other_items

    def __neg__(self):
        return -1 * self

    def __missing__(self, key):
        # Absent residues count as zero.
        return 0

    def __contains__(self, key):
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply :func:`drop_stem` to every key, then merge redundant keys."""
        for t in self:
            drop_stem(t)
        self.collapse()

    def drop_positions(self):
        """Apply :func:`drop_positions` to every key, then merge redundant keys."""
        for t in self:
            drop_positions(t)
        self.collapse()

    def drop_configurations(self):
        """Apply :func:`drop_configuration` to every key, then merge redundant keys."""
        for t in self:
            drop_configuration(t)
        self.collapse()

    def total_composition(self):
        """Return the offset plus all residues and the reducing end, if any."""
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or :meth:`drop_stems`,
        monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self, propogate_composition_offset=True):
        """
        Copy this composition.

        Parameters
        ----------
        propogate_composition_offset : bool
            When false, the copy's composition offset is reset to ``H2O``
            instead of being carried over.
        """
        dup = self.__class__(self)
        if not propogate_composition_offset:
            dup.composition_offset = Composition('H2O')
        return dup

    def serialize(self):
        """
        Return ``{name:count; ...}`` for positive counts, ordered by residue mass.

        When a reducing end is set, ``$`` and the formula of its total
        composition are appended.
        """
        form = "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
            self.items(), key=lambda x: x[0].mass()) if v > 0)
        reduced = self.reducing_end
        if reduced is not None:
            form = "%s$%s" % (form, formula(reduced.total_composition()))
        return form

    __str__ = serialize

    @classmethod
    def _get_parse_tokens(cls, string):
        """Split serialized text into residue tokens and an optional reduction."""
        string = str(string)
        parts = string.split('$')
        if len(parts) == 1:
            tokens = parts[0]
            reduced = None
        elif len(parts) == 2:
            tokens, reduced = parts
        else:
            raise ValueError("Could not interpret %r" % string)
        tokens = tokens[1:-1].split('; ')
        return tokens, reduced

    def _handle_reduction_and_derivatization(self, reduced):
        """Attach the parsed reducing end and account for a derivatized key."""
        if reduced:
            reduced = ReducedEnd(Composition(reduced))
            self.reducing_end = reduced
        deriv = None
        for key in self:
            deriv = has_derivatization(key)
            if deriv:
                break
        if deriv:
            # Applied directly through _derivatized, leaving the reducing
            # end out (include_reducing_end=False).
            self._derivatized(deriv.clone(), make_counter(uid()), include_reducing_end=False)

    @classmethod
    def parse(cls, string):
        """
        Rebuild a composition from the text produced by :meth:`serialize`.

        Raises
        ------
        ValueError
            If a token is not of the form ``name:count`` (``"{}"`` parses
            to an empty composition).
        """
        tokens, reduced = cls._get_parse_tokens(string)
        inst = cls()
        for token in tokens:
            try:
                residue, count = token.split(":")
            except ValueError:
                if string == "{}":
                    return inst
                else:
                    raise ValueError("Malformed Token, %s" % (token,))
            inst[from_iupac_lite(residue)] = int(count)
        inst._handle_reduction_and_derivatization(reduced)
        return inst

    def _derivatized(self, substituent, id_base, include_reducing_end=True):
        """Adjust the offset, and optionally the reducing end, for `substituent`."""
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None and include_reducing_end:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self.collapse()
        self._invalidate()

    def _strip_derivatization(self):
        """Reset the offset to water and strip the reducing end's derivatization."""
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self.collapse()
        self._invalidate()

    def _invalidate(self):
        """Forget the cached mass and the charge it was computed for."""
        self._mass = None
        self._charge = None
def _compute_composition(self):
    """Recompute ``self.composition`` and ``self.mass`` from ``self.counts``.

    Each key of ``self.counts`` contributes its ``composition`` multiplied
    by the count stored for it.
    """
    total = Composition()
    for shift, multiplicity in self.counts.items():
        total += shift.composition * multiplicity
    self.composition = total
    self.mass = total.mass
elif isinstance(other, CompoundMassShift): counts = defaultdict(int, self.counts) for k, v in other.counts.items(): counts[k] += v return self.__class__(counts) else: return NotImplemented def __mul__(self, i): if self.composition == {}: return self if isinstance(i, int): counts = defaultdict(int, self.counts) for k in counts: if k == Unmodified: continue counts[k] *= i return self.__class__(counts) else: raise TypeError("Cannot multiply MassShift by non-integer") def __repr__(self): return "MassShift(%s, %s)" % (self.name, self.composition) Unmodified = MassShift("Unmodified", Composition()) Formate = MassShift("Formate", Composition('HCOOH')) Ammonium = MassShift("Ammonium", Composition("NH3")) Sodium = MassShift("Sodium", Composition("Na")) Potassium = MassShift("Potassium", Composition("K"))
def total_composition(self):
    """Return the :class:`Composition` parsed from ``self.formula``.

    The parsed value is stored in ``self._total_composition`` on first use
    and returned from there afterwards.
    """
    cached = self._total_composition
    if cached is None:
        cached = self._total_composition = Composition(self.formula)
    return cached
def convert(self):
    """Build a :class:`MemoryMassShift` from this object's name and composition.

    Both attributes are passed through ``str`` first; the composition text
    is then parsed into a :class:`Composition`.
    """
    name = str(self.name)
    composition = Composition(str(self.composition))
    return MemoryMassShift(name, composition)
def _strip_derivatization(self):
    """Reset the composition offset to water and clear the cached mass.

    The reducing end, when present, has its derivatization stripped too.
    """
    self._composition_offset = Composition("H2O")
    reducing_end = self._reducing_end
    if reducing_end is not None:
        _strip_derivatization_reducing_end(reducing_end)
    self._mass = None
def _patch_aggregate(self):
    """Subtract H2O from the composition offset of ``self.aggregate``."""
    water = Composition({"H": 2, "O": 1})
    aggregate = self.aggregate
    aggregate.composition_offset -= water
return self if isinstance(i, int): counts = defaultdict(int, self.counts) for k in counts: if k == Unmodified: continue counts[k] *= i return self.__class__(counts) else: raise TypeError("Cannot multiply MassShift by non-integer") def __repr__(self): return "MassShift(%s, %s)" % (self.name, self.composition) Unmodified = MassShift("Unmodified", Composition()) Formate = MassShift("Formate", Composition('HCOOH'), charge_carrier=1) Ammonium = MassShift("Ammonium", Composition("NH3"), Composition()) Sodium = MassShift("Sodium", Composition("Na1H-1"), charge_carrier=1) Potassium = MassShift("Potassium", Composition("K1H-1"), charge_carrier=1) class MassShiftCollection(object): def __init__(self, mass_shifts): self.mass_shifts = list(mass_shifts) self.mass_shift_map = {} self._invalidate() def _invalidate(self): self.mass_shift_map = { mass_shift.name: mass_shift for mass_shift in self.mass_shifts
def _strip_derivatization(self):
    """Reset the composition offset to water, then collapse and invalidate.

    The reducing end, when present, has its derivatization stripped before
    ``collapse`` and ``_invalidate`` are called.
    """
    self._composition_offset = Composition("H2O")
    reducing_end = self._reducing_end
    if reducing_end is not None:
        _strip_derivatization_reducing_end(reducing_end)
    self.collapse()
    self._invalidate()
def dehydrated_mass(self, water_mass=Composition("H2O").mass):
    """Return ``self.calculated_mass`` less ``self.count`` times `water_mass`.

    `water_mass` defaults to the mass of ``Composition("H2O")``, evaluated
    once when the function is defined.
    """
    return self.calculated_mass - (water_mass * self.count)
def dehydrated_composition(self):
    """Return the total composition less ``self.count`` water molecules.

    The value is stored in ``self._dehydrated_composition`` on first use
    and returned from there afterwards.
    """
    cached = self._dehydrated_composition
    if cached is None:
        total = self.total_composition()
        water_loss = self.count * Composition("H2O")
        cached = self._dehydrated_composition = total - water_loss
    return cached
def handle_peptide(self, peptide):
    """Generate the glycopeptides that can be built from `peptide`.

    N-linked, O-linked and GAG-linker sites are processed in that order.
    For each class, every combination of ``i`` unoccupied sites is paired
    with every glycan combination in the matching partition of
    ``self.glycan_combination_partitions``.

    Parameters
    ----------
    peptide : object
        Must provide ``formula``, ``calculated_mass``, ``convert()``,
        ``id``, ``protein_id``, ``hypothesis_id`` and the three
        ``*_sites`` collections read below.

    Yields
    ------
    Glycopeptide
    """
    water = Composition("H2O")
    peptide_composition = Composition(str(peptide.formula))
    obj = peptide.convert()

    # Handle N-linked glycosylation sites
    for glycopeptide in self._generate_glycopeptides_for_sites(
            peptide, obj, peptide_composition, water,
            peptide.n_glycosylation_sites, GlycanTypes.n_glycan,
            _n_glycosylation):
        yield glycopeptide

    # Handle O-linked glycosylation sites
    for glycopeptide in self._generate_glycopeptides_for_sites(
            peptide, obj, peptide_composition, water,
            peptide.o_glycosylation_sites, GlycanTypes.o_glycan,
            _o_glycosylation):
        yield glycopeptide

    # Handle GAG glycosylation sites
    for glycopeptide in self._generate_glycopeptides_for_sites(
            peptide, obj, peptide_composition, water,
            peptide.gagylation_sites, GlycanTypes.gag_linker,
            _gag_linker_glycosylation):
        yield glycopeptide


def _generate_glycopeptides_for_sites(self, peptide, parsed_peptide, peptide_composition,
                                      water, candidate_sites, glycan_type,
                                      modification_rule):
    """Yield the glycopeptides for one class of glycosylation site.

    Parameters
    ----------
    peptide : object
        The peptide record being glycosylated.
    parsed_peptide : object
        Result of ``peptide.convert()``; ``parsed_peptide[site][1]`` being
        truthy marks `site` as already occupied.
    peptide_composition : Composition
        Composition parsed from the peptide's formula.
    water : Composition
        One water molecule, removed once per glycan in a combination.
    candidate_sites : iterable
        Sites of this class on the peptide.
    glycan_type : object
        Key selecting the partition of glycan combinations to use.
    modification_rule : object
        Its ``name`` is the modification added at each chosen site.
    """
    # Built and pruned exactly as before so that the set's iteration order,
    # which limiting_combinations consumes, is unchanged.
    unoccupied_sites = set(candidate_sites)
    for site in list(unoccupied_sites):
        if parsed_peptide[site][1]:
            unoccupied_sites.remove(site)

    for i in range(1, len(unoccupied_sites) + 1):
        for gc in self.glycan_combination_partitions[i, {glycan_type: i}]:
            total_mass = peptide.calculated_mass + gc.calculated_mass - (gc.count * water.mass)
            formula_string = formula(
                peptide_composition + Composition(str(gc.formula)) - (water * gc.count))

            for site_set in limiting_combinations(unoccupied_sites, i):
                sequence = peptide.convert()
                for site in site_set:
                    sequence.add_modification(site, modification_rule.name)
                sequence.glycan = gc.convert()

                glycopeptide_sequence = str(sequence)

                glycopeptide = Glycopeptide(
                    calculated_mass=total_mass,
                    formula=formula_string,
                    glycopeptide_sequence=glycopeptide_sequence,
                    peptide_id=peptide.id,
                    protein_id=peptide.protein_id,
                    hypothesis_id=peptide.hypothesis_id,
                    glycan_combination_id=gc.id)
                yield glycopeptide
def _formula_parser(self, formula):
    """Parse a formula string into a :class:`Composition`.

    Every run of letters immediately followed by an optionally negative
    integer becomes one ``symbol -> count`` entry. If a symbol occurs more
    than once, the last count wins.
    """
    pairs = re.findall(r"([A-Za-z]+)(-?\d+)", formula)
    return Composition({symbol: int(count) for symbol, count in pairs})