class GlycanComposition(dict, SaccharideCollection):
    """
    Describe a glycan as a collection of :class:`MonosaccharideResidue` counts
    without explicit linkage information relating how each monosaccharide is
    connected to its neighbors.

    This class subclasses |dict|, and assumes that keys will either be
    :class:`MonosaccharideResidue` instances, :class:`SubstituentResidue`
    instances, or strings in `iupac_lite` format which will be parsed into one
    of these types. While other types may be used, this is not recommended.
    All standard |dict| methods are supported.

    |GlycanComposition| objects may be derivatized just as |Glycan| objects
    are, with :func:`glypy.composition.composition_transform.derivatize` and
    :func:`glypy.composition.composition_transform.strip_derivatization`.

    GlycanComposition objects also support composition arithmetic, and can be
    added or subtracted from each other or multiplied by an integer.

    As GlycanComposition is not a complete structure, they cannot be
    translated into text formats as full |Glycan| objects are. They may
    instead be converted to and from a short-form text notation using
    :meth:`GlycanComposition.serialize` and reconstructed from this format
    using :meth:`GlycanComposition.parse`.

    Attributes
    ----------
    reducing_end : |ReducingEnd|
        Describe the reducing end of the aggregate without binding it to a
        specific monosaccharide. This will contribute to composition and mass
        calculations.
    _composition_offset: |Composition|
        Account for the one water molecule's worth of composition left over
        from applying the "residue" transformation to each monosaccharide in
        the aggregate.
    """

    @classmethod
    def from_glycan(cls, glycan):
        """
        Convert a |Glycan| into a |GlycanComposition|.

        Parameters
        ----------
        glycan : Glycan
            The instance to be converted

        Returns
        -------
        GlycanComposition
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            # NOTE(review): `_derivatized` subtracts
            # `attachment_composition_loss() * 2` per substituent copy, while
            # this expression subtracts it only once -- confirm which of the
            # two formulas is intended.
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss()) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Build a composition from the same arguments accepted by :meth:`update`.

        If the first positional argument is itself a |GlycanComposition|, a
        clone of its reducing end (when it has one) and a clone of its
        composition offset are carried over to the new instance.
        """
        self._reducing_end = None
        dict.__init__(self)
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)
        template = args[0] if args else None
        if isinstance(template, GlycanComposition):
            reduced = template.reducing_end
            if reduced is not None:
                self.reducing_end = reduced.clone()
            self._composition_offset = template._composition_offset.clone()

    def __setitem__(self, key, value):
        """
        Set the quantity of `key` to `value`

        If `key` is a string, it will be passed through
        :func:`from_iupac_lite`

        If `key` has a reducing end value, that reducing end will be set on
        `self`, and a clone of `key` with its reducing end cleared is stored
        instead of `key` itself.

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to store
        value : int
            The value to store
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Clone before clearing so the caller's object is left untouched.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, int(value))
        self._mass = None

    def __getitem__(self, key):
        """
        Get the quantity of `key`

        If `key` is a string, it will be passed through
        :func:`from_iupac_lite`

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to look up

        Returns
        -------
        int
            The stored count, or 0 when `key` is not present.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return 0

    def __delitem__(self, key):
        """
        Remove `key`, parsing it with :func:`from_iupac_lite` first if it is a
        string, and invalidate the cached mass.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Calculate the mass of the aggregate.

        The result is the composition offset plus each key's mass times its
        count plus the reducing end's mass, if any. When `charge` is non-zero
        the mass is computed from :meth:`total_composition` instead.

        Parameters
        ----------
        average : bool, optional
            Forwarded to the underlying mass calculations.
        charge : int, optional
            Forwarded to ``calc_mass`` when non-zero.
        mass_data : optional
            Forwarded to the underlying mass calculations.

        Returns
        -------
        The computed mass.
        """
        # The cache holds a single value keyed only on `charge`, so it is only
        # valid for the default `average`/`mass_data` arguments. Any other
        # combination is computed fresh and not stored.
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            # Use calc_mass rather than the bare `.mass` attribute so the
            # offset honours `average` and `mass_data` like the other terms.
            mass = self._composition_offset.calc_mass(
                average=average, mass_data=mass_data)
            for residue_type, count in self.items():
                mass += residue_type.mass(
                    average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(
                    average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(
                average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Store counts from a mapping, from ``(name, count)`` pairs, and from
        keyword arguments. Entries whose count equals 0 are skipped.

        Parameters
        ----------
        *args
            Either a single Mapping, whose items are stored, or any number of
            ``(name, count)`` pairs.
        **kwargs
            Additional ``name=count`` entries.
        """
        if len(args) == 1 and isinstance(args[0], Mapping):
            pairs = args[0].items()
        else:
            pairs = args
        for name, count in pairs:
            if count != 0:
                self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each given residue by one.

        The type of the first argument decides how all arguments are treated:
        :class:`MonosaccharideResidue` instances are used as-is,
        :class:`Monosaccharide` instances are converted with
        :meth:`MonosaccharideResidue.from_monosaccharide`, and a |Glycan| is
        expanded into its nodes whose ``node_type`` matches
        ``MonosaccharideResidue.node_type``, each converted the same way.
        Calling with no arguments does nothing.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types.
        """
        if not args:
            return
        first = args[0]
        if not isinstance(first, MonosaccharideResidue):
            if isinstance(first, Monosaccharide):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(first, Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in first
                     if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(first)))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        """Add the counts in `other` to this instance in place."""
        for elem, cnt in other.items():
            self[elem] += cnt
        return self

    def __add__(self, other):
        """Return a clone of this instance with the counts in `other` added."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        """Reflected addition; delegates to :meth:`__add__`."""
        return self + other

    def __isub__(self, other):
        """Subtract the counts in `other` from this instance in place."""
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        """Return a clone of this instance with the counts in `other` subtracted."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        """Reflected subtraction, computed as ``(self - other) * -1``."""
        return (self - other) * (-1)

    def __mul__(self, other):
        """
        Return a new |GlycanComposition| with every count multiplied by the
        integer `other`.

        The result is built from the scaled counts alone; the reducing end and
        composition offset of `self` are not passed to it.

        Raises
        ------
        TypeError
            If `other` is not an `int`.
        """
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer', other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other
        return GlycanComposition(prod)

    def __rmul__(self, other):
        """Reflected multiplication; delegates to :meth:`__mul__`."""
        return self * other

    def __eq__(self, other):
        """
        Compare the non-zero ``(key, count)`` pairs of two mappings.

        Only dictionary items are compared; the reducing end and composition
        offset do not take part. Non-`dict` operands compare unequal.
        """
        if not isinstance(other, dict):
            return False
        self_items = set([i for i in self.items() if i[1]])
        other_items = set([i for i in other.items() if i[1]])
        return self_items == other_items

    def __neg__(self):
        """Return this composition multiplied by -1."""
        return -1 * self

    def __missing__(self, key):
        """Absent keys have a count of 0."""
        return 0

    def __contains__(self, key):
        """Membership test; string keys are parsed with :func:`from_iupac_lite`."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply :func:`drop_stem` to every key, then merge redundant keys."""
        for t in self:
            drop_stem(t)
        self.collapse()

    def drop_positions(self):
        """Apply the module-level :func:`drop_positions` to every key, then
        merge redundant keys."""
        for t in self:
            # Inside a method body this name resolves to the module-level
            # function, not to this method.
            drop_positions(t)
        self.collapse()

    def drop_configurations(self):
        """Apply :func:`drop_configuration` to every key, then merge
        redundant keys."""
        for t in self:
            drop_configuration(t)
        self.collapse()

    def total_composition(self):
        """
        Sum the composition offset, each key's total composition times its
        count, and the reducing end's total composition, if any.

        Returns
        -------
        Composition
            A new object; the stored offset is cloned before accumulating.
        """
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or
        :meth:`drop_stems`, monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        # Re-inserting re-hashes each key, so keys that now compare equal
        # accumulate into a single entry.
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        """The reducing end of the aggregate, or None."""
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        """Method form of the :attr:`reducing_end` setter."""
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        """The composition added on top of the residues' own compositions."""
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self, propogate_composition_offset=True):
        """
        Copy this instance by passing it to the class constructor.

        Parameters
        ----------
        propogate_composition_offset : bool, optional
            When False, the copy's composition offset is reset to
            ``Composition('H2O')`` instead of being carried over.

        Returns
        -------
        GlycanComposition
        """
        dup = self.__class__(self)
        if not propogate_composition_offset:
            dup.composition_offset = Composition('H2O')
        return dup

    def serialize(self):
        """
        Render the short-form text notation.

        Entries with a positive count are written as ``key:count``, ordered by
        each key's ``mass()``, joined with ``'; '`` and wrapped in braces. If
        a reducing end is set, ``$`` and the formula of its total composition
        are appended.

        Returns
        -------
        str
        """
        form = "{%s}" % '; '.join(
            "{}:{}".format(str(k), v) for k, v in sorted(
                self.items(), key=lambda x: x[0].mass()) if v > 0)
        reduced = self.reducing_end
        if reduced is not None:
            form = "%s$%s" % (form, formula(reduced.total_composition()))
        return form

    __str__ = serialize

    @classmethod
    def _get_parse_tokens(cls, string):
        """
        Split serialized text into its residue tokens and optional reducing
        end part.

        Returns
        -------
        tuple
            ``(tokens, reduced)`` where `tokens` is the list obtained by
            stripping the first and last character of the residue part and
            splitting on ``'; '``, and `reduced` is the text after ``$`` or
            None.

        Raises
        ------
        ValueError
            If the text contains more than one ``$``.
        """
        string = str(string)
        parts = string.split('$')
        if len(parts) == 1:
            tokens = parts[0]
            reduced = None
        elif len(parts) == 2:
            tokens, reduced = parts
        else:
            raise ValueError("Could not interpret %r" % string)
        tokens = tokens[1:-1].split('; ')
        return tokens, reduced

    def _handle_reduction_and_derivatization(self, reduced):
        """
        Finish parsing: install the reducing end described by `reduced`, if
        any, and re-apply the first derivatization found among the keys.
        """
        if reduced:
            reduced = ReducedEnd(Composition(reduced))
            self.reducing_end = reduced
        deriv = None
        for key in self:
            deriv = has_derivatization(key)
            if deriv:
                break
        if deriv:
            self._derivatized(
                deriv.clone(), make_counter(uid()), include_reducing_end=False)

    @classmethod
    def parse(cls, string):
        """
        Reconstruct an instance from the notation written by
        :meth:`serialize`.

        An empty residue list (``{}``) yields an empty composition, with or
        without a trailing ``$`` reducing end part.

        Raises
        ------
        ValueError
            If a token is not of the form ``residue:count``.
        """
        tokens, reduced = cls._get_parse_tokens(string)
        inst = cls()
        # "{}" splits into a single empty token; treat that as "no residues"
        # so that "{}$<formula>", which serialize() can emit, still parses.
        if tokens != ['']:
            for token in tokens:
                try:
                    residue, count = token.split(":")
                except ValueError:
                    raise ValueError("Malformed Token, %s" % (token,))
                inst[from_iupac_lite(residue)] = int(count)
        inst._handle_reduction_and_derivatization(reduced)
        return inst

    def _derivatized(self, substituent, id_base, include_reducing_end=True):
        """
        Adjust the composition offset for a derivatization with `substituent`.

        The adjustment is applied ``n`` times, where ``n`` starts at 2 and is
        reduced by the count of every key whose ``node_type`` is
        ``Substituent.node_type``. The reducing end is derivatized too when
        present and `include_reducing_end` is true.
        """
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None and include_reducing_end:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self.collapse()
        self._invalidate()

    def _strip_derivatization(self):
        """
        Reset the composition offset to ``Composition("H2O")`` and strip the
        derivatization from the reducing end, if any.
        """
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self.collapse()
        self._invalidate()

    def _invalidate(self):
        """Discard the cached mass and the charge it was computed for."""
        self._mass = None
        self._charge = None
class GlycanComposition(dict, SaccharideCollection):
    """
    A |dict| mapping residue keys to integer counts, describing a glycan
    without linkage information.

    String keys are parsed with :func:`from_iupac_lite`. Missing keys read as
    0. Instances support addition, subtraction and integer multiplication.

    Attributes
    ----------
    reducing_end
        Optional reducing end, included in mass and composition totals.
    _composition_offset : Composition
        Extra composition added on top of the residues, initially
        ``Composition("H2O")``.
    """

    @classmethod
    def from_glycan(cls, glycan):
        """
        Convert a |Glycan| into a |GlycanComposition|.

        Parameters
        ----------
        glycan : Glycan
            The instance to be converted

        Returns
        -------
        GlycanComposition
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            # NOTE(review): `_derivatized` subtracts
            # `attachment_composition_loss() * 2` per substituent copy, while
            # this expression subtracts it only once -- confirm which of the
            # two formulas is intended.
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss()) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Build a composition from the same arguments accepted by :meth:`update`.

        Parameters
        ----------
        reducing_end : optional, keyword only
            Removed from `kwargs` and stored as the reducing end.
        """
        self._reducing_end = kwargs.pop("reducing_end", None)
        dict.__init__(self)
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        """
        Set the quantity of `key` to `value`.

        String keys are parsed with :func:`from_iupac_lite`. If `key` carries
        a reducing end, it is moved onto `self` and a clone of `key` with its
        reducing end cleared is stored.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Clone before clearing so the caller's object is not modified.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, value)
        self._mass = None

    def __getitem__(self, key):
        """
        Get the quantity of `key`; string keys are parsed with
        :func:`from_iupac_lite`. Absent keys are served by
        :meth:`__missing__`.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__getitem__(self, key)

    def __delitem__(self, key):
        """
        Remove `key`, parsing it with :func:`from_iupac_lite` first if it is a
        string, and invalidate the cached mass.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Calculate the mass of the aggregate.

        The result is the composition offset plus each key's mass times its
        count plus the reducing end's mass, if any. When `charge` is non-zero
        the mass is computed from :meth:`total_composition` instead.

        Parameters
        ----------
        average : bool, optional
            Forwarded to the underlying mass calculations.
        charge : int, optional
            Forwarded to ``calc_mass`` when non-zero.
        mass_data : optional
            Forwarded to the underlying mass calculations.
        """
        # The cache holds a single value keyed only on `charge`, so it is only
        # valid for the default `average`/`mass_data` arguments. Any other
        # combination is computed fresh and not stored.
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            # Use calc_mass rather than the bare `.mass` attribute so the
            # offset honours `average` and `mass_data` like the other terms.
            mass = self._composition_offset.calc_mass(
                average=average, mass_data=mass_data)
            for residue_type, count in self.items():
                mass += residue_type.mass(
                    average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(
                    average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(
                average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Store counts from a dict, from ``(name, count)`` pairs, and from
        keyword arguments. Entries whose count equals 0 are skipped.

        Parameters
        ----------
        *args
            Either a single `dict`, whose items are stored, or any number of
            ``(name, count)`` pairs.
        **kwargs
            Additional ``name=count`` entries.
        """
        if len(args) == 1 and isinstance(args[0], dict):
            pairs = args[0].items()
        else:
            pairs = args
        for name, count in pairs:
            if count != 0:
                self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each given residue by one.

        The type of the first argument decides how all arguments are treated:
        :class:`MonosaccharideResidue` instances are used as-is,
        :class:`Monosaccharide` instances are converted with
        :meth:`MonosaccharideResidue.from_monosaccharide`, and a |Glycan| is
        expanded into its nodes whose ``node_type`` matches
        ``MonosaccharideResidue.node_type``, each converted the same way.
        Calling with no arguments does nothing.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types.
        """
        if not args:
            return
        first = args[0]
        if not isinstance(first, MonosaccharideResidue):
            if isinstance(first, Monosaccharide):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(first, Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in first
                     if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(first)))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        """Add the counts in `other` to this instance in place."""
        for elem, cnt in other.items():
            self[elem] += cnt
        return self

    def __add__(self, other):
        """Return a clone of this instance with the counts in `other` added."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        """Reflected addition; delegates to :meth:`__add__`."""
        return self + other

    def __isub__(self, other):
        """Subtract the counts in `other` from this instance in place."""
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        """Return a clone of this instance with the counts in `other` subtracted."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        """Reflected subtraction, computed as ``(self - other) * -1``."""
        return (self - other) * (-1)

    def __mul__(self, other):
        """
        Return a new |GlycanComposition| with every count multiplied by the
        integer `other`.

        The result is built from the scaled counts alone; the reducing end and
        composition offset of `self` are not passed to it.

        Raises
        ------
        TypeError
            If `other` is not an `int`.
        """
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer', other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other
        return GlycanComposition(prod)

    def __rmul__(self, other):
        """Reflected multiplication; delegates to :meth:`__mul__`."""
        return self * other

    def __eq__(self, other):
        """
        Compare the non-zero ``(key, count)`` pairs of two mappings.

        Only dictionary items are compared; the reducing end and composition
        offset do not take part. Non-`dict` operands compare unequal.
        """
        if not isinstance(other, dict):
            return False
        self_items = set([i for i in self.items() if i[1]])
        other_items = set([i for i in other.items() if i[1]])
        return self_items == other_items

    def __neg__(self):
        """Return this composition multiplied by -1."""
        return -1 * self

    def __missing__(self, key):
        """Absent keys have a count of 0."""
        return 0

    def __contains__(self, key):
        """Membership test; string keys are parsed with :func:`from_iupac_lite`."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """
        Apply :func:`drop_stem` to every key, merge keys that became
        redundant, and return `self`.
        """
        for t in self:
            drop_stem(t)
        # Keys were modified in place; collapse() re-inserts them so that
        # lookups and merging see their current state.
        self.collapse()
        return self

    def drop_positions(self):
        """
        Apply the module-level :func:`drop_positions` to every key, merge keys
        that became redundant, and return `self`.
        """
        for t in self:
            # Inside a method body this name resolves to the module-level
            # function, not to this method.
            drop_positions(t)
        self.collapse()
        return self

    def drop_configurations(self):
        """
        Apply :func:`drop_configuration` to every key, merge keys that became
        redundant, and return `self`.
        """
        for t in self:
            drop_configuration(t)
        self.collapse()
        return self

    def total_composition(self):
        """
        Sum the composition offset, each key's total composition times its
        count, and the reducing end's total composition, if any.

        Returns
        -------
        Composition
            A new object; the stored offset is cloned before accumulating.
        """
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or
        :meth:`drop_stems`, monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        # Re-inserting re-hashes each key, so keys that now compare equal
        # accumulate into a single entry.
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        """The reducing end of the aggregate, or None."""
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        """Method form of the :attr:`reducing_end` setter."""
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        """The composition added on top of the residues' own compositions."""
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self):
        """
        Copy this instance, including a clone of its reducing end (if any)
        and of its composition offset.

        Returns
        -------
        GlycanComposition
        """
        dup = self.__class__(self)
        # The constructor copies only the counts, so carry the rest over
        # explicitly; otherwise arithmetic results built on clone() lose them.
        if self._reducing_end is not None:
            dup.reducing_end = self._reducing_end.clone()
        dup.composition_offset = self._composition_offset.clone()
        return dup

    def serialize(self):
        """
        Render the short-form text notation.

        Entries with a positive count are written as ``key:count``, ordered by
        each key's ``mass()``, joined with ``'; '`` and wrapped in braces.

        Returns
        -------
        str
        """
        return "{%s}" % '; '.join(
            "{}:{}".format(str(k), v) for k, v in sorted(
                self.items(), key=lambda x: x[0].mass()) if v > 0)

    __str__ = serialize

    @classmethod
    def parse(cls, string):
        """
        Reconstruct an instance from the notation written by
        :meth:`serialize`. The text ``{}`` yields an empty composition.
        """
        inst = cls()
        tokens = string[1:-1].split('; ')
        # "{}" splits into a single empty token, which has no "residue:count"
        # form; treat it as "no residues" so an empty composition round-trips.
        if tokens == ['']:
            return inst
        for token in tokens:
            residue, count = token.split(":")
            inst[from_iupac_lite(residue)] = int(count)
        return inst

    def _derivatized(self, substituent, id_base):
        """
        Adjust the composition offset for a derivatization with `substituent`.

        The adjustment is applied ``n`` times, where ``n`` starts at 2 and is
        reduced by the count of every key whose ``node_type`` is
        ``Substituent.node_type``. The reducing end, if present, is
        derivatized too.
        """
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self._mass = None

    def _strip_derivatization(self):
        """
        Reset the composition offset to ``Composition("H2O")`` and strip the
        derivatization from the reducing end, if any.
        """
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self._mass = None