class GlycanComposition(dict, SaccharideCollection):
    """
    Describe a glycan as a collection of :class:`MonosaccharideResidue` counts without
    explicit linkage information relating how each monosaccharide is connected to its neighbors.

    This class subclasses |dict|, and assumes that keys will either be :class:`MonosaccharideResidue`
    instances, :class:`SubstituentResidue` instances, or strings in `iupac_lite` format which will be parsed
    into one of these types. While other types may be used, this is not recommended. All standard |dict| methods
    are supported.

    |GlycanComposition| objects may be derivatized just as |Glycan| objects are, with
    :func:`glypy.composition.composition_transform.derivatize` and
    :func:`glypy.composition.composition_transform.strip_derivatization`.

    GlycanComposition objects also support composition arithmetic, and can be added or subtracted from each other
    or multiplied by an integer.

    As GlycanComposition is not a complete structure, they cannot be translated into text formats as
    full |Glycan| objects are. They may instead be converted to and from a short-form text notation using
    :meth:`GlycanComposition.serialize` and reconstructed from this format using :meth:`GlycanComposition.parse`.

    Attributes
    ----------
    reducing_end : |ReducingEnd|
        Describe the reducing end of the aggregate without binding it to a specific monosaccharide.
        This will contribute to composition and mass calculations.
    _composition_offset: |Composition|
        Account for the one water molecule's worth of composition left over from applying the "residue"
        transformation to each monosaccharide in the aggregate.
    """
    @classmethod
    def from_glycan(cls, glycan):
        """
        Convert a |Glycan| into a |GlycanComposition|.

        Parameters
        ----------
        glycan : Glycan
            The instance to be converted

        Returns
        -------
        GlycanComposition
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            # Use the same per-site offset as `_derivatized`: the attachment
            # loss is subtracted twice per site, applied to two sites.
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss() * 2) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Build a composition from the same arguments accepted by :meth:`update`.

        When the first positional argument is itself a |GlycanComposition|, its
        reducing end (cloned) and composition offset (cloned) are copied as well.
        """
        self._reducing_end = None
        dict.__init__(self)
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)
        template = args[0] if args else None
        if isinstance(template, GlycanComposition):
            reduced = template.reducing_end
            if reduced is not None:
                self.reducing_end = reduced.clone()
            self._composition_offset = template._composition_offset.clone()

    def __setitem__(self, key, value):
        """
        Set the quantity of `key` to `value`

        If `key` is a string, it will be passed through :func:`from_iupac_lite`

        If `key` has a reducing end value, that reducing end will be set on `self`
        and a clone of `key` without the reducing end is stored instead.

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to store
        value : int
            The value to store
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Clone so the caller's object keeps its reducing end.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, int(value))
        self._mass = None

    def __getitem__(self, key):
        """
        Get the quantity of `key`

        If `key` is a string, it will be passed through :func:`from_iupac_lite`

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to look up

        Returns
        -------
        int
            The stored count, or 0 when `key` is not present
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return 0

    def __delitem__(self, key):
        """
        Remove `key`, parsing it with :func:`from_iupac_lite` first if it is a string.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Calculate the mass of the composition, including the composition offset
        and the reducing end if one is set.

        Parameters
        ----------
        average : bool
            Forwarded to the underlying mass calculations
        charge : int
            When 0, residue masses are summed directly. Otherwise the mass is
            computed from :meth:`total_composition` with this charge.
        mass_data : object
            Forwarded to the underlying mass calculations

        Returns
        -------
        float
        """
        # The cache stores a single value keyed on charge only, so it is only
        # valid for the default monoisotopic calculation with default mass data.
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            mass = self._composition_offset.calc_mass(average=average, mass_data=mass_data)
            for residue_type, count in self.items():
                mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Set counts from a mapping or an iterable of ``(name, count)`` pairs given as
        the single positional argument, and from keyword arguments.

        Entries whose count is 0 are skipped. Each assignment goes through
        :meth:`__setitem__`, so string names are parsed.
        """
        if len(args) == 1:
            source = args[0]
            pairs = source.items() if isinstance(source, Mapping) else source
            for name, count in pairs:
                if count != 0:
                    self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each residue given by one.

        The type of the first argument decides how all arguments are interpreted:
        :class:`MonosaccharideResidue` instances are used directly, |Monosaccharide|
        instances are converted with :meth:`MonosaccharideResidue.from_monosaccharide`,
        and for a |Glycan| its monosaccharide nodes are converted and added.
        Calling with no arguments does nothing.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types
        """
        if not args:
            return
        first = args[0]
        if not isinstance(first, MonosaccharideResidue):
            if isinstance(first, Monosaccharide):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(first, Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in first if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(first)))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        """Add the counts of the mapping `other` to `self` in place."""
        for elem, cnt in other.items():
            self[elem] += cnt
        return self

    def __add__(self, other):
        """Return a clone of `self` with the counts of the mapping `other` added."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        """Reflected addition; equivalent to ``self + other``."""
        return self + other

    def __isub__(self, other):
        """Subtract the counts of the mapping `other` from `self` in place."""
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        """Return a clone of `self` with the counts of the mapping `other` subtracted."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        """Reflected subtraction, computed as ``-(self - other)``."""
        return (self - other) * (-1)

    def __mul__(self, other):
        """
        Return a new composition with every count multiplied by the integer `other`.

        The result is built from the scaled counts only.

        Raises
        ------
        TypeError
            If `other` is not an integer
        """
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer',
                other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other

        # Build the same type as `self`, as `clone` does, so subclasses are preserved.
        return self.__class__(prod)

    def __rmul__(self, other):
        """Reflected multiplication; equivalent to ``self * other``."""
        return self * other

    def __eq__(self, other):
        """
        Compare the non-zero entries of `self` and `other`.

        Returns False when `other` is not a |dict|. Entries with a count of 0
        are ignored on both sides.
        """
        if not isinstance(other, dict):
            return False
        self_items = {item for item in self.items() if item[1]}
        other_items = {item for item in other.items() if item[1]}
        return self_items == other_items

    def __ne__(self, other):
        """
        Inverse of :meth:`__eq__`.

        Defined explicitly because ``dict.__ne__`` would otherwise be used and it
        does not ignore zero-count entries the way :meth:`__eq__` does.
        """
        return not (self == other)

    def __neg__(self):
        """Return a new composition with every count negated."""
        return -1 * self

    def __missing__(self, key):
        """Report a count of 0 for keys that are not present."""
        return 0

    def __contains__(self, key):
        """Test whether `key` is stored, parsing it first if it is a string."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply :func:`drop_stem` to every key, then merge redundant keys."""
        for t in self:
            drop_stem(t)
        self.collapse()

    def drop_positions(self):
        """Apply :func:`drop_positions` to every key, then merge redundant keys."""
        for t in self:
            drop_positions(t)
        self.collapse()

    def drop_configurations(self):
        """Apply :func:`drop_configuration` to every key, then merge redundant keys."""
        for t in self:
            drop_configuration(t)
        self.collapse()

    def total_composition(self):
        """
        Sum the composition offset, each key's total composition scaled by its
        count, and the reducing end's composition if one is set.

        Returns
        -------
        Composition
        """
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or :meth:`drop_stems`,
        monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        """The reducing end of the aggregate, or None."""
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        """Method form of assigning :attr:`reducing_end`."""
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        """The composition added on top of the residue compositions."""
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self, propogate_composition_offset=True):
        """
        Copy this composition.

        Parameters
        ----------
        propogate_composition_offset : bool
            When False, the copy's composition offset is reset to a single water
            instead of being copied from `self`.

        Returns
        -------
        GlycanComposition
        """
        dup = self.__class__(self)
        if not propogate_composition_offset:
            dup.composition_offset = Composition('H2O')
        return dup

    def serialize(self):
        """
        Render the short-form text notation.

        Entries with a positive count are written as ``name:count`` in order of
        ascending residue mass, joined by ``"; "`` inside braces. When a reducing
        end is set, ``$`` and the formula of its composition are appended.

        Returns
        -------
        str
        """
        form = "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
            self.items(), key=lambda x: x[0].mass()) if v > 0)
        reduced = self.reducing_end
        if reduced is not None:
            form = "%s$%s" % (form, formula(reduced.total_composition()))
        return form

    __str__ = serialize

    @classmethod
    def _get_parse_tokens(cls, string):
        """
        Split serialized text into residue tokens and an optional reducing end.

        Returns
        -------
        tuple
            The list of ``name:count`` tokens and the text after ``$`` (or None)

        Raises
        ------
        ValueError
            If `string` contains more than one ``$``
        """
        string = str(string)
        parts = string.split('$')
        if len(parts) == 1:
            tokens = parts[0]
            reduced = None
        elif len(parts) == 2:
            tokens, reduced = parts
        else:
            raise ValueError("Could not interpret %r" % string)
        # Drop the surrounding braces before splitting the entries.
        tokens = tokens[1:-1].split('; ')
        return tokens, reduced

    def _handle_reduction_and_derivatization(self, reduced):
        """
        Finish parsing: attach the reducing end described by `reduced`, if any,
        and account for derivatization found on any key.
        """
        if reduced:
            reduced = ReducedEnd(Composition(reduced))
            self.reducing_end = reduced
        deriv = None
        for key in self:
            deriv = has_derivatization(key)
            if deriv:
                break
        if deriv:
            self._derivatized(deriv.clone(), make_counter(uid()), include_reducing_end=False)

    @classmethod
    def parse(cls, string):
        """
        Reconstruct a composition from the text produced by :meth:`serialize`.

        Parameters
        ----------
        string : str
            The text to parse

        Returns
        -------
        GlycanComposition

        Raises
        ------
        ValueError
            If a token is not of the form ``name:count``
        """
        tokens, reduced = cls._get_parse_tokens(string)
        inst = cls()
        for token in tokens:
            try:
                residue, count = token.split(":")
            except ValueError:
                # An empty composition serializes as "{}", optionally followed
                # by "$<reducing end>"; there is nothing to add for it.
                if token == "" and str(string).startswith("{}"):
                    continue
                raise ValueError("Malformed Token, %s" % (token,))
            inst[from_iupac_lite(residue)] = int(count)
        inst._handle_reduction_and_derivatization(reduced)
        return inst

    def _derivatized(self, substituent, id_base, include_reducing_end=True):
        """
        Adjust the composition offset for derivatization with `substituent`.

        The number of sites starts at 2 and is reduced by the count of every
        substituent-type key. The reducing end is derivatized as well when it is
        set and `include_reducing_end` is true.
        """
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None and include_reducing_end:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self.collapse()
        self._invalidate()

    def _strip_derivatization(self):
        """
        Reset the composition offset to a single water and strip derivatization
        from the reducing end if one is set.
        """
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self.collapse()
        self._invalidate()

    def _invalidate(self):
        """Discard the cached mass and the charge it was computed for."""
        self._mass = None
        self._charge = None
# Example #2
# 0
class GlycanComposition(dict, SaccharideCollection):
    """
    A |dict| of residue counts describing a glycan without linkage information.

    Keys given as strings are parsed with :func:`from_iupac_lite`. Missing keys
    read as 0. A reducing end and a composition offset (initially one water)
    are tracked alongside the counts and contribute to mass and composition.
    """

    @classmethod
    def from_glycan(cls, glycan):
        """
        Build a composition from the monosaccharides of `glycan`, carrying over
        its reducing end and accounting for derivatization found on its root.
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            # Use the same per-site offset as `_derivatized`: the attachment
            # loss is subtracted twice per site, applied to two sites.
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss() * 2) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Build a composition from the same arguments accepted by :meth:`update`.

        The keyword ``reducing_end`` is consumed and stored as the reducing end
        instead of being treated as a residue name.
        """
        self._reducing_end = kwargs.pop("reducing_end", None)
        dict.__init__(self)
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        """
        Set the count of `key` to `value`, parsing `key` first if it is a string.

        If `key` carries a reducing end, it is moved onto `self` and a clone of
        `key` without the reducing end is stored.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Clone so the caller's object keeps its reducing end.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, value)
        self._mass = None

    def __getitem__(self, key):
        """
        Get the count of `key`, parsing it first if it is a string.

        Missing keys yield 0 through :meth:`__missing__`.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__getitem__(self, key)

    def __delitem__(self, key):
        """Remove `key`, parsing it first if it is a string."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Calculate the mass of the composition, including the composition offset
        and the reducing end if one is set.

        When `charge` is 0 residue masses are summed directly; otherwise the mass
        is computed from :meth:`total_composition` with that charge. `average`
        and `mass_data` are forwarded to the underlying mass calculations.
        """
        # The cache stores a single value keyed on charge only, so it is only
        # valid for the default monoisotopic calculation with default mass data.
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            mass = self._composition_offset.calc_mass(average=average, mass_data=mass_data)
            for residue_type, count in self.items():
                mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Set counts from a |dict| or an iterable of ``(name, count)`` pairs given
        as the single positional argument, and from keyword arguments.

        Entries whose count is 0 are skipped.
        """
        if len(args) == 1:
            source = args[0]
            pairs = source.items() if isinstance(source, dict) else source
            for name, count in pairs:
                if count != 0:
                    self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each residue given by one.

        The type of the first argument decides how all arguments are interpreted:
        :class:`MonosaccharideResidue` instances are used directly, |Monosaccharide|
        instances are converted with :meth:`MonosaccharideResidue.from_monosaccharide`,
        and for a |Glycan| its monosaccharide nodes are converted and added.
        Calling with no arguments does nothing.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types
        """
        if not args:
            return
        first = args[0]
        if not isinstance(first, MonosaccharideResidue):
            if isinstance(first, Monosaccharide):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(first, Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in first if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(first)))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        """Add the counts of the mapping `other` to `self` in place."""
        for elem, cnt in other.items():
            self[elem] += cnt
        return self

    def __add__(self, other):
        """Return a clone of `self` with the counts of the mapping `other` added."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        """Reflected addition; equivalent to ``self + other``."""
        return self + other

    def __isub__(self, other):
        """Subtract the counts of the mapping `other` from `self` in place."""
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        """Return a clone of `self` with the counts of the mapping `other` subtracted."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        """Reflected subtraction, computed as ``-(self - other)``."""
        return (self - other) * (-1)

    def __mul__(self, other):
        """
        Return a new composition with every count multiplied by the integer `other`.

        Raises
        ------
        TypeError
            If `other` is not an integer
        """
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer',
                other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other

        # Build the same type as `self`, as `clone` does, so subclasses are preserved.
        return self.__class__(prod)

    def __rmul__(self, other):
        """Reflected multiplication; equivalent to ``self * other``."""
        return self * other

    def __eq__(self, other):
        """
        Compare the non-zero entries of `self` and `other`.

        Returns False when `other` is not a |dict|.
        """
        if not isinstance(other, dict):
            return False
        self_items = {item for item in self.items() if item[1]}
        other_items = {item for item in other.items() if item[1]}
        return self_items == other_items

    def __ne__(self, other):
        """
        Inverse of :meth:`__eq__`.

        Defined explicitly because ``dict.__ne__`` would otherwise be used and it
        does not ignore zero-count entries the way :meth:`__eq__` does.
        """
        return not (self == other)

    def __neg__(self):
        """Return a new composition with every count negated."""
        return -1 * self

    def __missing__(self, key):
        """Report a count of 0 for keys that are not present."""
        return 0

    def __contains__(self, key):
        """Test whether `key` is stored, parsing it first if it is a string."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply :func:`drop_stem` to every key and return `self`."""
        for t in self:
            drop_stem(t)
        return self

    def drop_positions(self):
        """Apply :func:`drop_positions` to every key and return `self`."""
        for t in self:
            drop_positions(t)
        return self

    def drop_configurations(self):
        """Apply :func:`drop_configuration` to every key and return `self`."""
        for t in self:
            drop_configuration(t)
        # Return `self` like the sibling drop_* methods so calls can be chained.
        return self

    def total_composition(self):
        """
        Sum the composition offset, each key's total composition scaled by its
        count, and the reducing end's composition if one is set.
        """
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or :meth:`drop_stems`,
        monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        """The reducing end of the aggregate, or None."""
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        """Method form of assigning :attr:`reducing_end`."""
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        """The composition added on top of the residue compositions."""
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self):
        """
        Copy this composition, including its reducing end and composition offset.

        Both are cloned so the copy does not share them with `self`.
        """
        dup = self.__class__(self)
        if self._reducing_end is not None:
            dup.reducing_end = self._reducing_end.clone()
        dup.composition_offset = self._composition_offset.clone()
        return dup

    def serialize(self):
        """
        Render the short-form text notation: entries with a positive count as
        ``name:count``, ordered by ascending residue mass, joined by ``"; "``
        inside braces.
        """
        return "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
            self.items(), key=lambda x: x[0].mass()) if v > 0)

    __str__ = serialize

    @classmethod
    def parse(cls, string):
        """
        Reconstruct a composition from the text produced by :meth:`serialize`.

        Raises
        ------
        ValueError
            If a non-empty token is not of the form ``name:count``
        """
        inst = cls()
        # Drop the surrounding braces before splitting the entries.
        tokens = string[1:-1].split('; ')
        for token in tokens:
            # An empty composition serializes as "{}", which leaves a single
            # empty token with nothing to add.
            if token == "" and string == "{}":
                continue
            residue, count = token.split(":")
            inst[from_iupac_lite(residue)] = int(count)
        return inst

    def _derivatized(self, substituent, id_base):
        """
        Adjust the composition offset for derivatization with `substituent`.

        The number of sites starts at 2 and is reduced by the count of every
        substituent-type key. The reducing end is derivatized as well when set.
        """
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self._mass = None

    def _strip_derivatization(self):
        """
        Reset the composition offset to a single water and strip derivatization
        from the reducing end if one is set.
        """
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self._mass = None