Example #1
0
def main():
    """Command-line entry point: parse the arguments and hand them to ``run``.

    Logging is configured at ``INFO`` level, every requested loss is converted
    into a ``MolecularComposition`` (with a leading ``None`` entry standing for
    the loss-less case), the chain length bounds are sorted, and the maximum
    charge is forced to be negative before ``run`` is invoked.
    """
    logging.basicConfig(level='INFO', format="%(name)s: %(message)s")
    args = app.parse_args()

    # The first entry is None: it represents the loss-less case.
    molecular_composition_losses = [None]
    for loss in args.losses:
        logger.info("Converting loss %s -> %s", loss, Composition(loss))
        molecular_composition_losses.append(
            MolecularComposition(loss, Composition(loss)))

    length_range = sorted(int(bound) for bound in args.chain_length_range)
    # Whatever sign the user supplied, the charge passed on is negative.
    max_charge = -abs(args.max_charge)
    has_anhydromanose = bool(args.has_anhydromanose)
    gag_type = args.gag_type

    logger.info("GAG Chain Range: %d-%d" % tuple(length_range))

    mass_error_tolerance = args.mass_error_tolerance
    output_path = args.output_path
    # Fall back to CSV when no output format was requested.
    output_format = args.output_format or ['csv']
    pick_peaks = args.pick_peaks

    # NOTE(review): the literal None is passed in the sixth position —
    # presumably the reducing end type, which is not configurable here; confirm
    # against the signature of ``run``.
    run(
        args.peaklist_path,
        gag_type,
        length_range,
        has_anhydromanose,
        molecular_composition_losses,
        None,
        max_charge,
        output_path,
        output_format,
        pick_peaks=pick_peaks,
        mass_error_tolerance=mass_error_tolerance)
Example #2
0
 def _compute_composition(self):
     """Recompute the aggregate properties from ``self.counts``.

     Every key of ``self.counts`` contributes its ``composition``,
     ``tandem_composition`` and ``charge_carrier`` scaled by the associated
     count. The totals and the masses of the two summed compositions are
     stored on ``self``.
     """
     summed = Composition()
     tandem_summed = Composition()
     carriers = 0
     for shift, multiplicity in self.counts.items():
         summed += shift.composition * multiplicity
         tandem_summed += shift.tandem_composition * multiplicity
         carriers += shift.charge_carrier * multiplicity
     self.composition = summed
     self.mass = summed.mass
     self.tandem_composition = tandem_summed
     self.tandem_mass = tandem_summed.mass
     self.charge_carrier = carriers
 def _make_glycan_composition_proxy(self):
     """Build a ``GlycanCompositionProxy`` summarising the glycans held here.

     The starting point is a clone of ``self.aggregate`` when one is set,
     otherwise a fresh ``HashableGlycanComposition``. Every non-core entry
     of ``self`` is then added to it.
     """
     if self.aggregate is None:
         base = HashableGlycanComposition()
         # Account for the initial amide bond between the peptide and the
         # first glycan. Later glycans need no further chemical loss because
         # the dehydration is built directly into the Residue abstraction.
         base.composition_offset -= Composition({"H": 2, "O": 1})
     else:
         base = self.aggregate.clone()
     for _, value in self.items():
         rule = value.rule
         if rule.is_core:
             continue
         if rule.is_composition:
             base += rule.glycan
             continue
         # Turn the Glycan object into a composition using the original
         # detached topology, which omits the "aglycone" group representing
         # the glycan-peptide connection and penalising the composition by
         # H2O. That H2O is lost when the bond forms, but the loss is already
         # tracked when the base is initialised above.
         base += HashableGlycanComposition.from_glycan(rule._original)
     return GlycanCompositionProxy(base)
Example #4
0
 def __init__(self, *args, **kwargs):
     """Create the mapping and fill it through :meth:`update`.

     The ``reducing_end`` keyword, if given, is removed from ``kwargs`` and
     stored; everything else is forwarded to :meth:`update`. The composition
     offset starts out as ``Composition("H2O")`` and the mass cache is empty.
     """
     reducing_end = kwargs.pop("reducing_end", None)
     dict.__init__(self)
     self._reducing_end = reducing_end
     self._composition_offset = Composition("H2O")
     self._mass = None
     self._charge = None
     # Populate last: every attribute above must exist before items are set.
     self.update(*args, **kwargs)
 def total_composition(self):
     """Return the summed composition of the entries held by ``self``.

     When ``self.aggregate`` is set, entries whose rule is flagged
     ``is_core`` are skipped and the aggregate's own total composition is
     added instead.
     """
     aggregate_present = self.aggregate is not None
     summed = Composition()
     for _, entry in self.items():
         if not (aggregate_present and entry.rule.is_core):
             summed += entry.composition
     if aggregate_present:
         summed += self.aggregate.total_composition()
     return summed
Example #6
0
def validate_reduction(context, reduction_string):
    """Validate a reduction given by name or as a composition formula.

    Parameters
    ----------
    context :
        Not used by this function.
    reduction_string :
        ``None``, a key of ``named_reductions`` (matched case-insensitively),
        or text that ``Composition`` can parse into a non-empty composition.

    Returns
    -------
    ``None`` when `reduction_string` is ``None``; the ``named_reductions``
    entry for a known name; otherwise the input converted to ``str``.

    Raises
    ------
    click.Abort
        If the value is neither a known name nor a non-empty composition.
    """
    if reduction_string is None:
        return None
    try:
        text = str(reduction_string)
        lowered = text.lower()
        if lowered in named_reductions:
            return named_reductions[lowered]
        if len(Composition(text)) > 0:
            return text
        # An empty composition is handled like any other parse failure.
        raise Exception("Invalid")
    except Exception:
        message = "Could not validate reduction '%s'" % reduction_string
        click.secho(message)
        raise click.Abort(message)
 def __init__(self, *args, **kwargs):
     """Create the mapping, fill it, and copy state from a template.

     All arguments are forwarded to :meth:`update`. When the first
     positional argument is itself a ``GlycanComposition``, its reducing end
     (if any) and its composition offset are cloned onto the new instance.
     """
     self._reducing_end = None
     dict.__init__(self)
     self._mass = None
     self._charge = None
     self._composition_offset = Composition("H2O")
     self.update(*args, **kwargs)
     template = args[0] if args else None
     if isinstance(template, GlycanComposition):
         reduced = template.reducing_end
         if reduced is not None:
             self.reducing_end = reduced.clone()
         self._composition_offset = template._composition_offset.clone()
Example #8
0
def validate_mass_shift(mass_shift_string, multiplicity=1):
    """Resolve a mass shift given by name or by composition formula.

    Parameters
    ----------
    mass_shift_string : str
        A key of ``mass_shifts`` (matched case-insensitively) or a formula
        accepted by ``Composition``.
    multiplicity : int, optional
        Converted with ``int`` and returned alongside the mass shift.

    Returns
    -------
    tuple
        ``(mass_shift, multiplicity)``.

    Raises
    ------
    click.Abort
        If the text is not a known name and building a ``MassShift`` from it
        fails.
    """
    multiplicity = int(multiplicity)
    lowered = mass_shift_string.lower()
    if lowered in mass_shifts:
        return (mass_shifts[lowered], multiplicity)
    try:
        mass_shift_string = str(mass_shift_string)
        shift = MassShift(mass_shift_string, Composition(mass_shift_string))
        return (shift, multiplicity)
    except Exception as e:
        click.secho("%r" % (e, ))
        message = "Could not validate mass_shift %r" % (mass_shift_string, )
        click.secho(message, fg='yellow')
        raise click.Abort(message)
Example #9
0
class GlycanComposition(dict, SaccharideCollection):
    """
    Describe a glycan as a |dict| mapping residues to integer counts.

    String keys are converted with :func:`from_iupac_lite` on every access.
    Keys that are absent read as ``0`` (see :meth:`__missing__`), which is what
    lets ``self[key] += n`` work for keys not stored yet.

    Attributes
    ----------
    reducing_end :
        Reducing end shared by the whole collection; when set it contributes to
        :meth:`mass` and :meth:`total_composition`.
    _composition_offset : Composition
        Starts as ``Composition("H2O")`` and is added on top of the residues in
        :meth:`mass` and :meth:`total_composition`.
    """

    @classmethod
    def from_glycan(cls, glycan):
        """
        Build an instance from `glycan`.

        ``tree(glycan)`` is applied first, its nodes are added with
        :meth:`extend`, and its reducing end is copied. If
        ``has_derivatization(glycan.root)`` reports a derivatizing group, the
        composition offset is increased by twice that group's composition
        minus its attachment loss.
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss()) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Initialize the mapping and populate it through :meth:`update`.

        The ``reducing_end`` keyword is removed from `kwargs` and stored; all
        remaining arguments are forwarded to :meth:`update`.
        """
        self._reducing_end = kwargs.pop("reducing_end", None)
        dict.__init__(self)
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)

    def __setitem__(self, key, value):
        """
        Set the count of `key` to `value`.

        String keys are parsed with :func:`from_iupac_lite`. If a
        monosaccharide key carries a reducing end, it is moved onto `self` and
        the key is stored without it.
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Clear the reducing end on a copy so that the caller's object is
            # not modified as a side effect of storing it.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, value)
        self._mass = None

    def __getitem__(self, key):
        """Return the count stored for `key` (string keys are parsed first)."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__getitem__(self, key)

    def __delitem__(self, key):
        """Remove `key` (string keys are parsed first) and drop the cached mass."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Compute the mass of the collection.

        For ``charge == 0`` the mass is the offset's mass plus each residue's
        mass times its count plus the reducing end's mass. Otherwise it is
        obtained from ``total_composition().calc_mass``.

        The result is cached per charge state, but only for the default
        ``average=False, mass_data=None`` request: the cache does not record
        those two arguments, so reusing it for any other combination would
        return a mass computed with different settings.
        """
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            mass = self._composition_offset.mass
            for residue_type, count in list(self.items()):
                mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Store counts from a single mapping or iterable of pairs, plus keywords.

        Entries whose count equals ``0`` are skipped. Positional input is only
        consumed when exactly one positional argument is given.
        """
        if len(args) == 1:
            if isinstance(args[0], dict):
                for name, count in args[0].items():
                    if count != 0:
                        self[name] = count
            else:
                for name, count in args:
                    if count != 0:
                        self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each given residue by one.

        If the first argument is a ``Monosaccharide``, every argument is
        converted with ``MonosaccharideResidue.from_monosaccharide``. If it is
        a ``Glycan``, its monosaccharide-typed nodes are converted and added.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types.
        """
        if not isinstance(args[0], MonosaccharideResidue):
            if isinstance(args[0], (Monosaccharide)):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(args[0], Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in args[0] if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(args[0])))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        """Add the counts of `other` to `self` in place."""
        for elem, cnt in (other.items()):
            self[elem] += cnt
        return self

    def __add__(self, other):
        """Return a clone of `self` with the counts of `other` added."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        """Reflected addition; delegates to :meth:`__add__`."""
        return self + other

    def __isub__(self, other):
        """Subtract the counts of `other` from `self` in place."""
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        """Return a clone of `self` with the counts of `other` subtracted."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        """Reflected subtraction, computed as ``(self - other) * -1``."""
        return (self - other) * (-1)

    def __mul__(self, other):
        """
        Return a new ``GlycanComposition`` with every count multiplied.

        Only the counts are carried over; the result is built from a plain
        mapping, so it has a default offset and no reducing end.

        Raises
        ------
        TypeError
            If `other` is not an ``int``.
        """
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer',
                other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other

        return GlycanComposition(prod)

    def __rmul__(self, other):
        """Reflected multiplication; delegates to :meth:`__mul__`."""
        return self * other

    def __eq__(self, other):
        """
        Compare the non-zero ``(key, count)`` pairs of two mappings.

        Anything that is not a |dict| compares unequal. The reducing end and
        composition offset take no part in the comparison.
        """
        if not isinstance(other, dict):
            return False
        self_items = set([i for i in self.items() if i[1]])
        other_items = set([i for i in other.items() if i[1]])
        return self_items == other_items

    def __neg__(self):
        """Return ``-1 * self``."""
        return -1 * self

    def __missing__(self, key):
        """Report a count of ``0`` for keys that are not stored."""
        return 0

    def __contains__(self, key):
        """Test membership of `key` (string keys are parsed first)."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply ``drop_stem`` to every key and return `self`."""
        for t in self:
            drop_stem(t)
        return self

    def drop_positions(self):
        """Apply ``drop_positions`` to every key and return `self`."""
        for t in self:
            drop_positions(t)
        return self

    def drop_configurations(self):
        """Apply ``drop_configuration`` to every key and return `self`."""
        for t in self:
            drop_configuration(t)
        # Return self like drop_stems/drop_positions so the calls can be chained.
        return self

    def total_composition(self):
        """
        Return the elemental composition of the whole collection.

        This is a clone of the offset plus each residue's total composition
        times its count, plus the reducing end's composition when one is set.
        """
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or :meth:`drop_stems`,
        monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        """The reducing end shared by the collection, or ``None``."""
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        """Method form of the ``reducing_end`` setter."""
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        """The composition added on top of the residues."""
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self):
        """
        Return a copy carrying the counts, reducing end and composition offset.

        The constructor only copies the counts, so the reducing end and the
        offset are cloned explicitly; otherwise the copy would silently lose
        them and report a different mass than the original.
        """
        dup = self.__class__(self)
        if self._reducing_end is not None:
            dup.reducing_end = self._reducing_end.clone()
        dup._composition_offset = self._composition_offset.clone()
        return dup

    def serialize(self):
        """
        Render as ``{key:count; key:count}``.

        Entries are ordered by the key's mass and only positive counts are
        written.
        """
        return "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
            self.items(), key=lambda x: x[0].mass()) if v > 0)

    __str__ = serialize

    @classmethod
    def parse(cls, string):
        """
        Rebuild an instance from the text produced by :meth:`serialize`.

        The text ``"{}"``, which :meth:`serialize` emits for an empty
        collection, yields an empty instance.

        Raises
        ------
        ValueError
            If a token cannot be split into exactly a residue and a count.
        """
        inst = cls()
        tokens = string[1:-1].split('; ')
        for token in tokens:
            try:
                residue, count = token.split(":")
            except ValueError:
                if string == "{}":
                    return inst
                raise ValueError("Malformed Token, %s" % (token,))
            inst[from_iupac_lite(residue)] = int(count)
        return inst

    def _derivatized(self, substituent, id_base):
        """
        Adjust the composition offset for derivatization with `substituent`.

        The multiplier starts at 2 and is reduced by the count of every
        substituent-typed key. The reducing end, when present, is derivatized
        as well.
        """
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self._mass = None

    def _strip_derivatization(self):
        """Reset the offset to ``H2O`` and strip the reducing end's derivatization."""
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self._mass = None
class GlycanComposition(dict, SaccharideCollection):
    """
    Describe a glycan  as a collection of :class:`MonosaccharideResidue` counts without
    explicit linkage information relating how each monosaccharide is connected to its neighbors.

    This class subclasses |dict|, and assumes that keys will either be :class:`MonosaccharideResidue`
    instances, :class:`SubstituentResidue` instances, or strings in `iupac_lite` format which will be parsed
    into one of these types. While other types may be used, this is not recommended. All standard |dict| methods
    are supported.

    |GlycanComposition| objects may be derivatized just as |Glycan| objects are, with
    :func:`glypy.composition.composition_transform.derivatize` and
    :func:`glypy.composition.composition_transform.strip_derivatization`.

    GlycanComposition objects also support composition arithmetic, and can be added or subtracted from each other
    or multiplied by an integer.

    As GlycanComposition is not a complete structure, they cannot be translated into text formats as
    full |Glycan| objects are. They may instead be converted to and from a short-form text notation using
    :meth:`GlycanComposition.serialize` and reconstructed from this format using :meth:`GlycanComposition.parse`.

    Attributes
    ----------
    reducing_end : |ReducingEnd|
        Describe the reducing end of the aggregate without binding it to a specific monosaccharide.
        This will contribute to composition and mass calculations.
    _composition_offset: |Composition|
        Account for the one water molecule's worth of composition left over from applying the "residue"
        transformation to each monosaccharide in the aggregate.
    """
    @classmethod
    def from_glycan(cls, glycan):
        """
        Convert a |Glycan| into a |GlycanComposition|.

        Parameters
        ----------
        glycan : Glycan
            The instance to be converted

        Returns
        -------
        GlycanComposition
        """
        inst = cls()
        glycan = tree(glycan)
        inst.extend(glycan)
        inst.reducing_end = glycan.reducing_end
        deriv = has_derivatization(glycan.root)
        if deriv:
            inst._composition_offset += (
                deriv.total_composition() - deriv.attachment_composition_loss()) * 2
        return inst

    def __init__(self, *args, **kwargs):
        """
        Initialize the mapping and populate it through :meth:`update`.

        When the first positional argument is itself a |GlycanComposition|,
        its reducing end (if any) and its composition offset are cloned onto
        the new instance.
        """
        self._reducing_end = None
        dict.__init__(self)
        self._mass = None
        self._charge = None
        self._composition_offset = Composition("H2O")
        self.update(*args, **kwargs)
        try:
            template = args[0]
        except IndexError:
            template = None
        if template is not None and isinstance(template, GlycanComposition):
            reduced = template.reducing_end
            if reduced is not None:
                self.reducing_end = reduced.clone()
            self._composition_offset = template._composition_offset.clone()

    def __setitem__(self, key, value):
        """
        Set the quantity of `key` to `value`

        If `key` is a string, it will be passed through :func:`from_iupac_lite`

        If `key` has a reducing end value, that reducing end will be set on `self`

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to store
        value : int
            The value to store
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        if key.node_type is Monosaccharide.node_type and key.reducing_end is not None:
            self.reducing_end = key.reducing_end
            # Clear the reducing end on a copy, leaving the caller's object intact.
            key = key.clone()
            key.reducing_end = None
        dict.__setitem__(self, key, int(value))
        self._mass = None

    def __getitem__(self, key):
        """
        Get the quantity of `key`

        If `key` is a string, it will be passed through :func:`from_iupac_lite`

        Keys which are not stored report a quantity of 0 instead of raising
        :class:`KeyError`.

        Parameters
        ----------
        key : str, MonosaccharideResidue, SubstituentResidue, or MolecularComposition
            The entity to look up

        Returns
        -------
        int
        """
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return 0

    def __delitem__(self, key):
        """Remove `key` (string keys are parsed first) and drop the cached mass."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        dict.__delitem__(self, key)
        self._mass = None

    def mass(self, average=False, charge=0, mass_data=None):
        """
        Compute the mass of the collection.

        For ``charge == 0`` the mass is the offset's mass plus each residue's
        mass times its count plus the reducing end's mass. Otherwise it is
        obtained from ``total_composition().calc_mass``.

        The result is cached per charge state, but only for the default
        ``average=False, mass_data=None`` request: the cache does not record
        those two arguments, so reusing it for any other combination would
        return a mass computed with different settings.
        """
        cacheable = not average and mass_data is None
        if cacheable and self._mass is not None and charge == self._charge:
            return self._mass
        if charge == 0:
            mass = self._composition_offset.mass
            for residue_type, count in list(self.items()):
                mass += residue_type.mass(average=average, charge=0, mass_data=mass_data) * count
            if self._reducing_end is not None:
                mass += self._reducing_end.mass(average=average, charge=0, mass_data=mass_data)
        else:
            mass = self.total_composition().calc_mass(average=average, charge=charge, mass_data=mass_data)
        if cacheable:
            self._mass = mass
            self._charge = charge
        return mass

    def update(self, *args, **kwargs):
        """
        Store counts from a single mapping or iterable of pairs, plus keywords.

        Entries whose count equals ``0`` are skipped. Positional input is only
        consumed when exactly one positional argument is given.
        """
        if len(args) == 1:
            if isinstance(args[0], Mapping):
                for name, count in args[0].items():
                    if count != 0:
                        self[name] = count
            else:
                for name, count in args:
                    if count != 0:
                        self[name] = count
        for name, count in kwargs.items():
            if count != 0:
                self[name] = count
        self._mass = None

    def extend(self, *args):
        """
        Increment the count of each given residue by one.

        If the first argument is a ``Monosaccharide``, every argument is
        converted with ``MonosaccharideResidue.from_monosaccharide``. If it is
        a ``Glycan``, its monosaccharide-typed nodes are converted and added.

        Raises
        ------
        TypeError
            If the first argument is none of the supported types.
        """
        if not isinstance(args[0], MonosaccharideResidue):
            if isinstance(args[0], (Monosaccharide)):
                args = map(MonosaccharideResidue.from_monosaccharide, args)
            elif isinstance(args[0], Glycan):
                args = map(
                    MonosaccharideResidue.from_monosaccharide,
                    [node for node in args[0] if node.node_type is MonosaccharideResidue.node_type])
            else:
                raise TypeError(
                    "Can't convert {} to MonosaccharideResidue".format(
                        type(args[0])))
        for residue in args:
            self[residue] += 1

    def __iadd__(self, other):
        """Add the counts of `other` to `self` in place."""
        for elem, cnt in (other.items()):
            self[elem] += cnt
        return self

    def __add__(self, other):
        """Return a clone of `self` with the counts of `other` added."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] += cnt
        return result

    def __radd__(self, other):
        """Reflected addition; delegates to :meth:`__add__`."""
        return self + other

    def __isub__(self, other):
        """Subtract the counts of `other` from `self` in place."""
        for elem, cnt in other.items():
            self[elem] -= cnt
        return self

    def __sub__(self, other):
        """Return a clone of `self` with the counts of `other` subtracted."""
        result = self.clone()
        for elem, cnt in other.items():
            result[elem] -= cnt
        return result

    def __rsub__(self, other):
        """Reflected subtraction, computed as ``(self - other) * -1``."""
        return (self - other) * (-1)

    def __mul__(self, other):
        """
        Return a new ``GlycanComposition`` with every count multiplied.

        Only the counts are carried over; the result is built from a plain
        mapping, so it has a default offset and no reducing end.

        Raises
        ------
        TypeError
            If `other` is not an ``int``.
        """
        if not isinstance(other, int):
            raise TypeError(
                'Cannot multiply Composition by non-integer',
                other)
        prod = {}
        for k, v in self.items():
            prod[k] = v * other

        return GlycanComposition(prod)

    def __rmul__(self, other):
        """Reflected multiplication; delegates to :meth:`__mul__`."""
        return self * other

    def __eq__(self, other):
        """
        Compare the non-zero ``(key, count)`` pairs of two mappings.

        Anything that is not a |dict| compares unequal. The reducing end and
        composition offset take no part in the comparison.
        """
        if not isinstance(other, dict):
            return False
        self_items = set([i for i in self.items() if i[1]])
        other_items = set([i for i in other.items() if i[1]])
        return self_items == other_items

    def __neg__(self):
        """Return ``-1 * self``."""
        return -1 * self

    def __missing__(self, key):
        """Report a count of ``0`` for keys that are not stored."""
        return 0

    def __contains__(self, key):
        """Test membership of `key` (string keys are parsed first)."""
        if isinstance(key, basestring):
            key = from_iupac_lite(key)
        return dict.__contains__(self, key)

    def drop_stems(self):
        """Apply ``drop_stem`` to every key, then merge keys with :meth:`collapse`."""
        for t in self:
            drop_stem(t)
        self.collapse()

    def drop_positions(self):
        """Apply ``drop_positions`` to every key, then merge keys with :meth:`collapse`."""
        for t in self:
            drop_positions(t)
        self.collapse()

    def drop_configurations(self):
        """Apply ``drop_configuration`` to every key, then merge keys with :meth:`collapse`."""
        for t in self:
            drop_configuration(t)
        self.collapse()

    def total_composition(self):
        """
        Return the elemental composition of the whole collection.

        This is a clone of the offset plus each residue's total composition
        times its count, plus the reducing end's composition when one is set.
        """
        comp = self._composition_offset.clone()
        for residue, count in self.items():
            comp += residue.total_composition() * count
        if self._reducing_end is not None:
            comp += self._reducing_end.total_composition()
        return comp

    def collapse(self):
        '''
        Merge redundant keys.

        After performing a structure-detail removing operation like
        :meth:`drop_positions`, :meth:`drop_configurations`, or :meth:`drop_stems`,
        monosaccharide keys may be redundant.

        `collapse` will merge keys which refer to the same type of molecule.
        '''
        items = list(self.items())
        self.clear()
        for k, v in items:
            self[k] += v

    @property
    def reducing_end(self):
        """The reducing end shared by the collection, or ``None``."""
        return self._reducing_end

    @reducing_end.setter
    def reducing_end(self, value):
        self._mass = None
        self._reducing_end = value

    def set_reducing_end(self, value):
        """Method form of the ``reducing_end`` setter."""
        self._mass = None
        self._reducing_end = value

    @property
    def composition_offset(self):
        """The composition added on top of the residues."""
        return self._composition_offset

    @composition_offset.setter
    def composition_offset(self, value):
        self._mass = None
        self._composition_offset = value

    def clone(self, propogate_composition_offset=True):
        """
        Return a copy of `self` built through the constructor.

        Parameters
        ----------
        propogate_composition_offset : bool, optional
            When false, the copy's offset is reset to ``Composition('H2O')``
            instead of keeping the one copied from `self`.
        """
        dup = self.__class__(self)
        if not propogate_composition_offset:
            dup.composition_offset = Composition('H2O')
        return dup

    def serialize(self):
        """
        Render as ``{key:count; key:count}``, ordered by the key's mass.

        Only positive counts are written. When a reducing end is set, ``$``
        followed by the formula of its total composition is appended.
        """
        form = "{%s}" % '; '.join("{}:{}".format(str(k), v) for k, v in sorted(
            self.items(), key=lambda x: x[0].mass()) if v > 0)
        reduced = self.reducing_end
        if reduced is not None:
            form = "%s$%s" % (form, formula(reduced.total_composition()))
        return form

    __str__ = serialize

    @classmethod
    def _get_parse_tokens(cls, string):
        """
        Split serialized text into residue tokens and the reducing end part.

        Returns
        -------
        tuple
            ``(tokens, reduced)`` where `reduced` is ``None`` when the text
            contains no ``$`` separator.

        Raises
        ------
        ValueError
            If the text contains more than one ``$``.
        """
        string = str(string)
        parts = string.split('$')
        if len(parts) == 1:
            tokens = parts[0]
            reduced = None
        elif len(parts) == 2:
            tokens, reduced = parts
        else:
            raise ValueError("Could not interpret %r" % string)
        tokens = tokens[1:-1].split('; ')
        return tokens, reduced

    def _handle_reduction_and_derivatization(self, reduced):
        """
        Finish parsing: attach the reducing end and account for derivatization.

        A truthy `reduced` is turned into a ``ReducedEnd`` built from its
        composition. The first key for which ``has_derivatization`` reports a
        group triggers :meth:`_derivatized` with a clone of that group, leaving
        the reducing end untouched.
        """
        if reduced:
            reduced = ReducedEnd(Composition(reduced))
            self.reducing_end = reduced
        deriv = None
        for key in self:
            deriv = has_derivatization(key)
            if deriv:
                break
        if deriv:
            self._derivatized(deriv.clone(), make_counter(uid()), include_reducing_end=False)

    @classmethod
    def parse(cls, string):
        """
        Rebuild an instance from the text produced by :meth:`serialize`.

        The text ``"{}"`` yields an empty instance.

        Raises
        ------
        ValueError
            If a token cannot be split into exactly a residue and a count.
        """
        tokens, reduced = cls._get_parse_tokens(string)
        inst = cls()
        for token in tokens:
            try:
                residue, count = token.split(":")
            except ValueError:
                if string == "{}":
                    return inst
                else:
                    raise ValueError("Malformed Token, %s" % (token,))
            inst[from_iupac_lite(residue)] = int(count)
        inst._handle_reduction_and_derivatization(reduced)
        return inst

    def _derivatized(self, substituent, id_base, include_reducing_end=True):
        """
        Adjust the composition offset for derivatization with `substituent`.

        The multiplier starts at 2 and is reduced by the count of every
        substituent-typed key. The reducing end is derivatized too when it is
        set and `include_reducing_end` is true. Keys are merged and the cached
        mass is dropped afterwards.
        """
        n = 2
        for k, v in self.items():
            if k.node_type is Substituent.node_type:
                n -= v
        self._composition_offset += (
            substituent.total_composition() -
            substituent.attachment_composition_loss() * 2) * n
        if self._reducing_end is not None and include_reducing_end:
            _derivatize_reducing_end(self._reducing_end, substituent, id_base)
        self.collapse()
        self._invalidate()

    def _strip_derivatization(self):
        """Reset the offset to ``H2O``, strip the reducing end and merge keys."""
        self._composition_offset = Composition("H2O")
        if self._reducing_end is not None:
            _strip_derivatization_reducing_end(self._reducing_end)
        self.collapse()
        self._invalidate()

    def _invalidate(self):
        """Forget the cached mass and the charge state it was computed for."""
        self._mass = None
        self._charge = None
 def _compute_composition(self):
     """Recompute ``self.composition`` and ``self.mass`` from ``self.counts``.

     Each key of ``self.counts`` contributes its ``composition`` scaled by
     the associated count.
     """
     summed = Composition()
     for shift, multiplicity in self.counts.items():
         summed += shift.composition * multiplicity
     self.composition = summed
     self.mass = summed.mass
        elif isinstance(other, CompoundMassShift):
            counts = defaultdict(int, self.counts)
            for k, v in other.counts.items():
                counts[k] += v
            return self.__class__(counts)
        else:
            return NotImplemented

    def __mul__(self, i):
        """Return a new instance with every count multiplied by `i`.

        An instance whose composition is empty is returned unchanged, and the
        count stored for ``Unmodified`` is never scaled.

        Raises
        ------
        TypeError
            If `i` is not an ``int`` (and the composition is not empty).
        """
        if self.composition == {}:
            return self
        if not isinstance(i, int):
            raise TypeError("Cannot multiply MassShift by non-integer")
        scaled = defaultdict(int)
        for shift, multiplicity in self.counts.items():
            if shift == Unmodified:
                # The unmodified placeholder keeps its original count.
                scaled[shift] = multiplicity
            else:
                scaled[shift] = multiplicity * i
        return self.__class__(scaled)

    def __repr__(self):
        """Return a ``MassShift(name, composition)`` debugging string."""
        fields = (self.name, self.composition)
        return "MassShift(%s, %s)" % fields


# Module-level named mass shifts, each built from a display name and the
# composition it contributes.
# Unmodified carries an empty composition.
Unmodified = MassShift("Unmodified", Composition())
Formate = MassShift("Formate", Composition('HCOOH'))
Ammonium = MassShift("Ammonium", Composition("NH3"))
Sodium = MassShift("Sodium", Composition("Na"))
Potassium = MassShift("Potassium", Composition("K"))
Example #13
0
 def total_composition(self):
     """Return the composition parsed from ``self.formula``.

     The parsed value is kept in ``self._total_composition`` so the formula
     is only parsed on the first call.
     """
     cached = self._total_composition
     if cached is None:
         cached = self._total_composition = Composition(self.formula)
     return cached
Example #14
0
 def convert(self):
     """Build a ``MemoryMassShift`` from this object's name and composition.

     Both attributes are passed through ``str`` first; the composition text
     is then parsed with ``Composition``.
     """
     name = str(self.name)
     composition = Composition(str(self.composition))
     return MemoryMassShift(name, composition)
Example #15
0
 def _strip_derivatization(self):
     """Reset the composition offset to ``H2O`` and strip the reducing end.

     The cached mass is dropped afterwards.
     """
     self._composition_offset = Composition("H2O")
     reducing_end = self._reducing_end
     if reducing_end is not None:
         _strip_derivatization_reducing_end(reducing_end)
     self._mass = None
 def _patch_aggregate(self):
     """Subtract one ``H2O`` from the aggregate's composition offset."""
     water = Composition({"H": 2, "O": 1})
     aggregate = self.aggregate
     aggregate.composition_offset -= water
Example #17
0
            return self
        if isinstance(i, int):
            counts = defaultdict(int, self.counts)
            for k in counts:
                if k == Unmodified:
                    continue
                counts[k] *= i
            return self.__class__(counts)
        else:
            raise TypeError("Cannot multiply MassShift by non-integer")

    def __repr__(self):
        """Return a ``MassShift(name, composition)`` debugging string."""
        fields = (self.name, self.composition)
        return "MassShift(%s, %s)" % fields


# Module-level named mass shifts, each built from a display name and the
# composition it contributes.
# Unmodified carries an empty composition.
Unmodified = MassShift("Unmodified", Composition())
# Formate, Sodium and Potassium are declared with charge_carrier=1.
Formate = MassShift("Formate", Composition('HCOOH'), charge_carrier=1)
# NOTE(review): Ammonium passes an empty Composition as the third positional
# argument and no charge_carrier — presumably a separate tandem composition;
# confirm against the MassShift constructor.
Ammonium = MassShift("Ammonium", Composition("NH3"), Composition())
# The Sodium and Potassium formulas add the metal and remove one hydrogen.
Sodium = MassShift("Sodium", Composition("Na1H-1"), charge_carrier=1)
Potassium = MassShift("Potassium", Composition("K1H-1"), charge_carrier=1)


class MassShiftCollection(object):
    def __init__(self, mass_shifts):
        self.mass_shifts = list(mass_shifts)
        self.mass_shift_map = {}
        self._invalidate()

    def _invalidate(self):
        self.mass_shift_map = {
            mass_shift.name: mass_shift for mass_shift in self.mass_shifts
 def _strip_derivatization(self):
     """Reset the composition offset to ``H2O`` and strip the reducing end.

     Keys are merged with ``collapse`` and the cache is invalidated
     afterwards.
     """
     self._composition_offset = Composition("H2O")
     reducing_end = self._reducing_end
     if reducing_end is not None:
         _strip_derivatization_reducing_end(reducing_end)
     self.collapse()
     self._invalidate()
Example #19
0
 def dehydrated_mass(self, water_mass=Composition("H2O").mass):
     """Return ``calculated_mass`` minus one `water_mass` per unit of ``count``.

     The default `water_mass` is the mass of ``Composition("H2O")``, computed
     once when the function is defined.
     """
     return self.calculated_mass - (water_mass * self.count)
Example #20
0
 def dehydrated_composition(self):
     """Return the total composition minus ``count`` water molecules.

     The result is computed on first use and kept in
     ``self._dehydrated_composition``.
     """
     if self._dehydrated_composition is not None:
         return self._dehydrated_composition
     total = self.total_composition()
     self._dehydrated_composition = total - (self.count * Composition("H2O"))
     return self._dehydrated_composition
Example #21
0
    def handle_peptide(self, peptide):
        """Yield every ``Glycopeptide`` obtainable by glycosylating `peptide`.

        N-linked, O-linked and GAG-linker sites are processed in that order.
        For each kind, sites which already carry a modification on the
        converted peptide are excluded; then, for every number ``i`` of sites
        from one up to the number of free sites, each glycan combination in
        ``self.glycan_combination_partitions[i, {glycan_type: i}]`` is placed
        on every site set produced by ``limiting_combinations``.

        Parameters
        ----------
        peptide :
            The peptide record to glycosylate. It must provide ``formula``,
            ``calculated_mass``, ``convert()``, the three site collections and
            the identifiers copied onto each result.

        Yields
        ------
        Glycopeptide
            One object per (glycan combination, site set) pair. The mass and
            formula have one water removed per glycan in the combination.
        """
        water = Composition("H2O")
        peptide_composition = Composition(str(peptide.formula))
        obj = peptide.convert()

        def glycosylate(candidate_sites, glycan_type, modification):
            # Shared generator for the three glycosylation kinds, which differ
            # only in their sites, partition key and modification rule.
            unoccupied_sites = set(
                site for site in candidate_sites if not obj[site][1])
            for i in range(1, len(unoccupied_sites) + 1):
                for gc in self.glycan_combination_partitions[i, {glycan_type: i}]:
                    total_mass = peptide.calculated_mass + gc.calculated_mass - (gc.count * water.mass)
                    formula_string = formula(
                        peptide_composition + Composition(str(gc.formula)) - (water * gc.count))

                    for site_set in limiting_combinations(unoccupied_sites, i):
                        # A fresh sequence per site set keeps the modifications
                        # of one result from leaking into the next.
                        sequence = peptide.convert()
                        for site in site_set:
                            sequence.add_modification(site, modification.name)
                        sequence.glycan = gc.convert()

                        yield Glycopeptide(
                            calculated_mass=total_mass,
                            formula=formula_string,
                            glycopeptide_sequence=str(sequence),
                            peptide_id=peptide.id,
                            protein_id=peptide.protein_id,
                            hypothesis_id=peptide.hypothesis_id,
                            glycan_combination_id=gc.id)

        # Handle N-linked glycosylation sites
        for glycopeptide in glycosylate(
                peptide.n_glycosylation_sites, GlycanTypes.n_glycan, _n_glycosylation):
            yield glycopeptide

        # Handle O-linked glycosylation sites
        for glycopeptide in glycosylate(
                peptide.o_glycosylation_sites, GlycanTypes.o_glycan, _o_glycosylation):
            yield glycopeptide

        # Handle GAG glycosylation sites
        for glycopeptide in glycosylate(
                peptide.gagylation_sites, GlycanTypes.gag_linker, _gag_linker_glycosylation):
            yield glycopeptide
Example #22
0
 def _formula_parser(self, formula):
     """Parse a formula string into a ``Composition``.

     Every run of letters that is directly followed by an optionally negative
     integer becomes one entry. When a symbol occurs more than once, the last
     count wins.
     """
     counts = {
         symbol: int(count)
         for symbol, count in re.findall(r"([A-Za-z]+)(-?\d+)", formula)
     }
     return Composition(counts)