Example #1
0
def main():
    """Command line entry point: parse the arguments and hand them to ``run``.

    Configures logging, converts every requested loss into a
    ``MolecularComposition``, normalizes the chain length range, the maximum
    charge and the output format, and then invokes ``run`` with the result.
    """
    logging.basicConfig(level='INFO', format="%(name)s: %(message)s")
    args = app.parse_args()

    # The leading None entry stands for the loss-less case.
    molecular_composition_losses = [None]
    for loss in args.losses:
        logger.info("Converting loss %s -> %s", loss, Composition(loss))
        converted_loss = MolecularComposition(loss, Composition(loss))
        molecular_composition_losses.append(converted_loss)

    length_range = sorted(int(bound) for bound in args.chain_length_range)
    # The charge handed to ``run`` is always non-positive, whatever sign was given.
    max_charge = -abs(args.max_charge)
    has_anhydromanose = bool(args.has_anhydromanose)
    gag_type = args.gag_type

    logger.info("GAG Chain Range: %d-%d" % tuple(length_range))

    mass_error_tolerance = args.mass_error_tolerance

    # NOTE(review): a reducing-end option (``args.reduced``) was sketched here
    # at some point but is disabled; the ``None`` passed to ``run`` below is
    # presumably its placeholder -- confirm against ``run``'s signature.

    output_path = args.output_path
    # Fall back to CSV when no output format was requested.
    output_format = args.output_format or ['csv']

    pick_peaks = args.pick_peaks

    run(
        args.peaklist_path,
        gag_type,
        length_range,
        has_anhydromanose,
        molecular_composition_losses,
        None,
        max_charge,
        output_path,
        output_format,
        pick_peaks=pick_peaks,
        mass_error_tolerance=mass_error_tolerance)
Example #2
0
 def _compute_composition(self):
     """Rebuild the summed composition data from ``self.counts``.

     Each key of ``self.counts`` contributes its ``composition``,
     ``tandem_composition`` and ``charge_carrier`` multiplied by the count
     stored for it. The totals are stored on ``self`` together with the
     masses of the two summed compositions.
     """
     summed = Composition()
     tandem_summed = Composition()
     carriers = 0
     for shift, multiplicity in self.counts.items():
         summed += shift.composition * multiplicity
         tandem_summed += shift.tandem_composition * multiplicity
         carriers += shift.charge_carrier * multiplicity

     self.composition = summed
     self.mass = summed.mass
     self.tandem_composition = tandem_summed
     self.tandem_mass = tandem_summed.mass
     self.charge_carrier = carriers
 def _make_glycan_composition_proxy(self):
     """Build a ``GlycanCompositionProxy`` summarizing the glycans held here.

     The starting point is a clone of ``self.aggregate`` when one is set,
     otherwise a fresh ``HashableGlycanComposition``. Every non-core entry
     of ``self.items()`` is then added to it.
     """
     if self.aggregate is None:
         base = HashableGlycanComposition()
         # Represent the initial amide bond between the peptide
         # and the first glycan. Subsequent glycans do not need
         # further chemical losses because of the dehydration built
         # directly into the Residue abstraction.
         base.composition_offset -= Composition({"H": 2, "O": 1})
     else:
         base = self.aggregate.clone()

     for _, value in self.items():
         rule = value.rule
         if rule.is_core:
             continue
         if rule.is_composition:
             addend = rule.glycan
         else:
             # Convert the Glycan object into a composition, using the original
             # detached topology to omit the "aglycone" group which represents
             # the connection between the glycan and the peptide, which penalizes
             # the composition by H2O. This H2O is lost when that bond is formed,
             # but doesn't need to be explicitly included as the loss is tracked
             # when initializing the base above.
             addend = HashableGlycanComposition.from_glycan(rule._original)
         base += addend
     return GlycanCompositionProxy(base)
 def total_composition(self):
     """Return the summed ``Composition`` of the entries of ``self.items()``.

     When ``self.aggregate`` is set, entries whose rule is flagged
     ``is_core`` are left out of the sum and the aggregate's own
     ``total_composition()`` is added instead.
     """
     total = Composition()
     aggregate = self.aggregate
     for _, value in self.items():
         if aggregate is not None and value.rule.is_core:
             continue
         total += value.composition
     if aggregate is not None:
         total += aggregate.total_composition()
     return total
Example #5
0
def validate_reduction(context, reduction_string):
    """Resolve a reduction given by name or by formula.

    Parameters
    ----------
    context
        Not used by this function.
    reduction_string
        ``None``, a key of ``named_reductions`` (matched case-insensitively),
        or a string that ``Composition`` parses into a non-empty composition.

    Returns
    -------
    ``None`` when ``reduction_string`` is ``None``, the ``named_reductions``
    entry when the lower-cased string is a known name, otherwise the string
    itself.

    Raises
    ------
    click.Abort
        When the string is neither a known name nor a non-empty composition,
        or when any error occurs while checking it.
    """
    if reduction_string is None:
        return None
    try:
        text = str(reduction_string)
        key = text.lower()
        if key in named_reductions:
            return named_reductions[key]
        if len(Composition(text)) > 0:
            return text
        raise Exception("Invalid")
    except Exception:
        message = "Could not validate reduction '%s'" % reduction_string
        click.secho(message)
        raise click.Abort(message)
Example #6
0
def validate_mass_shift(mass_shift_string, multiplicity=1):
    """Resolve a mass shift given by name or by formula.

    Parameters
    ----------
    mass_shift_string
        Either a key of ``mass_shifts`` (matched case-insensitively) or a
        string from which a ``Composition`` can be built.
    multiplicity
        Coerced with ``int``; returned alongside the mass shift.

    Returns
    -------
    tuple
        ``(mass_shift, multiplicity)``, where the mass shift is either the
        ``mass_shifts`` entry or a new ``MassShift`` named after the string.

    Raises
    ------
    click.Abort
        When a new ``MassShift`` cannot be built from the string.
    """
    multiplicity = int(multiplicity)
    key = mass_shift_string.lower()
    if key in mass_shifts:
        return (mass_shifts[key], multiplicity)
    try:
        mass_shift_string = str(mass_shift_string)
        shift = MassShift(mass_shift_string, Composition(mass_shift_string))
        return (shift, multiplicity)
    except Exception as e:
        # Show the underlying error first, then the user-facing message.
        click.secho("%r" % (e, ))
        message = "Could not validate mass_shift %r" % (mass_shift_string, )
        click.secho(message, fg='yellow')
        raise click.Abort(message)
Example #7
0
    def handle_peptide(self, peptide):
        """Yield a ``Glycopeptide`` for every glycosylated form of ``peptide``.

        Three kinds of sites are processed in turn: N-linked
        (``peptide.n_glycosylation_sites``), O-linked
        (``peptide.o_glycosylation_sites``) and GAG linker
        (``peptide.gagylation_sites``). The three kinds are never mixed on
        one yielded glycopeptide.

        Parameters
        ----------
        peptide
            The peptide record to glycosylate. Its ``formula``,
            ``calculated_mass``, ``id``, ``protein_id``, ``hypothesis_id``,
            site collections and ``convert()`` method are used.

        Yields
        ------
        Glycopeptide
            One record per (glycan combination, site set) pair.
        """
        water = Composition("H2O")
        peptide_composition = Composition(str(peptide.formula))
        obj = peptide.convert()

        # Handle N-linked glycosylation sites
        for glycopeptide in self._glycosylate_site_type(
                peptide, obj, peptide_composition, water,
                peptide.n_glycosylation_sites, GlycanTypes.n_glycan,
                _n_glycosylation.name):
            yield glycopeptide

        # Handle O-linked glycosylation sites
        for glycopeptide in self._glycosylate_site_type(
                peptide, obj, peptide_composition, water,
                peptide.o_glycosylation_sites, GlycanTypes.o_glycan,
                _o_glycosylation.name):
            yield glycopeptide

        # Handle GAG glycosylation sites
        for glycopeptide in self._glycosylate_site_type(
                peptide, obj, peptide_composition, water,
                peptide.gagylation_sites, GlycanTypes.gag_linker,
                _gag_linker_glycosylation.name):
            yield glycopeptide

    def _glycosylate_site_type(self, peptide, parsed_peptide, peptide_composition, water,
                               candidate_sites, glycan_type, modification_name):
        """Yield the glycopeptides of ``peptide`` for one kind of glycosylation site.

        Parameters
        ----------
        peptide
            The peptide record being glycosylated.
        parsed_peptide
            The result of ``peptide.convert()``; a site counts as occupied
            when ``parsed_peptide[site][1]`` is truthy.
        peptide_composition
            ``Composition`` of the bare peptide.
        water
            ``Composition`` of water, removed once per glycan in a combination.
        candidate_sites
            Iterable of candidate site positions for this glycosylation kind.
        glycan_type
            The ``GlycanTypes`` member used to look up glycan combinations.
        modification_name
            Name of the modification added at each chosen site.
        """
        unoccupied_sites = set(candidate_sites)
        # Iterate over a copy because occupied sites are removed from the set.
        for site in list(unoccupied_sites):
            if parsed_peptide[site][1]:
                unoccupied_sites.remove(site)

        for site_count in range(1, len(unoccupied_sites) + 1):
            partition_key = (site_count, {glycan_type: site_count})
            for gc in self.glycan_combination_partitions[partition_key]:
                # Mass and formula depend only on the glycan combination, so
                # compute them once for all site placements.
                total_mass = peptide.calculated_mass + gc.calculated_mass - (gc.count * water.mass)
                formula_string = formula(
                    peptide_composition + Composition(str(gc.formula)) - (water * gc.count))

                for site_set in limiting_combinations(unoccupied_sites, site_count):
                    sequence = peptide.convert()
                    for site in site_set:
                        sequence.add_modification(site, modification_name)
                    sequence.glycan = gc.convert()

                    glycopeptide_sequence = str(sequence)

                    yield Glycopeptide(
                        calculated_mass=total_mass,
                        formula=formula_string,
                        glycopeptide_sequence=glycopeptide_sequence,
                        peptide_id=peptide.id,
                        protein_id=peptide.protein_id,
                        hypothesis_id=peptide.hypothesis_id,
                        glycan_combination_id=gc.id)
 def _compute_composition(self):
     """Rebuild ``self.composition`` and ``self.mass`` from ``self.counts``.

     Each key of ``self.counts`` contributes its ``composition`` multiplied
     by the count stored for it.
     """
     summed = Composition()
     for shift, multiplicity in self.counts.items():
         summed += shift.composition * multiplicity
     self.composition = summed
     self.mass = summed.mass
        elif isinstance(other, CompoundMassShift):
            counts = defaultdict(int, self.counts)
            for k, v in other.counts.items():
                counts[k] += v
            return self.__class__(counts)
        else:
            return NotImplemented

    def __mul__(self, i):
        """Return a new instance with every count multiplied by ``i``.

        An instance whose composition is empty is returned unchanged, and the
        count stored for ``Unmodified`` is never scaled.

        Raises
        ------
        TypeError
            When ``i`` is not an ``int`` (and the composition is not empty).
        """
        if self.composition == {}:
            return self
        if not isinstance(i, int):
            raise TypeError("Cannot multiply MassShift by non-integer")
        scaled = defaultdict(int, self.counts)
        for shift in scaled:
            if not (shift == Unmodified):
                scaled[shift] *= i
        return self.__class__(scaled)

    def __repr__(self):
        """Return a ``MassShift(<name>, <composition>)`` debugging string."""
        fields = (self.name, self.composition)
        return "MassShift(%s, %s)" % fields


# Predefined MassShift instances, each built from a display name and a
# Composition. Unmodified is given an empty Composition; __mul__ above
# skips it when scaling counts.
Unmodified = MassShift("Unmodified", Composition())
Formate = MassShift("Formate", Composition('HCOOH'))
Ammonium = MassShift("Ammonium", Composition("NH3"))
Sodium = MassShift("Sodium", Composition("Na"))
Potassium = MassShift("Potassium", Composition("K"))
Example #10
0
 def total_composition(self):
     """Return the ``Composition`` built from ``self.formula``.

     The result is computed on first use and cached in
     ``self._total_composition``; later calls return the cached object.
     """
     cached = self._total_composition
     if cached is None:
         cached = self._total_composition = Composition(self.formula)
     return cached
Example #11
0
 def convert(self):
     """Return a ``MemoryMassShift`` built from this object's name and composition.

     Both values are passed through ``str`` first; the composition string is
     then parsed back into a ``Composition``.
     """
     name = str(self.name)
     composition = Composition(str(self.composition))
     return MemoryMassShift(name, composition)
Example #12
0
 def dehydrated_mass(self, water_mass=Composition("H2O").mass):
     """Return ``self.calculated_mass`` minus ``self.count`` times ``water_mass``.

     ``water_mass`` defaults to the mass of ``Composition("H2O")``, which is
     evaluated once when the function is defined.
     """
     intact_mass = self.calculated_mass
     water_lost = water_mass * self.count
     return intact_mass - water_lost
Example #13
0
 def dehydrated_composition(self):
     """Return the total composition minus ``self.count`` water molecules.

     The result is computed on first use and cached in
     ``self._dehydrated_composition``.
     """
     if self._dehydrated_composition is not None:
         return self._dehydrated_composition
     intact = self.total_composition()
     self._dehydrated_composition = intact - (self.count * Composition("H2O"))
     return self._dehydrated_composition
 def _patch_aggregate(self):
     """Subtract one H2O from ``self.aggregate.composition_offset`` in place."""
     self.aggregate.composition_offset -= Composition({"H": 2, "O": 1})
Example #15
0
            return self
        if isinstance(i, int):
            counts = defaultdict(int, self.counts)
            for k in counts:
                if k == Unmodified:
                    continue
                counts[k] *= i
            return self.__class__(counts)
        else:
            raise TypeError("Cannot multiply MassShift by non-integer")

    def __repr__(self):
        """Return a ``MassShift(<name>, <composition>)`` debugging string."""
        fields = (self.name, self.composition)
        return "MassShift(%s, %s)" % fields


# Predefined MassShift instances, each built from a display name and a
# Composition. Formate, Sodium and Potassium additionally declare
# charge_carrier=1; Sodium and Potassium use compositions with H-1.
# NOTE(review): Ammonium passes an empty Composition as a third positional
# argument where the others use the charge_carrier keyword -- presumably a
# separate composition parameter of MassShift; confirm against
# MassShift.__init__.
Unmodified = MassShift("Unmodified", Composition())
Formate = MassShift("Formate", Composition('HCOOH'), charge_carrier=1)
Ammonium = MassShift("Ammonium", Composition("NH3"), Composition())
Sodium = MassShift("Sodium", Composition("Na1H-1"), charge_carrier=1)
Potassium = MassShift("Potassium", Composition("K1H-1"), charge_carrier=1)


class MassShiftCollection(object):
    def __init__(self, mass_shifts):
        self.mass_shifts = list(mass_shifts)
        self.mass_shift_map = {}
        self._invalidate()

    def _invalidate(self):
        self.mass_shift_map = {
            mass_shift.name: mass_shift for mass_shift in self.mass_shifts
Example #16
0
 def _formula_parser(self, formula):
     """Parse a formula string into a ``Composition``.

     Every run of letters immediately followed by an (optionally negative)
     integer is read as a ``symbol``/``count`` pair. When a symbol occurs
     more than once, the last count wins.
     """
     pairs = re.findall(r"([A-Za-z]+)(-?\d+)", formula)
     counts = {symbol: int(count) for symbol, count in pairs}
     return Composition(counts)