def main():
    """Command-line entry point: parse the arguments and start the search.

    Configures logging, reads the parsed arguments from ``app``, turns each
    requested loss formula into a ``MolecularComposition`` and forwards
    everything to ``run``. When no output format is given, ``['csv']`` is
    used.
    """
    logging.basicConfig(level='INFO', format="%(name)s: %(message)s")
    args = app.parse_args()

    # The leading None entry stands for the loss-less case.
    loss_models = [None]
    for loss_formula in args.losses:
        logger.info(
            "Converting loss %s -> %s", loss_formula, Composition(loss_formula))
        loss_models.append(
            MolecularComposition(loss_formula, Composition(loss_formula)))

    chain_lengths = sorted(int(bound) for bound in args.chain_length_range)
    # The charge is always handed on as a non-positive number.
    negative_max_charge = -abs(args.max_charge)
    anhydromanose_flag = bool(args.has_anhydromanose)
    gag_kind = args.gag_type
    logger.info("GAG Chain Range: %d-%d" % tuple(chain_lengths))

    # NOTE(review): the sixth positional argument is always None; it
    # presumably is the reducing-end type, which is not wired up to the
    # command line here -- confirm against the signature of ``run``.
    run(
        args.peaklist_path,
        gag_kind,
        chain_lengths,
        anhydromanose_flag,
        loss_models,
        None,
        negative_max_charge,
        args.output_path,
        args.output_format or ['csv'],
        pick_peaks=args.pick_peaks,
        mass_error_tolerance=args.mass_error_tolerance)
def _compute_composition(self):
    """Recompute the summed compositions, masses and charge-carrier count.

    Every key of ``self.counts`` contributes its ``composition``,
    ``tandem_composition`` and ``charge_carrier``, each scaled by the count
    stored under that key. The totals are stored on ``self.composition``,
    ``self.tandem_composition`` and ``self.charge_carrier``; ``self.mass``
    and ``self.tandem_mass`` are taken from the ``mass`` of the respective
    total.
    """
    full_total = Composition()
    tandem_total = Composition()
    carrier_total = 0
    for shift, multiplicity in self.counts.items():
        full_total += shift.composition * multiplicity
        tandem_total += shift.tandem_composition * multiplicity
        carrier_total += shift.charge_carrier * multiplicity

    self.composition = full_total
    self.mass = full_total.mass
    self.tandem_composition = tandem_total
    self.tandem_mass = tandem_total.mass
    self.charge_carrier = carrier_total
def _make_glycan_composition_proxy(self):
    """Build a ``GlycanCompositionProxy`` covering the tracked glycosylation.

    Starts from a clone of ``self.aggregate`` when one is set, otherwise
    from an empty ``HashableGlycanComposition`` with one water removed from
    its composition offset. Every non-core entry of ``self`` is then added
    to that base.
    """
    aggregate = self.aggregate
    if aggregate is None:
        base = HashableGlycanComposition()
        # Account for the initial amide bond between the peptide and the
        # first glycan. Later glycans need no further chemical loss because
        # the dehydration is built directly into the Residue abstraction.
        # NOTE(review): this offset is taken to belong to the no-aggregate
        # branch only -- confirm against the original layout.
        base.composition_offset -= Composition({"H": 2, "O": 1})
    else:
        base = aggregate.clone()

    for _position, entry in self.items():
        rule = entry.rule
        if rule.is_core:
            continue
        if rule.is_composition:
            base += rule.glycan
        else:
            # Turn the Glycan object into a composition using the original,
            # detached topology. That omits the "aglycone" group which
            # represents the glycan-peptide connection and would penalize
            # the composition by H2O. The water lost when the bond forms is
            # already tracked where the base is initialized, so it does not
            # need to be included again here.
            base += HashableGlycanComposition.from_glycan(rule._original)
    return GlycanCompositionProxy(base)
def total_composition(self):
    """Return the summed composition of the entries held by ``self``.

    When ``self.aggregate`` is set, entries whose rule is flagged
    ``is_core`` are left out of the sum and the aggregate's own
    ``total_composition()`` is added once at the end.
    """
    aggregate = self.aggregate
    skip_core = aggregate is not None

    running_total = Composition()
    for _position, entry in self.items():
        if not (skip_core and entry.rule.is_core):
            running_total += entry.composition

    if skip_core:
        running_total += aggregate.total_composition()
    return running_total
def validate_reduction(context, reduction_string):
    """Resolve a reduction given by name or by formula.

    Parameters
    ----------
    context:
        Not used by this function.
    reduction_string:
        ``None``, a key of ``named_reductions`` (matched case-insensitively),
        or a formula that ``Composition`` can parse into a non-empty result.

    Returns
    -------
    ``None`` when ``reduction_string`` is ``None``, the ``named_reductions``
    entry for a known name, otherwise the formula as a ``str``.

    Raises
    ------
    click.Abort
        When the value is neither a known name nor a non-empty composition,
        or when anything fails while checking it.
    """
    if reduction_string is None:
        return None

    try:
        text = str(reduction_string)
        lowered = text.lower()
        if lowered in named_reductions:
            return named_reductions[lowered]
        if len(Composition(text)) > 0:
            return text
        # An empty composition is handled like any other validation failure.
        raise Exception("Invalid")
    except Exception:
        message = "Could not validate reduction '%s'" % reduction_string
        click.secho(message)
        raise click.Abort(message)
def validate_mass_shift(mass_shift_string, multiplicity=1):
    """Resolve a mass shift given by name or by formula.

    Parameters
    ----------
    mass_shift_string:
        Either a key of ``mass_shifts`` (matched case-insensitively) or a
        formula from which a new ``MassShift`` is built.
    multiplicity:
        Converted with ``int`` and returned alongside the mass shift.

    Returns
    -------
    tuple
        ``(mass_shift, multiplicity)``.

    Raises
    ------
    click.Abort
        When building the ``Composition`` or the ``MassShift`` fails.
    """
    multiplicity = int(multiplicity)

    lowered = mass_shift_string.lower()
    if lowered in mass_shifts:
        return (mass_shifts[lowered], multiplicity)

    try:
        mass_shift_string = str(mass_shift_string)
        parsed = Composition(mass_shift_string)
        custom_shift = MassShift(mass_shift_string, parsed)
    except Exception as err:
        # Show the underlying error first, then the highlighted summary.
        click.secho("%r" % (err, ))
        message = "Could not validate mass_shift %r" % (mass_shift_string, )
        click.secho(message, fg='yellow')
        raise click.Abort(message)
    else:
        return (custom_shift, multiplicity)
def handle_peptide(self, peptide):
    """Yield every ``Glycopeptide`` that can be built from ``peptide``.

    The three kinds of glycosylation site are processed one after another:
    N-linked, then O-linked, then GAG-linker. For each kind, all glycan
    combinations of a matching size are placed on the unoccupied sites of
    that kind.

    Parameters
    ----------
    peptide:
        Record providing ``formula``, ``calculated_mass``, ``convert()``,
        the three ``*_sites`` collections and the ``id``, ``protein_id``
        and ``hypothesis_id`` that are copied onto each result.

    Yields
    ------
    Glycopeptide
        One instance per (glycan combination, site assignment) pair.
    """
    water = Composition("H2O")
    peptide_composition = Composition(str(peptide.formula))
    parsed = peptide.convert()

    # (peptide attribute listing candidate sites, glycan type used to pick
    # the combinations, modification rule placed on each chosen site)
    site_classes = (
        ("n_glycosylation_sites", GlycanTypes.n_glycan, _n_glycosylation),
        ("o_glycosylation_sites", GlycanTypes.o_glycan, _o_glycosylation),
        ("gagylation_sites", GlycanTypes.gag_linker,
         _gag_linker_glycosylation),
    )
    for site_attribute, glycan_type, modification in site_classes:
        # Resolved lazily so each site list is only read once the previous
        # class has been fully consumed.
        candidate_sites = getattr(peptide, site_attribute)
        generated = self._glycosylate_sites(
            peptide, parsed, peptide_composition, water,
            candidate_sites, glycan_type, modification)
        for glycopeptide in generated:
            yield glycopeptide


def _glycosylate_sites(self, peptide, parsed, peptide_composition, water,
                       candidate_sites, glycan_type, modification):
    """Yield the glycopeptides for a single class of glycosylation site.

    Parameters
    ----------
    peptide:
        The peptide record being glycosylated.
    parsed:
        Result of ``peptide.convert()``; ``parsed[site][1]`` being truthy
        marks the site as already occupied.
    peptide_composition:
        Composition of the bare peptide.
    water:
        Composition of one water, removed once per attached glycan.
    candidate_sites:
        Positions of this class on the peptide.
    glycan_type:
        ``GlycanTypes`` member used to select the glycan combinations.
    modification:
        Rule whose ``name`` is added at every chosen site.
    """
    unoccupied = set(candidate_sites)
    for site in list(unoccupied):
        if parsed[site][1]:
            unoccupied.remove(site)

    for glycan_count in range(1, len(unoccupied) + 1):
        partition_key = (glycan_count, {glycan_type: glycan_count})
        for gc in self.glycan_combination_partitions[partition_key]:
            total_mass = peptide.calculated_mass + gc.calculated_mass - (
                gc.count * water.mass)
            formula_string = formula(
                peptide_composition + Composition(str(gc.formula)) - (
                    water * gc.count))

            for site_set in limiting_combinations(unoccupied, glycan_count):
                # A fresh sequence per assignment so that the modifications
                # of one site set never leak into the next.
                sequence = peptide.convert()
                for site in site_set:
                    sequence.add_modification(site, modification.name)
                sequence.glycan = gc.convert()

                yield Glycopeptide(
                    calculated_mass=total_mass,
                    formula=formula_string,
                    glycopeptide_sequence=str(sequence),
                    peptide_id=peptide.id,
                    protein_id=peptide.protein_id,
                    hypothesis_id=peptide.hypothesis_id,
                    glycan_combination_id=gc.id)
def _compute_composition(self):
    """Recompute ``self.composition`` and ``self.mass`` from ``self.counts``.

    Each key of ``self.counts`` contributes its ``composition`` scaled by
    the count stored under that key.
    """
    running_total = Composition()
    for shift, multiplicity in self.counts.items():
        running_total += shift.composition * multiplicity

    self.composition = running_total
    self.mass = running_total.mass
elif isinstance(other, CompoundMassShift): counts = defaultdict(int, self.counts) for k, v in other.counts.items(): counts[k] += v return self.__class__(counts) else: return NotImplemented def __mul__(self, i): if self.composition == {}: return self if isinstance(i, int): counts = defaultdict(int, self.counts) for k in counts: if k == Unmodified: continue counts[k] *= i return self.__class__(counts) else: raise TypeError("Cannot multiply MassShift by non-integer") def __repr__(self): return "MassShift(%s, %s)" % (self.name, self.composition) Unmodified = MassShift("Unmodified", Composition()) Formate = MassShift("Formate", Composition('HCOOH')) Ammonium = MassShift("Ammonium", Composition("NH3")) Sodium = MassShift("Sodium", Composition("Na")) Potassium = MassShift("Potassium", Composition("K"))
def total_composition(self):
    """Return the ``Composition`` parsed from ``self.formula``.

    The result is built on first use and kept on
    ``self._total_composition``, so later calls return the stored object.
    """
    cached = self._total_composition
    if cached is not None:
        return cached

    self._total_composition = Composition(self.formula)
    return self._total_composition
def convert(self):
    """Return a ``MemoryMassShift`` built from this object's name and composition.

    Both ``self.name`` and ``self.composition`` are passed through ``str``
    first; the composition text is then parsed into a ``Composition``.
    """
    shift_name = str(self.name)
    shift_composition = Composition(str(self.composition))
    return MemoryMassShift(shift_name, shift_composition)
def dehydrated_mass(self, water_mass=Composition("H2O").mass):
    """Return ``self.calculated_mass`` minus one water mass per ``self.count``.

    Parameters
    ----------
    water_mass:
        Mass subtracted per counted unit. The default is the mass of
        ``Composition("H2O")``, evaluated once when the function is defined.
    """
    intact_mass = self.calculated_mass
    water_lost = water_mass * self.count
    return intact_mass - water_lost
def dehydrated_composition(self):
    """Return the total composition minus one water per ``self.count``.

    The result is computed on first use and kept on
    ``self._dehydrated_composition``, so later calls return the stored
    object.
    """
    if self._dehydrated_composition is not None:
        return self._dehydrated_composition

    intact = self.total_composition()
    water_lost = self.count * Composition("H2O")
    self._dehydrated_composition = intact - water_lost
    return self._dehydrated_composition
def _patch_aggregate(self):
    """Subtract one water (H2, O1) from ``self.aggregate.composition_offset``."""
    water = Composition({"H": 2, "O": 1})
    aggregate = self.aggregate
    aggregate.composition_offset -= water
return self if isinstance(i, int): counts = defaultdict(int, self.counts) for k in counts: if k == Unmodified: continue counts[k] *= i return self.__class__(counts) else: raise TypeError("Cannot multiply MassShift by non-integer") def __repr__(self): return "MassShift(%s, %s)" % (self.name, self.composition) Unmodified = MassShift("Unmodified", Composition()) Formate = MassShift("Formate", Composition('HCOOH'), charge_carrier=1) Ammonium = MassShift("Ammonium", Composition("NH3"), Composition()) Sodium = MassShift("Sodium", Composition("Na1H-1"), charge_carrier=1) Potassium = MassShift("Potassium", Composition("K1H-1"), charge_carrier=1) class MassShiftCollection(object): def __init__(self, mass_shifts): self.mass_shifts = list(mass_shifts) self.mass_shift_map = {} self._invalidate() def _invalidate(self): self.mass_shift_map = { mass_shift.name: mass_shift for mass_shift in self.mass_shifts
def _formula_parser(self, formula):
    """Parse a formula string into a ``Composition``.

    The string is scanned for runs of letters directly followed by an
    optionally negative integer, e.g. ``"C6H12O6"`` or ``"Na1H-1"``. A
    symbol with no explicit count is not matched, and a symbol that appears
    more than once keeps only its last count.
    """
    symbol_counts = {
        symbol: int(count)
        for symbol, count in re.findall(r"([A-Za-z]+)(-?\d+)", formula)
    }
    return Composition(symbol_counts)