def _get_sequence(self, gpm):
    """Return a freshly parsed ``PeptideSequence`` for ``gpm``.

    The text form of ``gpm.structure`` is parsed when that works; if an
    ``AttributeError`` is raised along the way (for example because ``gpm``
    has no ``structure`` attribute), the text form of ``gpm`` itself is
    parsed instead.
    """
    try:
        attached = gpm.structure
        # Re-parse from text rather than handing back the attached object.
        parsed = PeptideSequence(str(attached))
    except AttributeError:
        parsed = PeptideSequence(str(gpm))
    return parsed
def build_structures(self):
    """Build two ``PeptideSequence`` objects for the same peptide backbone.

    The first is parsed from a string carrying an ``N-Glycosylation``
    modification plus a glycan composition in braces; the second is parsed
    from a string whose modification embeds an ``#:iupac`` glycan
    description.

    Returns
    -------
    tuple
        ``(composition_based, iupac_based)``
    """
    composition_text = (
        'YLGN(N-Glycosylation)ATAIFFLPDEGK{Hex:5; HexNAc:4; Neu5Ac:1}')
    iupac_text = (
        'YLGN(#:iupac,glycosylation_type=N-Linked:?-?-Hexp-(?-?)-?-?-'
        'Hexp2NAc-(?-?)-a-D-Manp-(1-6)-[a-D-Neup5Ac-(?-?)-?-?-Hexp-(?-?'
        ')-?-?-Hexp2NAc-(?-?)-a-D-Manp-(1-3)]b-D-Manp-(1-4)-b-D-Glcp2NA'
        'c-(1-4)-b-D-Glcp2NAc)ATAIFFLPDEGK')
    composition_based = PeptideSequence(composition_text)
    iupac_based = PeptideSequence(iupac_text)
    return composition_based, iupac_based
def glycopeptide_string(sequence, long=False, include_glycan=True):
    """Render a glycopeptide as an HTML fragment.

    ``sequence`` is re-parsed from its string form. Residue symbols are
    emitted as plain text and every modification becomes a coloured
    ``<span class='modification-chip'>`` whose ``title`` and
    ``data-modification`` hold the escaped modification name.

    Parameters
    ----------
    sequence : object
        Anything whose ``str()`` can be parsed by ``PeptideSequence``.
    long : bool
        When true the chip text is the full modification name, otherwise
        only its first character.
    include_glycan : bool
        When true and the parsed sequence has a glycan, a space followed by
        ``glycan_composition_string`` of that glycan is appended.

    Returns
    -------
    str
        The concatenated markup.
    """
    sequence = PeptideSequence(str(sequence))

    chip = (
        "(<span class='modification-chip'"
        " style='background-color:%s;padding-left:1px;padding-right:2px;border-radius:2px;'"
        " title='%s' data-modification='%s'>%s</span>)"
    )
    # Terminal modifications drop the parentheses and are joined with '-'.
    bare_chip = chip.replace("(", "").replace(")", "")
    n_term_template = bare_chip + '-'
    c_term_template = "-" + bare_chip

    fragments = []

    def emit(mod, markup=chip):
        # Append the chip for one modification using the given markup.
        colour = colors.get_color(str(mod))
        if long:
            label = escape(mod.name)
        else:
            label = escape(mod.name[0])
        safe_name = escape(mod.name)
        fragments.append(markup % (rgbpack(colour), safe_name, safe_name, label))

    n_term_mod = sequence.n_term.modification
    if n_term_mod is not None:
        emit(n_term_mod, n_term_template)

    for res, mods in sequence:
        fragments.append(res.symbol)
        if not mods:
            continue
        for mod in mods:
            emit(mod)

    c_term_mod = sequence.c_term.modification
    if c_term_mod is not None:
        emit(c_term_mod, c_term_template)

    # The glycan is only looked at when the caller asked for it.
    if include_glycan and sequence.glycan is not None:
        fragments.append(' ' + glycan_composition_string(str(sequence.glycan)))

    return ''.join(fragments)
def _compute_sequence_color(self, gpm):
    """Pick a ``(color, alpha)`` pair from the glycosylation type of ``gpm``.

    ``gpm.structure`` is re-parsed from its string form and its
    ``modification_index`` is checked, in order, for ``N-Glycosylation``,
    ``O-Glycosylation`` and ``GAG-Linker``.

    Returns
    -------
    tuple
        ``(color_name, 0.5)``; ``('red', 0.5)`` when an ``AttributeError``
        is raised while inspecting ``gpm`` (e.g. no ``structure``).

    Raises
    ------
    ValueError
        If none of the three modification names is present.
    """
    # Order matters: the first matching entry wins.
    palette = (
        ("N-Glycosylation", 'forestgreen'),
        ('O-Glycosylation', 'aquamarine'),
        ('GAG-Linker', 'orange'),
    )
    try:
        attached = gpm.structure
        glycopeptide = PeptideSequence(str(attached))
        for modification_name, colour in palette:
            if modification_name in glycopeptide.modification_index:
                return colour, 0.5
        raise ValueError(glycopeptide)
    except AttributeError:
        return 'red', 0.5
def classify_proton_mobility(scan: ProcessedScan, structure: glycopeptidepy.PeptideSequence) -> str:
    """Classify ``structure`` as mobile, partial or immobile for ``scan``.

    The proton mobility value is read from ``structure.proton_mobility``
    when present; otherwise it is computed with ``proton_mobility`` and,
    if the object allows attribute assignment, stored back on ``structure``
    for reuse.

    Returns
    -------
    str
        ``'mobile'`` when the precursor charge equals ``ChargeNotProvided``
        or the mobility value is below the charge, ``'partial'`` when they
        are equal, ``'immobile'`` otherwise.
    """
    try:
        mobility = structure.proton_mobility
    except AttributeError:
        mobility = proton_mobility(structure)
        # Best-effort caching: objects that reject new attributes are
        # simply left untouched.
        try:
            structure.proton_mobility = mobility
        except AttributeError:
            pass

    precursor_charge = scan.precursor_information.charge
    if precursor_charge == ChargeNotProvided or mobility < precursor_charge:
        return 'mobile'
    if mobility == precursor_charge:
        return 'partial'
    return 'immobile'
def get_base_peptide(peptide_obj):
    """Return a ``PeptideSequence`` for the base sequence of ``peptide_obj``.

    ``Peptide`` instances contribute their ``base_peptide_sequence``; any
    other object is converted with ``str()`` before parsing.
    """
    if isinstance(peptide_obj, Peptide):
        sequence_text = peptide_obj.base_peptide_sequence
    else:
        sequence_text = str(peptide_obj)
    return PeptideSequence(sequence_text)
import unittest
import glypy
from glycopeptidepy import PeptideSequence
from ms_deisotope.output import ProcessedMzMLDeserializer
from glycan_profiling.test.fixtures import get_test_data
from glycan_profiling.tandem.glycopeptide import core_search
from glycan_profiling.tandem.glycopeptide.core_search import (
    GlycanCombinationRecord, GlycanTypes, GlycanFilteringPeptideMassEstimator)

# Mass of the bare peptide backbone used by the fixtures below.
peptide_mass = PeptideSequence("YLGNATAIFFLPDEGK").mass

# Three glycan compositions, also exposed individually as gc1..gc3.
glycan_compositions = [
    glypy.glycan_composition.HashableGlycanComposition.parse(text)
    for text in (
        "{Hex:5; HexNAc:4; Neu5Ac:1}",
        "{Hex:5; HexNAc:4; Neu5Ac:2}",
        "{Hex:6; HexNAc:5; Neu5Ac:2}",
    )
]
gc1, gc2, gc3 = glycan_compositions

# One record per composition, numbered from 1, each listing both the
# N-glycan and O-glycan types.
glycan_database = []
for i, gc in enumerate(glycan_compositions):
    record = GlycanCombinationRecord(
        i + 1,
        gc.mass() - gc.composition_offset.mass,
        gc,
        1,
        [GlycanTypes.n_glycan, GlycanTypes.o_glycan],
    )
    glycan_database.append(record)
def _get_peptide_key(self, chromatogram):
    """Return the deglycosylated form of ``chromatogram.structure``.

    The structure is re-parsed from its string form before
    ``deglycosylate()`` is called on the parsed copy.
    """
    glycopeptide = PeptideSequence(str(chromatogram.structure))
    return glycopeptide.deglycosylate()
def from_obj(cls, obj, **kwargs):
    """Build an instance from ``obj`` with a re-parsed ``structure``.

    ``obj.structure`` is converted to text and parsed into a new
    ``PeptideSequence``, which is forwarded to the parent ``from_obj`` as
    the ``structure`` keyword together with any extra ``kwargs``.
    """
    reparsed = PeptideSequence(str(obj.structure))
    parent = super(GlycopeptideChromatogramProxy, cls)
    return parent.from_obj(obj, structure=reparsed, **kwargs)
def structure(self):
    """Return the parsed structure, building it on first access.

    While ``self._structure`` is ``None`` the value is parsed from the
    string form of ``self.kwargs["structure"]`` and stored; later calls
    return the stored object.
    """
    cached = self._structure
    if cached is not None:
        return cached
    self._structure = PeptideSequence(str(self.kwargs["structure"]))
    return self._structure
def _get_sequence(self, gpm):
    """Return the sequence object for ``gpm``.

    ``gpm.structure`` is returned as-is when the attribute exists;
    otherwise the string form of ``gpm`` is parsed into a
    ``PeptideSequence``.
    """
    try:
        sequence = gpm.structure
    except AttributeError:
        sequence = PeptideSequence(str(gpm))
    return sequence
def get_sequoninfodict_from_files_accid(csvfilelist, accessionid=None, seq='',
                                        score_cutoff=30, replicate_cutoff=2,
                                        sequon_length=4):
    """Map each N-glycan sequon of a protein to its site and glycan list.

    Takes a list of DictReader CSV file objects produced from GlycReSoft
    glycopeptide-identification. Requires either a protein sequence or a
    UniProt accession ID; if both are provided, the sequence is used and
    the accession is only forwarded to ``update_siteinfodict``.

    Filtering by ``score_cutoff`` is delegated to ``update_siteinfodict``
    and filtering/averaging by ``replicate_cutoff`` to
    ``prune_siteinfodict``.

    Parameters
    ----------
    csvfilelist : sequence
        One entry per replicate; the position in the list is used as the
        replicate ID.
    accessionid : str, optional
        Accession used to look up the sequence when ``seq`` is empty.
    seq : str, optional
        Protein sequence. An empty value (``''`` or ``None``) means
        "not provided".
    score_cutoff : number
        Passed to ``update_siteinfodict``.
    replicate_cutoff : int
        Passed to ``prune_siteinfodict``; must not exceed the number of
        CSVs.
    sequon_length : int
        Number of residues, starting at the site, used as the sequon text.

    Returns
    -------
    dict
        Of the form::

            {
                "XXXX": {            # sequon text starting at the site
                    "site": SITE,
                    "glycans": [glycan1, glycan2, ...]
                },
                ...
            }

        Because the sequon text is the key, two sites with identical
        sequon text share one entry and the later one replaces the earlier.

    Raises
    ------
    AssertionError
        If ``replicate_cutoff`` exceeds the number of CSVs, or if neither
        ``accessionid`` nor a non-empty ``seq`` is given.
    """
    # Validation is raised explicitly (not via ``assert``) so it still runs
    # under ``python -O``; the exception type is kept for existing callers.
    replicate_count = len(csvfilelist)
    if replicate_cutoff > replicate_count:
        raise AssertionError(
            'Replicate cutoff = ' + str(replicate_cutoff) + ' > '
            + str(replicate_count) + ' = number of csvs')

    # ``seq`` defaults to '' so an ``is not None`` test can never fail;
    # check for a usable (non-empty) sequence instead.
    if accessionid is None and not seq:
        raise AssertionError('accessionid or sequence required')

    # If the sequence was not provided, look it up from the accession.
    if not seq:
        seq = get_seq(accessionid)
    pepseq = PeptideSequence(seq)

    # Start with an empty glycan list for every N-glycan sequon site.
    site_to_glycans = {x: [] for x in pepseq.n_glycan_sequon_sites}

    # Use the CSV index as the replicate ID while collecting glycans.
    for replicate, content in enumerate(csvfilelist):
        site_to_glycans = update_siteinfodict(
            site_to_glycans, content, replicate, accessionid, pepseq,
            score_cutoff=score_cutoff)

    # Drop glycans seen in too few replicates and average the rest.
    site_to_glycans = prune_siteinfodict(
        site_to_glycans, replicate_cutoff=replicate_cutoff)

    # Key the result by sequon text, keeping the site alongside the glycans.
    sequoninfodict = {
        seq[site:site + sequon_length]: {'site': site, 'glycans': glycans}
        for site, glycans in site_to_glycans.items()
    }
    return sequoninfodict