def _init_sites(self):
    """Populate ``self.sites`` with N-glycosylation sequon sites.

    The sequence is obtained from ``self._get_sequence()``. If that raises
    ``residue.UnknownAminoAcidException`` nothing is recorded and the method
    returns immediately. Otherwise one ``ProteinSite`` named
    ``ProteinSite.N_GLYCOSYLATION`` is created per location reported by
    ``sequence.find_n_glycosylation_sequons`` and appended to ``self.sites``.
    """
    try:
        parsed = self._get_sequence()
    except residue.UnknownAminoAcidException:
        return

    found = []
    try:
        for position in sequence.find_n_glycosylation_sequons(parsed):
            found.append(
                ProteinSite(name=ProteinSite.N_GLYCOSYLATION, location=position))
    except residue.UnknownAminoAcidException:
        # Best effort: keep whatever sites were collected before the failure.
        pass

    # The O- and GAG-linker sites are not determined by a multi AA sequon.
    # We don't need to abstract them away and they are much too common, so
    # only N-glycosylation sites are recorded here.
    self.sites.extend(found)
def __init__(self, peptide_dict, enzyme=None, constant_modifications=None,
             modification_translation_table=None, process=True):
    """Initialise the converter state from a peptide record.

    Parameters
    ----------
    peptide_dict : mapping
        Must provide the key ``"PeptideSequence"``; the optional key
        ``'id'`` is stored as ``mzid_id`` (``None`` when absent).
    enzyme : optional
        Stored unchanged on the instance.
    constant_modifications : list, optional
        Stored unchanged; a fresh empty list is used when ``None``.
    modification_translation_table : dict, optional
        Stored unchanged; a fresh empty dict is used when ``None``.
    process : bool
        When true, ``self.process()`` is invoked at the end of construction.
    """
    # Fresh containers per instance so that no mutable default is shared.
    if modification_translation_table is None:
        modification_translation_table = {}
    if constant_modifications is None:
        constant_modifications = []

    self.peptide_dict = peptide_dict
    self.insert_sites = []
    # NOTE(review): the attribute name is spelled "deleteion_sites" in the
    # original; it is kept as-is because other code may read it.
    self.deleteion_sites = []
    self.modification_counter = 0
    self.missed_cleavages = 0

    raw_sequence = peptide_dict["PeptideSequence"]
    self.base_sequence = raw_sequence
    self.peptide_sequence = PeptideSequence(raw_sequence)
    self.glycosite_candidates = sequence.find_n_glycosylation_sequons(
        self.peptide_sequence, WHITELIST_GLYCOSITE_PTMS)

    self.constant_modifications = constant_modifications
    self.modification_translation_table = modification_translation_table
    self.enzyme = enzyme
    self.mzid_id = peptide_dict.get('id')

    if process:
        self.process()
def n_glycan_sequon_sites(self):
    """Return the N-glycosylation sequon sites of ``self.protein_sequence``.

    The result of ``sequence.find_n_glycosylation_sequons`` is memoised in
    ``self._n_glycan_sequon_sites``. If the lookup raises
    ``residue.UnknownAminoAcidException`` an empty list is returned and
    nothing is cached, so the lookup is attempted again on the next call.
    """
    if self._n_glycan_sequon_sites is not None:
        return self._n_glycan_sequon_sites
    try:
        self._n_glycan_sequon_sites = sequence.find_n_glycosylation_sequons(
            self.protein_sequence)
    except residue.UnknownAminoAcidException:
        return []
    return self._n_glycan_sequon_sites
def n_glycan_sequon_sites(peptide, protein, use_local_sequence=False):
    """Return the sorted, peptide-relative N-glycan sequon sites.

    Sites from ``protein.n_glycan_sequon_sites`` are selected by
    ``span_test`` using the peptide's start and end positions and shifted by
    ``peptide.start_position``. When ``use_local_sequence`` is true, the
    sites that ``sequence.find_n_glycosylation_sequons`` reports for
    ``peptide.modified_peptide_sequence`` are merged in as well.
    """
    in_span = span_test(
        protein.n_glycan_sequon_sites,
        peptide.start_position,
        peptide.end_position)
    relative = {position - peptide.start_position for position in in_span}
    if use_local_sequence:
        relative.update(
            sequence.find_n_glycosylation_sequons(
                peptide.modified_peptide_sequence))
    return sorted(relative)
def n_glycan_sequon_sites(peptide, protein, use_local_sequence=False):
    """Return the sorted, peptide-relative N-glycan sequon sites.

    Parameters
    ----------
    peptide : object
        Must expose ``start_position`` and ``end_position``; when
        ``use_local_sequence`` is true, ``modified_peptide_sequence`` too.
    protein : object
        Must expose ``n_glycan_sequon_sites``, an iterable of positions.
    use_local_sequence : bool
        When true, the sites that ``sequence.find_n_glycosylation_sequons``
        reports for the peptide's own sequence are merged in.

    Returns
    -------
    list
        Unique site offsets relative to ``peptide.start_position`` in
        ascending order. Only protein sites with
        ``start_position <= site < end_position`` are kept.
    """
    start = peptide.start_position
    end = peptide.end_position
    sites = {
        site - start
        for site in protein.n_glycan_sequon_sites
        if start <= site < end
    }
    if use_local_sequence:
        sites.update(
            sequence.find_n_glycosylation_sequons(
                peptide.modified_peptide_sequence))
    # Sort so callers get a deterministic order rather than whatever order
    # the set happens to iterate in.
    return sorted(sites)
def parent_sequence_aware_n_glycan_sequon_sites(peptide, protein):
    """Return sorted N-glycan sequon sites from the peptide and its parent.

    Combines the sites that ``sequence.find_n_glycosylation_sequons`` reports
    for ``peptide.modified_peptide_sequence`` with the entries of
    ``protein.glycosylation_sites`` that satisfy
    ``peptide.start_position <= site < peptide.end_position``, the latter
    shifted by ``peptide.start_position``.

    Returns
    -------
    list
        Unique site offsets in ascending order.
    """
    start = peptide.start_position
    end = peptide.end_position
    sites = set(
        sequence.find_n_glycosylation_sequons(
            peptide.modified_peptide_sequence))
    sites.update(
        site - start
        for site in protein.glycosylation_sites
        if start <= site < end)
    # Sort so callers get a deterministic order rather than whatever order
    # the set happens to iterate in.
    return sorted(sites)
def extract_proteins(self):
    """Store every FASTA protein with its sequence reversed.

    For each record yielded by ``ProteinFastaFileParser(self.fasta_file)``
    the sequence is reversed (via ``reverse_sequence`` when it contains
    ``"("``, by plain string reversal otherwise), the hypothesis id is set,
    and N-glycosylation sites found on the original sequence are attached
    at the mirrored location ``n - site - 1``. Records raising
    ``UnknownAminoAcidException`` while being reversed or parsed are
    skipped. The session is committed every 5000 stored proteins and once
    more at the end.

    Returns
    -------
    int
        The number of proteins added to the session.
    """
    stored = 0
    for protein in ProteinFastaFileParser(self.fasta_file):
        forward_text = protein.protein_sequence
        length = len(forward_text)

        if "(" not in protein.protein_sequence:
            protein.protein_sequence = protein.protein_sequence[::-1]
        else:
            try:
                protein.protein_sequence = str(
                    reverse_sequence(protein.protein_sequence, suffix_len=0))
            except UnknownAminoAcidException:
                continue
        protein.hypothesis_id = self.hypothesis_id

        try:
            forward_sequence = PeptideSequence(forward_text)
        except UnknownAminoAcidException:
            continue

        mirrored_sites = []
        try:
            for position in find_n_glycosylation_sequons(forward_sequence):
                mirrored_sites.append(
                    ProteinSite(
                        name=ProteinSite.N_GLYCOSYLATION,
                        location=length - position - 1))
        except UnknownAminoAcidException:
            # Best effort: keep whatever sites were collected so far.
            pass
        # O-glycosylation and GAG sites are deliberately not recorded;
        # see Protein._init_sites for explanation.
        protein.sites.extend(mirrored_sites)

        self.session.add(protein)
        stored += 1
        if stored % 5000 == 0:
            self.log("... %d Proteins Extracted" % (stored, ))
            self.session.commit()

    self.session.commit()
    self.log("%d Proteins Extracted" % (stored, ))
    return stored
def n_glycan_sequon_sites(self):
    """Return the N-glycosylation sites of this protein, cached when found.

    Resolution order when nothing is cached yet:

    1. Sites in ``self.sites`` named ``ProteinSite.N_GLYCOSYLATION`` are
       converted with ``int`` and cached.
    2. If ``self.sites`` holds no sites at all, the sequence from
       ``self._get_sequence()`` is scanned with
       ``sequence.find_n_glycosylation_sequons`` and the result cached; a
       ``residue.UnknownAminoAcidException`` yields an uncached ``[]``.
    3. Otherwise (other sites exist, but no N-glycosylation ones) an
       uncached ``[]`` is returned.
    """
    if self._n_glycan_sequon_sites is not None:
        return self._n_glycan_sequon_sites

    recorded = self.sites.filter(
        ProteinSite.name == ProteinSite.N_GLYCOSYLATION).all()
    if recorded:
        self._n_glycan_sequon_sites = [int(entry) for entry in recorded]
    elif self.sites.count() != 0:
        # Sites were stored for this protein, just none of this kind.
        return []
    else:
        try:
            self._n_glycan_sequon_sites = sequence.find_n_glycosylation_sequons(
                self._get_sequence())
        except residue.UnknownAminoAcidException:
            return []
    return self._n_glycan_sequon_sites
def extract_proteins(self):
    """Store every FASTA protein with its sequence reversed.

    For each record yielded by ``ProteinFastaFileParser(self.fasta_file)``
    the sequence is replaced by ``str(reverse_sequence(..., suffix_len=0))``
    and the hypothesis id is set. N-glycosylation, O-glycosylation and
    glycosaminoglycan sites found on the original sequence are appended to
    ``protein.sites`` at the mirrored location ``n - site - 1``. A finder
    raising ``UnknownAminoAcidException`` is skipped without affecting the
    others. The session is committed every 100 proteins and once more at
    the end.

    Returns
    -------
    int
        The number of proteins added to the session.
    """
    # Each finder is paired with the site name recorded for its hits; the
    # order matches the order in which sites are appended.
    finders = (
        (find_n_glycosylation_sequons, ProteinSite.N_GLYCOSYLATION),
        (find_o_glycosylation_sequons, ProteinSite.O_GLYCOSYLATION),
        (find_glycosaminoglycan_sequons, ProteinSite.GAGYLATION),
    )

    stored = 0
    for protein in ProteinFastaFileParser(self.fasta_file):
        forward = protein.protein_sequence
        length = len(forward)
        protein.protein_sequence = str(
            reverse_sequence(protein.protein_sequence, suffix_len=0))
        protein.hypothesis_id = self.hypothesis_id
        forward = PeptideSequence(forward)

        for find_sites, site_name in finders:
            try:
                for position in find_sites(forward):
                    protein.sites.append(
                        ProteinSite(
                            name=site_name,
                            location=length - position - 1))
            except UnknownAminoAcidException:
                # Best effort: move on to the next kind of site.
                pass

        self.session.add(protein)
        stored += 1
        if stored % 100 == 0:
            self.log("... %d Proteins Extracted" % (stored,))
            self.session.commit()

    self.session.commit()
    self.log("%d Proteins Extracted" % (stored,))
    return stored
def _init_sites(self):
    """Append N-, O-glycosylation and GAG sites to ``self.sites``.

    Each finder from the ``sequence`` module is run on a fresh result of
    ``self._get_sequence()``; one ``ProteinSite`` is appended per reported
    location. A ``residue.UnknownAminoAcidException`` raised while handling
    one kind of site is ignored and the remaining kinds are still tried.
    """
    # Each finder is paired with the site name recorded for its hits; the
    # order matches the order in which sites are appended.
    finders = (
        (sequence.find_n_glycosylation_sequons, ProteinSite.N_GLYCOSYLATION),
        (sequence.find_o_glycosylation_sequons, ProteinSite.O_GLYCOSYLATION),
        (sequence.find_glycosaminoglycan_sequons, ProteinSite.GAGYLATION),
    )
    for find_sites, site_name in finders:
        try:
            # The sequence is fetched inside the try so that a failure to
            # obtain it is tolerated exactly like a failure in the finder.
            for position in find_sites(self._get_sequence()):
                self.sites.append(
                    ProteinSite(name=site_name, location=position))
        except residue.UnknownAminoAcidException:
            pass