def setUp(self):
    """Build a one-peptide ``DataSet`` fixture around mouse GFAP.

    Populates ``self.prots``, ``self.seq``, ``self.mods``,
    ``self.channels``, ``self.groups`` and ``self.data``. The single
    peptide added to the data set is 'QEADEATLAR', carrying an
    N-terminal TMT6plex modification.
    """
    gfap = data_sets.Protein(
        accession='P03995',
        gene='Gfap',
        description='Glial fibrillary acidic protein',
        full_sequence=(
            'MERRRITSARRSYASETVVRGLGPSRQLGTMPRFSLSRMTPPLPARVDFSLAG'
            'ALNAGFKETRASERAEMMELNDRFASYIEKVRFLEQQNKALAAELNQLRAKEP'
            'TKLADVYQAELRELRLRLDQLTANSARLEVERDNFAQDLGTLRQKLQDETNLR'
            'LEAENNLAAYRQEADEATLARVDLERKVESLEEEIQFLRKIYEEEVRELREQL'
            'AQQQVHVEMDVAKPDLTAALREIRTQYEAVATSNMQETEEWYRSKFADLTDAA'
            'SRNAELLRQAKHEANDYRRQLQALTCDLESLRGTNESLERQMREQEERHARES'
            'ASYQEALARLEEEGQSLKEEMARHLQEYQDLLNVKLALDIEIATYRKLLEGEE'
            'NRITIPVQTFSNLQIRETSLDTKSVSEGHLKRNIVVKTVEMRDGEVIKDSKQE'
            'HKDVVM'
        ),
    )
    self.prots = data_sets.Proteins(proteins=(gfap,))

    # The peptide sequence is looked up within the protein set above.
    self.seq = data_sets.extract_sequence(self.prots, 'QEADEATLAR')

    tmt_label = data_sets.Modification(
        rel_pos=0,
        mod_type='TMT6plex',
        nterm=True,
        sequence=self.seq,
    )
    self.mods = data_sets.Modifications(mods=[tmt_label])
    # Sequence and modifications reference each other.
    self.seq.modifications = self.mods

    # Channel name -> quantification column key.
    self.channels = OrderedDict([
        ('low1', '126'),
        ('low2', '127'),
        ('low3', '128'),
        ('med', '129'),
        ('high', '130'),
        ('norm', '131'),
    ])
    # Group name -> channel names belonging to that group.
    self.groups = OrderedDict([
        ('base', ['low1', 'low2', 'low3']),
        ('stim', ['med', 'high']),
    ])

    peptide_row = {
        'Proteins': self.prots,
        'Sequence': self.seq,
        'Modifications': self.mods,
    }
    # One intensity per channel column; '128' is deliberately missing (NaN).
    peptide_row.update({
        '126': 1e4,
        '127': 1e4,
        '128': np.nan,
        '129': 4e4,
        '130': 4e4,
        '131': 1e4,
    })

    self.data = data_sets.DataSet(
        channels=self.channels,
        groups=self.groups,
    )
    self.data.add_peptide(peptide_row)
def _reassign_mods(mods, psp_val, probability_cutoff=75):
    """Re-place phospho modifications using a site-probability string.

    Parameters
    ----------
    mods : iterable of modification objects
        Current modifications of one peptide. They are split into two
        groups by ``_is_pmod``.
    psp_val : str or None
        ';'-separated site probabilities, e.g.
        'T(4): 99.6; S(6): 0.4; S(10): 0.0'. ``None`` is treated as an
        empty string; fields that ``RE_PSP`` does not match are ignored.
    probability_cutoff : float, optional
        Only sites whose probability is strictly greater than this value
        are considered.

    Returns
    -------
    tuple
        ``(mods, reassigned, ambiguous)``: a new ``Modifications``
        collection, whether the ``_is_pmod`` modifications were replaced
        by new 'Phospho' ones, and whether the number of sites above the
        cutoff disagreed with the number of ``_is_pmod`` modifications.

    Notes
    -----
    Every modification in the returned collection has its
    ``sequence.modifications`` attribute pointed at that collection.
    """
    # phosphoRS example format: 'T(4): 99.6; S(6): 0.4; S(10): 0.0'
    # Error messages include: 'Too many isoforms'
    if psp_val is None:
        psp_val = ''

    # Parse every matching field into (first group, position, probability).
    sites = []
    for field in psp_val.split(';'):
        hit = RE_PSP.match(field.strip())
        if hit:
            parts = hit.groups()
            sites.append((parts[0], int(parts[1]), float(parts[2])))

    other_mods = [entry for entry in mods if not _is_pmod(entry)]
    phospho_mods = [entry for entry in mods if _is_pmod(entry)]
    confident = [site for site in sites if site[2] > probability_cutoff]

    reassigned = False
    ambiguous = len(phospho_mods) != len(confident)

    if ambiguous:
        LOGGER.debug(
            'Not enough info to assign phophosite: {}'.format(sites)
        )
    else:
        # Parsed positions are 1-based, rel_pos is 0-based.
        current_positions = {entry.rel_pos + 1 for entry in phospho_mods}
        confident_positions = {site[1] for site in confident}

        if current_positions != confident_positions:
            # Differing sets with equal counts means there is at least
            # one existing modification to take the sequence from.
            parent = phospho_mods[0].sequence
            phospho_mods = [
                data_sets.Modification(
                    rel_pos=site[1] - 1,
                    mod_type='Phospho',
                    nterm=False,
                    cterm=False,
                    sequence=parent,
                )
                for site in confident
            ]
            reassigned = True

    merged = data_sets.Modifications(
        mods=_sort_mods(other_mods + phospho_mods),
    )

    # Keep each sequence's back-reference in sync with the new collection.
    for entry in merged.mods:
        entry.sequence.modifications = merged

    return merged, reassigned, ambiguous
def _get_mods(row):
    """Build the ``Modifications`` for one row and link it to the sequence.

    The row's ``name`` is used as the key into ``mod_dict``; rows without
    an entry get an empty collection. Each modification must not yet
    have a sequence; it is attached to ``row['Sequence']``, and that
    sequence's ``modifications`` attribute is set to the new collection.

    Returns the new ``Modifications`` object.
    """
    row_mods = data_sets.Modifications(
        mods=mod_dict.get(row.name, ()),
    )

    for entry in row_mods.mods:
        # Each modification is expected to be unattached at this point.
        assert entry.sequence is None
        entry.sequence = row['Sequence']

    row['Sequence'].modifications = row_mods

    return row_mods
def setUp(self):
    """Build a doubly phosphorylated Gsk3b peptide and its n-mers.

    Populates ``self.sequence`` (peptide 'GEPNVsyICSR' matched at
    ``rel_pos=209`` in the protein), ``self.sequences`` (the list
    produced by ``motif.generate_n_mers`` for that peptide), and
    ``self.foreground`` / ``self.background``, which both refer to that
    same list.
    """
    gsk3b = data_sets.Protein(
        accession="Q9WV60",
        gene="Gsk3b",
        description="Glycogen synthase kinase-3 beta",
        full_sequence=(
            "MSGRPRTTSFAESCKPVQQPSAFGSMKVSRDKDGSKVTTVVATPGQGPD"
            "RPQEVSYTDTKVIGNGSFGVVYQAKLCDSGELVAIKKVLQDKRFKNREL"
            "QIMRKLDHCNIVRLRYFFYSSGEKKDEVYLNLVLDYVPETVYRVARHYS"
            "RAKQTLPVIYVKLYMYQLFRSLAYIHSFGICHRDIKPQNLLLDPDTAVL"
            "KLCDFGSAKQLVRGEPNVSYICSRYYRAPELIFGATDYTSSIDVWSAGC"
            "VLAELLLGQPIFPGDSGVDQLVEIIKVLGTPTREQIREMNPNYTEFKFP"
            "QIKAHPWTKVFRPRTPPEAIALCSRLLEYTPTARLTPLEACAHSFFDEL"
            "RDPNVKLPNGRDTPALFNFTTQELSSNPPLATILIPPHARIQAAASPPA"
            "NATAASDTNAGDRGQTNNAASASASNST"
        ),
    )
    match = data_sets.ProteinMatch(
        protein=gsk3b,
        rel_pos=209,
        exact=True,
    )
    self.sequence = data_sets.Sequence(
        pep_seq="GEPNVsyICSR",
        protein_matches=(match,),
    )

    # Peptide-relative positions 5 and 6: S215-p and Y216-p.
    phospho_sites = tuple(
        data_sets.Modification(
            rel_pos=position,
            mod_type="Phospho",
            sequence=self.sequence,
        )
        for position in (5, 6)
    )
    self.sequence.modifications = data_sets.Modifications(phospho_sites)

    self.sequences = list(motif.generate_n_mers(self.sequence))
    # Foreground and background intentionally share the same list object.
    self.foreground = self.sequences
    self.background = self.sequences
def setUp(self):
    """Build a doubly phosphorylated Gsk3b peptide fixture.

    Populates ``self.sequence`` with peptide 'GEPNVsyICSR', matched at
    ``rel_pos=209`` in the protein, and attaches two 'Phospho'
    modifications at peptide-relative positions 5 and 6.
    """
    gsk3b = data_sets.Protein(
        accession='Q9WV60',
        gene='Gsk3b',
        description='Glycogen synthase kinase-3 beta',
        full_sequence=(
            'MSGRPRTTSFAESCKPVQQPSAFGSMKVSRDKDGSKVTTVVATPGQGPD'
            'RPQEVSYTDTKVIGNGSFGVVYQAKLCDSGELVAIKKVLQDKRFKNREL'
            'QIMRKLDHCNIVRLRYFFYSSGEKKDEVYLNLVLDYVPETVYRVARHYS'
            'RAKQTLPVIYVKLYMYQLFRSLAYIHSFGICHRDIKPQNLLLDPDTAVL'
            'KLCDFGSAKQLVRGEPNVSYICSRYYRAPELIFGATDYTSSIDVWSAGC'
            'VLAELLLGQPIFPGDSGVDQLVEIIKVLGTPTREQIREMNPNYTEFKFP'
            'QIKAHPWTKVFRPRTPPEAIALCSRLLEYTPTARLTPLEACAHSFFDEL'
            'RDPNVKLPNGRDTPALFNFTTQELSSNPPLATILIPPHARIQAAASPPA'
            'NATAASDTNAGDRGQTNNAASASASNST'
        ),
    )
    match = data_sets.ProteinMatch(
        protein=gsk3b,
        rel_pos=209,
        exact=True,
    )
    self.sequence = data_sets.Sequence(
        pep_seq='GEPNVsyICSR',
        protein_matches=(match,),
    )

    # Peptide-relative positions 5 and 6: S215-p and Y216-p.
    phospho_sites = tuple(
        data_sets.Modification(
            rel_pos=position,
            mod_type='Phospho',
            sequence=self.sequence,
        )
        for position in (5, 6)
    )
    self.sequence.modifications = data_sets.Modifications(phospho_sites)