Exemple #1
0
    def setUp(self):
        """Build the fixture: one Gfap peptide in a six-channel data set.

        Stores on ``self``:

        - ``prots``: a ``Proteins`` wrapper holding the single protein
          P03995 (Gfap).
        - ``seq``: the result of ``extract_sequence`` for 'QEADEATLAR'.
        - ``mods``: one ``TMT6plex`` modification flagged ``nterm=True``
          at ``rel_pos=0``, also attached to ``seq.modifications``.
        - ``channels`` / ``groups``: ordered name -> label and
          group -> channel-name mappings ('norm' is in neither group).
        - ``data``: a ``DataSet`` containing that one peptide, whose
          '128' channel value is NaN.
        """
        gfap_sequence = (
            'MERRRITSARRSYASETVVRGLGPSRQLGTMPRFSLSRMTPPLPARVDFSLAG'
            'ALNAGFKETRASERAEMMELNDRFASYIEKVRFLEQQNKALAAELNQLRAKEP'
            'TKLADVYQAELRELRLRLDQLTANSARLEVERDNFAQDLGTLRQKLQDETNLR'
            'LEAENNLAAYRQEADEATLARVDLERKVESLEEEIQFLRKIYEEEVRELREQL'
            'AQQQVHVEMDVAKPDLTAALREIRTQYEAVATSNMQETEEWYRSKFADLTDAA'
            'SRNAELLRQAKHEANDYRRQLQALTCDLESLRGTNESLERQMREQEERHARES'
            'ASYQEALARLEEEGQSLKEEMARHLQEYQDLLNVKLALDIEIATYRKLLEGEE'
            'NRITIPVQTFSNLQIRETSLDTKSVSEGHLKRNIVVKTVEMRDGEVIKDSKQE'
            'HKDVVM'
        )
        gfap = data_sets.Protein(
            accession='P03995',
            gene='Gfap',
            description='Glial fibrillary acidic protein',
            full_sequence=gfap_sequence,
        )
        self.prots = data_sets.Proteins(proteins=(gfap,))
        self.seq = data_sets.extract_sequence(self.prots, 'QEADEATLAR')

        # The modification and the sequence reference each other.
        tmt_tag = data_sets.Modification(
            rel_pos=0,
            mod_type='TMT6plex',
            nterm=True,
            sequence=self.seq,
        )
        self.mods = data_sets.Modifications(mods=[tmt_tag])
        self.seq.modifications = self.mods

        channel_names = ('low1', 'low2', 'low3', 'med', 'high', 'norm')
        channel_labels = ('126', '127', '128', '129', '130', '131')
        self.channels = OrderedDict(zip(channel_names, channel_labels))

        self.groups = OrderedDict()
        self.groups['base'] = ['low1', 'low2', 'low3']
        self.groups['stim'] = ['med', 'high']

        self.data = data_sets.DataSet(
            channels=self.channels,
            groups=self.groups,
        )

        # One row keyed by metadata fields plus one value per channel label.
        peptide_row = {
            'Proteins': self.prots,
            'Sequence': self.seq,
            'Modifications': self.mods,
        }
        intensities = (1e4, 1e4, np.nan, 4e4, 4e4, 1e4)
        peptide_row.update(zip(channel_labels, intensities))

        self.data.add_peptide(peptide_row)
Exemple #2
0
def _reassign_mods(mods, psp_val, probability_cutoff=75):
    """Move phospho modifications to the sites given by ``psp_val``.

    ``psp_val`` is a ';'-separated string of site probabilities in the
    phosphoRS format, e.g. 'T(4): 99.6; S(6): 0.4; S(10): 0.0'.  ``None``
    is treated as an empty string, and entries that ``RE_PSP`` does not
    match (such as the error message 'Too many isoforms') are dropped.
    Site positions in ``psp_val`` are compared against ``rel_pos + 1``.

    Returns a ``(mods, reassigned, ambiguous)`` tuple:

    - If the number of sites with probability strictly above
      ``probability_cutoff`` differs from the number of phospho
      modifications in ``mods``, the input ``mods`` is returned with
      ``ambiguous=True``.
    - If the counts agree but the positions differ, a new
      ``Modifications`` object is built from the non-phospho
      modifications plus fresh 'Phospho' modifications at the reported
      positions, assigned to ``sequence.modifications`` of every
      modification it contains, and returned with ``reassigned=True``.
    - Otherwise the input ``mods`` is returned with both flags False.
    """
    raw = '' if psp_val is None else psp_val

    # Parse each entry into (first group, int position, float probability).
    sites = []
    for entry in raw.split(';'):
        hit = RE_PSP.match(entry.strip())
        if not hit:
            continue
        fields = hit.groups()
        sites.append((fields[0], int(fields[1]), float(fields[2])))

    other_mods = [mod for mod in mods if not _is_pmod(mod)]
    phospho_mods = [mod for mod in mods if _is_pmod(mod)]
    confident = [site for site in sites if site[2] > probability_cutoff]

    if len(phospho_mods) != len(confident):
        LOGGER.debug(
            'Not enough info to assign phophosite: {}'.format(sites)
        )
        return mods, False, True

    current_positions = {mod.rel_pos + 1 for mod in phospho_mods}
    reported_positions = {site[1] for site in confident}

    if current_positions == reported_positions:
        return mods, False, False

    # Counts match and the sets differ, so phospho_mods is non-empty here.
    target_sequence = phospho_mods[0].sequence
    moved = []
    for site in confident:
        moved.append(
            data_sets.Modification(
                rel_pos=site[1] - 1,
                mod_type='Phospho',
                nterm=False,
                cterm=False,
                sequence=target_sequence,
            )
        )

    mods = data_sets.Modifications(
        mods=_sort_mods(other_mods + moved),
    )

    for mod in mods.mods:
        mod.sequence.modifications = mods

    return mods, True, False
Exemple #3
0
    def _get_mods(row):
        """Wrap the modifications recorded for ``row`` and link them.

        Looks up ``row.name`` in the enclosing ``mod_dict`` (an empty
        tuple is used when the key is absent), wraps the result in a
        ``Modifications`` object, points every contained modification at
        ``row['Sequence']`` and stores the wrapper on that sequence.

        Each modification is expected to have no sequence assigned yet;
        this is checked with an ``assert``.

        Returns the new ``Modifications`` object.
        """
        recorded = mod_dict.get(row.name, ())
        mods = data_sets.Modifications(mods=recorded)
        sequence = row['Sequence']

        for mod in mods.mods:
            assert mod.sequence is None
            mod.sequence = sequence

        sequence.modifications = mods

        return mods
Exemple #4
0
 def setUp(self):
     """Create a doubly phosphorylated Gsk3b peptide and derived lists.

     ``self.sequence`` is the peptide "GEPNVsyICSR" matched exactly at
     ``rel_pos=209`` of protein Q9WV60, carrying two "Phospho"
     modifications at ``rel_pos`` 5 and 6.  ``self.sequences`` is the
     list produced by ``motif.generate_n_mers`` for that sequence;
     ``self.foreground`` and ``self.background`` refer to that same
     list object.
     """
     gsk3b = data_sets.Protein(
         accession="Q9WV60",
         gene="Gsk3b",
         description="Glycogen synthase kinase-3 beta",
         full_sequence=(
             "MSGRPRTTSFAESCKPVQQPSAFGSMKVSRDKDGSKVTTVVATPGQGPD"
             "RPQEVSYTDTKVIGNGSFGVVYQAKLCDSGELVAIKKVLQDKRFKNREL"
             "QIMRKLDHCNIVRLRYFFYSSGEKKDEVYLNLVLDYVPETVYRVARHYS"
             "RAKQTLPVIYVKLYMYQLFRSLAYIHSFGICHRDIKPQNLLLDPDTAVL"
             "KLCDFGSAKQLVRGEPNVSYICSRYYRAPELIFGATDYTSSIDVWSAGC"
             "VLAELLLGQPIFPGDSGVDQLVEIIKVLGTPTREQIREMNPNYTEFKFP"
             "QIKAHPWTKVFRPRTPPEAIALCSRLLEYTPTARLTPLEACAHSFFDEL"
             "RDPNVKLPNGRDTPALFNFTTQELSSNPPLATILIPPHARIQAAASPPA"
             "NATAASDTNAGDRGQTNNAASASASNST"
         ),
     )
     protein_match = data_sets.ProteinMatch(
         protein=gsk3b,
         rel_pos=209,
         exact=True,
     )
     self.sequence = data_sets.Sequence(
         pep_seq="GEPNVsyICSR",
         protein_matches=(protein_match,),
     )

     # rel_pos 5 is S215-p, rel_pos 6 is Y216-p.
     phospho_sites = tuple(
         data_sets.Modification(
             rel_pos=site,
             mod_type="Phospho",
             sequence=self.sequence,
         )
         for site in (5, 6)
     )
     self.sequence.modifications = data_sets.Modifications(phospho_sites)

     self.sequences = list(motif.generate_n_mers(self.sequence))
     # Foreground and background deliberately share the same list.
     self.foreground = self.background = self.sequences
Exemple #5
0
 def setUp(self):
     """Create a Gsk3b peptide carrying two phospho modifications.

     ``self.sequence`` is the peptide 'GEPNVsyICSR' matched exactly at
     ``rel_pos=209`` of protein Q9WV60; its ``modifications`` attribute
     holds two 'Phospho' modifications at ``rel_pos`` 5 and 6.
     """
     full_sequence = (
         'MSGRPRTTSFAESCKPVQQPSAFGSMKVSRDKDGSKVTTVVATPGQGPD'
         'RPQEVSYTDTKVIGNGSFGVVYQAKLCDSGELVAIKKVLQDKRFKNREL'
         'QIMRKLDHCNIVRLRYFFYSSGEKKDEVYLNLVLDYVPETVYRVARHYS'
         'RAKQTLPVIYVKLYMYQLFRSLAYIHSFGICHRDIKPQNLLLDPDTAVL'
         'KLCDFGSAKQLVRGEPNVSYICSRYYRAPELIFGATDYTSSIDVWSAGC'
         'VLAELLLGQPIFPGDSGVDQLVEIIKVLGTPTREQIREMNPNYTEFKFP'
         'QIKAHPWTKVFRPRTPPEAIALCSRLLEYTPTARLTPLEACAHSFFDEL'
         'RDPNVKLPNGRDTPALFNFTTQELSSNPPLATILIPPHARIQAAASPPA'
         'NATAASDTNAGDRGQTNNAASASASNST'
     )
     protein = data_sets.Protein(
         accession='Q9WV60',
         gene='Gsk3b',
         description='Glycogen synthase kinase-3 beta',
         full_sequence=full_sequence,
     )
     matches = (
         data_sets.ProteinMatch(protein=protein, rel_pos=209, exact=True),
     )
     self.sequence = data_sets.Sequence(
         pep_seq='GEPNVsyICSR',
         protein_matches=matches,
     )

     # S215-p
     ser215 = data_sets.Modification(
         rel_pos=5,
         mod_type='Phospho',
         sequence=self.sequence,
     )
     # Y216-p
     tyr216 = data_sets.Modification(
         rel_pos=6,
         mod_type='Phospho',
         sequence=self.sequence,
     )
     self.sequence.modifications = data_sets.Modifications(
         (ser215, tyr216),
     )