def test_isoforms_universal(self):
    """Check isoform generation for modifications mapped to ``True``.

    Two fixed expectations are asserted for the sequence ``'PEPTIDE'``:
    the ``'xx-'`` label must yield the unmodified sequence plus
    ``'xx-PEPTIDE'``, and the ``'-xx'`` label must yield the unmodified
    sequence plus ``'PEPTIDE-xx'``.  Then, for every entry of
    ``self.simple_sequences``, the label ``'x'`` mapped to ``True`` must
    produce exactly ``2 ** len(seq)`` isoforms.
    """
    # (variable_mods, expected set of isoforms), checked in this order.
    terminal_cases = (
        ({'xx-': True}, {'PEPTIDE', 'xx-PEPTIDE'}),
        ({'-xx': True}, {'PEPTIDE', 'PEPTIDE-xx'}),
    )
    for mods, expected in terminal_cases:
        produced = set(parser.isoforms('PEPTIDE', variable_mods=mods))
        self.assertEqual(produced, expected)

    for sequence in self.simple_sequences:
        # Count lazily so the isoforms are never all held in memory.
        n_isoforms = 0
        for _ in parser.isoforms(sequence, variable_mods={'x': True}):
            n_isoforms += 1
        self.assertEqual(n_isoforms, 2 ** len(sequence))
def test_isoforms_terminal(self):
    """Check isoforms for a site-restricted pair of variable modifications.

    ``'xx'`` is restricted to ``['ntermP']`` and ``'yy-'`` to ``'P'``.
    For ``'PEPTIDE'`` the test asserts that exactly four distinct
    isoforms come back: unmodified, ``'xx'`` only, ``'yy-'`` only, and
    both together.
    """
    variable = {'xx': ['ntermP'], 'yy-': 'P'}
    expected = {'PEPTIDE', 'xxPEPTIDE', 'yy-PEPTIDE', 'yy-xxPEPTIDE'}

    observed = set(parser.isoforms('PEPTIDE', variable_mods=variable))

    self.assertEqual(observed, expected)
def test_isoforms_fixed_simple(self):
    """Check that fixed modifications produce one fully modified isoform.

    With ``'n-'`` and ``'-c'`` mapped to ``True`` and ``'x'`` restricted
    to ``'P'`` and ``'T'``, the only isoform expected for ``'PEPTIDE'``
    is ``'n-xPExPxTIDE-c'``.
    """
    fixed = {'n-': True, '-c': True, 'x': ['P', 'T']}

    result = list(parser.isoforms('PEPTIDE', fixed_mods=fixed))

    self.assertEqual(result, ['n-xPExPxTIDE-c'])
def test_isoforms_maxmods(self):
    """Check that ``max_mods`` bounds the number of modified positions.

    Runs 50 randomized rounds.  Each round draws a peptide of 1-10
    residues from ``self.labels`` and a limit of 1-10, generates its
    isoforms in ``'split'`` format, and asserts for every isoform that
    it has as many positions as the parsed unmodified peptide and that
    it differs from it in at most ``max_mods`` positions.
    """
    for _ in range(50):
        # Draw order (length, limit, residues) matches the original so a
        # seeded run consumes the random stream identically.
        length = random.randint(1, 10)
        limit = random.randint(1, 10)
        peptide = ''.join([random.choice(self.labels) for _ in range(length)])

        isoform_iter = parser.isoforms(
            peptide,
            variable_mods=self.potential,
            labels=self.labels,
            max_mods=limit,
            format='split',
        )
        unmodified = parser.parse(peptide, labels=self.extlabels, split=True)

        for isoform in isoform_iter:
            self.assertEqual(len(unmodified), len(isoform))
            # Positions whose split group differs from the unmodified one.
            n_changed = sum(
                reference != candidate
                for reference, candidate in zip(unmodified, isoform)
            )
            self.assertLessEqual(n_changed, limit)
def test_isoforms_len(self):
    """Check isoform lengths and the total isoform count.

    Runs 50 randomized rounds.  Each round draws a peptide of 1-10
    residues from ``self.labels``, generates all isoforms with
    ``self.potential`` as variable and ``self.constant`` as fixed
    modifications, and asserts that:

    * every isoform has the same ``parser.length`` as the parsed peptide;
    * the number of isoforms equals
      ``3 ** count('A') * 2 ** (count('X') + count('C') + N)``, where
      ``N`` adds one if the first parsed residue is ``'N'`` and one if
      the last is ``'C'``.

    NOTE(review): the formula presumably mirrors the number of states
    that ``self.potential`` / ``self.constant`` allow per site; those
    fixtures are defined outside this block -- confirm there.
    """
    for _ in range(50):
        length = random.randint(1, 10)
        peptide = ''.join(random.choice(self.labels) for _ in range(length))

        isoform_list = list(
            parser.isoforms(
                peptide,
                variable_mods=self.potential,
                fixed_mods=self.constant,
                labels=self.labels,
            )
        )
        parsed = parser.parse(peptide, labels=self.extlabels)

        # Booleans add up as 0/1: one extra two-state site per matching end.
        terminal_sites = (parsed[0] == 'N') + (parsed[-1] == 'C')

        for isoform in isoform_list:
            self.assertEqual(
                len(parsed), parser.length(isoform, labels=self.extlabels))

        two_state_sites = parsed.count('X') + parsed.count('C') + terminal_sites
        expected_total = (3 ** parsed.count('A')) * (2 ** two_state_sites)
        self.assertEqual(len(isoform_list), expected_total)
def test_isoforms_simple(self):
    """Check the exact isoform list, in order, for one variable modification.

    ``'xx'`` is allowed on ``'A'``, ``'B'``, ``'P'`` and ``'E'``.  For
    ``'PEPTIDE'`` the test expects 16 isoforms in the precise order
    listed below, so both the content and the ordering of the output
    are pinned.
    """
    expected = [
        'PEPTIDE',
        'PEPTIDxxE',
        'PExxPTIDE',
        'PExxPTIDxxE',
        'PxxEPTIDE',
        'PxxEPTIDxxE',
        'PxxExxPTIDE',
        'PxxExxPTIDxxE',
        'xxPEPTIDE',
        'xxPEPTIDxxE',
        'xxPExxPTIDE',
        'xxPExxPTIDxxE',
        'xxPxxEPTIDE',
        'xxPxxEPTIDxxE',
        'xxPxxExxPTIDE',
        'xxPxxExxPTIDxxE',
    ]
    allowed_sites = {'xx': ['A', 'B', 'P', 'E']}

    result = list(parser.isoforms('PEPTIDE', variable_mods=allowed_sites))

    self.assertEqual(result, expected)
def test_valid(self):
    """Check ``parser.valid`` / ``parser.fast_valid`` on generated isoforms.

    Runs 50 randomized rounds.  Each round draws a peptide of 1-10
    residues from ``self.labels`` and asserts that:

    * ``'H-' + peptide`` is rejected when validated against
      ``self.labels``;
    * every isoform generated with ``self.potential`` /
      ``self.constant`` is accepted against ``self.extlabels``;
    * replacing any residue character of the peptide with ``'Z'`` in an
      isoform makes both ``fast_valid`` and ``valid`` reject it against
      ``self.labels``.
    """
    for _ in range(50):
        length = random.randint(1, 10)
        peptide = ''.join([random.choice(self.labels) for _ in range(length)])
        isoform_iter = parser.isoforms(
            peptide,
            variable_mods=self.potential,
            fixed_mods=self.constant,
            labels=self.labels,
        )

        self.assertFalse(parser.valid('H-' + peptide, labels=self.labels))

        for isoform in isoform_iter:
            self.assertTrue(parser.valid(isoform, labels=self.extlabels))
            for residue in set(peptide):
                corrupted = isoform.replace(residue, 'Z')
                self.assertFalse(
                    parser.fast_valid(corrupted, labels=self.labels))
                self.assertFalse(
                    parser.valid(corrupted, labels=self.labels))
def add_peptide_collection(self, peptide, protein_id, target):
    """Build a ``PeptideObj`` for each accepted isoform of ``peptide``.

    The peptide is skipped entirely when ``self.unique_peptides == 1``
    and it is already in ``self.peptide_set``; otherwise it is recorded
    there.  Its isoforms are generated with ``parser.isoforms`` using
    ``self.modifications`` and ``self.max_mods``.  An isoform is kept
    only if its modification count lies within
    ``[self.min_mods, self.max_mods]`` and its total mass lies within
    ``[self.min_pept_mass, self.max_pept_mass)``.  Each kept isoform is
    appended to ``self.peptide_collection`` as a ``PeptideObj`` whose
    ``aa_mass`` attribute holds the per-residue masses including the
    modification deltas.

    Args:
        peptide: Unmodified peptide sequence; each character is looked
            up in ``self.aa_mass``.
        protein_id: Passed through to ``PeptideObj`` unchanged.
        target: Passed through to ``PeptideObj`` unchanged.

    Returns:
        None.  Results are stored on ``self.peptide_collection``.
    """
    if self.unique_peptides == 1:
        if peptide in self.peptide_set:
            return
    self.peptide_set.add(peptide)

    # A set removes duplicate isoform strings before any mass work is done.
    isoform_set = set(
        parser.isoforms(
            peptide,
            variable_mods=self.modifications,
            max_mods=self.max_mods,
        )
    )

    # TODO: make this faster, e.g. by handling residues as unsigned bytes.
    base_masses = np.array([self.aa_mass[residue] for residue in peptide])

    for isoform in isoform_set:
        # Each modification is counted by its closing bracket.
        n_mods = len(re.findall(r"\]", isoform))
        if n_mods < self.min_mods or n_mods > self.max_mods:
            continue

        # list() gives an independent copy; slicing the array with [:]
        # would return a view onto base_masses and corrupt later isoforms.
        residue_masses = list(base_masses)

        # Characters of modification text seen so far; subtracting this
        # maps a match position in the isoform string to a residue index.
        consumed = 0
        for match in re.finditer(self.pattern, isoform):
            start = match.start()
            if isoform[start] == "-":
                # A match that begins with '-' is credited to the last
                # residue, and scanning stops there.
                residue_masses[-1] += float(match.group(1))
                break
            residue_masses[start - consumed] += float(match.group(1))
            consumed += match.end() - start

        # Per the original note, self.Y and self.B account for the
        # terminal H / OH plus static modifications (defined elsewhere).
        total_mass = np.sum(residue_masses) + self.Y + self.B
        if total_mass < self.min_pept_mass or total_mass >= self.max_pept_mass:
            continue

        entry = PeptideObj(
            total_mass,
            isoform,
            protein_id,
            target,
            peptide,
            "full",
            self.missed_cleavages,
        )
        entry.aa_mass = residue_masses
        self.peptide_collection.append(entry)
def test_isoforms_simple_2(self):
    """Check isoforms when two modifications target the same residue.

    Both ``'x'`` and ``'y'`` are allowed on ``'T'``.  For ``'PEPTIDE'``
    the test expects, in this order, the unmodified sequence, the
    ``'x'``-modified one and the ``'y'``-modified one -- and no isoform
    carrying both labels on the single ``'T'``.
    """
    competing = {'x': 'T', 'y': 'T'}

    result = list(parser.isoforms('PEPTIDE', variable_mods=competing))

    self.assertEqual(result, ['PEPTIDE', 'PEPxTIDE', 'PEPyTIDE'])