def test_peptides_from_varaints(self):
    """Smoke-test peptide generation for a substitution plus an insertion.

    Builds two variants annotated on transcript NM_080751, then generates
    peptides of length 9 in two ways: directly from the variants, and
    step by step (variants -> transcripts -> proteins -> peptides).

    The test makes no assertions; it only prints the sizes of the
    intermediate results, so it fails only if one of the calls raises.
    NOTE(review): the MartsAdapter is given a remote BioMart URL, so this
    presumably needs network access -- confirm.
    """
    coding = {
        'NM_080751': MutationSyntax('NM_080751', 2629, 876,
                                    'c.2630C>T', 'p.Pro877Leu'),
    }
    var = Variant('line0', 0, 20, 2621905, 'C', 'T', coding, True, False)
    var.gene = 'TMC2'

    ma = MartsAdapter(biomart="http://ensembl.org")

    # Second variant: empty reference allele, "AAAAAA" as observed allele.
    insertion = Variant(
        "testInsertion", 2, 20, 2621899, "", "AAAAAA",
        {'NM_080751': MutationSyntax('NM_080751', 2625, 876,
                                     'c.2630C>T', 'p.Pro877Leu')},
        True, False)
    variants = [var, insertion]

    # Direct route: variants -> peptides.
    direct_peptides = list(Generator.generate_peptides_from_variants(
        variants, 9, ma, id_type=EIdentifierTypes.REFSEQ, peptides=None))
    print(len(direct_peptides))

    # Stepwise route. Per the original author's note this relies on a
    # tweaked generator that takes another sequence source if the sequence
    # is too short with respect to the given variants (here: a newer/older
    # sequence from the mart than the reference used during annotation).
    transcripts = list(Generator.generate_transcripts_from_variants(
        variants, ma, id_type=EIdentifierTypes.REFSEQ))
    print(len(transcripts[0]))

    proteins = list(Generator.generate_proteins_from_transcripts(
        transcripts, to_stop=True))
    peptides = list(Generator.generate_peptides_from_proteins(proteins, 9))
    print(len(peptides))

    # Was the Python 2 statement `print len(vars)`; the call form prints
    # the same value and is also valid on Python 3.
    print(len(variants))
def test_real_life_test(self):
    """Check both peptide-generation routes agree on an ANNOVAR example.

    Reads the variants from ``Data/examples/test_annovar.out`` inside the
    Fred2 package directory and generates peptides of length 9 twice:
    directly from the variants, and via the chain
    variants -> transcripts -> proteins -> peptides. The two resulting
    sets of peptide strings must be identical.

    NOTE(review): the MartsAdapter points at a remote BioMart service, so
    this presumably requires network access -- confirm.
    """
    mart = MartsAdapter(
        biomart="http://grch37.ensembl.org/biomart/martservice?query=")
    ano_path = os.path.join(os.path.dirname(inspect.getfile(Fred2)),
                            "Data/examples/test_annovar.out")
    variants = read_annovar_exonic(ano_path)

    # Direct route.
    peps = set(map(str, Generator.generate_peptides_from_variants(
        variants, 9, mart, EIdentifierTypes.REFSEQ)))

    # Stepwise route.
    transcripts = Generator.generate_transcripts_from_variants(
        variants, mart, EIdentifierTypes.REFSEQ)
    proteins = Generator.generate_proteins_from_transcripts(transcripts)
    peps_from_prot = set(map(
        str, Generator.generate_peptides_from_proteins(proteins, 9)))

    # One set comparison replaces the two "difference is empty" checks:
    # same pass/fail outcome, but a failure reports the differing items.
    self.assertEqual(peps, peps_from_prot)
def test_real_life_test(self):
    """Compare direct and stepwise peptide generation on real variants.

    The variants come from the ANNOVAR example file
    ``Data/examples/test_annovar.out`` located next to the Fred2 package.
    Peptides of length 9 produced by
    ``Generator.generate_peptides_from_variants`` must equal, as a set of
    strings, those produced by generating transcripts, translating them
    to proteins and fragmenting the proteins.

    NOTE(review): uses a MartsAdapter configured with a remote BioMart
    URL; presumably needs network access -- confirm.
    """
    mart = MartsAdapter(
        biomart="http://grch37.ensembl.org/biomart/martservice?query=")
    package_dir = os.path.dirname(inspect.getfile(Fred2))
    ano_path = os.path.join(package_dir, "Data/examples/test_annovar.out")
    variants = read_annovar_exonic(ano_path)

    direct = Generator.generate_peptides_from_variants(
        variants, 9, mart, EIdentifierTypes.REFSEQ)
    peps = set(str(pep) for pep in direct)

    stepwise = Generator.generate_peptides_from_proteins(
        Generator.generate_proteins_from_transcripts(
            Generator.generate_transcripts_from_variants(
                variants, mart, EIdentifierTypes.REFSEQ)),
        9)
    peps_from_prot = set(str(pep) for pep in stepwise)

    # Equivalent to asserting that both set differences are empty, but a
    # failing run shows which peptides are missing on either side.
    self.assertEqual(peps, peps_from_prot)
def test_peptides_from_variants(self):
    """
    Create multiple peptides, given a set containing heterozygous variants.

    The peptides generated directly from the variants must equal both the
    peptides obtained via transcripts -> proteins -> peptides and the
    expected set listed below.

    Variants:
    3-DEL(-2) , 5-INS(+3) , 7-DEL(-4)
    HET-DEL(-2), HOM-INS(+3), HET-DEL(-1)

    Reference sequence:
    AAAAACCCCCGGGGG
    AAATTTGGGGG (DEL,INS,DEL)
    AAATTTCCCCCGGGGG (DEL,INS)
    AAAAATTTGGGGG (INS,DEL)
    AAAAATTTCCCCCGGGGG (INS)

    GGGGGCCCCCAAAAA
    GGGTTTCAAAAA (DEL,INS,DEL)
    GGGTTTCCCCCAAAAA (DEL,INS)
    GGGGGTTTCAAAAA (INS,DEL)
    GGGGGTTTCCCCCAAAAA (INS)

    Resulting protein sequences:
    KFG
    KNLG
    KFPPG
    KNFPRG

    GFK
    GGLK
    GFPPK
    GGFPQK

    Resulting peptides of length 3:
    KFG
    KNL + NLG
    KFP + FPP + PPG
    KNF + NFP + FPR + PRG

    GFK
    GGL + GLK
    GFP + FPP + PPK
    GGF + GFP + FPQ + PQK
    """
    dummy_db = DummyAdapter()

    exp_peps = {
        'PRG', 'GLK', 'PPG', 'KFP', 'GFK', 'PPK', 'GFP', 'PQK', 'KNL',
        'KFG', 'GGF', 'FPQ', 'FPP', 'NLG', 'FPR', 'KNF', 'GGL', 'NFP',
    }

    # NOTE(review): the original inline note read "1) INS, SNP, DEL",
    # while the docstring describes DEL, INS, DEL -- confirm against the
    # definitions of var_10, var_11 and var_12.
    dummy_vars = [var_10, var_11, var_12]

    # Direct route: variants -> peptides.
    peps = set(map(str, Generator.generate_peptides_from_variants(
        dummy_vars, 3, dummy_db, EIdentifierTypes.REFSEQ)))

    # Stepwise route: variants -> transcripts -> proteins -> peptides.
    transcripts = Generator.generate_transcripts_from_variants(
        dummy_vars, dummy_db, EIdentifierTypes.REFSEQ)
    proteins = Generator.generate_proteins_from_transcripts(transcripts)
    peps_from_prot = set(map(
        str, Generator.generate_peptides_from_proteins(proteins, 3)))

    # Set equality replaces the pairs of "difference is empty" checks;
    # pass/fail is unchanged, failures now list the differing peptides.
    self.assertEqual(peps, peps_from_prot)
    self.assertEqual(peps, exp_peps)
def test_peptides_from_variants(self):
    """
    Create multiple peptides, given a set containing heterozygous variants.

    The set of peptide strings generated from the variants must equal the
    expected set listed below.

    Variants:
    3-DEL(-2) , 5-INS(+3) , 7-DEL(-4)
    HET-DEL(-2), HOM-INS(+3), HET-DEL(-1)

    Reference sequence:
    AAAAACCCCCGGGGG
    AAATTTGGGGG (DEL,INS,DEL)
    AAATTTCCCCCGGGGG (DEL,INS)
    AAAAATTTGGGGG (INS,DEL)
    AAAAATTTCCCCCGGGGG (INS)

    GGGGGCCCCCAAAAA
    GGGTTTCAAAAA (DEL,INS,DEL)
    GGGTTTCCCCCAAAAA (DEL,INS)
    GGGGGTTTCAAAAA (INS,DEL)
    GGGGGTTTCCCCCAAAAA (INS)

    Resulting protein sequences:
    KFG
    KNLG
    KFPPG
    KNFPRG

    GFK
    GGLK
    GFPPK
    GGFPQK

    Resulting peptides of length 3:
    KFG
    KNL + NLG
    KFP + FPP + PPG
    KNF + NFP + FPR + PRG

    GFK
    GGL + GLK
    GFP + FPP + PPK
    GGF + GFP + FPQ + PQK
    """
    dummy_db = DummyAdapter()

    exp_peps = {
        'PRG', 'GLK', 'PPG', 'KFP', 'GFK', 'PPK', 'GFP', 'PQK', 'KNL',
        'KFG', 'GGF', 'FPQ', 'FPP', 'NLG', 'FPR', 'KNF', 'GGL', 'NFP',
    }

    # NOTE(review): the original inline note read "1) INS, SNP, DEL",
    # while the docstring describes DEL, INS, DEL -- confirm against the
    # definitions of var_10, var_11 and var_12.
    dummy_vars = [var_10, var_11, var_12]

    generated = Generator.generate_peptides_from_variants(
        dummy_vars, 3, dummy_db)
    peps = set(str(pep) for pep in generated)

    # Same pass/fail outcome as checking both set differences are empty,
    # but a failure shows exactly which peptides differ.
    self.assertEqual(peps, exp_peps)