def test_generate_amas_for_indel(self):
    """Smoke-test generate_amas_for_indel on a pair of aligned alleles.

    The second allele carries a '-' gap where the first has a base.  No
    assertions are made yet (see the TODO); the test only checks that the
    call completes and that its result unpacks into two values.
    """
    # TODO
    from starp.amasfactory import generate_amas_for_indel

    full_allele = Sequence('TTGTCGTTGTGGAGTCTATATCAACCACCATTCTTAATCGTCGT')
    gapped_allele = Sequence('TTGTCGTTGTGGAGTCTATATCAACCACCATTCTTA-TCGTCGT')
    position = 39

    upstream, downstream = generate_amas_for_indel(
        full_allele, gapped_allele, position)
def test_substitute_bases_one_snp_last(self):
    """Check substitute_bases for a pair differing only in the last base."""
    from starp.amasfactory import substitute_bases

    allele_pair = (
        Sequence('ACGGAGTTACAAAAGATACAATCCGT'),
        Sequence('ACGGAGTTACAAAAGATACAATCCGA'),
    )
    expected = (
        Sequence('ACGGAGTTACAAAAGATACAATCTGT'),
        Sequence('ACGGAGTTACAAAAGATACAATTCGA'),
    )

    self.assertEqual(expected, substitute_bases(allele_pair, 'last'))
def test_multiple_references(self):
    """Search two reference sequences at once.

    The primer sequence occurs once in each reference (at offset 10 in the
    first and offset 14 in the second), and one span per reference is
    expected back, in that order.
    """
    references = [
        Sequence(
            'TGCGATGACGCGTGAGGTGTATGAAGCAAAGGCAGCGAAAAAAAAAGGTGGTGGGAGTATGACGAAAATAAACCAGCGAAAATTAACCGCGCGGACTATTTACTCCCACCACCTTTTTTTTTCCGTCACATCAAGTCGTCCATTAGGAGTAG'
        ),
        Sequence(
            'TATAAGATTATGAACGTGAGGTGTATGAAGCAAAGGCAGCAATCAAGCTAATATGGAGATGGATTTCAGAAGGTTTTGTTTATAGCGAAAAACAAGAAACTGACTTATATGAGCTCGGTAAGAAGTACTTCAATGAGCTAGTAAATAGAAGTATGATACAGCCAATTGGTATTGATGATGGAGAAGATAAACAAGCGTGTCGTGTACATGACATGGTGCTTGATATCCTATGCT'
        ),
    ]
    probe = Primer(
        'CGTGAGGTGTATGAAGCAAAGGCAGC',
        allele1_span=(10, 36),
        allele2_span=(228, 263),
        strand=1,
    )

    hits = binding_sites(references, probe)

    self.assertEqual([(10, 36), (14, 40)], [hit.span() for hit in hits])
def test_substitute_bases_two_snps_first(self):
    """
    Two SNPs at the start of the pair, with the SNP position given as
    'first'.

    Because the SNP sits on the first nucleotide, the sequences have to be
    reverse complemented BEFORE their bases are substituted. The order of
    those two steps matters.
    """
    from starp.amasfactory import substitute_bases

    allele_pair = (
        Sequence('TGCCTAACATAGAAAACATTGAGGCA'),
        Sequence('ACCCTAACATAGAAAACATTGAGGCA'),
    )
    expected = (
        Sequence('TGCTTAACATAGAAAACATTGAGGCA'),
        Sequence('ACTCTAACATAGAAAACATTGAGGCA'),
    )

    self.assertEqual(expected, substitute_bases(allele_pair, 'first'))
def test_no_binding_sites(self):
    """A primer with no expected match in the reference yields no sites."""
    reference = Sequence('GTAGCATGCGATCGTCTATCGTATGACGCTGCACGCAT')
    probe = Primer(
        'TGACGGAAAAAAA',
        allele1_span=(13, 26),
        allele2_span=(13, 26),
        strand=1,
    )

    hits = binding_sites([reference], probe)

    self.assertEqual([], [hit.span() for hit in hits])
def test_rev_comp(self):
    """
    Reverse complementing an AMAS primer leaves its tail untouched.

    The expected string is the tail as assigned, followed by the reverse
    complement of the primer sequence; the strand is expected to flip
    from 1 to -1.
    """
    primer = AmasPrimer('GGCCGGTCCTAGTGTTGA', 1, (29, 47), strand=1)
    primer.tail = Sequence('GACGCAAGTGAGCAGTATGAC')

    flipped = primer.rev_comp()
    expected_text = 'GACGCAAGTGAGCAGTATGACTCAACACTAGGACCGGCC'

    self.assertEqual(flipped.strand, -1)
    self.assertEqual(str(flipped), expected_text)
def test_multiple_binding_sites(self):
    """A single reference is expected to yield two spans for the primer."""
    reference = Sequence(
        'TGCGATGACGGAAAAAAAAAGGTGGTGGGAGTATGACGAAAATAAACCAGCGAAAATTAACCGCGCGGACTATTCATCAAGTCGTCCATTAGGAGTAGAGACGGAAAAAAAAAGGTGGTGGGAGTATGACGGATTTTCACAACCCAATTTGCCTATTTTATTTATTCATTGGAAAACTTGAAGAATGGAAATTTTGTGGTGCGATAATAGCATATTGTGTTGATGTTGTC'
    )
    probe = Primer(
        'GACGGAAAAAAAAAGGTGGTGGGAGTATGACG',
        allele1_span=(6, 38),
        allele2_span=(228, 263),
        strand=1,
    )

    hits = binding_sites([reference], probe)

    self.assertEqual([(6, 38), (99, 131)], [hit.span() for hit in hits])
def test_substitute_bases_one_snp_first(self):
    """
    One SNP on the first nucleotide, with the SNP position given as
    'first'.

    Because the SNP sits on the first nucleotide, the sequences have to be
    reverse complemented BEFORE their bases are substituted. The order of
    those two steps matters.

    The substitution tests are not exhaustive: there are far too many
    branches to cover every possible combination of SNPs. They only show
    that the logic is at least somewhat functional.
    """
    from starp.amasfactory import substitute_bases

    allele_pair = (
        Sequence('TGCCTAACATAGAAAACATTGAGGCA'),
        Sequence('AGCCTAACATAGAAAACATTGAGGCA'),
    )
    expected = (
        Sequence('TGCTTAACATAGAAAACATTGAGGCA'),
        Sequence('AGTCTAACATAGAAAACATTGAGGCA'),
    )

    self.assertEqual(expected, substitute_bases(allele_pair, 'first'))
def test_generate_amas_downstream_sub(self):
    """
    Check that the first downstream primer of each allele has the
    expected span.

    Both alleles are the same gap-free sequence here; only the span of the
    first primer returned for each allele is compared, not its sequence.
    """
    from starp.amasfactory import generate_amas_downstream

    alleles = {
        1: Sequence('TTGTCGTTGTGGAGTCTATATCAACCACCATTCTTAAT'),
        2: Sequence('TTGTCGTTGTGGAGTCTATATCAACCACCATTCTTAAT'),
    }

    # Generate for both alleles before asserting anything, allele 1 first.
    smallest = {
        num: generate_amas_downstream(seq, num, 30, 5, 10)[0]
        for num, seq in alleles.items()
    }
    wanted = {
        num: AmasPrimer(sequence='TTCTT', allele_num=num, span=(30, 35),
                        strand=1)
        for num in alleles
    }

    for num in alleles:
        self.assertEqual(wanted[num].span, smallest[num].span)
def test_generate_amas_downstream_del(self):
    """
    Indels need some manipulation for the spans to stay correct.

    The second allele carries a '-' gap near its end; the first primer
    generated for each allele is still expected to span (30, 35).
    """
    from starp.amasfactory import generate_amas_downstream

    alleles = {
        1: Sequence('TTGTCGTTGTGGAGTCTATATCAACCACCATTCTTAAT'),
        2: Sequence('TTGTCGTTGTGGAGTCTATATCAACCACCATTCTTA-T'),
    }

    # Generate for both alleles before asserting anything, allele 1 first.
    smallest = {
        num: generate_amas_downstream(seq, num, 30, 5, 10)[0]
        for num, seq in alleles.items()
    }
    wanted = {
        num: AmasPrimer(sequence='TTCTT', allele_num=num, span=(30, 35),
                        strand=1)
        for num in alleles
    }

    for num in alleles:
        self.assertEqual(wanted[num].span, smallest[num].span)
def test_rev_comp_binding_sites(self):
    """
    The reference holds the primer and, further along, its reverse
    complement; a span is expected for each of the two.
    """
    # Primer:
    #   TGACGGAAAAAAAAAGGTGGTGGGAGTA
    # Reverse complement:
    #   TACTCCCACCACCTTTTTTTTTCCGTCA
    reference = Sequence(
        'TGCGATGACGGAAAAAAAAAGGTGGTGGGAGTATGACGAAAATAAACCAGCGAAAATTAACCGCGCGGACTATTTACTCCCACCACCTTTTTTTTTCCGTCACATCAAGTCGTCCATTAGGAGTAG'
    )
    probe = Primer(
        'TGACGGAAAAAAAAAGGTGGTGGGAGTA',
        allele1_span=(5, 33),
        allele2_span=(228, 263),
        strand=1,
    )

    hits = binding_sites([reference], probe)

    self.assertEqual([(5, 33), (74, 102)], [hit.span() for hit in hits])
def test_fuzzy_binding_sites(self):
    """
    The reference contains a stretch that differs from the primer in a
    few bases (aligned below); two spans are expected in total.
    """
    # Primer (top) against the near-match present in the reference (bottom):
    #   ACGGAAAAAAAAAGGTGGTGGGAGTATGACGAAAAT
    #   ||||  |||||||| |||||| ||||||||||||||
    #   ACGGTTAAAAAAAGATGGTGGCAGTATGACGAAAAT
    reference = Sequence(
        'TGCGATGACGGAAAAAAAAAGGTGGTGGGAGTATGACGAAAATAAACCAGCGAAAATTAACCGCGCGGACTATTCATCAAGTCGTCCATTAGGAGTAGAGACGGAAAAAAAAAGGTGGTGGGAGTATGACGGACGGTTAAAAAAAGATGGTGGCAGTATGACGAAAATATTTTCACAACCCAATTTGCCTATTTTATTTATTCATTGGAAAACTTGAAGAATGGAAATTTTGTGGTGCGATAATAGCATATTGTGTTGATGTTGTC'
    )
    probe = Primer(
        'ACGGAAAAAAAAAGGTGGTGGGAGTATGACGAAAAT',
        allele1_span=(6, 38),
        allele2_span=(228, 263),
        strand=1,
    )

    hits = binding_sites([reference], probe)

    self.assertEqual([(7, 43), (132, 168)], [hit.span() for hit in hits])
def test_complement_invalid_chars(self):
    """A non-ACGT character ('N' here) is expected to come back unchanged."""
    with_unknown = Sequence('ACGTN')
    complemented = Sequence('TGCAN')

    self.assertEqual(with_unknown.complement(), complemented)
def test_gc_empty(self):
    """
    The gc value of an empty sequence is expected to be 0.

    The original note on this test reads "gc() divides by 0", so this is
    presumably a guard against a ZeroDivisionError -- TODO confirm.
    """
    empty = Sequence('')
    no_gc = 0

    self.assertEqual(empty.gc, no_gc)
def test_slice(self):
    """Slicing a Sequence with [2:5] is expected to equal Sequence('CGT')."""
    whole = Sequence('TGCGTACACGT')
    piece = Sequence('CGT')

    self.assertEqual(whole[2:5], piece)
def test_complement(self):
    """The complement of 'ACGT' is expected to be 'TGCA'."""
    forward = Sequence('ACGT')
    complemented = Sequence('TGCA')

    self.assertEqual(forward.complement(), complemented)
def test_gc(self):
    """Four of the eight bases are G or C, so gc is expected to be 0.50."""
    half_gc = Sequence('GAGATCTC')
    fraction = 0.50

    self.assertEqual(half_gc.gc, fraction)