def test_transform_1(self):
    """Simple het & hom indels: check bases and rel values around each indel.

    Applies ``self.vcf1`` to ``self.fa0`` and inspects fixed windows of the
    resulting sequences.  Window positions are 1-based, hence the ``- 1``
    when indexing into the arrays.
    """
    ref = self.fa0
    mutant = DipSeq(
        ref.seqid + '.mut',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    mutant.transform(ref, self.vcf1)

    def bases_at(seq, positions):
        # Decode the bases found at the given 1-based positions.
        return ''.join(base(seq[pos - 1]) for pos in positions)

    def rel_at(rel, positions):
        # Collect the rel entries at the given 1-based positions.
        # NOTE(review): rel presumably maps mutated positions back to
        # reference positions - confirm against DipSeq.
        return [rel[pos - 1] for pos in positions]

    # First insertion, checked on the A sequence.
    window = range(4, 7)
    self.assertEqual(bases_at(mutant.seqA, window), "AGG")
    self.assertEqual(rel_at(mutant.relA, window), [4, 4, 4])

    # Second insertion, checked on the B sequence.
    window = range(5, 9)
    self.assertEqual(bases_at(mutant.seqB, window), "CTTT")
    self.assertEqual(rel_at(mutant.relB, window), [5, 5, 5, 5])

    # The deletion is checked on both sequences; its window sits one
    # position later on B than on A.
    window_a = range(15, 17)
    window_b = range(16, 18)
    self.assertEqual(bases_at(mutant.seqA, window_a), "CC")
    self.assertEqual(rel_at(mutant.relA, window_a), [13, 16])
    self.assertEqual(bases_at(mutant.seqB, window_b), "CC")
    self.assertEqual(rel_at(mutant.relB, window_b), [13, 16])
def test_mutagen(self):
    """Exercise mutagen() and transform() against stored reference output.

    Rather than checking the transformation logic step by step, this test
    treats the resource files ``self.mut_vcf`` and ``self.mut_seq`` as
    valid "by inspection" and requires every run to reproduce them
    exactly.  Validating those files means cross-referencing the randomly
    generated mutations in the VCF with the transformed sequence file.
    """
    def dump(writable):
        # Render an object through its write() method into a string.
        buf = StringIO()
        writable.write(buf)
        return buf.getvalue()

    def slurp(path):
        # Read a whole fixture file as text.
        with open(path) as fh:
            return fh.read()

    ref = self.fa1
    vcf = VCF("sample1")

    # Mutation settings, passed positionally to mutagen() below.
    mut_rate = 0.01
    homo_frac = 0.333333
    indel_frac = 0.15
    indel_extend = 0.3
    max_insertion = 1000

    # The generated mutations are only reproducible with a fixed seed.
    qasim.reseed(12345678)
    DipSeq.mutagen(ref, vcf, mut_rate, homo_frac, indel_frac,
                   indel_extend, max_insertion)
    self.assertEqual(dump(vcf), slurp(self.mut_vcf))

    mutant = DipSeq(
        ref.seqid + '.mut',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    mutant.transform(ref, vcf)
    self.assertEqual(dump(mutant), slurp(self.mut_seq))
def test_transform_4(self):
    """Mutations overlapping a deletion in the same VCF must be rejected.

    Applying ``self.vcf4`` to ``self.fa0`` is expected to raise an
    exception whose message matches ``EXCEPT_MUT`` formatted with
    POS 7 and OLDPOS 5.
    """
    ref = self.fa0
    mutant = DipSeq(
        ref.seqid + '.mut',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    # NOTE(review): assertRaisesRegex treats this formatted message as a
    # regular expression, so any regex metacharacters in EXCEPT_MUT would
    # not be matched literally.
    pattern = EXCEPT_MUT % {'POS': 7, 'OLDPOS': 5}
    self.assertRaisesRegex(
        Exception, pattern, mutant.transform, ref, self.vcf4)
def test_transform_2(self):
    """Complex overlapping mutations: compare the full written output.

    Applies ``self.vcf2`` to ``self.fa0`` and checks everything that
    ``write()`` emits for the transformed sequence.
    """
    ref = self.fa0
    mutant = DipSeq(
        ref.seqid + '.mut',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    mutant.transform(ref, self.vcf2)

    written = StringIO()
    mutant.write(written)

    # A shortcut: instead of testing every explicit step of the
    # transformation, require equality with this output, which has been
    # judged valid "by inspection".
    expected = (
        ">TEST.mut.0 small fasta for testing\n"
        "AAAAGGCCGAAACCCC\n"
        "1234445690123456\n"
        ">TEST.mut.1 small fasta for testing\n"
        "AAAAGGCTTTCAAAACCCC\n"
        "1234445555690123456\n"
    )
    self.assertEqual(written.getvalue(), expected)
def test_transform_3(self):
    """Overlapping mutations in somatic mode.

    Two passes are checked: ``self.vcfgrm`` is applied to ``self.fa0``
    first, then ``self.vcfsom`` is applied on top of that result.  The
    second pass is expected to emit the ``MSG_SKIP_MUT`` message for
    allele 1, POS 5 on the captured error stream.
    """
    ref = self.fa0

    # --- first pass: germline mutations on the reference ---
    germline = DipSeq(
        ref.seqid + '.grm',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    germline.transform(ref, self.vcfgrm)
    germline_text = StringIO()
    germline.write(germline_text)
    expected_germline = (
        ">TEST.grm.0 small fasta for testing\n"
        "AAAAGGCCCCAAAACCCC\n"
        "123444567890123456\n"
        ">TEST.grm.1 small fasta for testing\n"
        "AAAAGGCCAAAACCCC\n"
        "1234447890123456\n"
    )
    self.assertEqual(germline_text.getvalue(), expected_germline)

    # --- second pass: somatic mutations on the germline result ---
    somatic = DipSeq(
        ref.seqid + '.som',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    skip_msg = MSG_SKIP_MUT % {'allele': 1, 'POS': 5}
    with captured_output() as (_, captured_err):
        somatic.transform(germline, self.vcfsom)
        # Checked while the capture is still active, so the result does
        # not depend on how captured_output() tears its streams down.
        self.assertEqual(captured_err.getvalue(), skip_msg)

    somatic_text = StringIO()
    somatic.write(somatic_text)
    # The somatic insertion at 4 and deletion at 5 are both applied to
    # allele 0; the somatic deletion at 5 is not applied to allele 1.
    expected_somatic = (
        ">TEST.som.0 small fasta for testing\n"
        "AAAATTGGCAAAACCCC\n"
        "12344444590123456\n"
        ">TEST.som.1 small fasta for testing\n"
        "AAAATTGGCCAAAACCCC\n"
        "123444447890123456\n"
    )
    self.assertEqual(somatic_text.getvalue(), expected_somatic)
def test_transform_0(self):
    """Germline het & hom SNPs: only positions 5, 9 and 13 may change.

    Applies ``self.vcf0`` to ``self.fa0`` and walks every index up to
    ``stopA``, comparing both transformed sequences against the
    reference's A sequence.
    """
    ref = self.fa0
    mutant = DipSeq(
        ref.seqid + '.mut',
        ref.description,
        size=ref.seqA.shape[0] * 2,
        fold=ref.fold,
    )
    mutant.transform(ref, self.vcf0)

    for idx in range(ref.stopA):
        vcf_pos = idx + 1  # VCF coordinates are 1-based
        ref_base = base(ref.seqA[idx])
        a_base = base(mutant.seqA[idx])
        b_base = base(mutant.seqB[idx])

        if vcf_pos not in (5, 9, 13):
            # Untouched position: both sequences still match the reference.
            self.assertEqual(a_base, ref_base)
            self.assertEqual(b_base, ref_base)
            continue

        if vcf_pos == 5:
            # SNP on the B sequence only.
            self.assertEqual(a_base, ref_base)
            self.assertEqual(b_base, "G")
        elif vcf_pos == 9:
            # SNP on the A sequence only.
            self.assertEqual(a_base, "G")
            self.assertEqual(b_base, ref_base)
        else:
            # Position 13: both sequences carry the same non-reference base.
            self.assertEqual(a_base, "T")
            self.assertEqual(a_base, b_base)
            self.assertNotEqual(a_base, ref_base)