Beispiel #1
0
 def test_transform_1(self):
     """Simple het & hom indels land at the expected coordinates.

     Applies ``self.vcf1`` to the ``self.fa0`` reference and spot-checks
     the bases (``seqA``/``seqB``) and the parallel ``relA``/``relB``
     values around two insertions and one deletion.
     """
     def bases_at(seq, positions):
         # Positions are 1-based; subtract one to index the array.
         return ''.join(base(seq[pos - 1]) for pos in positions)

     def values_at(rel, positions):
         return [rel[pos - 1] for pos in positions]

     reference = self.fa0
     # Output buffer is sized at twice the length of the reference's seqA
     # (presumably to leave headroom for insertions -- confirm in DipSeq).
     mutated = DipSeq(
         reference.seqid + '.mut',
         reference.description,
         size=reference.seqA.shape[0] * 2,
         fold=reference.fold,
     )
     mutated.transform(reference, self.vcf1)

     # First insertion, checked on seqA/relA.
     first_insertion = range(4, 7)
     self.assertEqual(bases_at(mutated.seqA, first_insertion), "AGG")
     self.assertEqual(values_at(mutated.relA, first_insertion), [4, 4, 4])

     # Second insertion, checked on seqB/relB.
     second_insertion = range(5, 9)
     self.assertEqual(bases_at(mutated.seqB, second_insertion), "CTTT")
     self.assertEqual(values_at(mutated.relB, second_insertion),
                      [5, 5, 5, 5])

     # The deletion is inspected one position later on seqB than on seqA,
     # yet both sides are expected to show the same bases and rel values.
     deletion_on_a = range(15, 17)
     deletion_on_b = range(16, 18)
     self.assertEqual(bases_at(mutated.seqA, deletion_on_a), "CC")
     self.assertEqual(values_at(mutated.relA, deletion_on_a), [13, 16])
     self.assertEqual(bases_at(mutated.seqB, deletion_on_b), "CC")
     self.assertEqual(values_at(mutated.relB, deletion_on_b), [13, 16])
Beispiel #2
0
 def test_mutagen(self):
     """Run mutagen then transform and compare both with golden files.

     This is a regression-style test: the resource files ``self.mut_vcf``
     and ``self.mut_seq`` are asserted to be valid "by inspection", and
     every run must reproduce them exactly. Cross-referencing the
     randomly generated mutations in the VCF against the transformed
     sequence file is how they were validated -- worth eyeballing.
     """
     reference = self.fa1
     variants = VCF("sample1")

     mutation_rate = 0.01
     homozygous_fraction = 0.333333
     indel_fraction = 0.15
     indel_extension = 0.3
     insertion_limit = 1000

     # Output is deterministic only if the generator is seeded first.
     qasim.reseed(12345678)
     DipSeq.mutagen(
         reference,
         variants,
         mutation_rate,
         homozygous_fraction,
         indel_fraction,
         indel_extension,
         insertion_limit,
     )

     # The generated VCF must match the stored one byte for byte.
     vcf_buffer = StringIO()
     variants.write(vcf_buffer)
     with open(self.mut_vcf) as golden_vcf:
         expected_vcf = golden_vcf.read()
     self.assertEqual(vcf_buffer.getvalue(), expected_vcf)

     # Applying that VCF to the reference must match the stored sequence.
     mutated = DipSeq(
         reference.seqid + '.mut',
         reference.description,
         size=reference.seqA.shape[0] * 2,
         fold=reference.fold,
     )
     mutated.transform(reference, variants)
     seq_buffer = StringIO()
     mutated.write(seq_buffer)
     with open(self.mut_seq) as golden_seq:
         expected_seq = golden_seq.read()
     self.assertEqual(seq_buffer.getvalue(), expected_seq)
Beispiel #3
0
 def test_transform_4(self):
     """Mutations overlapping a deletion in the same VCF are rejected.

     Transforming ``self.fa0`` with ``self.vcf4`` must raise an exception
     whose message matches ``EXCEPT_MUT`` filled in with POS 7 and
     OLDPOS 5.
     """
     reference = self.fa0
     mutated = DipSeq(
         reference.seqid + '.mut',
         reference.description,
         size=reference.seqA.shape[0] * 2,
         fold=reference.fold,
     )
     # NOTE(review): the formatted message is used as a regex pattern as-is.
     error_pattern = EXCEPT_MUT % dict(POS=7, OLDPOS=5)
     with self.assertRaisesRegex(Exception, error_pattern):
         mutated.transform(reference, self.vcf4)
Beispiel #4
0
 def test_transform_2(self):
     """Complex overlapping mutations produce the expected written output.

     Rather than testing the transformation logic step by step, this
     takes a shortcut: the serialised result is compared against output
     asserted to be valid "by inspection".
     """
     reference = self.fa0
     mutated = DipSeq(
         reference.seqid + '.mut',
         reference.description,
         size=reference.seqA.shape[0] * 2,
         fold=reference.fold,
     )
     mutated.transform(reference, self.vcf2)

     buffer = StringIO()
     mutated.write(buffer)

     # Two records (".0" and ".1"), each a header line, a base line and a
     # digit line of the same length as the bases.
     expected_lines = (
         ">TEST.mut.0 small fasta for testing\n",
         "AAAAGGCCGAAACCCC\n",
         "1234445690123456\n",
         ">TEST.mut.1 small fasta for testing\n",
         "AAAAGGCTTTCAAAACCCC\n",
         "1234445555690123456\n",
     )
     self.assertEqual(buffer.getvalue(), ''.join(expected_lines))
Beispiel #5
0
    def test_transform_3(self):
        """Overlapping mutations in somatic mode.

        First applies ``self.vcfgrm`` to the reference to build a germline
        sequence, then applies ``self.vcfsom`` on top of that result. The
        second step is expected to report one skipped mutation (allele 1,
        POS 5) on the captured error stream.
        """
        reference = self.fa0

        # Step 1: germline sequence derived from the reference.
        germline = DipSeq(
            reference.seqid + '.grm',
            reference.description,
            size=reference.seqA.shape[0] * 2,
            fold=reference.fold,
        )
        germline.transform(reference, self.vcfgrm)
        germline_buffer = StringIO()
        germline.write(germline_buffer)
        expected_germline = ''.join((
            ">TEST.grm.0 small fasta for testing\n",
            "AAAAGGCCCCAAAACCCC\n",
            "123444567890123456\n",
            ">TEST.grm.1 small fasta for testing\n",
            "AAAAGGCCAAAACCCC\n",
            "1234447890123456\n",
        ))
        self.assertEqual(germline_buffer.getvalue(), expected_germline)

        # Step 2: somatic sequence derived from the germline one.
        somatic = DipSeq(
            reference.seqid + '.som',
            reference.description,
            size=reference.seqA.shape[0] * 2,
            fold=reference.fold,
        )
        with captured_output() as (_captured_out, captured_err):
            somatic.transform(germline, self.vcfsom)
            skip_notice = MSG_SKIP_MUT % {'allele': 1, 'POS': 5}
            self.assertEqual(captured_err.getvalue(), skip_notice)

        somatic_buffer = StringIO()
        somatic.write(somatic_buffer)
        # Allele 0 receives both the somatic insertion at 4 and the
        # deletion at 5; on allele 1 the deletion at 5 is not applied.
        expected_somatic = ''.join((
            ">TEST.som.0 small fasta for testing\n",
            "AAAATTGGCAAAACCCC\n",
            "12344444590123456\n",
            ">TEST.som.1 small fasta for testing\n",
            "AAAATTGGCCAAAACCCC\n",
            "123444447890123456\n",
        ))
        self.assertEqual(somatic_buffer.getvalue(), expected_somatic)
Beispiel #6
0
 def test_transform_0(self):
     """Germline het & hom SNPs change only the expected positions.

     After applying ``self.vcf0``, every position up to ``stopA`` must
     equal the reference on both ``seqA`` and ``seqB``, except for
     positions 5, 9 and 13, which are checked individually.
     """
     reference = self.fa0
     mutated = DipSeq(
         reference.seqid + '.mut',
         reference.description,
         size=reference.seqA.shape[0] * 2,
         fold=reference.fold,
     )
     mutated.transform(reference, self.vcf0)

     # Walk 1-based VCF coordinates; the arrays are indexed from zero.
     for pos in range(1, reference.stopA + 1):
         idx = pos - 1
         base_a = base(mutated.seqA[idx])
         base_b = base(mutated.seqB[idx])
         ref_base = base(reference.seqA[idx])

         if pos == 5:
             # Only seqB carries the change.
             self.assertEqual(base_a, ref_base)
             self.assertEqual(base_b, "G")
         elif pos == 9:
             # Only seqA carries the change.
             self.assertEqual(base_a, "G")
             self.assertEqual(base_b, ref_base)
         elif pos == 13:
             # Both sequences carry the same change away from the reference.
             self.assertEqual(base_a, "T")
             self.assertEqual(base_a, base_b)
             self.assertNotEqual(base_a, ref_base)
         else:
             # Untouched position: both sequences equal the reference.
             self.assertEqual(base_a, ref_base)
             self.assertEqual(base_b, ref_base)