예제 #1
0
 def test_transform_1(self):
     """Check simple het & hom indels applied from ``self.vcf1``.

     After transforming ``self.fa0``, the bases and the ``rel`` entries at
     the 1-based positions covering two insertions and one deletion are
     compared against fixed expected values, allele by allele.
     """
     ref = self.fa0
     mut = DipSeq(ref.seqid + '.mut',
                  ref.description,
                  size=ref.seqA.shape[0] * 2,
                  fold=ref.fold)
     mut.transform(ref, self.vcf1)

     def bases_at(seq, positions):
         # positions are 1-based while the arrays are 0-based
         return ''.join(base(seq[pos - 1]) for pos in positions)

     def rel_at(rel, positions):
         # same 1-based -> 0-based shift as bases_at
         return [rel[pos - 1] for pos in positions]

     # first insertion, checked on allele A
     first_insertion = range(4, 7)
     self.assertEqual(bases_at(mut.seqA, first_insertion), "AGG")
     self.assertEqual(rel_at(mut.relA, first_insertion), [4, 4, 4])

     # second insertion, checked on allele B
     second_insertion = range(5, 9)
     self.assertEqual(bases_at(mut.seqB, second_insertion), "CTTT")
     self.assertEqual(rel_at(mut.relB, second_insertion), [5, 5, 5, 5])

     # the deletion is checked on both alleles; the window on allele B
     # sits one position further along than the one on allele A
     deletion_on_a = range(15, 17)
     deletion_on_b = range(16, 18)
     self.assertEqual(bases_at(mut.seqA, deletion_on_a), "CC")
     self.assertEqual(rel_at(mut.relA, deletion_on_a), [13, 16])
     self.assertEqual(bases_at(mut.seqB, deletion_on_b), "CC")
     self.assertEqual(rel_at(mut.relB, deletion_on_b), [13, 16])
예제 #2
0
 def test_transform_4(self):
     """Disallow mutations overlapping deletions in the same vcf.

     Transforming ``self.fa0`` with ``self.vcf4`` must raise an exception
     whose message contains ``EXCEPT_MUT`` formatted with POS 7 and
     OLDPOS 5.
     """
     import re  # local import: only needed to escape the expected message

     refseq = self.fa0
     mutseq = DipSeq(refseq.seqid + '.mut',
                     refseq.description,
                     size=refseq.seqA.shape[0] * 2,
                     fold=refseq.fold)
     # assertRaisesRegex interprets its second argument as a regular
     # expression. The expected text is a literal message, so escape it to
     # stop any regex metacharacters in it from changing what is matched.
     expected_msg = re.escape(EXCEPT_MUT % {'POS': 7, 'OLDPOS': 5})
     with self.assertRaisesRegex(Exception, expected_msg):
         mutseq.transform(refseq, self.vcf4)
예제 #3
0
 def test_write(self):
     """Test the DipSeq.write() method.

     A DipSeq built from the five bytes 65, 67, 71, 84, 78 must be written
     as two records (``T.0`` and ``T.1``), each made of a header line, the
     line ``ACGTN`` and the line ``12345``.
     """
     hap_bytes = bytearray([65, 67, 71, 84, 78])
     dip = DipSeq("T", "TEST", hapseq=hap_bytes)
     sink = StringIO()
     dip.write(sink)
     expected = "".join([
         ">T.0 TEST\n",
         "ACGTN\n",
         "12345\n",
         ">T.1 TEST\n",
         "ACGTN\n",
         "12345\n",
     ])
     self.assertEqual(sink.getvalue(), expected)
예제 #4
0
 def test_mutagen(self):
     """Test both the mutagen and transform methods against golden files.

     The resource files ``self.mut_vcf`` and ``self.mut_seq`` are asserted
     to be valid "by inspection"; this test only requires that a seeded
     run reproduces them exactly. Validating them by hand means
     cross-referencing the randomly generated mutations in the vcf with
     the transformed sequence file.
     """
     def rendered(writable):
         # serialise an object through its write() method into a string
         sink = StringIO()
         writable.write(sink)
         return sink.getvalue()

     reference = self.fa1
     variants = VCF("sample1")
     # positional arguments handed to mutagen after (reference, variants)
     settings = (
         0.01,      # mut_rate
         0.333333,  # homo_frac
         0.15,      # indel_frac
         0.3,       # indel_extend
         1000,      # max_insertion
     )
     qasim.reseed(12345678)  # output is only deterministic with a fixed seed
     DipSeq.mutagen(reference, variants, *settings)

     produced_vcf = rendered(variants)
     with open(self.mut_vcf) as fh:
         golden_vcf = fh.read()
     self.assertEqual(produced_vcf, golden_vcf)

     mutated = DipSeq(reference.seqid + '.mut',
                      reference.description,
                      size=reference.seqA.shape[0] * 2,
                      fold=reference.fold)
     mutated.transform(reference, variants)

     produced_seq = rendered(mutated)
     with open(self.mut_seq) as fh:
         golden_seq = fh.read()
     self.assertEqual(produced_seq, golden_seq)
예제 #5
0
 def test_transform_0(self):
     """Check germline het & hom SNPs applied from ``self.vcf0``.

     Every position up to ``stopA`` of the reference is inspected: three
     positions carry an expected substitution, all others must equal the
     reference on both alleles.
     """
     ref = self.fa0
     mut = DipSeq(ref.seqid + '.mut',
                  ref.description,
                  size=ref.seqA.shape[0] * 2,
                  fold=ref.fold)
     mut.transform(ref, self.vcf0)
     for pos in range(1, ref.stopA + 1):  # VCF coords are 1-based
         idx = pos - 1
         ref_base = base(ref.seqA[idx])
         a_base = base(mut.seqA[idx])
         b_base = base(mut.seqB[idx])
         if pos == 5:
             # only allele B is changed here
             self.assertEqual(a_base, ref_base)
             self.assertEqual(b_base, "G")
         elif pos == 9:
             # only allele A is changed here
             self.assertEqual(a_base, "G")
             self.assertEqual(b_base, ref_base)
         elif pos == 13:
             # both alleles carry the same non-reference base
             self.assertEqual(a_base, "T")
             self.assertEqual(a_base, b_base)
             self.assertNotEqual(a_base, ref_base)
         else:
             # untouched positions match the reference on both alleles
             self.assertEqual(a_base, ref_base)
             self.assertEqual(b_base, ref_base)
예제 #6
0
 def test_transform_2(self):
     """Check complex overlapping mutations applied from ``self.vcf2``.

     Rather than testing the explicit logic of the transformation step by
     step, the written output is compared with text that is asserted to
     be valid "by inspection".
     """
     ref = self.fa0
     mut = DipSeq(ref.seqid + '.mut',
                  ref.description,
                  size=ref.seqA.shape[0] * 2,
                  fold=ref.fold)
     mut.transform(ref, self.vcf2)
     sink = StringIO()
     mut.write(sink)
     expected = "".join([
         ">TEST.mut.0 small fasta for testing\n",
         "AAAAGGCCGAAACCCC\n",
         "1234445690123456\n",
         ">TEST.mut.1 small fasta for testing\n",
         "AAAAGGCTTTCAAAACCCC\n",
         "1234445555690123456\n",
     ])
     self.assertEqual(sink.getvalue(), expected)
예제 #7
0
    def test_transform_3(self):
        """Check overlapping mutations in somatic mode.

        A germline sequence is first derived from ``self.fa0`` with
        ``self.vcfgrm``; a somatic sequence is then derived from that
        germline sequence with ``self.vcfsom``. The written form of both is
        compared with fixed text, and the somatic transform must emit the
        skip message for allele 1 at POS 5 on the captured error stream.
        """
        ref = self.fa0

        def blank(suffix):
            # empty DipSeq sized at twice the reference array length
            return DipSeq(ref.seqid + suffix,
                          ref.description,
                          size=ref.seqA.shape[0] * 2,
                          fold=ref.fold)

        def dump(seq):
            # serialise a DipSeq through write() into a string
            sink = StringIO()
            seq.write(sink)
            return sink.getvalue()

        germline = blank('.grm')
        germline.transform(ref, self.vcfgrm)
        self.assertEqual(dump(germline),
                         (">TEST.grm.0 small fasta for testing\n"
                          "AAAAGGCCCCAAAACCCC\n"
                          "123444567890123456\n"
                          ">TEST.grm.1 small fasta for testing\n"
                          "AAAAGGCCAAAACCCC\n"
                          "1234447890123456\n"))

        somatic = blank('.som')
        skip_notice = MSG_SKIP_MUT % {'allele': 1, 'POS': 5}
        with captured_output() as (_, captured_err):
            somatic.transform(germline, self.vcfsom)
            self.assertEqual(captured_err.getvalue(), skip_notice)

        # allele 0 receives both the somatic insertion at 4 and the somatic
        # deletion at 5; on allele 1 the deletion at 5 is not applied
        self.assertEqual(dump(somatic),
                         (">TEST.som.0 small fasta for testing\n"
                          "AAAATTGGCAAAACCCC\n"
                          "12344444590123456\n"
                          ">TEST.som.1 small fasta for testing\n"
                          "AAAATTGGCCAAAACCCC\n"
                          "123444447890123456\n"))
예제 #8
0
 def test_ctor_seq_and_size(self):
     """Test Exception is correctly raised for bad ctor args.

     Passing both a sequence and a size positionally to ``DipSeq`` must
     raise an exception whose message contains ``MSG_CTOR_SEQ_OR_SIZE``.
     """
     import re  # local import: only needed to escape the expected message

     # assertRaisesRegex interprets its second argument as a regular
     # expression. The expected text is a literal message, so escape it to
     # stop any regex metacharacters in it from changing what is matched.
     with self.assertRaisesRegex(Exception,
                                 re.escape(MSG_CTOR_SEQ_OR_SIZE)):
         DipSeq("T", "TEST", bytearray([65]), 1)
예제 #9
0
 def test_ctor_from_seq(self):
     """Test the constructor that takes a sequence argument.

     For a six-byte ``hapseq`` both stop markers must be 6 and ``seqA``
     must hold the values 0 through 5 in order.
     """
     hap_bytes = bytearray([65, 67, 71, 84, 78, 45])
     dip = DipSeq("T", "TEST", hapseq=hap_bytes)
     for stop in (dip.stopA, dip.stopB):
         self.assertEqual(stop, 6)
     self.assertEqual(list(dip.seqA), list(range(6)))
예제 #10
0
 def test_ctor_from_size(self):
     """Test the constructor that takes a size argument.

     A DipSeq created with ``size=6`` must report 6 for both stop markers.
     """
     dip = DipSeq("T", "TEST", size=6)
     for stop in (dip.stopA, dip.stopB):
         self.assertEqual(stop, 6)