Example #1
0
    def test_empy_snv(self):
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t./.\t./.\t./.\t./.\t./.\t./.
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts is None
Example #2
0
    def test_count_homo_haplotypes(self):
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 3

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t2/2\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 3

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        self.assertAlmostEqual(r_sqr,  1)

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t1/1\t1/1\t0/0\t1/1\t0/0\t1/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr - 1.0 < 0.0001

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t1/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr - 1.0 < 0.0001

        # monomorphic
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        assert _count_biallelic_haplotypes(call1, call2) is None
        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr is None
        assert fisher_exact(snps[0], snps[1]) is None

        # Ab and aB
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t0/0\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t0/0\t1/1\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 1

        # different major allele names in snp1 (1, 2) and snp2 (2,3)
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t0/0\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t3/3\t3/3\t3/3\t2/2\t2/2\t3/3\t3/3\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 0

        # missing data
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t./.\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t3/3\t3/3\t3/3\t2/2\t2/2\t3/3\t3/3\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 0