def test_no_allele_depths(self):
    """Applying the low-evidence filter to a GT-only record must raise.

    The single record declares ``GT`` as its only FORMAT field (no ``RO`` /
    ``AO`` counts), and the test expects ``LowEvidenceAlleleFilter`` to
    raise ``RuntimeError`` when it is called on the parsed SNV.
    """
    # NOTE(review): the header row and the record are separated by a space
    # in this literal; VCF normally puts them on separate lines -- confirm
    # against the upstream file.
    vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''
    vcf_fhand = StringIO(VCF_HEADER2 + vcf)
    snps = list(VCFReader(vcf_fhand, min_calls_for_pop_stats=4).parse_snvs())
    filter_ = LowEvidenceAlleleFilter(0.99)

    # assertRaises fails the test by itself when nothing is raised, so no
    # manual self.fail() / except-pass bookkeeping is needed.
    with self.assertRaises(RuntimeError):
        for snp in snps:
            filter_(snp)
def test_het_filter(self):
    """remove_gt_from_het_calls() must blank only the heterozygous call.

    The record holds three calls (0|0, 1|0, 1/1).  Before filtering their
    integer alleles are checked as parsed; afterwards the 1|0 call is
    expected to have an empty allele list while the other two are
    unchanged.
    """
    # NOTE(review): the header row and the record are separated by a space
    # in this literal; VCF normally puts them on separate lines -- confirm
    # against the upstream file.
    record_text = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,. '''
    reader = VCFReader(StringIO(VCF_HEADER + record_text))
    parsed_snvs = list(reader.parse_snvs())
    first_snv = parsed_snvs[0]

    # Alleles exactly as read from the file.
    alleles_before = [call.int_alleles for call in first_snv.calls]
    assert alleles_before == [[0, 0], [1, 0], [1, 1]]

    # Alleles once the heterozygous genotypes have been removed.
    without_hets = first_snv.remove_gt_from_het_calls()
    alleles_after = [call.int_alleles for call in without_hets.calls]
    assert alleles_after == [[0, 0], [], [1, 1]]
def test_no_geno_no_alle_freq(self):
    """With the reader's default settings every call ends up uncalled.

    Two records are parsed: one where all six genotypes are missing and
    one where all six are present.  After running
    ``LowEvidenceAlleleFilter(0.99)`` over both, the filter log must count
    all 12 calls under 'not_enough_individuals' and every call must report
    ``called`` as False.
    """
    # NOTE(review): the header row and the two records are separated by
    # spaces in this literal; VCF normally puts them on separate lines --
    # confirm against the upstream file.
    records_text = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t./.\t./.\t./.\t./.\t./.\t./. 20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''
    reader = VCFReader(StringIO(VCF_HEADER + records_text))
    parsed_snvs = list(reader.parse_snvs())

    low_evidence_filter = LowEvidenceAlleleFilter(0.99)
    filtered_snvs = []
    for snv in parsed_snvs:
        filtered_snvs.append(low_evidence_filter(snv))

    # Flatten the per-call "called" flags across both records.
    called_flags = []
    for snv in filtered_snvs:
        called_flags.extend(call.called for call in snv.calls)

    assert low_evidence_filter.log == {'tot': 12,
                                       'not_enough_individuals': 12}
    assert [False] * 12 == called_flags
def test_filter_low_alle_evidence_hw(self):
    """Filter built with default arguments on a record carrying RO/AO.

    The reader is created with ``min_calls_for_pop_stats=4``.  The test
    pins both the filter's log counters and the resulting GT strings, in
    which three homozygous calls have one allele replaced by '.'.
    """
    # NOTE(review): the header row and the record are separated by a space
    # in this literal; VCF normally puts them on separate lines -- confirm
    # against the upstream file.
    record_text = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT:RO:AO\t0/0:14:0\t1/1:0:15\t1/1:0:1\t0/0:1:0\t0/0:9:0\t0/1:1:1'''
    fhand = StringIO(VCF_HEADER + record_text)
    reader = VCFReader(fhand, min_calls_for_pop_stats=4)
    parsed_snvs = list(reader.parse_snvs())

    low_evidence_filter = LowEvidenceAlleleFilter()
    filtered_snvs = [low_evidence_filter(snv) for snv in parsed_snvs]

    expected_log = {
        'tot': 6,
        'enough_evidence': 2,
        'not_enough_evidence': 3,
        'was_het': 1,
    }
    assert low_evidence_filter.log == expected_log

    # Collect the genotype string of every call, record by record.
    genotypes = []
    for snv in filtered_snvs:
        for call in snv.calls:
            genotypes.append(call.call.data.GT)
    assert genotypes == ['0/0', '1/1', '1/.', '0/.', '0/.', '0/1']
def test_filter_low_alle_evidence_ril(self):
    """Filter configured with RIL_SELF genotypic frequencies, 7 generations.

    Same record as the default-settings variant, but the filter receives
    ``genotypic_freqs_method=RIL_SELF`` with ``n_generation`` set to 7.
    The test pins the log counters and the resulting GT strings; here the
    fifth call keeps its full '0/0' genotype.
    """
    # NOTE(review): the header row and the record are separated by a space
    # in this literal; VCF normally puts them on separate lines -- confirm
    # against the upstream file.
    record_text = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT:RO:AO\t0/0:14:0\t1/1:0:15\t1/1:0:1\t0/0:1:0\t0/0:9:0\t0/1:1:1'''

    # RIL population, n_generation = 7.
    fhand = StringIO(VCF_HEADER + record_text)
    parsed_snvs = list(VCFReader(fhand).parse_snvs())
    freq_kwargs = {'n_generation': 7}
    ril_filter = LowEvidenceAlleleFilter(genotypic_freqs_method=RIL_SELF,
                                         genotypic_freqs_kwargs=freq_kwargs)
    filtered_snvs = [ril_filter(snv) for snv in parsed_snvs]

    expected_log = {
        'tot': 6,
        'not_enough_evidence': 2,
        'enough_evidence': 3,
        'was_het': 1,
    }
    assert ril_filter.log == expected_log

    # Collect the genotype string of every call, record by record.
    genotypes = []
    for snv in filtered_snvs:
        for call in snv.calls:
            genotypes.append(call.call.data.GT)
    assert genotypes == ['0/0', '1/1', '1/.', '0/.', '0/0', '0/1']