def test_partition_into_regions__num_parts(shared_datadir, vcf_file, is_path):
    """Split a VCF into regions with ``num_parts=4`` and check the counts.

    The per-region variant counts must add up to the variant count of
    the whole file.
    """
    vcf_path = path_for_test(shared_datadir, vcf_file, is_path)
    regions = partition_into_regions(vcf_path, num_parts=4)
    assert regions is not None

    # Tally the variants region by region, then compare with the
    # count obtained without any region restriction.
    counted_by_region = 0
    for region in regions:
        counted_by_region += count_variants(vcf_path, region)
    counted_overall = count_variants(vcf_path)

    assert counted_by_region == counted_overall
def test_partition_into_regions__num_parts_large(shared_datadir, is_path):
    """Request 100 parts from the CEUTrio gVCF and check the resulting regions.

    The test expects 18 regions back and that the per-region variant
    counts add up to the variant count of the whole file.
    """
    vcf_name = "CEUTrio.20.21.gatk3.4.g.vcf.bgz"
    vcf_path = path_for_test(shared_datadir, vcf_name, is_path)

    regions = partition_into_regions(vcf_path, num_parts=100)
    assert regions is not None
    assert len(regions) == 18

    # Tally the variants region by region, then compare with the
    # count obtained without any region restriction.
    counted_by_region = 0
    for region in regions:
        counted_by_region += count_variants(vcf_path, region)
    counted_overall = count_variants(vcf_path)

    assert counted_by_region == counted_overall
# Example #3
# 0
def test_record_counts_csi(shared_datadir, vcf_file, is_path):
    """Compare the record counts read from the CSI index with the VCF itself.

    For every sequence name reported by ``VCF(vcf_path).seqnames``, the
    count stored at the same position in the index must equal the number
    of variants counted for that contig.
    """
    vcf_path = path_for_test(shared_datadir, vcf_file, is_path)

    index_path = get_csi_path(vcf_path)
    assert index_path is not None
    index = read_csi(index_path)

    contig_names = VCF(vcf_path).seqnames
    for position, contig_name in enumerate(contig_names):
        indexed_count = index.record_counts[position]
        actual_count = count_variants(vcf_path, contig_name)
        assert indexed_count == actual_count
# Example #4
# 0
def test_record_counts_tbi(shared_datadir, vcf_file, is_path):
    """Compare the record counts read from the tabix index with the VCF itself.

    For every sequence name listed in the index, the count stored at the
    same position must equal the number of variants counted for that
    contig.
    """
    vcf_path = path_for_test(shared_datadir, vcf_file, is_path)

    index_path = get_tabix_path(vcf_path)
    assert index_path is not None
    index = read_tabix(index_path)

    for position, contig_name in enumerate(index.sequence_names):
        indexed_count = index.record_counts[position]
        actual_count = count_variants(vcf_path, contig_name)
        assert indexed_count == actual_count