예제 #1
0
def test_VariantQueryable_to_vcf(tmp_path):
    """Export every queried variant to a VCF file and reload it.

    The export is requested with ``remove_samples=True`` and
    ``clean_info=True``; the reloaded file must report zero samples.
    """
    out_file = str(tmp_path / 'a.vcf')

    source = MultiSampleVCF(vcf_file)
    queried = source.query_all()
    queried.to_vcf(out_file, remove_samples=True, clean_info=True)

    # Re-open what was just written and check the sample columns are gone.
    reloaded = MultiSampleVCF(out_file)
    assert len(reloaded.samples) == 0
예제 #2
0
def test_VariantQueryable_to_sample_csv(tmp_path):
    """Dump all queried variants to a sample CSV and compare it to a fixed table.

    The CSV is read back with pandas and must equal a two-row frame with
    the columns ``variant``, ``sample`` and ``genotype``.
    """
    expected = pd.DataFrame({
        'variant': ['chr1:4:T>C', 'chr1:25:AACG>GA'],
        'sample': ['NA00003', 'NA00002'],
        'genotype': [3, 3]
    })

    csv_file = str(tmp_path / 'sample.csv')
    source = MultiSampleVCF(vcf_file)
    queried = source.query_all()
    queried.to_sample_csv(csv_file)

    written = pd.read_csv(csv_file)
    pd.testing.assert_frame_equal(written, expected)
예제 #3
0
def variant_queryable():
    """Return a VariantIntervalQueryable built from hand-written test data.

    It pairs two ``chr1`` variants (the second carrying ``filter='q10'``)
    with the interval ``chr1:10-20`` and one ``chr2`` variant with the
    interval ``chr2:110-200``, backed by a ``MultiSampleVCF`` opened on
    ``vcf_file``.

    NOTE(review): presumably used as a pytest fixture; no decorator is
    visible in this excerpt -- confirm against the full file.
    """
    vcf = MultiSampleVCF(vcf_file)

    chr1_pair = (
        [
            Variant('chr1', 12, 'A', 'T'),
            Variant('chr1', 18, 'A', 'C', filter='q10'),
        ],
        Interval('chr1', 10, 20),
    )
    chr2_pair = (
        [Variant('chr2', 120, 'AT', 'AAAT')],
        Interval('chr2', 110, 200),
    )

    return VariantIntervalQueryable(vcf, [chr1_pair, chr2_pair])
예제 #4
0
def test_VariantQueryable_batch_iter():
    """Check how ``batch_iter`` splits the queried variants for several batch sizes.

    A fresh ``MultiSampleVCF`` is opened for every batch size:

    * ``batch_size=1``  -> 3 batches
    * ``batch_size=2``  -> 2 batches holding 2 and 1 variants
    * ``batch_size=10`` -> 1 batch with all 3 variants on ``chr1:3-25``
    """
    # One variant per batch.
    reader = MultiSampleVCF(vcf_file)
    queried = reader.query_all()
    singles = list(queried.batch_iter(batch_size=1))
    assert len(singles) == 3

    # Two variants per batch: the last batch carries the remainder.
    reader = MultiSampleVCF(vcf_file)
    queried = reader.query_all()
    pairs = list(queried.batch_iter(batch_size=2))

    assert len(pairs) == 2
    first_batch, second_batch = pairs
    assert len(first_batch.variant_intervals[0][0]) == 2
    assert len(second_batch.variant_intervals[0][0]) == 1

    # Batch size larger than the variant count: everything in one batch.
    reader = MultiSampleVCF(vcf_file)
    queried = reader.query_all()
    everything = list(queried.batch_iter(batch_size=10))
    assert len(everything) == 1

    (only_batch,) = everything
    variants, interval = only_batch.variant_intervals[0]
    assert interval == Interval('chr1', 3, 25)
    assert len(variants) == 3