Beispiel #1
0
def test_caching_cachedir():
    # For each loader, request caching into a custom directory, then check
    # that the file at the path computed by vcfnp.array._mk_cache_fn() holds
    # the same array the loader returned.
    vcf_fn = 'fixture/sample.vcf.gz'
    cachedir = 'fixture/custom.vcfnp_cache/foo'

    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, load in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn,
                                            array_type=array_type,
                                            cachedir=cachedir)
        debug(cache_fn)
        # remove any cache file left behind by an earlier run
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = load(vcf_fn, cache=True, verbose=True, cachedir=cachedir)
        a2 = np.load(cache_fn)
        # assert_array_equal checks shape as well as values, reports the
        # mismatch on failure, and is not stripped under ``python -O`` the
        # way a bare ``assert np.all(a == a2)`` would be
        np.testing.assert_array_equal(a, a2)
Beispiel #2
0
def test_condition():
    # Load every variant, then reload calldata and variants using the
    # FILTER/PASS column as ``condition``; the fixture is expected to give
    # 9 records unfiltered and 5 with the condition applied.
    vcf_path = 'fixture/sample.vcf'
    all_variants = variants(vcf_path)
    eq_(9, len(all_variants))
    passed = all_variants['FILTER']['PASS']
    eq_(5, len(calldata(vcf_path, condition=passed)))
    eq_(5, len(variants(vcf_path, condition=passed)))
Beispiel #3
0
def test_caching_cachedir():
    # Round-trip check for caching into a caller-supplied directory: what
    # each loader returns must equal what np.load() reads back from the
    # cache path computed by vcfnp.array._mk_cache_fn().
    vcf_fn = 'fixture/sample.vcf.gz'
    cachedir = 'fixture/custom.vcfnp_cache/foo'

    def check_roundtrip(array_type, loader):
        path = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type,
                                        cachedir=cachedir)
        debug(path)
        # drop any cache file left over from a previous run
        if os.path.exists(path):
            os.remove(path)
        loaded = loader(vcf_fn, cache=True, verbose=True, cachedir=cachedir)
        assert_array_equal(loaded, np.load(path))

    check_roundtrip('variants', variants)
    check_roundtrip('calldata', calldata)
    check_roundtrip('calldata_2d', calldata_2d)
Beispiel #4
0
def test_condition():
    # 9 records are expected without a condition; passing the FILTER/PASS
    # column as ``condition`` is expected to leave 5 for both loaders.
    fn = 'fixture/sample.vcf'
    v = variants(fn)
    eq_(9, len(v))
    for loader in (calldata, variants):
        filtered = loader(fn, condition=v['FILTER']['PASS'])
        eq_(5, len(filtered))
Beispiel #5
0
def test_caching_compression():
    # For each loader, request a compressed cache, then check that the
    # 'data' entry of the file at the path computed by
    # vcfnp.array._mk_cache_fn(..., compress=True) equals the array the
    # loader returned.
    vcf_fn = 'fixture/sample.vcf.gz'

    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, load in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type,
                                            compress=True)
        debug(cache_fn)
        # remove any cache file left behind by an earlier run
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = load(vcf_fn, cache=True, compress_cache=True, verbose=True)
        # np.load() on an archive returns an object that keeps the file
        # open; read the entry inside a ``with`` block so the handle is
        # closed instead of leaking until garbage collection
        with np.load(cache_fn) as npz:
            a2 = npz['data']
        assert_array_equal(a, a2)
Beispiel #6
0
def test_caching():
    # For each loader, request caching with the default cache location,
    # then check that the file at the path computed by
    # vcfnp.array._mk_cache_fn() holds the same array the loader returned.
    vcf_fn = 'fixture/sample.vcf.gz'

    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, load in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type)
        debug(cache_fn)
        # remove any cache file left behind by an earlier run
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = load(vcf_fn, cache=True, verbose=True)
        a2 = np.load(cache_fn)
        # assert_array_equal checks shape as well as values, reports the
        # mismatch on failure, and is not stripped under ``python -O`` the
        # way a bare ``assert np.all(a == a2)`` would be
        np.testing.assert_array_equal(a, a2)
Beispiel #7
0
def test_variable_calldata():
    # Check the AD and GT values of sample 'test2' for the first three
    # records of the fixture.
    sample = calldata('fixture/test1.vcf')['test2']
    for idx in range(3):
        eq_((1, 0), tuple(sample['AD'][idx]))
    for idx, expected_gt in enumerate((b'.', b'0', b'1')):
        eq_(expected_gt, sample['GT'][idx])
Beispiel #8
0
def test_variable_calldata():
    # Sample 'test2': AD is expected to be (1, 0) for records 0-2, while GT
    # is expected to be '.', '0' and '1' respectively.
    c = calldata('fixture/test1.vcf')
    depths = c['test2']['AD']
    for row in (0, 1, 2):
        eq_((1, 0), tuple(depths[row]))
    genotypes = c['test2']['GT']
    for row, gt in zip((0, 1, 2), (b'.', b'0', b'1')):
        eq_(gt, genotypes[row])
Beispiel #9
0
def test_calldata():
    # Spot-check individual calldata fields of the sample fixture.
    a = calldata('fixture/sample.vcf')
    debug(repr(a))

    # first record, sample NA00001
    first_call = a[0]['NA00001']
    eq_(b'0|0', first_call['GT'])
    eq_(True, first_call['is_called'])
    eq_(True, first_call['is_phased'])
    eq_((0, 0), tuple(first_call['genotype']))

    # records 6 and 7, sample NA00003: genotype expected to be (-1, -1)
    for row in (6, 7):
        eq_((-1, -1), tuple(a[row]['NA00003']['genotype']))

    eq_((10, 10), tuple(first_call['HQ']))
Beispiel #10
0
def test_calldata():
    # Load calldata for the sample fixture and verify selected fields.
    arr = calldata('fixture/sample.vcf')
    debug(repr(arr))
    rec0 = arr[0]['NA00001']
    scalar_checks = (
        ('GT', b'0|0'),
        ('is_called', True),
        ('is_phased', True),
    )
    for field, expected in scalar_checks:
        eq_(expected, rec0[field])
    eq_((0, 0), tuple(rec0['genotype']))
    eq_((-1, -1), tuple(arr[6]['NA00003']['genotype']))
    eq_((-1, -1), tuple(arr[7]['NA00003']['genotype']))
    eq_((10, 10), tuple(rec0['HQ']))
Beispiel #11
0
def test_truncate():
    # https://github.com/alimanfoo/vcfnp/issues/54

    vcf_fn = 'fixture/test54.vcf.gz'
    region = 'chr1:10-100'
    loaders = (variants, calldata, calldata_2d)

    # truncate by default
    for load in loaders:
        eq_(2, len(load(vcf_fn, region=region)))

    # don't truncate
    for load in loaders:
        eq_(3, len(load(vcf_fn, region=region, truncate=False)))
Beispiel #12
0
def test_truncate():
    # https://github.com/alimanfoo/vcfnp/issues/54

    vcf_fn = 'fixture/test54.vcf.gz'

    def count(loader, **kwargs):
        # number of records the loader returns for the fixed test region
        return len(loader(vcf_fn, region='chr1:10-100', **kwargs))

    # truncate by default
    eq_(2, count(variants))
    eq_(2, count(calldata))
    eq_(2, count(calldata_2d))

    # don't truncate
    eq_(3, count(variants, truncate=False))
    eq_(3, count(calldata, truncate=False))
    eq_(3, count(calldata_2d, truncate=False))
Beispiel #13
0
def test_missing_calldata_cleared():
    # Compare GT, genotype, DP and GQ of sample 'AC0093-C' for the first two
    # records of the fixture.
    c = calldata('fixture/test32.vcf')['AC0093-C']

    expectations = (
        # first variant, non-missing
        (0, b'0/0', (0, 0), 8, 3),
        # second variant, missing: DP and GQ should be default fill value
        (1, b'./.', (-1, -1), 0, 0),
    )
    for idx, gt, genotype, dp, gq in expectations:
        eq_(gt, c['GT'][idx])
        eq_(genotype, tuple(c['genotype'][idx]))
        eq_(dp, c['DP'][idx])
        eq_(gq, c['GQ'][idx])
Beispiel #14
0
def test_missing_calldata_cleared():
    # Sample 'AC0093-C': record 0 carries real values, record 1 is a missing
    # call whose DP and GQ should be the default fill value.
    sample = calldata('fixture/test32.vcf')['AC0093-C']

    def check(row, gt, genotype, dp, gq):
        eq_(gt, sample['GT'][row])
        eq_(genotype, tuple(sample['genotype'][row]))
        eq_(dp, sample['DP'][row])
        eq_(gq, sample['GQ'][row])

    # first variant, non-missing
    check(0, b'0/0', (0, 0), 8, 3)

    # second variant, missing
    check(1, b'./.', (-1, -1), 0, 0)
Beispiel #15
0
def test_missing_calldata():
    # For three calls with a missing GT, the genotype is expected to be
    # (-1, -1); AD is checked against the value given per case.
    c = calldata('fixture/test1.vcf')

    cases = (
        # first variant, second sample (AD data are present)
        ('test2', 0, b'.', (1, 0)),
        # third variant, third sample (AD is default fill)
        ('test3', 2, b'.', (0, 0)),
        # third variant, fourth sample (AD is default fill)
        ('test4', 2, b'./.', (0, 0)),
    )
    for sample, idx, gt, ad in cases:
        call = c[sample]
        eq_(gt, call['GT'][idx])
        eq_((-1, -1), tuple(call['genotype'][idx]))
        eq_(ad, tuple(call['AD'][idx]))
Beispiel #16
0
def test_missing_calldata():
    # Check GT, genotype and AD for calls whose GT is missing.
    data = calldata('fixture/test1.vcf')
    missing_genotype = (-1, -1)

    def check(sample, row, gt, ad):
        eq_(gt, data[sample]['GT'][row])
        eq_(missing_genotype, tuple(data[sample]['genotype'][row]))
        eq_(ad, tuple(data[sample]['AD'][row]))

    # first variant, second sample: AD data are present
    check('test2', 0, b'.', (1, 0))

    # third variant, third sample: AD is default fill
    check('test3', 2, b'.', (0, 0))

    # third variant, fourth sample: AD is default fill
    check('test4', 2, b'./.', (0, 0))
Beispiel #17
0
def test_calldata_region():
    # restricting the load to region '20' is expected to yield 6 records
    eq_(6, len(calldata('fixture/sample.vcf.gz', region='20')))
Beispiel #18
0
def test_missing_format_definition():
    # FORMAT field DP not declared in VCF header, so its type is passed
    # explicitly through ``vcf_types``
    declared_types = {'DP': 'Integer'}
    c = calldata('fixture/test14.vcf',
                 fields=['DP'],
                 vcf_types=declared_types)
    third_record = c[2]
    eq_(1, third_record['NA00001']['DP'])
Beispiel #19
0
def test_duplicate_field_definitions():
    # Both loaders are run against the fixture; neither call should raise,
    # but a useful message should be printed to stderr.
    for load in (variants, calldata):
        load('fixture/test10.vcf')
Beispiel #20
0
def test_duplicate_field_definitions():
    # The test passes as long as neither loader raises; a useful message
    # should be printed to stderr instead.
    vcf_fn = 'fixture/test10.vcf'
    variants(vcf_fn)
    calldata(vcf_fn)
Beispiel #21
0
def test_missing_format_definition():
    # FORMAT field DP not declared in VCF header; the caller supplies the
    # type via ``vcf_types`` and the value is then checked for record 2 of
    # sample NA00001.
    kwargs = dict(fields=['DP'], vcf_types={'DP': 'Integer'})
    c = calldata('fixture/test14.vcf', **kwargs)
    eq_(1, c[2]['NA00001']['DP'])
Beispiel #22
0
def test_calldata_region_empty():
    # both regions are expected to yield an empty array
    for region in ('18', '19:113-200'):
        a = calldata('fixture/sample.vcf.gz', region=region)
        eq_(0, len(a))
Beispiel #23
0
def test_calldata_region_empty():
    # Loading a region for which zero records are expected must give an
    # array of length 0.
    vcf_fn = 'fixture/sample.vcf.gz'
    eq_(0, len(calldata(vcf_fn, region='18')))
    eq_(0, len(calldata(vcf_fn, region='19:113-200')))
Beispiel #24
0
def test_calldata_region():
    # region '20' of the fixture is expected to contain 6 records
    region_calls = calldata('fixture/sample.vcf.gz', region='20')
    n_records = len(region_calls)
    eq_(6, n_records)