def test_caching_compression():
    """Check that compressed caches written by each loader can be read back.

    For ``variants``, ``calldata`` and ``calldata_2d`` in turn: remove any
    existing cache file, load the fixture VCF with ``cache=True`` and
    ``compress_cache=True``, then compare the returned array with the
    ``'data'`` entry read from the cache file.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, loader in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type,
                                            compress=True)
        debug(cache_fn)
        # Remove any pre-existing cache file before calling the loader.
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = loader(vcf_fn, cache=True, compress_cache=True, verbose=True)
        # The cache is indexed by 'data', i.e. np.load hands back an archive
        # object here; close it explicitly instead of relying on garbage
        # collection to release the underlying file handle.
        with np.load(cache_fn) as archive:
            a2 = archive['data']
        assert_array_equal(a, a2)
def test_caching_cachedir():
    """Check caching into a custom cache directory for each loader.

    For ``variants``, ``calldata`` and ``calldata_2d`` in turn: remove any
    existing cache file, load the fixture VCF with ``cache=True`` and the
    custom ``cachedir``, then compare the returned array with the array
    loaded from the cache file.

    NOTE(review): a later function in this module has the same name and
    will replace this one when the module is imported -- confirm which
    definition should be kept.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    cachedir = 'fixture/custom.vcfnp_cache/foo'
    for kind, load in (('variants', variants),
                       ('calldata', calldata),
                       ('calldata_2d', calldata_2d)):
        cache_path = vcfnp.array._mk_cache_fn(vcf_fn, array_type=kind,
                                              cachedir=cachedir)
        debug(cache_path)
        # Remove any pre-existing cache file before calling the loader.
        if os.path.exists(cache_path):
            os.remove(cache_path)
        loaded = load(vcf_fn, cache=True, verbose=True, cachedir=cachedir)
        cached = np.load(cache_path)
        assert_array_equal(loaded, cached)
def test_caching_cachedir():
    """Check caching into a custom cache directory for each loader.

    For ``variants``, ``calldata`` and ``calldata_2d`` in turn: remove any
    existing cache file, load the fixture VCF with ``cache=True`` and the
    custom ``cachedir``, then compare the returned array with the array
    loaded from the cache file.

    NOTE(review): an earlier function in this module has the same name;
    this later definition is the one bound when the module is imported.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    cachedir = 'fixture/custom.vcfnp_cache/foo'
    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, loader in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type,
                                            cachedir=cachedir)
        debug(cache_fn)
        # Remove any pre-existing cache file before calling the loader.
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = loader(vcf_fn, cache=True, verbose=True, cachedir=cachedir)
        a2 = np.load(cache_fn)
        # assert_array_equal (already used by sibling tests in this module)
        # rejects shape mismatches that ``np.all(a == a2)`` could hide via
        # broadcasting, and reports the differing elements on failure.
        assert_array_equal(a, a2)
def test_caching():
    """Check that each loader's cache file matches the array it returned.

    For ``variants``, ``calldata`` and ``calldata_2d`` in turn: remove any
    existing cache file, load the fixture VCF with ``cache=True``, then
    compare the returned array with the array loaded from the cache file.
    """
    vcf_fn = 'fixture/sample.vcf.gz'
    loaders = (
        ('variants', variants),
        ('calldata', calldata),
        ('calldata_2d', calldata_2d),
    )
    for array_type, loader in loaders:
        cache_fn = vcfnp.array._mk_cache_fn(vcf_fn, array_type=array_type)
        debug(cache_fn)
        # Remove any pre-existing cache file before calling the loader.
        if os.path.exists(cache_fn):
            os.remove(cache_fn)
        a = loader(vcf_fn, cache=True, verbose=True)
        a2 = np.load(cache_fn)
        # assert_array_equal (already used by sibling tests in this module)
        # rejects shape mismatches that ``np.all(a == a2)`` could hide via
        # broadcasting, and reports the differing elements on failure.
        assert_array_equal(a, a2)
def test_truncate():
    """Check row counts for a region query with and without ``truncate=False``.

    Each of ``variants``, ``calldata`` and ``calldata_2d`` is expected to
    return 2 rows for the region by default and 3 rows when called with
    ``truncate=False``.
    """
    # https://github.com/alimanfoo/vcfnp/issues/54
    vcf_fn = 'fixture/test54.vcf.gz'
    region = 'chr1:10-100'
    loaders = (variants, calldata, calldata_2d)
    scenarios = (
        # truncate by default
        ({}, 2),
        # don't truncate
        ({'truncate': False}, 3),
    )
    for extra_kwargs, expected_len in scenarios:
        for loader in loaders:
            result = loader(vcf_fn, region=region, **extra_kwargs)
            eq_(expected_len, len(result))
def test_genotype_ac():
    """Check GT, genotype, ploidy and genotype_ac from ``calldata_2d``.

    Loads ``fixture/test63.vcf`` with ``ploidy=3`` and an arity of 3 for
    ``genotype_ac``, then compares each requested field against a
    hard-coded expected array.
    """
    callset = calldata_2d(
        'fixture/test63.vcf',
        fields=['GT', 'genotype', 'genotype_ac', 'ploidy'],
        ploidy=3,
        arities=dict(genotype_ac=3),
    )
    debug(repr(callset))

    # (field name, expected values), checked in this order.
    expectations = [
        # check GT
        ('GT', np.array([
            [b'0/0', b'0/0/0', b'0'],
            [b'1', b'0/1', b'0/1/2'],
            [b'././.', b'.', b'./3'],
            [b'././.', b'././.', b'././.'],
        ])),
        # check genotype
        ('genotype', np.array([
            [(0, 0, -1), (0, 0, 0), (0, -1, -1)],
            [(1, -1, -1), (0, 1, -1), (0, 1, 2)],
            [(-1, -1, -1), (-1, -1, -1), (-1, 3, -1)],
            [(-1, -1, -1), (-1, -1, -1), (-1, -1, -1)],
        ])),
        # check ploidy
        ('ploidy', np.array([
            [2, 3, 1],
            [1, 2, 3],
            [3, 1, 2],
            [-1, -1, -1],
        ])),
        # check genotype_ac
        ('genotype_ac', np.array([
            [(2, 0, 0), (3, 0, 0), (1, 0, 0)],
            [(0, 1, 0), (1, 1, 0), (1, 1, 1)],
            [(0, 0, 0), (0, 0, 0), (0, 0, 0)],
            [(-1, -1, -1), (-1, -1, -1), (-1, -1, -1)],
        ])),
    ]
    for field, expected in expectations:
        assert_array_equal(expected, callset[field])