def test_zoomify():
    """Zoomify the toy asymmetric 2-resolution cooler and compare every level.

    Two successful runs are made into the same output file: one that omits
    the base resolution from the request and one that lists it. After each
    run, resolutions 2 through 32 of the output are compared against the
    reference ``toy.asymm.<res>.cool`` files. A final run with a resolution
    list containing 5 must raise ``ValueError``.
    """
    options = {
        "chunksize": 10,
        "nproc": 1,
        "columns": None,
        "dtypes": None,
        "agg": None,
    }
    base_uri = op.join(datadir, "toy.asymm.2.cool")
    out_file = "test.2.mcool"
    requests = (
        [4, 8, 16, 32],
        # include base resolution
        [2, 4, 8, 16, 32],
    )

    with isolated_filesystem():
        for requested in requests:
            zoomify_cooler(base_uri, out_file, resolutions=requested, **options)
            # Resolution 2 is compared in both cases, whether or not it
            # was part of the request.
            for res in [2, 4, 8, 16, 32]:
                produced = "test.2.mcool::resolutions/{}".format(res)
                reference = op.join(datadir, "toy.asymm.{}.cool".format(res))
                cooler_cmp(produced, reference)

        # impossible resolution to obtain
        with pytest.raises(ValueError):
            zoomify_cooler(base_uri, out_file, resolutions=[4, 5, 32], **options)
def test_zoomify():
    """Zoomify the toy asymmetric cooler and compare each resolution level.

    The output multi-resolution file is built from ``toy.asymm.2.cool`` with
    requested resolutions 4, 8, 16 and 32; resolutions 2 through 32 of the
    result are then compared against the reference ``toy.asymm.<res>.cool``
    files.
    """
    options = {
        "chunksize": 10,
        "nproc": 1,
        "columns": None,
        "dtypes": None,
        "agg": None,
    }
    with isolated_filesystem():
        base_uri = op.join(datadir, "toy.asymm.2.cool")
        zoomify_cooler(base_uri, "test.2.mcool", resolutions=[4, 8, 16, 32], **options)

        # Resolution 2 was not requested but is still compared.
        for res in [2, 4, 8, 16, 32]:
            produced = "test.2.mcool::resolutions/{}".format(res)
            reference = op.join(datadir, "toy.asymm.{}.cool".format(res))
            cooler_cmp(produced, reference)
def test_mv():
    """Check that ``fileops.mv`` moves a group to a new path within a file.

    Two copies of ``toy.symm.upper.2.mcool`` are made: an untouched
    reference and a working copy. After moving ``resolutions/2`` to
    ``abc/d`` in the working copy, the old path must be gone, the new path
    must exist, and the moved data must compare equal to the reference.
    """
    with isolated_filesystem():
        ref_file = "test.ref.mcool"
        src_file = "test.src.mcool"
        shutil.copyfile(op.join(testdir, "data", "toy.symm.upper.2.mcool"), ref_file)
        shutil.copyfile(op.join(testdir, "data", "toy.symm.upper.2.mcool"), src_file)

        fileops.mv(src_file + "::resolutions/2", src_file + "::abc/d")

        # Open read-only explicitly: the checks below never write, and the
        # default mode of h5py.File differs between h5py versions.
        with h5py.File(src_file, "r") as f:
            assert "resolutions/2" not in f
            assert "abc/d" in f
        cooler_cmp(ref_file + "::resolutions/2", src_file + "::abc/d")
def test_mv():
    """Check that ``fileops.mv`` moves a group to a new path within a file.

    Two copies of ``toy.symm.upper.2.mcool`` are made: an untouched
    reference and a working copy. After moving ``resolutions/2`` to
    ``abc/d`` in the working copy, the old path must be gone, the new path
    must exist, and the moved data must compare equal to the reference.
    """
    with isolated_filesystem():
        ref_file = 'test.ref.mcool'
        src_file = 'test.src.mcool'
        shutil.copyfile(op.join(testdir, 'data', 'toy.symm.upper.2.mcool'), ref_file)
        shutil.copyfile(op.join(testdir, 'data', 'toy.symm.upper.2.mcool'), src_file)

        fileops.mv(src_file + '::resolutions/2', src_file + '::abc/d')

        # Open read-only explicitly: the checks below never write, and the
        # default mode of h5py.File differs between h5py versions.
        with h5py.File(src_file, 'r') as f:
            assert 'resolutions/2' not in f
            assert 'abc/d' in f
        cooler_cmp(ref_file + '::resolutions/2', src_file + '::abc/d')
def test_coarsen(input_uri, factor, ref_uri):
    """Coarsen ``input_uri`` by ``factor`` and compare against ``ref_uri``.

    Parameters
    ----------
    input_uri : cooler URI passed to ``coarsen_cooler`` as the input.
    factor : coarsening factor passed through to ``coarsen_cooler``.
    ref_uri : cooler URI the coarsened output is compared against.
    """
    options = {
        'chunksize': 10,
        'nproc': 1,
        'columns': None,
        'dtypes': None,
        'agg': None,
    }
    out_uri = 'test.cool'
    with isolated_filesystem():
        coarsen_cooler(input_uri, out_uri, factor, **options)
        cooler_cmp(out_uri, ref_uri)
def test_zoomify():
    """Zoomify the toy asymmetric cooler and compare each resolution level.

    The output multi-resolution file is built from
    ``data/toy.asymm.2.cool`` with requested resolutions 4, 8, 16 and 32;
    resolutions 2 through 32 of the result are then compared against the
    reference ``toy.asymm.<res>.cool`` files.
    """
    options = {
        'chunksize': 10,
        'nproc': 1,
        'columns': None,
        'dtypes': None,
        'agg': None,
    }
    with isolated_filesystem():
        base_uri = op.join(testdir, 'data', 'toy.asymm.2.cool')
        zoomify_cooler(base_uri, 'test.2.mcool', resolutions=[4, 8, 16, 32], **options)

        # Resolution 2 was not requested but is still compared.
        for res in [2, 4, 8, 16, 32]:
            produced = 'test.2.mcool::resolutions/{}'.format(res)
            reference = op.join(testdir, 'data', 'toy.asymm.{}.cool'.format(res))
            cooler_cmp(produced, reference)
def test_cp():
    """Check ``fileops.cp`` for file-to-file and within-file copies.

    The file-to-file case copies ``resolutions/2`` of the toy mcool into a
    standalone ``test.2.cool``. The within-file case copies the same group
    to ``abc/d`` inside a working copy and verifies that both paths exist
    and refer to distinct HDF5 objects (a copy, not a link).
    """
    with isolated_filesystem():
        src_file = op.join(testdir, "data", "toy.symm.upper.2.mcool")

        # file-to-file
        src_uri = src_file + "::resolutions/2"
        fileops.cp(src_uri, "test.2.cool")
        cooler_cmp(src_uri, "test.2.cool")

        # within-file
        test_file = "test.src.mcool"
        shutil.copyfile(src_file, test_file)
        fileops.cp(test_file + "::resolutions/2", test_file + "::abc/d")
        cooler_cmp(test_file + "::resolutions/2", test_file + "::abc/d")

        # Open read-only explicitly: the checks below never write, and the
        # default mode of h5py.File differs between h5py versions.
        with h5py.File(test_file, "r") as f:
            assert "resolutions/2" in f
            assert "abc/d" in f
            assert f["resolutions/2"].id != f["abc/d"].id
def test_cp():
    """Check ``fileops.cp`` for file-to-file and within-file copies.

    The file-to-file case copies ``resolutions/2`` of the toy mcool into a
    standalone ``test.2.cool``. The within-file case copies the same group
    to ``abc/d`` inside a working copy and verifies that both paths exist
    and refer to distinct HDF5 objects (a copy, not a link).
    """
    # The context manager's value was bound to an unused name; dropped.
    with isolated_filesystem():
        src_file = op.join(testdir, 'data', 'toy.symm.upper.2.mcool')

        # file-to-file
        src_uri = src_file + '::resolutions/2'
        fileops.cp(src_uri, 'test.2.cool')
        cooler_cmp(src_uri, 'test.2.cool')

        # within-file
        test_file = 'test.src.mcool'
        shutil.copyfile(src_file, test_file)
        fileops.cp(test_file + '::resolutions/2', test_file + '::abc/d')
        cooler_cmp(test_file + '::resolutions/2', test_file + '::abc/d')

        # Open read-only explicitly: the checks below never write, and the
        # default mode of h5py.File differs between h5py versions.
        with h5py.File(test_file, 'r') as f:
            assert 'resolutions/2' in f
            assert 'abc/d' in f
            assert f['resolutions/2'].id != f['abc/d'].id
def test_coarsen(input_uri, factor, ref_uri):
    """Exercise ``coarsen_cooler`` under several option combinations.

    Parameters
    ----------
    input_uri : cooler URI passed to ``coarsen_cooler`` as the input.
    factor : coarsening factor passed through to ``coarsen_cooler``.
    ref_uri : cooler URI the default-options output is compared against.

    Only the default run is compared with the reference. The custom-dtype
    run checks the stored ``pixels/count`` dtype; the custom-aggregator and
    parallel runs only need to complete; a missing value column must raise
    ``ValueError``.
    """
    out_uri = "test.cool"

    with isolated_filesystem():
        default_opts = {
            "chunksize": 10,
            "nproc": 1,
            "columns": None,
            "dtypes": None,
            "agg": None,
        }
        coarsen_cooler(input_uri, out_uri, factor, **default_opts)
        cooler_cmp(out_uri, ref_uri)

        # custom dtype (no ``agg`` argument is passed in this case)
        float_opts = {
            "chunksize": 10,
            "nproc": 1,
            "columns": None,
            "dtypes": {'count': np.float64},
        }
        coarsen_cooler(input_uri, out_uri, factor, **float_opts)
        with h5py.File('test.cool', 'r') as f:
            count_dtype = f['pixels/count'].dtype
            assert count_dtype.kind == 'f'

        # custom aggregator
        mean_opts = {
            "chunksize": 10,
            "nproc": 1,
            "columns": None,
            "dtypes": None,
            "agg": {'count': 'mean'},
        }
        coarsen_cooler(input_uri, out_uri, factor, **mean_opts)

        # parallel
        parallel_opts = {
            "chunksize": 10,
            "nproc": 2,
            "columns": None,
            "dtypes": None,
            "agg": None,
        }
        coarsen_cooler(input_uri, out_uri, factor, **parallel_opts)

        # raise on missing value column
        missing_opts = {
            "chunksize": 10,
            "nproc": 2,
            "columns": ['missing'],
            "dtypes": None,
            "agg": None,
        }
        with pytest.raises(ValueError):
            coarsen_cooler(input_uri, out_uri, factor, **missing_opts)
def test_ln():
    """Check ``fileops.ln`` for hard, soft and external links.

    For the two within-file cases, ``resolutions/2`` is linked to ``abc/d``
    in a working copy of the toy mcool; both paths must exist, resolve to
    the same HDF5 object, and compare equal. For the between-file case a
    link is created in a separate destination file and only the data
    comparison is performed.
    """

    def check_same_object(path):
        # Both paths must exist and resolve to one underlying object.
        # Opened read-only explicitly: nothing here writes, and the default
        # mode of h5py.File differs between h5py versions.
        with h5py.File(path, "r") as f:
            assert "resolutions/2" in f
            assert "abc/d" in f
            assert f["resolutions/2"].id == f["abc/d"].id
        cooler_cmp(path + "::resolutions/2", path + "::abc/d")

    with isolated_filesystem():
        src_file = op.join(testdir, "data", "toy.symm.upper.2.mcool")

        # within-file hard link
        test_file = "test.hardlink.mcool"
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + "::resolutions/2", test_file + "::abc/d")
        check_same_object(test_file)

        # within-file soft link
        test_file = "test.softlink.mcool"
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + "::resolutions/2", test_file + "::abc/d", soft=True)
        check_same_object(test_file)

        # between-file external link
        test_file = "test.extlink.mcool"
        dst_file = "test.dst.cool"
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + "::resolutions/2", dst_file + "::abc/d", soft=True)
        cooler_cmp(test_file + "::resolutions/2", dst_file + "::abc/d")
def test_ln():
    """Check ``fileops.ln`` for hard, soft and external links.

    For the two within-file cases, ``resolutions/2`` is linked to ``abc/d``
    in a working copy of the toy mcool; both paths must exist, resolve to
    the same HDF5 object, and compare equal. For the between-file case a
    link is created in a separate destination file and only the data
    comparison is performed.
    """

    def check_same_object(path):
        # Both paths must exist and resolve to one underlying object.
        # Opened read-only explicitly: nothing here writes, and the default
        # mode of h5py.File differs between h5py versions.
        with h5py.File(path, 'r') as f:
            assert 'resolutions/2' in f
            assert 'abc/d' in f
            assert f['resolutions/2'].id == f['abc/d'].id
        cooler_cmp(path + '::resolutions/2', path + '::abc/d')

    # The context manager's value was bound to an unused name; dropped.
    with isolated_filesystem():
        src_file = op.join(testdir, 'data', 'toy.symm.upper.2.mcool')

        # within-file hard link
        test_file = 'test.hardlink.mcool'
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + '::resolutions/2', test_file + '::abc/d')
        check_same_object(test_file)

        # within-file soft link
        test_file = 'test.softlink.mcool'
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + '::resolutions/2', test_file + '::abc/d', soft=True)
        check_same_object(test_file)

        # between-file external link
        test_file = 'test.extlink.mcool'
        dst_file = 'test.dst.cool'
        shutil.copyfile(src_file, test_file)
        fileops.ln(test_file + '::resolutions/2', dst_file + '::abc/d', soft=True)
        cooler_cmp(test_file + '::resolutions/2', dst_file + '::abc/d')
def test_dump():
    """Round-trip the ``dump`` CLI output for symmetric and square coolers.

    For each input, the dumped bins and pixels tables are fed back into
    ``cooler.create_cooler`` and the result is compared with the input.
    The ``--matrix``/``-m`` output is additionally checked against the
    plain pixel dump, both for the whole file and for a chr2-by-chr1 query.
    """
    runner = CliRunner()

    def read_dump(path, *flags):
        # Invoke ``dump`` on ``path`` with ``flags`` and parse the
        # tab-separated output into a DataFrame.
        text = runner.invoke(dump, [path] + list(flags)).output
        return pd.read_csv(StringIO(text), sep="\t")

    with runner.isolated_filesystem():
        symm_uri = op.join(datadir, "toy.symm.upper.2.cool")
        result = runner.invoke(dump, [symm_uri])
        assert result.exit_code == 0

        # roundtrip symm-upper data
        bins = read_dump(symm_uri, "-H", "-t", "bins")
        pixels = read_dump(symm_uri, "-H")
        cooler.create_cooler("out.cool", bins, pixels, symmetric_upper=True)
        cooler_cmp(symm_uri, "out.cool")

        # duplexed output
        pixels2 = read_dump(symm_uri, "--matrix", "-H")
        assert len(pixels2) > len(pixels)
        in_upper = pixels2["bin1_id"] <= pixels2["bin2_id"]
        upper = pixels2[in_upper].reset_index(drop=True)
        assert np.allclose(pixels, upper)

        # lower triangle
        trans_lower = read_dump(symm_uri, "-H", "-r", "chr2", "-r2", "chr1")
        assert len(trans_lower) == 0
        trans_lower = read_dump(symm_uri, "-m", "-H", "-r", "chr2", "-r2", "chr1")
        assert len(trans_lower) > 0

        # roundtrip square data
        square_uri = op.join(datadir, "toy.asymm.2.cool")
        bins = read_dump(square_uri, "-H", "-t", "bins")
        pixels = read_dump(square_uri, "-H")
        cooler.create_cooler("out.cool", bins, pixels, symmetric_upper=False)
        cooler_cmp(square_uri, "out.cool")
        pixels2 = read_dump(square_uri, "--matrix", "-H")
        assert np.allclose(pixels, pixels2)

        # for square data, -m is a no-op
        lower1 = read_dump(square_uri, "-H", "-r", "chr2", "-r2", "chr1")
        lower2 = read_dump(square_uri, "-m", "-H", "-r", "chr2", "-r2", "chr1")
        assert np.allclose(lower1, lower2)
def test_dump():
    """Round-trip the ``dump`` CLI output for symmetric and square coolers.

    For each input, the dumped bins and pixels tables are fed back into
    ``cooler.create_cooler`` and the result is compared with the input.
    The ``--matrix``/``-m`` output is additionally checked against the
    plain pixel dump, both for the whole file and for a chr2-by-chr1 query.
    """
    runner = CliRunner()

    def table_from(cli_args):
        # Run ``dump`` with ``cli_args`` and parse its tab-separated output.
        captured = runner.invoke(dump, cli_args).output
        return pd.read_csv(StringIO(captured), sep='\t')

    with runner.isolated_filesystem():
        f_in = op.join(datadir, 'toy.symm.upper.2.cool')
        result = runner.invoke(dump, [f_in])
        assert result.exit_code == 0

        # roundtrip symm-upper data
        bins = table_from([f_in, '-H', '-t', 'bins'])
        pixels = table_from([f_in, '-H'])
        cooler.create_cooler('out.cool', bins, pixels, symmetric_upper=True)
        cooler_cmp(f_in, 'out.cool')

        # duplexed output
        pixels2 = table_from([f_in, '--matrix', '-H'])
        assert len(pixels2) > len(pixels)
        keep = pixels2['bin1_id'] <= pixels2['bin2_id']
        upper = pixels2[keep].reset_index(drop=True)
        assert np.allclose(pixels, upper)

        # lower triangle
        region_args = ['-r', 'chr2', '-r2', 'chr1']
        trans_lower = table_from([f_in, '-H'] + region_args)
        assert len(trans_lower) == 0
        trans_lower = table_from([f_in, '-m', '-H'] + region_args)
        assert len(trans_lower) > 0

        # roundtrip square data
        f_in = op.join(datadir, 'toy.asymm.2.cool')
        bins = table_from([f_in, '-H', '-t', 'bins'])
        pixels = table_from([f_in, '-H'])
        cooler.create_cooler('out.cool', bins, pixels, symmetric_upper=False)
        cooler_cmp(f_in, 'out.cool')
        pixels2 = table_from([f_in, '--matrix', '-H'])
        assert np.allclose(pixels, pixels2)

        # for square data, -m is a no-op
        lower1 = table_from([f_in, '-H'] + region_args)
        lower2 = table_from([f_in, '-m', '-H'] + region_args)
        assert np.allclose(lower1, lower2)
def test_dump():
    """Smoke-test and round-trip the ``dump`` CLI command.

    First a series of option combinations is invoked on the symmetric toy
    cooler and each must exit with code 0, while ``-b`` must exit with
    code 1. Then, for the symmetric and the square toy coolers, the dumped
    bins and pixels tables are fed back into ``cooler.create_cooler`` and
    compared with the input, and the ``--matrix``/``-m`` output is checked
    against the plain pixel dump.
    """
    runner = CliRunner()

    def read_table(cli_args):
        # Run ``dump`` with ``cli_args`` and parse its tab-separated output.
        outcome = runner.invoke(dump, cli_args)
        return pd.read_csv(StringIO(outcome.output), sep="\t")

    with runner.isolated_filesystem():
        f_in = op.join(datadir, "toy.symm.upper.2.cool")

        # Option combinations that are all expected to succeed.
        passing_options = [
            [],
            ["-t", "chroms", "--columns", "length"],
            ["-t", "bins", "--columns", "chrom,start"],
            ["-r", "chr1"],
            ["-r", "chr1:0-16", "-r2", "chr1:10-25"],
            ["-r", "chr1:10-25", "-r2", "chr1:0-5"],
            ["--join"],
            ["--join", "--one-based-ids"],
            ["--join", "--one-based-starts"],
            ["--annotate", "chrom", "--one-based-starts"],
        ]
        for extra in passing_options:
            result = runner.invoke(dump, [f_in] + extra)
            assert result.exit_code == 0

        # unbalanced file
        result = runner.invoke(dump, [f_in, "-b"])
        assert result.exit_code == 1

        # roundtrip symm-upper data
        bins = read_table([f_in, "-H", "-t", "bins"])
        pixels = read_table([f_in, "-H"])
        cooler.create_cooler("out.cool", bins, pixels, symmetric_upper=True)
        cooler_cmp(f_in, "out.cool")

        # duplexed output
        pixels2 = read_table([f_in, "--matrix", "-H"])
        assert len(pixels2) > len(pixels)
        in_upper = pixels2["bin1_id"] <= pixels2["bin2_id"]
        upper = pixels2[in_upper].reset_index(drop=True)
        assert np.allclose(pixels, upper)

        # lower triangle
        trans_lower = read_table([f_in, "-H", "-r", "chr2", "-r2", "chr1"])
        assert len(trans_lower) == 0
        trans_lower = read_table([f_in, "-m", "-H", "-r", "chr2", "-r2", "chr1"])
        assert len(trans_lower) > 0

        # roundtrip square data
        f_in = op.join(datadir, "toy.asymm.2.cool")
        bins = read_table([f_in, "-H", "-t", "bins"])
        pixels = read_table([f_in, "-H"])
        cooler.create_cooler("out.cool", bins, pixels, symmetric_upper=False)
        cooler_cmp(f_in, "out.cool")
        pixels2 = read_table([f_in, "--matrix", "-H"])
        assert np.allclose(pixels, pixels2)

        # for square data, -m is a no-op
        lower1 = read_table([f_in, "-H", "-r", "chr2", "-r2", "chr1"])
        lower2 = read_table([f_in, "-m", "-H", "-r", "chr2", "-r2", "chr1"])
        assert np.allclose(lower1, lower2)