def test_load_zero_one_based_bg2():
    """Round-trip BG2 pixel files with 1-based and 0-based start coordinates.

    Each fixture is loaded through ``load.callback`` into ``testcool_path``
    and the joined pixel table read back from the cooler is compared with the
    input text file. For the 1-based fixture the output ``start1``/``start2``
    columns are incremented by one before the comparison.
    """
    load_opts = {
        "format": "bg2",
        "metadata": None,
        "assembly": "toy",
        "chunksize": 10,
        "field": (),
        "count_as_float": False,
        "comment_char": "#",
        "input_copy_status": "unique",
        "no_symmetric_upper": False,
        "storage_options": None,
    }
    bg2_columns = [
        "chrom1", "start1", "end1", "chrom2", "start2", "end2", "count"
    ]
    bins_path = op.join(testdir, "data", "toy.chrom.sizes") + ":1"

    # (fixture file, whether its start coordinates are 1-based)
    cases = (
        ("toy.symm.upper.1.ob.bg2", True),
        ("toy.symm.upper.1.zb.bg2", False),
    )

    for fixture_name, starts_are_one_based in cases:
        pixels_path = op.join(testdir, "data", fixture_name)
        load.callback(
            bins_path,
            pixels_path,
            testcool_path,
            one_based=starts_are_one_based,
            **load_opts
        )

        # Reference table, exactly as written in the input file.
        expected = pd.read_csv(pixels_path, sep="\t", names=bg2_columns)

        # Table read back from the freshly written cooler.
        observed = cooler.Cooler(testcool_path).pixels(join=True)[:]
        if starts_are_one_based:
            # Shift the output starts so they line up with the 1-based input.
            for start_col in ("start1", "start2"):
                observed[start_col] += 1

        assert np.all(observed == expected)
def test_load_zero_one_based_bg2():
    """Load BG2 input with 1-based, then 0-based, starts and compare tables.

    Both fixtures are written to ``testcool_path`` via ``load.callback``; the
    joined pixels read back are checked element-wise against the input file.
    The 1-based case adds one to the output start columns first.
    """
    common_opts = {
        'format': 'bg2',
        'metadata': None,
        'assembly': 'toy',
        'chunksize': 10,
        'field': (),
        'count_as_float': False,
        'comment_char': '#',
        'input_copy_status': 'unique',
        'no_symmetric_upper': False,
        'storage_options': None,
    }
    column_names = [
        'chrom1', 'start1', 'end1', 'chrom2', 'start2', 'end2', 'count'
    ]
    data_dir_parts = (testdir, 'data')
    bins_spec = op.join(*data_dir_parts, 'toy.chrom.sizes') + ':1'

    # --- input whose starts are 1-based ---
    one_based_file = op.join(*data_dir_parts, 'toy.symm.upper.1.ob.bg2')
    load.callback(
        bins_spec, one_based_file, testcool_path, one_based=True, **common_opts
    )
    want = pd.read_csv(one_based_file, sep='\t', names=column_names)
    got = cooler.Cooler(testcool_path).pixels(join=True)[:]
    # Bring the output starts onto the 1-based scale of the reference.
    got['start1'] = got['start1'] + 1
    got['start2'] = got['start2'] + 1
    assert np.all(got == want)

    # --- input whose starts are 0-based ---
    zero_based_file = op.join(*data_dir_parts, 'toy.symm.upper.1.zb.bg2')
    load.callback(
        bins_spec, zero_based_file, testcool_path, one_based=False, **common_opts
    )
    want = pd.read_csv(zero_based_file, sep='\t', names=column_names)
    got = cooler.Cooler(testcool_path).pixels(join=True)[:]
    assert np.all(got == want)
def test_load_zero_one_based_coo():
    """Round-trip COO pixel files with 1-based and 0-based bin IDs.

    Each fixture is loaded through ``load.callback`` into ``testcool_path``
    and the pixel table read back is compared with the input text file. For
    the 1-based fixture the output ``bin1_id``/``bin2_id`` columns are
    incremented by one before the comparison.
    """
    load_opts = {
        'format': 'coo',
        'metadata': None,
        'assembly': 'toy',
        'chunksize': 10,
        'field': (),
        'count_as_float': False,
        'comment_char': '#',
        'symmetric_input': 'unique',
        'no_symmetric_storage': False,
        'storage_options': None,
    }
    coo_columns = ['bin1_id', 'bin2_id', 'count']
    bins_path = op.join(testdir, 'data', 'toy.chrom.sizes') + ':1'

    # (fixture file, whether its bin IDs are 1-based)
    cases = (
        ('toy.symm.upper.1.ob.coo', True),
        ('toy.symm.upper.1.zb.coo', False),
    )

    for fixture_name, ids_are_one_based in cases:
        pixels_path = op.join(testdir, 'data', fixture_name)
        load.callback(
            bins_path,
            pixels_path,
            testcool_path,
            one_based=ids_are_one_based,
            **load_opts
        )

        # Reference table, exactly as written in the input file.
        expected = pd.read_table(pixels_path, names=coo_columns)

        # Table read back from the freshly written cooler.
        observed = cooler.Cooler(testcool_path).pixels()[:]
        if ids_are_one_based:
            # Shift the output IDs so they line up with the 1-based input.
            for id_col in ('bin1_id', 'bin2_id'):
                observed[id_col] += 1

        assert np.all(observed == expected)
def test_load_bg2_vs_coo():
    """Check that BG2 and COO inputs of the same matrix yield identical pixels.

    The same bin table is used to load a BG2 file and a COO file into two
    separate output files under ``tmp``. The ``bin1_id``, ``bin2_id`` and
    ``count`` pixel columns of both outputs are then compared element-wise.

    The output files are removed in a ``finally`` clause so that a failing
    load or assertion does not leave stale ``.cool`` files behind.
    """
    kwargs = dict(
        metadata=None,
        assembly='hg19',
        chunksize=int(20e6),
        field=(),
        count_as_float=False,
        one_based=False,
        comment_char='#',
        symmetric_input='unique',
        no_symmetric_storage=False,
        storage_options=None,
    )
    bins_path = op.join(testdir, 'data', 'hg19.bins.2000kb.bed.gz')
    out_path1 = op.join(tmp, 'test1.cool')
    out_path2 = op.join(tmp, 'test2.cool')

    try:
        load.callback(
            bins_path,
            op.join(testdir, 'data', 'hg19.GM12878-MboI.matrix.2000kb.bg2.gz'),
            out_path1,
            format='bg2',
            **kwargs
        )
        load.callback(
            bins_path,
            op.join(testdir, 'data', 'hg19.GM12878-MboI.matrix.2000kb.coo.txt'),
            out_path2,
            format='coo',
            **kwargs
        )

        with h5py.File(out_path1, 'r') as f1, \
                h5py.File(out_path2, 'r') as f2:
            for col in ['bin1_id', 'bin2_id', 'count']:
                assert np.all(f1['pixels'][col][:] == f2['pixels'][col][:])
    finally:
        # Best-effort cleanup: a file may not exist if its load step failed,
        # so a removal error is deliberately ignored.
        for fp in [out_path1, out_path2]:
            try:
                os.remove(fp)
            except OSError:
                pass
def test_load_bg2_vs_coo():
    """Check that BG2 and COO inputs of the same matrix yield identical pixels.

    The same bin table is used to load a BG2 file and a COO file into two
    separate output files under ``tmp``. The ``bin1_id``, ``bin2_id`` and
    ``count`` pixel columns of both outputs are then compared element-wise.

    The output files are removed in a ``finally`` clause so that a failing
    load or assertion does not leave stale ``.cool`` files behind.
    """
    kwargs = dict(
        metadata=None,
        assembly="hg19",
        chunksize=int(20e6),
        field=(),
        count_as_float=False,
        one_based=False,
        comment_char="#",
        input_copy_status="unique",
        no_symmetric_upper=False,
        storage_options=None,
    )
    bins_path = op.join(testdir, "data", "hg19.bins.2000kb.bed.gz")
    out_path1 = op.join(tmp, "test1.cool")
    out_path2 = op.join(tmp, "test2.cool")

    try:
        load.callback(
            bins_path,
            op.join(testdir, "data", "hg19.GM12878-MboI.matrix.2000kb.bg2.gz"),
            out_path1,
            format="bg2",
            **kwargs
        )
        load.callback(
            bins_path,
            op.join(testdir, "data", "hg19.GM12878-MboI.matrix.2000kb.coo.txt"),
            out_path2,
            format="coo",
            **kwargs
        )

        with h5py.File(out_path1, "r") as f1, h5py.File(out_path2, "r") as f2:
            for col in ["bin1_id", "bin2_id", "count"]:
                assert np.all(f1["pixels"][col][:] == f2["pixels"][col][:])
    finally:
        # Best-effort cleanup: a file may not exist if its load step failed,
        # so a removal error is deliberately ignored.
        for fp in [out_path1, out_path2]:
            try:
                os.remove(fp)
            except OSError:
                pass