def test_AsyncTsvBatchWriter_array(dl_batch, pred_batch_array, tmpdir):
    tmpfile = str(tmpdir.mkdir("example").join("out.tsv"))

    # Wrap the synchronous TSV writer in the async writer and write the same
    # batch twice; writes happen in the background, so close() is called
    # before the file is read back.
    writer = AsyncBatchWriter(TsvBatchWriter(tmpfile))
    batch = prepare_batch(dl_batch, pred_batch_array)
    writer.batch_write(batch)
    writer.batch_write(batch)
    writer.close()

    # The TSV should contain the flattened metadata and prediction columns,
    # with the 3-example batch repeated twice.
    df = pd.read_csv(tmpfile, sep="\t")
    assert set(df.columns) == {
        'metadata/ranges/id',
        'metadata/ranges/strand',
        'metadata/ranges/chr',
        'metadata/ranges/start',
        'metadata/ranges/end',
        'metadata/gene_id',
        'preds/0',
        'preds/1',
        'preds/2',
    }
    assert list(df['metadata/ranges/id']) == [0, 1, 2, 0, 1, 2]
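

# A minimal sketch (an assumption, not the project's actual conftest) of the
# data these tests consume: `dl_batch` is expected to carry per-example
# metadata and `pred_batch_array` a (3, 3) prediction array, which
# `prepare_batch` flattens into the 'metadata/...' and 'preds/...' columns
# asserted above. The helper names below are hypothetical.
import numpy as np


def _example_dl_batch():
    # Only the metadata part relevant to the asserted columns is shown here.
    return {
        "metadata": {
            "ranges": {
                "id": np.arange(3),
                "chr": np.array(["chr1", "chr1", "chr2"]),
                "start": np.array([10, 20, 30]),
                "end": np.array([20, 30, 40]),
                "strand": np.array(["+", "-", "+"]),
            },
            "gene_id": np.array(["gene_a", "gene_b", "gene_c"]),
        }
    }


def _example_pred_batch_array():
    # Three prediction columns per example -> preds/0, preds/1, preds/2.
    return np.arange(9).reshape((3, 3)).astype(float)

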
def test_MultipleBatchWriter(dl_batch, pred_batch_array, tmpdir):
    tmpdir = tmpdir.mkdir("example")
    h5_tmpfile = str(tmpdir.join("out.h5"))
    tsv_tmpfile = str(tmpdir.join("out.tsv"))

    # Fan the same batches out to both a TSV and an HDF5 writer.
    batch = prepare_batch(dl_batch, pred_batch_array)
    writer = MultipleBatchWriter([TsvBatchWriter(tsv_tmpfile),
                                  HDF5BatchWriter(h5_tmpfile)])
    writer.batch_write(batch)
    writer.batch_write(batch)
    writer.close()

    # Both output files should exist, and the TSV should contain the
    # flattened metadata and prediction columns.
    assert os.path.exists(h5_tmpfile)
    assert os.path.exists(tsv_tmpfile)
    df = pd.read_csv(tsv_tmpfile, sep="\t")
    assert set(df.columns) == {
        'metadata/ranges/id',
        'metadata/ranges/strand',
        'metadata/ranges/chr',
        'metadata/ranges/start',
        'metadata/ranges/end',
        'metadata/gene_id',
        'preds/0',
        'preds/1',
        'preds/2',
    }
    assert list(df['metadata/ranges/id']) == [0, 1, 2, 0, 1, 2]
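

# Illustrative only: a small helper (hypothetical, not part of the test suite)
# for eyeballing the HDF5 side of MultipleBatchWriter's output. It assumes
# nothing about the exact group layout; it simply walks the file and prints
# every object path with its shape so the layout can be compared against the
# TSV columns checked above.
import h5py


def _print_h5_layout(path):
    def _show(name, obj):
        # Groups have no shape attribute; only datasets report one.
        shape = getattr(obj, "shape", None)
        print(name, shape if shape is not None else "(group)")

    with h5py.File(path, "r") as f:
        f.visititems(_show)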