예제 #1
0
def test_AsyncTsvBatchWriter_array(dl_batch, pred_batch_array, tmpdir):
    """Write one batch twice via AsyncBatchWriter(TsvBatchWriter) and check the TSV.

    The batch built by ``prepare_batch`` from the two fixtures is written
    two times; after closing the writer the file is read back with pandas
    and both the column names and the ``metadata/ranges/id`` column are
    verified.
    """
    out_path = str(tmpdir.mkdir("example").join("out.tsv"))

    # Wrap the plain TSV writer in the asynchronous front-end.
    tsv_writer = TsvBatchWriter(out_path)
    writer = AsyncBatchWriter(tsv_writer)

    batch = prepare_batch(dl_batch, pred_batch_array)
    for _ in range(2):
        writer.batch_write(batch)
    writer.close()

    written = pd.read_csv(out_path, sep="\t")

    expected_columns = {
        "metadata/ranges/id",
        "metadata/ranges/strand",
        "metadata/ranges/chr",
        "metadata/ranges/start",
        "metadata/ranges/end",
        "metadata/gene_id",
        "preds/0",
        "preds/1",
        "preds/2",
    }
    assert set(written.columns) == expected_columns

    # Two writes of the same batch: the id sequence appears once per write.
    assert written["metadata/ranges/id"].tolist() == [0, 1, 2, 0, 1, 2]
예제 #2
0
def test_MultipleBatchWriter(dl_batch, pred_batch_array, tmpdir):
    """Write one batch twice via MultipleBatchWriter and check both outputs.

    A ``TsvBatchWriter`` and an ``HDF5BatchWriter`` are combined in a single
    ``MultipleBatchWriter``. After two writes and a close, both output files
    must exist, and the TSV file is read back with pandas to verify its
    column names and the ``metadata/ranges/id`` column.
    """
    workdir = tmpdir.mkdir("example")
    h5_path = str(workdir.join("out.h5"))
    tsv_path = str(workdir.join("out.tsv"))

    batch = prepare_batch(dl_batch, pred_batch_array)

    # Same construction order as the list literal: TSV first, then HDF5.
    sub_writers = [TsvBatchWriter(tsv_path), HDF5BatchWriter(h5_path)]
    writer = MultipleBatchWriter(sub_writers)
    for _ in range(2):
        writer.batch_write(batch)
    writer.close()

    for produced in (h5_path, tsv_path):
        assert os.path.exists(produced)

    written = pd.read_csv(tsv_path, sep="\t")

    expected_columns = {
        "metadata/ranges/id",
        "metadata/ranges/strand",
        "metadata/ranges/chr",
        "metadata/ranges/start",
        "metadata/ranges/end",
        "metadata/gene_id",
        "preds/0",
        "preds/1",
        "preds/2",
    }
    assert set(written.columns) == expected_columns

    # Two writes of the same batch: the id sequence appears once per write.
    assert written["metadata/ranges/id"].tolist() == [0, 1, 2, 0, 1, 2]