Example #1
# `dl_batch`, `pred_batch_list` and `prepare_batch` are pytest fixtures / a helper
# defined in the surrounding kipoi writer test module; the ZarrReader import path
# below is assumed.
import numpy as np
from kipoi.writers import ZarrBatchWriter
from kipoi.readers import ZarrReader


def test_ZarrBatchWriter_list(dl_batch, pred_batch_list, tmpdir):
    tmpfile = str(tmpdir.mkdir("example").join("out.zip.zarr"))
    batch = prepare_batch(dl_batch, pred_batch_list)
    writer = ZarrBatchWriter(tmpfile, chunk_size=4)

    # Write the same batch twice, then close to flush the store
    writer.batch_write(batch)
    writer.batch_write(batch)
    writer.close()
    # Read everything back and check that metadata and predictions round-trip
    with ZarrReader(tmpfile) as f:
        assert np.all(
            list(f.batch_iter(2))[0]['metadata']['gene_id'] ==
            dl_batch['metadata']['gene_id'][:2])
        out = f.load_all()
        assert np.all(out['metadata']['gene_id'] == np.concatenate([
            dl_batch['metadata']['gene_id'], dl_batch['metadata']['gene_id']
        ]))
        assert np.all(out['metadata']['ranges']['chr'] == np.concatenate([
            dl_batch['metadata']['ranges']['chr'],
            dl_batch['metadata']['ranges']['chr']
        ]))
        assert np.all(out['metadata']['ranges']['start'] == np.concatenate([
            dl_batch['metadata']['ranges']['start'],
            dl_batch['metadata']['ranges']['start']
        ]))
        assert np.all(out['preds'][0][:3] == pred_batch_list[0])
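
The same round-trip can be sketched outside the test fixtures. The snippet below is a minimal sketch, not taken from the kipoi test suite: the batch layout (a "preds" array plus nested "metadata") and the kipoi.readers.ZarrReader import path are assumptions; only the calls already shown above (batch_write, close, batch_iter, load_all, chunk_size) come from the example.

# Minimal sketch (assumptions noted above): write two dict batches to a
# zarr store and read them back.
import numpy as np
from kipoi.writers import ZarrBatchWriter
from kipoi.readers import ZarrReader  # assumed module path

batch = {
    "preds": np.random.rand(4, 2),  # hypothetical predictions, 4 samples
    "metadata": {"gene_id": np.array(["g1", "g2", "g3", "g4"])},
}

writer = ZarrBatchWriter("out.zip.zarr", chunk_size=4)
writer.batch_write(batch)  # first batch
writer.batch_write(batch)  # same batch again -> 8 rows in total
writer.close()

with ZarrReader("out.zip.zarr") as f:
    first = list(f.batch_iter(2))[0]  # first mini-batch of size 2
    out = f.load_all()                # everything, concatenated
    assert out["metadata"]["gene_id"].shape[0] == 8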
Example #2
    parser.add_argument("--writer", default="zarr",
                        choices=["zarr", "lmdb", "hdf5"])
    args = parser.parse_args()

    fasta_file = "/s/genomes/human/hg19/ensembl_GRCh37.p13_release75/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa"

    model = get_model("DeepSEA/variantEffects")
    dl_kwargs = {'fasta_file': fasta_file, 'num_chr_fasta': True}
    output_dir = Path(args.output_dir)
    output_name = os.path.basename(args.vcf).split('.vcf')[0]

    if args.writer == "zarr":
        from kipoi.writers import ZarrBatchWriter, AsyncBatchWriter
        td = output_name + ".zarr"
        # AsyncBatchWriter handles the writes in a background worker;
        # SyncBatchWriter (assumed to be imported/defined earlier in this
        # script) adapts the batch writer to the interface used by
        # score_variants below.
        writer = SyncBatchWriter(
            AsyncBatchWriter(
                ZarrBatchWriter(str(output_dir / td), chunk_size=1024)))
    elif args.writer == "lmdb":
        td = output_name + ".lmdb"
        # AsyncSyncPredictionsWriter and LmdbBatchWriter are assumed to be
        # imported/defined earlier in this script; the last argument is
        # presumably the LMDB map_size in bytes (~256 GiB).
        writer = SyncBatchWriter(
            AsyncSyncPredictionsWriter(
                LmdbBatchWriter(str(output_dir / td), "DeepSea_veff",
                                274578419865)))
    elif args.writer == "hdf5":
        td = output_name + ".hdf5"
        from kipoi.writers import HDF5BatchWriter
        writer = SyncBatchWriter(HDF5BatchWriter(str(output_dir / td)))

    print("Start predictions..")
    sp.score_variants(model=model,
                      input_vcf=args.vcf,
                      batch_size=16,