def test_ZarrBatchWriter_list(dl_batch, pred_batch_list, tmpdir):
    """Round-trip a batch with list-style predictions through ZarrBatchWriter.

    The same batch is written twice, so the metadata returned by
    ``load_all`` is compared against the input metadata concatenated
    with itself.
    """
    def doubled(values):
        # Two identical writes -> two copies of the input, back to back.
        return np.concatenate([values, values])

    zarr_path = str(tmpdir.mkdir("example").join("out.zip.zarr"))
    batch = prepare_batch(dl_batch, pred_batch_list)

    writer = ZarrBatchWriter(zarr_path, chunk_size=4)
    for _ in range(2):
        writer.batch_write(batch)
    writer.close()

    src_meta = dl_batch['metadata']
    with ZarrReader(zarr_path) as reader:
        # First batch of size 2 must match the first two input gene ids.
        first_chunk = list(reader.batch_iter(2))[0]
        assert np.all(
            first_chunk['metadata']['gene_id'] == src_meta['gene_id'][:2])

        out = reader.load_all()
        out_meta = out['metadata']
        assert np.all(out_meta['gene_id'] == doubled(src_meta['gene_id']))
        for field in ("chr", "start"):
            assert np.all(
                out_meta['ranges'][field] == doubled(src_meta['ranges'][field]))

        # Leading three entries of the first prediction array come from the
        # first write.
        assert np.all(out['preds'][0][:3] == pred_batch_list[0])
parser.add_argument("--writer", default="zarr") args = parser.parse_args() fasta_file = "/s/genomes/human/hg19/ensembl_GRCh37.p13_release75/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa" model = get_model("DeepSEA/variantEffects") dl_kwargs = {'fasta_file': fasta_file, 'num_chr_fasta': True} output_dir = Path(args.output_dir) output_name = os.path.basename(args.vcf).split('.vcf')[0] if args.writer == "zarr": from kipoi.writers import ZarrBatchWriter, AsyncBatchWriter td = output_name + ".zarr" writer = SyncBatchWriter( AsyncBatchWriter( ZarrBatchWriter(str(output_dir / td), chunk_size=1024))) elif args.writer == "lmdb": td = output_name + ".lmdb" writer = SyncBatchWriter( AsyncSyncPredictionsWriter( LmdbBatchWriter(str(output_dir / td), "DeepSea_veff", 274578419865))) elif args.writer == "hdf5": td = output_name + ".hdf5" from kipoi.writers import HDF5BatchWriter writer = SyncBatchWriter(HDF5BatchWriter(str(output_dir / td))) print("Start predictions..") sp.score_variants(model=model, input_vcf=args.vcf, batch_size=16,