def test_dataloader_model(example):
    """Draw one batch from an example's dataloader and feed it to the model.

    The example is looked up under ``example/models/<example>``. The
    dataloader is instantiated from inside that directory using the
    factory's own ``example_kwargs``.

    Skipped on Python 2 for the "rbp" and "iris_model_template" examples.
    """
    py2_unsupported = {"rbp", "iris_model_template"}
    if example in py2_unsupported and sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")

    model_dir = "example/models/{0}".format(example)

    # Optionally install the model's and dataloader's dependencies first.
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    # Dataloader factory together with the kwargs it ships as an example.
    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")
    loader_kwargs = loader_factory.example_kwargs

    model = kipoi.get_model(model_dir, source="dir")

    with kipoi_utils.utils.cd(model_dir):
        loader = loader_factory(**loader_kwargs)

        # A single batch is enough for this smoke test.
        first_batch = next(loader.batch_iter())

        model.predict_on_batch(first_batch["inputs"])
def test_extractor_model(example):
    """Run prediction and ``pred_grad`` on one batch of an example model.

    The example lives under ``examples/<example>``; the dataloader is
    created from inside its ``example_files`` sub-directory.

    Skipped on Python 2 for the "rbp" example.
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    model_dir = "examples/{0}".format(example)

    # Install model + dataloader dependencies.
    # TODO: maybe do this implicitly in load_dataloader?
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")
    loader_kwargs = get_test_kwargs(model_dir)

    # Install the model dependencies.
    # TODO: maybe do this implicitly in load_extractor?
    if INSTALL_REQ:
        install_model_requirements(model_dir, source="dir")

    model = kipoi.get_model(model_dir, source="dir")

    with cd(model_dir + "/example_files"):
        loader = loader_factory(**loader_kwargs)

        # One batch is sufficient here.
        first_batch = next(loader.batch_iter())

        model.predict_on_batch(first_batch["inputs"])
        model.pred_grad(first_batch["inputs"], Slice_conv()[:, 0])
def test_var_eff_pred_varseq(tmpdir):
    """Predict SNV effects with "DeepSEA/variantEffects" and write a VCF.

    The output VCF is written below ``tmpdir`` and the test only asserts
    that the file exists afterwards.

    NOTE(review): a second function with this exact name is defined later
    in this file; within one module the later definition replaces this one.
    """
    model_name = "DeepSEA/variantEffects"
    if INSTALL_REQ:
        install_model_requirements(model_name, "kipoi", and_dataloaders=True)

    model = kipoi.get_model(model_name, source="kipoi")

    # The preprocessor
    Dataloader = SeqIntervalDl

    relative_args = {
        "intervals_file": "example_files/intervals.bed",
        "fasta_file": "example_files/hg38_chr22.fa",
        "required_seq_len": 1000,
        "alphabet_axis": 1,
        "dummy_axis": 2,
        "label_dtype": str,
    }
    # Only string values get the model source directory prepended; all
    # other values are passed through untouched.
    dataloader_arguments = {}
    for key, value in relative_args.items():
        if isinstance(value, str):
            value = model.source_dir + "/" + value
        dataloader_arguments[key] = value

    out_dir = tmpdir.mkdir("variants_generated")
    out_vcf_fpath = str(out_dir.join("out.vcf"))

    vcf_path = kipoi_veff.ensure_tabixed_vcf("tests/data/variants.vcf")

    model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
    writer = kipoi_veff.VcfWriter(model, vcf_path, out_vcf_fpath,
                                  standardise_var_id=True)
    vcf_to_region = kipoi_veff.SnvCenteredRg(model_info)

    sp.predict_snvs(model,
                    Dataloader,
                    vcf_path,
                    dataloader_args=dataloader_arguments,
                    batch_size=32,
                    vcf_to_region=vcf_to_region,
                    sync_pred_writer=writer)
    writer.close()

    assert os.path.exists(out_vcf_fpath)
def get_example_data(example, layer, writer=None):
    """Compute input gradients for every batch of an example model.

    Args:
        example: name of the example directory below ``examples/``.
        layer: forwarded as ``layer=`` to ``model.input_grad``.
        writer: optional object providing ``batch_write`` and ``close``;
            when given, every prepared batch is written to it and it is
            closed after the last batch.

    Returns:
        The result of ``numpy_collate`` applied to the list of prepared
        batches.
    """
    model_dir = "examples/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")

    with open(model_dir + "/example_files/test.json", "r") as ifh:
        raw_arguments = json.load(ifh)
    # Every argument value gets the "example_files/" prefix.
    loader_arguments = {
        name: "example_files/" + value
        for name, value in raw_arguments.items()
    }

    has_writer = writer is not None
    collected = []
    with cd(model.source_dir):
        loader = loader_factory(**loader_arguments)
        batches = loader.batch_iter(batch_size=32, num_workers=0)

        # Walk through all batches: predict, optionally write, and collect.
        for batch in tqdm(batches):
            grads = model.input_grad(batch['inputs'],
                                     avg_func="sum",
                                     layer=layer,
                                     final_layer=False)
            # Inputs are always kept so that input*grad can be generated.
            prepared = prepare_batch(batch, grads, keep_inputs=True)
            if has_writer:
                writer.batch_write(prepared)
            collected.append(prepared)

        if has_writer:
            writer.close()

    return numpy_collate(collected)
def test_deeplift():
    """Score the "tal1_model" example with DeepLift and compare to stored grads.

    For every batch the DeepLift forward prediction must equal the original
    model's prediction. The written ``input_grad`` values are then compared
    (``np.isclose``) against ``inputs * grads`` from the stored
    ``grads.hdf5``. The temporary HDF5 output is removed before and after.
    """
    # return True
    example = "tal1_model"
    # Lookup kept from the original; the value is not used further below.
    layer = predict_activation_layers[example]
    model_dir = "tests/models/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")

    with open(model_dir + "/example_files/test.json", "r") as ifh:
        raw_arguments = json.load(ifh)
    loader_arguments = {
        name: "example_files/" + value
        for name, value in raw_arguments.items()
    }

    scorer = DeepLift(model, output_layer=-2, task_idx=0, preact=None,
                      mxts_mode='grad_times_inp')

    pred_path = model.source_dir + "/example_files/deeplift_grads_pred.hdf5"
    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(pred_path):
        os.unlink(pred_path)

    writer = writers.HDF5BatchWriter(file_path=pred_path)

    with kipoi.utils.cd(model.source_dir):
        loader = loader_factory(**loader_arguments)
        batches = loader.batch_iter(batch_size=32, num_workers=0)

        for batch in tqdm(batches):
            scores = scorer.score(batch['inputs'], None)

            # Following Avanti's recommendation: the converted model must
            # reproduce the original model's forward predictions.
            converted_fwd = scorer.predict_on_batch(batch['inputs'])
            original_fwd = model.predict_on_batch(batch['inputs'])
            assert np.all(converted_fwd == original_fwd)

            batch["input_grad"] = scores
            writer.batch_write(batch)
        writer.close()

    new_res = readers.HDF5Reader.load(pred_path)
    ref_res = readers.HDF5Reader.load(model.source_dir +
                                      "/example_files/grads.hdf5")
    expected = ref_res['inputs'] * ref_res['grads']
    assert np.all(np.isclose(new_res['input_grad'], expected))

    if os.path.exists(pred_path):
        os.unlink(pred_path)
def test_mutation_map():
    """Generate a mutation map for the rbp example and check it.

    The map is saved to ``example_files/first_variant_mm_totest.hdf5``
    inside the model's source directory, read back, and compared with the
    stored reference ``example_files/first_variant_mm.hdf5``. It is then
    plotted once with the 'agg' backend. The generated file is removed even
    when the comparison or the plotting fails.

    Skipped on Python 2.
    """
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    # Take the rbp model
    model_dir = "examples/rbp/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
    }
    # The map is generated before changing directory, so every path is
    # prefixed with the model directory.
    dataloader_arguments = {
        k: model_dir + v
        for k, v in dataloader_arguments.items()
    }

    # Run the actual predictions
    vcf_path = model_dir + "example_files/first_variant.vcf"

    model_info = kipoi.postprocessing.variant_effects.ModelInfoExtractor(
        model, Dataloader)
    vcf_to_region = kipoi.postprocessing.variant_effects.SnvCenteredRg(
        model_info)
    mdmm = mm._generate_mutation_map(
        model,
        Dataloader,
        vcf_path,
        dataloader_args=dataloader_arguments,
        evaluation_function=analyse_model_preds,
        batch_size=32,
        vcf_to_region=vcf_to_region,
        evaluation_function_kwargs={'diff_types': {
            'diff': Diff("mean")
        }})

    reference_fpath = "example_files/first_variant_mm.hdf5"
    generated_fpath = "example_files/first_variant_mm_totest.hdf5"

    with cd(model.source_dir):
        try:
            mdmm.save_to_file(generated_fpath)
            from kipoi.postprocessing.variant_effects.utils.generic import read_hdf5
            reference = read_hdf5(reference_fpath)
            # Read back the file written just above; reading the reference
            # twice would make the comparison below trivially true.
            obs = read_hdf5(generated_fpath)
            compare_rec(reference[0], obs[0])

            # Import the submodule explicitly: a bare `import matplotlib`
            # does not guarantee that `matplotlib.pyplot` is loaded.
            import matplotlib.pyplot
            matplotlib.pyplot.switch_backend('agg')
            mdmm.plot_mutmap(0, "seq", "diff", "rbp_prb")
        finally:
            # Never leave the generated file behind, whatever happened.
            if os.path.exists(generated_fpath):
                os.unlink(generated_fpath)
def test_var_eff_pred_varseq():
    """Predict SNV effects for the variable-sequence-length example model.

    Constructing ``SnvCenteredRg`` is expected to raise for this model, so
    ``predict_snvs`` is called with ``vcf_to_region=None``. The generated
    VCF is compared with the stored reference output and then removed.

    Skipped on Python 2.

    NOTE(review): another function with this exact name is defined earlier
    in this file; within one module this later definition replaces it.
    """
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    model_dir = "examples/var_seqlen_model/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
        "intervals_file": "example_files/variant_centered_intervals.tsv",
    }
    out_vcf_fpath = "example_files/variants_generated.vcf"
    ref_out_vcf_fpath = "example_files/variants_ref_out.vcf"

    # The paths above are relative, so everything below runs from inside
    # the model's source directory.
    with cd(model.source_dir):
        veff = kipoi.postprocessing.variant_effects
        vcf_path = veff.ensure_tabixed_vcf("example_files/variants.vcf")
        model_info = veff.ModelInfoExtractor(model, Dataloader)
        writer = veff.VcfWriter(model, vcf_path, out_vcf_fpath)

        vcf_to_region = None
        with pytest.raises(Exception):
            # This has to raise an exception as the sequence length is None.
            vcf_to_region = veff.SnvCenteredRg(model_info)

        sp.predict_snvs(
            model,
            Dataloader,
            vcf_path,
            dataloader_args=dataloader_arguments,
            evaluation_function=analyse_model_preds,
            batch_size=32,
            vcf_to_region=vcf_to_region,
            evaluation_function_kwargs={'diff_types': {'diff': Diff("mean")}},
            sync_pred_writer=writer)
        writer.close()

        # assert filecmp.cmp(out_vcf_fpath, ref_out_vcf_fpath)
        compare_vcfs(out_vcf_fpath, ref_out_vcf_fpath)
        os.unlink(out_vcf_fpath)
def test_var_eff_pred2():
    """Predict SNV effects for the rbp example using restricted regions.

    Regions are built with ``SnvPosRestrictedRg`` from the BED file
    ``example_files/restricted_regions.bed``. The generated VCF is compared
    with the stored reference output and then removed.

    Skipped on Python 2.
    """
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    # Take the rbp model
    model_dir = "examples/rbp/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
    }

    # Inputs and outputs of the actual prediction run (all relative paths).
    vcf_path = "example_files/variants.vcf"
    out_vcf_fpath = "example_files/variants_generated2.vcf"
    ref_out_vcf_fpath = "example_files/variants_ref_out2.vcf"
    restricted_regions_fpath = "example_files/restricted_regions.bed"

    with cd(model.source_dir):
        veff = kipoi.postprocessing.variant_effects
        restricted_regions = pb.BedTool(restricted_regions_fpath)
        model_info = veff.ModelInfoExtractor(model, Dataloader)
        vcf_to_region = veff.SnvPosRestrictedRg(model_info,
                                                restricted_regions)
        writer = veff.utils.io.VcfWriter(model, vcf_path, out_vcf_fpath)

        sp.predict_snvs(
            model,
            Dataloader,
            vcf_path,
            dataloader_args=dataloader_arguments,
            evaluation_function=analyse_model_preds,
            batch_size=32,
            vcf_to_region=vcf_to_region,
            evaluation_function_kwargs={'diff_types': {'diff': Diff("mean")}},
            sync_pred_writer=writer)
        writer.close()

        # assert filecmp.cmp(out_vcf_fpath, ref_out_vcf_fpath)
        compare_vcfs(out_vcf_fpath, ref_out_vcf_fpath)
        os.unlink(out_vcf_fpath)
def test_gradient_function_model(example):
    """Call ``input_grad`` in several ways on one batch of an example model.

    Skipped on Python 2 for the "rbp" example, and for "rbp" when the keras
    backend is theano. The ``pre_nonlinearity=True`` call is only made when
    the backend is not theano.
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    import keras
    backend = keras.backend._BACKEND
    on_theano = backend == 'theano'
    if on_theano and example == "rbp":
        pytest.skip("extended_coda example not with theano ")

    model_dir = "examples/{0}".format(example)

    # Install model + dataloader dependencies.
    # TODO: maybe do this implicitly in load_dataloader?
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")
    loader_kwargs = get_test_kwargs(model_dir)

    # Install the model dependencies.
    # TODO: maybe do this implicitly in load_extractor?
    if INSTALL_REQ:
        install_model_requirements(model_dir, source="dir")

    model = kipoi.get_model(model_dir, source="dir")

    with cd(model_dir + "/example_files"):
        loader = loader_factory(**loader_kwargs)

        # One batch is sufficient here.
        first_batch = next(loader.batch_iter())

        model.predict_on_batch(first_batch["inputs"])

        if not on_theano:
            model.input_grad(first_batch["inputs"], Slice_conv()[:, 0],
                             pre_nonlinearity=True)
        model.input_grad(first_batch["inputs"], Slice_conv()[:, 0],
                         pre_nonlinearity=False)
        # same as Slice_conv()[:, 0]
        model.input_grad(first_batch["inputs"], 0, pre_nonlinearity=False)
        model.input_grad(first_batch["inputs"], avg_func="sum")
def test_activation_function_model(example):
    """Call ``predict_activation_on_batch`` on one batch of an example model.

    The activation is requested for the layer index
    ``len(model.model.layers) - 2`` and, for the "rbp" example only, also
    for the layer named "flatten_6".

    Skipped on Python 2 for the "rbp" example, and for "rbp" when the keras
    backend is theano.
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    import keras
    backend = keras.backend._BACKEND
    if backend == 'theano' and example == "rbp":
        pytest.skip("extended_coda example not with theano ")

    model_dir = "examples/{0}".format(example)

    # Install model + dataloader dependencies.
    # TODO: maybe do this implicitly in load_dataloader?
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")
    loader_kwargs = get_test_kwargs(model_dir)

    # Install the model dependencies.
    # TODO: maybe do this implicitly in load_extractor?
    if INSTALL_REQ:
        install_model_requirements(model_dir, source="dir")

    model = kipoi.get_model(model_dir, source="dir")

    with cd(model_dir + "/example_files"):
        loader = loader_factory(**loader_kwargs)

        # One batch is sufficient here.
        first_batch = next(loader.batch_iter())

        model.predict_on_batch(first_batch["inputs"])

        second_to_last = len(model.model.layers) - 2
        model.predict_activation_on_batch(first_batch["inputs"],
                                          layer=second_to_last)
        if example == "rbp":
            model.predict_activation_on_batch(first_batch["inputs"],
                                              layer="flatten_6")
def test_score():
    """Score the "tal1_model" example with ``Gradient`` and compare to stored grads.

    Every batch is written, together with its scores under the "grads" key,
    to ``example_files/grads_pred.hdf5`` in the model's source directory.
    That file is compared against the stored ``grads.hdf5`` via
    ``compare_numpy_dict`` and removed before and after the run.
    """
    example = "tal1_model"
    layer = predict_activation_layers[example]
    model_dir = "example/models/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    loader_factory = kipoi.get_dataloader_factory(model_dir, source="dir")

    with open(model_dir + "/example_files/test.json", "r") as ifh:
        raw_arguments = json.load(ifh)
    loader_arguments = {
        name: "example_files/" + value
        for name, value in raw_arguments.items()
    }

    scorer = Gradient(model, None, layer=layer, avg_func="sum")

    pred_path = model.source_dir + "/example_files/grads_pred.hdf5"
    # Start from a clean slate in case an earlier run left the file behind.
    if os.path.exists(pred_path):
        os.unlink(pred_path)

    writer = writers.HDF5BatchWriter(file_path=pred_path)

    with kipoi_utils.utils.cd(model.source_dir):
        loader = loader_factory(**loader_arguments)
        batches = loader.batch_iter(batch_size=32, num_workers=0)

        # Score every batch and store it alongside its gradients.
        for batch in tqdm(batches):
            batch["grads"] = scorer.score(batch['inputs'])
            writer.batch_write(batch)
        writer.close()

    predicted = readers.HDF5Reader.load(pred_path)
    expected = readers.HDF5Reader.load(model.source_dir +
                                       "/example_files/grads.hdf5")
    kipoi_utils.utils.compare_numpy_dict(predicted, expected)

    if os.path.exists(pred_path):
        os.unlink(pred_path)
def test_load_model(example):
    """Load an example model and touch its main attributes.

    The attributes are only accessed, never compared; the test passes when
    none of the accesses raises. ``arch`` and ``weights`` are only accessed
    for ``kipoi.model.KerasModel`` instances.

    Skipped on Python 2 for the "rbp" and "iris_model_template" examples.
    """
    model_dir = "examples/{0}".format(example)

    py2_unsupported = {"rbp", "iris_model_template"}
    if example in py2_unsupported and sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")

    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir")

    m = kipoi.get_model(model_dir, source="dir")

    if isinstance(m, kipoi.model.KerasModel):
        _ = (m.arch, m.weights)

    # Tuple elements are evaluated left to right, one access per attribute.
    _ = (
        m.info,
        m.schema,
        m.schema.inputs,
        m.source,
        m.default_dataloader,
        m.model,
        m.predict_on_batch,
    )
] # for regions in [regions_dict, regions_gr]: for exclude_indels, ref_res, ref_lines in zip( [False, True], [plus_indel_results, snv_results], [ref_lines_indel, snv_ref_lines]): found_vars, overlapping_region = sp._overlap_vcf_region( vcf_obj, regions, exclude_indels) assert all([ str(el1) == str(el2) for el1, el2 in zip(ref_res, found_vars) if not el1.is_indel ]) assert overlapping_region == ref_lines """ # Take the rbp model model_dir = "examples/rbp/" install_model_requirements(model_dir, "dir", and_dataloaders=True) model = kipoi.get_model(model_dir, source="dir") # The preprocessor Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir") dataloader_arguments = { "fasta_file": "example_files/hg38_chr22.fa", "preproc_transformer": "dataloader_files/encodeSplines.pkl", "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz", } # Run the actual predictions