Ejemplo n.º 1
0
def test_dataloader_model(example):
    """Smoke-test an example model together with its dataloader.

    The dataloader factory and the model are both loaded from the example
    directory (``source="dir"``). The dataloader is instantiated with its own
    ``example_kwargs`` while the working directory is the example directory,
    and the inputs of the first batch are passed to
    ``model.predict_on_batch``.
    """
    py2_unsupported = {"rbp", "iris_model_template"}
    if example in py2_unsupported and sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")

    example_dir = "example/models/{0}".format(example)

    if INSTALL_REQ:
        # Install the requirements of the model and of its dataloader.
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")
    test_kwargs = Dl.example_kwargs
    model = kipoi.get_model(example_dir, source="dir")

    with kipoi_utils.utils.cd(example_dir):
        dataloader = Dl(**test_kwargs)
        # One batch is enough for a smoke test.
        first_batch = next(dataloader.batch_iter())
        model.predict_on_batch(first_batch["inputs"])
Ejemplo n.º 2
0
def test_extractor_model(example):
    """Run prediction and ``pred_grad`` on the first batch of an example.

    The dataloader is created inside ``<example_dir>/example_files`` using
    the keyword arguments returned by ``get_test_kwargs``.
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    example_dir = "examples/{0}".format(example)

    # Requirements of the model including its dataloader.
    # TODO: maybe do this implicitly when the dataloader is loaded?
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")
    test_kwargs = get_test_kwargs(example_dir)

    # Requirements of the model alone.
    # TODO: maybe do this implicitly when the extractor is loaded?
    if INSTALL_REQ:
        install_model_requirements(example_dir, source="dir")

    model = kipoi.get_model(example_dir, source="dir")

    with cd("{0}/example_files".format(example_dir)):
        dataloader = Dl(**test_kwargs)
        first_batch = next(dataloader.batch_iter())
        inputs = first_batch["inputs"]

        model.predict_on_batch(inputs)
        model.pred_grad(inputs, Slice_conv()[:, 0])
Ejemplo n.º 3
0
def test_var_eff_pred_varseq(tmpdir):
    """Predict SNV effects with ``DeepSEA/variantEffects`` and write a VCF.

    Uses ``SeqIntervalDl`` as the dataloader and only asserts that the output
    VCF exists once the writer has been closed.

    Args:
        tmpdir: pytest temporary-directory fixture; the output VCF is created
            in a ``variants_generated`` sub-directory of it.
    """
    model_name = "DeepSEA/variantEffects"
    if INSTALL_REQ:
        install_model_requirements(model_name, "kipoi", and_dataloaders=True)

    model = kipoi.get_model(model_name, source="kipoi")
    Dataloader = SeqIntervalDl

    raw_arguments = {
        "intervals_file": "example_files/intervals.bed",
        "fasta_file": "example_files/hg38_chr22.fa",
        "required_seq_len": 1000,
        "alphabet_axis": 1,
        "dummy_axis": 2,
        "label_dtype": str,
    }
    # String values are file paths relative to the model directory; every
    # other value is passed through untouched.
    dataloader_arguments = {}
    for key, value in raw_arguments.items():
        if isinstance(value, str):
            value = model.source_dir + "/" + value
        dataloader_arguments[key] = value

    out_dir = tmpdir.mkdir("variants_generated")
    out_vcf_fpath = str(out_dir.join("out.vcf"))

    vcf_path = kipoi_veff.ensure_tabixed_vcf("tests/data/variants.vcf")
    model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
    writer = kipoi_veff.VcfWriter(model, vcf_path, out_vcf_fpath,
                                  standardise_var_id=True)
    vcf_to_region = kipoi_veff.SnvCenteredRg(model_info)

    sp.predict_snvs(model, Dataloader, vcf_path,
                    dataloader_args=dataloader_arguments,
                    batch_size=32,
                    vcf_to_region=vcf_to_region,
                    sync_pred_writer=writer)
    writer.close()

    assert os.path.exists(out_vcf_fpath)
Ejemplo n.º 4
0
def get_example_data(example, layer, writer=None):
    """Compute input gradients for every batch of an example model.

    Args:
        example: name of the example; resolved to ``examples/<example>``.
        layer: forwarded as ``layer`` to ``model.input_grad``.
        writer: optional batch writer. When given, each prepared batch is
            passed to ``writer.batch_write`` and the writer is closed after
            the last batch.

    Returns:
        The result of ``numpy_collate`` applied to the list of prepared
        batches.
    """
    example_dir = "examples/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(example_dir, source="dir")
    Dataloader = kipoi.get_dataloader_factory(example_dir, source="dir")

    with open(example_dir + "/example_files/test.json", "r") as ifh:
        raw_arguments = json.load(ifh)

    # Prefix every argument so it resolves from the model source directory.
    dataloader_arguments = {
        key: "example_files/" + value
        for key, value in raw_arguments.items()
    }

    collected = []
    with cd(model.source_dir):
        loader = Dataloader(**dataloader_arguments)
        batches = loader.batch_iter(batch_size=32, num_workers=0)

        for batch in tqdm(batches):
            grads = model.input_grad(batch['inputs'], avg_func="sum",
                                     layer=layer, final_layer=False)
            # Inputs are always kept so that input*grad can be generated.
            prepared = prepare_batch(batch, grads, keep_inputs=True)
            if writer is not None:
                writer.batch_write(prepared)
            collected.append(prepared)

        if writer is not None:
            writer.close()

    return numpy_collate(collected)
Ejemplo n.º 5
0
def test_deeplift():
    """Compare DeepLift ``grad_times_inp`` scores with stored gradients.

    For every batch of the ``tal1_model`` example the test

    1. scores the inputs with ``DeepLift``,
    2. asserts that the DeepLift forward prediction equals the prediction of
       the original model (a check that the model conversion worked), and
    3. writes the batch together with its scores (key ``"input_grad"``) to a
       temporary HDF5 file.

    The written ``input_grad`` values are then compared against
    ``inputs * grads`` from the reference file ``example_files/grads.hdf5``.
    """
    example = "tal1_model"
    example_dir = "tests/models/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(example_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(example_dir, source="dir")

    with open(example_dir + "/example_files/test.json", "r") as ifh:
        dataloader_arguments = json.load(ifh)

    # Prefix every argument so it resolves from the model source directory.
    for k in dataloader_arguments:
        dataloader_arguments[k] = "example_files/" + dataloader_arguments[k]

    d = DeepLift(model,
                 output_layer=-2,
                 task_idx=0,
                 preact=None,
                 mxts_mode='grad_times_inp')

    new_ofname = model.source_dir + "/example_files/deeplift_grads_pred.hdf5"
    # Remove a stale output left behind by an earlier, aborted run.
    if os.path.exists(new_ofname):
        os.unlink(new_ofname)

    writer = writers.HDF5BatchWriter(file_path=new_ofname)
    try:
        with kipoi.utils.cd(model.source_dir):
            dl = Dataloader(**dataloader_arguments)
            it = dl.batch_iter(batch_size=32, num_workers=0)
            # Loop through the data, make predictions, save the output
            for batch in tqdm(it):
                pred_batch = d.score(batch['inputs'], None)

                # Using Avanti's recommendation to check whether the model
                # conversion has worked.
                pred_batch_fwd = d.predict_on_batch(batch['inputs'])
                orig_pred_batch_fwd = model.predict_on_batch(batch['inputs'])
                assert np.all(pred_batch_fwd == orig_pred_batch_fwd)

                # Write inside the loop so that every batch is stored, not
                # only the last one (and so that an empty iterator does not
                # hit an undefined ``batch``).
                output_batch = batch
                output_batch["input_grad"] = pred_batch
                writer.batch_write(output_batch)
    finally:
        # Close the writer even when a check above fails.
        writer.close()

    new_res = readers.HDF5Reader.load(new_ofname)
    ref_res = readers.HDF5Reader.load(model.source_dir +
                                      "/example_files/grads.hdf5")
    assert np.all(
        np.isclose(new_res['input_grad'],
                   (ref_res['inputs'] * ref_res['grads'])))

    if os.path.exists(new_ofname):
        os.unlink(new_ofname)
Ejemplo n.º 6
0
def test_mutation_map():
    """Generate a mutation map for the rbp example and check it.

    The map is built with ``mm._generate_mutation_map`` for the variant in
    ``example_files/first_variant.vcf``, saved to a temporary HDF5 file, read
    back and compared with the stored reference
    ``example_files/first_variant_mm.hdf5``. Finally ``plot_mutmap`` is
    called once using the ``agg`` matplotlib backend.

    Skipped on Python 2.
    """
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    # Take the rbp model
    model_dir = "examples/rbp/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

    dataloader_arguments = {
        "fasta_file": "example_files/hg38_chr22.fa",
        "preproc_transformer": "dataloader_files/encodeSplines.pkl",
        "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
    }
    # The prediction below runs from the current working directory, so the
    # paths are prefixed with the model directory.
    dataloader_arguments = {
        k: model_dir + v
        for k, v in dataloader_arguments.items()
    }

    # Run the actual predictions
    vcf_path = model_dir + "example_files/first_variant.vcf"

    model_info = kipoi.postprocessing.variant_effects.ModelInfoExtractor(
        model, Dataloader)
    vcf_to_region = kipoi.postprocessing.variant_effects.SnvCenteredRg(
        model_info)
    mdmm = mm._generate_mutation_map(
        model,
        Dataloader,
        vcf_path,
        dataloader_args=dataloader_arguments,
        evaluation_function=analyse_model_preds,
        batch_size=32,
        vcf_to_region=vcf_to_region,
        evaluation_function_kwargs={'diff_types': {
            'diff': Diff("mean")
        }})

    from kipoi.postprocessing.variant_effects.utils.generic import read_hdf5
    # Import the submodule explicitly: a bare ``import matplotlib`` does not
    # guarantee that ``matplotlib.pyplot`` is available as an attribute.
    import matplotlib.pyplot

    generated_fname = "example_files/first_variant_mm_totest.hdf5"
    reference_fname = "example_files/first_variant_mm.hdf5"
    with cd(model.source_dir):
        try:
            mdmm.save_to_file(generated_fname)
            reference = read_hdf5(reference_fname)
            # Read back the file that was just written; loading the reference
            # twice would make the comparison trivially succeed.
            obs = read_hdf5(generated_fname)
            compare_rec(reference[0], obs[0])
            matplotlib.pyplot.switch_backend('agg')
            mdmm.plot_mutmap(0, "seq", "diff", "rbp_prb")
        finally:
            # Remove the generated file even when a check above fails.
            if os.path.exists(generated_fname):
                os.unlink(generated_fname)
Ejemplo n.º 7
0
def test_var_eff_pred_varseq():
    """Predict SNV effects for ``var_seqlen_model`` and compare the VCF.

    ``SnvCenteredRg`` is expected to raise for this model (its sequence
    length is None), so ``predict_snvs`` is called with
    ``vcf_to_region=None``. The generated VCF is compared with the stored
    reference via ``compare_vcfs`` and then removed.

    Skipped on Python 2.
    """
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    model_dir = "examples/var_seqlen_model/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

    # All paths below are used from inside the model source directory.
    dataloader_arguments = dict(
        fasta_file="example_files/hg38_chr22.fa",
        preproc_transformer="dataloader_files/encodeSplines.pkl",
        gtf_file="example_files/gencode_v25_chr22.gtf.pkl.gz",
        intervals_file="example_files/variant_centered_intervals.tsv",
    )
    out_vcf_fpath = "example_files/variants_generated.vcf"
    ref_out_vcf_fpath = "example_files/variants_ref_out.vcf"

    veff = kipoi.postprocessing.variant_effects
    with cd(model.source_dir):
        vcf_path = veff.ensure_tabixed_vcf("example_files/variants.vcf")
        model_info = veff.ModelInfoExtractor(model, Dataloader)
        writer = veff.VcfWriter(model, vcf_path, out_vcf_fpath)

        vcf_to_region = None
        with pytest.raises(Exception):
            # This has to raise an exception as the sequence length is None.
            vcf_to_region = veff.SnvCenteredRg(model_info)

        diff_types = {'diff': Diff("mean")}
        sp.predict_snvs(
            model,
            Dataloader,
            vcf_path,
            dataloader_args=dataloader_arguments,
            evaluation_function=analyse_model_preds,
            batch_size=32,
            vcf_to_region=vcf_to_region,
            evaluation_function_kwargs={'diff_types': diff_types},
            sync_pred_writer=writer)
        writer.close()

        compare_vcfs(out_vcf_fpath, ref_out_vcf_fpath)
        os.unlink(out_vcf_fpath)
Ejemplo n.º 8
0
def test_var_eff_pred2():
    """Predict SNV effects for the rbp example on restricted regions.

    Regions are derived with ``SnvPosRestrictedRg`` from the BED file
    ``example_files/restricted_regions.bed``. The generated VCF is compared
    with the stored reference via ``compare_vcfs`` and then removed.

    Skipped on Python 2.
    """
    if sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    # Take the rbp model
    model_dir = "examples/rbp/"
    if INSTALL_REQ:
        install_model_requirements(model_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(model_dir, source="dir")
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

    # All paths below are used from inside the model source directory.
    dataloader_arguments = dict(
        fasta_file="example_files/hg38_chr22.fa",
        preproc_transformer="dataloader_files/encodeSplines.pkl",
        gtf_file="example_files/gencode_v25_chr22.gtf.pkl.gz",
    )
    vcf_path = "example_files/variants.vcf"
    out_vcf_fpath = "example_files/variants_generated2.vcf"
    ref_out_vcf_fpath = "example_files/variants_ref_out2.vcf"
    restricted_regions_fpath = "example_files/restricted_regions.bed"

    veff = kipoi.postprocessing.variant_effects
    with cd(model.source_dir):
        pbd = pb.BedTool(restricted_regions_fpath)
        model_info = veff.ModelInfoExtractor(model, Dataloader)
        vcf_to_region = veff.SnvPosRestrictedRg(model_info, pbd)
        writer = veff.utils.io.VcfWriter(model, vcf_path, out_vcf_fpath)

        diff_types = {'diff': Diff("mean")}
        sp.predict_snvs(
            model,
            Dataloader,
            vcf_path,
            dataloader_args=dataloader_arguments,
            evaluation_function=analyse_model_preds,
            batch_size=32,
            vcf_to_region=vcf_to_region,
            evaluation_function_kwargs={'diff_types': diff_types},
            sync_pred_writer=writer)
        writer.close()

        compare_vcfs(out_vcf_fpath, ref_out_vcf_fpath)
        os.unlink(out_vcf_fpath)
Ejemplo n.º 9
0
def test_gradient_function_model(example):
    """Call ``model.input_grad`` with several argument combinations.

    Runs on the first batch of the example's dataloader. The
    ``pre_nonlinearity=True`` call is left out when the keras backend is
    theano. Skipped for ``rbp`` on Python 2 and for ``rbp`` with the theano
    backend.
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    import keras
    backend = keras.backend._BACKEND
    on_theano = backend == 'theano'
    if on_theano and example == "rbp":
        pytest.skip("extended_coda example not with theano ")

    example_dir = "examples/{0}".format(example)

    # Requirements of the model including its dataloader.
    # TODO: maybe do this implicitly when the dataloader is loaded?
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")
    test_kwargs = get_test_kwargs(example_dir)

    # Requirements of the model alone.
    # TODO: maybe do this implicitly when the extractor is loaded?
    if INSTALL_REQ:
        install_model_requirements(example_dir, source="dir")

    model = kipoi.get_model(example_dir, source="dir")

    with cd("{0}/example_files".format(example_dir)):
        dataloader = Dl(**test_kwargs)
        first_batch = next(dataloader.batch_iter())
        inputs = first_batch["inputs"]

        model.predict_on_batch(inputs)
        if not on_theano:
            model.input_grad(inputs, Slice_conv()[:, 0],
                             pre_nonlinearity=True)
        model.input_grad(inputs, Slice_conv()[:, 0], pre_nonlinearity=False)
        # Same as Slice_conv()[:, 0]
        model.input_grad(inputs, 0, pre_nonlinearity=False)
        model.input_grad(inputs, avg_func="sum")
Ejemplo n.º 10
0
def test_activation_function_model(example):
    """Call ``model.predict_activation_on_batch`` on an example model.

    The activation is requested for the layer at index
    ``len(model.model.layers) - 2`` and, for the ``rbp`` example, also for
    the layer named ``"flatten_6"``. Skipped for ``rbp`` on Python 2 and for
    ``rbp`` with the theano backend.
    """
    if example == "rbp" and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    import keras
    backend = keras.backend._BACKEND
    if backend == 'theano' and example == "rbp":
        pytest.skip("extended_coda example not with theano ")

    example_dir = "examples/{0}".format(example)

    # Requirements of the model including its dataloader.
    # TODO: maybe do this implicitly when the dataloader is loaded?
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    Dl = kipoi.get_dataloader_factory(example_dir, source="dir")
    test_kwargs = get_test_kwargs(example_dir)

    # Requirements of the model alone.
    # TODO: maybe do this implicitly when the extractor is loaded?
    if INSTALL_REQ:
        install_model_requirements(example_dir, source="dir")

    model = kipoi.get_model(example_dir, source="dir")

    with cd("{0}/example_files".format(example_dir)):
        dataloader = Dl(**test_kwargs)
        first_batch = next(dataloader.batch_iter())
        inputs = first_batch["inputs"]

        model.predict_on_batch(inputs)
        model.predict_activation_on_batch(
            inputs, layer=len(model.model.layers) - 2)
        if example == "rbp":
            model.predict_activation_on_batch(inputs, layer="flatten_6")
Ejemplo n.º 11
0
def test_score():
    """Compare ``Gradient`` scores of ``tal1_model`` with stored gradients.

    Each batch is written, together with its scores under the key
    ``"grads"``, to ``example_files/grads_pred.hdf5`` in the model source
    directory. That file is then compared with ``example_files/grads.hdf5``
    via ``compare_numpy_dict`` and removed.
    """
    example = "tal1_model"
    layer = predict_activation_layers[example]
    example_dir = "example/models/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir", and_dataloaders=True)

    model = kipoi.get_model(example_dir, source="dir")
    Dataloader = kipoi.get_dataloader_factory(example_dir, source="dir")

    with open(example_dir + "/example_files/test.json", "r") as ifh:
        raw_arguments = json.load(ifh)

    # Prefix every argument so it resolves from the model source directory.
    dataloader_arguments = {
        key: "example_files/" + value
        for key, value in raw_arguments.items()
    }

    g = Gradient(model, None, layer=layer, avg_func="sum")

    pred_fname = model.source_dir + "/example_files/grads_pred.hdf5"
    ref_fname = model.source_dir + "/example_files/grads.hdf5"

    # Remove a stale output left behind by an earlier run.
    if os.path.exists(pred_fname):
        os.unlink(pred_fname)

    writer = writers.HDF5BatchWriter(file_path=pred_fname)

    with kipoi_utils.utils.cd(model.source_dir):
        loader = Dataloader(**dataloader_arguments)
        batches = loader.batch_iter(batch_size=32, num_workers=0)
        for batch in tqdm(batches):
            # The scores are stored in the batch dict itself before writing.
            batch["grads"] = g.score(batch['inputs'])
            writer.batch_write(batch)
        writer.close()

    predicted = readers.HDF5Reader.load(pred_fname)
    expected = readers.HDF5Reader.load(ref_fname)
    kipoi_utils.utils.compare_numpy_dict(predicted, expected)

    if os.path.exists(pred_fname):
        os.unlink(pred_fname)
Ejemplo n.º 12
0
def test_load_model(example):
    """Load an example model and access its public attributes.

    Nothing is asserted about the attribute values; the test only checks
    that reading them does not raise. ``arch`` and ``weights`` are read only
    for ``kipoi.model.KerasModel`` instances.
    """
    if example in {"rbp", "iris_model_template"} and sys.version_info[0] == 2:
        pytest.skip("example not supported on python 2 ")

    example_dir = "examples/{0}".format(example)
    if INSTALL_REQ:
        install_model_requirements(example_dir, "dir")

    m = kipoi.get_model(example_dir, source="dir")

    if isinstance(m, kipoi.model.KerasModel):
        for attr in ("arch", "weights"):
            getattr(m, attr)

    for attr in ("info", "schema"):
        getattr(m, attr)
    m.schema.inputs
    for attr in ("source", "default_dataloader", "model", "predict_on_batch"):
        getattr(m, attr)
Ejemplo n.º 13
0
    ]
    #
    for regions in [regions_dict, regions_gr]:
        for exclude_indels, ref_res, ref_lines in zip(
            [False, True], [plus_indel_results, snv_results],
            [ref_lines_indel, snv_ref_lines]):
            found_vars, overlapping_region = sp._overlap_vcf_region(
                vcf_obj, regions, exclude_indels)
            assert all([
                str(el1) == str(el2) for el1, el2 in zip(ref_res, found_vars)
                if not el1.is_indel
            ])
            assert overlapping_region == ref_lines


"""
# Take the rbp model
model_dir = "examples/rbp/"
install_model_requirements(model_dir, "dir", and_dataloaders=True)

model = kipoi.get_model(model_dir, source="dir")
# The preprocessor
Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")

dataloader_arguments = {
    "fasta_file": "example_files/hg38_chr22.fa",
    "preproc_transformer": "dataloader_files/encodeSplines.pkl",
    "gtf_file": "example_files/gencode_v25_chr22.gtf.pkl.gz",
}

# Run the actual predictions