예제 #1
0
def test_output_tsv_score_across_conditions(tmpdir):
    """Check TSV export for per-condition and across-condition scoring.

    Two scorers that always return 0.15 are evaluated on an output with
    the two conditions 'c1' and 'c2'.  The first scorer uses the default
    mode, the second is created with ``percondition=False``.  The shapes
    of the resulting TSV tables are then verified.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y', numpy.random.randint(2, size=(100, 2)),
                   conditions=['c1', 'c2'])

    model = get_janggu(features, labels)

    per_condition = Scorer('score', lambda y_true, y_pred: 0.15,
                           exporter=ExportTsv())
    across_conditions = Scorer('scoreacross',
                               lambda y_true, y_pred: 0.15,
                               exporter=ExportTsv(),
                               percondition=False)

    model.evaluate(features, labels,
                   callbacks=[per_condition, across_conditions])

    # Both scorers export into the same model-specific folder.
    result_dir = os.path.join(tmpdir.strpath, "evaluation", model.name)

    # Default scorer (percondition not overridden): one row, two columns.
    per_condition_table = pandas.read_csv(
        os.path.join(result_dir, "score.tsv"), sep='\t', header=[0])
    assert per_condition_table.shape == (1, 2)

    # percondition=False: a single 'across' column holding the score.
    across_table = pandas.read_csv(
        os.path.join(result_dir, "scoreacross.tsv"), sep='\t', header=[0])
    assert across_table['across'][0] == .15
    assert across_table.shape == (1, 1)
예제 #2
0
def test_output_score_by_name(tmpdir):
    """Evaluate with scorers referenced by name and check the exports.

    Besides the named scorers, a custom scorer created with
    ``immediate_export=False`` is used; its JSON file must only appear
    after ``export`` has been called explicitly.  An unknown scorer name
    must raise ``ValueError``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y', numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    model = get_janggu(features, labels)

    deferred = Scorer('score', lambda y_true, y_pred: 0.15,
                      immediate_export=False)

    named_scorers = ['auc', 'roc', 'prc',
                     'auprc', 'auroc',
                     'cor', 'mae', 'mse',
                     'var_explained']
    model.evaluate(features, labels, callbacks=named_scorers + [deferred])

    result_dir = os.path.join(tmpdir.strpath, "evaluation", model.name)

    # Files that the named scorers are expected to have written.
    for produced in ("auc.tsv", "prc.png", "roc.png", "cor.tsv",
                     "mae.tsv", "mse.tsv", "var_explained.tsv",
                     "auprc.tsv"):
        assert os.path.exists(os.path.join(result_dir, produced))

    # The custom scorer has not been exported yet ...
    score_json = os.path.join(result_dir, "score.json")
    assert not os.path.exists(score_json)

    # ... and shows up only after the explicit export call.
    deferred.export(os.path.join(tmpdir.strpath, deferred.subdir), model.name)
    assert os.path.exists(score_json)

    with pytest.raises(ValueError):
        model.evaluate(features, labels, callbacks=['adsf'])
예제 #3
0
def test_output_export_score_plot(tmpdir):
    """Check that ExportScorePlot writes a plot with and without options.

    The score function returns a fixed triple of lists.  It is exported
    once with a default ``ExportScorePlot`` and once with explicit figure
    size, axis labels and the 'eps' file format.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y', numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    model = get_janggu(features, labels)

    def fake_curve(y_true, y_pred):
        # Constant result, independent of the actual labels/predictions.
        return ([0., 0.5, 0.5, 1.],
                [0.5, 0.5, 1., 1.],
                [0.8, 0.4, 0.35, 0.1])

    # First run: exporter with default settings.
    default_plot = Scorer('score', fake_curve, exporter=ExportScorePlot())
    model.evaluate(features, labels, callbacks=[default_plot])

    # Second run: customised exporter writing an eps file.
    custom_exporter = ExportScorePlot(figsize=(10, 12),
                                      xlabel='FPR',
                                      ylabel='TPR',
                                      fform='eps')
    custom_plot = Scorer('score', fake_curve, exporter=custom_exporter)
    model.evaluate(features, labels, callbacks=[custom_plot])

    # Both plot files must have been produced.
    plot_dir = os.path.join(tmpdir.strpath, "evaluation", model.name)
    for plot_file in ("score.png", "score.eps"):
        assert os.path.exists(os.path.join(plot_dir, plot_file))
예제 #4
0
def test_output_json_score(tmpdir):
    """Check the JSON export of a scorer that uses the default exporter.

    A scorer without a scoring function must make ``evaluate`` raise
    ``ValueError``.  A scorer with a function is then evaluated and the
    written ``score.json`` is loaded and inspected.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y', numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    model = get_janggu(features, labels)

    # Evaluating a scorer that has no scoring function is an error.
    no_function = Scorer('score')
    with pytest.raises(ValueError):
        model.evaluate(features, labels, callbacks=[no_function])

    constant = Scorer('score', lambda y_true, y_pred: 0.15)
    model.evaluate(features, labels, callbacks=[constant])

    json_path = os.path.join(tmpdir.strpath, "evaluation", model.name,
                             "score.json")
    with open(json_path, 'r') as handle:
        content = json.load(handle)

    # The condition name must be contained in the exported content.
    assert 'random' in content
예제 #5
0
def test_output_export_tsne(tmpdir):
    """Check that ExportTsne writes plots for the 'hidden' layer output.

    A small model with a 3-unit dense layer named 'hidden' is created.
    Predictions for that layer are exported once with a default
    ``ExportTsne`` and once with 'eps' format, annotation and figure size.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y',
                   numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        with inputs.use('x') as layer:
            hidden = Dense(3, name='hidden')(layer)
        return inputs, hidden

    model = Janggu.create(_model, inputs=features, outputs=labels,
                          name='nptest')
    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    # First run: exporter with default settings.
    default_tsne = Scorer('tsne', exporter=ExportTsne())
    model.predict(features, layername='hidden', callbacks=[default_tsne])

    # Second run: eps output with an annotation for the 100 samples.
    annotated_exporter = ExportTsne(fform='eps',
                                    annot={'annot': [1] * 50 + [0] * 50},
                                    figsize=(10, 10))
    annotated_tsne = Scorer('tsne', exporter=annotated_exporter)
    model.predict(features, layername='hidden', callbacks=[annotated_tsne])

    # Both plot files must have been produced in the layer's subfolder.
    plot_dir = os.path.join(tmpdir.strpath, "evaluation", model.name,
                            'hidden')
    for plot_file in ("tsne.png", "tsne.eps"):
        assert os.path.exists(os.path.join(plot_dir, plot_file))
예제 #6
0
def test_output_bigwig_loss_resolution_unequal_stepsize(tmpdir):
    """Check the bigwig export of a constant per-sample score.

    The genomic indexer is built with binsize=200 and stepsize=50.  A
    scorer returning 0.2 for every sample is exported with
    ``ExportBigwig``; one file per condition must exist and the mean
    coverage read back from the 'c1' file must equal 0.2.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=50)

    dummy_eval = Scorer('loss', lambda t, p: [0.2] * len(t),
                        exporter=ExportBigwig(gindexer=gi))

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    # Close the bigwig handle even if reading fails, so the file in the
    # temporary directory is not left open.
    bw = pyBigWig.open(file_.format('c1'))
    try:
        co = bw.values('chr1', 600, 2000-150)
    finally:
        bw.close()

    numpy.testing.assert_allclose(numpy.mean(co), 0.2, rtol=1e-5)
예제 #7
0
def test_output_tsv_score(tmpdir):
    """Check that a constant score of 0.15 is written to the TSV export."""
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y', numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    model = get_janggu(features, labels)

    constant = Scorer('score', lambda y_true, y_pred: 0.15,
                      exporter=ExportTsv())
    model.evaluate(features, labels, callbacks=[constant])

    tsv_path = os.path.join(tmpdir.strpath, "evaluation", model.name,
                            "score.tsv")
    table = pandas.read_csv(tsv_path, sep='\t', header=[0])

    # The first cell of the table holds the exported score.
    assert table.iloc[0, 0] == 0.15
예제 #8
0
def test_output_bed_loss_resolution_equal_stepsize(tmpdir):
    """Check the bed export of a constant per-sample score.

    The genomic indexer uses binsize=200 and stepsize=200 and the
    exporter is called with resolution=200.  One bed file per condition
    must exist, and the 'c1' file must hold 7 regions with score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((7, 1, 1, 10)))
    labels = Array('y',
                   numpy.random.random((7, 1, 1, 4)),
                   conditions=['c1', 'c2', 'c3', 'c4'])

    model = get_janggu_conv(features, labels)
    regions_file = pkg_resources.resource_filename(
        'janggu', 'resources/10regions.bed')

    indexer = GenomicIndexer.create_from_file(regions_file,
                                              binsize=200, stepsize=200)

    constant_loss = Scorer('loss',
                           lambda t, p: [0.1] * len(t),
                           exporter=export_bed)

    # The indexer and resolution are handed to the exporter via kwargs.
    model.evaluate(features,
                   labels,
                   callbacks=[constant_loss],
                   exporter_kwargs={
                       'gindexer': indexer,
                       'resolution': 200
                   })

    bed_template = os.path.join(tmpdir.strpath, 'evaluation', model.name,
                                'loss.nptest.y.{}.bed')

    for condition in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(bed_template.format(condition))

    # Every region must carry the constant score; count them on the way.
    region_count = 0
    records = iter(HTSeq.BED_Reader(bed_template.format('c1')))
    for region_count, record in enumerate(records, start=1):
        numpy.testing.assert_equal(record.score, 0.1)

    assert region_count == 7, 'There should be 7 regions in the bed file.'
예제 #9
0
def test_output_bed_loss_resolution_unequal_stepsize(tmpdir):
    """Check the bed export when the resolution is below the stepsize.

    The genomic indexer uses binsize=200 and stepsize=200 while
    ``ExportBed`` is created with resolution=50.  One bed file per
    condition must exist, and the 'c1' file must hold 28 regions with
    score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((7, 4, 1, 10)))
    labels = Array('y', numpy.random.random((7, 4, 1, 4)),
                   conditions=['c1', 'c2', 'c3', 'c4'])

    model = get_janggu(features, labels)
    regions_file = pkg_resources.resource_filename(
        'janggu', 'resources/10regions.bed')

    indexer = GenomicIndexer.create_from_file(regions_file,
                                              binsize=200,
                                              stepsize=200)

    bed_exporter = ExportBed(gindexer=indexer, resolution=50)
    constant_loss = Scorer('loss', lambda t, p: [0.1] * len(t),
                           exporter=bed_exporter)

    model.evaluate(features, labels, callbacks=[constant_loss])

    bed_template = os.path.join(tmpdir.strpath, 'evaluation', model.name,
                                'loss.{}.bed')

    for condition in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(bed_template.format(condition))

    # Every region must carry the constant score; count them on the way.
    region_count = 0
    intervals = BedTool(bed_template.format('c1'))
    for region_count, interval in enumerate(intervals, start=1):
        numpy.testing.assert_equal(float(interval.score), 0.1)

    assert region_count == 28, 'There should be 28 regions in the bed file.'
예제 #10
0
def test_output_score_by_name(tmpdir):
    """Evaluate with scorers referenced by name and check the exports.

    The model is evaluated with a list of scorer names; afterwards the
    files these scorers are expected to write must exist in the model's
    evaluation folder.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y',
                    numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    bwm.evaluate(inputs,
                 outputs,
                 callbacks=[
                     'auc', 'prc', 'auprc', 'auroc', 'cor', 'mae', 'mse',
                     'var_explained'
                 ])

    outdir = os.path.join(tmpdir.strpath, "evaluation", bwm.name)

    # The existence checks were previously bare expressions whose result
    # was thrown away, so the test could never fail.  They are asserted
    # now.
    # NOTE(review): the earlier code probed for "auc.png"; the 'auc'
    # scorer presumably exports a table like the other scalar scores, so
    # "auc.tsv" is expected here -- confirm against the exporter used.
    expected_files = [
        "auc.tsv",
        "prc.png",
        "cor.tsv",
        "mae.tsv",
        "mse.tsv",
        "var_explained.tsv",
        "auprc.tsv",
        "auroc.tsv",
    ]
    missing = [fname for fname in expected_files
               if not os.path.exists(os.path.join(outdir, fname))]
    assert not missing, 'Missing evaluation files: {}'.format(missing)
예제 #11
0
def test_output_bed_predict_denseout(tmpdir):
    """Check the bed export of predictions from a dense output layer.

    The genomic indexer uses binsize=200 and stepsize=200 and
    ``ExportBed`` is created with resolution=200.  One bed file per
    condition must exist, and the 'c1' file must hold 7 regions with
    score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((7, 10)))
    labels = Array('y',
                   numpy.random.random((7, 4)),
                   conditions=['c1', 'c2', 'c3', 'c4'])

    model = get_janggu(features, labels)
    regions_file = pkg_resources.resource_filename(
        'janggu', 'resources/10regions.bed')

    indexer = GenomicIndexer.create_from_file(regions_file,
                                              binsize=200, stepsize=200)

    bed_exporter = ExportBed(gindexer=indexer, resolution=200)
    constant_pred = Scorer('pred',
                           lambda p: [0.1] * len(p),
                           exporter=bed_exporter,
                           conditions=['c1', 'c2', 'c3', 'c4'])

    model.predict(features, callbacks=[constant_pred])

    bed_template = os.path.join(tmpdir.strpath, 'evaluation', model.name,
                                'pred.nptest.y.{}.bed')

    for condition in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(bed_template.format(condition))

    # Every region must carry the constant score; count them on the way.
    region_count = 0
    records = iter(HTSeq.BED_Reader(bed_template.format('c1')))
    for region_count, record in enumerate(records, start=1):
        numpy.testing.assert_equal(record.score, 0.1)

    assert region_count == 7, 'There should be 7 regions in the bed file.'
예제 #12
0
def test_output_bigwig_predict_denseout(tmpdir):
    """Check the bigwig export of predictions from a dense output layer.

    The genomic indexer uses binsize=200 and stepsize=200 and is handed
    to ``export_bigwig`` through ``exporter_kwargs``.  One bigwig file
    per condition must exist and the mean coverage read back from the
    'c1' file must equal 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((7, 10)))
    outputs = Array('y',
                    numpy.random.random((7, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path, binsize=200, stepsize=200)

    dummy_eval = Scorer('pred',
                        lambda p: [0.1] * len(p),
                        exporter=export_bigwig,
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs,
                callbacks=[dummy_eval],
                exporter_kwargs={'gindexer': gi})

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.nptest.y.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    # Close the bigwig handle even if reading fails, so the file in the
    # temporary directory is not left open.
    bw = pyBigWig.open(file_.format('c1'))
    try:
        co = bw.values('chr1', 600, 2000)
    finally:
        bw.close()

    numpy.testing.assert_allclose(numpy.mean(co), 0.1, rtol=1e-5)