Exemple #1
0
def test_output_tsv_score(tmpdir):
    """Check that a score exported through ``ExportTsv`` ends up in ``score.tsv``.

    A scorer that always returns 0.15 is passed to ``evaluate``; the first
    cell of the resulting tab-separated table must hold that value.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y', numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    bwm = get_janggu(features, labels)

    # The scorer ignores its arguments so the exported value is known upfront.
    constant_scorer = Scorer('score', lambda y_true, y_pred: 0.15,
                             exporter=ExportTsv())
    bwm.evaluate(features, labels, callbacks=[constant_scorer])

    tsv_path = os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                            "score.tsv")
    table = pandas.read_csv(tsv_path, sep='\t', header=[0])
    assert table.iloc[0, 0] == 0.15
Exemple #2
0
def test_janggu_train_predict_sequence(tmpdir):
    """Fit, predict and evaluate a model that is fed by a ``JangguSequence``.

    The model is created with ``Janggu.create`` from the datasets held by the
    sequence, and the same sequence is reused as validation data during
    fitting.  Fitting is expected to create the model's storage path.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", np.random.random((100, 10)))
    labels = Array('y',
                   np.random.randint(2, size=(100, 1)),
                   conditions=['random'])
    inputs = {'x': features}
    outputs = {'y': labels}

    jseq = JangguSequence(10, inputs, outputs)

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        # Hand the first input directly to the decorated output layer.
        return inputs, inputs[0]

    bwm = Janggu.create(_model,
                        inputs=jseq.inputs['x'],
                        outputs=jseq.outputs['y'],
                        name='nptest')
    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)

    # Diagnostic output; pytest only shows it when the test fails.
    print('storage', storage)
    print('env', os.environ['JANGGU_OUTPUT'])
    print('name', bwm.name)
    print('outputdir', bwm.outputdir)

    # Nothing may be stored before the first fit.
    assert not os.path.exists(storage)
    bwm.fit(jseq, epochs=2, validation_data=jseq, use_multiprocessing=False)
    assert os.path.exists(storage)

    pred = bwm.predict(jseq, use_multiprocessing=False)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs['x']))
    np.testing.assert_equal(pred.shape, outputs['y'].shape)

    bwm.evaluate(jseq, use_multiprocessing=False)
def test_output_bed_loss_resolution_equal_stepsize(tmpdir):
    """Export a constant loss to BED files with resolution equal to stepsize.

    The genomic indexer uses ``stepsize=200`` and the exporter is given
    ``resolution=200``.  One BED file per condition must be written, and the
    file for ``c1`` must hold 7 records that all carry the score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    expected_conditions = ('c1', 'c2', 'c3', 'c4')

    inputs = Array("x", numpy.random.random((7, 1, 1, 10)))
    outputs = Array('y',
                    numpy.random.random((7, 1, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu_conv(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path, binsize=200, stepsize=200)

    # A constant per-sample loss makes the expected BED scores trivial.
    dummy_eval = Scorer('loss',
                        lambda t, p: [0.1] * len(t),
                        exporter=export_bed)

    export_options = {'gindexer': gi, 'resolution': 200}
    bwm.evaluate(inputs,
                 outputs,
                 callbacks=[dummy_eval],
                 exporter_kwargs=export_options)

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.nptest.y.{}.bed')

    for cond in expected_conditions:
        assert os.path.exists(file_.format(cond))

    # Walk through the records of the first condition and count them.
    nreg = 0
    for reg in iter(HTSeq.BED_Reader(file_.format('c1'))):
        numpy.testing.assert_equal(reg.score, 0.1)
        nreg += 1

    assert nreg == 7, 'There should be 7 regions in the bed file.'
Exemple #4
0
def test_nantonumconverter():
    """Check that ``NanToNumConverter`` removes NaNs and keeps the array interface."""
    raw = np.zeros((3, 1, 1, 2))
    raw[0, 0, 0, 0] = np.nan

    # The unwrapped Array still exposes the NaN.
    plain = Array('test', raw, conditions=["A", "B"])
    assert np.isnan(plain[0].mean())

    x_tr = NanToNumConverter(Array('test', raw, conditions=["A", "B"]))

    # Integer, slice and list indexing all return 4D chunks.
    assert x_tr[0].shape == (1, 1, 1, 2)
    assert x_tr[:3].shape == (3, 1, 1, 2)
    assert x_tr[[0, 1]].shape == (2, 1, 1, 2)

    # Size-related attributes mirror the wrapped data.
    assert len(x_tr) == 3
    assert x_tr.shape == (3, 1, 1, 2)
    assert x_tr.ndim == 4

    # The NaN of the first sample is no longer visible after conversion.
    assert not np.isnan(x_tr[0].mean())
    np.testing.assert_equal(x_tr[0], [[[[0, 0]]]])

    # A copy of the wrapper must behave like the wrapper itself.
    duplicate = copy(x_tr)
    assert x_tr[0].shape == duplicate[0].shape
    assert x_tr.conditions == ["A", "B"]
Exemple #5
0
def test_janggu_train_predict_option0(tmpdir):
    """Train, predict and evaluate on dummy data with a ``ReduceDim`` output.

    The model is built with ``Janggu.create`` from an ``Array`` input and a
    ``ReduceDim``-wrapped ``Array`` output.  After evaluating with the
    ``'auc'`` callback, the exported table must use the output's condition
    name as its first column.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("X", np.random.random((100, 10)))
    outputs = ReduceDim(Array('y',
                              np.random.randint(2, size=(100, 1))[:, None],
                              conditions=['random']),
                        axis=(1, ))

    @inputlayer
    @outputdense('sigmoid')
    def test_model(inputs, inp, oup, params):
        # Hand the first input directly to the decorated output layer.
        return inputs, inputs[0]

    bwm = Janggu.create(test_model,
                        inputs=inputs,
                        outputs=outputs,
                        name='nptest')

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Nothing may be stored before the first fit.
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage)

    bwm.fit(inputs, outputs, epochs=2, batch_size=32)

    assert os.path.exists(storage)

    pred = bwm.predict(inputs)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
    np.testing.assert_equal(pred.shape, outputs.shape)

    # test if the condition name is correctly used in the output table
    bwm.evaluate(inputs, outputs, callbacks=['auc'])

    outputauc = os.path.join(tmpdir.strpath, 'evaluation', 'nptest', 'auc.tsv')
    assert os.path.exists(outputauc)
    assert pd.read_csv(outputauc).columns[0] == 'random'
Exemple #6
0
def test_janggu_influence_genomic(tmpdir):
    """Check ``input_attribution`` on a genomic interval and a widened one.

    The attribution is computed for the first region of the DNA dataset and
    again for the same region extended by one position on each side.  The
    first result must equal the second with its outermost positions along
    the second axis trimmed.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')

    csvfile = os.path.join(data_path, 'sample.csv')

    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna',
                                       refgenome=refgenome,
                                       storage='ndarray',
                                       binsize=50,
                                       roi=bed_file,
                                       order=1)

    df = pd.read_csv(csvfile, header=None)
    ctcf = Array('ctcf', df.values, conditions=['peaks'])

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs['dna']
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    model = Janggu.create(_cnn_model,
                          modelparams=(2, ),
                          inputs=dna,
                          outputs=ctcf,
                          name='dna_ctcf_HepG2-cnn')

    model.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Attribution for exactly the first indexed region.
    iv = dna.gindexer[0]
    chrom, start, end = iv.chrom, iv.start, iv.end
    influence = input_attribution(model,
                                  dna,
                                  chrom=chrom,
                                  start=start,
                                  end=end)

    # Attribution for the same region widened by one position on each side.
    influence2 = input_attribution(model,
                                   dna,
                                   chrom=chrom,
                                   start=start - 1,
                                   end=end + 1)

    # Trimming the extra flanking positions must recover the first result.
    np.testing.assert_equal(influence[0][:], influence2[0][:][:, 1:-1])
def test_output_json_score(tmpdir):
    """Check that a scorer created without an exporter yields ``score.json``.

    The JSON file written during ``evaluate`` must contain an entry keyed by
    ``'nptest-y-random'``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    features = Array("x", numpy.random.random((100, 10)))
    labels = Array('y',
                   numpy.random.randint(2, size=(100, 1)),
                   conditions=['random'])

    bwm = get_janggu(features, labels)

    # No exporter is passed here; the test expects a JSON file as the result.
    constant_scorer = Scorer('score', lambda y_true, y_pred: 0.15)
    bwm.evaluate(features, labels, callbacks=[constant_scorer])

    json_path = os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                             "score.json")
    with open(json_path, 'r') as handle:
        content = json.load(handle)

    # The evaluated model/output/condition combination must be recorded.
    assert 'nptest-y-random' in content
Exemple #8
0
def test_reducedim():
    """Check construction of ``ReduceDim`` with several aggregators and its interface.

    Construction must succeed with the default aggregator, with the names
    ``'mean'`` and ``'max'`` and with a callable, and must raise
    ``ValueError`` for an unknown name.  The interface checks run on the
    instance built with ``np.mean``.
    """
    x_orig = np.zeros((3, 1, 1, 2))
    np.testing.assert_equal(x_orig.ndim, 4)

    def _wrapped():
        # A fresh Array per construction, as each wrapper gets its own dataset.
        return Array('test', x_orig, conditions=["A", "B"])

    ReduceDim(_wrapped())
    for agg_name in ('mean', 'max'):
        ReduceDim(_wrapped(), aggregator=agg_name)
    x_reduce = ReduceDim(_wrapped(), aggregator=np.mean)

    with pytest.raises(ValueError):
        ReduceDim(_wrapped(), aggregator='nonsense')

    # The wrapped 4D data is presented as a 2D array.
    np.testing.assert_equal(len(x_reduce), 3)
    np.testing.assert_equal(x_reduce.shape, (3, 2))
    np.testing.assert_equal(x_reduce.ndim, 2)

    assert x_reduce[0].shape == (1, 2)
    assert x_reduce[:3].shape == (3, 2)
    assert x_reduce[[0, 1]].shape == (2, 2)
    assert x_reduce.ndim == 2

    # A copy of the wrapper must behave like the wrapper itself.
    duplicate = copy(x_reduce)
    assert x_reduce[0].shape == duplicate[0].shape
    assert x_reduce.conditions == ["A", "B"]
Exemple #9
0
def test_output_bed_loss_resolution_unequal_stepsize(tmpdir):
    """Export a constant loss to BED files with resolution smaller than stepsize.

    The genomic indexer uses ``stepsize=200`` while the exporter is created
    with ``resolution=50``.  One BED file per condition must be written, and
    the file for ``c1`` must hold 28 records that all carry the score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    expected_conditions = ('c1', 'c2', 'c3', 'c4')

    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y', numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path,
                                         binsize=200,
                                         stepsize=200)

    # A constant per-sample loss makes the expected BED scores trivial.
    bed_exporter = ExportBed(gindexer=gi, resolution=50)
    dummy_eval = Scorer('loss', lambda t, p: [0.1] * len(t),
                        exporter=bed_exporter)

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.{}.bed')

    for cond in expected_conditions:
        assert os.path.exists(file_.format(cond))

    # Walk through the records of the first condition and count them.
    nreg = 0
    for reg in BedTool(file_.format('c1')):
        numpy.testing.assert_equal(float(reg.score), 0.1)
        nreg += 1

    assert nreg == 28, 'There should be 28 regions in the bed file.'
Exemple #10
0
def test_janggu_train_predict_option6(tmpdir):
    """Fit, predict and evaluate with plain ``Array`` datasets.

    The model is built with ``Janggu.create`` and every training, prediction
    and evaluation call is made with ``use_multiprocessing=False``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", np.random.random((100, 10)))
    outputs = Array('y', np.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        # Hand the first input directly to the decorated output layer.
        return inputs, inputs[0]

    bwm = Janggu.create(_model, inputs=inputs, outputs=outputs,
                        name='nptest')
    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    # Nothing may be stored before the first fit.
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage)

    bwm.fit(inputs, outputs,
            epochs=2,
            batch_size=32,
            use_multiprocessing=False)
    assert os.path.exists(storage)

    pred = bwm.predict(inputs, use_multiprocessing=False)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
    np.testing.assert_equal(pred.shape, outputs.shape)

    bwm.evaluate(inputs, outputs, use_multiprocessing=False)
Exemple #11
0
def test_janggu_train_predict_option5(tmpdir):
    """Fit, predict and evaluate with datasets passed as single-element lists.

    The model is instantiated directly through the ``Janggu`` constructor
    from explicitly created input and output layers rather than through
    ``Janggu.create``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", np.random.random((100, 10)))
    outputs = Array('y',
                    np.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    def _model():
        # Layer names match the dataset names 'x' and 'y'.
        in_layer = Input((10, ), name='x')
        out_layer = Dense(1, name='y', activation='sigmoid')(in_layer)
        compiled = Janggu(inputs=in_layer, outputs=out_layer,
                          name='test_model')
        compiled.compile(optimizer='adadelta',
                         loss='binary_crossentropy',
                         metrics=['accuracy'])
        return compiled

    bwm = _model()

    # Nothing may be stored before the first fit.
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)
    assert not os.path.exists(storage)

    bwm.fit([inputs], [outputs],
            epochs=2,
            batch_size=32,
            use_multiprocessing=False)
    assert os.path.exists(storage)

    pred = bwm.predict([inputs], use_multiprocessing=False)
    np.testing.assert_equal(len(pred[:, np.newaxis]), len(inputs))
    np.testing.assert_equal(pred.shape, outputs.shape)

    bwm.evaluate([inputs], [outputs], use_multiprocessing=False)
Exemple #12
0
def test_randomsignalscale():
    """Check that ``RandomSignalScale`` keeps the interface of the wrapped array.

    Shapes for integer, slice and list indexing, the length, ``shape`` and
    ``ndim`` must match the 4D input, a copy must behave alike, and
    ``conditions`` must be ``None`` because the wrapped ``Array`` was
    created without conditions.
    """
    x_orig = np.ones((3, 1, 1, 2))

    x_tr = RandomSignalScale(Array('test', x_orig), .1)

    assert x_tr[0].shape == (1, 1, 1, 2)
    assert x_tr[:3].shape == (3, 1, 1, 2)
    assert x_tr[[0, 1]].shape == (2, 1, 1, 2)
    assert len(x_tr) == 3
    assert x_tr.shape == (3, 1, 1, 2)
    assert x_tr.ndim == 4

    # A copy of the wrapper must behave like the wrapper itself.
    new_x = copy(x_tr)
    assert x_tr[0].shape == new_x[0].shape

    # Identity comparison is the correct way to test for None.
    assert x_tr.conditions is None
Exemple #13
0
def test_output_bed_predict_denseout(tmpdir):
    """Export constant prediction scores of a 2D output to BED files.

    The scorer is attached to ``predict`` and given the condition names
    explicitly.  The indexer stepsize and the exporter resolution are both
    200.  One BED file per condition must be written, and the file for
    ``c1`` must hold 7 records that all carry the score 0.1.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    expected_conditions = ('c1', 'c2', 'c3', 'c4')

    inputs = Array("x", numpy.random.random((7, 10)))
    outputs = Array('y',
                    numpy.random.random((7, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)

    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')
    gi = GenomicIndexer.create_from_file(data_path, binsize=200, stepsize=200)

    # A constant per-sample score makes the expected BED content trivial.
    bed_exporter = ExportBed(gindexer=gi, resolution=200)
    dummy_eval = Scorer('pred',
                        lambda p: [0.1] * len(p),
                        exporter=bed_exporter,
                        conditions=['c1', 'c2', 'c3', 'c4'])

    bwm.predict(inputs, callbacks=[dummy_eval])

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'pred.nptest.y.{}.bed')

    for cond in expected_conditions:
        assert os.path.exists(file_.format(cond))

    # Walk through the records of the first condition and count them.
    nreg = 0
    for reg in iter(HTSeq.BED_Reader(file_.format('c1'))):
        numpy.testing.assert_equal(reg.score, 0.1)
        nreg += 1

    assert nreg == 7, 'There should be 7 regions in the bed file.'
Exemple #14
0
def test_output_score_by_name(tmpdir):
    """Run ``evaluate`` with scorers that are selected by their string names.

    As written, the test only verifies that evaluation with the named
    callbacks completes without raising; see the review notes below.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y',
                    numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    # NOTE(review): this scorer is created but never handed to evaluate.
    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15)

    named_scorers = [
        'auc', 'prc', 'auprc', 'auroc', 'cor', 'mae', 'mse', 'var_explained'
    ]
    bwm.evaluate(inputs, outputs, callbacks=named_scorers)

    # NOTE(review): the results of these existence checks are discarded, so
    # no output file is actually verified.  Before turning them into asserts,
    # confirm which file name and extension each named scorer writes.
    result_files = (
        "auc.png",
        "prc.png",
        "cor.tsv",
        "mae.tsv",
        "mse.tsv",
        "var_explained.tsv",
        "auprc.tsv",
        "auroc.tsv",
    )
    for fname in result_files:
        os.path.exists(
            os.path.join(tmpdir.strpath, "evaluation", bwm.name, fname))
Exemple #15
0
def test_output_export_tsne(tmpdir):
    """Check that ``ExportTsne`` writes plots for the activations of a named layer.

    Predictions are taken from the layer ``'hidden'`` twice: once with a
    default exporter and once with an exporter configured for ``eps``
    output, an annotation and a figure size.  Both ``tsne.png`` and
    ``tsne.eps`` must exist afterwards.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    @inputlayer
    @outputdense('sigmoid')
    def _model(inputs, inp, oup, params):
        # The dense layer is named so predict can address it via layername.
        with inputs.use('x') as layer:
            outputs = Dense(3, name='hidden')(layer)
        return inputs, outputs

    bwm = Janggu.create(_model, inputs=inputs, outputs=outputs,
                        name='nptest')
    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')

    # First run: exporter with default settings.
    default_scorer = Scorer('tsne', exporter=ExportTsne())
    bwm.predict(inputs, layername='hidden', callbacks=[default_scorer])

    # Second run: eps output with a per-sample annotation.
    eps_exporter = ExportTsne(fform='eps',
                              annot={'annot': [1]*50 + [0]*50},
                              figsize=(10, 10))
    annotated_scorer = Scorer('tsne', exporter=eps_exporter)
    bwm.predict(inputs, layername='hidden', callbacks=[annotated_scorer])

    # check if plot was produced
    png_path = os.path.join(tmpdir.strpath,
                            "evaluation", bwm.name, 'hidden',
                            "tsne.png")
    assert os.path.exists(png_path)
    eps_path = os.path.join(tmpdir.strpath,
                            "evaluation", bwm.name, 'hidden',
                            "tsne.eps")
    assert os.path.exists(eps_path)
Exemple #16
0
def test_sequence_config():
    """Check the batches produced by ``JangguSequence`` in three configurations.

    The sequence is built from the raw ``.data`` of the datasets, from the
    datasets themselves with ``as_dict=False``, and from the datasets with
    ``as_dict=True``.  With 100 samples and ``batch_size=10`` each sequence
    must have length 10 and its first batch must hold 10 samples.
    """
    inputs = Array("x", np.random.random((100, 10)))
    outputs = Array('y',
                    np.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    def _check_first_batch(seq, x_key, y_key):
        # Only the first batch is inspected; the loop is left right after it.
        assert len(seq) == 10
        for x, y, _ in seq:
            assert x[x_key].shape == (10, 10)
            assert y[y_key].shape == (10, 1)
            break

    # Raw data, batches indexed by position.
    _check_first_batch(
        JangguSequence(inputs.data, outputs.data, batch_size=10,
                       as_dict=False),
        0, 0)

    # Dataset objects, batches indexed by position.
    _check_first_batch(
        JangguSequence(inputs, outputs, batch_size=10, as_dict=False),
        0, 0)

    # Dataset objects, batches keyed by dataset name.
    _check_first_batch(
        JangguSequence(inputs, outputs, batch_size=10, as_dict=True),
        'x', 'y')
Exemple #17
0
def test_randomshift():
    """Check that ``RandomShift`` keeps the interface of the wrapped array.

    The same checks run for the wrapper created with two positional
    arguments and for the one created with an additional third positional
    argument ``True``.  ``conditions`` must be ``None`` because the wrapped
    ``Array`` was created without conditions.
    """
    x_orig = np.zeros((1, 4, 1, 4))
    # Place a one on each position of the (axis 1, axis 3) diagonal.
    for pos in range(4):
        x_orig[0, pos, 0, pos] = 1

    for extra_args in ((), (True,)):
        x_tr = RandomShift(Array('test', x_orig), 1, *extra_args)

        assert x_tr[0].shape == (1, 4, 1, 4)
        np.testing.assert_equal(len(x_tr), 1)
        assert x_tr.shape == (1, 4, 1, 4)
        assert x_tr.ndim == 4

        # A copy of the wrapper must behave like the wrapper itself.
        new_x = copy(x_tr)
        assert x_tr[0].shape == new_x[0].shape

        # Identity comparison is the correct way to test for None.
        assert x_tr.conditions is None
Exemple #18
0
def test_output_bigwig_loss_resolution_unequal_stepsize(tmpdir):
    """Export a constant loss to bigwig files and check the stored signal.

    The genomic indexer uses ``binsize=200`` and ``stepsize=50``.  One
    bigwig file per condition must be written, and the mean of the values
    read back from the ``c1`` file on ``chr1`` must be close to 0.2.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    inputs = Array("x", numpy.random.random((7, 4, 1, 10)))
    outputs = Array('y',
                    numpy.random.random((7, 4, 1, 4)),
                    conditions=['c1', 'c2', 'c3', 'c4'])

    bwm = get_janggu(inputs, outputs)
    data_path = pkg_resources.resource_filename('janggu',
                                                'resources/10regions.bed')

    gi = GenomicIndexer.create_from_file(data_path, binsize=200, stepsize=50)

    # A constant per-sample loss makes the expected signal trivial.
    dummy_eval = Scorer('loss',
                        lambda t, p: [0.2] * len(t),
                        exporter=export_bigwig)

    bwm.evaluate(inputs,
                 outputs,
                 callbacks=[dummy_eval],
                 exporter_kwargs={'gindexer': gi})

    file_ = os.path.join(tmpdir.strpath, 'evaluation', bwm.name,
                         'loss.nptest.y.{}.bigwig')

    for cond in ['c1', 'c2', 'c3', 'c4']:
        assert os.path.exists(file_.format(cond))

    bw = pyBigWig.open(file_.format('c1'))
    try:
        co = bw.values('chr1', 600, 2000 - 150)
    finally:
        # Release the file handle even if reading the interval fails.
        bw.close()

    numpy.testing.assert_allclose(numpy.mean(co), 0.2, rtol=1e-5)
Exemple #19
0
def test_randomorientation():
    """Check that ``RandomOrientation`` keeps the interface of the wrapped array."""
    zeros = np.zeros((3, 1, 1, 2))
    x_tr = RandomOrientation(Array('test', zeros, conditions=["A", "B"]))

    # Integer, slice and list indexing all return 4D chunks.
    assert x_tr[0].shape == (1, 1, 1, 2)
    assert x_tr[:3].shape == (3, 1, 1, 2)
    assert x_tr[[0, 1]].shape == (2, 1, 1, 2)

    # Size-related attributes mirror the wrapped data.
    np.testing.assert_equal(len(x_tr), 3)
    assert len(x_tr) == 3
    assert x_tr.shape == (3, 1, 1, 2)
    assert x_tr.ndim == 4

    # All-zero input must still read as zeros through the wrapper.
    np.testing.assert_equal(x_tr[0], [[[[0, 0]]]])

    # A copy of the wrapper must behave like the wrapper itself.
    duplicate = copy(x_tr)
    assert x_tr[0].shape == duplicate[0].shape
    assert x_tr.conditions == ["A", "B"]
Exemple #20
0
def test_squeezedim():
    """Check that ``SqueezeDim`` presents a (3, 1, 1, 2) array as (3, 2)."""
    zeros = np.zeros((3, 1, 1, 2))
    np.testing.assert_equal(zeros.ndim, 4)

    x_sq = SqueezeDim(Array('test', zeros, conditions=["A", "B"]))

    # The two singleton axes are gone from the reported geometry.
    np.testing.assert_equal(len(x_sq), 3)
    np.testing.assert_equal(x_sq.shape, (3, 2))
    np.testing.assert_equal(x_sq.ndim, 2)

    # Integer indexing drops the sample axis; slice and list indexing keep it.
    assert x_sq[0].shape == (2,)
    assert x_sq[:3].shape == (3, 2)
    assert x_sq[[0, 1]].shape == (2, 2)
    assert x_sq.ndim == 2

    # A copy of the wrapper must behave like the wrapper itself.
    duplicate = copy(x_sq)
    assert x_sq[0].shape == duplicate[0].shape
    assert x_sq.conditions == ["A", "B"]
Exemple #21
0
def test_input_dims():
    """Check ``_dimension_match`` for mismatching names, shapes and counts.

    A dataset named ``'testa'`` built from a (10, 10, 1) array is compared
    against models whose input layer differs in name or shape, and finally
    against a model whose input layer agrees in both.
    """
    data = Array('testa', numpy.zeros((10, 10, 1)))

    def _single_dense_model(shape, name):
        # Minimal model: one named input followed by one dense unit.
        layer_in = Input(shape, name=name)
        layer_out = Dense(1)(layer_in)
        return Model(layer_in, layer_out)

    # Rejected: the layer name differs from the dataset name.
    wrong_name = _single_dense_model((10, 1), 'testy')
    assert not _dimension_match(wrong_name, data, 'input_layers')

    wrong_shape = _single_dense_model((20, 10, 1), 'testa')
    # Rejected: the layer shape differs from the dataset shape.
    assert not _dimension_match(wrong_shape, data, 'input_layers')
    # Rejected: more datasets are supplied than the model has inputs.
    assert not _dimension_match(wrong_shape, [data, data], 'input_layers')

    # Accepted: name and shape both agree with the dataset.
    matching = _single_dense_model((10, 1), 'testa')
    assert _dimension_match(matching, data, 'input_layers')
Exemple #22
0
# Locate the example FASTA files that ship with the janggu package.
DATA_PATH = pkg_resources.resource_filename('janggu', 'resources/')
SAMPLE_1, SAMPLE_2 = (os.path.join(DATA_PATH, fasta_name)
                      for fasta_name in ('sample.fa', 'sample2.fa'))

# The sequences of both files, loaded as a Bioseq dataset, are the model input.
DNA = Bioseq.create_from_seq('dna',
                             fastafile=[SAMPLE_1, SAMPLE_2],
                             order=args.order,
                             cache=True)

# Training labels: 1 for every sequence of SAMPLE_1, 0 for every sequence of
# SAMPLE_2, stacked in that order as a single column.
Y = np.asarray([[1]] * nseqs(SAMPLE_1) + [[0]] * nseqs(SAMPLE_2))
LABELS = Array('y', Y, conditions=['TF-binding'])

# Name for each label value, stored as a list under the condition name.
annot = (
    pd.DataFrame(Y[:], columns=LABELS.conditions)
    .applymap(lambda x: 'Oct4' if x == 1 else 'Mafk')
    .to_dict(orient='list')
)

# Define the model templates


@inputlayer
@outputdense('sigmoid')
def single_stranded_model(inputs, inp, oup, params):
    """ keras model that scans a DNA sequence using
    a number of motifs.

    This model only scans one strand for sequence patterns.
    """
    with inputs.use('dna') as layer:
Exemple #23
0
def test_janggu_instance_dense(tmpdir):
    """Exercise ``Janggu.create`` with invalid and valid input lookups.

    Model templates that look up the input with an unknown name or with an
    unsupported index type, and a creation call with a non-string model
    name, must raise.  Templates that look up the input by position or by
    name must succeed.  The final model is round-tripped through its JSON
    and YAML representations, saved, and loaded again by name.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath

    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample.bed')

    csvfile = os.path.join(data_path, 'sample.csv')

    refgenome = os.path.join(data_path, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna',
                                       refgenome=refgenome,
                                       storage='ndarray',
                                       roi=bed_file,
                                       order=1)

    df = pd.read_csv(csvfile, header=None)
    ctcf = Array('ctcf', df.values, conditions=['peaks'])

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs['.']
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # fails because no input named '.' is defined
        bwm = Janggu.create(_cnn_model,
                            modelparams=(2, ),
                            inputs=dna,
                            outputs=ctcf,
                            name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs[list()]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # fails because a list is the wrong type for indexing the inputs
        bwm = Janggu.create(_cnn_model,
                            modelparams=(2, ),
                            inputs=dna,
                            outputs=ctcf,
                            name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        layer = inputs()[0]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    with pytest.raises(Exception):
        # fails because the model name must be a string
        bwm = Janggu.create(_cnn_model,
                            modelparams=(2, ),
                            inputs=dna,
                            outputs=ctcf,
                            name=12342134)

    # The same template succeeds once a valid model name is given.
    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')
    # Creating a second model with the identical explicit name also succeeds.
    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # Input looked up by position.
        layer = inputs[0]
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    @inputlayer
    @outputdense('sigmoid')
    def _cnn_model(inputs, inp, oup, params):
        # Input looked up by dataset name.
        layer = inputs['dna']
        layer = Complement()(layer)
        layer = Reverse()(layer)
        layer = Flatten()(layer)
        output = Dense(params[0])(layer)
        return inputs, output

    bwm = Janggu.create(_cnn_model,
                        modelparams=(2, ),
                        inputs=dna,
                        outputs=ctcf,
                        name='dna_ctcf_HepG2-cnn')

    # The results are unused: these calls only need to complete without
    # raising to show that the model survives JSON/YAML serialization.
    kbwm2 = model_from_json(bwm.kerasmodel.to_json())
    kbwm3 = model_from_yaml(bwm.kerasmodel.to_yaml())

    bwm.compile(optimizer='adadelta', loss='binary_crossentropy')
    storage = bwm._storage_path(bwm.name, outputdir=tmpdir.strpath)

    bwm.save()
    bwm.summary()

    assert os.path.exists(storage)

    # Loading by name must work after the model has been saved.
    Janggu.create_by_name('dna_ctcf_HepG2-cnn')