Ejemplo n.º 1
0
 def double_stranded_model_dnaconv(inputs, inp, oup, params):
     """Apply a DnaConv2D-wrapped convolution to the 'dna' input and pool it.

     ``params`` is indexed positionally: ``params[0]`` is forwarded to
     ``Conv2D`` as its first argument, ``params[1]`` as the first kernel
     dimension (the second is fixed to 1) and ``params[2]`` as the
     ``activation``.  ``inp`` and ``oup`` are accepted but not used here.

     Returns ``inputs`` unchanged together with the output of the pooling
     layer named 'motif'.
     """
     with inputs.use('dna') as dna_in:
         conv = Conv2D(params[0], (params[1], 1), activation=params[2])
         scanned = DnaConv2D(conv)(dna_in)

     # The pooling window spans the full size of axis 1 of the conv output.
     pool = LocalAveragePooling2D(window_size=scanned.shape.as_list()[1],
                                  name='motif')
     return inputs, pool(scanned)
Ejemplo n.º 2
0
 def _cnn_model1(inputs, inp, oup, params):
     """Wrap a fixed 5-filter (3, 1) convolution in DnaConv2D on the 'dna' input.

     The inner convolution is named 'fconv1'; the wrapper is named
     'bothstrands' and uses ``merge_mode='max'``.  ``inp``, ``oup`` and
     ``params`` are accepted but not used here.

     Returns ``inputs`` unchanged together with the wrapper's output.
     """
     with inputs.use('dna') as dna_in:
         forward_conv = Conv2D(5, (3, 1), name='fconv1')
         both_strands = DnaConv2D(forward_conv,
                                  merge_mode='max',
                                  name='bothstrands')
         features = both_strands(dna_in)
     return inputs, features
Ejemplo n.º 3
0
def test_dnaconv():
    """Compare DnaConv2D against a manual two-pass reconstruction.

    The wrapped layer's prediction must match (``rtol=1e-4``) the
    element-wise maximum of two predictions made with the wrapper's
    ``forward_layer``: one on the plain input, and one on the
    Complement/Reverse-transformed input whose output is reversed again.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, 'sample.bed')
    genome = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna',
                                       refgenome=genome,
                                       storage='ndarray',
                                       roi=regions,
                                       order=1)

    def predicted_slice(model):
        # Only the [0, 0, 0, :] slice of the prediction for dna[0] is compared.
        return model.predict(dna[0])[0, 0, 0, :]

    xin = Input(dna.shape[1:])

    # Reference result: the wrapper applied directly to the input.
    wrapped_out = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))(xin)
    wrapped_model = Model(xin, wrapped_out)
    both = predicted_slice(wrapped_model)

    # Reuse the wrapper's own convolution so all models share the weights.
    conv = wrapped_model.layers[1].forward_layer

    # First pass: the convolution on the untouched input.
    forward_only = predicted_slice(Model(xin, conv(xin)))

    # Second pass: transform the input, convolve, then undo the reversal.
    flipped_in = Reverse()(Complement()(xin))
    flipped_out = Reverse()(conv(flipped_in))
    reverse_only = predicted_slice(Model(xin, flipped_out))

    expected = np.maximum(reverse_only, forward_only)
    np.testing.assert_allclose(both, expected, rtol=1e-4)
Ejemplo n.º 4
0
def test_dnaconv2():
    """Check DnaConv2D around a Conv2D that was already applied to the input.

    The convolution is called on the input once before it is handed to
    DnaConv2D.  The wrapper's ``forward_layer`` must then hold the same
    first weight array as the original convolution, and the original
    convolution must still expose exactly two weights.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, 'sample.bed')
    genome = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna',
                                       refgenome=genome,
                                       storage='ndarray',
                                       roi=regions,
                                       order=1)

    xin = Input(dna.shape[1:])
    conv = Conv2D(30, (21, 1), activation='relu')

    # Call the convolution on the input before wrapping it.
    conv(xin)

    wrapped_model = Model(xin, DnaConv2D(conv)(xin))

    # The prediction is not inspected; it only has to run and be sliceable.
    _ = wrapped_model.predict(dna[0])[0, 0, 0, :]

    original_kernel = conv.get_weights()[0]
    forward_kernel = wrapped_model.layers[1].forward_layer.get_weights()[0]
    np.testing.assert_allclose(original_kernel, forward_kernel)
    assert len(conv.weights) == 2
Ejemplo n.º 5
0
def double_stranded_model_dnaconv(inputs, inp, oup, params):
    """Apply a DnaConv2D-wrapped convolution to 'dna' and pool it globally.

    ``params`` is indexed positionally: ``params[0]`` is forwarded to
    ``Conv2D`` as its first argument, ``params[1]`` as the first kernel
    dimension (the second is fixed to 1) and ``params[2]`` as the
    ``activation``.  ``inp`` and ``oup`` are accepted but not used here.

    The wrapper layer is named 'conv1' and the pooling layer 'motif'.
    Returns ``inputs`` unchanged together with the pooled output.
    """
    with inputs.use('dna') as dna_in:
        inner_conv = Conv2D(params[0], (params[1], 1), activation=params[2])
        scanned = DnaConv2D(inner_conv, name='conv1')(dna_in)

    pooled = GlobalAveragePooling2D(name='motif')(scanned)
    return inputs, pooled
Ejemplo n.º 6
0
def double_stranded_model_dnaconv(inputs, inp, oup, params):
    """Keras model body that scans both DNA strands for motifs.

    Instead of handling the two strands by hand, the convolution is
    wrapped in DnaConv2D, which internally runs it with the normal
    kernel weights and with the reverse complemented weights.

    ``params[0]`` is forwarded to ``Conv2D`` as its first argument,
    ``params[1]`` as the first kernel dimension (the second is fixed
    to 1) and ``params[2]`` as the ``activation``.  ``inp`` and ``oup``
    are accepted but not used here.

    Returns ``inputs`` unchanged together with the output of the global
    average pooling layer named 'motif'.
    """
    # The name given to inputs.use() has to match the dataset name.
    with inputs.use('dna') as dna_in:
        conv = Conv2D(params[0], (params[1], 1), activation=params[2])
        scanned = DnaConv2D(conv)(dna_in)

    pooled = GlobalAveragePooling2D(name='motif')(scanned)
    return inputs, pooled
Ejemplo n.º 7
0
def dna_model(inputs, inp, oup, params):
    """Build a two-convolution stack on top of the 'dna' input.

    Keys read from ``params``:

    * ``'seq_dropout'`` -- a Dropout layer with this value is applied to
      the input only if it is greater than 0.0.
    * ``'nmotifs1'``, ``'motiflen'`` -- first argument and first kernel
      dimension of the first convolution.
    * ``'stranded'`` -- if equal to ``'double'`` the first convolution is
      wrapped in DnaConv2D, otherwise it is applied directly.
    * ``'pool1'`` -- first pool-size dimension of the max pooling.
    * ``'nmotifs2'``, ``'hypermotiflen'`` -- first argument and first
      kernel dimension of the second convolution.

    ``inp`` and ``oup`` are accepted but not used here.  Returns ``inputs``
    unchanged together with the output of the second convolution.
    """
    with inputs.use('dna') as dna_in:
        features = dna_in

    dropout_rate = params['seq_dropout']
    if dropout_rate > 0.0:
        features = Dropout(dropout_rate)(features)

    motif_conv = Conv2D(params['nmotifs1'], (params['motiflen'], 1),
                        activation='relu')
    # Optionally wrap the same convolution in DnaConv2D.
    double_stranded = params['stranded'] == 'double'
    first_stage = DnaConv2D(motif_conv) if double_stranded else motif_conv
    features = first_stage(features)

    features = MaxPooling2D((params['pool1'], 1))(features)
    features = BatchNormalization()(features)

    hyper_conv = Conv2D(params['nmotifs2'], (params['hypermotiflen'], 1),
                        activation='relu')
    return inputs, hyper_conv(features)
Ejemplo n.º 8
0
def dna_model_(inputs, inp, oup, params):
    """Build a named two-convolution stack on 'dna' ending in global pooling.

    Keys read from ``params``:

    * ``'seq_dropout'`` -- value passed to the Dropout layer, which is
      always applied.
    * ``'nmotifs1'``, ``'motiflen'`` -- first argument and first kernel
      dimension of the first convolution.
    * ``'stranded'`` -- if equal to ``'double'`` the first convolution is
      wrapped in DnaConv2D, otherwise it is applied directly.
    * ``'pool1'`` -- first pool-size dimension of the max pooling.
    * ``'nmotifs2'``, ``'hypermotiflen'`` -- first argument and first
      kernel dimension of the second convolution.

    Every layer gets an explicit name.  ``inp`` and ``oup`` are accepted but
    not used here.  Returns ``inputs`` unchanged together with the output
    of the final batch normalization.
    """
    with inputs.use('dna') as dna_in:
        features = dna_in

    features = Dropout(params['seq_dropout'], name='dna_dropout_1')(features)

    motif_conv = Conv2D(params['nmotifs1'], (params['motiflen'], 1),
                        activation='relu', name='dna_conv2d_1')
    # Optionally wrap the same convolution in DnaConv2D.
    if params['stranded'] == 'double':
        first_stage = DnaConv2D(motif_conv, name='dna_dnaconv2d_2')
    else:
        first_stage = motif_conv
    features = first_stage(features)

    features = MaxPooling2D((params['pool1'], 1),
                            name='dna_maxpooling1')(features)
    features = BatchNormalization(name='dna_batchnorm_1')(features)

    hyper_conv = Conv2D(params['nmotifs2'], (params['hypermotiflen'], 1),
                        activation='relu', name='dna_conv2d_2')
    features = hyper_conv(features)

    features = GlobalMaxPooling2D(name='global_max_pooling')(features)
    features = BatchNormalization(name='dna_batchnorm_2')(features)
    return inputs, features
Ejemplo n.º 9
0
# Both the training input and the labels are specified purely through
# genomic coordinates (the same region of interest and bin size).
DNA = Bioseq.create_from_refgenome(
    'dna',
    refgenome=REFGENOME,
    roi=ROI_TRAIN,
    binsize=200,
)

LABELS = Cover.create_from_bed(
    'peaks',
    roi=ROI_TRAIN,
    bedfiles=PEAK_FILE,
    binsize=200,
    resolution=None,
)

# Keras model definition: wrapped convolution -> global average pooling
# -> single sigmoid unit.

xin = Input((200, 1, 4))
strand_conv = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))
layer = strand_conv(xin)
layer = GlobalAveragePooling2D()(layer)
layer = Dense(1, activation='sigmoid')(layer)

# The final reshape only makes the output dimensionality compatible with
# the dimensions of the coverage dataset.
# As an alternative, the ReduceDim dataset wrapper can turn the labels
# into a 2D dataset object instead.
output = Reshape((1, 1, 1))(layer)

model = Model(xin, output)

model.compile(
    optimizer='adadelta',
    loss='binary_crossentropy',
    metrics=['acc'],
)
model.summary()
Ejemplo n.º 10
0
                                   roi=ROI_TRAIN,
                                   binsize=200)

LABELS = Cover.create_from_bed('peaks',
                               roi=ROI_TRAIN,
                               bedfiles=PEAK_FILE,
                               binsize=200,
                               resolution=None)

# define a keras model here

xin = Input((200, 1, 4), name="dna")
convl = Conv2D(30, (21, 1), activation='relu')

if args.model == 'double':
    layer = DnaConv2D(convl)(xin)
else:
    layer = convl(xin)

layer = GlobalAveragePooling2D()(layer)
layer = Dense(1, activation='sigmoid')(layer)

# the last one is used to make the dimensionality compatible with
# the coverage dataset dimensions.
# Alternatively, the ReduceDim dataset wrapper may be used to transform
# the output to a 2D dataset object.
output = Reshape((1, 1, 1), name="peaks")(layer)

model = Model(xin, output)

model.compile(optimizer='adadelta',