Ejemplo n.º 1
0
 def _cnn_model2(inputs, inp, oup, params):
     """Return forward and reverse-complement outputs of one shared Conv2D.

     The 'dna' input is passed through a single ``Conv2D`` layer twice:
     once unchanged, and once after ``Reverse`` followed by ``Complement``.
     The second result is passed through ``Reverse`` again.

     Returns ``inputs`` unchanged together with the list
     ``[forward_output, reverse_output]``.  ``inp``, ``oup`` and ``params``
     are accepted but not used in this function.
     """
     with inputs.use('dna') as inlayer:
         # A single layer object is applied to both orientations.
         shared_conv = Conv2D(5, (3, 1), name='singlestrand')
         forward_out = shared_conv(inlayer)

         # Instantiate the helper layers first, then wire them together.
         restore_orientation = Reverse()
         complement = Complement()
         flip = Reverse()
         revcomp_in = complement(flip(inlayer))
         reverse_out = restore_orientation(shared_conv(revcomp_in))
     return inputs, [forward_out, reverse_out]
Ejemplo n.º 2
0
def double_stranded_model(inputs, inp, oup, params):
    """Keras model body that scans both DNA strands with one set of kernels.

    Sequence patterns may occur on either strand.  Scanning both strands
    with the same motifs (kernels) will generally improve the performance
    of the model.

    Here this is done by reverse complementing the input tensor while the
    very same convolution layer is applied to both orientations.

    ``params`` is indexed as follows: ``params[0]`` is the first argument
    to ``Conv2D``, ``params[1]`` is the first entry of the kernel size and
    ``params[2]`` is the activation.  ``inp`` and ``oup`` are accepted but
    not used in this function.

    Returns ``inputs`` unchanged together with the output of the pooling
    layer named 'motif'.
    """
    with inputs.use('dna') as layer:
        # The name passed to inputs.use() should match the dataset name.
        fwd_strand = layer

    shared_conv = Conv2D(params[0], (params[1], 1),
                         activation=params[2])

    # Build the reverse-complemented view of the input.
    flipped = Reverse()(fwd_strand)
    rev_strand = Complement()(flipped)

    # Apply the same convolution layer to both views; the reverse strand
    # result is passed through Reverse again before merging.
    fwd_hits = shared_conv(fwd_strand)
    rev_hits = shared_conv(rev_strand)
    rev_hits = Reverse()(rev_hits)
    merged = Maximum()([fwd_hits, rev_hits])

    # The pooling window is the size of axis 1 of the merged tensor.
    window = merged.shape.as_list()[1]
    pooled = LocalAveragePooling2D(window_size=window,
                                   name='motif')(merged)
    return inputs, pooled
Ejemplo n.º 3
0
def test_dnaconv():
    """Compare DnaConv2D against explicit forward / reverse-complement scans.

    The output of a ``DnaConv2D``-wrapped convolution is expected to equal
    the element-wise maximum of (a) the wrapped layer applied to the input
    and (b) the wrapped layer applied to the reversed and complemented
    input, passed through ``Reverse`` again afterwards.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, 'sample.bed')
    genome_path = os.path.join(resources, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome(
        'dna',
        refgenome=genome_path,
        storage='ndarray',
        roi=roi_path,
        order=1)

    def first_slice(model):
        # Predict on the first batch and keep the [0, 0, 0, :] slice.
        return model.predict(dna[0])[0, 0, 0, :]

    xin = Input(dna.shape[1:])

    # Reference: the DnaConv2D wrapper.
    wrapped_out = DnaConv2D(Conv2D(30, (21, 1), activation='relu'))(xin)
    wrapped_model = Model(xin, wrapped_out)
    both_strands = first_slice(wrapped_model)

    # Reuse the layer held by the wrapper for the manual scans.
    inner_conv = wrapped_model.layers[1].forward_layer

    # Forward strand only.
    forward_out = inner_conv(xin)
    forward_only = first_slice(Model(xin, forward_out))

    # Reverse-complemented input, passed through Reverse again afterwards.
    revcomp_in = Reverse()(Complement()(xin))
    reverse_out = inner_conv(revcomp_in)
    reverse_out = Reverse()(reverse_out)
    reverse_only = first_slice(Model(xin, reverse_out))

    expected = np.maximum(reverse_only, forward_only)
    np.testing.assert_allclose(both_strands, expected, rtol=1e-4)
Ejemplo n.º 4
0
def double_stranded_model(inputs, inp, oup, params):
    """Scan both orientations of the 'dna' input with one Conv2D layer.

    The input is passed through ``Reverse`` and ``Complement``; the same
    convolution layer is applied to the original and to the transformed
    tensor.  The second result is passed through ``Reverse`` again, both
    results are merged with ``Maximum`` and pooled by a
    ``GlobalAveragePooling2D`` layer named 'motif'.

    ``params[0]`` is the first argument to ``Conv2D``, ``params[1]`` the
    first entry of the kernel size and ``params[2]`` the activation.
    ``inp`` and ``oup`` are accepted but not used in this function.

    Returns ``inputs`` unchanged together with the pooled output.
    """
    with inputs.use('dna') as layer:
        fwd_strand = layer

    shared_conv = Conv2D(params[0], (params[1], 1), activation=params[2])

    # Reverse-complemented view of the input.
    flipped = Reverse()(fwd_strand)
    rev_strand = Complement()(flipped)

    # Same layer object for both views.
    fwd_hits = shared_conv(fwd_strand)
    rev_hits = shared_conv(rev_strand)
    rev_hits = Reverse()(rev_hits)

    merged = Maximum()([fwd_hits, rev_hits])
    pooled = GlobalAveragePooling2D(name='motif')(merged)
    return inputs, pooled
Ejemplo n.º 5
0
 def _cnn_model(inputs, inp, oup, params):
     """Apply Complement, Reverse, Flatten and a Dense layer to ``inputs['dna']``.

     ``params[0]`` is passed as the first argument to ``Dense``.  ``inp``
     and ``oup`` are accepted but not used in this function.

     Returns ``inputs`` unchanged together with the Dense output.
     """
     tensor = inputs['dna']
     # Each layer is instantiated and applied in turn.
     for layer_cls in (Complement, Reverse, Flatten):
         tensor = layer_cls()(tensor)
     return inputs, Dense(params[0])(tensor)
Ejemplo n.º 6
0
def reverse_layer(order):
    """Assert that a ``Reverse`` layer flips its input along axis 1.

    A ``Bioseq`` dataset is created from the sample genome and regions
    shipped in the janggu resources, a model consisting only of a
    ``Reverse`` layer is built, and its prediction on the first batch is
    compared with that batch indexed by ``[:, ::-1, :, :]``.

    ``order`` is forwarded to ``Bioseq.create_from_refgenome``.

    NOTE: ``binsize`` and ``flank`` are not defined inside this function;
    they must be provided by an enclosing scope.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, 'sample.bed')
    genome_path = os.path.join(resources, 'sample_genome.fa')

    bioseq = Bioseq.create_from_refgenome(
        'train',
        refgenome=genome_path,
        roi=roi_path,
        storage='ndarray',
        binsize=binsize,
        flank=flank,
        order=order)

    # Model that does nothing but apply Reverse to its input.
    dna_in = Input(shape=bioseq.shape[1:], name='dna')
    reverse_model = Model(dna_in, Reverse()(dna_in))

    # First batch of the dataset, flipped along axis 1 as the reference.
    batch = bioseq[0]
    expected = batch[:, ::-1, :, :]
    np.testing.assert_equal(expected, reverse_model.predict(batch))
Ejemplo n.º 7
0
 def _cnn_model(inputs, inp, oup, params):
     """Apply Complement followed by Reverse to the 'dna' input.

     ``inp``, ``oup`` and ``params`` are accepted but not used in this
     function.

     Returns ``inputs`` unchanged together with the transformed tensor.
     """
     with inputs.use('dna') as inlayer:
         tensor = inlayer
     # Each layer is instantiated and applied in turn.
     for layer_cls in (Complement, Reverse):
         tensor = layer_cls()(tensor)
     return inputs, tensor
Ejemplo n.º 8
0
                                   binsize=200)

# Labels for the 'peaks' dataset, created from PEAK_FILE over the ROI_TRAIN
# regions with a bin size of 200.
LABELS = Cover.create_from_bed('peaks',
                               roi=ROI_TRAIN,
                               bedfiles=PEAK_FILE,
                               binsize=200,
                               resolution=None)

# Define the keras model.

# The input layer is named "dna" and takes tensors of shape (200, 1, 4).
xin = Input((200, 1, 4), name="dna")
# A single convolution layer object; the 'double' branch applies it twice.
convl = Conv2D(30, (21, 1), activation='relu')

if args.model == 'double':
    # Apply the convolution to the input as is and to its reversed and
    # complemented version, then merge both results with Maximum.
    # NOTE(review): the second branch is not passed through Reverse() again
    # before Maximum() -- confirm that this is intended.
    forward = convl(xin)
    reverse = convl(Complement()(Reverse()(xin)))
    layer = Maximum()([forward, reverse])
else:
    # Any other value of args.model: apply the convolution once.
    layer = convl(xin)

layer = GlobalAveragePooling2D()(layer)
layer = Dense(1, activation='sigmoid')(layer)

# The final Reshape makes the output dimensionality compatible with
# the coverage dataset dimensions.
# Alternatively, the ReduceDim dataset wrapper may be used to transform
# the output to a 2D dataset object.
output = Reshape((1, 1, 1), name="peaks")(layer)

model = Model(xin, output)