def _cnn_model2(inputs, inp, oup, params):
    """Build two feature maps from the 'dna' input with one shared convolution.

    The first map is the convolution of the input as-is. The second map
    is obtained by applying ``Reverse`` and then ``Complement`` to the
    input, running the *same* ``Conv2D`` instance (5 filters, 3x1 kernel,
    named 'singlestrand') on it, and applying ``Reverse`` once more.

    ``inp``, ``oup`` and ``params`` are accepted but not used here.

    Returns:
        The tuple ``(inputs, [forward_map, reverse_map])``.
    """
    # NOTE(review): the original indentation is not visible in this view;
    # the graph construction is assumed to sit inside the `with` block
    # with only the `return` outside -- confirm.
    with inputs.use('dna') as dna_layer:
        shared_conv = Conv2D(5, (3, 1), name='singlestrand')
        forward_map = shared_conv(dna_layer)

        # Layers are instantiated outermost-first so that any
        # automatically assigned layer names come out in the same order
        # as with a single nested expression.
        undo_reverse = Reverse()
        complement = Complement()
        reverse = Reverse()
        reverse_map = undo_reverse(
            shared_conv(complement(reverse(dna_layer))))
    return inputs, [forward_map, reverse_map]
def double_stranded_model(inputs, inp, oup, params):
    """Keras model body that scans both DNA strands with shared kernels.

    Sequence patterns may occur on either strand. Scanning both strands
    with the same motifs (kernels) will generally improve performance.
    Here this is done by reverse complementing the input tensor while
    reusing one convolution layer for both orientations.

    ``params`` is indexed as ``params[0]`` (number of filters),
    ``params[1]`` (kernel length along the first spatial axis) and
    ``params[2]`` (activation). ``inp`` and ``oup`` are not used.

    Returns:
        The tuple ``(inputs, output)`` where ``output`` is the pooled
        layer named 'motif'.
    """
    # The name given to inputs.use() should match the dataset name.
    # NOTE(review): the original indentation is not visible in this view;
    # only the binding is assumed to live inside the `with` -- confirm.
    with inputs.use('dna') as dna_layer:
        forward_strand = dna_layer

    n_filters, kernel_length, activation = params[0], params[1], params[2]
    shared_conv = Conv2D(n_filters, (kernel_length, 1),
                         activation=activation)

    # Reverse complement of the input: reverse first, then complement.
    reverse_strand = Complement()(Reverse()(forward_strand))

    forward_scan = shared_conv(forward_strand)
    reverse_scan = shared_conv(reverse_strand)
    # Flip the reverse-strand scan back before merging with the forward scan.
    reverse_scan = Reverse()(reverse_scan)
    merged = Maximum()([forward_scan, reverse_scan])

    # Pool over a window equal to the size of the merged tensor's axis 1.
    window = merged.shape.as_list()[1]
    pooling = LocalAveragePooling2D(window_size=window, name='motif')
    output = pooling(merged)
    return inputs, output
def test_dnaconv():
    """Check DnaConv2D against a hand-built two-strand convolution.

    The output of ``DnaConv2D`` wrapping a ``Conv2D`` is compared with
    the element-wise maximum of (a) the wrapped convolution applied to
    the input directly and (b) the same convolution applied to the
    complemented-then-reversed input, with the result reversed again.
    """
    resource_dir = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resource_dir, 'sample.bed')
    genome_path = os.path.join(resource_dir, 'sample_genome.fa')

    dna = Bioseq.create_from_refgenome('dna', refgenome=genome_path,
                                       storage='ndarray',
                                       roi=roi_path, order=1)

    def first_position(model):
        # Prediction for the first batch, restricted to index [0, 0, 0, :].
        return model.predict(dna[0])[0, 0, 0, :]

    xin = Input(dna.shape[1:])

    # Reference: the DnaConv2D wrapper around a single Conv2D.
    base_conv = Conv2D(30, (21, 1), activation='relu')
    wrapped_out = DnaConv2D(base_conv)(xin)
    wrapped_model = Model(xin, wrapped_out)
    wrapped_result = first_position(wrapped_model)

    shared_conv = wrapped_model.layers[1].forward_layer

    # Forward strand only.
    forward_model = Model(xin, shared_conv(xin))
    forward_result = first_position(forward_model)

    # Other strand: complement, reverse, convolve, then reverse back.
    revcomp_in = Reverse()(Complement()(xin))
    revcomp_out = shared_conv(revcomp_in)
    revcomp_out = Reverse()(revcomp_out)
    reverse_model = Model(xin, revcomp_out)
    reverse_result = first_position(reverse_model)

    expected = np.maximum(reverse_result, forward_result)
    np.testing.assert_allclose(wrapped_result, expected, rtol=1e-4)
def double_stranded_model(inputs, inp, oup, params):
    """Scan the 'dna' input in both orientations with one shared Conv2D.

    The input and its reversed-then-complemented copy are passed through
    the same convolution; the second scan is reversed again, both scans
    are merged with an element-wise ``Maximum`` and reduced with
    ``GlobalAveragePooling2D`` (layer name 'motif').

    ``params`` is indexed as ``params[0]`` (number of filters),
    ``params[1]`` (kernel length along the first spatial axis) and
    ``params[2]`` (activation). ``inp`` and ``oup`` are not used.

    Returns:
        The tuple ``(inputs, output)``.
    """
    # NOTE(review): the original indentation is not visible in this view;
    # only the binding is assumed to live inside the `with` -- confirm.
    with inputs.use('dna') as dna_layer:
        forward_strand = dna_layer

    n_filters, kernel_length, activation = params[0], params[1], params[2]
    shared_conv = Conv2D(n_filters, (kernel_length, 1),
                         activation=activation)

    # Reverse first, then complement.
    reverse_strand = Complement()(Reverse()(forward_strand))

    forward_scan = shared_conv(forward_strand)
    reverse_scan = shared_conv(reverse_strand)
    # Flip the second scan back before merging with the forward scan.
    reverse_scan = Reverse()(reverse_scan)

    merged = Maximum()([forward_scan, reverse_scan])
    output = GlobalAveragePooling2D(name='motif')(merged)
    return inputs, output
def _cnn_model(inputs, inp, oup, params):
    """Apply Complement, Reverse and Flatten to the 'dna' input, then a Dense layer.

    ``params[0]`` is passed as the first argument of ``Dense``.
    ``inp`` and ``oup`` are accepted but not used.

    Returns:
        The tuple ``(inputs, output)``.
    """
    features = inputs['dna']
    # Each layer class is instantiated and applied in turn.
    for layer_cls in (Complement, Reverse, Flatten):
        features = layer_cls()(features)
    dense = Dense(params[0])
    return inputs, dense(features)
def reverse_layer(order):
    """Assert that the Reverse layer flips a batch along axis 1.

    A ``Bioseq`` dataset is loaded from the packaged sample genome and
    ROI file with the given ``order``; a model consisting only of
    ``Reverse`` is run on the first batch and compared with the same
    batch indexed as ``[:, ::-1, :, :]``.

    ``binsize`` and ``flank`` are not defined in this function; they are
    read from an enclosing scope that is not visible here.
    """
    resource_dir = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resource_dir, 'sample.bed')
    genome_path = os.path.join(resource_dir, 'sample_genome.fa')

    data = Bioseq.create_from_refgenome('train', refgenome=genome_path,
                                        roi=roi_path,
                                        storage='ndarray',
                                        binsize=binsize,
                                        flank=flank,
                                        order=order)

    dna_in = Input(shape=data.shape[1:], name='dna')
    reverse_model = Model(dna_in, Reverse()(dna_in))

    # First batch of the dataset, in its actual shape.
    batch = data[0]
    expected = batch[:, ::-1, :, :]
    np.testing.assert_equal(expected, reverse_model.predict(batch))
def _cnn_model(inputs, inp, oup, params):
    """Return the 'dna' input passed through Complement and then Reverse.

    ``inp``, ``oup`` and ``params`` are accepted but not used.

    Returns:
        The tuple ``(inputs, output)``.
    """
    # NOTE(review): the original indentation is not visible in this view;
    # both layer applications are assumed to sit inside the `with`
    # block -- confirm.
    with inputs.use('dna') as dna_layer:
        complemented = Complement()(dna_layer)
        output = Reverse()(complemented)
    return inputs, output
binsize=200) LABELS = Cover.create_from_bed('peaks', roi=ROI_TRAIN, bedfiles=PEAK_FILE, binsize=200, resolution=None) # define a keras model here xin = Input((200, 1, 4), name="dna") convl = Conv2D(30, (21, 1), activation='relu') if args.model == 'double': forward = convl(xin) reverse = convl(Complement()(Reverse()(xin))) layer = Maximum()([forward, reverse]) else: layer = convl(xin) layer = GlobalAveragePooling2D()(layer) layer = Dense(1, activation='sigmoid')(layer) # the last one is used to make the dimensionality compatible with # the coverage dataset dimensions. # Alternatively, the ReduceDim dataset wrapper may be used to transform # the output to a 2D dataset object. output = Reshape((1, 1, 1), name="peaks")(layer) model = Model(xin, output)