def __init__( self, target_name, # "{task}/profile" net, # function that takes a keras tensor and returns a keras tensor activation=None, loss='mse', loss_weight=1, metric=RegressionMetrics(), postproc_fn=None, # bias input use_bias=False, bias_net=None, bias_input='bias/{task}/profile', bias_shape=(None, 1), ): self.net = net self.loss = loss self.loss_weight = loss_weight self.metric = metric self.postproc_fn = postproc_fn self.target_name = target_name self.activation = activation self.bias_input = bias_input self.bias_net = bias_net self.use_bias = use_bias self.bias_shape = bias_shape
def __init__( self, target_name, # "{task}/scalar" net, # function that takes a keras tensor and returns a keras tensor activation=None, loss='mse', loss_weight=1, metric=RegressionMetrics(), postproc_fn=None, # post-processing to apply so that we are in the right scale # bias input use_bias=False, bias_net=None, bias_input='bias/{task}/scalar', bias_shape=(1, ), ): self.net = net self.loss = loss self.loss_weight = loss_weight self.metric = metric self.postproc_fn = postproc_fn self.target_name = target_name self.activation = activation self.bias_input = bias_input self.bias_net = bias_net self.use_bias = use_bias self.bias_shape = bias_shape
def test_interpret_wo_bias():
    import numpy as np
    from bpnet.seqmodel import SeqModel
    from bpnet.heads import ScalarHead, ProfileHead
    from bpnet.metrics import RegressionMetrics, ClassificationMetrics, PeakPredictionProfileMetric
    from concise.preprocessing import encodeDNA
    # NOTE: BinaryClassificationHead, BaseNet, TopDense and TopConv are assumed
    # to be importable/defined in this test module (they are not shown here).

    # test the model
    seqs = encodeDNA(['ACAGA'] * 100)

    inputs = {"seq": seqs,
              "bias/a/profile": np.random.randn(100, 5, 2)}

    # let's use regression targets
    targets = {"a/class": np.random.randint(low=0, high=2, size=(100, 1)).astype(float),
               "a/counts": 1 + np.ceil(np.abs(np.random.randn(100))),
               "a/profile": 1 + np.ceil(np.abs(np.random.randn(100, 5, 2))),
               }

    # use bias only for the profile head
    m = SeqModel(
        body=BaseNet('relu'),
        heads=[BinaryClassificationHead('{task}/class',
                                        net=TopDense(pool_size=2),
                                        use_bias=False),
               ScalarHead('{task}/counts',
                          loss='mse',
                          metric=RegressionMetrics(),
                          net=TopDense(pool_size=2),
                          use_bias=False),
               ProfileHead('{task}/profile',
                           loss='mse',
                           metric=PeakPredictionProfileMetric(neg_max_threshold=0.05,
                                                              required_min_pos_counts=0),
                           net=TopConv(n_output=2),
                           use_bias=True,
                           # NOTE: the shape currently has to be hard-coded
                           # to the sequence length
                           bias_shape=(5, 2)),
               ],
        tasks=['a'])
    m.model.fit(inputs, targets)

    o = m.contrib_score_all(seqs)
    assert 'a/profile/wn' in o
    assert o['a/profile/wn'].shape == seqs.shape

    # evaluate the dataset -> set up an array dataset (NumpyDataset)
    from bpnet.data import NumpyDataset
    ds = NumpyDataset({"inputs": inputs, "targets": targets})
    o = m.evaluate(ds)
    assert 'avg/counts/mad' in o
def bpnet_model(tasks,
                filters,
                n_dil_layers,
                conv1_kernel_size,
                tconv_kernel_size,
                b_loss_weight=1,
                c_loss_weight=1,
                p_loss_weight=1,
                c_splines=0,
                b_splines=20,
                merge_profile_reg=False,
                lr=0.004,
                tracks_per_task=2,
                padding='same',
                batchnorm=False,
                use_bias=False,
                n_bias_tracks=2,
                profile_metric=None,
                count_metric=None,
                profile_bias_window_sizes=[1, 50],
                seqlen=None,
                skip_type='residual'):
    """Set up the BPNet model architecture.

    Args:
      tasks: list of tasks
      filters: number of convolutional filters to use at each layer
      n_dil_layers: number of dilated convolutional layers
      conv1_kernel_size: kernel_size of the first convolutional layer
      tconv_kernel_size: kernel_size of the final transposed/de-convolutional layer
      b_loss_weight: binary classification loss weight
      c_loss_weight: total count regression loss weight
      p_loss_weight: profile regression loss weight
      c_splines: number of splines to use in the total count regression head (0=None)
      b_splines: number of splines to use in the binary classification head
      merge_profile_reg: if True, total count and profile prediction are merged
        into a single profile output head
      lr: learning rate of the Adam optimizer
      tracks_per_task: number of tracks (e.g. strands) predicted per task
      padding: padding in the convolutional layers
      batchnorm: if True, add BatchNorm after every layer. Note: this may mess up
        the DeepLIFT contribution scores downstream
      use_bias: if True, correct for the bias
      n_bias_tracks: how many bias tracks to expect (for both total count and
        profile regression)
      profile_metric: metric for the profile head (None -> default_peak_pred_metric)
      count_metric: metric for the count head (None -> RegressionMetrics())
      profile_bias_window_sizes: window sizes of the moving-average bias net
        applied to the profile bias tracks
      seqlen: sequence length
      skip_type: skip connection type ('residual' or 'dense')

    Returns:
      bpnet.seqmodel.SeqModel
    """
    from keras.optimizers import Adam
    from bpnet.seqmodel import SeqModel
    from bpnet.layers import DilatedConv1D, DeConv1D, GlobalAvgPoolFCN, MovingAverages
    from bpnet.metrics import (BPNetMetricSingleProfile, default_peak_pred_metric,
                               ClassificationMetrics, RegressionMetrics)
    from bpnet.heads import ScalarHead, ProfileHead
    from bpnet.losses import multinomial_nll, CountsMultinomialNLL
    from bpnet.activations import clipped_exp
    from bpnet.functions import softmax

    assert p_loss_weight >= 0
    assert c_loss_weight >= 0
    assert b_loss_weight >= 0
    # TODO: is it possible to re-instantiate the class to get rid of gin train?
    if profile_metric is None:
        print("Using the default profile prediction metric")
        profile_metric = default_peak_pred_metric

    if count_metric is None:
        print("Using the default regression prediction metrics")
        count_metric = RegressionMetrics()

    # Heads -------------------------------------------------
    heads = []
    # Profile prediction
    if p_loss_weight > 0:
        if not merge_profile_reg:
            heads.append(ProfileHead(target_name='{task}/profile',
                                     net=DeConv1D(n_tasks=tracks_per_task,
                                                  filters=filters,
                                                  tconv_kernel_size=tconv_kernel_size,
                                                  padding=padding,
                                                  n_hidden=0,
                                                  batchnorm=batchnorm),
                                     loss=multinomial_nll,
                                     loss_weight=p_loss_weight,
                                     postproc_fn=softmax,
                                     use_bias=use_bias,
                                     bias_input='bias/{task}/profile',
                                     bias_shape=(None, n_bias_tracks),
                                     bias_net=MovingAverages(window_sizes=profile_bias_window_sizes),
                                     metric=profile_metric))
        else:
            heads.append(ProfileHead(target_name='{task}/profile',
                                     net=DeConv1D(n_tasks=tracks_per_task,
                                                  filters=filters,
                                                  tconv_kernel_size=tconv_kernel_size,
                                                  padding=padding,
                                                  n_hidden=1,  # use 1 hidden layer in that case
                                                  batchnorm=batchnorm),
                                     activation=clipped_exp,
                                     loss=CountsMultinomialNLL(c_task_weight=c_loss_weight),
                                     loss_weight=p_loss_weight,
                                     bias_input='bias/{task}/profile',
                                     use_bias=use_bias,
                                     bias_shape=(None, n_bias_tracks),
                                     bias_net=MovingAverages(window_sizes=profile_bias_window_sizes),
                                     metric=BPNetMetricSingleProfile(count_metric=count_metric,
                                                                     profile_metric=profile_metric)))
            c_loss_weight = 0  # don't need to use the other count loss

    # Count regression
    if c_loss_weight > 0:
        heads.append(ScalarHead(target_name='{task}/counts',
                                net=GlobalAvgPoolFCN(n_tasks=tracks_per_task,
                                                     n_splines=c_splines,
                                                     batchnorm=batchnorm),
                                activation=None,
                                loss='mse',
                                loss_weight=c_loss_weight,
                                bias_input='bias/{task}/counts',
                                use_bias=use_bias,
                                bias_shape=(n_bias_tracks, ),
                                metric=count_metric))

    # Binary classification
    if b_loss_weight > 0:
        heads.append(ScalarHead(target_name='{task}/class',
                                net=GlobalAvgPoolFCN(n_tasks=1,
                                                     n_splines=b_splines,
                                                     batchnorm=batchnorm),
                                activation='sigmoid',
                                loss='binary_crossentropy',
                                loss_weight=b_loss_weight,
                                metric=ClassificationMetrics()))

    # Model -------------------------------------------------
    m = SeqModel(
        body=DilatedConv1D(filters=filters,
                           conv1_kernel_size=conv1_kernel_size,
                           n_dil_layers=n_dil_layers,
                           padding=padding,
                           batchnorm=batchnorm,
                           skip_type=skip_type),
        heads=heads,
        tasks=tasks,
        optimizer=Adam(lr=lr),
        seqlen=seqlen,
    )
    return m
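# --- Usage sketch (illustrative, not part of the original module) --------
# A hypothetical call to bpnet_model(); the task names and hyper-parameter
# values below are made up for the example, only the keyword names come
# from the signature above.
model = bpnet_model(tasks=['Oct4', 'Sox2'],
                    filters=64,
                    n_dil_layers=9,
                    conv1_kernel_size=25,
                    tconv_kernel_size=25,
                    b_loss_weight=0,   # disable the binary classification head
                    use_bias=True,     # correct for the experimental bias tracks
                    n_bias_tracks=2,
                    seqlen=1000)
model.model.summary()  # SeqModel exposes the underlying keras model as .model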