Example #1
 def __init__(
         self,
         target_name,  # "{task}/profile"
         net,  # function that takes a keras tensor and returns a keras tensor
         activation=None,
         loss='mse',
         loss_weight=1,
         metric=RegressionMetrics(),
         postproc_fn=None,
         # bias input
         use_bias=False,
         bias_net=None,
         bias_input='bias/{task}/profile',
         bias_shape=(None, 1),
 ):
     self.net = net
     self.loss = loss
     self.loss_weight = loss_weight
     self.metric = metric
     self.postproc_fn = postproc_fn
     self.target_name = target_name
     self.activation = activation
     self.bias_input = bias_input
     self.bias_net = bias_net
     self.use_bias = use_bias
     self.bias_shape = bias_shape
Example #2
 def __init__(
         self,
         target_name,  # "{task}/scalar"
         net,  # function that takes a keras tensor and returns a keras tensor
         activation=None,
         loss='mse',
         loss_weight=1,
         metric=RegressionMetrics(),
         postproc_fn=None,  # post-processing to apply so that we are in the right scale
         # bias input
         use_bias=False,
         bias_net=None,
         bias_input='bias/{task}/scalar',
         bias_shape=(1, ),
 ):
     self.net = net
     self.loss = loss
     self.loss_weight = loss_weight
     self.metric = metric
     self.postproc_fn = postproc_fn
     self.target_name = target_name
     self.activation = activation
     self.bias_input = bias_input
     self.bias_net = bias_net
     self.use_bias = use_bias
     self.bias_shape = bias_shape
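
A usage sketch for these two constructors (they match the ProfileHead and ScalarHead classes imported from bpnet.heads in Example #4). The net functions and shapes below are hypothetical: per the comments above, `net` can be any callable mapping a Keras tensor to a Keras tensor, and '{task}' in target_name is filled in per task by SeqModel.

import keras.layers as kl
from bpnet.heads import ScalarHead, ProfileHead

def profile_net(x):
    # hypothetical profile net: project the body's features to 2 output tracks
    return kl.Conv1D(filters=2, kernel_size=1, padding='same')(x)

def counts_net(x):
    # hypothetical scalar net: pool over positions, predict one value per task
    return kl.Dense(1)(kl.GlobalAvgPool1D()(x))

profile_head = ProfileHead('{task}/profile',
                           net=profile_net,
                           use_bias=True,
                           bias_input='bias/{task}/profile',
                           bias_shape=(None, 2))  # (positions, n_bias_tracks)
counts_head = ScalarHead('{task}/counts', net=counts_net, use_bias=False)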
Example #3
def test_interpret_wo_bias():
    import numpy as np
    from bpnet.metrics import RegressionMetrics, ClassificationMetrics, PeakPredictionProfileMetric
    from concise.preprocessing import encodeDNA
    # test the model
    seqs = encodeDNA(['ACAGA'] * 100)

    inputs = {"seq": seqs, "bias/a/profile": np.random.randn(100, 5, 2)}

    # Let's use regression
    targets = {
        "a/class": np.random.randint(low=0, high=2,
                                     size=(100, 1)).astype(float),
        "a/counts": 1 + np.ceil(np.abs(np.random.randn(100))),
        "a/profile": 1 + np.ceil(np.abs(np.random.randn(100, 5, 2))),
    }

    # use bias only for the profile head
    m = SeqModel(
        body=BaseNet('relu'),
        heads=[
            BinaryClassificationHead('{task}/class',
                                     net=TopDense(pool_size=2),
                                     use_bias=False),
            ScalarHead('{task}/counts',
                       loss='mse',
                       metric=RegressionMetrics(),
                       net=TopDense(pool_size=2),
                       use_bias=False),
            ProfileHead(
                '{task}/profile',
                loss='mse',
                metric=PeakPredictionProfileMetric(neg_max_threshold=0.05,
                                                   required_min_pos_counts=0),
                net=TopConv(n_output=2),
                use_bias=True,
                bias_shape=(5, 2)
            ),  # NOTE: the shape currently has to be hard-coded to the sequence length
        ],
        tasks=['a'])
    m.model.fit(inputs, targets)

    o = m.contrib_score_all(seqs)
    assert 'a/profile/wn' in o
    assert o['a/profile/wn'].shape == seqs.shape

    # evaluate on the dataset: wrap the arrays into a NumpyDataset and call evaluate()
    from bpnet.data import NumpyDataset
    ds = NumpyDataset({"inputs": inputs, "targets": targets})
    o = m.evaluate(ds)
    assert 'avg/counts/mad' in o
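
Since m.model is a plain keras.Model (as the fit call above shows), predictions follow the standard Keras API. A minimal sketch; the zero-filled bias array is illustrative only, since the bias-corrected profile head also expects its bias input at prediction time:

import numpy as np
preds = m.model.predict({"seq": seqs,
                         "bias/a/profile": np.zeros((100, 5, 2))})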
Example #4
def bpnet_model(tasks,
                filters,
                n_dil_layers,
                conv1_kernel_size,
                tconv_kernel_size,
                b_loss_weight=1,
                c_loss_weight=1,
                p_loss_weight=1,
                c_splines=0,
                b_splines=20,
                merge_profile_reg=False,
                lr=0.004,
                tracks_per_task=2,
                padding='same',
                batchnorm=False,
                use_bias=False,
                n_bias_tracks=2,
                profile_metric=None,
                count_metric=None,
                profile_bias_window_sizes=[1, 50],
                seqlen=None,
                skip_type='residual'):
    """Setup the BPNet model architecture

    Args:
      tasks: list of tasks
      filters: number of convolutional filters to use at each layer
      n_dil_layers: number of dilated convolutional filters to use
      conv1_kernel_size: kernel_size of the first convolutional layer
      tconv_kernel_size: kernel_size of the transpose/de-convolutional final layer
      b_loss_weight: binary classification weight
      c_loss_weight: total count regression weight
      p_loss_weight: profile regression weight
      c_splines: number of splines to use in the total count regression output head (0=None)
      b_splines: number of splines to use in the binary classification output head
      merge_profile_reg: if True, total count and profile prediction will be part of
        a single profile output head
      lr: learning rate of the Adam optimizer
      padding: padding in the convolutional layers
      batchnorm: if True, add Batchnorm after every layer. Note: this may mess up the
        DeepLIFT contribution scores downstream
      use_bias: if True, correct for the bias
      n_bias_tracks: how many bias tracks to expect (for both total count and profile regression)
      tracks_per_task: number of output tracks per task
      profile_metric, count_metric: metrics for the profile and count heads (defaults are used when None)
      profile_bias_window_sizes: window sizes of the moving-average profile bias net
      seqlen: sequence length.
      skip_type: skip connection type ('residual' or 'dense')

    Returns:
      bpnet.seqmodel.SeqModel
    """
    from bpnet.seqmodel import SeqModel
    from bpnet.layers import DilatedConv1D, DeConv1D, GlobalAvgPoolFCN, MovingAverages
    from bpnet.metrics import BPNetMetricSingleProfile, default_peak_pred_metric
    from bpnet.heads import ScalarHead, ProfileHead
    from bpnet.metrics import ClassificationMetrics, RegressionMetrics
    from bpnet.losses import multinomial_nll, CountsMultinomialNLL
    import bpnet.losses as bloss
    from bpnet.activations import clipped_exp
    from bpnet.functions import softmax
    from keras.optimizers import Adam

    assert p_loss_weight >= 0
    assert c_loss_weight >= 0
    assert b_loss_weight >= 0

    # TODO is it possible to re-instantiate the class to get rid of gin train?

    if profile_metric is None:
        print("Using the default profile prediction metric")
        profile_metric = default_peak_pred_metric

    if count_metric is None:
        print("Using the default regression prediction metrics")
        count_metric = RegressionMetrics()

    # Heads -------------------------------------------------
    heads = []
    # Profile prediction
    if p_loss_weight > 0:
        if not merge_profile_reg:
            heads.append(ProfileHead(target_name='{task}/profile',
                                     net=DeConv1D(n_tasks=tracks_per_task,
                                                  filters=filters,
                                                  tconv_kernel_size=tconv_kernel_size,
                                                  padding=padding,
                                                  n_hidden=0,
                                                  batchnorm=batchnorm
                                                  ),
                                     loss=multinomial_nll,
                                     loss_weight=p_loss_weight,
                                     postproc_fn=softmax,
                                     use_bias=use_bias,
                                     bias_input='bias/{task}/profile',
                                     bias_shape=(None, n_bias_tracks),
                                     bias_net=MovingAverages(window_sizes=profile_bias_window_sizes),
                                     metric=profile_metric
                                     ))
        else:
            heads.append(ProfileHead(target_name='{task}/profile',
                                     net=DeConv1D(n_tasks=tracks_per_task,
                                                  filters=filters,
                                                  tconv_kernel_size=tconv_kernel_size,
                                                  padding=padding,
                                                  n_hidden=1,  # use 1 hidden layer in that case
                                                  batchnorm=batchnorm
                                                  ),
                                     activation=clipped_exp,
                                     loss=CountsMultinomialNLL(c_task_weight=c_loss_weight),
                                     loss_weight=p_loss_weight,
                                     bias_input='bias/{task}/profile',
                                     use_bias=use_bias,
                                     bias_shape=(None, n_bias_tracks),
                                     bias_net=MovingAverages(window_sizes=profile_bias_window_sizes),
                                     metric=BPNetMetricSingleProfile(count_metric=count_metric,
                                                                     profile_metric=profile_metric)
                                     ))
            c_loss_weight = 0  # don't need to use the other count loss

    # Count regression
    if c_loss_weight > 0:
        heads.append(ScalarHead(target_name='{task}/counts',
                                net=GlobalAvgPoolFCN(n_tasks=tracks_per_task,
                                                     n_splines=c_splines,
                                                     batchnorm=batchnorm),
                                activation=None,
                                loss='mse',
                                loss_weight=c_loss_weight,
                                bias_input='bias/{task}/counts',
                                use_bias=use_bias,
                                bias_shape=(n_bias_tracks, ),
                                metric=count_metric,
                                ))

    # Binary classification
    if b_loss_weight > 0:
        heads.append(ScalarHead(target_name='{task}/class',
                                net=GlobalAvgPoolFCN(n_tasks=1,
                                                     n_splines=b_splines,
                                                     batchnorm=batchnorm),
                                activation='sigmoid',
                                loss='binary_crossentropy',
                                loss_weight=b_loss_weight,
                                metric=ClassificationMetrics(),
                                ))
    # -------------------------------------------------
    m = SeqModel(
        body=DilatedConv1D(filters=filters,
                           conv1_kernel_size=conv1_kernel_size,
                           n_dil_layers=n_dil_layers,
                           padding=padding,
                           batchnorm=batchnorm,
                           skip_type=skip_type),
        heads=heads,
        tasks=tasks,
        optimizer=Adam(lr=lr),
        seqlen=seqlen,
    )
    return m
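
An illustrative call of this function; the task names and hyperparameter values below are hypothetical placeholders, not recommendations:

m = bpnet_model(tasks=['Oct4', 'Sox2'],
                filters=64,
                n_dil_layers=9,
                conv1_kernel_size=25,
                tconv_kernel_size=25,
                seqlen=1000)
m.model.summary()  # inspect the underlying keras.Model, as in Example #3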