Example #1
0
def basset_seq_model(tasks,
                     hidden=(1000, 1000),
                     dropout=(0.0, 0.0),
                     final_dropout=0.0,
                     batchnorm=False,
                     lr=0.004,
                     body='Basset',
                     seqlen=None):
    """Build a SeqModel with a named body (default 'Basset') and one
    sigmoid binary-classification head per task.

    Args:
        tasks: list of task names substituted into the head's
            '{task}/class' target template.
        hidden: hidden-layer sizes forwarded to the body.
        dropout: per-layer dropout rates forwarded to the body.
        final_dropout: dropout rate of the head's fully-connected net.
        batchnorm: enable batch-normalization in both body and head.
        lr: Adam learning rate.
        body: name of the body architecture, resolved via
            `basepair.layers.get`.
        seqlen: input sequence length passed through to SeqModel.

    Returns:
        A compiled `basepair.seqmodel.SeqModel` instance.
    """
    from basepair.seqmodel import SeqModel
    from basepair.heads import ScalarHead, ProfileHead
    from gin_train.metrics import ClassificationMetrics
    from basepair.layers import FCN
    from basepair import layers

    # Single classification head; '{task}' is expanded per entry in `tasks`.
    class_head = ScalarHead(
        target_name='{task}/class',
        net=FCN(dropout=final_dropout,
                batchnorm=batchnorm),
        activation='sigmoid',
        loss='binary_crossentropy',
        metric=ClassificationMetrics(),
    )

    # Resolve the body class by name and instantiate it.
    body_net = layers.get(body)(hidden=hidden,
                                batchnorm=batchnorm,
                                dropout=dropout)

    # NOTE(review): `Adam` is not imported inside this function — presumably
    # imported at module level; confirm against the file header.
    return SeqModel(
        body=body_net,
        heads=[class_head],
        tasks=tasks,
        optimizer=Adam(lr=lr),
        seqlen=seqlen,
    )
Example #2
0
def binary_seq_model(tasks,
                     net_body,
                     net_head,
                     lr=0.004,
                     seqlen=None):
    """Assemble a SeqModel from a caller-supplied body and head network,
    attaching one sigmoid binary-classification head per task.

    NOTE: This doesn't work with gin-train since the classes injected by
    gin-config can't be pickled. Instead, I created `basset_seq_model`

    ```
    Can't pickle <class 'basepair.layers.BassetConv'>: it's not the same
    object as basepair.layers.BassetConv
    ```

    """
    from basepair.seqmodel import SeqModel
    from basepair.heads import ScalarHead, ProfileHead
    from gin_train.metrics import ClassificationMetrics

    # One classification head; '{task}' expands per entry in `tasks`.
    class_head = ScalarHead(
        target_name='{task}/class',
        net=net_head,
        activation='sigmoid',
        loss='binary_crossentropy',
        metric=ClassificationMetrics(),
    )

    # NOTE(review): `Adam` is presumably imported at module level — confirm.
    return SeqModel(
        body=net_body,
        heads=[class_head],
        tasks=tasks,
        optimizer=Adam(lr=lr),
        seqlen=seqlen,
    )
Example #3
0
def test_interpret_wo_bias():
    """End-to-end smoke test: build a three-head SeqModel (class, counts,
    profile) where only the profile head uses a bias input, fit one round
    on random data, then check importance scores and dataset evaluation.
    """
    from basepair.metrics import PeakPredictionProfileMetric
    from gin_train.metrics import RegressionMetrics, ClassificationMetrics
    from concise.preprocessing import encodeDNA
    # test the model
    seqs = encodeDNA(['ACAGA'] * 100)

    inputs = {"seq": seqs,
              "bias/a/profile": np.random.randn(100, 5, 2)}

    # Let's use regression
    targets = {"a/class": np.random.randint(low=0, high=2, size=(100, 1)).astype(float),
               "a/counts": np.random.randn(100),
               "a/profile": np.random.randn(100, 5, 2),
               }

    import keras.backend as K
    # K.clear_session()
    # use bias
    m = SeqModel(
        body=BaseNet('relu'),
        heads=[BinaryClassificationHead('{task}/class',
                                        net=TopDense(pool_size=2),
                                        use_bias=False),
               ScalarHead('{task}/counts',
                          loss='mse',
                          metric=RegressionMetrics(),
                          net=TopDense(pool_size=2),
                          use_bias=False),
               ProfileHead('{task}/profile',
                           loss='mse',
                           metric=PeakPredictionProfileMetric(),
                           net=TopConv(n_output=2),
                           use_bias=True,
                           bias_shape=(5, 2)),  # NOTE: the shape currently has to be hard-coded to the sequence length
               ],
        tasks=['a']
    )
    m.model.fit(inputs, targets)

    o = m.imp_score_all(seqs)
    # FIX: these two assertions were previously duplicated verbatim
    # (copy-paste); the second pair was likely meant to check a different
    # head's key — add that once the intended key is known.
    assert 'a/profile/wn' in o
    assert o['a/profile/wn'].shape == seqs.shape

    # evaluate the dataset -> setup an array dataset (NumpyDataset) -> convert to
    from basepair.data import NumpyDataset
    ds = NumpyDataset({"inputs": inputs, "targets": targets})
    o = m.evaluate(ds)
    assert 'avg/counts/mad' in o
Example #4
0
def multihead_seq_model(tasks,
                        filters,
                        n_dil_layers,
                        conv1_kernel_size,
                        tconv_kernel_size,
                        b_loss_weight=1,
                        c_loss_weight=1,
                        p_loss_weight=1,
                        c_splines=20,
                        p_splines=0,
                        merge_profile_reg=False,
                        lr=0.004,
                        padding='same',
                        batchnorm=False,
                        use_bias=False,
                        n_profile_bias_tracks=2,
                        n_bias_tracks=2,
                        seqlen=None,
                        skip_type='residual'):
    """Build a multi-head SeqModel with a dilated-conv body and up to three
    heads: profile prediction (p), total-count regression (c) and binary
    classification (b). A head is included only when its loss weight is > 0.

    Args:
        tasks: task names substituted into the '{task}/...' target templates.
        filters: number of conv filters in the body / deconv heads.
        n_dil_layers: number of dilated conv layers in the body.
        conv1_kernel_size: kernel size of the body's first conv layer.
        tconv_kernel_size: transposed-conv kernel size in the profile head.
        b_loss_weight, c_loss_weight, p_loss_weight: non-negative loss
            weights; a weight of 0 disables the corresponding head.
        c_splines, p_splines: number of splines for the pooled FCN heads.
        merge_profile_reg: if True, a single profile head models both the
            profile shape and total counts (CountsMultinomialNLL) and the
            separate count head is disabled.
        lr: Adam learning rate.
        padding: conv padding mode.
        batchnorm: enable batch-normalization throughout.
        use_bias: feed experimental-bias tracks into the profile/count heads.
        n_profile_bias_tracks: number of bias tracks for the profile head.
        n_bias_tracks: number of bias tracks for the count head.
        seqlen: input sequence length passed through to SeqModel.
        skip_type: skip-connection type in the dilated body ('residual', ...).

    Returns:
        A compiled `basepair.seqmodel.SeqModel`.
    """
    from basepair.seqmodel import SeqModel
    from basepair.layers import DilatedConv1D, DeConv1D, GlobalAvgPoolFCN
    from basepair.metrics import BPNetMetricSingleProfile
    from basepair.heads import ScalarHead, ProfileHead
    from gin_train.metrics import ClassificationMetrics, RegressionMetrics
    from basepair.losses import mc_multinomial_nll_2, CountsMultinomialNLL
    from basepair.exp.paper.config import peak_pred_metric
    from basepair.activations import clipped_exp
    from basepair.functions import softmax

    # Negative loss weights are meaningless; 0 disables a head below.
    assert p_loss_weight >= 0
    assert c_loss_weight >= 0
    assert b_loss_weight >= 0

    # Heads -------------------------------------------------
    heads = []
    # Profile prediction
    if p_loss_weight > 0:
        if not merge_profile_reg:
            # Pure profile-shape head: multinomial NLL over positions,
            # probabilities recovered via softmax post-processing.
            heads.append(ProfileHead(target_name='{task}/profile',
                                     net=DeConv1D(n_tasks=2,
                                                  filters=filters,
                                                  tconv_kernel_size=tconv_kernel_size,
                                                  padding=padding,
                                                  n_hidden=0,
                                                  batchnorm=batchnorm
                                                  ),
                                     loss=mc_multinomial_nll_2,
                                     loss_weight=p_loss_weight,
                                     postproc_fn=softmax,
                                     use_bias=use_bias,
                                     bias_input='bias/{task}/profile',
                                     bias_shape=(None, n_profile_bias_tracks),
                                     metric=peak_pred_metric
                                     ))
        else:
            # Merged head: models counts AND profile jointly via
            # CountsMultinomialNLL; clipped_exp keeps predicted rates positive.
            heads.append(ProfileHead(target_name='{task}/profile',
                                     net=DeConv1D(n_tasks=2,
                                                  filters=filters,
                                                  tconv_kernel_size=tconv_kernel_size,
                                                  padding=padding,
                                                  n_hidden=1,  # use 1 hidden layer in that case
                                                  batchnorm=batchnorm
                                                  ),
                                     activation=clipped_exp,
                                     loss=CountsMultinomialNLL(2, c_task_weight=c_loss_weight),
                                     loss_weight=p_loss_weight,
                                     bias_input='bias/{task}/profile',
                                     use_bias=use_bias,
                                     bias_shape=(None, n_profile_bias_tracks),
                                     metric=BPNetMetricSingleProfile(count_metric=RegressionMetrics(),
                                                                     profile_metric=peak_pred_metric)
                                     ))
            c_loss_weight = 0  # don't need to use the other count loss

    # Count regression
    if c_loss_weight > 0:
        # NOTE(review): this counts head uses `p_splines` while the class
        # head below uses `c_splines` — the naming suggests these may be
        # swapped; confirm against the training configs before changing.
        heads.append(ScalarHead(target_name='{task}/counts',
                                net=GlobalAvgPoolFCN(n_tasks=2,
                                                     n_splines=p_splines,
                                                     batchnorm=batchnorm),
                                activation=None,
                                loss='mse',
                                loss_weight=c_loss_weight,
                                bias_input='bias/{task}/counts',
                                use_bias=use_bias,
                                bias_shape=(n_bias_tracks, ),
                                metric=RegressionMetrics(),
                                ))

    # Binary classification
    if b_loss_weight > 0:
        heads.append(ScalarHead(target_name='{task}/class',
                                net=GlobalAvgPoolFCN(n_tasks=1,
                                                     n_splines=c_splines,
                                                     batchnorm=batchnorm),
                                activation='sigmoid',
                                loss='binary_crossentropy',
                                loss_weight=b_loss_weight,
                                metric=ClassificationMetrics(),
                                ))
    # -------------------------------------------------
    # NOTE(review): `Adam` is presumably imported at module level — confirm.
    m = SeqModel(
        body=DilatedConv1D(filters=filters,
                           conv1_kernel_size=conv1_kernel_size,
                           n_dil_layers=n_dil_layers,
                           padding=padding,
                           batchnorm=batchnorm,
                           skip_type=skip_type),
        heads=heads,
        tasks=tasks,
        optimizer=Adam(lr=lr),
        seqlen=seqlen,
    )
    return m