Example #1
def get_deeplift_scoring_function(model,target_layer_idx=-2,task_idx=0, reference="shuffled_ref", sequential=True):
    """
    Arguments: 
        model -- a string containing the path to the hdf5 exported model 
        target_layer_idx -- should be -2 for classification; -1 for regression 
        reference -- one of 'shuffled_ref','gc_ref','zero_ref'
    Returns:
        deepLIFT scoring function 
    """
    from deeplift.conversion import kerasapi_conversion as kc
    deeplift_model = kc.convert_model_from_saved_files(model,verbose=False)

    #get the deeplift score with respect to the logit 
    if sequential:
        score_func = deeplift_model.get_target_contribs_func(
             find_scores_layer_idx=task_idx,
             target_layer_idx=target_layer_idx)
    else:
        from deeplift.util import get_hypothetical_contribs_func_onehot
        input_name = deeplift_model.get_input_layer_names()[0]
        target_layer_name = list(deeplift_model.get_name_to_layer().keys())[target_layer_idx]
        multipliers_func = deeplift_model.get_target_multipliers_func(input_name, target_layer_name)
        score_func = get_hypothetical_contribs_func_onehot(multipliers_func)
    
    if reference=="shuffled_ref":
        from deeplift.util import get_shuffle_seq_ref_function
        from deeplift.dinuc_shuffle import dinuc_shuffle        
        score_func=get_shuffle_seq_ref_function(
            score_computation_function=score_func,
            shuffle_func=dinuc_shuffle,
            one_hot_func=None)
    return score_func
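
A minimal usage sketch for the function above: with reference="shuffled_ref" the returned function is the shuffle-reference wrapper and averages scores over dinucleotide-shuffled references of each input. The model path "model.hdf5" and the one-hot-encoded array X are illustrative placeholders (one_hot_func=None, so inputs must already be encoded).

score_func = get_deeplift_scoring_function("model.hdf5", target_layer_idx=-2,
                                           task_idx=0, reference="shuffled_ref")
scores = score_func(task_idx=0,
                    input_data_sequences=X,
                    num_refs_per_seq=10,
                    batch_size=200)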
Example #2
def get_deeplift_scoring_function(model,
                                  target_layer_idx=-2,
                                  task_idx=0,
                                  num_refs_per_seq=10,
                                  reference="shuffled_ref",
                                  one_hot_func=None):
    """
    Arguments: 
        model -- a string containing the path to the hdf5 exported model 
        target_layer_idx -- layer in the model whose outputs will be interpreted. For classification
                            models we interpret the logit (input to the sigmoid), which is the output
                            of layer -2. For regression models we interpret the model output, which
                            is the output of layer -1.
        task_idx -- index of the task (output neuron) to interpret
        num_refs_per_seq -- number of shuffled references to average over per sequence
        reference -- one of 'shuffled_ref','gc_ref','zero_ref'
        one_hot_func -- one-hot encoding function to use for FASTA string inputs; if the inputs are
                        already one-hot-encoded, use the default of None
    Returns:
        deepLIFT scoring function 
    """
    assert reference in ["shuffled_ref", "gc_ref", "zero_ref"]
    from deeplift.conversion import kerasapi_conversion as kc
    deeplift_model = kc.convert_model_from_saved_files(model, verbose=False)

    #get the deeplift score with respect to the logit
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=target_layer_idx)
    if reference == "shuffled_ref":
        from deeplift.util import get_shuffle_seq_ref_function
        from deeplift.dinuc_shuffle import dinuc_shuffle
        score_func = get_shuffle_seq_ref_function(
            score_computation_function=score_func,
            shuffle_func=dinuc_shuffle,
            one_hot_func=one_hot_func)
    return score_func
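
For the 'gc_ref' and 'zero_ref' settings this function returns the raw contribution function unchanged, so references are supplied explicitly when it is called. A minimal sketch of that call pattern, assuming a one-hot-encoded array X_onehot and an all-zero reference (the explicit-reference call in Example #3 follows the same pattern):

import numpy as np

score_func = get_deeplift_scoring_function("model.hdf5", reference="zero_ref")
zero_refs = np.zeros_like(X_onehot)
scores = np.array(score_func(task_idx=0,
                             input_data_list=[X_onehot],
                             input_references_list=[zero_refs],
                             batch_size=200,
                             progress_update=1000))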
Example #3
    def getDeepliftScores(self, weight_path, yaml_path, data=None, sequence_length=150, use_references=True):
        # local imports used below (these may instead live at module level in the original file)
        from deeplift.conversion import kerasapi_conversion as kc
        import deeplift.layers
        from deeplift.util import (get_shuffle_seq_ref_function,
                                   get_hypothetical_contribs_func_onehot)
        from deeplift.dinuc_shuffle import dinuc_shuffle

        if data is None:
            data = self.X[:, 0, :, :]

        deeplift_model = kc.convert_model_from_saved_files(
            weight_path,
            yaml_path,
            nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault
            )

        deeplift_contribs_func = deeplift_model.get_target_contribs_func(
                                    find_scores_layer_idx=0,
                                    target_layer_idx=-1)

        multipliers_func = deeplift_model.get_target_multipliers_func(find_scores_layer_idx=0,
                                                                    target_layer_idx=-1)
        hypothetical_contribs_func = get_hypothetical_contribs_func_onehot(multipliers_func)
        hypothetical_contribs_many_refs_func = get_shuffle_seq_ref_function(
            score_computation_function=hypothetical_contribs_func,
            shuffle_func=dinuc_shuffle)
        # number of dinucleotide-shuffled references generated per input sequence
        num_refs_per_seq = 10
        hypothetical_scores = hypothetical_contribs_many_refs_func(
                                task_idx=0,
                                input_data_sequences=data,
                                num_refs_per_seq=num_refs_per_seq,
                                batch_size=50,
                                progress_update=1000,
                            )

        # average the plain contribution scores over shuffled references as well,
        # so that `scores` is defined for the return statement below
        contribs_many_refs_func = get_shuffle_seq_ref_function(
            score_computation_function=deeplift_contribs_func,
            shuffle_func=dinuc_shuffle)
        scores = contribs_many_refs_func(task_idx=0,
                                         input_data_sequences=data,
                                         num_refs_per_seq=num_refs_per_seq,
                                         batch_size=50,
                                         progress_update=1000)

        # Alternative (currently disabled): score against explicit references from self.motifs
        '''
        data_indices, references = self.motifs.get_references(sequence_length)
        final_scores = []
        final_hyp = []
        for (ind, ref) in zip(data_indices, references):
            scores = np.array(deeplift_contribs_func(task_idx=0,
                                            input_data_list=[data[ind].astype(float)],
                                            input_references_list=[ref.astype(float)],
                                            batch_size=50,
                                            progress_update=4000))
            hyp_scores = hypothetical_contribs_func(
                            task_idx=0,
                            input_data_list=[data[ind].astype(float)],
                            input_references_list=[ref.astype(float)],
                            batch_size=50,
                            progress_update=1000,
                        )
            final_scores.append(scores)
            final_hyp.append(hyp_scores)
        return np.concatenate(final_scores, axis=0), np.concatenate(final_hyp, axis=0)

        '''
        return scores, hypothetical_scores
Example #4
def deeplift_shuffled_ref(X,score_func,batch_size=200,task_idx=0,num_refs_per_seq=10,one_hot_func=None):
    from deeplift.util import get_shuffle_seq_ref_function
    from deeplift.dinuc_shuffle import dinuc_shuffle        
    score_func=get_shuffle_seq_ref_function(
        score_computation_function=score_func,
        shuffle_func=dinuc_shuffle,
        one_hot_func=one_hot_func)
    print("got score func!") 
    deeplift_scores=score_func(
        task_idx=task_idx,
        input_data_sequences=X,
        num_refs_per_seq=num_refs_per_seq,
        batch_size=batch_size)
    return deeplift_scores
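
A minimal wiring sketch for this helper, assuming deeplift_model was built with kc.convert_model_from_saved_files as in the earlier examples and X_onehot is a one-hot-encoded batch of sequences. score_func should be a raw contribution function, since the shuffled-reference wrapping happens inside deeplift_shuffled_ref itself:

raw_contribs_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=0, target_layer_idx=-2)
scores = deeplift_shuffled_ref(X_onehot, raw_contribs_func,
                               batch_size=200, task_idx=0, num_refs_per_seq=10)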
Example #5
def deeplift_shuffled_ref(X,
                          score_func,
                          batch_size=200,
                          task_idx=0,
                          refs_per_seq=10):
    from deeplift.util import get_shuffle_seq_ref_function
    from deeplift.dinuc_shuffle import dinuc_shuffle
    score_func = get_shuffle_seq_ref_function(
        score_computation_function=score_func,
        shuffle_func=dinuc_shuffle,
        # one_hot_encode: one-hot encoding helper assumed to be defined elsewhere in the module
        one_hot_func=one_hot_encode)

    deeplift_scores = score_func(task_idx=task_idx,
                                 input_data_sequences=X,
                                 num_refs_per_seq=refs_per_seq,
                                 batch_size=batch_size,
                                 progress_update=None)
    return deeplift_scores
Example #6
contribs_func = deeplift_model.get_target_contribs_func(find_scores_layer_idx=0,
                                                        target_layer_idx=-2)

# ### Adapt the scoring function to work with multiple shuffled references
# 
# For each sequence, we generate a collection of reference sequences by
# shuffling the original sequence, and then average the scores over these
# multiple shuffled versions of the sequence.

# In[8]:

contribs_many_refs_func = get_shuffle_seq_ref_function(
    #score_computation_function: is the original function to compute scores
    #shuffle_func: is the function that shuffles the sequence. On real genomic
    #    data, a dinuc shuffle is advisable due to the strong bias against CG
    #    dinucleotides
    score_computation_function=contribs_func,
    shuffle_func=dinuc_shuffle,
    one_hot_func=lambda x: np.array([one_hot_encode_along_channel_axis(seq)
                                     for seq in x]))


# ### Compile the "hypothetical" contribution scoring function
# 
# Hypothetical contribution scores are estimates of the contributions that
# different bases *would* have if they were present in the sequence. They are
# found by looking at the DeepLIFT multipliers (which satisfy the equation
# `difference_from_reference*multiplier = contribution`) and substituting in
# what the different values for `difference_from_reference` would be if other
# bases were present in the sequence. The hypothetical contributions can act
# as "autocompletes" of the motifs by revealing the preference of the network