コード例 #1
0
def deeplift(model, X, batch_size=200, target_layer_idx=-2):
    """
    Compute DeepLIFT scores for task 0 of ``model`` on ``X``.

    The DeepLIFT converter used here reads a saved model, so the model is
    written to a uniquely named temporary HDF5 file which is removed again
    once the conversion has finished (nothing is left in the working
    directory and parallel runs cannot clobber each other's file).

    Arguments:
        model -- object exposing ``save(path)`` (e.g. a Keras model)
        X -- 4-dimensional input array whose second axis has size 1
        batch_size -- batch size passed to the scoring function
        target_layer_idx -- index of the layer whose output is explained
    Returns:
        numpy array of the scores returned by the scoring function. The
        original author documents the layout as
        (num_task, num_samples, 1, num_bases, sequence_length) --
        NOTE(review): not verifiable from this function alone.
    """
    assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    import os
    import tempfile
    from deeplift.conversion import kerasapi_conversion as kc

    # dump the model to hdf5, as the converter wants a saved model input
    fd, saved_model_path = tempfile.mkstemp(suffix='.hdf5')
    os.close(fd)  # only the path is needed; the model writer reopens it
    try:
        model.save(saved_model_path)
        # convert to deeplift model
        deeplift_model = kc.convert_model_from_saved_files(saved_model_path,
                                                           verbose=False)
    finally:
        os.remove(saved_model_path)

    # get the deeplift score with respect to the logit
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=target_layer_idx)

    # use a 40% GC reference
    input_references = [np.array([0.3, 0.2, 0.2, 0.3])[None, None, None, :]]
    # get deeplift scores
    deeplift_scores = score_func(task_idx=0,
                                 input_data_list=[X],
                                 batch_size=batch_size,
                                 progress_update=None,
                                 input_references_list=input_references)
    return np.asarray(deeplift_scores)
コード例 #2
0
def deeplift(model,
             X,
             batch_size=200,
             target_layer_idx=-2,
             task_idx=0,
             num_refs_per_seq=10,
             reference="shuffled_ref",
             one_hot_func=None):
    """
    Compute DeepLIFT scores for ``X`` using a saved model.

    Arguments:
        model -- passed straight to ``convert_model_from_saved_files``
                 (i.e. the location of the saved model)
        X -- input data; when ``one_hot_func`` is None it must be a
             4-dimensional array whose second axis has size 1
        batch_size -- batch size passed on to the reference helper
        target_layer_idx -- index of the layer whose output is explained
        task_idx -- index of the task to score
        num_refs_per_seq -- only used for the 'shuffled_ref' reference
        reference -- one of 'shuffled_ref', 'gc_ref'
        one_hot_func -- encoder forwarded to the shuffled-reference helper;
                        None means ``X`` is already encoded
    Returns:
        numpy array of the scores. The original author documents the layout
        as (num_task, num_samples, 1, num_bases, sequence_length) --
        NOTE(review): depends on helpers defined outside this block.
    """
    assert reference in ["shuffled_ref", "gc_ref"]
    if one_hot_func is None:
        # check that dataset has been one-hot-encoded
        assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import kerasapi_conversion as kc
    deeplift_model = kc.convert_model_from_saved_files(model, verbose=False)

    # get the deeplift score with respect to the logit
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=target_layer_idx)

    if reference == "shuffled_ref":
        deeplift_scores = deeplift_shuffled_ref(X,
                                                score_func,
                                                batch_size,
                                                task_idx,
                                                num_refs_per_seq,
                                                one_hot_func=one_hot_func)
    elif reference == "gc_ref":
        deeplift_scores = deeplift_gc_ref(X, score_func, batch_size, task_idx)
    else:
        # Still reachable when assertions are disabled (python -O).
        raise Exception(
            "supported DeepLIFT references are 'shuffled_ref' and 'gc_ref'")
    return np.asarray(deeplift_scores)
コード例 #3
0
    def test_convert_conv1d_model_compute_scores(self):
        """Rescale-mode contribution scores must equal gradient * input.

        Both named inputs are scored against all-zero references.
        """
        converted = kc.convert_model_from_saved_files(
            self.saved_file_path,
            nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
        contribs_func = converted.get_target_contribs_func(
            find_scores_layer_name=["inp1_0", "inp2_0"],
            pre_activation_target_layer_name="output_preact_0")

        grads_inp1, grads_inp2 = self.grad_func(self.inp1, self.inp2)

        inputs_by_layer = {'inp1_0': self.inp1, 'inp2_0': self.inp2}
        zero_refs_by_layer = {
            'inp1_0': np.zeros_like(self.inp1),
            'inp2_0': np.zeros_like(self.inp2),
        }
        actual = np.array(
            contribs_func(task_idx=0,
                          input_data_list=inputs_by_layer,
                          input_references_list=zero_refs_by_layer,
                          batch_size=10,
                          progress_update=None))
        # when biases are 0 and ref is 0, deeplift
        # with the rescale rule is the same as grad*inp
        expected = np.array([grads_inp1 * self.inp1, grads_inp2 * self.inp2])
        np.testing.assert_almost_equal(actual, expected, decimal=6)
コード例 #4
0
ファイル: deeplift.py プロジェクト: kundajelab/kerasAC
def get_deeplift_scoring_function(model,target_layer_idx=-2,task_idx=0, reference="shuffled_ref", sequential=True):
    """
    Build a DeepLIFT scoring function for a saved model.

    Arguments:
        model -- a string containing the path to the hdf5 exported model
        target_layer_idx -- should be -2 for classification; -1 for regression
        task_idx -- forwarded as ``find_scores_layer_idx`` in the sequential
                    branch (NOTE(review): looks unintended -- confirm whether
                    the input layer index 0 was meant)
        reference -- one of 'shuffled_ref','gc_ref','zero_ref'; only
                     'shuffled_ref' changes the returned function here
        sequential -- truthy: score by layer index; falsy: score the first
                      input layer by name via hypothetical contributions
    Returns:
        deepLIFT scoring function
    """
    from deeplift.conversion import kerasapi_conversion as kc
    converted = kc.convert_model_from_saved_files(model,verbose=False)

    # get the deeplift score with respect to the logit
    if not sequential:
        first_input_name = converted.get_input_layer_names()[0]
        layer_names = list(converted.get_name_to_layer().keys())
        target_name = layer_names[target_layer_idx]
        multipliers = converted.get_target_multipliers_func(first_input_name, target_name)
        # NOTE(review): relies on a module-level ``deeplift`` name that is
        # not imported inside this function.
        scorer = deeplift.util.get_hypothetical_contribs_func_onehot(multipliers)
    else:
        scorer = converted.get_target_contribs_func(
            find_scores_layer_idx=task_idx,
            target_layer_idx=target_layer_idx)

    if reference != "shuffled_ref":
        # The plain scoring function is returned for every other reference.
        return scorer

    from deeplift.util import get_shuffle_seq_ref_function
    from deeplift.dinuc_shuffle import dinuc_shuffle
    return get_shuffle_seq_ref_function(
        score_computation_function=scorer,
        shuffle_func=dinuc_shuffle,
        one_hot_func=None)
コード例 #5
0
    def test_convert_conv1d_model_compute_scores(self):
        """Compare Rescale contribution scores with grad * input per input."""
        dl_model = kc.convert_model_from_saved_files(
            self.saved_file_path,
            nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
        score_fn = dl_model.get_target_contribs_func(
            find_scores_layer_name=["inp1_0", "inp2_0"],
            pre_activation_target_layer_name="output_preact_0")

        grads_inp1, grads_inp2 = self.grad_func(self.inp1, self.inp2)

        # Inputs and their all-zero references, keyed by input layer name.
        data = {'inp1_0': self.inp1, 'inp2_0': self.inp2}
        refs = {'inp1_0': np.zeros_like(self.inp1),
                'inp2_0': np.zeros_like(self.inp2)}
        scores = score_fn(task_idx=0,
                          input_data_list=data,
                          input_references_list=refs,
                          batch_size=10,
                          progress_update=None)

        # when biases are 0 and ref is 0, deeplift
        # with the rescale rule is the same as grad*inp
        grad_times_inp = [grads_inp1*self.inp1, grads_inp2*self.inp2]
        np.testing.assert_almost_equal(
            np.array(scores), np.array(grad_times_inp), decimal=6)
コード例 #6
0
ファイル: tutorial_utils.py プロジェクト: kundajelab/dragonn
def deeplift(model, X, batch_size=200,target_layer_idx=-2,task_idx=0, num_refs_per_seq=10,reference="shuffled_ref",one_hot_func=None):
    """
    Compute DeepLIFT scores for ``X`` using a saved model.

    Arguments:
        model -- passed straight to ``convert_model_from_saved_files``
                 (i.e. the location of the saved model)
        X -- input data; when ``one_hot_func`` is None it must be a
             4-dimensional array whose second axis has size 1
        batch_size -- batch size passed on to the reference helper
        target_layer_idx -- index of the layer whose output is explained
        task_idx -- index of the task to score
        num_refs_per_seq -- only used for the 'shuffled_ref' reference
        reference -- one of 'shuffled_ref', 'gc_ref', 'zero_ref'
        one_hot_func -- encoder forwarded to the shuffled-reference helper;
                        None means ``X`` is already encoded
    Returns:
        numpy array of the scores. The original author documents the layout
        as (num_task, num_samples, 1, num_bases, sequence_length) --
        NOTE(review): depends on helpers defined outside this block.
    """
    assert reference in ["shuffled_ref","gc_ref","zero_ref"]
    if one_hot_func is None:
        # check that dataset has been one-hot-encoded
        assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import kerasapi_conversion as kc
    deeplift_model = kc.convert_model_from_saved_files(model,verbose=False)

    # get the deeplift score with respect to the logit
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0,
        target_layer_idx=target_layer_idx)

    if reference=="shuffled_ref":
        deeplift_scores=deeplift_shuffled_ref(X,score_func,batch_size,task_idx,num_refs_per_seq,one_hot_func=one_hot_func)
    elif reference=="gc_ref":
        deeplift_scores=deeplift_gc_ref(X,score_func,batch_size,task_idx)
    elif reference=="zero_ref":
        deeplift_scores=deeplift_zero_ref(X,score_func,batch_size,task_idx)
    else:
        # Still reachable when assertions are disabled (python -O).
        raise Exception("supported DeepLIFT references are 'shuffled_ref', 'gc_ref' and 'zero_ref'")
    return np.asarray(deeplift_scores)
コード例 #7
0
ファイル: deeplift.py プロジェクト: yynst2/dragonn
def get_deeplift_scoring_function(model,
                                  target_layer_idx=-2,
                                  task_idx=0,
                                  num_refs_per_seq=10,
                                  reference="shuffled_ref",
                                  one_hot_func=None):
    """
    Build a DeepLIFT scoring function for a saved model.

    Arguments:
        model -- a string containing the path to the hdf5 exported model
        target_layer_idx -- Layer in the model whose outputs will be interpreted. For classification models we
                            interpret the logit (input to the sigmoid), which is the output of layer -2.
                            For regression models we interpret the model output, which is the output of layer -1.
        task_idx -- accepted for interface compatibility; not used in this function
        num_refs_per_seq -- accepted for interface compatibility; not used in this function
        reference -- one of 'shuffled_ref','gc_ref','zero_ref'; only 'shuffled_ref' wraps the scoring function here
        one_hot_func -- one hot function to use for encoding FASTA string inputs; if the inputs are already one-hot-encoded, use the default of None
    Returns:
        deepLIFT scoring function
    """
    assert reference in ["shuffled_ref", "gc_ref", "zero_ref"]
    from deeplift.conversion import kerasapi_conversion as kc
    deeplift_model = kc.convert_model_from_saved_files(model, verbose=False)

    # get the deeplift score with respect to the logit
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=target_layer_idx)
    if reference == "shuffled_ref":
        from deeplift.util import get_shuffle_seq_ref_function
        from deeplift.dinuc_shuffle import dinuc_shuffle
        score_func = get_shuffle_seq_ref_function(
            score_computation_function=score_func,
            shuffle_func=dinuc_shuffle,
            one_hot_func=one_hot_func)
    return score_func
コード例 #8
0
ファイル: gen_interpret.py プロジェクト: IoBT-VISTEC/PPMI_DL
def inp_deeplift(PathOutput, Data_2, Labels_2, preds, n_model):
    """Build a per-sample DeepLIFT (RevealCancel) heatmap.

    For every sample the map of its predicted class (argmax of ``preds``)
    is selected from the scores of tasks 0 and 1.

    Arguments:
        PathOutput -- path prefix; ``'best_model.hd5'`` is appended to it
        Data_2 -- input array; indexed as (sample, d1, d2, d3, channel)
        Labels_2 -- accepted for interface compatibility; not used
        preds -- per-sample prediction vectors; the argmax must be 0 or 1
        n_model -- model variant, one of 0, 1, 2, 3
    Returns:
        numpy array of shape ``Data_2.shape[0:4]`` holding channel 0 of the
        scores for each sample's predicted class.
    Raises:
        ValueError -- if ``n_model`` is not a supported variant (previously
                      this surfaced later as an UnboundLocalError).
    """
    if n_model not in (0, 1, 2, 3):
        raise ValueError(
            "unsupported n_model: %r (expected 0, 1, 2 or 3)" % (n_model,))

    # All supported variants use the same DeepLIFT layer names.
    deeplift_layer0 = "input1_0"
    deeplift_layer1 = "dense_2_0"

    saved_model_file = PathOutput + 'best_model.hd5'

    revealcancel_model = kc.convert_model_from_saved_files(
        h5_file=saved_model_file,
        nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)

    revealcancel_func = revealcancel_model.get_target_contribs_func(
        find_scores_layer_name=deeplift_layer0,
        pre_activation_target_layer_name=deeplift_layer1)

    # The reference is the mean over samples; it is the same for every task.
    mean_reference = np.average(Data_2, axis=0)

    from collections import OrderedDict
    method_to_task_to_scores = OrderedDict()
    for method_name, score_func in [
        ('revealcancel', revealcancel_func),
    ]:
        print("Computing scores for:", method_name)
        method_to_task_to_scores[method_name] = {}
        for task_idx in range(2):
            print("\tComputing scores for task: " + str(task_idx))
            scores = np.array(
                score_func(
                    task_idx=task_idx,
                    input_data_list=[Data_2],
                    input_references_list=[mean_reference],
                    batch_size=4,
                    progress_update=None))
            method_to_task_to_scores[method_name][task_idx] = scores

    # Generate the heatmap
    Data_2_map = np.zeros(Data_2.shape[0:4])
    task_to_scores = method_to_task_to_scores['revealcancel']

    for nn in range(Data_2.shape[0]):
        class_idx = np.argmax(preds[nn])
        # Copy the DeepLIFT image of the predicted class (channel 0).
        Data_2_map[nn] = task_to_scores[class_idx][nn, :, :, :, 0]

    return Data_2_map
コード例 #9
0
 def test_convert_conv2d_model_forward_prop(self):
     """Forward pass of the converted model must match the Keras output."""
     converted = kc.convert_model_from_saved_files(self.saved_file_path)
     # Map the first layer's activations to the last layer's activations.
     input_vars = converted.get_layers()[0].get_activation_vars()
     output_vars = converted.get_layers()[-1].get_activation_vars()
     fprop = compile_func([input_vars], output_vars)
     deeplift_out = fprop(self.inp)
     keras_out = self.keras_output_fprop_func([self.inp, 0])
     np.testing.assert_almost_equal(deeplift_out, keras_out, decimal=6)
コード例 #10
0
 def test_convert_conv2d_model_forward_prop(self):
     """Compare the converted model's forward pass with Keras (6 decimals)."""
     dl_model = kc.convert_model_from_saved_files(self.saved_file_path)
     first_layer_vars = dl_model.get_layers()[0].get_activation_vars()
     last_layer_vars = dl_model.get_layers()[-1].get_activation_vars()
     forward = compile_func([first_layer_vars], last_layer_vars)
     observed = forward(self.inp)
     expected = self.keras_output_fprop_func([self.inp, 0])
     np.testing.assert_almost_equal(observed, expected, decimal=6)
コード例 #11
0
    def getDeepliftScores(self, weight_path, yaml_path, data=None, sequence_length=150, use_references=True):
        """Compute contribution and hypothetical-contribution scores.

        Both score sets are computed for task 0 against dinucleotide-shuffled
        references (10 per sequence, batch size 50).

        The original body returned a ``scores`` variable that was never
        assigned (it only appeared inside a disabled code string), so every
        call raised NameError. ``scores`` is now computed with the
        contribution function using the same shuffled-reference settings as
        the hypothetical scores.
        NOTE(review): confirm this is the intended definition of ``scores``;
        a disabled variant used per-motif references obtained from
        ``self.motifs.get_references(sequence_length)`` instead.

        Arguments:
            weight_path -- first positional argument of
                           ``convert_model_from_saved_files``
            yaml_path -- second positional argument of
                         ``convert_model_from_saved_files``
                         (NOTE(review): verify that the converter's second
                         positional parameter is the YAML file and not JSON)
            data -- inputs to score; defaults to ``self.X[:, 0, :, :]``
            sequence_length -- currently unused
            use_references -- currently unused
        Returns:
            tuple ``(scores, hypothetical_scores)``
        """
        if data is None:
            data = self.X[:, 0, :, :]

        deeplift_model = kc.convert_model_from_saved_files(
            weight_path,
            yaml_path,
            nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault
            )

        deeplift_contribs_func = deeplift_model.get_target_contribs_func(
                                    find_scores_layer_idx=0,
                                    target_layer_idx=-1)

        multipliers_func = deeplift_model.get_target_multipliers_func(find_scores_layer_idx=0,
                                                                    target_layer_idx=-1)
        hypothetical_contribs_func = get_hypothetical_contribs_func_onehot(multipliers_func)

        # Wrap both scorers so they average over shuffled references.
        contribs_many_refs_func = get_shuffle_seq_ref_function(
            score_computation_function=deeplift_contribs_func,
            shuffle_func=dinuc_shuffle)
        hypothetical_contribs_many_refs_func = get_shuffle_seq_ref_function(
            score_computation_function=hypothetical_contribs_func,
            shuffle_func=dinuc_shuffle)

        num_refs_per_seq = 10
        scores = contribs_many_refs_func(
                                task_idx=0,
                                input_data_sequences=data,
                                num_refs_per_seq=num_refs_per_seq,
                                batch_size=50,
                                progress_update=1000,
                            )
        hypothetical_scores = hypothetical_contribs_many_refs_func(
                                task_idx=0,
                                input_data_sequences=data,
                                num_refs_per_seq=num_refs_per_seq,
                                batch_size=50,
                                progress_update=1000,
                            )

        return scores, hypothetical_scores
コード例 #12
0
 def test_convert_conv1d_model_forward_prop(self):
     """Gradient-mode conversion must reproduce the Keras forward pass."""
     converted = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
     in_vars = converted.get_layers()[0].get_activation_vars()
     out_vars = converted.get_layers()[-1].get_activation_vars()
     fprop = compile_func(inputs=[in_vars], outputs=out_vars)
     deeplift_out = fprop(self.inp)
     keras_out = self.keras_output_fprop_func([self.inp, 0])
     np.testing.assert_almost_equal(deeplift_out, keras_out, decimal=6)
コード例 #13
0
 def test_batch_norm_convert_model_fprop(self):
     """Converted model's forward pass must match Keras to 5 decimals."""
     converted = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
     first_vars = converted.get_layers()[0].get_activation_vars()
     last_vars = converted.get_layers()[-1].get_activation_vars()
     fprop = compile_func([first_vars], last_vars)
     got = fprop(self.inp)
     want = self.keras_output_fprop_func([self.inp, 0])
     np.testing.assert_almost_equal(got, want, decimal=5)
コード例 #14
0
 def test_convert_conv1d_model_forward_prop(self):
     """Rescale-mode conversion must reproduce the Keras forward pass."""
     dl_model = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
     source_vars = dl_model.get_layers()[0].get_activation_vars()
     sink_vars = dl_model.get_layers()[-1].get_activation_vars()
     forward = compile_func(inputs=[source_vars], outputs=sink_vars)
     observed = forward(self.inp)
     expected = self.keras_output_fprop_func([self.inp, 0])
     np.testing.assert_almost_equal(observed, expected, decimal=6)
コード例 #15
0
    def test_convert_conv1d_model_forward_prop(self):
        """Two-input forward pass of the converted model must match Keras."""
        converted = kc.convert_model_from_saved_files(
            self.saved_file_path,
            nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
        print(converted.get_name_to_layer().keys())

        # Look the layers up by name: two inputs, one post-activation output.
        inp1_vars = converted.get_name_to_layer()['inp1_0'].get_activation_vars()
        inp2_vars = converted.get_name_to_layer()['inp2_0'].get_activation_vars()
        out_vars = converted.get_name_to_layer()['output_postact_0'].get_activation_vars()
        fprop = compile_func([inp1_vars, inp2_vars], out_vars)

        deeplift_out = fprop([self.inp1, self.inp2])
        keras_out = self.keras_output_fprop_func(self.inp1, self.inp2)
        np.testing.assert_almost_equal(deeplift_out, keras_out, decimal=6)
コード例 #16
0
 def test_convert_conv2d_model_compute_scores(self):
     """Contribution scores must equal gradient * input (6 decimals)."""
     converted = kc.convert_model_from_saved_files(self.saved_file_path)
     contribs_func = converted.get_target_contribs_func(
         find_scores_layer_idx=0,
         target_layer_idx=-2)
     contribs = contribs_func(task_idx=0,
                              input_data_list=[self.inp],
                              batch_size=10,
                              progress_update=None)
     # when biases are 0 and ref is 0, deeplift is the same as grad*inp
     grad_times_inp = self.grad_func([self.inp, 0])*self.inp
     np.testing.assert_almost_equal(contribs, grad_times_inp, decimal=6)
コード例 #17
0
 def test_batch_norm_convert_model_backprop(self):
     """Rescale-mode multipliers must match the gradients to 5 decimals."""
     converted = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
     multipliers_func = converted.get_target_multipliers_func(
         find_scores_layer_idx=0,
         target_layer_idx=-1)
     multipliers = multipliers_func(task_idx=0,
                                    input_data_list=[self.inp],
                                    batch_size=10,
                                    progress_update=None)
     gradients = self.grad_func([self.inp, 0])
     np.testing.assert_almost_equal(multipliers, gradients, decimal=5)
コード例 #18
0
 def test_convert_conv2d_model_compute_scores(self):
     """Check contribution scores against gradient * input."""
     dl_model = kc.convert_model_from_saved_files(self.saved_file_path)
     score_fn = dl_model.get_target_contribs_func(
         find_scores_layer_idx=0, target_layer_idx=-2)
     observed = score_fn(
         task_idx=0,
         input_data_list=[self.inp],
         batch_size=10,
         progress_update=None)
     # when biases are 0 and ref is 0, deeplift is the same as grad*inp
     expected = self.grad_func([self.inp, 0]) * self.inp
     np.testing.assert_almost_equal(observed, expected, decimal=6)
コード例 #19
0
 def test_convert_conv1d_model_compute_scores(self):
     """Gradient-mode contribution scores must equal gradient * input."""
     converted = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
     contribs_func = converted.get_target_contribs_func(
         find_scores_layer_idx=0, target_layer_idx=-2)
     scores = contribs_func(
         task_idx=0,
         input_data_list=[self.inp],
         batch_size=10,
         progress_update=None)
     grad_times_inp = self.grad_func([self.inp, 0]) * self.inp
     np.testing.assert_almost_equal(scores, grad_times_inp, decimal=6)
コード例 #20
0
    def __init__(self):
        """Load the packaged Keras model and build its DeepLIFT scorer.

        Sets ``_model`` (the loaded Keras model), ``_deeplift_model`` (its
        RevealCancel conversion) and ``_importance_func`` (contribution
        scores of the first input layer w.r.t. ``'preact_fc2_0'``).
        """
        model_file = pkg_resources.resource_filename(
            'iseeu', 'models/kfold4_best.hdf5')
        print(f"****{model_file}*****")
        self._model = load_model(model_file)

        self._deeplift_model = kc.convert_model_from_saved_files(
            h5_file=model_file,
            nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel,
            verbose=False)

        first_input_layer = self._deeplift_model.get_input_layer_names()[0]
        self._importance_func = self._deeplift_model.get_target_contribs_func(
            find_scores_layer_name=first_input_layer,
            pre_activation_target_layer_name='preact_fc2_0')
コード例 #21
0
 def test_batch_norm_convert_model_backprop(self):
     """Compare Rescale-mode multipliers with the gradients (5 decimals)."""
     dl_model = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
     mult_fn = dl_model.get_target_multipliers_func(
         find_scores_layer_idx=0, target_layer_idx=-1)
     observed = mult_fn(
         task_idx=0,
         input_data_list=[self.inp],
         batch_size=10,
         progress_update=None)
     expected = self.grad_func([self.inp, 0])
     np.testing.assert_almost_equal(observed, expected, decimal=5)
コード例 #22
0
def _deeplift_contribs_generator(hdf5_path, x_test, process_x_func,
                                 num_feature, num_class, batch_size):
    """Generator which yields DeepLIFT contribution scores.

    Applies vectorization batch-by-batch to avoid memory overflow.

    Arguments:
        hdf5_path: str
            path to saved HDF5 Keras Model
        x_test: sized sequence
            raw feature data; split with ``chunks(x_test, batch_size)``
        process_x_func: function
            function for vectorizing feature data
        num_feature: int
            number of features present in the dataset
        num_class: int
            number of classes
        batch_size: int
            batch size

    Yields:
        numpy array of shape (num_class, rows_in_batch, num_feature) with
        the contribution scores of one batch, one slice per class.
    """
    # convert Keras model, and get relevant function
    deeplift_model = kc.convert_model_from_saved_files(
        hdf5_path, nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
    # input layer is 0, since we have a softmax layer the target layer is -2
    get_deeplift_contribs = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=-2)

    # Ceiling division: a trailing partial chunk is a batch too (rounding
    # under-counted it). Assumes chunks() yields consecutive slices of at
    # most batch_size items -- it is defined outside this block.
    num_batch = (len(x_test) + batch_size - 1) // batch_size
    # yield a 3D array detailing the DeepLIFT contrib scores
    for batch_idx, x in enumerate(chunks(x_test, batch_size)):
        start = time.time()
        x = process_x_func(x)
        # Kept separate from the batch_size parameter so it is not rebound.
        rows_in_batch = len(x)
        zeros = [0.0] * rows_in_batch  # reference data
        all_batch_contribs = np.zeros((num_class, rows_in_batch, num_feature))

        for c in range(num_class):
            batch_contribs = get_deeplift_contribs(task_idx=c,
                                                   input_data_list=[x],
                                                   input_references_list=zeros,
                                                   batch_size=1024,
                                                   progress_update=None)
            all_batch_contribs[c] = batch_contribs

        if not batch_idx % 10:
            print('{}/{} in {:.2f} s'.format(batch_idx, num_batch,
                                             time.time() - start))

        yield all_batch_contribs
0
 def test_convert_conv1d_model_forward_prop(self):
     """Two-input forward pass of the converted model must match Keras."""
     dl_model = kc.convert_model_from_saved_files(
         self.saved_file_path,
         nonlinear_mxts_mode=NonlinearMxtsMode.Rescale)
     print(dl_model.get_name_to_layer().keys())

     # Two named inputs feed one post-activation output.
     first_in = dl_model.get_name_to_layer()['inp1_0'].get_activation_vars()
     second_in = dl_model.get_name_to_layer()['inp2_0'].get_activation_vars()
     out = dl_model.get_name_to_layer()['output_postact_0'].get_activation_vars()
     forward = compile_func([first_in, second_in], out)

     observed = forward([self.inp1, self.inp2])
     expected = self.keras_output_fprop_func(self.inp1, self.inp2)
     np.testing.assert_almost_equal(observed, expected, decimal=6)
コード例 #24
0
def run_deeplift_comparison(model_file, target_example, reference_example):
    """Score one example against one reference with DeepLIFT (RevealCancel).

    Arguments:
        model_file -- saved model passed to ``convert_model_from_saved_files``
        target_example -- the single example to explain
        reference_example -- the reference the example is compared against
    Returns:
        numpy array of the contribution scores for task 0, computed from the
        first layer (index 0) with respect to the last layer (index -1).
    """
    backend.clear_session()

    deeplift_model = kc.convert_model_from_saved_files(
        model_file,
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.RevealCancel)

    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=-1)

    # The original passed an undefined name ``target`` here; the parameter
    # holding the example is ``target_example``. The inner list makes it a
    # batch of one.
    deeplift_results = np.array(
        deeplift_contribs_func(task_idx=0,
                               input_data_list=[[target_example]],
                               input_references_list=[reference_example],
                               batch_size=1,
                               progress_update=1))

    return deeplift_results
コード例 #25
0
for j in range(1,15):
    print('======================model:CNN_1D_exclude_transcript_%f==================='%j)
    model = load_model('CNN_1D_exclude_transcript_'+str(j)+'.h5')


    predict = model.predict_classes(encoded_label1).astype('int')
    print(predict)
"""

for j in range(8, 9):
    print(
        '======================model:CNN_1D_exclude_transcript_%f======================'
        % j)

    deeplift_model = kc.convert_model_from_saved_files(
        'CNN_1D_exclude_transcript_' + str(j) + '.h5',
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.
        DeepLIFT_GenomicsDefault)

    find_scores_layer_idx = 0
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=find_scores_layer_idx, target_layer_idx=-2)

    background = OrderedDict([('A', 0.3), ('C', 0.2), ('G', 0.2), ('T', 0.3),
                              ('N', 0)])
    scores = np.array(
        deeplift_contribs_func(task_idx=1,
                               input_data_list=[encoded_label1],
                               input_references_list=[
                                   np.array([
                                       background['A'], background['C'],
                                       background['G'], background['T'],
コード例 #26
0
    else:
        dset["VBF"][vars] = scaler.transform(dset["VBF"][vars])
        dset["Top"][vars] = scaler.transform(dset["Top"][vars])
        if "WW" in cfg["samples"]:
            dset["WW"][vars] = scaler.transform(dset["WW"][vars])

########################################################
# DeepLIFT setup: scores are computed for layer 0 with respect to
# layer -2 of the converted model.

find_scores_layer_idx = 0
target_layer_idx = -2
n_vars = len(cfg["training_variables"])

# Convert the saved model for DeepLIFT and build the scoring function.
deeplift_model = kc.convert_model_from_saved_files(
    model_input_path,
    nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
deeplift_contribs_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=find_scores_layer_idx,
    target_layer_idx=target_layer_idx)

########################################################
# Select the inputs to score: the "Data" sample when a data file is
# configured, otherwise the "VBF" rows followed by the "Top" rows.

if "filepathData" in cfg.keys():
    inputs = dset["Data"][cfg["training_variables"]].values
else:
    inputs = (
        dset["VBF"][cfg["training_variables"]]
        .append(dset["Top"][cfg["training_variables"]])
        .values
    )

# Cut on DNN output?
コード例 #27
0
ファイル: visualize.py プロジェクト: PelFritz/Master_project
# One-hot encode the sequences and their shuffled counterparts, then add
# an axis at position 3.
encoded_seq = np.expand_dims(
    np.array([one_hot(prom) for prom in prom_seq]), 3)
encoded_shuf_seq = np.expand_dims(
    np.array([one_hot(prom) for prom in prom_shuf]), 3)
categories = np_utils.to_categorical(label, 2)

# Predict with the saved Keras model and report the accuracy.
model = models.load_model('/nam-99/ablage/nam/peleke/Thesis_models/model2020-10-06073328.h5')
predictions = np.argmax(model.predict(encoded_seq), axis=1)
actual = np.argmax(categories, axis=1)
print(predictions)

print(accuracy_score(label, predictions))

# compute deeplift scores
deeplift_model = kc.convert_model_from_saved_files(
    '/nam-99/ablage/nam/peleke/Thesis_models/model2020-10-06073328.h5',
    nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)

deeplift_contrib_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=0,
    target_layer_idx=-2)

# True positive predictions: predicted class 1 and actual class 1.
tp = []
tp_shuf = []

for pred, true, enc_seq, enc_shuf_seq in zip(predictions, actual, encoded_seq, encoded_shuf_seq):
    if pred != 1 or true != 1:
        continue
    tp.append(enc_seq)
    tp_shuf.append(enc_shuf_seq)


tp_data = np.array(tp)
コード例 #28
0
def DeepLIFT(json_file, weight_file, onehot):
    """Score ``onehot`` with several DeepLIFT-style attribution methods.

    The saved Keras model is converted once per nonlinearity mode, a
    contribution-score function is compiled for each converted model
    (input layer 0 against target layer -2), and an integrated-gradients
    scorer with 10 intervals is added on top of the plain-gradient model.

    Args:
        json_file: path to the model architecture JSON, passed to
            ``convert_model_from_saved_files`` as ``json_file``.
        weight_file: path to the HDF5 weights, passed as ``h5_file``.
        onehot: input data handed to every scoring function. The scores
            returned for it must have length 4 along axis 2 (asserted
            below) -- presumably one-hot encoded sequences; confirm
            against the caller.

    Returns:
        OrderedDict mapping method name -> OrderedDict mapping task index
        (only task 0 is scored) -> score array summed over axis 2.
    """
    from collections import OrderedDict

    import deeplift.conversion.kerasapi_conversion as kc
    from deeplift.layers import NonlinearMxtsMode
    # Imported explicitly rather than reached through a bare ``import deeplift``,
    # which only works if another import happened to load the submodule.
    from deeplift.util import get_integrated_gradients_function

    method_modes = [
        # The genomics default = rescale on conv layers, revealcancel on fully-connected
        ('rescale_conv_revealcancel_fc', NonlinearMxtsMode.DeepLIFT_GenomicsDefault),
        ('rescale_all_layers', NonlinearMxtsMode.Rescale),
        ('revealcancel_all_layers', NonlinearMxtsMode.RevealCancel),
        ('grad_times_inp', NonlinearMxtsMode.Gradient),
        ('guided_backprop', NonlinearMxtsMode.GuidedBackprop),
    ]

    method_to_model = OrderedDict()
    for method_name, nonlinear_mxts_mode in method_modes:
        method_to_model[method_name] = kc.convert_model_from_saved_files(
            h5_file=weight_file,
            json_file=json_file,
            nonlinear_mxts_mode=nonlinear_mxts_mode)

    print("Compiling scoring functions")
    method_to_scoring_func = OrderedDict()
    for method, model in method_to_model.items():
        print("Compiling scoring function for: "+method)
        method_to_scoring_func[method] = model.get_target_contribs_func(
            find_scores_layer_idx=0,
            target_layer_idx=-2)

    # To get a function that just gives the gradients, we use the multipliers
    # of the Gradient model.
    gradient_func = method_to_model['grad_times_inp'].get_target_multipliers_func(
        find_scores_layer_idx=0,
        target_layer_idx=-2)

    print("Compiling integrated gradients scoring functions")
    method_to_scoring_func['integrated_gradients10'] = (
        get_integrated_gradients_function(
            gradient_computation_function=gradient_func,
            num_intervals=10))

    # Background base frequencies used as the reference input. The array is
    # identical for every method, so it is built once, with shape (1, 1, 4).
    background = OrderedDict([('A', 0.3), ('C', 0.2), ('G', 0.2), ('T', 0.3)])
    reference = np.array(
        [background[base] for base in ('A', 'C', 'G', 'T')])[None, None, :]

    method_to_task_to_scores = OrderedDict()
    for method_name, score_func in method_to_scoring_func.items():
        print("on method",method_name)
        task_to_scores = OrderedDict()
        for task_idx in [0]:
            scores = np.array(score_func(
                task_idx=task_idx,
                input_data_list=[onehot],
                input_references_list=[reference],
                batch_size=200,
                progress_update=None))
            assert scores.shape[2]==4
            # Collapse the length-4 axis into a single score per position.
            task_to_scores[task_idx] = np.sum(scores, axis=2)
        method_to_task_to_scores[method_name] = task_to_scores
    return method_to_task_to_scores
コード例 #29
0
# Cast the encoded promoters (and their shuffled versions) to float32 and add
# a trailing axis (axis 3).
prepared_proms = np.expand_dims(
    np.array(encoded_proms, dtype=np.float32), axis=3)
prepared_shuff_proms = np.expand_dims(
    np.array(encoded_shuff_proms, dtype=np.float32), axis=3)
for checked_shape in (prepared_proms.shape, prepared_shuff_proms.shape,
                      classes.shape):
    print(checked_shape)


# The same saved model file is used for prediction and for DeepLIFT conversion.
model_path = '/nam-99/ablage/nam/peleke/Models/model2020-07-30150217.h5'
model = models.load_model(model_path)

predictions = np.argmax(model.predict(prepared_proms), axis=1)
print(predictions)
actual = np.argmax(classes, axis=1)
print('Predictions done')

deeplift_model = kc.convert_model_from_saved_files(
    model_path,
    nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)

deeplift_contrib_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=0, target_layer_idx=-2)
# Calculate contributions scores for tps and tns
tp, tp_shuff = [], []
tn, tn_shuff = [], []

for predicted, expected, seq, shuff_seq in zip(predictions, actual,
                                               prepared_proms,
                                               prepared_shuff_proms):
    if predicted == 1 and expected == 1:
        # true positive
        tp.append(seq)
        tp_shuff.append(shuff_seq)
    elif predicted == 0 and expected == 0:
        # true negative
        tn.append(seq)
コード例 #30
0
def compute_deeplift_scores(dataset, X, Y, keras_model_file, reference_label,
                            non_reference_label, base_neuron_label, mask,
                            gpu_id, threshold, percentage_cutoff):
    """Compute DeepLIFT input scores and write the selected features to disk.

    Samples whose label equals ``non_reference_label`` are scored against an
    'average'-mode reference obtained from ``get_reference``, using neuron
    ``base_neuron_label`` of target layer -2. The per-sample scores, a
    symmetric 264 x 264 matrix of the summed scores, and the selected-feature
    matrix are saved as files under ``./important_features/``.

    Returns:
        Tuple of (path of the selected-features file, flattened
        selected-features matrix).
    """
    # Pin the process to the requested GPU and silence TensorFlow logging.
    os.environ.update({
        "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
        "CUDA_VISIBLE_DEVICES": str(gpu_id),
        'TF_CPP_MIN_LOG_LEVEL': '3',
    })

    X_masked, mapping = get_masked_data(X, mask)
    Y = np.argmax(Y, axis=1)

    reference = get_reference('average', reference_label, X_masked, Y)
    deeplift_model = kc.convert_model_from_saved_files(
        keras_model_file,
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.
        DeepLIFT_GenomicsDefault)

    # Contributions of the input layer (index 0) to target layer -2.
    contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=-2)

    scores = np.array(
        contribs_func(
            task_idx=base_neuron_label,
            input_references_list=reference,
            input_data_list=[X_masked[Y == non_reference_label]],
            batch_size=10,
            progress_update=10))

    # Accumulate the per-sample scores into one total per input feature.
    sum_scores = np.zeros(X_masked.shape[1])
    for sample_scores in scores:
        sum_scores += sample_scores

    padded_sum_scores = get_padded_data(sum_scores, mapping)
    print("Reshaping scores ")
    # Fill the strict upper triangle, then mirror it to get a symmetric matrix.
    upper = np.zeros((264, 264))
    upper[np.triu_indices(264, 1)] = padded_sum_scores
    lower = upper.T
    full_matrix = upper + lower - np.diag(np.diag(lower))

    out_dir = './important_features/'
    if not os.path.isdir(out_dir):
        print("Folder that will store the results cannot be found.")
        print("Creating the results folder in " + out_dir)
        os.makedirs(out_dir)

    # Every output file shares this "<r>_<t>.csv" tail.
    suffix = '_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv'

    np.savetxt(out_dir + dataset + '_scores_deeplift_reduced' + suffix,
               np.transpose(np.array(scores)),
               delimiter=',')

    print('Writing reshaped scores')
    np.savetxt(out_dir + dataset + '_scores_reshaped_reduced' + suffix,
               full_matrix,
               delimiter=",")

    selected_features_matrix = get_crucial_features_abs(
        full_matrix, percentage_cutoff)
    np.fill_diagonal(selected_features_matrix, 0)

    selected_features_file = (
        out_dir + dataset + '_deeplift_features_nodes' + suffix)
    np.savetxt(selected_features_file, selected_features_matrix)

    return selected_features_file, flatten_data(selected_features_matrix)
コード例 #31
0
ファイル: run_deeplift.py プロジェクト: kttian/deep_tfbinding
keras_model.load_weights(keras_model_weights)



# ## Prepare the deeplift models
#
# ### Model conversion
#
# Build a DeepLIFT model from the saved Keras weights and architecture so
# that importance scores can be computed with DeepLIFT.

# In[5]:


deeplift_model = kc.convert_model_from_saved_files(
    h5_file=keras_model_weights, json_file=keras_model_json)


'''
# ### Sanity checks
# To verify that the conversion happened correctly, check that the models give
# identical predictions
# 
# If you are using a functional model, see this issue for how to adapt the
# code: https://github.com/kundajelab/deeplift/issues/54

# In[6]:

#make sure predictions are the same as the original model
from deeplift.util import compile_func
コード例 #32
0
    def __init__(self,
                 model,
                 output_layer,
                 task_idx,
                 preact=True,
                 mxts_mode='rescale_conv_revealcancel_fc',
                 batch_size=32):
        """
        Convert the wrapped Keras model to DeepLIFT and compile its scoring function.

        Args:
          model: Kipoi model
          output_layer (int): selected Keras layer with respect to which the
             scores should be calculated
          task_idx (int): Node/Neuron within the selected layer with respect
             to which the score should be calculated
          preact: !NOT YET IMPLEMENTED! Use values prior to activation - for now
             the default is True!
          mxts_mode: Selected score
          batch_size: Batch size for scoring

        Raises:
          Exception: if the model is not compatible with DeepLift, or if
             `output_layer` is not an integer.
          KeyError: if `mxts_mode` is not one of the known mode names.
          IndexError: if no DeepLIFT layer is named after `output_layer`.
        """
        import os
        import shutil
        import tempfile

        from deeplift.conversion import kerasapi_conversion as kc
        from deeplift.layers import NonlinearMxtsMode

        if not preact:
            print("preact is not implemented yet")

        def get_mxts_mode(mode_name):
            # Labels from examples:
            mxts_modes = {
                'rescale_conv_revealcancel_fc':
                NonlinearMxtsMode.DeepLIFT_GenomicsDefault,
                'revealcancel_all_layers': NonlinearMxtsMode.RevealCancel,
                'rescale_all_layers': NonlinearMxtsMode.Rescale,
                'grad_times_inp': NonlinearMxtsMode.Gradient,
                'guided_backprop': NonlinearMxtsMode.GuidedBackprop
            }
            return mxts_modes[mode_name]

        self.model = model
        if not self.is_compatible(model):
            raise Exception("Model not compatible with DeepLift")

        self.task_idx = task_idx
        self.batch_size = batch_size

        # DeepLIFT converts from files on disk, so the weights and architecture
        # are dumped into a private temporary directory. mkdtemp() replaces the
        # deprecated, race-prone tempfile.mktemp(), and the directory is removed
        # afterwards so that no temp files are left behind.
        tmp_dir = tempfile.mkdtemp()
        try:
            # NOTE(review): the ".h5" suffix is intended to keep the weights in
            # HDF5 format on Keras versions that pick the format from the file
            # name -- confirm against the Keras version in use.
            weight_f = os.path.join(tmp_dir, "weights.h5")
            arch_f = os.path.join(tmp_dir, "architecture.json")
            model.model.save_weights(weight_f)
            with open(arch_f, "w") as ofh:
                ofh.write(model.model.to_json())
            self.deeplift_model = kc.convert_model_from_saved_files(
                weight_f,
                json_file=arch_f,
                nonlinear_mxts_mode=get_mxts_mode(mxts_mode))
        finally:
            # Best-effort cleanup: a file handle that is still open must not
            # turn a successful conversion into a failure.
            shutil.rmtree(tmp_dir, ignore_errors=True)

        # TODO this code may be useful for future when functional models can be handled too
        self.input_layer_idxs = [0]
        self.output_layers_idxs = [-1]
        # input_names = self.model._get_feed_input_names()
        # self.input_layer_idxs = []
        # self.output_layers_idxs = []
        # for input_name in input_names:
        #     input_layer_name = input_name[:-len("_input")] if input_name.endswith("_input") else input_name
        #     for i, l in enumerate(self.model.model.layers):
        #         if l.name == input_layer_name:
        #             self.input_layer_idxs.append(i)

        self.fwd_predict_fn = None

        # Now try to find the correct layer:
        if not isinstance(output_layer, int):
            raise Exception(
                "output_layer has to be an integer index of the Keras layer in the Keras model."
            )

        # TODO: DeepLIFT does not guarantee that the layer naming recapitulates the Keras layer order.
        if output_layer < 0:
            # Translate a negative index into the equivalent non-negative one.
            output_layer = len(model.model.layers) + output_layer
        matching_layer_idxs = [
            i for i, l in enumerate(self.deeplift_model.get_layers())
            if l.name == str(output_layer)
        ]
        if not matching_layer_idxs:
            # Same exception type as the bare `[...][0]` lookup used to raise,
            # but with a message that says what went wrong.
            raise IndexError(
                "No DeepLIFT layer named '%s' was found for the requested output_layer."
                % output_layer)
        target_layer_idx = matching_layer_idxs[0]

        # Compile the function that computes the contribution scores
        # For sigmoid or softmax outputs, target_layer_idx should be -2 (the default)
        # (See "3.6 Choice of target layer" in https://arxiv.org/abs/1704.02685 for justification)
        # For regression tasks with a linear output, target_layer_idx should be -1
        # (which simply refers to the last layer)
        # If you want the DeepLIFT multipliers instead of the contribution scores, you can use get_target_multipliers_func
        self.deeplift_contribs_func = self.deeplift_model.get_target_contribs_func(
            find_scores_layer_idx=self.input_layer_idxs,
            target_layer_idx=target_layer_idx)
コード例 #33
0
def plotPromoters():
    """Plot DeepLIFT contribution logos for promoter sequences.

    Reads the command line, one-hot encodes the promoter and background
    sequences, and scores every promoter with DeepLIFT against ``--N``
    rotations of the background set, averaging the results. For each
    promoter it then writes ``<outdir>/<fasta id>.<logoType>`` (a sequence
    logo weighted by the contributions) and ``<outdir>/<fasta id>.txt``
    (the averaged scores).

    Takes no arguments and returns nothing; all input comes from
    ``sys.argv`` and all output is written to files.
    """
    import os

    ########################
    #command line arguments#
    ########################

    parser = argparse.ArgumentParser()

    #PARAMETERS
    parser.add_argument(
        "--sequences",
        help="Full path to a fasta-file containing the promoter sequences.",
        type=str)
    parser.add_argument("--outdir", help="Full path to the output directory.")
    parser.add_argument(
        "--N",
        help=
        "How many references are used for averaging single signal sequence contributions.",
        type=int,
        default=10)
    parser.add_argument("--model",
                        help="Full path to the trained keras model.",
                        type=str,
                        default=None)
    parser.add_argument(
        "--background",
        help="Full path to a fasta-file containing the background sequences.",
        type=str)
    parser.add_argument("--target_layer",
                        help="Target layer index for deeplift (default=-3).",
                        type=int,
                        default=-3)
    parser.add_argument("--ylim",
                        help="Limits for y-axis.",
                        type=float,
                        nargs=2,
                        default=None)
    parser.add_argument(
        "--labels",
        help=
        "Full path to a file containing labels used as figure titles. If not given, use fasta IDs.",
        type=str,
        default=None)
    parser.add_argument("--logoType",
                        help="Logo image file extension (default=pdf).",
                        type=str,
                        default='pdf',
                        choices=['png', 'pdf'])

    args = parser.parse_args()

    #reading in the promoter sequences
    ids = []
    signal_seq = []
    for seq in pyfastx.Fasta(args.sequences):
        ids.append(seq.name)
        signal_seq.append(str(seq.seq).upper())
    #and one-hot encoding
    signal = np.array([vectorizeSequence(s) for s in signal_seq])

    #reading in the background sequences and one-hot encoding them
    bg = np.array([
        vectorizeSequence(str(seq.seq).upper())
        for seq in pyfastx.Fasta(args.background)
    ])

    #reading in labels if given
    if args.labels is not None:
        # The context manager closes the file even if reading fails. The line
        # terminator is stripped so it does not end up in the figure title.
        with open(args.labels, 'rt') as f:
            labels = [row.rstrip('\r\n') for row in f]
    else:
        labels = ids

    #initialize the deeplift model
    deeplift_model = kc.convert_model_from_saved_files(
        args.model,
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.
        DeepLIFT_GenomicsDefault)
    find_scores_layer_idx = 0  #computes importance scores for the input layer
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=find_scores_layer_idx,
        target_layer_idx=args.target_layer)

    #and then score each sequence against args.N different background sequences
    scores = np.zeros(shape=(args.N, signal.shape[0], signal.shape[1]))

    for i in range(0, args.N):
        scores[i, :, :] = np.sum(deeplift_contribs_func(
            task_idx=1,
            input_data_list=[signal],
            input_references_list=[bg[:signal.shape[0], :, :]],
            batch_size=10,
            progress_update=None),
                                 axis=2)
        # Rotate the background set so the next round pairs every signal
        # sequence with a different reference.
        bg = np.roll(bg, 1, axis=0)

    scores = np.mean(scores, axis=0)

    #now the contributions have been calculated, next plotting the sequence logos weighted by the contributions
    for ind, seq in enumerate(signal_seq):
        #first plotting the sequence
        matrix_df = lm.saliency_to_matrix(seq, scores[ind, :])
        logo = lm.Logo(df=matrix_df, color_scheme='classic')
        logo.ax.set_xlabel('position')
        logo.ax.set_ylabel('contribution')
        logo.ax.set_title(labels[ind])
        if args.ylim is not None:
            logo.ax.set_ylim(args.ylim)

        # Work on the figure that actually holds the logo. Creating a separate
        # empty figure and closing only that one left every logo figure open,
        # so figures piled up over a long run.
        fig = logo.ax.figure
        fig.tight_layout()

        # os.path.join also works when --outdir has no trailing separator.
        out_base = os.path.join(args.outdir, ids[ind])
        fig.savefig(out_base + '.' + args.logoType,
                    dpi=150,
                    bbox_inches='tight',
                    pad_inches=0)
        plt.close(fig)

        #and then saving the importance scores to a file
        np.savetxt(out_base + '.txt', scores[ind, :])
コード例 #34
0
def compute_deeplift_scores(
    TARGET_DIRECTORY, dataset, X, Y, 
    keras_model_file, reference_label, non_reference_label, base_neuron_label, 
    mask, gpu_id, dropout, threshold, percentage_cutoff, cluster_mask,
    flags):
    """
    Wrapper for model reduction, called by main.py.
    Computes DeepLIFT saliency scores for feature selection, using the average data as reference.
    See https://github.com/kundajelab/deeplift for the DeepLIFT implementation.

    Inputs:
    - TARGET_DIRECTORY: general directory path to write files to (str)
    - dataset: choice of dataset, along with seed and fold number (str) 
    - X: Numpy array containing data matrices
    - Y: Numpy array containing data labels
    - keras_model_file: name of existing model file (str); the file is deleted at the end
    - reference_label: how the reference class is represented in Y, usually 0 (int)
    - non_reference_label: how the other class(es) is (are) represented in Y, usually 1 (int)
    - base_neuron_label: label to be used as the base, usually 0 (int)
    - mask: Numpy array, usually initialised as all 1s unless neurons are repeatedly removed
    - gpu_id: ID of GPU to use (int)
    - dropout: fraction of neurons to turn off (float)
    - threshold: usually set as 1.0, represents the previous percentage_cutoff when repeatedly removing neurons (float)
    - percentage_cutoff: usually set as 0.95 to keep 5% of the most significant features (float)
    - cluster_mask: Numpy array containing mask obtained from CLIP
    - flags: used to vary model settings, see main.py (dict)

    Returns:
    - new_model_file: directory path to the new model (str)
    - mask_2D_flattened: Numpy array of 1s and 0s, with 1 representing a selected feature
    """

    keras_model = keras.models.load_model(keras_model_file)
    print(keras_model.summary()) # original model

    deeplift_model = kc.convert_model_from_saved_files(
        keras_model_file, 
        nonlinear_mxts_mode=deeplift.layers.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
    print(deeplift_model.get_layers())

    X_masked, mapping = get_masked_data(X, mask)
    Y = np.argmax(Y, axis=1)
    reference = get_reference('average', reference_label, X_masked, Y)

    print('+++++++++++++ Computing DeepLIFT scores ++++++++++++++')
    print('previous threshold', threshold, 'new threshold', percentage_cutoff)

    # Only the samples of the non-reference class are scored.
    non_reference_data = X_masked[Y == non_reference_label]
    input_scores = np.zeros(non_reference_data.shape)
    layer_scores = []

    for layer_idx, layer in enumerate(deeplift_model.get_layers()):
        layer_type = type(layer).__name__

        if layer_type in ('Dense', 'Input'):
            # Contributions of this layer to target layer -2.
            contribs_func = deeplift_model.get_target_contribs_func(
                find_scores_layer_idx=layer_idx, target_layer_idx=-2)
            scores = np.array(contribs_func(task_idx=base_neuron_label,
                                            input_references_list=reference,
                                            input_data_list=[non_reference_data],
                                            batch_size=10,
                                            progress_update=50))

            # Sum over samples, then take the magnitude per unit.
            sum_scores = np.zeros(scores.shape[1])
            for sample_scores in scores:
                sum_scores += sample_scores
            sum_scores = np.absolute(sum_scores)

            if sum_scores.shape[0] > 2:
                plot_hist(TARGET_DIRECTORY, sum_scores,
                          dataset + '_t_' + str(threshold) + '_layer_' + str(layer_idx))

            print('layer', layer_idx, 'type is: ', type(layer), 'scores dimensions are: ', scores.shape, 'sum_scores', sum_scores.shape)
            layer_scores.append(sum_scores)

            if layer_idx == 0:
                # Keep the squared input-layer scores for the feature files,
                # and add an empty entry right after the input layer's scores.
                input_scores = np.square(scores)
                layer_scores.append([])

        elif layer_type in ('NoOp', 'Softmax'):
            layer_scores.append([])
            print('layer', layer_idx, 'type is: ', layer_type)

    alpha = (1 - (percentage_cutoff/threshold))
    new_model, mask_2D = compute_new_reduced_model(
        keras_model, dropout, layer_scores, 2, alpha, mapping, cluster_mask, flags)

    # Total squared input score per feature across all scored samples.
    input_sum_scores = np.zeros(X_masked.shape[1])
    for sample_scores in input_scores:
        input_sum_scores += sample_scores

    padded_sum_scores = get_padded_data(input_sum_scores, mapping)
    full_matrix = create_matrix(padded_sum_scores)

    # Every feature file shares this "<r>_<t>.csv" tail.
    suffix = '_r_' + str(threshold) + '_t_' + str(percentage_cutoff) + '.csv'

    features_dir = TARGET_DIRECTORY + './important_features/'
    mkdir(features_dir)
    np.savetxt(features_dir + dataset + '_scores_deeplift_reduced' + suffix,
               np.transpose(np.array(input_scores)), delimiter= ',')
    np.savetxt(features_dir + dataset + '_scores_reshaped_reduced' + suffix,
               full_matrix, delimiter=",")
    np.savetxt(features_dir + dataset + '_deeplift_features_nodes' + suffix,
               mask_2D)

    models_dir = TARGET_DIRECTORY + './reduced_models/'
    mkdir(models_dir)
    new_model_file = (models_dir + dataset + '_from_' + str(threshold)
                      + '_to_' + str(percentage_cutoff) + '.h5')
    new_model.save(new_model_file)

    # The source model file is removed once the reduced model has been saved.
    os.remove(keras_model_file)

    mask_2D_flattened = corr_mx_flatten_single(mask_2D)

    return new_model_file, mask_2D_flattened
コード例 #35
0
# Alternative model file:
#esaved_model_file = 'keras2_mnist_cnn_allconv.h5'
saved_model_file = 'mnist_my_cnn_model.h5'
keras_model = keras.models.load_model(saved_model_file)
keras_model.summary()

from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Add a trailing axis to the test images.
X_test = X_test[:, :, :, None]

import deeplift
from deeplift.layers import NonlinearMxtsMode
from deeplift.conversion import kerasapi_conversion as kc

# Three conversions of the same saved model, one per nonlinearity mode:
# RevealCancel, Gradient and GuidedBackprop (converted in that order).
revealcancel_model, grad_model, guided_backprop_model = (
    kc.convert_model_from_saved_files(h5_file=saved_model_file,
                                      nonlinear_mxts_mode=mxts_mode)
    for mxts_mode in (NonlinearMxtsMode.RevealCancel,
                      NonlinearMxtsMode.Gradient,
                      NonlinearMxtsMode.GuidedBackprop))

from deeplift.util import compile_func
import numpy as np
from keras import backend as K

# Compile a function from the first layer's activation variables to the last
# layer's activation variables of the RevealCancel model.
deeplift_model = revealcancel_model
first_layer_vars = deeplift_model.get_layers()[0].get_activation_vars()
last_layer_vars = deeplift_model.get_layers()[-1].get_activation_vars()
deeplift_prediction_func = compile_func([first_layer_vars], last_layer_vars)
コード例 #36
0
import deeplift
from deeplift.conversion import kerasapi_conversion as kc
from deeplift.blobs import NonlinearMxtsMode
from deeplift.util import get_integrated_gradients_function
"""
    Created by Mohsen Naghipourfar on 6/14/18.
    Email : [email protected] or [email protected]
    Website: http://ce.sharif.edu/~naghipourfar
    Github: https://github.com/naghipourfar
    Skype: mn7697np
"""

# Code = keras.models.load_model("./classifier.h5")

# Convert the saved classifier with the plain-gradient nonlinearity mode and
# list the layer names DeepLIFT assigned.
deeplift_model = kc.convert_model_from_saved_files(
    h5_file="./classifier-noBatchNorm-noGaussian.h5",
    nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
print(deeplift_model.get_name_to_layer().keys())

# Multipliers of the gradient model, from the named input layer to the named
# pre-activation target layer.
gradient_function = deeplift_model.get_target_multipliers_func(
    find_scores_layer_name="input_1_0",
    pre_activation_target_layer_name="preact_dense_5_0")

# Integrated gradients built on that gradient function, using 5 intervals.
integrated_gradient_5 = get_integrated_gradients_function(
    gradient_computation_function=gradient_function,
    num_intervals=5)

x = pd.read_csv("../Data/fpkm_normalized.csv", header=None)

for task_idx in range(1):
    print("\tComputing scores for task: " + str(task_idx))
    scores = np.array(
        integrated_gradient_5(task_idx=task_idx,
コード例 #37
0
# Read the IDs of the tested genes (first element of the pickled object).
# The context manager closes the file handle, which was previously left open.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# result files that this project produced itself.
with open('Results/PICKLE2020-05-26215517', 'rb') as file:
    tested_IDs = pickle.load(file)[0]
# Position of every tested gene within geneIDs.
testing_indices = [geneIDs.index(gene) for gene in tested_IDs]

test_data = one_hot_seq[testing_indices]
test_data_shuffled = one_hot_dinuc_shuff_seq[testing_indices]
test_categories = np.argmax(categories[testing_indices], axis=1)

# Load model and make predictions
model = models.load_model('Results/model2020-05-26215517.h5')
predictions = np.argmax(model.predict(test_data), axis=1)

print('Predictions done')
print(predictions)
deeplift_model =\
    kc.convert_model_from_saved_files('Results/model2020-05-26215517.h5',
                                      nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)

# Contributions of the input layer (index 0) to target layer -2.
deeplift_contribs_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=0, target_layer_idx=-2)

print('deeplift sound')
# calculate deeplift for each gene (only tp and tn)

tp_data = []
tn_data = []
tp_shuf_data = []
tn_shuf_data = []

for j, indx in enumerate(testing_indices):
    test_gene = geneIDs[indx]
    if test_categories[j] == 1 and predictions[j] == 1: