Beispiel #1
0
def apply_deeplift(keras_model, data, nb_classes):
    """Compute DeepLIFT and GuidedBackprop-DeepLIFT contribution scores.

    Returns a pair of lists, one per attribution rule; each list holds one
    score array per output class (index = task_idx), with rows = samples
    and columns = input features.
    """
    # Two converted copies of the model, one per attribution rule.
    deeplift_model = kc.convert_sequential_model(
        keras_model, num_dims=2,
        nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
    guided_backprop_deeplift_model = kc.convert_sequential_model(
        keras_model,
        nonlinear_mxts_mode=NonlinearMxtsMode.GuidedBackpropDeepLIFT)

    # Scoring functions targeting the input layer (index 0).
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)
    guided_backprop_deeplift_func = \
        guided_backprop_deeplift_model.get_target_contribs_func(
            find_scores_layer_idx=0)

    # input_data_list holds one array per input mode; single mode here.
    input_data_list = [data]

    def compute_contribs(score_func):
        # One score array per output class.
        contribs = []
        for class_idx in range(nb_classes):
            contribs.append(np.array(score_func(
                task_idx=class_idx,
                input_data_list=input_data_list,
                batch_size=10,
                progress_update=None)))
        return contribs

    deeplift_contribs = compute_contribs(deeplift_contribs_func)
    guided_backprop_deeplift = compute_contribs(guided_backprop_deeplift_func)

    return deeplift_contribs, guided_backprop_deeplift
Beispiel #2
0
    def deeplift(self, X, batch_size=200):
        """
        Compute DeepLIFT scores for the one-hot input X.

        Returns an array of shape
        (num_task, num_samples, 1, num_bases, sequence_length).
        """
        # Expect a 4-D one-hot tensor with a singleton second axis.
        assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
        from deeplift.conversion import keras_conversion as kc

        # Convert the Keras model and compile an input-layer scoring func.
        converted = kc.convert_sequential_model(self.model, verbose=False)
        contribs_func = converted.get_target_contribs_func(
            find_scores_layer_idx=0)

        # Reference distribution with 40% GC content, shaped to broadcast
        # over the input.
        gc_reference = np.array([0.3, 0.2, 0.2, 0.3])[None, None, :, None]

        # Score every task against the same reference.
        per_task_scores = np.zeros((self.num_tasks,) + X.shape)
        for task in range(self.num_tasks):
            per_task_scores[task] = contribs_func(
                task_idx=task,
                input_data_list=[X],
                batch_size=batch_size,
                progress_update=None,
                input_references_list=[gc_reference])
        return per_task_scores
Beispiel #3
0
def deepliftAttrs(keras_model, dnaseq):
    '''Use DeepLIFT to compute attribution scores for an inputted one-hot
    encoded DNA sequence, explained against the model's predicted class.'''

    import deeplift
    from deeplift.conversion import keras_conversion as kc

    # Predict a label for this sequence; its argmax selects the output
    # neuron (task) whose contributions we compute.
    prediction = keras_model.predict(np.array([dnaseq]))
    task_idx = np.argmax(prediction)

    # GenomicsDefault: RevealCancel on dense layers, Rescale on conv layers.
    deeplift_model = kc.convert_sequential_model(
        keras_model,
        nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)

    # Score the input layer (idx 0) w.r.t. the pre-softmax layer (idx -2).
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=-2)

    scores = np.array(deeplift_contribs_func(task_idx=task_idx,
                                             input_data_list=[dnaseq],
                                             batch_size=10,
                                             progress_update=1000))

    return scores
Beispiel #4
0
def deeplift_contribs_generator(model,
                                X_test,
                                process_X_data_func,
                                nb_features,
                                nb_classes,
                                batch_size,
                                process_X_data_func_args={}):
    """Yield DeepLIFT contribution scores for each chunk of X_test.

    Each yielded array has shape (nb_classes, chunk_size, nb_features).
    """
    # One-time model conversion (RevealCancel rule) and compilation of the
    # contribution-scoring function for the input layer.
    deeplift_model = kc.convert_sequential_model(
        model, num_dims=2, nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
    get_deeplift_contribs = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)

    for raw_chunk in chunks(X_test, batch_size):
        X = process_X_data_func(raw_chunk, **process_X_data_func_args)
        batch_size = len(X)  # the final chunk may be shorter
        zeros = [0.0] * batch_size  # reference data
        all_batch_contribs = np.zeros((nb_classes, batch_size, nb_features))

        for class_idx in range(nb_classes):
            all_batch_contribs[class_idx] = get_deeplift_contribs(
                task_idx=class_idx,
                input_data_list=[X],
                input_references_list=zeros,
                batch_size=10,
                progress_update=None)

        yield all_batch_contribs
 def test_convert_conv1d_model_forward_prop(self):
     # The converted model's forward pass must match the original Keras
     # model's outputs to 6 decimal places.
     deeplift_model = kc.convert_sequential_model(model=self.keras_model)
     layers = deeplift_model.get_layers()
     deeplift_fprop_func = compile_func([layers[0].get_activation_vars()],
                                        layers[-1].get_activation_vars())
     expected = self.keras_output_fprop_func(self.inp)
     np.testing.assert_almost_equal(deeplift_fprop_func(self.inp),
                                    expected,
                                    decimal=6)
Beispiel #6
0
def compute_importance(model, sequences, tasks,
                       score_type='gradient_input',
                       find_scores_layer_idx=0,
                       target_layer_idx=-2,
                       reference_gc=0.46,
                       reference_shuffle_type=None,
                       num_refs_per_seq=10,
                       seed=1):
    """
    Compute per-task importance scores for one-hot encoded `sequences`.

    score_type selects the attribution rule via `importance_method` below.
    reference_shuffle_type in ['random', 'dinuc']
    reference_gc = 0 will return numpy array of 0s
    reference_gc < 1 will assign each G and C reference_gc/2

    Returns a dict mapping each task to `scores * sequences`, i.e. the raw
    scores masked to the observed bases of the one-hot input.
    """

    ### Compute Importance scores
    print('Calculating Importance Scores')

    # Map user-facing score_type names onto DeepLIFT multiplier modes.
    importance_method = {
        "deeplift": deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault,
        "rescale_all_layers": deeplift.blobs.NonlinearMxtsMode.Rescale,
        "revealcancel_all_layers": deeplift.blobs.NonlinearMxtsMode.RevealCancel,
        "gradient_input": deeplift.blobs.NonlinearMxtsMode.Gradient,
        "guided_backprop": deeplift.blobs.NonlinearMxtsMode.GuidedBackprop,
        "deconv": deeplift.blobs.NonlinearMxtsMode.DeconvNet
    }

    # Convert the Keras model under the selected attribution rule.
    importance_model = kc.convert_sequential_model(model,
                        nonlinear_mxts_mode=importance_method[score_type])

    # Compile the scoring function for the requested layer pair.
    importance_func = importance_model.get_target_contribs_func(
                                find_scores_layer_idx=find_scores_layer_idx,
                                target_layer_idx=target_layer_idx)

    # NOTE(review): get_reference is an external helper; presumably it builds
    # a reference (or None for shuffled references) and may wrap the scoring
    # function -- confirm its contract where it is defined.
    (reference, new_importance_func) = get_reference(sequences, importance_func,
                                                     gc_fraction=reference_gc,
                                                     shuffle=reference_shuffle_type,
                                                     seed=seed)

    importance_score_dict = {}
    for task in tasks:
        if reference is None:
            # No fixed reference: score against multiple shuffled references
            # generated from the FASTA form of each sequence.
            import dfim
            import dfim.util
            # NOTE(review): `reload` is the Python 2 builtin; under Python 3
            # this requires importlib.reload -- confirm target interpreter.
            reload(dfim.util)
            seq_fastas = dfim.util.convert_one_hot_to_fasta(sequences)
            scores = np.array(new_importance_func(task_idx=task, # was 0
                                                  input_data_sequences=seq_fastas,
                                                  num_refs_per_seq=num_refs_per_seq,
                                                  batch_size=10,
                                                  progress_update=1000))
        else:
            scores = np.array(new_importance_func(task_idx=task,
                                                  input_data_list=[sequences],
                                                  batch_size=10,
                                                  progress_update=1000,
                                                  input_references_list=[reference]))
        # Mask scores to the observed bases (elementwise product with one-hot).
        importance_score_dict[task] = scores * sequences
    return importance_score_dict
Beispiel #7
0
 def test_batch_norm_convert_model_fprop(self):
     # Forward-propagate through the converted model and compare against
     # the Keras model's outputs (6 decimal places).
     deeplift_model = kc.convert_sequential_model(model=self.keras_model)
     layers = deeplift_model.get_layers()
     deeplift_fprop_func = theano.function(
         [layers[0].get_activation_vars()],
         layers[-1].get_activation_vars(),
         allow_input_downcast=True)
     actual = deeplift_fprop_func(self.inp)
     np.testing.assert_almost_equal(actual,
                                    self.keras_output_fprop_func(self.inp),
                                    decimal=6)
Beispiel #8
0
 def test_batch_norm_convert_model_backprop(self):
     # Multipliers from the converted model must match the Keras gradients
     # to 6 decimal places.
     deeplift_model = kc.convert_sequential_model(model=self.keras_model)
     multipliers_func = deeplift_model.get_target_multipliers_func(
         find_scores_layer_idx=0,
         target_layer_idx=-1)
     actual = multipliers_func(task_idx=0,
                               input_data_list=[self.inp],
                               batch_size=10,
                               progress_update=None)
     np.testing.assert_almost_equal(actual,
                                    self.grad_func(self.inp),
                                    decimal=6)
 def test_convert_conv1d_model_compute_scores(self):
     # When biases are 0 and the reference is 0, DeepLIFT contributions
     # equal gradient * input; check to 6 decimal places.
     deeplift_model = kc.convert_sequential_model(model=self.keras_model)
     contribs_func = deeplift_model.get_target_contribs_func(
         find_scores_layer_idx=0,
         target_layer_idx=-2)
     actual = contribs_func(task_idx=0,
                            input_data_list=[self.inp],
                            batch_size=10,
                            progress_update=None)
     expected = self.grad_func(self.inp) * self.inp
     np.testing.assert_almost_equal(actual, expected, decimal=6)
Beispiel #10
0
def main():
    if len(sys.argv) < 3:
        print "you must call program as: python filtermap.py <rootdir> <resultdir>"
        sys.exit(1)
    savename ='deepliftmap_all'
    method = '16_G_'
    rootdir= sys.argv[1]
    resultdir=sys.argv[2]
    dir_ls=os.listdir(rootdir)
    dir_ls.sort()
    weight_file=method+'bestmodel_weights.h5'
    architecture_file=method+'best_archit.json'
    for x in range (len(dir_ls)):
       if dir_ls[x][0:15]=='wgEncodeAwgTfbs':
         print(dir_ls[x])
         if os.path.exists(join(resultdir,dir_ls[x],savename)):
            print('Exist!skip!')
         else:
            if not exists(resultdir) or not os.path.exists(join(resultdir,dir_ls[x])):
                print('Directory not exits!')
            else:
		inputdir=join(basedir,dir_ls[x],'CV0','data')#,'train.h5.batch1')
		files=subprocess.check_output("ls "+inputdir+"/train.h5.batch*", shell=True).split('\n')[0:-1]
    		input_data=np.asarray([]).reshape((0,4,1,101))
    		label_all=np.asarray([]).reshape((0,1))
		for batchfile in files:
			fi = h5py.File(batchfile, 'r')
			dataset = np.asarray(fi['data'])
			label=np.asarray(fi['label'])
			input_data=np.append(input_data,dataset,axis=0)
			label_all=np.append(label_all,label,axis=0)
		label_all=label_all.reshape(len(label_all))
		dataset=input_data[label_all>0]
		print dataset.shape
		x_in=dataset#[0:10000]
                model = model_from_json(open(join(rootdir,dir_ls[x],architecture_file)).read())
	        model.load_weights(join(rootdir,dir_ls[x],weight_file))
		keras_model=model
		deeplift_model = kc.convert_sequential_model(keras_model,nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
		find_scores_layer_idx=2
		deeplift_contribs_func = deeplift_model.get_target_contribs_func(find_scores_layer_idx=find_scores_layer_idx,target_layer_idx=-2)	
		scores = np.array(deeplift_contribs_func(task_idx=0,input_data_list=[x_in], batch_size=10,progress_update=1000))
		s=scores.shape
		score=scores.reshape((s[0],s[1],s[3]))
		deepmap=np.max(score,axis=2)
		print deepmap.shape
		np.savetxt(join(resultdir,dir_ls[x],savename),deepmap,delimiter='\t',fmt='%.5f')
Beispiel #11
0
 def deeplift(self, X, batch_size=200):
     """
     Compute DeepLIFT contribution scores.

     Returns (num_task, num_samples, 1, num_bases, sequence_length) array.
     """
     assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
     from deeplift.conversion import keras_conversion as kc
     from deeplift.blobs import NonlinearMxtsMode,DenseMxtsMode
     # Mean-normalise the first conv layer's weights before conversion.
     kc.mean_normalise_first_conv_layer_weights(self.model, True, None)
     # Convert the model and compile the input-layer scoring function.
     converted = kc.convert_sequential_model(
         self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
     score_func = converted.get_target_contribs_func(
         find_scores_layer_idx=0)
     # Stack per-task score arrays into a single output array.
     all_scores = [score_func(task_idx=task, input_data_list=[X],
                              batch_size=batch_size, progress_update=None)
                   for task in range(self.num_tasks)]
     return np.asarray(all_scores)
Beispiel #12
0
    def deeplift(self,
                 X,
                 keras_model_weights,
                 keras_model_json,
                 batch_size=128):
        """
        Returns (num_task, num_samples, 1, num_bases, sequence_length) deeplift score array.

        The Keras model is loaded from the given weights/json files, converted
        with the RevealCancel rule, and scored against an all-zeros reference
        for task 0.
        """
        if sys.version_info[0] != 2:
            raise RuntimeError("DeepLIFT requires Python2!")
        #assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
        from deeplift.conversion import keras_conversion as kc
        from deeplift.blobs import NonlinearMxtsMode

        # load the keras model from the supplied files
        keras_model = kc.load_keras_model(weights=keras_model_weights,
                                          json=keras_model_json)

        # BUG FIX: previously `self.model` was converted here, silently
        # ignoring the freshly loaded `keras_model` the caller requested via
        # keras_model_weights/keras_model_json; convert the loaded model.
        deeplift_model = kc.convert_sequential_model(
            keras_model,
            nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)

        # compile the scoring function: input-layer scores w.r.t. the
        # pre-softmax layer (target_layer_idx=-2)
        target_contribs_func = deeplift_model.get_target_contribs_func(
            find_scores_layer_idx=0, target_layer_idx=-2)

        # one all-zeros reference row, broadcast over the batch
        input_reference_shape = tuple([1] + list(X.shape[1:]))
        return np.asarray(
            target_contribs_func(
                task_idx=0,
                input_data_list=[X],
                batch_size=batch_size,
                progress_update=None,
                input_references_list=[np.zeros(input_reference_shape)]))
Beispiel #13
0
    def deeplift(self, X, batch_size=200):
        """
        Compute DeepLIFT scores against an all-zeros reference.

        Returns (num_task, num_samples, 1, num_bases, sequence_length) array.
        """
        assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
        from deeplift.conversion import keras_conversion as kc
        from deeplift.blobs import NonlinearMxtsMode

        # Mean-normalise the first conv layer's weights, then convert.
        kc.mean_normalise_first_conv_layer_weights(self.model, True, None)
        converted = kc.convert_sequential_model(
            self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
        score_func = converted.get_target_contribs_func(
            find_scores_layer_idx=0)

        # All-zeros reference with a broadcastable leading axis.
        zero_reference = np.zeros(tuple([1] + list(X.shape[1:])))
        per_task = []
        for task in range(self.num_tasks):
            per_task.append(score_func(
                task_idx=task,
                input_data_list=[X],
                batch_size=batch_size,
                progress_update=None,
                input_references_list=[zero_reference]))
        return np.asarray(per_task)
Beispiel #14
0
    def deeplift(self, X, batch_size=200):
        """
        Compute DeepLIFT scores against an all-zeros reference (Python 2 only).

        Returns (num_task, num_samples, 1, num_bases, sequence_length) array.
        """
        if sys.version_info[0] != 2:
            raise RuntimeError("DeepLIFT requires Python2!")
        assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
        from deeplift.conversion import keras_conversion as kc
        from deeplift.blobs import NonlinearMxtsMode

        # Normalise the first conv layer's weights, then convert the model.
        kc.mean_normalise_first_conv_layer_weights(self.model, True, None)
        dl_model = kc.convert_sequential_model(
            self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
        contribs_func = dl_model.get_target_contribs_func(
            find_scores_layer_idx=0)

        # One all-zeros reference row, broadcast over the batch.
        ref = np.zeros(tuple([1] + list(X.shape[1:])))
        results = [contribs_func(task_idx=t,
                                 input_data_list=[X],
                                 batch_size=batch_size,
                                 progress_update=None,
                                 input_references_list=[ref])
                   for t in range(self.num_tasks)]
        return np.asarray(results)
Beispiel #15
0
def deeplift_contribs_generator(model, X_test, process_X_data_func,
    nb_features, nb_classes, batch_size, process_X_data_func_args={}):
    """Generator of per-chunk DeepLIFT contribution scores.

    Yields one (nb_classes, chunk_size, nb_features) array per chunk.
    """
    # Convert the Keras model once (RevealCancel rule) and compile the
    # input-layer contribution function.
    converted = kc.convert_sequential_model(model, num_dims=2,
        nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
    contribs_func = converted.get_target_contribs_func(
        find_scores_layer_idx=0)

    for raw in chunks(X_test, batch_size):
        X = process_X_data_func(raw, **process_X_data_func_args)
        batch_size = len(X)  # last chunk may be short
        zeros = [0.0] * batch_size  # reference data
        out = np.zeros((nb_classes, batch_size, nb_features))

        for c in range(nb_classes):
            out[c] = contribs_func(task_idx=c,
                                   input_data_list=[X],
                                   input_references_list=zeros,
                                   batch_size=10,
                                   progress_update=None)

        yield out
Beispiel #16
0
    def deeplift(self, X, batch_size=200):
        """
        Compute DeepLIFT scores with a 40% GC reference.

        Returns (num_task, num_samples, 1, num_bases, sequence_length) array.
        """
        assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
        from deeplift.conversion import keras_conversion as kc

        # Convert the Keras model and compile the input-layer scoring func.
        dl_model = kc.convert_sequential_model(self.model, verbose=False)
        contribs = dl_model.get_target_contribs_func(find_scores_layer_idx=0)

        # 40% GC reference distribution, shaped to broadcast over the input.
        reference = np.array([0.3, 0.2, 0.2, 0.3])[None, None, :, None]

        out = np.zeros((self.num_tasks,) + X.shape)
        for task in range(self.num_tasks):
            out[task] = contribs(task_idx=task,
                                 input_data_list=[X],
                                 batch_size=batch_size,
                                 progress_update=None,
                                 input_references_list=[reference])
        return out
Beispiel #17
0
def apply_deeplift(keras_model,
                   data,
                   input_layer_index=0,
                   target_layer_index=-2,
                   class_index=None):
    """Compute DeepLIFT contribution scores for `data`.

    Scores are computed at layer `input_layer_index` with respect to
    `target_layer_index`, for the output neuron selected by `class_index`
    (passed through as task_idx). Returns an array with one row per sample
    and one column per input feature.
    """
    # Convert the Keras model.
    # NonlinearMxtsMode defines the method for computing importance scores.
    # Other supported values are Gradient, DeconvNet, GuidedBackprop and
    # GuidedBackpropDeepLIFT (a hybrid of GuidedBackprop and DeepLIFT where
    # negative multipliers are ignored during backpropagation).
    deeplift_model = kc.convert_sequential_model(
        keras_model,
        num_dims=len(keras_model.input_shape),
        nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)

    # BUG FIX: `target_layer_index` was accepted but never forwarded to
    # get_target_contribs_func, so non-default values were silently ignored.
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=input_layer_index,
        target_layer_idx=target_layer_index)

    # input_data_list holds one array per input mode; single mode here.
    input_data_list = [data]

    # helper for running the compiled scoring function
    def compute_contribs(func):
        return np.array(
            func(task_idx=class_index,
                 input_data_list=input_data_list,
                 batch_size=10,
                 progress_update=None))

    deeplift_contribs = compute_contribs(deeplift_contribs_func)

    return deeplift_contribs
Beispiel #18
0
### A lot of the below is just copied straight from the deeplift
### example of how to use it

#NonlinearMxtsMode defines the method for computing importance scores.
#NonlinearMxtsMode.DeepLIFT_GenomicsDefault uses the RevealCancel rule on Dense layers
#and the Rescale rule on conv layers (see paper for rationale)
#Other supported values are:
#NonlinearMxtsMode.RevealCancel - DeepLIFT-RevealCancel at all layers (used for the MNIST example)
#NonlinearMxtsMode.Rescale - DeepLIFT-rescale at all layers
#NonlinearMxtsMode.Gradient - the 'multipliers' will be the same as the gradients
#NonlinearMxtsMode.GuidedBackprop - the 'multipliers' will be what you get from guided backprop
#Use deeplift.util.get_integrated_gradients_function to compute integrated gradients

# NOTE(review): `amodel` is not defined in this chunk -- presumably a Keras
# model built earlier in the file; confirm.
deeplift_model = kc.convert_sequential_model(
    amodel,
    nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.
    DeepLIFT_GenomicsDefault)

#Specify the index of the layer to compute the importance scores of.
#In the example below, we find scores for the input layer, which is idx 0 in deeplift_model.get_layers()
find_scores_layer_idx = 0

#Compile the function that computes the contribution scores
#For sigmoid or softmax outputs, target_layer_idx should be -2 (the default)
#(See "3.6 Choice of target layer" in https://arxiv.org/abs/1704.02685 for justification)
#For regression tasks with a linear output, target_layer_idx should be -1
#(which simply refers to the last layer)
#If you want the DeepLIFT multipliers instead of the contribution scores, you can use get_target_multipliers_func
# Here target_layer_idx=-1 is used, i.e. the last (linear) layer is the target.

deeplift_contribs_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=find_scores_layer_idx, target_layer_idx=-1)
Beispiel #19
0
import os.path
# NOTE(review): the scalers are imported but unused in this visible chunk --
# presumably used further down the file; confirm.
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
import sys

# Network index from the command line; each network lives in its own
# ../networks/networkJ/ directory.
j = sys.argv[1]
network_path = "../networks/network" + str(j) + '/'
# NOTE(review): `keras` is not imported in this chunk -- presumably imported
# earlier in the file; confirm.
keras_model = keras.models.load_model(network_path + "nt3_network" + str(j) +
                                      ".h5")
keras_model.summary()

import deeplift
from deeplift.blobs import NonlinearMxtsMode
from deeplift.conversion import keras_conversion as kc

#Three different models, one each for RevealCancel, Gradient and GuidedBackprop
revealcancel_model = kc.convert_sequential_model(
    model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
grad_model = kc.convert_sequential_model(
    model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
guided_backprop_model = kc.convert_sequential_model(
    model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.GuidedBackprop)

### load data
import pandas as pd
import numpy as np
from keras.utils import np_utils

# Column 0 of the CSV holds the integer class label; the remaining columns
# are the float32 input features.
df_test = (pd.read_csv('../data-05-31-2018/formatted_full_data.csv',
                       header=None).values).astype('float32')
df_y_test = df_test[:, 0].astype('int')
seqlen = df_test.shape[1]
# One-hot encode the two-class labels.
Y_test = np_utils.to_categorical(df_y_test, 2)