def apply_deeplift(keras_model, data, nb_classes):
    # convert the Keras model
    deeplift_model = kc.convert_sequential_model(
        keras_model, num_dims=2,
        nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
    guided_backprop_deeplift_model = kc.convert_sequential_model(
        keras_model,
        nonlinear_mxts_mode=NonlinearMxtsMode.GuidedBackpropDeepLIFT)

    # get relevant functions
    deeplift_contribs_func = \
        deeplift_model.get_target_contribs_func(find_scores_layer_idx=0)
    guided_backprop_deeplift_func = \
        guided_backprop_deeplift_model.get_target_contribs_func(
            find_scores_layer_idx=0)

    # input_data_list is a list of arrays, one per input mode;
    # each array in the list holds the features of the cases in the appropriate format
    input_data_list = [data]

    # helper function for running the aforementioned functions
    def compute_contribs(func):
        return [np.array(func(task_idx=i,
                              input_data_list=input_data_list,
                              batch_size=10,
                              progress_update=None))
                for i in range(nb_classes)]

    # output is a list of arrays...
    # list index = index of output neuron (controlled by task_idx)
    # each array has dimensions (k, 784), with k = # of samples, 784 = # of features
    deeplift_contribs = compute_contribs(deeplift_contribs_func)
    guided_backprop_deeplift = compute_contribs(guided_backprop_deeplift_func)

    return deeplift_contribs, guided_backprop_deeplift
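
# Hedged usage sketch for apply_deeplift above (not part of the original snippet).
# It assumes the same module-level imports the function relies on (numpy as np,
# kc = deeplift.conversion.keras_conversion, NonlinearMxtsMode) plus the old
# Keras Sequential API that this DeepLIFT version targets; the tiny model and
# random data below are illustrative placeholders only.
from keras.models import Sequential
from keras.layers import Dense, Activation

toy_model = Sequential()
toy_model.add(Dense(32, input_dim=784))
toy_model.add(Activation('relu'))
toy_model.add(Dense(10))
toy_model.add(Activation('softmax'))
toy_model.compile(optimizer='sgd', loss='categorical_crossentropy')

toy_data = np.random.rand(20, 784).astype('float32')
contribs, gb_contribs = apply_deeplift(toy_model, toy_data, nb_classes=10)
# contribs[c] is a (20, 784) array of per-feature scores toward output neuron c
print(contribs[0].shape)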
def deeplift(self, X, batch_size=200):
    """
    Returns (num_task, num_samples, 1, num_bases, sequence_length)
    deeplift score array.
    """
    assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import keras_conversion as kc

    # convert to deeplift model and get scoring function
    deeplift_model = kc.convert_sequential_model(self.model, verbose=False)
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)

    # use a 40% GC reference
    input_references = [
        np.array([0.3, 0.2, 0.2, 0.3])[None, None, :, None]
    ]

    # get deeplift scores
    deeplift_scores = np.zeros((self.num_tasks,) + X.shape)
    for i in range(self.num_tasks):
        deeplift_scores[i] = score_func(
            task_idx=i,
            input_data_list=[X],
            batch_size=batch_size,
            progress_update=None,
            input_references_list=input_references)
    return deeplift_scores
def deepliftAttrs(keras_model, dnaseq):
    '''Uses DeepLIFT to determine the attributions for an input one-hot
    encoded DNA sequence.'''
    import deeplift
    from deeplift.conversion import keras_conversion as kc

    # predict label and identify index of label (in output)
    predict = keras_model.predict(np.array([dnaseq]))
    task_idx = np.argmax(predict)

    deeplift_model = kc.convert_sequential_model(
        keras_model,
        nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
    find_scores_layer_idx = 0
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=find_scores_layer_idx, target_layer_idx=-2)

    # wrap the single sequence in a batch dimension, matching the predict call above
    scores = np.array(
        deeplift_contribs_func(task_idx=task_idx,
                               input_data_list=[np.array([dnaseq])],
                               batch_size=10,
                               progress_update=1000))
    return scores
def deeplift_contribs_generator(model, X_test, process_X_data_func,
                                nb_features, nb_classes, batch_size,
                                process_X_data_func_args={}):
    # convert Keras model, and get relevant function
    deeplift_model = kc.convert_sequential_model(
        model, num_dims=2,
        nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
    get_deeplift_contribs = \
        deeplift_model.get_target_contribs_func(find_scores_layer_idx=0)

    # yield a 3D array detailing the deeplift contrib scores
    for X in chunks(X_test, batch_size):
        X = process_X_data_func(X, **process_X_data_func_args)
        batch_size = len(X)
        zeros = [0.0] * batch_size  # reference data

        all_batch_contribs = np.zeros((nb_classes, batch_size, nb_features))
        for c in range(nb_classes):
            batch_contribs = get_deeplift_contribs(task_idx=c,
                                                   input_data_list=[X],
                                                   input_references_list=zeros,
                                                   batch_size=10,
                                                   progress_update=None)
            all_batch_contribs[c] = batch_contribs

        yield all_batch_contribs
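
# Hedged usage sketch for deeplift_contribs_generator above (not from the original
# snippet). `trained_model` and `X_test_raw` are hypothetical placeholders for a
# trained Keras Sequential model and its raw test matrix, and `chunks` is assumed
# to be the batching helper the generator already relies on.
def identity_preprocess(X):
    # placeholder preprocessing: just cast to float32
    return np.asarray(X, dtype='float32')

gen = deeplift_contribs_generator(trained_model, X_test_raw,
                                  process_X_data_func=identity_preprocess,
                                  nb_features=X_test_raw.shape[1],
                                  nb_classes=2, batch_size=128)
for batch_contribs in gen:
    # each yielded array has shape (nb_classes, batch_size, nb_features)
    print(batch_contribs.shape)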
def test_convert_conv1d_model_forward_prop(self):
    deeplift_model = kc.convert_sequential_model(model=self.keras_model)
    deeplift_fprop_func = compile_func(
        [deeplift_model.get_layers()[0].get_activation_vars()],
        deeplift_model.get_layers()[-1].get_activation_vars())
    np.testing.assert_almost_equal(deeplift_fprop_func(self.inp),
                                   self.keras_output_fprop_func(self.inp),
                                   decimal=6)
def compute_importance(model, sequences, tasks,
                       score_type='gradient_input',
                       find_scores_layer_idx=0,
                       target_layer_idx=-2,
                       reference_gc=0.46,
                       reference_shuffle_type=None,
                       num_refs_per_seq=10,
                       seed=1):
    """
    reference_shuffle_type in ['random', 'dinuc']
    reference_gc = 0 will return a numpy array of 0s
    reference_gc < 1 will assign each G and C reference_gc/2
    """
    ### Compute importance scores
    print('Calculating Importance Scores')

    importance_method = {
        "deeplift": deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault,
        "rescale_all_layers": deeplift.blobs.NonlinearMxtsMode.Rescale,
        "revealcancel_all_layers": deeplift.blobs.NonlinearMxtsMode.RevealCancel,
        "gradient_input": deeplift.blobs.NonlinearMxtsMode.Gradient,
        "guided_backprop": deeplift.blobs.NonlinearMxtsMode.GuidedBackprop,
        "deconv": deeplift.blobs.NonlinearMxtsMode.DeconvNet
    }

    importance_model = kc.convert_sequential_model(
        model, nonlinear_mxts_mode=importance_method[score_type])
    importance_func = importance_model.get_target_contribs_func(
        find_scores_layer_idx=find_scores_layer_idx,
        target_layer_idx=target_layer_idx)

    (reference, new_importance_func) = get_reference(
        sequences, importance_func, gc_fraction=reference_gc,
        shuffle=reference_shuffle_type, seed=seed)

    importance_score_dict = {}
    for task in tasks:
        if reference is None:
            import dfim
            import dfim.util
            reload(dfim.util)
            seq_fastas = dfim.util.convert_one_hot_to_fasta(sequences)
            scores = np.array(new_importance_func(
                task_idx=task,  # was 0
                input_data_sequences=seq_fastas,
                num_refs_per_seq=num_refs_per_seq,
                batch_size=10,
                progress_update=1000))
        else:
            scores = np.array(new_importance_func(
                task_idx=task,
                input_data_list=[sequences],
                batch_size=10,
                progress_update=1000,
                input_references_list=[reference]))
        importance_score_dict[task] = scores * sequences
    return importance_score_dict
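
# Hedged usage sketch for compute_importance above (not from the original snippet).
# `trained_genomics_model` and the one-hot `onehot_seqs` array are hypothetical
# placeholders; get_reference, dfim, and the deeplift/kc imports are assumed to be
# available at module scope exactly as the function uses them.
importance_scores = compute_importance(trained_genomics_model,
                                       sequences=onehot_seqs,
                                       tasks=[0, 1],
                                       score_type='deeplift',
                                       reference_gc=0.46)
# importance_scores[0] matches the shape of onehot_seqs; because scores are
# multiplied by the input, only bases actually present get nonzero values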
def test_batch_norm_convert_model_fprop(self):
    deeplift_model = kc.convert_sequential_model(model=self.keras_model)
    deeplift_fprop_func = theano.function(
        [deeplift_model.get_layers()[0].get_activation_vars()],
        deeplift_model.get_layers()[-1].get_activation_vars(),
        allow_input_downcast=True)
    np.testing.assert_almost_equal(deeplift_fprop_func(self.inp),
                                   self.keras_output_fprop_func(self.inp),
                                   decimal=6)
def test_batch_norm_convert_model_backprop(self):
    deeplift_model = kc.convert_sequential_model(model=self.keras_model)
    deeplift_multipliers_func = deeplift_model.get_target_multipliers_func(
        find_scores_layer_idx=0, target_layer_idx=-1)
    np.testing.assert_almost_equal(
        deeplift_multipliers_func(task_idx=0,
                                  input_data_list=[self.inp],
                                  batch_size=10,
                                  progress_update=None),
        self.grad_func(self.inp),
        decimal=6)
def test_convert_conv1d_model_compute_scores(self):
    deeplift_model = kc.convert_sequential_model(model=self.keras_model)
    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=-2)
    np.testing.assert_almost_equal(
        deeplift_contribs_func(task_idx=0,
                               input_data_list=[self.inp],
                               batch_size=10,
                               progress_update=None),
        # when biases are 0 and the reference is 0, deeplift is the same as grad*inp
        self.grad_func(self.inp) * self.inp,
        decimal=6)
def main():
    if len(sys.argv) < 3:
        print("you must call program as: python filtermap.py <rootdir> <resultdir>")
        sys.exit(1)

    savename = 'deepliftmap_all'
    method = '16_G_'
    rootdir = sys.argv[1]
    resultdir = sys.argv[2]
    dir_ls = os.listdir(rootdir)
    dir_ls.sort()
    weight_file = method + 'bestmodel_weights.h5'
    architecture_file = method + 'best_archit.json'

    for x in range(len(dir_ls)):
        if dir_ls[x][0:15] == 'wgEncodeAwgTfbs':
            print(dir_ls[x])
            if os.path.exists(join(resultdir, dir_ls[x], savename)):
                print('Exists! Skipping.')
            else:
                if not exists(resultdir) or not os.path.exists(join(resultdir, dir_ls[x])):
                    print('Directory does not exist!')
                else:
                    # gather all positive-label training examples for this dataset
                    # (basedir is assumed to be the data root defined at module scope)
                    inputdir = join(basedir, dir_ls[x], 'CV0', 'data')  # ,'train.h5.batch1'
                    files = subprocess.check_output(
                        "ls " + inputdir + "/train.h5.batch*",
                        shell=True).split('\n')[0:-1]
                    input_data = np.asarray([]).reshape((0, 4, 1, 101))
                    label_all = np.asarray([]).reshape((0, 1))
                    for batchfile in files:
                        fi = h5py.File(batchfile, 'r')
                        dataset = np.asarray(fi['data'])
                        label = np.asarray(fi['label'])
                        input_data = np.append(input_data, dataset, axis=0)
                        label_all = np.append(label_all, label, axis=0)
                    label_all = label_all.reshape(len(label_all))
                    dataset = input_data[label_all > 0]
                    print(dataset.shape)
                    x_in = dataset  # [0:10000]

                    # load the trained Keras model
                    model = model_from_json(
                        open(join(rootdir, dir_ls[x], architecture_file)).read())
                    model.load_weights(join(rootdir, dir_ls[x], weight_file))
                    keras_model = model

                    # convert to a DeepLIFT model and compute contribution scores
                    deeplift_model = kc.convert_sequential_model(
                        keras_model,
                        nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
                    find_scores_layer_idx = 2
                    deeplift_contribs_func = deeplift_model.get_target_contribs_func(
                        find_scores_layer_idx=find_scores_layer_idx,
                        target_layer_idx=-2)
                    scores = np.array(deeplift_contribs_func(task_idx=0,
                                                             input_data_list=[x_in],
                                                             batch_size=10,
                                                             progress_update=1000))

                    # collapse: per-sample, per-channel max over the last (position) axis
                    s = scores.shape
                    score = scores.reshape((s[0], s[1], s[3]))
                    deepmap = np.max(score, axis=2)
                    print(deepmap.shape)
                    np.savetxt(join(resultdir, dir_ls[x], savename),
                               deepmap, delimiter='\t', fmt='%.5f')
def deeplift(self, X, batch_size=200):
    """
    Returns (num_task, num_samples, 1, num_bases, sequence_length)
    deeplift score array.
    """
    assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import keras_conversion as kc
    from deeplift.blobs import NonlinearMxtsMode, DenseMxtsMode

    # normalize sequence convolution weights
    kc.mean_normalise_first_conv_layer_weights(self.model, True, None)

    # run deeplift
    deeplift_model = kc.convert_sequential_model(
        self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
    target_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)
    return np.asarray([
        target_contribs_func(task_idx=i,
                             input_data_list=[X],
                             batch_size=batch_size,
                             progress_update=None)
        for i in range(self.num_tasks)])
def deeplift(self, X, keras_model_weights, keras_model_json, batch_size=128):
    """
    Returns (num_task, num_samples, 1, num_bases, sequence_length)
    deeplift score array.
    """
    if sys.version_info[0] != 2:
        raise RuntimeError("DeepLIFT requires Python2!")
    # assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import keras_conversion as kc
    from deeplift.blobs import NonlinearMxtsMode

    # load the keras model
    keras_model = kc.load_keras_model(weights=keras_model_weights,
                                      json=keras_model_json)

    # normalize sequence convolution weights
    # kc.mean_normalise_first_conv_layer_weights(self.model, True, None)

    # run deeplift
    deeplift_model = kc.convert_sequential_model(
        # self.model, nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
        # self.model, nonlinear_mxts_mode=NonlinearMxtsMode.GuidedBackprop)
        # self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT_GenomicsDefault)
        self.model, nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)

    # compile scoring function
    target_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0, target_layer_idx=-2)
    input_reference_shape = tuple([1] + list(X.shape[1:]))
    return np.asarray(
        target_contribs_func(
            task_idx=0,
            input_data_list=[X],
            batch_size=batch_size,
            progress_update=None,
            input_references_list=[np.zeros(input_reference_shape)]))
def deeplift(self, X, batch_size=200):
    """
    Returns (num_task, num_samples, 1, num_bases, sequence_length)
    deeplift score array.
    """
    assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import keras_conversion as kc
    from deeplift.blobs import NonlinearMxtsMode

    # normalize sequence convolution weights
    kc.mean_normalise_first_conv_layer_weights(self.model, True, None)

    # run deeplift
    deeplift_model = kc.convert_sequential_model(
        self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
    target_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)
    input_reference_shape = tuple([1] + list(X.shape[1:]))
    return np.asarray([
        target_contribs_func(task_idx=i,
                             input_data_list=[X],
                             batch_size=batch_size,
                             progress_update=None,
                             input_references_list=[np.zeros(input_reference_shape)])
        for i in range(self.num_tasks)])
def deeplift(self, X, batch_size=200):
    """
    Returns (num_task, num_samples, 1, num_bases, sequence_length)
    deeplift score array.
    """
    if sys.version_info[0] != 2:
        raise RuntimeError("DeepLIFT requires Python2!")
    assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import keras_conversion as kc
    from deeplift.blobs import NonlinearMxtsMode

    # normalize sequence convolution weights
    kc.mean_normalise_first_conv_layer_weights(self.model, True, None)

    # run deeplift
    deeplift_model = kc.convert_sequential_model(
        self.model, nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)
    target_contribs_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)
    input_reference_shape = tuple([1] + list(X.shape[1:]))
    return np.asarray([
        target_contribs_func(task_idx=i,
                             input_data_list=[X],
                             batch_size=batch_size,
                             progress_update=None,
                             input_references_list=[np.zeros(input_reference_shape)])
        for i in range(self.num_tasks)])
def deeplift_contribs_generator(model, X_test, process_X_data_func,
                                nb_features, nb_classes, batch_size,
                                process_X_data_func_args={}):
    # convert Keras model, and get relevant function
    deeplift_model = kc.convert_sequential_model(
        model, num_dims=2,
        nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
    get_deeplift_contribs = \
        deeplift_model.get_target_contribs_func(find_scores_layer_idx=0)

    # yield a 3D array detailing the deeplift contrib scores
    for X in chunks(X_test, batch_size):
        X = process_X_data_func(X, **process_X_data_func_args)
        batch_size = len(X)
        zeros = [0.0] * batch_size  # reference data

        all_batch_contribs = np.zeros((nb_classes, batch_size, nb_features))
        for c in range(nb_classes):
            batch_contribs = get_deeplift_contribs(task_idx=c,
                                                   input_data_list=[X],
                                                   input_references_list=zeros,
                                                   batch_size=10,
                                                   progress_update=None)
            all_batch_contribs[c] = batch_contribs

        yield all_batch_contribs
def deeplift(self, X, batch_size=200):
    """
    Returns (num_task, num_samples, 1, num_bases, sequence_length)
    deeplift score array.
    """
    assert len(np.shape(X)) == 4 and np.shape(X)[1] == 1
    from deeplift.conversion import keras_conversion as kc

    # convert to deeplift model and get scoring function
    deeplift_model = kc.convert_sequential_model(self.model, verbose=False)
    score_func = deeplift_model.get_target_contribs_func(
        find_scores_layer_idx=0)

    # use a 40% GC reference
    input_references = [np.array([0.3, 0.2, 0.2, 0.3])[None, None, :, None]]

    # get deeplift scores
    deeplift_scores = np.zeros((self.num_tasks,) + X.shape)
    for i in range(self.num_tasks):
        deeplift_scores[i] = score_func(
            task_idx=i,
            input_data_list=[X],
            batch_size=batch_size,
            progress_update=None,
            input_references_list=input_references)
    return deeplift_scores
def apply_deeplift(keras_model, data, input_layer_index=0,
                   target_layer_index=-2, class_index=None):
    # Convert the Keras model.
    # NonlinearMxtsMode defines the method for computing importance scores.
    # Other supported values are: Gradient, DeconvNet, GuidedBackprop and
    # GuidedBackpropDeepLIFT (a hybrid of GuidedBackprop and DeepLIFT where
    # negative multipliers are ignored during backpropagation).
    deeplift_model = kc.convert_sequential_model(
        keras_model,
        num_dims=len(keras_model.input_shape),
        nonlinear_mxts_mode=NonlinearMxtsMode.DeepLIFT)

    # get relevant function
    deeplift_contribs_func = \
        deeplift_model.get_target_contribs_func(
            find_scores_layer_idx=input_layer_index,
            target_layer_idx=target_layer_index)

    # input_data_list is a list of arrays, one per input mode;
    # each array in the list holds the features of the cases in the appropriate format
    input_data_list = [data]

    # helper function for running the aforementioned function
    def compute_contribs(func):
        return np.array(
            func(task_idx=class_index,
                 input_data_list=input_data_list,
                 batch_size=10,
                 progress_update=None))

    # output is an array of contribution scores toward the output neuron selected
    # by class_index, with dimensions (k, 784): k = # of samples, 784 = # of features
    deeplift_contribs = compute_contribs(deeplift_contribs_func)
    return deeplift_contribs
### A lot of the below is just copied straight from the deeplift
### example of how to use it

# NonlinearMxtsMode defines the method for computing importance scores.
# NonlinearMxtsMode.DeepLIFT_GenomicsDefault uses the RevealCancel rule on Dense layers
# and the Rescale rule on conv layers (see paper for rationale).
# Other supported values are:
# NonlinearMxtsMode.RevealCancel - DeepLIFT-RevealCancel at all layers (used for the MNIST example)
# NonlinearMxtsMode.Rescale - DeepLIFT-Rescale at all layers
# NonlinearMxtsMode.Gradient - the 'multipliers' will be the same as the gradients
# NonlinearMxtsMode.GuidedBackprop - the 'multipliers' will be what you get from guided backprop
# Use deeplift.util.get_integrated_gradients_function to compute integrated gradients.
# Feel free to email the DeepLIFT author (Avanti Shrikumar) if anything is unclear.
deeplift_model = kc.convert_sequential_model(
    amodel,
    nonlinear_mxts_mode=deeplift.blobs.NonlinearMxtsMode.DeepLIFT_GenomicsDefault)

# Specify the index of the layer to compute the importance scores of.
# In the example below, we find scores for the input layer, which is idx 0 in deeplift_model.get_layers().
find_scores_layer_idx = 0

# Compile the function that computes the contribution scores.
# For sigmoid or softmax outputs, target_layer_idx should be -2 (the default)
# (see "3.6 Choice of target layer" in https://arxiv.org/abs/1704.02685 for justification).
# For regression tasks with a linear output, target_layer_idx should be -1
# (which simply refers to the last layer).
# If you want the DeepLIFT multipliers instead of the contribution scores, use get_target_multipliers_func.
deeplift_contribs_func = deeplift_model.get_target_contribs_func(
    find_scores_layer_idx=find_scores_layer_idx,
    target_layer_idx=-1)
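
# A minimal continuation sketch (not from the original example): `onehot_data` is a
# hypothetical input array in the layout the converted model `amodel` expects; the
# call mirrors the get_target_contribs_func interface already used in this file.
scores = np.array(deeplift_contribs_func(task_idx=0,
                                         input_data_list=[onehot_data],
                                         batch_size=10,
                                         progress_update=1000))
print(scores.shape)  # one contribution score per input feature, same shape as onehot_data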
import os.path
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
import sys
import keras

j = sys.argv[1]
network_path = "../networks/network" + str(j) + '/'
keras_model = keras.models.load_model(network_path + "nt3_network" + str(j) + ".h5")
keras_model.summary()

import deeplift
from deeplift.blobs import NonlinearMxtsMode
from deeplift.conversion import keras_conversion as kc

# Three different models, one each for RevealCancel, Gradient and GuidedBackprop
revealcancel_model = kc.convert_sequential_model(
    model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
grad_model = kc.convert_sequential_model(
    model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
guided_backprop_model = kc.convert_sequential_model(
    model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.GuidedBackprop)

### load data
import pandas as pd
import numpy as np
from keras.utils import np_utils

df_test = (pd.read_csv('../data-05-31-2018/formatted_full_data.csv',
                       header=None).values).astype('float32')
df_y_test = df_test[:, 0].astype('int')
seqlen = df_test.shape[1]
Y_test = np_utils.to_categorical(df_y_test, 2)
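
# Hedged continuation sketch (not part of the original script): compile a contribution
# function for each converted model and score the loaded data. The feature slice that
# drops the label column and the choice of task_idx=1 are assumptions about this
# particular NT3 pipeline; depending on the network's input shape, an extra channel
# axis (e.g. np.expand_dims(X_test, axis=2)) may also be needed.
X_test = df_test[:, 1:]

score_funcs = {
    'revealcancel': revealcancel_model.get_target_contribs_func(find_scores_layer_idx=0),
    'gradient': grad_model.get_target_contribs_func(find_scores_layer_idx=0),
    'guided_backprop': guided_backprop_model.get_target_contribs_func(find_scores_layer_idx=0),
}

contribs = {}
for name, func in score_funcs.items():
    contribs[name] = np.array(func(task_idx=1,  # score the second (positive-class) output neuron
                                   input_data_list=[X_test],
                                   batch_size=200,
                                   progress_update=None))
    print(name, contribs[name].shape)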