def meme_generate(W, output_file='meme.txt', prefix='filter', factor=None):
    """Write a collection of motif matrices to a MEME (version 4) text file.

    Parameters
    ----------
    W : sequence of 2-D arrays
        One matrix per motif. Each matrix is read column by column
        (``pwm[:, i]``) and every column must hold exactly four values,
        presumably ordered A, C, G, T to match the ALPHABET header line.
    output_file : str
        Path of the file to create (overwritten if it exists).
    prefix : str
        Motif names are written as ``<prefix><index>``.
    factor : number or None
        When truthy, each matrix is first passed through
        ``utils.normalize_pwm(matrix, factor=factor)``; otherwise the
        matrix is written unchanged.
    """
    # uniform background frequency for the four letters
    nt_freqs = [1. / 4 for _ in range(4)]

    # the context manager guarantees the handle is closed even if
    # normalisation or formatting raises part-way through
    with open(output_file, 'w') as f:
        # intro material
        f.write('MEME version 4\n')
        f.write('\n')
        f.write('ALPHABET= ACGT\n')
        f.write('\n')
        f.write('Background letter frequencies:\n')
        f.write('A %.4f C %.4f G %.4f T %.4f \n' % tuple(nt_freqs))
        f.write('\n')

        for j, motif in enumerate(W):
            if factor:
                pwm = utils.normalize_pwm(motif, factor=factor)
            else:
                pwm = motif

            width = pwm.shape[1]
            f.write('MOTIF %s%d \n' % (prefix, j))
            # NOTE(review): nsites is filled with the motif width, as in the
            # original code; no real site count is available here.
            f.write('letter-probability matrix: alength= 4 w= %d nsites= %d \n' % (width, width))
            # one output row per motif position
            for i in range(width):
                f.write('%.4f %.4f %.4f %.4f \n' % tuple(pwm[:, i]))
            f.write('\n')
def fom_saliency_mul(X, layer, alphabet, nntrainer, sess, ax, title='notitle'):
    """Plot a mutagenesis-based saliency map for the sequence in X.

    Per the original notes, this requires that deepomics is being used,
    the architecture has already been constructed, and the session has
    been initialised with the best parameters loaded.

    X is the input array; its axis 1 and axis 3 sizes are passed to
    ``mutate`` (assumed to be sequence length and alphabet size - confirm).
    layer is the name (string) of the activation layer to read.
    alphabet is forwarded to ``visualize.plot_seq_pos_saliency``.
    nntrainer must expose ``get_activations(sess, feed, layer=...)``.
    ax and title are accepted but not used by this function.
    """
    seq_len = X.shape[1]

    # build every mutant of the input and wrap it in a feed dict
    X_mut = mutate(X, seq_len, X.shape[3])
    mut_feed = {'inputs': X_mut, 'targets': np.ones((X_mut.shape[0], 1))}
    # activations of the requested layer for all mutants
    mut_predictions = nntrainer.get_activations(sess, mut_feed, layer=layer)

    # same for the unmutated (wild-type) input
    wt_feed = {'inputs': X, 'targets': np.ones((X.shape[0], 1))}
    predictions = nntrainer.get_activations(sess, wt_feed, layer=layer)

    # arrange mutant scores as a 4 x seq_len heat map, then express them
    # relative to the first wild-type prediction
    heat_mut = mut_predictions.reshape(seq_len, 4).T
    relative_heat = heat_mut - predictions[0]
    scaled_heat = utils.normalize_pwm(relative_heat, factor=4)

    visualize.plot_seq_pos_saliency(np.squeeze(X).T, scaled_heat,
                                    alphabet=alphabet, nt_width=400)
def fom_convsal(X, layer, alphabet, convidx, nntrainer, sess, title='notitle', figsize=(15, 2), fig=None, pos=None, idx=None):
    """Plot a mutagenesis saliency map for a single neuron of a layer.

    X is the input array; its axis 1 and axis 3 sizes are passed to
    ``mutate``.
    layer is the name of the layer whose activations are read.
    alphabet is forwarded to ``visualize.plot_seq_pos_saliency``.
    convidx is a triple of indices selecting one neuron in the layer
    output, applied as ``activations[:, i2, i3, i4]``.
    nntrainer must expose ``get_activations(sess, feed, layer=...)``.
    title is drawn unless it equals 'notitle'.
    figsize is used only when no fig is given (a new figure is created).
    fig, pos, idx: when fig is truthy, a subplot is added to it with
    ``fig.add_subplot(row, col, idx)`` where ``row, col = pos``.
    """
    eps = 1e-7

    # coordinates of the neuron inside the layer output
    i2, i3, i4 = convidx

    # score of the chosen neuron for every mutant of the input
    X_mut = mutate(X, X.shape[1], X.shape[3])
    mut_feed = {'inputs': X_mut, 'targets': np.ones((X_mut.shape[0], 1))}
    mut_scores = nntrainer.get_activations(sess, mut_feed, layer=layer)[:, i2, i3, i4]

    # score of the same neuron for the unmutated (wild-type) input
    wt_feed = {'inputs': X, 'targets': np.ones((X.shape[0], 1))}
    WT_score = nntrainer.get_activations(sess, wt_feed, layer=layer)[:, i2, i3, i4]

    # 4 x length heat map of mutant scores relative to the wild type;
    # eps is added to the difference before normalisation
    heat_mut = mut_scores.reshape(X.shape[1], 4).T
    shifted_heat = (heat_mut - WT_score) + eps
    scaled_heat = utils.normalize_pwm(shifted_heat, factor=4)

    # prepare the drawing target: a subplot of the given figure, or a
    # brand new figure when none was supplied
    if fig:
        row, col = pos
        ax = fig.add_subplot(row, col, idx)
        if title != 'notitle':
            ax.set_title(title)
    else:
        plt.figure(figsize=figsize)
        if title != 'notitle':
            plt.title(title)

    visualize.plot_seq_pos_saliency(np.squeeze(X).T, scaled_heat,
                                    alphabet=alphabet, nt_width=400)
def clip_filters(W, threshold=0.5, pad=3):
    """Trim each filter to the span of its informative positions.

    Parameters
    ----------
    W : 3-D array
        Unpacked as ``(num_filters, _, filter_length)``; positions run
        along the last axis.
    threshold : float
        A position counts as informative when its information content,
        ``log2(4) + sum(w * log2(w + 1e-7))`` over axis 0 of the normalised
        filter, is strictly greater than this value.
    pad : int
        Number of extra positions kept on each side of the informative
        span, limited to the filter bounds.

    Returns
    -------
    list of 2-D arrays
        One slice of the original (un-normalised) filter per input filter.
        A filter with no informative position is returned at full length.
    """
    num_filters, _, filter_length = W.shape

    W_clipped = []
    for i in range(num_filters):
        w = utils.normalize_pwm(W[i], factor=3)
        # per-position information content (named "entropy" historically)
        entropy = np.log2(4) + np.sum(w * np.log2(w + 1e-7), axis=0)
        index = np.where(entropy > threshold)[0]

        # Test for emptiness, not truthiness of the values: ``index.any()``
        # is False for ``index == [0]``, which would wrongly skip clipping
        # when only the first position is informative.
        if index.size > 0:
            start = np.maximum(np.min(index) - pad, 0)
            end = np.minimum(np.max(index) + pad + 1, filter_length)
            W_clipped.append(W[i, :, start:end])
        else:
            W_clipped.append(W[i, :, :])

    return W_clipped
def entropy_weighted_cosine_distance(X_saliency, X_model):
    """Score a saliency map against a model using information-weighted cosines.

    X_saliency is normalised with ``utils.normalize_pwm(..., factor=3)``
    and compared to X_model along axis 0, giving one cosine value per
    position of the remaining axis.

    Returns a pair ``(tpr, fpr)``:
      * tpr - the per-position cosine values averaged with weights equal
        to the model's information content, ``log2(4) + sum(X * log2(X + 1e-10))``;
      * fpr - ``1 - cosine`` averaged with weights ``2 - information``.
    """
    X_norm = utils.normalize_pwm(X_saliency, factor=3)

    # per-position cosine between the normalised saliency and the model
    saliency_len = np.sqrt(np.sum(X_norm**2, axis=0))
    model_len = np.sqrt(np.sum(X_model**2, axis=0))
    cd = np.sum(X_norm * X_model, axis=0) / saliency_len / model_len

    # per-position information content of the model
    model_info = np.log2(4) - np.sum(-X_model * np.log2(X_model + 1e-10), axis=0)

    # agreement weighted towards informative positions
    tpr = np.sum(model_info * cd) / np.sum(model_info)

    # disagreement weighted towards uninformative positions
    inv_model_info = -(model_info - 2)
    inv_cd = -(cd - 1)
    fpr = np.sum(inv_cd * inv_model_info) / np.sum(inv_model_info)

    return tpr, fpr
def fom_heatmap(X, layer, alphabet, nntrainer, sess, eps=0):
    """Return the normalised mutagenesis heat map for the sequence in X.

    X is the input array; its axis 1 and axis 3 sizes are passed to
    ``mutate``.
    layer is the name of the layer whose activations are read.
    alphabet is accepted but not used by this function.
    nntrainer must expose ``get_activations(sess, feed, layer=...)``.
    eps is added to the mutant-minus-wild-type difference before it is
    normalised.

    Returns the result of ``utils.normalize_pwm(..., factor=4)`` applied
    to the 4 x length difference map.
    """
    seq_len = X.shape[1]

    # activations of the requested layer for every mutant of the input
    X_mut = mutate(X, seq_len, X.shape[3])
    mut_feed = {'inputs': X_mut, 'targets': np.ones((X_mut.shape[0], 1))}
    mut_predictions = nntrainer.get_activations(sess, mut_feed, layer=layer)

    # activations for the unmutated (wild-type) input
    wt_feed = {'inputs': X, 'targets': np.ones((X.shape[0], 1))}
    predictions = nntrainer.get_activations(sess, wt_feed, layer=layer)

    # lay the mutant scores out as a heat map and express them relative
    # to the first wild-type prediction
    heat_mut = mut_predictions.reshape(seq_len, 4).T
    relative_heat = heat_mut - predictions[0] + eps

    return utils.normalize_pwm(relative_heat, factor=4)