def compute_pwcca(acts1, acts2, epsilon=0.):
    """Compute the projection-weighted mean of the CCA coefficients (PWCCA).

    Args:
        acts1: 2d numpy array, shaped (neurons, num_datapoints)
        acts2: 2d numpy array, shaped (neurons, num_datapoints)
        epsilon: forwarded unchanged to ``cca_core.get_cca_similarity``.

    Returns:
        A 3-tuple ``(weighted_mean, weights, coefs)``:
            weighted_mean: ``np.sum(weights * coefs)``.
            weights: projection weights, normalised to sum to 1.
            coefs: the CCA coefficients of the view that was used
                (``cca_coef1`` or ``cca_coef2``).
    """
    sresults = cca_core.get_cca_similarity(
        acts1, acts2, epsilon=epsilon,
        compute_dirns=False, compute_coefs=True, verbose=False)

    # Work in the view whose index selection sums to the smaller value
    # (presumably the view that retained fewer neurons -- confirm against
    # cca_core). Every quantity below must come from that same view.
    if np.sum(sresults["x_idxs"]) <= np.sum(sresults["y_idxs"]):
        acts = acts1
        idxs = sresults["x_idxs"]
        coef_matrix = sresults["coef_x"]
        means = sresults["neuron_means1"][idxs]
        coefs = sresults["cca_coef1"]
    else:
        # The directions for the second view are built from acts2; the
        # previous code indexed acts1 with y_idxs here, mixing the two views.
        acts = acts2
        idxs = sresults["y_idxs"]
        coef_matrix = sresults["coef_y"]
        means = sresults["neuron_means2"][idxs]
        coefs = sresults["cca_coef2"]

    # CCA directions: project the mean-centred activations, then add the
    # means back.
    dirns = np.dot(coef_matrix, acts[idxs] - means) + means

    # Orthonormalise the directions and weight each one by how much of the
    # selected activations projects onto it.
    P, _ = np.linalg.qr(dirns.T)
    weights = np.sum(np.abs(np.dot(P.T, acts[idxs].T)), axis=1)
    weights = weights / np.sum(weights)

    return np.sum(weights * coefs), weights, coefs
def _svcca_project(acts, num_dims):
    """Return the top ``num_dims`` singular directions of ``acts``, scaled by their singular values."""
    _, singular_values, vt = np.linalg.svd(acts, full_matrices=False)
    # Row scaling equals diag(s[:k]) @ Vt[:k]; slicing both operands keeps the
    # shapes consistent when fewer than ``num_dims`` singular values exist.
    return singular_values[:num_dims, np.newaxis] * vt[:num_dims]


def SVCCA(activations1, activations2, layer_number, num_dims=60):
    """Compute the mean SVCCA correlation between two layers stored as CSV files.

    The activations are read from ``activations1 + str(layer_number) + '.csv'``
    and ``activations2 + str(layer_number) + '.csv'``. They are used exactly as
    loaded: no mean subtraction is applied before the SVD.

    Args:
        activations1: path prefix of the first activation CSV file.
        activations2: path prefix of the second activation CSV file.
        layer_number: layer identifier appended to both prefixes.
        num_dims: number of leading singular directions to keep for each
            activation matrix (default 60). If a matrix has fewer singular
            values than this, all of them are kept.

    Returns:
        The mean of the ``cca_coef1`` entries returned by
        ``cca_core.get_cca_similarity`` on the reduced activations.

    Raises:
        ValueError: if ``num_dims`` is smaller than 1.
    """
    if num_dims < 1:
        raise ValueError("num_dims must be at least 1, got %r" % (num_dims,))

    acts1 = np.genfromtxt(activations1 + str(layer_number) + '.csv', delimiter=',')
    acts2 = np.genfromtxt(activations2 + str(layer_number) + '.csv', delimiter=',')

    svacts1 = _svcca_project(acts1, num_dims)
    svacts2 = _svcca_project(acts2, num_dims)

    svcca_results = cca_core.get_cca_similarity(
        svacts1, svacts2, epsilon=1e-10, verbose=False)
    return np.mean(svcca_results["cca_coef1"])
def get_similarity(acts1, acts2, verbose=False, epsilon=1e-10, method='mean'):
    """Return a CCA-based similarity between two activation matrices.

    Args:
        acts1: first activation matrix, passed through to the chosen backend.
        acts2: second activation matrix, passed through to the chosen backend.
        verbose: forwarded to the CCA / SVCCA backends.
        epsilon: forwarded to every backend.
        method: one of
            'all'   -- the full ``cca_coef1`` entry of the CCA result,
            'mean'  -- first element of the CCA result's ``mean`` entry,
            'svcca' -- first element of the ``mean`` entry of
                       ``get_svcca_similarity`` called with ``K=10``,
            'pwcca' -- the first value returned by ``pwcca.compute_pwcca``.

    Raises:
        NotImplementedError: for any other ``method``.
    """
    import pwcca

    if method in ('all', 'mean'):
        cca_out = cca_core.get_cca_similarity(
            acts1, acts2, verbose=verbose, epsilon=epsilon)
        if method == 'all':
            return cca_out['cca_coef1']
        # The 'mean' entry holds the same value twice; return the first.
        return cca_out['mean'][0]

    if method == 'svcca':
        svcca_out = get_svcca_similarity(
            acts1, acts2, K=10, verbose=verbose, epsilon=epsilon)
        return svcca_out['mean'][0]

    if method == 'pwcca':
        weighted_mean, _weights, _coefs = pwcca.compute_pwcca(
            acts1, acts2, epsilon=epsilon)
        return weighted_mean

    raise NotImplementedError(method)
def get_svcca_similarity(acts1, acts2, K=20, verbose=False, epsilon=None):
    '''
    Compute svcca similarity, adapted from tutorial on
    https://github.com/google/svcca/tree/master/tutorials.

    Each activation matrix is mean-centred along axis 1, reduced to its top
    K singular directions (scaled by their singular values), and the two
    reduced matrices are compared with cca_core.get_cca_similarity.

    Args:
        acts1: 2d numpy array of activations for the first network.
        acts2: 2d numpy array of activations for the second network.
        K: number of leading singular directions to keep. If a matrix has
            fewer singular values than K, all of them are kept.
        verbose: forwarded to cca_core.get_cca_similarity.
        epsilon: forwarded to cca_core.get_cca_similarity.
            NOTE(review): the default None is passed through as-is; confirm
            that cca_core.get_cca_similarity accepts None.

    Returns:
        The result object returned by cca_core.get_cca_similarity.
    '''
    reduced = []
    for acts in (acts1, acts2):
        centered = acts - np.mean(acts, axis=1, keepdims=True)
        _, singular_values, vt = np.linalg.svd(centered, full_matrices=False)
        # Row scaling equals diag(s[:K]) @ Vt[:K], but unlike
        # `s[:K] * np.eye(K)` it does not fail when K exceeds the number of
        # available singular values.
        reduced.append(singular_values[:K, np.newaxis] * vt[:K])

    svacts1, svacts2 = reduced
    return cca_core.get_cca_similarity(
        svacts1, svacts2, epsilon=epsilon, verbose=verbose)
def fourier_ccas(conv_acts1, conv_acts2, return_coefs=False,
                 compute_dirns=False, verbose=False):
    """Computes cca similarity between two conv layers with DFT.

    This function takes in two sets of convolutional activations,
    conv_acts1 and conv_acts2. After resizing the spatial dimensions to be
    the same, it applies fft and then computes the ccas. Finally, it applies
    the inverse fourier transform to get the CCA directions and neuron
    coefficients.

    Args:
        conv_acts1: numpy array with shape
            [batch_size, height1, width1, num_channels1]
        conv_acts2: numpy array with shape
            [batch_size, height2, width2, num_channels2]
        return_coefs: boolean; if True the neuron coefficients are kept
            (after an inverse FFT), otherwise they are removed from each
            per-location result.
        compute_dirns: boolean, used to determine whether results also
            contain actual cca directions.
        verbose: forwarded to cca_core.get_cca_similarity.

    Returns:
        all_results: a pandas dataframe, with cca results for every spatial
            location (one row per location, with a "location" column holding
            the (i, j) tuple). Columns are neuron coefficients (combinations
            of neurons that correspond to cca directions), the cca
            correlation coefficients (how well aligned directions
            correlate), x and y idxs (for computing cca directions on the
            fly if compute_dirns=False), and summary statistics. If
            compute_dirns=True, the cca directions are also computed.
    """
    height1, width1 = conv_acts1.shape[1], conv_acts1.shape[2]
    height2, width2 = conv_acts2.shape[1], conv_acts2.shape[2]

    if height1 != height2 or width1 != width2:
        # Spatial sizes differ: shrink both to the smaller common size.
        height = min(height1, height2)
        width = min(width1, width2)
        new_size = [height, width]
        resize = True
    else:
        height = height1
        width = width1
        new_size = None
        resize = False

    # resize and preprocess with fft
    fft_acts1 = fft_resize(conv_acts1, resize=resize, new_size=new_size)
    fft_acts2 = fft_resize(conv_acts2, resize=resize, new_size=new_size)

    # Collect one result dict per spatial location and build the frame once
    # at the end: DataFrame.append was removed in pandas 2.0 and re-copies
    # the whole frame on every call.
    rows = []
    for i in range(height):
        for j in range(width):
            # compute_dirns must be passed by keyword; positionally it would
            # be bound to whatever the third parameter of
            # get_cca_similarity is rather than to compute_dirns.
            results_dict = cca_core.get_cca_similarity(
                fft_acts1[:, i, j, :].T, fft_acts2[:, i, j, :].T,
                compute_dirns=compute_dirns, verbose=verbose)

            # apply inverse FFT to get coefficients and directions if
            # specified
            # NOTE(review): assumes the result dict exposes
            # "neuron_coeffs1"/"neuron_coeffs2" -- confirm against the
            # cca_core version in use.
            if return_coefs:
                results_dict["neuron_coeffs1"] = np.fft.ifft2(
                    results_dict["neuron_coeffs1"])
                results_dict["neuron_coeffs2"] = np.fft.ifft2(
                    results_dict["neuron_coeffs2"])
            else:
                del results_dict["neuron_coeffs1"]
                del results_dict["neuron_coeffs2"]

            if compute_dirns:
                results_dict["cca_dirns1"] = np.fft.ifft2(
                    results_dict["cca_dirns1"])
                results_dict["cca_dirns2"] = np.fft.ifft2(
                    results_dict["cca_dirns2"])

            # accumulate results
            results_dict["location"] = (i, j)
            rows.append(results_dict)

    return pd.DataFrame(rows)
n_comp1=300, feat_new=['pca' + str(i) for i in range(300)]): pca = PCA(n_components=n_comp1, random_state=42) df_pca = pd.DataFrame(pca.fit_transform(df), columns=feat_new) return (df_pca) df_L1_pc_x = pca_preprocess(df_L1000_x) df_cp_pc_x = pca_preprocess(df_cp_x) """#### - CCA on CP & L1000 train data """ cca_results = cca_core.get_cca_similarity(df_cp_pc_x.values.T, df_L1_pc_x.values.T, epsilon=1e-10, verbose=False) plt.figure(figsize=(12, 8)) sns.set_context('talk', font_scale=0.85) sns.lineplot(x=range(len(cca_results["cca_coef1"])), y=cca_results["cca_coef1"]) plt.title( "CCA correlation coefficients between CP and L1000 canonical variables (300) after PCA" ) print( "Mean Canonical Correlation co-efficient between CP and L1000 canonical variables (300):", np.mean(cca_results["cca_coef1"])) """#### - (Singular Vectors)CCA as a method to analyze the correlation between Cell painting & L1000"""
# Train a CNN and a dense model on the same data with identical settings.
model1 = cc.get_cnn_model(input_shape, num_classes)
model2 = cc.get_dense_model(input_shape, num_classes)
history1 = model1.fit(x_train, y_train, epochs=60, batch_size=256,
                      validation_split=0.1)
history2 = model2.fit(x_train, y_train, epochs=60, batch_size=256,
                      validation_split=0.1)

# Export the training history of each model.
model_performance = cc.output_model(history1)
model_performance.to_csv('performance1.csv')
model_performance = cc.output_model(history2)
model_performance.to_csv('performance2.csv')

# Compare the two models on the first 600 training samples.
act1 = cc.get_acts_from_model(model1, x_train[range(600)])
act2 = cc.get_acts_from_model(model2, x_train[range(600)])

# The CCA directions are written to disk below, so they have to be requested
# here; with compute_dirns=False the 'cca_dirns1'/'cca_dirns2' entries are
# not expected to be present in the result.
# NOTE(review): assumes cc.get_cca_similarity follows the cca_core contract
# for compute_dirns -- confirm.
cca = cc.get_cca_similarity(act1, act2, compute_dirns=True)
pd.DataFrame(cca['cca_dirns1']).round(3).to_csv('cca1.csv')
pd.DataFrame(cca['cca_dirns2']).round(3).to_csv('cca2.csv')
#pd.DataFrame(act1).to_csv('acts1.csv')
# history = model4.fit(x_train, y_train,epochs=12,batch_size=128,validation_data=(x_test,y_test))
# model_info = cc.output_model(history)
# model_info.to_csv('model_history.csv')
def compute_cca(embeddings, descriptors):
    """Print the mean CCA coefficient between ``embeddings`` and ``descriptors``.

    Runs ``cca_core.get_cca_similarity`` with ``epsilon=1e-20`` and
    ``verbose=False`` and prints the mean of the ``cca_coef1`` entry.
    Nothing is returned.
    """
    cca_output = cca_core.get_cca_similarity(
        embeddings, descriptors, epsilon=1e-20, verbose=False)
    mean_coefficient = np.mean(cca_output["cca_coef1"])
    print(mean_coefficient)
if subspace is None: assert (len(subspace.shape) == 2) subspace = next_batch else: subspace = np.concatenate([subspace, next_batch]) fh.close() return subspace.transpose() # return dimensions num_neurons1 x data_points # The models must be run on the same data, with the same sequence length and batch size. if __name__ == "__main__": parser = argparse.ArgumentParser( description='SVCCA on npy files generated by appending each batch.') parser.add_argument('--subspace-file1', type=str, help='location of first subspace saved as .npy') parser.add_argument('--subspace-file2', type=str, help='location of second subspace saved as .npy') parser.add_argument('--results-file', type=str, help='location to save mean cca scores') args = parser.parse_args() results_file = open(args.results_file, 'a') print(cca_core.get_cca_similarity(load_subspace(args.subspace_file1), load_subspace( args.subspace_file2))["mean"], file=results_file)