def __init__(self, no_of_layers, no_of_neurons, epochs, learning_rate, bias, xy, activation):
    self.no_of_layers = no_of_layers
    self.no_of_neurons = no_of_neurons
    self.epochs = epochs
    self.lr = learning_rate
    self.bias = bias
    self.activation = activation

    # Unpack the (x_train, x_test, y_train, y_test) tuple.
    self.x_train = xy[0]
    self.x_test = xy[1]
    self.y_train = xy[2]
    self.y_test = xy[3]

    # Input/output dimensions inferred from the data (assumes x is
    # samples x features and y is one-hot encoded samples x classes).
    self.no_of_features = self.x_train.shape[1]
    self.no_of_classes = self.y_train.shape[1]

    if self.bias:
        self.x_train = hp.add_bias(self.x_train)
        self.x_test = hp.add_bias(self.x_test)

    # Layer sizes: input features, hidden-layer widths, output classes.
    self.dim_of_weights = [self.no_of_features] + self.no_of_neurons + [self.no_of_classes]
    self.init_network()
    # print(self.weights)
    self.fit()
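# For example (hypothetical numbers): with 4 input features, no_of_neurons = [8, 6]
# and 3 classes, dim_of_weights becomes [4, 8, 6, 3]; assuming init_network builds
# one matrix per consecutive pair in (n_out, n_in) orientation, the weights would
# have roughly shapes (8, 4), (6, 8) and (3, 6) before any bias columns are added.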
def update(self):
    # Input layer: update with the last delta against the raw training inputs.
    self.weights[0] = self.weights[0] + self.lr * np.dot(self.delta[-1].T, self.x_train)
    # Remaining layers: update each weight matrix with the matching delta and
    # the activations of the previous layer (with a bias column if enabled).
    for i in range(1, self.no_of_layers + 1):
        if self.bias:
            self.f[i - 1] = hp.add_bias(self.f[i - 1])
        self.weights[i] = self.weights[i] + self.lr * np.dot(self.delta[-(i + 1)].T, self.f[i - 1])
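# The `hp.add_bias` helper used above is not shown in this snippet; a minimal
# sketch of what it is assumed to do (append a constant column of ones so the
# bias term can be folded into the weights) could look like this. The name
# `_add_bias_sketch` is hypothetical and only for illustration.
import numpy as np

def _add_bias_sketch(x):
    """Return x with an extra column of ones appended along axis 1."""
    ones = np.ones((x.shape[0], 1))
    return np.hstack([x, ones])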
def main():
    argparser = argparse.ArgumentParser(description="FDR significance thresholding for single subject")
    argparser.add_argument("-embedding_layer", "--embedding_layer", type=str,
                           help="Location of NN embedding (for a layer)")
    argparser.add_argument("-subject_number", "--subject_number", type=int, default=1,
                           help="subject number (fMRI data) for decoding")
    argparser.add_argument("-agg_type", "--agg_type", type=str, default='avg',
                           help="Aggregation type ('avg', 'max', 'min', 'last')")
    argparser.add_argument("-random", "--random", action='store_true', default=False,
                           help="True if initialize random brain activations, False if not")
    argparser.add_argument("-rand_embed", "--rand_embed", action='store_true', default=False,
                           help="True if initialize random embeddings, False if not")
    argparser.add_argument("-glove", "--glove", action='store_true', default=False,
                           help="True if initialize glove embeddings, False if not")
    argparser.add_argument("-word2vec", "--word2vec", action='store_true', default=False,
                           help="True if initialize word2vec embeddings, False if not")
    argparser.add_argument("-bert", "--bert", action='store_true', default=False,
                           help="True if initialize bert embeddings, False if not")
    argparser.add_argument("-normalize", "--normalize", action='store_true', default=False,
                           help="True if add normalization across voxels, False if not")
    argparser.add_argument("-permutation", "--permutation", action='store_true', default=False,
                           help="True if permutation, False if not")
    argparser.add_argument("-permutation_region", "--permutation_region", action='store_true', default=False,
                           help="True if permutation by brain region, False if not")
    argparser.add_argument("-which_layer", "--which_layer", type=int, default=1,
                           help="Layer of interest in [1: total number of layers]")
    argparser.add_argument("-single_subject", "--single_subject", action='store_true', default=False,
                           help="if single subject analysis")
    argparser.add_argument("-group_level", "--group_level", action='store_true', default=False,
                           help="if group level analysis")
    argparser.add_argument("-searchlight", "--searchlight", action='store_true', default=True,
                           help="if searchlight")
    argparser.add_argument("-fdr", "--fdr", action='store_true', default=False,
                           help="if apply FDR")
    argparser.add_argument("-subjects", "--subjects", type=str, default="",
                           help="subject numbers")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument("--fmri_path", default="/n/shieber_lab/Lab/users/cjou/fmri/", type=str,
                           help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument("--to_save_path", default="/n/shieber_lab/Lab/users/cjou/", type=str,
                           help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    ### check conditions
    if not args.single_subject and not args.group_level:
        print("select analysis type: single subject or group level")
        exit()

    if args.fdr and args.single_subject and not args.searchlight:
        print("not valid application of FDR to single subject without searchlight")
        exit()

    if args.group_level and args.subjects == "":
        print("must specify subject numbers in group level analysis")
        exit()

    if not os.path.exists(str(args.to_save_path) + 'mat/'):
        os.makedirs(str(args.to_save_path) + 'mat/')

    # Load the embedding matrix, either from a .mat file or from a pickled
    # layer of word2vec/glove/bert/random embeddings.
    if not args.glove and not args.word2vec and not args.bert and not args.rand_embed:
        embed_loc = args.embedding_layer
        file_name = embed_loc.split("/")[-1].split(".")[0]
        embedding = scipy.io.loadmat(embed_loc)
        embed_matrix = helper.get_embed_matrix(embedding)
    else:
        embed_loc = args.embedding_layer
        file_name = embed_loc.split("/")[-1].split(".")[0].split("-")[-1] + "_layer" + str(args.which_layer)
        embed_matrix = pickle.load(open(embed_loc, "rb"))
        if args.word2vec:
            file_name += "word2vec"
        elif args.glove:
            file_name += "glove"
        elif args.bert:
            file_name += "bert"
        else:
            file_name += "random"

    if args.single_subject:
        if args.searchlight:
            search = "_searchlight"
        else:
            search = ""
        save_location = str(args.to_save_path) + "fdr/" + str(file_name) + "_subj" + str(args.subject_number) + str(search)

        volmask = pickle.load(open(str(args.to_save_path) + "subj" + str(args.subject_number) + "/volmask.p", "rb"))
        space_to_index_dict, index_to_space_dict, volmask_shape = get_spotlights(volmask)

        # 1. z-score
        print("z-scoring activations and embeddings...")
        individual_activations = pickle.load(open("../../examplesGLM/subj" + str(args.subject_number) + "/individual_activations.p", "rb"))
        z_activations = helper.z_score(individual_activations)
        z_embeddings = helper.z_score(embed_matrix)

        # 2. calculate correlation
        print("calculating correlations...")
        z_activations = helper.add_bias(z_activations)
        z_embeddings = helper.add_bias(z_embeddings)
        correlations, pvals = calculate_pearson_correlation(args, z_activations, z_embeddings)

        # 3. evaluate significance
        print("evaluating significance...")
        valid_correlations, indices, num_voxels = evaluate_performance(args, correlations, pvals, space_to_index_dict, index_to_space_dict, volmask_shape)
        corrected_coordinates = get_2d_coordinates(valid_correlations, indices, num_voxels)
        norm_coords = fix_coords_to_absolute_value(corrected_coordinates)
        _ = helper.transform_coordinates(norm_coords, volmask, save_location, "fdr", pvals=pvals)
        print("done.")

    if args.group_level:
        save_location = str(args.to_save_path) + "fdr/" + str(file_name) + "_group_analysis"
        subject_numbers = [int(subj_num) for subj_num in args.subjects.split(",")]

        print("loading brain common space...")
        volmask = helper.load_common_space(subject_numbers)
        print("VOLMASK SHAPE: " + str(volmask.shape))
        space_to_index_dict, index_to_space_dict, volmask_shape = get_spotlights(volmask)
        print("returned shape: " + str(volmask_shape))

        # 1. get all data
        print("get all data...")
        fdr_corr_list = []
        for subj_num in tqdm(subject_numbers):
            print("adding subject: " + str(subj_num))
            # load this subject's searchlight FDR correlations and mask them to the common space
            subj_file_name = str(args.to_save_path) + "fdr/" + str(args.agg_type) + "_layer" + str(args.which_layer) + "bert_subj" + str(subj_num) + "_searchlight-3dtransform-fdr"
            fdr_corr = scipy.io.loadmat(subj_file_name + ".mat")
            fdr_corr_vals = fdr_corr["metric"]
            common_corr = np.ma.array(fdr_corr_vals, mask=volmask)
            fdr_corr_list.append(common_corr)

        # 2. average correlations and pvalues
        print("calculating correlations...")
        avg_corrs = np.mean(np.array(fdr_corr_list), axis=0)
        ttest_pval = np.apply_along_axis(ttest_voxels, 0, np.array(fdr_corr_list))

        # 3. save files
        print("saving files...")
        scipy.io.savemat(save_location + "-3dtransform-corr.mat", dict(metric=avg_corrs))
        scipy.io.savemat(save_location + "-3dtransform-pvals.mat", dict(metric=ttest_pval))
        print("done.")

    return
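# Example invocation (a sketch: the script name "fdr_significance.py" and the
# embedding path are hypothetical; the flags come from the parser above):
#
#   python fdr_significance.py --single_subject --searchlight --fdr \
#       --bert --which_layer 4 --subject_number 1 --agg_type avg \
#       --embedding_layer /path/to/bert-avg.p
#
# For a group-level run, pass --group_level together with --subjects "1,2,3".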
def linear_model(embed_matrix, spotlight_activations, args, kfold_split, alpha):
    global predicted_trials
    predicted = []

    # Choose regression direction: brain -> model (decoding) or model -> brain (encoding).
    if args.brain_to_model:
        from_regress = np.array(spotlight_activations)
        to_regress = np.array(embed_matrix)
    else:
        from_regress = np.array(embed_matrix)
        to_regress = np.array(spotlight_activations)

    print("FROM REGRESS: " + str(from_regress.shape))
    print("TO REGRESS: " + str(to_regress.shape))

    if args.cross_validation:
        outer_kf = KFold(n_splits=kfold_split, shuffle=True)
        errors = []
        predicted_trials = np.zeros((to_regress.shape[0],))
        llhs = []
        rankings = []

        if args.add_bias:
            from_regress = helper.add_bias(from_regress)
        if args.permutation:
            np.random.shuffle(from_regress)

        for train_index, test_index in outer_kf.split(from_regress):
            greatest_possible_rank = len(test_index)
            X_train, X_test = from_regress[train_index], from_regress[test_index]
            y_train, y_test = to_regress[train_index], to_regress[test_index]

            # nested CV: pick the regularization strength on the training fold only
            inner_kf = KFold(n_splits=kfold_split, shuffle=True)
            alphas = np.logspace(-10, 10, 21, endpoint=True)
            clf = RidgeCV(alphas=alphas).fit(X_train, y_train)
            best_alpha = clf.alpha_

            # refit with ridge regression at the selected alpha
            clf = Ridge(alpha=best_alpha)
            clf.fit(X_train, y_train)
            y_hat_test = clf.predict(X_test)
            predicted_trials[test_index] = y_hat_test

            if args.llh:
                n = X_train.shape[0]
                k = X_train.shape[1]
                y_hat_train = clf.predict(X_train)
                sigma_train = np.sum((y_hat_train - y_train) ** 2, axis=0)
                llh = vectorize_llh(y_hat_test, y_test, sigma_train)
                llhs.append(llh)

            if args.ranking and args.model_to_brain:
                y_hat_test_reshape = y_hat_test.reshape((len(y_hat_test), 1))
                y_test_reshape = y_test.reshape((len(y_test), 1))
                true_distances = helper.calculate_true_distances(y_hat_test_reshape, y_test_reshape)
                print("TRUE DISTANCES: " + str(true_distances.shape))
                distance_matrix = helper.compute_distance_matrix(y_hat_test_reshape, y_test_reshape)
                print("DISTANCE MATRIX: " + str(distance_matrix.shape))
                rank = helper.calculate_rank(true_distances, distance_matrix)
                rank_accuracy = 1 - (rank - 1) * 1.0 / (greatest_possible_rank - 1)
                rankings.append(rank_accuracy)

        errors = np.sqrt(np.sum(np.abs(np.array(predicted_trials) - to_regress)))
        return errors.astype(np.float32), predicted_trials, np.sum(llhs).astype(np.float32), np.mean(rankings).astype(np.float32)
    return
def linear_model(embed_matrix, spotlight_activations, args, kfold_split, alpha):
    global predicted_trials
    predicted = []

    if args.brain_to_model:
        from_regress = np.array(spotlight_activations)
        to_regress = np.array(embed_matrix)
    else:
        from_regress = np.array(embed_matrix)
        to_regress = np.array(spotlight_activations)

    if args.cross_validation:
        kf = KFold(n_splits=kfold_split)
        errors = []
        predicted_trials = np.zeros((to_regress.shape[0], to_regress.shape[1]))
        llhs = []
        rankings = []
        pvalues = []

        if args.add_bias:
            from_regress = helper.add_bias(from_regress)
        if args.permutation:
            np.random.shuffle(from_regress)

        # Select a single regularization strength on all of the data,
        # then reuse it in every fold below.
        alphas = np.logspace(-10, 20, 31, endpoint=False)
        clf = RidgeCV(alphas=alphas).fit(from_regress, to_regress)
        best_alpha = clf.alpha_
        print("BEST ALPHA: " + str(best_alpha))
        # best_alpha = 0

        # if args.significance:
        #     clf = Ridge(alpha=best_alpha)
        #     score, permutation_scores, pvalue = permutation_test_score(clf, from_regress, to_regress, scoring="neg_mean_squared_error", cv=5, n_permutations=100, n_jobs=1)
        #     pvalues.append(pvalue)

        for train_index, test_index in kf.split(from_regress):
            greatest_possible_rank = len(test_index)
            X_train, X_test = from_regress[train_index], from_regress[test_index]
            y_train, y_test = to_regress[train_index], to_regress[test_index]

            # with ridge regression
            clf = Ridge(alpha=best_alpha)
            clf.fit(X_train, y_train)
            y_hat_test = clf.predict(X_test)
            predicted_trials[test_index] = y_hat_test

            if args.llh:
                y_hat_train = clf.predict(X_train)
                sigma_train = np.sum((y_hat_train - y_train) ** 2, axis=0)
                llh = vectorize_llh(y_hat_test, y_test, sigma_train)
                llhs.append(llh)

            if args.ranking and args.model_to_brain:
                true_distances = helper.calculate_true_distances(y_hat_test, y_test)
                distance_matrix = helper.compute_distance_matrix(y_hat_test, y_test)
                rank = helper.calculate_rank(true_distances, distance_matrix)
                rank_accuracy = 1 - (rank - 1) * 1.0 / (greatest_possible_rank - 1)
                rankings.append(rank_accuracy)

        errors = np.sqrt(np.sum(np.abs(np.array(predicted_trials) - to_regress)))
        return errors.astype(np.float32), predicted_trials, np.mean(llhs).astype(np.float64), np.mean(rankings).astype(np.float32), best_alpha
    return
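# Worked note on the rank-accuracy metric used in both linear_model variants:
# within a test fold of size greatest_possible_rank, a prediction whose true
# target is the closest of all candidates (rank = 1) scores 1.0, and one whose
# true target is the farthest (rank = greatest_possible_rank) scores 0.0.
# For example, with 11 test items: 1 - (1 - 1) / 10 == 1.0 and 1 - (11 - 1) / 10 == 0.0.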
def forward(self, l):
    # Propagate the input through each layer: linear transform, activation,
    # then a bias column (if enabled) before feeding the next layer.
    for i in range(self.no_of_layers + 1):
        l = self.activation(np.dot(l, self.weights[i].T))
        self.f[i] = l
        if self.bias:
            l = hp.add_bias(l)
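# A minimal standalone sketch of a single forward step of the loop above,
# assuming weights are stored as (n_out, n_in) and using a sigmoid as an
# example activation (both are assumptions; the real class receives the
# activation function as a constructor argument).
def _forward_step_sketch(x, W):
    """Return the activated output of one layer for a batch x of shape (n, n_in)."""
    z = np.dot(x, W.T)               # pre-activations, shape (n, n_out)
    return 1.0 / (1.0 + np.exp(-z))  # example sigmoid activation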