def generate_file_name(args, subject_number, which_layer, glove=False, word2vec=False, baseline=False):
    """Return the base file name (no directory, no extension) for one subject/layer.

    The nine label strings come from ``helper.generate_labels(args)``.

    * If ``args.bert``, ``args.word2vec`` or ``args.glove`` is set, the name is
      all labels followed by ``-subj<subject>-<agg_type>_layer<layer>``.
    * Otherwise, if ``baseline`` is true, the ``glove`` / ``word2vec`` keyword
      arguments select a fixed ``avg_layer1`` baseline name; with neither set
      the name of the parallel English-to-``args.language`` 4-layer ``brnn``
      model is produced.
    * Otherwise the string ``"ERROR"`` is returned.
    """
    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel,
     prlabel) = helper.generate_labels(args)
    direction_and_validation = str(direction) + str(validate)

    if args.bert or args.word2vec or args.glove:
        label_prefix = "".join(
            str(label)
            for label in (plabel, prlabel, rlabel, elabel, glabel, w2vlabel,
                          bertlabel))
        template = label_prefix + direction_and_validation + "-subj{}-{}_layer{}"
        return template.format(subject_number, args.agg_type, which_layer)

    if not baseline:
        return "ERROR"

    if glove:
        return ("glove" + direction_and_validation
                + "-subj{}-avg_layer1".format(subject_number))

    if word2vec:
        return ("word2vec" + direction_and_validation
                + "-subj{}-avg_layer1".format(subject_number))

    template = (direction_and_validation
                + "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}")
    return template.format(subject_number, args.language, 4, "brnn",
                           which_layer, args.agg_type)
def main():
    """Decode one subject's fMRI data against an embedding layer and pickle the results.

    Steps:

    1. Parse the command line.
    2. Load the embedding matrix: a ``.mat`` file read with
       ``scipy.io.loadmat`` unless one of ``--glove`` / ``--word2vec`` /
       ``--bert`` / ``--rand_embed`` is given, in which case the file is
       unpickled.
    3. Load ``volmask.p`` and ``modified_activations.p`` from
       ``<fmri_path>subj<subject_number>/``.
    4. Run ``mixed_effects_analysis`` (``--mixed_effects``) or
       ``all_activations_for_all_sentences`` and, for the latter, pickle the
       residuals / likelihoods / rankings under ``--to_save_path``.

    ``--fmri_path`` and ``--to_save_path`` are joined by plain string
    concatenation, so both must end with a path separator.

    Side effects: sets the module-level ``temp_file_name`` and creates the
    ``residuals_od32/``, ``final_rankings/``, ``rsa/`` and ``nested_llh/``
    directories under ``--to_save_path``.
    """
    global temp_file_name

    def load_pickle(path):
        # The context manager closes the handle even if unpickling raises.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path, **dump_kwargs):
        # Closing the handle guarantees the pickle is flushed to disk.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle, **dump_kwargs)

    argparser = argparse.ArgumentParser(
        description="Decoding (linear reg). step for correlating NN and brain")

    def add_flag(name, default, help_text):
        # All boolean options are store_true switches; note that a switch
        # whose default is already True cannot be turned off from the CLI.
        argparser.add_argument(name, action='store_true', default=default,
                               help=help_text)

    argparser.add_argument('--embedding_layer', type=str,
                           help="Location of NN embedding (for a layer)",
                           required=True)
    add_flag("--rsa", False, "True if RSA is used to generate residual values")
    add_flag("--brain_to_model", False,
             "True if regressing brain to model, False if not")
    add_flag("--model_to_brain", False,
             "True if regressing model to brain, False if not")
    argparser.add_argument(
        "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)
    add_flag("--cross_validation", True,
             "True if add cross validation, False if not")
    argparser.add_argument("--subject_number", type=int, default=1,
                           help="subject number (fMRI data) for decoding")
    add_flag("--random", False,
             "True if initialize random brain activations, False if not")
    add_flag("--rand_embed", False,
             "True if initialize random embeddings, False if not")
    add_flag("--glove", False,
             "True if initialize glove embeddings, False if not")
    add_flag("--word2vec", False,
             "True if initialize word2vec embeddings, False if not")
    add_flag("--bert", False,
             "True if initialize bert embeddings, False if not")
    add_flag("--normalize", False,
             "True if add normalization across voxels, False if not")
    add_flag("--permutation", False, "True if permutation, False if not")
    add_flag("--permutation_region", False,
             "True if permutation by brain region, False if not")
    add_flag("--add_bias", True, "True if add bias, False if not")
    add_flag("--llh", True, "True if calculate likelihood, False if not")
    add_flag("--ranking", False, "True if calculate ranking, False if not")
    add_flag("--mixed_effects", False,
             "True if calculate mixed effects, False if not")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path", default="/n/shieber_lab/Lab/users/cjou/fmri/", type=str,
        help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path", default="/n/shieber_lab/Lab/users/cjou/", type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    # Embedding file name without directory and extension.
    embed_loc = args.embedding_layer
    embed_stem = embed_loc.split("/")[-1].split(".")[0]
    if not (args.glove or args.word2vec or args.bert or args.rand_embed):
        file_name = embed_stem
        embedding = scipy.io.loadmat(embed_loc)
        embed_matrix = helper.get_embed_matrix(embedding)
    else:
        # aggregation type + which layer
        file_name = embed_stem.split("-")[-1] + "_layer" + str(args.which_layer)
        embed_matrix = np.array(load_pickle(embed_loc))

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel,
     prlabel) = helper.generate_labels(args)

    # get modified activations
    subject_dir = "{}subj{}/".format(args.fmri_path, args.subject_number)
    volmask = load_pickle(subject_dir + "volmask.p")
    modified_activations = load_pickle(subject_dir + "modified_activations.p")

    print("PERMUTATION: " + str(args.permutation))
    print("PERMUTATION REGION: " + str(args.permutation_region))
    print("PLABEL: " + str(plabel))
    print("PRLABEL: " + str(prlabel))

    if args.normalize:
        modified_activations = helper.z_score(modified_activations)
        embed_matrix = helper.z_score(embed_matrix)

    if args.random:
        print("RANDOM ACTIVATIONS")
        modified_activations = np.random.randint(-20, high=20,
                                                 size=(240, 79, 95, 68))

    # make file path; exist_ok avoids the check-then-create race when several
    # jobs start at the same time.
    for subdir in ("residuals_od32", "final_rankings", "rsa", "nested_llh"):
        os.makedirs('{}{}/'.format(args.to_save_path, subdir), exist_ok=True)

    temp_file_name = (str(plabel) + str(prlabel) + str(rlabel) + str(elabel)
                      + str(glabel) + str(w2vlabel) + str(bertlabel)
                      + str(direction) + str(validate) + "-subj"
                      + str(args.subject_number) + "-" + str(file_name)
                      + "_no_spotlight")

    if args.mixed_effects:
        # The mixed-effects path produces no residuals/llhs/rankings, so there
        # is nothing for the dump code below to write.
        mixed_effects_analysis(args, embed_matrix)
        print("done.")
        return

    all_residuals, llhs, rankings = all_activations_for_all_sentences(
        modified_activations, volmask, embed_matrix, args)

    # dump
    if args.rsa:
        file_name = '{}rsa/'.format(args.to_save_path) + str(temp_file_name) + ".p"
        dump_pickle(all_residuals, file_name)
    else:
        if args.llh:
            llh_file_name = '{}nested_llh/'.format(args.to_save_path) + temp_file_name
            print("LLH SPOTLIGHTS FILE: " + str(llh_file_name))
            dump_pickle(llhs, llh_file_name + "-llh.p", protocol=-1)

        altered_file_name = '{}residuals_od32/'.format(args.to_save_path) + temp_file_name
        print("RESIDUALS FILE: " + str(altered_file_name))
        dump_pickle(all_residuals, altered_file_name + ".p", protocol=-1)

        if args.model_to_brain and args.ranking:
            ranking_file_name = '{}final_rankings/'.format(args.to_save_path) + temp_file_name
            print("RANKING FILE: " + str(ranking_file_name))
            dump_pickle(rankings, ranking_file_name + ".p", protocol=-1)

    print("done.")
    return
def main():
    """Run one batch of the mixed-effects analysis and pickle its result.

    Parses the command line, loads the embedding matrix (``.mat`` via
    ``scipy.io.loadmat`` by default, a pickle when ``--glove`` /
    ``--word2vec`` / ``--bert`` / ``--rand_embed`` is given), z-scores it with
    ``helper.z_score``, reduces it with ``PCA(0.75)`` and passes it to
    ``mixed_effects_analysis``. The returned value is pickled to
    ``<mixed_effects dir>/<temp_file_name>.p``, where the directory is
    ``../mixed_effects/`` with ``--local`` and the cluster path otherwise.

    Side effects: sets the module-level ``temp_file_name`` and creates the
    output directory if it is missing.
    """
    global temp_file_name

    argparser = argparse.ArgumentParser(
        description="Decoding (linear reg). step for correlating NN and brain")

    def add_flag(name, default, help_text):
        # All boolean options are store_true switches; a switch whose default
        # is already True cannot be turned off from the command line.
        argparser.add_argument(name, action='store_true', default=default,
                               help=help_text)

    argparser.add_argument('--embedding_layer', type=str,
                           help="Location of NN embedding (for a layer)",
                           required=True)
    add_flag("--rsa", False, "True if RSA is used to generate residual values")
    argparser.add_argument("--subject_mat_file", type=str, help=".mat file ")
    add_flag("--brain_to_model", False,
             "True if regressing brain to model, False if not")
    add_flag("--model_to_brain", True,
             "True if regressing model to brain, False if not")
    argparser.add_argument(
        "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)
    add_flag("--cross_validation", True,
             "True if add cross validation, False if not")
    add_flag("--random", False,
             "True if initialize random brain activations, False if not")
    add_flag("--rand_embed", False,
             "True if initialize random embeddings, False if not")
    add_flag("--glove", False,
             "True if initialize glove embeddings, False if not")
    add_flag("--word2vec", False,
             "True if initialize word2vec embeddings, False if not")
    add_flag("--bert", False,
             "True if initialize bert embeddings, False if not")
    add_flag("--normalize", True,
             "True if add normalization across voxels, False if not")
    add_flag("--permutation", False, "True if permutation, False if not")
    add_flag("--permutation_region", False,
             "True if permutation by brain region, False if not")
    add_flag("--add_bias", True, "True if add bias, False if not")
    add_flag("--llh", True, "True if calculate likelihood, False if not")
    add_flag("--ranking", True, "True if calculate ranking, False if not")
    add_flag("--mixed_effects", True,
             "True if calculate mixed effects, False if not")
    add_flag("--local", False, "True if local, False if not")
    argparser.add_argument(
        "--batch_num", type=int,
        help="batch number of total (for scripting) (out of --total_batches)",
        required=True)
    argparser.add_argument("--total_batches", type=int,
                           help="total number of batches", default=100)

    args = argparser.parse_args()

    # Embedding file name without directory and extension.
    embed_loc = args.embedding_layer
    embed_stem = embed_loc.split("/")[-1].split(".")[0]
    if not (args.glove or args.word2vec or args.bert or args.rand_embed):
        file_name = embed_stem
        embedding = scipy.io.loadmat(embed_loc)
        embed_matrix = helper.get_embed_matrix(embedding)
    else:
        # aggregation type + which layer
        file_name = embed_stem.split("-")[-1] + "_layer" + str(args.which_layer)
        with open(embed_loc, "rb") as handle:
            embed_matrix = np.array(pickle.load(handle))

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel,
     prlabel) = helper.generate_labels(args)

    print("PERMUTATION: " + str(args.permutation))
    print("PERMUTATION REGION: " + str(args.permutation_region))
    print("PLABEL: " + str(plabel))
    print("PRLABEL: " + str(prlabel))

    # normalize (applied unconditionally here, independent of --normalize)
    embed_matrix = helper.z_score(embed_matrix)

    # PCA
    pca = PCA(0.75)
    embed_matrix = pca.fit_transform(embed_matrix)
    print("PCA SHAPE: " + str(embed_matrix.shape))

    # make file path; exist_ok avoids the check-then-create race between
    # batch jobs that start at the same time.
    if args.local:
        file_path = '../mixed_effects/'
    else:
        file_path = '/n/shieber_lab/Lab/users/cjou/mixed_effects/'
    os.makedirs(file_path, exist_ok=True)

    temp_file_name = (str(plabel) + str(prlabel) + str(rlabel) + str(elabel)
                      + str(glabel) + str(w2vlabel) + str(bertlabel)
                      + str(direction) + str(validate) + "-" + str(file_name)
                      + "_mixed_effects_batch" + str(args.batch_num) + "of"
                      + str(args.total_batches))

    rmses = mixed_effects_analysis(args, embed_matrix)

    # dump
    altered_file_name = file_path + temp_file_name
    print("RESIDUALS FILE: " + str(altered_file_name))
    with open(altered_file_name + ".p", "wb") as handle:
        pickle.dump(rmses, handle, protocol=-1)

    print("done.")
    return
def main():
    """Run one batch of the decoding for one subject and pickle the results.

    Parses the command line, loads the embedding matrix (``.mat`` via
    ``scipy.io.loadmat`` by default, a pickle when ``--glove`` /
    ``--word2vec`` / ``--bert`` / ``--rand_embed`` is given) and the subject's
    ``volmask.p`` / ``modified_activations.p`` from the cluster fMRI
    directory, calls ``all_activations_for_all_sentences`` and pickles what it
    returns:

    * with ``--rsa``: the residuals to ``rsa/``;
    * otherwise: likelihoods to ``llh/`` (when ``--llh``), residuals to
      ``residuals_od32/``, alphas to ``alphas/`` and, when both
      ``--model_to_brain`` and ``--ranking`` are set, rankings to
      ``final_rankings/``.

    Side effects: sets the module-level ``temp_file_name`` and creates the
    output directories under ``/n/shieber_lab/Lab/users/cjou/``.
    """
    global temp_file_name

    save_root = "/n/shieber_lab/Lab/users/cjou/"

    def load_pickle(path):
        # The context manager closes the handle even if unpickling raises.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def dump_pickle(obj, path, **dump_kwargs):
        # Closing the handle guarantees the pickle is flushed to disk.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle, **dump_kwargs)

    argparser = argparse.ArgumentParser(
        description="Decoding (linear reg). step for correlating NN and brain")

    def add_flag(name, default, help_text):
        # All boolean options are store_true switches; a switch whose default
        # is already True cannot be turned off from the command line.
        argparser.add_argument(name, action='store_true', default=default,
                               help=help_text)

    argparser.add_argument('--embedding_layer', type=str,
                           help="Location of NN embedding (for a layer)",
                           required=True)
    add_flag("--rsa", False, "True if RSA is used to generate residual values")
    argparser.add_argument("--subject_mat_file", type=str, help=".mat file ")
    add_flag("--brain_to_model", False,
             "True if regressing brain to model, False if not")
    add_flag("--model_to_brain", False,
             "True if regressing model to brain, False if not")
    argparser.add_argument(
        "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)
    add_flag("--cross_validation", True,
             "True if add cross validation, False if not")
    argparser.add_argument("--subject_number", type=int, default=1,
                           help="subject number (fMRI data) for decoding")
    argparser.add_argument(
        "--batch_num", type=int,
        help="batch number of total (for scripting) (out of --total_batches)",
        required=True)
    argparser.add_argument("--total_batches", type=int,
                           help="total number of batches", required=True)
    add_flag("--random", False,
             "True if initialize random brain activations, False if not")
    add_flag("--rand_embed", False,
             "True if initialize random embeddings, False if not")
    add_flag("--glove", False,
             "True if initialize glove embeddings, False if not")
    add_flag("--word2vec", False,
             "True if initialize word2vec embeddings, False if not")
    add_flag("--bert", False,
             "True if initialize bert embeddings, False if not")
    add_flag("--normalize", False,
             "True if add normalization across voxels, False if not")
    add_flag("--permutation", False, "True if permutation, False if not")
    add_flag("--permutation_region", False,
             "True if permutation by brain region, False if not")
    add_flag("--add_bias", True, "True if add bias, False if not")
    add_flag("--llh", True, "True if calculate likelihood, False if not")
    add_flag("--ranking", True, "True if calculate ranking, False if not")
    add_flag("--mixed_effects", False,
             "True if calculate mixed effects, False if not")
    add_flag("--significance", False,
             "True if calculate significance, False if not")

    args = argparser.parse_args()

    # Embedding file name without directory and extension.
    embed_loc = args.embedding_layer
    embed_stem = embed_loc.split("/")[-1].split(".")[0]
    if not (args.glove or args.word2vec or args.bert or args.rand_embed):
        file_name = embed_stem
        embedding = scipy.io.loadmat(embed_loc)
        embed_matrix = helper.get_embed_matrix(embedding)
    else:
        # aggregation type + which layer
        file_name = embed_stem.split("-")[-1] + "_layer" + str(args.which_layer)
        embed_matrix = np.array(load_pickle(embed_loc))

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel,
     prlabel) = helper.generate_labels(args)

    # get modified activations
    subject_dir = f"{save_root}fmri/subj{args.subject_number}/"
    volmask = load_pickle(subject_dir + "volmask.p")
    modified_activations = load_pickle(subject_dir + "modified_activations.p")

    print("PERMUTATION: " + str(args.permutation))
    print("PERMUTATION REGION: " + str(args.permutation_region))
    print("PLABEL: " + str(plabel))
    print("PRLABEL: " + str(prlabel))

    if args.normalize:
        # Only the brain activations are z-scored in this script.
        modified_activations = helper.z_score(modified_activations)

    if args.random:
        print("RANDOM ACTIVATIONS")
        modified_activations = np.random.randint(-20, high=20,
                                                 size=(240, 79, 95, 68))

    # make file path; exist_ok avoids the check-then-create race between
    # batch jobs. final_rankings/ is included because the ranking dump below
    # writes into it.
    for subdir in ("residuals_od32", "predictions_od32",
                   "true_spotlights_od32", "rsa", "llh", "alphas",
                   "final_rankings"):
        os.makedirs(save_root + subdir + "/", exist_ok=True)

    temp_file_name = (str(plabel) + str(prlabel) + str(rlabel) + str(elabel)
                      + str(glabel) + str(w2vlabel) + str(bertlabel)
                      + str(direction) + str(validate) + "-subj"
                      + str(args.subject_number) + "-" + str(file_name)
                      + "_residuals_part" + str(args.batch_num) + "of"
                      + str(args.total_batches))

    # get residuals and predictions
    all_residuals, llhs, rankings, alphas = all_activations_for_all_sentences(
        modified_activations, volmask, embed_matrix, args)

    # dump
    if args.rsa:
        file_name = save_root + "rsa/" + str(temp_file_name) + ".p"
        dump_pickle(all_residuals, file_name)
    else:
        if args.llh:
            llh_file_name = save_root + "llh/" + temp_file_name
            print("LLH SPOTLIGHTS FILE: " + str(llh_file_name))
            dump_pickle(llhs, llh_file_name + "-llh.p", protocol=-1)

        altered_file_name = save_root + "residuals_od32/" + temp_file_name
        print("RESIDUALS FILE: " + str(altered_file_name))
        dump_pickle(all_residuals, altered_file_name + ".p", protocol=-1)

        altered_file_name = save_root + "alphas/" + temp_file_name
        print("ALPHAS FILE: " + str(altered_file_name))
        dump_pickle(alphas, altered_file_name + ".p", protocol=-1)

        if args.model_to_brain and args.ranking:
            ranking_file_name = save_root + "final_rankings/" + temp_file_name
            print("RANKING FILE: " + str(ranking_file_name))
            dump_pickle(rankings, ranking_file_name + ".p", protocol=-1)

    print("done.")
    return
def main():
    """Load concatenated per-voxel residuals and draw the ROI box plot.

    Parses the command line, rebuilds the residual file name from the labels
    returned by ``helper.generate_labels`` and the model options, unpickles
    ``../rmses/concatenated-<file_name>.p``, loads the subject's atlas/ROI
    values and labels, cleans the labels with ``helper.clean_roi`` /
    ``helper.clean_atlas``, assembles a ``pandas.DataFrame`` with one row per
    residual and calls ``plot_boxplot_for_roi``.

    Atlas/ROI files are read from the cluster directory unless ``--local`` is
    given; with ``--local --hard_drive`` they come from the external drive,
    with ``--local`` alone from ``../examplesGLM/``.

    Exactly one of ``--brain_to_model`` / ``--model_to_brain`` must be given;
    otherwise the parser reports the problem and exits with a non-zero
    status.

    Side effects: creates ``../visualizations/`` if it is missing.
    """
    argparser = argparse.ArgumentParser(description="plot RMSE by location")

    def add_flag(short, long_name, help_text):
        # Every boolean option in this script is a store_true switch that
        # defaults to False.
        argparser.add_argument(short, long_name, action='store_true',
                               default=False, help=help_text)

    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, default=2)
    argparser.add_argument("-model_type", "--model_type",
                           help="Type of model ('brnn', 'rnn')",
                           type=str, default='brnn')
    argparser.add_argument(
        "-which_layer", "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)
    argparser.add_argument(
        "-agg_type", "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str, default='avg')
    argparser.add_argument("-subject_number", "--subject_number", type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")
    add_flag("-cross_validation", "--cross_validation",
             "Add flag if add cross validation")
    add_flag("-brain_to_model", "--brain_to_model",
             "Add flag if regressing brain to model")
    add_flag("-model_to_brain", "--model_to_brain",
             "Add flag if regressing model to brain")
    add_flag("-glove", "--glove",
             "True if initialize glove embeddings, False if not")
    add_flag("-word2vec", "--word2vec",
             "True if initialize word2vec embeddings, False if not")
    add_flag("-random", "--random",
             "True if initialize random brain activations, False if not")
    add_flag("-rand_embed", "--rand_embed",
             "True if initialize random embeddings, False if not")
    add_flag("-bert", "--bert",
             "True if initialize bert embeddings, False if not")
    add_flag("-permutation", "--permutation",
             "True if permutation, False if not")
    add_flag("-permutation_region", "--permutation_region",
             "True if permutation by brain region, False if not")
    add_flag("-local", "--local", "True if running locally")
    add_flag("-hard_drive", "--hard_drive", "True if running from hard drive")

    args = argparser.parse_args()

    # check conditions // can remove when making pipeline
    # argparser.error prints the message to stderr and exits with status 2,
    # so a wrapper script can tell invalid usage from success.
    if args.brain_to_model and args.model_to_brain:
        argparser.error(
            "select only one flag for brain_to_model or model_to_brain")
    if not args.brain_to_model and not args.model_to_brain:
        argparser.error(
            "select at least flag for brain_to_model or model_to_brain")

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel,
     prlabel) = helper.generate_labels(args)

    # get residuals
    file_loc = (str(plabel) + str(prlabel) + str(rlabel) + str(elabel)
                + str(glabel) + str(w2vlabel) + str(bertlabel)
                + str(direction) + str(validate)
                + "subj{}_parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}")
    file_name = file_loc.format(args.subject_number, args.language,
                                args.num_layers, args.model_type,
                                args.which_layer, args.agg_type)

    def load_pickle(path):
        # The context manager closes the handle even if unpickling raises.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    residual_file = "../rmses/concatenated-" + str(file_name) + ".p"
    all_residuals = load_pickle(residual_file)

    # get atlas and roi
    if not args.local:
        subject_dir = f"/n/shieber_lab/Lab/users/cjou/fmri/subj{args.subject_number}/"
    elif args.hard_drive:
        # The directory is spelled "!RESEARCH" here. The original "\!" is a
        # shell escape, which Python keeps as a literal backslash in the path
        # (and warns about as an invalid escape sequence).
        subject_dir = f"/Volumes/passport/!RESEARCH/examplesGLM/subj{args.subject_number}/"
    else:
        subject_dir = f"../examplesGLM/subj{args.subject_number}/"

    atlas_vals = load_pickle(subject_dir + "atlas_vals.p")
    atlas_labels = load_pickle(subject_dir + "atlas_labels.p")
    roi_vals = load_pickle(subject_dir + "roi_vals.p")
    roi_labels = load_pickle(subject_dir + "roi_labels.p")

    print("INITIAL:")
    print(len(atlas_vals))
    print(len(atlas_labels))
    print(len(roi_vals))
    print(len(roi_labels))

    final_roi_labels = helper.clean_roi(roi_vals, roi_labels)
    at_labels = helper.clean_atlas(atlas_vals, atlas_labels)

    print("CLEANING")
    print(len(final_roi_labels))
    print(len(at_labels))

    os.makedirs('../visualizations/', exist_ok=True)

    # make dataframe
    voxel_count = len(all_residuals)
    print(voxel_count)  # length of the voxel_index column
    print(voxel_count)  # length of the residuals column
    print(len(at_labels))
    print(len(final_roi_labels))

    df_dict = {
        'voxel_index': list(range(voxel_count)),
        'residuals': all_residuals,
        'atlas_labels': at_labels,
        'roi_labels': final_roi_labels,
    }
    df = pd.DataFrame(df_dict)

    # create plots
    print("creating plots...")
    plot_boxplot_for_roi(df, args, file_name + "-boxplot-roi")

    print("done.")
    return
def save_script(args):
    """Write the SLURM submission scripts for a batched decoding run.

    One folder is created under ``../decoding_scripts/`` (``args.local``) or
    ``../../decoding_scripts/`` (otherwise), named after the label strings
    from ``helper.generate_labels(args)`` plus the subject and model
    description. Inside it this function writes:

    * ``<folder_name>.sh`` - a master script that loops over every batch
      index and calls ``sbatch`` on the matching job script;
    * one ``<job_id>.sh`` per batch in ``range(args.total_batches)`` that
      runs ``odyssey_decoding.py`` (``args.spotlight``) or
      ``nested_decoding.py`` with the flags reconstructed from ``args``.

    The embedding source is chosen in this order: the parallel translation
    model when none of ``args.bert`` / ``args.glove`` / ``args.word2vec`` is
    set, then bert, then glove, then word2vec.

    Returns ``None``; the folder name is printed.
    """
    scripts_root = '../decoding_scripts/' if args.local else '../../decoding_scripts/'
    # exist_ok avoids the check-then-create race with concurrent runs.
    os.makedirs(scripts_root, exist_ok=True)

    # file name assignments
    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel,
     prlabel) = helper.generate_labels(args)

    if args.rsa:
        rsa_label = "_rsa_"
        direction = ""
        validate = ""
    else:
        rsa_label = ""

    # Prefix shared by the folder name, the job ids and the master script.
    prefix = "".join(
        str(part)
        for part in (plabel, prlabel, rlabel, elabel, glabel, w2vlabel,
                     bertlabel, direction, validate, rsa_label))

    is_parallel = not args.bert and not args.glove and not args.word2vec

    # create subfolder
    if is_parallel:
        folder_name = (
            prefix
            + "subj{}_parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}"
        ).format(args.subject_number, args.language, args.num_layers,
                 args.model_type, args.which_layer, args.agg_type)
        master_script = "parallel-english-to-{}-model-{}layer-{}-pred-".format(
            args.language, args.num_layers, args.model_type)
        layer_script = "layer{}-".format(args.which_layer)
    elif args.bert:
        folder_name = (prefix + "subj{}_layer{}_{}").format(
            args.subject_number, args.which_layer, args.agg_type)
        master_script = ""
        layer_script = "layer{}_".format(args.which_layer)
    else:
        folder_name = (prefix + "subj{}_{}").format(args.subject_number,
                                                    args.agg_type)
        master_script = ""
        layer_script = ""
    print(folder_name)

    folder_path = scripts_root + str(folder_name) + '/'
    os.makedirs(folder_path, exist_ok=True)
    script_to_open = folder_path + str(folder_name) + ".sh"

    # Tail of every job script name. It is built once so that the names the
    # master script submits and the names of the files written below cannot
    # drift apart.
    job_suffix = master_script + layer_script + "{}".format(args.agg_type)

    # make master script
    # The loop bound follows args.total_batches so the master script submits
    # exactly the job scripts generated below (0 .. total_batches - 1).
    master_lines = [
        "#!/bin/bash",
        "for i in `seq 0 {}`; do".format(args.total_batches - 1),
        '  sbatch "{}subj{}_decoding_""$i""_of_{}_{}.sh" -H'.format(
            prefix, args.subject_number, args.total_batches, job_suffix),
        "done",
        "",
    ]
    with open(script_to_open, "w") as rsh:
        rsh.write("\n".join(master_lines))

    # Everything below is identical for all batches, so compute it once.
    if is_parallel:
        embedding_layer_location = "/n/shieber_lab/Lab/users/cjou/embeddings/parallel/{0}/{1}layer-{2}/{3}/parallel-english-to-{0}-model-{1}layer-{2}-pred-layer{4}-{3}.mat".format(
            args.language, args.num_layers, args.model_type, args.agg_type,
            args.which_layer)
    elif args.bert:
        embedding_layer_location = "/n/shieber_lab/Lab/users/cjou/embeddings/bert/layer{0}/{1}.p".format(
            args.which_layer, args.agg_type)
    elif args.glove:
        embedding_layer_location = "/n/shieber_lab/Lab/users/cjou/embeddings/glove/{0}.p".format(
            args.agg_type)
    else:
        # Only word2vec remains once the three cases above are excluded.
        embedding_layer_location = "/n/shieber_lab/Lab/users/cjou/embeddings/word2vec/{0}.p".format(
            args.agg_type)

    cvflag = " --cross_validation " if args.cross_validation else ""
    dflag = " --brain_to_model " if args.brain_to_model else " --model_to_brain "
    pflag = "" if plabel == "" else "--permutation"
    prflag = "" if prlabel == "" else "--permutation_region"
    rflag = "" if rlabel == "" else "--" + str(rlabel)
    gflag = "" if glabel == "" else "--" + str(glabel)
    w2vflag = "" if w2vlabel == "" else "--" + str(w2vlabel)
    bertflag = "" if bertlabel == "" else "--" + str(bertlabel)
    eflag = "" if elabel == "" else "--" + str(elabel)
    memmap_flag = " --memmap" if args.memmap else ""
    rsaflag = " --rsa" if args.rsa else ""
    spotlight = "odyssey" if args.spotlight else "nested"

    # Resource request per analysis type.
    if args.rsa:
        mem = "4500"
        timelimit = "0-6:00"
    elif args.llh:
        mem = "9000"
        timelimit = "0-24:00"
    else:
        mem = "7000"
        timelimit = "0-7:00"

    # A backslash at the end of a line inside this (non-raw) string literal
    # joins the lines, so the python command is emitted as one shell line.
    # NOTE(review): the "[email protected]" directive is reproduced as found; it
    # looks like a redacted `--mail-user=<address>` option - confirm and
    # restore the real value.
    job_template = '''\
#!/bin/bash
#SBATCH -J {0} # Job name
#SBATCH -p serial_requeue # partition (queue)
#SBATCH --mem {17} # memory pool for all cores
#SBATCH -t {18} # time (D-HH:MM)
#SBATCH --output=/n/home10/cjou/projects # file output location
#SBATCH -o ../../logs/outpt_{0}.txt # File that STDOUT writes to
#SBATCH -e ../../logs/err_{0}.txt # File that STDERR writes to
#SBATCH --mail-type=ALL
#SBATCH [email protected]

module load Anaconda3/5.0.1-fasrc02
source activate virtualenv

python ../../projects/opennmt-inspection/{19}_decoding.py \
--embedding_layer {1} \
--subject_mat_file /n/shieber_lab/Lab/users/cjou/fmri/subj{2}/examplesGLM.mat \
{3} {4} \
--subject_number {2} \
--batch_num {5} \
--total_batches {6} \
--which_layer {7} \
{8} {9} {10} {11} {12} {13} {14} {15} {16}
'''

    # break into batches
    for i in range(args.total_batches):
        job_id = "{}subj{}_decoding_{}_of_{}_{}".format(
            prefix, args.subject_number, i, args.total_batches, job_suffix)
        fname = folder_path + str(job_id) + '.sh'
        with open(fname, 'w') as rsh:
            rsh.write(job_template.format(
                job_id, embedding_layer_location, args.subject_number, dflag,
                cvflag, i, args.total_batches, args.which_layer, rflag, eflag,
                gflag, w2vflag, bertflag, pflag, prflag, memmap_flag, rsaflag,
                mem, timelimit, spotlight))
def main():
    """Summarise one subject's per-layer metric by ROI and atlas region.

    Parses the command line, loads the subject's volume mask and ROI/atlas
    pickles, collects the selected metric for every layer (BERT, or the
    OpenNMT encoder when neither glove nor word2vec is requested), prints the
    head of the sorted layer-1 averages and passes the assembled DataFrame to
    the ``helper`` plotting routines.

    The script prints an error and exits when the layer count does not match
    the chosen embedding type or when no metric flag is given.
    """
    argparser = argparse.ArgumentParser(
        description="layer and subject group level comparison")
    argparser.add_argument("-subject_number", "--subject_number", type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")

    ### SPECIFY MODEL PARAMETERS ###
    argparser.add_argument("-cross_validation", "--cross_validation",
                           help="Add flag if add cross validation",
                           action='store_true', default=False)
    argparser.add_argument("-brain_to_model", "--brain_to_model",
                           help="Add flag if regressing brain to model",
                           action='store_true', default=False)
    argparser.add_argument("-model_to_brain", "--model_to_brain",
                           help="Add flag if regressing model to brain",
                           action='store_true', default=False)
    argparser.add_argument(
        "-agg_type", "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str, default='avg')
    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, required=True)
    argparser.add_argument(
        "-random", "--random", action='store_true', default=False,
        help="True if initialize random brain activations, False if not")
    argparser.add_argument(
        "-rand_embed", "--rand_embed", action='store_true', default=False,
        help="True if initialize random embeddings, False if not")
    argparser.add_argument(
        "-glove", "--glove", action='store_true', default=False,
        help="True if initialize glove embeddings, False if not")
    argparser.add_argument(
        "-word2vec", "--word2vec", action='store_true', default=False,
        help="True if initialize word2vec embeddings, False if not")
    argparser.add_argument(
        "-bert", "--bert", action='store_true', default=False,
        help="True if initialize bert embeddings, False if not")
    argparser.add_argument(
        "-normalize", "--normalize", action='store_true', default=False,
        help="True if add normalization across voxels, False if not")
    argparser.add_argument("-permutation", "--permutation",
                           action='store_true', default=False,
                           help="True if permutation, False if not")
    argparser.add_argument(
        "-permutation_region", "--permutation_region",
        action='store_true', default=False,
        help="True if permutation by brain region, False if not")

    ### SPECIFY WHICH METRIC ###
    argparser.add_argument("-fdr", "--fdr", help="if apply FDR",
                           action='store_true', default=False)
    argparser.add_argument("-llh", "--llh", action='store_true',
                           default=False,
                           help="True if calculate likelihood, False if not")
    argparser.add_argument("-ranking", "--ranking", action='store_true',
                           default=False,
                           help="True if calculate ranking, False if not")
    argparser.add_argument("-rmse", "--rmse", action='store_true',
                           default=False,
                           help="True if calculate rmse, False if not")
    argparser.add_argument("-rsa", "--rsa", action='store_true',
                           default=False,
                           help="True if calculate rsa, False if not")
    argparser.add_argument("-local", "--local", action='store_true',
                           default=False, help="True if running locally")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path", default="/n/shieber_lab/Lab/users/cjou/fmri/",
        type=str, help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path", default="/n/shieber_lab/Lab/users/cjou/", type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    if args.num_layers != 12 and args.bert:
        print("error: please ensure bert has 12 layers")
        exit()

    if args.num_layers != 1 and (args.word2vec or args.random
                                 or args.permutation or args.glove):
        print("error: please ensure baseline has 1 layer")
        exit()

    if not (args.fdr or args.llh or args.ranking or args.rmse or args.rsa):
        print("error: select at least 1 metric of correlation")
        exit()

    print("NUMBER OF LAYERS: " + str(args.num_layers))

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel,
     plabel, prlabel) = helper.generate_labels(args)

    def load_pickle(path):
        """Unpickle ``path`` and close the file handle afterwards."""
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def load_local_metric(mat_prefix):
        """Read the selected metric from ``mat_prefix + "<metric>.mat"``.

        Every selected metric file is read in turn; when several metric flags
        are combined the last one read is the one that is converted.
        """
        content = None
        if args.ranking:
            content = scipy.io.loadmat(mat_prefix + "ranking.mat")["metric"]
        if args.rmse:
            content = scipy.io.loadmat(mat_prefix + "rmse.mat")["metric"]
        if args.llh:
            content = np.abs(
                scipy.io.loadmat(mat_prefix + "llh.mat")["metric"])
        if args.rsa:
            content = scipy.io.loadmat(mat_prefix + "rsa.mat")["metric"]
        if content is None:
            # Only --fdr was given: no .mat file is read for it here.
            print("error: no .mat file is loaded for the selected metric")
            exit()
        return helper.convert_matlab_to_np(content, volmask)

    # get subject
    # The cluster path was previously built from a plain string, so the
    # literal text "{args.subject_number}" ended up in the file name.
    if args.local:
        subject_dir = f"../examplesGLM/subj{args.subject_number}/"
    else:
        subject_dir = f"{args.fmri_path}subj{args.subject_number}/"

    volmask = load_pickle(subject_dir + "volmask.p")
    atlas_vals = load_pickle(subject_dir + "atlas_vals.p")
    atlas_labels = load_pickle(subject_dir + "atlas_labels.p")
    roi_vals = load_pickle(subject_dir + "roi_vals.p")
    roi_labels = load_pickle(subject_dir + "roi_labels.p")

    true_roi_labels = helper.compare_labels(roi_labels, volmask,
                                            subj_num=args.subject_number,
                                            roi=True)
    true_atlas_labels = helper.compare_labels(atlas_labels, volmask,
                                              subj_num=args.subject_number)

    # clean labels
    # NOTE(review): the cleaned labels are not used below; the calls are kept
    # in case the helpers are relied on for their side effects -- confirm.
    final_roi_labels = helper.clean_roi(roi_vals, roi_labels)
    final_atlas_labels = helper.clean_atlas(atlas_vals, atlas_labels)

    layer_info = []
    metric_info = []
    # One copy of the per-voxel labels for every layer, so the label columns
    # line up with the concatenated per-layer metric values.
    roi_info = true_roi_labels * args.num_layers
    atlas_info = true_atlas_labels * args.num_layers

    # get information
    if args.bert:
        print("getting metric information per layer...")
        for layer in tqdm(range(1, args.num_layers + 1)):
            file_name = "bert{}{}-subj{}-{}_layer{}".format(
                direction, validate, args.subject_number, args.agg_type,
                layer)
            if args.local:
                values = load_local_metric(
                    "../mat/" + file_name + "-3dtransform-")
            else:
                values = load_pickle(
                    str(args.to_save_path) + "final_rankings/" + file_name
                    + ".p")
            metric_info.extend(values)
            layer_info.extend(len(values) * [layer])
        to_save_file = str(plabel) + str(prlabel) + str(glabel) + str(
            w2vlabel) + str(bertlabel) + str(direction) + str(
                validate) + "-subj" + str(args.subject_number) + "-bert"
    elif not args.glove and not args.word2vec:
        for layer in tqdm(range(1, args.num_layers + 1)):
            file_name = "{}{}-subj{}-parallel-english-to-{}-model-{}layer-brnn-pred-layer{}-{}-3dtransform-".format(
                direction, validate, args.subject_number, args.language,
                args.num_layers, layer, args.agg_type)
            print(file_name)
            if args.local:
                # The RSA file is now read here as well, mirroring the BERT
                # branch; previously --rsa alone left `content` unassigned.
                values = load_local_metric("../mat/" + file_name)
            else:
                # NOTE(review): file_name ends in "-3dtransform-", so this
                # opens "...-3dtransform-.p" -- confirm that is intended.
                values = load_pickle(
                    str(args.to_save_path) + "final_rankings/" + file_name
                    + ".p")
            metric_info.extend(values)
            layer_info.extend(len(values) * [layer])
        to_save_file = "{}_{}_subj{}_{}layer_{}".format(
            direction, validate, args.subject_number, args.num_layers,
            args.language)
    else:
        # word2vec, glove
        # NOTE(review): nothing is loaded and to_save_file is never set on
        # this path, so the metric sections below cannot complete for these
        # baselines.
        pass

    print("LAYER INFO: " + str(len(layer_info)))
    print("METRIC INFO: " + str(len(metric_info)))

    if args.ranking and args.model_to_brain:
        df = pd.DataFrame({
            'layer': layer_info,
            'AR': metric_info,
            'ROI': roi_info,
            'atlas': atlas_info
        })
        df_slice = df.loc[df["layer"] == 1][["atlas", "AR"]]
        avg_df = df_slice.groupby(['atlas']).mean()
        print(avg_df.sort_values(by='AR', ascending=False).head())

        print("plotting values...")
        helper.plot_roi_across_layers(
            df, "AR", "../fixed_roi_ar_" + to_save_file + ".png")
        helper.plot_atlas_across_layers(
            df, "AR", "../fixed_atlas_ar_" + to_save_file + ".png")

    if args.rmse:
        df = pd.DataFrame({
            'layer': layer_info,
            'RMSE': metric_info,
            'ROI': roi_info,
            'atlas': atlas_info
        })
        df_slice = df.loc[df["layer"] == 1][["atlas", "RMSE"]]
        avg_df = df_slice.groupby(['atlas']).mean()
        print(avg_df.sort_values(by='RMSE', ascending=True).head())

        print("plotting values...")
        helper.plot_roi_across_layers(
            df, "RMSE", "../fixed_roi_rmse_" + to_save_file + ".png")
        helper.plot_atlas_across_layers(
            df, "RMSE", "../fixed_atlas_rmse_" + to_save_file + ".png")

    if args.llh:
        df = pd.DataFrame({
            'layer': layer_info,
            'LLH': metric_info,
            'ROI': roi_info,
            'atlas': atlas_info
        })
        df_slice = df.loc[df["layer"] == 1][["atlas", "LLH"]]
        avg_df = df_slice.groupby(['atlas']).mean()
        print(avg_df.sort_values(by='LLH', ascending=True).head())

        print("plotting values...")
        helper.plot_roi_across_layers(
            df, "LLH", "../fixed_roi_llh_" + to_save_file + ".png")
        helper.plot_atlas_across_layers(
            df, "LLH", "../fixed_atlas_llh_" + to_save_file + ".png")

    if args.rsa:
        df = pd.DataFrame({
            'layer': layer_info,
            'correlation_coefficient': metric_info,
            'ROI': roi_info,
            'atlas': atlas_info
        })

        print("ATLAS...")
        df_slice = df.loc[df["layer"] == 1][[
            "atlas", "correlation_coefficient"
        ]]
        avg_df = df_slice.groupby(['atlas']).mean()
        print(
            avg_df.sort_values(by='correlation_coefficient',
                               ascending=True).head())
        print(
            avg_df.sort_values(by='correlation_coefficient',
                               ascending=False).head())

        print("ROI...")
        df_slice = df.loc[df["layer"] == 1][["ROI",
                                             "correlation_coefficient"]]
        avg_df = df_slice.groupby(['ROI']).mean()
        print(
            avg_df.sort_values(by='correlation_coefficient',
                               ascending=True).head())
        print(
            avg_df.sort_values(by='correlation_coefficient',
                               ascending=False).head())

        print("plotting values...")
        helper.plot_roi_across_layers(
            df, "correlation_coefficient",
            "../fixed_roi_rsa_" + to_save_file + ".png")
        helper.plot_atlas_across_layers(
            df, "correlation_coefficient",
            "../fixed_atlas_rsa_" + to_save_file + ".png")

    print("done.")
def generate_file_name(args, subject_number, which_layer):
    """Build the result-file stem for one subject and one layer.

    The stem starts with the labels returned by ``helper.generate_labels``
    (permutation, permutation-region, random, rand-embed, glove, word2vec,
    bert, direction, validation -- in that order) followed by a suffix that
    depends on the embedding type:

    * bert / word2vec / glove: ``-subj<N>-<agg_type>_layer<L>``
    * otherwise (OpenNMT): ``-subj<N>-parallel-english-to-<language>-model-
      <num_layers>layer-<model_type>-pred-layer<L>-<agg_type>``

    The label prefix is part of the ``str.format`` template, exactly as the
    suffix is.
    """
    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel,
     plabel, prlabel) = helper.generate_labels(args)

    label_parts = (plabel, prlabel, rlabel, elabel, glabel, w2vlabel,
                   bertlabel, direction, validate)
    prefix = "".join(str(part) for part in label_parts)

    uses_fixed_embedding = args.bert or args.word2vec or args.glove
    if uses_fixed_embedding:
        template = prefix + "-subj{}-{}_layer{}"
        return template.format(subject_number, args.agg_type, which_layer)

    template = (
        prefix
        + "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}")
    return template.format(subject_number, args.language, args.num_layers,
                           args.model_type, which_layer, args.agg_type)
def main():
    """Drive the OpenNMT pipeline: parse options, then optionally build a model.

    After parsing the command line the function asks ``helper`` to validate
    the arguments and to derive the file-name labels and global options.
    With ``--create_model`` it shells out to ``preprocess.py``, ``train.py``
    and ``translate.py`` in that order; the training/validation file names are
    still empty placeholders in this script.
    """
    ############# GET ARGUMENTS BELOW #############
    parser = argparse.ArgumentParser(
        description="entire OpenNMT pipeline: data prep, model, decoding, visualization")

    # model type
    parser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    parser.add_argument("-num_layers", "--num_layers",
                        help="Total number of layers ('2', '4')",
                        type=int, default=2)
    parser.add_argument("-model_type", "--model_type",
                        help="Type of model ('brnn', 'rnn')",
                        type=str, default='brnn')
    parser.add_argument("-which_layer", "--which_layer",
                        help="Layer of interest in [1: total number of layers]",
                        type=int, default=1)
    parser.add_argument("-agg_type", "--agg_type",
                        help="Aggregation type ('avg', 'max', 'min', 'last')",
                        type=str, default='avg')
    parser.add_argument("-nbatches", "--nbatches",
                        help="Total number of batches to run",
                        type=int, default=100)

    # subject for brain data
    parser.add_argument("-subj_num", "--subj_num",
                        help="fMRI subject number ([1:11])",
                        type=int, default=1)
    parser.add_argument("-format_data", "--format_data",
                        help="Format fMRI data",
                        action='store_true', default=False)

    # opennmt model
    parser.add_argument("-create_model", "--create_model",
                        help="create OpenNMT prediction model",
                        action='store_true', default=False)

    # initializations
    # The help text used to be a copy of the cross-validation one; it now
    # matches the wording the sibling parsers use for --random.
    parser.add_argument(
        "-random", "--random", action='store_true', default=False,
        help="True if initialize random brain activations, False if not")
    parser.add_argument(
        "-rand_embed", "--rand_embed", action='store_true', default=False,
        help="True if initialize random embeddings, False if not")
    parser.add_argument(
        "-glove", "--glove", action='store_true', default=False,
        help="True if initialize glove embeddings, False if not")
    parser.add_argument(
        "-word2vec", "--word2vec", action='store_true', default=False,
        help="True if initialize word2vec embeddings, False if not")
    parser.add_argument(
        "-bert", "--bert", action='store_true', default=False,
        help="True if initialize bert embeddings, False if not")
    parser.add_argument("-permutation", "--permutation",
                        action='store_true', default=False,
                        help="True if permutation, False if not")
    parser.add_argument(
        "-permutation_region", "--permutation_region",
        action='store_true', default=False,
        help="True if permutation by brain region, False if not")

    # evaluation metrics
    parser.add_argument("-decoding", "--decoding", action='store_true',
                        default=False, help="True if decoding, False if not")
    parser.add_argument("-cross_validation", "--cross_validation",
                        help="Add flag if add cross validation",
                        action='store_true', default=False)
    parser.add_argument("-brain_to_model", "--brain_to_model",
                        help="Add flag if regressing brain to model",
                        action='store_true', default=False)
    parser.add_argument("-model_to_brain", "--model_to_brain",
                        help="Add flag if regressing model to brain",
                        action='store_true', default=False)
    parser.add_argument("-fdr", "--fdr", action='store_true', default=False,
                        help="True if FDR, False if not")
    parser.add_argument("-rank", "--rank", action='store_true',
                        default=False, help="True if rank, False if not")
    parser.add_argument("-llh", "--llh", action='store_true', default=False,
                        help="True if likelihood, False if not")
    parser.add_argument("-local", "--local", action='store_true',
                        default=False,
                        help="True if running locally, False if not")

    args = parser.parse_args()

    ############# VALIDATE ARGUMENTS #############
    # NOTE(review): called without `args` -- confirm the helper's signature.
    helper.validate_arguments()

    ############# CREATE LABELS #############
    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel,
     plabel, prlabel) = helper.generate_labels(args)

    ############# GLOBAL OPTIONS #############
    get_residuals_and_make_scripts, options = helper.generate_options(args)

    ############# CREATE OPENNMT MODEL #############
    if args.create_model:
        ### multiparallelize texts
        ### todo: add here

        ### preprocess
        ### train
        ### translate
        ### todo: add locations
        training_src = ""
        training_tgt = ""
        validation_src = ""
        validation_tgt = ""

        # The source file used to be read from the undefined name
        # `training_text`, which raised NameError as soon as --create_model
        # was passed; `training_src` is the variable declared for it above.
        # NOTE(review): the -valid_tgt directory is
        # "training/validation/" while -valid_src uses "validation/" --
        # confirm which one is intended.
        preprocess = (
            "python preprocess.py -train_src ../multiparallelize/training/"
            + str(training_src)
            + " -train_tgt ../multiparallelize/training/"
            + str(training_tgt)
            + " -valid_src ../multiparallelize/validation/"
            + str(validation_src)
            + " -valid_tgt ../multiparallelize/training/validation/"
            + str(validation_tgt)
            + " -save ../multiparallelize")
        os.system(preprocess)

        train = "python train.py -data data/english-to-spanish -save_model small-english-to-spanish-model -gpu 0 -separate_layers"
        os.system(train)

        translate = "python translate.py -model ../final_models/english-to-spanish-model_acc_61.26_ppl_6.28_e13.pt -src cleaned_sentencesGLM.txt -output ../predictions/english-to-spanish-model-pred.txt -replace_unk -verbose -dump_layers ../predictions/english-to-spanish-model-pred.pt"
        os.system(translate)
def main():
    """Find each voxel's best layer and/or export per-voxel metric tables.

    Two independent modes are driven by flags:

    * ``--single_subject --across_layer``: for the chosen subject, walk all
      layers, keep the best metric value per voxel (maximum for llh/ranking,
      minimum for rmse), record which layer produced it and save the layer
      index volume to ``../<file_name>_best_<metric>.mat``.
    * ``--save_by_voxel``: for every layer and every subject, take the metric
      values at the non-zero voxels of the common brain space, optionally
      append the glove / word2vec / OpenNMT baselines (``--compare_models``),
      and save the stacked array to
      ``../mfit/nested_all_best_<metric>_by_voxel.mat``.

    The script prints an error and exits when the layer count does not match
    the embedding type or when no metric flag is given.
    """
    argparser = argparse.ArgumentParser(
        description="layer and subject group level comparison")
    argparser.add_argument("-subject_number", "--subject_number", type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")

    ### SPECIFY MODEL PARAMETERS ###
    # NOTE(review): --cross_validation, --model_to_brain, --bert and --nested
    # are store_true flags whose default is already True, so they cannot be
    # switched off from the command line. With --bert always on, the check
    # below only lets the script run with --num_layers 12.
    argparser.add_argument("-cross_validation", "--cross_validation",
                           help="Add flag if add cross validation",
                           action='store_true', default=True)
    argparser.add_argument("-brain_to_model", "--brain_to_model",
                           help="Add flag if regressing brain to model",
                           action='store_true', default=False)
    argparser.add_argument("-model_to_brain", "--model_to_brain",
                           help="Add flag if regressing model to brain",
                           action='store_true', default=True)
    argparser.add_argument(
        "-agg_type", "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str, default='avg')
    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, required=True)
    argparser.add_argument(
        "-random", "--random", action='store_true', default=False,
        help="True if initialize random brain activations, False if not")
    argparser.add_argument(
        "-rand_embed", "--rand_embed", action='store_true', default=False,
        help="True if initialize random embeddings, False if not")
    argparser.add_argument(
        "-glove", "--glove", action='store_true', default=False,
        help="True if initialize glove embeddings, False if not")
    argparser.add_argument(
        "-word2vec", "--word2vec", action='store_true', default=False,
        help="True if initialize word2vec embeddings, False if not")
    argparser.add_argument(
        "-bert", "--bert", action='store_true', default=True,
        help="True if initialize bert embeddings, False if not")
    argparser.add_argument(
        "-normalize", "--normalize", action='store_true', default=False,
        help="True if add normalization across voxels, False if not")
    argparser.add_argument("-permutation", "--permutation",
                           action='store_true', default=False,
                           help="True if permutation, False if not")
    argparser.add_argument(
        "-permutation_region", "--permutation_region",
        action='store_true', default=False,
        help="True if permutation by brain region, False if not")

    ### PLOTTING ###
    argparser.add_argument(
        "-which_layer", "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)

    ### SPECIFY FOR SINGLE SUBJECT OR GROUP LEVEL ANALYSIS ###
    argparser.add_argument("-single_subject", "--single_subject",
                           help="if single subject analysis",
                           action='store_true', default=False)
    argparser.add_argument("-group_level", "--group_level",
                           help="if group level analysis",
                           action='store_true', default=False)
    argparser.add_argument("-searchlight", "--searchlight",
                           help="if searchlight",
                           action='store_true', default=False)

    ### SPECIFY FOR ONE LAYER OR DIFFERENCE IN LAYERS ###
    argparser.add_argument("-single_layer", "--single_layer",
                           help="if single layer significance",
                           action='store_true', default=False)
    argparser.add_argument("-across_layer", "--across_layer",
                           help="if across layer depth significance",
                           action='store_true', default=False)

    ### SPECIFY WHICH METRIC ###
    argparser.add_argument("-fdr", "--fdr", help="if apply FDR",
                           action='store_true', default=False)
    argparser.add_argument("-llh", "--llh", action='store_true',
                           default=False,
                           help="True if calculate likelihood, False if not")
    argparser.add_argument("-ranking", "--ranking", action='store_true',
                           default=False,
                           help="True if calculate ranking, False if not")
    argparser.add_argument("-rmse", "--rmse", action='store_true',
                           default=False,
                           help="True if calculate rmse, False if not")
    argparser.add_argument("-rsa", "--rsa", action='store_true',
                           default=False,
                           help="True if calculate rsa, False if not")

    argparser.add_argument("-nested", "--nested", action='store_true',
                           default=True, help="True if running nested")
    argparser.add_argument("-local", "--local", action='store_true',
                           default=False, help="True if running locally")
    argparser.add_argument("-save_by_voxel", "--save_by_voxel",
                           action='store_true', default=False,
                           help="True if save by voxel")
    argparser.add_argument("-compare_models", "--compare_models",
                           action='store_true', default=False,
                           help="True if compare models")
    args = argparser.parse_args()

    if args.num_layers != 12 and args.bert:
        print("error: please ensure bert has 12 layers")
        exit()

    if args.num_layers != 1 and (args.word2vec or args.random
                                 or args.permutation or args.glove):
        print("error: please ensure baseline has 1 layer")
        exit()

    # --rsa counts as a metric too: it sets `metric` below, and the sibling
    # comparison script accepts it in the same check. Previously --rsa on its
    # own was rejected here.
    if not (args.fdr or args.llh or args.ranking or args.rmse or args.rsa):
        print("error: select at least 1 metric of correlation")
        exit()

    print("NUMBER OF LAYERS: " + str(args.num_layers))
    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]

    print("getting common brain space")
    # presumably a volume that is non-zero where all subjects have data --
    # TODO confirm against helper.load_common_space
    common_space = helper.load_common_space(subjects, local=args.local)
    voxel_coordinates = np.transpose(np.nonzero(common_space))
    print(voxel_coordinates.shape)

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel,
     plabel, prlabel) = helper.generate_labels(args)

    # When several metric flags are combined, the last match below names the
    # output file.
    if args.fdr:
        metric = "fdr"
    if args.rmse:
        metric = "rmse"
    if args.rsa:
        metric = "rsa"
    if args.ranking:
        metric = "ranking"
    if args.llh:
        metric = "llh"

    if args.single_subject and args.across_layer:
        updated_brain = None
        for layer_num in range(1, args.num_layers + 1):
            print("generating file names...")
            layer_file_name = generate_file_name(args, args.subject_number,
                                                 layer_num)

            print("retrieving file contents...")
            layer, _ = get_file(args, args.subject_number, layer_file_name)

            if updated_brain is None:
                # First layer: it is trivially the best seen so far wherever
                # it has a non-zero value.
                updated_brain = layer
                mask = layer.astype(bool)
                best_layer = mask * layer_num
                continue

            if args.llh or args.ranking:
                best_vals = np.maximum(updated_brain, layer)
            elif args.rmse:
                best_vals = np.minimum(updated_brain, layer)
            else:
                print("select llh, ranking, or rmse")
                exit()

            # Voxels whose best value comes from this layer get this layer's
            # index; the others keep their previous index. On a tie both
            # candidates are non-zero and the larger (current) index wins.
            from_layer = np.equal(best_vals, layer).astype(bool) * mask * layer_num
            from_previous = np.equal(best_vals, updated_brain).astype(bool) * mask * best_layer
            best_layer = np.maximum(from_layer, from_previous)
            updated_brain = best_vals

        if args.bert:
            file_name = "bert{}{}subj{}_{}".format(
                direction,
                validate,
                args.subject_number,
                args.agg_type,
            )
        else:
            specific_file = str(plabel) + str(prlabel) + str(rlabel) + str(elabel) + str(glabel) + str(w2vlabel) + str(bertlabel) + str(direction) + str(validate) + "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer-{}"
            file_name = specific_file.format(
                args.subject_number,
                args.language,
                args.num_layers,
                "brnn",
                args.agg_type
            )

        print("BEST LAYER")
        total = 0
        for layer_idx in range(1, args.num_layers + 1):
            voxels_won = np.sum(best_layer == layer_idx)
            print("LAYER" + str(layer_idx))
            print(voxels_won)
            total += voxels_won
        print("TOTAL:" + str(total))

        scipy.io.savemat(
            "../" + str(file_name) + "_best_" + str(metric) + ".mat",
            dict(metric=best_layer.astype(np.int16)))

    if args.save_by_voxel:
        common_voxels = np.nonzero(common_space)

        per_layer = []
        for layer_num in tqdm(range(1, args.num_layers + 1)):
            per_subject = []
            for subj_num in subjects:
                layer_file_name = generate_file_name(args, subj_num,
                                                     layer_num)
                layer, _ = get_file(args, subj_num, layer_file_name)
                per_subject.append(layer[common_voxels])
            print("PER LAYER: ")
            print(np.array(per_subject).shape)
            # NOTE(review): the 0.5 scaling is kept from the original;
            # its purpose is not visible here.
            per_layer.append(0.5 * np.transpose(np.array(per_subject)))
            print(np.array(per_layer).shape)

        print("BEFORE BASELINE")
        print(np.array(per_layer).shape)

        # add other embeddings
        if args.compare_models:
            # (glove, word2vec) selectors; (False, False) selects the
            # OpenNMT model, which contributes four layers.
            baselines = [(True, False), (False, True), (False, False)]
            for use_glove, use_word2vec in baselines:
                if not use_glove and not use_word2vec:
                    nmt_layers = 4
                else:
                    nmt_layers = 1
                for layer_num in tqdm(range(1, nmt_layers + 1)):
                    per_subject = []
                    for subj_num in subjects:
                        layer_file_name = generate_file_name(
                            args, subj_num, layer_num, baseline=True,
                            glove=use_glove, word2vec=use_word2vec)
                        layer, _ = get_file(args, subj_num, layer_file_name)
                        per_subject.append(layer[common_voxels])
                    per_layer.append(
                        0.5 * np.transpose(np.array(per_subject)))
            print("AFTER BASELINE")
            print(np.array(per_layer).shape)

        print("AT THE END: ")
        print(np.array(per_layer).shape)
        # Stack the per-layer (voxel, subject) tables along a new last axis.
        per_voxel = np.stack(per_layer, axis=-1)
        print(per_voxel.shape)
        print(per_voxel[0].shape)
        print(per_voxel[0])

        scipy.io.savemat(
            "../mfit/nested_all_best_" + str(metric) + "_by_voxel.mat",
            dict(metric=per_voxel.astype(np.float32)))

    print("done.")
def main():
    """Project a subject's concatenated RMSE values into 3D and plot them.

    Builds the result-file stem from the command-line options, loads
    ``../rmses/concatenated-<stem>.p`` and the subject's ``volmask.p``,
    converts the values with ``helper.transform_coordinates`` (which is given
    ``../3d-brain/<stem>`` as its save path) and draws the glass-brain figure
    ``../3d-brain/<stem>-glass-brain.png``.

    Exactly one of ``--brain_to_model`` / ``--model_to_brain`` must be given;
    otherwise a message is printed and the script exits.
    """
    # parse arguments
    argparser = argparse.ArgumentParser(description="plot RMSE on 3d brain")
    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, default=2)
    argparser.add_argument("-model_type", "--model_type",
                           help="Type of model ('brnn', 'rnn')",
                           type=str, default='brnn')
    argparser.add_argument(
        "-which_layer", "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)
    argparser.add_argument(
        "-agg_type", "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str, default='avg')
    argparser.add_argument("-subject_number", "--subject_number", type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")
    argparser.add_argument("-cross_validation", "--cross_validation",
                           help="Add flag if add cross validation",
                           action='store_true', default=False)
    argparser.add_argument("-brain_to_model", "--brain_to_model",
                           help="Add flag if regressing brain to model",
                           action='store_true', default=False)
    argparser.add_argument("-model_to_brain", "--model_to_brain",
                           help="Add flag if regressing model to brain",
                           action='store_true', default=False)
    argparser.add_argument(
        "-glove", "--glove", action='store_true', default=False,
        help="True if initialize glove embeddings, False if not")
    argparser.add_argument(
        "-word2vec", "--word2vec", action='store_true', default=False,
        help="True if initialize word2vec embeddings, False if not")
    argparser.add_argument(
        "-bert", "--bert", action='store_true', default=False,
        help="True if initialize bert embeddings, False if not")
    argparser.add_argument(
        "-rand_embed", "--rand_embed", action='store_true', default=False,
        help="True if initialize random embeddings, False if not")
    # The help text used to be a copy of the cross-validation one; it now
    # matches the wording the sibling parsers use for --random.
    argparser.add_argument(
        "-random", "--random", action='store_true', default=False,
        help="True if initialize random brain activations, False if not")
    argparser.add_argument("-permutation", "--permutation",
                           action='store_true', default=False,
                           help="True if permutation, False if not")
    argparser.add_argument(
        "-permutation_region", "--permutation_region",
        action='store_true', default=False,
        help="True if permutation by brain region, False if not")
    args = argparser.parse_args()

    print("getting arguments...")

    # get residuals
    # check conditions // can remove when making pipeline
    if args.brain_to_model and args.model_to_brain:
        print("select only one flag for brain_to_model or model_to_brain")
        exit()
    if not args.brain_to_model and not args.model_to_brain:
        print("select at least flag for brain_to_model or model_to_brain")
        exit()

    (direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel,
     plabel, prlabel) = helper.generate_labels(args)
    label_prefix = str(plabel) + str(prlabel) + str(rlabel) + str(
        elabel) + str(glabel) + str(w2vlabel) + str(bertlabel) + str(
            direction) + str(validate)

    if not (args.word2vec or args.glove or args.bert or args.random):
        # OpenNMT results: the label prefix is part of the format template.
        specific_file = (
            label_prefix
            + "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}")
        file_name = specific_file.format(args.subject_number, args.language,
                                         args.num_layers, args.model_type,
                                         args.which_layer, args.agg_type)
    else:
        # Baseline results: only the suffix is formatted, as in the
        # original expression where .format() bound to the last literal.
        file_name = label_prefix + "-subj{}-{}_layer{}".format(
            args.subject_number, args.agg_type, args.which_layer)

    residual_file = "../rmses/concatenated-" + str(file_name) + ".p"
    with open(residual_file, "rb") as residual_handle:
        data = pickle.load(residual_handle)

    # get volmask
    subject_number = args.subject_number
    file_path = "../examplesGLM/subj{}/volmask.p".format(subject_number)
    with open(file_path, "rb") as volmask_handle:
        volmask = pickle.load(volmask_handle)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs('../3d-brain/', exist_ok=True)

    print("transforming coordinates...")
    transform_data = helper.transform_coordinates(
        data, volmask, save_path="../3d-brain/" + file_name, metric="rmse")
    print("ORIGINAL DATA: " + str(len(data)))
    print("TRANSFORMED DATA: " + str(transform_data.shape))

    print("plotting data...")
    f_name = "../3d-brain/" + file_name + "-glass-brain.png"
    plot_on_glass(transform_data, f_name)
    print('done.')
def main():
    """Transform stored metric values into brain coordinates for plotting.

    Parses the command line, builds the result file name from the labels
    returned by ``helper.generate_labels`` plus the model arguments, loads the
    subject's ``volmask.p`` from ``<fmri_path>examplesGLM/subj<N>/`` and passes
    the loaded values to ``helper.transform_coordinates`` together with the
    ``<to_save_path>3d-brain/`` directory (created if missing).

    Exactly one of ``--rmse``, ``--fdr`` or ``--rank`` must be supplied.  Only
    ``--fdr`` currently has a loader; ``--rmse`` and ``--rank`` are still TODO
    and terminate with an explicit message instead of failing later on an
    undefined variable.
    """
    # parse arguments
    argparser = argparse.ArgumentParser(
        description="transform coordinates for plotting")

    def add_flag(name, help_text):
        # Register a boolean switch under both its "-name" and "--name" spelling.
        argparser.add_argument("-" + name, "--" + name, action='store_true',
                               default=False, help=help_text)

    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, default=2)
    argparser.add_argument("-model_type", "--model_type",
                           help="Type of model ('brnn', 'rnn')",
                           type=str, default='brnn')
    argparser.add_argument(
        "-which_layer", "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)
    argparser.add_argument(
        "-agg_type", "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str, default='avg')
    argparser.add_argument("-subject_number", "--subject_number",
                           type=int, default=1,
                           help="subject number (fMRI data) for decoding")
    add_flag("cross_validation", "Add flag if add cross validation")
    add_flag("brain_to_model", "Add flag if regressing brain to model")
    add_flag("model_to_brain", "Add flag if regressing model to brain")
    add_flag("glove", "True if initialize glove embeddings, False if not")
    add_flag("word2vec", "True if initialize word2vec embeddings, False if not")
    add_flag("bert", "True if initialize bert embeddings, False if not")
    add_flag("rand_embed", "True if initialize random embeddings, False if not")
    # The help text used to be a copy of the cross-validation description.
    add_flag("random",
             "True if initialize random brain activations, False if not")
    add_flag("permutation", "True if permutation, False if not")
    add_flag("permutation_region",
             "True if permutation by brain region, False if not")

    # metrics
    add_flag("rmse", "True if rmse, False if not")
    add_flag("fdr", "True if fdr, False if not")
    add_flag("rank", "True if rank, False if not")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path", default="/n/shieber_lab/Lab/users/cjou/fmri/", type=str,
        help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path", default="/n/shieber_lab/Lab/users/cjou/", type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    # verify arguments: exactly one metric flag may be set
    selected_metrics = sum([args.rmse, args.fdr, args.rank])
    if selected_metrics > 1:
        print("select only one flag for rmse, fdr, or rank")
        exit()
    if selected_metrics == 0:
        print("select at least flag for rmse, fdr, or rank")
        exit()

    print("getting arguments...")
    direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel, prlabel = helper.generate_labels(
        args)
    file_loc = str(plabel) + str(prlabel) + str(rlabel) + str(elabel) + str(
        glabel) + str(w2vlabel) + str(bertlabel) + str(direction) + str(
            validate
        ) + "subj{}_parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}"
    file_name = file_loc.format(args.subject_number, args.language,
                                args.num_layers, args.model_type,
                                args.which_layer, args.agg_type)

    # set save location
    save_location = str(args.to_save_path) + '3d-brain/'
    os.makedirs(save_location, exist_ok=True)

    # metric name and the sub-directory of to_save_path its results live in
    if args.rmse:
        metric, metric_dir = "rmse", "rmse/"
    elif args.fdr:
        metric, metric_dir = "fdr", "fdr/"
    else:
        metric, metric_dir = "rank", "rankings_od32/"
    open_location = str(args.to_save_path) + metric_dir + str(
        file_name) + "_subj" + str(args.subject_number)

    if metric != "fdr":
        # TODO: loading of rmse / rank values has not been written yet; stop
        # here rather than reaching transform_coordinates without any data.
        print("error: loading values for metric '{}' is not implemented yet".
              format(metric))
        exit()

    # NOTE(review): pickle must only be used on trusted result files.
    with open(open_location + "_valid_correlations_2d_coordinates.p",
              "rb") as points_file:
        points = pickle.load(points_file)

    # get volmask
    file_path = str(args.fmri_path) + "examplesGLM/subj{}/volmask.p".format(
        args.subject_number)
    with open(file_path, "rb") as volmask_file:
        volmask = pickle.load(volmask_file)

    # transform coordinates and save
    print("METRIC: " + str(metric))
    # NOTE(review): other callers in this file pass `save_path=` to
    # transform_coordinates -- confirm which keyword the helper accepts.
    _ = helper.transform_coordinates(points,
                                     volmask,
                                     save_location=save_location,
                                     metric=metric)
    print("done.")
def main():
    """Concatenate batched metric values per layer and transform them.

    Parses the command line, loads the subject's ``volmask.p`` (from
    ``../examplesGLM/subj<N>/`` with ``--local``, otherwise from
    ``<fmri_path>subj<N>/``), then for every layer ``1..num_layers`` and every
    selected metric calls ``concatenate_all`` and passes the result to
    ``helper.transform_coordinates`` with a save path under ``../mat/``.

    Metrics processed: ``rmse`` and ``llh`` with ``--brain_to_model``;
    otherwise ``rsa`` with ``--rsa``; otherwise ``alpha`` with ``--alpha``;
    otherwise ``ranking``, ``rmse`` and ``llh``.
    """
    argparser = argparse.ArgumentParser(
        description="calculate rankings for model-to-brain")

    def add_flag(name, help_text, default=False):
        # Register a boolean switch under both its "-name" and "--name" spelling.
        argparser.add_argument("-" + name, "--" + name, action='store_true',
                               default=default, help=help_text)

    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, default=12)
    argparser.add_argument("-model_type", "--model_type",
                           help="Type of model ('brnn', 'rnn')",
                           type=str, default='brnn')
    argparser.add_argument("-agg_type", "--agg_type",
                           help="Aggregation type ('avg', 'max', 'min', 'last')",
                           type=str, default='avg')
    argparser.add_argument("-subject_number", "--subject_number",
                           help="fMRI subject number ([1:11])",
                           type=int, default=1)
    # NOTE(review): store_true with default=True means this flag can never be
    # switched off from the command line -- kept as in the original.
    add_flag("cross_validation", "Add flag if add cross validation",
             default=True)
    add_flag("brain_to_model", "Add flag if regressing brain to model")
    add_flag("model_to_brain", "Add flag if regressing model to brain")
    add_flag("glove", "True if initialize glove embeddings, False if not")
    add_flag("word2vec", "True if initialize word2vec embeddings, False if not")
    add_flag("bert", "True if initialize bert embeddings, False if not")
    add_flag("rand_embed", "True if initialize random embeddings, False if not")
    # The help text used to be a copy of the cross-validation description.
    add_flag("random",
             "True if initialize random brain activations, False if not")
    add_flag("permutation", "True if permutation, False if not")
    add_flag("permutation_region",
             "True if permutation by brain region, False if not")
    add_flag("normalize", "True if add normalization across voxels, False if not")
    add_flag("local", "True if local, False if not")
    add_flag("log", "True if use log coordinates, False if not")
    add_flag("rmse", "True if rmse, False if not")
    add_flag("ranking", "True if ranking, False if not")
    add_flag("fdr", "True if fdr, False if not")
    add_flag("llh", "True if llh, False if not")
    add_flag("rsa", "True if rsa, False if not")
    add_flag("alpha", "True if alpha, False if not")
    argparser.add_argument(
        "-total_batches", "--total_batches", type=int,
        help="total number of batches residual_name is spread across",
        default=100)

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path", default="/n/shieber_lab/Lab/users/cjou/fmri/", type=str,
        help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path", default="/n/shieber_lab/Lab/users/cjou/", type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    # check conditions // can remove when making pipeline
    if args.brain_to_model and args.model_to_brain:
        print("select only one flag for brain_to_model or model_to_brain")
        exit()
    if (not args.brain_to_model and not args.model_to_brain) and not args.rsa:
        print("select at least flag for brain_to_model or model_to_brain // or rsa")
        exit()

    print("getting volmask...")
    direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel, prlabel = helper.generate_labels(args)

    print("CROSS VALIDATION: " + str(args.cross_validation))
    print("BRAIN_TO_MODEL: " + str(args.brain_to_model))
    print("MODEL_TO_BRAIN: " + str(args.model_to_brain))
    print("GLOVE: " + str(args.glove))
    print("WORD2VEC: " + str(args.word2vec))
    print("BERT: " + str(args.bert))
    print("RANDOM BRAIN: " + str(args.random))
    print("RANDOM EMBEDDINGS: " + str(args.rand_embed))
    print("PERMUTATION: " + str(args.permutation))
    print("PERMUTATION REGION: " + str(args.permutation_region))

    if args.local:
        subject_dir = f"../examplesGLM/subj{args.subject_number}/"
    else:
        # Previously written as `format(args,fmri_path, ...)`, which raised
        # NameError on every non-local run.
        # NOTE(review): another script in this file reads the volmask from
        # "<fmri_path>examplesGLM/subj<N>/" -- confirm the cluster layout.
        subject_dir = "{}subj{}/".format(args.fmri_path, args.subject_number)

    def load_pickle(file_name):
        # Read one pickled object from the subject directory, closing the file.
        # NOTE(review): pickle must only be used on trusted data files.
        with open(subject_dir + file_name, "rb") as handle:
            return pickle.load(handle)

    volmask = load_pickle("volmask.p")
    if args.ranking:
        # These four objects are loaded but not used further in this function.
        atlas_vals = load_pickle("atlas_vals.p")
        atlas_labels = load_pickle("atlas_labels.p")
        roi_vals = load_pickle("roi_vals.p")
        roi_labels = load_pickle("roi_labels.p")

    ### MAKE PATHS ###
    print("making paths...")
    os.makedirs('../mat/', exist_ok=True)

    if args.brain_to_model:
        metrics = ["rmse", "llh"]
    elif args.rsa:
        metrics = ["rsa"]
    elif args.alpha:
        metrics = ["alpha"]
    else:
        metrics = ["ranking", "rmse", "llh"]

    label_prefix = str(plabel) + str(prlabel) + str(rlabel) + str(elabel) + str(glabel) + str(w2vlabel) + str(bertlabel) + str(direction) + str(validate)
    model_template = "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer{}-{}"
    embedding_template = "-subj{}-{}_layer{}"

    # The name under which results are saved treats only glove/word2vec/bert
    # as embedding baselines; the name handed to concatenate_all additionally
    # treats --random that way.  Both rules are kept exactly as they were.
    save_as_embedding = args.bert or args.word2vec or args.glove
    read_as_embedding = save_as_embedding or args.random

    for layer in tqdm(range(1, args.num_layers+1)):
        print("LAYER: " + str(layer))

        # Both names depend on the layer only, so build them once per layer.
        model_args = (args.subject_number, args.language, args.num_layers,
                      args.model_type, layer, args.agg_type)
        embedding_args = (args.subject_number, args.agg_type, layer)

        if save_as_embedding:
            file_name = (label_prefix + embedding_template).format(*embedding_args)
        else:
            file_name = (label_prefix + model_template).format(*model_args)

        if read_as_embedding:
            file_format = label_prefix + embedding_template.format(*embedding_args)
        else:
            file_format = (label_prefix + model_template).format(*model_args)

        for metric in metrics:
            print("METRIC: " + str(metric))
            print("transform coordinates...")
            final_values = concatenate_all(file_format, args, metric)
            _ = helper.transform_coordinates(final_values, volmask, save_path="../mat/" + file_name, metric=metric)

    print('done.')
def main():
    """Compare layers for a single subject or across the subject group.

    Parses the command line and, depending on the flags, runs up to three
    analyses:

    * ``--single_subject --across_layer``: for every layer pair of the given
      subject, sums the absolute values returned by ``compare_layers``, draws
      the resulting matrix as a seaborn heatmap and saves it as a PNG under
      ``../``.
    * ``--group_level --across_layer``: same matrix, but each entry is the sum
      of the absolute subject-averaged differences restricted to the mask from
      ``find_common_brain_space``; the heatmap is shown instead of saved.
    * ``--group_level --single_layer``: averages one layer's values over the
      subjects, derives group p-values with ``calculate_ttest`` and passes
      both to ``helper.transform_coordinates``.

    ``--num_layers`` is required; it must be 12 with ``--bert`` and 1 with
    ``--word2vec``, ``--random``, ``--permutation`` or ``--glove``.  At least
    one of ``--fdr``, ``--llh``, ``--ranking``, ``--rmse`` must be given.
    """
    argparser = argparse.ArgumentParser(
        description="layer and subject group level comparison")

    def add_flag(name, help_text):
        # Register a boolean switch under both its "-name" and "--name" spelling.
        argparser.add_argument("-" + name, "--" + name, action='store_true',
                               default=False, help=help_text)

    argparser.add_argument("-subject_number", "--subject_number",
                           type=int, default=1,
                           help="subject number (fMRI data) for decoding")

    ### SPECIFY MODEL PARAMETERS ###
    add_flag("cross_validation", "Add flag if add cross validation")
    add_flag("brain_to_model", "Add flag if regressing brain to model")
    add_flag("model_to_brain", "Add flag if regressing model to brain")
    argparser.add_argument(
        "-agg_type", "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str, default='avg')
    argparser.add_argument(
        "-language", "--language",
        help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')",
        type=str, default='spanish')
    argparser.add_argument("-num_layers", "--num_layers",
                           help="Total number of layers ('2', '4')",
                           type=int, required=True)
    add_flag("random",
             "True if initialize random brain activations, False if not")
    add_flag("rand_embed", "True if initialize random embeddings, False if not")
    add_flag("glove", "True if initialize glove embeddings, False if not")
    add_flag("word2vec", "True if initialize word2vec embeddings, False if not")
    add_flag("bert", "True if initialize bert embeddings, False if not")
    add_flag("normalize", "True if add normalization across voxels, False if not")
    add_flag("permutation", "True if permutation, False if not")
    add_flag("permutation_region",
             "True if permutation by brain region, False if not")

    ### PLOTTING ###
    argparser.add_argument(
        "-which_layer", "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int, default=1)

    ### SPECIFY FOR SINGLE SUBJECT OR GROUP LEVEL ANALYSIS ###
    add_flag("single_subject", "if single subject analysis")
    add_flag("group_level", "if group level analysis")
    add_flag("searchlight", "if searchlight")

    ### SPECIFY FOR ONE LAYER OR DIFFERENCE IN LAYERS ###
    add_flag("single_layer", "if single layer significance")
    add_flag("across_layer", "if across layer depth significance")

    ### SPECIFY WHICH METRIC ###
    add_flag("fdr", "if apply FDR")
    add_flag("llh", "True if calculate likelihood, False if not")
    add_flag("ranking", "True if calculate ranking, False if not")
    add_flag("rmse", "True if calculate rmse, False if not")
    add_flag("rsa", "True if calculate rsa, False if not")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path", default="/n/shieber_lab/Lab/users/cjou/fmri/", type=str,
        help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path", default="/n/shieber_lab/Lab/users/cjou/", type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    if args.num_layers != 12 and args.bert:
        print("error: please ensure bert has 12 layers")
        exit()

    if args.num_layers != 1 and (args.word2vec or args.random
                                 or args.permutation or args.glove):
        print("error: please ensure baseline has 1 layer")
        exit()

    if not args.fdr and not args.llh and not args.ranking and not args.rmse:
        print("error: select at least 1 metric of correlation")
        exit()

    print("NUMBER OF LAYERS: " + str(args.num_layers))
    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]

    direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel, prlabel = helper.generate_labels(
        args)

    # When several metric flags are given, the one listed last here wins.
    for is_selected, label in ((args.fdr, "fdr"), (args.rmse, "rmse"),
                               (args.rsa, "rsa"), (args.ranking, "ranking"),
                               (args.llh, "llh")):
        if is_selected:
            metric = label

    layer_numbers = range(1, args.num_layers + 1)
    layer_names = ['layer' + str(i) for i in layer_numbers]

    # generate heatmap
    if args.single_subject and args.across_layer:
        heatmap_differences = np.zeros((args.num_layers, args.num_layers))

        for l1 in layer_numbers:
            # Only the upper triangle is computed; the matrix is symmetric.
            for l2 in range(l1, args.num_layers + 1):
                print("generating file names...")
                layer1_file_name = generate_file_name(args,
                                                      args.subject_number, l1)
                layer2_file_name = generate_file_name(args,
                                                      args.subject_number, l2)

                print("retrieving file contents...")
                layer1, pvals = get_file(args, layer1_file_name)
                layer2, pvals = get_file(args, layer2_file_name)

                diff = compare_layers(layer1, layer2)
                # NOTE(review): the paired t-test result is not used further.
                pvals = stats.ttest_rel(layer1, layer2)

                total_difference = np.sum(np.abs(diff))
                heatmap_differences[l1 - 1][l2 - 1] = total_difference
                heatmap_differences[l2 - 1][l1 - 1] = total_difference

        print(heatmap_differences.shape)
        print(heatmap_differences)

        df = pd.DataFrame(heatmap_differences,
                          index=layer_names,
                          columns=layer_names)
        sns.heatmap(df)

        if args.bert:
            file_name = "bert{}{}subj{}_{}_heatmap".format(
                direction,
                validate,
                args.subject_number,
                args.agg_type,
            )
        else:
            specific_file = str(plabel) + str(prlabel) + str(rlabel) + str(
                elabel
            ) + str(glabel) + str(w2vlabel) + str(bertlabel) + str(
                direction
            ) + str(
                validate
            ) + "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer-{}"
            file_name = specific_file.format(args.subject_number,
                                             args.language, args.num_layers,
                                             "brnn", args.agg_type)

        plt.savefig("../" + str(file_name) + str(metric) + ".png",
                    bbox_inches='tight')

    if args.group_level and args.across_layer:
        common_space = find_common_brain_space(args, subjects,
                                               args.which_layer)
        common_mask = common_space.astype(bool)
        a, b, c = common_space.shape

        heatmap_differences = np.zeros((args.num_layers, args.num_layers))
        for l1 in layer_numbers:
            for l2 in range(l1, args.num_layers + 1):
                layer1_vals = np.zeros((len(subjects), a, b, c))
                layer2_vals = np.zeros((len(subjects), a, b, c))
                group_avgs = []

                for subj_index, subject in enumerate(subjects):
                    # Use the real subject id; the loop index would ask for a
                    # non-existent "subject 0" and skip the last subjects.
                    layer1_file_name = generate_file_name(args, subject, l1)
                    layer2_file_name = generate_file_name(args, subject, l2)

                    layer1, pvals = get_file(args, layer1_file_name)
                    layer2, pvals = get_file(args, layer2_file_name)

                    common_per_layer1 = layer1[common_mask]
                    common_per_layer2 = layer2[common_mask]

                    # NOTE(review): assumes the masked values fit an
                    # (a, b, c) slot -- TODO confirm shapes from get_file.
                    layer1_vals[subj_index] = common_per_layer1
                    layer2_vals[subj_index] = common_per_layer2

                    diff = compare_layers(common_per_layer1,
                                          common_per_layer2)
                    group_avgs.append(diff)

                # NOTE(review): the paired t-test result is not used further.
                pvals = stats.ttest_rel(layer1_vals, layer2_vals)

                # Average the per-subject differences (was the undefined name
                # `avgs`), then sum their magnitudes.
                total_difference = np.sum(
                    np.abs(np.mean(group_avgs, axis=0)))
                heatmap_differences[l1 - 1][l2 - 1] = total_difference
                heatmap_differences[l2 - 1][l1 - 1] = total_difference

        print(heatmap_differences.shape)
        print(heatmap_differences)

        df = pd.DataFrame(heatmap_differences,
                          index=layer_names,
                          columns=layer_names)
        sns.heatmap(df)
        plt.show()

    if args.group_level and args.single_layer:
        common_space = find_common_brain_space(args, subjects,
                                               args.which_layer)
        common_mask = common_space.astype(bool)
        a, b, c = common_space.shape
        corr = np.zeros((len(subjects), a, b, c))
        pvalues = np.zeros((len(subjects), a, b, c))

        # get common values
        for subj_index, subject in enumerate(subjects):
            layer_file_name = generate_file_name(args, subject,
                                                 args.which_layer)
            # Load the file that was just named (the layer number used to be
            # passed here instead of the file name).
            layer, pvals = get_file(args, layer_file_name)
            # NOTE(review): assumes the masked values fit an (a, b, c) slot
            # -- TODO confirm shapes from get_file.
            corr[subj_index] = layer[common_mask]
            pvalues[subj_index] = pvals[common_mask]

        group_pvals = np.apply_along_axis(calculate_ttest, 0, pvalues)
        group_corrs = np.mean(corr, axis=0)

        save_location = str(
            args.to_save_path) + "fdr/group_level_single_layer_" + str(
                args.which_layer)
        volmask_path = str(args.to_save_path) + "subj" + str(
            args.subject_number) + "/volmask.p"
        # NOTE(review): pickle must only be used on trusted data files.
        with open(volmask_path, "rb") as volmask_file:
            volmask = pickle.load(volmask_file)
        _ = helper.transform_coordinates(group_corrs,
                                         volmask,
                                         save_location,
                                         "fdr",
                                         pvals=group_pvals)

    print("done.")
    return