def mixed_effects_analysis(args, embed_matrix):
    # load common brain space
    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]
    common_space = helper.load_common_space(subjects, local=args.local)
    voxel_coordinates = np.transpose(np.nonzero(common_space))
    num_voxels = len(voxel_coordinates)
    print("NUM VOXELS IN SHARED COMMON BRAIN SPACE: " + str(num_voxels))

    # initialize variables
    all_activations = []
    subj_number = []
    voxel_index = []

    # prepare model embeddings
    dim_labels = ['dim' + str(i) for i in range(embed_matrix.shape[1])]
    embed_matrix_pd = pd.DataFrame(embed_matrix, columns=dim_labels)
    embed_matrix_pd_repeat = pd.concat([embed_matrix_pd] * len(subjects),
                                       ignore_index=True)
    print("LENGTH OF EMBEDDINGS: " + str(len(embed_matrix_pd_repeat)))

    # get labels
    labels = " ".join(dim_labels)
    conditional_labels = " ".join(label + " | subject_number"
                                  for label in dim_labels)

    # get data
    for subj in subjects:
        # load each subject's own activations (not args.subject_number's)
        activation = pickle.load(
            open(f"/n/shieber_lab/Lab/users/cjou/fmri/subj{subj}/activations.p",
                 "rb"))
        activation_vals = activation[np.nonzero(common_space)]
        modified_activations = get_modified_activations(
            activation_vals, common_space)
        # extend (not append) so all three columns have one entry per voxel
        all_activations.extend(modified_activations)
        voxel_index.extend(range(num_voxels))
        subj_number.extend([subj] * num_voxels)

    # create dataframe
    data = pd.DataFrame({
        'subject_number': subj_number,
        'voxel_index': voxel_index,
        'activations': all_activations
    })

    data_slice = data.loc[data["voxel_index"] == 0]
    print("DATA SLICE LENGTH: " + str(len(data_slice)))

    # per voxel
    rmses_per_voxel = []
    for v in range(num_voxels):
        data_slice = data.loc[data["voxel_index"] == v]
        concat_pd = pd.concat([data_slice, embed_matrix_pd_repeat], axis=1)
        rmse = run_per_voxel(concat_pd, labels, conditional_labels)
        rmses_per_voxel.append(rmse)

    return rmses_per_voxel
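
# Hedged sketch (assumption, not this repo's actual definition): run_per_voxel
# presumably fits a per-voxel mixed-effects model with the embedding dimensions
# as fixed effects and subject_number as the grouping factor (conditional_labels
# hints at per-subject random slopes, e.g. via re_formula), then scores the fit
# by RMSE. A minimal random-intercept version with statsmodels:
import numpy as np
import statsmodels.formula.api as smf

def run_per_voxel_sketch(df, labels, conditional_labels):
    # labels is a space-separated list of dim columns, e.g. "dim0 dim1 ..."
    formula = "activations ~ " + " + ".join(labels.split())
    result = smf.mixedlm(formula, df, groups=df["subject_number"]).fit()
    residuals = df["activations"] - result.fittedvalues
    return np.sqrt(np.mean(residuals ** 2))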
def main():
    argparser = argparse.ArgumentParser(
        description="FDR significance thresholding for single subject")
    argparser.add_argument("-embedding_layer",
                           "--embedding_layer",
                           type=str,
                           help="Location of NN embedding (for a layer)")
    argparser.add_argument("-subject_number",
                           "--subject_number",
                           type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")
    argparser.add_argument(
        "-agg_type",
        "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str,
        default='avg')
    argparser.add_argument(
        "-random",
        "--random",
        action='store_true',
        default=False,
        help="True if initialize random brain activations, False if not")
    argparser.add_argument(
        "-rand_embed",
        "--rand_embed",
        action='store_true',
        default=False,
        help="True if initialize random embeddings, False if not")
    argparser.add_argument(
        "-glove",
        "--glove",
        action='store_true',
        default=False,
        help="True if initialize glove embeddings, False if not")
    argparser.add_argument(
        "-word2vec",
        "--word2vec",
        action='store_true',
        default=False,
        help="True if initialize word2vec embeddings, False if not")
    argparser.add_argument(
        "-bert",
        "--bert",
        action='store_true',
        default=False,
        help="True if initialize bert embeddings, False if not")
    argparser.add_argument(
        "-normalize",
        "--normalize",
        action='store_true',
        default=False,
        help="True if add normalization across voxels, False if not")
    argparser.add_argument("-permutation",
                           "--permutation",
                           action='store_true',
                           default=False,
                           help="True if permutation, False if not")
    argparser.add_argument(
        "-permutation_region",
        "--permutation_region",
        action='store_true',
        default=False,
        help="True if permutation by brain region, False if not")
    argparser.add_argument(
        "-which_layer",
        "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int,
        default=1)
    argparser.add_argument("-single_subject",
                           "--single_subject",
                           help="if single subject analysis",
                           action='store_true',
                           default=False)
    argparser.add_argument("-group_level",
                           "--group_level",
                           help="if group level analysis",
                           action='store_true',
                           default=False)
    argparser.add_argument("-searchlight",
                           "--searchlight",
                           help="if searchlight",
                           action='store_true',
                           default=True)
    argparser.add_argument("-fdr",
                           "--fdr",
                           help="if apply FDR",
                           action='store_true',
                           default=False)
    argparser.add_argument("-subjects",
                           "--subjects",
                           help="subject numbers",
                           type=str,
                           default="")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path",
        default="/n/shieber_lab/Lab/users/cjou/fmri/",
        type=str,
        help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path",
        default="/n/shieber_lab/Lab/users/cjou/",
        type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###
    args = argparser.parse_args()

    ### check conditions
    if not args.single_subject and not args.group_level:
        print("select analysis type: single subject or group level")
        exit()

    if args.fdr and args.single_subject and not args.searchlight:
        print(
            "error: FDR for single-subject analysis is only valid with searchlight")
        exit()

    if args.group_level and args.subjects == "":
        print("must specify subject numbers in group level analysis")
        exit()

    if not os.path.exists(str(args.to_save_path) + 'mat/'):
        os.makedirs(str(args.to_save_path) + 'mat/')

    if not args.glove and not args.word2vec and not args.bert and not args.rand_embed:
        embed_loc = args.embedding_layer
        file_name = embed_loc.split("/")[-1].split(".")[0]
        embedding = scipy.io.loadmat(embed_loc)
        embed_matrix = helper.get_embed_matrix(embedding)
    else:
        embed_loc = args.embedding_layer
        file_name = embed_loc.split("/")[-1].split(".")[0].split(
            "-")[-1] + "_layer" + str(args.which_layer)
        embed_matrix = pickle.load(open(embed_loc, "rb"))
        if args.word2vec:
            file_name += "word2vec"
        elif args.glove:
            file_name += "glove"
        elif args.bert:
            file_name += "bert"
        else:
            file_name += "random"

    if args.single_subject:

        if args.searchlight:
            search = "_searchlight"
        else:
            search = ""

        save_location = str(
            args.to_save_path) + "fdr/" + str(file_name) + "_subj" + str(
                args.subject_number) + str(search)
        volmask = pickle.load(
            open(
                str(args.to_save_path) + "subj" + str(args.subject_number) +
                "/volmask.p", "rb"))
        space_to_index_dict, index_to_space_dict, volmask_shape = get_spotlights(
            volmask)

        # 1. z-score
        print("z-scoring activations and embeddings...")
        individual_activations = pickle.load(
            open(
                "../../examplesGLM/subj" + str(args.subject_number) +
                "/individual_activations.p", "rb"))
        z_activations = helper.z_score(individual_activations)
        z_embeddings = helper.z_score(embed_matrix)

        # 2. calculate correlation
        print("calculating correlations...")
        z_activations = helper.add_bias(z_activations)
        z_embeddings = helper.add_bias(z_embeddings)
        correlations, pvals = calculate_pearson_correlation(
            args, z_activations, z_embeddings)

        # 3. evaluate significance
        print("evaluating significance...")
        valid_correlations, indices, num_voxels = evaluate_performance(
            args, correlations, pvals, space_to_index_dict,
            index_to_space_dict, volmask_shape)
        corrected_coordinates = get_2d_coordinates(valid_correlations, indices,
                                                   num_voxels)
        norm_coords = fix_coords_to_absolute_value(corrected_coordinates)
        _ = helper.transform_coordinates(norm_coords,
                                         volmask,
                                         save_location,
                                         "fdr",
                                         pvals=pvals)
        print("done.")

    if args.group_level:

        save_location = str(
            args.to_save_path) + "fdr/" + str(file_name) + "_group_analysis"
        subject_numbers = [
            int(subj_num) for subj_num in args.subjects.split(",")
        ]

        print("loading brain common space...")
        volmask = helper.load_common_space(subject_numbers)
        print("VOLMASK SHAPE: " + str(volmask.shape))
        space_to_index_dict, index_to_space_dict, volmask_shape = get_spotlights(
            volmask)

        print("returned shape: " + str(volmask_shape))

        # 1. get all data
        print("get all data...")
        fdr_corr_list = []
        for subj_num in tqdm(subject_numbers):
            print("adding subject: " + str(subj_num))
            file_name = str(args.to_save_path) + "fdr/" + str(
                args.agg_type) + "_layer" + str(
                    args.which_layer) + "bert_subj" + str(
                        subj_num) + "_searchlight-3dtransform-fdr"
            fdr_corr = scipy.io.loadmat(file_name + ".mat")
            fdr_corr_vals = fdr_corr["metric"]
            common_corr = np.ma.array(fdr_corr_vals, mask=volmask)
            fdr_corr_list.append(common_corr)

        # 2. average correlations and pvalues
        print("calculating correlations...")
        avg_corrs = np.mean(np.array(fdr_corr_list), axis=0)
        ttest_pval = np.apply_along_axis(ttest_voxels, 0,
                                         np.array(fdr_corr_list))

        # 3. save files
        print("saving files...")
        scipy.io.savemat(save_location + "-3dtransform-corr.mat",
                         dict(metric=avg_corrs))
        scipy.io.savemat(save_location + "-3dtransform-pvals.mat",
                         dict(metric=ttest_pval))
        print("done.")
    return
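
# Hedged sketches (assumptions, not the repo's definitions): evaluate_performance
# presumably applies Benjamini-Hochberg FDR to the voxelwise p-values, and
# ttest_voxels (applied along axis 0 in the group-level branch) presumably runs
# a one-sample t-test across subjects for each voxel. Minimal versions:
import numpy as np
from scipy import stats

def fdr_mask_sketch(pvals, alpha=0.05):
    # Benjamini-Hochberg: keep all p-values at or below the largest p_(k)
    # satisfying p_(k) <= alpha * k / m
    pvals = np.asarray(pvals)
    m = len(pvals)
    ranked = np.sort(pvals)
    passed = ranked <= alpha * np.arange(1, m + 1) / m
    cutoff = ranked[passed].max() if passed.any() else -np.inf
    return pvals <= cutoff

def ttest_voxels_sketch(subject_values):
    # p-value of a one-sample t-test against 0 for one voxel's
    # per-subject correlations
    return stats.ttest_1samp(subject_values, 0.0).pvalue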
def main():
    parser = argparse.ArgumentParser("calculate nested cv family model")
    parser.add_argument("-family",
                        "--family",
                        action='store_true',
                        default=False,
                        help="True if use RSA family")
    parser.add_argument("-bert",
                        "--bert",
                        action='store_true',
                        default=False,
                        help="True if bert")
    parser.add_argument("-aal",
                        "--aal",
                        action='store_true',
                        default=False,
                        help="True if use RSA aal")
    parser.add_argument("-local",
                        "--local",
                        action='store_true',
                        default=False,
                        help="True if local")
    parser.add_argument("-save_to_matlab",
                        "--save_to_matlab",
                        action='store_true',
                        default=False,
                        help="True if save to matlab")
    args = parser.parse_args()

    if args.family and args.bert:
        print("error: choose either family or bert")
        exit(1)

    if not args.family and not args.bert:
        print("error: choose at least family or bert")
        exit(1)

    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]
    print("finding common space...")
    common_space = helper.load_common_space(subjects, local=args.local)
    voxel_coordinates = np.transpose(np.nonzero(common_space))
    print("COMMON SPACE: " + str(common_space.shape))
    print("VOXEL COORDINATES: " + str(voxel_coordinates.shape))

    volmask, num_regions, labels, vals, file_name = helper.get_voxel_labels(
        args)

    vals_3d = helper.convert_np_to_matlab(vals, volmask)
    labels_vals = vals_3d[np.nonzero(common_space)]

    # get bayesian values
    print("concatenating files...")
    bor, pxp, family = concatenate_files(args)
    print("BOR SHAPE: " + str(bor.shape))
    print("PXP SHAPE: " + str(pxp.shape))
    print("FAMILY SHAPE: " + str(family.shape))

    if args.bert:
        file_name = "bert_only_" + file_name

    if args.save_to_matlab and args.bert:
        best_layers = np.argmax(pxp, axis=1)
        print("BEST_LAYERS SHAPE: " + str(best_layers.shape))
        vals_3d = helper.convert_np_to_matlab(best_layers, common_space)
        scipy.io.savemat("../nested_bert_layer.mat", dict(metric=vals_3d))

    # get significant values
    sig_bor = (bor < 0.05)
    sig_bor_3d = helper.convert_np_to_matlab(sig_bor, common_space)
    # plot_bors(bor[~np.isnan(bor)], "all_bor")
    sig_pvals_05 = np.array(sig_bor).astype(bool)

    if args.family:
        max_values = np.argmax(family, axis=1)
        best_bert = (max_values == 0)
        best_pretrained = (max_values == 1)
        best_opennmt = (max_values == 2)
        best_bert3d = helper.convert_np_to_matlab(best_bert, common_space)
        best_pretrained3d = helper.convert_np_to_matlab(
            best_pretrained, common_space)
        best_opennmt3d = helper.convert_np_to_matlab(best_opennmt,
                                                     common_space)
        for_bert = sig_bor_3d.astype(bool) & best_bert3d.astype(bool)
        for_pretrained = sig_bor_3d.astype(bool) & best_pretrained3d.astype(
            bool)
        for_opennmt = sig_bor_3d.astype(bool) & best_opennmt3d.astype(bool)
        scipy.io.savemat("../bms_best_bert.mat",
                         dict(vals=for_bert.astype(np.int16)))
        scipy.io.savemat("../bms_best_pretrained.mat",
                         dict(vals=for_pretrained.astype(np.int16)))
        scipy.io.savemat("../bms_best_opennmt.mat",
                         dict(vals=for_opennmt.astype(np.int16)))
        print("saved best family.")

    # print("bor_3d: " + str(bor_3d.shape))
    # print("sig_pvals_05: " + str(sig_pvals_05.shape))
    # sig05 = bor[sig_pvals_05]
    # sig05vals = sig05[np.nonzero(sig05)]
    # plot_bors(sig05vals[~np.isnan(sig05vals)], "significant_bor05")
    # sig_pvals_05 = (np.array(bor) < 1).astype(bool)
    scipy.io.savemat("../sig_bor_bms05.mat", dict(vals=sig_bor_3d))
    print("SIG PVALS SHAPE: " + str(sig_pvals_05.shape))

    print("aggregating...")
    # get values per region
    for region in tqdm(range(1, num_regions + 1)):
        indices_bool = (labels_vals == region).astype(bool)
        # print("INDICES SHAPE: " + str(indices_bool.shape))
        # print("NUM IN REGION: " + str(np.sum(indices_bool)))
        sig_indices_bol = np.array(indices_bool) & np.array(sig_pvals_05)
        # print("SIG INDICES SHAPE: " + str(sig_indices_bol.shape))
        # print("SiG NUM IN REGION: " + str(np.sum(sig_indices_bol)))
        indices = np.where(sig_indices_bol)[0]
        if args.family:
            region_vals = np.take(family, indices, axis=0)
            # print("DF REGION: " + str(region_vals.shape))
        if args.bert:
            all_model_region_vals = np.take(pxp, indices, axis=0)
            region_vals = all_model_region_vals[:, :12]
            # print("DF REGION: " + str(region_vals.shape))
        plot_count_graphs(args, region_vals, "llh",
                          file_name + str(labels[region - 1]))

    print("done.")
def mixed_effects_analysis(args, embed_matrix):
    # load common brain space
    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]
    num_sentences = 240
    common_space = helper.load_common_space(subjects, local=args.local)
    print("COMMON SPACE SHAPE: " + str(common_space.shape))
    voxel_coordinates = np.transpose(np.nonzero(common_space))
    num_voxels = len(voxel_coordinates)
    print("NUM VOXELS IN SHARED COMMON BRAIN SPACE: " + str(num_voxels))

    # initialize variables
    all_activations = []
    subj_number = []
    voxel_index = []

    # prepare model embeddings
    dim_labels = ['dim' + str(i) for i in range(embed_matrix.shape[1])]
    embed_matrix_pd = pd.DataFrame(embed_matrix, columns=dim_labels)
    print("EMBEDDINGS SHAPE: " + str(embed_matrix_pd.shape))
    embed_matrix_pd_repeat = pd.concat([embed_matrix_pd] * len(subjects),
                                       ignore_index=True)
    embed_matrix_pd_repeat.insert(0, 'bias', 1)
    print("REPEAT EMBEDDINGS SHAPE: " + str(embed_matrix_pd_repeat.shape))

    # get labels (formula terms, without a dangling trailing separator)
    labels = " + ".join(dim_labels)
    conditional_labels = " + ".join(label + " | subject_number"
                                    for label in dim_labels)

    # get data
    for subj in tqdm(subjects):
        if args.local:
            modified_activations = pickle.load(
                open(f"../examplesGLM/subj{subj}/modified_activations.p",
                     "rb"))
        else:
            modified_activations = pickle.load(
                open(
                    f"/n/shieber_lab/Lab/users/cjou/fmri/subj{subj}/modified_activations.p",
                    "rb"))

        norm_modified_activations = helper.z_score(
            np.array(modified_activations))
        activation_vals = np.array([
            modified_elem[np.nonzero(common_space)]
            for modified_elem in norm_modified_activations
        ])
        # print("ACTIVATIONS SHAPE: " + str(activation_vals.shape))
        flatten_activations = get_activations(activation_vals)
        # print("FLATTEN ACTIVATIONS SHAPE: " + str(flatten_activations.shape))
        all_activations.extend(flatten_activations)
        voxel_index.extend(list(range(num_voxels)) * num_sentences)
        subj_number.extend([subj] * num_voxels * num_sentences)
        del modified_activations
        del norm_modified_activations
        del activation_vals
        del flatten_activations

    print("ACTIVATIONS LENGTH: " + str(len(all_activations)))
    print("SUBJECT NUMBER LENGTH: " + str(len(subj_number)))
    print("VOXEL INDEX: " + str(len(voxel_index)))

    # create dataframe
    data = pd.DataFrame({
        'subject_number': subj_number,
        'voxel_index': voxel_index,
        'activations': all_activations
    })

    data_slice = data.loc[data["voxel_index"] == 0]
    print("DATA SLICE SHAPE: " + str(data_slice.shape))

    # per voxel
    rmses_per_voxel = []
    CHUNK = helper.chunkify(list(range(num_voxels)), args.batch_num,
                            args.total_batches)
    for v in tqdm(CHUNK):
        data_slice = data.loc[data["voxel_index"] == v].reset_index()
        # concat_pd = pd.concat([data_slice, embed_matrix_pd_repeat], axis=1)
        rmse = run_per_voxel(data_slice, embed_matrix_pd_repeat, labels)
        rmses_per_voxel.append(rmse)

    return rmses_per_voxel
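
# Hedged sketch (assumption): helper.chunkify presumably splits the voxel
# indices into total_batches roughly equal chunks and returns the batch_num-th
# one, so the expensive per-voxel fits can be spread across cluster jobs:
import math

def chunkify_sketch(lst, batch_num, total_batches):
    size = int(math.ceil(len(lst) / float(total_batches)))
    return lst[batch_num * size:(batch_num + 1) * size]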
def main():
    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]
    num_layers = 18
    threshold = 0.01

    common_space = helper.load_common_space(subjects, local=True)
    voxel_coordinates = np.transpose(np.nonzero(common_space))
    # print(voxel_coordinates.shape)

    print("concatenating files...")
    bors, pxps = concatenate_files()
    print("BORS SHAPE: " + str(bors.shape))
    print("PXPS SHAPE: " + str(pxps.shape))
    print("BORS: " + str(bors))

    # plt.clf()
    # sns.set(style="darkgrid")
    # plt.figure(figsize=(16, 9))
    # _ = plt.hist(bors, bins='auto')
    # plt.ylabel("count")
    # plt.xlabel("BOR")
    # plt.savefig("../all_bors_hist.png", bbox_inches='tight')

    # print(np.sum(bors <= threshold))
    # print(np.sum(bors > threshold))
    # print(np.min(bors))

    # total = 0
    # all_pxps = []
    # for coord_index in tqdm(range(len(voxel_coordinates))):
    # 	x,y,z = voxel_coordinates[coord_index]
    # 	get_pxp = np.max(pxps[coord_index])
    # 	if get_pxp > .9:
    # 		total+=1
    # 	all_pxps.append(get_pxp)

    # plt.clf()
    # sns.set(style="darkgrid")
    # plt.figure(figsize=(16, 9))
    # _ = plt.hist(all_pxps, bins='auto')
    # # plt.xlim(0,1)
    # plt.ylabel("count")
    # plt.xlabel("maximum PXP per voxel")
    # plt.savefig("../all_pxp_values_hist.png", bbox_inches='tight')
    # print("TOTAL: " + str(total))
    # exit()

    a, b, c = common_space.shape
    mapped_space = np.zeros((a, b, c))
    all_layer_space = np.zeros((num_layers, a, b, c))
    print("ALL LAYER SPACE: " + str(all_layer_space.shape))

    # 121099
    print("creating maps...")
    for coord_index in tqdm(range(len(voxel_coordinates))):
        x, y, z = voxel_coordinates[coord_index]
        # if bors[coord_index] < threshold:
        # 	print(bors[coord_index])
        # map a voxel to its argmax layer only when its best PXP exceeds 0.9
        if np.max(pxps[coord_index]) > 0.9:
            mapped_space[x][y][z] = np.argmax(pxps[coord_index]) + 1
        for layer in range(num_layers):
            all_layer_space[layer][x][y][z] = pxps[coord_index][layer]

    print("MAPPED SPACE: " + str(mapped_space.shape))
    total_voxels = []
    for layer in range(1, num_layers + 1):
        total = np.sum(mapped_space == layer)
        total_voxels.append(total)

    df = pd.DataFrame({
        'layer': list(range(1, num_layers + 1)),
        'num_voxels': total_voxels
    })

    # print(df.head())
    # sns.set(style="darkgrid")
    # plt.figure(figsize=(16, 9))
    # g = sns.catplot(x="layer", y="num_voxels", kind = "bar", color="cornflowerblue", data=df)
    # plt.savefig("../best_voxel_hist.png", bbox_inches='tight')
    # plt.show()

    scipy.io.savemat("../significant_pval_all_best_llh_by_voxel.mat",
                     dict(metric=mapped_space))

    # for layer in range(num_layers):
    # 	scipy.io.savemat("../significant_all_best_llh_by_voxel_layer" + str(layer+1) + ".mat", dict(metric = all_layer_space[layer]))

    print("done.")
def main():
	argparser = argparse.ArgumentParser(description="layer and subject group level comparison")
	argparser.add_argument("-subject_number", "--subject_number", type=int, default=1,
						   help="subject number (fMRI data) for decoding")

	### SPECIFY MODEL PARAMETERS ###
	argparser.add_argument("-cross_validation", "--cross_validation", help="Add flag if add cross validation",
						   action='store_true', default=True)
	argparser.add_argument("-brain_to_model", "--brain_to_model", help="Add flag if regressing brain to model",
						   action='store_true', default=False)
	argparser.add_argument("-model_to_brain", "--model_to_brain", help="Add flag if regressing model to brain",
						   action='store_true', default=True)
	argparser.add_argument("-agg_type", "--agg_type", help="Aggregation type ('avg', 'max', 'min', 'last')", type=str,
						   default='avg')
	argparser.add_argument("-language", "--language",
						   help="Target language ('spanish', 'german', 'italian', 'french', 'swedish')", type=str,
						   default='spanish')
	argparser.add_argument("-num_layers", "--num_layers", help="Total number of layers ('2', '4')", type=int, required=True)
	argparser.add_argument("-random", "--random", action='store_true', default=False,
						   help="True if initialize random brain activations, False if not")
	argparser.add_argument("-rand_embed", "--rand_embed", action='store_true', default=False,
						   help="True if initialize random embeddings, False if not")
	argparser.add_argument("-glove", "--glove", action='store_true', default=False,
						   help="True if initialize glove embeddings, False if not")
	argparser.add_argument("-word2vec", "--word2vec", action='store_true', default=False,
						   help="True if initialize word2vec embeddings, False if not")
	argparser.add_argument("-bert", "--bert", action='store_true', default=True,
						   help="True if initialize bert embeddings, False if not")
	argparser.add_argument("-normalize", "--normalize", action='store_true', default=False,
						   help="True if add normalization across voxels, False if not")
	argparser.add_argument("-permutation", "--permutation", action='store_true', default=False,
						   help="True if permutation, False if not")
	argparser.add_argument("-permutation_region", "--permutation_region", action='store_true', default=False,
						   help="True if permutation by brain region, False if not")

	### PLOTTING ###
	argparser.add_argument("-which_layer", "--which_layer", help="Layer of interest in [1: total number of layers]",
						   type=int, default=1)

	### SPECIFY FOR SINGLE SUBJECT OR GROUP LEVEL ANALYSIS ###
	argparser.add_argument("-single_subject", "--single_subject", help="if single subject analysis",
						   action='store_true', default=False)
	argparser.add_argument("-group_level", "--group_level", help="if group level analysis", action='store_true',
						   default=False)
	argparser.add_argument("-searchlight", "--searchlight", help="if searchlight", action='store_true', default=False)
	
	### SPECIFY FOR ONE LAYER OR DIFFERENCE IN LAYERS ###
	argparser.add_argument("-single_layer", "--single_layer", help="if single layer significance",
						   action='store_true', default=False)
	argparser.add_argument("-across_layer", "--across_layer", help="if across layer depth significance",
						   action='store_true', default=False)

	### SPECIFY WHICH METRIC ### 
	argparser.add_argument("-fdr", "--fdr", help="if apply FDR", action='store_true', default=False)
	argparser.add_argument("-llh", "--llh", action='store_true', default=False,
						   help="True if calculate likelihood, False if not")
	argparser.add_argument("-ranking", "--ranking", action='store_true', default=False,
						   help="True if calculate ranking, False if not")
	argparser.add_argument("-rmse", "--rmse", action='store_true', default=False,
						   help="True if calculate rmse, False if not")
	argparser.add_argument("-rsa", "--rsa", action='store_true', default=False,
						   help="True if calculate rsa, False if not")

	argparser.add_argument("-nested",  "--nested", action='store_true', default=True, help="True if running nested")
	argparser.add_argument("-local",  "--local", action='store_true', default=False, help="True if running locally")
	argparser.add_argument("-save_by_voxel",  "--save_by_voxel", action='store_true', default=False, help="True if save by voxel")
	argparser.add_argument("-compare_models",  "--compare_models", action='store_true', default=False, help="True if compare models")
	args = argparser.parse_args()

	if args.num_layers != 12 and args.bert:
		print("error: please ensure bert has 12 layers")
		exit()

	if args.num_layers != 1 and (args.word2vec or args.random or args.permutation or args.glove):
		print("error: please ensure baseline has 1 layerc")
		exit()

	if not args.fdr and not args.llh and not args.ranking and not args.rmse:
		print("error: select at least 1 metric of correlation")
		exit()

	print("NUMBER OF LAYERS: " + str(args.num_layers))
	subjects = [1,2,4,5,7,8,9,10,11]

	print("getting common brain space")
	common_space = helper.load_common_space(subjects, local=args.local)
	voxel_coordinates = np.transpose(np.nonzero(common_space))
	print(voxel_coordinates.shape)
	direction, validate, rlabel, elabel, glabel, w2vlabel, bertlabel, plabel, prlabel = helper.generate_labels(args)

	if args.fdr:
		metric = "fdr"
	if args.rmse:
		metric = "rmse"
	if args.rsa:
		metric = "rsa"
	if args.ranking:
		metric = "ranking"
	if args.llh:
		metric = "llh"

	if args.single_subject and args.across_layer:
		first = True
		for layer_num in list(range(1, args.num_layers + 1)):
			print("generating file names...")
			layer_file_name = generate_file_name(args, args.subject_number, layer_num)

			print("retrieving file contents...")
			layer, pvals = get_file(args, args.subject_number, layer_file_name)

			if first:
				updated_brain = layer
				best_layer = layer.astype(bool) * layer_num
				first = False
				mask = layer.astype(bool)
			else:
				if args.llh or args.ranking:
					max_vals = np.maximum(updated_brain, layer)
					# temp = np.minimum(updated_brain, layer)
					# print("SAME: " + str(np.sum(np.equal(max_vals, temp).astype(bool) * mask)))
				elif args.rmse:
					max_vals = np.minimum(updated_brain, layer)
				else:
					print("select llh, ranking, or rmse")
					exit()

				from_layer = np.equal(max_vals, layer).astype(bool) * mask * layer_num
				temp_best_layer = np.equal(max_vals, updated_brain).astype(bool) * mask * best_layer
				best_layer = np.maximum(from_layer, temp_best_layer)
				# print("NEW: " + str(np.sum(from_layer.astype(bool))))
				# print("OLD: " + str(np.sum(temp_best_layer.astype(bool))))
				updated_brain = max_vals

		if args.bert:
			file_name = "bert{}{}subj{}_{}".format(
						direction,
						validate,
						args.subject_number,
						args.agg_type,
					)
		else:
			specific_file = str(plabel) + str(prlabel) + str(rlabel) + str(elabel) + str(glabel) + str(w2vlabel) + str(bertlabel) + str(direction) + str(validate) + "-subj{}-parallel-english-to-{}-model-{}layer-{}-pred-layer-{}"
			file_name = specific_file.format(
				args.subject_number,
				args.language,
				args.num_layers,
				"brnn",
				args.agg_type
			)

		print("BEST LAYER")
		total = 0 
		for layer in range(1, args.num_layers + 1):
			print("LAYER " + str(layer))
			print(np.sum(best_layer == layer))
			total += np.sum(best_layer == layer)
		print("TOTAL: " + str(total))

		scipy.io.savemat("../" + str(file_name) + "_best_" + str(metric) + ".mat", dict(metric = best_layer.astype(np.int16)))

	if args.save_by_voxel:
		
		per_layer = []
		for layer_num in tqdm(list(range(1, args.num_layers + 1))):
			per_subject = []
			for subj_num in subjects:
				layer_file_name = generate_file_name(args, subj_num, layer_num)
				layer, _ = get_file(args, subj_num, layer_file_name)
				voxel_values = layer[np.nonzero(common_space)]
				# print("LENGTH: " + str(len(voxel_values)))
				per_subject.append(voxel_values)
			print("PER LAYER: ")
			print(np.array(per_subject).shape)
			per_layer.append(0.5 * np.transpose(np.array(per_subject)))
			print(np.array(per_layer).shape)
		print("BEFORE BASELINE")
		print(np.array(per_layer).shape)
		# add other embeddings

		if args.compare_models:
			for options in [[True, False], [False, True], [False, False]]:
				a,b = options
				if a == False and b == False:
					nmt_layers = 4
				else:
					nmt_layers = 1

				for layer_num in tqdm(list(range(1, nmt_layers + 1))):
					per_subject = []
					a, b = options
					for subj_num in subjects:
						layer_file_name = generate_file_name(args, subj_num, layer_num, baseline=True, glove=a, word2vec=b)
						layer, _ = get_file(args, subj_num, layer_file_name)
						voxel_values = layer[np.nonzero(common_space)]
						per_subject.append(voxel_values)
					per_layer.append(0.5 * np.transpose(np.array(per_subject)))

		print("AFTER BASELINE")
		print(np.array(per_layer).shape)
		print("AT THE END: ")
		print(np.array(per_layer).shape)
		per_voxel = np.stack( per_layer, axis=-1 )
		print(per_voxel.shape)
		print(per_voxel[0].shape)
		print(per_voxel[0])
		scipy.io.savemat("../mfit/nested_all_best_" + str(metric) + "_by_voxel.mat", dict(metric = per_voxel.astype(np.float32)))

	print("done.")
	return
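
# Illustrative toy check (not part of the pipeline) of the best-layer
# bookkeeping in the across-layer branch above: np.maximum keeps the running
# best score and np.equal recovers which layer produced it.
import numpy as np
layer1_scores = np.array([0.2, 0.9])
layer2_scores = np.array([0.5, 0.1])
best_layer_demo = np.ones_like(layer1_scores)            # layer 1 wins initially
max_vals_demo = np.maximum(layer1_scores, layer2_scores)
from_new = np.equal(max_vals_demo, layer2_scores) * 2    # voxels won by layer 2
from_old = np.equal(max_vals_demo, layer1_scores) * best_layer_demo
best_layer_demo = np.maximum(from_new, from_old)         # -> array([2., 1.])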
def main():

    argparser = argparse.ArgumentParser(
        description="plot initial activations by location")
    argparser.add_argument("-subject_number",
                           "--subject_number",
                           type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")
    argparser.add_argument("-subjects",
                           "--subjects",
                           help="subject numbers",
                           type=str,
                           default="")
    argparser.add_argument("-local",
                           "--local",
                           action='store_true',
                           default=False,
                           help="True if local False if not")
    argparser.add_argument("-brain_map",
                           "--brain_map",
                           action='store_true',
                           default=False,
                           help="True if for 3d brain map if not")
    argparser.add_argument("-hist",
                           "--hist",
                           action='store_true',
                           default=False,
                           help="True if for histogram of voxels if not")
    argparser.add_argument("-sentences",
                           "--sentences",
                           help="sentence numbers in numbers with commas",
                           type=str,
                           default="",
                           required=True)
    argparser.add_argument("-aal",
                           "--aal",
                           action='store_true',
                           default=False,
                           help="True if all brain AAL regions False if not")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "-fmri_path",
        "--fmri_path",
        default="/n/shieber_lab/Lab/users/cjou/fmri/",
        type=str,
        help="file path to fMRI data on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###

    args = argparser.parse_args()

    if args.brain_map:
        subject_numbers = [
            int(subj_num) for subj_num in args.subjects.split(",")
        ]
        sentence_numbers = [
            int(sent_num) for sent_num in args.sentences.split(",")
        ]

        print("finding common brain space...")
        volmask = helper.load_common_space(subject_numbers, local=args.local)

        print("getting all activations...")
        activations_list = []
        for subj_num in tqdm(subject_numbers):
            print("adding subject: " + str(subj_num))
            if args.local:
                file_name = "../examplesGLM/subj" + str(
                    subj_num) + "/modified_activations.p"
            else:
                file_name = "/n/shieber_lab/Lab/users/cjou/fmri/subj" + str(
                    subj_num) + "/modified_activations.p"
            print("FILE NAME: " + str(file_name))
            activations = pickle.load(open(file_name, "rb"))
            if len(sentence_numbers) > 0 and subj_num == 1:
                for sent_num in sentence_numbers:
                    scipy.io.savemat(
                        "../mat/subj" + str(subj_num) +
                        "_initial_activations_sentence" +
                        str(sent_num) + ".mat",
                        dict(metric=np.array(activations)[sent_num - 1]))
            avg_acts_per_subject = np.mean(np.array(activations), axis=0)
            scipy.io.savemat(
                "../mat/subj" + str(subj_num) + "_initial_activations.mat",
                dict(metric=avg_acts_per_subject))
            common_act = np.ma.array(avg_acts_per_subject, mask=volmask)
            activations_list.append(common_act)

        print("saving average activations...")
        across_brain = np.mean(np.array(activations_list), axis=0)
        scipy.io.savemat("../mat/common_space_initial_activations.mat",
                         dict(metric=across_brain))

    elif args.hist:
        # load mask, activations, and region labels locally or from the cluster
        if args.local:
            base_path = "../examplesGLM/subj" + str(args.subject_number)
        else:
            base_path = "{}subj{}".format(args.fmri_path, args.subject_number)
        volmask = pickle.load(open(base_path + "/volmask.p", "rb"))
        activations = pickle.load(open(base_path + "/activations.p", "rb"))
        atlas_labels = pickle.load(open(base_path + "/atlas_labels.p", "rb"))
        roi_labels = pickle.load(open(base_path + "/roi_labels.p", "rb"))

        final_roi_labels = helper.compare_labels(roi_labels, volmask, roi=True)
        final_atlas_labels = helper.compare_labels(atlas_labels, volmask)

        avg = np.nanmean(activations, axis=0)

        df_dict = {
            'voxel_index': list(range(len(avg))),
            'activations': avg,
            'atlas_labels': final_atlas_labels,
            'roi_labels': final_roi_labels
        }

        df = pd.DataFrame(df_dict)

        # PLOT ATLAS
        if args.aal:
            plot_voxel_num(df, "atlas_labels")
        else:
            plot_voxel_num(df, "roi_labels")
    else:
        # get atlas and roi
        if args.local:
            volmask = pickle.load(
                open(f"../examplesGLM/subj{args.subject_number}/volmask.p",
                     "rb"))
            activations = pickle.load(
                open(
                    "../examplesGLM/subj" + str(args.subject_number) +
                    "/activations.p", "rb"))
            atlas_vals = pickle.load(
                open(
                    "../examplesGLM/subj" + str(args.subject_number) +
                    "/atlas_vals.p", "rb"))
            atlas_labels = pickle.load(
                open(
                    "../examplesGLM/subj" + str(args.subject_number) +
                    "/atlas_labels.p", "rb"))
            roi_vals = pickle.load(
                open(
                    "../examplesGLM/subj" + str(args.subject_number) +
                    "/roi_vals.p", "rb"))
            roi_labels = pickle.load(
                open(
                    "../examplesGLM/subj" + str(args.subject_number) +
                    "/roi_labels.p", "rb"))
        else:
            volmask = pickle.load(
                open(
                    "{}subj{}/volmask.p".format(args.fmri_path,
                                                args.subject_number), "rb"))
            activations = pickle.load(
                open(
                    "{}subj{}/activations.p".format(args.fmri_path,
                                                    args.subject_number),
                    "rb"))
            atlas_vals = pickle.load(
                open(
                    "{}subj{}/atlas_vals.p".format(args.fmri_path,
                                                   args.subject_number), "rb"))
            atlas_labels = pickle.load(
                open(
                    "{}subj{}/atlas_labels.p".format(args.fmri_path,
                                                     args.subject_number),
                    "rb"))
            roi_vals = pickle.load(
                open(
                    "{}subj{}/roi_vals.p".format(args.fmri_path,
                                                 args.subject_number), "rb"))
            roi_labels = pickle.load(
                open(
                    "{}subj{}/roi_labels.p".format(args.fmri_path,
                                                   args.subject_number), "rb"))

        print("INITIAL:")
        print(len(atlas_vals))
        print(len(atlas_labels))
        print(len(roi_vals))
        print(len(roi_labels))

        final_roi_labels = helper.compare_labels(roi_labels, volmask, roi=True)
        final_atlas_labels = helper.compare_labels(atlas_labels, volmask)
        # final_roi_labels = clean_roi(roi_vals, roi_labels)
        # at_labels = clean_atlas(atlas_vals, atlas_labels)

        print("CLEANING")
        print(len(final_roi_labels))
        print(len(final_atlas_labels))

        if not os.path.exists('../visualizations/'):
            os.makedirs('../visualizations/')

        # make dataframe
        print(len(activations))
        print(len(final_atlas_labels))
        print(len(final_roi_labels))

        create_per_brain_region(activations, args, final_atlas_labels,
                                final_roi_labels)
        # create_per_sentence(activations, args, final_atlas_labels, final_roi_labels)

    print("done.")

    return
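
# Hedged sketch (assumption): plot_voxel_num presumably counts voxels per
# region label and saves a bar chart, in the style of the other plots in this
# repo:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_voxel_num_sketch(df, label_col):
    plt.clf()
    sns.set(style="darkgrid")
    plt.figure(figsize=(16, 9))
    sns.countplot(y=label_col, data=df, color="cornflowerblue")
    plt.xlabel("number of voxels")
    plt.savefig("../visualizations/voxel_counts_" + label_col + ".png",
                bbox_inches='tight')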
def main():
    parser = argparse.ArgumentParser("calculate nested cv model significance")
    parser.add_argument("-count",
                        "--count",
                        action='store_true',
                        default=False,
                        help="use counter")
    parser.add_argument("-aal",
                        "--aal",
                        action='store_true',
                        default=False,
                        help="True if use RSA aal")
    parser.add_argument("-local",
                        "--local",
                        action='store_true',
                        default=False,
                        help="True if local")
    parser.add_argument("-use_cache",
                        "--use_cache",
                        action='store_true',
                        default=False,
                        help="True if use cache pval")
    parser.add_argument("-avg",
                        "--avg",
                        action='store_true',
                        default=False,
                        help="True if avg")
    args = parser.parse_args()

    subjects = [1, 2, 4, 5, 7, 8, 9, 10, 11]
    print("finding common space...")
    common_space = helper.load_common_space(subjects, local=args.local)
    voxel_coordinates = np.transpose(np.nonzero(common_space))
    print("COMMON SPACE: " + str(common_space.shape))
    print("VOXEL COORDINATES: " + str(voxel_coordinates.shape))

    volmask, num_regions, labels, vals, file_name = helper.get_voxel_labels(
        args)

    vals_3d = helper.convert_np_to_matlab(vals, volmask)
    labels_vals = vals_3d[np.nonzero(common_space)]

    # get values
    print("getting values...")
    bert_num_layers = 12
    opennmt_num_layers = 4

    df_full = []
    for subj in subjects:
        df_subj = []
        df_subj = get_model_contents(args, df_subj, subj, bert_num_layers,
                                     "bert", common_space)
        df_subj = get_model_contents(args, df_subj, subj, 1, "glove",
                                     common_space)
        df_subj = get_model_contents(args, df_subj, subj, 1, "word2vec",
                                     common_space)
        df_subj = get_model_contents(args, df_subj, subj, opennmt_num_layers,
                                     "opennmt", common_space)
        df_full.append(np.transpose(df_subj))

    df_full = np.stack(df_full, axis=1)
    print("DF FULL SHAPE: " + str(df_full.shape))

    print("calculate significant voxels...")
    if args.use_cache:
        sig_pvals = pickle.load(open("../sig_pvals.p", "rb"))
    else:
        sig_pvals = calculate_anova(df_full)
        pickle.dump(sig_pvals, open("../sig_pvals.p", "wb"))

    sig_pvals_05 = (np.array(sig_pvals) < 0.05).astype(bool)

    print("aggregating...")
    # get values per region
    for region in tqdm(range(1, num_regions + 1)):
        indices_bool = (labels_vals == region).astype(bool)
        # print("INDICES SHAPE: " + str(indices_bool.shape))
        # print("NUM IN REGION: " + str(np.sum(indices_bool)))
        sig_indices_bol = np.array(indices_bool) & np.array(sig_pvals_05)
        # print("SIG INDICES SHAPE: " + str(sig_indices_bol.shape))
        # print("SiG NUM IN REGION: " + str(np.sum(sig_indices_bol)))
        indices = np.where(sig_indices_bol)[0]
        region_vals = np.take(df_full, indices, axis=0)
        # print("DF REGION: " + str(region_vals.shape))
        if args.avg:
            avg_region_vals = np.mean(region_vals, axis=0)
            # print("DF REGION AVG: " + str(avg_region_vals.shape))
            avg_region_df = calculate_avg_across_models(avg_region_vals)
            # print("AVERAGE MODELS: " + str(np.array(avg_region_df).shape))
            df = pd.DataFrame(-avg_region_df,
                              columns=['bert', 'baseline', 'opennmt'])
            df = df.replace([np.inf, -np.inf], np.nan).dropna(axis=0)
            plot_graphs(args, df, "llh", file_name + str(labels[region - 1]))
        if args.count:
            plot_count_graphs(args, region_vals, "llh",
                              file_name + str(labels[region - 1]))

    print("done.")