Beispiel #1
0
    def __init__(self, no_of_layers, no_of_neurons, epochs, learning_rate,
                 bias, xy, activation):
        """Store hyper-parameters and data splits, then build and train.

        Args:
            no_of_layers: stored as ``self.no_of_layers``.
            no_of_neurons: list that is concatenated between the feature
                count and the class count to form ``self.dim_of_weights``.
            epochs: stored as ``self.epochs``.
            learning_rate: stored as ``self.lr``.
            bias: when truthy, ``hp.add_bias`` is applied to both the
                training and the test inputs.
            xy: indexable holding, in order, ``x_train``, ``x_test``,
                ``y_train`` and ``y_test``.
            activation: stored as ``self.activation``.

        Side effects:
            Calls ``self.init_network()`` and then ``self.fit()``, so
            constructing the object also trains it.
        """
        # NOTE(review): both lines read the attribute they assign. They only
        # work if ``no_of_features`` / ``no_of_classes`` already exist on the
        # class (then they are copied onto the instance); otherwise this
        # raises AttributeError. Confirm against the class definition.
        self.no_of_features = self.no_of_features
        self.no_of_classes = self.no_of_classes

        self.no_of_layers = no_of_layers
        self.no_of_neurons = no_of_neurons
        self.epochs = epochs
        self.lr = learning_rate
        self.bias = bias
        self.activation = activation

        self.x_train = xy[0]
        self.x_test = xy[1]
        self.y_train = xy[2]
        self.y_test = xy[3]

        if self.bias:
            self.x_train = hp.add_bias(self.x_train)
            self.x_test = hp.add_bias(self.x_test)

        # Layer sizes from input to output: features, hidden widths, classes.
        self.dim_of_weights = [self.no_of_features
                               ] + self.no_of_neurons + [self.no_of_classes]

        self.init_network()
        self.fit()
Beispiel #2
0
    def update(self):
        """Apply one additive weight update to every layer.

        The first weight matrix is updated from the last entry of
        ``self.delta`` and the training inputs. Each following matrix ``k``
        is updated from ``self.delta[-(k + 1)]`` and the stored output
        ``self.f[k - 1]``. When ``self.bias`` is truthy, ``self.f[k - 1]``
        is replaced in place by ``hp.add_bias(self.f[k - 1])`` first.
        """
        step = self.lr

        input_grad = np.dot(self.delta[-1].T, self.x_train)
        self.weights[0] = self.weights[0] + step * input_grad

        for layer in range(1, self.no_of_layers + 1):
            prev = layer - 1
            if self.bias:
                # The augmented activations are written back to self.f.
                self.f[prev] = hp.add_bias(self.f[prev])
            layer_grad = np.dot(self.delta[-(layer + 1)].T, self.f[prev])
            self.weights[layer] = self.weights[layer] + step * layer_grad
def main():
    """Parse the command line and run the FDR significance analysis.

    Depending on the flags, runs a single-subject analysis
    (``--single_subject``), a group-level analysis (``--group_level``),
    or both in that order. Results are written below ``--to_save_path``.
    The process exits early (after printing a message) when the flag
    combination is rejected by the checks below.
    """
    argparser = argparse.ArgumentParser(
        description="FDR significance thresholding for single subject")
    argparser.add_argument("-embedding_layer",
                           "--embedding_layer",
                           type=str,
                           help="Location of NN embedding (for a layer)")
    argparser.add_argument("-subject_number",
                           "--subject_number",
                           type=int,
                           default=1,
                           help="subject number (fMRI data) for decoding")
    argparser.add_argument(
        "-agg_type",
        "--agg_type",
        help="Aggregation type ('avg', 'max', 'min', 'last')",
        type=str,
        default='avg')
    argparser.add_argument(
        "-random",
        "--random",
        action='store_true',
        default=False,
        help="True if initialize random brain activations, False if not")
    argparser.add_argument(
        "-rand_embed",
        "--rand_embed",
        action='store_true',
        default=False,
        help="True if initialize random embeddings, False if not")
    argparser.add_argument(
        "-glove",
        "--glove",
        action='store_true',
        default=False,
        help="True if initialize glove embeddings, False if not")
    argparser.add_argument(
        "-word2vec",
        "--word2vec",
        action='store_true',
        default=False,
        help="True if initialize word2vec embeddings, False if not")
    argparser.add_argument(
        "-bert",
        "--bert",
        action='store_true',
        default=False,
        help="True if initialize bert embeddings, False if not")
    argparser.add_argument(
        "-normalize",
        "--normalize",
        action='store_true',
        default=False,
        help="True if add normalization across voxels, False if not")
    argparser.add_argument("-permutation",
                           "--permutation",
                           action='store_true',
                           default=False,
                           help="True if permutation, False if not")
    argparser.add_argument(
        "-permutation_region",
        "--permutation_region",
        action='store_true',
        default=False,
        help="True if permutation by brain region, False if not")
    argparser.add_argument(
        "-which_layer",
        "--which_layer",
        help="Layer of interest in [1: total number of layers]",
        type=int,
        default=1)
    argparser.add_argument("-single_subject",
                           "--single_subject",
                           help="if single subject analysis",
                           action='store_true',
                           default=False)
    argparser.add_argument("-group_level",
                           "--group_level",
                           help="if group level analysis",
                           action='store_true',
                           default=False)
    # NOTE(review): store_true combined with default=True means this option
    # is always True and cannot be disabled from the command line, so the
    # "not args.searchlight" check further down can never trigger. Left
    # unchanged to keep the CLI stable; confirm the intended default.
    argparser.add_argument("-searchlight",
                           "--searchlight",
                           help="if searchlight",
                           action='store_true',
                           default=True)
    argparser.add_argument("-fdr",
                           "--fdr",
                           help="if apply FDR",
                           action='store_true',
                           default=False)
    argparser.add_argument("-subjects",
                           "--subjects",
                           help="subject numbers",
                           type=str,
                           default="")

    ### UPDATE FILE PATHS HERE ###
    argparser.add_argument(
        "--fmri_path",
        default="/n/shieber_lab/Lab/users/cjou/fmri/",
        type=str,
        help="file path to fMRI data on the Odyssey cluster")
    argparser.add_argument(
        "--to_save_path",
        default="/n/shieber_lab/Lab/users/cjou/",
        type=str,
        help="file path to and create rmse/ranking/llh on the Odyssey cluster")
    ### UPDATE FILE PATHS HERE ###
    args = argparser.parse_args()

    ### check conditions
    if not args.single_subject and not args.group_level:
        print("select analysis type: single subject or group level")
        exit()

    if args.fdr and args.single_subject and not args.searchlight:
        print(
            "not valid application of FDR to single subject with searchlight")
        exit()

    if args.group_level and args.subjects == "":
        print("must specify subject numbers in group level analysis")
        exit()

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(str(args.to_save_path) + 'mat/', exist_ok=True)

    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point the path arguments at trusted data.
    embed_loc = args.embedding_layer
    uses_named_embedding = (args.glove or args.word2vec or args.bert
                            or args.rand_embed)
    if not uses_named_embedding:
        # Default case: the embedding is stored as a .mat file.
        file_name = embed_loc.split("/")[-1].split(".")[0]
        embedding = scipy.io.loadmat(embed_loc)
        embed_matrix = helper.get_embed_matrix(embedding)
    else:
        # Named embeddings are stored as pickles.
        file_name = embed_loc.split("/")[-1].split(".")[0].split(
            "-")[-1] + "_layer" + str(args.which_layer)
        with open(embed_loc, "rb") as embed_file:
            embed_matrix = pickle.load(embed_file)
        if args.word2vec:
            file_name += "word2vec"
        elif args.glove:
            file_name += "glove"
        elif args.bert:
            file_name += "bert"
        else:
            file_name += "random"

    if args.single_subject:

        if args.searchlight:
            search = "_searchlight"
        else:
            search = ""

        save_location = str(
            args.to_save_path) + "fdr/" + str(file_name) + "_subj" + str(
                args.subject_number) + str(search)
        volmask_path = (str(args.to_save_path) + "subj" +
                        str(args.subject_number) + "/volmask.p")
        with open(volmask_path, "rb") as volmask_file:
            volmask = pickle.load(volmask_file)
        space_to_index_dict, index_to_space_dict, volmask_shape = get_spotlights(
            volmask)

        # 1. z-score
        print("z-scoring activations and embeddings...")
        activations_path = ("../../examplesGLM/subj" +
                            str(args.subject_number) +
                            "/individual_activations.p")
        with open(activations_path, "rb") as activations_file:
            individual_activations = pickle.load(activations_file)
        z_activations = helper.z_score(individual_activations)
        z_embeddings = helper.z_score(embed_matrix)

        # 2. calculate correlation
        print("calculating correlations...")
        z_activations = helper.add_bias(z_activations)
        z_embeddings = helper.add_bias(z_embeddings)
        correlations, pvals = calculate_pearson_correlation(
            args, z_activations, z_embeddings)

        # 3. evaluate significance
        print("evaluating significance...")
        valid_correlations, indices, num_voxels = evaluate_performance(
            args, correlations, pvals, space_to_index_dict,
            index_to_space_dict, volmask_shape)
        corrected_coordinates = get_2d_coordinates(valid_correlations, indices,
                                                   num_voxels)
        norm_coords = fix_coords_to_absolute_value(corrected_coordinates)
        _ = helper.transform_coordinates(norm_coords,
                                         volmask,
                                         save_location,
                                         "fdr",
                                         pvals=pvals)
        print("done.")

    if args.group_level:

        save_location = str(
            args.to_save_path) + "fdr/" + str(file_name) + "_group_analysis"
        subject_numbers = [
            int(subj_num) for subj_num in args.subjects.split(",")
        ]

        print("loading brain common space...")
        volmask = helper.load_common_space(subject_numbers)
        print("VOLMASK SHAPE: " + str(volmask.shape))
        space_to_index_dict, index_to_space_dict, volmask_shape = get_spotlights(
            volmask)

        print("returned shape: " + str(volmask_shape))

        # 1. get all data
        print("get all data...")
        fdr_corr_list = []
        for subj_num in tqdm(subject_numbers):
            print("adding subject: " + str(subj_num))
            # Build the path from the subject being iterated; the previous
            # code used args.subject_number and therefore loaded the same
            # subject's file on every iteration.
            subj_file = str(args.to_save_path) + "fdr/" + str(
                args.agg_type) + "_layer" + str(
                    args.which_layer) + "bert_subj" + str(
                        subj_num) + "_searchlight-3dtransform-fdr"
            fdr_corr = scipy.io.loadmat(subj_file + ".mat")
            fdr_corr_vals = fdr_corr["metric"]
            common_corr = np.ma.array(fdr_corr_vals, mask=volmask)
            fdr_corr_list.append(common_corr)

        # 2. average correlations and pvalues
        print("calculating correlations...")
        stacked_corrs = np.array(fdr_corr_list)
        avg_corrs = np.mean(stacked_corrs, axis=0)
        ttest_pval = np.apply_along_axis(ttest_voxels, 0, stacked_corrs)

        # 3. save files
        print("saving files...")
        scipy.io.savemat(save_location + "-3dtransform-corr.mat",
                         dict(metric=avg_corrs))
        scipy.io.savemat(save_location + "-3dtransform-pvals.mat",
                         dict(metric=ttest_pval))
        print("done.")
def linear_model(embed_matrix, spotlight_activations, args, kfold_split,
                 alpha):
    """Fit a cross-validated ridge regression between two data sets.

    Args:
        embed_matrix: array-like of embeddings.
        spotlight_activations: array-like of brain activations.
        args: namespace; the attributes read are ``brain_to_model``,
            ``cross_validation``, ``add_bias``, ``permutation``, ``llh``,
            ``ranking`` and ``model_to_brain``.
        kfold_split: number of folds for the shuffled ``KFold`` split.
        alpha: unused; the penalty is picked per fold by ``RidgeCV``.

    Returns:
        ``None`` when ``args.cross_validation`` is falsy. Otherwise a tuple
        ``(errors, predicted_trials, llh_sum, mean_rank_accuracy)`` where the
        scalars are cast to ``float32``. ``mean_rank_accuracy`` is NaN when
        no rankings were collected.

    Side effects:
        Rebinds the module-level ``predicted_trials`` and prints the shapes
        of the regression inputs.
    """
    global predicted_trials

    # Direction of the regression: brain -> model or model -> brain.
    if args.brain_to_model:
        from_regress = np.array(spotlight_activations)
        to_regress = np.array(embed_matrix)
    else:
        from_regress = np.array(embed_matrix)
        to_regress = np.array(spotlight_activations)

    print("FROM REGRESS: " + str(from_regress.shape))
    print("TO REGRESS: " + str(to_regress.shape))

    if not args.cross_validation:
        return

    outer_kf = KFold(n_splits=kfold_split, shuffle=True)

    # One prediction per sample; assumes to_regress is 1-D -- TODO confirm.
    predicted_trials = np.zeros((to_regress.shape[0], ))
    llhs = []
    rankings = []

    if args.add_bias:
        from_regress = helper.add_bias(from_regress)

    if args.permutation:
        # Shuffles the rows in place, breaking the pairing with to_regress.
        np.random.shuffle(from_regress)

    # Candidate penalties are the same for every fold.
    alphas = np.logspace(-10, 10, 21, endpoint=True)

    for train_index, test_index in outer_kf.split(from_regress):
        greatest_possible_rank = len(test_index)

        X_train, X_test = from_regress[train_index], from_regress[
            test_index]
        y_train, y_test = to_regress[train_index], to_regress[test_index]

        # Select the penalty on the training fold only.
        best_alpha = RidgeCV(alphas=alphas).fit(X_train, y_train).alpha_

        # with ridge regression
        clf = Ridge(alpha=best_alpha)
        clf.fit(X_train, y_train)
        y_hat_test = clf.predict(X_test)
        predicted_trials[test_index] = y_hat_test

        if args.llh:
            y_hat_train = clf.predict(X_train)
            sigma_train = np.sum((y_hat_train - y_train)**2, axis=0)
            llh = vectorize_llh(y_hat_test, y_test, sigma_train)
            llhs.append(llh)

        if args.ranking and args.model_to_brain:
            y_hat_test_reshape = y_hat_test.reshape((len(y_hat_test), 1))
            y_test_reshape = y_test.reshape((len(y_test), 1))

            true_distances = helper.calculate_true_distances(
                y_hat_test_reshape, y_test_reshape)
            print("TRUE DISTANCES: " + str(true_distances.shape))
            distance_matrix = helper.compute_distance_matrix(
                y_hat_test_reshape, y_test_reshape)
            print("DISTANCE MATRIX: " + str(distance_matrix.shape))
            rank = helper.calculate_rank(true_distances, distance_matrix)
            rank_accuracy = 1 - (rank -
                                 1) * 1.0 / (greatest_possible_rank - 1)
            rankings.append(rank_accuracy)

    # NOTE(review): this is the square root of the summed absolute error,
    # not a root-mean-squared error. Kept as is so results stay comparable.
    errors = np.sqrt(
        np.sum(np.abs(np.array(predicted_trials) - to_regress)))
    return errors.astype(
        np.float32), predicted_trials, np.sum(llhs).astype(
            np.float32), np.mean(rankings).astype(np.float32)
Beispiel #5
0
def linear_model(embed_matrix, spotlight_activations, args, kfold_split, alpha):
	"""Fit a ridge regression with one global penalty and K-fold evaluation.

	Args:
		embed_matrix: array-like of embeddings.
		spotlight_activations: array-like of brain activations.
		args: namespace; the attributes read are ``brain_to_model``,
			``cross_validation``, ``add_bias``, ``permutation``, ``llh``,
			``ranking`` and ``model_to_brain``.
		kfold_split: number of folds for the (unshuffled) ``KFold`` split.
		alpha: unused; the penalty is picked once by ``RidgeCV``.

	Returns:
		``None`` when ``args.cross_validation`` is falsy. Otherwise a tuple
		``(errors, predicted_trials, mean_llh, mean_rank_accuracy,
		best_alpha)``. ``mean_llh`` / ``mean_rank_accuracy`` are NaN when
		nothing was collected for them.

	Side effects:
		Rebinds the module-level ``predicted_trials`` and prints the
		selected penalty.
	"""
	global predicted_trials

	# Direction of the regression: brain -> model or model -> brain.
	if args.brain_to_model:
		from_regress = np.array(spotlight_activations)
		to_regress = np.array(embed_matrix)
	else:
		from_regress = np.array(embed_matrix)
		to_regress = np.array(spotlight_activations)

	if not args.cross_validation:
		return

	kf = KFold(n_splits=kfold_split)
	predicted_trials = np.zeros((to_regress.shape[0], to_regress.shape[1]))
	llhs = []
	rankings = []

	if args.add_bias:
		from_regress = helper.add_bias(from_regress)

	if args.permutation:
		# Shuffles the rows in place, breaking the pairing with to_regress.
		np.random.shuffle(from_regress)

	# NOTE(review): the penalty is selected on the full data set, including
	# the samples later used as test folds -- confirm this is intended.
	alphas = np.logspace(-10, 20, 31, endpoint=False)
	best_alpha = RidgeCV(alphas=alphas).fit(from_regress, to_regress).alpha_
	print("BEST ALPHA: " + str(best_alpha))

	for train_index, test_index in kf.split(from_regress):
		greatest_possible_rank = len(test_index)

		X_train, X_test = from_regress[train_index], from_regress[test_index]
		y_train, y_test = to_regress[train_index], to_regress[test_index]

		# with ridge regression
		clf = Ridge(alpha=best_alpha)
		clf.fit(X_train, y_train)
		y_hat_test = clf.predict(X_test)
		predicted_trials[test_index] = y_hat_test

		if args.llh:
			y_hat_train = clf.predict(X_train)
			sigma_train = np.sum((y_hat_train - y_train)**2, axis=0)
			llh = vectorize_llh(y_hat_test, y_test, sigma_train)
			llhs.append(llh)

		if args.ranking and args.model_to_brain:
			true_distances = helper.calculate_true_distances(y_hat_test, y_test)
			distance_matrix = helper.compute_distance_matrix(y_hat_test, y_test)
			rank = helper.calculate_rank(true_distances, distance_matrix)
			rank_accuracy = 1 - (rank - 1) * 1.0 / (greatest_possible_rank - 1)
			rankings.append(rank_accuracy)

	# NOTE(review): this is the square root of the summed absolute error,
	# not a root-mean-squared error. Kept as is so results stay comparable.
	errors = np.sqrt(np.sum(np.abs(np.array(predicted_trials) - to_regress)))
	return errors.astype(np.float32), predicted_trials, np.mean(llhs).astype(np.float64), np.mean(rankings).astype(np.float32), best_alpha
Beispiel #6
0
 def forward(self, l):
     """Run a forward pass and store every layer's output in ``self.f``.

     Args:
         l: input to the first layer; it is multiplied by the transpose of
             ``self.weights[0]``.

     For each of the ``self.no_of_layers + 1`` weight matrices the input is
     multiplied by the transposed weights, passed through
     ``self.activation`` and stored in ``self.f[i]``. That activated output
     (bias-augmented via ``hp.add_bias`` when ``self.bias`` is truthy) is
     the input of the next layer. Returns ``None``; read ``self.f``.
     """
     for i in range(self.no_of_layers + 1):
         pre_activation = np.dot(l, self.weights[i].T)
         self.f[i] = self.activation(pre_activation)
         # Feed the activated output forward. Previously the raw
         # pre-activation was passed on, so the non-linearity never
         # reached later layers.
         l = self.f[i]
         if self.bias:
             l = hp.add_bias(l)