Ejemplo n.º 1
0
def get_ns_for_pairs(a_b_c):
    """Count the studies that fall into each class for one pair of masks.

    The three inputs arrive packed in a single argument; presumably this is
    so the function can be handed to a one-argument mapper (e.g. ``map`` or
    a worker pool) -- confirm against the caller.

    Args:
        a_b_c: A 3-item sequence ``(dataset, pairs, thresh)`` where
            ``pairs`` holds two entries, each indexable as
            ``(mask_path, numeric_index)``, and ``thresh`` is forwarded as
            the third positional argument of
            ``classify.get_studies_by_regions``.

    Returns:
        A tuple ``(index, n)``: ``index`` is the pair of numeric mask
        indices and ``n`` is ``np.bincount`` of the class labels, i.e. the
        number of studies per class.
    """
    dataset, pairs, thresh = a_b_c

    index = (pairs[0][1], pairs[1][1])  # Tuple numeric index of pairs
    names = [pairs[0][0], pairs[1][0]]  # Actual paths to masks

    # Only the class labels are needed here; the feature matrix is discarded.
    _, y = classify.get_studies_by_regions(dataset, names, thresh)

    return (index, np.bincount(y))
Ejemplo n.º 2
0
def get_ns_for_pairs(a_b_c):
    """Return the per-class study counts for a single pair of masks.

    All inputs are bundled into one argument; presumably this lets the
    function be used with single-argument mappers such as ``map`` or a
    process pool -- confirm against the caller.

    Args:
        a_b_c: A 3-item sequence ``(dataset, pairs, thresh)``. ``pairs``
            contains two entries, each indexable as
            ``(mask_path, numeric_index)``; ``thresh`` is passed as the
            third positional argument of
            ``classify.get_studies_by_regions``.

    Returns:
        ``(index, n)`` where ``index`` is the tuple of the two numeric mask
        indices and ``n`` is ``np.bincount`` applied to the class labels
        (one count per class).
    """
    dataset, pairs, thresh = a_b_c

    index = (pairs[0][1], pairs[1][1])  # Tuple numeric index of pairs
    names = [pairs[0][0], pairs[1][0]]  # Actual paths to masks

    # The feature matrix is not used; only the label vector is counted.
    _, y = classify.get_studies_by_regions(dataset, names, thresh)

    return (index, np.bincount(y))
Ejemplo n.º 3
0
            rootdir + pairs[1]],
            classifier=GradientBoostingClassifier(), param_grid=param_grid,
            threshold=thresh, output='summary_clf')

    results[int(re.findall('[0-9]', pairs[0])[1]) - 1,
            int(re.findall('[0-9]', pairs[1])[1]) - 1] = output['score']

    ns[int(re.findall('[0-9]', pairs[0])[1]) - 1, int(re.findall('[0-9]'
       , pairs[1])[1]) - 1] = output['n'][0] + output['n'][1]

    fitClfs[int(re.findall('[0-9]', pairs[0])[1]) - 1,
            int(re.findall('[0-9]', pairs[1])[1]) - 1] = output['clf']

    c_data[int(re.findall('[0-9]', pairs[0])[1]) - 1,
           int(re.findall('[0-9]', pairs[1])[1]) - 1] = \
        classify.get_studies_by_regions(dataset, [rootdir + pairs[0],
            rootdir + pairs[1]], threshold=thresh)

    dummyoutput = classify.classify_regions(dataset, [rootdir
            + pairs[0], rootdir + pairs[1]],
            method='Dummy', threshold=thresh)

    resultsDummy[int(re.findall('[0-9]', pairs[0])[1]) - 1,
                 int(re.findall('[0-9]', pairs[1])[1]) - 1] = \
        dummyoutput['score']
    prog = prog + 1
    update_progress(int(prog / total * 100))

# Mask every cell whose score is exactly 0 in both score grids
# (presumably the cells that were never filled in above -- confirm).
results = np.ma.masked_array(results, results == 0)
resultsDummy = np.ma.masked_array(resultsDummy, resultsDummy == 0)
# Element-wise difference between the classifier scores and the dummy scores.
diffs = results - resultsDummy
Ejemplo n.º 4
0
    def classify(self, features=None, scoring='accuracy', dummy=True, X_threshold=None):
        """Classify every ordered pair of masks and store the results on self.

        For each 2-permutation of ``self.masklist`` the study data is loaded
        (or taken from ``self.c_data`` when already cached), a classifier is
        fitted and scored, and a 'Dummy' baseline score is computed.

        Args:
            features: Optional feature names. When falsy, the names come
                from ``self.dataset.get_feature_names()``. The value is
                also forwarded to ``classify.get_studies_by_regions``.
            scoring: Scoring name forwarded to ``classify.classify``
                (not used when ``self.classifier`` is an ``RFE``).
            dummy: If true, ``self.final_score`` is the classifier score
                minus the dummy score; otherwise the classifier score.
            X_threshold: If not None, ``X`` is passed through
                ``binarize(X, X_threshold)`` before fitting.

        Side effects:
            Sets or fills ``feature_names``, ``feature_importances``,
            ``c_data``, ``fit_clfs``, ``class_score``, ``dummy_score``,
            ``final_score``, ``feature_ranking`` (RFE only) and sets
            ``status`` to 1. Reports progress via ``self.update_progress``.
        """
        iters = list(itertools.permutations(self.masklist, 2))
        total = len(iters)
        prog = 0.0

        self.update_progress(0)

        if features:
            self.feature_names = features
        else:
            self.feature_names = self.dataset.get_feature_names()

        # Make feature importance grid w/ masked diagonals
        self.feature_importances = np.ma.masked_array(np.zeros((self.mask_num,
            self.mask_num, len(self.feature_names))))

        i, j, _ = np.meshgrid(*map(np.arange, self.feature_importances.shape), indexing='ij')

        self.feature_importances.mask = (i == j)

        for pairs in iters:

            index = (pairs[0][1], pairs[1][1])  # Tuple numeric index of pairs
            names = [pairs[0][0], pairs[1][0]]  # Actual paths to masks

            # Reuse cached data for this pair when present. Previously X and
            # y were left unbound (or stale from the previous pair) whenever
            # the cache already held an entry.
            cached = self.c_data[index]
            if cached is None:
                X, y = classify.get_studies_by_regions(self.dataset,
                    names, threshold=self.thresh, features=features, regularization='scale')
            else:
                X, y = cached

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            self.c_data[index] = (X, y)

            if isinstance(self.classifier, RFE):

                self.classifier.fit(X, y)

                # NOTE(review): the same classifier object is stored for
                # every pair, so earlier entries are refit by later
                # iterations -- confirm whether a per-pair copy is intended.
                self.fit_clfs[index] = self.classifier

                self.class_score[index] = self.classifier.score(X, y)

                self.feature_importances[index] = self.classifier.estimator_.coef_[0]

                self.feature_ranking[index] = self.classifier.ranking_

            else:
                output = classify.classify(X, y, classifier=self.classifier, output='summary_clf',
                    cross_val='4-Fold', class_weight='auto', scoring=scoring,
                    param_grid=self.param_grid)

                self.class_score[index] = output['score']

                fitted = output['clf'].fit(X, y)
                self.fit_clfs[index] = fitted

                # With a parameter grid the fitted model lives on
                # ``best_estimator_``; read both ``coef_`` and
                # ``feature_importances_`` from there so tree-based models
                # are not silently skipped.
                estimator = fitted
                if self.param_grid:
                    estimator = getattr(fitted, 'best_estimator_', fitted)

                # Models exposing neither attribute leave the row at zero.
                if hasattr(estimator, 'coef_'):
                    self.feature_importances[index] = estimator.coef_[0]
                elif hasattr(estimator, 'feature_importances_'):
                    self.feature_importances[index] = estimator.feature_importances_

            self.dummy_score[index] = classify.classify_regions(self.dataset, names,
                method='Dummy', threshold=self.thresh)['score']

            prog = prog + 1
            self.update_progress(int(prog / total * 100))

        # Mask cells whose score is exactly 0 (presumably the untouched
        # diagonal -- confirm).
        self.class_score = np.ma.masked_array(self.class_score,
            self.class_score == 0)
        self.dummy_score = np.ma.masked_array(self.dummy_score,
            self.dummy_score == 0)

        if dummy:
            self.final_score = self.class_score - self.dummy_score
        else:
            self.final_score = self.class_score

        # No mirroring across the diagonal is done here: permutations()
        # already visits both (a, b) and (b, a).

        self.status = 1
Ejemplo n.º 5
0
    def classify(self,
                 features=None,
                 scoring='accuracy',
                 dummy=True,
                 X_threshold=None):
        """Run pairwise classification over all ordered pairs of masks.

        Each 2-permutation of ``self.masklist`` is processed in turn: its
        study data is fetched (or read back from ``self.c_data`` when an
        entry already exists), a classifier is fitted and scored, and a
        'Dummy' baseline score is computed for comparison.

        Args:
            features: Optional feature names; when falsy the names are read
                from ``self.dataset.get_feature_names()``. Also forwarded
                to ``classify.get_studies_by_regions``.
            scoring: Scoring name forwarded to ``classify.classify``
                (ignored when ``self.classifier`` is an ``RFE``).
            dummy: When true, ``self.final_score`` is the classifier score
                minus the dummy score; otherwise the classifier score.
            X_threshold: When not None, ``X`` is replaced by
                ``binarize(X, X_threshold)`` before fitting.

        Side effects:
            Sets or fills ``feature_names``, ``feature_importances``,
            ``c_data``, ``fit_clfs``, ``class_score``, ``dummy_score``,
            ``final_score``, ``feature_ranking`` (RFE only) and sets
            ``status`` to 1. Progress is reported through
            ``self.update_progress``.
        """
        iters = list(itertools.permutations(self.masklist, 2))
        total = len(iters)
        prog = 0.0

        self.update_progress(0)

        if features:
            self.feature_names = features
        else:
            self.feature_names = self.dataset.get_feature_names()

        # Make feature importance grid w/ masked diagonals
        self.feature_importances = np.ma.masked_array(
            np.zeros((self.mask_num, self.mask_num, len(self.feature_names))))

        i, j, _ = np.meshgrid(*map(np.arange, self.feature_importances.shape),
                              indexing='ij')

        self.feature_importances.mask = (i == j)

        for pairs in iters:

            index = (pairs[0][1], pairs[1][1])  # Tuple numeric index of pairs
            names = [pairs[0][0], pairs[1][0]]  # Actual paths to masks

            # Use the cached data for this pair if there is any. Before this
            # fix, a cache hit left X and y unbound on the first iteration
            # or stale from the previous pair on later ones.
            cached = self.c_data[index]
            if cached is None:
                X, y = classify.get_studies_by_regions(self.dataset,
                                                       names,
                                                       threshold=self.thresh,
                                                       features=features,
                                                       regularization='scale')
            else:
                X, y = cached

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            self.c_data[index] = (X, y)

            if isinstance(self.classifier, RFE):

                self.classifier.fit(X, y)

                # NOTE(review): every pair stores the same classifier
                # object, so later fits overwrite earlier ones -- confirm
                # whether a per-pair copy is intended.
                self.fit_clfs[index] = self.classifier

                self.class_score[index] = self.classifier.score(X, y)

                self.feature_importances[
                    index] = self.classifier.estimator_.coef_[0]

                self.feature_ranking[index] = self.classifier.ranking_

            else:
                output = classify.classify(X,
                                           y,
                                           classifier=self.classifier,
                                           output='summary_clf',
                                           cross_val='4-Fold',
                                           class_weight='auto',
                                           scoring=scoring,
                                           param_grid=self.param_grid)

                self.class_score[index] = output['score']

                fitted = output['clf'].fit(X, y)
                self.fit_clfs[index] = fitted

                # When a parameter grid was used, the fitted model is on
                # ``best_estimator_``; take both ``coef_`` and
                # ``feature_importances_`` from it so tree-based models are
                # not silently skipped.
                estimator = fitted
                if self.param_grid:
                    estimator = getattr(fitted, 'best_estimator_', fitted)

                # Models exposing neither attribute leave the row at zero.
                if hasattr(estimator, 'coef_'):
                    self.feature_importances[index] = estimator.coef_[0]
                elif hasattr(estimator, 'feature_importances_'):
                    self.feature_importances[
                        index] = estimator.feature_importances_

            self.dummy_score[index] = classify.classify_regions(
                self.dataset, names, method='Dummy',
                threshold=self.thresh)['score']

            prog = prog + 1
            self.update_progress(int(prog / total * 100))

        # Mask cells whose score is exactly 0 (presumably the untouched
        # diagonal -- confirm).
        self.class_score = np.ma.masked_array(self.class_score,
                                              self.class_score == 0)
        self.dummy_score = np.ma.masked_array(self.dummy_score,
                                              self.dummy_score == 0)

        if dummy:
            self.final_score = self.class_score - self.dummy_score
        else:
            self.final_score = self.class_score

        # Results are not mirrored across the diagonal: permutations()
        # already covers both (a, b) and (b, a).

        self.status = 1
Ejemplo n.º 6
0
from neurosynth.analysis import classify
# Paths of the two ROI mask images that are contrasted below.
roi1 = "/Users/ateghipc/Desktop/spt/ROI/PT/clusterSolutions/Kmeans_solution_2_Cluster_2_bin.nii"
roi2 = "/Users/ateghipc/Desktop/spt/ROI/PT/clusterSolutions/Kmeans_solution_2_Cluster_1.nii"

# Classify the studies of roi2 against those of roi1.
results = classify.classify_regions(dataset, [roi2, roi1], threshold=0.2)
results['n']  # number of studies in the first class vs. the second
results['score']  # the classification score

# Same call with the 'Dummy' method, to obtain a comparison score.
results = classify.classify_regions(
    dataset, [roi2, roi1], threshold=0.2, method="Dummy")
results['score']  # the dummy classifier score

# Example of fetching the data directly, with every option spelled out.
X, y = classify.get_studies_by_regions(
    dataset,
    [roi2, roi1],
    threshold=0.2,
    remove_overlap=True,
    studies=None,
    features=None,
    regularization='scale')

# Option values listed one per name; presumably these mirror the keyword
# arguments accepted by the classification call -- confirm.
method = 'ERF'
threshold = 0.08
remove_overlap = True
regularization = 'scale'
output = 'summary'
studies = None
features = None
class_weight = 'auto'
classifier = None
cross_val = '4-Fold'
param_grid = None
scoring = 'accuracy'
refit_all = True