# Pairwise ROI classification sweep: for every unordered pair of masks,
# train a GradientBoostingClassifier on the studies in each region and
# store score / study count / fitted classifier / training data in 7x7
# grids indexed by a digit embedded in each mask filename.
# NOTE(review): `masklist`, `rootdir`, `dataset`, `thresh`, `results`,
# `ns` and `update_progress` must be defined earlier in the file — confirm.
iters = list(itertools.combinations(masklist, 2))
# NOTE(review): this chained assignment makes `param_grid` the same empty
# 7x7 object array as `fitClfs`, and it is then passed to classify_regions
# as a grid-search parameter grid — this looks like a paste artifact;
# verify the intended value of `param_grid`.
param_grid = fitClfs = np.empty((7, 7), object)
c_data = np.empty((7, 7), tuple)
prog = 0.0
total = len(list(iters))
update_progress(0)
for pairs in iters:
    output = classify.classify_regions(
        dataset, [rootdir + pairs[0], rootdir + pairs[1]],
        classifier=GradientBoostingClassifier(), param_grid=param_grid,
        threshold=thresh, output='summary_clf')
    # Grid coordinates: the second digit found in each filename, 1-based
    # converted to 0-based.
    results[int(re.findall('[0-9]', pairs[0])[1]) - 1,
            int(re.findall('[0-9]', pairs[1])[1]) - 1] = output['score']
    # Total number of studies across both classes.
    ns[int(re.findall('[0-9]', pairs[0])[1]) - 1,
       int(re.findall('[0-9]', pairs[1])[1]) - 1] = \
        output['n'][0] + output['n'][1]
    fitClfs[int(re.findall('[0-9]', pairs[0])[1]) - 1,
            int(re.findall('[0-9]', pairs[1])[1]) - 1] = output['clf']
    # Cache the (X, y) training data for this pair of regions.
    c_data[int(re.findall('[0-9]', pairs[0])[1]) - 1,
           int(re.findall('[0-9]', pairs[1])[1]) - 1] = \
        classify.get_studies_by_regions(
            dataset, [rootdir + pairs[0], rootdir + pairs[1]],
            threshold=thresh)
def classify(self, features=None, scoring='accuracy', dummy=True,
             X_threshold=None):
    """Fit the configured classifier on every ordered pair of ROI masks.

    Populates self.class_score, self.fit_clfs, self.feature_importances,
    self.feature_ranking (RFE path only), self.dummy_score and
    self.final_score as mask_num x mask_num grids, then sets
    self.status = 1.

    Args:
        features: optional list of feature names; when falsy, every
            feature name in self.dataset is used.
        scoring: metric name forwarded to classify.classify.
        dummy: if True, final_score is class_score minus the dummy
            classifier's score; otherwise the raw class_score.
        X_threshold: if not None, the feature matrix X is binarized at
            this threshold before training.
    """
    iters = list(itertools.permutations(self.masklist, 2))
    prog = 0.0
    total = len(list(iters))
    self.update_progress(0)
    if features:
        self.feature_names = features
    else:
        self.feature_names = self.dataset.get_feature_names()
    # Make feature importance grid w/ masked diagonals (a mask vs itself
    # is meaningless, so those cells are masked out).
    self.feature_importances = np.ma.masked_array(
        np.zeros((self.mask_num, self.mask_num, len(self.feature_names))))
    i, j, k = np.meshgrid(*map(np.arange, self.feature_importances.shape),
                          indexing='ij')
    self.feature_importances.mask = (i == j)
    for pairs in iters:
        index = (pairs[0][1], pairs[1][1])  # Tuple numeric index of pairs
        names = [pairs[0][0], pairs[1][0]]  # Actual paths to masks
        if self.c_data[index] is None:
            X, y = classify.get_studies_by_regions(
                self.dataset, names, threshold=self.thresh,
                features=features, regularization='scale')
            if X_threshold is not None:
                X = binarize(X, X_threshold)
            # if features is not None:
            #     X = X[:, classify.get_feature_order(self.dataset, self.feature_names)]
            self.c_data[index] = (X, y)
        if isinstance(self.classifier, RFE):
            self.classifier.fit(*self.c_data[index])
            self.fit_clfs[index] = self.classifier
            self.class_score[index] = self.classifier.score(
                *self.c_data[index])
            self.feature_importances[
                index] = self.classifier.estimator_.coef_[0]
            self.feature_ranking[index] = self.classifier.ranking_
        else:
            # NOTE(review): X and y are only (re)bound when
            # self.c_data[index] was None just above; if this pair's data
            # was already cached, this call sees stale (or unbound) X, y.
            # Confirm whether it should read self.c_data[index] instead.
            output = classify.classify(X, y, classifier=self.classifier,
                                       output='summary_clf',
                                       cross_val='4-Fold',
                                       class_weight='auto', scoring=scoring,
                                       param_grid=self.param_grid)
            self.class_score[index] = output['score']
            self.fit_clfs[index] = output['clf'].fit(*self.c_data[index])
            if self.param_grid:  # Just get them if you used a grid
                try:
                    self.feature_importances[index] = self.fit_clfs[
                        index].best_estimator_.coef_[0]
                except AttributeError:
                    try:
                        self.feature_importances[index] = self.fit_clfs[
                            index].feature_importances_
                    except AttributeError:
                        pass  # estimator exposes no importances
            else:
                try:
                    self.feature_importances[index] = self.fit_clfs[
                        index].coef_[0]
                except AttributeError:
                    try:
                        self.feature_importances[index] = self.fit_clfs[
                            index].feature_importances_
                    except AttributeError:
                        pass  # estimator exposes no importances
        # Dummy baseline for this pair; consumed below when dummy=True.
        self.dummy_score[index] = classify.classify_regions(
            self.dataset, names,
            method='Dummy', threshold=self.thresh)['score']
        prog = prog + 1
        self.update_progress(int(prog / total * 100))
    # Mask cells that were never filled in (still zero).
    self.class_score = np.ma.masked_array(self.class_score,
                                          self.class_score == 0)
    self.dummy_score = np.ma.masked_array(self.dummy_score,
                                          self.dummy_score == 0)
    if dummy:
        self.final_score = self.class_score - self.dummy_score
    else:
        self.final_score = self.class_score

    # Make results fill in across diagonal
    # for j in range(0, self.mask_num):
    #     for b in range(0, self.mask_num):
    #         if self.final_score.mask[j, b] and not j == b:
    #             self.final_score[j, b] = self.final_score[b, j]
    #             self.fit_clfs[j, b] = self.fit_clfs[b, j]
    #             self.c_data[j, b] = self.c_data[b, j]
    #             if isinstance(self.classifier, LinearSVC):
    #                 self.feature_importances[j, b] = self.feature_importances[b, j] * -1
    #             else:
    #                 self.feature_importances[j, b] = self.feature_importances[b, j]
    #             if self.feature_ranking is not None:
    #                 self.feature_ranking[j, b] = self.feature_ranking[b, j]

    self.status = 1
def classify(self, features=None, scoring='accuracy', dummy=True,
             X_threshold=None):
    """Fit the configured classifier on every ordered pair of ROI masks.

    Populates self.class_score, self.fit_clfs, self.feature_importances,
    self.feature_ranking (RFE path only), self.dummy_score and
    self.final_score as mask_num x mask_num grids, then sets
    self.status = 1.

    Args:
        features: optional list of feature names; when falsy, every
            feature name in self.dataset is used.
        scoring: metric name forwarded to classify.classify.
        dummy: if True, final_score is class_score minus the dummy
            classifier's score; otherwise the raw class_score.
        X_threshold: if not None, the feature matrix X is binarized at
            this threshold before training.
    """
    iters = list(itertools.permutations(self.masklist, 2))
    prog = 0.0
    total = len(iters)  # iters is already a list; no need to re-materialize
    self.update_progress(0)

    if features:
        self.feature_names = features
    else:
        self.feature_names = self.dataset.get_feature_names()

    # Feature-importance grid with the diagonal (mask vs itself) masked out.
    self.feature_importances = np.ma.masked_array(
        np.zeros((self.mask_num, self.mask_num, len(self.feature_names))))
    i, j, k = np.meshgrid(*map(np.arange, self.feature_importances.shape),
                          indexing='ij')
    self.feature_importances.mask = (i == j)

    for pairs in iters:
        index = (pairs[0][1], pairs[1][1])  # numeric grid coordinates
        names = [pairs[0][0], pairs[1][0]]  # actual paths to the two masks

        if self.c_data[index] is None:
            X, y = classify.get_studies_by_regions(
                self.dataset, names, threshold=self.thresh,
                features=features, regularization='scale')
            if X_threshold is not None:
                X = binarize(X, X_threshold)
            self.c_data[index] = (X, y)

        if isinstance(self.classifier, RFE):
            self.classifier.fit(*self.c_data[index])
            self.fit_clfs[index] = self.classifier
            self.class_score[index] = self.classifier.score(
                *self.c_data[index])
            self.feature_importances[
                index] = self.classifier.estimator_.coef_[0]
            self.feature_ranking[index] = self.classifier.ranking_
        else:
            # BUGFIX: always train on the cached data for *this* pair.
            # The original passed the loop-local X, y, which were stale
            # (or unbound) whenever self.c_data[index] was already filled.
            X, y = self.c_data[index]
            output = classify.classify(X, y, classifier=self.classifier,
                                       output='summary_clf',
                                       cross_val='4-Fold',
                                       class_weight='auto', scoring=scoring,
                                       param_grid=self.param_grid)
            self.class_score[index] = output['score']
            self.fit_clfs[index] = output['clf'].fit(*self.c_data[index])

            # BUGFIX: when a grid search was used, both importance
            # attributes live on the refit best_estimator_, not on the
            # grid-search wrapper itself (the original read
            # feature_importances_ off the wrapper and silently lost it).
            est = self.fit_clfs[index]
            if self.param_grid:
                est = est.best_estimator_
            try:
                self.feature_importances[index] = est.coef_[0]
            except AttributeError:
                try:
                    self.feature_importances[index] = est.feature_importances_
                except AttributeError:
                    pass  # estimator exposes no importances

        # Dummy baseline for this pair; consumed below when dummy=True.
        self.dummy_score[index] = classify.classify_regions(
            self.dataset, names,
            method='Dummy', threshold=self.thresh)['score']

        prog = prog + 1
        self.update_progress(int(prog / total * 100))

    # Mask cells that were never filled in (still zero).
    self.class_score = np.ma.masked_array(self.class_score,
                                          self.class_score == 0)
    self.dummy_score = np.ma.masked_array(self.dummy_score,
                                          self.dummy_score == 0)

    if dummy:
        self.final_score = self.class_score - self.dummy_score
    else:
        self.final_score = self.class_score

    self.status = 1
# and what if you want to play around with FDR? feature_list = dataset.get_feature_names( ['word recognition', 'speech production']) meta.analyze_features( dataset, feature_list, threshold=0.001, q=0.05, output_dir='/Users/ateghipc/Desktop/DualStreamUpdateFigs/manual/q001') # here's an example for classifying ROIs without getting your hands dirty from neurosynth.analysis import classify roi1 = "/Users/ateghipc/Desktop/spt/ROI/PT/clusterSolutions/Kmeans_solution_2_Cluster_2_bin.nii" roi2 = "/Users/ateghipc/Desktop/spt/ROI/PT/clusterSolutions/Kmeans_solution_2_Cluster_1.nii" results = classify.classify_regions(dataset, [roi2, roi1], threshold=0.2) results['n'] #studies in the first class vs the second results['score'] # this is your classification score results = classify.classify_regions(dataset, [roi2, roi1], threshold=0.2, method="Dummy") results['score'] #this is a dummy classifier score # here's an example for classification with a lot of parameterization (X, y) = classify.get_studies_by_regions(dataset, [roi2, roi1], threshold=0.2, remove_overlap=True, studies=None, features=None, regularization='scale') method = 'ERF'