def load_data(self, features):
    """ Load study ids and data for each individual mask. """
    print "Loading data from neurosynth..."

    from neurosynth.analysis.reduce import average_within_regions

    if self.mask_img is None:
        self.y = self.dataset.get_image_data()
    elif isinstance(self.mask_img, basestring):
        # Note: the original check compared the last three characters to
        # ".pkl" (four characters), which could never match; use endswith
        if self.mask_img.endswith(".pkl"):
            # A previously pickled masks-by-studies matrix
            import cPickle
            self.y = cPickle.load(open(self.mask_img, 'rb'))
        else:
            # TODO: add option to filter by features
            self.y = average_within_regions(self.dataset, self.mask_img)
    else:
        # Assume mask_img is already a masks-by-studies array
        self.y = self.mask_img

    self.mask_num = self.y.shape[0]

    from neurosynth.analysis.classify import regularize
    X = self.dataset.get_feature_data(features=features)
    self.feature_names = X.columns.tolist()
    self.X = regularize(X, method='scale')
    self.set_dims()
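# A minimal, standalone sketch (not part of the class) of the mask_img
# dispatch above: None means whole-brain voxel data, a '.pkl' path means a
# previously pickled masks-by-studies matrix, any other string is treated as
# a NIfTI mask to be averaged within regions, and anything else is assumed to
# already be an array. The three loader callables are hypothetical stand-ins.
def _demo_mask_dispatch(mask_img, load_voxels, load_pickle, load_nifti):
    if mask_img is None:
        return load_voxels()
    elif isinstance(mask_img, basestring):
        if mask_img.endswith('.pkl'):
            return load_pickle(mask_img)
        return load_nifti(mask_img)
    return mask_img  # already an array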
def load_data(self, features, X_threshold):
    """ Load pairwise classification data into c_data. """
    from neurosynth.analysis.reduce import average_within_regions

    # Compute the masks-by-studies activation matrix
    # TODO: add option to filter by features
    masks_by_studies = average_within_regions(
        self.dataset, self.mask_img, threshold=self.thresh)
    study_ids = self.dataset.feature_table.data.index

    print "Loading data from neurosynth..."
    pb = tools.ProgressBar(masks_by_studies.shape[0], start=True)

    self.ids_by_masks = []
    self.data_by_masks = []
    for mask in masks_by_studies:
        m_ids = study_ids[np.where(mask)[0]]
        self.ids_by_masks.append(m_ids)
        self.data_by_masks.append(self.dataset.get_feature_data(ids=m_ids))
        pb.next()

    self.mask_num = masks_by_studies.shape[0]
    self.mask_pairs = list(itertools.permutations(range(0, self.mask_num), 2))

    # np.memmap cannot store object arrays (numpy rejects object dtype with a
    # memory buffer), so keep the pairwise store in an in-memory object array
    # indexed by ordered (i, j) mask pairs
    self.c_data = np.empty((self.mask_num, self.mask_num), dtype=object)

    # Build a binary classification problem for each ordered pair of masks
    for pair in self.mask_pairs:
        reg1_ids = self.ids_by_masks[pair[0]]
        reg2_ids = self.ids_by_masks[pair[1]]

        # Drop studies that activate both regions
        reg1_set = list(set(reg1_ids) - set(reg2_ids))
        reg2_set = list(set(reg2_ids) - set(reg1_ids))

        x1 = self.data_by_masks[pair[0]]
        x1 = np.array(x1)[np.where(np.in1d(reg1_ids, reg1_set))[0]]
        x2 = self.data_by_masks[pair[1]]
        x2 = np.array(x2)[np.where(np.in1d(reg2_ids, reg2_set))[0]]

        y = np.array([0] * len(reg1_set) + [1] * len(reg2_set))
        X = np.vstack((x1, x2))

        if X_threshold is not None:
            X = binarize(X, X_threshold)

        from neurosynth.analysis.classify import regularize
        X = regularize(X, method='scale')

        self.c_data[pair] = (X, y)

    if self.memsave:
        self.data_by_masks = []
        self.ids_by_masks = []
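# Standalone sketch of the pairwise bookkeeping above: an object array indexed
# by ordered (i, j) mask pairs, filled via itertools.permutations. The stored
# values here are placeholders for the real (X, y) tuples.
def _demo_pairwise_store(n_masks=3):
    import itertools
    import numpy as np
    store = np.empty((n_masks, n_masks), dtype=object)
    for pair in itertools.permutations(range(0, n_masks), 2):
        store[pair] = ('X', 'y')  # placeholder for (X, y)
    return store  # diagonal stays None: a mask is never paired with itself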
def load_data(self, features, X_threshold):
    """ Load one-vs-all classification data into data. """
    # Load data for each mask
    self.load_mask_data(features)

    # np.memmap cannot store object arrays, so keep the per-mask (X, y)
    # tuples in an in-memory object array
    self.data = np.empty((self.mask_num,), dtype=object)

    all_ids = self.dataset.image_table.ids

    # If a low threshold is set, get the ids of studies active at that
    # threshold so they can be excluded from the "off" class
    if self.thresh_low is not None:
        ids_by_masks_low = []
        from neurosynth.analysis.reduce import average_within_regions
        masks_by_studies_low = average_within_regions(
            self.dataset, self.mask_img, threshold=self.thresh_low)
        for mask in masks_by_studies_low:
            m_ids = np.array(all_ids)[np.where(mask)[0]]
            ids_by_masks_low.append(m_ids)

    # Arrange data into data
    for num, on_ids in enumerate(self.ids_by_masks):
        # If a low threshold is set, use it to filter off_ids;
        # otherwise exclude only the on_ids
        if self.thresh_low is not None:
            off_ids = list(set(all_ids) - set(ids_by_masks_low[num]))
        else:
            off_ids = list(set(all_ids) - set(on_ids))

        on_data = self.data_by_masks[num].dropna()
        off_data = self.dataset.feature_table.get_feature_data(
            ids=off_ids, features=features).dropna()

        y = np.array([0] * off_data.shape[0] + [1] * on_data.shape[0])
        X = np.vstack((np.array(off_data), np.array(on_data)))

        from neurosynth.analysis.classify import regularize
        X = regularize(X, method='scale')

        if X_threshold is not None:
            X = binarize(X, X_threshold)

        self.data[num] = (X, y)

    if self.memsave:
        self.data_by_masks = []
        self.ids_by_masks = []

    self.comparisons = range(0, self.mask_num)
    self.comp_dims = (self.mask_num, )
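# Standalone sketch of the one-vs-all label construction above: "on" studies
# activate the mask, "off" studies are everything else, optionally excluding
# studies active at a lower threshold so borderline studies land in neither
# class. All ids are made up for illustration.
def _demo_one_vs_all_labels(all_ids, on_ids, low_ids=None):
    import numpy as np
    # Exclude either the low-threshold activators (if given) or just the
    # "on" studies themselves
    excluded = set(low_ids) if low_ids is not None else set(on_ids)
    off_ids = list(set(all_ids) - excluded)
    y = np.array([0] * len(off_ids) + [1] * len(on_ids))  # off=0, on=1
    return off_ids, y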
def load_data(self, features, X_threshold):
    """ Load pairwise classification data into data. """
    # Load data for each mask
    self.load_mask_data(features)

    # Set up pair-wise comparisons (unordered pairs)
    self.comparisons = list(
        itertools.combinations(range(0, self.mask_num), 2))

    # np.memmap cannot store object arrays, so keep the pairwise store in an
    # in-memory object array indexed by (i, j) mask pairs
    self.data = np.empty((self.mask_num, self.mask_num), dtype=object)

    # Filter data and arrange into data
    for pair in self.comparisons:
        x1 = self.data_by_masks[pair[0]]
        x2 = self.data_by_masks[pair[1]]

        reg1_ids = self.ids_by_masks[pair[0]]
        reg2_ids = self.ids_by_masks[pair[1]]

        if self.remove_overlap:
            # Drop studies that activate both regions
            reg1_set = list(set(reg1_ids) - set(reg2_ids))
            reg2_set = list(set(reg2_ids) - set(reg1_ids))

            x1 = np.array(x1)[np.where(np.in1d(reg1_ids, reg1_set))[0]]
            x2 = np.array(x2)[np.where(np.in1d(reg2_ids, reg2_set))[0]]

            reg1_ids = reg1_set
            reg2_ids = reg2_set

        y = np.array([0] * len(reg1_ids) + [1] * len(reg2_ids))
        X = np.vstack((x1, x2))

        if X_threshold is not None:
            X = binarize(X, X_threshold)

        from neurosynth.analysis.classify import regularize
        X = regularize(X, method='scale')

        self.data[pair] = (X, y)

    if self.memsave:
        self.data_by_masks = []
        self.ids_by_masks = []

    self.comp_dims = (self.mask_num, self.mask_num)
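# A minimal, self-contained sketch of the overlap-removal step above: studies
# that activate both regions are dropped before the pairwise problem is
# built. All ids and feature rows here are made up for illustration.
def _demo_remove_overlap():
    import numpy as np
    reg1_ids = ['s1', 's2', 's3']
    reg2_ids = ['s2', 's4']
    reg1_set = list(set(reg1_ids) - set(reg2_ids))  # studies unique to region 1
    reg2_set = list(set(reg2_ids) - set(reg1_ids))  # studies unique to region 2
    x1 = np.arange(6).reshape(3, 2)  # fake feature rows for region 1's studies
    x1 = x1[np.where(np.in1d(reg1_ids, reg1_set))[0]]  # keep unique rows only
    return reg1_set, reg2_set, x1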
def bootstrap_mv_full_parallel(args):
    """ Run greedy forward feature selection on one bootstrap sample.
    Written to be mapped across worker processes. """
    try:
        (X, y_high, y_low, classifier, scorer, method), boot_n = args

        # Re-seed per worker and draw a bootstrap sample (with replacement)
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        from neurosynth.analysis.classify import regularize

        # Bootstrap sample X & y
        X = X.iloc[ran_index, :]
        y_high = pd.DataFrame(y_high[:, ran_index])
        y_low = pd.DataFrame(y_low[:, ran_index])

        feature_names = X.columns.tolist()
        n_topics = len(feature_names)

        X = regularize(X, method='scale')

        results = []
        for reg_i, reg_y_high in y_high.iterrows():
            # Drop studies that are active at the low threshold but not the
            # high one, so borderline studies land in neither class
            reg_ix = ((y_low.iloc[reg_i, :] == True) &
                      (reg_y_high == False)) == False
            reg_y = reg_y_high[reg_ix].astype('int')
            reg_X = X[reg_ix.values, :]

            if method == 'combinatorial':
                ix = []  # Features selected so far, in order
                remaining = range(0, n_topics)
                for i in range(0, n_topics):
                    # Score each remaining feature added to the current set
                    test_results = []
                    for num, new_feat in enumerate(remaining):
                        try_comb = ix + [new_feat]
                        X_1 = reg_X[:, try_comb]
                        feature = feature_names[new_feat]

                        output = classify.classify(
                            X_1, reg_y.values, classifier=classifier,
                            cross_val='4-Fold', scoring=scorer,
                            output='summary')

                        test_results.append(
                            [output['score'], i, feature, reg_i, boot_n,
                             new_feat])

                    # Greedily keep the best-scoring feature; break ties by
                    # taking the first candidate
                    test_results = pd.DataFrame(test_results)
                    winner = test_results[
                        test_results.iloc[:, 0] ==
                        test_results.iloc[:, 0].max()].iloc[0]

                    results.append(winner.values[0:5].tolist())
                    remaining.remove(winner[5])
                    ix.append(winner[5])

            # elif method == 'shannons':
            #     from base.statistics import shannons
            #     clf = classify.classify(X, y, classifier=classifier,
            #                             cross_val='4-Fold', scoring=scorer,
            #                             output='clf')
            #     odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            #     odds_ratios -= (odds_ratios.min() - 0.000001)
            #     results = [shannons(odds_ratios), reg, boot_n]
    except Exception as e:
        import warnings
        warnings.warn('bootstrap iteration failed: %s' % e)
        results = None
    finally:
        return results
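# Standalone sketch of the greedy forward ("combinatorial") selection loop
# above, with a pluggable score callable standing in for classify.classify.
# As in the function above, ties go to the first candidate. The score
# function is hypothetical; any callable mapping a list of feature indices
# to a number will do, e.g. _demo_forward_selection(3, lambda c: -len(c)).
def _demo_forward_selection(n_feats, score):
    selected = []
    remaining = list(range(0, n_feats))
    order = []
    for _ in range(0, n_feats):
        # Try each remaining feature on top of the current selection
        scored = [(score(selected + [f]), f) for f in remaining]
        best_score, best_feat = max(scored, key=lambda s: s[0])  # first max wins
        order.append((best_feat, best_score))
        selected.append(best_feat)
        remaining.remove(best_feat)
    return order  # features in selection order, with their scores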