Ejemplo n.º 1
0
    def load_data(self, features):
        """Load the target matrix ``self.y`` and the scaled feature matrix ``self.X``.

        The source of ``self.y`` depends on ``self.mask_img``:

        * ``None`` -- the dataset's image data is used as-is.
        * a path ending in ``.pkl`` -- the pickled object stored there is loaded.
        * any other string -- passed with the dataset to
          ``average_within_regions``.
        * anything else -- used directly as ``self.y`` (it must expose
          ``.shape``, since ``self.mask_num`` is read from ``shape[0]``).

        Parameters
        ----------
        features
            Forwarded unchanged to ``self.dataset.get_feature_data``.

        Side effects
        ------------
        Sets ``self.y``, ``self.mask_num``, ``self.feature_names`` and
        ``self.X``, then calls ``self.set_dims()``.
        """
        print("Loading data from neurosynth...")

        from neurosynth.analysis.classify import regularize
        from neurosynth.analysis.reduce import average_within_regions

        try:  # Python 2
            string_types = basestring
        except NameError:  # Python 3
            string_types = str

        if self.mask_img is None:
            self.y = self.dataset.get_image_data()
        elif isinstance(self.mask_img, string_types):
            # The previous check compared a 3-character slice against the
            # 4-character string ".pkl", so this branch could never be taken.
            if self.mask_img.endswith(".pkl"):
                try:
                    import cPickle as pickle
                except ImportError:
                    import pickle
                # NOTE(security): unpickling executes arbitrary code; only
                # point ``mask_img`` at pickle files from a trusted source.
                with open(self.mask_img, 'rb') as handle:
                    self.y = pickle.load(handle)
            else:
                # TODO: add feature to filter by features
                self.y = average_within_regions(self.dataset, self.mask_img)
        else:
            self.y = self.mask_img

        self.mask_num = self.y.shape[0]

        X = self.dataset.get_feature_data(features=features)
        # Column names must be captured before scaling replaces X.
        self.feature_names = X.columns.tolist()

        self.X = regularize(X, method='scale')

        self.set_dims()
Ejemplo n.º 2
0
    def load_data(self, features):
        """Load the target matrix ``self.y`` and the scaled feature matrix ``self.X``.

        The source of ``self.y`` depends on ``self.mask_img``:

        * ``None`` -- the dataset's image data is used as-is.
        * a path ending in ``.pkl`` -- the pickled object stored there is loaded.
        * any other string -- passed with the dataset to
          ``average_within_regions``.
        * anything else -- used directly as ``self.y`` (it must expose
          ``.shape``, since ``self.mask_num`` is read from ``shape[0]``).

        Parameters
        ----------
        features
            Forwarded unchanged to ``self.dataset.get_feature_data``.

        Side effects
        ------------
        Sets ``self.y``, ``self.mask_num`` and ``self.X``, then calls
        ``self.set_dims()``.
        """
        print("Loading data from neurosynth...")

        from neurosynth.analysis.classify import regularize
        from neurosynth.analysis.reduce import average_within_regions

        try:  # Python 2
            string_types = basestring
        except NameError:  # Python 3
            string_types = str

        if self.mask_img is None:
            self.y = self.dataset.get_image_data()
        elif isinstance(self.mask_img, string_types):
            # The previous check compared a 3-character slice against the
            # 4-character string ".pkl", so this branch could never be taken.
            if self.mask_img.endswith(".pkl"):
                try:
                    import cPickle as pickle
                except ImportError:
                    import pickle
                # NOTE(security): unpickling executes arbitrary code; only
                # point ``mask_img`` at pickle files from a trusted source.
                with open(self.mask_img, 'rb') as handle:
                    self.y = pickle.load(handle)
            else:
                # TODO: add feature to filter by features
                self.y = average_within_regions(self.dataset, self.mask_img)
        else:
            self.y = self.mask_img

        self.mask_num = self.y.shape[0]

        X = self.dataset.get_feature_data(features=features)
        self.X = regularize(X, method='scale')

        self.set_dims()
Ejemplo n.º 3
0
    def load_data(self, features, X_threshold):
        """Build a pairwise (X, y) problem for every ordered pair of masks.

        For each ordered pair ``(a, b)`` of mask indices, the studies that
        belong to mask ``a`` but not ``b`` are labelled 0 and the studies that
        belong to ``b`` but not ``a`` are labelled 1. Their feature rows are
        stacked, optionally binarized, scaled, and stored in
        ``self.c_data[a, b]``.

        Parameters
        ----------
        features
            Forwarded to ``self.dataset.get_feature_data`` so only the
            requested features are loaded. It was previously ignored.
        X_threshold
            If not ``None``, ``binarize(X, X_threshold)`` is applied to the
            stacked feature matrix before scaling.

        Side effects
        ------------
        Sets ``self.ids_by_masks``, ``self.data_by_masks``, ``self.mask_num``,
        ``self.mask_pairs`` and ``self.c_data`` (an object-dtype memmap backed
        by a file in a fresh temporary directory). When ``self.memsave`` is
        truthy the per-mask id/data lists are emptied afterwards.
        """
        from neurosynth.analysis.classify import regularize
        from neurosynth.analysis.reduce import average_within_regions

        # Masks-by-studies matrix.
        # TODO: add feature to filter by features
        masks_by_studies = average_within_regions(
            self.dataset, self.mask_img, threshold=self.thresh)

        study_ids = self.dataset.feature_table.data.index

        print("Loading data from neurosynth...")

        self.mask_num = masks_by_studies.shape[0]
        pb = tools.ProgressBar(self.mask_num, start=True)

        self.ids_by_masks = []
        self.data_by_masks = []
        for mask in masks_by_studies:
            # Kept as an explicit comparison so that non-boolean rows behave
            # exactly as before.
            m_ids = study_ids[np.where(mask == True)[0]]
            self.ids_by_masks.append(m_ids)
            self.data_by_masks.append(
                self.dataset.get_feature_data(ids=m_ids, features=features))
            pb.next()

        self.mask_pairs = list(
            itertools.permutations(range(0, self.mask_num), 2))

        filename = path.join(mkdtemp(), 'c_data.dat')
        self.c_data = np.memmap(filename, dtype='object',
                                mode='w+', shape=(self.mask_num, self.mask_num))

        for pair in self.mask_pairs:
            reg1_ids = self.ids_by_masks[pair[0]]
            reg2_ids = self.ids_by_masks[pair[1]]

            # Studies that belong to exactly one of the two masks.
            reg1_set = list(set(reg1_ids) - set(reg2_ids))
            reg2_set = list(set(reg2_ids) - set(reg1_ids))

            x1 = self.data_by_masks[pair[0]]
            x1 = np.array(x1)[np.where(np.in1d(reg1_ids, reg1_set))[0]]

            x2 = self.data_by_masks[pair[1]]
            x2 = np.array(x2)[np.where(np.in1d(reg2_ids, reg2_set))[0]]

            y = np.array([0] * len(reg1_set) + [1] * len(reg2_set))

            X = np.vstack((x1, x2))

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            X = regularize(X, method='scale')

            self.c_data[pair] = (X, y)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []
Ejemplo n.º 4
0
    def load_data(self, features, X_threshold):
        """Build one "mask vs. everything else" (X, y) problem per mask.

        For every mask, the studies in the mask ("on") are labelled 1 and the
        remaining studies ("off") are labelled 0. When ``self.thresh_low`` is
        set, the "off" studies are those outside the mask at that lower
        threshold instead. Each ``(X, y)`` tuple is stored in
        ``self.data[mask_index]``.

        Parameters
        ----------
        features
            Passed to ``self.load_mask_data`` and to the feature-table lookup
            for the "off" studies.
        X_threshold
            If not ``None``, ``binarize(X, X_threshold)`` is applied after
            scaling.
        """
        # Per-mask ids and feature data
        self.load_mask_data(features)

        backing_file = path.join(mkdtemp(), 'data.dat')
        self.data = np.memmap(backing_file, dtype='object', mode='w+',
                              shape=(self.mask_num))

        all_ids = self.dataset.image_table.ids
        use_low_thresh = self.thresh_low is not None

        # With a low threshold, collect the study ids per mask at that level
        if use_low_thresh:
            ids_by_masks_low = []
            from neurosynth.analysis.reduce import average_within_regions
            masks_by_studies_low = average_within_regions(
                self.dataset, self.mask_img, threshold=self.thresh_low)
            for low_mask in masks_by_studies_low:
                ids_by_masks_low.append(
                    np.array(all_ids)[np.where(low_mask == True)[0]])

        for num, on_ids in enumerate(self.ids_by_masks):

            # "off" studies are everything outside the (low-threshold) mask
            excluded_ids = ids_by_masks_low[num] if use_low_thresh else on_ids
            off_ids = list(set(all_ids) - set(excluded_ids))

            on_data = self.data_by_masks[num].dropna()

            feature_table = self.dataset.feature_table
            off_data = feature_table.get_feature_data(
                ids=off_ids, features=features).dropna()

            n_off, n_on = off_data.shape[0], on_data.shape[0]
            labels = np.array([0] * n_off + [1] * n_on)

            stacked = np.vstack((np.array(off_data), np.array(on_data)))

            from neurosynth.analysis.classify import regularize
            stacked = regularize(stacked, method='scale')

            if X_threshold is not None:
                stacked = binarize(stacked, X_threshold)

            self.data[num] = (stacked, labels)

        if self.memsave:
            self.data_by_masks = []

        # The id lists are always released, independent of ``memsave``
        self.ids_by_masks = []

        self.comparisons = range(0, self.mask_num)

        self.comp_dims = (self.mask_num, )
Ejemplo n.º 5
0
    def load_data(self, features, X_threshold):
        """Build one "mask vs. everything else" (X, y) problem per mask.

        For every mask, the studies in the mask ("on") are labelled 1 and the
        remaining studies ("off") are labelled 0. When ``self.thresh_low`` is
        set, the "off" studies are those outside the mask at that lower
        threshold instead. Each ``(X, y)`` tuple is stored in
        ``self.c_data[mask_index]``.

        Parameters
        ----------
        features
            Passed to ``self.load_mask_data`` and to the feature lookup for
            the "off" studies, so both halves of ``X`` cover the same
            features. The "off" lookup previously ignored it.
        X_threshold
            If not ``None``, ``binarize(X, X_threshold)`` is applied after
            scaling.

        Side effects
        ------------
        Sets ``self.c_data`` (object-dtype memmap in a fresh temporary
        directory), ``self.comparisons`` and ``self.comp_dims``. When
        ``self.memsave`` is truthy, ``self.data_by_masks`` and
        ``self.ids_by_masks`` are emptied.
        """
        # Load data for each mask
        self.load_mask_data(features)

        from neurosynth.analysis.classify import regularize

        filename = path.join(mkdtemp(), 'c_data.dat')
        self.c_data = np.memmap(filename, dtype='object',
                                mode='w+', shape=(self.mask_num))

        all_ids = self.dataset.image_table.ids
        # Built once; it does not change between masks.
        all_id_set = set(all_ids)

        # If a low thresh is set, then get ids for studies at that threshold
        ids_by_masks_low = None
        if self.thresh_low is not None:
            from neurosynth.analysis.reduce import average_within_regions
            masks_by_studies_low = average_within_regions(
                self.dataset, self.mask_img, threshold=self.thresh_low)
            id_array = np.array(all_ids)
            ids_by_masks_low = [id_array[np.where(mask == True)[0]]
                                for mask in masks_by_studies_low]

        # Set up data into c_data
        for num, on_ids in enumerate(self.ids_by_masks):

            # With a low threshold the "off" group excludes the studies found
            # at that threshold; otherwise it excludes the "on" studies.
            if ids_by_masks_low is not None:
                excluded = ids_by_masks_low[num]
            else:
                excluded = on_ids
            off_ids = list(all_id_set - set(excluded))

            on_data = self.data_by_masks[num].dropna()

            # Request the same features as the "on" data so the two blocks
            # can be stacked column-for-column.
            off_data = self.dataset.get_feature_data(
                ids=off_ids, features=features).dropna()

            y = np.array([0] * off_data.shape[0] + [1] * on_data.shape[0])

            X = np.vstack((np.array(off_data), np.array(on_data)))

            X = regularize(X, method='scale')

            if X_threshold is not None:
                X = binarize(X, X_threshold)

            self.c_data[num] = (X, y)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []

        self.comparisons = range(0, self.mask_num)

        self.comp_dims = (self.mask_num, )
Ejemplo n.º 6
0
    def load_data(self, features, X_threshold):
        """Assemble a pairwise (X, y) problem for every unordered pair of masks.

        Rows from the first mask of a pair are labelled 0 and rows from the
        second are labelled 1. When ``self.remove_overlap`` is ``True``,
        studies present in both masks are dropped from both sides first.
        Each result is stored in ``self.data[i, j]`` for ``i < j``.

        Parameters
        ----------
        features
            Passed to ``self.load_mask_data``.
        X_threshold
            If not ``None``, ``binarize(X, X_threshold)`` is applied before
            scaling.
        """
        # Per-mask ids and feature data
        self.load_mask_data(features)

        # Every unordered pair of mask indices
        mask_indices = range(0, self.mask_num)
        self.comparisons = list(itertools.combinations(mask_indices, 2))

        backing_file = path.join(mkdtemp(), 'data.dat')
        self.data = np.memmap(backing_file, dtype='object', mode='w+',
                              shape=(self.mask_num, self.mask_num))

        for pair in self.comparisons:
            first, second = pair

            x1 = self.data_by_masks[first]
            x2 = self.data_by_masks[second]

            reg1_ids = self.ids_by_masks[first]
            reg2_ids = self.ids_by_masks[second]

            if self.remove_overlap is True:
                # Keep only studies exclusive to each mask
                only_first = list(set(reg1_ids) - set(reg2_ids))
                only_second = list(set(reg2_ids) - set(reg1_ids))

                keep_first = np.flatnonzero(np.in1d(reg1_ids, only_first))
                keep_second = np.flatnonzero(np.in1d(reg2_ids, only_second))
                x1 = np.array(x1)[keep_first]
                x2 = np.array(x2)[keep_second]

                reg1_ids, reg2_ids = only_first, only_second

            labels = np.array([0] * len(reg1_ids) + [1] * len(reg2_ids))

            stacked = np.vstack((x1, x2))

            if X_threshold is not None:
                stacked = binarize(stacked, X_threshold)

            from neurosynth.analysis.classify import regularize
            stacked = regularize(stacked, method='scale')

            self.data[pair] = (stacked, labels)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []

        self.comp_dims = (self.mask_num, self.mask_num)
Ejemplo n.º 7
0
    def load_data(self, features, X_threshold):
        """Assemble a pairwise (X, y) problem for every unordered pair of masks.

        Rows from the first mask of a pair are labelled 0 and rows from the
        second are labelled 1. When ``self.remove_overlap`` is ``True``,
        studies present in both masks are dropped from both sides first.
        Each result is stored in ``self.c_data[i, j]`` for ``i < j``.

        Parameters
        ----------
        features
            Passed to ``self.load_mask_data``.
        X_threshold
            If not ``None``, ``binarize(X, X_threshold)`` is applied before
            scaling.
        """
        # Per-mask ids and feature data
        self.load_mask_data(features)

        # Every unordered pair of mask indices
        mask_indices = range(0, self.mask_num)
        self.comparisons = list(itertools.combinations(mask_indices, 2))

        backing_file = path.join(mkdtemp(), 'c_data.dat')
        self.c_data = np.memmap(backing_file, dtype='object', mode='w+',
                                shape=(self.mask_num, self.mask_num))

        for pair in self.comparisons:
            first, second = pair

            x1 = self.data_by_masks[first]
            x2 = self.data_by_masks[second]

            reg1_ids = self.ids_by_masks[first]
            reg2_ids = self.ids_by_masks[second]

            if self.remove_overlap is True:
                # Keep only studies exclusive to each mask
                only_first = list(set(reg1_ids) - set(reg2_ids))
                only_second = list(set(reg2_ids) - set(reg1_ids))

                keep_first = np.flatnonzero(np.in1d(reg1_ids, only_first))
                keep_second = np.flatnonzero(np.in1d(reg2_ids, only_second))
                x1 = np.array(x1)[keep_first]
                x2 = np.array(x2)[keep_second]

                reg1_ids, reg2_ids = only_first, only_second

            labels = np.array([0] * len(reg1_ids) + [1] * len(reg2_ids))

            stacked = np.vstack((x1, x2))

            if X_threshold is not None:
                stacked = binarize(stacked, X_threshold)

            from neurosynth.analysis.classify import regularize
            stacked = regularize(stacked, method='scale')

            self.c_data[pair] = (stacked, labels)

        if self.memsave:
            self.data_by_masks = []
            self.ids_by_masks = []

        self.comp_dims = (self.mask_num, self.mask_num)
Ejemplo n.º 8
0
def bootstrap_mv_full_parallel(args):
    """Run one bootstrap iteration of greedy forward feature selection.

    Samples are drawn with replacement, the features are scaled, and for
    every region (row of ``y_high``) features are added one at a time, each
    step keeping the feature whose addition gives the best cross-validated
    score.

    Parameters
    ----------
    args : tuple
        ``((X, y_high, y_low, classifier, scorer, method), boot_n)``.
        ``X`` must support ``.iloc`` and ``.columns``; ``y_high`` and
        ``y_low`` are indexed as ``[:, ran_index]`` with one row per region
        (presumably boolean study-membership masks at a high and a low
        threshold -- confirm against the caller). ``boot_n`` is copied into
        every result row.

    Returns
    -------
    list or None
        For ``method == 'combinatorial'``, one
        ``[score, step, feature_name, region_index, boot_n]`` row per region
        and selection step. Any other ``method`` yields an empty list.
        ``None`` is returned, after emitting a warning, if anything raises.
    """
    try:
        (X, y_high, y_low, classifier, scorer, method), boot_n = args

        # Reseed from OS entropy; presumably so that parallel workers do not
        # all draw the same bootstrap sample.
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        from neurosynth.analysis.classify import regularize

        # Bootstrap sample X & y
        X = X.iloc[ran_index, :]
        y_high = pd.DataFrame(y_high[:, ran_index])
        y_low = pd.DataFrame(y_low[:, ran_index])

        feature_names = X.columns.tolist()
        n_topics = len(feature_names)

        X = regularize(X, method='scale')
        results = []
        for reg_i, reg_y_high in y_high.iterrows():
            # Exclude samples that are on at the low threshold but off at the
            # high one; everything else is kept.
            reg_ix = (
                (y_low.iloc[reg_i, :] == True) & (reg_y_high == False)) == False
            reg_y = reg_y_high[reg_ix].astype('int')
            reg_X = X[reg_ix.values, :]

            if method == 'combinatorial':

                ix = []  # Feature order index
                remaining = list(range(n_topics))

                for i in range(n_topics):
                    test_results = []
                    for new_feat in remaining:
                        X_1 = reg_X[:, ix + [new_feat]]
                        output = classify.classify(
                            X_1, reg_y.values, classifier=classifier,
                            cross_val='4-Fold', scoring=scorer,
                            output='summary')
                        test_results.append(
                            [output['score'], i, feature_names[new_feat],
                             reg_i, boot_n, new_feat])

                    test_results = pd.DataFrame(test_results)
                    scores = test_results.iloc[:, 0]

                    # On a tie the first best candidate wins. Taking the row
                    # as a Series in every case avoids the old failure where
                    # a tie turned ``winner`` into a Series and the following
                    # lines raised.
                    winner = test_results[scores == scores.max()].iloc[0]
                    best_feat = int(winner.iloc[5])

                    results.append(winner.tolist()[0:5])
                    remaining.remove(best_feat)
                    ix.append(best_feat)

            # elif method == 'shannons':
            #     from base.statistics import shannons
            #     clf = classify.classify(X, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='clf')
            #     odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            #     odds_ratios -= (odds_ratios.min() - 0.000001)
            #     results = [shannons(odds_ratios), reg, boot_n]
    except Exception as err:
        # Best-effort by design: a failed bootstrap yields None instead of
        # aborting the whole run. KeyboardInterrupt/SystemExit now propagate.
        import warnings
        warnings.warn('something went wrong: %r' % (err,))
        return None

    return results
Ejemplo n.º 9
0
def bootstrap_mv_full_parallel(args):
    """Run one bootstrap iteration of greedy forward feature selection.

    Samples are drawn with replacement, the features are scaled, and for
    every region (row of ``y_high``) features are added one at a time, each
    step keeping the feature whose addition gives the best cross-validated
    score.

    Parameters
    ----------
    args : tuple
        ``((X, y_high, y_low, classifier, scorer, method), boot_n)``.
        ``X`` must support ``.iloc`` and ``.columns``; ``y_high`` and
        ``y_low`` are indexed as ``[:, ran_index]`` with one row per region
        (presumably boolean study-membership masks at a high and a low
        threshold -- confirm against the caller). ``boot_n`` is copied into
        every result row.

    Returns
    -------
    list or None
        For ``method == 'combinatorial'``, one
        ``[score, step, feature_name, region_index, boot_n]`` row per region
        and selection step. Any other ``method`` yields an empty list.
        ``None`` is returned, after emitting a warning, if anything raises.
    """
    try:
        (X, y_high, y_low, classifier, scorer, method), boot_n = args

        # Reseed from OS entropy; presumably so that parallel workers do not
        # all draw the same bootstrap sample.
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        from neurosynth.analysis.classify import regularize

        # Bootstrap sample X & y
        X = X.iloc[ran_index, :]
        y_high = pd.DataFrame(y_high[:, ran_index])
        y_low = pd.DataFrame(y_low[:, ran_index])

        feature_names = X.columns.tolist()
        n_topics = len(feature_names)

        X = regularize(X, method='scale')
        results = []
        for reg_i, reg_y_high in y_high.iterrows():
            # Exclude samples that are on at the low threshold but off at the
            # high one; everything else is kept.
            reg_ix = ((y_low.iloc[reg_i, :] == True) &
                      (reg_y_high == False)) == False
            reg_y = reg_y_high[reg_ix].astype('int')
            reg_X = X[reg_ix.values, :]

            if method == 'combinatorial':

                ix = []  # Feature order index
                remaining = list(range(n_topics))

                for i in range(n_topics):
                    test_results = []
                    for new_feat in remaining:
                        X_1 = reg_X[:, ix + [new_feat]]
                        output = classify.classify(X_1,
                                                   reg_y.values,
                                                   classifier=classifier,
                                                   cross_val='4-Fold',
                                                   scoring=scorer,
                                                   output='summary')
                        test_results.append([
                            output['score'], i, feature_names[new_feat],
                            reg_i, boot_n, new_feat
                        ])

                    test_results = pd.DataFrame(test_results)
                    scores = test_results.iloc[:, 0]

                    # On a tie the first best candidate wins. Taking the row
                    # as a Series in every case avoids the old failure where
                    # a tie turned ``winner`` into a Series and the following
                    # lines raised.
                    winner = test_results[scores == scores.max()].iloc[0]
                    best_feat = int(winner.iloc[5])

                    results.append(winner.tolist()[0:5])
                    remaining.remove(best_feat)
                    ix.append(best_feat)

            # elif method == 'shannons':
            #     from base.statistics import shannons
            #     clf = classify.classify(X, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='clf')
            #     odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            #     odds_ratios -= (odds_ratios.min() - 0.000001)
            #     results = [shannons(odds_ratios), reg, boot_n]
    except Exception as err:
        # Best-effort by design: a failed bootstrap yields None instead of
        # aborting the whole run. KeyboardInterrupt/SystemExit now propagate.
        import warnings
        warnings.warn('something went wrong: %r' % (err,))
        return None

    return results