Python classify 예제들, neurosynth.analysis.classify.classify Python 예제들

예제 #1

0

파일 보기

파일: mv.py 프로젝트: csddzh/NS_Classify

def calc_mv_parallel_classifier(args):
    """Score progressively larger feature subsets for a single region.

    ``args`` is ``(shared, reg)`` where ``shared`` is the tuple
    ``(filename, classifier, scorer, comp_dims, fis, feature_names, method)``.
    ``reg`` indexes both the memory-mapped data file and ``fis``.

    method:
        'sequential'    -- add features in order of decreasing ``abs(fis[reg])``.
        'combinatorial' -- greedy forward selection: start from the feature
                           with the largest absolute importance, then at each
                           step keep whichever remaining feature yields the
                           highest score.

    Returns a list of ``[score, n_features, reg, feature_name]`` rows.
    Raises ValueError for any other ``method`` (previously this surfaced as an
    UnboundLocalError at the return statement).
    """
    (filename, classifier, scorer, comp_dims,
     fis, feature_names, method), reg = args

    X, y = np.memmap(filename, dtype='object', mode='r',
                     shape=comp_dims)[reg]
    fis = fis[reg]

    n_topics = X.shape[1]

    # Feature indices by decreasing absolute importance; computed once instead
    # of re-sorting on every iteration.
    ranked = np.abs(fis).argsort()[::-1]

    def score_subset(columns):
        # Cross-validated score using only the given feature columns.
        output = classify.classify(
            X[:, columns], y, classifier=classifier, cross_val='4-Fold',
            scoring=scorer, output='summary')
        return output['score']

    if method == 'sequential':
        results = []
        # NOTE(review): the loop stops at n_topics - 1 features, so the full
        # feature set is never scored -- kept as-is, confirm this is intended.
        for i in range(1, n_topics):
            feature = feature_names[ranked[i - 1]]
            results.append([score_subset(ranked[0:i]), i, reg, feature])

    elif method == 'combinatorial':
        results = []
        ix = [ranked[0]]  # Features selected so far, in selection order
        # list(...) so that .remove()/.pop() also work on Python 3 ranges.
        remaining = list(range(n_topics))
        remaining.remove(ix[0])

        feature = feature_names[ix[0]]
        results.append([score_subset(ix), 1, reg, feature])

        for i in range(2, n_topics + 1):
            test_results = []
            for new_feat in remaining:
                feature = feature_names[new_feat]
                test_results.append(
                    [score_subset(ix + [new_feat]), i, reg, feature])

            test_results = pd.DataFrame(test_results)

            # Position of the first best-scoring candidate (.iloc replaces the
            # removed .ix indexer; rows are in the same order as `remaining`).
            scores = test_results.iloc[:, 0]
            best = np.where((scores == scores.max()).values)[0][0]

            results.append(list(test_results.values[best]))
            ix.append(remaining.pop(best))

    else:
        raise ValueError('unknown method: %r' % (method,))

    return results

예제 #2

0

파일 보기

파일: mv.py 프로젝트: adelavega/ns_classify

def best_subsets_parallel(args):
    """Score one feature combination; meant to be mapped over combinations.

    ``args`` is ``((X, y, classifier, scorer, feature_names), comb)`` where
    ``comb`` holds the column indices of ``X`` to use.

    Returns ``(score, selected_feature_names, comb)``.
    """
    shared, comb = args
    X, y, classifier, scorer, feature_names = shared

    subset = X[:, comb]

    # Look the names up through an array so a tuple of indices can be used.
    name_lookup = np.array(feature_names)
    features = [name for name in name_lookup[list(comb)]]

    summary = classify.classify(
        subset, y,
        classifier=classifier,
        cross_val='4-Fold',
        scoring=scorer,
        output='summary')

    return (summary['score'], features, comb)

예제 #3

0

파일 보기

파일: mv.py 프로젝트: csddzh/NS_Classify

def best_subsets_parallel(args):
    """Evaluate a single subset of feature columns.

    ``args`` unpacks to ``((X, y, classifier, scorer, feature_names), comb)``;
    ``comb`` is the collection of column indices selected from ``X``.

    Returns a ``(score, feature_names_in_subset, comb)`` tuple.
    """
    (X, y, classifier, scorer, feature_names), comb = args

    columns = list(comb)
    X_1 = X[:, comb]
    features = list(np.array(feature_names)[columns])

    classify_kwargs = dict(classifier=classifier, cross_val='4-Fold',
                           scoring=scorer, output='summary')
    score = classify.classify(X_1, y, **classify_kwargs)['score']

    return (score, features, comb)

예제 #4

0

파일 보기

def classify_parallel(args):
    """Classify the data stored at one index of the memory-mapped file.

    ``args`` is ``(shared, index)`` with ``shared`` being
    ``(classifier, param_grid, scoring, filename, feat_select, length,
    class_weight)``. The file is opened read-only as a ``length x length``
    object array and ``(X, y)`` is read from position ``index``.

    Returns the output of ``classify.classify`` with an added ``'index'``
    entry so the caller can tell which comparison the result belongs to.
    """
    shared, index = args
    (classifier, param_grid, scoring, filename,
     feat_select, length, class_weight) = shared

    stored = np.memmap(filename, dtype='object', mode='r',
                       shape=(length, length))
    X, y = stored[index]

    output = classify.classify(
        X, y,
        classifier=classifier,
        cross_val='4-Fold',
        class_weight=class_weight,
        scoring=scoring,
        param_grid=param_grid,
        feat_select=feat_select)
    output['index'] = index

    # TODO: add a vector to the output that tracks the selected features, to
    # assess their stability.
    return output

예제 #5

0

파일 보기

파일: classification.py 프로젝트: adelavega/neurosynth-mfc

def classify_parallel(classifier, scoring, region_data, importance_function):
    """ Parallel classification function. Used to classify, for each region,
    whether a study was activated or not (typically based on neurosynth
    features).

    classifier: sklearn classifier
    scoring: sklearn scoring function
    region_data: contains (X, y) data for a given region
    importance_function: function to format the importance vector (i.e. what
        to pull out from the fitted classifier)

    Returns the summary dictionary produced by ``classify`` with an added
    'importance' entry. """

    features, labels = region_data

    summary = classify(features, labels,
                       classifier=classifier,
                       cross_val='4-Fold',
                       scoring=scoring)

    # The fitted estimator is exposed as the ``clf`` attribute of the
    # returned 'clf' entry.
    fitted = summary['clf'].clf
    summary['importance'] = importance_function(fitted)
    return summary

예제 #6

0

파일 보기

def classify_parallel(classifier, scoring, region_data, importance_function):
    """ Classify a single region; intended to be run in parallel over regions
    (study activated or not, typically from neurosynth features).

    classifier: sklearn classifier
    scoring: sklearn scoring function
    region_data: (X, y) pair for the region
    importance_function: callable applied to the fitted classifier to extract
        the importance vector

    Returns the dictionary from ``classify`` extended with 'importance'. """

    X, y = region_data
    settings = {
        'classifier': classifier,
        'cross_val': '4-Fold',
        'scoring': scoring,
    }

    result = classify(X, y, **settings)
    result['importance'] = importance_function(result['clf'].clf)
    return result

예제 #7

0

파일 보기

파일: __init__.py 프로젝트: margulies/NS_Classify

    def classify(self, features=None, scoring='accuracy', X_threshold=None, feat_select=None, processes=1, class_weight='auto', dummy=None):
        """Classify every comparison and store the results on the instance.

        Data and result containers are only (re)built when ``self.c_data`` is
        still None. Each comparison in ``self.comparisons`` is handed to
        ``classify_parallel``, through a multiprocessing pool when
        ``processes > 1`` and lazily in-process otherwise.

        Fills ``class_score``, ``predictions``, ``feature_importances`` (when
        the fitted classifier exposes ``coef_`` or ``feature_importances_``),
        ``fit_clfs`` (unless ``self.memsave``), ``features_selected`` (when
        ``feat_select`` is given) and, when ``dummy`` names a DummyClassifier
        strategy, ``dummy_score`` / ``dummy_predictions``. ``final_score`` is
        the class score, minus the dummy score when a dummy was requested.
        """
        if self.c_data is None:
            self.load_data(features, X_threshold)
            self.initalize_containers(features, feat_select, dummy)

        # Parenthesised form prints the same text on Python 2 and parses on 3.
        print("Classifying...")
        # NOTE(review): list() would exhaust a one-shot iterator before the
        # loop below -- assumes self.comparisons can be iterated twice.
        pb = tools.ProgressBar(len(list(self.comparisons)), start=True)

        pool = None
        if processes > 1:
            from multiprocessing import Pool
            pool = Pool(processes=processes)
            run = pool.imap
        else:
            # itertools.imap/izip exist only on Python 2; the Python 3
            # builtins map/zip are already lazy.
            run = getattr(itertools, 'imap', map)
        pair_up = getattr(itertools, 'izip', zip)

        try:
            if dummy is not None:
                from sklearn.dummy import DummyClassifier

            filename = self.c_data.filename
            shared = (self.classifier, scoring, filename, feat_select,
                      self.comp_dims, class_weight)

            for output in run(classify_parallel,
                              pair_up(itertools.repeat(shared),
                                      self.comparisons)):

                index = output['index']
                self.class_score[index] = output['score']
                if self.memsave is False:
                    self.fit_clfs[index] = output['clf']

                # Prefer linear coefficients, fall back to tree-style
                # importances, and skip classifiers that offer neither.
                try:
                    self.feature_importances[index] = output['clf'].clf.coef_[0]
                except AttributeError:
                    try:
                        self.feature_importances[index] = output['clf'].clf.feature_importances_
                    except AttributeError:
                        pass

                if feat_select:
                    self.features_selected[index] = output['features_selected']

                self.predictions[index] = output['predictions']

                if dummy is not None:
                    # Baseline: same data and settings with a dummy classifier.
                    X, y = self.c_data[index]
                    output = classify.classify(
                        X, y, classifier=DummyClassifier(strategy=dummy), cross_val='4-Fold',
                        class_weight=class_weight, scoring=scoring, feat_select=feat_select)

                    self.dummy_score[index] = output['score']
                    self.dummy_predictions[index] = output['predictions']

                pb.next()
        finally:
            if pool is not None:
                pool.close()
                pool.join()

        if dummy is None:
            self.final_score = self.class_score
        else:
            self.final_score = self.class_score - self.dummy_score

예제 #8

0

파일 보기

파일: mv.py 프로젝트: adelavega/ns_classify

def calc_mv_parallel_classifier(args):
    """Score feature subsets of increasing size for a single region.

    ``args`` is ``(shared, reg)`` where ``shared`` is the tuple
    ``(filename, classifier, scorer, comp_dims, fis, feature_names, method)``.
    ``reg`` indexes both the memory-mapped data file and ``fis``.

    method:
        'sequential'    -- add features by decreasing ``abs(fis[reg])``
                           (1 .. n_topics - 1 features).
        'best_subsets'  -- for every subset size below n_topics, score all
                           combinations and keep the best one.
        'combinatorial' -- greedy forward selection starting from the feature
                           with the largest absolute importance.
        anything else   -- sequential pass over 1 .. n_topics features.

    Returns a list of ``[score, n_features, reg, feature(s)]`` rows.
    """
    (filename, classifier, scorer, comp_dims, fis, feature_names, method), reg = args

    X, y = np.memmap(filename, dtype='object', mode='r',
                     shape=comp_dims)[reg]
    fis = fis[reg]

    n_topics = X.shape[1]

    # Feature indices by decreasing absolute importance, computed once.
    ranked = np.abs(fis).argsort()[::-1]

    def score_subset(columns):
        # Cross-validated score using only the given feature columns.
        output = classify.classify(X[:, columns], y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='summary')
        return output['score']

    def first_best(rows):
        # Row (as a list) of the first candidate with the maximum score, plus
        # its position. .iloc replaces the removed .ix indexer.
        frame = pd.DataFrame(rows)
        scores = frame.iloc[:, 0]
        position = np.where((scores == scores.max()).values)[0][0]
        return list(frame.values[position]), position

    results = []

    if method == 'sequential':
        # NOTE(review): stops one short of the full feature set, unlike the
        # fallback pass below -- kept as-is, confirm this is intended.
        for i in range(1, n_topics):
            feature = feature_names[ranked[i - 1]]
            results.append([score_subset(ranked[0:i]), i, reg, feature])

    elif method == 'best_subsets':
        for n_comb in range(1, n_topics):
            test_results = []
            for comb in itertools.combinations(range(n_topics), n_comb):
                features = list(np.array(feature_names)[list(comb)])
                test_results.append(
                    [score_subset(comb), n_comb, reg, features])

            results.append(first_best(test_results)[0])

    elif method == 'combinatorial':
        ix = [ranked[0]]  # Features selected so far, in selection order
        # list(...) so that .remove()/.pop() also work on Python 3 ranges.
        remaining = list(range(n_topics))
        remaining.remove(ix[0])

        feature = feature_names[ix[0]]
        results.append([score_subset(ix), 1, reg, feature])

        for i in range(2, n_topics + 1):
            test_results = []
            for new_feat in remaining:
                feature = feature_names[new_feat]
                test_results.append(
                    [score_subset(ix + [new_feat]), i, reg, feature])

            # Candidate rows are in the same order as `remaining`.
            row, position = first_best(test_results)
            results.append(row)
            ix.append(remaining.pop(position))

    else:
        # Fallback pass. It used to run unconditionally and was then thrown
        # away by every branch above; it is now only computed when needed.
        for i in range(1, n_topics + 1):
            feature = feature_names[ranked[i - 1]]
            results.append([score_subset(ranked[0:i]), i, reg, feature])

    return results

예제 #9

0

파일 보기

파일: mv.py 프로젝트: csddzh/NS_Classify

def bootstrap_mv_full_parallel(args):
    """Run one bootstrap iteration of greedy feature selection for all regions.

    ``args`` is ``((X, y_high, y_low, classifier, scorer, method), boot_n)``.
    ``X`` is a DataFrame (studies x features); ``y_high`` and ``y_low`` are
    indexed as ``[region, study]``. Studies are resampled with replacement,
    ``X`` is passed through ``regularize(X, method='scale')`` and, for each
    region, studies flagged in ``y_low`` but not in ``y_high`` are dropped.

    With ``method == 'combinatorial'`` each step adds the remaining feature
    that gives the best cross-validated score (first one wins on ties).

    Returns a list of ``[score, step, feature_name, region, boot_n]`` rows
    (empty for any other method), or None after warning if an ordinary
    exception occurs. KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    import warnings

    try:
        (X, y_high, y_low, classifier, scorer, method), boot_n = args

        # Reseed with no argument so the draw below does not reuse inherited
        # state (presumably because this runs in forked worker processes).
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        from neurosynth.analysis.classify import regularize

        # Bootstrap sample X & y
        X = X.iloc[ran_index, :]
        y_high = pd.DataFrame(y_high[:, ran_index])
        y_low = pd.DataFrame(y_low[:, ran_index])

        feature_names = X.columns.tolist()
        n_topics = len(feature_names)

        X = regularize(X, method='scale')
        results = []
        for reg_i, reg_y_high in y_high.iterrows():
            # Element-wise comparisons on purpose (values may not be bools):
            # keep every study except those low-but-not-high for this region.
            reg_ix = (
                (y_low.iloc[reg_i, :] == True) & (reg_y_high == False)) == False
            reg_y = reg_y_high[reg_ix].astype('int')
            reg_X = X[reg_ix.values, :]

            if method == 'combinatorial':

                ix = []  # Feature order index
                # list(...) so that .remove() also works on Python 3 ranges.
                remaining = list(range(n_topics))

                for i in range(n_topics):
                    test_results = []
                    for new_feat in remaining:
                        try_comb = ix + [new_feat]
                        X_1 = reg_X[:, try_comb]
                        feature = feature_names[new_feat]
                        output = classify.classify(
                            X_1, reg_y.values, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='summary')
                        test_results.append(
                            [output['score'], i, feature, reg_i, boot_n, new_feat])

                    test_results = pd.DataFrame(test_results)

                    # Always take the first best row. The old code turned the
                    # winner into a Series only on ties, which then raised and
                    # silently discarded the whole bootstrap iteration.
                    scores = test_results.iloc[:, 0]
                    winner = test_results[scores == scores.max()].iloc[0]

                    results.append(list(winner.values[0:5]))

                    chosen = int(winner.iloc[5])
                    remaining.remove(chosen)
                    ix.append(chosen)

            # elif method == 'shannons':
            #     from base.statistics import shannons
            #     clf = classify.classify(X, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='clf')
            #     odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            #     odds_ratios -= (odds_ratios.min() - 0.000001)
            #     results = [shannons(odds_ratios), reg, boot_n]
    except Exception as exc:
        # Best effort: a failed iteration is reported and skipped.
        warnings.warn('something went wrong: %r' % (exc,))
        return None

    return results

예제 #10

0

파일 보기

파일: mv.py 프로젝트: csddzh/NS_Classify

def bootstrap_mv_parallel(args):
    """Run one bootstrap iteration of a multivariate analysis for one region.

    ``args`` is ``((X, y, classifier, scorer, fis, feature_names, method,
    reg), boot_n)``. Rows of ``X`` and ``y`` are resampled with replacement
    before the analysis selected by ``method``:

        'sequential'    -- add features by decreasing ``abs(fis)``; rows are
                           ``[score, n_features, reg, feature_name]``.
        'combinatorial' -- greedy forward selection (first wins on ties); rows
                           are ``[score, step, feature_name, reg, boot_n]``.
        'shannons'      -- returns ``[shannons(odds_ratios), reg, boot_n]``
                           computed from the fitted classifier's ``theta_``.

    Returns the results, or None after warning if an ordinary exception
    occurs (including an unknown ``method``). KeyboardInterrupt/SystemExit
    are no longer swallowed.
    """
    import warnings

    try:
        (X, y, classifier, scorer, fis,
         feature_names, method, reg), boot_n = args

        n_topics = X.shape[1]

        # Reseed with no argument so the draw below does not reuse inherited
        # state (presumably because this runs in forked worker processes).
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        # Bootstrap sample X & y
        X = X[ran_index, :]
        y = y[ran_index]

        if method == 'sequential':
            results = []
            # Sort once instead of on every iteration.
            ix = np.abs(fis).argsort()[::-1]
            # NOTE(review): stops at n_topics - 1 features -- confirm intended.
            for i in range(1, n_topics):
                X_1 = X[:, ix[0:i]]
                feature = feature_names[ix[i-1]]
                output = classify.classify(
                    X_1, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='summary')
                results.append([output['score'], i, reg, feature])

        elif method == 'combinatorial':
            results = []
            ix = []  # Feature order index
            # list(...) so that .remove() also works on Python 3 ranges.
            remaining = list(range(n_topics))

            for i in range(n_topics):
                test_results = []
                for new_feat in remaining:
                    try_comb = ix + [new_feat]
                    X_1 = X[:, try_comb]
                    feature = feature_names[new_feat]
                    output = classify.classify(
                        X_1, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='summary')
                    test_results.append(
                        [output['score'], i, feature, reg, boot_n, new_feat])

                test_results = pd.DataFrame(test_results)

                # Always take the first best row. The old code turned the
                # winner into a Series only on ties, which then raised and
                # silently discarded the whole bootstrap iteration.
                scores = test_results.iloc[:, 0]
                winner = test_results[scores == scores.max()].iloc[0]

                results.append(list(winner.values[0:5]))

                chosen = int(winner.iloc[5])
                remaining.remove(chosen)
                ix.append(chosen)

        elif method == 'shannons':
            from base.statistics import shannons
            clf = classify.classify(
                X, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='clf')
            odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            # Shift so the smallest value is slightly above zero.
            odds_ratios -= (odds_ratios.min() - 0.000001)
            results = [shannons(odds_ratios), reg, boot_n]

        else:
            raise ValueError('unknown method: %r' % (method,))

    except Exception as exc:
        # Best effort: a failed iteration is reported and skipped.
        warnings.warn('something went wrong: %r' % (exc,))
        return None

    return results

예제 #11

0

파일 보기

파일: regionalClassifier.py 프로젝트: margulies/NS_Classify

    def classify(self, features=None, scoring='accuracy', dummy = True, X_threshold=None):
        """Classify every ordered pair of masks and store the results.

        For each permutation of two entries of ``self.masklist`` the study
        data is loaded (or taken from ``self.c_data`` when already cached),
        optionally binarized at ``X_threshold``, and classified. RFE
        classifiers are fit and scored on the full data; anything else goes
        through ``classify.classify`` with 4-fold cross-validation.

        Fills ``class_score``, ``dummy_score``, ``fit_clfs``,
        ``feature_importances`` (when the model exposes ``coef_`` or
        ``feature_importances_``), ``feature_ranking`` (RFE only) and
        ``final_score`` (class score minus dummy score when ``dummy`` is
        true), then sets ``self.status`` to 1.
        """
        iters = list(itertools.permutations(self.masklist, 2))
        prog = 0.0
        total = len(iters)

        self.update_progress(0)

        if features:
            self.feature_names = features
        else:
            self.feature_names = self.dataset.get_feature_names()

        # Make feature importance grid w/ masked diagonals
        self.feature_importances = np.ma.masked_array(np.zeros((self.mask_num,
            self.mask_num, len(self.feature_names))))

        i, j, _ = np.meshgrid(*map(np.arange, self.feature_importances.shape), indexing='ij')

        self.feature_importances.mask = (i == j)

        for pairs in iters:

            index = (pairs[0][1], pairs[1][1]) # Tuple numeric index of pairs
            names = [pairs[0][0], pairs[1][0]] # Actual paths to masks

            if self.c_data[index] is None:
                X, y = classify.get_studies_by_regions(self.dataset,
                    names, threshold=self.thresh, features=features, regularization='scale')

                if X_threshold is not None:
                    X = binarize(X, X_threshold)

                # if features is not None:
                #     X = X[:, classify.get_feature_order(self.dataset, self.feature_names)]

                self.c_data[index] = (X, y)
            else:
                # Reuse the cached data. Previously X and y were left over
                # from the preceding pair (or undefined) in this case and were
                # then written back over the cache entry.
                X, y = self.c_data[index]

            if isinstance(self.classifier, RFE):

                self.classifier.fit(*self.c_data[index])

                self.fit_clfs[index] = self.classifier

                self.class_score[index] = self.classifier.score(*self.c_data[index])

                self.feature_importances[index] = self.classifier.estimator_.coef_[0]

                self.feature_ranking[index] = self.classifier.ranking_

            else:
                output = classify.classify(X, y, classifier = self.classifier, output = 'summary_clf', cross_val = '4-Fold',
                    class_weight = 'auto', scoring=scoring, param_grid=self.param_grid)

                self.class_score[index] = output['score']

                self.fit_clfs[index] = output['clf'].fit(*self.c_data[index])

                model = self.fit_clfs[index]
                if self.param_grid:
                    # With a grid search the fitted model lives on
                    # best_estimator_; the old fallback looked for
                    # feature_importances_ on the search object itself.
                    model = getattr(model, 'best_estimator_', model)

                # Linear coefficients first, then tree-style importances;
                # models that expose neither are skipped.
                try:
                    self.feature_importances[index] = model.coef_[0]
                except AttributeError:
                    try:
                        self.feature_importances[index] = model.feature_importances_
                    except AttributeError:
                        pass

            self.dummy_score[index] = classify.classify_regions(self.dataset, names,
                method='Dummy' , threshold=self.thresh)['score']

            prog = prog + 1
            self.update_progress(int(prog / total * 100))

        # Zero entries (never scored) are masked out.
        self.class_score = np.ma.masked_array(self.class_score,
            self.class_score == 0)
        self.dummy_score = np.ma.masked_array(self.dummy_score,
            self.dummy_score == 0)

        if dummy:
            self.final_score = self.class_score - self.dummy_score
        else:
            self.final_score = self.class_score

        # Make results fill in across diagonal
        # for j in range(0, self.mask_num):
        #     for b in range(0, self.mask_num):
        #         if self.final_score.mask[j, b] and not j == b:
        #             self.final_score[j, b] = self.final_score[b, j]
        #             self.fit_clfs[j, b] = self.fit_clfs[b, j]
        #             self.c_data[j, b] = self.c_data[b, j]
        #             if isinstance(self.classifier, LinearSVC):
        #                 self.feature_importances[j, b] = self.feature_importances[b, j] * -1
        #             else:
        #                 self.feature_importances[j, b] = self.feature_importances[b, j]

        #             if self.feature_ranking is not None:
        #                 self.feature_ranking[j, b] = self.feature_ranking[b, j]

        self.status = 1

예제 #12

0

파일 보기

파일: mv.py 프로젝트: adelavega/ns_classify

def bootstrap_mv_full_parallel(args):
    """Run one bootstrap iteration of greedy feature selection for all regions.

    ``args`` is ``((X, y_high, y_low, classifier, scorer, method), boot_n)``.
    ``X`` is a DataFrame (studies x features); ``y_high`` and ``y_low`` are
    indexed as ``[region, study]``. Studies are resampled with replacement,
    ``X`` is passed through ``regularize(X, method='scale')`` and, for each
    region, studies flagged in ``y_low`` but not in ``y_high`` are dropped.

    With ``method == 'combinatorial'`` each step adds the remaining feature
    that gives the best cross-validated score (first one wins on ties).

    Returns a list of ``[score, step, feature_name, region, boot_n]`` rows
    (empty for any other method), or None after warning if an ordinary
    exception occurs. KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    import warnings

    try:
        (X, y_high, y_low, classifier, scorer, method), boot_n = args

        # Reseed with no argument so the draw below does not reuse inherited
        # state (presumably because this runs in forked worker processes).
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        from neurosynth.analysis.classify import regularize

        ## Bootstrap sample X & y
        X = X.iloc[ran_index, :]
        y_high = pd.DataFrame(y_high[:, ran_index])
        y_low = pd.DataFrame(y_low[:, ran_index])

        feature_names = X.columns.tolist()
        n_topics = len(feature_names)

        X = regularize(X, method='scale')
        results = []
        for reg_i, reg_y_high in y_high.iterrows():
            # Element-wise comparisons on purpose (values may not be bools):
            # keep every study except those low-but-not-high for this region.
            reg_ix = ((y_low.iloc[reg_i, :] == True) &
                      (reg_y_high == False)) == False
            reg_y = reg_y_high[reg_ix].astype('int')
            reg_X = X[reg_ix.values, :]

            if method == 'combinatorial':

                ix = []  # Feature order index
                # list(...) so that .remove() also works on Python 3 ranges.
                remaining = list(range(n_topics))

                for i in range(n_topics):
                    test_results = []
                    for new_feat in remaining:
                        try_comb = ix + [new_feat]
                        X_1 = reg_X[:, try_comb]
                        feature = feature_names[new_feat]
                        output = classify.classify(X_1,
                                                   reg_y.values,
                                                   classifier=classifier,
                                                   cross_val='4-Fold',
                                                   scoring=scorer,
                                                   output='summary')
                        test_results.append([
                            output['score'], i, feature, reg_i, boot_n,
                            new_feat
                        ])

                    test_results = pd.DataFrame(test_results)

                    # Always take the first best row. The old code turned the
                    # winner into a Series only on ties, which then raised and
                    # silently discarded the whole bootstrap iteration.
                    scores = test_results.iloc[:, 0]
                    winner = test_results[scores == scores.max()].iloc[0]

                    results.append(list(winner.values[0:5]))

                    chosen = int(winner.iloc[5])
                    remaining.remove(chosen)
                    ix.append(chosen)

            # elif method == 'shannons':
            #     from base.statistics import shannons
            #     clf = classify.classify(X, y, classifier=classifier, cross_val='4-Fold', scoring=scorer, output='clf')
            #     odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            #     odds_ratios -= (odds_ratios.min() - 0.000001)
            #     results = [shannons(odds_ratios), reg, boot_n]
    except Exception as exc:
        # Best effort: a failed iteration is reported and skipped.
        warnings.warn('something went wrong: %r' % (exc,))
        return None

    return results

예제 #13

0

파일 보기

파일: mv.py 프로젝트: adelavega/ns_classify

def bootstrap_mv_parallel(args):
    """Run one bootstrap iteration of a multivariate analysis for one region.

    ``args`` is ``((X, y, classifier, scorer, fis, feature_names, method,
    reg), boot_n)``. Rows of ``X`` and ``y`` are resampled with replacement
    before the analysis selected by ``method``:

        'sequential'    -- add features by decreasing ``abs(fis)``; rows are
                           ``[score, n_features, reg, feature_name]``.
        'combinatorial' -- greedy forward selection (first wins on ties); rows
                           are ``[score, step, feature_name, reg, boot_n]``.
        'shannons'      -- returns ``[shannons(odds_ratios), reg, boot_n]``
                           computed from the fitted classifier's ``theta_``.

    Returns the results, or None after warning if an ordinary exception
    occurs (including an unknown ``method``). KeyboardInterrupt/SystemExit
    are no longer swallowed.
    """
    import warnings

    try:
        (X, y, classifier, scorer, fis, feature_names, method,
         reg), boot_n = args

        n_topics = X.shape[1]

        # Reseed with no argument so the draw below does not reuse inherited
        # state (presumably because this runs in forked worker processes).
        np.random.seed()
        ran_index = np.random.choice(X.shape[0], X.shape[0])

        ## Bootstrap sample X & y
        X = X[ran_index, :]
        y = y[ran_index]

        if method == 'sequential':
            results = []
            # Sort once instead of on every iteration.
            ix = np.abs(fis).argsort()[::-1]
            # NOTE(review): stops at n_topics - 1 features -- confirm intended.
            for i in range(1, n_topics):
                X_1 = X[:, ix[0:i]]
                feature = feature_names[ix[i - 1]]
                output = classify.classify(X_1,
                                           y,
                                           classifier=classifier,
                                           cross_val='4-Fold',
                                           scoring=scorer,
                                           output='summary')
                results.append([output['score'], i, reg, feature])

        elif method == 'combinatorial':
            results = []
            ix = []  # Feature order index
            # list(...) so that .remove() also works on Python 3 ranges.
            remaining = list(range(n_topics))

            for i in range(n_topics):
                test_results = []
                for new_feat in remaining:
                    try_comb = ix + [new_feat]
                    X_1 = X[:, try_comb]
                    feature = feature_names[new_feat]
                    output = classify.classify(X_1,
                                               y,
                                               classifier=classifier,
                                               cross_val='4-Fold',
                                               scoring=scorer,
                                               output='summary')
                    test_results.append(
                        [output['score'], i, feature, reg, boot_n, new_feat])

                test_results = pd.DataFrame(test_results)

                # Always take the first best row. The old code turned the
                # winner into a Series only on ties, which then raised and
                # silently discarded the whole bootstrap iteration.
                scores = test_results.iloc[:, 0]
                winner = test_results[scores == scores.max()].iloc[0]

                results.append(list(winner.values[0:5]))

                chosen = int(winner.iloc[5])
                remaining.remove(chosen)
                ix.append(chosen)

        elif method == 'shannons':
            from base.statistics import shannons
            clf = classify.classify(X,
                                    y,
                                    classifier=classifier,
                                    cross_val='4-Fold',
                                    scoring=scorer,
                                    output='clf')
            odds_ratios = np.log(clf.clf.theta_[1] / clf.clf.theta_[0])
            # Shift so the smallest value is slightly above zero.
            odds_ratios -= (odds_ratios.min() - 0.000001)
            results = [shannons(odds_ratios), reg, boot_n]

        else:
            raise ValueError('unknown method: %r' % (method,))

    except Exception as exc:
        # Best effort: a failed iteration is reported and skipped.
        warnings.warn('something went wrong: %r' % (exc,))
        return None

    return results

예제 #14

0

파일 보기

    def classify(self, features=None, scoring='accuracy', X_threshold=None, feat_select=None, processes=1, class_weight = 'auto', dummy = None):
        """Classify every mask pair and store the results on the instance.

        Data and result containers are (re)built on every call. Each entry of
        ``self.mask_pairs`` is handed to ``classify_parallel``, through a
        multiprocessing pool when ``processes > 1`` and lazily in-process
        otherwise.

        Fills ``class_score``, ``feature_importances`` (when the fitted model
        exposes ``coef_`` or ``feature_importances_``), ``fit_clfs`` (unless
        ``self.memsave``), ``features_selected`` (when ``feat_select`` is
        given) and, when ``dummy`` names a DummyClassifier strategy,
        ``dummy_score``. ``final_score`` is the class score, minus the dummy
        score when a dummy was requested.
        """

        self.load_data(features, X_threshold)

        self.initalize_containers(features, feat_select, dummy)

        # Parenthesised form prints the same text on Python 2 and parses on 3.
        print("Classifying...")
        # NOTE(review): list() would exhaust a one-shot iterator before the
        # loop below -- assumes self.mask_pairs can be iterated twice.
        pb = tools.ProgressBar(len(list(self.mask_pairs)), start=True)

        pool = None
        if processes > 1:
            pool = Pool(processes=processes)
            run = pool.imap
        else:
            # itertools.imap/izip exist only on Python 2; the Python 3
            # builtins map/zip are already lazy.
            run = getattr(itertools, 'imap', map)
        pair_up = getattr(itertools, 'izip', zip)

        try:
            filename = self.c_data.filename
            shared = (self.classifier, self.param_grid, scoring, filename,
                      feat_select, self.mask_num, class_weight)

            for output in run(classify_parallel,
                              pair_up(itertools.repeat(shared),
                                      self.mask_pairs)):

                index = output['index']
                self.class_score[index] = output['score']
                if self.memsave is False:
                    self.fit_clfs[index] = output['clf']

                # Read importances from the returned classifier rather than
                # from self.fit_clfs, which is not filled when memsave is on.
                fitted = output['clf']

                if self.param_grid:  # Just get the FIs if you used a grid
                    try:
                        self.feature_importances[index] = \
                            fitted.best_estimator_.coef_[0]
                    except AttributeError:
                        try:
                            # `best_estimator_` (trailing underscore); the
                            # misspelt name made this fallback always fail.
                            self.feature_importances[index] = \
                                fitted.best_estimator_.feature_importances_
                        except AttributeError:
                            pass
                else:
                    try:
                        self.feature_importances[index] = fitted.clf.coef_[0]
                    except AttributeError:
                        try:
                            self.feature_importances[index] = \
                                fitted.clf.feature_importances_
                        except AttributeError:
                            pass

                if feat_select:
                    self.features_selected[index] = output['features_selected']

                if dummy is not None:
                    # Baseline: same data and settings with a dummy classifier.
                    X, y = self.c_data[index]
                    output = classify.classify(X, y, classifier=DummyClassifier(strategy=dummy), cross_val='4-Fold',
                        class_weight=class_weight, scoring=scoring, feat_select=feat_select)

                    self.dummy_score[index] = output['score']

                pb.next()
        finally:
            if pool is not None:
                pool.close()
                pool.join()

        if dummy is None:
            self.final_score = self.class_score
        else:
            self.final_score = self.class_score - self.dummy_score

예제 #15

0

파일 보기

    def classify(self,
                 features=None,
                 scoring='accuracy',
                 dummy=True,
                 X_threshold=None):
        """Classify every ordered pair of masks and store the results.

        For each permutation of two entries of ``self.masklist`` the study
        data is loaded (or taken from ``self.c_data`` when already cached),
        optionally binarized at ``X_threshold``, and classified. RFE
        classifiers are fit and scored on the full data; anything else goes
        through ``classify.classify`` with 4-fold cross-validation.

        Fills ``class_score``, ``dummy_score``, ``fit_clfs``,
        ``feature_importances`` (when the model exposes ``coef_`` or
        ``feature_importances_``), ``feature_ranking`` (RFE only) and
        ``final_score`` (class score minus dummy score when ``dummy`` is
        true), then sets ``self.status`` to 1.
        """

        iters = list(itertools.permutations(self.masklist, 2))
        prog = 0.0
        total = len(iters)

        self.update_progress(0)

        if features:
            self.feature_names = features
        else:
            self.feature_names = self.dataset.get_feature_names()

        # Make feature importance grid w/ masked diagonals
        self.feature_importances = np.ma.masked_array(
            np.zeros((self.mask_num, self.mask_num, len(self.feature_names))))

        i, j, _ = np.meshgrid(*map(np.arange, self.feature_importances.shape),
                              indexing='ij')

        self.feature_importances.mask = (i == j)

        for pairs in iters:

            index = (pairs[0][1], pairs[1][1])  # Tuple numeric index of pairs
            names = [pairs[0][0], pairs[1][0]]  # Actual paths to masks

            if self.c_data[index] is None:
                X, y = classify.get_studies_by_regions(self.dataset,
                                                       names,
                                                       threshold=self.thresh,
                                                       features=features,
                                                       regularization='scale')

                if X_threshold is not None:
                    X = binarize(X, X_threshold)

                # if features is not None:
                #     X = X[:, classify.get_feature_order(self.dataset, self.feature_names)]

                self.c_data[index] = (X, y)
            else:
                # Reuse the cached data. Previously X and y were left over
                # from the preceding pair (or undefined) in this case and were
                # then written back over the cache entry.
                X, y = self.c_data[index]

            if isinstance(self.classifier, RFE):

                self.classifier.fit(*self.c_data[index])

                self.fit_clfs[index] = self.classifier

                self.class_score[index] = self.classifier.score(
                    *self.c_data[index])

                self.feature_importances[
                    index] = self.classifier.estimator_.coef_[0]

                self.feature_ranking[index] = self.classifier.ranking_

            else:
                output = classify.classify(X,
                                           y,
                                           classifier=self.classifier,
                                           output='summary_clf',
                                           cross_val='4-Fold',
                                           class_weight='auto',
                                           scoring=scoring,
                                           param_grid=self.param_grid)

                self.class_score[index] = output['score']

                self.fit_clfs[index] = output['clf'].fit(*self.c_data[index])

                model = self.fit_clfs[index]
                if self.param_grid:
                    # With a grid search the fitted model lives on
                    # best_estimator_; the old fallback looked for
                    # feature_importances_ on the search object itself.
                    model = getattr(model, 'best_estimator_', model)

                # Linear coefficients first, then tree-style importances;
                # models that expose neither are skipped.
                try:
                    self.feature_importances[index] = model.coef_[0]
                except AttributeError:
                    try:
                        self.feature_importances[index] = \
                            model.feature_importances_
                    except AttributeError:
                        pass

            self.dummy_score[index] = classify.classify_regions(
                self.dataset, names, method='Dummy',
                threshold=self.thresh)['score']

            prog = prog + 1
            self.update_progress(int(prog / total * 100))

        # Zero entries (never scored) are masked out.
        self.class_score = np.ma.masked_array(self.class_score,
                                              self.class_score == 0)
        self.dummy_score = np.ma.masked_array(self.dummy_score,
                                              self.dummy_score == 0)

        if dummy:
            self.final_score = self.class_score - self.dummy_score
        else:
            self.final_score = self.class_score

        # Make results fill in across diagonal
        # for j in range(0, self.mask_num):
        #     for b in range(0, self.mask_num):
        #         if self.final_score.mask[j, b] and not j == b:
        #             self.final_score[j, b] = self.final_score[b, j]
        #             self.fit_clfs[j, b] = self.fit_clfs[b, j]
        #             self.c_data[j, b] = self.c_data[b, j]
        #             if isinstance(self.classifier, LinearSVC):
        #                 self.feature_importances[j, b] = self.feature_importances[b, j] * -1
        #             else:
        #                 self.feature_importances[j, b] = self.feature_importances[b, j]

        #             if self.feature_ranking is not None:
        #                 self.feature_ranking[j, b] = self.feature_ranking[b, j]

        self.status = 1