Beispiel #1
0
def top_features():
    """Return the top-ranked features of one of the current user's models.

    Query parameters (read from ``request.args``):
        model_id: integer index into the result of ``get_models`` for the
            current user; defaults to ``'0'``.
        n: integer forwarded to ``get_top_features`` as ``n``; defaults
            to ``'10'``.

    Returns:
        ``jsonify(data=[...])`` where each entry is a dict with the keys
        ``'feature'`` and ``'score'``, in the order ``get_top_features``
        yields them.
    """
    user_models = get_models(current_user.id)
    query = request.args

    # Select the model before parsing ``n`` so lookup errors surface first.
    chosen = user_models[int(query.get('model_id', '0'))]
    limit = int(query.get('n', '10'))

    ranked = get_top_features(
        chosen['clf'],
        col_names=chosen['feature_names'],
        n=limit)

    payload = []
    for feat, score in ranked:
        payload.append({'feature': feat, 'score': score})
    return jsonify(data=payload)
Beispiel #2
0
    def test_get_top_features(self):
        """Check dsp.get_top_features against the fitted forest's importances.

        The control is built from ``clf.feature_importances_``: the ten
        highest-scoring features, in descending order, labelled
        ``f0`` .. ``f14``. The result must match both when the data matrix
        is passed and when explicit column names are passed instead.
        """
        n_cols = 15
        M, labels = uft.generate_test_matrix(1000, n_cols, random_state=0)
        M = utils.cast_np_sa_to_nd(M)
        M_train, _M_test, labels_train, _labels_test = train_test_split(
            M, labels)

        clf = RandomForestClassifier(random_state=0)
        clf.fit(M_train, labels_train)

        # Build the expected structured array straight from the classifier.
        importances = clf.feature_importances_
        names = ['f{}'.format(idx) for idx in xrange(n_cols)]
        # argsort is ascending, so reverse it and keep the first ten.
        best_ten = np.argsort(importances)[::-1][:10]
        all_scores = utils.convert_to_sa(
            zip(names, importances),
            col_names=('feat_name', 'score'))
        ctrl = all_scores[best_ten]

        # Variant 1: the data matrix is supplied, no explicit column names.
        from_matrix = dsp.get_top_features(clf, M, verbose=False)
        self.assertTrue(uft.array_equal(ctrl, from_matrix))

        # Variant 2: explicit column names, no data matrix.
        explicit_names = ['f{}'.format(idx) for idx in xrange(n_cols)]
        from_names = dsp.get_top_features(
            clf, col_names=explicit_names, verbose=False)
        self.assertTrue(uft.array_equal(ctrl, from_names))
Beispiel #3
0
def top_features():
    """Serve the top features of a model owned by the current user as JSON.

    Reads two optional query parameters from ``request.args``:
    ``model_id`` (default ``'0'``), used as an index into the current
    user's models, and ``n`` (default ``'10'``), passed through to
    ``get_top_features``. Both are converted with ``int``.

    Returns:
        The ``jsonify`` response whose ``data`` field is a list of
        ``{'feature': ..., 'score': ...}`` dicts.
    """
    available = get_models(current_user.id)
    # Index the model first, then parse ``n``, keeping failure order stable.
    model_index = int(request.args.get('model_id', '0'))
    selected = available[model_index]
    how_many = int(request.args.get('n', '10'))

    classifier = selected['clf']
    feature_names = selected['feature_names']
    pairs = get_top_features(classifier, col_names=feature_names, n=how_many)

    rows = [dict(feature=feat, score=score) for feat, score in pairs]
    return jsonify(data=rows)
Beispiel #4
0
 def test_get_top_features(self):
     """Check dsp.get_top_features against the fitted forest's importances.

     The expected result is derived from ``clf.feature_importances_``
     rather than from hard-coded float literals. ``train_test_split`` is
     called without a ``random_state``, so literal scores would depend on
     ambient RNG state and on the installed scikit-learn version. Deriving
     the control from the classifier keeps the check meaningful: the ten
     highest-scoring features, in descending order, labelled
     ``f0`` .. ``f14``.
     """
     # Function-scope import so this method does not depend on the module
     # already exposing ``np``.
     import numpy as np

     M, labels = uft.generate_test_matrix(1000, 15, random_state=0)
     M = utils.cast_np_sa_to_nd(M)
     M_train, M_test, labels_train, labels_test = train_test_split(
             M,
             labels)
     clf = RandomForestClassifier(random_state=0)
     clf.fit(M_train, labels_train)

     ctrl_feat_importances = clf.feature_importances_
     ctrl_col_names = ['f{}'.format(i) for i in xrange(15)]
     # argsort is ascending; reverse it and keep the ten largest scores.
     ctrl_feat_ranks = np.argsort(ctrl_feat_importances)[::-1][:10]
     ctrl = utils.convert_to_sa(
             zip(ctrl_col_names, ctrl_feat_importances),
             col_names=('feat_name', 'score'))[ctrl_feat_ranks]

     # Data matrix supplied, no explicit column names.
     res = dsp.get_top_features(clf, M, verbose=False)
     self.assertTrue(uft.array_equal(ctrl, res))

     # Explicit column names supplied, no data matrix.
     res = dsp.get_top_features(
             clf,
             col_names=['f{}'.format(i) for i in xrange(15)],
             verbose=False)
     self.assertTrue(uft.array_equal(ctrl, res))