def top_features():
    """Return the top features of one of the current user's models as JSON.

    Query-string parameters (read from ``request.args``):
        model_id: index into the result of ``get_models(current_user.id)``;
            parsed with ``int``, defaults to ``'0'``.
        n: forwarded to ``get_top_features`` as ``n``; parsed with ``int``,
            defaults to ``'10'``.

    Returns:
        ``jsonify(data=[{'feature': ..., 'score': ...}, ...])`` with one
        entry per (feature, score) pair yielded by ``get_top_features``.
    """
    user_models = get_models(current_user.id)
    chosen = user_models[int(request.args.get('model_id', '0'))]
    limit = int(request.args.get('n', '10'))

    ranked = get_top_features(
        chosen['clf'],
        col_names=chosen['feature_names'],
        n=limit)

    # Each ranked entry is unpacked as a (feature, score) pair.
    payload = []
    for name, value in ranked:
        payload.append({'feature': name, 'score': value})
    return jsonify(data=payload)
def test_get_top_features(self):
    """Check dsp.get_top_features against the classifier's own importances.

    A RandomForestClassifier is fitted on the training part of a generated
    15-column matrix. The control is built from ``feature_importances_``:
    (name, score) pairs for columns ``f0``..``f14``, reordered by the ten
    largest importances in descending order. Both ways of calling
    ``get_top_features`` (passing the matrix, or passing ``col_names``)
    must produce a result equal to that control.
    """
    matrix, labels = uft.generate_test_matrix(1000, 15, random_state=0)
    matrix = utils.cast_np_sa_to_nd(matrix)
    # Only the training halves of the split are needed.
    train_rows, _, train_labels, _ = train_test_split(matrix, labels)

    forest = RandomForestClassifier(random_state=0)
    forest.fit(train_rows, train_labels)

    importances = forest.feature_importances_
    names = ['f{}'.format(idx) for idx in xrange(15)]
    # argsort is ascending: reverse it, then keep the first ten indices.
    best_ten = np.argsort(importances)[::-1][:10]
    all_pairs = utils.convert_to_sa(
        zip(names, importances),
        col_names=('feat_name', 'score'))
    expected = all_pairs[best_ten]

    # Variant 1: the matrix itself is passed positionally.
    from_matrix = dsp.get_top_features(forest, matrix, verbose=False)
    self.assertTrue(uft.array_equal(expected, from_matrix))

    # Variant 2: only explicit column names are supplied.
    from_names = dsp.get_top_features(
        forest,
        col_names=['f{}'.format(idx) for idx in xrange(15)],
        verbose=False)
    self.assertTrue(uft.array_equal(expected, from_names))
def top_features():
    """Serve the highest-scoring features of a selected model as JSON.

    Reads two values from ``request.args``: ``model_id`` (default ``'0'``),
    used to index the models returned by ``get_models(current_user.id)``,
    and ``n`` (default ``'10'``), passed through to ``get_top_features``.
    Both are converted with ``int``.

    The response body is ``jsonify(data=...)`` where ``data`` is a list of
    ``{'feature': ..., 'score': ...}`` dicts, one per returned pair.
    """
    def as_record(feature, score):
        # Shape of a single entry in the JSON payload.
        return {'feature': feature, 'score': score}

    candidates = get_models(current_user.id)
    position = int(request.args.get('model_id', '0'))
    picked = candidates[position]
    how_many = int(request.args.get('n', '10'))

    classifier = picked['clf']
    column_names = picked['feature_names']
    scored = get_top_features(classifier, col_names=column_names, n=how_many)

    return jsonify(data=[as_record(feature, score) for feature, score in scored])
def test_get_top_features(self):
    """Check dsp.get_top_features against a pinned list of ten features.

    A RandomForestClassifier is fitted on the training part of a generated
    15-column matrix, and ``get_top_features`` is called twice: once with
    the matrix, once with explicit ``col_names``. Both results must equal
    the hard-coded (feat_name, score) control below.
    """
    # NOTE(review): train_test_split is called without a random_state, so
    # the pinned scores presumably rely on seeding done elsewhere (e.g.
    # inside generate_test_matrix) -- confirm the test is deterministic.
    pinned_scores = [
        ('f5', 0.0773838526068),
        ('f13', 0.0769596713039),
        ('f8', 0.0751584839431),
        ('f6', 0.0730815879102),
        ('f11', 0.0684456133071),
        ('f9', 0.0666747414603),
        ('f10', 0.0659621889608),
        ('f7', 0.0657988099065),
        ('f2', 0.0634000069218),
        ('f0', 0.0632912268319),
    ]

    matrix, labels = uft.generate_test_matrix(1000, 15, random_state=0)
    matrix = utils.cast_np_sa_to_nd(matrix)
    # Only the training halves of the split are needed.
    train_rows, _, train_labels, _ = train_test_split(matrix, labels)

    forest = RandomForestClassifier(random_state=0)
    forest.fit(train_rows, train_labels)

    # Variant 1: the matrix itself is passed positionally.
    from_matrix = dsp.get_top_features(forest, matrix, verbose=False)
    expected = utils.convert_to_sa(
        pinned_scores,
        col_names=('feat_name', 'score'))
    self.assertTrue(uft.array_equal(expected, from_matrix))

    # Variant 2: only explicit column names are supplied.
    names = ['f{}'.format(idx) for idx in xrange(15)]
    from_names = dsp.get_top_features(forest, col_names=names, verbose=False)
    self.assertTrue(uft.array_equal(expected, from_names))