예제 #1
0
class ExtraTreesClassifierImpl():

    def __init__(self, n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=False, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight='balanced'):
        self._hyperparams = {
            'n_estimators': n_estimators,
            'criterion': criterion,
            'max_depth': max_depth,
            'min_samples_split': min_samples_split,
            'min_samples_leaf': min_samples_leaf,
            'min_weight_fraction_leaf': min_weight_fraction_leaf,
            'max_features': max_features,
            'max_leaf_nodes': max_leaf_nodes,
            'min_impurity_decrease': min_impurity_decrease,
            'min_impurity_split': min_impurity_split,
            'bootstrap': bootstrap,
            'oob_score': oob_score,
            'n_jobs': n_jobs,
            'random_state': random_state,
            'verbose': verbose,
            'warm_start': warm_start,
            'class_weight': class_weight}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if (y is not None):
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)
예제 #2
0
def run_decision_tree_probabilistic_classification(train, train_labels, validate, validate_labels):
    # transform counts to TFIDF features
    tfidf = feature_extraction.text.TfidfTransformer(smooth_idf=False)
    train = tfidf.fit_transform(train).toarray()
    validate = tfidf.transform(validate).toarray()

    # encode labels
    label_encode = preprocessing.LabelEncoder()
    train_labels = label_encode.fit_transform(train_labels)

    decisionTree = ExtraTreesClassifier(n_jobs=4, n_estimators=1000, max_features=20, min_samples_split=3,
                                        bootstrap=False, verbose=3, random_state=23)
    decisionTree.fit(train, train_labels)
    predicted_labels = decisionTree.predict_proba(validate)
    print "Extra Trees Classifier LogLoss"
    print str(metrics.log_loss(validate_labels, predicted_labels))