Example #1
import numpy as np
from sklearn.ensemble import RandomForestClassifier


def deserialize_random_forest(model_dict):
    # Rebuild a fitted RandomForestClassifier from a plain-dict representation.
    # Assumes a deserialize_decision_tree() helper, defined elsewhere, that
    # restores each individual tree.
    model = RandomForestClassifier(**model_dict['params'])
    estimators = [deserialize_decision_tree(decision_tree) for decision_tree in model_dict['estimators_']]
    model.estimators_ = np.array(estimators)

    model.classes_ = np.array(model_dict['classes_'])
    model.n_features_ = model_dict['n_features_']
    model.n_outputs_ = model_dict['n_outputs_']
    model.max_depth = model_dict['max_depth']
    model.min_samples_split = model_dict['min_samples_split']
    model.min_samples_leaf = model_dict['min_samples_leaf']
    model.min_weight_fraction_leaf = model_dict['min_weight_fraction_leaf']
    model.max_features = model_dict['max_features']
    model.max_leaf_nodes = model_dict['max_leaf_nodes']
    model.min_impurity_decrease = model_dict['min_impurity_decrease']
    model.min_impurity_split = model_dict['min_impurity_split']

    if 'oob_score_' in model_dict:
        model.oob_score_ = model_dict['oob_score_']
    if 'oob_decision_function_' in model_dict:
        model.oob_decision_function_ = model_dict['oob_decision_function_']

    if isinstance(model_dict['n_classes_'], list):
        model.n_classes_ = np.array(model_dict['n_classes_'])
    else:
        model.n_classes_ = model_dict['n_classes_']

    return model
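
The dict consumed by deserialize_random_forest() implies a matching serializer. A minimal sketch of that counterpart, assuming a serialize_decision_tree() helper that mirrors deserialize_decision_tree() (the name is an assumption, not from the original):

def serialize_random_forest(model):
    # Hypothetical counterpart: flatten a fitted forest into plain Python
    # types so it can round-trip through deserialize_random_forest().
    model_dict = {
        'params': model.get_params(),
        'estimators_': [serialize_decision_tree(tree) for tree in model.estimators_],
        'classes_': model.classes_.tolist(),
        'n_features_': model.n_features_,  # attribute exists only on older scikit-learn versions
        'n_outputs_': model.n_outputs_,
        'max_depth': model.max_depth,
        'min_samples_split': model.min_samples_split,
        'min_samples_leaf': model.min_samples_leaf,
        'min_weight_fraction_leaf': model.min_weight_fraction_leaf,
        'max_features': model.max_features,
        'max_leaf_nodes': model.max_leaf_nodes,
        'min_impurity_decrease': model.min_impurity_decrease,
        'min_impurity_split': model.min_impurity_split,  # removed in newer scikit-learn
        'n_classes_': model.n_classes_ if isinstance(model.n_classes_, int) else list(model.n_classes_),
    }
    if hasattr(model, 'oob_score_'):
        model_dict['oob_score_'] = model.oob_score_
        model_dict['oob_decision_function_'] = model.oob_decision_function_.tolist()
    return model_dict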
Example #2

import pandas as pd  #1
from sklearn.model_selection import train_test_split  #2
from sklearn.ensemble import RandomForestClassifier  #3
from sklearn.metrics import confusion_matrix  #4
# import libraries; the #N tags mark where each import is used below

datas = pd.read_csv("datas.csv")  # read the dataset  #1

x = datas.iloc[:, 3:-3].values
y = datas.iloc[:, -2].values
# select the feature columns (x) and the target column (y)

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.10, random_state=0)  # 90% for training, 10% for testing  #2

rfc = RandomForestClassifier()  #3
rfc.max_depth = 100
rfc.criterion = "entropy"  # split criterion; the alternative is 'gini'
rfc.n_estimators = 1
rfc.fit(x_train, y_train)

y_pred = rfc.predict(x_test)
cm = confusion_matrix(y_test, y_pred)  #4
print("RFC")
print(cm)
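
The example above reports only the raw confusion matrix. If a single summary number is also wanted, a one-line addition (not in the original snippet) is accuracy_score from sklearn.metrics:

from sklearn.metrics import accuracy_score
print(accuracy_score(y_test, y_pred))  # fraction of correctly classified test samples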
Example #3

import numpy as np
import matplotlib.pyplot as plt


def plotBoundary(X, clf):
    # Shade the classifier's decision regions over the data range.
    # Only the two plt.*lim calls survived in the original fragment; the
    # body above them is a reconstruction of a standard meshgrid plot.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.3)
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)


from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=20,
                  n_features=2,
                  centers=2,
                  cluster_std=2,
                  random_state=3)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50, edgecolors='k')

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(random_state=8)
clf.max_depth = 1
clf.n_estimators = 1
clf.fit(X, y)
plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k')
plotBoundary(X, clf)

for i in range(3, 10):
    clf.n_estimators = i
    clf.fit(X, y)
    plt.scatter(X[:, 0], X[:, 1], marker='o', c=y)
    plotBoundary(X, clf)
    plt.title("{0} estimators".format(i))
    plt.show()
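
A variant worth noting (not in the original example): with warm_start=True, the same loop grows one forest incrementally instead of refitting from scratch on every iteration:

from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=1, max_depth=1,
                             warm_start=True, random_state=8)
clf.fit(X, y)
for i in range(3, 10):
    clf.n_estimators = i
    clf.fit(X, y)  # keeps the trees already grown, only fits the new ones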
Example #4
    if args.grid_search:  # hypothetical flag name; the original condition is not shown in this fragment
        # sklearn.grid_search was removed; GridSearchCV now lives in
        # sklearn.model_selection.
        from sklearn.model_selection import GridSearchCV
        clf = GridSearchCV(model, parameters,
                           cv=4, verbose=10,
                           n_jobs=1)
        print('Grid search for the model')
        clf.fit(X_trn, y_trn)
        print(clf.best_params_)

        model.n_estimators = clf.best_params_['n_estimators']
        # best_estimator_ is a fitted estimator, not a dict, so use
        # attribute access rather than indexing
        model.oob_score = clf.best_estimator_.oob_score

    else:  # no grid search: fall back to fixed hyperparameters
        model.n_estimators = 600
        model.oob_score = False
        model.max_depth = 20
        model.n_jobs = 20
 

    # sklearn.cross_validation was removed; cross_val_score now lives in
    # sklearn.model_selection.
    from sklearn.model_selection import cross_val_score

    if args.SGD:
        from SGDRank import SGDClassifier
        model = SGDClassifier()

    print('CV')
    scores = cross_val_score(model, X_trn, y_trn, cv=3, n_jobs=3)
    print(scores)  # the original discarded the scores; print them so the call has an effect

    print('Fit the model')
    model.fit(X_trn, y_trn)
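
This fragment assumes a parameters grid defined outside the excerpt. A plausible shape, purely illustrative (the real keys and values are not shown in the original), would be:

parameters = {
    'n_estimators': [200, 400, 600],  # illustrative values only
    'max_depth': [10, 20, None],
}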