Example #1
0
# Split `df` into a random 70 % training part and a 30 % prediction part,
# fit a class-weighted random forest on the former and score it on the latter.
# Every row is assumed to hold the feature values followed by the label in the
# last column -- TODO confirm against the code that builds `df`.
Xt = []
Yt = []

Xp = []
Yp = []

rows_count = len(df.index)
rows_training = rows_count * 0.7

# Row *positions* (not index labels) that go into the training set.
idx_t = set(random.sample(range(rows_count), int(rows_training)))

# Enumerate so membership is tested on the row position; the DataFrame index
# label only coincides with the position for a default RangeIndex.
for position, (_, data) in enumerate(df.iterrows()):
    # Split off the last column by position. `list.remove(value)` would drop
    # the first element *equal* to the label, which corrupts the features
    # whenever a feature shares the label's value.
    *l, label = data.tolist()
    if position in idx_t:
        Xt.append(l)
        Yt.append(label)
    else:
        Xp.append(l)
        Yp.append(label)


# Weight class 1 four times as heavily as class 0.
classifier = RandomForestClassifier(class_weight={0: 0.2, 1: 0.8})

classifier.fit(Xt, Yt)
ans = classifier.predict(Xp)

# NOTE(review): the value printed below is the F1 score, not plain accuracy.
print("Model accuracy: ")
# f1_score expects (y_true, y_pred) in that order.
print(f1_score(Yp, ans) * 100, "%")
Example #2
0
                                                            test_size=0.2,
                                                            random_state=i)
        if len(set(Y_train)) < 4 or len(set(Y_test)) < 4:
            continue

        # Append new subsamples to full training samples
        Full_training_set = np.append(Full_training_set, Y_train)

        # Calculate the class weights
        classes = np.unique(Full_training_set)
        weights = compute_class_weight('balanced', classes, Full_training_set)
        class_weight = dict()
        for _idx, class_ in enumerate(classes):
            class_weight[class_] = weights[_idx]

        clf.class_weight = class_weight

        # Train a random forest classifier
        clf.fit(X_train, Y_train)
        oob_score.append(clf.oob_score_)
        counter += 1
        if counter == 300:
            break
        clf.n_estimators += 100

    # Create classification prediction
    Y_clf_predict = clf.predict(Features)

    # Calculate model accuracy, out-of-bag score, and feature importance
    Model_accuracy = metrics.accuracy_score(Labels, Y_clf_predict)
    #oob_score = clf.oob_score_
def fit(X_train, Y_train, X_test, Y_test,
        use_local_parameters=False):
    """Train a decision tree and a random forest and print their test scores.

    Both models are fitted independently on the same training data. The
    decision tree only serves as a baseline for the printed comparison.

    Args:
        X_train: Training features, passed unchanged to the estimators' ``fit``.
        Y_train: Training labels, passed unchanged to the estimators' ``fit``.
        X_test: Held-out features used for ``score`` and ``predict``.
        Y_test: Held-out labels used for ``score`` and the classification
            report.
        use_local_parameters: When true, try to read the random forest
            hyperparameters from ``Hyperparameters/RandomForests.pkl``. On any
            failure a message is printed and the defaults are used instead.

    Returns:
        A ``(tree_model, forest_model)`` tuple of the two fitted estimators.
    """
    # Baseline: a single decision tree with default settings.
    tree_model = tree.DecisionTreeClassifier().fit(X_train, Y_train)
    tree_score = tree_model.score(X_test, Y_test)

    classifier = None
    if use_local_parameters:
        try:
            # NOTE(security): pickle.load can execute arbitrary code; this
            # file must only ever come from a trusted source.
            # The context manager closes the file even if unpickling fails.
            with open("Hyperparameters/RandomForests.pkl", "rb") as params_file:
                params = pickle.load(params_file)

            # Initialise the random forest with the stored hyperparameters.
            classifier = RandomForestClassifier(
                bootstrap=params['bootstrap'],
                max_features=params['max_features'],
                min_samples_leaf=params['min_samples_leaf'],
                min_samples_split=params['min_samples_split'],
                n_estimators=params['n_estimators'])
        except Exception as e:
            # Deliberately broad: a missing file, a corrupt pickle or a
            # missing key must all fall back to defaults, not abort training.
            print('Could not load local hyperparameters!')
            print('Error: ' + str(e))
            print('Will now continue with default paramters.')

    if classifier is None:
        # Either local parameters were not requested or loading them failed.
        classifier = RandomForestClassifier()

    classifier.set_params(
        # Fresh random seed on every call, so runs are not reproducible.
        random_state=random.randint(1, 10000),
        # Use all CPU cores when fitting.
        n_jobs=-1,
        # Build a new forest on each fit instead of reusing earlier trees.
        warm_start=False,
        # Weight classes inversely to their frequency in the training labels.
        class_weight='balanced')

    # Train the random forest (independently of the decision tree above).
    forest_model = classifier.fit(X_train, Y_train)

    # Evaluate the forest on the held-out data.
    forest_score = forest_model.score(X_test, Y_test)
    forest_prediction = forest_model.predict(X_test)
    report = classification_report(Y_test, forest_prediction)
    print(report)

    # Print final results.
    print(F"Decision tree accuracy: {tree_score}\n")
    print(F"Random forests accuracy: {forest_score}")

    return (tree_model, forest_model)