# Split the rows of `df` at random into a 70% training set and a 30%
# held-out set, train a class-weighted random forest and print its F1 score.
# The last column of every row is used as the label; all preceding columns
# are the features.
Xt = []  # training features
Yt = []  # training labels
Xp = []  # held-out features
Yp = []  # held-out labels

rows_count = len(df.index)
rows_training = rows_count * 0.7
idx_t = set(random.sample(range(rows_count), int(rows_training)))

# idx_t holds row *positions*, so it is compared against the enumerate()
# counter rather than the DataFrame's index labels, which are not
# guaranteed to be 0..n-1.
for position, (_, data) in enumerate(df.iterrows()):
    values = data.tolist()
    # Split by position. list.remove(values[-1]) would delete the first
    # element *equal* to the label, dropping a feature (and leaking the
    # label into the features) whenever a feature shares the label's value.
    features, label = values[:-1], values[-1]
    if position in idx_t:
        Xt.append(features)
        Yt.append(label)
    else:
        Xp.append(features)
        Yp.append(label)

# Class 1 is weighted four times as heavily as class 0.
classifier = RandomForestClassifier(class_weight={0: 0.2, 1: 0.8})
classifier.fit(Xt, Yt)
ans = classifier.predict(Xp)

# NOTE: the value printed below is the F1 score, not plain accuracy.
# f1_score expects (y_true, y_pred) in that order.
print("Model accuracy: ")
print(f1_score(Yp, ans) * 100, "%")
test_size=0.2, random_state=i) if len(set(Y_train)) < 4 or len(set(Y_test)) < 4: continue # Append new subsamples to full training samples Full_training_set = np.append(Full_training_set, Y_train) # Calculate the class weights classes = np.unique(Full_training_set) weights = compute_class_weight('balanced', classes, Full_training_set) class_weight = dict() for _idx, class_ in enumerate(classes): class_weight[class_] = weights[_idx] clf.class_weight = class_weight # Train a random forest classifier clf.fit(X_train, Y_train) oob_score.append(clf.oob_score_) counter += 1 if counter == 300: break clf.n_estimators += 100 # Create classification prediction Y_clf_predict = clf.predict(Features) # Calculate model accuracy, out-of-bag score, and feature importance Model_accuracy = metrics.accuracy_score(Labels, Y_clf_predict) #oob_score = clf.oob_score_
def fit(X_train, Y_train, X_test, Y_test, use_local_parameters = False):
    """Train a decision tree and a random forest and print their test scores.

    A default ``DecisionTreeClassifier`` is fitted first and scored on the
    test split. A ``RandomForestClassifier`` is then fitted independently on
    the same training data, scored on the test split, and a classification
    report for its predictions is printed.

    Args:
        X_train: Training features passed to both classifiers' ``fit``.
        Y_train: Training labels passed to both classifiers' ``fit``.
        X_test: Test features used for scoring and prediction.
        Y_test: Test labels used for scoring and for the report.
        use_local_parameters: When true, try to read the forest's
            hyperparameters from ``Hyperparameters/RandomForests.pkl``.
            If that fails for any reason, a message is printed and the
            default hyperparameters are used instead.

    Returns:
        A ``(tree_model, forest_model)`` tuple of the two fitted estimators.
    """
    # Fit the baseline decision tree and score it on the test split.
    tree_model = tree.DecisionTreeClassifier().fit(X_train, Y_train)
    tree_score = tree_model.score(X_test, Y_test)

    # Build the random forest, optionally from locally stored hyperparameters.
    classifier = None
    if use_local_parameters:
        try:
            # NOTE(security): pickle.load can execute arbitrary code; this
            # file must come from a trusted source.
            # The context manager closes the file even if loading fails.
            with open("Hyperparameters/RandomForests.pkl", "rb") as params_file:
                params = pickle.load(params_file)
            classifier = RandomForestClassifier(
                bootstrap = params['bootstrap'],
                max_features = params['max_features'],
                min_samples_leaf = params['min_samples_leaf'],
                min_samples_split = params['min_samples_split'],
                n_estimators = params['n_estimators'])
        except Exception as e:
            # Deliberately best-effort: any problem with the file or its
            # contents falls through to the default classifier below.
            print('Could not load local hyperparameters!')
            print('Error: ' + str(e))
            print('Will now continue with default paramters.')

    if classifier is None:
        # Either local parameters were not requested or loading them failed.
        classifier = RandomForestClassifier()

    classifier.set_params(
        random_state = random.randint(1,10000),  # fresh random seed per call
        n_jobs = -1,                             # use all CPU cores
        warm_start = False,                      # always fit from scratch
        class_weight = 'balanced')               # balance the class weights

    # Fit the forest (independently of the decision tree) and evaluate it.
    forest_model = classifier.fit(X_train, Y_train)
    forest_score = forest_model.score(X_test, Y_test)
    forest_prediction = forest_model.predict(X_test)
    report = classification_report(Y_test, forest_prediction)
    print(report)

    # Print the final results.
    print(F"Decision tree accuracy: {tree_score}\n")
    print(F"Random forests accuracy: {forest_score}")
    return (tree_model, forest_model)