# Execution modes exercised by the loop below, in this order.
# Use [False, True] instead to run the serial case first.
parallelism_cases = [True, False]

# Separator lines printed between test sections.
STAR_RULE = "*************************"
DASH_RULE = "-------------------------"

if __name__ == '__main__':
    for enableParallelism in parallelism_cases:
        # Banner announcing which mode this pass runs in.
        print(STAR_RULE)
        print("enableParallelism is set to ", enableParallelism)
        print(STAR_RULE)

        # Case 1: ID3 on the label-encoded golf data set.
        print(DASH_RULE)
        print("ID3 for label encoded features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        golf_label_encoded = pd.read_csv("dataset/golf_le.txt")
        model = cb.fit(golf_label_encoded, config)

        # Case 2: ID3 on the nominal golf data set, followed by a
        # save / restore / predict round trip.
        print(DASH_RULE)
        print("ID3 for nominal features and nominal target:")
        # A fresh dict is built for every fit call, exactly as before.
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        golf_nominal = pd.read_csv("dataset/golf.txt")
        model = cb.fit(golf_nominal, config)

        # save_model is called without a file name; the messages and the
        # load call below all refer to model.pkl.
        cb.save_model(model)
        print("built model is saved to model.pkl")

        restored_model = cb.load_model("model.pkl")
        print("built model is restored from model.pkl")

        # Single hand-written sample fed to the restored model.
        instance = ['Sunny', 'Hot', 'High', 'Weak']
        prediction = cb.predict(restored_model, instance)
from chefboost import Chefboost as chef
import pandas as pd
import numpy as np

# Load the data set; the loop below reads its 'Decision' column as the
# ground truth.
df = pd.read_csv("play.txt")

# Train a C4.5 tree on a copy so that `df` itself is what gets iterated
# afterwards.
config = {'algorithm': 'C4.5'}
training_frame = df.copy()
model = chef.fit(training_frame, config)

# Print "actual - predicted" for every row of the training data.
for _, row in df.iterrows():
    predicted = chef.predict(model, row)
    expected = row['Decision']
    print(expected, " - ", predicted)
# ----------------------------------------------
# Execution modes exercised by the loop below, in this order.
# Use [True] alone to run only the parallel case.
parallelism_cases = [False, True]

if __name__ == '__main__':
    for enableParallelism in parallelism_cases:
        # Banner announcing which mode this pass runs in.
        print("*************************")
        print("enableParallelism is set to ", enableParallelism)
        print("*************************")

        # Case 1: fit called with the data frame only, no config argument.
        print("no config passed ")
        df = pd.read_csv("dataset/golf.txt")
        model = cb.fit(df)

        # Case 2: the same file is loaded twice, once as the training
        # frame and once as the validation frame.
        print("-------------------------")
        print("Validation set case")
        df = pd.read_csv("dataset/golf.txt")
        validation_df = pd.read_csv("dataset/golf.txt")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(df, config, validation_df=validation_df)

        # Case 3: feature importance.
        print("-------------------------")
        print("Feature importance")
        # Read the origin of the first tree's module spec.
        # NOTE(review): presumably the path of the generated rules file;
        # an earlier variant built it from the module's __name__ + ".py".
        first_tree = model["trees"][0]
        decision_rules = first_tree.__dict__["__spec__"].origin
X.head() X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.349, random_state=1) training_set = pd.concat([X_train, y_train], axis=1) print("Training set size: {}".format(X_train.Mitoses.count())) print("Testing set size: {}".format(X_test.Mitoses.count())) X_train.head() y_train.head() # Training # config = {'algorithm': 'ID3'} config = {'algorithm': 'C4.5'} model = chef.fit(training_set, config) X_test.Clump_Thickness.count() y_test.head() # Calculate Accuracy _true = 0 _false = 0 accuracy = { "Benign": { "Malignant": 0, "Benign": 0 }, "Malignant": { "Malignant": 0, "Benign": 0
import numpy as np
import pandas as pd

from chefboost import Chefboost as chef

# Training and test data come from two separate Excel workbooks.
df = pd.read_excel("MS- train (MS).xlsx")
test = pd.read_excel("MS-test (MS).xlsx")

# Bare previews; they only display something in an interactive session.
df.head()
test.head()

# Train a CHAID tree on the training workbook.
config = {'algorithm': 'CHAID'}
model = chef.fit(df, config)

# Count how many test rows the model predicts correctly / incorrectly,
# comparing each prediction with the row's 'Decision' value.
correct_count = 0
wrong_count = 0
for _, row in test.iterrows():
    if chef.predict(model, row) == row['Decision']:
        correct_count += 1
    else:
        wrong_count += 1

print("Dogru: ", correct_count, " Yanlış: ", wrong_count)
"""Generate an attack tree for known attacks.

Uses CART.
1) Start with C4.5; generation with C5 is planned for later.

Library link: https://github.com/serengil/chefboost
(['ID3', 'C4.5', 'CART', 'CHAID', 'Regression'])
"""
import pandas as pd
from chefboost import Chefboost as chef

# Load the pre-processed training data and rename the target column
# from 'Label' to 'Decision'.
train_data = pd.read_csv('dataset/pre_train.csv').rename(
    columns={'Label': 'Decision'}
)
# Optional filter, currently disabled: keep only rows whose Decision is
# not 'dos'.

# Build the model.
config = {'algorithm': 'C4.5'}
model = chef.fit(train_data, config)

# Save the model.
chef.save_model(model, "dataset/c45_model.pkl")
'Out', 'In', 'Out', 'Out', 'Out', 'Out', 'In', 'Out', 'Out', 'In', 'Out', 'In' ], 'media': [ 'NBC', 'NBC', 'ESPN', 'FOX', 'NBC', 'ABC', 'NBC', 'NBC', 'NBC', 'ABC', 'NBC', 'ABC' ] } y_true5 = [ 'WIN', 'LOSS', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'LOSS', 'WIN', 'LOSS' ] trainDF5 = pd.DataFrame(data=train_5) testDF5 = pd.DataFrame(data=test_5) config = {'algorithm': 'C4.5'} model = chef.fit(trainDF5.copy(), config) pred5 = [] for index, instance in testDF5.iterrows(): prediction5 = str(chef.predict(model, instance)) pred5.append(prediction5) y_pred5 = np.array(pred5) precision5 = precision_score(y_true5, y_pred5) F1_score5 = f1_score(y_true5, y_pred5) recall_score5 = recall_score(y_true5, y_pred5) accuracy_score5 = accuracy_score(y_true5, y_pred5) print("\tPrecision Score: ", precision5) print("\tF1 Score ", F1_score5) print("\tRecall Score: ", recall_score5) print("\tAccuracy Score: ", accuracy_score5) print("\tPrediction for Entropy Q5:", y_pred5)
#!pip install chefboost
from chefboost import Chefboost as cb
import pandas as pd
import matplotlib.pyplot as plt

# Train a C4.5 tree on the golf data set and plot its feature importances.
# NOTE(review): the __main__ guard is presumably needed because
# enableParallelism=True makes training start worker processes - confirm.
if __name__ == '__main__':
    df = pd.read_csv("golf.txt")

    # Single assignment; the original repeated the target
    # ("config = config = ...").
    config = {'algorithm': 'C4.5', 'enableParallelism': True}
    model = cb.fit(df, config)

    # NOTE(review): some chefboost versions take the path of the generated
    # rules file as an argument here - confirm against the installed
    # version.
    fi = cb.feature_importance()

    # Bar chart of the returned importances.
    fi.plot.bar()
    plt.show()
# Professor: Dibio Leandro Borges
# Student: William Coelho da Silva - 180029274

# Import the required libraries
import pandas as pd
from chefboost import Chefboost as chef
import gc

print('\nDecisionTree C4.5\n\n')

# Read the data set and rename the target column to 'Decision'.
dataset = pd.read_excel('dataset.xlsx', engine='openpyxl')
dataset = dataset.rename(columns={'resultado do exame': 'Decision'})
print(dataset)

# Train a C4.5 tree on a copy so that `dataset` itself is what gets
# iterated afterwards.
config = {'algorithm': 'C4.5'}
model = chef.fit(dataset.copy(), config=config)

# Compare the prediction for every row with its actual label.
for ind, instance in dataset.iterrows():
    # Predict the current row (the original always predicted row 0).
    prediction = chef.predict(model, instance)
    # The target column is 'Decision' after the rename above (the original
    # read the misspelled 'Decison', which raises KeyError).
    actual = instance['Decision']

    classified = actual == prediction
    if not classified:
        # Prefix misclassified rows with an "x" marker on the same line.
        print("x", end='')

    print(actual, " - ", prediction)

# gc.collect() is left disabled, as in the original.
# Entropy-based scikit-learn tree on the previously prepared data/target.
decision_tree = DecisionTreeClassifier(random_state=0, criterion='entropy')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)

#%%
# Gini
decision_tree = DecisionTreeClassifier(random_state=0, criterion='gini')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)

# C4.5
# Feature columns the chefboost model is trained on, in training order.
c45_features = ["HomeOrAway", "InOrOut", "Media"]

# Read the file once and derive both the training frame and the rows to
# predict from it.
task41 = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")
df = task41[c45_features + ["Label"]]
df = df.rename(columns={"Label": "Decision"})

# Predict on the same feature columns, in the same order, as the model was
# trained on. The original passed every column of the CSV.
# NOTE(review): chefboost rules presumably look features up by position,
# so extra or reordered columns would shift the inputs - confirm.
test = task41[c45_features]

config_c45 = {'algorithm': 'C4.5'}
model_c45 = chef.fit(df.copy(), config_c45)

for index, instance in test.iterrows():
    prediction = chef.predict(model_c45, instance)
    print(index, prediction)

# Task 4-2
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2.csv")
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2-test.csv")

target = df.Label
data = df[["Outlook", "Temperature", "Humidity", "Windy"]]

# One-hot encode the training features and the test frame.
# NOTE(review): encoding them separately can produce different column
# sets if a category is missing from one file - confirm before fitting.
data = pd.get_dummies(data)
test = pd.get_dummies(test)