def bulk_prediction(df, model):
    """Add a 'Prediction' column to *df* by classifying every row.

    Args:
        df: pandas DataFrame whose last column is the target; all
            preceding columns are treated as features.
        model: a model previously built by chefboost's fit().

    Side effects:
        Mutates *df* in place by adding (or overwriting) 'Prediction'.
    """
    # NOTE(review): relies on a module-level `cb` (chefboost alias) being
    # imported where this function lives — confirm.
    df['Prediction'] = [
        # values[:-1] drops the trailing target column, keeping features only
        cb.predict(model, instance.values[0:-1])
        for _, instance in df.iterrows()
    ]
# Train a C4.5 decision tree on the play dataset, then echo the actual
# label next to the model's prediction for every training row.
from chefboost import Chefboost as chef
import pandas as pd
import numpy as np

df = pd.read_csv("play.txt")

config = {'algorithm': 'C4.5'}
model = chef.fit(df.copy(), config)

for _, row in df.iterrows():
    predicted = chef.predict(model, row)
    truth = row['Decision']
    print(truth, " - ", predicted)
X.head() X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.349, random_state=1) training_set = pd.concat([X_train, y_train], axis=1) print("Training set size: {}".format(X_train.Mitoses.count())) print("Testing set size: {}".format(X_test.Mitoses.count())) X_train.head() y_train.head() # Training # config = {'algorithm': 'ID3'} config = {'algorithm': 'C4.5'} model = chef.fit(training_set, config) X_test.Clump_Thickness.count() y_test.head() # Calculate Accuracy _true = 0 _false = 0 accuracy = { "Benign": { "Malignant": 0, "Benign": 0 }, "Malignant": { "Malignant": 0, "Benign": 0
# Minimal chefboost example: load the golf dataset and build a C4.5 tree.
from chefboost import Chefboost as chef
import pandas as pd

df = pd.read_csv("dataset/golf.txt")
df.head()

model = chef.fit(df, {'algorithm': 'C4.5'})
aux = [] for i in x[0]: a['aux_{}'.format(contador)].append(i) contador += 1 for i in x[1]: a['aux_{}'.format(contador)].append(i) contador += 1 Y_train.append(x[2]) df = pd.DataFrame( (zip(a['aux_0'], a['aux_1'], a['aux_2'], a['aux_3'], a['aux_4'], a['aux_5'], a['aux_6'], a['aux_7'], a['aux_8'], a['aux_9'], a['aux_10'], a['aux_11'], a['aux_12'], a['aux_13'], a['aux_14'], a['aux_15'], a['aux_16'], a['aux_17'], Y_train)), columns=[ 'Eyebrow Distribution 1', 'Eyebrow Shape 1', 'Eyebrow Size 1', 'Eyelashes Size 1', 'Eyelids Shape 1', 'Iris Color 1', 'Skin Texture 1', 'Skin Color 1', 'Spots 1', 'Eyebrow Distribution 2', 'Eyebrow Shape 2', 'Eyebrow Size 3', 'Eyelashes Size 2', 'Eyelids Shape 2', 'Iris Color 2', 'Skin Texture 2', 'Skin Color 2', 'Spots 2', 'Decision' ]) if __name__ == '__main__': config = {'algorithm': 'C4.5'} model = chef.fit(df, config) chef.save_model(model, "model.pkl")
# idx = np.array([idx, idx])
# idx = np.transpose(idx)
# # delete the feature by using the mask
# s_list = s_list[idx]
# length = len(s_list)//2
# s_list = s_list.reshape((length, 2))
# return np.array(F, dtype=int), np.array(SU)
#
#feat_index, sym_arr = fcbf(X_train_data.iloc[:,:5], X_test_data.iloc[:,:5])

#MIM — mutual-information-based feature selection (beta=0, gamma=0 gives MIM)
from skfeature.function.information_theoretical_based import LCSI
F, J_CMI, MIfy = LCSI.lcsi(X_train_data, y_train_data, beta=0, gamma=0)

# AdaBoost feature importances: keep only features with positive importance.
from sklearn.ensemble import AdaBoostRegressor
regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regr.fit(X_train_data, y_train_data)
imp = regr.feature_importances_
X_train_data.columns[imp > 0]

# Build a C4.5 tree with chefboost; it expects the target in a 'Decision' column.
from chefboost import Chefboost as chef
import pandas as pd
config = {'algorithm': 'C4.5'}
# NOTE(review): `df` aliases X_train_data, so the next line also adds the
# 'Decision' column to X_train_data itself — confirm this is intended.
df = X_train_data
df["Decision"] = y_train_data
model = chef.fit(df, config)

# BUG FIX: chef.predict requires (model, instance) and classifies one row at
# a time — the original chef.predict(X_test_data) raised a TypeError.
prediction = [chef.predict(model, instance) for _, instance in X_test_data.iterrows()]
'Out', 'In', 'Out', 'Out', 'Out', 'Out', 'In', 'Out', 'Out', 'In', 'Out', 'In' ], 'media': [ 'NBC', 'NBC', 'ESPN', 'FOX', 'NBC', 'ABC', 'NBC', 'NBC', 'NBC', 'ABC', 'NBC', 'ABC' ] } y_true5 = [ 'WIN', 'LOSS', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'LOSS', 'WIN', 'LOSS' ] trainDF5 = pd.DataFrame(data=train_5) testDF5 = pd.DataFrame(data=test_5) config = {'algorithm': 'C4.5'} model = chef.fit(trainDF5.copy(), config) pred5 = [] for index, instance in testDF5.iterrows(): prediction5 = str(chef.predict(model, instance)) pred5.append(prediction5) y_pred5 = np.array(pred5) precision5 = precision_score(y_true5, y_pred5) F1_score5 = f1_score(y_true5, y_pred5) recall_score5 = recall_score(y_true5, y_pred5) accuracy_score5 = accuracy_score(y_true5, y_pred5) print("\tPrecision Score: ", precision5) print("\tF1 Score ", F1_score5) print("\tRecall Score: ", recall_score5) print("\tAccuracy Score: ", accuracy_score5) print("\tPrediction for Entropy Q5:", y_pred5)
#Professor: Dibio Leandro Borges
#Aluno: William Coelho da Silva - 180029274

# Import required libraries
import pandas as pd
from chefboost import Chefboost as chef
import gc

print('\nDecisionTree C4.5\n\n')

# Load the dataset and rename the target column to the name chefboost expects.
dataset = pd.read_excel('dataset.xlsx', engine='openpyxl')
dataset = dataset.rename(columns={'resultado do exame': 'Decision'})
print(dataset)

config = {'algorithm': 'C4.5'}
model = chef.fit(dataset.copy(), config=config)

# Compare the model's prediction against the actual label for every row,
# printing an "x" marker for each misclassification.
for ind, istance in dataset.iterrows():
    # BUG FIX: predict the current row — the original always predicted
    # dataset.iloc[0], so every row got the first row's prediction.
    prediction = chef.predict(model, istance)
    # BUG FIX: the column was renamed to 'Decision'; 'Decison' raised KeyError.
    actual = istance['Decision']
    if actual == prediction:
        classified = True
    else:
        # BUG FIX: was misspelled 'cclassified', leaving 'classified' stale.
        classified = False
        print("x", end='')
    print(actual, " - ", prediction)
#gc.collect()
import pandas as pd
import sys
from chefboost import Chefboost as cb

# Build one tree per (title, algorithm, dataset) case and print a separator
# after each — same fits, same output, table-driven instead of copy-pasted.
cases = [
    ("ID3 for nominal features and target:", 'ID3', "dataset/golf.txt"),
    ("ID3 for nominal/numeric features and target:", 'ID3', "dataset/golf2.txt"),
    ("C4.5 for nominal/numeric features and target:", 'C4.5', "dataset/golf2.txt"),
    ("CART for nominal/numeric features and target:", 'CART', "dataset/golf2.txt"),
]
for title, algorithm, path in cases:
    print(title)
    config = {'algorithm': algorithm}
    cb.fit(pd.read_csv(path), config)
    print("-------------------------")

print("regression tree for nominal features, numeric target")
#!pip install chefboost
from chefboost import Chefboost as cb
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Train a C4.5 tree with parallelism enabled, then chart the
    # feature-importance table chefboost derives from the built tree.
    golf_df = pd.read_csv("golf.txt")
    settings = {'algorithm': 'C4.5', 'enableParallelism': True}
    model = cb.fit(golf_df, settings)

    importance = cb.feature_importance()
    importance.plot.bar()
    plt.show()
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def make_df(name, df):
    """Write the rows predicted as *name* to a per-class CSV and log the count."""
    print(name)
    subset = df[df['predict'] == name]
    subset.to_csv('dataset/misuse_result/' + name + '.csv', index=False)
    print(len(subset))


# Load the test split; keep a parallel frame for the actual/predicted report.
test_data = pd.read_csv('dataset/pre_test.csv')
result_df = pd.DataFrame(data=test_data, columns=test_data.columns)
test_data.rename(columns={'Label': 'Decision'}, inplace=True)

# Restore the previously trained C4.5 model.
model = chef.load_model("dataset/c45_model.pkl")

test_label = test_data['Decision']
test_data = test_data.drop(['Decision'], axis=1)

# Classify every test row with the restored tree.
predict_list = [chef.predict(model, instance) for _, instance in test_data.iterrows()]

result_df.rename(columns={'Decision': 'actual'}, inplace=True)
result_df['predict'] = predict_list

print(len(result_df))
print(result_df.columns)
label_list = set(result_df['predict'])
# Evaluate the earlier model restricted to features with positive importance.
model.fit(X_train_data[X_train_data.columns[imp > 0]], y_train)
y_pred = model.predict(X_test_data[X_test_data.columns[imp > 0]])
print(sum((y_pred - y_test)**2))  # residual sum of squares
print(mean_squared_error(y_pred, y_test))
print(time.time() - start)  # elapsed seconds for this experiment
################################
# Time a chefboost C4.5 tree on the same split.
start = time.time()
config = {'algorithm': 'C4.5'}
# NOTE(review): `df` aliases X_train_data, so the next line also adds a
# 'Decision' column to X_train_data itself — confirm this is intended.
df = X_train_data
df["Decision"] = y_train
#########
model = chef.fit(df, config)
# chefboost predicts one instance at a time, so classify row by row.
y_pred = []
for index, instance in X_test_data.iterrows():
    y_pred.append(chef.predict(model, instance))
print(sum((y_pred - y_test)**2))
print(mean_squared_error(y_pred, y_test))
print(time.time() - start)
####################################
import time
start = time.time()
# Restrict to a hand-picked subset of symptom features.
ls = ["Polyuria", "Polydipsia", "delayed healing", "muscle stiffness"]
X_train = X_train_data[ls]
X_test = X_test_data[ls]
if (not isNumeric(feature1[column])): feature_.append(diccionario[df.columns[column]][feature1[column]]) else: feature_.append(feature1[column]) return feature_ df = to_number(df) df2 = df.copy() #Regression from chefboost import Chefboost as chef config = {'algorithm': 'Regression'} model = chef.fit(df, config) #feature_=['Overcast','Cool','Normal','Strong'] feature_ = [1, 2, 3, 4] feature = features(feature_) prediction = chef.predict(model, feature) print( feature_, antidiccionario[df2.columns[len(df2.columns) - 1]][str(round(prediction))]) count = 0 for index, instance in df2.iterrows(): feature = features(instance) #print(index, feature) prediction = antidiccionario[df2.columns[len(df2.columns) - 1]][str( round(chef.predict(model, feature)))]
#---------------------------------------------- #parallelism_cases = [True] parallelism_cases = [False, True] if __name__ == '__main__': for enableParallelism in parallelism_cases: print("*************************") print("enableParallelism is set to ", enableParallelism) print("*************************") print("no config passed ") df = pd.read_csv("dataset/golf.txt") model = cb.fit(df) print("-------------------------") print("Validation set case") df = pd.read_csv("dataset/golf.txt") validation_df = pd.read_csv("dataset/golf.txt") config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism} model = cb.fit(df, config, validation_df=validation_df) print("-------------------------") print("Feature importance") #decision_rules = model["trees"][0].__dict__["__name__"]+".py" decision_rules = model["trees"][0].__dict__["__spec__"].origin
"""
Build an attack tree for known attacks.

Currently uses C4.5; moving to C5 is planned for a later iteration.
Library: https://github.com/serengil/chefboost
(supported algorithms: ['ID3', 'C4.5', 'CART', 'CHAID', 'Regression'])
"""
import pandas as pd
from chefboost import Chefboost as chef

# chefboost expects the target column to be named 'Decision'.
train_data = pd.read_csv('dataset/pre_train.csv')
train_data.rename(columns={'Label': 'Decision'}, inplace=True)
#train_data = train_data[(train_data['Decision'] != 'dos') == True]

# Build the model.
config = {'algorithm': 'C4.5'}
model = chef.fit(train_data, config)

# Persist the model for the evaluation script.
chef.save_model(model, "dataset/c45_model.pkl")
parallelism_cases = [True, False] #parallelism_cases = [False, True] if __name__ == '__main__': for enableParallelism in parallelism_cases: print("*************************") print("enableParallelism is set to ", enableParallelism) print("*************************") print("-------------------------") print("ID3 for label encoded features and nominal target:") config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism} model = cb.fit(pd.read_csv("dataset/golf_le.txt"), config) print("-------------------------") print("ID3 for nominal features and nominal target:") config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism} model = cb.fit(pd.read_csv("dataset/golf.txt"), config) cb.save_model(model) print("built model is saved to model.pkl") restored_model = cb.load_model("model.pkl") print("built model is restored from model.pkl") instance = ['Sunny', 'Hot', 'High', 'Weak'] prediction = cb.predict(restored_model, instance)
# Entropy criterion (scikit-learn's closest analogue to ID3/C4.5 splits).
decision_tree = DecisionTreeClassifier(random_state=0, criterion='entropy')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)
#%%
# Gini
decision_tree = DecisionTreeClassifier(random_state=0, criterion='gini')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)

# C4.5
# chefboost requires the label column to be called 'Decision'.
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")[["HomeOrAway", "InOrOut", "Media", "Label"]]
df = df.rename(columns={"Label": "Decision"})
# NOTE(review): the test set is the full, un-renamed CSV, so each row still
# carries its 'Label' column when passed to predict — confirm intended.
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")
config_c45 = {'algorithm': 'C4.5'}
model_c45 = chef.fit(df.copy(), config_c45)
for index, instance in test.iterrows():
    prediction = chef.predict(model_c45, instance)
    print(index, prediction)

# Task 4-2
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2.csv")
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2-test.csv")
target = df.Label
data = df[["Outlook","Temperature","Humidity","Windy"]]
# One-hot encode the nominal features for scikit-learn.
data = pd.get_dummies(data)
test = pd.get_dummies(test)