def bulk_prediction(df, model):
    """Predict every row of *df* with the given chefboost model.

    Adds the results to *df* in place as a new 'Prediction' column.
    Assumes the last column of each row is the target and drops it
    from the feature vector (TODO confirm against the caller's frame).
    """
    # row.values[:-1] strips the trailing Decision/target column.
    df['Prediction'] = [
        cb.predict(model, row.values[0:-1]) for _, row in df.iterrows()
    ]
# NOTE(review): `config` is used here before the first assignment visible in
# this chunk -- presumably defined earlier in the file; verify.
model = cb.fit(pd.read_csv("dataset/golf_le.txt"), config)

print("-------------------------")
print("ID3 for nominal features and nominal target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf.txt"), config)

# Round-trip the model through pickle to exercise save/load.
cb.save_model(model)
print("built model is saved to model.pkl")
restored_model = cb.load_model("model.pkl")
print("built model is restored from model.pkl")

instance = ['Sunny', 'Hot', 'High', 'Weak']
prediction = cb.predict(restored_model, instance)
print("prediction for ", instance, "is ", prediction)

print("-------------------------")
print("ID3 for nominal/numeric features and nominal target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf2.txt"), config)

instance = ['Sunny', 85, 85, 'Weak']
# BUG FIX: the original predicted with `restored_model` (the golf.txt model
# loaded above) instead of the golf2.txt model fitted just before.
prediction = cb.predict(model, instance)
print("prediction for ", instance, "is ", prediction)
print("-------------------------")
from chefboost import Chefboost as chef
import pandas as pd
import numpy as np

# Fit a C4.5 decision tree on the play data set, then echo the actual
# label next to the model's prediction for every training row.
df = pd.read_csv("play.txt")

config = {'algorithm': 'C4.5'}
model = chef.fit(df.copy(), config)

for _, row in df.iterrows():
    predicted = chef.predict(model, row)
    print(row['Decision'], " - ", predicted)
# Calculate Accuracy _true = 0 _false = 0 accuracy = { "Benign": { "Malignant": 0, "Benign": 0 }, "Malignant": { "Malignant": 0, "Benign": 0 } } for i in range(X_test.Clump_Thickness.count()): prediction = chef.predict(model, X_test.iloc[i]) if prediction != None and round(prediction) == y_test.iloc[i].Decision: _true += 1 if y_test.iloc[i].Decision == 0: accuracy["Benign"]["Benign"] += 1 else: accuracy["Malignant"]["Malignant"] += 1 else: _false += 1 if y_test.iloc[i].Decision == 0: accuracy["Benign"]["Malignant"] += 1 else: accuracy["Malignant"]["Benign"] += 1 print(accuracy) print("\nTotal Accuracy: {:0.2f}".format(_true * 100 / (_true + _false))) print(
# Remnant of an earlier FCBF feature-selection experiment, kept commented out.
# idx = np.transpose(idx)
# # delete the feature by using the mask
# s_list = s_list[idx]
# length = len(s_list)//2
# s_list = s_list.reshape((length, 2))
# return np.array(F, dtype=int), np.array(SU)
#
#feat_index, sym_arr = fcbf(X_train_data.iloc[:,:5], X_test_data.iloc[:,:5])

#MIM
from skfeature.function.information_theoretical_based import LCSI
F, J_CMI, MIfy = LCSI.lcsi(X_train_data, y_train_data, beta=0, gamma=0)

# AdaBoost feature importances: keep columns with nonzero importance.
from sklearn.ensemble import AdaBoostRegressor
regr = AdaBoostRegressor(random_state=0, n_estimators=100)
regr.fit(X_train_data, y_train_data)
imp = regr.feature_importances_
X_train_data.columns[imp > 0]

# chefboost C4.5 tree on the training data.
from chefboost import Chefboost as chef
import pandas as pd

config = {'algorithm': 'C4.5'}
df = X_train_data
df["Decision"] = y_train_data
model = chef.fit(df, config)

# BUG FIX: Chefboost.predict requires the fitted model plus a single
# instance; the original `chef.predict(X_test_data)` passed the whole test
# frame in the model position and would fail. Predict row by row instead.
prediction = [chef.predict(model, instance)
              for _, instance in X_test_data.iterrows()]
# NOTE(review): this chunk begins mid-way through the train_5/test_5
# dictionary literals -- the opening of the structure is outside this view.
        'Out', 'In'
    ],
    'media': [
        'NBC', 'NBC', 'ESPN', 'FOX', 'NBC', 'ABC',
        'NBC', 'NBC', 'NBC', 'ABC', 'NBC', 'ABC'
    ]
}
# Ground-truth labels for the 12 test rows.
y_true5 = [
    'WIN', 'LOSS', 'WIN', 'WIN', 'WIN', 'WIN',
    'WIN', 'WIN', 'WIN', 'LOSS', 'WIN', 'LOSS'
]
trainDF5 = pd.DataFrame(data=train_5)
testDF5 = pd.DataFrame(data=test_5)

config = {'algorithm': 'C4.5'}
model = chef.fit(trainDF5.copy(), config)

# Collect one prediction per test row (stringified for comparison).
pred5 = []
for index, instance in testDF5.iterrows():
    prediction5 = str(chef.predict(model, instance))
    pred5.append(prediction5)
y_pred5 = np.array(pred5)

# NOTE(review): sklearn's precision_score/f1_score/recall_score default to
# average='binary' with pos_label=1; with string labels ('WIN'/'LOSS') these
# calls raise ValueError unless pos_label (e.g. pos_label='WIN') or a
# different average is passed -- confirm and fix.
precision5 = precision_score(y_true5, y_pred5)
F1_score5 = f1_score(y_true5, y_pred5)
recall_score5 = recall_score(y_true5, y_pred5)
accuracy_score5 = accuracy_score(y_true5, y_pred5)
print("\tPrecision Score: ", precision5)
print("\tF1 Score ", F1_score5)
print("\tRecall Score: ", recall_score5)
print("\tAccuracy Score: ", accuracy_score5)
print("\tPrediction for Entropy Q5:", y_pred5)
print("*************************")
print("enableParallelism is set to ",enableParallelism)
print("*************************")

print("ID3 for nominal features and target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf.txt"), config)

# Round-trip the model through pickle to exercise save/load.
cb.save_model(model)
print("built model is saved to model.pkl")
restored_model = cb.load_model("model.pkl")
print("built model is restored from model.pkl")

instance = ['Sunny', 'Hot', 'High', 'Weak']
prediction = cb.predict(restored_model, instance)
print("prediction for ", instance, "is ", prediction)
print("-------------------------")

print("ID3 for nominal/numeric features and target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf2.txt"), config)

instance = ['Sunny', 85, 85, 'Weak']
# BUG FIX: the original predicted with `restored_model` (the golf.txt model
# loaded above) instead of the golf2.txt model fitted just before.
prediction = cb.predict(model, instance)
print("prediction for ", instance, "is ", prediction)
print("-------------------------")
# Report SSE, MSE, and wall-clock time for the previous model's predictions.
# NOTE(review): y_pred, y_test, start come from earlier in the file.
print(sum((y_pred - y_test)**2))
print(mean_squared_error(y_pred, y_test))
print(time.time() - start)

################################
# chefboost C4.5 on the training frame, timed.
start = time.time()
config = {'algorithm': 'C4.5'}
df = X_train_data
df["Decision"] = y_train
#########
model = chef.fit(df, config)

# Predict each test row individually (chefboost predicts one instance at a time).
y_pred = []
for index, instance in X_test_data.iterrows():
    y_pred.append(chef.predict(model, instance))
# NOTE(review): y_pred is a plain list; (y_pred - y_test) relies on pandas
# broadcasting against y_test -- confirm y_test is a Series.
print(sum((y_pred - y_test)**2))
print(mean_squared_error(y_pred, y_test))
print(time.time() - start)

####################################
import time
start = time.time()

# Restrict to four hand-picked symptom features.
ls = ["Polyuria", "Polydipsia", "delayed healing", "muscle stiffness"]
X_train = X_train_data[ls]
X_test = X_test_data[ls]

n_estimators = 1000
# NOTE(review): this call is truncated in this view.
model = RandomForestRegressor(n_estimators=n_estimators, oob_score=True,
# NOTE(review): this chunk starts inside a function defined outside this
# view (the bare `return feature_`) and ends mid `if` block -- both ends
# are truncated.
return feature_

# Convert the frame to numeric codes, keeping an untouched copy for decoding.
df = to_number(df)
df2 = df.copy()

#Regression
from chefboost import Chefboost as chef
config = {'algorithm': 'Regression'}
model = chef.fit(df, config)

#feature_=['Overcast','Cool','Normal','Strong']
feature_ = [1, 2, 3, 4]
feature = features(feature_)
prediction = chef.predict(model, feature)
# antidiccionario maps the numeric code back to the original label for the
# last (target) column -- presumably built by to_number; verify.
print(
    feature_,
    antidiccionario[df2.columns[len(df2.columns) - 1]][str(round(prediction))])

count = 0
for index, instance in df2.iterrows():
    feature = features(instance)
    #print(index, feature)
    # Decode both the prediction and the actual label before comparing.
    prediction = antidiccionario[df2.columns[len(df2.columns) - 1]][str(
        round(chef.predict(model, feature)))]
    actual = antidiccionario[df2.columns[len(df2.columns) - 1]][str(
        round(float(instance['Decision'])))]
    print(index + 1, '\tActual:', actual, '\t- \tPredict', prediction,
          '\tmatch: ', prediction == actual)
    # NOTE(review): body of this `if` is truncated in this view.
    if (prediction == actual):
#Professor: Dibio Leandro Borges
#Aluno: William Coelho da Silva - 180029274

# Import required libraries
import pandas as pd
from chefboost import Chefboost as chef
import gc

print('\nDecisionTree C4.5\n\n')

# Load the data set and rename the label column to the name chefboost expects.
dataset = pd.read_excel('dataset.xlsx', engine='openpyxl')
dataset = dataset.rename(columns={'resultado do exame': 'Decision'})
print(dataset)

config = {'algorithm': 'C4.5'}
model = chef.fit(dataset.copy(), config=config)

for ind, istance in dataset.iterrows():
    # BUG FIX: predict the current row; the original always predicted
    # dataset.iloc[0], so every comparison used the first row's prediction.
    prediction = chef.predict(model, istance)
    # BUG FIX: the label column is 'Decision' -- the original 'Decison'
    # typo raised a KeyError on the first iteration.
    actual = istance['Decision']
    if actual == prediction:
        classified = True
    else:
        # BUG FIX: was misspelled `cclassified`, so `classified` never
        # recorded a miss.
        classified = False
        print("x", end='')
    print(actual, " - ", prediction)
    #gc.collect()
#%%
# Gini: fit and visualize a scikit-learn decision tree.
decision_tree = DecisionTreeClassifier(random_state=0, criterion='gini')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)

# C4.5: fit a chefboost tree on task 4-1 and predict every row of the
# same file.
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")[
    ["HomeOrAway", "InOrOut", "Media", "Label"]]
df = df.rename(columns={"Label": "Decision"})
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")

config_c45 = {'algorithm': 'C4.5'}
model_c45 = chef.fit(df.copy(), config_c45)
for index, instance in test.iterrows():
    print(index, chef.predict(model_c45, instance))

# Task 4-2
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2.csv")
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2-test.csv")
target = df.Label
data = df[["Outlook", "Temperature", "Humidity", "Windy"]]

# One-hot encode train and test, then align the test frame to the
# training columns, filling unseen dummy columns with "0".
data = pd.get_dummies(data)
test = pd.get_dummies(test)
test = data.iloc[0:0].combine_first(test).fillna("0")
test