# --- ID3 on label-encoded features with a nominal target -------------------
print("-------------------------")
print("ID3 for label encoded features and nominal target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf_le.txt"), config)

# --- ID3 on nominal features with a nominal target -------------------------
print("-------------------------")
print("ID3 for nominal features and nominal target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf.txt"), config)

# Round-trip the fitted model through save/load and predict with the
# restored copy.
cb.save_model(model)
print("built model is saved to model.pkl")

restored_model = cb.load_model("model.pkl")
print("built model is restored from model.pkl")

instance = ['Sunny', 'Hot', 'High', 'Weak']
prediction = cb.predict(restored_model, instance)
print("prediction for ", instance, "is ", prediction)

# --- ID3 on mixed nominal/numeric features with a nominal target -----------
print("-------------------------")
print("ID3 for nominal/numeric features and nominal target:")
config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
model = cb.fit(pd.read_csv("dataset/golf2.txt"), config)

instance = ['Sunny', 85, 85, 'Weak']
# Predict with the model fitted on golf2.txt just above. The instance carries
# numeric values, so it must not be scored by `restored_model`, which was
# loaded from the model fitted on golf.txt in the previous section.
prediction = cb.predict(model, instance)
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def make_df(name, df):
    """Write the rows of *df* predicted as *name* to a per-label CSV file.

    Prints the label, saves the matching rows to
    ``dataset/misuse_result/<name>.csv`` (without the index) and prints how
    many rows were written.

    Args:
        name: Predicted label to select; compared against ``df['predict']``.
        df: DataFrame that contains a ``predict`` column.
    """
    print(name)
    # Boolean mask selection; comparing the mask with `True` again is redundant.
    result_df = df[df['predict'] == name]
    # The f-string yields the same path for string labels and also accepts
    # non-string labels, which plain `+` concatenation would reject.
    result_df.to_csv(f'dataset/misuse_result/{name}.csv', index=False)
    print(len(result_df))


test_data = pd.read_csv('dataset/pre_test.csv')
test_data.rename(columns={'Label': 'Decision'}, inplace=True)

# Build the result frame as an explicit renamed copy so that it always carries
# the ground truth in an 'actual' column and never shares state with
# `test_data`.
result_df = test_data.rename(columns={'Decision': 'actual'})

# Load the model.
model = chef.load_model("dataset/c45_model.pkl")

test_label = test_data['Decision']
test_data = test_data.drop(['Decision'], axis=1)

# One prediction per row, in row order.
predict_list = [
    chef.predict(model, instance) for _, instance in test_data.iterrows()
]

result_df['predict'] = predict_list
print(len(result_df))
print(result_df.columns)

label_list = set(result_df['predict'])