예제 #1
0
        print("-------------------------")

        print("ID3 for label encoded features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf_le.txt"), config)

        print("-------------------------")

        print("ID3 for nominal features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf.txt"), config)

        cb.save_model(model)
        print("built model is saved to model.pkl")

        restored_model = cb.load_model("model.pkl")
        print("built model is restored from model.pkl")

        instance = ['Sunny', 'Hot', 'High', 'Weak']
        prediction = cb.predict(restored_model, instance)

        print("prediction for ", instance, "is ", prediction)

        print("-------------------------")

        print("ID3 for nominal/numeric features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf2.txt"), config)

        instance = ['Sunny', 85, 85, 'Weak']
        prediction = cb.predict(restored_model, instance)
예제 #2
0
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def make_df(name, df, output_dir='dataset/misuse_result'):
    """Save the rows of ``df`` predicted as ``name`` to ``<output_dir>/<name>.csv``.

    Prints ``name`` first and, once the CSV file has been written, the number
    of rows that matched.

    Args:
        name: Label to select; rows are kept where ``df['predict'] == name``.
            Non-string labels are converted with ``str`` to build the file
            name.
        df: DataFrame that contains a ``'predict'`` column.
        output_dir: Directory that receives the CSV file. It is created when
            it does not exist yet. Defaults to the originally hard-coded
            ``'dataset/misuse_result'``.

    Raises:
        KeyError: If ``df`` has no ``'predict'`` column.
    """
    import os  # function-scope import keeps this block self-contained

    print(name)
    # The comparison already yields a boolean mask; no extra ``== True``.
    result_df = df[df['predict'] == name]

    # Create the target directory on demand instead of failing inside to_csv.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # str(name) keeps numeric (or other non-string) labels from raising
    # TypeError during file-name construction.
    result_df.to_csv(os.path.join(output_dir, str(name) + '.csv'), index=False)
    print(len(result_df))


# Load the evaluation set and make sure the target column is named
# 'Decision' (rename leaves the frame unchanged when there is no 'Label').
test_data = pd.read_csv('dataset/pre_test.csv')
test_data = test_data.rename(columns={'Label': 'Decision'})

# Build the result table as an independent, renamed copy.  The previous
# pd.DataFrame(data=test_data, ...) + in-place rename only produced an
# 'actual' column when both frames happened to share their column labels,
# which depends on the installed pandas version; otherwise the later
# 'Decision' -> 'actual' rename was a silent no-op.
result_df = test_data.rename(columns={'Decision': 'actual'})

# Load the model
model = chef.load_model("dataset/c45_model.pkl")

# Separate the ground-truth labels from the feature columns.
test_label = test_data['Decision']
test_data = test_data.drop(['Decision'], axis=1)

# Predict one row at a time; each instance is the row as a pandas Series.
# NOTE(review): other callers pass plain lists to predict -- confirm that a
# Series is handled the same way by the model.
predict_list = []
for index, instance in test_data.iterrows():
    prediction = chef.predict(model, instance)
    predict_list.append(prediction)

# Attach the predictions next to the 'actual' column and report the shape.
result_df['predict'] = predict_list
print(len(result_df))
print(result_df.columns)

# Distinct predicted labels (a set, despite the variable name).
label_list = set(result_df['predict'])