Beispiel #1
0
# Parallelism settings to exercise; every scenario below runs once per entry.
# NOTE(review): `cb` and `pd` are not imported in the visible part of this
# snippet - presumably chefboost's Chefboost module and pandas; confirm.
parallelism_cases = [True, False]
#parallelism_cases = [False, True]

if __name__ == '__main__':

    for enableParallelism in parallelism_cases:

        print("*************************")
        print("enableParallelism is set to ", enableParallelism)
        print("*************************")

        print("-------------------------")

        # Scenario 1: fit ID3 on dataset/golf_le.txt (described by the
        # message below as the label-encoded variant of the golf data).
        print("ID3 for label encoded features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf_le.txt"), config)

        print("-------------------------")

        # Scenario 2: fit ID3 on dataset/golf.txt. A fresh config dict is
        # built for this fit rather than reusing the previous one.
        print("ID3 for nominal features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf.txt"), config)

        # Persist the second model, then read it back and predict with the
        # restored copy.
        # NOTE(review): save_model is called without a file name while the
        # load below reads "model.pkl" - presumably that is the default
        # output name; confirm.
        cb.save_model(model)
        print("built model is saved to model.pkl")

        restored_model = cb.load_model("model.pkl")
        print("built model is restored from model.pkl")

        # A single hand-written feature vector; the prediction is stored but
        # not printed or checked within the visible part of the snippet.
        instance = ['Sunny', 'Hot', 'High', 'Weak']
        prediction = cb.predict(restored_model, instance)
Beispiel #2
0
from chefboost import Chefboost as chef
import pandas as pd
import numpy as np

# Train a C4.5 tree on play.txt and print "actual - predicted" for every row
# of the same file.
df = pd.read_csv("play.txt")

# fit() receives a copy, so `df` itself is what gets iterated afterwards.
model = chef.fit(df.copy(), {'algorithm': 'C4.5'})

for _, row in df.iterrows():
    predicted = chef.predict(model, row)
    print(row['Decision'], " - ", predicted)
Beispiel #3
0
#----------------------------------------------
# Parallelism settings to exercise; every scenario below runs once per entry.
# NOTE(review): `cb` and `pd` are not imported in the visible part of this
# snippet - presumably chefboost's Chefboost module and pandas; confirm.
#parallelism_cases = [True]
parallelism_cases = [False, True]

if __name__ == '__main__':

    for enableParallelism in parallelism_cases:

        print("*************************")
        print("enableParallelism is set to ", enableParallelism)
        print("*************************")

        # Scenario 1: call fit() with only a data frame, i.e. without a
        # config dict. enableParallelism is not passed in this scenario.
        print("no config passed ")
        df = pd.read_csv("dataset/golf.txt")
        model = cb.fit(df)

        print("-------------------------")

        # Scenario 2: fit ID3 with an explicit validation frame. Both frames
        # are read from the same file, so the validation data is identical to
        # the training data here.
        print("Validation set case")

        df = pd.read_csv("dataset/golf.txt")
        validation_df = pd.read_csv("dataset/golf.txt")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(df, config, validation_df=validation_df)

        print("-------------------------")

        # Scenario 3: locate the rules of the first tree of the model built
        # above. `__spec__.origin` is the location a module was loaded from.
        # NOTE(review): this assumes model["trees"][0] is an imported module
        # object; the visible code stops before decision_rules is used.
        print("Feature importance")
        #decision_rules = model["trees"][0].__dict__["__name__"]+".py"
        decision_rules = model["trees"][0].__dict__["__spec__"].origin
# Notebook-style inspection: as a plain script the result of a bare .head()
# or .count() expression is simply discarded.
X.head()

# Hold out 34.9% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.349, random_state=1)

# Features and target are joined column-wise into one training frame.
training_set = pd.concat([X_train, y_train], axis=1)
print(f"Training set size: {X_train.Mitoses.count()}")
print(f"Testing set size: {X_test.Mitoses.count()}")

X_train.head()
y_train.head()

# Training (swap in {'algorithm': 'ID3'} to build an ID3 tree instead).
config = {'algorithm': 'C4.5'}
model = chef.fit(training_set, config)

X_test.Clump_Thickness.count()
y_test.head()

# Calculate Accuracy: counters for the evaluation code that follows.
_true = _false = 0
accuracy = {
    "Benign": {
        "Malignant": 0,
        "Benign": 0
    },
    "Malignant": {
        "Malignant": 0,
        "Benign": 0
Beispiel #5
0
import numpy as np
import pandas as pd
# Load the training and test workbooks.
df = pd.read_excel("MS- train (MS).xlsx")
test = pd.read_excel("MS-test (MS).xlsx")

# Notebook-style previews; as a plain script their results are discarded.
df.head()
test.head()

from chefboost import Chefboost as chef

# Fit a CHAID tree on the training frame.
model = chef.fit(df, {'algorithm': 'CHAID'})

# Tally correct ("dogru") and wrong ("yanlis") predictions over the test rows
# by comparing each prediction with the row's 'Decision' value.
dogru, yanlis = 0, 0
for _, row in test.iterrows():
    if chef.predict(model, row) == row['Decision']:
        dogru += 1
    else:
        yanlis += 1

print("Dogru: ", dogru, " Yanlış: ", yanlis)
Beispiel #6
0
"""
    Build an attack tree for known attacks.
    Uses CART: 1) start with C4.5, then later generate it with C5 (planned).
    Library link: https://github.com/serengil/chefboost (['ID3', 'C4.5', 'CART', 'CHAID', 'Regression'])
"""

import pandas as pd
from chefboost import Chefboost as chef

# Load the training data from CSV.
train_data = pd.read_csv('dataset/pre_train.csv')

# Rename the 'Label' column to 'Decision' in place.
# NOTE(review): presumably 'Decision' is the target column name chefboost
# expects - confirm against the library documentation.
train_data.rename(columns={'Label': 'Decision'}, inplace=True)
# Disabled filter that would drop the rows whose Decision is 'dos':
#train_data = train_data[(train_data['Decision'] != 'dos') == True]

# Build the model with the C4.5 algorithm.
config = {'algorithm': 'C4.5'}
model = chef.fit(train_data, config)

# Save the trained model to disk.
chef.save_model(model, "dataset/c45_model.pkl")
Beispiel #7
0
        'Out', 'In', 'Out', 'Out', 'Out', 'Out', 'In', 'Out', 'Out', 'In',
        'Out', 'In'
    ],
    'media': [
        'NBC', 'NBC', 'ESPN', 'FOX', 'NBC', 'ABC', 'NBC', 'NBC', 'NBC', 'ABC',
        'NBC', 'ABC'
    ]
}
# Ground-truth labels for the test rows.
y_true5 = [
    'WIN', 'LOSS', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'LOSS',
    'WIN', 'LOSS'
]
trainDF5 = pd.DataFrame(data=train_5)
testDF5 = pd.DataFrame(data=test_5)

# Train C4.5 on a copy so trainDF5 itself is not handed to fit().
config = {'algorithm': 'C4.5'}
model = chef.fit(trainDF5.copy(), config)

# Predict every test row; predictions are stringified so they compare against
# the string labels in y_true5.
# NOTE(review): if predict() ever returns None, the label 'None' would turn
# this into a multi-class problem and the binary metrics below would fail.
pred5 = [
    str(chef.predict(model, instance))
    for _, instance in testDF5.iterrows()
]
y_pred5 = np.array(pred5)

# The labels are strings, so the positive class must be named explicitly:
# the metric functions default to pos_label=1, which is not one of
# {'LOSS', 'WIN'} and makes them raise ValueError.
# NOTE(review): 'WIN' is assumed to be the positive class - confirm.
positive_label5 = 'WIN'
precision5 = precision_score(y_true5, y_pred5, pos_label=positive_label5)
F1_score5 = f1_score(y_true5, y_pred5, pos_label=positive_label5)
recall_score5 = recall_score(y_true5, y_pred5, pos_label=positive_label5)
accuracy_score5 = accuracy_score(y_true5, y_pred5)

print("\tPrecision Score: ", precision5)
print("\tF1 Score ", F1_score5)
print("\tRecall Score: ", recall_score5)
print("\tAccuracy Score: ", accuracy_score5)
print("\tPrediction for Entropy Q5:", y_pred5)
#!pip install chefboost
from chefboost import Chefboost as cb
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Train C4.5 on golf.txt with parallelism switched on.
    golf = pd.read_csv("golf.txt")
    settings = {'algorithm': 'C4.5', 'enableParallelism': True}
    model = cb.fit(golf, settings)

    # Draw the feature importances as a bar chart.
    # NOTE(review): feature_importance() is called without arguments; some
    # chefboost releases appear to require the rules file as an argument -
    # confirm against the installed version.
    importances = cb.feature_importance()
    importances.plot.bar()
    plt.show()
Beispiel #9
0
#Professor: Dibio Leandro Borges
#Student: William Coelho da Silva - 180029274

#Import the required libraries
import pandas as pd
from chefboost import Chefboost as chef
import gc

print('\nDecisionTree C4.5\n\n')

# Read the dataset and rename the exam-result column to 'Decision', the name
# the rest of this script (and the chefboost README) uses for the target.
dataset = pd.read_excel('dataset.xlsx', engine='openpyxl')
dataset = dataset.rename(columns={'resultado do exame': 'Decision'})
print(dataset)

# Train a C4.5 tree on a copy so `dataset` itself is not handed to fit().
config = {'algorithm': 'C4.5'}
model = chef.fit(dataset.copy(), config=config)

# Compare the prediction for every row with its actual label. Each row is
# predicted individually (not just the first one), and the label is read from
# the renamed 'Decision' column.
for ind, istance in dataset.iterrows():
    prediction = chef.predict(model, istance)
    actual = istance['Decision']
    if actual != prediction:
        # Prefix misclassified rows with an "x" marker on the same line.
        print("x", end='')

    print(actual, " - ", prediction)

#gc.collect()
Beispiel #10
0
# Entropy: fit a scikit-learn decision tree with the entropy criterion and
# plot it. `data` and `target` are defined outside the visible snippet.
decision_tree = DecisionTreeClassifier(random_state=0, criterion='entropy')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)

#%%
# Gini: same data, Gini impurity as the split criterion.
decision_tree = DecisionTreeClassifier(random_state=0, criterion='gini')
decision_tree = decision_tree.fit(data, target)
plot_tree(decision_tree)

# C4.5: train on three feature columns plus the label, with "Label" renamed
# to "Decision" before fitting.
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")[["HomeOrAway", "InOrOut", "Media", "Label"]]
df = df.rename(columns={"Label": "Decision"})
# NOTE(review): the test frame is read from the same CSV as the training
# data and keeps every column of that file - confirm this is intended and
# that predict() tolerates the unselected columns.
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")
config_c45 = {'algorithm': 'C4.5'}
model_c45 = chef.fit(df.copy(), config_c45)

# Print the row index and the C4.5 prediction for every test row.
for index, instance in test.iterrows():
    prediction = chef.predict(model_c45, instance)
    print(index, prediction)


# Task 4-2: separate train and test files.
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2.csv")
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2-test.csv")

# Target is the Label column; the four listed feature columns are one-hot
# encoded with get_dummies.
target = df.Label
data = df[["Outlook","Temperature","Humidity","Windy"]]
data = pd.get_dummies(data)

# NOTE(review): the test frame is encoded independently of the training
# frame, so the two may end up with different dummy columns - confirm they
# are aligned before predicting.
test = pd.get_dummies(test)