コード例 #1
0
def bulk_prediction(df, model):
    """Predict every row of ``df`` with ``model`` and store the results in place.

    Each row is handed to ``cb.predict`` with its last value sliced off
    (presumably the target column -- confirm against callers). The
    predictions are written to a ``'Prediction'`` column on ``df``; the
    function returns nothing.
    """
    df['Prediction'] = [
        cb.predict(model, row.values[0:-1]) for _, row in df.iterrows()
    ]
コード例 #2
0
        model = cb.fit(pd.read_csv("dataset/golf_le.txt"), config)

        print("-------------------------")

        print("ID3 for nominal features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf.txt"), config)

        # Round-trip the fitted model through disk and predict with the
        # restored copy to exercise save_model / load_model.
        cb.save_model(model)
        print("built model is saved to model.pkl")

        restored_model = cb.load_model("model.pkl")
        print("built model is restored from model.pkl")

        instance = ['Sunny', 'Hot', 'High', 'Weak']
        prediction = cb.predict(restored_model, instance)

        print("prediction for ", instance, "is ", prediction)

        print("-------------------------")

        print("ID3 for nominal/numeric features and nominal target:")
        config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
        model = cb.fit(pd.read_csv("dataset/golf2.txt"), config)

        instance = ['Sunny', 85, 85, 'Weak']
        # Predict with the model just fitted on golf2.txt. restored_model was
        # fitted on golf.txt, so using it here would leave this model untested.
        prediction = cb.predict(model, instance)
        print("prediction for ", instance, "is ", prediction)

        print("-------------------------")
コード例 #3
0
from chefboost import Chefboost as chef
import pandas as pd
import numpy as np

# Fit a C4.5 tree on play.txt. fit() receives a copy of the frame
# (presumably so it cannot alter df, which is replayed below -- confirm).
config = {'algorithm': 'C4.5'}
df = pd.read_csv("play.txt")
model = chef.fit(df.copy(), config)

# Replay every training row through the model and print the actual
# 'Decision' value next to the prediction.
for index, instance in df.iterrows():
    prediction = chef.predict(model, instance)
    actual = instance['Decision']
    print(actual, " - ", prediction)
# Calculate Accuracy
# Score the fitted model on the held-out rows. `accuracy` is a 2x2 tally
# indexed as accuracy[actual][predicted]; a Decision of 0 is counted as
# "Benign" and any other value as "Malignant".
_true = 0
_false = 0
accuracy = {
    "Benign": {
        "Malignant": 0,
        "Benign": 0
    },
    "Malignant": {
        "Malignant": 0,
        "Benign": 0
    }
}
# Walk every test row. Deriving the bound from a count of non-null
# Clump_Thickness values would silently skip rows if that column had gaps.
for i in range(len(X_test)):
    prediction = chef.predict(model, X_test.iloc[i])
    actual = y_test.iloc[i].Decision
    actual_label = "Benign" if actual == 0 else "Malignant"
    # A missing prediction (None) is scored as a miss.
    if prediction is not None and round(prediction) == actual:
        _true += 1
        accuracy[actual_label][actual_label] += 1
    else:
        _false += 1
        other_label = "Malignant" if actual == 0 else "Benign"
        accuracy[actual_label][other_label] += 1
print(accuracy)
_total = _true + _false
# Guard the division so an empty test set reports nan instead of raising.
_pct = _true * 100 / _total if _total else float("nan")
print("\nTotal Accuracy: {:0.2f}".format(_pct))
print(
コード例 #5
0
ファイル: fast_cmim.py プロジェクト: softsys4ai/SCTL
#                idx = np.transpose(idx)
#                # delete the feature by using the mask
#                s_list = s_list[idx]
#                length = len(s_list)//2
#                s_list = s_list.reshape((length, 2))
#    return np.array(F, dtype=int), np.array(SU)
#
#feat_index, sym_arr = fcbf(X_train_data.iloc[:,:5], X_test_data.iloc[:,:5])

#MIM
# Rank features with skfeature's LCSI routine, called with beta=0 and gamma=0.
from skfeature.function.information_theoretical_based import LCSI
F, J_CMI, MIfy = LCSI.lcsi(X_train_data, y_train_data, beta=0, gamma=0)

# Fit an AdaBoost regressor and keep its per-feature importances.
from sklearn.ensemble import AdaBoostRegressor
regr = AdaBoostRegressor(n_estimators=100, random_state=0).fit(
    X_train_data, y_train_data)

imp = regr.feature_importances_

# Bare expression: selects the columns whose importance is positive; the
# value is only visible when run interactively.
X_train_data.columns[imp > 0]

from chefboost import Chefboost as chef
import pandas as pd

# Train a C4.5 tree with chefboost, which reads the target from a column
# named "Decision".
config = {'algorithm': 'C4.5'}
# Copy first: assigning the target column on an alias would add "Decision"
# to X_train_data itself.
df = X_train_data.copy()
df["Decision"] = y_train_data
model = chef.fit(df, config)

# chef.predict takes the fitted model plus ONE instance, so predict the test
# set row by row; `prediction` holds one result per row of X_test_data.
prediction = [
    chef.predict(model, instance) for _, instance in X_test_data.iterrows()
]
コード例 #6
0
        'Out', 'In'
    ],
    'media': [
        'NBC', 'NBC', 'ESPN', 'FOX', 'NBC', 'ABC', 'NBC', 'NBC', 'NBC', 'ABC',
        'NBC', 'ABC'
    ]
}
# Ground-truth labels for the Q5 test rows, in the same order as test_5.
y_true5 = [
    'WIN', 'LOSS', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'WIN', 'LOSS',
    'WIN', 'LOSS'
]
trainDF5 = pd.DataFrame(data=train_5)
testDF5 = pd.DataFrame(data=test_5)

# Fit a C4.5 tree on a copy of the training frame and predict each test row.
config = {'algorithm': 'C4.5'}
model = chef.fit(trainDF5.copy(), config)
pred5 = [
    str(chef.predict(model, instance)) for index, instance in testDF5.iterrows()
]
y_pred5 = np.array(pred5)

# The labels are strings, so the binary metrics need an explicit positive
# class; the default pos_label=1 is not among the labels and raises
# ValueError. 'WIN' is assumed to be the positive class -- confirm.
precision5 = precision_score(y_true5, y_pred5, pos_label='WIN')
F1_score5 = f1_score(y_true5, y_pred5, pos_label='WIN')
recall_score5 = recall_score(y_true5, y_pred5, pos_label='WIN')
accuracy_score5 = accuracy_score(y_true5, y_pred5)

print("\tPrecision Score: ", precision5)
print("\tF1 Score ", F1_score5)
print("\tRecall Score: ", recall_score5)
print("\tAccuracy Score: ", accuracy_score5)
print("\tPrediction for Entropy Q5:", y_pred5)
コード例 #7
0
		print("*************************")
		print("enableParallelism is set to ",enableParallelism)
		print("*************************")
		
		print("ID3 for nominal features and target:")
		config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
		model = cb.fit(pd.read_csv("dataset/golf.txt"), config)
		
		# Round-trip the fitted model through disk and predict with the
		# restored copy to exercise save_model / load_model.
		cb.save_model(model)
		print("built model is saved to model.pkl")
		
		restored_model = cb.load_model("model.pkl")
		print("built model is restored from model.pkl")
		
		instance = ['Sunny', 'Hot', 'High', 'Weak']
		prediction = cb.predict(restored_model, instance)
		
		print("prediction for ", instance, "is ", prediction)

		print("-------------------------")
		
		print("ID3 for nominal/numeric features and target:")
		config = {'algorithm': 'ID3', 'enableParallelism': enableParallelism}
		model = cb.fit(pd.read_csv("dataset/golf2.txt"), config)
		
		instance = ['Sunny', 85, 85, 'Weak']
		# Predict with the model just fitted on golf2.txt. restored_model was
		# fitted on golf.txt, so using it here would leave this model untested.
		prediction = cb.predict(model, instance)
		print("prediction for ", instance, "is ", prediction)

		print("-------------------------")
		
コード例 #8
0
print(sum((y_pred - y_test)**2))
print(mean_squared_error(y_pred, y_test))
print(time.time() - start)

################################

# Time a chefboost C4.5 fit plus row-by-row prediction on the test set.
start = time.time()

config = {'algorithm': 'C4.5'}
# Copy first: assigning the target column on an alias would add "Decision"
# to X_train_data itself and leak the target into later feature selections.
df = X_train_data.copy()
df["Decision"] = y_train
#########
model = chef.fit(df, config)
# chef.predict handles one instance at a time, so collect predictions per row.
y_pred = [
    chef.predict(model, instance) for index, instance in X_test_data.iterrows()
]

# Sum of squared errors, mean squared error, and elapsed wall-clock seconds.
print(sum((y_pred - y_test)**2))
# Arguments in the documented (y_true, y_pred) order.
print(mean_squared_error(y_test, y_pred))
print(time.time() - start)

####################################

import time
start = time.time()
ls = ["Polyuria", "Polydipsia", "delayed healing", "muscle stiffness"]
X_train = X_train_data[ls]
X_test = X_test_data[ls]
n_estimators = 1000
model = RandomForestRegressor(n_estimators=n_estimators,
                              oob_score=True,
コード例 #9
0
ファイル: regression.py プロジェクト: ponisio7/chefboost
    return feature_


# Convert the frame with to_number() and keep a copy (df2) that is used for
# the look-ups below, independent of the frame handed to chef.fit.
df = to_number(df)
df2 = df.copy()

#Regression
from chefboost import Chefboost as chef
config = {'algorithm': 'Regression'}
model = chef.fit(df, config)
#feature_=['Overcast','Cool','Normal','Strong']
feature_ = [1, 2, 3, 4]
feature = features(feature_)

# Predict one hand-written sample. The rounded prediction is looked up, as a
# string key, in antidiccionario under the name of df2's last column
# (presumably a code -> label mapping for the target -- confirm).
prediction = chef.predict(model, feature)
last_column = df2.columns[-1]
print(feature_, antidiccionario[last_column][str(round(prediction))])
count = 0
for index, instance in df2.iterrows():

    feature = features(instance)
    #print(index, feature)
    prediction = antidiccionario[df2.columns[len(df2.columns) - 1]][str(
        round(chef.predict(model, feature)))]
    actual = antidiccionario[df2.columns[len(df2.columns) - 1]][str(
        round(float(instance['Decision'])))]
    print(index + 1, '\tActual:', actual, '\t- \tPredict', prediction,
          '\tmatch: ', prediction == actual)
    if (prediction == actual):
コード例 #10
0
#Professor: Dibio Leandro Borges
#Aluno: William Coelho da Silva - 180029274

#Importando bibliotecas necessarias
import pandas as pd
from chefboost import Chefboost as chef
import gc

print('\nDecisionTree C4.5\n\n')

# Read the dataset and rename the exam-result column to 'Decision', the
# column name this script uses for the target.
dataset = pd.read_excel('dataset.xlsx', engine='openpyxl')
dataset = dataset.rename(columns={'resultado do exame': 'Decision'})
print(dataset)

# Fit a C4.5 tree on a copy so `dataset` can be replayed below.
config = {'algorithm': 'C4.5'}
model = chef.fit(dataset.copy(), config=config)

# Replay every row through the model; misclassified rows are prefixed with "x".
for ind, istance in dataset.iterrows():
    # Predict the CURRENT row (not always the first one) and read the target
    # from the renamed 'Decision' column.
    prediction = chef.predict(model, istance)
    actual = istance['Decision']
    classified = actual == prediction
    if not classified:
        print("x", end='')

    print(actual, " - ", prediction)

#gc.collect()
コード例 #11
0
#%%
# Gini
# Fit a scikit-learn decision tree using the Gini criterion and draw it.
decision_tree = DecisionTreeClassifier(
    criterion='gini', random_state=0).fit(data, target)
plot_tree(decision_tree)

# C4.5
# Read the CSV once. `test` keeps the full frame; `df` holds only the three
# feature columns plus the label, renamed to "Decision" for chefboost.
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-1.csv")
df = test[["HomeOrAway", "InOrOut", "Media", "Label"]]
df = df.rename(columns={"Label": "Decision"})
config_c45 = {'algorithm': 'C4.5'}
model_c45 = chef.fit(df.copy(), config_c45)

# Predict from the same column layout the model was fitted on. Rows of the
# unfiltered frame may carry extra or differently ordered columns.
# NOTE(review): assumes chef.predict matches features by position -- confirm.
for index, instance in df.iterrows():
    prediction = chef.predict(model_c45, instance)
    print(index, prediction)


# Task 4-2
# Load the training and test sets and one-hot encode the four features.
df = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2.csv")
test = pd.read_csv("/Users/muhammadshahid/Downloads/task4-2-test.csv")

target = df.Label
data = df[["Outlook","Temperature","Humidity","Windy"]]
data = pd.get_dummies(data)

test = pd.get_dummies(test)
# Give the test frame every dummy column seen in training by combining it
# with an empty slice of `data`. Fill the gaps with numeric 0: the string
# "0" would turn the indicator columns into mixed object columns.
# NOTE(review): combine_first keeps the union of columns -- confirm the
# resulting column set and order match what the model expects.
test = data.iloc[0:0].combine_first(test).fillna(0)
# Bare expression: only displays the frame when run interactively.
test