from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder

# Train a Naive Bayes model on the Wisconsin breast-cancer data file and
# report accuracy, precision and F1 on a held-out 20% split.

# The columns of names.csv are used as the column names of the raw data file.
col_names = pd.read_csv('data/names.csv')
data = pd.read_csv('data/breast-cancer-wisconsin.data', names=col_names.columns)

# Drop rows whose bare_nuclei entry is '?' (presumably the file's marker for
# a missing value -- confirm against the dataset description).
data = data[data["bare_nuclei"] != '?']
data.set_index('id', inplace=True)  # stop the model from using id as a node

train, test = train_test_split(data, test_size=0.2, random_state=0)
Y_test = test['class']
test = test.drop(['class'], axis=1)

# Fit model
model = NaiveBayes()
model.fit(train, 'class')
print("Naive Bayes edges: ", model.edges())

# Make predictions
Y_pred = model.predict(test)

# Convert labels so we can use the sklearn functions to evaluate our model.
# The encoder is fitted ONCE, on every class label present in the data, and
# then only applied to the two arrays. Fitting it separately on Y_test and
# Y_pred would give each array its own label -> integer mapping, which
# silently corrupts the metrics whenever the predictions do not contain
# exactly the same set of classes as the ground truth.
labelencoder = LabelEncoder()
labelencoder.fit(data['class'].values.ravel())
Y_test = labelencoder.transform(Y_test.values.ravel())
Y_pred = labelencoder.transform(Y_pred.values.ravel())

# Output results
accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)
print({"Accuracy": accuracy, "Precision": precision, "F1 Score": f1})
# Print the CPDs learned
print("\n\n............Overview of our CPDs from the fit...........:")
for cpd in model.get_cpds():
    print("CPD of {variable}:".format(variable=cpd.variable))
    print(cpd)

#################################################################################
##### Using the model to query
#################################################################################

# Doing exact inference using Variable Elimination
model_infer = VariableElimination(model)

# Example query (currently disabled): probability of class given sex
# print("\n\n............Here are some queries...............")
# q1 = model_infer.query(variables=['class'], evidence={'sex':0})
# print(q1['class'])

#################################################################################
##### Evaluating the model by predicting
#################################################################################

# Keep a copy of the true labels, then remove the target column in place so
# that the model only sees the feature columns when predicting.
y_true = data_test['class'].copy()
data_test.drop('class', axis=1, inplace=True)
y_pred = model.predict(data_test)
# print(y_pred)

# sklearn metric functions take (y_true, y_pred) in that order. Accuracy is
# symmetric, so the value is unchanged, but the documented order keeps this
# call correct if it is ever replaced by an asymmetric metric.
accuracy = accuracy_score(y_true, y_pred)
print("\n\n\n\n\n\nAccuracy = ", accuracy)

print("\nEnd of code \n...o0o.... F**k you Julien ...o0o...")

# Report wall-clock runtime in whole seconds; `start` is expected to have
# been set with time.time() earlier in the script.
print("\nRuntime: ")
end = time.time()
print(round(end - start),"seconds")