# Build the imputer: missing values (encoded as 'NaN') are replaced using the
# 'mean' strategy along axis 0.
# NOTE(review): `imp` is presumably scikit-learn's legacy Imputer, for which
# axis=0 means column-wise -- confirm against the file's imports.
imputer_object = imp(missing_values='NaN', strategy='mean', axis=0)

# Fit the imputer on our data as a separate step -- we do this so that the
# fit can be saved and re-applied to new data later.
imputer_object.fit(explanatory_df)
explanatory_df = imputer_object.transform(explanatory_df)

##########################
### Naive Bayes Model ###
##########################

### creating naive bayes classifier ###
naive_bayes_classifier = nb()

# Score the classifier with cv=10 and accuracy as the metric.
# NOTE(review): `cv` is presumably scikit-learn's cross_val_score (10-fold
# cross-validation) -- confirm against the file's imports.
accuracy_scores = cv(naive_bayes_classifier, explanatory_df, response_series,
                     cv=10, scoring='accuracy')

# Compute the mean once and reuse it. The parenthesised print of a single
# expression produces the same output on Python 2 and Python 3.
mean_accuracy_score = accuracy_scores.mean()
print(mean_accuracy_score)
# Author's observation: on average the model is about 60% accurate, which is
# not very high.

### calculating accuracy metrics for comparison ###

## ACCURACY METRIC 1: Cohen's Kappa ##
# Share of observations that belong to the most frequent class. `.max()` is
# used instead of `[0]` because `[0]` is a *label* lookup when the classes are
# integers, and only equals the largest share if class 0 is the majority.
largest_class_percent_of_total = response_series.value_counts(normalize=True).max()

# Bare expression: echoes the value when run interactively (no-op in a script).
largest_class_percent_of_total
# Author's observation: the largest class makes up about 90% of the data, so
# always predicting that someone WILL NOT be in the hall of fame would already
# be correct about 90% of the time.
def naive_bayes(data, classifiers):
    """Fit a fresh naive Bayes model and return the result of the fit.

    Args:
        data: Training features, passed as the first argument to ``fit``.
        classifiers: Passed as the second argument to ``fit``; presumably the
            class labels for ``data`` despite the name -- verify against
            callers.

    Returns:
        Whatever ``nb().fit(data, classifiers)`` returns. If ``nb`` is a
        scikit-learn estimator (TODO confirm), that is the fitted estimator.
    """
    # A new model is created on every call, so no state is shared between fits.
    return nb().fit(data, classifiers)