def validate(self):
    """
    Classify every row of the test set with the trained Naive Bayes model
    and score the result against ``self.test_classes``.

    For each row, the class priors (``model.prob_0`` / ``model.prob_1``)
    are multiplied by the per-feature conditional probabilities; a feature
    equal to 0 selects the ``f0`` tables, any other value selects the
    ``f1`` tables. The class with the strictly larger product wins, ties
    go to class 1.

    The predictions of this call are stored in ``self.predictions``. The
    list is rebuilt on every call, so calling ``validate`` repeatedly does
    not accumulate stale predictions from earlier runs (which would make
    the list longer than ``self.test_classes``).

    :return: ``get_num_similarities(predictions, test_classes)`` divided by
        the number of test classes, times 100 (presumably the percentage
        of correct predictions - confirm against ``get_num_similarities``).
        Returns ``None`` when no model has been trained yet.
    """
    print("Testing Naive Bayes Accuracy...")
    if not self.model:
        # Best-effort behaviour kept from the original: report and bail out.
        print("Please call the function train first!")
        return

    model = self.model
    predictions = []

    # Calculate predictions by calculating the probability of each
    # C=1 and C=0 in each sample (row).
    # NOTE(review): with many features these raw products may underflow
    # to 0.0; a 0-vs-0 tie is then resolved as class 1.
    for _, row in self.test.iterrows():
        c0_product = model.prob_0
        c1_product = model.prob_1
        for feature_index, feature in enumerate(row):
            if feature == 0:
                c0_product *= model.feature_probs_f0_given0[feature_index]
                c1_product *= model.feature_probs_f0_given1[feature_index]
            else:
                c0_product *= model.feature_probs_f1_given0[feature_index]
                c1_product *= model.feature_probs_f1_given1[feature_index]
        predictions.append(0 if c0_product > c1_product else 1)

    # Replace (not extend) the stored predictions so they always line up
    # one-to-one with the rows of the test set.
    self.predictions = predictions
    return get_num_similarities(self.predictions, self.test_classes) / len(
        self.test_classes) * 100
def test_performance(self, data, *hypothesis):
    """
    Score a hypothesis by running the validation function on ``data``.

    The predictions returned by ``self.validate(data, *hypothesis)`` are
    compared with ``self.valid_classes`` through ``get_num_similarities``.

    :param data: The data handed to ``self.validate``
    :param hypothesis: The model, forwarded unpacked to ``self.validate``
    :return: The result of ``get_num_similarities`` divided by the number
        of validation classes. The quotient is not multiplied by 100, so
        this is presumably a 0-1 fraction rather than a 0-100 percentage
        - TODO confirm against callers.
    """
    predicted = self.validate(data, *hypothesis)
    matches = get_num_similarities(self.valid_classes, predicted)
    return matches / len(self.valid_classes)
def make_predictions(self):
    """
    Predict a class for every row of ``self.test`` and report the outcome.

    Each row is passed to ``self.predict``; the predicted classes and the
    expected classes (``self.test_classes``) are printed, in that order.

    :return: ``get_num_similarities(predictions, test_classes)`` divided by
        the number of test classes, times 100 (presumably the percentage
        of correct predictions - confirm against ``get_num_similarities``).
    """
    # All rows are predicted before anything is printed.
    predicted = [self.predict(sample) for _, sample in self.test.iterrows()]

    print("Predicted Classes = ")
    print(predicted)
    print("Expected Classes = ")
    print(list(self.test_classes))

    matches = get_num_similarities(predicted, self.test_classes)
    return matches / len(self.test_classes) * 100
def cluster_and_classify(optimized_feature_set, x_test, x_train):
    """Cluster the training data, classify with the cluster labels and
    report the silhouette coefficient of the clusters.

    Steps, each announced on stdout:

    1. Run ``KMeansClustering`` (second argument ``2``) on the selected
       columns of ``x_train`` to obtain cluster labels.
    2. Train Naive Bayes (``nb.learn``) on the same columns, using the
       cluster labels as classes.
    3. Run the classifier (``nb.test``) on the selected columns of
       ``x_test`` and print a performance figure.
    4. Compute and print the silhouette coefficient of the clusters.

    :param optimized_feature_set: Column selector applied to both
        ``x_train`` and ``x_test``
    :param x_test: Test data
    :param x_train: Training data
    :return: None; all results are printed.
    """
    # Step 1: use k-means to cluster the training data.
    print("Running K Means on Glass data set with optimized feature set...")
    clusterer = KMeansClustering(x_train[optimized_feature_set], 2)
    cluster_labels = clusterer.run()

    # Step 2: train Naive Bayes with the cluster labels as classes.
    print("Training with Naive Bayes with k-means labels...")
    label_series = pd.Series(cluster_labels)
    fitted_model = nb.learn(label_series, x_train[optimized_feature_set])

    # Step 3: test the Naive Bayes classifier on the test data.
    print("Testing Naive Bayes Classifier with cluster labels")
    test_predictions = nb.test(x_test[optimized_feature_set], *fitted_model)
    # NOTE(review): the labels come from x_train while the predictions are
    # made on x_test, so this compares sequences from different data sets
    # - confirm that this is the intended performance measure.
    matches = get_num_similarities(cluster_labels, test_predictions)
    performance = matches / len(cluster_labels) * 100
    print("Naive Bayes Classifier Performance = " + str(performance))

    # Step 4: find the silhouette coefficient of the clusters.
    print("Calculating the silhouette coefficient...")
    silhouette = calculate_silhouette_coefficient(
        x_train[optimized_feature_set], cluster_labels)
    print("Silhouette Coefficient = " + str(silhouette))
def validate(self):
    """
    Measure the model on the test data set.

    For every row of ``self.test`` the feature values are combined with
    ``self.weights`` into a weighted sum, which is passed through
    ``self.sigmoid``. An output strictly greater than 0.5 is predicted as
    class 1, anything else as class 0.

    :return: ``get_num_similarities(predictions, test_classes)`` divided by
        the number of test classes, times 100 (presumably the percentage
        of correct predictions - confirm against ``get_num_similarities``).
    """
    print("Testing...")
    predicted = []
    for _, sample in self.test.iterrows():
        # Weighted sum of the row's values, one term per entry of
        # self.features (only the position of each feature is used).
        weighted_sum = 0
        for position, _feature in enumerate(self.features):
            weighted_sum += float(sample[position]) * self.weights[position]

        activation = self.sigmoid(weighted_sum)
        predicted.append(1 if activation > 0.5 else 0)

    matches = get_num_similarities(predicted, self.test_classes)
    return matches / len(self.test_classes) * 100