from sklearn.metrics import confusion_matrix
from naive_bayes import Naive_Bayes


def get_confusion_matrix():
    # db = DoubleBayes()
    # predicted, actual = db.generate_predictions(weighted=True)
    nb = Naive_Bayes()
    predicted, actual = nb.generate_predictions()
    cnf_matrix = confusion_matrix(actual, predicted)
    return cnf_matrix
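# A minimal sketch of how the matrix returned above might be summarized.
# Nothing below comes from the source beyond the confusion-matrix layout
# sklearn produces (rows = actual classes, columns = predicted classes);
# the helper name is a placeholder.
import numpy as np


def summarize_confusion_matrix(cnf_matrix):
    """Print overall accuracy plus per-class actual/predicted counts."""
    cnf_matrix = np.asarray(cnf_matrix)
    total = cnf_matrix.sum()
    correct = np.trace(cnf_matrix)  # diagonal entries are correct predictions
    print(f"Accuracy: {correct / total:.3f}")
    for i in range(cnf_matrix.shape[0]):
        print(f"class {i}: {cnf_matrix[i].sum()} actual, "
              f"{cnf_matrix[:, i].sum()} predicted")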
from flask import request, jsonify
from naive_bayes import Naive_Bayes  # project model class (import path assumed)


def post():
    # Only accept JSON bodies; echo the raw payload back otherwise.
    if request.is_json:
        data = request.get_json()
    else:
        return jsonify({'request': request.get_data(as_text=True)})
    assert len(data) == 3
    try:
        age = int(data['age'])
        condition_list = list(data['conditions'])
        state = str(data['state'])
    except KeyError:
        # Missing field: return an empty object rather than raising.
        return jsonify(dict())
    nb = Naive_Bayes(age, state, condition_list)
    return jsonify({'probability': nb.get_probability()})
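# Hypothetical wiring for the handler above. The route, the app object, and
# the exact request body are assumptions inferred from the handler, not
# confirmed by the source.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/predict', view_func=post, methods=['POST'])

# Example request body the handler expects (exactly three keys):
#   {"age": 45, "state": "PA", "conditions": ["diabetes", "hypertension"]}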
from decision_tree import Decision_Tree
from naive_bayes import Naive_Bayes
import load_data as ld
import pdb

# Swap in the decision tree to compare against naive Bayes:
# nb = Decision_Tree("decision_tree", pruning=False, info_gain_ratio=True)
nb = Naive_Bayes("naive_bayes")

# Alternative datasets:
# monks3 = ld.load_monks(1)
# monks3 = ld.load_monks(2)
# monks3 = ld.load_monks(3)
# monks3 = ld.load_iris(.75)
monks3 = ld.load_congress_data(0.75)

# Manual evaluation loop (disabled):
"""tot, hit = 0, 0
for person in monks3[1]:
    predict = nb.predict(person)
    if predict == person[0]:
        hit += 1
    tot += 1"""

nb.train(monks3[0])
# pdb.set_trace()
# nb.test(monks3[1])
    hmm.fit(data_train)
    print(f"Duration of training: {time.time() - start_time}")

    # evaluation hmm
    # -------------------------------------------------------------------------
    # plot confusion matrix, calculate precision, recall, f1-score
    hmm.evaluate(data_test)

    # show misclassifications
    features_test, labels_test = separate_labels_from_features(data_test)
    predictions = hmm.predict(features_test)
    show_misclassifications(data_test, predictions)

elif model_type == "NB":
    # fit naive bayes model
    # -------------------------------------------------------------------------
    nb = Naive_Bayes()
    data_train_featurized = (
        feature_maker.get_pos_features_nltk(data_train)
        if not load_entities
        else feature_maker.get_ner_features_nltk(data_train)
    )
    data_train_featurized = flatten(data_train_featurized)
    start_time = time.time()
    nb.fit_nltk(data_train_featurized)
    print(f"Duration of training: {time.time() - start_time}")

    # evaluation naive bayes
    # -------------------------------------------------------------------------
    data_test_featurized = (
        feature_maker.get_pos_features_nltk(data_test)
        if not load_entities
        else feature_maker.get_ner_features_nltk(data_test)
    )
# naive_bayes and decision_tree import paths appear elsewhere in this repo;
# the neural_network path is assumed.
from neural_network import Neural_Network
from naive_bayes import Naive_Bayes
from decision_tree import Decision_Tree


class Classifier:

    def __init__(self, classifier_type, **kwargs):
        """
        Initializer. classifier_type should be a string which refers to the
        specific algorithm the current classifier is using. Use keyword
        arguments to store parameters specific to the algorithm being used.

        E.g. if you were making a neural net with 30 input nodes, a hidden
        layer with 10 units, and 3 output nodes, your initialization might
        look something like this:

        neural_net = Classifier(weights = [], num_input=30, num_hidden=10, num_output=3)

        Here I have the weight matrices being stored in a list called weights
        (initially empty).
        """
        self.classifier_type = classifier_type
        self.params = kwargs
        """
        The kwargs you passed in just become a dictionary, so we save that
        dictionary to be used in the other methods.
        """

    def train(self, training_data):
        """
        Data should be an nx(m+1) numpy matrix where n is the number of
        examples and m is the number of features (recall that the first
        element of each vector is the label).

        I recommend implementing the specific algorithms in a separate module
        and then determining which method to call based on classifier_type.
        E.g. if you had a module called neural_nets:

        if self.classifier_type == 'neural_net':
            import neural_nets
            neural_nets.train_neural_net(self.params, training_data)

        Note that your training algorithms should be modifying the parameters,
        so make sure that your methods are actually modifying self.params.

        You should print the accuracy, precision, and recall on the training
        data.
        """
        if self.classifier_type == 'neural_network':
            # change num_input, num_output based upon the data
            self.nn = Neural_Network("neural_network", weights=[],
                                     num_input=self.params['num_input'],
                                     num_hidden=1000,
                                     num_output=self.params['num_output'],
                                     alt_weight=self.params['one'] == '1',
                                     momentum=self.params['two'] == '1')
            self.nn.train(training_data)
        elif self.classifier_type == 'naive_bayes':
            self.nb = Naive_Bayes("naive_bayes")
            self.nb.train(training_data)
        elif self.classifier_type == 'decision_tree':
            self.dt = Decision_Tree("decision_tree",
                                    pruning=self.params['one'] == '1',
                                    info_gain_ratio=self.params['two'] == '1')
            self.dt.train(training_data)

    def predict(self, data):
        """
        Predict the class of a single data vector.
        Data should be a 1x(m+1) numpy matrix where m is the number of
        features (recall that the first element of the vector is the label).

        I recommend implementing the specific algorithms in a separate module
        and then determining which method to call based on classifier_type.

        This method should return the predicted label.
        """

    def test(self, test_data):
        """
        Data should be an nx(m+1) numpy matrix where n is the number of
        examples and m is the number of features (recall that the first
        element of each vector is the label).

        You should print the accuracy, precision, and recall on the test data.
        """
        # pdb.set_trace()
        # Accuracy, recall, and precision (class 0 is treated as "relevant").
        relevant_and_retrieved, relevant, retrieved, total, hit = 0, 0, 0, 0, 0
        for person in test_data:
            predict = 0
            if self.classifier_type == 'neural_network':
                predict = self.nn.predict(person)
            elif self.classifier_type == 'naive_bayes':
                predict = self.nb.predict(person)
            elif self.classifier_type == 'decision_tree':
                predict = self.dt.predict(person)
            if predict == person[0]:
                if predict == 0:
                    relevant_and_retrieved += 1
                hit += 1
            if person[0] == 0:
                relevant += 1
            if predict == 0:
                retrieved += 1
            total += 1
        accuracy = hit / float(total)
        recall = relevant_and_retrieved / float(relevant)
        precision = relevant_and_retrieved / float(retrieved)
        print("Accuracy: ", accuracy)
        print("Precision: ", precision)
        print("Recall: ", recall)
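# A hedged usage sketch for the Classifier wrapper above, following its own
# docstrings. load_congress_data returning a (train, test) pair and the
# 'one'/'two' flag values are assumptions taken from the other scripts in
# this repo, not confirmed by this file.
import load_data as ld

train_set, test_set = ld.load_congress_data(0.75)

# Decision tree with pruning enabled and plain information gain.
clf = Classifier("decision_tree", one='1', two='0')
clf.train(train_set)
clf.test(test_set)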
def train(instances, algorithm, high_idx, learn_rate, iterate, peg_lambda,
          k_val, T, clus_lambda, K, clus_iter):
    if algorithm == "perceptron":
        classifier = Perceptron(instances, high_idx, learn_rate)
        # iterate the training
        for i in range(iterate):
            classifier.train(instances)
        return classifier
    elif algorithm == "averaged_perceptron":
        classifier2 = AveragePerceptron(instances, high_idx, learn_rate)
        for i in range(iterate):
            classifier2.train(instances)
        return classifier2
    elif algorithm == "pegasos":
        classifier3 = Pegasos(instances, high_idx, peg_lambda)
        for i in range(iterate):
            classifier3.train(instances)
        return classifier3
    elif algorithm == "margin_perceptron":
        classifier4 = PerceptronMargin(instances, high_idx, learn_rate, iterate)
        for i in range(iterate):
            classifier4.train(instances)
        return classifier4
    elif algorithm == "knn":
        classifier5 = KNN(instances, k_val, high_idx)
        for i in range(iterate):
            classifier5.train(instances)
        return classifier5
    elif algorithm == "distance_knn":
        classifier6 = Distance_KNN(instances, k_val, high_idx)
        for i in range(iterate):
            classifier6.train(instances)
        return classifier6
    elif algorithm == "adaboost":
        classifier7 = Adaboost(instances, T, high_idx)
        for i in range(iterate):
            classifier7.train(instances)
        return classifier7
    elif algorithm == "lambda_means":
        classifier8 = Lambda_Means2(instances, high_idx, clus_lambda, clus_iter)
        for i in range(iterate):
            # print("################## Training", i + 1, "out of", iterate, "###############")
            classifier8.train(instances)
        return classifier8
    elif algorithm == "nb_clustering":
        classifier9 = Naive_Bayes(instances, high_idx, K)
        for i in range(iterate):
            # print("################## Training", i + 1, "out of", iterate, "###############")
            classifier9.train(instances)
        return classifier9
    elif algorithm == "mc_perceptron":
        classifier10 = MC_Perceptron(instances, high_idx)
        for i in range(iterate):
            classifier10.train(instances)
        return classifier10
    else:
        return None
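# A hedged usage sketch for the dispatcher above. load_instances,
# max_feature_index, and every hyperparameter value here are placeholders
# for illustration, not values taken from the source.
instances = load_instances("train_data.txt")      # hypothetical loader
max_index = max_feature_index(instances)          # hypothetical helper

clf = train(instances, "pegasos", high_idx=max_index, learn_rate=1.0,
            iterate=5, peg_lambda=1e-4, k_val=5, T=10,
            clus_lambda=0.0, K=3, clus_iter=10)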
from naive_bayes import Naive_Bayes
import load_data as ld
import pdb

nb = Naive_Bayes("naive_bayes")
congress = ld.load_congress_data(0.75)
# iris = ld.load_iris(.75)
classify = nb.train(congress[0])
# nb.train(iris[0])
# pdb.set_trace()
# nb.test(congress[1])

# Manual accuracy check on the held-out split.
tot, hit = 0, 0
for person in congress[1]:
    predict = nb.predict(person)
    if predict == person[0]:
        hit += 1
    tot += 1
print(hit, tot, hit / float(tot))