def cross_validate(dataset, scramBool):
    """Run 10-fold cross-validation of the naive-Bayes classifier.

    Optionally scrambles 10% of the data first, splits it into ten
    folds, then for each fold trains on the other nine and classifies
    the held-out fold. Per-fold statistics are collected and the whole
    classified set is finally compared against the original data.

    Args:
        dataset: the full labelled dataset.
        scramBool: when truthy, pass the data through ten_percent_scrambler
            before splitting.
    """
    global num_classes
    backup_data = copy.copy(dataset)
    test_results = []
    stats = []
    if scramBool:
        dataset = ten_percent_scrambler(dataset)
    backup_data = splitter(backup_data)

    # One round per fold: fold `fold` is held out, the rest is training data.
    for fold in range(10):
        nb.freqTable = []
        # Fresh shallow copy each round — pop() below removes the test fold.
        remaining = copy.copy(backup_data)
        held_out = make_test_set(remaining.pop(fold))
        training = flatten_list(remaining)
        nb.train(training)
        held_out = nb.classify(held_out)
        test_results.append(held_out)
        stats.append(analyze(backup_data[fold], held_out, num_classes))

    # Analyze the entire classified set against the original data.
    full_set_stats = analyze(flatten_list(backup_data),
                             flatten_list(test_results),
                             num_classes)
    array_printer_2d(full_set_stats)
def get_label():
    """Retrieve a tweet link from the user and classify the tweet.

    Reads the tweet URL from the POSTed form, resolves it to a status
    via the Twitter API, and classifies the status text.

    Return values
    _: jsonified response containing label of either 'troll' or 'nontroll'
    """
    url = request.form.get("tweet")
    status = api.GetStatus(get_id(url))
    return jsonify({"label": classify(status.text)})
def main():
    """Train the naive-Bayes classifier and label the whole test set.

    Loads the training data, builds the vocabulary and word vectors,
    trains the classifier, then classifies EVERY document in the test
    set and prints its predicted class.
    """
    # handle data
    dataset, labels = nb.load_data(trainning_dir)
    vocab_list, vocab_vec_list = nb.handle_data(dataset)

    # train model
    p0_vec, p1_vec, pa = nb.train_classifier(vocab_vec_list, labels)

    # test
    test_dataset, test_labels = nb.load_data(testing_dir)
    test_vocab_vec_list = [nb.get_vocab_vec(line, vocab_list)
                           for line in test_dataset]
    # BUG FIX: the original built vectors for the whole test set but only
    # classified test_vocab_vec_list[0]; classify every document instead.
    for vec in test_vocab_vec_list:
        res = nb.classify(np.array(vec), np.array(p0_vec),
                          np.array(p1_vec), pa)
        if res:
            print("侮辱类")
        else:
            print("非侮辱类")
# train data
path_train = './spam_train.csv'
sms_words, class_labels = common_utils.read_sms(path_train)
vocabulary_list = common_utils.create_vocabulary_list(sms_words)
train_marked_words = np.array(
    common_utils.set_of_words_list_to_vector(vocabulary_list, sms_words))
p_words_spamicity, p_words_healthy, p_spam = naive_bayes.training(
    train_marked_words, class_labels)

# classify test data
path = './spam_data.csv'
sms_words, class_labels = common_utils.read_sms(path)
# FIX: the original opened the file without ever closing it.
with open(path, "r") as sms_file:
    sms_list = sms_file.readlines()
# NOTE(review): result_list is assumed to be initialized earlier in the
# file (it is appended to but never created here) — verify.
for i in range(len(sms_words)):
    sms_type = naive_bayes.classify(vocabulary_list, p_words_spamicity,
                                    p_words_healthy, p_spam, sms_words[i])
    # Both branches only differed in the prefix, so compute it once.
    prefix = "ham\t" if sms_type == 0 else "spam\t"
    result_list.append(prefix + sms_list[i].split('\t')[1])
common_utils.write_file("result", result_list)

# quality control
path_full_spam = './spam_full.csv'
quality_control = naive_bayes.quality_control(path_full_spam, 1000)
common_utils.write_file("quality_control", quality_control)
def predict(self, x):
    """Classify *x* with the trained naive-Bayes model.

    The raw input is first mapped into the model's feature space,
    then handed to the classifier.
    """
    features = self._map_x(x)
    return naive_bayes.classify(self.nbayes_model, features)
import argparse

import mnist
import naive_bayes as nb
import numpy as np

# Command-line driver for ML HW#2: train/test a naive-Bayes classifier
# on an MNIST-format dataset.
parser = argparse.ArgumentParser(description='It is a program for ML HW#2.')
parser.add_argument('train_img_path', help='file path of train img', type=str)
parser.add_argument('train_lbl_path', help='file path of train lbl', type=str)
parser.add_argument('test_img_path', help='file path of test img', type=str)
parser.add_argument('test_lbl_path', help='file path of test lbl', type=str)
parser.add_argument('mode', help='toggle option', type=int)
args = parser.parse_args()

print('train_img_path: {}'.format(args.train_img_path))
print('train_lbl_path: {}'.format(args.train_lbl_path))
print('test_img_path: {}'.format(args.test_img_path))
# BUG FIX: this line printed the value of test_lbl_path under the
# label 'test_img_path' (copy-paste error).
print('test_lbl_path: {}'.format(args.test_lbl_path))

train_img, train_lbl = mnist.read(args.train_img_path, args.train_lbl_path)
test_img, test_lbl = mnist.read(args.test_img_path, args.test_lbl_path)
print(train_img.shape)

nb.classify(train_img, train_lbl, test_img, test_lbl, args.mode)
def predict(self, x):
    """Classify *x* with the trained naive-Bayes model.

    The raw input is first mapped into the model's feature space,
    then handed to the classifier.
    """
    features = self._map_x(x)
    return naive_bayes.classify(self.nbayes_model, features)