Example 1
import copy

def cross_validate(dataset, scramBool):
    """10-fold cross-validation; optionally scrambles a tenth of the data first."""
    global num_classes
    backup_data = copy.copy(dataset)  # shallow copy, so rows are shared with dataset
    test_results = []
    stats = []
    if scramBool:
        dataset = ten_percent_scrambler(dataset)
    backup_data = splitter(backup_data)  # split the dataset into 10 subsets
    for i in range(10):  # each of the 10 subsets takes a turn as the held-out test set
        nb.freqTable = []  # reset the classifier's frequency table between folds
        # Grab a fresh copy of the dataset each time, since pop() below removes a tenth of it.
        to_learn = copy.copy(backup_data)
        to_test = make_test_set(to_learn.pop(i))  # hold out fold i for testing
        to_learn = flatten_list(to_learn)  # merge the remaining nine folds into one training list
        nb.train(to_learn)
        to_test = nb.classify(to_test)
        test_results.append(to_test)
        stats.append(analyze(backup_data[i], to_test, num_classes))  # per-fold statistics
    # Performs analysis on the entire classified set compared to the original data.
    full_set_stats = analyze(flatten_list(backup_data), flatten_list(test_results), num_classes)
    array_printer_2d(full_set_stats)
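The snippet leans on helpers defined elsewhere (`splitter`, `flatten_list`, `make_test_set`, `ten_percent_scrambler`, `analyze`, and the `nb` module). A minimal sketch of the two structural ones, assuming `splitter` deals rows into ten near-equal buckets and `flatten_list` concatenates a list of lists; the real implementations may differ:

def splitter(data):
    """Deal rows round-robin into 10 subsets of near-equal size."""
    folds = [[] for _ in range(10)]
    for idx, row in enumerate(data):
        folds[idx % 10].append(row)
    return folds

def flatten_list(nested):
    """Concatenate a list of lists into one flat list."""
    return [row for sub in nested for row in sub]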
Example 2
def get_label():
    """
    Retrieves a tweet link from the user and runs the classifier on the tweet.

    Return values
        _: jsonified response containing a label of either 'troll' or 'nontroll'
    """

    # `request` and `jsonify` are assumed to come from Flask, and `api` to be
    # an authenticated python-twitter Api instance created at startup.
    tweet_url = request.form.get("tweet")
    tweet_id = get_id(tweet_url)

    status = api.GetStatus(tweet_id)

    label = classify(status.text)

    return jsonify({"label": label})
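`get_id` is not shown; a plausible sketch, assuming tweet URLs of the usual https://twitter.com/<user>/status/<id> shape (the project's real helper may handle more formats):

from urllib.parse import urlparse

def get_id(tweet_url):
    """Extract the numeric status ID from a tweet URL (hypothetical helper)."""
    path = urlparse(tweet_url).path          # e.g. /someuser/status/1234567890
    return path.rstrip("/").split("/")[-1]   # the trailing path segment is the ID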
Example 3
import numpy as np
import naive_bayes as nb

# trainning_dir and testing_dir are assumed to be defined elsewhere in the module.

def main():
    # handle data
    dataset, labels = nb.load_data(trainning_dir)
    vocab_list, vocab_vec_list = nb.handle_data(dataset)
    # train model
    p0_vec, p1_vec, pa = nb.train_classifier(vocab_vec_list, labels)
    # test: vectorize each test document against the training vocabulary
    test_dataset, test_labels = nb.load_data(testing_dir)
    test_vocab_vec_list = []
    for line in test_dataset:
        test_vocab_vec_list.append(nb.get_vocab_vec(line, vocab_list))
    # classify each test document and print its predicted class
    for vec in test_vocab_vec_list:
        res = nb.classify(np.array(vec), np.array(p0_vec),
                          np.array(p1_vec), pa)
        if res:
            print("侮辱类")    # "insulting"
        else:
            print("非侮辱类")  # "non-insulting"
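`nb.get_vocab_vec` is assumed to build a set-of-words vector over the training vocabulary; a minimal sketch under that assumption:

def get_vocab_vec(document, vocab_list):
    """Mark which vocabulary words occur in the document (set-of-words model)."""
    vec = [0] * len(vocab_list)
    for word in document:
        if word in vocab_list:
            vec[vocab_list.index(word)] = 1
    return vec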
Example 4
import numpy as np
import common_utils
import naive_bayes

# train data
path_train = './spam_train.csv'
sms_words, class_lables = common_utils.read_sms(path_train)
vocabulary_list = common_utils.create_vocabulary_list(sms_words)
train_marked_words = common_utils.set_of_words_list_to_vector(vocabulary_list, sms_words)
train_marked_words = np.array(train_marked_words)
p_words_spamicity, p_words_healthy, p_spam = naive_bayes.training(train_marked_words, class_lables)

# classify test data
path = './spam_data.csv'
sms_words, class_lables = common_utils.read_sms(path)
with open(path, "r") as f:
    sms_list = f.readlines()

result_list = []
for i in range(len(sms_words)):
    smsType = naive_bayes.classify(vocabulary_list, p_words_spamicity,
                                   p_words_healthy, p_spam, sms_words[i])
    # prefix each raw message with its predicted class, tab-separated
    label = "ham" if smsType == 0 else "spam"
    result_list.append(label + "\t" + sms_list[i].split('\t')[1])

common_utils.write_file("result", result_list)

# quality control
path_full_spam = './spam_full.csv'
quality_control = naive_bayes.quality_control(path_full_spam, 1000)

common_utils.write_file("quality_control", quality_control)
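`naive_bayes.training` is not shown. For a set-of-words matrix like this one, a common implementation uses Laplace smoothing and log probabilities; a sketch under those assumptions, not necessarily this repo's version:

import numpy as np

def training(train_matrix, labels):
    """Estimate per-word log-probabilities for spam (1) and ham (0) classes."""
    num_docs, num_words = train_matrix.shape
    p_spam = np.sum(labels) / float(num_docs)  # prior P(spam)
    spam_counts = np.ones(num_words)           # Laplace smoothing: start counts at 1
    ham_counts = np.ones(num_words)
    spam_total, ham_total = 2.0, 2.0
    for i in range(num_docs):
        if labels[i] == 1:
            spam_counts += train_matrix[i]
            spam_total += np.sum(train_matrix[i])
        else:
            ham_counts += train_matrix[i]
            ham_total += np.sum(train_matrix[i])
    # logs keep products of many small probabilities from underflowing
    return np.log(spam_counts / spam_total), np.log(ham_counts / ham_total), p_spam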
Example 5
    def predict(self, x):
        # thin wrapper: map the raw input, then delegate to the module-level classifier
        return naive_bayes.classify(self.nbayes_model, self._map_x(x))
Example 6
import argparse
import mnist
import naive_bayes as nb
import numpy as np

parser = argparse.ArgumentParser(description='A program for ML HW#2.')
parser.add_argument('train_img_path', help='file path of train img', type=str)
parser.add_argument('train_lbl_path', help='file path of train lbl', type=str)
parser.add_argument('test_img_path', help='file path of test img', type=str)
parser.add_argument('test_lbl_path', help='file path of test lbl', type=str)
parser.add_argument('mode', help='toggle option', type=int)
args = parser.parse_args()
print('train_img_path: {}'.format(args.train_img_path))
print('train_lbl_path: {}'.format(args.train_lbl_path))
print('test_img_path: {}'.format(args.test_img_path))
print('test_lbl_path: {}'.format(args.test_lbl_path))

train_img, train_lbl = mnist.read(args.train_img_path, args.train_lbl_path)
test_img, test_lbl = mnist.read(args.test_img_path, args.test_lbl_path)
print(train_img.shape)

nb.classify(train_img, train_lbl, test_img, test_lbl, args.mode)
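`mnist.read` is assumed to parse the standard MNIST IDX image/label files; a minimal sketch under that assumption (the course's actual reader may differ):

import struct
import numpy as np

def read(img_path, lbl_path):
    """Parse MNIST IDX files into (images, labels) numpy arrays."""
    with open(lbl_path, 'rb') as f:
        _magic, n = struct.unpack('>II', f.read(8))          # big-endian header
        labels = np.frombuffer(f.read(), dtype=np.uint8)
    with open(img_path, 'rb') as f:
        _magic, n, rows, cols = struct.unpack('>IIII', f.read(16))
        images = np.frombuffer(f.read(), dtype=np.uint8).reshape(n, rows, cols)
    return images, labels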