def test_fs_tsv_loader_with_nlp(self):
    """Load the sample email TSV via ``fs.tsv_loader_with_nlp`` and log the vocabulary.

    The loader is called with the sample path, ``0.1`` and an ``nlp("eng")``
    object; its five return values are unpacked and only the vocabulary is
    inspected (its length and its first five entries are logged).
    """
    sample_words = "sample_data/email/email.tsv"
    self.tlog(f"loading words => {sample_words!s}")

    english_nlp = nlp("eng")
    # NOTE(review): 0.1 is presumably the held-out test fraction -- confirm
    # against fs.tsv_loader_with_nlp.
    (
        _train_matrix,
        _train_labels,
        vocabulary,
        _test_matrix,
        _test_labels,
    ) = fs.tsv_loader_with_nlp(sample_words, 0.1, english_nlp)

    vocabulary_size = len(vocabulary)
    self.tlog(f"email data voca size : {vocabulary_size!s}")

    leading_entries = vocabulary[:5]
    self.tlog(f"voca sample : {leading_entries!s}")
def test_process(self):
    """Fit ``NaiveBayes`` on the sample email data and check its error rate.

    The data is loaded with ``fs.tsv_loader_with_nlp`` (path, ``0.3``,
    ``nlp("eng")``), the vocabulary is logged, a ``NaiveBayes`` model is built
    from the training matrix and labels and fitted, and
    ``autotest.eval_predict`` is run on the test matrix and labels. The test
    passes when the reported error rate is at most ``0.1``.
    """
    email_data_file = "sample_data/email/email.tsv"
    english_nlp = nlp("eng")

    # NOTE(review): 0.3 is presumably the held-out test fraction -- confirm
    # against fs.tsv_loader_with_nlp.
    (
        train_matrix,
        train_labels,
        vocabulary,
        test_matrix,
        test_labels,
    ) = fs.tsv_loader_with_nlp(email_data_file, 0.3, english_nlp)
    self.tlog(vocabulary)

    classifier = NaiveBayes(train_matrix, train_labels)
    classifier.fit()

    error_rate = autotest.eval_predict(
        classifier,
        test_matrix,
        test_labels,
        self.logging,
    )
    self.tlog(f"spam-mail predict (with NaiveBayes) error rate : {error_rate!s}")
    assert error_rate <= 0.1
def test_process(self):
    """Fit ``NaiveBayes`` on the sample email data using the ``"eng_lower"`` nlp object.

    The data is loaded with ``fs.tsv_loader_with_nlp`` (path, ``0.4``,
    ``nlp("eng_lower")``), the vocabulary is logged, a ``NaiveBayes`` model is
    built from the training matrix and labels and fitted, and
    ``autotest.eval_predict`` is run on the test matrix and labels. The test
    passes when the reported error rate is at most ``0.1``.
    """
    email_data_file = "sample_data/email/email.tsv"
    lowercase_nlp = nlp("eng_lower")

    # NOTE(review): 0.4 is presumably the held-out test fraction -- confirm
    # against fs.tsv_loader_with_nlp.
    (
        train_matrix,
        train_labels,
        vocabulary,
        test_matrix,
        test_labels,
    ) = fs.tsv_loader_with_nlp(email_data_file, 0.4, lowercase_nlp)
    self.tlog(vocabulary)

    classifier = NaiveBayes(train_matrix, train_labels)
    classifier.fit()

    error_rate = autotest.eval_predict(
        classifier,
        test_matrix,
        test_labels,
        self.logging,
    )
    self.tlog(f"spam-mail predict (with NaiveBayes) error rate : {error_rate!s}")
    assert error_rate <= 0.1