Example #1
0
    def test_fs_tsv_loader_with_nlp(self):
        """Load the sample email TSV with an "eng" nlp object and log the vocabulary.

        Calls ``fs.tsv_loader_with_nlp`` on ``sample_data/email/email.tsv`` and
        logs the vocabulary length and its first five entries through
        ``self.tlog``. No assertions are made; the test passes as long as
        loading and logging do not raise.
        """
        tsv_path = "sample_data/email/email.tsv"
        self.tlog(f"loading words => {tsv_path}")

        english_nlp = nlp("eng")
        # The loader yields five values; only the vocabulary (third) is used.
        # NOTE(review): 0.1 is presumably the held-out test fraction - confirm
        # against fs.tsv_loader_with_nlp.
        _train_mat, _train_labels, voca, _test_mat, _test_labels = (
            fs.tsv_loader_with_nlp(tsv_path, 0.1, english_nlp)
        )

        self.tlog(f"email data voca size : {len(voca)!s}")
        self.tlog(f"voca sample : {voca[:5]!s}")
Example #2
0
    def test_fs_tsv_loader_with_nlp(self):
        """Smoke-check ``fs.tsv_loader_with_nlp`` on the sample email data.

        Logs the path being loaded, runs the loader with an ``nlp("eng")``
        object, then logs the size of the returned vocabulary and a slice of
        its first five items. The method contains no assertions.
        """
        email_tsv = "sample_data/email/email.tsv"
        self.tlog(f"loading words => {email_tsv}")

        eng = nlp("eng")
        # Five values come back from the loader; everything except the
        # vocabulary is ignored by this test.
        # NOTE(review): the meaning of the 0.1 argument is not visible here
        # (looks like a train/test split ratio) - verify in the loader.
        loaded = fs.tsv_loader_with_nlp(email_tsv, 0.1, eng)
        _mat_train, _label_train, voca, _mat_test, _label_test = loaded

        size_message = f"email data voca size : {len(voca)!s}"
        self.tlog(size_message)
        sample_message = f"voca sample : {voca[:5]!s}"
        self.tlog(sample_message)
Example #3
0
    def test_process(self):
        """Fit ``NaiveBayes`` on the sample email data and check its error rate.

        Loads ``sample_data/email/email.tsv`` via ``fs.tsv_loader_with_nlp``
        using an ``nlp("eng")`` object, logs the vocabulary, fits a
        ``NaiveBayes`` model on the training portion, evaluates it with
        ``autotest.eval_predict`` on the test portion, and asserts that the
        reported error rate is at most 0.1.
        """
        tsv_path = "sample_data/email/email.tsv"
        english_nlp = nlp("eng")

        # NOTE(review): 0.3 is presumably the fraction reserved for testing -
        # confirm against fs.tsv_loader_with_nlp.
        loaded = fs.tsv_loader_with_nlp(tsv_path, 0.3, english_nlp)
        train_mat, train_labels, voca, test_mat, test_labels = loaded
        self.tlog(voca)

        classifier = NaiveBayes(train_mat, train_labels)
        classifier.fit()

        error_rate = autotest.eval_predict(
            classifier, test_mat, test_labels, self.logging
        )
        self.tlog(
            f"spam-mail predict (with NaiveBayes) error rate : {error_rate!s}"
        )

        assert error_rate <= 0.1
Example #4
0
    def test_process(self):
        """Train and evaluate ``NaiveBayes`` on the email sample with "eng_lower".

        The sample TSV is loaded through ``fs.tsv_loader_with_nlp`` with an
        ``nlp("eng_lower")`` object; the vocabulary is logged, a
        ``NaiveBayes`` model is fitted on the training data, and
        ``autotest.eval_predict`` computes the error rate on the test data.
        The test fails if that error rate exceeds 0.1.
        """
        lower_nlp = nlp("eng_lower")

        data_path = "sample_data/email/email.tsv"
        # NOTE(review): 0.4 looks like the test split ratio - verify in the
        # loader before relying on that reading.
        (
            mat_train,
            label_train,
            voca,
            mat_test,
            label_test,
        ) = fs.tsv_loader_with_nlp(data_path, 0.4, lower_nlp)
        self.tlog(voca)

        model = NaiveBayes(mat_train, label_train)
        model.fit()

        error_rate = autotest.eval_predict(
            model, mat_test, label_test, self.logging
        )
        report = f"spam-mail predict (with NaiveBayes) error rate : {error_rate!s}"
        self.tlog(report)

        assert error_rate <= 0.1