Python PickledClassifier 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: spambayes.storage

클래스/타입: PickledClassifier

hotexamples.com에서의 예제들: 10

Python PickledClassifier 예제 10개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 spambayes.storage.PickledClassifier의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 각 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

PickledClassifier(4)

learn(3)

store(3)

_wordinfoget(2)

_wordinfokeys(2)

chi2_spamprob(1)

예제 #1

파일 보기

 def test_merge_to_pickle(self):
     """Import a CSV file into an existing pickle and check the merged counts.

     A pickled classifier is trained on one spam and one ham message and
     stored, a CSV file is written and imported into the same pickle via
     ``sb_dbexpimp.runImport``, and the reopened storage is then expected
     to hold the sum of the original and the CSV counts.
     """
     # Seed the pickle so that it is not empty before the import.
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     bayes.store()
     # CSV layout written below: a "nham,nspam" header row followed by
     # one "word,hamcount,spamcount" row per token.
     nham, nspam = 3,4
     csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),
                 "of":(1,0), "the":(1,2), "import":(3,1)}
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Close the handle even if a write fails.
         temp.close()
     sb_dbexpimp.runImport(TEMP_PICKLE_NAME, "pickle", False,
                           TEMP_CSV_NAME)
     bayes2 = open_storage(TEMP_PICKLE_NAME, "pickle")
     self.assertEqual(bayes2.nham, nham + bayes.nham)
     self.assertEqual(bayes2.nspam, nspam + bayes.nspam)
     words = bayes._wordinfokeys()
     words.extend(csv_data.keys())
     # Fetch the merged key collection once instead of once per word.
     merged_words = set(bayes2._wordinfokeys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in merged_words)
         # Expected count = CSV contribution + original pickle contribution.
         h, s = csv_data.get(word, (0,0))
         wi = bayes._wordinfoget(word)
         if wi:
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)

예제 #2

파일 보기

파일: email_gateway.py 프로젝트: mcrute/tiny-webapps

def looks_like_spam(message, config, section):
    """Return True when the pickled classifier scores ``message`` as spam.

    Args:
        message: Value handed unchanged to ``chi2_spamprob``.
            NOTE(review): presumably must already be in the form the
            classifier expects -- confirm against the caller.
        config: Object providing ``get(section, option)`` and
            ``getfloat(section, option)``.
        section: Name of the config section holding the ``spam.*`` options.

    Returns:
        True if the computed probability is at least the configured
        threshold, otherwise False.
    """
    log.info("Checking message for spam...")
    log.debug(message)
    pickle_filename = config.get(section, 'spam.pickle_file')
    # NOTE(review): ``or 0.90`` only takes effect when the configured value
    # is falsy (e.g. 0.0); it does not cover a missing option.
    min_spam_prob = config.getfloat(section, 'spam.min_spam_prob') or 0.90

    log.debug("Loading pickle from %s", pickle_filename)
    bayes = PickledClassifier(pickle_filename)

    spamprob = bayes.chi2_spamprob(message)

    if spamprob >= min_spam_prob:
        log.debug("spamprob %s >= %s, probably spam", spamprob, min_spam_prob)
        return True

    # Only reached when spamprob is strictly below the threshold, so the
    # message reports "<" rather than "<=".
    log.debug("spamprob %s < %s, probably not spam", spamprob, min_spam_prob)
    return False

예제 #3

파일 보기

파일: email_gateway.py 프로젝트: fpip/email-gateway

def looks_like_spam(message, config, section):
    """Decide whether ``message`` should be treated as spam.

    The classifier pickle path and the probability threshold are read from
    ``section`` of ``config`` (options ``spam.pickle_file`` and
    ``spam.min_spam_prob``); the message is then scored with
    ``PickledClassifier.chi2_spamprob``.

    Args:
        message: Passed as-is to ``chi2_spamprob`` (and logged at debug
            level).  NOTE(review): expected format not visible here.
        config: Config object exposing ``get`` and ``getfloat``.
        section: Config section that contains the spam options.

    Returns:
        True when the score is greater than or equal to the threshold,
        False otherwise.
    """
    log.info("Checking message for spam...")
    log.debug(message)
    pickle_filename = config.get(section, 'spam.pickle_file')
    # A falsy configured threshold (such as 0.0) falls back to 0.90.
    min_spam_prob = config.getfloat(section, 'spam.min_spam_prob') or 0.90

    log.debug("Loading pickle from %s", pickle_filename)
    bayes = PickledClassifier(pickle_filename)

    spamprob = bayes.chi2_spamprob(message)
    is_spam = spamprob >= min_spam_prob

    if is_spam:
        log.debug("spamprob %s >= %s, probably spam", spamprob, min_spam_prob)
    else:
        # The not-spam branch means strictly less than the threshold; the
        # previous "<=" wording contradicted the comparison above.
        log.debug("spamprob %s < %s, probably not spam",
                  spamprob, min_spam_prob)
    return is_spam

예제 #4

파일 보기

 def test_merge_to_pickle(self):
     """Check that importing a CSV into a non-empty pickle merges counts."""
     # Create a pickled classifier to merge with.
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save.
     bayes.store()
     # Create a CSV file to import.
     nham, nspam = 3, 4
     csv_data = {
         "this": (2, 1),
         "is": (0, 1),
         "a": (3, 4),
         'test': (1, 1),
         "of": (1, 0),
         "the": (1, 2),
         "import": (3, 1)
     }
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         # Header row with the message totals, then one row per word.
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Make sure the file is closed even when writing raises.
         temp.close()
     sb_dbexpimp.runImport(TEMP_PICKLE_NAME, "pickle", False, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid pickle),
     # and the data from the original pickle.
     bayes2 = open_storage(TEMP_PICKLE_NAME, "pickle")
     self.assertEqual(bayes2.nham, nham + bayes.nham)
     self.assertEqual(bayes2.nspam, nspam + bayes.nspam)
     words = bayes._wordinfokeys()
     words.extend(csv_data.keys())
     # Look the merged keys up once; the loop below only tests membership.
     merged_keys = set(bayes2._wordinfokeys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in merged_keys)
         # Start from the CSV counts and add whatever the original
         # pickle already held for this word.
         h, s = csv_data.get(word, (0, 0))
         wi = bayes._wordinfoget(word)
         if wi:
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)

예제 #5

파일 보기

 def test_pickle_export(self):
     """Export a pickled classifier to CSV and compare it with the source.

     The first CSV row is read as ``nham, nspam``; every following row is
     read as ``word, hamcount, spamcount`` and checked against the word
     info held by the original classifier.
     """
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     bayes.store()
     sb_dbexpimp.runExport(TEMP_PICKLE_NAME, "pickle", TEMP_CSV_NAME)
     # Fetch the classifier's keys once rather than once per CSV row.
     known_words = set(bayes._wordinfokeys())
     fp = open(TEMP_CSV_NAME, "rb")
     try:
         reader = sb_dbexpimp.csv.reader(fp)
         (nham, nspam) = reader.next()
         self.assertEqual(int(nham), bayes.nham)
         self.assertEqual(int(nspam), bayes.nspam)
         for (word, hamcount, spamcount) in reader:
             word = sb_dbexpimp.uunquote(word)
             self.assert_(word in known_words)
             wi = bayes._wordinfoget(word)
             self.assertEqual(int(hamcount), wi.hamcount)
             self.assertEqual(int(spamcount), wi.spamcount)
     finally:
         # The handle was previously never closed; release it whether
         # the assertions pass or fail.
         fp.close()

예제 #6

파일 보기

 def test_pickle_export(self):
     """Verify that ``runExport`` writes the pickle's data as valid CSV."""
     # Create a pickled classifier to export.
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save.
     bayes.store()
     # Export.
     sb_dbexpimp.runExport(TEMP_PICKLE_NAME, "pickle", TEMP_CSV_NAME)
     # Verify that the CSV holds all the original data (and, by using
     # the CSV module to open it, that it is valid CSV data).
     fp = open(TEMP_CSV_NAME, "rb")
     try:
         reader = sb_dbexpimp.csv.reader(fp)
         # First row: the ham and spam message totals.
         (nham, nspam) = reader.next()
         self.assertEqual(int(nham), bayes.nham)
         self.assertEqual(int(nspam), bayes.nspam)
         # Build the key collection once; each row only needs membership.
         original_words = set(bayes._wordinfokeys())
         for (word, hamcount, spamcount) in reader:
             word = sb_dbexpimp.uunquote(word)
             self.assert_(word in original_words)
             wi = bayes._wordinfoget(word)
             self.assertEqual(int(hamcount), wi.hamcount)
             self.assertEqual(int(spamcount), wi.spamcount)
     finally:
         # Close the CSV file even if an assertion above fails.
         fp.close()

예제 #7

파일 보기

파일: test_sb_dbexpimp.py 프로젝트: bloggse/spambayes-lite

 def test_merge_to_pickle(self):
     """Merge CSV data into a stored pickle and verify the combined counts."""
     # Create a pickled classifier to merge with.
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save.
     bayes.store()
     # Create a CSV file to import.
     nham, nspam = 3,4
     csv_data = {"this":(2,1), "is":(0,1), "a":(3,4), 'test':(1,1),
                 "of":(1,0), "the":(1,2), "import":(3,1)}
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Guarantee the handle is released if a write raises.
         temp.close()
     sb_dbexpimp.runImport(TEMP_PICKLE_NAME, "pickle", False,
                           TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid pickle),
     # and the data from the original pickle.
     bayes2 = open_storage(TEMP_PICKLE_NAME, "pickle")
     self.assertEqual(bayes2.nham, nham + bayes.nham)
     self.assertEqual(bayes2.nspam, nspam + bayes.nspam)
     words = bayes._wordinfokeys()
     words.extend(csv_data.keys())
     # Hoisted out of the loop: the merged key collection does not change
     # while the assertions run.
     imported_keys = set(bayes2._wordinfokeys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in imported_keys)
         h, s = csv_data.get(word, (0,0))
         wi = bayes._wordinfoget(word)
         if wi:
             # The word was already in the original pickle; its counts
             # are added to the CSV counts.
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)

예제 #8

파일 보기

파일: test_sb_dbexpimp.py 프로젝트: bloggse/spambayes-lite

 def test_pickle_export(self):
     """Round-trip check: stored pickle -> CSV export -> compare counts."""
     # Create a pickled classifier to export.
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save.
     bayes.store()
     # Export.
     sb_dbexpimp.runExport(TEMP_PICKLE_NAME, "pickle", TEMP_CSV_NAME)
     # Verify that the CSV holds all the original data (and, by using
     # the CSV module to open it, that it is valid CSV data).
     exported_words = set(bayes._wordinfokeys())
     fp = open(TEMP_CSV_NAME, "rb")
     try:
         reader = sb_dbexpimp.csv.reader(fp)
         (nham, nspam) = reader.next()
         self.assertEqual(int(nham), bayes.nham)
         self.assertEqual(int(nspam), bayes.nspam)
         for (word, hamcount, spamcount) in reader:
             word = sb_dbexpimp.uunquote(word)
             self.assert_(word in exported_words)
             wi = bayes._wordinfoget(word)
             self.assertEqual(int(hamcount), wi.hamcount)
             self.assertEqual(int(spamcount), wi.spamcount)
     finally:
         # Previously the handle was left open; always close it.
         fp.close()

예제 #9

파일 보기

파일: train_spam.py 프로젝트: mcrute/tiny-webapps

def main():
    """Train the pickled classifier named on the command line as spam.

    The last command-line argument is used as the pickle file name, the
    lines read from standard input are learned as spam, and the classifier
    is stored.

    Raises:
        SystemExit: If no pickle file argument was supplied.
    """
    # Without an argument, sys.argv[-1] is the script path itself, so the
    # script would be used as the pickle file and written to by store().
    if len(sys.argv) < 2:
        sys.exit("usage: train_spam.py PICKLE_FILE < message")
    pickle_filename = sys.argv[-1]
    bayes = PickledClassifier(pickle_filename)
    message = sys.stdin.readlines()
    bayes.learn(message, True)
    bayes.store()

예제 #10

파일 보기

파일: train_spam.py 프로젝트: fpip/email-gateway

def main():
    """Read a message from stdin and learn it as spam.

    The pickle file to update is taken from the last command-line
    argument; the trained classifier is written back with ``store()``.

    Raises:
        SystemExit: When the pickle file argument is missing.
    """
    args = sys.argv[1:]
    if not args:
        # Falling through would make sys.argv[-1] resolve to the script's
        # own path and use it as the pickle file.
        sys.exit("usage: train_spam.py PICKLE_FILE < message")
    pickle_filename = args[-1]
    bayes = PickledClassifier(pickle_filename)
    message = sys.stdin.readlines()
    bayes.learn(message, True)
    bayes.store()