def setUp(self):
    """Build the ham corpus fixture used by the tests.

    Runs the base-class setUp, records the fixture settings on the
    instance, stores the sample messages via stuff_corpus() and finally
    opens a FileCorpus over the directory using the '?' filter.
    """
    _FileCorpusBaseTest.setUp(self)
    self.cache_size = 100
    self.directory = 'fctesthamcorpus'
    self.factory = FileMessageFactory()
    # The messages must be on disk before the corpus is created.
    self.stuff_corpus()
    self.corpus = FileCorpus(
        self.factory,
        self.directory,
        '?',
        self.cache_size,
    )
def test_filter(self):
    """Check the message count of the current corpus, then of one
    rebuilt over the same directory with the '*' filter."""
    filtered_total = len(self.corpus.msgs)
    self.assertEqual(filtered_total, 3)

    unfiltered = FileCorpus(self.factory, self.directory, '*',
                            self.cache_size)
    self.corpus = unfiltered
    self.assertEqual(len(unfiltered.msgs), 4)
class FileCorpusTest(_FileCorpusBaseTest):
    """Exercise FileCorpus construction, filtering, and the creation,
    addition and removal of messages against a small on-disk corpus."""

    def setUp(self):
        """Store the sample messages and open a corpus with the '?' filter."""
        _FileCorpusBaseTest.setUp(self)
        self.directory = 'fctesthamcorpus'
        self.cache_size = 100
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '?', self.cache_size)

    def stuff_corpus(self):
        """Put messages in the corpus"""
        # self.msg is left referring to the last message stored by this
        # loop; test_removeMessage uses it.
        for i, content in enumerate([good1, spam1, malformed1]):
            self.msg = self.factory.create(str(i), self.directory, content)
            self.msg.store()
        # One extra message with a two-character key; test_filter expects
        # it to be counted only when the '*' filter is used.
        msg = self.factory.create("10", self.directory, good1)
        msg.store()

    def test___init__(self):
        """The constructor arguments are exposed as attributes."""
        self.assertEqual(self.corpus.directory, self.directory)
        self.assertEqual(self.corpus.filter, '?')
        self.assertEqual(self.corpus.cacheSize, self.cache_size)

    def test_filter(self):
        """The '?' filter yields three messages, the '*' filter four."""
        self.assertEqual(len(self.corpus.msgs), 3)
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '*', self.cache_size)
        self.assertEqual(len(self.corpus.msgs), 4)

    def test_makeMessage_no_content(self):
        """makeMessage() accepts a key on its own (no result is checked)."""
        key = "testmake"
        self.corpus.makeMessage(key)

    def test_makeMessage_with_content(self):
        """makeMessage() keeps the key; as_string() uses CRLF line ends."""
        key = "testmake"
        content = spam1
        msg = self.corpus.makeMessage(key, content)
        self.assertEqual(msg.key(), key)
        self.assertEqual(msg.as_string(), content.replace("\n", "\r\n"))

    def test_addMessage_invalid(self):
        """An object that only provides key() is rejected with ValueError."""
        class msg(object):
            def key(self):
                return 'aa'
        self.assertRaises(ValueError, self.corpus.addMessage, msg())

    def test_addMessage(self):
        """A message created for another directory ends up in this corpus's
        directory, with its content written to disk."""
        msg = self.factory.create("9", 'fctestspamcorpus', good1)
        self.corpus.addMessage(msg)
        self.assertEqual(msg.directory, self.directory)
        fn = os.path.join(self.directory, "9")
        f = open(fn, "rU")
        # Close the handle even if the read fails.
        try:
            content = f.read()
        finally:
            f.close()
        self.assertEqual(content, good1)

    def test_removeMessage(self):
        """Removing a message deletes its file from disk."""
        fn = self.msg.pathname()
        self.assertEqual(os.path.exists(fn), True)
        self.corpus.removeMessage(self.msg)
        self.assertEqual(os.path.exists(fn), False)
def main(argv):
    """Print an ASCII graph of classifier success over time.

    Every message in the spam and ham caches is loaded, and the value of
    its classification header is compared with the cache it was found in
    ('Yes' for the spam cache, 'No' for the ham cache).  The graph plots
    the running number of messages ('.') and the running number of
    matching classifications ('*').

    argv -- option list handed to getopt (presumably sys.argv[1:];
            confirm against the caller).  "-h" / "--help" calls usage()
            and returns before anything is read.  Unrecognised options
            raise getopt.GetoptError.
    """
    opts, _args = getopt.getopt(argv, "h", ["help"])
    for opt, _value in opts:
        if opt in ("-h", "--help"):
            usage()
            return

    # Create the corpuses and the factory that reads the messages.
    if options["pop3proxy", "cache_use_gzip"]:
        messageFactory = GzipFileMessageFactory()
    else:
        messageFactory = FileMessageFactory()
    sc = get_pathname_option("Storage", "spam_cache")
    hc = get_pathname_option("Storage", "ham_cache")
    spamCorpus = FileCorpus(messageFactory, sc)
    hamCorpus = FileCorpus(messageFactory, hc)

    # Read in all the trained messages.
    allTrained = {}
    for corpus, disposition in [(spamCorpus, 'Yes'), (hamCorpus, 'No')]:
        for m in corpus:
            message = mboxutils.get_message(m.getSubstance())
            message._pop3CacheDisposition = disposition
            allTrained[m.key()] = message

    # Sort the messages into the order they arrived, then work out a scaling
    # factor for the graph - 'limit' is the widest it can be in characters.
    keys = sorted(allTrained)
    limit = 70
    if len(keys) < limit:
        scale = 1
    else:
        scale = len(keys) // (limit // 2)

    # Build the data - an array of cumulative success indexed by count.
    headerName = options["Headers", "classification_header_name"]
    count = successful = 0
    successByCount = []
    for key in keys:
        message = allTrained[key]
        disposition = message[headerName]
        if message._pop3CacheDisposition == disposition:
            successful += 1
        count += 1
        if count % scale == (scale - 1):
            successByCount.append(successful // scale)

    # Build the graph, as a list of rows of characters.
    size = count // scale
    graph = [[" " for i in range(size + 3)] for j in range(size)]
    topRow = size - 1
    for c in range(size):
        graph[c][1] = "|"
        graph[c][c + 3] = "."
        # With scale == 1 and every message classified correctly, the final
        # success value equals 'size', one past the last row; clamp it so
        # the marker lands on the top row instead of raising IndexError.
        graph[min(successByCount[c], topRow)][c + 3] = "*"
    graph.reverse()

    # Print the graph.  Each print takes a single parenthesised string, which
    # produces the same output as a statement or as a function call.
    print("\n Success of the classifier over time:\n")
    print(" . - Number of messages over time")
    print(" * - Number of correctly classified messages over time\n\n")
    for row in range(size):
        line = ''.join(graph[row])
        if row == 0:
            print(line + " %d" % count)
        elif row == (count - successful) // scale:
            print(line + " %d" % successful)
        else:
            print(line)
    print(" " + "_" * (size + 2))
def test_filter(self):
    """Check the message count of the current corpus, then of one
    rebuilt over the same directory with the '*' filter."""
    filtered_total = len(self.corpus.msgs)
    self.assertEqual(filtered_total, 3)

    # Try again, with all messages.
    unfiltered = FileCorpus(self.factory, self.directory, '*',
                            self.cache_size)
    self.corpus = unfiltered
    self.assertEqual(len(unfiltered.msgs), 4)
class FileCorpusTest(_FileCorpusBaseTest):
    """Exercise FileCorpus construction, filtering, and the creation,
    addition and removal of messages against a small on-disk corpus."""

    def setUp(self):
        """Store the sample messages and open a corpus with the '?' filter."""
        _FileCorpusBaseTest.setUp(self)
        self.directory = 'fctesthamcorpus'
        self.cache_size = 100
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '?', self.cache_size)

    def stuff_corpus(self):
        """Put messages in the corpus"""
        # self.msg is left referring to the last message stored by this
        # loop; test_removeMessage uses it.
        for i, content in enumerate([good1, spam1, malformed1]):
            self.msg = self.factory.create(str(i), self.directory, content)
            self.msg.store()
        # Put in a message that won't match the filter.
        msg = self.factory.create("10", self.directory, good1)
        msg.store()

    def test___init__(self):
        """The constructor arguments are exposed as attributes."""
        self.assertEqual(self.corpus.directory, self.directory)
        self.assertEqual(self.corpus.filter, '?')
        self.assertEqual(self.corpus.cacheSize, self.cache_size)

    def test_filter(self):
        """The '?' filter yields three messages, the '*' filter four."""
        self.assertEqual(len(self.corpus.msgs), 3)
        # Try again, with all messages.
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '*', self.cache_size)
        self.assertEqual(len(self.corpus.msgs), 4)

    def test_makeMessage_no_content(self):
        """makeMessage() accepts a key on its own (no result is checked)."""
        key = "testmake"
        self.corpus.makeMessage(key)

    def test_makeMessage_with_content(self):
        """makeMessage() keeps the key; as_string() uses CRLF line ends."""
        key = "testmake"
        content = spam1
        msg = self.corpus.makeMessage(key, content)
        self.assertEqual(msg.key(), key)
        self.assertEqual(msg.as_string(), content.replace("\n", "\r\n"))

    def test_addMessage_invalid(self):
        """An object that only provides key() is rejected with ValueError."""
        class msg(object):
            def key(self):
                return 'aa'
        self.assertRaises(ValueError, self.corpus.addMessage, msg())

    def test_addMessage(self):
        """A message created for another directory ends up in this corpus's
        directory, with its content written to disk."""
        msg = self.factory.create("9", 'fctestspamcorpus', good1)
        self.corpus.addMessage(msg)
        self.assertEqual(msg.directory, self.directory)
        fn = os.path.join(self.directory, "9")
        f = open(fn, "rU")
        # Close the handle even if the read fails.
        try:
            content = f.read()
        finally:
            f.close()
        self.assertEqual(content, good1)

    def test_removeMessage(self):
        """Removing a message deletes its file from disk."""
        fn = self.msg.pathname()
        self.assertEqual(os.path.exists(fn), True)
        self.corpus.removeMessage(self.msg)
        self.assertEqual(os.path.exists(fn), False)
class FileCorpusTest(_FileCorpusBaseTest):
    """Exercise FileCorpus construction, filtering, and the creation,
    addition and removal of messages against a small on-disk corpus."""

    def setUp(self):
        """Store the sample messages and open a corpus with the '?' filter."""
        _FileCorpusBaseTest.setUp(self)
        self.directory = 'fctesthamcorpus'
        self.cache_size = 100
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '?', self.cache_size)

    def stuff_corpus(self):
        """Put messages in the corpus"""
        # self.msg is left referring to the last message stored by this
        # loop; test_removeMessage uses it.
        for i, content in enumerate([good1, spam1, malformed1]):
            self.msg = self.factory.create(str(i), self.directory, content)
            self.msg.store()
        # One extra message with a two-character key; test_filter expects
        # it to be counted only when the '*' filter is used.
        msg = self.factory.create("10", self.directory, good1)
        msg.store()

    def test___init__(self):
        """The constructor arguments are exposed as attributes."""
        self.assertEqual(self.corpus.directory, self.directory)
        self.assertEqual(self.corpus.filter, '?')
        self.assertEqual(self.corpus.cacheSize, self.cache_size)

    def test_filter(self):
        """The '?' filter yields three messages, the '*' filter four."""
        self.assertEqual(len(self.corpus.msgs), 3)
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '*', self.cache_size)
        self.assertEqual(len(self.corpus.msgs), 4)

    def test_makeMessage_no_content(self):
        """makeMessage() accepts a key on its own (no result is checked)."""
        key = "testmake"
        self.corpus.makeMessage(key)

    def test_makeMessage_with_content(self):
        """makeMessage() keeps the key; as_string() uses CRLF line ends."""
        key = "testmake"
        content = spam1
        msg = self.corpus.makeMessage(key, content)
        self.assertEqual(msg.key(), key)
        self.assertEqual(msg.as_string(), content.replace("\n", "\r\n"))

    def test_addMessage_invalid(self):
        """An object that only provides key() is rejected with ValueError."""
        class msg(object):
            def key(self):
                return 'aa'
        self.assertRaises(ValueError, self.corpus.addMessage, msg())

    def test_addMessage(self):
        """A message created for another directory ends up in this corpus's
        directory, with its content written to disk."""
        msg = self.factory.create("9", 'fctestspamcorpus', good1)
        self.corpus.addMessage(msg)
        self.assertEqual(msg.directory, self.directory)
        fn = os.path.join(self.directory, "9")
        f = open(fn)
        # Close the handle even if the read fails.
        try:
            content = f.read()
        finally:
            f.close()
        self.assertEqual(content, good1)

    def test_removeMessage(self):
        """Removing a message deletes its file from disk."""
        fn = self.msg.pathname()
        self.assertEqual(os.path.exists(fn), True)
        self.corpus.removeMessage(self.msg)
        self.assertEqual(os.path.exists(fn), False)


class ExpiryFileCorpusTest(FileCorpusTest):
    """Run every FileCorpusTest test against an ExpiryFileCorpus."""

    def setUp(self):
        """Same fixture as FileCorpusTest, but the corpus is an
        ExpiryFileCorpus."""
        _FileCorpusBaseTest.setUp(self)
        self.cache_size = 100
        self.directory = 'fctesthamcorpus'
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        # NOTE(review): the leading 1.0 is presumably the expiry period;
        # confirm against ExpiryFileCorpus.
        self.corpus = ExpiryFileCorpus(1.0, self.factory, self.directory,
                                       '?', self.cache_size)


def suite():
    """Return a TestSuite holding the tests of every test class listed."""
    tests = unittest.TestSuite()
    clses = (FileMessageFactoryTest,
             GzipFileMessageFactoryTest,
             FileMessageTest,
             GzipFileMessageTest,
             FileCorpusTest,
             ExpiryFileCorpusTest,
             )
    for cls in clses:
        tests.addTest(unittest.makeSuite(cls))
    return tests


if __name__ == '__main__':
    sb_test_support.unittest_main(argv=sys.argv + ['suite'])