def setUp(self):
     """Build the ham-corpus fixture used by the tests.

     Runs the base-class setUp, records the message factory, directory
     and cache size on the instance, calls stuff_corpus(), and finally
     creates a FileCorpus with the '?' filter.
     """
     _FileCorpusBaseTest.setUp(self)
     self.factory = FileMessageFactory()
     self.directory = 'fctesthamcorpus'
     self.cache_size = 100
     # stuff_corpus() runs before the corpus object is created.
     self.stuff_corpus()
     corpus_args = (self.factory, self.directory, '?', self.cache_size)
     self.corpus = FileCorpus(*corpus_args)
Beispiel #2
0
 def setUp(self):
     """Prepare the corpus fixture: factory, directory, cache size, corpus.

     stuff_corpus() is called after the attributes it may need are set
     and before the FileCorpus (filter '?') is constructed.
     """
     _FileCorpusBaseTest.setUp(self)
     self.factory = FileMessageFactory()
     self.cache_size = 100
     self.directory = 'fctesthamcorpus'
     self.stuff_corpus()
     ham_corpus = FileCorpus(self.factory, self.directory, '?',
                             self.cache_size)
     self.corpus = ham_corpus
Beispiel #3
0
 def test_filter(self):
        # The fixture corpus should expose exactly three messages.
        self.assertEqual(len(self.corpus.msgs), 3)
        # Rebuild the corpus with the '*' filter; a fourth message is
        # then expected.
        unfiltered_args = (self.factory, self.directory, '*', self.cache_size)
        self.corpus = FileCorpus(*unfiltered_args)
        self.assertEqual(len(self.corpus.msgs), 4)
Beispiel #4
0
class FileCorpusTest(_FileCorpusBaseTest):
    """Tests for FileCorpus using a small on-disk corpus.

    setUp() stores four messages in ``self.directory``: keys "0", "1"
    and "2", plus "10", which the '?' filter is expected to exclude
    (see test_filter).
    """

    def setUp(self):
        """Store the fixture messages and build the corpus under test."""
        _FileCorpusBaseTest.setUp(self)
        self.directory = 'fctesthamcorpus'
        self.cache_size = 100
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '?', self.cache_size)

    def stuff_corpus(self):
        """Put messages in the corpus"""
        for i, content in enumerate([good1, spam1, malformed1]):
            # self.msg ends up as the last message stored; it is the one
            # that test_removeMessage removes.
            self.msg = self.factory.create(str(i), self.directory, content)
            self.msg.store()

        # Put in a message that won't match the filter.
        msg = self.factory.create("10", self.directory, good1)
        msg.store()

    def test___init__(self):
        # The constructor arguments should be exposed as attributes.
        self.assertEqual(self.corpus.directory, self.directory)
        self.assertEqual(self.corpus.filter, '?')
        self.assertEqual(self.corpus.cacheSize, self.cache_size)

    def test_filter(self):
        # With the '?' filter only three of the four stored messages show.
        self.assertEqual(len(self.corpus.msgs), 3)
        # Try again, with all messages.
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '*', self.cache_size)
        self.assertEqual(len(self.corpus.msgs), 4)

    def test_makeMessage_no_content(self):
        # Smoke test: only checks that the call does not raise.
        key = "testmake"
        self.corpus.makeMessage(key)

    def test_makeMessage_with_content(self):
        key = "testmake"
        content = spam1
        msg = self.corpus.makeMessage(key, content)
        self.assertEqual(msg.key(), key)
        # as_string() is expected to use CRLF line endings.
        self.assertEqual(msg.as_string(), content.replace("\n", "\r\n"))

    def test_addMessage_invalid(self):
        # An object that only offers key() must be rejected.
        class msg(object):
            def key(self):
                return 'aa'

        self.assertRaises(ValueError, self.corpus.addMessage, msg())

    def test_addMessage(self):
        # The message is created for a different directory; adding it to
        # the corpus should re-home it into the corpus directory.
        msg = self.factory.create("9", 'fctestspamcorpus', good1)
        self.corpus.addMessage(msg)
        self.assertEqual(msg.directory, self.directory)
        fn = os.path.join(self.directory, "9")
        f = open(fn, "rU")
        try:
            content = f.read()
        finally:
            # Release the handle even if read() raises, so a failing
            # test does not leave the file open.
            f.close()
        self.assertEqual(content, good1)

    def test_removeMessage(self):
        fn = self.msg.pathname()
        self.assertEqual(os.path.exists(fn), True)
        self.corpus.removeMessage(self.msg)
        self.assertEqual(os.path.exists(fn), False)
Beispiel #5
0
def main(argv):
    """Print an ASCII graph of classifier success over the cached messages.

    argv is the argument list handed to getopt.getopt(); "-h" or
    "--help" prints the usage text and returns immediately.

    Every message in the spam and ham caches is loaded, the messages are
    ordered by key, and the cumulative number whose classification header
    matches the cache they were found in is plotted against the
    cumulative message count.
    """
    opts, args = getopt.getopt(argv, "h", ["help"])
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return

    # Create the corpuses and the factory that reads the messages.
    if options["pop3proxy", "cache_use_gzip"]:
        messageFactory = GzipFileMessageFactory()
    else:
        messageFactory = FileMessageFactory()
    sc = get_pathname_option("Storage", "spam_cache")
    hc = get_pathname_option("Storage", "ham_cache")
    spamCorpus = FileCorpus(messageFactory, sc)
    hamCorpus = FileCorpus(messageFactory, hc)

    # Read in all the trained messages, tagging each with the disposition
    # implied by the cache it came from.
    allTrained = {}
    for corpus, disposition in [(spamCorpus, 'Yes'), (hamCorpus, 'No')]:
        for m in corpus:
            message = mboxutils.get_message(m.getSubstance())
            message._pop3CacheDisposition = disposition
            allTrained[m.key()] = message

    # Sort the messages into the order they arrived, then work out a scaling
    # factor for the graph - 'limit' is the widest it can be in characters.
    # list() keeps this working where keys() returns a view without sort().
    keys = list(allTrained.keys())
    keys.sort()
    limit = 70
    if len(keys) < limit:
        scale = 1
    else:
        scale = len(keys) // (limit // 2)

    # Build the data - an array of cumulative success indexed by count.
    count = successful = 0
    successByCount = []
    for key in keys:
        message = allTrained[key]
        disposition = message[options["Headers", "classification_header_name"]]
        if message._pop3CacheDisposition == disposition:
            successful += 1
        count += 1
        if count % scale == (scale - 1):
            successByCount.append(successful // scale)

    # Build the graph, as a list of rows of characters.
    size = count // scale
    graph = [[" " for i in range(size + 3)] for j in range(size)]
    for c in range(size):
        graph[c][1] = "|"
        graph[c][c + 3] = "."
        # With scale == 1 and every message classified correctly the last
        # sample equals 'size', one past the top row; clamp it so the
        # graph is drawn instead of raising IndexError.
        graph[min(successByCount[c], size - 1)][c + 3] = "*"
    graph.reverse()

    # Print the graph.  Single-argument print(...) behaves the same as a
    # print statement on Python 2 and is valid on Python 3.
    print("\n   Success of the classifier over time:\n")
    print("   . - Number of messages over time")
    print("   * - Number of correctly classified messages over time\n\n")
    for row in range(size):
        line = ''.join(graph[row])
        if row == 0:
            # Top row: label with the total message count.
            print(line + " %d" % count)
        elif row == (count - successful) // scale:
            # Label the row with the final success count.
            print(line + " %d" % successful)
        else:
            print(line)
    print(" " + "_" * (size + 2))
 def test_filter(self):
     # The fixture corpus should expose exactly three messages.
     self.assertEqual(len(self.corpus.msgs), 3)
     # Try again, with all messages.
     everything = FileCorpus(self.factory, self.directory, '*',
                             self.cache_size)
     self.corpus = everything
     self.assertEqual(len(everything.msgs), 4)
class FileCorpusTest(_FileCorpusBaseTest):
    """Tests for FileCorpus using a small on-disk corpus.

    setUp() stores four messages in ``self.directory``: keys "0", "1"
    and "2", plus "10", which the '?' filter is expected to exclude
    (see test_filter).
    """

    def setUp(self):
        """Store the fixture messages and build the corpus under test."""
        _FileCorpusBaseTest.setUp(self)
        self.directory = 'fctesthamcorpus'
        self.cache_size = 100
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = FileCorpus(self.factory, self.directory, '?',
                                 self.cache_size)

    def stuff_corpus(self):
        """Put messages in the corpus"""
        for i, content in enumerate([good1, spam1, malformed1]):
            # self.msg ends up as the last message stored; it is the one
            # that test_removeMessage removes.
            self.msg = self.factory.create(str(i), self.directory, content)
            self.msg.store()

        # Put in a message that won't match the filter.
        msg = self.factory.create("10", self.directory, good1)
        msg.store()

    def test___init__(self):
        # The constructor arguments should be exposed as attributes.
        self.assertEqual(self.corpus.directory, self.directory)
        self.assertEqual(self.corpus.filter, '?')
        self.assertEqual(self.corpus.cacheSize, self.cache_size)

    def test_filter(self):
        self.assertEqual(len(self.corpus.msgs), 3)
        # Try again, with all messages.
        self.corpus = FileCorpus(self.factory, self.directory, '*',
                                 self.cache_size)
        self.assertEqual(len(self.corpus.msgs), 4)

    def test_makeMessage_no_content(self):
        # Smoke test: only checks that the call does not raise.
        key = "testmake"
        self.corpus.makeMessage(key)

    def test_makeMessage_with_content(self):
        key = "testmake"
        content = spam1
        msg = self.corpus.makeMessage(key, content)
        self.assertEqual(msg.key(), key)
        # as_string() is expected to use CRLF line endings.
        self.assertEqual(msg.as_string(), content.replace("\n", "\r\n"))

    def test_addMessage_invalid(self):
        # An object that only offers key() must be rejected.
        class msg(object):
            def key(self):
                return 'aa'

        self.assertRaises(ValueError, self.corpus.addMessage, msg())

    def test_addMessage(self):
        # The message is created for a different directory; adding it to
        # the corpus should re-home it into the corpus directory.
        msg = self.factory.create("9", 'fctestspamcorpus', good1)
        self.corpus.addMessage(msg)
        self.assertEqual(msg.directory, self.directory)
        fn = os.path.join(self.directory, "9")
        f = open(fn, "rU")
        try:
            content = f.read()
        finally:
            # Release the handle even if read() raises, so a failing
            # test does not leave the file open.
            f.close()
        self.assertEqual(content, good1)

    def test_removeMessage(self):
        fn = self.msg.pathname()
        self.assertEqual(os.path.exists(fn), True)
        self.corpus.removeMessage(self.msg)
        self.assertEqual(os.path.exists(fn), False)
Beispiel #8
0
class FileCorpusTest(_FileCorpusBaseTest):
    """Tests for FileCorpus using a small on-disk corpus.

    setUp() stores four messages in ``self.directory``: keys "0", "1"
    and "2", plus "10", which the '?' filter is expected to exclude
    (see test_filter).
    """

    def setUp(self):
        """Store the fixture messages and build the corpus under test."""
        _FileCorpusBaseTest.setUp(self)
        self.directory = 'fctesthamcorpus'
        self.cache_size = 100
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '?', self.cache_size)

    def stuff_corpus(self):
        """Put messages in the corpus"""
        for i, content in enumerate([good1, spam1, malformed1]):
            # self.msg ends up as the last message stored; it is the one
            # that test_removeMessage removes.
            self.msg = self.factory.create(str(i), self.directory, content)
            self.msg.store()

        # A fourth message, with a two-character key.
        msg = self.factory.create("10", self.directory, good1)
        msg.store()

    def test___init__(self):
        # The constructor arguments should be exposed as attributes.
        self.assertEqual(self.corpus.directory, self.directory)
        self.assertEqual(self.corpus.filter, '?')
        self.assertEqual(self.corpus.cacheSize, self.cache_size)

    def test_filter(self):
        # With the '?' filter only three of the four stored messages show.
        self.assertEqual(len(self.corpus.msgs), 3)
        # Rebuilding with '*' should expose all four.
        self.corpus = FileCorpus(self.factory, self.directory,
                                 '*', self.cache_size)
        self.assertEqual(len(self.corpus.msgs), 4)

    def test_makeMessage_no_content(self):
        # Smoke test: only checks that the call does not raise.
        key = "testmake"
        self.corpus.makeMessage(key)

    def test_makeMessage_with_content(self):
        key = "testmake"
        content = spam1
        msg = self.corpus.makeMessage(key, content)
        self.assertEqual(msg.key(), key)
        # as_string() is expected to use CRLF line endings.
        self.assertEqual(msg.as_string(), content.replace("\n", "\r\n"))

    def test_addMessage_invalid(self):
        # An object that only offers key() must be rejected.
        class msg(object):
            def key(self):
                return 'aa'

        self.assertRaises(ValueError, self.corpus.addMessage, msg())

    def test_addMessage(self):
        # The message is created for a different directory; adding it to
        # the corpus should re-home it into the corpus directory.
        msg = self.factory.create("9", 'fctestspamcorpus', good1)
        self.corpus.addMessage(msg)
        self.assertEqual(msg.directory, self.directory)
        fn = os.path.join(self.directory, "9")
        f = open(fn)
        try:
            content = f.read()
        finally:
            # Release the handle even if read() raises, so a failing
            # test does not leave the file open.
            f.close()
        self.assertEqual(content, good1)

    def test_removeMessage(self):
        fn = self.msg.pathname()
        self.assertEqual(os.path.exists(fn), True)
        self.corpus.removeMessage(self.msg)
        self.assertEqual(os.path.exists(fn), False)

class ExpiryFileCorpusTest(FileCorpusTest):
    """Run the FileCorpusTest cases against an ExpiryFileCorpus."""

    def setUp(self):
        """Build the same fixture as the parent class, but with an
        ExpiryFileCorpus whose first argument is 1.0.
        """
        # The grandparent setUp is called directly; the parent's setUp is
        # bypassed, so everything it would set is assigned here instead.
        _FileCorpusBaseTest.setUp(self)
        self.cache_size = 100
        self.directory = 'fctesthamcorpus'
        self.factory = FileMessageFactory()
        self.stuff_corpus()
        self.corpus = ExpiryFileCorpus(1.0, self.factory, self.directory,
                                       '?', self.cache_size)

def suite():
    """Return a unittest.TestSuite built from the test case classes
    listed below.
    """
    suite = unittest.TestSuite()
    clses = (FileMessageFactoryTest,
             GzipFileMessageFactoryTest,
             FileMessageTest,
             GzipFileMessageTest,
             FileCorpusTest,
             ExpiryFileCorpusTest,
             )
    for cls in clses:
        suite.addTest(unittest.makeSuite(cls))
    return suite


# Script entry point.  'suite' is appended to the arguments so that the
# runner is pointed at suite() above.
if __name__ == '__main__':
    sb_test_support.unittest_main(argv=sys.argv + ['suite'])
 def test_filter(self):
     # The fixture corpus should expose exactly three messages.
     filtered_count = len(self.corpus.msgs)
     self.assertEqual(filtered_count, 3)
     # Try again, with all messages.
     self.corpus = FileCorpus(self.factory, self.directory, '*',
                              self.cache_size)
     unfiltered_count = len(self.corpus.msgs)
     self.assertEqual(unfiltered_count, 4)