Ejemplo n.º 1
0
 def setUp(self):
     """Open the plain-text fixture, keep its raw bytes, and build the email under test."""
     fixture_path = './tests/fixtures/plain.eml'
     self.plain_file = fixture_path
     handle = io.open(fixture_path, 'rb')
     self.plaintext = handle
     self.text = handle.read()
     # read() left the stream at EOF; rewind before handing it to EmailObject.
     handle.seek(0)
     self.plain_email = EmailObject(handle)
     # No cap on assertion diff length (unittest's maxDiff attribute).
     self.maxDiff = None
Ejemplo n.º 2
0
    def train(self):
        """Consume every queued (category, path) pair and update the counters.

        For each queued email the category is added to ``self.categories``.
        For every token produced by ``Tokenizer.unique_tokenizer`` over the
        email body, the per-category count in ``self.training`` and the
        ``'_all'`` and per-category entries of ``self.totals`` are each
        incremented by one. The queue is emptied afterwards.
        """
        for category, path in self.to_train:
            # Keep the handle open until the body has been tokenized, then
            # close it deterministically instead of leaking one per email.
            with io.open(path, 'rb') as eml_file:
                email = EmailObject(eml_file)

                self.categories.add(category)

                for token in Tokenizer.unique_tokenizer(email.body()):
                    self.training[category][token] += 1
                    self.totals['_all'] += 1
                    self.totals[category] += 1

        # Empty the queue so a second call does no work (an empty dict
        # iterates as empty, so the loop above simply does nothing).
        self.to_train = {}
class TestHTMLEmail(unittest.TestCase):
  """Checks EmailObject against the HTML fixture email."""

  def setUp(self):
    """Read the raw fixture, then rewind it and hand it to EmailObject."""
    self.html_file = io.open('./tests/fixtures/html.eml', 'rb')
    # Close the fixture after every test so handles do not leak.
    self.addCleanup(self.html_file.close)
    self.html = self.html_file.read()
    self.html_file.seek(0)
    self.html_email = EmailObject(self.html_file)

  def test_parses_stores_inner_text_html(self):
    """body() must equal the text BeautifulSoup extracts from the raw body."""
    # Everything after the first blank line is taken as the message body.
    body = "\n\n".join(self.html.split("\n\n")[1:])
    # NOTE(review): no parser is named, so bs4 chooses one itself; confirm
    # this matches whatever EmailObject uses.
    expected = BeautifulSoup(body).text
    self.assertEqual(self.html_email.body(), expected)

  def test_stores_subject(self):
    """subject() must equal the text following 'Subject: ' in the raw file."""
    subject = re.search("Subject: (.*)", self.html).group(1)
    self.assertEqual(self.html_email.subject(), subject)
class TestHTMLEmail(unittest.TestCase):
    """Checks EmailObject against the HTML fixture email."""

    def setUp(self):
        """Read the raw fixture, then rewind it and hand it to EmailObject."""
        self.html_file = io.open("./tests/fixtures/html.eml", "rb")
        # Register the close so the handle is released after each test.
        self.addCleanup(self.html_file.close)
        self.html = self.html_file.read()
        self.html_file.seek(0)
        self.html_email = EmailObject(self.html_file)

    def test_parses_stores_inner_text_html(self):
        """body() must equal the text BeautifulSoup extracts from the raw body."""
        # The body is whatever follows the first blank line of the raw file.
        body = "\n\n".join(self.html.split("\n\n")[1:])
        # NOTE(review): no parser is named, so bs4 chooses one itself;
        # confirm this matches whatever EmailObject uses.
        expected = BeautifulSoup(body).text
        self.assertEqual(self.html_email.body(), expected)

    def test_stores_subject(self):
        """subject() must equal the text following 'Subject: ' in the raw file."""
        subject = re.search("Subject: (.*)", self.html).group(1)
        self.assertEqual(self.html_email.subject(), subject)
Ejemplo n.º 5
0
class TestHTMLEmail(unittest.TestCase):
    """Checks EmailObject against the HTML fixture, read as raw bytes."""

    def setUp(self):
        """Read the raw fixture bytes, then rewind and hand the stream to EmailObject."""
        self.html_file = io.open('./tests/fixtures/html.eml', 'rb')
        # Release the fixture handle after every test instead of leaking it.
        self.addCleanup(self.html_file.close)
        self.html = self.html_file.read()
        self.html_file.seek(0)
        self.html_email = EmailObject(self.html_file)
        # No cap on the length of assertion diffs.
        self.maxDiff = None

    def test_parses_stores_inner_text_html(self):
        """body() must equal the text html.parser extracts from the raw body."""
        # The body is everything after the first blank line of the raw bytes.
        body = b'\n\n'.join(self.html.split(b'\n\n')[1:])
        expected = BeautifulSoup(body, features='html.parser').text
        self.assertEqual(self.html_email.body(), expected)

    def test_stores_subject(self):
        """The stringified subject() must equal the 'Subject: ' match in the raw file."""
        # NOTE(review): on Python 3, str() of a bytes object is its repr
        # (b'...'), so '.*' would run past the header line; confirm the
        # target interpreter or decode instead.
        subject = re.search('Subject: (.*)', str(self.html)).group(1)
        self.assertEqual(str(self.html_email.subject()), subject)
Ejemplo n.º 6
0
 def setUp(self):
     """Create a SpamTrainer from three labelled fixtures and load one sample email."""
     plain_path = './tests/fixtures/plain.eml'
     small_path = './tests/fixtures/small.eml'
     # [category, fixture path] pairs; the plain fixture appears under two labels.
     self.training = [
         ['spam', plain_path],
         ['ham', small_path],
         ['scram', plain_path],
     ]
     self.trainer = SpamTrainer(self.training)
     with io.open(plain_path, 'rb') as sample:
         self.email = EmailObject(sample)
Ejemplo n.º 7
0
 def setUp(self):
     """Create a SpamTrainer from three labelled fixtures and load one sample email."""
     # [category, fixture path] pairs handed to the trainer.
     self.training = [['spam', './tests/fixtures/plain.eml'],
                      ['ham', './tests/fixtures/small.eml'],
                      ['scram', './tests/fixtures/plain.eml']]
     self.trainer = SpamTrainer(self.training)
     eml_file = io.open('./tests/fixtures/plain.eml', 'r')
     # The handle used to be a throwaway local nothing could ever close.
     # Register its close() so it is released after each test.
     # Assumes the enclosing class is a unittest.TestCase (header not visible here).
     self.addCleanup(eml_file.close)
     self.email = EmailObject(eml_file)
Ejemplo n.º 8
0
class TestPlaintextEmailObject(unittest.TestCase):
    """Checks EmailObject against the plain-text fixture, read as raw bytes."""

    # NOTE(review): this is a literal backslash, space, 'n' and not a newline
    # sequence; confirm it is the intended separator.
    CLRF = b'\\ n'

    def setUp(self):
        """Read the raw fixture bytes, then rewind and hand the stream to EmailObject."""
        self.plain_file = './tests/fixtures/plain.eml'
        self.plaintext = io.open(self.plain_file, 'rb')
        # Release the fixture handle after every test instead of leaking it.
        self.addCleanup(self.plaintext.close)
        self.text = self.plaintext.read()
        self.plaintext.seek(0)
        self.plain_email = EmailObject(self.plaintext)
        # No cap on the length of assertion diffs.
        self.maxDiff = None

    def test_parse_plain_body(self):
        """body() must equal the raw text up to the first CLRF occurrence."""
        # [:1] keeps only the chunk before the first separator (the whole
        # text when the separator never occurs).
        body = self.CLRF.join(self.text.split(self.CLRF)[:1])
        self.assertEqual(self.plain_email.body(), body)

    def test_parses_the_subject(self):
        """The stringified subject() must equal the 'Subject: ' match in the raw file."""
        # NOTE(review): on Python 3, str() of a bytes object is its repr
        # (b'...'); confirm the target interpreter or decode instead.
        subject = re.search('Subject: (.*)', str(self.text)).group(1)
        self.assertEqual(str(self.plain_email.subject()), subject)
class TestPlaintextEmailObject(unittest.TestCase):
  """Checks EmailObject against the plain-text fixture email."""

  # Blank line separating the header block from the body in the raw file.
  CLRF = "\n\n"

  def setUp(self):
    """Read the fixture as text, then rewind it and hand it to EmailObject."""
    self.plain_file = './tests/fixtures/plain.eml'
    self.plaintext = io.open(self.plain_file, 'r')
    # Release the fixture handle after every test instead of leaking it.
    self.addCleanup(self.plaintext.close)
    self.text = self.plaintext.read()
    self.plaintext.seek(0)
    self.plain_email = EmailObject(self.plaintext)

  def test_parse_plain_body(self):
    """body() must equal everything after the first blank line of the raw text."""
    body = self.CLRF.join(self.text.split(self.CLRF)[1:])
    self.assertEqual(self.plain_email.body(), body)

  def test_parses_the_subject(self):
    """subject() must equal the text following 'Subject: ' in the raw file."""
    subject = re.search("Subject: (.*)", self.text).group(1)
    self.assertEqual(self.plain_email.subject(), subject)
class TestHTMLEmail(unittest.TestCase):
    """Checks EmailObject against the HTML fixture, decoded as UTF-8."""

    def setUp(self):
        """Decode the fixture for the assertions and build the EmailObject from the same stream."""
        with io.open('./tests/fixtures/html.eml', 'rb') as fixture:
            raw_bytes = fixture.read()
            self.html = raw_bytes.decode('utf-8')
            # Rewind: the read above consumed the stream.
            fixture.seek(0)
            self.html_email = EmailObject(fixture)

    def test_parses_stores_inner_text_html(self):
        """body() must equal the text html.parser extracts from the raw body."""
        # Everything after the first blank line (empty when there is none).
        raw_body = self.html.partition("\n\n")[2]
        soup = BeautifulSoup(raw_body, 'html.parser')
        self.assertEqual(self.html_email.body(), soup.text)

    def test_stores_subject(self):
        """subject() must equal the text following 'Subject: ' in the raw file."""
        match = re.search("Subject: (.*)", self.html)
        self.assertEqual(self.html_email.subject(), match.group(1))
Ejemplo n.º 11
0
class TestPlaintextEmailObject(unittest.TestCase):
    """Checks EmailObject against the plain-text fixture email."""

    # Blank line separating the header block from the body in the raw file.
    CLRF = "\n\n"

    def setUp(self):
        """Read the fixture as text, then rewind it and hand it to EmailObject."""
        self.plain_file = './tests/fixtures/plain.eml'
        self.plaintext = io.open(self.plain_file, 'r')
        # Register the close so the handle is released after each test.
        self.addCleanup(self.plaintext.close)
        self.text = self.plaintext.read()
        self.plaintext.seek(0)
        self.plain_email = EmailObject(self.plaintext)

    def test_parse_plain_body(self):
        """body() must equal everything after the first blank line of the raw text."""
        body = self.CLRF.join(self.text.split(self.CLRF)[1:])
        self.assertEqual(self.plain_email.body(), body)

    def test_parses_the_subject(self):
        """subject() must equal the text following 'Subject: ' in the raw file."""
        subject = re.search("Subject: (.*)", self.text).group(1)
        self.assertEqual(self.plain_email.subject(), subject)
Ejemplo n.º 12
0
def parse_emails(keyfile):
    """Build an EmailObject for every ``<label> <path>`` line of *keyfile*.

    Args:
        keyfile: Path (str) of an index file, read in binary mode. Each
            non-blank line holds a label, one space, then an email file path.

    Returns:
        A list of EmailObject instances in keyfile order, each constructed
        with ``category`` set to the line's label (bytes).

    Raises:
        ValueError: If a non-blank line has no space between label and path.
    """
    emails = []
    print('parsing emails for ' + keyfile)

    # Context manager so the index file is closed even when a line is bad.
    with io.open(keyfile, 'rb') as index:
        for line in index:
            entry = line.rstrip()
            if not entry:
                # Skip blank or trailing lines rather than failing to unpack.
                continue
            # Split only once so a path containing spaces stays intact.
            label, path = entry.split(b' ', 1)
            with io.open(path, 'rb') as f:
                emails.append(EmailObject(f, category=label))

    print('Done parsing files for ' + keyfile)
    return emails
class TestMultipartEmailObject(unittest.TestCase):
  """Checks EmailObject against the multipart fixture email."""

  def setUp(self):
    """Read the fixture as text, then rewind it and hand it to EmailObject."""
    self.multipart_file = './tests/fixtures/multipart.eml'
    self.multipart = io.open(self.multipart_file, 'r')
    # Release the fixture handle after every test instead of leaking it.
    self.addCleanup(self.multipart.close)
    self.text = self.multipart.read()
    self.multipart.seek(0)
    self.multipart_email = EmailObject(self.multipart)

  def test_parse_concatenated_body_of_text(self):
    """body() must equal the decoded text/plain and text/html payloads in walk order."""
    internal_mail = self.multipart_email.mail
    # assertTrue, unlike a bare assert, still runs under ``python -O``.
    self.assertTrue(internal_mail.is_multipart())

    body = ''
    for part in internal_mail.walk():
      # Both text flavours contribute the same way, so one prefix check
      # replaces the two identical branches.
      if part.get_content_type().startswith(("text/plain", "text/html")):
        # NOTE(review): on Python 3 get_payload(decode=True) returns bytes,
        # which cannot be appended to a str; confirm the target interpreter.
        body += part.get_payload(decode=True)
    self.assertEqual(self.multipart_email.body(), body)

  def test_stores_subject(self):
    """subject() must equal the text following 'Subject: ' in the raw file."""
    subject = re.search("Subject: (.*)", self.text).group(1)
    self.assertEqual(self.multipart_email.subject(), subject)
Ejemplo n.º 14
0
class TestMultipartEmailObject(unittest.TestCase):
    """Checks EmailObject against the multipart fixture email."""

    def setUp(self):
        """Read the fixture as text, then rewind it and hand it to EmailObject."""
        self.multipart_file = './tests/fixtures/multipart.eml'
        self.multipart = io.open(self.multipart_file, 'r')
        # Register the close so the handle is released after each test.
        self.addCleanup(self.multipart.close)
        self.text = self.multipart.read()
        self.multipart.seek(0)
        self.multipart_email = EmailObject(self.multipart)

    def test_parse_concatenated_body_of_text(self):
        """body() must equal the decoded text/plain and text/html payloads in walk order."""
        internal_mail = self.multipart_email.mail
        # assertTrue, unlike a bare assert, still runs under ``python -O``.
        self.assertTrue(internal_mail.is_multipart())

        body = ''
        for part in internal_mail.walk():
            # Both text flavours contribute the same way, so one prefix
            # check replaces the two identical branches.
            if part.get_content_type().startswith(("text/plain", "text/html")):
                # NOTE(review): on Python 3 get_payload(decode=True) returns
                # bytes, which cannot be appended to a str; confirm the
                # target interpreter.
                body += part.get_payload(decode=True)
        self.assertEqual(self.multipart_email.body(), body)

    def test_stores_subject(self):
        """subject() must equal the text following 'Subject: ' in the raw file."""
        subject = re.search("Subject: (.*)", self.text).group(1)
        self.assertEqual(self.multipart_email.subject(), subject)
Ejemplo n.º 15
0
 def setUp(self):
   """Open the HTML fixture in binary mode, keep its raw content, and build the email under test."""
   stream = io.open('./tests/fixtures/html.eml', 'rb')
   self.html_file = stream
   self.html = stream.read()
   # The read consumed the stream; rewind before EmailObject receives it.
   stream.seek(0)
   self.html_email = EmailObject(stream)
 def setUp(self):
     """Decode the plain-text fixture for the assertions and build the email from the same stream."""
     fixture_path = './tests/fixtures/plain.eml'
     self.plain_file = fixture_path
     with io.open(fixture_path, 'rb') as stream:
         raw_bytes = stream.read()
         self.text = raw_bytes.decode('utf-8')
         # Rewind so EmailObject receives the stream from its first byte.
         stream.seek(0)
         self.plain_email = EmailObject(stream)
Ejemplo n.º 17
0
 def setUp(self):
     """Open the multipart fixture as text, keep its content, and build the email under test."""
     fixture_path = './tests/fixtures/multipart.eml'
     self.multipart_file = fixture_path
     stream = io.open(fixture_path, 'r')
     self.multipart = stream
     self.text = stream.read()
     # The read consumed the stream; rewind before EmailObject receives it.
     stream.seek(0)
     self.multipart_email = EmailObject(stream)
 def setUp(self):
   """Open the plain-text fixture as text, keep its content, and build the email under test."""
   fixture_path = './tests/fixtures/plain.eml'
   self.plain_file = fixture_path
   stream = io.open(fixture_path, 'r')
   self.plaintext = stream
   self.text = stream.read()
   # Rewind so EmailObject receives the stream from the beginning.
   stream.seek(0)
   self.plain_email = EmailObject(stream)
 def setUp(self):
     """Decode the HTML fixture for the assertions and build the email from the same stream."""
     with io.open('./tests/fixtures/html.eml', 'rb') as fixture:
         raw_bytes = fixture.read()
         self.html = raw_bytes.decode('utf-8')
         # Rewind: the read above left the stream at EOF.
         fixture.seek(0)
         self.html_email = EmailObject(fixture)
 def setUp(self):
     """Open the HTML fixture in binary mode, keep its raw content, and build the email under test."""
     handle = io.open("./tests/fixtures/html.eml", "rb")
     self.html_file = handle
     raw = handle.read()
     self.html = raw
     # Rewind so EmailObject receives the stream from its first byte.
     handle.seek(0)
     self.html_email = EmailObject(handle)
 def setUp(self):
   """Decode the multipart fixture for the assertions and build the email from the same stream."""
   fixture_path = './tests/fixtures/multipart.eml'
   self.multipart_file = fixture_path
   with io.open(fixture_path, 'rb') as stream:
     raw_bytes = stream.read()
     self.text = raw_bytes.decode('utf-8')
     # Rewind: the read above left the stream at EOF.
     stream.seek(0)
     self.multipart_email = EmailObject(stream)
 def setUp(self):
   """Open the multipart fixture as text, keep its content, and build the email under test."""
   path = './tests/fixtures/multipart.eml'
   self.multipart_file = path
   handle = io.open(path, 'r')
   self.multipart = handle
   content = handle.read()
   self.text = content
   # The read consumed the stream; rewind before EmailObject receives it.
   handle.seek(0)
   self.multipart_email = EmailObject(handle)