Esempio n. 1
0
class MBoxReaderTests(unittest.TestCase):
    """
    Testing the mbox reader for email extraction.

    Every test runs against a fresh ``MBoxReader`` built from the ``MBOX``
    fixture path in ``setUp``.
    """

    def setUp(self):
        """
        Create the reader under test from the MBOX fixture.
        """
        self.reader = MBoxReader(MBOX)

    def tearDown(self):
        """
        Drop the reference to the reader created in setUp.
        """
        self.reader = None

    def test_header_analysis(self):
        """
        Test the header analysis functionality
        """
        headers = self.reader.header_analysis()
        self.assertEqual(len(headers), 25)

        with open(HEADERS, 'r') as f:
            expected = json.load(f)

        # Every expected header must be reported with the expected count.
        for header, count in expected.items():
            self.assertEqual(count, headers[header])

    def test_count(self):
        """
        Test that the number of emails is expected
        """
        self.assertEqual(self.reader.count(), 140)
        self.assertEqual(self.reader.count(), len(self.reader))

    def test_extract(self):
        """
        Make sure that extract does not error
        """
        # isinstance() only accepts types, so "may be None" has to be
        # spelled type(None); a bare None in the tuple raises TypeError as
        # soon as the value does not match one of the earlier types.
        none_type = type(None)

        # Pre-initialise the counter so an empty extraction fails the final
        # assertion instead of raising NameError on an unbound loop variable.
        total = 0
        for total, msg in enumerate(self.reader.extract(), start=1):

            # Some simple type checking
            self.assertIsInstance(msg, EmailMeta)
            self.assertIsInstance(msg.sender, EmailAddress)
            self.assertIsInstance(msg.recipients, list)
            self.assertIsInstance(msg.copied, list)
            self.assertIsInstance(msg.subject, string_types + (none_type, ))
            self.assertIsInstance(msg.date, (datetime, none_type))

        self.assertEqual(total, 140)

    def test_graph_extract(self):
        """
        Make sure that extract graph does not error
        """
        G = self.reader.extract_graph()
        self.assertEqual(nx.number_of_nodes(G), 7)
        self.assertEqual(nx.number_of_edges(G), 6)
        self.assertFalse(nx.is_directed(G))
Esempio n. 2
0
    def timed_inner(path):
        """
        Count, per email address, the number of messages it takes part in.

        For each message yielded by ``MBoxReader(path).extract()`` the sender,
        recipients and copied addresses are gathered; ``None`` entries are
        dropped and every distinct ``.email`` value is counted once for that
        message. Returns the resulting ``FreqDist``.
        """
        mbox = MBoxReader(path)
        tally = FreqDist()

        for message in mbox.extract():
            participants = (
                [message.sender]
                + list(message.recipients)
                + list(message.copied)
            )

            # Skip missing addresses and collapse duplicates within a message.
            unique_emails = {
                entry.email for entry in participants if entry is not None
            }

            for address in unique_emails:
                tally[address] += 1

        return tally
Esempio n. 3
0
 def timed_inner(path):
     """
     Build an MBoxReader for ``path`` and return its extracted graph.
     """
     return MBoxReader(path).extract_graph()
Esempio n. 4
0
 def timed_inner(path):
     """
     Build an MBoxReader for ``path`` and return its header analysis.
     """
     return MBoxReader(path).header_analysis()
Esempio n. 5
0
 def timed_inner(path, outpath):
     """
     Extract the graph from the mailbox at ``path`` and write it as GraphML.

     The graph is written to ``outpath`` with ``nx.write_graphml``; the
     reader's ``errors`` attribute is returned afterwards.
     """
     mbox = MBoxReader(path)
     nx.write_graphml(mbox.extract_graph(), outpath)
     return mbox.errors
Esempio n. 6
0
 def timed_inner(path):
     """
     Build an MBoxReader for ``path`` and return the result of ``count()``.
     """
     return MBoxReader(path).count()
Esempio n. 7
0
 def timed_inner(path):
     """
     Return ``count()`` of a reader opened on the mailbox at ``path``.
     """
     mbox = MBoxReader(path)
     total = mbox.count()
     return total
Esempio n. 8
0
 def setUp(self):
     """
     Create the reader under test from the MBOX fixture.
     """
     fixture_reader = MBoxReader(MBOX)
     self.reader = fixture_reader