コード例 #1
0
 def test_ratio(self):
     """
     Check ratio() against hand-counted values for a fixed sample.

     The sample string holds seven 'a' characters and six 'b' characters,
     so the expected ratios are roughly 7/6 and 6/7.
     """
     counts = FreqDist('aaabbbaaabccddeeffbbccddeegjja')
     expectations = (
         (('a', 'b'), 1.16666667),
         (('b', 'a'), 0.85714285),
     )
     for (first, second), expected in expectations:
         self.assertAlmostEqual(counts.ratio(first, second), expected)
コード例 #2
0
    def extract_graph(self):
        """
        Builds a networkx Graph linking email addresses that share a message.

        For every extracted email, each pair of distinct addresses among the
        sender, recipients and copied parties is counted. Every counted pair
        becomes an edge whose "weight" is the pair's frequency as reported by
        the FreqDist.
        """
        pair_counts = FreqDist()
        for message in self.extract():
            # Everyone attached to this message: sender, then recipients, then copied
            parties = [message.sender]
            parties += message.recipients
            parties += message.copied

            # Skip None participants and empty addresses; the set drops duplicates
            unique = {
                party.email for party in parties
                if party is not None and party.email
            }

            # Sorting keeps the order inside each pair consistent across messages
            for pair in combinations(sorted(unique), 2):
                pair_counts[pair] += 1

        graph = nx.Graph(name="Email Network", mbox=self.path, extracted=strfnow())
        for pair in pair_counts.keys():
            graph.add_edge(*pair, weight=pair_counts.freq(pair))

        return graph
コード例 #3
0
 def test_str(self):
     """
     Smoke-test the string representations of a frequency distribution.

     Calls str(), repr() and pprint() on a distribution built from random
     characters and fails the test if any of them raises. The values they
     produce are not inspected.
     """
     # Built outside the try block so a construction problem surfaces as
     # its own error instead of being reported as a stringification failure
     dist = FreqDist(random_characters(1000, 'abc'))

     try:
         str(dist)
         repr(dist)
         dist.pprint()
     except Exception as e:
         self.fail("Stringification failed: {}".format(e))
コード例 #4
0
    def test_inverse_ratio(self):
        """
        Check that ratio(a, b) and ratio(b, a) are reciprocals of each other
        """
        counts = FreqDist(random_characters(1000, 'abc'))
        a_over_b = counts.ratio('a', 'b')
        b_over_a = counts.ratio('b', 'a')

        # Invert both directions up front, then compare each with its opposite
        inverted_a_over_b = 1.0 / a_over_b
        inverted_b_over_a = 1.0 / b_over_a

        self.assertAlmostEqual(inverted_a_over_b, b_over_a)
        self.assertAlmostEqual(inverted_b_over_a, a_over_b)
コード例 #5
0
    def test_freq(self):
        """
        Check freq() on a sample in which the share of 'd' is known exactly
        """
        # 90 random draws from 'abc' followed by exactly 10 'd' characters
        observed = list(random_characters(90, 'abc')) + ['d'] * 10
        counts = FreqDist(observed)

        self.assertEqual(counts.N, len(observed))
        self.assertEqual(counts.B, 4)
        self.assertAlmostEqual(counts.freq('d'), 0.1)

        # Each random character is expected to land strictly between 0 and 1
        for char in ('a', 'b', 'c'):
            self.assertGreater(counts.freq(char), 0.0)
            self.assertLess(counts.freq(char), 1.0)
コード例 #6
0
    def test_dump_and_load(self):
        """
        Round-trip a frequency distribution through dump() and load()
        """
        original = FreqDist(random_characters(1000))

        # Serialize the distribution into an in-memory stream
        stream = StringIO()
        original.dump(stream)

        # Rewind so that load() reads from the start of what was written
        stream.seek(0)
        restored = FreqDist.load(stream)

        self.assertEqual(original, restored)
コード例 #7
0
    def header_analysis(self):
        """
        Counts how often each header key occurs across the messages in the Mbox

        Returns a FreqDist keyed by header name. The additional key
        'X-Tribe-Message-Count' is incremented once for every message.
        """
        counts = FreqDist()
        for message in self:
            # One tick per message, stored alongside the real header counts
            counts['X-Tribe-Message-Count'] += 1
            for header in message.keys():
                counts[header] += 1

        return counts
コード例 #8
0
    def test_memoized_n_samples(self):
        """
        Check that N keeps its cached value until that value is deleted
        """
        counts = FreqDist(random_characters(100))
        self.assertEqual(counts.N, 100)

        # Record 100 further observations after N has already been read
        for char in random_characters(100):
            counts[char] += 1

        # The stale value is still reported; deleting it forces a recount
        self.assertEqual(counts.N, 100)
        del counts.N
        self.assertEqual(counts.N, 200)
コード例 #9
0
    def test_memoized_b_bins(self):
        """
        Check that B keeps its cached value until that value is deleted
        """
        counts = FreqDist(random_characters(1000))
        self.assertEqual(counts.B, 26)

        # Count further characters drawn from 'abcdef' after B has been read
        for char in random_characters(100, 'abcdef'):
            counts[char] += 1

        # The cached value is still reported; once it is deleted the test
        # expects six more bins than before
        self.assertEqual(counts.B, 26)
        del counts.B
        self.assertEqual(counts.B, 32)
コード例 #10
0
    def test_memoized_m_magnitude(self):
        """
        Check that M keeps its cached value until that value is deleted
        """
        counts = FreqDist('aaabbbaaabccddeeffbbccddeegjja')
        self.assertEqual(counts.M, 7)

        # Three more 'a' observations lift the largest count from 7 to 10
        for char in 'aaabbccc':
            counts[char] += 1

        # The stale value is still reported; deleting it forces a recompute
        self.assertEqual(counts.M, 7)
        del counts.M
        self.assertEqual(counts.M, 10)
コード例 #11
0
    def test_norm(self):
        """
        Check norm() on a sample in which 'd' is the most frequent element
        """
        # 50 random draws from 'abc' followed by exactly 50 'd' characters
        observed = list(random_characters(50, 'abc')) + ['d'] * 50
        counts = FreqDist(observed)

        self.assertEqual(counts.max(), 'd')
        self.assertEqual(counts.N, len(observed))
        self.assertEqual(counts.M, 50)
        self.assertAlmostEqual(counts.norm('d'), 1.0)

        # Each random character is expected to land strictly between 0 and 1
        for char in ('a', 'b', 'c'):
            self.assertGreater(counts.norm(char), 0.0)
            self.assertLess(counts.norm(char), 1.0)
コード例 #12
0
 def test_m_magnitude(self):
     """
     Check M for a fixed sample whose most common character occurs 7 times
     """
     sample = 'aaabbbaaabccddeeffbbccddeegjja'
     self.assertEqual(FreqDist(sample).M, 7)
コード例 #13
0
    def __init__(self, path):
        """
        Opens the mailbox at the given path and prepares error tracking

        The path is kept on the instance, the result of mbox(path) is stored
        as self.mbox, and self.errors starts out as an empty FreqDist.
        """
        self.path = path
        self.mbox = mbox(path)

        # Frequency distribution used to tally errors seen during extraction
        self.errors = FreqDist()
コード例 #14
0
 def test_max(self):
     """
     Check that max() returns the most frequent element of a fixed sample
     """
     sample = 'aaabbbaaabccddeeffbbccddeegjja'
     self.assertEqual(FreqDist(sample).max(), 'a')
コード例 #15
0
 def test_empty_max(self):
     """
     Check that max() of an empty distribution is None
     """
     empty = FreqDist()
     self.assertIsNone(empty.max())
コード例 #16
0
    def extract_graph(self):
        """
        Builds a networkx Graph linking email addresses that share a message.

        For every extracted email, each pair of distinct addresses among the
        sender, recipients and copied parties is counted. Every counted pair
        becomes an edge carrying three attributes taken from the FreqDist:
        "weight" (freq), "count" (the raw count) and "norm" (norm). Exceptions
        raised while handling an email are tallied in self.errors and the
        loop moves on to the next email.
        """
        def address_pairs(message):
            """
            Yields every sorted pair of unique addresses found on one message
            """
            # Everyone attached to this message: sender, then recipients, then copied
            parties = [message.sender]
            parties += message.recipients
            parties += message.copied

            # Skip None participants and empty addresses; the set drops duplicates
            unique = {
                party.email for party in parties
                if party is not None and party.email
            }

            # Sorting keeps the order inside each pair consistent across messages
            for pair in combinations(sorted(unique), 2):
                yield pair

        # Running tally of address-to-address links and of emails seen
        pair_counts = FreqDist()
        n_emails = 0

        # Errors are handled per email so one bad message does not stop the run.
        # NOTE: the complete link table is held in memory while it is built.
        # NOTE(review): the exception object itself is the key into
        # self.errors; unless FreqDist or the exception class treats separate
        # instances as equal, each error gets its own bin - confirm intent.
        for n_emails, message in enumerate(self.extract(), 1):
            try:
                for pair in address_pairs(message):
                    pair_counts[pair] += 1
            except Exception as e:
                self.errors[e] += 1

        # Graph-level attributes describe how and from what it was generated
        graph = nx.Graph(
            name="Email Network",
            mbox=self.path,
            extracted=strfnow(),
            n_emails=n_emails,
            mbox_size=filesize(self.path),
        )

        # One edge per counted pair, annotated with the three count-derived values
        for pair in pair_counts.keys():
            graph.add_edge(
                *pair,
                weight=pair_counts.freq(pair),
                count=pair_counts[pair],
                norm=pair_counts.norm(pair)
            )

        return graph
コード例 #17
0
 def test_b_bins(self):
     """
     Check that B reports 26 bins for 1000 random characters
     """
     counts = FreqDist(random_characters(1000))
     expected_bins = 26
     self.assertEqual(counts.B, expected_bins)
コード例 #18
0
 def test_empty_freq(self):
     """
     Check that freq() on an empty distribution is 0
     """
     empty = FreqDist()
     self.assertEqual(empty.freq('a'), 0)
コード例 #19
0
 def test_n_samples(self):
     """
     Check that N equals the number of characters the distribution was built from
     """
     n_chars = 100
     counts = FreqDist(random_characters(n_chars))
     self.assertEqual(counts.N, 100)
コード例 #20
0
 def test_empty_norm(self):
     """
     Check that norm() on an empty distribution is 0
     """
     empty = FreqDist()
     self.assertEqual(empty.norm('a'), 0)
コード例 #21
0
 def test_missing_ratio(self):
     """
     Check that the ratio against an element that never occurred is 0
     """
     # Only 'a', 'b' and 'c' are drawn, so 'd' is never observed
     counts = FreqDist(random_characters(100, 'abc'))
     unseen_ratio = counts.ratio('a', 'd')
     self.assertEqual(unseen_ratio, 0)