예제 #1
0
def plot_undirected(reader: MBoxReader,
                    layout: str = 'shell',
                    graphml: bool = False) -> None:
    """Plot an undirected social network graph from the entire `mbox`.

    Every email yielded by ``reader.extract()`` is passed to
    ``PeopleCombination``; each distinct entry of its ``combo`` attribute
    becomes one edge of the graph. Each edge carries two attributes:
    ``count`` (how often the combination occurred) and ``weight`` (that count
    divided by the total number of counted combinations). The figure is shown
    with ``plt.show()``.

    Usage:
        reader = MBoxReader('path-to-mbox.mbox')
        plot_undirected(reader)

    Args:
        reader (MBoxReader): A `MBoxReader` object
        layout (str, optional): Can be one of 'shell', 'spring' or 'spiral'.
            Any other value falls back to the spiral layout. Defaults to 'shell'.
        graphml (bool, optional): Determines if a .graphml file is exported to
            the working directory. Defaults to False.
    """

    emails = reader.extract()
    G = nx.Graph(name='Email Social Network')
    plt.figure(figsize=(12, 12))

    # Tally how often every combination of people appears across all emails.
    counter = Counter()
    for email in emails:
        for combo in PeopleCombination(email).combo:
            counter[combo] += 1

    total_combos = sum(counter.values())
    # NOTE(review): `*rel` assumes each combination is a pair of node labels.
    for rel, count in counter.items():
        G.add_edge(*rel, weight=count / total_combos, count=count)

    if graphml:
        # A random 8-character suffix keeps repeated exports from colliding.
        fileName = f'network-{str(uuid.uuid4().hex)[:8]}.graphml'
        nx.write_graphml(G, fileName)
        print(f"Graphml exported as {fileName}")

    if layout == 'shell':
        pos = nx.shell_layout(G)
    elif layout == 'spring':
        node_count = G.order()
        # An empty graph would make 1 / sqrt(0) raise ZeroDivisionError, so
        # let NetworkX pick its default spacing (k=None) in that case.
        k = 1 / math.sqrt(node_count) * 2 if node_count else None
        pos = nx.spring_layout(G, k=k)
    else:
        pos = nx.spiral_layout(G)

    # Node size scales with the node's degree.
    deg = [v * 50 for _, v in G.degree()]
    nx.draw_networkx_nodes(G, pos, node_size=deg, linewidths=1.0, alpha=0.60)
    nx.draw_networkx_edges(G,
                           pos,
                           width=1.0,
                           style='dashed',
                           edge_color='cadetblue',
                           alpha=0.6)
    # Label each node with the part of its name before the first '@'.
    labels = {n: n.split('@')[0] for n in G.nodes}
    nx.draw_networkx_labels(G,
                            pos,
                            labels,
                            font_size=8,
                            font_color='darkorchid')

    plt.axis('off')
    plt.show()
예제 #2
0
def _display_name(address):
    """Return a label for an address object.

    Uses ``address.name`` when it is truthy; otherwise falls back to the part
    of ``address.email`` before the first ``'@'``.
    """
    # Truthiness covers both the empty string and None.
    return address.name or address.email.split('@')[0]


def plot_directed(reader: MBoxReader,
                  layout: str = 'shell',
                  graphml: bool = False) -> None:
    """
    Plot a directed social network graph from the entire `mbox`, supplied by `MBoxReader`.
    `layout` determines the underlying `NetworkX` layout.

    For every email one edge is added from the sender to each address in
    ``recipients + cc``; the edge's ``message`` attribute holds the email
    subject. Nodes are labelled with the address's name, or with the local
    part of its email address when the name is empty or ``None``.

    Usage:
        reader = MBoxReader('path-to-mbox.mbox')
        plot_directed(reader)
    Args:
        reader (MBoxReader): A `MBoxReader` object
        layout (str, optional): Can be one of 'shell', 'spring' or 'spiral'.
            Any other value falls back to the spiral layout. Defaults to 'shell'.
        graphml (bool, optional): Determines if a .graphml file is exported to
            the working directory. Defaults to False.
    """

    emails = reader.extract()
    plt.figure(figsize=(12, 12))
    # A multigraph keeps one edge per message between the same two people.
    G = nx.MultiDiGraph(name='Email Social Network')
    for email in emails:
        source_addr = _display_name(email.sender)
        for address in email.recipients + email.cc:
            G.add_edge(source_addr,
                       _display_name(address),
                       message=email.subject)

    if graphml:
        # A random 8-character suffix keeps repeated exports from colliding.
        fileName = f'network-{str(uuid.uuid4().hex)[:8]}.graphml'
        nx.write_graphml(G, fileName)

    if layout == 'shell':
        pos = nx.shell_layout(G)
    elif layout == 'spring':
        pos = nx.spring_layout(G)
    else:
        pos = nx.spiral_layout(G)
    nx.draw(G,
            pos,
            node_size=0,
            alpha=0.4,
            edge_color='cadetblue',
            font_size=7,
            with_labels=True)
    ax = plt.gca()
    ax.margins(0.08)
    plt.show()
예제 #3
0
class TestGraph(TestCase):
    """Smoke tests for the plotting functions of the ``graph`` module.

    The ``plt`` attribute of ``graph`` is patched in every test, so the tests
    only verify which calls are made on it.
    """

    def setUp(self):
        self.reader = MBoxReader(MBOX_PATH)
        self.emails = self.reader.extract()
        self.layout = ['shell', 'spring', 'spiral']

    def test_single_graph(self):
        # TODO: to be implemented later
        pass

    @mock.patch(f"{__name__}.graph.plt")
    def test_plot_single_directed(self, mock_plt):
        """The plot title is set once from the email's subject and date."""
        graph.plot_single_directed(self.reader, 1, True)
        mock_plt.title.assert_called_once_with(
            "Three tips to get the most out of Gmail\n Delivery date: 04/17/2020",
            fontdict={
                'fontname': 'Helvetica',
                'color': 'k',
                'fontweight': 'bold',
                'fontsize': 8
            })
        # unittest assertion instead of bare `assert`, which -O strips.
        self.assertTrue(mock_plt.figure.called)

    def test_plot_single_undirected(self):
        """The title is set once and a single named ``Graph`` is created."""
        with mock.patch(f"{__name__}.graph.plt") as mock_plt, mock.patch(
                f"{__name__}.graph.nx") as mock_nx:
            graph.plot_single_undirected(self.reader, 2, True)
            mock_plt.title.assert_called_once_with(
                "Stay more organized with Gmail's inbox\n Delivery date: 08/13/2020",
                fontdict={
                    'fontname': 'Helvetica',
                    'color': 'k',
                    'fontweight': 'bold',
                    'fontsize': 8
                })
            mock_nx.Graph.assert_called_once_with(
                name='Single Email Social Network')

    def test_plot_directed(self):
        """``plot_directed`` opens a figure for every supported layout."""
        for item in self.layout:
            # A fresh mock per layout: a shared mock would keep
            # `figure.called` True after the first iteration, so later
            # layouts would never really be checked.
            with self.subTest(layout=item), mock.patch(
                    f"{__name__}.graph.plt") as mock_plt:
                graph.plot_directed(self.reader, item)
                self.assertTrue(mock_plt.figure.called)

    def test_plot_undirected(self):
        """``plot_undirected`` opens a figure for every supported layout."""
        for item in self.layout:
            # Fresh mock per layout, for the same reason as in
            # test_plot_directed.
            with self.subTest(layout=item), mock.patch(
                    f"{__name__}.graph.plt") as mock_plt:
                graph.plot_undirected(self.reader, item)
                self.assertTrue(mock_plt.figure.called)
예제 #4
0
class TestExtract(TestCase):
    """Tests for reading, extracting and filtering emails via ``MBoxReader``."""

    # Address used by the address-based filter checks.
    FILTER_ADDRESS = "*****@*****.**"

    def setUp(self):
        self.reader = MBoxReader(MBOX_PATH)
        self.emails = self.reader.extract()
        # Despite its name this holds the raw message at index 1 of the mbox;
        # it is fed to `extract_meta` in test_email_address.
        self.emailAdress = self.reader.mbox[1]

    def tearDown(self):
        self.reader = None

    def _assert_involves(self, email, address):
        """Assert that `address` is the sender or one of the recipients of `email`."""
        checkers = [email.sender.email] + [
            recipient.email for recipient in email.recipients
        ]
        self.assertIn(address, checkers)

    def test_read_mbox(self):
        self.assertIsInstance(self.reader, MBoxReader)

    def test_length_mbox(self):
        # The fixture mbox is expected to hold 140 messages.
        self.assertEqual(len(self.reader), 140)

    def test_extract(self):
        """``extract`` yields ``EmailMeta`` objects with typed fields."""
        firstemail = next(self.emails)
        self.assertIsInstance(firstemail, EmailMeta)
        self.assertIsInstance(firstemail.subject, str)
        self.assertIsInstance(firstemail.date, datetime.datetime)

        # `next` above consumed the first item; this covers the remaining ones.
        for msg in self.emails:
            self.assertGreaterEqual(len(msg.recipients), 1)
            self.assertIsInstance(msg.cc, list)

    def test_email_address(self):
        """Sender and recipient entries expose string ``name`` and ``email``."""
        firstemail = next(self.emails)
        self.assertIsInstance(firstemail.sender, EmailAddress)
        self.assertIsInstance(firstemail.sender.name, str)
        self.assertIsInstance(firstemail.sender.email, str)

        mail = extract_meta(self.emailAdress)
        self.assertIsInstance(mail.recipients[0].name, str)
        self.assertIsInstance(mail.recipients[0].email, str)

    def test_filter_emails(self):
        """``filter_emails`` honours date, address and combined filters."""
        boundary = datetime.datetime(2020, 1, 1)
        exact_day = datetime.date(2020, 4, 17)

        # Date-only filters.
        newmails = self.reader.filter_emails(datestring="2020-01-01",
                                             dateoperator=">=")
        self.assertEqual(len(newmails), 4)

        for email in newmails:
            # The filter is inclusive (>=), so the boundary itself is valid.
            self.assertGreaterEqual(email.date, boundary)
            self.assertLess(email.date, datetime.datetime.now())

        oldmails = self.reader.filter_emails(datestring="2019-12-31",
                                             dateoperator="<=")
        self.assertEqual(len(oldmails), 136)

        exactmails = self.reader.filter_emails(datestring="2020-04-17",
                                               dateoperator="==")
        self.assertEqual(len(exactmails), 1)
        self.assertEqual(exactmails[0].date.date(), exact_day)

        # Address-only filter.
        namedmails = self.reader.filter_emails(
            emailaddress=self.FILTER_ADDRESS)

        for email in namedmails:
            self._assert_involves(email, self.FILTER_ADDRESS)

        # Address combined with each date operator.
        fullfilteredmails = self.reader.filter_emails(
            emailaddress=self.FILTER_ADDRESS,
            datestring="2020-01-01",
            dateoperator=">=",
        )

        for email in fullfilteredmails:
            self._assert_involves(email, self.FILTER_ADDRESS)
            self.assertGreaterEqual(email.date, boundary)

        fullfilteredmailsequal = self.reader.filter_emails(
            emailaddress=self.FILTER_ADDRESS,
            datestring="2020-04-17",
            dateoperator="==",
        )

        for email in fullfilteredmailsequal:
            self._assert_involves(email, self.FILTER_ADDRESS)
            # Check the current email, not just the first one in the result.
            self.assertEqual(email.date.date(), exact_day)

        fullfilteremailless = self.reader.filter_emails(
            emailaddress=self.FILTER_ADDRESS,
            datestring="2019-12-31",
            dateoperator="<=",
        )

        for email in fullfilteremailless:
            self._assert_involves(email, self.FILTER_ADDRESS)

        # No filter at all.
        mailswithoutfilter = self.reader.filter_emails()

        for email in mailswithoutfilter:
            self.assertIsInstance(email, EmailMeta)

    # TODO: add tests that expect an exception when the date operator is not
    # one of ==, <=, >=, and when emailaddress or datestring are malformed.
    def test_afunction_throws_exception(self):
        """``filter_emails`` raises ``ValueError`` for these positional arguments."""
        with self.assertRaises(ValueError):
            self.reader.filter_emails(20, "2019-12-31", "<")

    def test_extract_meta_single(self):
        """``extract_meta`` converts every raw message into an ``EmailMeta``."""
        for email in self.reader.mbox:
            self.assertIsInstance(email["Subject"], (bytes, str))
            emailmsg = extract_meta(email)
            self.assertIsInstance(emailmsg, EmailMeta)
            self.assertIsInstance(emailmsg.origin_domain, str)
            self.assertIsInstance(emailmsg.subject, str)

    def test_extract_body_single(self):
        """``extract_body`` converts every raw message into an ``EmailBody``."""
        for email in self.reader.mbox:
            emailbody = extract_body(email)
            self.assertIsInstance(emailbody, EmailBody)
            self.assertIsInstance(emailbody.subject, str)
            self.assertIsInstance(emailbody.body, str)