Exemple #1
0
def get_leaders_and_outermosts():
    guys = Reader.read_lines("datasets/enron/enron_guys.txt")
    guys = {int(nid) : email for nid, email in guys}

    global_leaders, global_outermosts = defaultdict(list), defaultdict(list)
    for month in xrange(1, 13):
        n = Network('datasets/enron/timeslots/{:02d}-filtered2.edges'.format(month),
                    is_directed=False,
                    use_communities=True)
        communities = set(flatten_list(n.communities.values()))
        month_leaders, month_outermosts = set(), set()
        for c in communities:
            n.filter_community([c])
            leaders, outermosts = RoleMining(n).find_roles()
            [month_leaders.add(leader) for leader in leaders]
            [month_outermosts.add(outer) for outer in outermosts]
            n.unfilter_graph()

        for l, c in month_leaders:
            global_leaders[l].append((month,c))
        for o, c in month_outermosts:
            global_outermosts[o].append((month, c))

    print
    for id, months in global_outermosts.items():
        if guys[id] in the_losers:
            print guys[id], months
Exemple #2
0
def get_mediator_score_distribution():
    month = 12
    n = XNetwork("datasets/enron/timeslots/{:02d}-filtered2.edges".format(month),
                 communities_file="datasets/enron/communities/{:02d}-filtered2/k=3/directed_communities".format(month))
    nodes_with_cbc = n.calculate_CBC()

    cbc_by_node = {nid: cbc for nid, cbc in nodes_with_cbc}

    guys_tuples = Reader.read_lines("datasets/enron/enron_guys.txt")
    guys = {int(nid): email for nid, email in guys_tuples}
    guys_by_email = {email: int(nid) for nid, email in guys_tuples}

    all_cdc = cbc_by_node.values()
    P.hist(all_cdc)
    P.title("Enron - Mediator score distribution in month {}".format(month))
    P.xlabel("Mediator score")
    P.ylabel("Frequency")
    P.show()

    interesting_emails = [
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**",
        "*****@*****.**"
    ]
    for email in interesting_emails:
        try:
            print "{}\t{:.4f}".format(email, cbc_by_node[guys_by_email[email]])
        except KeyError:
            print "{}\tX".format(email)

    i = 0
    print "month", month
    print "-----------------------------------"
    print "rank\tnode_id\temail\tMS"
    print "-----------------------------------"
    for n, cbc in nodes_with_cbc:
        i += 1
        if guys[n] not in interesting_emails:
            print "{}\t{}\t{}\t{:.4f}".format(i, n, guys[n], cbc)