def get_leaders_and_outermosts():
    """Report the months in which selected people were community outermosts.

    Reads the node-id/email pairs from ``datasets/enron/enron_guys.txt``,
    then, for each month 1..12, loads that month's undirected network with
    communities enabled, runs ``RoleMining(...).find_roles()`` on every
    community in isolation, and accumulates the leaders and outermosts it
    returns.  Finally prints a blank line followed by one line per outermost
    node whose email is in ``the_losers``: the email and its list of
    ``(month, community)`` pairs.

    Returns nothing; the result is only printed.
    """
    # NOTE(review): the nesting below was reconstructed from a copy of this
    # function that had lost its line breaks -- confirm that the final report
    # is meant to run once, after the month loop.
    email_by_id = {
        int(nid): email
        for nid, email in Reader.read_lines("datasets/enron/enron_guys.txt")
    }

    # node -> [(month, community), ...]
    global_leaders = defaultdict(list)
    global_outermosts = defaultdict(list)

    for month in range(1, 13):
        n = Network(
            'datasets/enron/timeslots/{:02d}-filtered2.edges'.format(month),
            is_directed=False, use_communities=True)
        communities = set(flatten_list(n.communities.values()))

        month_leaders, month_outermosts = set(), set()
        for community in communities:
            # Restrict the graph to a single community, mine its roles, then
            # restore the full graph before moving on to the next community.
            n.filter_community([community])
            leaders, outermosts = RoleMining(n).find_roles()
            month_leaders.update(leaders)
            month_outermosts.update(outermosts)
            n.unfilter_graph()

        # Each role entry is unpacked as a pair; presumably
        # (node id, community id) -- verify against RoleMining.find_roles.
        for leader, community in month_leaders:
            # global_leaders is filled here but not read again in this function.
            global_leaders[leader].append((month, community))
        for outermost, community in month_outermosts:
            global_outermosts[outermost].append((month, community))

    print("")
    for node_id, months in global_outermosts.items():
        email = email_by_id[node_id]
        if email in the_losers:
            print("{} {}".format(email, months))
def get_mediator_score_distribution(month=12):
    """Plot and print the mediator-score (CBC) distribution for one month.

    Loads the network and community files for ``month``, calls
    ``calculate_CBC()`` on it, shows a histogram of the scores, then prints:

    1. one line per entry of the hard-coded ``interesting_emails`` list with
       its score, or ``X`` when the email or its node has no score;
    2. a ranking table with every remaining node, where the rank is the
       1-based position of the node in the sequence returned by
       ``calculate_CBC()`` (positions of skipped nodes are still counted).

    Args:
        month: Month number used to pick the input files (formatted as two
            digits).  Defaults to 12, the value that used to be hard-coded.

    Returns nothing; the result is plotted and printed.
    """
    network = XNetwork(
        "datasets/enron/timeslots/{:02d}-filtered2.edges".format(month),
        communities_file="datasets/enron/communities/{:02d}-filtered2/k=3/directed_communities".format(month))

    # Iterated as (node id, score) pairs; presumably already ordered by
    # score, since the position is printed as "rank" -- TODO confirm.
    nodes_with_cbc = network.calculate_CBC()
    cbc_by_node = {nid: cbc for nid, cbc in nodes_with_cbc}

    # Build both lookup directions in a single pass over the file rows.
    guys = {}
    guys_by_email = {}
    for nid, email in Reader.read_lines("datasets/enron/enron_guys.txt"):
        node_id = int(nid)
        guys[node_id] = email
        guys_by_email[email] = node_id

    P.hist(list(cbc_by_node.values()))
    P.title("Enron - Mediator score distribution in month {}".format(month))
    P.xlabel("Mediator score")
    P.ylabel("Frequency")
    P.show()

    interesting_emails = [
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**", "*****@*****.**",
        "*****@*****.**", "*****@*****.**"
    ]
    # Set copy for the membership test in the ranking loop below.
    interesting_lookup = set(interesting_emails)

    for email in interesting_emails:
        try:
            score = cbc_by_node[guys_by_email[email]]
        except KeyError:
            # Unknown email, or a node that has no score this month.
            print("{}\tX".format(email))
        else:
            print("{}\t{:.4f}".format(email, score))

    print("month {}".format(month))
    print("-----------------------------------")
    print("rank\tnode_id\temail\tMS")
    print("-----------------------------------")
    # The rank counts every node, including the ones that are not printed.
    for rank, (node_id, cbc) in enumerate(nodes_with_cbc, 1):
        email = guys[node_id]
        if email not in interesting_lookup:
            print("{}\t{}\t{}\t{:.4f}".format(rank, node_id, email, cbc))