def main():
    """Load the email graph and analyse it with PageRank and HITS.

    Reads ``data/graph.txt`` through ``graphs.process_file``, reports the
    statistics via ``_dump_stats``, then hands the graph to
    ``_run_pagerank`` followed by ``_run_hits``.
    """
    print("Parsing text file.")
    number_emails, graph = graphs.process_file("data/graph.txt")

    # Summarise what was parsed before running the ranking algorithms.
    _dump_stats(number_emails, graph)

    # The two rankings are run one after the other on the same graph.
    _run_pagerank(graph)
    _run_hits(graph)

    print("Finished!")
def _dot_quote(text):
    """Return ``text`` formatted for use inside a double-quoted DOT string."""
    # Inside a quoted DOT string the only escape the grammar recognises is
    # \" -- an unescaped double quote would terminate the string early and
    # leave the generated file unparseable.
    return ("%s" % (text,)).replace('"', '\\"')


def _most_common_subject_item(relation):
    """Return the most frequent item counted from the relation's subjects.

    Emails whose ``subject`` is ``None`` are skipped.  Returns ``""`` when
    nothing was counted.
    """
    c = counter.Counter()
    for email in relation.emails.emails:
        if email.subject is not None:
            # NOTE(review): this assumes email.subject is an iterable of
            # words.  If it is a plain string, update() counts individual
            # characters instead -- confirm against the email parser.
            c.update(email.subject)
    if len(c) > 0:
        return c.most_common(1)[0][0]
    return ""


def main():
    """Build ``graph.dot`` from the data files under ``data/``.

    Reads ``data/wikipedia.txt``, ``data/aliases.txt``, ``data/roles.txt``
    and ``data/graph.txt``, derives the relations between the interesting
    nodes of the email graph, and writes one DOT edge per relation to
    ``graph.dot`` in the current directory.  Each edge is labelled with the
    most common item found in the subjects of that relation's emails (or
    an empty label when there is none).
    """
    print("Parsing wikipedia.txt")
    wiki_words = get_wikipedia_words("data/wikipedia.txt")

    # Parse the aliases.txt file.
    print("Parsing aliases.txt")
    (aliases, inverse_aliases) = get_aliases("data/aliases.txt")

    # Parse the roles.txt file.
    print("Parsing roles.txt")
    employees_info = info.get_employees_map("data/roles.txt", inverse_aliases)

    # Parse the graph.txt file to get the email graph.  The first element of
    # the returned pair is not needed here.
    print("Parsing graph.txt")
    (_, email_graph) = graphs.process_file("data/graph.txt", inverse_aliases)

    interesting_nodes = get_interesting_nodes(email_graph, aliases)
    relations = get_relations(interesting_nodes, employees_info, wiki_words)

    # Write the resultant graph.
    print("Writing results.")
    with open("graph.dot", "w") as f:
        f.write('digraph G {\n')
        print("%s relations" % len(relations))
        for (i, relation) in enumerate(sorted(relations)):
            # Progress indicator: index of the relation being written.
            print("%s" % i)
            a = relation.from_info
            b = relation.to_info

            # Label the edge with the most common subject item.
            best_word = _most_common_subject_item(relation)

            # Every interpolated value is escaped so that a double quote in
            # a description or subject cannot break the DOT syntax.
            f.write('"%s" -> "%s" [label = "%s"];\n' % (
                _dot_quote(a.description()),
                _dot_quote(b.description()),
                _dot_quote(best_word)))
        f.write('}\n')