Example #1
0
def main():
  """Parse the graph file, and run both PageRank and HITS on it.

  Loads "data/graph.txt" through graphs.process_file, passes the two values
  it returns to _dump_stats, and then hands the graph to _run_pagerank and
  _run_hits in that order. Progress messages are printed to stdout.
  """

  # Single-argument print(...) produces the same output under the Python 2
  # print statement and the Python 3 print function.
  print("Parsing text file.")

  # process_file returns a pair, unpacked here as (number_emails, graph).
  (number_emails, graph) = graphs.process_file("data/graph.txt")

  _dump_stats(number_emails, graph)

  _run_pagerank(graph)

  _run_hits(graph)

  print("Finished!")
Example #2
0
def main():
    """Parse the graph file, and run both PageRank and HITS on it.

    The graph is read from "data/graph.txt" with graphs.process_file. The
    two values that call returns are forwarded to _dump_stats, after which
    the graph alone is given first to _run_pagerank and then to _run_hits.
    A progress message is printed to stdout before and after the work.
    """

    # Written as a call with one argument so the line is valid, and prints
    # the same text, on both Python 2 and Python 3.
    print("Parsing text file.")

    # The result is a 2-tuple; only the second item is reused further down.
    (number_emails, graph) = graphs.process_file("data/graph.txt")

    _dump_stats(number_emails, graph)

    _run_pagerank(graph)

    _run_hits(graph)

    print("Finished!")
Example #3
0
def main():
    """Creates a graph.dot file with interesting information.

    Loads the Wikipedia word list, the alias tables, the employee roles and
    the e-mail graph from files under "data/", derives the relations between
    the interesting nodes, and writes one DOT edge per relation to
    "graph.dot" in the current working directory. Each edge is labelled with
    the most common entry counted from the subjects of that relation's
    e-mails, or with an empty label when nothing was counted. Progress is
    printed to stdout.
    """

    # Every print below is a single-argument call, which emits the same text
    # under Python 2's print statement and Python 3's print function.
    print("Parsing wikipedia.txt")
    wiki_words = get_wikipedia_words("data/wikipedia.txt")

    # Parse the aliases.txt file.
    print("Parsing aliases.txt")
    (aliases, inverse_aliases) = get_aliases("data/aliases.txt")

    # Parse the roles.txt file.
    print("Parsing roles.txt")
    employees_info = info.get_employees_map("data/roles.txt", inverse_aliases)

    # Parse the graph.txt file to get the email graph. The first element of
    # the returned pair is not needed here.
    print("Parsing graph.txt")
    (_, email_graph) = graphs.process_file("data/graph.txt", inverse_aliases)
    interesting_nodes = get_interesting_nodes(email_graph, aliases)

    relations = get_relations(interesting_nodes, employees_info, wiki_words)

    # Write the resultant graph.
    print("Writing results.")
    with open("graph.dot", "w") as dot_file:
        dot_file.write('digraph G {\n')
        print("%s relations" % len(relations))
        for (index, relation) in enumerate(sorted(relations)):
            # Progress indicator: the position of the relation being written.
            print("%s" % index)
            source = relation.from_info
            target = relation.to_info

            # Choose the most common word in the email subjects.
            # NOTE(review): if email.subject is a plain string rather than a
            # sequence of words, a collections.Counter-style update() would
            # tally single characters - confirm what counter.Counter expects.
            subject_counts = counter.Counter()
            for email in relation.emails.emails:
                if email.subject is not None:
                    subject_counts.update(email.subject)
            if len(subject_counts) > 0:
                best_word = subject_counts.most_common(1)[0][0]
            else:
                best_word = ""

            # NOTE(review): the three values are interpolated without any
            # escaping, so a double quote inside one of them would produce
            # malformed DOT - confirm the inputs cannot contain one.
            dot_file.write(
                '"%s" -> "%s" [label = "%s"];\n' %
                (source.description(), target.description(), best_word))
        dot_file.write('}\n')
def main():
  """Creates a graph.dot file with interesting information.

  Reads four inputs under "data/" (wikipedia.txt, aliases.txt, roles.txt and
  graph.txt), computes the relations between the interesting nodes of the
  e-mail graph, and writes them as a DOT digraph to "graph.dot" in the
  current working directory, one edge per relation. The edge label is the
  most common entry counted from the subjects of the relation's e-mails, or
  an empty string when nothing was counted. Progress is printed to stdout.
  """

  # print is always called with exactly one argument, so each line runs and
  # prints identically on Python 2 and Python 3.
  print("Parsing wikipedia.txt")
  wiki_words = get_wikipedia_words("data/wikipedia.txt")

  # Parse the aliases.txt file.
  print("Parsing aliases.txt")
  (aliases, inverse_aliases) = get_aliases("data/aliases.txt")

  # Parse the roles.txt file.
  print("Parsing roles.txt")
  employees_info = info.get_employees_map("data/roles.txt", inverse_aliases)

  # Parse the graph.txt file to get the email graph. Only the second element
  # of the returned pair is used.
  print("Parsing graph.txt")
  (_, email_graph) = graphs.process_file("data/graph.txt", inverse_aliases)
  interesting_nodes = get_interesting_nodes(email_graph, aliases)

  relations = get_relations(interesting_nodes, employees_info, wiki_words)

  # Write the resultant graph.
  print("Writing results.")
  with open("graph.dot", "w") as dot_file:
    dot_file.write('digraph G {\n')
    print("%s relations" % len(relations))
    for (index, relation) in enumerate(sorted(relations)):
      # Progress indicator: the position of the relation being written.
      print("%s" % index)
      source = relation.from_info
      target = relation.to_info

      # Choose the most common word in the email subjects.
      # NOTE(review): if email.subject is a plain string rather than a
      # sequence of words, a collections.Counter-style update() would tally
      # single characters - confirm what counter.Counter expects.
      subject_counts = counter.Counter()
      for email in relation.emails.emails:
        if email.subject is not None:
          subject_counts.update(email.subject)
      if len(subject_counts) > 0:
        best_word = subject_counts.most_common(1)[0][0]
      else:
        best_word = ""

      # NOTE(review): the three values are interpolated without any escaping,
      # so a double quote inside one of them would produce malformed DOT -
      # confirm the inputs cannot contain one.
      dot_file.write(
          '"%s" -> "%s" [label = "%s"];\n' %
          (source.description(), target.description(), best_word))
    dot_file.write('}\n')