def process(self, content, tag_count=7, doc_count=25):
    """Extract text from *content* and derive tags and a stem vector.

    The text returned by ``self.extract(content)`` is wrapped in a
    ``Text``, a ``TextGraph`` is built over it, and the stems produced
    by ``rank_stems()`` are collected in the order they are yielded
    (presumably best-ranked first -- confirm against ``TextGraph``).

    Args:
        content: Raw input handed to ``self.extract``.
        tag_count: How many leading stems are turned into tags.
        doc_count: How many leading stems are fed into the vector.

    Returns:
        A ``(data, tags, vector)`` tuple, where ``data`` is a dict with
        the original ``content`` and the extracted ``text``, ``tags`` is
        the list of ``text[stem]`` lookups for the first ``tag_count``
        stems, and ``vector`` is ``Vector.fromtokens`` applied to the
        first ``doc_count`` stems.
    """
    text = Text(self.extract(content))
    ranking = TextGraph(text).rank_stems()

    # rank_stems() yields (score, stem) pairs; only the stem is needed.
    stems = [stem for _score, stem in ranking]

    # Map each of the leading stems back through the Text object.
    tags = []
    for stem in stems[:tag_count]:
        tags.append(text[stem])

    vector = Vector.fromtokens(stems[:doc_count])
    return {"content": content, "text": text.text}, tags, vector
from graph import TextGraph from text import Text import sys, textwrap, urllib2 print >>sys.stderr, ">>> starting..." if "://" in sys.argv[1]: text = Text.from_url(sys.argv[1]) else: text = Text.from_file(sys.argv[1]) graph = TextGraph(text) summary = graph.summary() print "\n".join(textwrap.wrap(summary)) print graph = TextGraph(text) print "Tags:", ", ".join(graph.tags())