예제 #1
0
파일: base.py 프로젝트: schuyler/nlplib
 def process(self, content, tag_count=7, doc_count=25):
     """Derive tags and a stem vector from ``content``.

     The raw content is passed through ``self.extract`` and wrapped in a
     ``Text``; a ``TextGraph`` over that text supplies ``(score, stem)``
     pairs, of which only the stems are kept, in the order yielded.

     Args:
         content: Raw input handed to ``self.extract``; also returned
             unchanged under the ``"content"`` key.
         tag_count: Number of leading stems looked up in the text to
             form the tag list.
         doc_count: Number of leading stems passed to
             ``Vector.fromtokens``.

     Returns:
         A ``(data, tags, vector)`` tuple, where ``data`` is a dict with
         the keys ``"content"`` and ``"text"``.
     """
     document = Text(self.extract(content))
     # NOTE(review): presumably rank_stems() yields the best-ranked
     # stems first -- confirm against TextGraph.
     ranked_stems = []
     for _score, stem in TextGraph(document).rank_stems():
         ranked_stems.append(stem)
     tags = [document[stem] for stem in ranked_stems[:tag_count]]
     vector = Vector.fromtokens(ranked_stems[:doc_count])
     return {"content": content, "text": document.text}, tags, vector
예제 #2
0
파일: tags.py 프로젝트: schuyler/nlplib
from graph import TextGraph
from text import Text

import sys, textwrap, urllib2

print >>sys.stderr, ">>> starting..."

if "://" in sys.argv[1]:
    text = Text.from_url(sys.argv[1])
else:
    text = Text.from_file(sys.argv[1])
graph = TextGraph(text)
summary = graph.summary()
print "\n".join(textwrap.wrap(summary))
print
graph = TextGraph(text)
print "Tags:", ", ".join(graph.tags())