def get(self):
    """Write a small stemming and part-of-speech tagging demo as HTML.

    The response receives, in order: the literal text ``Hello test!``, the
    list produced by running ``PorterStemmer.stem`` over a fixed set of
    sample words, and one ``token(tag)<br/>`` entry for every token that
    ``NLPlib`` extracts from a fixed sample passage.
    """
    porter = PorterStemmer()
    sample_words = [
        'caresses', 'flies', 'dies', 'mules', 'denied', 'died', 'agreed',
        'owned', 'humbled', 'sized', 'meeting', 'stating', 'siezing',
        'itemization', 'sensational', 'traditional', 'reference',
        'colonizer', 'plotted',
    ]
    stems = [porter.stem(word) for word in sample_words]

    response = self.response
    response.headers['Content-Type'] = 'text/html'
    response.out.write('Hello test!')
    # The list itself is handed to write(), exactly as before; its textual
    # form is whatever the response stream makes of a non-string argument.
    response.out.write(stems)

    tagger = NLPlib()
    passage = (
        "Very little is known about Beethoven's childhood. "
        "He was baptized on December 17, 1770 and was probably born a few days before that. "
        "[1][4][5][6] "
        "Beethoven's parents were Johann van Beethoven (1740 in Bonn - December 18, 1792) "
        "and Maria Magdalena Keverich (1744 in Ehrenbreitstein - July 17, 1787)."
    )
    tokens = tagger.tokenize(passage)
    tags = tagger.tag(tokens)

    # Tags are looked up by position so a tag list shorter than the token
    # list still fails loudly instead of silently truncating the output.
    for position, token in enumerate(tokens):
        response.out.write("".join((token, "(", tags[position], ")<br/>")))
def test():
    """Interactively print tweets from a data file, optionally with tags.

    The file named by the first command-line argument is loaded with
    ``get_file_data``. Each entry is wrapped as ``tweet(entry, 1)`` and its
    text is printed; the user is then prompted, and answering ``1`` prints
    every token of that text next to the tag ``NLPlib`` assigns to it. Any
    other answer moves on to the next tweet.

    Raises:
        IndexError: if no file name was given on the command line.
    """
    tweets = get_file_data(sys.argv[1])
    sys.path.append("/home/nsatvik/twitminer/miner")
    print("1-Sports 2-Politics")
    tagger = NLPlib()

    # Python 2's input() evaluates the typed text as an expression, so a
    # blank or non-numeric answer crashed the loop and arbitrary code could
    # be executed. Read a plain line instead; the fallback keeps the prompt
    # working where raw_input does not exist (Python 3).
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    for entry in tweets:
        # NOTE(review): the second argument (1) is presumably the category
        # from the banner above -- confirm against the tweet class.
        text = tweet(entry, 1).get_text()
        print(text)

        if read_line("1 to display tags").strip() != "1":
            continue

        words = tagger.tokenize(text)
        for word, tag in zip(words, tagger.tag(words)):
            # Three spaces reproduce the original `print word, " ", tag`.
            print("%s   %s" % (word, tag))
def post(self):
    """Turn the posted text into a cloze exercise and return it as HTML.

    Reads the ``content`` request parameter, tokenizes and tags it with
    ``NLPlib``, and replaces every token tagged ``VBD`` or ``VBZ`` with a
    ``<select>`` drop-down. Each drop-down offers two fixed wrong answers
    ("loves", "hates") followed by the original token as the true answer.
    The resulting pieces are joined with single spaces and written to the
    response with content type ``text/html``.
    """
    # Function-scope import: the block needs it and the file-level imports
    # are not visible from here.
    from xml.sax.saxutils import escape

    verb_tags = ("VBD", "VBZ")

    tagger = NLPlib()
    tokens = tagger.tokenize(self.request.get('content'))
    tags = tagger.tag(tokens)

    # The text comes straight from the request and is sent back as HTML, so
    # every token is escaped (&, <, >) to keep submitted markup or script
    # from being reflected into the page.
    # Assumes the tokenizer yields strings -- TODO confirm.
    rendered = [escape(token) for token in tokens]

    # Replace by position, as before: tokens without a matching tag are
    # left as plain text.
    for position, tag in enumerate(tags):
        if tag not in verb_tags:
            continue
        # NOTE(review): every drop-down carries the same id, so the id is
        # not unique when the text has several verbs. Left unchanged because
        # the consumer of this markup is not visible here.
        rendered[position] = (
            "<select id=\"clozefox_answer\">"
            "<option value=\"wrongAnswer\">loves</option>"
            "<option value=\"wrongAnswer\">hates</option>"
            "<option value=\"trueAnswer\">" + rendered[position] + "</option>"
            "</select>"
        )

    self.response.headers['Content-Type'] = 'text/html'
    self.response.out.write(' '.join(rendered))