Beispiel #1
0
    def get(self):
        """Write a small stemming and POS-tagging demo page to the response.

        First stems a fixed list of sample words with ``PorterStemmer`` and
        writes the resulting list, then tokenizes and tags a fixed sample
        paragraph with ``NLPlib`` and writes one ``token(tag)`` line per token.
        """
        sample_words = [
            'caresses', 'flies', 'dies', 'mules', 'denied',
            'died', 'agreed', 'owned', 'humbled', 'sized',
            'meeting', 'stating', 'siezing', 'itemization',
            'sensational', 'traditional', 'reference', 'colonizer',
            'plotted',
        ]
        porter = PorterStemmer()
        singles = [porter.stem(word) for word in sample_words]

        self.response.headers['Content-Type'] = 'text/html'
        out = self.response.out
        out.write('Hello test!')
        # The list object itself is handed to write(), exactly as before.
        out.write(singles)

        nlProcessor = NLPlib()

        sample_text = (
            "Very little is known about Beethoven's childhood. "
            "He was baptized  on December 17, 1770 and was probably born a few days before that. "
            "[1][4][5][6]  Beethoven's parents were Johann van Beethoven (1740 in Bonn - December 18, 1792) "
            "and Maria Magdalena Keverich (1744 in Ehrenbreitstein - July 17, 1787)."
        )

        token_list = nlProcessor.tokenize(sample_text)
        tag_list = nlProcessor.tag(token_list)
        # Tags are looked up by position so a tokens/tags length mismatch
        # still surfaces as an IndexError instead of being silently truncated.
        for position, token in enumerate(token_list):
            out.write(token + "(" + tag_list[position] + ")<br/>")
def test():
    tweets = get_file_data(sys.argv[1])
    posts = []
    sys.path.append("/home/nsatvik/twitminer/miner")
    print "1-Sports 2-Politics"
    tagger = NLPlib()
    for t in tweets:
        posts.append(tweet(t, 1))
        print posts[-1].get_text()
        a = input("1 to display tags")
        if a == 1:
            words = tagger.tokenize(posts[-1].get_text())
            tags = tagger.tag(words)
            for i in range(len(words)):
                print words[i], " ", tags[i]

        else:
            continue
Beispiel #3
0
    def post(self):
        """Turn the posted ``content`` text into a cloze exercise rendered as HTML.

        The request parameter ``content`` is tokenized and tagged with
        ``NLPlib``.  Every token whose tag is ``VBD`` or ``VBZ`` is replaced by
        a ``<select>`` drop-down offering two fixed wrong answers ("loves",
        "hates") plus the original token as the correct answer.  All tokens
        are then joined with single spaces and written to the response as
        ``text/html``.

        Because ``content`` comes straight from the request, every token is
        HTML-escaped before being placed in the page so that submitted markup
        cannot be injected into the response.
        """
        # ``html.escape`` exists only on Python 3; ``cgi.escape`` is the
        # Python 2 equivalent.  Imported locally so the handler does not
        # depend on the module's import block.
        try:
            from html import escape
        except ImportError:
            from cgi import escape

        nlProcessor = NLPlib()
        content = self.request.get('content')
        tokens = nlProcessor.tokenize(content)
        tags = nlProcessor.tag(tokens)

        # Escape everything up front; verb positions are overwritten below
        # with markup that embeds the already-escaped token.
        pieces = [escape(token, quote=True) for token in tokens]

        # NOTE(review): every generated <select> carries the same id
        # ("clozefox_answer"); left unchanged because client code may rely on it.
        for i, tag in enumerate(tags):
            if tag in ("VBD", "VBZ"):
                pieces[i] = ''.join([
                    '<select id="clozefox_answer">',
                    '<option value="wrongAnswer">loves</option>',
                    '<option value="wrongAnswer">hates</option>',
                    '<option  value="trueAnswer">' + pieces[i] + '</option>',
                    '</select>',
                ])

        self.response.headers['Content-Type'] = 'text/html'
        self.response.out.write(' '.join(pieces))