def word_tokenize(data):
    """Tokenize a string into a list of word tokens.

    Returns ret_success(tokens) on success, or ret_failure(701) when
    *data* is missing or tokenization fails.
    """
    # `is None` identity check instead of the original `== None`.
    if data is None:
        return ret_failure(701)
    try:
        return ret_success(nltk.word_tokenize(data))
    except Exception:  # narrowed from bare except (would trap SystemExit/KeyboardInterrupt)
        return ret_failure(701)
def stemmer(method, data):
    """Stem an array of words (JSON) with the requested algorithm.

    method: "lancaster", "porter" or "snowball"; any other value -> 404.
    Returns ret_success([[word, stem], ...]), ret_failure(703) on
    unparseable input, or ret_failure(702) on a stemming error.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals a parse error with False
        return ret_failure(703)
    # Dispatch table replaces three copy-pasted if/elif loops.
    stemmers = {
        "lancaster": LancasterSt,
        "porter": PorterSt,
        "snowball": SnowballSt,
    }
    st = stemmers.get(method)
    if st is None:
        abort(404)
    res = []
    for word in data:
        try:
            res.append([word, st.stem(word)])
        except Exception:  # narrowed from bare except; 702 = processing error
            return ret_failure(702)
    return ret_success(res)
def lemmatize(method, data):
    """Lemmatize an array of words or of [word, pos_tag] pairs.

    Penn Treebank tags are mapped to WordNet tags via penn_to_wn.
    Returns ret_success([[word, lemma], ...]); 703 on unparseable input,
    704 when required NLTK data is missing, 702 on any other error.
    Only method == "wordnet" is supported; anything else -> 404.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals a parse error with False
        return ret_failure(703)
    if method != "wordnet":
        abort(404)
    res = []
    for word in data:
        try:
            # isinstance replaces the original `type(word) is list` check.
            if isinstance(word, list):  # [word, penn_tag] pair
                res.append([word[0],
                            WordnetLm.lemmatize(word[0], penn_to_wn(word[1]))])
            else:
                res.append([word, WordnetLm.lemmatize(word)])
        except LookupError:  # WordNet corpus not downloaded
            return ret_failure(704)
        except Exception:  # narrowed from bare except
            return ret_failure(702)
    return ret_success(res)
def stemmer(method, data):
    """Stem an array of words (JSON) with the requested algorithm.

    method: "lancaster", "porter" or "snowball"; any other value -> 404.
    Returns ret_success([[word, stem], ...]), ret_failure(703) on
    unparseable input, or ret_failure(702) on a stemming error.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals a parse error with False
        return ret_failure(703)
    # Map method name to stemmer instance; replaces three duplicated loops.
    stemmers = {
        "lancaster": LancasterSt,
        "porter": PorterSt,
        "snowball": SnowballSt,
    }
    st = stemmers.get(method)
    if st is None:
        abort(404)
    res = []
    for word in data:
        try:
            res.append([word, st.stem(word)])
        except Exception:  # narrowed from bare except; 702 = processing error
            return ret_failure(702)
    return ret_success(res)
def sent_tokenize(data):
    """Split a text string into sentences.

    Returns ret_success(sentences); 701 when *data* is missing,
    702 when tokenization fails.
    """
    if data is None:  # identity check instead of `== None`
        return ret_failure(701)
    try:
        return ret_success(nltk.sent_tokenize(data))
    except Exception:  # narrowed from bare except
        return ret_failure(702)
def sent_tokenize(data):
    """Split a text string into sentences with the English Punkt model.

    Returns ret_success(sentences); 701 when *data* is missing,
    702 when loading the model or tokenizing fails.
    """
    if data is None:  # identity check instead of `== None`
        return ret_failure(701)
    try:
        # Explicitly load the pre-trained English Punkt sentence tokenizer.
        tok = nltk.data.load('tokenizers/punkt/english.pickle')
        return ret_success(tok.tokenize(data))
    except Exception:  # narrowed from bare except; also covers a missing pickle
        return ret_failure(702)
def pos_tag(data):
    """POS-tag an array of words.

    Returns ret_success([[word, tag], ...]); 703 on unparseable input,
    704 when the NLTK tagger model is missing, 702 on any other error.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals a parse error with False
        return ret_failure(703)
    try:
        return ret_success(nltk.pos_tag(data))
    except LookupError:  # tagger model not downloaded
        return ret_failure(704)
    except Exception:  # narrowed from bare except
        return ret_failure(702)
def tagger(data):
    """Run the Stanford NER tagger over whitespace-split tokens.

    Returns ret_success(tagged); 705 when the tagger cannot be
    initialised, 702 when tagging itself fails.
    """
    try:
        st = NERTagger(
            './nltk-data/StanfordNER/english.all.3class.distsim.crf.ser.gz',
            './nltk-data/StanfordNER/stanford-ner.jar')
    except Exception:  # narrowed from bare except; 705 = tagger init failure
        return ret_failure(705)
    # Re-enabled the commented-out guard: without it a tagging error
    # escaped as an unhandled exception instead of the intended 702.
    try:
        tag = st.tag(data.split())
    except Exception:
        return ret_failure(702)
    return ret_success(tag)
def tagger(data):
    """Run the Stanford NER tagger over whitespace-split tokens.

    Returns ret_success(tagged); 705 when the tagger cannot be
    initialised, 702 when tagging itself fails.
    """
    try:
        st = NERTagger(
            './nltk-data/StanfordNER/english.all.3class.distsim.crf.ser.gz',
            './nltk-data/StanfordNER/stanford-ner.jar')
    except Exception:  # narrowed from bare except; 705 = tagger init failure
        return ret_failure(705)
    # Re-enabled the commented-out guard: without it a tagging error
    # escaped as an unhandled exception instead of the intended 702.
    try:
        tag = st.tag(data.split())
    except Exception:
        return ret_failure(702)
    return ret_success(tag)
def lemmatize(method, data):
    """Lemmatize an array of words or of [word, pos_tag] pairs.

    Penn Treebank tags are mapped to WordNet tags via penn_to_wn.
    Returns ret_success([[word, lemma], ...]); 703 on unparseable input,
    704 when required NLTK data is missing, 702 on any other error.
    Only method == "wordnet" is supported; anything else -> 404.
    """
    data = parse_input(data)
    if data is False:  # parse_input signals a parse error with False
        return ret_failure(703)
    if method != "wordnet":
        abort(404)
    res = []
    for word in data:
        try:
            # isinstance replaces the original `type(word) is list` check.
            if isinstance(word, list):  # [word, penn_tag] pair
                res.append([word[0],
                            WordnetLm.lemmatize(word[0], penn_to_wn(word[1]))])
            else:
                res.append([word, WordnetLm.lemmatize(word)])
        except LookupError:  # WordNet corpus not downloaded
            return ret_failure(704)
        except Exception:  # narrowed from bare except
            return ret_failure(702)
    return ret_success(res)