def _getStemmerObject(self, language="pt-br", approach="orengo"):
    """Return the stemmer for *approach*, rebuilding it only when needed.

    Args:
        language: Accepted for interface compatibility; this method does
            not read it.
        approach: Name of the stemming algorithm. The recognised values
            are "orengo", "porter" and "savoy".

    Returns:
        The stemmer stored in ``self.stemmer``. When *approach* equals
        ``self.stemmerType``, or is not one of the recognised names, the
        existing instance is returned untouched.
    """
    # Same algorithm as the one already built: reuse the instance.
    if approach == self.stemmerType:
        return self.stemmer

    if approach == "orengo":
        replacement = OrengoStemmer()
    elif approach == "porter":
        replacement = PorterStemmer()
    elif approach == "savoy":
        replacement = SavoyStemmer()
    else:
        # Unsupported name: leave both attributes alone so that
        # self.stemmerType never names a stemmer that was not built.
        return self.stemmer

    # Commit both attributes together, only after construction succeeded.
    self.stemmer = replacement
    self.stemmerType = approach
    return replacement
# -*- coding: LATIN-1 -*-
'''
 * PTStemmer - A Stemming toolkit for the Portuguese language (C) 2008-2010 Pedro Oliveira
 *
 * This file is part of PTStemmer.
 * PTStemmer is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PTStemmer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with PTStemmer. If not, see <http://www.gnu.org/licenses/>.
'''
from ptstemmer.implementations.OrengoStemmer import OrengoStemmer
from ptstemmer.implementations.SavoyStemmer import SavoyStemmer
from ptstemmer.implementations.PorterStemmer import PorterStemmer
from ptstemmer.support import PTStemmerUtilities

if __name__ == '__main__':
    # Usage demo: build a stemmer, configure it, and print two stems.
    # PorterStemmer and SavoyStemmer (imported above) can be used instead.
    stemmer = OrengoStemmer()
    stemmer.enableCaching(1000)

    # NOTE(review): the path handed to fileToSet is empty here; presumably
    # a placeholder for a file of words to ignore - confirm.
    stemmer.ignore(PTStemmerUtilities.fileToSet(""))

    # First word: stem it, then print the stem with diacritics removed.
    accented_stem = stemmer.getWordStem("ciências")
    print(PTStemmerUtilities.removeDiacritics(accented_stem))

    # Second word: print the stem as returned.
    print(stemmer.getWordStem("extremamente"))
import os import json from flask import Flask, jsonify, request from ptstemmer.implementations.OrengoStemmer import OrengoStemmer from ptstemmer.implementations.SavoyStemmer import SavoyStemmer from ptstemmer.implementations.PorterStemmer import PorterStemmer app = Flask(__name__) stemmer = OrengoStemmer() stemmer.enableCaching(1000) #Optional @app.route('/') def main(): return ''' <html> <head><title>Stemming Words</title></head> <body> <p> <h3>Saiba mais...</h3> <ul> <li><b>Para testar acesse a rota:</b> /steam?word=digite_a_palavra_desejada</li> <li> <a href='https://github.com/ednilsonmcs/apistemmer'>Repo no Git</a> </li> <li> <a href='https://www.linkedin.com/in/ednilsonmcs/'>Meu linkedin</a> </li> </ul> </p> </body>
def __init__(self):
    """Set up the default state: Orengo stemmer and no stop words."""
    # Stop-word settings: an empty list and the ASCII-only flag off.
    self.stopWords = list()
    self.stopWordsOnlyASCIICharacteres = False

    # Default stemmer instance, together with the name recording its type.
    self.stemmer, self.stemmerType = OrengoStemmer(), "orengo"