def get_names(self, list_) -> list:
    """Collect the valid chunk names from ``list_`` and POS-tag them.

    Every element of ``list_`` must expose a ``name`` attribute. Names
    rejected by ``Utils.isValidName`` are dropped; the remaining ones are
    joined with single spaces and tagged by an ``OpenNLPTagger`` using the
    ``en-pos-maxent.bin`` model. The OpenNLP and model directories are read
    from the ``options`` section of ``settings.ini`` (path relative to the
    current working directory).

    Returns:
        The result of ``self.getValidNames`` applied to the tagged sentence.

    Raises:
        KeyError: if ``settings.ini`` provides no ``options`` section or
            lacks the ``opennlp_dir`` / ``models_dir`` keys.
    """
    utils = Utils()
    valid_names = [
        chunk.name for chunk in list_ if utils.isValidName(chunk.name)
    ]
    content = ' '.join(valid_names)

    config = configparser.ConfigParser()
    config.read('settings.ini')
    options = config['options']
    opennlp_dir = options['opennlp_dir']
    models_dir = options['models_dir']

    tagger = OpenNLPTagger(
        language='en',
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'),
    )
    # ``content`` is already a str (result of str.join), so it is tagged as is.
    sentence = tagger.tag(content)
    return self.getValidNames(sentence)
def test_opennlp_tagger():
    """Tag an English sentence and check the first (token, tag) pair.

    The OpenNLP binaries and the POS model are looked up in the
    ``apache-opennlp`` and ``opennlp_models`` directories located next to
    this file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(here, 'apache-opennlp', 'bin')
    model_file = os.path.join(here, 'opennlp_models', 'en-pos-maxent.bin')

    tagger = OpenNLPTagger(language='en',
                           path_to_bin=bin_dir,
                           path_to_model=model_file)
    tagged = tagger.tag(
        'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
    )

    assert tagged[0][0] == 'Pierre'
    assert tagged[0][1] == 'NNP'
def test_opennlp_tagger(self):
    """Tag an English sentence, print the tags and check the first pair."""
    bin_dir = os.path.join(opennlp_dir, 'bin')
    model_file = os.path.join(models_dir, 'en-pos-maxent.bin')
    tagger = OpenNLPTagger(language='en',
                           path_to_bin=bin_dir,
                           path_to_model=model_file)

    tagged = tagger.tag(
        'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
    )
    print(tagged)

    assert tagged[0][0] == 'Pierre'
    assert tagged[0][1] == 'NNP'
def test_opennlp_chunker(self):
    """Tag an English sentence, chunk the tagged result and print the parse."""
    bin_dir = os.path.join(opennlp_dir, 'bin')

    tagger = OpenNLPTagger(
        language='en',
        path_to_bin=bin_dir,
        path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'),
    )
    tagged = tagger.tag(
        'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
    )

    chunker = OpenNLPChunker(
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_chunker=os.path.join(models_dir, 'en-chunker.bin'),
    )
    parsed = chunker.parse(tagged)
    print(parsed)
def opennlp_test(self, content):
    """POS-tag ``content`` and return the valid names found in it.

    ``content`` is converted with ``str()`` before tagging. The OpenNLP and
    model directories come from the ``options`` section of ``settings.ini``.
    The return value is whatever ``self.getValidNames`` yields for the
    tagged sentence.
    """
    settings = configparser.ConfigParser()
    settings.read('settings.ini')
    opennlp_dir = settings['options']['opennlp_dir']
    models_dir = settings['options']['models_dir']

    tagger = OpenNLPTagger(
        language='en',
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'),
    )
    tagged = tagger.tag(str(content))
    return self.getValidNames(tagged)
def test_opennlp_ner_chunker_bracketed(self):
    """Run the person NER chunker on a sentence containing brackets.

    The sentence is POS-tagged first; the chunker's parse of the tagged
    sentence is printed.
    """
    language = 'en'
    bin_dir = os.path.join(opennlp_dir, 'bin')

    tagger = OpenNLPTagger(
        language=language,
        path_to_bin=bin_dir,
        path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'),
    )
    tagged = tagger.tag(
        'Pierre Vinken , ( 61 years old ) , will join Martin Vinken as a nonexecutive director Nov. 29 .'
    )

    chunker_model = os.path.join(models_dir, '{}-chunker.bin'.format(language))
    person_model = os.path.join(models_dir, '{}-ner-person.bin'.format(language))
    ner_chunker = OpenNERChunker(
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_chunker=chunker_model,
        path_to_ner_model=person_model,
    )
    print(ner_chunker.parse(tagged))
def test_opennlp_chunker_de(self):
    """Tag a German sentence and expect chunking to fail with ``OSError``.

    The tagged sentence is printed. On every platform except Windows
    (``os.name == 'nt'``) building and running the German chunker must
    raise ``OSError``.
    """
    tagger = OpenNLPTagger(
        language='de',
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_model=os.path.join(models_dir, 'de-pos-maxent.bin'),
    )
    tagged = tagger.tag('Das Haus hat einen großen hübschen Garten.')
    print(tagged)

    # There should not be OpenNLP chunker for German language, thus OSError
    # is thrown in Linux
    if os.name == 'nt':
        return

    with self.assertRaises(OSError):
        chunker = OpenNLPChunker(
            path_to_bin=os.path.join(opennlp_dir, 'bin'),
            path_to_chunker=os.path.join(models_dir, 'de-chunker.bin'),
        )
        print(chunker.parse(tagged))
def test_opennlp_tagger_list(self):
    """Tag a pre-tokenised English sentence given as a list of tokens.

    The resulting tags are printed and the first (token, tag) pair is
    checked to be ``('Pierre', 'NNP')``.
    """
    language = 'en'
    tt = OpenNLPTagger(
        language=language,
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'),
    )
    # Keep every token a separate element: adjacent string literals without
    # a comma between them are concatenated by Python, which would merge
    # 'Vinken', ',' and '61' into the single token 'Vinken,61'.
    phrase = [
        'Pierre', 'Vinken', ',', '61', 'years', 'old', ',', 'will', 'join',
        'the', 'board', 'as', 'a', 'nonexecutive', 'director', 'Nov.', '29',
        '.',
    ]
    en_tags = tt.tag(phrase)
    print(en_tags)
    assert en_tags[0][0] == 'Pierre'
    assert en_tags[0][1] == 'NNP'
def get_maxent_postagger(self, language='en'):
    """Return the maxent POS tagger stored for ``language``, creating it if needed.

    When ``self.__has_key`` reports no entry for ``language`` in
    ``self.__maxent_postagger``, a new ``OpenNLPTagger`` is built from the
    ``apache-opennlp/bin`` and ``opennlp_models`` directories located next
    to this file and stored under ``language``.

    Returns ``None`` (and stores nothing) when the model file
    ``<language>-pos-maxent.bin`` does not exist or is not a regular file.
    """
    # Guard clause: an entry already stored for this language is reused.
    if self.__has_key(self.__maxent_postagger, language):
        return self.__maxent_postagger[language]

    base_dir = os.path.dirname(os.path.abspath(__file__))
    model_name = '{}-pos-maxent.bin'.format(language)
    model_path = os.path.join(base_dir, 'opennlp_models', model_name)
    if not os.path.exists(model_path) or not os.path.isfile(model_path):
        return None

    bin_dir = os.path.join(base_dir, 'apache-opennlp', 'bin')
    self.__maxent_postagger[language] = OpenNLPTagger(
        path_to_bin=bin_dir,
        path_to_model=model_path,
    )
    return self.__maxent_postagger[language]
def test_opennlp_ner_multichunker(self):
    """Run the multi-model NER chunker on a tagged English sentence.

    The sentence is POS-tagged first, then parsed by ``OpenNERChunkerMulti``
    with the person, date, location, time and money models (in that order);
    the parse is printed.
    """
    language = 'en'
    bin_dir = os.path.join(opennlp_dir, 'bin')

    tagger = OpenNLPTagger(
        language=language,
        path_to_bin=bin_dir,
        path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'),
    )
    tagged = tagger.tag(
        'John Haddock , 32 years old male , travelled to Cambridge , USA in October 20 while paying 6.50 dollars for the ticket'
    )

    # File-name templates of the NER models, in the order they are passed on.
    ner_templates = (
        '{}-ner-person.bin',
        '{}-ner-date.bin',
        '{}-ner-location.bin',
        '{}-ner-time.bin',
        '{}-ner-money.bin',
    )
    ner_model_paths = [
        os.path.join(models_dir, template.format(language))
        for template in ner_templates
    ]

    multi_chunker = OpenNERChunkerMulti(
        path_to_bin=os.path.join(opennlp_dir, 'bin'),
        path_to_chunker=os.path.join(models_dir, '{}-chunker.bin'.format(language)),
        ner_models=ner_model_paths,
    )
    print(multi_chunker.parse(tagged))