def get_names(self, list_) -> list:
        """POS-tag the valid chunk names and return the names extracted from them.

        Each item of ``list_`` must expose a ``name`` attribute. Names rejected
        by ``Utils().isValidName`` are dropped; the remaining names are joined
        with single spaces, tagged with the English OpenNLP maxent POS model
        and handed to ``self.getValidNames``.

        The OpenNLP install directory and the models directory are read from
        the ``[options]`` section of ``settings.ini`` (resolved relative to the
        current working directory).

        Args:
            list_: Iterable of objects with a ``name`` attribute.

        Returns:
            Whatever ``self.getValidNames`` returns for the tagged text.

        Raises:
            KeyError: If ``settings.ini`` has no ``[options]`` section or lacks
                the ``opennlp_dir`` / ``models_dir`` keys (``ConfigParser.read``
                silently skips files it cannot open).
        """
        utils = Utils()
        # Keep only the names accepted by the validator, preserving input order.
        valid_names = [chunk.name for chunk in list_
                       if utils.isValidName(chunk.name)]
        content = ' '.join(valid_names)

        config = configparser.ConfigParser()
        config.read('settings.ini')
        options = config['options']
        opennlp_dir = options['opennlp_dir']
        models_dir = options['models_dir']

        tagger = OpenNLPTagger(language='en',
                               path_to_bin=os.path.join(opennlp_dir, 'bin'),
                               path_to_model=os.path.join(models_dir,
                                                          'en-pos-maxent.bin'))
        # ``content`` is already a str (result of str.join), so no cast needed.
        return self.getValidNames(tagger.tag(content))
Example #2
0
def test_opennlp_tagger():
    """Check that the English POS tagger labels 'Pierre' as a proper noun.

    The OpenNLP binaries and models are looked up in the ``apache-opennlp``
    and ``opennlp_models`` directories located next to this file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    bin_dir = os.path.join(here, 'apache-opennlp', 'bin')
    model_file = os.path.join(here, 'opennlp_models', 'en-pos-maxent.bin')

    tagger = OpenNLPTagger(language='en',
                           path_to_bin=bin_dir,
                           path_to_model=model_file)
    tagged = tagger.tag(
        'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
    )

    # Only the first (token, tag) pair is verified.
    first = tagged[0]
    assert first[0] == 'Pierre'
    assert first[1] == 'NNP'
Example #3
0
 def test_opennlp_tagger(self):
     """Tag an English sentence and verify the first (token, tag) pair.

     ``opennlp_dir`` and ``models_dir`` are taken from the enclosing scope.
     """
     bin_dir = os.path.join(opennlp_dir, 'bin')
     model_file = os.path.join(models_dir, 'en-pos-maxent.bin')
     tagger = OpenNLPTagger(language='en',
                            path_to_bin=bin_dir,
                            path_to_model=model_file)

     tagged = tagger.tag(
         'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
     )
     print(tagged)

     # Only the leading pair is checked.
     first = tagged[0]
     assert first[0] == 'Pierre'
     assert first[1] == 'NNP'
Example #4
0
 def test_opennlp_chunker(self):
     """Tag an English sentence, chunk the tagged result and print the parse.

     ``opennlp_dir`` and ``models_dir`` are taken from the enclosing scope.
     No assertion is made on the parse; the test passes if nothing raises.
     """
     bin_dir = os.path.join(opennlp_dir, 'bin')

     # Step 1: POS-tag the raw sentence.
     tagger = OpenNLPTagger(
         language='en',
         path_to_bin=bin_dir,
         path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'))
     tagged = tagger.tag(
         'Pierre Vinken , 61 years old , will join the board as a nonexecutive director Nov. 29 .'
     )

     # Step 2: feed the tagged sentence to the chunker.
     chunker = OpenNLPChunker(
         path_to_bin=bin_dir,
         path_to_chunker=os.path.join(models_dir, 'en-chunker.bin'))
     parsed = chunker.parse(tagged)
     print(parsed)
Example #5
0
    def opennlp_test(self, content):
        """POS-tag ``content`` with the English model and extract valid names.

        The OpenNLP install directory and the models directory come from the
        ``[options]`` section of ``settings.ini``. ``content`` is converted
        with ``str()`` before tagging, and the tagged result is passed to
        ``self.getValidNames``, whose return value is returned unchanged.
        """
        parser = configparser.ConfigParser()
        parser.read('settings.ini')
        options = parser['options']

        bin_path = os.path.join(options['opennlp_dir'], 'bin')
        model_path = os.path.join(options['models_dir'], 'en-pos-maxent.bin')

        tagger = OpenNLPTagger(language='en',
                               path_to_bin=bin_path,
                               path_to_model=model_path)
        tagged = tagger.tag(str(content))
        return self.getValidNames(tagged)
Example #6
0
 def test_opennlp_ner_chunker_bracketed(self):
     """Run the person-NER chunker on a sentence containing brackets.

     The sentence is POS-tagged first; the tagged result is then parsed by
     ``OpenNERChunker`` and printed. ``opennlp_dir`` and ``models_dir`` are
     taken from the enclosing scope. No assertion is made on the output.
     """
     language = 'en'
     bin_dir = os.path.join(opennlp_dir, 'bin')

     tagger = OpenNLPTagger(
         language=language,
         path_to_bin=bin_dir,
         path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'))
     tagged = tagger.tag(
         'Pierre Vinken , ( 61 years old ) , will join Martin Vinken as a nonexecutive director Nov. 29 .'
     )

     # Model file names are derived from the language code.
     chunker_model = os.path.join(models_dir,
                                  '{}-chunker.bin'.format(language))
     person_model = os.path.join(models_dir,
                                 '{}-ner-person.bin'.format(language))
     ner_chunker = OpenNERChunker(path_to_bin=bin_dir,
                                  path_to_chunker=chunker_model,
                                  path_to_ner_model=person_model)
     print(ner_chunker.parse(tagged))
Example #7
0
 def test_opennlp_chunker_de(self):
     """Tag a German sentence and expect the German chunker to be unavailable.

     The tagged sentence is printed. On every platform except Windows
     (``os.name == 'nt'``) building and using a chunker from
     ``de-chunker.bin`` is expected to raise ``OSError``.
     ``opennlp_dir`` and ``models_dir`` are taken from the enclosing scope.
     """
     bin_dir = os.path.join(opennlp_dir, 'bin')
     tagger = OpenNLPTagger(
         language='de',
         path_to_bin=bin_dir,
         path_to_model=os.path.join(models_dir, 'de-pos-maxent.bin'))
     tagged = tagger.tag('Das Haus hat einen großen hübschen Garten.')
     print(tagged)

     # The OSError expectation is only checked off Windows.
     if os.name == 'nt':
         return

     # There should be no OpenNLP chunker model for German, so an OSError
     # is expected here.
     with self.assertRaises(OSError):
         chunker = OpenNLPChunker(
             path_to_bin=bin_dir,
             path_to_chunker=os.path.join(models_dir, 'de-chunker.bin'))
         print(chunker.parse(tagged))
Example #8
0
 def test_opennlp_tagger_list(self):
     """Tag a pre-tokenized English sentence and verify the first pair.

     The phrase is passed to the tagger as a list of tokens instead of a
     single string. ``opennlp_dir`` and ``models_dir`` are taken from the
     enclosing scope.
     """
     language = 'en'
     tt = OpenNLPTagger(language=language,
                        path_to_bin=os.path.join(opennlp_dir, 'bin'),
                        path_to_model=os.path.join(models_dir,
                                                   'en-pos-maxent.bin'))
     # Every token must be its own list element. Without the separating
     # commas Python joins adjacent string literals, which previously
     # produced the single bogus token 'Vinken,61'.
     phrase = [
         'Pierre', 'Vinken', ',', '61', 'years', 'old', ',', 'will', 'join',
         'the', 'board', 'as', 'a', 'nonexecutive', 'director', 'Nov.', '29',
         '.',
     ]
     en_tags = tt.tag(phrase)
     print(en_tags)
     assert en_tags[0][0] == 'Pierre'
     assert en_tags[0][1] == 'NNP'
Example #9
0
 def get_maxent_postagger(self, language='en'):
     """Return the maxent POS tagger for ``language``, creating it on first use.

     Taggers are stored per language in ``self.__maxent_postagger``. When no
     tagger is stored yet, the model ``<language>-pos-maxent.bin`` is looked
     up in the ``opennlp_models`` directory next to this file; if it is not
     an existing regular file, ``None`` is returned and nothing is stored.
     """
     if self.__has_key(self.__maxent_postagger, language):
         return self.__maxent_postagger[language]

     base_dir = os.path.dirname(os.path.abspath(__file__))
     path_to_model = os.path.join(base_dir, 'opennlp_models',
                                  '{}-pos-maxent.bin'.format(language))
     if not os.path.exists(path_to_model) or not os.path.isfile(path_to_model):
         return None

     # NOTE(review): ``language`` is not forwarded to OpenNLPTagger here --
     # confirm the tagger's default is appropriate for non-English models.
     bin_dir = os.path.join(base_dir, 'apache-opennlp', 'bin')
     self.__maxent_postagger[language] = OpenNLPTagger(
         path_to_bin=bin_dir, path_to_model=path_to_model)
     return self.__maxent_postagger[language]
Example #10
0
 def test_opennlp_ner_multichunker(self):
     """Run the multi-model NER chunker on a tagged English sentence.

     The sentence is POS-tagged, then parsed by ``OpenNERChunkerMulti``
     configured with the person, date, location, time and money NER models
     (in that order); the parse is printed. ``opennlp_dir`` and
     ``models_dir`` are taken from the enclosing scope. No assertion is
     made on the output.
     """
     language = 'en'
     bin_dir = os.path.join(opennlp_dir, 'bin')

     tagger = OpenNLPTagger(
         language=language,
         path_to_bin=bin_dir,
         path_to_model=os.path.join(models_dir, 'en-pos-maxent.bin'))
     tagged = tagger.tag(
         'John Haddock , 32 years old male , travelled to Cambridge , USA in October 20 while paying 6.50 dollars for the ticket'
     )

     # NER model file names, all derived from the language code.
     ner_files = [
         '{}-ner-person.bin'.format(language),
         '{}-ner-date.bin'.format(language),
         '{}-ner-location.bin'.format(language),
         '{}-ner-time.bin'.format(language),
         '{}-ner-money.bin'.format(language),
     ]
     multi_chunker = OpenNERChunkerMulti(
         path_to_bin=bin_dir,
         path_to_chunker=os.path.join(models_dir,
                                      '{}-chunker.bin'.format(language)),
         ner_models=[os.path.join(models_dir, name) for name in ner_files])
     print(multi_chunker.parse(tagged))