コード例 #1
0
 def testOverrides(self):
     """Check unknown-tag lemma lookups, then that overrides take precedence.

     The first half asserts that looking up 'WORD' with the tag 'X' logs a
     message and yields an empty result from getLemma, getAllLemmas and
     getAllLemmasOOV, and that the spaCy token for 'I' lemmatizes to 'I'.

     The second half temporarily swaps the overrides dictionary for one that
     maps watch/VBD to 'xxx' and asserts getInflection returns that entry.
     """
     # Run the inflection system once so the lazily-loaded overrides are in
     # place before the dictionary is swapped out, and check the stock result.
     self.assertEqual(lemminflect.getInflection('watch', 'VBD'),
                      ('watched', ))
     # Hack the code to replace the overrides dictionary
     orig_dict = lemminflect.Inflections().overrides_dict
     # Each lookup below is expected to emit at least one log record.
     with self.assertLogs():
         lemmas = lemminflect.getLemma('WORD', 'X')
     self.assertEqual(lemmas, ())
     with self.assertLogs():
         lemmas = lemminflect.getAllLemmas('WORD', 'X')
     self.assertEqual(lemmas, {})
     with self.assertLogs():
         lemmas = lemminflect.getAllLemmasOOV('WORD', 'X')
     self.assertEqual(lemmas, {})
     token = self.nlp('I')[0]
     self.assertEqual(token._.lemma(), 'I')
     try:
         lemminflect.Inflections().overrides_dict = {
             'watch': {
                 'VBD': ('xxx', )
             }
         }
         inflections = lemminflect.getInflection('watch',
                                                 'VBD',
                                                 inflect_oov=False)
         self.assertEqual(inflections, ('xxx', ))
     finally:
         # Put the original dictionary back even when an assertion fails, so
         # the fake override cannot affect tests that run afterwards.
         lemminflect.Inflections().overrides_dict = orig_dict
コード例 #2
0
 def testProperNouns(self):
     """Exercise proper-noun inflection through getInflection and spaCy tokens.

     Direct lookups are checked first (with and without OOV inflection), then
     the token extension is checked on the second token of two sentences.
     """
     # (lemma, tag, inflect_oov, expected forms); an empty tuple means the
     # lookup must come back with no inflections at all.
     lookups = (
         ('Alaskan', 'NN', False, ()),
         ('Alaskan', 'NNP', False, ('Alaskan', )),
         ('Alaskan', 'NNPS', False, ('Alaskans', )),
         ('Axxlaskan', 'NNP', True, ('Axxlaskan', )),
         ('Axxlaskan', 'NNPS', True, ('Axxlaskans', )),
     )
     for lemma, tag, use_oov, expected in lookups:
         forms = lemminflect.getInflection(lemma, tag, inflect_oov=use_oov)
         self.assertEqual(len(forms), len(expected))
         for idx, form in enumerate(expected):
             self.assertEqual(forms[idx], form)

     # lemmatize with lemminflect
     lemminflect.Inflections().setUseInternalLemmatizer(True)
     # (sentence, inflect_oov, expected NNPS form of the token at index 1)
     sentences = (
         ('The Alaskan went South.', False, 'Alaskans'),
         ('The Axxlaskan went South.', True, 'Axxlaskans'),
     )
     for text, use_oov, plural in sentences:
         noun = self.nlp(text)[1]
         self.assertEqual(noun._.inflect('NNPS', inflect_oov=use_oov),
                          plural)
コード例 #3
0
    # Load the corpus to test with
    print('Loading corpus from ', corp_fn)
    sents = loadFile(corp_fn, max_sents)
    print('Loaded {:,} test sentences'.format(len(sents)))
    print()

    # Create an empty overrides file before calling lemminflect because it loads this file on
    # first use.  This will mess-up the overrides creation process since overrides will be used.
    # Fix this issue by creating an empty file
    open(config.infl_overrides_fn, 'w').close()

    # Loop through the sentences and count the instances of (lemma, tag, corpus_word)
    # corpus_word is considered the "correct" inflection for the lemma/tag
    print('Processing sentences.  Use internal lemmatizer = ', \
        lemminflect.Inflections().isUsingInternalLemmatizer())
    infl_ctr = Counter()
    pb = ProgressBar(len(sents))
    for i, sent in enumerate(sents):
        doc = nlp(sent)
        for word in doc:
            # Filter out numbers, foreign characters, etc..
            if not isASCIIWord(word.text) or not word.tag_:
                continue
            # Skip aux and modal aux verbs since they're oddballs anyway
            if word.lemma_.lower() in ['be', 'have', 'do', 'will', 'can', 'may', 'shall', 'will', \
                'ought', 'dare']:
                continue
            # Only inflect regular nouns, verbs, adverbs and adjectives
            # Don't check inflections of particles or proper nouns
            ptype = word.tag_[0]
コード例 #4
0
 def testSpacyInflect02(self):
     """Re-run testSpacyInflect01 with spaCy doing the lemmatization.

     The lemmatizer choice is set on lemminflect.Inflections() rather than on
     this test case, so the previous setting is captured first and restored
     afterwards, keeping the switch from leaking into other tests.
     """
     was_internal = lemminflect.Inflections().isUsingInternalLemmatizer()
     lemminflect.Inflections().setUseInternalLemmatizer(
         False)  # lemmatize with spaCy
     try:
         self.testSpacyInflect01()
     finally:
         # Restore whichever lemmatizer was active, even if the checks fail.
         lemminflect.Inflections().setUseInternalLemmatizer(was_internal)
コード例 #5
0
 def __init__(self, *args, **kwargs):
     """Build the test case, attach the spaCy pipeline, pick the lemmatizer.

     All positional and keyword arguments are forwarded unchanged to the
     parent class constructor.
     """
     super(InflectionTests, self).__init__(*args, **kwargs)
     # Module-level pipeline object, exposed to the tests as self.nlp.
     self.nlp = SPACY_NLP
     # lemmatize with lemminflect
     inflector = lemminflect.Inflections()
     inflector.setUseInternalLemmatizer(True)