Esempio n. 1
0
 def testGetInflection03(self):
     # Expected Penn Treebank tag -> forms mappings for the lemma 'watch'.
     noun_forms = {'NNS': ('watches', 'watch'), 'NN': ('watch',)}
     verb_forms = {
         'VB': ('watch',),
         'VBD': ('watched',),
         'VBG': ('watching',),
         'VBP': ('watch',),
         'VBZ': ('watches',),
     }

     # Without a upos filter the noun and verb entries come back together.
     all_forms = dict(noun_forms)
     all_forms.update(verb_forms)
     self.assertEqual(lemminflect.getAllInflections('watch'), all_forms)

     # Restricting to VERB drops the noun tags.
     self.assertEqual(lemminflect.getAllInflections('watch', 'VERB'), verb_forms)

     # A single-tag lookup returns just that tag's tuple of forms.
     self.assertEqual(lemminflect.getInflection('watch', 'VBD'), ('watched',))

     # Asking for adjective inflections of 'watch' yields an empty dict.
     self.assertEqual(lemminflect.getAllInflections('watch', 'ADJ'), {})
Esempio n. 2
0
 def testGetInfections01(self):
     # NOTE: this test is a bit fragile because the overrides file may change
     # (for example the VBN form moving from 'awoken' to 'awaked').  'awaked'
     # is applied in the overrides but is not the preferred form, so the
     # expectation below keeps 'awoken'.
     expected_verb_forms = {
         'VB': ('awake',),
         'VBD': ('awoke',),
         'VBG': ('awaking',),
         'VBN': ('awoken',),
         'VBP': ('awake',),
         'VBZ': ('awakes',),
     }
     self.assertEqual(lemminflect.getAllInflections('awake', 'VERB'),
                      expected_verb_forms)

     # Looking up the already-inflected form 'awoke' is expected to return nothing.
     self.assertEqual(lemminflect.getAllInflections('awoke', 'VERB'), {})

     # An invalid upos ('X') must emit a log record and give an empty result.
     with self.assertLogs():
         invalid_upos_result = lemminflect.getAllInflections('awake', 'X')
     self.assertEqual(invalid_upos_result, {})
    def _get_replacement_words(self, word, word_part_of_speech):
        """Return inflected forms that could replace ``word``.

        Args:
            word: The surface form to find replacements for.
            word_part_of_speech: Fine-grained POS tag of ``word``; it is
                looked up in ``self._enptb_to_universal`` to obtain the
                universal POS passed to lemminflect.

        Returns:
            A list of distinct inflections of the chosen lemma, excluding
            ``word`` itself. Empty when the tag is not in
            ``self._enptb_to_universal`` or lemminflect has no lemma for
            ``word``. The order of the list is that of set iteration.
        """
        # Tags without a universal-POS mapping are never replaced.
        if word_part_of_speech not in self._enptb_to_universal:
            return []

        # Maps universal POS -> tuple of candidate lemmas for this word.
        lemmas_by_upos = lemminflect.getAllLemmas(word)
        if not lemmas_by_upos:
            return []

        upos = self._enptb_to_universal[word_part_of_speech]

        # Prefer the lemma listed under the word's own POS; otherwise fall
        # back to the lemmas of a randomly chosen POS.
        if upos in lemmas_by_upos:
            lemma_options = lemmas_by_upos[upos]
        else:
            lemma_options = random.choice(list(lemmas_by_upos.values()))
        lemma = lemma_options[0]

        forms_by_tag = lemminflect.getAllInflections(lemma, upos=upos)

        # Flatten the per-tag tuples into one de-duplicated collection, then
        # drop the original word.
        unique_forms = {form for forms in forms_by_tag.values() for form in forms}
        return [form for form in unique_forms if form != word]
    def get_inflections(orig_tokenized, pos_tagged, constrain_pos):
        """Collect shuffled candidate inflections for each inflectable token.

        Args:
            orig_tokenized: Sequence of token strings.
            pos_tagged: Sequence aligned with ``orig_tokenized`` whose items
                carry the POS tag at index 1.
            constrain_pos: When true, only inflections for the token's own
                POS are requested; otherwise all inflections of the lemma.

        Returns:
            A list of ``(i, inflections)`` pairs, where ``i`` is the token's
            position in the sequence and ``inflections`` is a randomly
            shuffled list of distinct forms. Tokens with no known lemma or
            whose tag is not NOUN/VERB/ADJ are omitted.
        """
        inflectable_pos = {'NOUN', 'VERB', 'ADJ'}
        results = []

        for position, token in enumerate(orig_tokenized):
            lemmas_by_pos = lemminflect.getAllLemmas(token)
            if not lemmas_by_pos:
                continue

            tag = pos_tagged[position][1]
            if tag not in inflectable_pos:
                continue

            # Use the lemma for the token's own POS when there is one;
            # otherwise pick the lemmas of a random POS.
            if tag in lemmas_by_pos:
                lemma_options = lemmas_by_pos[tag]
            else:
                lemma_options = random.choice(list(lemmas_by_pos.values()))
            lemma = lemma_options[0]

            if constrain_pos:
                forms_by_tag = lemminflect.getAllInflections(lemma, upos=tag)
            else:
                forms_by_tag = lemminflect.getAllInflections(lemma)

            # Flatten the per-tag tuples and drop duplicates.
            forms = list({form for group in forms_by_tag.values() for form in group})
            random.shuffle(forms)
            results.append((position, forms))

        return results
Esempio n. 5
0
def all_forms(word):
    """Return the lower-cased ``word`` plus every inflection listed for it.

    The result is a set containing the lower-cased word itself and all forms
    from every tag returned by ``getAllInflections`` for that lower-cased word.
    """
    lowered = word.lower()
    forms = {lowered}
    # Each dict value is a collection of forms for one tag; merge them all.
    forms.update(*getAllInflections(lowered).values())
    return forms
Esempio n. 6
0
 def testAuxModalInflections(self):
     # Each case pairs a lemma with the tag -> forms entries that must all be
     # present (as a subset) in what getAllInflections returns for it.
     cases = [
         # Modal auxiliary verbs
         ('can', {'VB': ('can',), 'VBD': ('could',)}),
         ('may', {'VB': ('may',), 'VBD': ('might',)}),
         ('will', {'VB': ('will',), 'VBD': ('would',)}),
         ('shall', {'VB': ('shall',), 'VBD': ('should',)}),
         ('must', {'VB': ('must',), 'VBD': ('must',)}),
         ('ought', {'VB': ('ought',), 'VBD': ('ought',)}),
         ('dare', {'VB': ('dare',)}),
         # Auxiliary verbs
         ('be', {'VB': ('be',), 'VBD': ('was', 'were'), 'VBG': ('being',),
                 'VBN': ('been',), 'VBP': ('am', 'are'), 'VBZ': ('is',)}),
         ('do', {'VB': ('do', 'does'), 'VBD': ('did',)}),
         ('have', {'VB': ('have', 'has'), 'VBD': ('had',),
                   'VBG': ('having',)}),
     ]
     for lemma, required in cases:
         found = lemminflect.getAllInflections(lemma)
         # Items-view comparison: every required (tag, forms) pair is in found.
         self.assertTrue(found.items() >= required.items())
Esempio n. 7
0
def all_inflect(w, word_len):
    """Return the set of inflections of ``w``, optionally filtered by length.

    Args:
        w: Word passed to ``getAllInflections``.
        word_len: When not ``None``, only forms whose length equals this
            value are kept; when ``None``, every form is kept.
    """
    collected = set()
    for forms in getAllInflections(w).values():
        if word_len is None:
            collected.update(forms)
        else:
            collected.update(form for form in forms if len(form) == word_len)
    return collected
Esempio n. 8
0
 def testGetInflection04(self):
     # No adjective inflections are listed for 'watch'.
     self.assertEqual(lemminflect.getAllInflections('watch', 'ADJ'), {})

     # With out-of-vocabulary inflection disabled the JJ lookup is empty ...
     without_oov = lemminflect.getInflection('watch', 'JJ', inflect_oov=False)
     self.assertEqual(without_oov, ())

     # ... and with it enabled the form 'watch' is returned.
     with_oov = lemminflect.getInflection('watch', 'JJ', inflect_oov=True)
     self.assertEqual(with_oov, ('watch',))

     # The verb lookup still returns the past-tense form.
     past_tense = lemminflect.getInflection('watch', 'VBD')
     self.assertEqual(past_tense, ('watched',))
Esempio n. 9
0
 def testUPOSLog(self):
     # Each lookup, given the invalid tag/upos 'X', must emit a log record
     # and return its empty value (a tuple or a dict).
     lookups = (
         (lemminflect.getInflection, ()),
         (lemminflect.getAllInflections, {}),
         (lemminflect.getAllInflectionsOOV, {}),
     )
     for lookup, empty_result in lookups:
         with self.assertLogs():
             outcome = lookup('WORD', 'X')
         self.assertEqual(outcome, empty_result)

     # The token-level `_.inflect` extension returns the text unchanged for 'X'.
     first_token = self.nlp('testing')[0]
     self.assertEqual(first_token._.inflect('X'), 'testing')
Esempio n. 10
0
    def inflect_lemma(self, lemma, tag=None, pos=None):
        """Return a list of inflected forms of ``lemma``.

        Args:
            lemma: The lemma to inflect.
            tag: When truthy, only the forms for this tag are returned
                (via ``lemminflect.getInflection``).
            pos: Used only when ``tag`` is falsy; passed as ``upos`` to
                ``lemminflect.getAllInflections`` and may be ``None``.

        Returns:
            A flat list of forms; forms from all tags are concatenated in
            dict order when the lookup is POS-based.
        """
        if tag:
            # Tag-based lookup: a single tuple of forms.
            return list(lemminflect.getInflection(lemma, tag=tag))

        # POS-based lookup: flatten the per-tag tuples into one list.
        forms_by_tag = lemminflect.getAllInflections(lemma, upos=pos)
        return [form for forms in forms_by_tag.values() for form in forms]
Esempio n. 11
0
def get_lemmas(word: str, pos: PartOfSpeech):
    """Return the lemma of ``word`` and the inflections of that lemma.

    Args:
        word: A single word; it is lower-cased before lookup.
        pos: Part of speech passed to ``getLemma`` and, as ``upos``, to the
            inflection lookups.

    Returns:
        ``{"lemma": ..., "inflections": ...}`` on success, where the
        inflections are the merge of the dictionary-based and
        out-of-vocabulary results. If ``word`` contains a space or a period,
        a ``JSONResponse`` carrying an explanatory message is returned
        instead.
    """
    word = word.lower()

    # NOTE(review): only spaces and '.' are rejected, although the message
    # mentions punctuation in general; the response also uses status 200 for
    # what looks like a validation failure - confirm both are intended.
    if " " in word or "." in word:
        return JSONResponse(status_code=200, content={"message": "Input must contain only a single word without spaces or punctuation."})

    # Look the lemma up once and reuse the result; fall back to the word
    # itself when no lemma is found.
    lemmas = getLemma(word, pos)
    lemma = lemmas[0] if lemmas else word

    inflections = merge_inflections(
        getAllInflections(lemma, upos=pos),
        getAllInflectionsOOV(lemma, upos=pos),
    )

    return {"lemma": lemma, "inflections": inflections}
    
Esempio n. 12
0
 def candidate_edits(self, text: str) -> List[Edit]:
     """Build candidate edits that swap each token for a related inflection.

     For every token produced by ``self._spacy.tokenizer``, all lemmas of the
     token text are gathered, then all inflections of those lemmas; the
     token's own text is removed and the remaining forms are handed to
     ``_edits`` together with the token index and the tokenized text.

     Returns:
         The concatenation of the edits produced for every token, in token
         order.
     """
     doc = self._spacy.tokenizer(text)
     edits = []
     for token in doc:
         # Every lemma of the token, across all parts of speech.
         lemmas = set()
         for lemma_group in lemminflect.getAllLemmas(token.text).values():
             lemmas.update(lemma_group)

         # Every inflection of every one of those lemmas.
         inflections = set()
         for lemma in lemmas:
             for form_group in lemminflect.getAllInflections(lemma).values():
                 inflections.update(form_group)

         # Replacing a token with itself is not an edit.
         substitutes = inflections - {token.text}
         edits.extend(_edits(token.i, doc, substitutes))
     return edits
Esempio n. 13
0
def random_inflect(source: str,
                   inflection_counts: Dict[str, int] = None) -> str:
    """Replace each content word in ``source`` with a random inflection.

    The sentence is tokenized with the Moses tokenizer and POS-tagged with
    NLTK's universal tagset. Every token tagged NOUN, VERB or ADJ that has a
    lemma for that tag is replaced by one inflection of that lemma, sampled
    at random.

    Args:
        source: The sentence to perturb.
        inflection_counts: Optional mapping from inflection tag to a weight.
            When given (and non-empty), sampling is weighted by these
            values and every candidate tag must be present as a key;
            otherwise sampling is unweighted.

    Returns:
        The detokenized sentence. If the first token started with an
        upper-case letter it is lower-cased for processing and passed
        through ``str.title()`` afterwards. When tokenization yields no
        tokens, ``source`` is returned unchanged.
    """
    have_inflections = {'NOUN', 'VERB', 'ADJ'}
    tokenized = MosesTokenizer(lang='en').tokenize(source)
    if not tokenized:
        # Nothing was tokenized (e.g. empty input): there is nothing to
        # inflect, and indexing tokenized[0] below would raise IndexError.
        return source

    # Lower-case a capitalized first token so the lookups see the plain
    # form; the capitalization is restored at the end.
    upper = tokenized[0][0].isupper()
    if upper:
        tokenized[0] = tokenized[0].lower()

    pos_tagged = nltk.pos_tag(tokenized, tagset='universal')

    for i, word in enumerate(tokenized):
        lemmas = lemminflect.getAllLemmas(word)
        tag = pos_tagged[i][1]
        # Only operate on content words (nouns/verbs/adjectives) that have a
        # lemma for their own POS.
        if not lemmas or tag not in have_inflections or tag not in lemmas:
            continue

        lemma = lemmas[tag][0]
        # Flatten to (inflection tag, form) pairs so a tag weight can be
        # looked up for each candidate form.
        candidates = [
            (infl_tag, form)
            for infl_tag, forms in lemminflect.getAllInflections(
                lemma, upos=tag).items()
            for form in forms
        ]
        if not candidates:
            continue

        if inflection_counts:
            weights = [inflection_counts[infl_tag] for infl_tag, _ in candidates]
            chosen = random.choices(candidates, weights=weights)[0]
        else:
            chosen = random.choices(candidates)[0]
        tokenized[i] = chosen[1]

    if upper:
        tokenized[0] = tokenized[0].title()
    return MosesDetokenizer(lang='en').detokenize(tokenized)
def get_inflections(token):
    """Return the set of every form lemminflect lists for ``token``.

    The per-tag collections returned by ``lemminflect.getAllInflections`` are
    merged into one set; the tags themselves are discarded.
    """
    return set().union(*lemminflect.getAllInflections(token).values())
Esempio n. 15
0
import lemminflect
import itertools

# The points we remove for scores of inflected forms
INFLECTED_PENALTY = 5

# Load the ranked words; each non-blank line has the form "<word>@<score>"
rw = dict()
with open('RankedWiktionary.txt', 'r') as fid:
    for line in fid:
        line = line.strip()
        if not line:
            # Skip blank lines, which would otherwise fail the unpacking below
            continue
        word, score = line.split('@')
        score = int(score)
        rw[word] = score

# Go through inflected forms and add the ones that are not ranked already.
# They are collected in a separate dict because rw is being iterated.
rw2 = dict()
for word, score in rw.items():
    infl = lemminflect.getAllInflections(word)
    for word1 in itertools.chain.from_iterable(infl.values()):
        if word1 not in rw:
            # Inflected forms score lower than their source word, but never below 1
            rw2[word1] = max(1, score - INFLECTED_PENALTY)

# Extend the dictionary
rw.update(rw2)

# Write the list
# NOTE(review): the file is opened in append mode, so running the script again
# adds a second full copy of the list - confirm 'a' is intended rather than 'w'.
with open('RankedWiktionary2.txt', 'a') as fid:
    for word, score in rw.items():
        fid.write(f'{word}@{score}\n')
Esempio n. 16
0
def api_getAllInflections():
    """Return, as JSON, all inflections for the lemma named in the request.

    The JSON request body must provide the keys ``'lemma'`` and ``'upos'``;
    both are passed positionally to ``getAllInflections``.
    """
    payload = request.json
    lemma, upos = payload['lemma'], payload['upos']
    return jsonify(getAllInflections(lemma, upos))