Ejemplo n.º 1
0
 def test_parse_lemma(self):
     # Assert the accuracy of the verb lemmatization algorithm:
     # each infinitive in de.inflect.VERBS is conjugated in each of its
     # tenses, and more than 85% of the resulting forms must be mapped back
     # to the infinitive by de.inflect._parse_lemma().
     # Note: the accuracy is higher (88%) when measured on CELEX word forms
     # (presumably because de.inflect.VERBS has a high percentage of
     # irregular verbs).
     correct, total = 0, 0
     for v in de.inflect.VERBS.infinitives:
         for tense in de.inflect.VERBS.TENSES:
             # Forms for which conjugate(..., parse=False) yields None are
             # left out of the score (presumably: no lexicon entry for this
             # verb/tense -- confirm against de.conjugate()).
             if de.conjugate(v, tense, parse=False) is None:
                 continue
             if de.inflect._parse_lemma(de.conjugate(v, tense)) == v:
                 correct += 1
             total += 1
     # Fail with a regular assertion instead of a ZeroDivisionError when
     # no verb form was scored at all.
     self.assertTrue(total > 0)
     self.assertTrue(float(correct) / total > 0.85)
     # Call form of print: same output on Python 2, valid syntax on Python 3.
     print("pattern.de.inflect._parse_lemma()")
Ejemplo n.º 2
0
 def test_conjugate(self):
     # Check de.conjugate() for the verb "sein" against a table of
     # (tense, expected form) pairs; the first mismatch fails the test.
     lemma = "sein"
     expected_forms = (
         (de.INFINITIVE, "sein"),
         # Present indicative.
         ((de.PRESENT, 1, de.SINGULAR), "bin"),
         ((de.PRESENT, 2, de.SINGULAR), "bist"),
         ((de.PRESENT, 3, de.SINGULAR), "ist"),
         ((de.PRESENT, 1, de.PLURAL), "sind"),
         ((de.PRESENT, 2, de.PLURAL), "seid"),
         ((de.PRESENT, 3, de.PLURAL), "sind"),
         (de.PRESENT + de.PARTICIPLE, "seiend"),
         # Past indicative.
         ((de.PAST, 1, de.SINGULAR), "war"),
         ((de.PAST, 2, de.SINGULAR), "warst"),
         ((de.PAST, 3, de.SINGULAR), "war"),
         ((de.PAST, 1, de.PLURAL), "waren"),
         ((de.PAST, 2, de.PLURAL), "wart"),
         ((de.PAST, 3, de.PLURAL), "waren"),
         (de.PAST + de.PARTICIPLE, "gewesen"),
         # Imperative.
         ((de.PRESENT, 2, de.SINGULAR, de.IMPERATIVE), "sei"),
         ((de.PRESENT, 1, de.PLURAL, de.IMPERATIVE), "seien"),
         ((de.PRESENT, 2, de.PLURAL, de.IMPERATIVE), "seid"),
         # Present subjunctive.
         ((de.PRESENT, 1, de.SINGULAR, de.SUBJUNCTIVE), u"sei"),
         ((de.PRESENT, 2, de.SINGULAR, de.SUBJUNCTIVE), u"seiest"),
         ((de.PRESENT, 3, de.SINGULAR, de.SUBJUNCTIVE), u"sei"),
         ((de.PRESENT, 1, de.PLURAL, de.SUBJUNCTIVE), u"seien"),
         ((de.PRESENT, 2, de.PLURAL, de.SUBJUNCTIVE), u"seiet"),
         ((de.PRESENT, 3, de.PLURAL, de.SUBJUNCTIVE), u"seien"),
         # Past subjunctive.
         ((de.PAST, 1, de.SINGULAR, de.SUBJUNCTIVE), u"wäre"),
         ((de.PAST, 2, de.SINGULAR, de.SUBJUNCTIVE), u"wärest"),
         ((de.PAST, 3, de.SINGULAR, de.SUBJUNCTIVE), u"wäre"),
         ((de.PAST, 1, de.PLURAL, de.SUBJUNCTIVE), u"wären"),
         ((de.PAST, 2, de.PLURAL, de.SUBJUNCTIVE), u"wäret"),
         ((de.PAST, 3, de.PLURAL, de.SUBJUNCTIVE), u"wären"),
     )
     for tense, form in expected_forms:
         self.assertEqual(de.conjugate(lemma, tense), form)
     print("pattern.de.conjugate()")
Ejemplo n.º 3
0
 def test_conjugate(self):
     # Assert different tenses with different conjugations.
     # Each row is (verb, expected form, tense); the tense is handed to
     # de.conjugate() as a single argument, either a constant or a tuple of
     # (tense, person, number[, mood]).
     for (v1, v2, tense) in (
       ("sein",  "sein",     de.INFINITIVE),
       ("sein",  "bin",     (de.PRESENT, 1, de.SINGULAR)),
       ("sein",  "bist",    (de.PRESENT, 2, de.SINGULAR)),
       ("sein",  "ist",     (de.PRESENT, 3, de.SINGULAR)),
       ("sein",  "sind",    (de.PRESENT, 1, de.PLURAL)),
       ("sein",  "seid",    (de.PRESENT, 2, de.PLURAL)),
       ("sein",  "sind",    (de.PRESENT, 3, de.PLURAL)),
       # The parentheses below are plain grouping, not a tuple: the tense
       # passed is the result of de.PRESENT + de.PARTICIPLE.
       ("sein",  "seiend",  (de.PRESENT + de.PARTICIPLE)),
       ("sein",  "war",     (de.PAST, 1, de.SINGULAR)),
       ("sein",  "warst",   (de.PAST, 2, de.SINGULAR)),
       ("sein",  "war",     (de.PAST, 3, de.SINGULAR)),
       ("sein",  "waren",   (de.PAST, 1, de.PLURAL)),
       ("sein",  "wart",    (de.PAST, 2, de.PLURAL)),
       ("sein",  "waren",   (de.PAST, 3, de.PLURAL)),
       ("sein",  "gewesen", (de.PAST + de.PARTICIPLE)),
       ("sein",  "sei",     (de.PRESENT, 2, de.SINGULAR, de.IMPERATIVE)),
       ("sein",  "seien",   (de.PRESENT, 1, de.PLURAL, de.IMPERATIVE)),
       ("sein",  "seid",    (de.PRESENT, 2, de.PLURAL, de.IMPERATIVE)),
       ("sein", u"sei",     (de.PRESENT, 1, de.SINGULAR, de.SUBJUNCTIVE)),
       ("sein", u"seiest",  (de.PRESENT, 2, de.SINGULAR, de.SUBJUNCTIVE)),
       ("sein", u"sei",     (de.PRESENT, 3, de.SINGULAR, de.SUBJUNCTIVE)),
       ("sein", u"seien",   (de.PRESENT, 1, de.PLURAL, de.SUBJUNCTIVE)),
       ("sein", u"seiet",   (de.PRESENT, 2, de.PLURAL, de.SUBJUNCTIVE)),
       ("sein", u"seien",   (de.PRESENT, 3, de.PLURAL, de.SUBJUNCTIVE)),
       ("sein", u"wäre",    (de.PAST, 1, de.SINGULAR, de.SUBJUNCTIVE)),
       ("sein", u"wärest",  (de.PAST, 2, de.SINGULAR, de.SUBJUNCTIVE)),
       ("sein", u"wäre",    (de.PAST, 3, de.SINGULAR, de.SUBJUNCTIVE)),
       ("sein", u"wären",   (de.PAST, 1, de.PLURAL, de.SUBJUNCTIVE)),
       ("sein", u"wäret",   (de.PAST, 2, de.PLURAL, de.SUBJUNCTIVE)),
       ("sein", u"wären",   (de.PAST, 3, de.PLURAL, de.SUBJUNCTIVE))):
         self.assertEqual(de.conjugate(v1, tense), v2)
     # Call form of print: same output on Python 2, valid syntax on Python 3.
     print("pattern.de.conjugate()")
Ejemplo n.º 4
0
def lemma_via_patternlib(token, pos):
    """Reduce *token* to a base form chosen by its part-of-speech tag *pos*.

    - pos equal to 'NP': result of singularize(token)
    - pos starting with 'V': result of conjugate(token)
    - pos starting with 'ADJ' or 'ADV': result of predicative(token)
    - anything else: token is returned unchanged
    """
    # Noun: singularize.
    if pos == 'NP':
        return singularize(token)

    # Verb: conjugate() with no tense argument is used to get the infinitive.
    if pos.startswith('V'):
        return conjugate(token)

    # Adjective or adverb: base (predicative) form.
    if pos.startswith(('ADJ', 'ADV')):
        return predicative(token)

    return token
Ejemplo n.º 5
0
 def test_conjugate(self):
     # Assert different tenses with different conjugations.
     # Each row is (verb, expected form, tense alias); the alias constant is
     # handed to de.conjugate() as its single tense argument.
     for (v1, v2, tense) in (
       ("sein",  "sein",     de.INFINITIVE),
       ("sein",  "bin",      de.PRESENT_1ST_PERSON_SINGULAR),
       ("sein",  "bist",     de.PRESENT_2ND_PERSON_SINGULAR),
       ("sein",  "ist",      de.PRESENT_3RD_PERSON_SINGULAR),
       ("sein",  "sind",     de.PRESENT_1ST_PERSON_PLURAL),
       ("sein",  "seid",     de.PRESENT_2ND_PERSON_PLURAL),
       ("sein",  "sind",     de.PRESENT_3RD_PERSON_PLURAL),
       ("sein",  "seiend",   de.PRESENT_PARTICIPLE),
       ("sein",  "war",      de.PAST_1ST_PERSON_SINGULAR),
       ("sein",  "warst",    de.PAST_2ND_PERSON_SINGULAR),
       ("sein",  "war",      de.PAST_3RD_PERSON_SINGULAR),
       ("sein",  "waren",    de.PAST_1ST_PERSON_PLURAL),
       ("sein",  "wart",     de.PAST_2ND_PERSON_PLURAL),
       ("sein",  "waren",    de.PAST_3RD_PERSON_PLURAL),
       ("sein",  "gewesen",  de.PAST_PARTICIPLE),
       ("sein",  "sei",      de.IMPERATIVE_2ND_PERSON_SINGULAR),
       ("sein",  "seien",    de.IMPERATIVE_1ST_PERSON_PLURAL),
       ("sein",  "seid",     de.IMPERATIVE_2ND_PERSON_PLURAL),
       ("sein",  "seien",    de.IMPERATIVE_3RD_PERSON_PLURAL),
       ("sein", u"sei",      de.PRESENT_SUBJUNCTIVE_1ST_PERSON_SINGULAR),
       ("sein", u"seiest",   de.PRESENT_SUBJUNCTIVE_2ND_PERSON_SINGULAR),
       ("sein", u"sei",      de.PRESENT_SUBJUNCTIVE_3RD_PERSON_SINGULAR),
       ("sein", u"seien",    de.PRESENT_SUBJUNCTIVE_1ST_PERSON_PLURAL),
       ("sein", u"seiet",    de.PRESENT_SUBJUNCTIVE_2ND_PERSON_PLURAL),
       ("sein", u"seien",    de.PRESENT_SUBJUNCTIVE_3RD_PERSON_PLURAL),
       ("sein", u"wäre",     de.PAST_SUBJUNCTIVE_1ST_PERSON_SINGULAR),
       ("sein", u"wärest",   de.PAST_SUBJUNCTIVE_2ND_PERSON_SINGULAR),
       ("sein", u"wäre",     de.PAST_SUBJUNCTIVE_3RD_PERSON_SINGULAR),
       ("sein", u"wären",    de.PAST_SUBJUNCTIVE_1ST_PERSON_PLURAL),
       ("sein", u"wäret",    de.PAST_SUBJUNCTIVE_2ND_PERSON_PLURAL),
       ("sein", u"wären",    de.PAST_SUBJUNCTIVE_3RD_PERSON_PLURAL)):
         self.assertEqual(de.conjugate(v1, tense), v2)
     # Call form of print: same output on Python 2, valid syntax on Python 3.
     print("pattern.de.conjugate()")
def check_if_subjunctive(word):
    """Return True if *word* is a subjunctive verb form, False otherwise.

    The word is lemmatized with a PatternParserLemmatizer, the lemma is
    conjugated in the subjunctive mood for every combination of tense
    (PRESENT, PAST), number (SG, PL) and person (1, 2, 3), and each result
    is compared with *word*. The first match ends the search.

    word -- a Blob Word object that is a verb
    TODO: must the word be a verb? Saya?
    TODO: write tests for this

    """
    lemmatizer = PatternParserLemmatizer()
    # First element of the first lemmatization result is taken as the lemma.
    lemma = lemmatizer.lemmatize(word)[0][0]
    return any(
        conjugate(lemma, tense, person, number, mood=SUBJUNCTIVE) == word
        for tense in (PRESENT, PAST)
        for number in (SG, PL)
        for person in (1, 2, 3)
    )
Ejemplo n.º 7
0
# Demo script: print a sample text file, then try a few pattern.de helpers.

# Sample text to read; 'besser_gehts_nicht.txt' is the alternative input.
# Only one file is opened, and the with-block closes it after reading.
#python 3 syntax f = open('besser_gehts_nicht.txt', encoding='utf-8')
with open('lives_short.txt') as f:
    raw = f.read()
# print is written in call form throughout: with a single argument it prints
# the same text on Python 2 and is valid syntax on Python 3.
print(raw)

# Grammatical gender of a noun and the matching article.
from pattern.de import gender, MALE, FEMALE, NEUTRAL
from pattern.de import article, DEFINITE, FEMALE, OBJECT
print(gender('Katze'))
print(article('Katze', DEFINITE, gender=FEMALE, role=OBJECT))

# Verb conjugation: an inflected form to the infinitive, then the infinitive
# to 1st person singular present subjunctive.
from pattern.de import conjugate
from pattern.de import INFINITIVE, PRESENT, SG, SUBJUNCTIVE
print(conjugate('sehe', INFINITIVE))
print(conjugate('sehen', PRESENT, 1, SG, mood=SUBJUNCTIVE))

from pattern.de import parse, parsetree, split
"""
import pattern.de
pattern.de.verbs - 1962 verbs. 
pattern.de.tenses 
pattern.de.tenses('erblicken')
pattern.de.conjugate.__doc__


"""
"""
lst=parse(raw)
(Pdb) split(lst)[0]