Ejemplo n.º 1
0
def set_repetitions(doc):
    """Run every repetition check over each non-empty sentence of *doc*.

    For each sentence yielded by ``all_sents(doc)`` whose ``"Words"`` list is
    non-empty, ``_check_grammeme_repetition`` is called once per grammeme
    category (part of speech, gender, number, case, tense, person), followed
    by one ``_check_attr_repetition`` call for the ``"lex"`` attribute.

    The literal ``1`` is forwarded unchanged as the second argument of every
    helper call; its meaning is defined by the helpers (presumably a
    distance/window size -- TODO confirm).
    """
    for sentence in all_sents(doc):
        tokens = sentence["Words"]
        if tokens:
            # (flag name, grammeme set) pairs, in the order they are checked.
            grammeme_checks = (
                ("posred", PARTS_OF_SPEECH),
                ("genderred", GENDERS),
                ("numberred", NUMBERS),
                ("casered", CASES),
                ("tensered", TENSES),
                ("personred", PERSONS),
            )
            for flag_name, grammemes in grammeme_checks:
                _check_grammeme_repetition(tokens, 1, flag_name, grammemes)
            _check_attr_repetition(tokens, 1, "lexred", "lex")
Ejemplo n.º 2
0
def set_marks(doc):
    """Flag words according to the punctuation attached to them.

    For every sentence yielded by ``all_sents(doc)``:

    * each word's ``"Punct"`` value is passed to ``_inspect_marks``; when the
      result is truthy it is handed to ``_insert_flags`` for that word with
      the ``"a"`` tag and, if the word is not the first one, for the
      preceding word with the ``"b"`` tag;
    * the sentence's own ``"Punct"`` value is inspected the same way and,
      when the sentence has at least one word, handed to ``_insert_flags``
      for the last word with the ``"b"`` tag.

    NOTE(review): "a"/"b" presumably mean "after"/"before" a mark, i.e. a
    word's "Punct" holds the punctuation that precedes it -- confirm against
    ``_insert_flags``.
    """
    for sent in all_sents(doc):
        words = sent["Words"]
        # enumerate() replaces xrange(len(words)): same order, no repeated
        # indexing, and it works on both Python 2 and Python 3.
        for i, word in enumerate(words):
            values = _inspect_marks(word["Punct"])
            if values:
                _insert_flags(word, "a", values)
                if i > 0:
                    _insert_flags(words[i - 1], "b", values)
        # Sentence-level punctuation is attributed to the last word only.
        values = _inspect_marks(sent["Punct"])
        if values and words:
            _insert_flags(words[-1], "b", values)
Ejemplo n.º 3
0
def set_marks(doc):
    """Flag words according to the punctuation attached to them.

    For every sentence yielded by ``all_sents(doc)``:

    * each word's ``"Punct"`` value is passed to ``_inspect_marks``; when the
      result is truthy it is handed to ``_insert_flags`` for that word with
      the ``"a"`` tag and, if the word is not the first one, for the
      preceding word with the ``"b"`` tag;
    * the sentence's own ``"Punct"`` value is inspected the same way and,
      when the sentence has at least one word, handed to ``_insert_flags``
      for the last word with the ``"b"`` tag.

    NOTE(review): "a"/"b" presumably mean "after"/"before" a mark, i.e. a
    word's "Punct" holds the punctuation that precedes it -- confirm against
    ``_insert_flags``.
    """
    for sent in all_sents(doc):
        words = sent["Words"]
        # enumerate() replaces xrange(len(words)): same order, no repeated
        # indexing, and it works on both Python 2 and Python 3.
        for i, word in enumerate(words):
            values = _inspect_marks(word["Punct"])
            if values:
                _insert_flags(word, "a", values)
                if i > 0:
                    _insert_flags(words[i - 1], "b", values)
        # Sentence-level punctuation is attributed to the last word only.
        values = _inspect_marks(sent["Punct"])
        if values and words:
            _insert_flags(words[-1], "b", values)
Ejemplo n.º 4
0
def set_first_last(doc):
    """Flag the first and last word of every non-empty sentence in *doc*.

    For each sentence yielded by ``all_sents(doc)`` that has at least one
    word, ``_insert_flag`` is called with ``"first"`` on the first word and
    with ``"last"`` on the last word. In a one-word sentence the same word
    receives both calls. Sentences without words are skipped.
    """
    for sentence in all_sents(doc):
        tokens = sentence["Words"]
        if tokens:
            _insert_flag(tokens[0], "first")
            _insert_flag(tokens[-1], "last")
Ejemplo n.º 5
0
def normalize_punct(doc):
    """Normalize the ``"Punct"`` entry of every word and sentence in *doc*.

    Each item yielded by ``all_words(doc)`` and then each item yielded by
    ``all_sents(doc)`` has its ``"Punct"`` value replaced with the result of
    ``_normalize_punct_str`` applied to the old value. Words are processed
    before sentences.
    """
    for token in all_words(doc):
        cleaned = _normalize_punct_str(token["Punct"])
        token["Punct"] = cleaned
    for sentence in all_sents(doc):
        cleaned = _normalize_punct_str(sentence["Punct"])
        sentence["Punct"] = cleaned
Ejemplo n.º 6
0
def normalize_punct(doc):
    """Rewrite every ``"Punct"`` value in *doc* through ``_normalize_punct_str``.

    First all words (``all_words(doc)``), then all sentences
    (``all_sents(doc)``) are visited; each one's ``"Punct"`` entry is
    overwritten with its normalized form.
    """
    # Word-level punctuation first ...
    for w in all_words(doc):
        raw_punct = w["Punct"]
        w["Punct"] = _normalize_punct_str(raw_punct)
    # ... then sentence-level punctuation.
    for s in all_sents(doc):
        raw_punct = s["Punct"]
        s["Punct"] = _normalize_punct_str(raw_punct)