def set_repetitions(doc):
    """Run every repetition check over each non-empty sentence of *doc*.

    For each sentence yielded by ``all_sents(doc)`` whose ``"Words"`` list is
    non-empty, the grammeme repetition check is invoked once per category
    (part of speech, gender, number, case, tense, person), followed by the
    attribute repetition check on ``"lex"``.  Sentences without words are
    skipped.

    NOTE(review): the literal ``1`` is forwarded unchanged as the second
    argument of every check; its meaning is defined by the helpers
    (presumably a look-back distance -- confirm).
    """
    # (flag name, grammeme set) pairs, in the order the checks must run.
    grammeme_checks = (
        ("posred", PARTS_OF_SPEECH),
        ("genderred", GENDERS),
        ("numberred", NUMBERS),
        ("casered", CASES),
        ("tensered", TENSES),
        ("personred", PERSONS),
    )

    for sentence in all_sents(doc):
        sentence_words = sentence["Words"]
        if sentence_words:
            for flag_name, grammemes in grammeme_checks:
                _check_grammeme_repetition(
                    sentence_words, 1, flag_name, grammemes
                )
            _check_attr_repetition(sentence_words, 1, "lexred", "lex")
def set_marks(doc):
    """Flag words according to the punctuation marks adjacent to them.

    For every sentence yielded by ``all_sents(doc)``:

    * Each word's own ``"Punct"`` value is passed to ``_inspect_marks``.
      When that yields a truthy result, the values are inserted on the word
      itself under the ``"a"`` prefix and, if the word is not the first in
      the sentence, on the preceding word under the ``"b"`` prefix.
    * The sentence-level ``"Punct"`` value is inspected the same way; a
      truthy result is inserted on the last word under the ``"b"`` prefix,
      provided the sentence has at least one word.

    NOTE(review): "a"/"b" presumably stand for "after"/"before" the mark --
    confirm against ``_insert_flags``.

    This single definition replaces two identical copies of the function
    that previously followed one another (the second silently rebound the
    first).
    """
    for sent in all_sents(doc):
        words = sent["Words"]

        for i, word in enumerate(words):
            values = _inspect_marks(word["Punct"])
            if not values:
                continue
            _insert_flags(word, "a", values)
            # The same mark also belongs to the word on its other side.
            if i > 0:
                _insert_flags(words[i - 1], "b", values)

        # Sentence-level punctuation is attached to the final word only;
        # the ``words`` check guards the [-1] lookup on empty sentences.
        values = _inspect_marks(sent["Punct"])
        if values and words:
            _insert_flags(words[-1], "b", values)
def set_first_last(doc):
    """Flag the opening and closing word of every non-empty sentence.

    For each sentence yielded by ``all_sents(doc)`` that has at least one
    word, the first word receives the ``"first"`` flag and the last word
    receives the ``"last"`` flag (for a one-word sentence the same word gets
    both).  Sentences with no words are left untouched.
    """
    for sentence in all_sents(doc):
        sentence_words = sentence["Words"]
        if sentence_words:
            _insert_flag(sentence_words[0], "first")
            _insert_flag(sentence_words[-1], "last")
def normalize_punct(doc):
    """Normalize the ``"Punct"`` value of every word and sentence in *doc*.

    Each item yielded by ``all_words(doc)`` and then by ``all_sents(doc)``
    has its ``"Punct"`` entry replaced, in place, with the result of
    ``_normalize_punct_str`` applied to the previous value.
    """
    # Words are processed first, then sentences; each collector is only
    # called once the previous pass has finished.
    for collect in (all_words, all_sents):
        for item in collect(doc):
            item["Punct"] = _normalize_punct_str(item["Punct"])