def compile():
    """Load variable substitutions from corpus files and apply them to the patterns.

    For every ``(var, filename)`` pair in ``VAR_FILES``, the file
    ``serapis/corpus/<filename>`` is read as UTF-8 and split into lines.
    The lines are joined with ``|`` into a single alternation group that is
    wrapped in word boundaries and followed by an optional space; the result
    is stored in ``VARS[var]``.

    Afterwards every entry of the module-level ``patterns`` mapping is
    rewritten with ``multiple_replace(pattern, VARS, re_style=True)``.

    Notes:
        * The corpus path is relative, so it is resolved against the current
          working directory of the process.
        * Tokens are inserted verbatim (no ``re.escape``), so any regex
          metacharacters in the corpus files become part of the expression.
        * NOTE(review): an earlier version of this docstring said the
          function makes sure all patterns compile to regular expressions,
          but nothing here calls ``re.compile`` -- confirm whether that
          validation happens elsewhere.

    Returns:
        dict -- the module-level ``patterns`` mapping, updated in place.
    """
    for var, filename in VAR_FILES.items():
        corpus_path = os.path.join("serapis/corpus", filename)
        # Use a context manager so the file handle is closed deterministically
        # instead of being left open until garbage collection.
        with codecs.open(corpus_path, 'r', 'utf-8') as corpus_file:
            tokens = corpus_file.read().splitlines()
        VARS[var] = r"\b({})\b ?".format("|".join(tokens))

    # Prepare patterns: only values of existing keys are reassigned, so the
    # mapping's size does not change while it is being iterated.
    for key, pattern in patterns.items():
        patterns[key] = multiple_replace(pattern, VARS, re_style=True)
    return patterns
def compile():
    """Load variable substitutions from corpus files and apply them to the patterns.

    For every ``(var, filename)`` pair in ``VAR_FILES``, the file
    ``serapis/corpus/<filename>`` is read as UTF-8 and split into lines.
    The lines are joined with ``|`` into a single alternation group that is
    wrapped in word boundaries and followed by an optional space; the result
    is stored in ``VARS[var]``.

    Afterwards every entry of the module-level ``patterns`` mapping is
    rewritten with ``multiple_replace(pattern, VARS, re_style=True)``.

    Notes:
        * The corpus path is relative, so it is resolved against the current
          working directory of the process.
        * Tokens are inserted verbatim (no ``re.escape``), so any regex
          metacharacters in the corpus files become part of the expression.
        * NOTE(review): an earlier version of this docstring said the
          function makes sure all patterns compile to regular expressions,
          but nothing here calls ``re.compile`` -- confirm whether that
          validation happens elsewhere.

    Returns:
        dict -- the module-level ``patterns`` mapping, updated in place.
    """
    for var, filename in VAR_FILES.items():
        corpus_path = os.path.join("serapis/corpus", filename)
        # Use a context manager so the file handle is closed deterministically
        # instead of being left open until garbage collection.
        with codecs.open(corpus_path, 'r', 'utf-8') as corpus_file:
            tokens = corpus_file.read().splitlines()
        VARS[var] = r"\b({})\b ?".format("|".join(tokens))

    # Prepare patterns: only values of existing keys are reassigned, so the
    # mapping's size does not change while it is being iterated.
    for key, pattern in patterns.items():
        patterns[key] = multiple_replace(pattern, VARS, re_style=True)
    return patterns
def clean_sentence(sentence, term, replacement='_TERM_'):
    """Substitute every variant of ``term`` found in ``sentence``.

    The variants are obtained from ``collect_variants(sentence, term)``; each
    one is mapped to ``replacement`` and the substitution is carried out by
    ``multiple_replace``. When no variants are found the sentence is returned
    untouched.

    Example (illustrative -- the exact result depends on ``collect_variants``
    and ``multiple_replace``)::

        s_clean, variants = clean_sentence("I've had a Déjà Vu!", "deja-vu")
        # variants is expected to hold "Déjà Vu", and s_clean the sentence
        # with that span replaced by "_TERM_".

    Args:
        sentence: str -- text to search.
        term: str -- term whose variants should be replaced.
        replacement: str -- text inserted in place of each variant.

    Returns:
        tuple -- ``(cleaned_sentence, variants)``, where ``variants`` is
        whatever ``collect_variants`` returned.
    """
    found = collect_variants(sentence, term)
    # Guard clause: nothing to substitute, hand the input back unchanged.
    if not found:
        return sentence, found
    substitutions = dict.fromkeys(found, replacement)
    return multiple_replace(sentence, substitutions), found
def clean_sentence(sentence, term, replacement='_TERM_'):
    """Substitute every variant of ``term`` found in ``sentence``.

    The variants are obtained from ``collect_variants(sentence, term)``; each
    one is mapped to ``replacement`` and the substitution is carried out by
    ``multiple_replace``. When no variants are found the sentence is returned
    untouched.

    Example (illustrative -- the exact result depends on ``collect_variants``
    and ``multiple_replace``)::

        s_clean, variants = clean_sentence("I've had a Déjà Vu!", "deja-vu")
        # variants is expected to hold "Déjà Vu", and s_clean the sentence
        # with that span replaced by "_TERM_".

    Args:
        sentence: str -- text to search.
        term: str -- term whose variants should be replaced.
        replacement: str -- text inserted in place of each variant.

    Returns:
        tuple -- ``(cleaned_sentence, variants)``, where ``variants`` is
        whatever ``collect_variants`` returned.
    """
    found = collect_variants(sentence, term)
    # Guard clause: nothing to substitute, hand the input back unchanged.
    if not found:
        return sentence, found
    substitutions = dict.fromkeys(found, replacement)
    return multiple_replace(sentence, substitutions), found