Beispiel #1
0
def compile():
    """Load variable substitutions from corpus files and apply them to the patterns.

    For every ``(var, filename)`` entry in ``VAR_FILES`` the file
    ``serapis/corpus/<filename>`` is read as UTF-8, split into lines, and the
    lines are joined into a single regex alternation that is stored in
    ``VARS[var]``. Afterwards every entry of the module-level ``patterns``
    mapping is rewritten in place via ``multiple_replace(..., re_style=True)``.

    Note that this function does not itself call ``re.compile`` on the
    resulting patterns.

    Returns:
        The module-level ``patterns`` mapping, after substitution.
    """
    for var, filename in VAR_FILES.items():
        # NOTE(review): the directory is relative, so it is resolved against
        # the current working directory -- confirm callers run from the
        # project root.
        path = os.path.join("serapis/corpus", filename)
        # Use a context manager so the file handle is closed deterministically
        # (also when reading or decoding raises) instead of being leaked.
        with codecs.open(path, 'r', 'utf-8') as corpus_file:
            tokens = corpus_file.read().splitlines()
        # Tokens are inserted unescaped: one alternative per line, wrapped in
        # word boundaries and followed by an optional single space.
        VARS[var] = r"\b({})\b ?".format("|".join(tokens))

    # Prepare patterns: only existing keys are reassigned, so the mapping's
    # size does not change while it is being iterated.
    for key, pattern in patterns.items():
        patterns[key] = multiple_replace(pattern, VARS, re_style=True)

    return patterns
Beispiel #2
0
def compile():
    """Build the regex variables from corpus files and expand the patterns.

    Each file named in ``VAR_FILES`` is looked up under ``serapis/corpus``,
    decoded as UTF-8 and split into lines. The lines become the alternatives
    of one regex group, which is stored under the same key in ``VARS``. Every
    value in the module-level ``patterns`` mapping is then replaced by the
    result of ``multiple_replace(pattern, VARS, re_style=True)``.

    No ``re.compile`` call is made here; the function only performs the
    textual substitution.

    Returns:
        The module-level ``patterns`` mapping with substitutions applied.
    """
    corpus_dir = "serapis/corpus"  # relative path: resolved against the CWD
    for var, filename in VAR_FILES.items():
        # The ``with`` block guarantees the handle is closed once the content
        # has been read, rather than relying on garbage collection.
        with codecs.open(os.path.join(corpus_dir, filename), 'r',
                         'utf-8') as handle:
            tokens = handle.read().splitlines()
        # One alternative per corpus line, delimited by word boundaries and
        # followed by an optional trailing space. Lines are not regex-escaped.
        VARS[var] = r"\b({})\b ?".format("|".join(tokens))

    # Prepare patterns (values of existing keys are overwritten in place).
    for key, pattern in patterns.items():
        patterns[key] = multiple_replace(pattern, VARS, re_style=True)

    return patterns
Beispiel #3
0
def clean_sentence(sentence, term, replacement='_TERM_'):
    """Substitute every variant of ``term`` found in ``sentence``.

    The variants are obtained from ``collect_variants(sentence, term)``; each
    one is mapped to ``replacement`` and the substitution is carried out by
    ``multiple_replace``. When no variants are found the sentence is returned
    untouched.

    Illustrative example (the exact variants depend on ``collect_variants``)::

        clean_sentence("I've had a Déjà Vu!", "deja-vu")
        # -> a cleaned sentence such as "I've had a _TERM_!"
        #    together with the variants found, e.g. ["Déjà Vu"]

    Args:
        sentence: str
        term: str
        replacement: str
    Returns:
        tuple -- ``(cleaned_sentence, variants)``.
    """
    variants = collect_variants(sentence, term)
    if not variants:
        # Nothing to replace: hand back the input sentence as-is.
        return sentence, variants
    substitutions = dict.fromkeys(variants, replacement)
    return multiple_replace(sentence, substitutions), variants
def clean_sentence(sentence, term, replacement='_TERM_'):
    """Replace all variants of a term in a sentence with a placeholder.

    ``collect_variants(sentence, term)`` supplies the variants. If it yields
    anything truthy, a variant-to-replacement mapping is built and passed to
    ``multiple_replace``; otherwise the original sentence is kept.

    Illustrative example (actual matches depend on ``collect_variants``)::

        s_clean, variants = clean_sentence("I've had a Déjà Vu!", "deja-vu")
        # s_clean  -> the sentence with each variant replaced by "_TERM_"
        # variants -> the variants that were found, e.g. ["Déjà Vu"]

    Args:
        sentence: str
        term: str
        replacement: str
    Returns:
        tuple -- Contains the cleaned sentence and all variants found.
    """
    found = collect_variants(sentence, term)
    cleaned = sentence
    if found:
        # Every variant maps to the same replacement string.
        mapping = {}
        for variant in found:
            mapping[variant] = replacement
        cleaned = multiple_replace(sentence, mapping)
    return cleaned, found