Esempio n. 1
0
def process_address_1(instance: Instance) -> Instance:
    """Build a new Instance whose ``normalized`` text holds only a-z and spaces.

    ``token_type`` and ``un_normalized`` are carried over unchanged.

    Args:
        instance: Source record; its ``token_type``, ``un_normalized`` and
            ``normalized`` attributes are read.

    Returns:
        A freshly constructed ``Instance`` with the cleaned ``normalized``.
    """
    raw_text = instance.un_normalized
    # Everything outside lowercase a-z and the space character is dropped,
    # which includes uppercase letters, digits and punctuation.
    spoken_text = re.sub(r"[^a-z ]", "", instance.normalized)
    return Instance(
        token_type=instance.token_type,
        un_normalized=raw_text,
        normalized=spoken_text,
    )
Esempio n. 2
0
def process_cardinal_1(instance: Instance) -> Instance:
    """Build a new Instance with digit-only written text and a-z/space spoken text.

    Args:
        instance: Source record; its ``token_type``, ``un_normalized`` and
            ``normalized`` attributes are read.

    Returns:
        A freshly constructed ``Instance`` with the same ``token_type``,
        ``un_normalized`` reduced to the characters 0-9, and ``normalized``
        reduced to lowercase a-z and spaces.
    """
    written, spoken = instance.un_normalized, instance.normalized

    # Any non-digit is removed, so signs, separators and decimal points
    # in the written form do not survive.
    digits_only = re.sub(r"[^0-9]", "", written)
    words_only = re.sub(r"[^a-z ]", "", spoken)

    return Instance(
        token_type=instance.token_type,
        un_normalized=digits_only,
        normalized=words_only,
    )
Esempio n. 3
0
def process_time_1(instance: Instance) -> Instance:
    """Build a new Instance with tidied time markers in the written text.

    The written form (``un_normalized``) is rewritten in three ordered steps:
    a colon followed by a space loses the space, and a digit followed by
    ``am`` / ``pm`` (optionally spaced as ``a m`` / ``p m``) becomes
    ``<digit> a.m.`` / ``<digit> p.m.``. The spoken form (``normalized``)
    is reduced to lowercase a-z and spaces.

    Args:
        instance: Source record; its ``token_type``, ``un_normalized`` and
            ``normalized`` attributes are read.

    Returns:
        A freshly constructed ``Instance`` with the same ``token_type``.
    """
    written = instance.un_normalized
    # Applied strictly in this order.
    # NOTE(review): the trailing ``\s?`` in the a.m./p.m. patterns consumes one
    # whitespace character after the marker, so any following text ends up
    # directly attached to "a.m." / "p.m." -- confirm this is intended.
    rewrites = (
        (r": ", ":"),
        (r"(\d)\s?a\s?m\s?", r"\1 a.m."),
        (r"(\d)\s?p\s?m\s?", r"\1 p.m."),
    )
    for pattern, replacement in rewrites:
        written = re.sub(pattern, replacement, written)

    spoken = re.sub(r"[^a-z ]", "", instance.normalized)

    return Instance(
        token_type=instance.token_type,
        un_normalized=written,
        normalized=spoken,
    )
Esempio n. 4
0
def process_money_1(instance: Instance) -> Instance:
    """Build a new Instance with a simplified written money amount.

    The written form (``un_normalized``) goes through these ordered rewrites:
    commas are removed, ``a$`` and ``us$`` collapse to ``$``, and a digit
    followed by a final ``m`` or ``b``/``bn`` is expanded to
    ``<digit> million`` / ``<digit> billion``. The spoken form
    (``normalized``) is reduced to lowercase a-z and spaces.

    Args:
        instance: Source record; its ``token_type``, ``un_normalized`` and
            ``normalized`` attributes are read.

    Returns:
        A freshly constructed ``Instance`` with the same ``token_type``.
    """
    written, spoken = instance.un_normalized, instance.normalized

    rewrites = (
        (r",", ""),
        # Presumably currency prefixes such as Australian / US dollars --
        # the patterns themselves match any "a$" or "us$" substring.
        (r"a\$", r"$"),
        (r"us\$", r"$"),
        # Magnitude suffixes only count at the end of the text
        # (trailing whitespace allowed and discarded).
        (r"(\d)m\s*$", r"\1 million"),
        (r"(\d)bn?\s*$", r"\1 billion"),
    )
    for pattern, replacement in rewrites:
        written = re.sub(pattern, replacement, written)

    spoken = re.sub(r"[^a-z ]", "", spoken)

    return Instance(
        token_type=instance.token_type,
        un_normalized=written,
        normalized=spoken,
    )
Esempio n. 5
0
def process_measure_1(instance: Instance) -> Instance:
    """Build a new Instance with tidied written and spoken measurement text.

    Written form (``un_normalized``), in order: commas are removed, ``m2``
    becomes ``m²``, and a space is inserted between a digit and a directly
    following character that is not a digit, a dot or whitespace.

    Spoken form (``normalized``), in order: everything except a-z and
    whitespace is removed, a final ``s`` at the end of the text following
    ``per `` is dropped, and finally everything except a-z and the plain
    space character is removed.

    Args:
        instance: Source record; its ``token_type``, ``un_normalized`` and
            ``normalized`` attributes are read.

    Returns:
        A freshly constructed ``Instance`` with the same ``token_type``.
    """
    written, spoken = instance.un_normalized, instance.normalized

    written_rewrites = (
        (r",", ""),
        (r"m2", "m²"),
        (r"(\d)([^\d.\s])", r"\1 \2"),
    )
    for pattern, replacement in written_rewrites:
        written = re.sub(pattern, replacement, written)

    # Order matters: the first pass keeps every whitespace character, the
    # "per ..." rule runs on that text, and only the last pass discards
    # whitespace other than the plain space.
    spoken_rewrites = (
        (r"[^a-z\s]", ""),
        (r"per ([a-z\s]*)s$", r"per \1"),
        (r"[^a-z ]", ""),
    )
    for pattern, replacement in spoken_rewrites:
        spoken = re.sub(pattern, replacement, spoken)

    return Instance(
        token_type=instance.token_type,
        un_normalized=written,
        normalized=spoken,
    )
Esempio n. 6
0
def process_verbatim_1(instance: Instance) -> Instance:
    """Build a new Instance that copies all three fields without modification.

    Args:
        instance: Source record; its ``token_type``, ``un_normalized`` and
            ``normalized`` attributes are read.

    Returns:
        A freshly constructed ``Instance`` holding the same field values.
    """
    # No rewriting is applied; both text fields pass straight through.
    text_fields = {
        "un_normalized": instance.un_normalized,
        "normalized": instance.normalized,
    }
    return Instance(token_type=instance.token_type, **text_fields)