Example #1
0
    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        """Build the classifier FST for decimal numbers.

        The finished grammar emits ``integer_part`` and ``fractional_part``
        fields, an optional ``negative`` field, and a trailing
        ``preserve_order: true`` marker.

        Args:
            cardinal: cardinal tagger; its ``graph``, ``tens`` and
                ``hundreds`` attributes are reused here.
            deterministic: when False, extra alternative readings are
                accepted for both the integer and the fractional part.
        """
        super().__init__(name="decimal", kind="classify", deterministic=deterministic)
        any_digit = digit | zero

        if deterministic:
            # General pattern seems to be 1-3 digits: map as cardinal, default to digits otherwise
            digit_by_digit = any_digit + pynini.closure(insert_space + any_digit, 3)
            # For cases such as "1,010"
            zero_led = (
                zero
                + pynini.closure(insert_space + zero)
                + pynini.closure(insert_space + any_digit)
            )
            fractional_words = pynini.union(
                any_digit, cardinal.tens, cardinal.hundreds, digit_by_digit, zero_led
            )
        else:
            # Any space-separated sequence of digits, tens and hundreds is allowed
            chunk = pynini.union(any_digit, cardinal.hundreds, cardinal.tens)
            fractional_words = chunk + pynini.closure(insert_space + chunk)

        # Need to strip apocope everywhere BUT end of string:
        # "un"/"ún" becomes "uno" whenever a space follows it
        undo_apocope = pynini.cdrewrite(
            pynini.string_map([("un", "uno"), ("ún", "uno")]), "", NEMO_SPACE, NEMO_SIGMA
        )
        fractional_words = fractional_words @ undo_apocope

        # Technically decimals should be space delineated groups of three, e.g. (1,333 333).
        # This removes any possible spaces from the input before it reaches the grammar
        drop_spaces = pynini.cdrewrite(delete_space, "", "", NEMO_SIGMA)
        fractional_words = drop_spaces @ fractional_words

        self.graph = fractional_words.optimize()
        self.graph_fractional = (
            pynutil.insert("fractional_part: \"") + self.graph + pynutil.insert("\"")
        )

        # Integer graph maintains apocope except for ones place
        if deterministic:
            integer_words = strip_cardinal_apocope(cardinal.graph)
        else:
            # Gives us forms w/ and w/o apocope
            integer_words = pynini.union(
                cardinal.graph, strip_cardinal_apocope(cardinal.graph)
            )
        self.graph_integer = (
            pynutil.insert("integer_part: \"") + integer_words + pynutil.insert("\"")
        )

        separator = pynutil.delete(decimal_separator)
        unsigned = self.graph_integer + separator + insert_space + self.graph_fractional

        # Plain decimal, or the decimal extended by get_quantity
        self.final_graph_wo_negative = (
            unsigned | get_quantity(unsigned, cardinal.graph).optimize()
        )

        # A leading "-" becomes the field `negative: "true" `
        sign = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1
        )
        tagged = sign + self.final_graph_wo_negative
        tagged = tagged + pynutil.insert(" preserve_order: true")
        tagged = self.add_tokens(tagged)

        self.fst = tagged.optimize()
Example #2
0
    def __init__(self, deterministic: bool = True):
        """Build the verbalizer FST for cardinal tokens.

        Exposes ``self.optional_sign`` and ``self.integer`` as reusable
        pieces, ``self.graph_masc`` / ``self.graph_fem`` as the gendered
        readings, and ``self.fst`` as the complete grammar.

        Args:
            deterministic: when False, readings with apocope stripped
                (masculine) or added (feminine) are accepted as well.
        """
        super().__init__(name="cardinal", kind="verbalize", deterministic=deterministic)

        # An optional `negative: "true" ` field is read out as "menos "
        self.optional_sign = pynini.closure(
            pynini.cross("negative: \"true\" ", "menos "), 0, 1
        )

        # A non-empty quoted value; the leading space and both quotes are removed
        quoted_body = pynini.closure(NEMO_NOT_QUOTE, 1)
        self.integer = pynutil.delete(" \"") + quoted_body + pynutil.delete("\"")

        integer_field = pynutil.delete("integer:") + self.integer

        masculine = self.optional_sign + integer_field
        feminine = shift_cardinal_gender(masculine)

        self.graph_masc = pynini.optimize(masculine)
        self.graph_fem = pynini.optimize(feminine)

        # Adding adjustment for fem gender (choice of gender will be random)
        readings = masculine | feminine

        if not deterministic:
            # For alternate renderings when apocope is omitted (i.e. cardinal stands alone)
            readings = readings | strip_cardinal_apocope(masculine)
            # "una" will drop to "un" in unique contexts
            readings = readings | add_cardinal_apocope_fem(feminine)

        self.fst = self.delete_tokens(readings).optimize()
Example #3
0
    def __init__(self, deterministic: bool = True):
        """Build the FST that reads out tagged decimal tokens.

        Consumes ``negative``, ``integer_part``, ``fractional_part`` and
        ``quantity`` fields. Exposes ``self.graph_masc``, ``self.graph_fem``
        and ``self.numbers_only_quantity`` in addition to ``self.fst``.

        Args:
            deterministic: when False, " con " / " y " are accepted as
                separators and extra apocope variants are added.
        """
        # NOTE(review): this grammar consumes tagged fields and ends with
        # delete_tokens, yet registers with kind="classify" - confirm intended.
        super().__init__(name="decimal", kind="classify", deterministic=deterministic)

        sign = pynini.closure(
            pynini.cross("negative: \"true\"", "menos ") + delete_space, 0, 1
        )
        integer = (
            pynutil.delete("integer_part: \"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )
        fraction_words = (
            pynutil.delete("fractional_part: \"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )

        # Word placed between integer and fractional part depends on the locale
        if LOCALIZATION == "am":
            separator_word = pynutil.insert(" punto ")
        else:
            separator_word = pynutil.insert(" coma ")

        if not deterministic:
            separator_word = separator_word | pynutil.insert(pynini.union(" con ", " y "))
            fraction_words = fraction_words | strip_cardinal_apocope(fraction_words)
        fractional = separator_word + fraction_words

        quantity = (
            delete_space
            + insert_space
            + pynutil.delete("quantity: \"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )
        maybe_quantity = pynini.closure(quantity, 0, 1)

        # Either "<integer> <quantity>" or "<integer> <sep> <fraction> [<quantity>]"
        masculine = sign + pynini.union(
            integer + quantity,
            integer + delete_space + fractional + maybe_quantity,
        )

        # Allowing permutation for fem gender, don't include quantity since "million","billion", etc.. are masculine
        feminine = sign + (
            shift_cardinal_gender(integer) + delete_space + shift_number_gender(fractional)
        )
        if not deterministic:
            # "una" will drop to "un" in certain cases
            feminine = feminine | add_cardinal_apocope_fem(feminine)

        # Same as the masculine graph, but the quantity is mandatory in both branches
        self.numbers_only_quantity = sign + pynini.union(
            integer + quantity,
            integer + delete_space + fractional + quantity,
        ).optimize()

        self.graph_masc = (masculine + delete_preserve_order).optimize()
        self.graph_fem = (feminine + delete_preserve_order).optimize()

        combined = (masculine | feminine) + delete_preserve_order
        self.fst = self.delete_tokens(combined).optimize()
Example #4
0
    def __init__(self, deterministic: bool = True):
        """Build the verbalizer FST for date tokens.

        Reads ``day``, ``month`` and an optional ``year`` field in either
        day-month-year or month-day-year order. ``self.graph`` holds the
        union of both orders; ``self.fst`` is the complete grammar.

        Args:
            deterministic: when True, a stand-alone "uno" day is always
                rendered "primero" and the month-day-year order requires an
                explicit ``preserve_order: true``; when False, both day
                renderings are accepted and that marker is not required.
        """
        super().__init__(name="date", kind="verbalize", deterministic=deterministic)

        day_number = strip_cardinal_apocope(
            pynutil.delete("day: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
        )

        # Primero for first day is traditional, but will vary depending on region
        uno_to_primero = pynini.cdrewrite(
            pynini.cross("uno", "primero"), "[BOS]", "[EOS]", NEMO_SIGMA
        )
        if deterministic:
            day = day_number @ uno_to_primero
        else:
            day = pynini.union(day_number, day_number @ uno_to_primero)

        month = pynutil.delete("month: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")

        # Year whose value already starts with an article: passed through unchanged.
        # It carries a small negative weight (presumably so it beats the fallback below).
        year_with_article = pynutil.add_weight(
            pynutil.delete("year: \"")
            + articles
            + NEMO_SPACE
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\""),
            -0.001,
        )
        # Insert preposition if wasn't originally with the year. This would mean a space was present
        year_with_de = (
            pynutil.delete("year: \"")
            + pynutil.insert("de ")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )
        year = year_with_article | year_with_de

        # day month year
        trailing_year = pynini.closure(pynini.accep(" ") + year, 0, 1)
        graph_dmy = day + pynini.cross(NEMO_SPACE, " de ") + month + trailing_year

        # month day year
        graph_mdy = month + NEMO_SPACE + day + pynini.closure(NEMO_SPACE + year, 0, 1)
        if deterministic:
            # Only accepts this if was explicitly passed
            graph_mdy = graph_mdy + pynutil.delete(" preserve_order: true")

        self.graph = graph_dmy | graph_mdy

        self.fst = self.delete_tokens(self.graph + delete_preserve_order).optimize()
Example #5
0
    def __init__(self, deterministic: bool = True):
        """Build the verbalizer FST for fraction tokens.

        Reads the optional ``integer_part``, the ``numerator``, the
        ``denominator`` and, where present, the ``morphosyntactic_features``
        field. Masculine and feminine variants are exposed as
        ``self.graph_masc`` and ``self.graph_fem``; ``self.graph`` is their
        union and ``self.fst`` is that union passed through ``delete_tokens``.

        Args:
            deterministic: when False, additional renderings are accepted
                ("undécimo"/"duodécimo", feminine "... parte(s)" forms, and
                variants in which only the stem is merged).
        """
        super().__init__(name="fraction",
                         kind="verbalize",
                         deterministic=deterministic)

        # Derivational strings append 'avo' as a suffix. Adding space for processing aid
        fraction_stem = pynutil.insert(" avo")
        plural = pynutil.insert("s")
        conjunction = pynutil.insert(" y ")

        integer = (pynutil.delete("integer_part: \"") +
                   strip_cardinal_apocope(pynini.closure(NEMO_NOT_QUOTE)) +
                   pynutil.delete("\""))

        # A numerator that is exactly "un" is handled separately from every other numerator
        numerator_one = pynutil.delete("numerator: \"") + pynini.accep(
            "un") + pynutil.delete("\" ")
        numerator = (pynutil.delete("numerator: \"") +
                     pynini.difference(pynini.closure(NEMO_NOT_QUOTE), "un") +
                     pynutil.delete("\" "))

        # Three denominator shapes, told apart by the trailing morphosyntactic_features field:
        # "add_root" (the " avo" stem is appended), "ordinal" (used as-is), or no feature (cardinal)
        denominator_add_stem = pynutil.delete("denominator: \"") + (
            pynini.closure(NEMO_NOT_QUOTE) + fraction_stem +
            pynutil.delete("\" morphosyntactic_features: \"add_root\""))
        denominator_ordinal = pynutil.delete("denominator: \"") + (
            pynini.closure(NEMO_NOT_QUOTE) +
            pynutil.delete("\" morphosyntactic_features: \"ordinal\""))
        denominator_cardinal = pynutil.delete("denominator: \"") + (
            pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\""))

        denominator_singular = pynini.union(denominator_add_stem,
                                            denominator_ordinal)
        if not deterministic:
            # Occasional exceptions
            denominator_singular |= denominator_add_stem @ pynini.string_map(
                [("once avo", "undécimo"), ("doce avo", "duodécimo")])
        denominator_plural = denominator_singular + plural

        # Merging operations
        merge = pynini.cdrewrite(
            pynini.cross(" y ", "i"), "", "", NEMO_SIGMA
        )  # The denominator must be a single word, with the conjunction "y" replaced by i
        # NOTE(review): the right context subtracts the multi-character string "parte" from
        # NEMO_CHAR; if NEMO_CHAR only holds single characters this difference has no effect - confirm
        merge @= pynini.cdrewrite(delete_space, "",
                                  pynini.difference(NEMO_CHAR, "parte"),
                                  NEMO_SIGMA)

        # The merger can produce duplicate vowels. This is not allowed in orthography
        delete_duplicates = pynini.string_map([("aa", "a"),
                                               ("oo", "o")])  # Removes vowels
        delete_duplicates = pynini.cdrewrite(delete_duplicates, "", "",
                                             NEMO_SIGMA)

        # Applies the `accents` mapping inside a word that ends in one of the listed suffixes
        remove_accents = pynini.cdrewrite(
            accents,
            pynini.union(NEMO_SPACE, pynini.accep("[BOS]")) +
            pynini.closure(NEMO_NOT_SPACE),
            pynini.closure(NEMO_NOT_SPACE) +
            pynini.union("avo", "ava", "ésimo", "ésima"),
            NEMO_SIGMA,
        )
        merge_into_single_word = merge @ remove_accents @ delete_duplicates

        # Any numerator other than "un" takes the plural denominator
        fraction_default = numerator + delete_space + insert_space + (
            denominator_plural @ merge_into_single_word)

        # Numerator "un" takes the singular denominator
        fraction_with_one = (numerator_one + delete_space + insert_space +
                             (denominator_singular @ merge_into_single_word))

        # Denominator without a feature: "<numerator> sobre <denominator>"
        fraction_with_cardinal = strip_cardinal_apocope(numerator
                                                        | numerator_one)
        fraction_with_cardinal += (
            delete_space + pynutil.insert(" sobre ") +
            strip_cardinal_apocope(denominator_cardinal))

        if not deterministic:
            # There is an alternative rendering where ordinals act as adjectives for 'parte'. This requires use of the feminine
            # Other rules will manage use of "un" at end, so just worry about endings
            exceptions = pynini.string_map([("tercia", "tercera")])
            apply_exceptions = pynini.cdrewrite(exceptions, "", "", NEMO_SIGMA)
            vowel_change = pynini.cdrewrite(pynini.cross("o", "a"), "",
                                            pynini.accep("[EOS]"), NEMO_SIGMA)

            denominator_singular_fem = shift_cardinal_gender(
                denominator_singular) @ vowel_change @ apply_exceptions
            denominator_plural_fem = denominator_singular_fem + plural

            numerator_one_fem = shift_cardinal_gender(numerator_one)
            numerator_fem = shift_cardinal_gender(numerator)

            fraction_with_cardinal |= (
                (numerator_one_fem | numerator_fem) + delete_space +
                pynutil.insert(" sobre ") +
                shift_cardinal_gender(denominator_cardinal))

            # Still need to manage stems
            merge_stem = pynini.cdrewrite(
                delete_space, "", pynini.union("avo", "ava", "avos", "avas"),
                NEMO_SIGMA)  # For managing alternative spacing
            merge_stem @= remove_accents @ delete_duplicates

            fraction_with_one_fem = numerator_one_fem + delete_space + insert_space
            fraction_with_one_fem += pynini.union(
                denominator_singular_fem @ merge_stem, denominator_singular_fem
                @ merge_into_single_word)  # Both forms exists
            fraction_with_one_fem += pynutil.insert(" parte")
            fraction_with_one_fem @= pynini.cdrewrite(
                pynini.cross("una media", "media"), "", "",
                NEMO_SIGMA)  # "media" not "una media"

            fraction_default_fem = numerator_fem + delete_space + insert_space
            fraction_default_fem += pynini.union(
                denominator_plural_fem @ merge_stem,
                denominator_plural_fem @ merge_into_single_word)
            fraction_default_fem += pynutil.insert(" partes")

            fraction_default |= (numerator + delete_space + insert_space +
                                 denominator_plural @ merge_stem
                                 )  # Case of no merger
            fraction_default |= fraction_default_fem

            fraction_with_one |= numerator_one + delete_space + insert_space + denominator_singular @ merge_stem
            fraction_with_one |= fraction_with_one_fem

        fraction_with_one @= pynini.cdrewrite(pynini.cross(
            "un medio", "medio"), "", "", NEMO_SIGMA)  # "medio" not "un medio"

        # `fraction` is assembled here, before the "media(s)" alternatives are added below,
        # so the masculine graph is built without them
        fraction = fraction_with_one | fraction_default | fraction_with_cardinal
        graph_masc = pynini.closure(integer + delete_space + conjunction, 0,
                                    1) + fraction

        # Manage cases of fem gender (only shows on integer except for "medio")
        integer_fem = shift_cardinal_gender(integer)
        fraction_default |= (
            shift_cardinal_gender(numerator) + delete_space + insert_space +
            (denominator_plural @ pynini.cross("medios", "medias")))
        # For "media" the numerator is deleted outright instead of being read out
        fraction_with_one |= (
            pynutil.delete(numerator_one) + delete_space +
            (denominator_singular @ pynini.cross("medio", "media")))

        fraction_fem = fraction_with_one | fraction_default | fraction_with_cardinal
        graph_fem = pynini.closure(integer_fem + delete_space + conjunction, 0,
                                   1) + fraction_fem

        self.graph_masc = pynini.optimize(graph_masc)
        self.graph_fem = pynini.optimize(graph_fem)

        self.graph = graph_masc | graph_fem

        delete_tokens = self.delete_tokens(self.graph)
        self.fst = delete_tokens.optimize()
Example #6
0
    def __init__(self, cardinal: GraphFst, decimal: GraphFst, fraction: GraphFst, deterministic: bool = True):
        """Build the classifier FST for measures (a number combined with a unit).

        Args:
            cardinal: cardinal tagger; ``cardinal.graph`` and ``cardinal.fst`` are used.
            decimal: decimal tagger; ``decimal.fst`` and
                ``decimal.final_graph_wo_negative`` are used.
            fraction: fraction tagger; ``fraction.fst`` is used.
            deterministic: forwarded to the base class.
        """
        super().__init__(name="measure", kind="classify", deterministic=deterministic)
        number_words = cardinal.graph
        number_words_no_apocope = strip_cardinal_apocope(number_words)

        # Unit vocabulary: singular forms as given, plural forms through the pluralisation maps
        pluralized = unit @ (unit_plural_fem | unit_plural_masc)
        singular_words = convert_space(unit)
        plural_words = convert_space(pluralized)

        minus = pynini.closure("-", 0, 1)

        # "/<unit>" is read as "por <unit>"
        per_unit = pynini.cross("/", "por") + pynutil.insert(NEMO_NON_BREAKING_SPACE) + singular_words
        maybe_per_unit = pynini.closure(pynutil.insert(NEMO_NON_BREAKING_SPACE) + per_unit, 0, 1)

        units_plural = (
            pynutil.insert("units: \"") + ((plural_words + maybe_per_unit) | per_unit) + pynutil.insert("\"")
        )
        units_singular = (
            pynutil.insert("units: \"") + ((singular_words + maybe_per_unit) | per_unit) + pynutil.insert("\"")
        )

        # Decimals always take the plural unit
        decimal_measure = decimal.fst + insert_space + pynini.closure(NEMO_SPACE, 0, 1) + units_plural

        # Cardinals: the literal "1" takes the singular unit, everything else the plural one
        any_but_one = (minus + (NEMO_SIGMA - "1")) @ cardinal.fst
        just_one = (minus + pynini.accep("1")) @ cardinal.fst
        cardinal_measure = any_but_one + insert_space + pynini.closure(delete_space, 0, 1) + units_plural
        cardinal_measure = cardinal_measure | (
            just_one + insert_space + pynini.closure(delete_space, 0, 1) + units_singular
        )

        # Fractions take the singular unit
        fraction_measure = fraction.fst + insert_space + pynini.closure(delete_space, 0, 1) + units_singular

        # Shared pieces of the "times" and dash patterns below
        times_sign = pynini.union('x', 'X')
        letters = pynini.closure(NEMO_ALPHA, 1)
        dash = pynutil.delete('-')

        # <number>x / <number>X
        decimal_times = (
            pynutil.insert("decimal { ")
            + decimal.final_graph_wo_negative
            + pynutil.insert(" } units: \"")
            + times_sign
            + pynutil.insert("\"")
        )
        cardinal_times = (
            pynutil.insert("cardinal { integer: \"")
            + number_words_no_apocope
            + pynutil.insert("\" } units: \"")
            + times_sign
            + pynutil.insert("\"")
        )

        # <number>-<letters>
        cardinal_dash_alpha = (
            pynutil.insert("cardinal { integer: \"")
            + number_words_no_apocope
            + dash
            + pynutil.insert("\" } units: \"")
            + letters
            + pynutil.insert("\"")
        )
        decimal_dash_alpha = (
            pynutil.insert("decimal { ")
            + decimal.final_graph_wo_negative
            + dash
            + pynutil.insert(" } units: \"")
            + letters
            + pynutil.insert("\"")
        )

        # <letters>-<number>: the unit field is emitted first
        alpha_dash_cardinal = (
            pynutil.insert("units: \"")
            + letters
            + dash
            + pynutil.insert("\"")
            + pynutil.insert(" cardinal { integer: \"")
            + number_words
            + pynutil.insert("\" } preserve_order: true")
        )
        alpha_dash_decimal = (
            pynutil.insert("units: \"")
            + letters
            + dash
            + pynutil.insert("\"")
            + pynutil.insert(" decimal { ")
            + decimal.final_graph_wo_negative
            + pynutil.insert(" } preserve_order: true")
        )

        tagged = (
            decimal_measure
            | cardinal_measure
            | cardinal_dash_alpha
            | alpha_dash_cardinal
            | decimal_dash_alpha
            | fraction_measure
            | decimal_times
            | cardinal_times
            | alpha_dash_decimal
        )
        # NOTE(review): the two alpha_dash_* branches already emit `preserve_order: true`,
        # so this common suffix appends it a second time for them - confirm intended.
        tagged = tagged + pynutil.insert(" preserve_order: true")

        self.fst = self.add_tokens(tagged).optimize()
Example #7
0
    def __init__(self, decimal: GraphFst, deterministic: bool = True):
        """Build the verbalizer FST for money tokens.

        Reads ``integer_part``, ``fractional_part``, ``currency_maj`` and
        ``currency_min`` fields (or a full decimal reading followed by
        ``currency_maj``) and writes the spoken form, in masculine and
        feminine variants.

        Args:
            decimal: decimal verbalizer; its ``graph_masc``, ``graph_fem``
                and ``numbers_only_quantity`` graphs are reused. They are
                only read here, never modified.
            deterministic: forwarded to the base class.
        """
        super().__init__(name="money",
                         kind="verbalize",
                         deterministic=deterministic)

        # Major currency name, constrained to each gender/number class
        maj_singular_masc = (
            pynutil.delete("currency_maj: \"") +
            (pynini.closure(NEMO_NOT_QUOTE, 1) @ masc_singular) +
            pynutil.delete("\""))
        maj_singular_fem = (
            pynutil.delete("currency_maj: \"") +
            (pynini.closure(NEMO_NOT_QUOTE, 1) @ fem_singular) +
            pynutil.delete("\""))

        maj_plural_masc = (pynutil.delete("currency_maj: \"") +
                           (pynini.closure(NEMO_NOT_QUOTE, 1) @ masc_plural) +
                           pynutil.delete("\""))
        maj_plural_fem = (pynutil.delete("currency_maj: \"") +
                          (pynini.closure(NEMO_NOT_QUOTE, 1) @ fem_plural) +
                          pynutil.delete("\""))

        maj_masc = maj_plural_masc | maj_singular_masc  # Tagger kept quantity resolution stable
        maj_fem = maj_plural_fem | maj_singular_fem

        # Minor currency name, same four classes
        min_singular_masc = (
            pynutil.delete("currency_min: \"") +
            (pynini.closure(NEMO_NOT_QUOTE, 1) @ masc_singular) +
            pynutil.delete("\""))
        min_singular_fem = (
            pynutil.delete("currency_min: \"") +
            (pynini.closure(NEMO_NOT_QUOTE, 1) @ fem_singular) +
            pynutil.delete("\""))

        min_plural_masc = (pynutil.delete("currency_min: \"") +
                           (pynini.closure(NEMO_NOT_QUOTE, 1) @ masc_plural) +
                           pynutil.delete("\""))
        min_plural_fem = (pynutil.delete("currency_min: \"") +
                          (pynini.closure(NEMO_NOT_QUOTE, 1) @ fem_plural) +
                          pynutil.delete("\""))

        min_masc = min_plural_masc | min_singular_masc
        min_fem = min_plural_fem | min_singular_fem

        fractional_part = (pynutil.delete("fractional_part: \"") +
                           pynini.closure(NEMO_NOT_QUOTE, 1) +
                           pynutil.delete("\""))

        integer_part = pynutil.delete("integer_part: \"") + pynini.closure(
            NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")
        # Optionally insert "con " or "y " before the minor amount
        optional_add_and = pynini.closure(
            pynutil.insert(pynini.union("con ", "y ")), 0, 1)

        #  *** currency_maj
        graph_integer_masc = integer_part + NEMO_SPACE + maj_masc
        graph_integer_fem = shift_cardinal_gender(
            integer_part) + NEMO_SPACE + maj_fem

        graph_integer = graph_integer_fem | graph_integer_masc

        #  *** currency_maj + (***) | ((con) *** current_min)
        graph_integer_with_minor_masc = (
            graph_integer_masc + NEMO_SPACE + pynini.union(
                optional_add_and + strip_cardinal_apocope(fractional_part),
                (optional_add_and + fractional_part + NEMO_SPACE + min_masc),
                (optional_add_and + shift_cardinal_gender(fractional_part) +
                 NEMO_SPACE + min_fem),
            )  # Could be minor currency that is different gender
            + delete_preserve_order)

        graph_integer_with_minor_fem = (
            graph_integer_fem + NEMO_SPACE + pynini.union(
                optional_add_and + shift_cardinal_gender(fractional_part),
                (optional_add_and + fractional_part + NEMO_SPACE + min_masc),
                (optional_add_and + shift_cardinal_gender(fractional_part) +
                 NEMO_SPACE + min_fem),
            )  # Could be minor currency that is different gender
            + delete_preserve_order)

        graph_integer_with_minor = graph_integer_with_minor_fem | graph_integer_with_minor_masc

        ## *** coma *** currency_maj
        graph_decimal_masc = decimal.graph_masc + NEMO_SPACE + maj_masc

        # Built with binary operators rather than `|=` / `+=` on an alias of
        # `decimal.graph_fem`: should the FST type implement in-place operators,
        # augmented assignment would modify the graph owned by `decimal`.
        # can still have "x billions" with fem currency
        graph_decimal_fem = (decimal.graph_fem | decimal.numbers_only_quantity) + (NEMO_SPACE + maj_fem)

        graph_decimal = graph_decimal_fem | graph_decimal_masc
        # Rewrites the tagged input so " de" follows the quantity value, before its closing quote
        graph_decimal = (pynini.cdrewrite(
            pynutil.insert(" de"), "quantity: \"" + pynini.closure(
                NEMO_NOT_QUOTE, 1), "\"", NEMO_SIGMA) @ graph_decimal
                         )  # formally it's millones/billones de ***

        # *** current_min
        graph_minor_masc = fractional_part + NEMO_SPACE + min_masc + delete_preserve_order
        graph_minor_fem = shift_cardinal_gender(
            fractional_part) + NEMO_SPACE + min_fem + delete_preserve_order

        graph_minor = graph_minor_fem | graph_minor_masc

        graph = graph_integer | graph_integer_with_minor | graph_decimal | graph_minor

        delete_tokens = self.delete_tokens(graph)
        self.fst = delete_tokens.optimize()