Ejemplo n.º 1
0
    def __init__(self,
                 whitelist: 'pynini.FstLike',
                 deterministic: bool = True):
        """Assemble the "abbreviation" classify grammar into ``self.fst``.

        Matched input is emitted as ``value: "..."`` and then passed through
        ``self.add_tokens``. Inputs accepted by ``whitelist.graph`` are
        removed from the grammar.

        Args:
            whitelist: object exposing a ``graph`` FST; its input projection
                is subtracted from this grammar's input language.
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="abbreviation",
                         kind="classify",
                         deterministic=deterministic)

        # Main pattern: two or more NEMO_UPPER symbols, with a space inserted
        # before every symbol after the first.
        spaced_upper = NEMO_UPPER + pynini.closure(insert_space + NEMO_UPPER, 1)

        # Fallback patterns; three of the four carry an extra weight of 110.
        lower_or_converted = TO_LOWER | NEMO_LOWER
        converted_then_any = pynutil.add_weight(
            TO_LOWER + pynini.closure(insert_space + lower_or_converted), 110)
        upper_then_lower = pynutil.add_weight(
            pynini.closure(NEMO_UPPER, 2) +
            pynini.closure(insert_space + NEMO_LOWER, 1), 110)
        upper_dot = NEMO_UPPER + pynutil.delete(".")
        dotted_upper = upper_dot + pynini.closure(insert_space + upper_dot)
        converted_dot = TO_LOWER + pynutil.delete(".")
        dotted_converted = pynutil.add_weight(
            converted_dot + pynini.closure(insert_space + converted_dot), 110)
        fallback = pynini.union(converted_then_any, upper_then_lower,
                                dotted_upper, dotted_converted)

        # Weight the fallback patterns above the main pattern (101 vs. 10);
        # the original note says this is meant to be higher than a plain word.
        weighted_main = pynutil.add_weight(spaced_upper.optimize(), 10)
        weighted_fallback = pynutil.add_weight(fallback.optimize(), 101)
        candidates = weighted_main | weighted_fallback

        # Exclude every input that the whitelist already accepts.
        allowed_inputs = pynini.difference(
            pynini.project(candidates, "input"),
            pynini.project(whitelist.graph, "input"))
        filtered = pynini.compose(allowed_inputs, candidates)

        tagged = (pynutil.insert("value: \"") + filtered.optimize() +
                  pynutil.insert("\""))
        self.fst = self.add_tokens(tagged).optimize()
Ejemplo n.º 2
0
    def __init__(self, deterministic: bool = True, cache_dir: str = None, overwrite_cache: bool = False):
        """Build the final verbalizer FST, or restore it from a cached FAR file.

        Args:
            deterministic: forwarded to the parent constructor and to the
                ``VerbalizeFst`` / ``WordFst`` grammars; also selects whether
                the ``tokens { ... }`` wrapper is expected around each token.
            cache_dir: directory for the cached ``.far`` file; ``None`` or the
                string ``"None"`` disables caching.
            overwrite_cache: when true, an existing cache file is ignored and
                the grammar is rebuilt.
        """
        super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)

        caching_enabled = cache_dir is not None and cache_dir != "None"
        far_file = None
        if caching_enabled:
            os.makedirs(cache_dir, exist_ok=True)
            far_file = os.path.join(cache_dir, f"en_tn_{deterministic}_deterministic_verbalizer.far")

        # Fast path: reuse a previously stored grammar.
        if far_file and not overwrite_cache and os.path.exists(far_file):
            self.fst = pynini.Far(far_file, mode="r")["verbalize"]
            logging.info(f'VerbalizeFinalFst graph was restored from {far_file}.')
            return

        verbalize = VerbalizeFst(deterministic=deterministic).fst
        word = WordFst(deterministic=deterministic).fst
        types = verbalize | word

        padded_types = delete_space + types + delete_space
        if deterministic:
            # Strip the surrounding ``tokens { ... }`` wrapper.
            token = pynutil.delete("tokens") + delete_space + pynutil.delete("{") + padded_types + pynutil.delete("}")
        else:
            token = padded_types

        # One or more tokens, each but the last followed by delete_extra_space.
        sentence = delete_space + pynini.closure(token + delete_extra_space) + token + delete_space

        self.fst = sentence.optimize()
        if far_file:
            generator_main(far_file, {"verbalize": self.fst})
            logging.info(f"VerbalizeFinalFst grammars are saved to {far_file}.")
Ejemplo n.º 3
0
    def __init__(self, ordinal: GraphFst):
        """Build the "date" classify grammar and store it in ``self.fst``.

        Three layouts are accepted: month [day] [year], "the" day "of" month
        [year], and a bare year (or a range from ``_get_range_graph``). Every
        path ends with an inserted `` preserve_order: true``.

        Args:
            ordinal: grammar whose ``graph`` attribute supplies the day values.
        """
        super().__init__(name="date", kind="classify")

        ordinal_graph = ordinal.graph
        YEAR_WEIGHT = 0.001
        year_graph = pynutil.add_weight(_get_year_graph(), YEAR_WEIGHT)

        month_field = (pynutil.insert("month: \"") + _get_month_graph() +
                       pynutil.insert("\""))
        day_field = (pynutil.insert("day: \"") +
                     pynutil.add_weight(ordinal_graph, -0.7) +
                     pynutil.insert("\""))

        maybe_day = pynini.closure(delete_extra_space + day_field, 0, 1)
        # Adding -YEAR_WEIGHT here offsets the +YEAR_WEIGHT applied above.
        trailing_year = (delete_extra_space + pynutil.insert("year: \"") +
                         pynutil.add_weight(year_graph, -YEAR_WEIGHT) +
                         pynutil.insert("\""))
        maybe_year = pynini.closure(trailing_year, 0, 1)

        month_day_year = month_field + maybe_day + maybe_year
        day_month_year = (pynutil.delete("the") + delete_space + day_field +
                          delete_space + pynutil.delete("of") +
                          delete_extra_space + month_field + maybe_year)
        year_only = (pynutil.insert("year: \"") +
                     (year_graph | _get_range_graph()) +
                     pynutil.insert("\""))

        date_graph = month_day_year | day_month_year | year_only
        date_graph += pynutil.insert(" preserve_order: true")
        self.fst = self.add_tokens(date_graph).optimize()
Ejemplo n.º 4
0
    def __init__(self):
        """Build the "ordinal" verbalizer and expose its suffix rewrite.

        The quoted ``integer:`` value is extracted and its end is rewritten by
        a context-dependent rule, kept on ``self.suffix`` for reuse. The final
        grammar is stored in ``self.fst``.
        """
        super().__init__(name="ordinal", kind="verbalize")

        digit_map = pynini.string_file(
            get_abs_path("data/ordinals/digit.tsv")).invert()
        teen_map = pynini.string_file(
            get_abs_path("data/ordinals/teen.tsv")).invert()

        quoted_integer = (pynutil.delete("integer:") + delete_space +
                          pynutil.delete("\"") +
                          pynini.closure(NEMO_NOT_QUOTE, 1) +
                          pynutil.delete("\""))

        # Candidate end-of-string rewrites; plain "th" insertion carries a
        # higher weight (0.01) than the "ty" -> "tieth" mapping (0.001).
        ending_rule = pynini.union(
            digit_map,
            teen_map,
            pynutil.add_weight(pynini.cross("ty", "tieth"), weight=0.001),
            pynutil.insert("th", weight=0.01),
        )
        suffix = pynini.cdrewrite(ending_rule, "", "[EOS]",
                                  NEMO_SIGMA).optimize()

        self.suffix = suffix
        self.fst = self.delete_tokens(quoted_integer @ suffix).optimize()
Ejemplo n.º 5
0
Archivo: word.py Proyecto: blisc/NeMo
    def __init__(self, deterministic: bool = True):
        """Build the "word" verbalizer: unwrap ``name: "..."`` into ``self.fst``.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="word", kind="verbalize", deterministic=deterministic)
        # One or more characters, none of which is a plain space.
        non_space_run = pynini.closure(NEMO_CHAR - " ", 1)
        unwrapped = (
            pynutil.delete("name:")
            + delete_space
            + pynutil.delete("\"")
            + non_space_run
            + pynutil.delete("\"")
        )
        # Turn every no-break space (U+00A0) into a regular space.
        nbsp_to_space = pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "", NEMO_SIGMA)

        self.fst = (unwrapped @ nbsp_to_space).optimize()
Ejemplo n.º 6
0
    def __init__(self):
        """Build the "ordinal" verbalizer that appends st/nd/rd/th to digits.

        The quoted ``integer:`` value is extracted and an end-of-string
        rewrite adds the suffix; the result is stored in ``self.fst``.
        """
        super().__init__(name="ordinal", kind="verbalize")
        quoted_integer = (
            pynutil.delete("integer:")
            + delete_space
            + pynutil.delete("\"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )

        # Explicit endings first (11/12/13, then 1/2/3); the generic "th"
        # insertion carries a small weight of 0.01.
        ending_rule = pynini.union(
            pynini.cross("11", "11th"),
            pynini.cross("12", "12th"),
            pynini.cross("13", "13th"),
            pynini.cross("1", "1st"),
            pynini.cross("2", "2nd"),
            pynini.cross("3", "3rd"),
            pynutil.insert("th", weight=0.01),
        )
        suffix = pynini.cdrewrite(ending_rule, "", "[EOS]", NEMO_SIGMA)

        self.fst = self.delete_tokens(quoted_integer @ suffix).optimize()
Ejemplo n.º 7
0
    def __init__(self):
        """Build the "time" classify grammar for spoken input into ``self.fst``.

        Accepts an hour followed by minutes (e.g. "two thirty five", "five
        o'clock") or minutes "past" an hour, each optionally followed by a
        suffix and a time zone. Emits ``hours``, ``minutes``, ``suffix`` and
        ``zone`` fields.
        """
        super().__init__(name="time", kind="classify")
        # hours, minutes, seconds, suffix, zone, style, speak_period

        suffix_graph = pynini.string_file(get_abs_path("data/time_suffix.tsv"))
        time_zone_graph = pynini.invert(
            pynini.string_file(get_abs_path("data/time_zone.tsv")))

        # only used for < 1000 thousand -> 0 weight
        cardinal = pynutil.add_weight(CardinalFst().graph_no_exception,
                                      weight=-0.7)

        # Restrict the cardinal grammar to the words for valid hours/minutes.
        hour_words = list(map(num_to_word, range(0, 24)))
        single_minute_words = list(map(num_to_word, range(1, 10)))
        double_minute_words = list(map(num_to_word, range(10, 60)))

        graph_hour = pynini.union(*hour_words) @ cardinal
        graph_minute_single = pynini.union(*single_minute_words) @ cardinal
        graph_minute_double = pynini.union(*double_minute_words) @ cardinal
        graph_minute_verbose = pynini.union(pynini.cross("half", "30"),
                                            pynini.cross("quarter", "15"))
        oclock = pynini.cross(
            pynini.union("o' clock", "o clock", "o'clock", "oclock"), "")

        hour_field = (pynutil.insert("hours: \"") + graph_hour +
                      pynutil.insert("\""))

        # Minutes after an hour: nothing spoken, "o'clock", "o <digit>",
        # or a two-digit minute.
        minute_value = pynini.union(
            pynutil.insert("00"),
            oclock + pynutil.insert("00"),
            pynutil.delete("o") + delete_space + graph_minute_single,
            graph_minute_double,
        )
        minute_field = (pynutil.insert("minutes: \"") + minute_value +
                        pynutil.insert("\""))

        suffix_field = (pynutil.insert("suffix: \"") +
                        convert_space(suffix_graph) + pynutil.insert("\""))
        maybe_suffix = pynini.closure(
            delete_space + insert_space + suffix_field, 0, 1)
        zone_field = (pynutil.insert("zone: \"") +
                      convert_space(time_zone_graph) + pynutil.insert("\""))
        maybe_zone = pynini.closure(
            delete_space + insert_space + zone_field, 0, 1)

        # five o' clock
        # two o eight, two thirty five (am/pm)
        # two pm/am
        hour_then_minute = hour_field + delete_extra_space + minute_field
        # 10 past four, quarter past four, half past four
        minute_past_hour = (
            pynutil.insert("minutes: \"") +
            pynini.union(graph_minute_single, graph_minute_double,
                         graph_minute_verbose) +
            pynutil.insert("\"") + delete_space + pynutil.delete("past") +
            delete_extra_space + hour_field)

        time_graph = ((hour_then_minute | minute_past_hour) + maybe_suffix +
                      maybe_zone).optimize()

        self.fst = self.add_tokens(time_graph).optimize()
Ejemplo n.º 8
0
    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        """Build the "time" classify grammar for written input into ``self.fst``.

        Accepts ``H:MM`` (suffix optional), ``H.MM`` followed by a suffix, and
        a bare hour followed by a suffix; a time zone may follow in each case.
        Emits ``hours``, ``minutes``, ``suffix`` and ``zone`` fields.

        Args:
            cardinal: grammar whose ``graph`` attribute converts the digit
                strings for hours and minutes.
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="time",
                         kind="classify",
                         deterministic=deterministic)
        suffix_graph = pynini.string_file(get_abs_path("data/time_suffix.tsv"))
        time_zone_graph = pynini.string_file(
            get_abs_path("data/time_zone.tsv"))

        cardinal_graph = cardinal.graph

        hour_digits = list(map(str, range(0, 24)))
        single_minute_digits = list(map(str, range(1, 10)))
        double_minute_digits = list(map(str, range(10, 60)))

        # Either two digits kept as-is, or one digit with an optional leading
        # "0" removed.
        strip_leading_zero = (NEMO_DIGIT + NEMO_DIGIT) | (
            pynini.closure(pynutil.delete("0"), 0, 1) + NEMO_DIGIT)

        graph_hour = strip_leading_zero @ pynini.union(
            *hour_digits) @ cardinal_graph
        graph_minute_single = pynini.union(
            *single_minute_digits) @ cardinal_graph
        graph_minute_double = pynini.union(
            *double_minute_digits) @ cardinal_graph

        hour_field = (pynutil.insert("hours: \"") + graph_hour +
                      pynutil.insert("\""))
        # "0X" becomes "o" + space + X; otherwise a two-digit minute.
        minute_value = (
            pynini.cross("0", "o") + insert_space + graph_minute_single
            | graph_minute_double)
        minute_field = (pynutil.insert("minutes: \"") + minute_value +
                        pynutil.insert("\""))

        suffix_field = (pynutil.insert("suffix: \"") +
                        convert_space(suffix_graph) + pynutil.insert("\""))
        required_suffix = delete_space + insert_space + suffix_field
        maybe_suffix = pynini.closure(required_suffix, 0, 1)
        zone_field = (pynutil.insert("zone: \"") +
                      convert_space(time_zone_graph) + pynutil.insert("\""))
        maybe_zone = pynini.closure(
            delete_space + insert_space + zone_field, 0, 1)

        # "00" minutes are dropped; anything else becomes a minutes field.
        minutes_or_nothing = (pynutil.delete("00")
                              | insert_space + minute_field)

        # 2:30 pm, 02:30, 2:00
        colon_time = (hour_field + pynutil.delete(":") + minutes_or_nothing +
                      maybe_suffix + maybe_zone)
        # 2.xx pm/am
        dot_time = (hour_field + pynutil.delete(".") + minutes_or_nothing +
                    required_suffix + maybe_zone)
        # 2 pm est
        hour_only = hour_field + required_suffix + maybe_zone

        time_graph = (colon_time | hour_only | dot_time).optimize()
        self.fst = self.add_tokens(time_graph).optimize()
Ejemplo n.º 9
0
    def __init__(self):
        """Build the "measure" verbalizer: unwrap a nested cardinal integer.

        Removes the literal `` cardinal { integer: "`` prefix, keeps the
        quoted value, and removes the closing quote and brace. The result is
        stored in ``self.fst``.
        """
        super().__init__(name="measure", kind="verbalize")

        opening = pynutil.delete(" cardinal { integer: \"")
        value = pynini.closure(NEMO_NOT_QUOTE, 1)
        closing = pynutil.delete("\"") + delete_space + pynutil.delete("}")

        self.fst = self.delete_tokens(opening + value + closing).optimize()
Ejemplo n.º 10
0
 def __init__(self, decimal: GraphFst):
     """Build the "money" verbalizer: currency value followed by the number.

     Args:
         decimal: grammar whose ``numbers`` attribute verbalizes the amount.
     """
     super().__init__(name="money", kind="verbalize")
     # Quoted currency value made of one or more non-space characters.
     currency_value = pynini.closure(NEMO_CHAR - " ", 1)
     currency = (pynutil.delete("currency:") + delete_space +
                 pynutil.delete("\"") + currency_value +
                 pynutil.delete("\""))
     money = currency + delete_space + decimal.numbers
     self.fst = self.delete_tokens(money).optimize()
Ejemplo n.º 11
0
 def __init__(self):
     """Build the "whitelist" verbalizer: unwrap ``name: "..."`` into ``self.fst``."""
     super().__init__(name="whitelist", kind="verbalize")
     # One or more characters, none of which is a plain space.
     non_space_run = pynini.closure(NEMO_CHAR - " ", 1)
     unwrapped = (pynutil.delete("name:") + delete_space +
                  pynutil.delete("\"") + non_space_run +
                  pynutil.delete("\""))
     # Turn every no-break space (U+00A0) into a regular space.
     nbsp_to_space = pynini.cdrewrite(pynini.cross(u"\u00A0", " "), "", "",
                                      NEMO_SIGMA)
     self.fst = (unwrapped @ nbsp_to_space).optimize()
Ejemplo n.º 12
0
    def __init__(self):
        """Build the "money" classify grammar and store it in ``self.fst``.

        The grammar emits ``integer_part``, ``fractional_part`` and
        ``currency`` fields. It covers an integer amount with a currency unit
        and optional cents, a decimal amount with a currency unit, and a
        standalone cents amount (emitted with currency ``$`` and integer
        part ``0``).
        """
        super().__init__(name="money", kind="classify")
        # quantity, integer_part, fractional_part, currency, style(depr)
        cardinal_graph = CardinalFst().graph_no_exception
        graph_decimal_final = DecimalFst().final_graph_wo_negative

        # Currency table, inverted; get_singulars() derives the second variant.
        # NOTE(review): the names look swapped (unit_plural comes from
        # get_singulars) — confirm against the helper's definition.
        unit = pynini.string_file(get_abs_path("data/currency.tsv"))
        unit_singular = pynini.invert(unit)
        unit_plural = get_singulars(unit_singular)

        graph_unit_singular = pynutil.insert("currency: \"") + convert_space(
            unit_singular) + pynutil.insert("\"")
        graph_unit_plural = pynutil.insert("currency: \"") + convert_space(
            unit_plural) + pynutil.insert("\"")

        # Two digits pass through unchanged; a single digit gets a "0"
        # inserted in front (assuming NEMO_DIGIT matches one digit).
        add_leading_zero_to_double_digit = (NEMO_DIGIT + NEMO_DIGIT) | (
            pynutil.insert("0") + NEMO_DIGIT)
        # twelve dollars (and) fifty cents, zero cents
        # Note: `@` binds tighter than `+`, so the zero-padding composition
        # applies to the weighted cardinal only, before "cents" is deleted.
        # "one cent" is handled separately and always yields "01".
        cents_standalone = (pynutil.insert("fractional_part: \"") +
                            pynini.union(
                                pynutil.add_weight(
                                    ((NEMO_SIGMA - "one") @ cardinal_graph),
                                    -0.7) @ add_leading_zero_to_double_digit +
                                delete_space + pynutil.delete("cents"),
                                pynini.cross("one", "01") + delete_space +
                                pynutil.delete("cent"),
                            ) + pynutil.insert("\""))

        # Optional cents after the unit, with an optional "and" removed.
        optional_cents_standalone = pynini.closure(
            delete_space +
            pynini.closure(pynutil.delete("and") + delete_space, 0, 1) +
            insert_space + cents_standalone,
            0,
            1,
        )
        # twelve dollars fifty, only after integer
        optional_cents_suffix = pynini.closure(
            delete_extra_space + pynutil.insert("fractional_part: \"") +
            pynutil.add_weight(cardinal_graph, -0.7) + pynutil.insert("\""),
            0,
            1,
        )

        # Any cardinal except "one" pairs with graph_unit_plural ...
        graph_integer = (pynutil.insert("integer_part: \"") +
                         ((NEMO_SIGMA - "one") @ cardinal_graph) +
                         pynutil.insert("\"") + delete_extra_space +
                         graph_unit_plural +
                         (optional_cents_standalone | optional_cents_suffix))
        # ... while "one" pairs with graph_unit_singular.
        graph_integer |= (pynutil.insert("integer_part: \"") +
                          pynini.cross("one", "1") + pynutil.insert("\"") +
                          delete_extra_space + graph_unit_singular +
                          (optional_cents_standalone | optional_cents_suffix))
        graph_decimal = graph_decimal_final + delete_extra_space + graph_unit_plural
        # Cents with no integer amount: currency "$" and integer part "0"
        # are inserted as fixed text.
        graph_decimal |= pynutil.insert(
            "currency: \"$\" integer_part: \"0\" ") + cents_standalone
        final_graph = graph_integer | graph_decimal
        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()
Ejemplo n.º 13
0
    def __init__(self, deterministic: bool = True):
        """Build the "money" verbalizer: unwrap ``integer_part: "..."``.

        The quoted value may be empty and consists of ``RU_ALPHA`` symbols and
        spaces. The result is stored in ``self.fst``.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="money",
                         kind="verbalize",
                         deterministic=deterministic)

        value = pynini.closure(RU_ALPHA | " ")
        unwrapped = (pynutil.delete("integer_part: \"") + value +
                     pynutil.delete("\""))
        self.fst = self.delete_tokens(unwrapped).optimize()
Ejemplo n.º 14
0
    def __init__(self):
        """Build the "ordinal" verbalizer that prefixes the value with "thứ ".

        The quoted ``integer:`` value is extracted and the prefix is inserted
        in front of it; the result is stored in ``self.fst``.
        """
        super().__init__(name="ordinal", kind="verbalize")
        quoted_integer = (pynutil.delete("integer:") + delete_space +
                          pynutil.delete("\"") +
                          pynini.closure(NEMO_NOT_QUOTE, 1) +
                          pynutil.delete("\""))

        prefixed = pynutil.insert("thứ ") + quoted_integer
        self.fst = self.delete_tokens(prefixed).optimize()
Ejemplo n.º 15
0
 def _get_thousands_graph(graph_ties, graph_digits):
     """Return a graph for "<digit> nghìn/ngàn [<digit|zero> trăm] <rest>".

     Args:
         graph_ties: graph accepted as the final component.
         graph_digits: graph accepted as the final component.

     The hundreds slot falls back to an inserted "0" when no "trăm" part is
     present. ``graph_digit``, ``graph_zero`` and ``graph_teen`` come from the
     enclosing scope.
     """
     thousand_word = pynutil.delete(pynini.union("nghìn", "ngàn"))
     spoken_hundreds = ((graph_digit | graph_zero) + delete_space +
                        pynutil.delete("trăm"))
     hundreds_slot = spoken_hundreds | pynutil.insert("0")
     remainder = graph_teen | graph_ties | graph_digits
     return (graph_digit + delete_space + thousand_word + delete_space +
             hundreds_slot + delete_space + remainder)
Ejemplo n.º 16
0
    def __init__(self, deterministic: bool = True):
        """Build the "roman" verbalizer and store it in ``self.fst``.

        The quoted ``integer:`` value is emitted either unchanged or rewritten
        by ``OrdinalFst().suffix`` with an optional inserted "the " in front.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="roman", kind="verbalize", deterministic=deterministic)
        suffix = OrdinalFst().suffix

        plain_value = pynini.closure(NEMO_NOT_QUOTE)
        optional_article = pynini.closure(pynutil.insert("the "), 0, 1)
        # `@` binds tighter than `+`: only the value is composed with suffix.
        ordinal_value = optional_article + (plain_value @ suffix)
        integer = plain_value | ordinal_value

        unwrapped = pynutil.delete("integer: \"") + integer + pynutil.delete("\"")
        self.fst = self.delete_tokens(unwrapped).optimize()
Ejemplo n.º 17
0
 def _get_thousands_graph():
     """Return a graph for "<digit> thousand [<digit> hundred] <teen|ties>".

     The hundreds slot falls back to an inserted "0" when no "hundred" part is
     present. ``graph_digit`` and ``graph_teen`` come from the enclosing scope.
     """
     ties = _get_ties_graph()
     spoken_hundreds = (graph_digit + delete_space +
                        pynutil.delete("hundred"))
     hundreds_slot = spoken_hundreds | pynutil.insert("0")
     remainder = graph_teen | ties
     return (graph_digit + delete_space + pynutil.delete("thousand") +
             delete_space + hundreds_slot + delete_space + remainder)
Ejemplo n.º 18
0
Archivo: date.py Proyecto: NVIDIA/NeMo
def _get_two_digit_year_with_s_graph():
    """Return the optimized graph for decade expressions such as '70s -> seventies.

    An optional leading apostrophe is deleted, ``ties_graph`` handles the tens
    digit, the trailing "0s" is deleted, and a final "y" is rewritten to "ies".
    """
    optional_apostrophe = pynini.closure(pynutil.delete("'"), 0, 1)
    decade = ties_graph + pynutil.delete("0s")
    pluralize = pynini.cdrewrite(pynini.cross("y", "ies"), "", "[EOS]", NEMO_SIGMA)
    return (optional_apostrophe + pynini.compose(decade, pluralize)).optimize()
    def __init__(self, deterministic: bool = True):
        """Build the "abbreviation" verbalizer: unwrap ``value: "..."``.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="abbreviation",
                         kind="verbalize",
                         deterministic=deterministic)

        # The quoted value must contain at least one non-quote character.
        value = pynini.closure(NEMO_NOT_QUOTE, 1)
        unwrapped = (pynutil.delete("value: \"") + value +
                     pynutil.delete("\""))
        self.fst = self.delete_tokens(unwrapped).optimize()
Ejemplo n.º 20
0
    def __init__(self, deterministic: bool = True):
        """Build the "telephone" verbalizer: unwrap ``number_part: "..."``.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="telephone",
                         kind="verbalize",
                         deterministic=deterministic)

        # At least one RU_ALPHA symbol or space inside the quotes.
        value = pynini.closure(RU_ALPHA | " ", 1)
        unwrapped = (pynutil.delete("number_part: \"") + value +
                     pynutil.delete("\""))
        self.fst = self.delete_tokens(unwrapped).optimize()
Ejemplo n.º 21
0
    def __init__(self, deterministic: bool = True):
        """Build the "electronic" verbalizer: unwrap ``username: "..."``.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="electronic",
                         kind="verbalize",
                         deterministic=deterministic)

        # Zero or more RU_ALPHA symbols or spaces inside the quotes.
        value = pynini.closure(RU_ALPHA | " ")
        unwrapped = (pynutil.delete("username: \"") + value +
                     pynutil.delete("\""))
        self.fst = self.delete_tokens(unwrapped).optimize()
Ejemplo n.º 22
0
    def __init__(self, deterministic: bool = True):
        """Build the "ordinal" verbalizer: unwrap ``integer: "..."`` unchanged.

        Args:
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="ordinal",
                         kind="verbalize",
                         deterministic=deterministic)

        # The quoted value may be empty.
        unwrapped = (pynutil.delete("integer: \"") +
                     pynini.closure(NEMO_NOT_QUOTE) +
                     pynutil.delete("\""))
        self.fst = self.delete_tokens(unwrapped).optimize()
Ejemplo n.º 23
0
    def __init__(self, deterministic: bool = True):
        """Build the "cardinal" classify grammar and store it in ``self.fst``.

        Besides ``self.fst`` this sets ``self.graph``,
        ``self.graph_hundred_component_at_least_one_none_zero_digit`` and
        ``self.single_digits_graph``; ``self.range_graph`` is set only when
        ``deterministic`` is false.

        Args:
            deterministic: forwarded to the parent constructor; when false,
                extra digit-by-digit and range readings are added.
        """
        super().__init__(name="cardinal",
                         kind="classify",
                         deterministic=deterministic)

        # Number-name grammar loaded from a FAR archive.
        graph = pynini.Far(
            get_abs_path("data/numbers/cardinal_number_name.far")).get_fst()
        # Two or three digits, or one digit other than "0", composed with the
        # number-name grammar (assuming NEMO_DIGIT matches a single digit).
        self.graph_hundred_component_at_least_one_none_zero_digit = (
            pynini.closure(NEMO_DIGIT, 2, 3)
            | pynini.difference(NEMO_DIGIT, pynini.accep("0"))) @ graph
        # One to three digits followed by any number of three-digit groups,
        # each optionally preceded by a "," that is deleted.
        self.graph = (pynini.closure(NEMO_DIGIT, 1, 3) + pynini.closure(
            pynini.closure(pynutil.delete(","), 0, 1) + NEMO_DIGIT +
            NEMO_DIGIT + NEMO_DIGIT)) @ graph

        graph_digit = pynini.string_file(
            get_abs_path("data/numbers/digit.tsv"))
        graph_zero = pynini.string_file(get_abs_path("data/numbers/zero.tsv"))
        # Single-digit reading from the inverted tables (weight 1.2), plus
        # "0" read as "oh" (weight 1.1).
        single_digits_graph = pynutil.add_weight(
            pynini.invert(graph_digit | graph_zero), 1.2) | pynutil.add_weight(
                pynini.cross("0", "oh"), 1.1)
        # One or more single digits with a space inserted between them.
        self.single_digits_graph = single_digits_graph + pynini.closure(
            pynutil.insert(" ") + single_digits_graph)

        if not deterministic:
            # Digit-by-digit reading of comma-grouped numbers: a leading
            # group, then one or more ",ddd" groups with the comma deleted.
            single_digits_graph_with_commas = pynini.closure(
                self.single_digits_graph + pynutil.insert(" "), 1,
                3) + pynini.closure(
                    pynutil.delete(",") + single_digits_graph +
                    pynutil.insert(" ") + single_digits_graph +
                    pynutil.insert(" ") + single_digits_graph,
                    1,
                )
            # self.graph is widened here, so the range graph built below
            # uses the widened version; the statement order matters.
            self.graph |= self.single_digits_graph | get_hundreds_graph(
            ) | single_digits_graph_with_commas
            # "A-B" read as "[from ]A to B" or "A B".
            self.range_graph = (
                pynini.closure(pynutil.insert("from "), 0, 1) + self.graph +
                (pynini.cross("-", " to ") | pynini.cross("-", " ")) +
                self.graph)

            # "AxB" or "A x B" read as "A by B".
            self.range_graph |= self.graph + (pynini.cross(
                "x", " by ") | pynini.cross(" x ", " by ")) + self.graph
            self.range_graph = self.range_graph.optimize()

        # An optional leading "-" becomes `negative: "true" `.
        optional_minus_graph = pynini.closure(
            pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0,
            1)
        # The serial graph is added with an extra weight of 1.2.
        final_graph = self.graph | pynutil.add_weight(self.get_serial_graph(),
                                                      1.2)

        if not deterministic:
            final_graph |= self.range_graph

        final_graph = optional_minus_graph + pynutil.insert(
            "integer: \"") + final_graph + pynutil.insert("\"")
        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()
Ejemplo n.º 24
0
    def __init__(self):
        """Build the Spanish "electronic" classify grammar into ``self.fst``.

        Two shapes are recognised: an e-mail address (username, the word
        "arroba", then a domain) emitted as ``username`` and ``domain``
        fields, and a URL emitted as a ``protocol`` field.
        """
        super().__init__(name="electronic", kind="classify")

        # Local override: here delete_extra_space removes exactly one space.
        delete_extra_space = pynutil.delete(" ")
        # A letter, or an entry of the digit/zero tables.
        alpha_num = (
            NEMO_ALPHA
            | pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
            | pynini.string_file(get_abs_path("data/numbers/zero.tsv")))

        symbols = pynini.string_file(
            get_abs_path("data/electronic/symbols.tsv")).invert()

        accepted_username = alpha_num | symbols
        process_dot = pynini.cross("punto", ".")
        # Username: starts and ends with alpha_num; the space-separated items
        # in between may also be symbols. Separating spaces are deleted.
        username = (pynutil.insert("username: \"") + alpha_num +
                    delete_extra_space +
                    pynini.closure(accepted_username + delete_extra_space) +
                    alpha_num + pynutil.insert("\""))
        # One or more alpha_num items separated by (deleted) single spaces.
        single_alphanum = pynini.closure(alpha_num +
                                         delete_extra_space) + alpha_num
        server = single_alphanum | pynini.string_file(
            get_abs_path("data/electronic/server_name.tsv")).invert()
        domain = single_alphanum | pynini.string_file(
            get_abs_path("data/electronic/domain.tsv")).invert()
        # server "punto" domain -> `domain: "server.domain"`.
        domain_graph = (pynutil.insert("domain: \"") + server +
                        delete_extra_space + process_dot + delete_extra_space +
                        domain + pynutil.insert("\""))
        graph = (username + delete_extra_space + pynutil.delete("arroba") +
                 insert_space + delete_extra_space + domain_graph)

        ############# url ###
        protocol_end = pynini.cross(
            pynini.union("www", "w w w", "doble ve doble ve doble ve"), "www")
        protocol_start = pynini.cross(
            pynini.union("http", "h t t p", "hache te te pe"), "http")
        protocol_start |= pynini.cross(
            pynini.union("https", "h t t p s", "hache te te pe ese"), "https")
        # The spoken " dos puntos barra barra " becomes "://".
        protocol_start += pynini.cross(" dos puntos barra barra ", "://")

        # e.g. .com, .es
        # Note: `+` binds tighter than `|`, so the alternatives are `domain`
        # or a run of one or more accepted_username items.
        ending = (delete_extra_space + symbols + delete_extra_space +
                  (domain
                   | pynini.closure(accepted_username + delete_extra_space, ) +
                   accepted_username))

        # Optional scheme, "www", "punto", a host part, then one or more
        # endings.
        protocol = (pynini.closure(protocol_start, 0, 1) + protocol_end +
                    delete_extra_space + process_dot + delete_extra_space +
                    (pynini.closure(delete_extra_space + accepted_username, 1)
                     | server) + pynini.closure(ending, 1))
        protocol = pynutil.insert("protocol: \"") + protocol + pynutil.insert(
            "\"")
        graph |= protocol
        ########

        final_graph = self.add_tokens(graph)
        self.fst = final_graph.optimize()
Ejemplo n.º 25
0
    def __init__(self, measure: GraphFst, deterministic: bool = False):
        """Build the "serial" verbalizer and store it in ``self.fst``.

        The grammar is ``measure.graph_cardinal`` followed by a
        ``units: "serial"`` field that is removed entirely.

        Args:
            measure: grammar whose ``graph_cardinal`` attribute verbalizes the
                leading value.
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="serial",
                         kind="verbalize",
                         deterministic=deterministic)

        # Map the literal unit name "serial" to nothing.
        drop_serial_unit = (pynutil.delete("units: \"") +
                            pynini.cross("serial", "") +
                            pynutil.delete("\"") + delete_space)
        serial_graph = measure.graph_cardinal + delete_space + drop_serial_unit
        self.fst = self.delete_tokens(serial_graph).optimize()
Ejemplo n.º 26
0
 def __init__(self):
     """Build the "decimal" verbalizer and store it in ``self.fst``.

     Optional ``integer_part``, ``fractional_part`` (prefixed with an inserted
     ".") and ``quantity`` (prefixed with an inserted space) fields are
     unwrapped in that order; this unsigned graph is kept on ``self.numbers``.
     The full grammar also maps an optional ``negative: "true"`` to "-".
     """
     super().__init__(name="decimal", kind="verbalize")

     def quoted_field(label):
         # `label "value"` -> `value`; the value needs one or more characters.
         return (
             pynutil.delete(label)
             + delete_space
             + pynutil.delete("\"")
             + pynini.closure(NEMO_NOT_QUOTE, 1)
             + pynutil.delete("\"")
         )

     optional_sign = pynini.closure(pynini.cross("negative: \"true\"", "-") + delete_space, 0, 1)

     integer = quoted_field("integer_part:")
     fractional = pynutil.insert(".") + quoted_field("fractional_part:")
     quantity = quoted_field("quantity:")

     optional_integer = pynini.closure(integer + delete_space, 0, 1)
     optional_fractional = pynini.closure(fractional + delete_space, 0, 1)
     optional_quantity = pynini.closure(pynutil.insert(" ") + quantity + delete_space, 0, 1)

     unsigned = optional_integer + optional_fractional + optional_quantity
     self.numbers = unsigned

     signed = optional_sign + unsigned
     self.fst = self.delete_tokens(signed).optimize()
Ejemplo n.º 27
0
    def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, deterministic: bool = True):
        """Build the "measure" verbalizer and store it in ``self.fst``.

        Two orders are accepted: a cardinal, decimal or fraction value
        followed by the unit, and the unit followed by a cardinal or decimal
        value with any number of trailing ``preserve_order: true`` markers
        removed.

        Args:
            decimal: grammar providing ``numbers`` for decimal values.
            cardinal: grammar providing ``optional_sign`` and ``numbers``.
            fraction: grammar providing ``graph`` for fraction values.
            deterministic: forwarded unchanged to the parent constructor.
        """
        super().__init__(name="measure", kind="verbalize", deterministic=deterministic)

        def braced(opening, inner):
            # `opening inner }` with the wrapper and surrounding spaces removed.
            return pynutil.delete(opening) + delete_space + inner + delete_space + pynutil.delete("}")

        optional_sign = cardinal.optional_sign
        unit = pynutil.delete("units: \"") + pynini.closure(NEMO_CHAR - " ", 1) + pynutil.delete("\"") + delete_space

        graph_decimal = braced("decimal {", optional_sign + delete_space + decimal.numbers)
        graph_cardinal = braced("cardinal {", optional_sign + delete_space + cardinal.numbers)
        graph_fraction = braced("fraction {", fraction.graph)

        value_then_unit = (graph_cardinal | graph_decimal | graph_fraction) + delete_space + insert_space + unit

        # SH adds "preserve_order: true" by default
        preserve_order = pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
        unit_then_value = (
            unit + insert_space + (graph_cardinal | graph_decimal) + delete_space + pynini.closure(preserve_order)
        )

        graph = value_then_unit | unit_then_value
        self.fst = self.delete_tokens(graph).optimize()
Ejemplo n.º 28
0
    def __init__(self):
        """Build the "electronic" verbalizer and store it in ``self.fst``.

        Accepts either ``username: "..."`` followed by ``domain: "..."``,
        emitted as ``username@domain``, or a single ``protocol: "..."`` field,
        emitted as its value.
        """
        super().__init__(name="electronic", kind="verbalize")
        # Restored from a redaction artifact in the source
        # (`"username:"******"\""`); it now mirrors the domain and protocol
        # fields below.
        user_name = (
            pynutil.delete("username:")
            + delete_space
            + pynutil.delete("\"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )
        domain = (
            pynutil.delete("domain:")
            + delete_space
            + pynutil.delete("\"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )

        protocol = (
            pynutil.delete("protocol:")
            + delete_space
            + pynutil.delete("\"")
            + pynini.closure(NEMO_NOT_QUOTE, 1)
            + pynutil.delete("\"")
        )

        # E-mail form: the "@" is inserted between username and domain.
        graph = user_name + delete_space + pynutil.insert("@") + domain
        graph |= protocol

        delete_tokens = self.delete_tokens(graph)
        self.fst = delete_tokens.optimize()
Ejemplo n.º 29
0
    def __init__(self):
        """Build the Vietnamese "electronic" classify grammar into ``self.fst``.

        Two shapes are recognised: an e-mail address (username, one of
        "a còng" / "a móc" / "a vòng", then a domain) emitted as ``username``
        and ``domain`` fields, and a URL emitted as a ``protocol`` field.
        """
        super().__init__(name="electronic", kind="classify")

        # Local override: here delete_extra_space removes exactly one space.
        delete_extra_space = pynutil.delete(" ")
        # A letter, or an entry of the digit/zero tables.
        alpha_num = (
            NEMO_ALPHA
            | pynini.string_file(get_abs_path("data/numbers/digit.tsv"))
            | pynini.string_file(get_abs_path("data/numbers/zero.tsv")))

        symbols = pynini.string_file(
            get_abs_path("data/electronic/symbols.tsv")).invert()

        accepted_username = alpha_num | symbols
        process_dot = pynini.cross("chấm", ".")
        # Restored from a redaction artifact in the source, which inserted a
        # literal `username: "******"`. The username starts and ends with
        # alpha_num; items in between may also be symbols, and the separating
        # spaces are deleted.
        # NOTE(review): reconstructed by analogy with the sibling Spanish
        # grammar — confirm against the upstream file.
        username = (pynutil.insert('username: "') + alpha_num +
                    delete_extra_space +
                    pynini.closure(accepted_username + delete_extra_space) +
                    alpha_num + pynutil.insert('"'))
        # One or more alpha_num items separated by (deleted) single spaces.
        single_alphanum = pynini.closure(alpha_num +
                                         delete_extra_space) + alpha_num
        server = single_alphanum | pynini.string_file(
            get_abs_path("data/electronic/server_name.tsv"))
        domain = single_alphanum | pynini.string_file(
            get_abs_path("data/electronic/domain.tsv"))
        # One or more "chấm <domain>" parts, e.g. ".com.vn".
        multi_domain = (pynini.closure(process_dot + delete_extra_space +
                                       domain + delete_extra_space) +
                        process_dot + delete_extra_space + domain)
        domain_graph = pynutil.insert(
            'domain: "'
        ) + server + delete_extra_space + multi_domain + pynutil.insert('"')
        graph = (username + delete_extra_space +
                 pynutil.delete(pynini.union("a còng", "a móc", "a vòng")) +
                 insert_space + delete_extra_space + domain_graph)

        ############# url ###
        protocol_end = pynini.cross(pynini.union("w w w", "www"), "www")
        # The spoken " hai chấm sẹc sẹc " becomes "://".
        protocol_start = (pynini.cross("h t t p", "http") | pynini.cross(
            "h t t p s", "https")) + pynini.cross(" hai chấm sẹc sẹc ", "://")
        # .com,
        # Note: `+` binds tighter than `|`, so the alternatives are `domain`
        # or a run of one or more accepted_username items.
        ending = (
            delete_extra_space + symbols + delete_extra_space +
            (domain | pynini.closure(accepted_username + delete_extra_space) +
             accepted_username))

        # Optional scheme, "www", "chấm", a host part, then one or two endings.
        protocol = (pynini.closure(protocol_start, 0, 1) + protocol_end +
                    delete_extra_space + process_dot +
                    pynini.closure(delete_extra_space + accepted_username, 1) +
                    pynini.closure(ending, 1, 2))
        protocol = pynutil.insert('protocol: "') + protocol + pynutil.insert(
            '"')
        graph |= protocol
        ########

        final_graph = self.add_tokens(graph)
        self.fst = final_graph.optimize()
Ejemplo n.º 30
0
 def __init__(self):
     """Build the final verbalizer over a sequence of tokens into ``self.fst``.

     Each token has the form ``tokens { <content> }``, where the content is
     accepted by ``VerbalizeFst`` or ``WordFst``. One or more tokens are
     accepted, each but the last followed by ``delete_extra_space``.
     """
     super().__init__(name="verbalize_final", kind="verbalize")
     verbalize = VerbalizeFst().fst
     word = WordFst().fst
     types = verbalize | word
     # Strip the `tokens { ... }` wrapper around a single token.
     graph = (pynutil.delete("tokens") + delete_space +
              pynutil.delete("{") + delete_space + types + delete_space +
              pynutil.delete("}"))
     graph = delete_space + pynini.closure(
         graph + delete_extra_space) + graph + delete_space
     # Optimize before storing, as the other grammars do; optimization keeps
     # the accepted relation unchanged.
     self.fst = graph.optimize()