Example #1
0
    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        """
        Build the classifier FST for decimal numbers that use a comma as separator.

        Args:
            cardinal: cardinal tagger; its ``graph`` reads the integer part and its
                ``graph_hundred_component_at_least_one_none_zero_digit`` is handed to ``get_quantity``
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="decimal", kind="classify", deterministic=deterministic)

        # Reader for one digit: the inverted digit and zero tables plus an explicit "1" -> "eins".
        nonzero_words = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
        zero_word = pynini.string_file(get_abs_path("data/numbers/zero.tsv")).invert()
        single_digit = nonzero_words | zero_word | pynini.cross("1", "eins")

        # A run of digits is read one digit at a time with a space inserted between readings.
        following_digits = pynini.closure(insert_space + single_digit).optimize()
        self.graph = single_digit + following_digits

        # An optional leading "-" is turned into the `negative: "true" ` field.
        sign = pynini.closure(pynutil.insert("negative: ") + pynini.cross("-", "\"true\" "), 0, 1)
        # The comma itself is removed from the output.
        separator = pynutil.delete(",")

        self.graph_integer = pynutil.insert("integer_part: \"") + cardinal.graph + pynutil.insert("\"")
        self.graph_fractional = pynutil.insert("fractional_part: \"") + self.graph + pynutil.insert("\"")
        unsigned = self.graph_integer + separator + insert_space + self.graph_fractional

        # Accept the plain decimal or the graph get_quantity derives from it.
        with_quantity = get_quantity(unsigned, cardinal.graph_hundred_component_at_least_one_none_zero_digit)
        self.final_graph_wo_negative = unsigned | with_quantity

        tagged = sign + self.final_graph_wo_negative + pynutil.insert(" preserve_order: true")
        self.fst = self.add_tokens(tagged).optimize()
Example #2
0
    def __init__(self, input_case: str, deterministic: bool = True, input_file: str = None):
        """
        Build the classifier FST for whitelisted tokens.

        Args:
            input_case: when equal to "lower_cased", the first column of every loaded entry is lower-cased
            deterministic: passed through to the base-class initializer; when False, extra
                alternatives (lower-cased default whitelist, measurement units) are unioned in
            input_file: optional path to a user whitelist; it replaces the default whitelist in
                deterministic mode and is added to it otherwise
        """
        super().__init__(name="whitelist", kind="classify", deterministic=deterministic)

        def _build_graph(case, path):
            # Load the rows of `path` and turn them into a string-map FST.
            entries = load_labels(path)
            if case == "lower_cased":
                entries = [[entry[0].lower()] + entry[1:] for entry in entries]
            return pynini.string_map(entries)

        default_path = get_abs_path("data/whitelist.tsv")
        whitelist_graph = _build_graph(input_case, default_path)

        if not deterministic and input_case != "lower_cased":
            # Also allow the lower-cased variant of the default whitelist, at a small cost.
            lowered = _build_graph("lower_cased", default_path)
            whitelist_graph = whitelist_graph | pynutil.add_weight(lowered, weight=0.0001)

        if input_file:
            custom_graph = _build_graph(input_case, input_file)
            whitelist_graph = custom_graph if deterministic else (whitelist_graph | custom_graph)

        if not deterministic:
            measurements_path = get_abs_path("data/measure/measurements.tsv")
            whitelist_graph = whitelist_graph | _build_graph(input_case, file=None or measurements_path)

        self.graph = whitelist_graph
        self.final_graph = convert_space(self.graph).optimize()
        tagged = pynutil.insert("name: \"") + self.final_graph + pynutil.insert("\"")
        self.fst = tagged.optimize()
Example #3
0
    def __init__(self, deterministic: bool = True):
        """
        Build the verbalizer FST for ordinal tokens of the form ``integer: "..."``.

        Args:
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="ordinal", kind="verbalize", deterministic=deterministic)

        # Inverted ordinal tables; together they form the stem rewrite exposed as `ordinal_stem`.
        digit_stems = pynini.string_file(get_abs_path("data/ordinals/digit.tsv")).invert()
        ties_stems = pynini.string_file(get_abs_path("data/ordinals/ties.tsv")).invert()
        thousands_stems = pynini.string_file(get_abs_path("data/ordinals/thousands.tsv")).invert()
        self.ordinal_stem = digit_stems | ties_stems | thousands_stems

        # Strip the `integer: "` ... `"` wrapper and keep the non-empty payload.
        payload = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE, 1) + pynutil.delete("\"")

        # Any one of these endings is inserted, at a small cost.
        endings = pynini.union("ten", "tem", "ter", "tes", "te")
        add_ending = pynutil.insert(endings, weight=0.01)

        # At the end of the string: an optional stem rewrite followed by an inserted ending.
        optional_stem = pynini.closure(self.ordinal_stem, 0, 1)
        self.suffix = pynini.cdrewrite(optional_stem + add_ending, "", "[EOS]", NEMO_SIGMA).optimize()

        self.graph = pynini.compose(payload, self.suffix)
        self.fst = self.delete_tokens(self.graph).optimize()
Example #4
0
    def __init__(self, cardinal: GraphFst, deterministic: bool = True):
        """
        Build the classifier FST for telephone numbers: a country/area code, a space,
        then a number part that is read digit by digit.

        Args:
            cardinal: cardinal tagger; its ``two_digit_non_zero`` graph reads two-digit codes
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="telephone", kind="classify", deterministic=deterministic)

        zero = pynini.invert(pynini.string_file(get_abs_path("data/numbers/zero.tsv"))).optimize()
        nonzero = pynini.invert(pynini.string_file(get_abs_path("data/numbers/digit.tsv"))).optimize()
        nonzero = nonzero | pynini.cross("1", "eins")
        any_digit = nonzero | zero

        # One or more digits, each read on its own, with a space inserted between readings.
        digit_sequence = pynini.closure(any_digit + insert_space) + any_digit

        # Exactly two digits read through the cardinal's two-digit graph, or a lone zero.
        two_digits_or_zero = (NEMO_DIGIT**2 @ cardinal.two_digit_non_zero) | zero

        # Three accepted shapes for the leading code.
        optional_plus = pynini.closure(pynini.cross("+", "plus "), 0, 1)
        with_plus = optional_plus + two_digits_or_zero
        bracketed = pynutil.delete("(") + zero + insert_space + digit_sequence + pynutil.delete(")")
        leading_zero = zero + insert_space + digit_sequence
        code = with_plus | bracketed | leading_zero
        country_code = pynutil.insert("country_code: \"") + code + pynutil.insert("\"")

        # The number part: one or two digit groups joined by "-" or " " (both emitted as a space),
        # restricted to inputs of at least seven characters from the allowed set.
        separator = pynini.cross(pynini.union("-", " "), " ")
        grouped_digits = digit_sequence + pynini.closure(separator + digit_sequence, 0, 1)
        allowed_char = NEMO_DIGIT | pynini.union("-", " ", ")", "(")
        at_least_seven = pynini.closure(allowed_char, 7)
        number_part = pynini.compose(at_least_seven, grouped_digits)
        number = pynutil.insert("number_part: \"") + number_part + pynutil.insert("\"")

        self.graph = country_code + pynini.accep(" ") + number
        tagged = self.add_tokens(self.graph + pynutil.insert(" preserve_order: true"))
        self.fst = tagged.optimize()
Example #5
0
    def __init__(self, deterministic: bool = True):
        """
        Build the classifier FST for electronic addresses (e-mail addresses and URLs).

        Args:
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="electronic", kind="classify", deterministic=deterministic)

        dot = pynini.accep(".")

        # First column of the domain table, as an acceptor.
        domain_labels = [row[0] for row in load_labels(get_abs_path("data/electronic/domain.tsv"))]
        common_domains = pynini.union(*domain_labels)

        # First column of the symbol table, with the dot excluded.
        symbol_labels = [row[0] for row in load_labels(get_abs_path("data/electronic/symbols.tsv"))]
        symbols_no_dot = pynini.union(*symbol_labels) - dot

        chars = pynini.closure(NEMO_ALPHA | NEMO_DIGIT | symbols_no_dot)

        open_domain = pynutil.insert("domain: \"")
        close_quote = pynutil.insert("\"")

        # email
        username = pynutil.insert("username: \"") + chars + close_quote + pynini.cross('@', ' ')
        domain_graph = open_domain + (chars + dot + chars) + close_quote

        # A domain containing one of the common domains, optionally followed by one more segment.
        optional_tail = pynini.closure((symbols_no_dot | dot) + pynini.closure(chars, 1), 0, 1)
        domain_common_graph = open_domain + chars + common_domains + optional_tail + close_quote

        email_or_domain = (username + domain_graph) | domain_common_graph

        # url
        scheme = pynini.accep("https://") | pynini.accep("http://")
        www = pynini.accep("www.")
        protocol = pynutil.insert("protocol: \"") + (scheme | www | (scheme + www)) + close_quote
        url = protocol + insert_space + (domain_graph | domain_common_graph)

        self.graph = email_or_domain | url

        tagged = self.add_tokens(self.graph + pynutil.insert(" preserve_order: true"))
        self.fst = tagged.optimize()
Example #6
0
    def __init__(self, deterministic: bool = True):
        """
        Build the verbalizer FST for electronic tokens (``protocol``, ``username`` and ``domain`` fields).

        Args:
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="electronic", kind="verbalize", deterministic=deterministic)

        zero = pynini.invert(pynini.string_file(get_abs_path("data/numbers/zero.tsv"))).optimize()
        nonzero = pynini.invert(pynini.string_file(get_abs_path("data/numbers/digit.tsv"))).optimize()
        nonzero = nonzero | pynini.cross("1", "eins")
        any_digit = nonzero | zero

        symbols = pynini.string_file(get_abs_path("data/electronic/symbols.tsv")).optimize()
        server_common = pynini.string_file(get_abs_path("data/electronic/server_name.tsv"))
        domain_common = pynini.string_file(get_abs_path("data/electronic/domain.tsv"))

        # Characters that are neither a quote nor a space, with a space inserted between neighbours.
        visible_char = NEMO_NOT_QUOTE - pynini.accep(" ")
        spaced_chars = pynini.closure(visible_char + insert_space) + visible_char

        # Rewrites applied anywhere in the string.
        spell_symbols_and_digits = pynini.cdrewrite(symbols | any_digit, "", "", NEMO_SIGMA)
        spell_symbols = pynini.cdrewrite(symbols, "", "", NEMO_SIGMA)

        user_name = pynutil.delete("username: \"") + spaced_chars + pynutil.delete("\"")
        user_name = user_name @ spell_symbols_and_digits

        # Each unit is a single character (slightly penalised) or a whole common domain/server entry.
        unit = pynutil.add_weight(NEMO_NOT_QUOTE, weight=0.0001) | domain_common | server_common
        domain_body = (unit + pynini.closure(insert_space + unit)) @ spell_symbols_and_digits
        domain = pynutil.delete("domain: \"") + domain_body + pynutil.delete("\"")

        protocol = pynutil.delete("protocol: \"") + (spaced_chars @ spell_symbols) + pynutil.delete("\"")

        url = pynini.closure(protocol + pynini.accep(" "), 0, 1) + domain
        email = user_name + pynini.accep(" ") + pynutil.insert("at ") + domain
        self.graph = url | email

        self.fst = self.delete_tokens(self.graph + delete_preserve_order).optimize()
Example #7
0
    def __init__(self, ordinal: GraphFst, deterministic: bool = True):
        """
        Build the verbalizer FST for date tokens (``day``, ``month`` and ``year`` fields).

        Args:
            ordinal: ordinal verbalizer; its ``ordinal_stem`` rewrite is applied to the day
                and to months that are not month names
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="date", kind="verbalize", deterministic=deterministic)

        # Shared pieces: a non-empty quoted payload and the closing quote of a field.
        payload = pynini.closure(NEMO_NOT_QUOTE, 1)
        closing_quote = pynutil.delete("\"")

        # Rewrite with the ordinal stems at the end of the string; "ter" is appended afterwards.
        stem_at_end = pynini.cdrewrite(ordinal.ordinal_stem, "", "[EOS]", NEMO_SIGMA)
        ordinal_ending = pynutil.insert("ter")

        day_field = pynutil.delete("day: \"") + payload + closing_quote
        day = day_field @ stem_at_end + ordinal_ending

        # Second column of the abbreviation table: the month names.
        month_rows = load_labels(get_abs_path("data/months/abbr_to_name.tsv"))
        month_names = pynini.union(*[row[1] for row in month_rows])

        month_field = pynutil.delete("month: \"") + payload + closing_quote
        # A month name is kept as is; anything else goes through the ordinal rewrite.
        named_month = month_field @ month_names
        other_month = month_field @ pynini.difference(NEMO_SIGMA, month_names) @ stem_at_end + ordinal_ending
        final_month = named_month | other_month

        year = pynutil.delete("year: \"") + payload + closing_quote

        # day month year
        optional_year = pynini.closure(pynini.accep(" ") + year, 0, 1)
        day_month_year = day + pynini.accep(" ") + final_month + optional_year
        month_year = final_month + pynini.accep(" ") + year

        self.graph = day_month_year | month_year | year

        self.fst = self.delete_tokens(self.graph + delete_preserve_order).optimize()
Example #8
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from nemo_text_processing.text_normalization.de.utils import get_abs_path
from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, insert_space

# pynini is optional: when it cannot be imported the module still loads, with
# `quantities` left as a placeholder and PYNINI_AVAILABLE reporting the failure.
try:
    import pynini
    from pynini.lib import pynutil

    quantities = pynini.string_file(get_abs_path("data/numbers/quantities.tsv"))
    PYNINI_AVAILABLE = True
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    quantities = None
    PYNINI_AVAILABLE = False


def get_quantity(decimal: 'pynini.FstLike',
                 cardinal_up_to_hundred: 'pynini.FstLike') -> 'pynini.FstLike':
    """
    Returns FST that transforms either a cardinal or decimal followed by a quantity into a numeral,
    e.g. 1 million -> integer_part: "eine" quantity: "million"
    e.g. 1.4 million -> integer_part: "eins" fractional_part: "vier" quantity: "million"

    Args: 
Example #9
0
from nemo_text_processing.text_normalization.en.graph_utils import (
    NEMO_ALPHA,
    NEMO_DIGIT,
    NEMO_NON_BREAKING_SPACE,
    NEMO_SIGMA,
    GraphFst,
    convert_space,
    insert_space,
)

# pynini is optional: when it cannot be imported the module still loads, with the
# measurement tables left as placeholders and PYNINI_AVAILABLE reporting the failure.
try:
    import pynini
    from pynini.lib import pynutil
    from pynini.examples import plurals

    unit_singular = pynini.string_file(get_abs_path("data/measure/measurements.tsv"))
    suppletive = pynini.string_file(get_abs_path("data/measure/suppletive.tsv"))
    PYNINI_AVAILABLE = True
except ImportError:  # ModuleNotFoundError is a subclass of ImportError
    unit_singular = suppletive = None
    PYNINI_AVAILABLE = False


def singular_to_plural():
    # plural endung n/en maskuline Nomen mit den Endungen e, ent, and, ant, ist, or
    _n = NEMO_SIGMA + pynini.union("e") + pynutil.insert("n")
    _en = (
        NEMO_SIGMA
        + pynini.union("ent", "and", "ant", "ist", "or", "ion", "ik", "heit", "keit", "schaft", "tät", "ung")
Example #10
0
    def __init__(self, cardinal: GraphFst, deterministic: bool):
        """
        Build the classifier FST for dates in day-month-year, year-only and
        year-month-day layouts.

        Args:
            cardinal: cardinal tagger; its ``graph_hundred_component_at_least_one_none_zero_digit``
                reads day and month numbers, and it is handed to ``get_year_graph``
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="date", kind="classify", deterministic=deterministic)

        month_rows = load_labels(get_abs_path("data/months/abbr_to_name.tsv"))
        month_by_number = pynini.string_file(get_abs_path("data/months/numbers.tsv")).optimize()

        # Applies TO_LOWER to the first character and accepts any characters after it.
        lower_first = TO_LOWER + pynini.closure(NEMO_CHAR)

        # Second column of the table: the month names.
        full_names = pynini.union(*[row[1] for row in month_rows]).optimize()

        # Abbreviation -> name, also reachable through `lower_first`, with an optional "." removed.
        abbr_to_name = pynini.string_map(month_rows)
        optional_period = pynini.closure(pynutil.delete(".", weight=-0.0001), 0, 1)
        abbr_to_name = (
            pynutil.add_weight(abbr_to_name, weight=0.0001) | (lower_first @ abbr_to_name)
        ) + optional_period
        self.month_abbr = abbr_to_name

        # jan.-> januar, Jan-> januar, januar-> januar
        month_graph = full_names | (lower_first @ full_names) | abbr_to_name

        numbers = cardinal.graph_hundred_component_at_least_one_none_zero_digit
        one_or_two_digits = delete_leading_zero | NEMO_DIGIT

        # 01, 31, 1
        day_values = pynini.union(*map(str, range(1, 32)))
        digit_day = one_or_two_digits @ day_values @ numbers
        day = (pynutil.insert("day: \"") + digit_day + pynutil.insert("\"")).optimize()

        month_values = pynini.union(*map(str, range(1, 13)))
        digit_month = one_or_two_digits @ month_values
        month_as_name = digit_month @ month_by_number
        month_as_cardinal = digit_month @ numbers

        month_name = (pynutil.insert("month: \"") + month_graph + pynutil.insert("\"")).optimize()
        month_number = (
            pynutil.insert("month: \"")
            + (pynutil.add_weight(month_as_cardinal, weight=0.0001) | month_as_name)
            + pynutil.insert("\"")
        ).optimize()

        # prefer cardinal over year
        self.year = pynutil.add_weight(get_year_graph(cardinal=cardinal), weight=0.001)
        year_only = pynutil.insert("year: \"") + self.year + pynutil.insert("\"")

        # Day, ".", optional space, month name, optional " year".
        drop_optional_space = pynini.closure(pynutil.delete(" "), 0, 1)
        trailing_year = pynini.closure(pynini.accep(" ") + year_only, 0, 1)
        day_month_name = day + pynutil.delete(".") + drop_optional_space + insert_space + month_name + trailing_year

        # Day, ".", month number, optional "." + year; every "." is emitted as a space.
        dot_to_space = pynini.cross(".", " ")
        dotted_year = pynini.closure(dot_to_space + year_only, 0, 1)
        day_month_number = day + dot_to_space + month_number + dotted_year

        graph_dmy = day_month_name | day_month_number

        # Year, "-", month number, optional "-" + day; every "-" is emitted as a space.
        dash_to_space = pynini.cross("-", " ")
        dashed_day = pynini.closure(dash_to_space + day, 0, 1)
        graph_ymd = year_only + dash_to_space + month_number + dashed_day

        # Only the day-month-year layouts carry the preserve_order flag.
        final_graph = (graph_dmy + pynutil.insert(" preserve_order: true")) | year_only | graph_ymd

        self.final_graph = final_graph.optimize()
        self.fst = self.add_tokens(self.final_graph).optimize()
Example #11
0
from nemo_text_processing.text_normalization.de.utils import get_abs_path, load_labels
from nemo_text_processing.text_normalization.en.graph_utils import (
    NEMO_CHAR,
    NEMO_DIGIT,
    TO_LOWER,
    GraphFst,
    insert_space,
)

# pynini is optional: when it cannot be imported the module still loads, with the
# shared graphs left as placeholders and PYNINI_AVAILABLE reporting the failure.
try:
    import pynini
    from pynini.lib import pynutil

    # Inverted number tables loaded from the data directory.
    graph_teen = pynini.invert(
        pynini.string_file(get_abs_path("data/numbers/teen.tsv"))).optimize()
    graph_digit = pynini.invert(
        pynini.string_file(get_abs_path("data/numbers/digit.tsv"))).optimize()
    ties_graph = pynini.invert(
        pynini.string_file(get_abs_path("data/numbers/ties.tsv"))).optimize()
    # Two-character input: the first character is either a "0" that gets deleted
    # or a non-zero digit that is kept; the second is any digit.
    delete_leading_zero = (pynutil.delete("0") |
                           (NEMO_DIGIT - "0")) + NEMO_DIGIT

    PYNINI_AVAILABLE = True
except (ModuleNotFoundError, ImportError):
    graph_teen = None
    graph_digit = None
    ties_graph = None
    delete_leading_zero = None
    # The import failed, so the flag must say so; it was previously set to True here.
    PYNINI_AVAILABLE = False
Example #12
0
    def __init__(self, deterministic: bool = True):
        """
        Build the classifier FST for clock times followed by "Uhr"/"uhr", e.g.
            02:30 Uhr
            10:30:05 Uhr
            2 Uhr est

        Args:
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="time",
                         kind="classify",
                         deterministic=deterministic)

        # The suffix is a space followed by "Uhr" or "uhr". The alternatives are
        # parenthesised on purpose: `+` binds tighter than `|`, so without the
        # parentheses the lowercase variant would only match when NOT preceded
        # by a space.
        final_suffix = pynutil.delete(" ") + (pynutil.delete("Uhr")
                                              | pynutil.delete("uhr"))
        time_zone_graph = pynini.string_file(
            get_abs_path("data/time/time_zone.tsv"))

        labels_hour = [str(x) for x in range(0, 25)]
        labels_minute_single = [str(x) for x in range(1, 10)]
        labels_minute_double = [str(x) for x in range(10, 60)]

        # Two-character input: a leading "0" is deleted, a non-zero first digit is kept.
        delete_leading_zero_to_double_digit = (pynutil.delete("0") |
                                               (NEMO_DIGIT - "0")) + NEMO_DIGIT

        graph_hour = pynini.union(*labels_hour)

        graph_minute_single = pynini.union(*labels_minute_single)
        graph_minute_double = pynini.union(*labels_minute_double)

        # Hour written on its own (no colon part): taken as is, 0..24.
        final_graph_hour_only = pynutil.insert(
            "hours: \"") + graph_hour + pynutil.insert("\"")
        # Hour in front of a colon: two characters, leading zero stripped.
        final_graph_hour = (pynutil.insert("hours: \"") +
                            delete_leading_zero_to_double_digit @ graph_hour +
                            pynutil.insert("\""))
        # Minutes/seconds: "0" + 1..9 (zero stripped) or 10..59.
        final_graph_minute = (
            pynutil.insert("minutes: \"") +
            ((pynutil.delete("0") + graph_minute_single) | graph_minute_double) +
            pynutil.insert("\""))
        final_graph_second = (
            pynutil.insert("seconds: \"") +
            ((pynutil.delete("0") + graph_minute_single) | graph_minute_double) +
            pynutil.insert("\""))
        final_time_zone_optional = pynini.closure(
            pynini.accep(" ") + pynutil.insert("zone: \"") +
            convert_space(time_zone_graph) + pynutil.insert("\""),
            0,
            1,
        )

        # 02:30 Uhr -- a "00" minute part is dropped entirely
        graph_hm = (final_graph_hour + pynutil.delete(":") +
                    (pynutil.delete("00") |
                     (insert_space + final_graph_minute)) + final_suffix +
                    final_time_zone_optional)

        # 10:30:05 Uhr -- "00" minutes/seconds are kept as explicit "0" fields
        graph_hms = (final_graph_hour + pynutil.delete(":") +
                     (pynini.cross("00", " minutes: \"0\"") |
                      (insert_space + final_graph_minute)) +
                     pynutil.delete(":") +
                     (pynini.cross("00", " seconds: \"0\"") |
                      (insert_space + final_graph_second)) + final_suffix +
                     final_time_zone_optional +
                     pynutil.insert(" preserve_order: true"))

        # 2 Uhr est
        graph_h = final_graph_hour_only + final_suffix + final_time_zone_optional
        final_graph = (graph_hm | graph_h | graph_hms).optimize()
        final_graph = self.add_tokens(final_graph)
        self.fst = final_graph.optimize()
Example #13
0
    def __init__(self, cardinal_tagger: GraphFst, deterministic: bool = True):
        """
        Build the verbalizer FST for time tokens made of ``hours``, ``minutes``,
        ``seconds`` and ``zone`` fields.

        Args:
            cardinal_tagger: cardinal tagger; its ``two_digit_non_zero`` graph reads
                hour, minute and second values
            deterministic: passed through unchanged to the base-class initializer
        """
        super().__init__(name="time",
                         kind="verbalize",
                         deterministic=deterministic)

        # add weight so when using inverse text normalization this conversion is deprioritized
        night_to_early = pynutil.add_weight(pynini.invert(
            pynini.string_file(
                get_abs_path("data/time/hour_to_night.tsv"))).optimize(),
                                            weight=0.0001)
        # Inverted lookup tables used by the "halb" and "vor" readings below.
        # NOTE(review): presumably they map an hour to the following hour and a minute
        # to its complement to the full hour -- confirm against the data files.
        hour_to = pynini.invert(
            pynini.string_file(
                get_abs_path("data/time/hour_to.tsv"))).optimize()
        minute_to = pynini.invert(
            pynini.string_file(
                get_abs_path("data/time/minute_to.tsv"))).optimize()
        # Acceptor over the second column of the time-zone table, passed through
        # convert_space and then inverted.
        time_zone_graph = pynini.invert(
            convert_space(
                pynini.union(*[
                    x[1] for x in load_labels(
                        get_abs_path("data/time/time_zone.tsv"))
                ])))

        graph_zero = pynini.invert(
            pynini.string_file(
                get_abs_path("data/numbers/zero.tsv"))).optimize()
        # Reads a numeric field: the zero table or the cardinal's two-digit graph.
        number_verbalization = graph_zero | cardinal_tagger.two_digit_non_zero
        hour = pynutil.delete("hours: \"") + pynini.closure(
            NEMO_DIGIT, 1) + pynutil.delete("\"")
        # Hour read as a number; a reading that is exactly "eins" becomes "ein"
        # (the rewrite is anchored at both string ends), then " uhr" is appended.
        hour_verbalized = hour @ number_verbalization @ pynini.cdrewrite(
            pynini.cross("eins", "ein"), "[BOS]", "[EOS]",
            NEMO_SIGMA) + pynutil.insert(" uhr")
        minute = pynutil.delete("minutes: \"") + pynini.closure(
            NEMO_DIGIT, 1) + pynutil.delete("\"")
        zone = pynutil.delete("zone: \"") + time_zone_graph + pynutil.delete(
            "\"")
        optional_zone = pynini.closure(pynini.accep(" ") + zone, 0, 1)
        second = pynutil.delete("seconds: \"") + pynini.closure(
            NEMO_DIGIT, 1) + pynutil.delete("\"")
        # hours + minutes + seconds, with the unit words inserted after each value.
        graph_hms = (hour_verbalized + pynini.accep(" ") +
                     minute @ number_verbalization +
                     pynutil.insert(" minuten") + pynini.accep(" ") +
                     second @ number_verbalization +
                     pynutil.insert(" sekunden") + optional_zone)
        # Singular forms when the value is "eins", applied after a space or at the string start.
        graph_hms @= pynini.cdrewrite(
            pynini.cross("eins minuten", "eine minute")
            | pynini.cross("eins sekunden", "eine sekunde"),
            pynini.union(" ", "[BOS]"),
            "",
            NEMO_SIGMA,
        )

        # Acceptors for the minute values "1".."30" and "1".."29".
        min_30 = [str(x) for x in range(1, 31)]
        min_30 = pynini.union(*min_30)
        min_29 = [str(x) for x in range(1, 30)]
        min_29 = pynini.union(*min_29)

        graph_h = hour_verbalized
        # hours + minutes; no unit word is inserted after the minutes here.
        graph_hm = hour_verbalized + pynini.accep(
            " ") + minute @ number_verbalization

        # "<minutes> nach <hour>": minutes limited to 1..30, with 15 also readable as "viertel".
        # The hour may first be rewritten by night_to_early before being read.
        graph_m_past_h = (
            minute @ min_30
            @ (number_verbalization | pynini.cross("15", "viertel")) +
            pynini.accep(" ") + pynutil.insert("nach ")
            # + hour @ number_verbalization
            + hour @ pynini.cdrewrite(night_to_early, "[BOS]", "[EOS]",
                                      NEMO_SIGMA) @ number_verbalization)
        # "halb <hour>": minutes must be exactly 30; the hour goes through hour_to.
        graph_m30_h = (minute @ pynini.cross("30", "halb") +
                       pynini.accep(" ") + hour @ pynini.cdrewrite(
                           night_to_early, "[BOS]", "[EOS]",
                           NEMO_SIGMA) @ hour_to @ number_verbalization)
        # "<minutes> vor <hour>": minutes go through minute_to and must then be 1..29;
        # the hour goes through hour_to.
        graph_m_to_h = (
            minute @ minute_to @ min_29
            @ (number_verbalization | pynini.cross("15", "viertel")) +
            pynini.accep(" ") + pynutil.insert("vor ") + hour
            @ pynini.cdrewrite(night_to_early, "[BOS]", "[EOS]",
                               NEMO_SIGMA) @ hour_to @ number_verbalization)

        # The colloquial readings carry a small weight so the plain ones are preferred.
        # NOTE(review): graph_hms already ends with optional_zone, and optional_zone is
        # appended again here for every alternative -- confirm this is intended.
        self.graph = (graph_hms
                      | graph_h
                      | graph_hm
                      | pynutil.add_weight(graph_m_past_h, weight=0.0001)
                      | pynutil.add_weight(graph_m30_h, weight=0.0001)
                      | pynutil.add_weight(graph_m_to_h,
                                           weight=0.0001)) + optional_zone
        delete_tokens = self.delete_tokens(self.graph + delete_preserve_order)
        self.fst = delete_tokens.optimize()