def acceptor(
    astring: str, symbol_table: SymbolTable, weight: Optional[float] = None
) -> VectorFst:
    """
    Build an acceptor FST from a string.

    The resulting FST accepts its input with a fixed path weight.

    Args:
        astring: The input string. It is UTF-8 encoded before being handed to
            the native library.
        symbol_table: SymbolTable used to encode the string.
        weight: Desired path weight as a float. When omitted or None, the
            value returned by `weight_one()` is used.
    Returns:
        An FST acceptor.
    """
    path_weight = weight_one() if weight is None else weight

    # Passed by reference; the native call is expected to store the handle of
    # the newly created FST here.
    fst_out = ctypes.pointer(ctypes.c_void_p())
    status = lib.utils_string_to_acceptor(
        astring.encode("utf-8"),
        symbol_table.ptr,
        ctypes.c_float(path_weight),
        ctypes.byref(fst_out),
    )
    check_ffi_error(status, "Error creating acceptor FST")

    return VectorFst(ptr=fst_out)
def connect(fst: VectorFst) -> VectorFst:
    """
    Trim an Fst, removing states and trs that are not on successful paths.

    Examples :

    - Input :

    ![connect_in](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/connect_in.svg?sanitize=true)

    - Output :

    ![connect_out](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/connect_out.svg?sanitize=true)

    Args:
        fst: Fst to trim.
    Returns:
        A VectorFst wrapping the handle produced by the native `fst_connect`
        call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the trimmed FST here.
    trimmed_ptr = ctypes.c_void_p()
    status = lib.fst_connect(fst.ptr, ctypes.byref(trimmed_ptr))
    check_ffi_error(status, "Error during connect")

    return VectorFst(ptr=trimmed_ptr)
def top_sort(fst: VectorFst) -> VectorFst:
    """
    Topologically sort an Fst.

    When sorted, all transitions are from lower to higher state IDs.

    Examples :

    - Input

    ![topsort_in](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/topsort_in.svg?sanitize=true)

    - Output

    ![topsort_out](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/topsort_out.svg?sanitize=true)

    Args:
        fst: Fst to top_sort.
    Returns:
        Equivalent top sorted Fst, wrapping the handle produced by the native
        `fst_top_sort` call.
        NOTE(review): earlier documentation states the input is also modified
        in place -- confirm against the native implementation.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the sorted FST here.
    sorted_ptr = ctypes.c_void_p()
    status = lib.fst_top_sort(fst.ptr, ctypes.byref(sorted_ptr))
    check_ffi_error(status, "Error during top_sort")

    return VectorFst(ptr=sorted_ptr)
def shortestpath(fst: VectorFst) -> VectorFst:
    """
    Construct an FST containing the shortest path of the input FST.

    Args:
        fst: Fst whose shortest path is computed.
    Returns:
        A VectorFst wrapping the handle produced by the native
        `fst_shortest_path` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the result FST here.
    result_ptr = ctypes.c_void_p()
    status = lib.fst_shortest_path(fst.ptr, ctypes.byref(result_ptr))
    check_ffi_error(status, "Error computing shortest path")

    return VectorFst(ptr=result_ptr)
def reverse(fst: VectorFst) -> VectorFst:
    """
    Reverse an Fst.

    Args:
        fst: Fst to reverse.
    Returns:
        A VectorFst wrapping the handle produced by the native `fst_reverse`
        call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the reversed FST here.
    reversed_ptr = ctypes.c_void_p()
    status = lib.fst_reverse(fst.ptr, ctypes.byref(reversed_ptr))
    check_ffi_error(status, "Error during reverse")

    return VectorFst(ptr=reversed_ptr)
def rm_epsilon(fst: VectorFst) -> VectorFst:
    """
    Apply the native epsilon-removal operation (`fst_rm_epsilon`) to an Fst.

    Args:
        fst: Fst to process.
    Returns:
        A VectorFst wrapping the handle produced by the native
        `fst_rm_epsilon` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the result FST here.
    result_ptr = ctypes.c_void_p()
    status = lib.fst_rm_epsilon(fst.ptr, ctypes.byref(result_ptr))
    check_ffi_error(status, "Error during rm_epsilon")

    return VectorFst(ptr=result_ptr)
def determinize(fst: VectorFst) -> VectorFst:
    """
    Make an Fst deterministic.

    Args:
        fst: The Fst to make deterministic.
    Returns:
        The resulting Fst, wrapping the handle produced by the native
        `fst_determinize` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the determinized FST here.
    result_ptr = ctypes.pointer(ctypes.c_void_p())
    status = lib.fst_determinize(fst.ptr, ctypes.byref(result_ptr))
    check_ffi_error(status, "Error during determinization")

    return VectorFst(ptr=result_ptr)
def compose(fst: VectorFst, other_fst: VectorFst) -> VectorFst:
    """
    Compute the composition of two FSTs.

    Args:
        fst: Left fst.
        other_fst: Right fst.
    Returns:
        Resulting fst, wrapping the handle produced by the native
        `fst_compose` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the composed FST here.
    composed_ptr = ctypes.pointer(ctypes.c_void_p())
    status = lib.fst_compose(fst.ptr, other_fst.ptr, ctypes.byref(composed_ptr))
    check_ffi_error(status, "Error Composing FSTs")

    return VectorFst(ptr=composed_ptr)
def determinize_with_config(fst: VectorFst, config: DeterminizeConfig) -> VectorFst:
    """
    Make an Fst deterministic, using an explicit configuration.

    Args:
        fst: The Fst to make deterministic.
        config: Configuration of the determinization algorithm to use.
    Returns:
        The resulting Fst, wrapping the handle produced by the native
        `fst_determinize_with_config` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the determinized FST here.
    result_ptr = ctypes.pointer(ctypes.c_void_p())
    status = lib.fst_determinize_with_config(
        fst.ptr,
        config.ptr,
        ctypes.byref(result_ptr),
    )
    check_ffi_error(status, "Error during determinization")

    return VectorFst(ptr=result_ptr)
def shortestpath_with_config(fst: VectorFst, config: ShortestPathConfig) -> VectorFst:
    """
    Construct an FST containing the n-shortest path(s) in the input FST.

    Args:
        fst: Fst whose shortest path(s) are computed.
        config: ShortestPathConfig forwarded to the native call.
    Returns:
        A VectorFst wrapping the handle produced by the native
        `fst_shortest_path_with_config` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the result FST here.
    result_ptr = ctypes.c_void_p()
    status = lib.fst_shortest_path_with_config(
        fst.ptr,
        config.ptr,
        ctypes.byref(result_ptr),
    )
    check_ffi_error(status, "Error computing shortest path")

    return VectorFst(ptr=result_ptr)
def compose_with_config(
    fst: VectorFst, other_fst: VectorFst, config: ComposeConfig
) -> VectorFst:
    """
    Compute the composition of two FSTs parametrized with a config.

    Args:
        fst: Left fst.
        other_fst: Right fst.
        config: Config parameters of the composition.
    Returns:
        Resulting fst, wrapping the handle produced by the native
        `fst_compose_with_config` call.
    """
    # Passed by reference; the native call is expected to store the handle of
    # the composed FST here.
    composed_ptr = ctypes.pointer(ctypes.c_void_p())
    status = lib.fst_compose_with_config(
        fst.ptr,
        other_fst.ptr,
        config.ptr,
        ctypes.byref(composed_ptr),
    )
    check_ffi_error(status, "Error Composing FSTs")

    return VectorFst(ptr=composed_ptr)
def epsilon_machine(weight: Optional[float] = None) -> VectorFst:
    """
    Build a single-state, no-arc FST accepting epsilon.

    The only state is set both as the start state and as a final state
    carrying `weight`.

    Args:
        weight: Final weight of the single state. When omitted or None, the
            value returned by `weight_one()` is used.
    Returns:
        An FST.
    """
    final_weight = weight_one() if weight is None else weight

    machine = VectorFst()
    only_state = machine.add_state()
    machine.set_start(only_state)
    machine.set_final(only_state, final_weight)

    return machine
def transducer(
    istring: str,
    ostring: str,
    isymt: SymbolTable,
    osymt: SymbolTable,
    weight: Optional[float] = None,
) -> VectorFst:
    """
    Build a transducer FST from a pair of strings.

    The resulting FST transduces from the first string to the second with a
    fixed path weight.

    Args:
        istring: The input string. UTF-8 encoded before the native call.
        ostring: The output string. UTF-8 encoded before the native call.
        isymt: SymbolTable used to encode the input string.
        osymt: SymbolTable used to encode the output string.
        weight: Path weight as a float. When omitted or None, the value
            returned by `weight_one()` is used.
    Returns:
        An FST transducer.
    """
    path_weight = weight_one() if weight is None else weight

    # Passed by reference; the native call is expected to store the handle of
    # the newly created FST here.
    fst_out = ctypes.c_void_p()
    status = lib.utils_string_to_transducer(
        istring.encode("utf-8"),
        ostring.encode("utf-8"),
        isymt.ptr,
        osymt.ptr,
        ctypes.c_float(path_weight),
        ctypes.byref(fst_out),
    )
    # Message previously read "tranducer"; spelling corrected.
    check_ffi_error(status, "Error creating transducer FST")

    return VectorFst(ptr=fst_out)
def randgen(
    ifst: Fst,
    npath: int = 1,
    seed: int = 0,
    select: str = "uniform",
    max_length: int = 2147483647,
    weight: bool = False,
    remove_total_weight: bool = False,
) -> VectorFst:
    """
    Randomly generate successful paths in an FST.

    This operation randomly generates a set of successful paths in the input
    FST. Arc selection is controlled by `select`; this wrapper currently
    accepts only "uniform" (a transition is picked using a uniform
    distribution) and rejects every other value.

    Args:
        ifst: The input FST.
        npath: The number of random paths to generate.
        seed: Seed value for random path generation.
            NOTE(review): earlier documentation states that zero selects a
            seed based on current time and process ID -- confirm against the
            native implementation.
        select: Random arc selection type; only "uniform" is supported.
        max_length: The maximum length of each random path.
        weight: Should the output be weighted by path count?
        remove_total_weight: Should the total weight be removed (ignored when
            `weight` is False)?
    Returns:
        An FST containing one or more random paths.
    Raises:
        ValueError: when `select` is anything other than "uniform".
    """
    if select != "uniform":
        raise ValueError(
            f"Only the uniform distribution is supported for now. Found {select}"
        )

    # Wrap every scalar in the C type handed to the native function.
    c_npath = ctypes.c_size_t(npath)
    c_seed = ctypes.c_size_t(seed)
    c_max_length = ctypes.c_size_t(max_length)
    c_weight = ctypes.c_bool(weight)
    c_remove_total_weight = ctypes.c_bool(remove_total_weight)

    # Passed by reference; the native call is expected to store the handle of
    # the generated FST here.
    result_ptr = ctypes.pointer(ctypes.c_void_p())
    status = lib.fst_randgen(
        ifst.ptr,
        c_npath,
        c_seed,
        c_max_length,
        c_weight,
        c_remove_total_weight,
        ctypes.byref(result_ptr),
    )
    check_ffi_error(status, "Error during randgen")

    return VectorFst(ptr=result_ptr)
def replace(
    root_idx: int,
    # Quoted so the annotation is never evaluated at definition time and no
    # extra typing import is needed. The previous `List[(int, VectorFst)]`
    # subscripted `List` with two parameters, which is not a valid type.
    fst_list: "List[tuple[int, VectorFst]]",
    epsilon_on_replace: bool,
) -> VectorFst:
    """
    Recursively replaces trs in the root FSTs with other FSTs.

    Replace supports replacement of trs in one Fst with another FST. This
    replacement is recursive. Replace takes an array of FST(s). One FST
    represents the root (or topology) machine. The root FST refers to other
    FSTs by recursively replacing trs labeled as non-terminals with the
    matching non-terminal FST. Currently Replace uses the output symbols of
    the trs to determine whether the transition is a non-terminal transition
    or not. A non-terminal can be any label that is not a non-zero terminal
    label in the output alphabet.

    Note that input argument is a vector of pairs. These correspond to the
    tuple of non-terminal Label and corresponding FST.

    Examples:

    - Root Fst :

    ![replace_in_1](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/replace_in_1.svg?sanitize=true)

    - Fst for non-terminal #NAME :

    ![replace_in_2](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/replace_in_2.svg?sanitize=true)

    - Fst for non-terminal #FIRSTNAME :

    ![replace_in_3](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/replace_in_3.svg?sanitize=true)

    - Fst for non-terminal #LASTNAME :

    ![replace_in_4](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/replace_in_4.svg?sanitize=true)

    - Output :

    ![replace_out](https://raw.githubusercontent.com/Garvys/rustfst-images-doc/master/images/replace_out.svg?sanitize=true)

    Args:
        root_idx: Integer forwarded to the native call as a size_t.
            Presumably the non-terminal label identifying the root FST within
            `fst_list` -- confirm against the native API.
        fst_list: Pairs of (non-terminal label, FST). Each pair is converted
            into a `LabelFstPair` holding the label and the FST's pointer.
        epsilon_on_replace: Boolean flag forwarded to the native call.
            NOTE(review): exact semantics are not visible here -- confirm
            against the native API.
    Returns:
        The resulting Fst.
    """
    # Pack the (label, fst) pairs into a contiguous C array for the FFI call.
    pairs = [LabelFstPair(label, item.ptr) for (label, item) in fst_list]
    pair_count = len(pairs)
    pairs_array = (LabelFstPair * pair_count)(*pairs)

    # Passed by reference; the native call is expected to store the handle of
    # the result FST here.
    result_ptr = ctypes.pointer(ctypes.c_void_p())
    status = lib.fst_replace(
        ctypes.c_size_t(root_idx),
        ctypes.byref(pairs_array),
        ctypes.c_size_t(pair_count),
        ctypes.c_bool(epsilon_on_replace),
        ctypes.byref(result_ptr),
    )
    check_ffi_error(status, "Error performing replace")

    return VectorFst(ptr=result_ptr)