from theano import tensor

from blocks.bricks import Initializable
from blocks.bricks.base import application
from blocks.bricks.recurrent import BaseRecurrent, recurrent

# FSTTransitionOp, FSTCostsOp, MAX_STATES and NOT_STATE are project-specific;
# the module path below is an assumption about where they are defined.
from ops import FSTTransitionOp, FSTCostsOp, MAX_STATES, NOT_STATE


class FSTTransition(BaseRecurrent, Initializable):
    def __init__(self, fst, remap_table, no_transition_cost, **kwargs):
        """Wrap FST in a recurrent brick.

        Parameters
        ----------
        fst : FST instance
        remap_table : dict
            Maps neural network characters to FST characters.
        no_transition_cost : float
            Cost of going to the start state when no arc for an input
            symbol is available.

        """
        super(FSTTransition, self).__init__(**kwargs)
        self.fst = fst
        self.transition = FSTTransitionOp(fst, remap_table)
        self.probability_computer = FSTCostsOp(
            fst, remap_table, no_transition_cost)

        self.out_dim = len(remap_table)

    @recurrent(sequences=['inputs', 'mask'],
               states=['states', 'weights', 'add'],
               outputs=['states', 'weights', 'add'], contexts=[])
    def apply(self, inputs, states, weights, add,
              mask=None):
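        # One FST step: follow arcs labelled with `inputs` from the currently
        # active `states`, accumulating the arc weights.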
        new_states, new_weights = self.transition(states, weights, inputs)
        if mask is not None:
            # Carry the previous states/weights through masked-out positions.
            # Strictly speaking this should be redundant, since positions not
            # covered by the mask should have no effect on the cost.
            new_states = tensor.cast(mask * new_states +
                                     (1. - mask) * states, 'int64')
            new_weights = mask * new_weights + (1. - mask) * weights
        new_add = self.probability_computer(new_states, new_weights)
        return new_states, new_weights, new_add

    @application(outputs=['states', 'weights', 'add'])
    def initial_states(self, batch_size, *args, **kwargs):
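        # Every batch row starts from the FST start state (fst.expand is a
        # project-specific helper, presumably following free/epsilon arcs);
        # unused slots are padded with NOT_STATE and zero weight.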
        states_dict = self.fst.expand({self.fst.fst.start: 0.0})
        states = tensor.as_tensor_variable(
            self.transition.pad(states_dict.keys(), NOT_STATE))
        states = tensor.tile(states[None, :], (batch_size, 1))
        weights = tensor.as_tensor_variable(
            self.transition.pad(states_dict.values(), 0))
        weights = tensor.tile(weights[None, :], (batch_size, 1))
        add = self.probability_computer(states, weights)
        return states, weights, add

    def get_dim(self, name):
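        # 'states'/'weights' are fixed-size buffers of active FST states;
        # 'add' holds one cost per network symbol (out_dim == len(remap_table));
        # 'inputs' is a per-step scalar symbol index, hence dimension 0.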
        if name in ('states', 'weights'):
            return MAX_STATES
        if name == 'add':
            return self.out_dim
        if name == 'inputs':
            return 0
        return super(FSTTransition, self).get_dim(name)
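
# A minimal usage sketch (not part of the original module). It assumes an
# `fst` object with the interface used above (fst.expand, fst.fst.start) and
# that FSTTransitionOp/FSTCostsOp are importable; the symbol table and the
# no-transition cost below are purely illustrative.
#
#     from theano import tensor
#
#     remap_table = {0: 0, 1: 1, 2: 2}  # hypothetical network->FST symbol map
#     fst_brick = FSTTransition(fst, remap_table, no_transition_cost=1e12,
#                               name='fst_transition')
#     inputs = tensor.lmatrix('inputs')  # (time, batch) int64 symbol indices
#     states, weights, add = fst_brick.apply(inputs=inputs)
#     # `add` holds per-step FST costs, shape (time, batch, len(remap_table)).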