Example #1
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of Theano variables
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)

        sequences = dict_subset(kwargs, self.sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

        current_glimpses = self.take_look(
            mask=attended_mask, return_dict=True,
            **dict_union(
                states, glimpses,
                {self.attended_name: attended,
                 self.preprocessed_attended_name: preprocessed_attended}))
        current_states = self.compute_states(
            return_list=True,
            **dict_union(sequences, states, current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
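
All of these snippets lean on the `dict_subset` and `dict_union` helpers from `blocks.utils`. As a rough, plain-Python sketch of the behaviour the call sites above appear to assume (selection by key name, optional popping from the source dict, `must_have=False` tolerating missing keys, and a union that refuses to silently override duplicate keys), the helpers could be approximated as follows; the real implementations may differ in details:

from collections import OrderedDict


def dict_subset(dict_, keys, pop=False, must_have=True):
    # Select the named keys, optionally removing them from the source
    # dict so the remaining kwargs can be forwarded elsewhere.
    result = OrderedDict()
    for key in keys:
        if key in dict_:
            result[key] = dict_.pop(key) if pop else dict_[key]
        elif must_have:
            raise KeyError(key)
    return result


def dict_union(*dicts, **kwargs):
    # Merge dictionaries, raising if the same key appears more than once;
    # this is the "free sanity check" mentioned in the costs() example below.
    result = OrderedDict()
    for other in list(dicts) + [kwargs]:
        duplicates = set(result) & set(other)
        if duplicates:
            raise ValueError("duplicate keys: {}".format(duplicates))
        result.update(other)
    return result

With these semantics, `dict_subset(kwargs, self.state_names, pop=True)` both extracts the previous states and strips them from `kwargs`, so whatever is left over can be passed straight to the wrapped transition.
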
Example #2
    def generate(self, outputs, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names)

        next_glimpses = self.transition.take_glimpses(
            as_dict=True, **dict_union(states, glimpses, contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts))
        next_outputs = self.readout.emit(next_readouts)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {'feedback': next_feedback})
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        return (next_states + [next_outputs] +
                list(next_glimpses.values()) + [next_costs])
    def costs(self, application_call,
              prediction, prediction_mask=None,
              groundtruth=None, groundtruth_mask=None,
              **sequences_states_contexts):
        feedback = self.feedback.apply(prediction, as_dict=True)
        states_outputs = self.recurrent.apply(
            mask=prediction_mask, return_initial_states=True, as_dict=True,
            # Using dict_union gives us a free sanity check that
            # the feedback entries do not override the ones
            # from sequences_states_contexts
            **dict_union(feedback, sequences_states_contexts))
        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in states_outputs:
            application_call.add_auxiliary_variable(
                states_outputs[name][-1].copy(), name=name+"_final_value")
        # Discard the final states
        for name in self.recurrent.apply.states:
            states_outputs[name] = states_outputs[name][:-1]
        # Add all states and outputs and auxiliary variables
        for name, variable in list(states_outputs.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # Those can potentially be used for computing the cost.
        sequences_contexts = dict_subset(
            sequences_states_contexts,
            self.generate.contexts, self.generate.sequences)
        return self.readout.costs(
            prediction, prediction_mask,
            groundtruth, groundtruth_mask,
            **dict_subset(dict_union(states_outputs,
                                     sequences_contexts),
                          self.readout.costs.inputs,
                          must_have=False))
Example #4
    def take_glimpses(self, **kwargs):
        r"""Compute glimpses with the attention mechanism.

        A thin wrapper over `self.attention.take_glimpses`: takes care
        of choosing and renaming the necessary arguments.

        Parameters
        ----------
        \*\*kwargs
            Must contain the attended, previous step states and glimpses.
            Can optionally contain the attended mask and the preprocessed
            attended.

        Returns
        -------
        glimpses : list of :class:`~tensor.TensorVariable`
            Current step glimpses.

        """
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        result = self.attention.take_glimpses(
            kwargs.pop(self.attended_name),
            kwargs.pop(self.preprocessed_attended_name, None),
            kwargs.pop(self.attended_mask_name, None),
            **dict_union(states, glimpses_needed))
        if kwargs:
            raise ValueError("extra args to take_glimpses: {}".format(kwargs))
        return result
Example #5
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Combines an application of the `distribute` that alters the
        sequential inputs of the wrapped transition and an application of
        the wrapped transition. All unknown keyword arguments go to
        the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything that `self.transition` needs
            and in addition the current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        # Masks are not mandatory, that's why 'must_have=False'
        sequences = dict_subset(kwargs, self._sequence_names,
                                pop=True, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            kwargs.pop(self.attended_mask_name)

        sequences.update(self.distribute.apply(
            as_dict=True, **dict_subset(dict_union(sequences, glimpses),
                                        self.distribute.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, as_list=True,
            **dict_union(sequences, kwargs))
        return current_states
Example #6
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything that `self.transition` needs
            and in addition current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        sequences = dict_subset(kwargs,
                                self.sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)
        sequences.update(
            self.mixer.apply(return_dict=True,
                             **dict_subset(dict_union(sequences, glimpses),
                                           self.mixer.apply.inputs)))
        current_states = self.transition.apply(iterate=False,
                                               return_list=True,
                                               **dict_union(
                                                   sequences, states, kwargs))
        return current_states
Example #7
    def take_glimpses(self, **kwargs):
        r"""Compute glimpses with the attention mechanism.

        A thin wrapper over `self.attention.take_glimpses`: takes care
        of choosing and renaming the necessary arguments.

        Parameters
        ----------
        \*\*kwargs
            Must contain the attended, previous step states and glimpses.
            Can optionally contain the attended mask and the preprocessed
            attended.

        Returns
        -------
        glimpses : list of :class:`~tensor.TensorVariable`
            Current step glimpses.

        """
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        result = self.attention.take_glimpses(
            kwargs.pop(self.attended_name),
            kwargs.pop(self.preprocessed_attended_name, None),
            kwargs.pop(self.attended_mask_name, None),
            **dict_union(states, glimpses_needed))
        # At this point kwargs may contain additional items.
        # e.g. AttentionRecurrent.transition.apply.contexts
        return result
    def do_apply(self, **kwargs):
        """Process a sequence attending the attended context at every step.

        Parameters
        ----------
        **kwargs
            Should contain current inputs, previous step states, contexts, the
            preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of Theano variables
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)

        sequences = dict_subset(kwargs, self.sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

        current_glimpses = self.take_look(
            mask=attended_mask, return_dict=True,
            **dict_union(
                states, glimpses,
                {self.attended_name: attended,
                 self.preprocessed_attended_name: preprocessed_attended}))
        current_states = self.compute_states(
            return_list=True,
            **dict_union(sequences, states, current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
Example #9
    def do_apply(self, **kwargs):
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        sequences = dict_subset(kwargs,
                                self._sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        add_seqs = dict_subset(kwargs,
                               self.add_sequences,
                               pop=True,
                               must_have=False)

        current_glimpses = self.take_glimpses(
            as_dict=True,
            **dict_union(
                states, glimpses, {
                    self.attended_name: attended,
                    self.attended_mask_name: attended_mask,
                    self.preprocessed_attended_name: preprocessed_attended
                }, add_seqs))
        current_states = self.compute_states(as_list=True,
                                             **dict_union(
                                                 sequences, states,
                                                 current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
Example #10
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended_list = kwargs[self.attended_name]
        preprocessed_attended_list = kwargs.pop(self.preprocessed_attended_name)
        attended_mask_list = kwargs.get(self.attended_mask_name)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        sequences = dict_subset(kwargs, self._sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)

        utterance_attended = self.context_transition.apply(
            attended_list, preprocessed_attended_list, attended_mask_list,
            states['states'],
            mask=tensor.ones([attended_list.shape[2],
                              attended_list.shape[0]]))
        current_glimpses = self.take_glimpses(utterance_attended)
        current_states = self.compute_states(
            as_list=True,
            **dict_union(sequences, states,
                         {'weighted_averages': current_glimpses}, kwargs))
        return current_states + [current_glimpses]
    def compute_states(self, **kwargs):
        """Compute current states when glimpses have already been computed.

        Parameters
        ----------
        **kwargs
            Should contain everything that `self.transition` needs
            and in addition current glimpses.

        Returns
        -------
        current_states : list of Theano variables
            Current states computed by `self.transition`.

        """
        sequences = dict_subset(kwargs, self.sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)
        sequences.update(self.mixer.apply(
            return_dict=True,
            **dict_subset(dict_union(sequences, glimpses),
                          self.mixer.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, return_list=True,
            **dict_union(sequences, states, kwargs))
        return current_states
Example #12
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            kwargs.pop(self.attended_mask_name, None)
        sequences = dict_subset(kwargs,
                                self._sequence_names,
                                pop=True,
                                must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        # By this time **kwargs will contain the states and the contexts
        # of the transition

        # Compute next states
        sequences_without_mask = {
            name: variable
            for name, variable in sequences.items() if 'mask' not in name
        }
        sequences.update(
            self.distribute.apply(as_dict=True,
                                  **dict_subset(
                                      dict_union(sequences_without_mask,
                                                 glimpses),
                                      self.distribute.apply.inputs)))
        current_states = self.transition.apply(iterate=False,
                                               as_dict=True,
                                               **dict_union(sequences, kwargs))

        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        current_glimpses = self.attention.take_glimpses(
            as_dict=True,
            **dict_union(
                current_states, glimpses_needed, {
                    self.attended_name: attended,
                    self.attended_mask_name: attended_mask,
                    self.preprocessed_attended_name: preprocessed_attended
                }))
        return list(current_states.values()) + list(current_glimpses.values())
Example #13
    def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
        """Returns generation costs for output sequences.

        See Also
        --------
        :meth:`cost` : Scalar cost.

        """
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        contexts['initial_state_context'] = kwargs['initial_state_context']

        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(mask=mask,
                                        return_initial_states=True,
                                        as_dict=True,
                                        **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ output stage, states are
        # computed after.
        states = {name: results[name][:-1] for name in self._state_names}
        glimpses = {name: results[name][1:] for name in self._glimpse_names}

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))
        readouts = self.readout.readout(feedback=feedback,
                                        **dict_union(states, glimpses,
                                                     contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(variable.copy(), name=name)

        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in self._state_names + self._glimpse_names:
            application_call.add_auxiliary_variable(results[name][-1].copy(),
                                                    name=name + "_final_value")

        return costs
    def evaluate(self, application_call, outputs, mask=None, **kwargs):
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(
            mask=mask, return_initial_states=True, as_dict=True,
            **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ output stage, states are
        # computed after.
        states = OrderedDict((name, results[name][:-1]) for name in self._state_names)
        glimpses = OrderedDict((name, results[name][1:]) for name in self._glimpse_names)

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))

        # Run the language model
        if self.language_model:
            lm_states = self.language_model.evaluate(
                outputs=outputs, mask=mask, as_dict=True)
            lm_states = {'lm_' + name: value for name, value
                         in lm_states.items()}
        else:
            lm_states = {}

        readouts = self.readout.readout(
            feedback=feedback,
            **dict_union(lm_states, states, glimpses, contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in self._state_names + self._glimpse_names:
            application_call.add_auxiliary_variable(
                results[name][-1].copy(), name=name+"_final_value")

        return [costs] + list(states.values()) + list(glimpses.values())
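
The `tensor.roll` / `set_subtensor` pair in `cost_matrix` and `evaluate` implements the usual teacher-forcing shift: at step t the readout receives the feedback of the output emitted at step t-1, and the first step receives the feedback of the initial outputs. A small NumPy sketch of that shift, with made-up shapes chosen only for illustration:

import numpy as np

T, B, F = 4, 2, 3  # time, batch and feedback dimensions (illustrative)
feedback = np.arange(T * B * F, dtype='float32').reshape(T, B, F)
initial_feedback = np.zeros((B, F), dtype='float32')

# Mirror `feedback = tensor.roll(feedback, 1, 0)` followed by
# `tensor.set_subtensor(feedback[0], ...)`.
shifted = np.roll(feedback, 1, axis=0)
shifted[0] = initial_feedback

# At time t the readout now sees the feedback produced at time t - 1.
assert np.allclose(shifted[1:], feedback[:-1])
assert np.allclose(shifted[0], initial_feedback)
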
Example #15
    def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.
        dont_generate_new_outputs : bool, optional
            If ``True``, the previous outputs are used instead
            of generated ones. It is a temporary hack for ASRU.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names)
        lm_states = dict_subset(kwargs, self._lm_state_names)
        next_glimpses = self.transition.take_glimpses(as_dict=True,
                                                      **dict_union(
                                                          states, glimpses,
                                                          contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts, lm_states))
        next_outputs = (self.readout.emit(next_readouts)
                        if not dont_generate_new_outputs else outputs)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {
                           'feedback': next_feedback
                       })
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        next_lm_states = {}
        if self.language_model:
            unmangled_lm_states = {
                name[3:]: lm_states[name]
                for name in lm_states
            }
            next_lm_states = OrderedDict(
                zip(
                    self._lm_state_names,
                    self.language_model.generate(
                        next_outputs,
                        dont_generate_new_outputs=True,
                        iterate=False,
                        **unmangled_lm_states)))
        return (next_states + [next_outputs] + list(next_glimpses.values()) +
                list(next_lm_states.values()) + [next_costs])
Example #16
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended_list = kwargs[self.attended_name]
        preprocessed_attended_list = kwargs.pop(
            self.preprocessed_attended_name)
        attended_mask_list = kwargs.get(self.attended_mask_name)

        posTag = kwargs[self.posTag_name]
        preprocessed_posTag = kwargs.pop(self.preprocessed_posTag_name)
        sequences = dict_subset(kwargs,
                                self._sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        current_glimpses = self.take_glimpses(
            as_dict=True,
            **dict_union(
                states, glimpses, {
                    self.attended_name: attended_list,
                    self.posTag_name: posTag,
                    self.attended_mask_name: attended_mask_list,
                    self.preprocessed_attended_name:
                    preprocessed_attended_list,
                    self.preprocessed_posTag_name: preprocessed_posTag
                }))
        # Pass the weighted averages through the context transition GRU
        # one by one.
        current_glimpses['weighted_averages'] = self.context_transition.apply(
            current_glimpses['weighted_averages'],
            tensor.ones([
                current_glimpses['weighted_averages'].shape[1],
                current_glimpses['weighted_averages'].shape[0]
            ]))[-1]
        current_states = self.compute_states(as_list=True,
                                             **dict_union(
                                                 sequences, states,
                                                 current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
    def mixed_generate(self, return_initial_states=True, **kwargs):
        critic = self.generator.readout.critic
        groundtruth = kwargs.pop('groundtruth')
        groundtruth_mask = kwargs.pop('groundtruth_mask')
        step = kwargs.pop('step')

        sampling_inputs = dict_subset(
            kwargs, self.generator.readout.sample.inputs)
        actor_scores = self.generator.readout.scores(**sampling_inputs)

        critic_inputs = {
            name: kwargs['critic_' + name]
            for name in critic.generator.readout.merge_names}
        critic_outputs = critic.generator.readout.outputs(
            groundtruth, groundtruth_mask, **critic_inputs)

        epsilon = numpy.array(self.generator.readout.epsilon,
                              dtype=theano.config.floatX)
        actor_probs = tensor.exp(actor_scores)
        # This is a poor man's 1-hot argmax
        critic_probs = self.softmax.apply(critic_outputs * 1000)
        probs = (actor_probs * (tensor.constant(1) - epsilon)
                 + critic_probs * epsilon)

        x = self.theano_rng.uniform(size=(probs.shape[0],))
        samples = (tensor.gt(x[:, None], tensor.cumsum(probs, axis=1))
                   .astype(theano.config.floatX)
                   .sum(axis=1)
                   .astype('int64'))
        samples = tensor.minimum(samples, probs.shape[1] - 1)

        actor_feedback = self.generator.feedback.apply(samples, as_dict=True)
        actor_states_contexts = dict_subset(
            kwargs,
            self.generator.recurrent.apply.states
            + self.generator.recurrent.apply.contexts)
        actor_states_outputs = self.generator.recurrent.apply(
            as_dict=True, iterate=False,
            **dict_union(actor_feedback, actor_states_contexts))

        critic_feedback = critic.generator.feedback.apply(samples, as_dict=True)
        critic_states_contexts = {
            name: kwargs['critic_' + name]
            for name in
            critic.generator.recurrent.apply.states
            + critic.generator.recurrent.apply.contexts}
        critic_apply_kwargs = dict(
            as_dict=True, iterate=False,
            **dict_union(critic_feedback, critic_states_contexts))
        if self.generator.readout.critic_uses_actor_states:
            critic_apply_kwargs['extra_inputs'] = actor_states_outputs['states']
        critic_states_outputs = critic.generator.recurrent.apply(**critic_apply_kwargs)
        return ([samples, step + 1]
                + list(actor_states_outputs.values())
                + list(critic_states_outputs.values()))
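
The uniform/cumsum construction in `mixed_generate` is the standard inverse-CDF trick for drawing one categorical sample per row without a dedicated multinomial op: count how many cumulative probabilities fall below a uniform draw. A NumPy sketch of the same idea (values are illustrative only):

import numpy as np

rng = np.random.RandomState(0)
probs = np.array([[0.1, 0.2, 0.7],
                  [0.5, 0.5, 0.0]])  # one distribution per row

x = rng.uniform(size=(probs.shape[0],))
# The sample index is the number of cumulative probabilities that the
# uniform draw exceeds, i.e. the bin the draw falls into.
samples = (x[:, None] > np.cumsum(probs, axis=1)).sum(axis=1)
# Guard against round-off pushing the draw past the last bin.
samples = np.minimum(samples, probs.shape[1] - 1)
print(samples)  # [2 1] with this seed
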
Example #18
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            kwargs.pop(self.attended_mask_name, None)
        sequences = dict_subset(kwargs, self._sequence_names, pop=True,
                                must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        # By this time **kwargs will contain the states and the contexts
        # of the transition

        # Compute next states
        sequences_without_mask = {
            name: variable for name, variable in sequences.items()
            if 'mask' not in name}
        sequences.update(self.distribute.apply(
            as_dict=True, **dict_subset(
                dict_union(sequences_without_mask, glimpses),
                self.distribute.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, as_dict=True,
            **dict_union(sequences, kwargs))

        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        current_glimpses = self.attention.take_glimpses(
            as_dict=True,
            **dict_union(
                current_states, glimpses_needed,
                {self.attended_name: attended,
                 self.attended_mask_name: attended_mask,
                 self.preprocessed_attended_name: preprocessed_attended}))
        return list(current_states.values()) + list(current_glimpses.values())
    def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
        """Returns generation costs for output sequences.

        See Also
        --------
        :meth:`cost` : Scalar cost.

        """
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(
            mask=mask, return_initial_states=True, as_dict=True,
            **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ output stage, states are
        # computed after.
        states = {name: results[name][:-1] for name in self._state_names}
        glimpses = {name: results[name][1:] for name in self._glimpse_names}

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))
        readouts = self.readout.readout(
            feedback=feedback, **dict_union(states, glimpses, contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in self._state_names:
            application_call.add_auxiliary_variable(
                results[name][-1].copy(), name=name+"_final_value")

        return costs
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Combines an application of the `distribute` that alters the
        sequential inputs of the wrapped transition and an application of
        the wrapped transition. All unknown keyword arguments go to
        the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything that `self.transition` needs
            and in addition the current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        # make sure we are not popping the mask
        normal_inputs = [
            name for name in self._sequence_names if 'mask' not in name
        ]
        sequences = dict_subset(kwargs, normal_inputs, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        topical_glimpses = dict_subset(kwargs,
                                       self._topical_glimpse_names,
                                       pop=True)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            # attended_mask_name can be optional
            kwargs.pop(self.attended_mask_name, None)
            kwargs.pop(self.topical_attended_name)
            kwargs.pop(self.topical_attended_mask_name, None)

        sequences.update(
            self.distribute.apply(as_dict=True,
                                  **dict_subset(
                                      dict_union(sequences, glimpses),
                                      self.distribute.apply.inputs)))
        sequences.update(
            self.topical_distribute.apply(
                as_dict=True,
                **dict_subset(dict_union(sequences, topical_glimpses),
                              self.topical_distribute.apply.inputs)))
        current_states = self.transition.apply(iterate=False,
                                               as_list=True,
                                               **dict_union(sequences, kwargs))
        return current_states
    def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.
        dont_generate_new_outputs : bool, optional
            If ``True``, the previous outputs are used instead
            of generated ones. It is a temporary hack for ASRU.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names)
        lm_states = dict_subset(kwargs, self._lm_state_names)
        next_glimpses = self.transition.take_glimpses(
            as_dict=True,
            **dict_union(states, glimpses, contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts, lm_states))
        next_outputs = (self.readout.emit(next_readouts)
            if not dont_generate_new_outputs
            else outputs)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {'feedback': next_feedback})
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        next_lm_states = {}
        if self.language_model:
            unmangled_lm_states = {name[3:]: lm_states[name]
                                   for name in lm_states}
            next_lm_states = OrderedDict(zip(
                self._lm_state_names, self.language_model.generate(
                next_outputs, dont_generate_new_outputs=True, iterate=False,
                **unmangled_lm_states)))
        return (next_states + [next_outputs] +
                list(next_glimpses.values()) + list(next_lm_states.values()) +
                [next_costs])
Example #22
        def apply(self, application, *args, **kwargs):
            # extra_ndim is a mandatory parameter, but in order not to
            # confuse with positional inputs, it has to be extracted from
            # **kwargs
            extra_ndim = kwargs.get("extra_ndim", 0)

            inputs = dict(zip(application.inputs, args))
            inputs.update(dict_subset(kwargs, application.inputs, must_have=False))
            reshaped_inputs = inputs
            # To prevent pollution of the computation graph with no-ops
            if extra_ndim > 0:
                for name, input_ in inputs.items():
                    shape, ndim = input_.shape, input_.ndim
                    # Remember extra_dims for reshaping the outputs correctly.
                    # Does not matter from which input, since we assume
                    # extra dimension match for all inputs.
                    extra_dims = shape[:extra_ndim]
                    new_first_dim = tensor.prod(shape[: extra_ndim + 1])
                    new_shape = tensor.join(0, new_first_dim[None], shape[extra_ndim + 1 :])
                    reshaped_inputs[name] = input_.reshape(new_shape, ndim=ndim - extra_ndim)
            outputs = wrapped.__get__(self, None)(**reshaped_inputs)
            if extra_ndim == 0:
                return outputs
            reshaped_outputs = []
            for output in pack(outputs):
                shape, ndim = output.shape, output.ndim
                new_shape = tensor.join(0, extra_dims, (shape[0] // tensor.prod(extra_dims))[None], shape[1:])
                reshaped_outputs.append(output.reshape(new_shape, ndim=ndim + extra_ndim))
            return reshaped_outputs
Example #23
        def apply(self, application, *args, **kwargs):
            # extra_ndim is a mandatory parameter, but in order not to
            # confuse with positional inputs, it has to be extracted from
            # **kwargs
            extra_ndim = kwargs.get('extra_ndim', 0)

            inputs = dict(zip(application.inputs, args))
            inputs.update(dict_subset(kwargs, application.inputs,
                                      must_have=False))
            reshaped_inputs = inputs
            # To prevent pollution of the computation graph with no-ops
            if extra_ndim > 0:
                for name, input_ in inputs.items():
                    shape, ndim = input_.shape, input_.ndim
                    # Remember extra_dims for reshaping the outputs correctly.
                    # Does not matter from which input, since we assume
                    # extra dimension match for all inputs.
                    extra_dims = shape[:extra_ndim]
                    new_first_dim = tensor.prod(shape[:extra_ndim + 1])
                    new_shape = tensor.join(
                        0, new_first_dim[None], shape[extra_ndim + 1:])
                    reshaped_inputs[name] = input_.reshape(
                        new_shape, ndim=ndim - extra_ndim)
            outputs = wrapped.__get__(self, None)(**reshaped_inputs)
            if extra_ndim == 0:
                return outputs
            reshaped_outputs = []
            for output in pack(outputs):
                shape, ndim = output.shape, output.ndim
                new_shape = tensor.join(
                    0, extra_dims, (shape[0] // tensor.prod(extra_dims))[None],
                    shape[1:])
                reshaped_outputs.append(
                    output.reshape(new_shape, ndim=ndim + extra_ndim))
            return reshaped_outputs
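
Examples #22 and #23 wrap an application so that an arbitrary number of extra leading dimensions is flattened into the first "real" dimension before the call and restored afterwards. The NumPy equivalent of that round trip, with shapes chosen only for illustration:

import numpy as np

extra_ndim = 2
x = np.zeros((5, 7, 11, 3))  # two extra leading dims, then (batch, features)

# Flatten the extra dimensions into the first remaining dimension ...
extra_dims = x.shape[:extra_ndim]
flat = x.reshape((-1,) + x.shape[extra_ndim + 1:])
assert flat.shape == (5 * 7 * 11, 3)

# ... apply the wrapped brick to `flat` here (identity as a stand-in) ...
output = flat

# ... then unflatten the result so callers see the original leading shape.
new_first_dim = output.shape[0] // int(np.prod(extra_dims))
restored = output.reshape(extra_dims + (new_first_dim,) + output.shape[1:])
assert restored.shape == x.shape
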
Example #24
 def costs(self, prediction, prediction_mask, groundtruth, groundtruth_mask,
           **inputs):
     log_probs = self.all_scores(
         prediction, self.merge(**dict_subset(inputs, self.merge_names)))
     if prediction_mask is None:
         prediction_mask = 1
     return -(log_probs * prediction_mask).sum(axis=0)
 def all_outputs(self, application_call, groundtruth, groundtruth_mask,
                 **inputs):
     outputs = self.merge(**dict_subset(inputs, self.merge_names))
     indices = tensor.repeat(tensor.arange(groundtruth.shape[1]),
                             groundtruth.shape[0])
     if self.value_softmax:
         logger.debug('Applying value softmax')
         outputs = (tensor.addbroadcast(outputs[:, :, :1], 2) +
                    self.softmax.apply(outputs[:, :, 1:], extra_ndim=1))
     if self.same_value_for_wrong:
         logger.debug('Same value for apriori wrong actions')
         wrong_output = outputs[:, :, 0]
         outputs = outputs[:, :, 1:]
         wrong_mask = tensor.ones_like(outputs[0])
         wrong_mask = tensor.set_subtensor(
             wrong_mask[indices, groundtruth.T.flatten()], 0)
         outputs = (outputs * (1 - wrong_mask) +
                    wrong_output[:, :, None] * wrong_mask)
         application_call.add_auxiliary_variable(wrong_mask,
                                                 name='wrong_mask')
     if self.groundtruth_word_bonus:
          logger.debug('Bonus for groundtruth words')
         wrong_mask = tensor.ones_like(outputs[0])
         wrong_mask = tensor.set_subtensor(
             wrong_mask[indices, groundtruth.T.flatten()], 0)
         w, = self.parameters
         bonuses = inputs['states'].dot(w)
         outputs += bonuses[:, :, None] * (1 - wrong_mask)[None, :, :]
     if self.dueling_outputs:
         logger.debug('Dueling outputs a-la dueling networks')
         base_output = outputs[:, :, [0]]
         dueling_outputs = outputs[:, :, 1:]
         outputs = base_output + dueling_outputs - dueling_outputs.mean(
             axis=2, keepdims=True)
     return outputs
 def outputs(self, groundtruth, groundtruth_mask, **inputs):
     # Copy-pasted from all_outputs, because Theano does not support ellipsis
     outputs = self.merge(**dict_subset(inputs, self.merge_names))
     indices = tensor.repeat(tensor.arange(groundtruth.shape[1]),
                             groundtruth.shape[0])
     if self.value_softmax:
         logger.debug('Applying value softmax')
         outputs = (tensor.addbroadcast(outputs[:, :1], 1) +
                    self.softmax.apply(outputs[:, 1:]))
     if self.same_value_for_wrong:
         logger.debug('Same value for apriori wrong actions')
         wrong_output = outputs[:, 0]
         outputs = outputs[:, 1:]
         wrong_mask = tensor.ones_like(outputs)
         wrong_mask = tensor.set_subtensor(
             wrong_mask[indices, groundtruth.T.flatten()], 0)
         outputs = (outputs * (1 - wrong_mask) +
                    wrong_output[:, None] * wrong_mask)
     if self.groundtruth_word_bonus:
          logger.debug('Bonus for groundtruth words')
         wrong_mask = tensor.ones_like(outputs)
         wrong_mask = tensor.set_subtensor(
             wrong_mask[indices, groundtruth.T.flatten()], 0)
         w, = self.parameters
         bonuses = inputs['states'].dot(w)
         outputs = outputs + bonuses[:, None] * (1 - wrong_mask)
     if self.dueling_outputs:
         logger.debug('Dueling outputs a-la dueling networks')
         base_output = outputs[:, [0]]
         dueling_outputs = outputs[:, 1:]
         outputs = base_output + dueling_outputs - dueling_outputs.mean(
             axis=1, keepdims=True)
     return outputs
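
The `same_value_for_wrong` and `groundtruth_word_bonus` branches both depend on a per-example mask that is zero at every word id occurring in that example's groundtruth and one everywhere else. A NumPy sketch of how the repeat/`set_subtensor` pair builds that mask (toy sizes, illustrative only):

import numpy as np

T, B, V = 3, 2, 5  # time steps, batch size, vocabulary size (toy values)
groundtruth = np.array([[1, 4],   # shape (T, B): word ids per step
                        [2, 4],
                        [1, 0]])

# One row index per (example, step) pair, matching
# tensor.repeat(tensor.arange(groundtruth.shape[1]), groundtruth.shape[0]).
indices = np.repeat(np.arange(B), T)  # [0, 0, 0, 1, 1, 1]

wrong_mask = np.ones((B, V))
wrong_mask[indices, groundtruth.T.flatten()] = 0
# Row b is now 0 at every word id present in example b's groundtruth:
# [[1., 0., 0., 1., 1.],
#  [0., 1., 1., 1., 0.]]
print(wrong_mask)
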
Example #28
 def _push_allocation_config(self):
     self.attention.state_dims = self.transition.get_dims(self.state_names)
     self.attention.sequence_dim = self.transition.get_dim(
         self.attended_name)
     self.mixer.channel_dims = dict_subset(
         dict_union(self.transition.get_dims(self.sequence_names),
                    self.attention.get_dims(self.glimpse_names)),
         self.mixer.apply.inputs)
Example #29
 def compute_steps(self, previous_steps):
     filtered_previous_steps = dict_subset(previous_steps, self.variables)
     steps, updates = self.step_rule.compute_steps(filtered_previous_steps)
     actual = OrderedDict(
         (parameter, steps[parameter]) if parameter in steps else (
             parameter, previous_steps[parameter])
         for parameter in previous_steps)
     return actual, updates
Example #30
 def compute_steps(self, previous_steps):
     filtered_previous_steps = dict_subset(previous_steps, self.variables)
     steps, updates = self.step_rule.compute_steps(filtered_previous_steps)
     actual = OrderedDict(
         (parameter, steps[parameter]) if parameter in steps else (parameter, previous_steps[parameter])
         for parameter in previous_steps
     )
     return actual, updates
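
Examples #29 and #30 restrict a step rule to a chosen subset of parameters: the wrapped rule only ever sees `self.variables`, and every other parameter keeps its unmodified previous step. A dictionary-level sketch of that merge in plain Python (the function and argument names here are made up for illustration):

from collections import OrderedDict


def restricted_steps(previous_steps, selected, compute_steps):
    # `compute_steps` is assumed to map {parameter: step} to
    # ({parameter: new_step}, updates), like a Blocks step rule.
    filtered = OrderedDict((p, step) for p, step in previous_steps.items()
                           if p in selected)
    steps, updates = compute_steps(filtered)
    # Parameters handled by the wrapped rule receive its steps; all the
    # others fall back to their previous (identity) steps.
    actual = OrderedDict((p, steps.get(p, previous_steps[p]))
                         for p in previous_steps)
    return actual, updates
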
Example #31
 def _push_allocation_config(self):
     self.attention.state_dims = self.transition.get_dims(self._state_names)
     self.attention.sequence_dim = self.get_dim(self.attended_name)
     self.distribute.source_dim = self.attention.get_dim(
         self.distribute.source_name)
     self.distribute.target_dims = dict_subset(
         self.transition.get_dims(self._sequence_names),
         self.distribute.target_names)
Example #32
 def _push_allocation_config(self):
     self.attention.state_dims = self.transition.get_dims(self.state_names)
     self.attention.sequence_dim = self.get_dim(self.attended_name)
     self.distribute.source_dim = self.attention.get_dim(
         self.distribute.source_name)
     self.distribute.target_dims = dict_subset(
         self.transition.get_dims(self.sequence_names),
         self.distribute.target_names)
Example #33
    def get_stream(self, part, batches=True, shuffle=True, add_sources=(),
                   num_examples=None, rng=None, seed=None):
        dataset = self.get_dataset(part, add_sources=add_sources)
        if num_examples is None:
            num_examples = dataset.num_examples

        if shuffle:
            iteration_scheme = ShuffledExampleScheme(num_examples, rng=rng)
        else:
            iteration_scheme = SequentialExampleScheme(num_examples)

        stream = DataStream(
            dataset, iteration_scheme=iteration_scheme)

        if self.add_eos:
            stream = Mapping(stream, _AddLabel(
                self.eos_label,
                index=stream.sources.index(self.sources_map['labels'])))
        if self.add_bos:
            if self.bos_label is None:
                raise Exception('No bos label given')
            stream = Mapping(stream, _AddLabel(
                self.bos_label, append=False, times=self.add_bos,
                index=stream.sources.index(self.sources_map['labels'])))

        if self.max_length:
            stream = Filter(stream, self.length_filter)

        if self.sort_k_batches and batches:
            stream = Batch(stream,
                           iteration_scheme=ConstantScheme(
                               self.batch_size * self.sort_k_batches))
            #
            # Hardcode 0 for source on which to sort. This will be good, as
            # most source lengths are correlated and, furthermore, the
            # labels will typically be the last source, thus in a single-input
            # case this sorts on input lengths
            #
            stream = Mapping(stream, SortMapping(_Length(index=0)))
            stream = Unpack(stream)

        if self.normalization:
            stream = self.normalization.wrap_stream(stream)
        stream = ForceFloatX(stream)
        stream = Rearrange(
            stream, dict_subset(self.sources_map, self.default_sources + list(add_sources)))
        if not batches:
            return stream

        stream = Batch(
            stream,
            iteration_scheme=ConstantScheme(self.batch_size if part == 'train'
                                            else self.validation_batch_size))
        stream = Padding(stream)
        stream = Mapping(stream, switch_first_two_axes)
        stream = ForceCContiguous(stream)
        return stream
Example #34
    def take_glimpses(self, **kwargs):
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)

        add_seqs = dict_subset(kwargs,
                               self.add_sequences,
                               pop=True,
                               must_have=False)

        result = self.attention.take_glimpses(
            kwargs.pop(self.attended_name),
            kwargs.pop(self.preprocessed_attended_name, None),
            kwargs.pop(self.attended_mask_name, None),
            **dict_union(states, glimpses_needed, add_seqs))
        # At this point kwargs may contain additional items.
        # e.g. AttentionRecurrent.transition.apply.contexts
        return result
Example #35
 def process_batch(self, batch):
     try:
         batch = dict_subset(batch, self.buffer_.input_names)
     except KeyError:
         reraise_as("Not all data sources required for monitoring were"
                    " provided. The list of required data sources:"
                    " {}.".format(self.buffer_.input_names))
     if self._accumulate_fun is not None:
         self._accumulate_fun(**batch)
 def _push_allocation_config(self):
     self.attention.state_dims = self.transition.get_dims(self.state_names)
     self.attention.sequence_dim = self.transition.get_dim(
         self.attended_name)
     self.mixer.channel_dims = dict_subset(
         dict_union(
             self.transition.get_dims(self.sequence_names),
             self.attention.get_dims(self.glimpse_names)),
         self.mixer.apply.inputs)
 def generate(self, **sequences_states_contexts):
     sampling_inputs = dict_subset(
         sequences_states_contexts, self.readout.sample.inputs)
     samples, scores = self.readout.sample(**sampling_inputs)
     feedback = self.feedback.apply(samples, as_dict=True)
     next_states_outputs = self.recurrent.apply(
         as_list=True, iterate=False,
         **dict_union(feedback, **sequences_states_contexts))
     return [samples, scores] + next_states_outputs
Example #38
 def apply(self, **kwargs):
     # Should handle both "iterate=True" and "iterate=False"
     extra_input = kwargs.pop(self.extra_input_name)
     mask = kwargs.pop('mask', None)
     normal_inputs = dict_subset(kwargs, self._normal_inputs, pop=True)
     normal_inputs = self.distribute.apply(
         as_dict=True,
         **dict_union(normal_inputs, {self.extra_input_name: extra_input}))
     return self.recurrent.apply(mask=mask,
                                 **dict_union(normal_inputs, kwargs))
Example #39
 def generate(self, **sequences_states_contexts):
     sampling_inputs = dict_subset(sequences_states_contexts,
                                   self.readout.sample.inputs)
     samples, scores = self.readout.sample(**sampling_inputs)
     feedback = self.feedback.apply(samples, as_dict=True)
     next_states_outputs = self.recurrent.apply(
         as_list=True,
         iterate=False,
         **dict_union(feedback, **sequences_states_contexts))
     return [samples, scores] + next_states_outputs
Example #40
 def process_batch(self, batch):
     try:
         batch = dict_subset(batch, self.buffer_.input_names)
     except KeyError:
         reraise_as(
             "Not all data sources required for monitoring were"
             " provided. The list of required data sources:"
             " {}.".format(self.buffer_.input_names))
     if self._accumulate_fun is not None:
         self._accumulate_fun(**batch)
Example #41
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        sequences = dict_subset(kwargs,
                                self.sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

        current_glimpses = self.take_glimpses(
            return_dict=True,
            **dict_union(
                states, glimpses, {
                    self.attended_name: attended,
                    self.attended_mask_name: attended_mask,
                    self.preprocessed_attended_name: preprocessed_attended
                }))
        current_states = self.compute_states(return_list=True,
                                             **dict_union(
                                                 sequences, states,
                                                 current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Combines an application of `distribute`, which alters the
        sequential inputs of the wrapped transition, with an application
        of the wrapped transition itself. All unknown keyword arguments
        go to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything `self.transition` needs,
            plus the current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        # make sure we are not popping the mask
        normal_inputs = [name for name in self._sequence_names
                         if 'mask' not in name]
        sequences = dict_subset(kwargs, normal_inputs, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        topical_glimpses = dict_subset(kwargs, self._topical_glimpse_names,
                                       pop=True)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            # attended_mask_name can be optional
            kwargs.pop(self.attended_mask_name, None)
            kwargs.pop(self.topical_attended_name)
            kwargs.pop(self.topical_attended_mask_name, None)

        sequences.update(self.distribute.apply(
            as_dict=True, **dict_subset(dict_union(sequences, glimpses),
                                        self.distribute.apply.inputs)))
        sequences.update(self.topical_distribute.apply(
            as_dict=True, **dict_subset(dict_union(sequences, topical_glimpses),
                                        self.topical_distribute.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, as_list=True,
            **dict_union(sequences, kwargs))
        return current_states
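Both methods above shuffle keyword arguments with `dict_subset` and `dict_union`. A minimal sketch of the semantics those calls rely on, with made-up key names; this is an illustration of the assumed behaviour, not the `blocks.utils` source:

def dict_subset(source, keys, pop=False, must_have=True):
    # Pick only the named keys; optionally pop them from the source and
    # optionally tolerate missing ones.
    result = {}
    for key in keys:
        if key in source:
            result[key] = source.pop(key) if pop else source[key]
        elif must_have:
            raise KeyError(key)
    return result

def dict_union(*dicts, **kwargs):
    # Merge dictionaries, refusing duplicate keys so that no entry can
    # silently override another.
    merged = {}
    for d in list(dicts) + ([kwargs] if kwargs else []):
        overlap = set(merged) & set(d)
        if overlap:
            raise ValueError("duplicate keys: {}".format(overlap))
        merged.update(d)
    return merged

kwargs = {'inputs': 'x', 'states': 's_prev', 'weighted_averages': 'glimpse',
          'mask': None}
states = dict_subset(kwargs, ['states'], pop=True)      # popped from kwargs
glimpses = dict_subset(kwargs, ['weighted_averages'], pop=True)
merged = dict_union(states, glimpses)                   # disjoint merge
assert 'states' not in kwargs and merged['states'] == 's_prev'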
 def process_batch(self, batch):
     try:
         input_names = [v.name for v in self.unique_inputs]
         batch = dict_subset(batch, input_names)
     except KeyError:
         reraise_as("Not all data sources required for monitoring were"
                    " provided. The list of required data sources:"
                    " {}.".format(input_names))
     if self._aggregate_fun is not None:
         numerical_values = self._aggregate_fun(**batch)
         self.monitored_quantities_buffer.aggregate_quantities(
             numerical_values)
    def prefix_generate(self, return_initial_states=True, **kwargs):
        step = kwargs.pop('step')

        sampling_inputs = dict_subset(
            kwargs, self.generator.readout.sample.inputs)
        samples, scores = self.generator.readout.sample(**sampling_inputs)
        prefix_mask = tensor.lt(step, self.prefix_steps)
        samples = (prefix_mask * self.prefix_labels[step[0]]
                   + (1 - prefix_mask) * samples)

        feedback = self.generator.feedback.apply(samples, as_dict=True)
        states_contexts = dict_subset(
            kwargs,
            self.generator.recurrent.apply.states
            + self.generator.recurrent.apply.contexts)
        states_outputs = self.generator.recurrent.apply(
            as_dict=True, iterate=False,
            **dict_union(feedback, states_contexts))

        return ([samples, step + 1]
                + list(states_outputs.values()))
Example #45
 def process_batch(self, batch, accumulate_dict):
     try:
         input_names = [v.name for v in self.inputs]
         batch = dict_subset(batch, input_names)
     except KeyError:
         reraise_as("Not all data sources required for monitoring were"
                    " provided. The list of required data sources:"
                    " {}.".format(input_names))
     results_list = self._func(**batch)
     output_names = [v.name for v in self.outputs]
     for name, res in zip(output_names, results_list):
         accumulate_dict[name].append(res)
Example #46
 def process_batch(self, batch):
     try:
         input_names = [v.name for v in self.unique_inputs]
         batch = dict_subset(batch, input_names)
     except KeyError:
         reraise_as(
             "Not all data sources required for monitoring were"
             " provided. The list of required data sources:"
             " {}.".format(input_names))
     if self._aggregate_fun is not None:
         numerical_values = self._aggregate_fun(**batch)
         self.monitored_quantities_buffer.aggregate_quantities(
             numerical_values)
Example #47
 def process_batch(self, batch):
     try:
         input_names = [v.name for v in self.unique_inputs]
         batch = dict_subset(batch, input_names)
     except KeyError:
         reraise_as(
             "Not all data sources required for monitoring were"
             " provided. The list of required data sources:"
             " {}.".format(input_names))
     if self._accumulate_fun is not None:
         numerical_values = self._accumulate_fun(**batch)
         for value, var in zip(numerical_values, self.theano_variables):
             self.data[var.name].append(value)
Example #48
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the attended context this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        sequences = dict_subset(kwargs, self._sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)

        current_glimpses = self.take_glimpses(
            as_dict=True,
            **dict_union(
                states, glimpses,
                {self.attended_name: attended,
                 self.attended_mask_name: attended_mask,
                 self.preprocessed_attended_name: preprocessed_attended}))
        current_states = self.compute_states(
            as_list=True,
            **dict_union(sequences, states, current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
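The kwarg routing in `do_apply` is worth spelling out: the attended context is read but not popped, so it is still forwarded to the wrapped transition through `**kwargs`, while the preprocessed version is popped because only the attention mechanism consumes it. A tiny illustration with hypothetical placeholder values:

kwargs = {'attended': 'ctx', 'preprocessed_attended': 'preprocessed_ctx',
          'attended_mask': 'mask', 'states': 's_prev', 'inputs': 'x'}
attended = kwargs['attended']                        # read, stays in kwargs
preprocessed = kwargs.pop('preprocessed_attended')   # removed from kwargs
mask = kwargs.get('attended_mask')                   # optional, stays as well
assert 'attended' in kwargs
assert 'preprocessed_attended' not in kwargs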
Example #49
 def process_batch(self, batch, accumulate_dict):
     try:
         input_names = [v.name for v in self.inputs]
         batch = dict_subset(batch, input_names)
     except KeyError:
         reraise_as(
             "Not all data sources required for monitoring were"
             " provided. The list of required data sources:"
             " {}.".format(input_names)
         )
     results_list = self._func(**batch)
     output_names = [v.name for v in self.outputs]
     for name, res in zip(output_names, results_list):
         accumulate_dict[name].append(res)
Example #50
def extract_sample(activations, data_stream, n=2000):
    cg = ComputationGraph(activations)
    input_names = [v.name for v in cg.inputs]
    fn = theano.function(cg.inputs, [activations])
    result = None
    for batch in data_stream.get_epoch_iterator(as_dict=True):
        values = fn(**dict_subset(batch, input_names))
        if result is None:
            result = values[0]
        else:
            result = numpy.concatenate((result, values[0]))
        if result.shape[0] >= n:
            result = result[(slice(0, n),) +
                            (slice(None),) * (len(result.shape) - 1)]
            return result
    def take_look(self, **kwargs):
        """Compute glimpses with the attention mechanism.

        Parameters
        ----------
        **kwargs
            Should contain contexts, previous step states and glimpses.

        Returns
        -------
        glimpses : list of Theano variables
            Current step glimpses.

        """
        return self.attention.take_look(
            kwargs[self.attended_name],
            kwargs.get(self.preprocessed_attended_name),
            **dict_subset(kwargs,
                          self.state_names + self.previous_glimpses_needed))
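A small illustration of the argument selection above, assuming `dict_subset` from `blocks.utils` and hypothetical name lists standing in for `self.state_names` and `self.previous_glimpses_needed`: only the previous states and the glimpses the attention declares it needs are forwarded, and any other keyword argument is simply ignored.

from blocks.utils import dict_subset

# Hypothetical name lists for the sake of the example.
state_names = ['states']
previous_glimpses_needed = ['weighted_averages']

kwargs = {'attended': 'ctx', 'preprocessed_attended': 'preprocessed_ctx',
          'states': 's_prev', 'weighted_averages': 'glimpse_prev',
          'some_other_context': 'ignored'}
selected = dict_subset(kwargs, state_names + previous_glimpses_needed)
assert dict(selected) == {'states': 's_prev',
                          'weighted_averages': 'glimpse_prev'}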
Example #52
 def _evaluate(self):
     for batch in self.data_stream.get_epoch_iterator(as_dict=True):
         batch = dict_subset(batch, self.inputs_names)
         self._fun(**batch)
Example #53
def search(config, params, load_path, part, decode_only, report,
           decoded_save, nll_only, seed):
    import matplotlib
    matplotlib.use("Agg")
    from matplotlib import pyplot
    from lvsr.notebook import show_alignment

    data = Data(**config['data'])
    search_conf = config['monitoring']['search']

    logger.info("Recognizer initialization started")
    recognizer = create_model(config, data, load_path)
    recognizer.init_beam_search(search_conf['beam_size'])
    logger.info("Recognizer is initialized")

    has_uttids = 'uttids' in data.info_dataset.provides_sources
    add_sources = ('uttids',) if has_uttids else ()
    dataset = data.get_dataset(part, add_sources)
    stream = data.get_stream(part, batches=False,
                             shuffle=part == 'train',
                             add_sources=add_sources,
                             num_examples=500 if part == 'train' else None,
                             seed=seed)
    it = stream.get_epoch_iterator(as_dict=True)
    if decode_only is not None:
        decode_only = eval(decode_only)

    weights = tensor.matrix('weights')
    weight_statistics = theano.function(
        [weights],
        [weights_std(weights.dimshuffle(0, 'x', 1)),
            monotonicity_penalty(weights.dimshuffle(0, 'x', 1))])

    print_to = sys.stdout
    if report:
        alignments_path = os.path.join(report, "alignments")
        if not os.path.exists(report):
            os.mkdir(report)
            os.mkdir(alignments_path)
        print_to = open(os.path.join(report, "report.txt"), 'w')

    decoded_file = None
    if decoded_save:
        decoded_file = open(decoded_save, 'w')

    num_examples = 0.
    total_nll = 0.
    total_errors = 0.
    total_length = 0.
    total_wer_errors = 0.
    total_word_length = 0.

    if config.get('vocabulary'):
        with open(os.path.expandvars(config['vocabulary'])) as f:
            vocabulary = dict(line.split() for line in f.readlines())

        def to_words(chars):
            words = chars.split()
            words = [vocabulary[word] if word in vocabulary
                     else vocabulary['<UNK>'] for word in words]
            return words

    for number, example in enumerate(it):
        if decode_only and number not in decode_only:
            continue
        uttids = example.pop('uttids', None)
        raw_groundtruth = example.pop('labels')
        required_inputs = dict_subset(example, recognizer.inputs.keys())

        print("Utterance {} ({})".format(number, uttids), file=print_to)

        groundtruth = dataset.decode(raw_groundtruth)
        groundtruth_text = dataset.pretty_print(raw_groundtruth, example)
        costs_groundtruth, weights_groundtruth = recognizer.analyze(
            inputs=required_inputs,
            groundtruth=raw_groundtruth,
            prediction=raw_groundtruth)[:2]
        weight_std_groundtruth, mono_penalty_groundtruth = weight_statistics(
            weights_groundtruth)
        total_nll += costs_groundtruth.sum()
        num_examples += 1
        print("Groundtruth:", groundtruth_text, file=print_to)
        print("Groundtruth cost:", costs_groundtruth.sum(), file=print_to)
        print("Groundtruth weight std:", weight_std_groundtruth, file=print_to)
        print("Groundtruth monotonicity penalty:", mono_penalty_groundtruth,
              file=print_to)
        print("Average groundtruth cost: {}".format(total_nll / num_examples),
              file=print_to)
        if nll_only:
            print_to.flush()
            continue

        before = time.time()
        try:
            search_kwargs = dict(
                char_discount=search_conf.get('char_discount'),
                round_to_inf=search_conf.get('round_to_inf'),
                stop_on=search_conf.get('stop_on'),
                validate_solution_function=getattr(
                    data.info_dataset, 'validate_solution', None))
            search_kwargs = {k: v for k, v in search_kwargs.items() if v}
            outputs, search_costs = recognizer.beam_search(
                required_inputs, **search_kwargs)
        except CandidateNotFoundError:
            logger.error('Candidate not found!')
            outputs = [[]]
            search_costs = [[numpy.NaN]]

        took = time.time() - before
        recognized = dataset.decode(outputs[0])
        recognized_text = dataset.pretty_print(outputs[0], example)
        if recognized:
            # Theano scan doesn't work with 0 length sequences
            costs_recognized, weights_recognized = recognizer.analyze(
                inputs=required_inputs,
                groundtruth=raw_groundtruth,
                prediction=outputs[0])[:2]
            weight_std_recognized, mono_penalty_recognized = weight_statistics(
                weights_recognized)
            error = min(1, wer(groundtruth, recognized))
        else:
            error = 1
        total_errors += len(groundtruth) * error
        total_length += len(groundtruth)

        if config.get('vocabulary'):
            wer_error = min(1, wer(to_words(groundtruth_text),
                                   to_words(recognized_text)))
            total_wer_errors += len(groundtruth) * wer_error
            total_word_length += len(groundtruth)

        if report and recognized:
            show_alignment(weights_groundtruth, groundtruth, bos_symbol=True)
            pyplot.savefig(os.path.join(
                alignments_path, "{}.groundtruth.png".format(number)))
            show_alignment(weights_recognized, recognized, bos_symbol=True)
            pyplot.savefig(os.path.join(
                alignments_path, "{}.recognized.png".format(number)))

        if decoded_file is not None:
            print("{} {}".format(uttids, ' '.join(recognized)),
                  file=decoded_file)

        print("Decoding took:", took, file=print_to)
        print("Beam search cost:", search_costs[0], file=print_to)
        print("Recognized:", recognized_text, file=print_to)
        if recognized:
            print("Recognized cost:", costs_recognized.sum(), file=print_to)
            print("Recognized weight std:", weight_std_recognized,
                  file=print_to)
            print("Recognized monotonicity penalty:", mono_penalty_recognized,
                  file=print_to)
        print("CER:", error, file=print_to)
        print("Average CER:", total_errors / total_length, file=print_to)
        if config.get('vocabulary'):
            print("WER:", wer_error, file=print_to)
            print("Average WER:", total_wer_errors / total_word_length, file=print_to)
        print_to.flush()
Example #54
        def recurrent_apply(brick, application, application_call,
                            *args, **kwargs):
            """Iterates a transition function.

            Parameters
            ----------
            iterate : bool
                If ``True`` iteration is made. By default ``True``.
            reverse : bool
                If ``True``, the sequences are processed in backward
                direction. ``False`` by default.
            return_initial_states : bool
                If ``True``, initial states are included in the returned
                state tensors. ``False`` by default.

            .. todo::

                * Handle `updates` returned by the :func:`theano.scan`
                    routine.
                * ``kwargs`` has a random order; check if this is a
                    problem.

            """
            # Extract arguments related to iteration and immediately relay the
            # call to the wrapped function if `iterate=False`
            iterate = kwargs.pop('iterate', True)
            if not iterate:
                return application_function(brick, *args, **kwargs)
            reverse = kwargs.pop('reverse', False)
            return_initial_states = kwargs.pop('return_initial_states', False)

            # Push everything to kwargs
            for arg, arg_name in zip(args, arg_names):
                kwargs[arg_name] = arg

            # Make sure that all arguments for scan are tensor variables
            scan_arguments = (application.sequences + application.states +
                              application.contexts)
            for arg in scan_arguments:
                if arg in kwargs:
                    if kwargs[arg] is None:
                        del kwargs[arg]
                    else:
                        kwargs[arg] = tensor.as_tensor_variable(kwargs[arg])

            # Check which sequence and contexts were provided
            sequences_given = dict_subset(kwargs, application.sequences,
                                          must_have=False)
            contexts_given = dict_subset(kwargs, application.contexts,
                                         must_have=False)

            # Determine number of steps and batch size.
            if len(sequences_given):
                # TODO Assumes 1 time dim!
                shape = list(sequences_given.values())[0].shape
                if not iterate:
                    batch_size = shape[0]
                else:
                    n_steps = shape[0]
                    batch_size = shape[1]
            else:
                # TODO Raise error if n_steps and batch_size not found?
                n_steps = kwargs.pop('n_steps')
                batch_size = kwargs.pop('batch_size')

            # Handle the rest kwargs
            rest_kwargs = {key: value for key, value in kwargs.items()
                           if key not in scan_arguments}
            for value in rest_kwargs.values():
                if (isinstance(value, Variable) and not
                        is_shared_variable(value)):
                    logger.warning("unknown input {}".format(value) +
                                   unknown_scan_input)

            # Ensure that all initial states are available.
            for state_name in application.states:
                dim = brick.get_dim(state_name)
                if state_name in kwargs:
                    if isinstance(kwargs[state_name], NdarrayInitialization):
                        kwargs[state_name] = tensor.alloc(
                            kwargs[state_name].generate(brick.rng, (1, dim)),
                            batch_size, dim)
                    elif isinstance(kwargs[state_name], Application):
                        kwargs[state_name] = (
                            kwargs[state_name](state_name, batch_size,
                                               *args, **kwargs))
                else:
                    # TODO init_func returns 2D-tensor, fails for iterate=False
                    kwargs[state_name] = (
                        brick.initial_state(state_name, batch_size,
                                            *args, **kwargs))
                    assert kwargs[state_name]
            states_given = dict_subset(kwargs, application.states)

            # Theano issue 1772
            for name, state in states_given.items():
                states_given[name] = tensor.unbroadcast(state,
                                                        *range(state.ndim))

            def scan_function(*args):
                args = list(args)
                arg_names = (list(sequences_given) +
                             [output for output in application.outputs
                              if output in application.states] +
                             list(contexts_given))
                kwargs = dict(equizip(arg_names, args))
                kwargs.update(rest_kwargs)
                outputs = application(iterate=False, **kwargs)
                # We want to save the computation graph returned by the
                # `application_function` when it is called inside the
                # `theano.scan`.
                application_call.inner_inputs = args
                application_call.inner_outputs = pack(outputs)
                return outputs
            outputs_info = [
                states_given[name] if name in application.states
                else None
                for name in application.outputs]
            result, updates = theano.scan(
                scan_function, sequences=list(sequences_given.values()),
                outputs_info=outputs_info,
                non_sequences=list(contexts_given.values()),
                n_steps=n_steps,
                go_backwards=reverse)
            result = pack(result)
            if return_initial_states:
                # Undo Subtensor
                for i in range(len(states_given)):
                    assert isinstance(result[i].owner.op,
                                      tensor.subtensor.Subtensor)
                    result[i] = result[i].owner.inputs[0]
            if updates:
                application_call.updates = dict_union(application_call.updates,
                                                      updates)

            return result
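From the caller's side, the flags documented above (`iterate`, `reverse`, `return_initial_states`) are ordinary keyword arguments of any recurrent `apply`. A minimal sketch, assuming the blocks `SimpleRecurrent` brick and made-up dimensions:

import numpy
import theano
from theano import tensor
from blocks.bricks import Tanh
from blocks.bricks.recurrent import SimpleRecurrent
from blocks.initialization import IsotropicGaussian

# A plain transition whose `apply` goes through the wrapper above.
rnn = SimpleRecurrent(dim=3, activation=Tanh(),
                      weights_init=IsotropicGaussian(0.01))

x = tensor.tensor3('x')  # (time, batch, dim)
# `reverse=True` runs the scan backwards; `return_initial_states=True`
# prepends the initial state, so the time dimension grows by one.
states = rnn.apply(inputs=x, reverse=True, return_initial_states=True)
rnn.initialize()

f = theano.function([x], states)
print(f(numpy.zeros((5, 2, 3), dtype=theano.config.floatX)).shape)  # (6, 2, 3)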
 def scores(self, **inputs):
     return self.softmax.log_probabilities(self.merge(
         **dict_subset(inputs, self.merge_names)))