def do_apply(self, **kwargs):
        """Process a sequence attending the attended context at every step.

        Parameters
        ----------
        **kwargs
            Should contain current inputs, previous step states, contexts, the
            preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of Theano variables
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)

        sequences = dict_subset(kwargs, self.sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

        current_glimpses = self.take_look(
            mask=attended_mask, return_dict=True,
            **dict_union(
                states, glimpses,
                {self.attended_name: attended,
                 self.preprocessed_attended_name: preprocessed_attended}))
        current_states = self.compute_states(
            return_list=True,
            **dict_union(sequences, states, current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
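
Every snippet on this page leans on two small helpers from `blocks.utils`. A minimal sketch of their behaviour, reconstructed from how they are used in these examples (the real Blocks implementations may differ in details):

from collections import OrderedDict

def dict_subset(dict_, keys, pop=False, must_have=True):
    """Pick the requested keys into a new OrderedDict.

    With pop=True the entries are removed from the source dict;
    with must_have=False missing keys are silently skipped.
    """
    result = OrderedDict()
    for key in keys:
        if key in dict_:
            result[key] = dict_.pop(key) if pop else dict_[key]
        elif must_have:
            raise KeyError("missing key: {}".format(key))
    return result

def dict_union(*dicts, **kwargs):
    """Merge dicts, raising on duplicate keys (a free sanity check)."""
    result = OrderedDict()
    for d in list(dicts) + [kwargs]:
        overlap = set(result) & set(d)
        if overlap:
            raise ValueError("duplicate keys: {}".format(overlap))
        result.update(d)
    return result
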
Example #2
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of Theano variables
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)

        sequences = dict_subset(kwargs, self.sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

        current_glimpses = self.take_look(
            mask=attended_mask, return_dict=True,
            **dict_union(
                states, glimpses,
                {self.attended_name: attended,
                 self.preprocessed_attended_name: preprocessed_attended}))
        current_states = self.compute_states(
            return_list=True,
            **dict_union(sequences, states, current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
Example #3
    def generate(self, outputs, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.

        Notes
        -----
        The contexts, previous states and glimpses are expected
        as keyword arguments.

        """
        states = {name: kwargs[name] for name in self.state_names}
        contexts = {name: kwargs[name] for name in self.context_names}
        glimpses = {name: kwargs[name] for name in self.glimpse_names}

        next_glimpses = self.transition.take_glimpses(
            return_dict=True, **dict_union(states, glimpses, contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts))
        next_outputs = self.readout.emit(next_readouts)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, return_dict=True)
                       if self.fork else {'feedback': next_feedback})
        next_states = self.transition.compute_states(
            return_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        return (next_states + [next_outputs] +
                list(next_glimpses.values()) + [next_costs])
Example #4
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Combines an application of `distribute`, which alters the
        sequential inputs of the wrapped transition, with an application
        of the wrapped transition. All unknown keyword arguments go to
        the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything that `self.transition` needs
            and in addition the current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        # Masks are not mandatory, that's why 'must_have=False'
        sequences = dict_subset(kwargs, self._sequence_names,
                                pop=True, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            kwargs.pop(self.attended_mask_name)

        sequences.update(self.distribute.apply(
            as_dict=True, **dict_subset(dict_union(sequences, glimpses),
                                        self.distribute.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, as_list=True,
            **dict_union(sequences, kwargs))
        return current_states
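
Conceptually, the `distribute` application mixes the freshly computed glimpses back into the sequential inputs of the wrapped transition before it is stepped. A stand-in sketch of that mixing step, with NumPy arrays in place of Theano variables (the names, shapes and additive form are assumptions, not the actual Distribute brick):

import numpy

def distribute_step(sequences, glimpse, projections):
    # sequences: {name: (batch, dim)}, glimpse: (batch, glimpse_dim),
    # projections: {name: (glimpse_dim, dim)} learned weight matrices.
    # Each sequential input receives an additive projection of the glimpse.
    return {name: sequences[name] + glimpse.dot(projections[name])
            for name in sequences}

# Toy shapes: batch of 2, one sequential input of width 3, glimpse of width 4.
seqs = {'inputs': numpy.zeros((2, 3))}
mixed = distribute_step(seqs, numpy.ones((2, 4)),
                        {'inputs': numpy.ones((4, 3))})
# mixed['inputs'] is now 4.0 everywhere: 0 plus a sum of four ones.
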
    def generate(self, outputs, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : Theano variable
            The outputs from the previous step.

        Notes
        -----
        The contexts, previous states and glimpses are expected
        as keyword arguments.

        """
        states = {name: kwargs[name] for name in self.state_names}
        contexts = {name: kwargs[name] for name in self.context_names}
        glimpses = {name: kwargs[name] for name in self.glimpse_names}

        next_glimpses = self.transition.take_look(
            return_dict=True, **dict_union(states, glimpses, contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts))
        next_outputs = self.readout.emit(next_readouts)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, return_dict=True)
                       if self.fork else {'feedback': next_feedback})
        next_states = self.transition.compute_states(
            return_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        return (next_states + [next_outputs]
                + list(next_glimpses.values()) + [next_costs])
    def costs(self, application_call,
              prediction, prediction_mask=None,
              groundtruth=None, groundtruth_mask=None,
              **sequences_states_contexts):
        feedback = self.feedback.apply(prediction, as_dict=True)
        states_outputs = self.recurrent.apply(
            mask=prediction_mask, return_initial_states=True, as_dict=True,
            # Using dict_union gives us a free sanity check that
            # the feedback entries do not override the ones
            # from sequences_states_contexts
            **dict_union(feedback, sequences_states_contexts))
        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in states_outputs:
            application_call.add_auxiliary_variable(
                states_outputs[name][-1].copy(), name=name+"_final_value")
        # Discard the final states
        for name in self.recurrent.apply.states:
            states_outputs[name] = states_outputs[name][:-1]
        # Add all states and outputs and auxiliary variables
        for name, variable in list(states_outputs.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # Those can potentially be used for computing the cost.
        sequences_contexts = dict_subset(
            sequences_states_contexts,
            self.generate.contexts, self.generate.sequences)
        return self.readout.costs(
            prediction, prediction_mask,
            groundtruth, groundtruth_mask,
            **dict_subset(dict_union(states_outputs,
                                     sequences_contexts),
                          self.readout.costs.inputs,
                          must_have=False))
Example #7
    def do_apply(self, **kwargs):
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        sequences = dict_subset(kwargs,
                                self._sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        add_seqs = dict_subset(kwargs,
                               self.add_sequences,
                               pop=True,
                               must_have=False)

        current_glimpses = self.take_glimpses(
            as_dict=True,
            **dict_union(
                states, glimpses, {
                    self.attended_name: attended,
                    self.attended_mask_name: attended_mask,
                    self.preprocessed_attended_name: preprocessed_attended
                }, add_seqs))
        current_states = self.compute_states(as_list=True,
                                             **dict_union(
                                                 sequences, states,
                                                 current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
Example #8
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything that `self.transition` needs
            and in addition current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        sequences = dict_subset(kwargs,
                                self.sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)
        sequences.update(
            self.mixer.apply(return_dict=True,
                             **dict_subset(dict_union(sequences, glimpses),
                                           self.mixer.apply.inputs)))
        current_states = self.transition.apply(iterate=False,
                                               return_list=True,
                                               **dict_union(
                                                   sequences, states, kwargs))
        return current_states
Example #9
    def generate(self, outputs, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names)

        next_glimpses = self.transition.take_glimpses(
            as_dict=True, **dict_union(states, glimpses, contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts))
        next_outputs = self.readout.emit(next_readouts)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {'feedback': next_feedback})
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        return (next_states + [next_outputs] +
                list(next_glimpses.values()) + [next_costs])
    def compute_states(self, **kwargs):
        """Compute current states when glimpses have already been computed.

        Parameters
        ----------
        **kwargs
            Should contain everything that `self.transition` needs
            and in addition current glimpses.

        Returns
        -------
        current_states : list of Theano variables
            Current states computed by `self.transition`.

        """
        sequences = dict_subset(kwargs, self.sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)
        sequences.update(self.mixer.apply(
            return_dict=True,
            **dict_subset(dict_union(sequences, glimpses),
                          self.mixer.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, return_list=True,
            **dict_union(sequences, states, kwargs))
        return current_states
Example #11
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            kwargs.pop(self.attended_mask_name, None)
        sequences = dict_subset(kwargs,
                                self._sequence_names,
                                pop=True,
                                must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        # By this time **kwargs will contain the states and the contexts
        # of the transition

        # Compute next states
        sequences_without_mask = {
            name: variable
            for name, variable in sequences.items() if 'mask' not in name
        }
        sequences.update(
            self.distribute.apply(as_dict=True,
                                  **dict_subset(
                                      dict_union(sequences_without_mask,
                                                 glimpses),
                                      self.distribute.apply.inputs)))
        current_states = self.transition.apply(iterate=False,
                                               as_dict=True,
                                               **dict_union(sequences, kwargs))

        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        current_glimpses = self.attention.take_glimpses(
            as_dict=True,
            **dict_union(
                current_states, glimpses_needed, {
                    self.attended_name: attended,
                    self.attended_mask_name: attended_mask,
                    self.preprocessed_attended_name: preprocessed_attended
                }))
        return list(current_states.values()) + list(current_glimpses.values())
Example #12
    def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
        """Returns generation costs for output sequences.

        See Also
        --------
        :meth:`cost` : Scalar cost.

        """
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        contexts['initial_state_context'] = kwargs['initial_state_context']

        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(mask=mask,
                                        return_initial_states=True,
                                        as_dict=True,
                                        **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ output stage, states are
        # computed after.
        states = {name: results[name][:-1] for name in self._state_names}
        glimpses = {name: results[name][1:] for name in self._glimpse_names}

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))
        readouts = self.readout.readout(feedback=feedback,
                                        **dict_union(states, glimpses,
                                                     contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(variable.copy(), name=name)

        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in self._state_names + self._glimpse_names:
            application_call.add_auxiliary_variable(results[name][-1].copy(),
                                                    name=name + "_final_value")

        return costs
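
The `tensor.roll` / `set_subtensor` pair above implements the usual teacher-forcing shift: the feedback seen at step t is the output emitted at step t-1, with a learned initial feedback at t=0. The same index gymnastics in NumPy (toy shapes, illustrative values only):

import numpy

feedback = numpy.array([[1, 10], [2, 20], [3, 30]])  # (time, batch)
shifted = numpy.roll(feedback, 1, axis=0)  # last step wraps to the front
shifted[0] = 0                             # overwritten with initial feedback
# shifted == [[0, 0], [1, 10], [2, 20]]: step t now sees step t-1's output
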
    def evaluate(self, application_call, outputs, mask=None, **kwargs):
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(
            mask=mask, return_initial_states=True, as_dict=True,
            **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ output stage, states are
        # computed after.
        states = OrderedDict((name, results[name][:-1])
                             for name in self._state_names)
        glimpses = OrderedDict((name, results[name][1:])
                               for name in self._glimpse_names)

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))

        # Run the language model
        if self.language_model:
            lm_states = self.language_model.evaluate(
                outputs=outputs, mask=mask, as_dict=True)
            lm_states = {'lm_' + name: value for name, value
                         in lm_states.items()}
        else:
            lm_states = {}

        readouts = self.readout.readout(
            feedback=feedback,
            **dict_union(lm_states, states, glimpses, contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in self._state_names + self._glimpse_names:
            application_call.add_auxiliary_variable(
                results[name][-1].copy(), name=name+"_final_value")

        return [costs] + list(states.values()) + list(glimpses.values())
Example #14
    def cost(self, application_call, outputs, mask=None, **kwargs):
        """Returns generation costs for output sequences.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The 3(2) dimensional tensor containing output sequences.
            The dimension 0 must stand for time, the dimension 1 for the
            position in the batch.
        mask : :class:`~tensor.TensorVariable`
            The binary matrix identifying fake outputs.

        Notes
        -----
        The contexts are expected as keyword arguments.

        """
        batch_size = outputs.shape[-2]  # TODO Assumes only 1 features dim

        # Prepare input for the iterative part
        states = {
            name: kwargs[name]
            for name in self.state_names if name in kwargs
        }
        contexts = {name: kwargs[name] for name in self.context_names}
        feedback = self.readout.feedback(outputs)
        inputs = (self.fork.apply(feedback, return_dict=True)
                  if self.fork else {
                      'feedback': feedback
                  })

        # Run the recurrent network
        results = self.transition.apply(mask=mask,
                                        return_initial_states=True,
                                        return_dict=True,
                                        **dict_union(inputs, states, contexts))

        # Separate the deliverables
        states = {name: results[name][:-1] for name in self.state_names}
        glimpses = {name: results[name] for name in self.glimpse_names}

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(
                self.readout.initial_outputs(batch_size, **contexts)))
        readouts = self.readout.readout(feedback=feedback,
                                        **dict_union(states, glimpses,
                                                     contexts))
        costs = self.readout.cost(readouts, outputs)

        for name, variable in glimpses.items():
            application_call.add_auxiliary_variable(variable.copy(), name=name)

        # In case the user needs some glimpses or states or smth else
        return costs
Example #15
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended_list = kwargs[self.attended_name]
        preprocessed_attended_list = kwargs.pop(
            self.preprocessed_attended_name)
        attended_mask_list = kwargs.get(self.attended_mask_name)

        posTag = kwargs[self.posTag_name]
        preprocessed_posTag = kwargs.pop(self.preprocessed_posTag_name)
        sequences = dict_subset(kwargs,
                                self._sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        current_glimpses = self.take_glimpses(
            as_dict=True,
            **dict_union(
                states, glimpses, {
                    self.attended_name: attended_list,
                    self.posTag_name: posTag,
                    self.attended_mask_name: attended_mask_list,
                    self.preprocessed_attended_name:
                    preprocessed_attended_list,
                    self.preprocessed_posTag_name: preprocessed_posTag
                }))
        # The weighted averages go through the context transition GRU
        # one by one.
        current_glimpses['weighted_averages'] = self.context_transition.apply(
            current_glimpses['weighted_averages'],
            tensor.ones([
                current_glimpses['weighted_averages'].shape[1],
                current_glimpses['weighted_averages'].shape[0]
            ]))[-1]
        current_states = self.compute_states(as_list=True,
                                             **dict_union(
                                                 sequences, states,
                                                 current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
Example #16
    def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.
        dont_generate_new_outputs : bool, optional
            If ``True``, the previous outputs are used instead
            of generated ones. It is a temporary hack for ASRU.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names)
        lm_states = dict_subset(kwargs, self._lm_state_names)
        next_glimpses = self.transition.take_glimpses(as_dict=True,
                                                      **dict_union(
                                                          states, glimpses,
                                                          contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts, lm_states))
        next_outputs = (self.readout.emit(next_readouts)
                        if not dont_generate_new_outputs else outputs)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {
                           'feedback': next_feedback
                       })
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        next_lm_states = {}
        if self.language_model:
            unmangled_lm_states = {
                name[3:]: lm_states[name]
                for name in lm_states
            }
            next_lm_states = OrderedDict(
                zip(
                    self._lm_state_names,
                    self.language_model.generate(
                        next_outputs,
                        dont_generate_new_outputs=True,
                        iterate=False,
                        **unmangled_lm_states)))
        return (next_states + [next_outputs] + list(next_glimpses.values()) +
                list(next_lm_states.values()) + [next_costs])
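
The `name[3:]` slice strips the `lm_` prefix that `evaluate` adds when it mangles language-model state names (see the `'lm_' + name` renaming earlier on this page). A tiny illustration with made-up state names:

lm_states = {'lm_states': 1, 'lm_cells': 2}   # mangled names
unmangled = {name[3:]: lm_states[name] for name in lm_states}
# unmangled == {'states': 1, 'cells': 2}
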
Example #17
    def apply(self, **kwargs):
        # Should handle both "iterate=True" and "iterate=False"
        extra_input = kwargs.pop(self.extra_input_name)
        mask = kwargs.pop('mask', None)
        normal_inputs = dict_subset(kwargs, self._normal_inputs, pop=True)
        normal_inputs = self.distribute.apply(
            as_dict=True,
            **dict_union(normal_inputs, {self.extra_input_name: extra_input}))
        return self.recurrent.apply(mask=mask,
                                    **dict_union(normal_inputs, kwargs))
    def mixed_generate(self, return_initial_states=True, **kwargs):
        critic = self.generator.readout.critic
        groundtruth = kwargs.pop('groundtruth')
        groundtruth_mask = kwargs.pop('groundtruth_mask')
        step = kwargs.pop('step')

        sampling_inputs = dict_subset(
            kwargs, self.generator.readout.sample.inputs)
        actor_scores = self.generator.readout.scores(**sampling_inputs)

        critic_inputs = {
            name: kwargs['critic_' + name]
            for name in critic.generator.readout.merge_names}
        critic_outputs = critic.generator.readout.outputs(
            groundtruth, groundtruth_mask, **critic_inputs)

        epsilon = numpy.array(self.generator.readout.epsilon,
                              dtype=theano.config.floatX)
        actor_probs = tensor.exp(actor_scores)
        # This is a poor man's 1-hot argmax
        critic_probs = self.softmax.apply(critic_outputs * 1000)
        probs = (actor_probs * (tensor.constant(1) - epsilon)
                 + critic_probs * epsilon)

        x = self.theano_rng.uniform(size=(probs.shape[0],))
        samples = (tensor.gt(x[:, None], tensor.cumsum(probs, axis=1))
                   .astype(theano.config.floatX)
                   .sum(axis=1)
                   .astype('int64'))
        samples = tensor.minimum(samples, probs.shape[1] - 1)

        actor_feedback = self.generator.feedback.apply(samples, as_dict=True)
        actor_states_contexts = dict_subset(
            kwargs,
            self.generator.recurrent.apply.states
            + self.generator.recurrent.apply.contexts)
        actor_states_outputs = self.generator.recurrent.apply(
            as_dict=True, iterate=False,
            **dict_union(actor_feedback, actor_states_contexts))

        critic_feedback = critic.generator.feedback.apply(samples, as_dict=True)
        critic_states_contexts = {
            name: kwargs['critic_' + name]
            for name in
            critic.generator.recurrent.apply.states
            + critic.generator.recurrent.apply.contexts}
        critic_apply_kwargs = dict(
            as_dict=True, iterate=False,
            **dict_union(critic_feedback, critic_states_contexts))
        if self.generator.readout.critic_uses_actor_states:
            critic_apply_kwargs['extra_inputs'] = actor_states_outputs['states']
        critic_states_outputs = critic.generator.recurrent.apply(
            **critic_apply_kwargs)
        return ([samples, step + 1]
                + list(actor_states_outputs.values())
                + list(critic_states_outputs.values()))
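
The sampling block above draws one uniform number per row and counts how many cumulative-probability thresholds it exceeds, which is inverse-CDF sampling from each row's categorical distribution. The same trick in NumPy (values are illustrative):

import numpy

rng = numpy.random.RandomState(0)
probs = numpy.array([[0.1, 0.6, 0.3],
                     [0.5, 0.25, 0.25]])  # one distribution per row
x = rng.uniform(size=(probs.shape[0],))
samples = (x[:, None] > numpy.cumsum(probs, axis=1)).sum(axis=1)
samples = numpy.minimum(samples, probs.shape[1] - 1)  # guard the edge case
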
    def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
        """Returns generation costs for output sequences.

        See Also
        --------
        :meth:`cost` : Scalar cost.

        """
        # We assume the data has axes (time, batch, features, ...)
        batch_size = outputs.shape[1]

        # Prepare input for the iterative part
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        feedback = self.readout.feedback(outputs)
        inputs = self.fork.apply(feedback, as_dict=True)

        # Run the recurrent network
        results = self.transition.apply(
            mask=mask, return_initial_states=True, as_dict=True,
            **dict_union(inputs, states, contexts))

        # Separate the deliverables. The last states are discarded: they
        # are not used to predict any output symbol. The initial glimpses
        # are discarded because they are not used for prediction.
        # Remember, glimpses are computed _before_ output stage, states are
        # computed after.
        states = {name: results[name][:-1] for name in self._state_names}
        glimpses = {name: results[name][1:] for name in self._glimpse_names}

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(batch_size)))
        readouts = self.readout.readout(
            feedback=feedback, **dict_union(states, glimpses, contexts))
        costs = self.readout.cost(readouts, outputs)
        if mask is not None:
            costs *= mask

        for name, variable in list(glimpses.items()) + list(states.items()):
            application_call.add_auxiliary_variable(
                variable.copy(), name=name)

        # These variables can be used to initialize the initial states of the
        # next batch using the last states of the current batch.
        for name in self._state_names:
            application_call.add_auxiliary_variable(
                results[name][-1].copy(), name=name+"_final_value")

        return costs
Example #20
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            kwargs.pop(self.attended_mask_name, None)
        sequences = dict_subset(kwargs, self._sequence_names, pop=True,
                                must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        # By this time **kwargs will contain the states and the contexts
        # of the transition

        # Compute next states
        sequences_without_mask = {
            name: variable for name, variable in sequences.items()
            if 'mask' not in name}
        sequences.update(self.distribute.apply(
            as_dict=True, **dict_subset(
                dict_union(sequences_without_mask, glimpses),
                self.distribute.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, as_dict=True,
            **dict_union(sequences, kwargs))

        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        current_glimpses = self.attention.take_glimpses(
            as_dict=True,
            **dict_union(
                current_states, glimpses_needed,
                {self.attended_name: attended,
                 self.attended_mask_name: attended_mask,
                 self.preprocessed_attended_name: preprocessed_attended}))
        return list(current_states.values()) + list(current_glimpses.values())
    def cost(self, outputs, mask=None, **kwargs):
        """Returns generation costs for output sequences.

        Parameters
        ----------
        outputs : Theano variable
            The 3(2) dimensional tensor containing output sequences.
            The dimension 0 must stand for time, the dimension 1 for the
            position in the batch.
        mask : Theano variable
            The 0/1 matrix identifying fake outputs.

        Notes
        -----
        The contexts are expected as keyword arguments.

        """
        batch_size = outputs.shape[-2]  # TODO Assumes only 1 features dim

        # Prepare input for the iterative part
        states = {name: kwargs[name] for name in self.state_names
                  if name in kwargs}
        contexts = {name: kwargs[name] for name in self.context_names}
        feedback = self.readout.feedback(outputs)
        inputs = (self.fork.apply(feedback, return_dict=True)
                  if self.fork else {'feedback': feedback})

        # Run the recurrent network
        results = self.transition.apply(
            mask=mask, return_initial_states=True, return_dict=True,
            **dict_union(inputs, states, contexts))

        # Separate the deliverables
        states = {name: results[name][:-1] for name in self.state_names}
        glimpses = {name: results[name] for name in self.glimpse_names}

        # Compute the cost
        feedback = tensor.roll(feedback, 1, 0)
        feedback = tensor.set_subtensor(
            feedback[0],
            self.readout.feedback(self.readout.initial_outputs(
                batch_size, **contexts)))
        readouts = self.readout.readout(
            feedback=feedback, **dict_union(states, glimpses, contexts))
        costs = self.readout.cost(readouts, outputs)

        # In case the user needs some glimpses or states or smth else
        also_return = kwargs.get("also_return")
        if also_return:
            others = {name: results[name] for name in also_return}
            return (costs, others)
        return costs
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Combines an application of `distribute`, which alters the
        sequential inputs of the wrapped transition, with an application
        of the wrapped transition. All unknown keyword arguments go to
        the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything that `self.transition` needs
            and in addition the current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        # make sure we are not popping the mask
        normal_inputs = [
            name for name in self._sequence_names if 'mask' not in name
        ]
        sequences = dict_subset(kwargs, normal_inputs, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        topical_glimpses = dict_subset(kwargs,
                                       self._topical_glimpse_names,
                                       pop=True)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            # attended_mask_name can be optional
            kwargs.pop(self.attended_mask_name, None)
            kwargs.pop(self.topical_attended_name)
            kwargs.pop(self.topical_attended_mask_name, None)

        sequences.update(
            self.distribute.apply(as_dict=True,
                                  **dict_subset(
                                      dict_union(sequences, glimpses),
                                      self.distribute.apply.inputs)))
        sequences.update(
            self.topical_distribute.apply(
                as_dict=True,
                **dict_subset(dict_union(sequences, topical_glimpses),
                              self.topical_distribute.apply.inputs)))
        current_states = self.transition.apply(iterate=False,
                                               as_list=True,
                                               **dict_union(sequences, kwargs))
        return current_states
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        topical_attended = kwargs[self.topical_attended_name]
        preprocessed_topical_attended = kwargs.pop(
            self.preprocessed_topical_attended_name)
        topical_attended_mask = kwargs.get(self.topical_attended_mask_name)
        sequences = dict_subset(kwargs, self._sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        topical_glimpses = dict_subset(kwargs, self._topical_glimpse_names,
                                       pop=True)

        topical_embeddings = dict_subset(kwargs, [self.topical_name],
                                         pop=True)
        content_embeddings = dict_subset(kwargs, [self.content_name],
                                         pop=True)
        current_glimpses = self.take_glimpses(
            as_dict=True,
            **dict_union(
                states, glimpses, topical_embeddings, content_embeddings,
                {self.attended_name: attended,
                 self.attended_mask_name: attended_mask,
                 self.preprocessed_attended_name: preprocessed_attended,
                 self.topical_attended_name: topical_attended,
                 self.topical_attended_mask_name: topical_attended_mask,
                 self.preprocessed_topical_attended_name:
                     preprocessed_topical_attended}))

        current_states = self.compute_states(
            as_list=True,
            **dict_union(sequences, states, current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
    def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.
        dont_generate_new_outputs : bool, optional
            If ``True``, the previous outputs are used instead
            of generated ones. It is a temporary hack for ASRU.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        glimpses = dict_subset(kwargs, self._glimpse_names)
        lm_states = dict_subset(kwargs, self._lm_state_names)
        next_glimpses = self.transition.take_glimpses(
            as_dict=True,
            **dict_union(states, glimpses, contexts))
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, next_glimpses, contexts, lm_states))
        next_outputs = (self.readout.emit(next_readouts)
                        if not dont_generate_new_outputs else outputs)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {'feedback': next_feedback})
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts))
        next_lm_states = {}
        if self.language_model:
            unmangled_lm_states = {name[3:]: lm_states[name]
                                   for name in lm_states}
            next_lm_states = OrderedDict(zip(
                self._lm_state_names,
                self.language_model.generate(
                    next_outputs, dont_generate_new_outputs=True,
                    iterate=False, **unmangled_lm_states)))
        return (next_states + [next_outputs] +
                list(next_glimpses.values()) + list(next_lm_states.values()) +
                [next_costs])
Example #25
    def get_cost_graph(self,
                       batch=True,
                       prediction=None,
                       prediction_mask=None):

        if batch:
            inputs = self.inputs
            inputs_mask = self.inputs_mask
            groundtruth = self.labels
            groundtruth_mask = self.labels_mask
        else:
            inputs, inputs_mask = self.bottom.single_to_batch_inputs(
                self.single_inputs)
            groundtruth = self.single_labels[:, None]
            groundtruth_mask = None

        if prediction is None:
            prediction = groundtruth
        if prediction_mask is None:
            prediction_mask = groundtruth_mask

        kwargs = dict(inputs_mask=inputs_mask,
                      labels=prediction,
                      labels_mask=prediction_mask,
                      additional_sources=dict(self.additional_sources))
        kwargs = {(k + self.names_postfix): v for k, v in kwargs.items()}
        kwargs = dict_union(kwargs, inputs)

        cost = self.cost(**kwargs)
        cost_cg = ComputationGraph(cost)

        return cost_cg
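
Wrapping the cost in a ComputationGraph makes the graph queryable; in particular, the `*_final_value` auxiliary variables attached by earlier snippets can be fished back out. A usage sketch, assuming Blocks-style ComputationGraph attributes (`recognizer` is a hypothetical instance of the class above):

cost_cg = recognizer.get_cost_graph()
parameters = cost_cg.parameters            # shared variables to train
final_states = [var for var in cost_cg.auxiliary_variables
                if var.name and var.name.endswith('_final_value')]
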
Example #26
    def initialize(self, **kwargs):
        logger.info("BatchNormAccumulate initializing")

        # get list of bricks
        bricks_seen = set()
        for p in self.parameters:
            brick = get_brick(p)
            if brick not in bricks_seen:
                bricks_seen.add(brick)

        # ensure all updates account for all bricks
        update_parameters = set()
        for b in bricks_seen:
            for var, update in b.updates.items():
                update_parameters.add(var)
            assert b.n.get_value() == 0

        if set(update_parameters) != set(self.parameters):
            raise ValueError("The updates and the parameters passed in do "
                             "not match. This could be due to no applications "
                             "or multiple applications found %d updates, and "
                             "%d parameters" %
                             (len(update_parameters), len(self.parameters)))

        updates = dict_union(*[b.updates for b in bricks_seen])

        logger.info("Compiling BatchNorm accumulate")
        self._func = theano.function(self.inputs, [],
                                     updates=updates,
                                     on_unused_input="warn")

        super(BatchNormAccumulate, self).initialize(**kwargs)
Example #27
    def get_params(self, param_name=None):
        """Returns parameters the selected bricks and their ancestors.

        Parameters
        ----------
        param_name : :class:`Path.ParamName`
            If given, only parameters with the name `param_name` are
            returned.

        Returns
        -------
        params : OrderedDict
            A dictionary of (`path`, `param`) pairs, where `path` is the
            string representation of the path to the parameter and `param`
            is the parameter.

        """
        def recursion(brick):
            # TODO path logic should be separate
            result = [(Path([Path.BrickName(brick.name),
                             Path.ParamName(param.name)]),
                       param)
                      for param in brick.params
                      if not param_name or param.name == param_name]
            result = OrderedDict(result)
            for child in brick.children:
                for path, param in recursion(child).items():
                    new_path = Path([Path.BrickName(brick.name)]) + path
                    result[new_path] = param
            return result
        result = dict_union(*[recursion(brick)
                            for brick in self.bricks])
        return OrderedDict((str(key), value) for key, value in result.items())
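
A hypothetical use of this method through a Blocks Selector (the brick and parameter names below are made up for illustration):

from blocks.select import Selector

# Assuming `mlp` is a brick whose children include `linear_0`, `linear_1`:
selector = Selector([mlp])
all_params = selector.get_params()    # e.g. {'/mlp/linear_0.W': W, ...}
weights = selector.get_params('W')    # only parameters named 'W'
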
Example #28
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended_list = kwargs[self.attended_name]
        preprocessed_attended_list = kwargs.pop(self.preprocessed_attended_name)
        attended_mask_list = kwargs.get(self.attended_mask_name)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        sequences = dict_subset(kwargs, self._sequence_names, pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self._state_names, pop=True)

        utterance_attended = self.context_transition.apply(
            attended_list, preprocessed_attended_list, attended_mask_list,
            states['states'],
            mask=tensor.ones([attended_list.shape[2],
                              attended_list.shape[0]]))
        current_glimpses = self.take_glimpses(utterance_attended)
        current_states = self.compute_states(
            as_list=True,
            **dict_union(sequences, states,
                         {'weighted_averages': current_glimpses}, kwargs))
        return current_states + [current_glimpses]
Example #29
    def _get_variables(self):
        """Collect variables, updates and auxiliary variables.

        In addition collects all :class:`.Scan` ops and recurses in the
        respective inner Theano graphs.

        """
        updates = OrderedDict()

        shared_outputs = [o for o in self.outputs if is_shared_variable(o)]
        usual_outputs = [o for o in self.outputs if not is_shared_variable(o)]
        variables = shared_outputs

        if usual_outputs:
            # Sort apply nodes topologically, get variables and remove
            # duplicates
            inputs = graph.inputs(self.outputs)
            self.sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)
            self.scans = list(
                unique([
                    node.op for node in self.sorted_apply_nodes
                    if isinstance(node.op, Scan)
                ]))
            self.sorted_scan_nodes = [
                node for node in self.sorted_apply_nodes
                if isinstance(node.op, Scan)
            ]
            self._scan_graphs = [
                ComputationGraph(scan.outputs) for scan in self.scans
            ]

            seen = set()
            main_vars = ([
                var for var in list(
                    chain(*[
                        apply_node.inputs
                        for apply_node in self.sorted_apply_nodes
                    ])) if not (var in seen or seen.add(var))
            ] + [var for var in self.outputs if var not in seen])

            # While preserving order add auxiliary variables, and collect
            # updates
            seen = set()
            # Intermediate variables could be auxiliary
            seen_avs = set(main_vars)
            variables = []
            for var in main_vars:
                variables.append(var)
                for annotation in getattr(var.tag, 'annotations', []):
                    if annotation not in seen:
                        seen.add(annotation)
                        new_avs = [
                            av for av in annotation.auxiliary_variables
                            if not (av in seen_avs or seen_avs.add(av))
                        ]
                        variables.extend(new_avs)
                        updates = dict_union(updates, annotation.updates)

        self.variables = variables
        self.updates = updates
Example #30
    def _push_allocation_config(self):
        # Configure readout
        # TODO: optional states? contexts?
        state_dims = {
            name: self.transition.get_dim(name)
            for name in self.state_names
        }
        context_dims = {
            name: self.transition.get_dim(name)
            for name in self.context_names
        }
        self.glimpse_dims = {
            name: self.transition.get_dim(name)
            for name in self.glimpse_names
        }
        self.readout.source_dims = dict_union(state_dims, context_dims,
                                              self.glimpse_dims)

        # Configure fork
        feedback_names = self.readout.feedback.outputs
        if not len(feedback_names) == 1:
            raise ValueError
        self.fork.input_dim = self.readout.get_dim(feedback_names[0])
        self.fork.fork_dims = {
            name: self.transition.get_dim(name)
            for name in self.fork.apply.outputs
        }
Example #32
    def generate(self, chars):
        return self.generator.generate(
            n_steps=3 * chars.shape[0], batch_size=chars.shape[1],
            attended=self.encoder.apply(
                **dict_union(self.fork.apply(self.lookup.apply(chars),
                                             as_dict=True))),
            attended_mask=tensor.ones(chars.shape))
Example #34
    def take_glimpses(self, **kwargs):
        r"""Compute glimpses with the attention mechanism.

        A thin wrapper over `self.attention.take_glimpses`: takes care
        of choosing and renaming the necessary arguments.

        Parameters
        ----------
        \*\*kwargs
            Must contain the attended, previous step states and glimpses.
            Can optionally contain the attended mask and the preprocessed
            attended.

        Returns
        -------
        glimpses : list of :class:`~tensor.TensorVariable`
            Current step glimpses.

        """
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        result = self.attention.take_glimpses(
            kwargs.pop(self.attended_name),
            kwargs.pop(self.preprocessed_attended_name, None),
            kwargs.pop(self.attended_mask_name, None),
            **dict_union(states, glimpses_needed))
        # At this point kwargs may contain additional items.
        # e.g. AttentionRecurrent.transition.apply.contexts
        return result
Example #35
    def take_glimpses(self, **kwargs):
        r"""Compute glimpses with the attention mechanism.

        A thin wrapper over `self.attention.take_glimpses`: takes care
        of choosing and renaming the necessary arguments.

        Parameters
        ----------
        \*\*kwargs
            Must contain the attended, previous step states and glimpses.
            Can optionally contain the attended mask and the preprocessed
            attended.

        Returns
        -------
        glimpses : list of :class:`~tensor.TensorVariable`
            Current step glimpses.

        """
        states = dict_subset(kwargs, self._state_names, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        glimpses_needed = dict_subset(glimpses, self.previous_glimpses_needed)
        result = self.attention.take_glimpses(
            kwargs.pop(self.attended_name),
            kwargs.pop(self.preprocessed_attended_name, None),
            kwargs.pop(self.attended_mask_name, None),
            **dict_union(states, glimpses_needed))
        if kwargs:
            raise ValueError("extra args to take_glimpses: {}".format(kwargs))
        return result
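The companion helper dict_subset appears in most of these examples. A minimal sketch of the contract the calls above assume (select the named keys, optionally pop them out of the source dictionary, and either require or tolerate missing keys); again an illustration, not the blocks implementation:

from collections import OrderedDict


def dict_subset_sketch(dikt, keys, pop=False, must_have=True):
    """Illustrative stand-in for blocks.utils.dict_subset."""
    result = OrderedDict()
    for key in keys:
        if key in dikt:
            result[key] = dikt.pop(key) if pop else dikt[key]
        elif must_have:
            raise KeyError(key)
    return result

# With pop=True the selected items are removed from `dikt`, which is how
# the take_glimpses variant above can detect leftover keyword arguments.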
Example #36
    def get_params(self, param_name=None):
        """Returns parameters the selected bricks and their ancestors.

        Parameters
        ----------
        param_name : :class:`Path.ParamName`
            If given, only parameters with the name `param_name` are
            returned.

        Returns
        -------
        params : OrderedDict
            A dictionary of (`path`, `param`) pairs, where `path` is the
            string representation of the path to the parameter and `param`
            is the parameter.

        """
        def recursion(brick):
            # TODO path logic should be separate
            result = [
                (Path([Path.BrickName(brick.name),
                       Path.ParamName(param.name)]), param)
                for param in brick.params
                if not param_name or param.name == param_name
            ]
            result = OrderedDict(result)
            for child in brick.children:
                for path, param in recursion(child).items():
                    new_path = Path([Path.BrickName(brick.name)]) + path
                    result[new_path] = param
            return result

        result = dict_union(*[recursion(brick) for brick in self.bricks])
        return OrderedDict((str(key), value) for key, value in result.items())
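A hypothetical use of get_params, assuming this method lives on something like blocks' Selector built over a brick hierarchy (the paths and parameter names below are made up for illustration):

# params = selector.get_params()      # every parameter, keyed by path
# params = selector.get_params('W')   # only parameters named 'W'
# for path, param in params.items():
#     print(path)                     # e.g. '/mlp/linear_0.W'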
Example #37
 def apply(self, input_, mask=None, **kwargs):
     states = self.transition.apply(
         mask=mask,
         **dict_union(self.fork.apply(input_, as_dict=True), kwargs))
     # Blocks returns a list [states, cells] for an LSTM but a single
     # variable for a GRU or vanilla RNN. We only want the LSTM's states;
     # the cells should not be visible from outside.
     return states[0] if isinstance(states, list) else states
Example #38
    def generate(self, outputs, **kwargs):
        """A sequence generation step.

        Parameters
        ----------
        outputs : :class:`~tensor.TensorVariable`
            The outputs from the previous step.

        Notes
        -----
        The contexts, previous states and glimpses are expected as keyword
        arguments.

        """
        states = dict_subset(kwargs, self._state_names, must_have=False)
        # masks in context are optional (e.g. `attended_mask`)
        contexts = dict_subset(kwargs, self._context_names, must_have=False)
        topical_word_contexts = dict_subset(kwargs,
                                            self._topical_context_names)
        topical_embeddings = dict_subset(kwargs, [self.topical_name])
        content_embeddings = dict_subset(kwargs, [self.content_name])
        glimpses = dict_subset(kwargs, self._glimpse_names)

        next_glimpses = self.transition.take_glimpses(
            as_dict=True,
            **dict_union(states, glimpses, topical_embeddings,
                         content_embeddings, contexts,
                         topical_word_contexts))
        # Select only the glimpses the readout consumes.
        glimpses_modified = {
            'weighted_averages': next_glimpses['weighted_averages'],
            'weights': next_glimpses['weights']}
        next_readouts = self.readout.readout(
            feedback=self.readout.feedback(outputs),
            **dict_union(states, glimpses_modified, contexts))
        next_outputs = self.readout.emit(next_readouts)
        next_costs = self.readout.cost(next_readouts, next_outputs)
        next_feedback = self.readout.feedback(next_outputs)
        next_inputs = (self.fork.apply(next_feedback, as_dict=True)
                       if self.fork else {'feedback': next_feedback})
        # Unlike do_apply, contexts are also passed to compute_states here.
        next_states = self.transition.compute_states(
            as_list=True,
            **dict_union(next_inputs, states, next_glimpses, contexts,
                         topical_word_contexts))
        return (next_states + [next_outputs] +
                list(next_glimpses.values()) + [next_costs])
Example #39
 def _push_allocation_config(self):
     self.attention.state_dims = self.transition.get_dims(self.state_names)
     self.attention.sequence_dim = self.transition.get_dim(
         self.attended_name)
     self.mixer.channel_dims = dict_subset(
         dict_union(self.transition.get_dims(self.sequence_names),
                    self.attention.get_dims(self.glimpse_names)),
         self.mixer.apply.inputs)
Example #40
 def cost(self, chars, chars_mask, targets, targets_mask):
     return self.generator.cost_matrix(
         targets, targets_mask,
         attended=self.encoder.apply(
             **dict_union(
                 self.fork.apply(self.lookup.apply(chars), as_dict=True),
                 mask=chars_mask)),
         attended_mask=chars_mask)
Example #41
 def cost(self, chars, chars_mask, targets, targets_mask):
     return self.generator.cost_matrix(
         targets, targets_mask,
         attended=self.encoder.apply(
             **dict_union(self.fork.apply(self.lookup.apply(chars),
                                          as_dict=True),
                          mask=chars_mask)),
         attended_mask=chars_mask)
Example #42
    def cost(self, given_x, application_call):
        """Computes the loss function.

        Parameters
        ----------
        given_x : :class:`~tensor.TensorVariable`
            Batch of given visible states from the dataset.

        Notes
        -----
        The `application_call` argument is an effect of the `application`
        decorator and isn't visible to users. It's used internally to
        set an updates dictionary for `h` that's discoverable by
        `ComputationGraph`.

        """
        x = given_x
        h_prev = self.h + self.initial_noise * self.theano_rng.normal(
            size=self.h.shape, dtype=self.h.dtype)
        h = h_next = h_prev
        old_energy = self.pp(self.energy(x, h).sum(), "old_energy", 1)
        for iteration in range(self.n_inference_steps):
            h_prev = h
            h = h_next
            h_next = self.pp(
                disconnected_grad(self.langevin_update(self.pp(x, "x", 3), self.pp(h_next, "h", 2))), "h_next", 2
            )
            new_energy = self.pp(self.energy(x, h_next).sum(), "new_energy", 1)
            delta_energy = self.pp(old_energy - new_energy, "delta_energy", 1)
            old_energy = new_energy
            h_prediction_residual = (
                h_next - self.pp(h_prev, "h_prev", 3) + self.epsilon * tensor.grad(self.energy(x, h_prev).sum(), h_prev)
            )
            J_h = self.pp((h_prediction_residual * h_prediction_residual).sum(axis=1).mean(axis=0), "J_h", 1)
            x_prediction_residual = self.pp(tensor.grad(self.energy(given_x, h_prev).sum(), given_x), "x_residual", 2)
            J_x = self.pp((x_prediction_residual * x_prediction_residual).sum(axis=1).mean(axis=0), "J_x", 1)
            if self.debug > 1:
                application_call.add_auxiliary_variable(J_x, name="J_x" + str(iteration))
                application_call.add_auxiliary_variable(J_h, name="J_h" + str(iteration))
            if iteration == 0:
                total_cost = J_h + J_x
            else:
                total_cost = total_cost + J_h + J_x

        per_iteration_cost = total_cost / self.n_inference_steps

        updates = OrderedDict([(self.h, h_next)])
        application_call.updates = dict_union(application_call.updates, updates)

        if self.debug > 0:
            application_call.add_auxiliary_variable(per_iteration_cost, name="per_iteration_cost")
        if self.debug > 1:
            application_call.add_auxiliary_variable(self.Wxh * 1.0, name="Wxh")
            application_call.add_auxiliary_variable(self.Whh * 1.0, name="Whh")
            application_call.add_auxiliary_variable(self.Wxx * 1.0, name="Wxx")
            application_call.add_auxiliary_variable(self.b * 1, name="b")
            application_call.add_auxiliary_variable(self.c * 1, name="c")

        return self.pp(total_cost, "total_cost")
Example #43
 def generate(self, **sequences_states_contexts):
     sampling_inputs = dict_subset(
         sequences_states_contexts, self.readout.sample.inputs)
     samples, scores = self.readout.sample(**sampling_inputs)
     feedback = self.feedback.apply(samples, as_dict=True)
     next_states_outputs = self.recurrent.apply(
         as_list=True, iterate=False,
         **dict_union(feedback, **sequences_states_contexts))
     return [samples, scores] + next_states_outputs
Example #44
 def _push_allocation_config(self):
     self.attention.state_dims = self.transition.get_dims(self.state_names)
     self.attention.sequence_dim = self.transition.get_dim(
         self.attended_name)
     self.mixer.channel_dims = dict_subset(
         dict_union(
             self.transition.get_dims(self.sequence_names),
             self.attention.get_dims(self.glimpse_names)),
         self.mixer.apply.inputs)
Example #45
    def do_apply(self, **kwargs):
        r"""Process a sequence attending the attended context every step.

        In addition to the original sequence this method also requires
        its preprocessed version, the one computed by the `preprocess`
        method of the attention mechanism. Unknown keyword arguments
        are passed to the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain current inputs, previous step states, contexts,
            the preprocessed attended context, previous step glimpses.

        Returns
        -------
        outputs : list of :class:`~tensor.TensorVariable`
            The current step states and glimpses.

        """
        attended = kwargs[self.attended_name]
        preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
        attended_mask = kwargs.get(self.attended_mask_name)
        sequences = dict_subset(kwargs,
                                self.sequence_names,
                                pop=True,
                                must_have=False)
        states = dict_subset(kwargs, self.state_names, pop=True)
        glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

        current_glimpses = self.take_glimpses(
            return_dict=True,
            **dict_union(
                states, glimpses, {
                    self.attended_name: attended,
                    self.attended_mask_name: attended_mask,
                    self.preprocessed_attended_name: preprocessed_attended
                }))
        current_states = self.compute_states(return_list=True,
                                             **dict_union(
                                                 sequences, states,
                                                 current_glimpses, kwargs))
        return current_states + list(current_glimpses.values())
Example #46
    def compute_states(self, **kwargs):
        r"""Compute current states when glimpses have already been computed.

        Combines an application of the `distribute`, which alters the
        sequential inputs of the wrapped transition, with an application
        of the wrapped transition. All unknown keyword arguments go to
        the wrapped transition.

        Parameters
        ----------
        \*\*kwargs
            Should contain everything what `self.transition` needs
            and in addition the current glimpses.

        Returns
        -------
        current_states : list of :class:`~tensor.TensorVariable`
            Current states computed by `self.transition`.

        """
        # make sure we are not popping the mask
        normal_inputs = [name for name in self._sequence_names
                         if 'mask' not in name]
        sequences = dict_subset(kwargs, normal_inputs, pop=True)
        glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
        topical_glimpses = dict_subset(kwargs, self._topical_glimpse_names,
                                       pop=True)
        if self.add_contexts:
            kwargs.pop(self.attended_name)
            # attended_mask_name can be optional
            kwargs.pop(self.attended_mask_name, None)
            kwargs.pop(self.topical_attended_name)
            kwargs.pop(self.topical_attended_mask_name, None)

        sequences.update(self.distribute.apply(
            as_dict=True, **dict_subset(dict_union(sequences, glimpses),
                                        self.distribute.apply.inputs)))
        sequences.update(self.topical_distribute.apply(
            as_dict=True, **dict_subset(dict_union(sequences, topical_glimpses),
                                        self.topical_distribute.apply.inputs)))
        current_states = self.transition.apply(
            iterate=False, as_list=True,
            **dict_union(sequences, kwargs))
        return current_states
Example #47
 def generate(self, **sequences_states_contexts):
     sampling_inputs = dict_subset(sequences_states_contexts,
                                   self.readout.sample.inputs)
     samples, scores = self.readout.sample(**sampling_inputs)
     feedback = self.feedback.apply(samples, as_dict=True)
     next_states_outputs = self.recurrent.apply(
         as_list=True,
         iterate=False,
         **dict_union(feedback, **sequences_states_contexts))
     return [samples, scores] + next_states_outputs
Example #48
 def apply(self, input_, mask=None, **kwargs):
     states = self.transition.apply(mask=mask,
                                    **dict_union(
                                        self.fork.apply(input_,
                                                        as_dict=True),
                                        kwargs))
     # Blocks returns a list [states, cells] for an LSTM but a single
     # variable for a GRU or vanilla RNN. We only want the LSTM's states;
     # the cells should not be visible from outside.
     return states[0] if isinstance(states, list) else states
Example #49
    def apply(self, inner_inputs, states, outer_inputs):
        forked_inputs = self.inner_input_fork.apply(inner_inputs, as_dict=True)
        forked_states = self.outer_input_fork.apply(outer_inputs, as_dict=True)

        gru_inputs = {key: forked_inputs[key] + forked_states[key]
                      for key in forked_inputs.keys()}

        new_states = self.inner_gru.apply(
            iterate=False,
            **dict_union(gru_inputs, {'states': states}))
        return new_states  # mean according to the time axis
Example #50
    def cost(self, x, context, **kwargs):
        x_g = self.mlp_x.apply(context)
        inputs = self.fork.apply(x_g, as_dict=True)
        h = self.transition.apply(**dict_union(inputs, kwargs))

        self.final_states = []
        for var in h:
            self.final_states.append(
                var[-1].copy(name=var.name + "_final_value"))

        cost = self.gmm_emitter.cost(h[-1], x)
        return cost.mean()
Example #51
 def lazy_init(*args, **kwargs):
     self = args[0]
     self.allocation_args = (getattr(self, 'allocation_args',
                                     []) + allocation)
     self.initialization_args = (getattr(self, 'initialization_args',
                                         []) + initialization)
     kwargs = dict_union(args_to_kwargs(args, init), kwargs)
     for allocation_arg in allocation:
         kwargs.setdefault(allocation_arg, NoneAllocation)
     for initialization_arg in initialization:
         kwargs.setdefault(initialization_arg, NoneInitialization)
     return init(**kwargs)
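lazy_init above leans on args_to_kwargs to fold positional arguments into a keyword dictionary before the dict_union merge. A rough sketch of that mapping, assuming the helper simply pairs positional arguments with the wrapped initializer's parameter names (hypothetical stand-in, not the library code):

import inspect


def args_to_kwargs_sketch(args, func):
    """Pair positional args with func's parameter names (illustration)."""
    names = list(inspect.signature(func).parameters)
    return dict(zip(names, args))


# Given `def __init__(self, dim, activation): ...`,
# args_to_kwargs_sketch((obj, 10), __init__) -> {'self': obj, 'dim': 10}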
Example #52
 def initial_states(self, batch_size, *args, **kwargs):
     state_dict = dict(
         self.transition.initial_states(
             batch_size, as_dict=True, *args, **kwargs),
         outputs=self.readout.initial_outputs(batch_size))
     if self.language_model:
         lm_initial_states = self.language_model.initial_states(
             batch_size, as_dict=True, *args, **kwargs)
         state_dict = dict_union(state_dict,
                                 {"lm_" + name: state for name, state
                                  in lm_initial_states.items()})
     return [state_dict[state_name]
             for state_name in self.generate.states]
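Prefixing the language-model states with "lm_" before the merge is what keeps the keys disjoint for dict_union. Schematically, using the dict_union_sketch defined earlier with placeholder values:

state_dict = {'states': 's0', 'outputs': 'o0'}      # placeholder values
lm_initial_states = {'states': 'lm_s0'}
merged = dict_union_sketch(
    state_dict,
    {'lm_' + name: state for name, state in lm_initial_states.items()})
# -> {'states': 's0', 'outputs': 'o0', 'lm_states': 'lm_s0'}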
Example #53
    def apply(self, **kwargs):
        """Preprocess a sequence attending the attended context at every step.

        Preprocesses the attended context and runs :meth:`do_apply`. See
        :meth:`do_apply` documentation for further information.

        """
        preprocessed_attended = self.attention.preprocess(
            kwargs[self.attended_name])
        return self.do_apply(
            **dict_union(kwargs,
                         {self.preprocessed_attended_name:
                          preprocessed_attended}))
Example #54
    def _get_variables(self):
        """Collect variables, updates and auxiliary variables.

        In addition collects all :class:`.Scan` ops and recurses in the
        respective inner Theano graphs.

        """
        updates = OrderedDict()

        shared_outputs = [o for o in self.outputs if is_shared_variable(o)]
        usual_outputs = [o for o in self.outputs if not is_shared_variable(o)]
        variables = shared_outputs

        if usual_outputs:
            # Sort apply nodes topologically, get variables and remove
            # duplicates
            inputs = graph.inputs(self.outputs)
            self.sorted_apply_nodes = graph.io_toposort(inputs, usual_outputs)
            self.scans = list(unique([node.op for node in self.sorted_apply_nodes
                                     if isinstance(node.op, Scan)]))
            self.sorted_scan_nodes = [node for node in self.sorted_apply_nodes
                                      if isinstance(node.op, Scan)]
            self._scan_graphs = [ComputationGraph(scan.outputs)
                                 for scan in self.scans]

            seen = set()
            main_vars = (
                [var for var in list(chain(
                    *[apply_node.inputs for apply_node in self.sorted_apply_nodes]))
                 if not (var in seen or seen.add(var))] +
                [var for var in self.outputs if var not in seen])

            # While preserving order add auxiliary variables, and collect
            # updates
            seen = set()
            # Intermediate variables could be auxiliary
            seen_avs = set(main_vars)
            variables = []
            for var in main_vars:
                variables.append(var)
                for annotation in getattr(var.tag, 'annotations', []):
                    if annotation not in seen:
                        seen.add(annotation)
                        new_avs = [
                            av for av in annotation.auxiliary_variables
                            if not (av in seen_avs or seen_avs.add(av))]
                        variables.extend(new_avs)
                        updates = dict_union(updates, annotation.updates)

        self.variables = variables
        self.updates = updates
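The comprehension guard not (var in seen or seen.add(var)) used in _get_variables is a compact order-preserving deduplication: set.add returns None, so the or-expression is falsy exactly on a first occurrence. In isolation:

items = ['a', 'b', 'a', 'c', 'b']
seen = set()
unique_in_order = [x for x in items if not (x in seen or seen.add(x))]
# unique_in_order == ['a', 'b', 'c']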
Example #55
    def get_theano_function(self, additional_updates=None, **kwargs):
        r"""Create Theano function from the graph contained.

        Parameters
        ----------
        \*\*kwargs : dict
            key word arguments to theano.function.
            Useful for specifying specific compilation modes or profiling.

        """
        updates = self.updates
        if additional_updates:
            updates = dict_union(updates, OrderedDict(additional_updates))
        return theano.function(self.inputs, self.outputs, updates=updates,
                               **kwargs)
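A hypothetical call, assuming cg is a ComputationGraph and step_counter is a shared variable one wants bumped on every evaluation (names are illustrative):

# f = cg.get_theano_function(
#     additional_updates=[(step_counter, step_counter + 1)],
#     profile=True)           # extra kwargs go to theano.function
# results = f(*input_values)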