def do_apply(self, **kwargs):
    r"""Perform one step: look at the attended context, then update states.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of Theano variables
        The current step states followed by the current step glimpses.

    """
    # The attended (and its optional mask) are only read, so they remain
    # in ``kwargs`` and reach ``compute_states``; the preprocessed attended
    # is removed and handed to ``take_look`` alone.
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    mask = kwargs.get(self.attended_mask_name)

    step_inputs = dict_subset(kwargs, self.sequence_names,
                              pop=True, must_have=False)
    prev_states = dict_subset(kwargs, self.state_names, pop=True)
    prev_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    attended_args = {
        self.attended_name: attended,
        self.preprocessed_attended_name: preprocessed_attended}
    look_args = dict_union(prev_states, prev_glimpses, attended_args)
    new_glimpses = self.take_look(mask=mask, return_dict=True, **look_args)

    # Whatever is still left in ``kwargs`` is forwarded untouched.
    transition_args = dict_union(
        step_inputs, prev_states, new_glimpses, kwargs)
    new_states = self.compute_states(return_list=True, **transition_args)
    return new_states + list(new_glimpses.values())
def generate(self, outputs, **kwargs):
    """Perform a single sequence generation step.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.

    Returns
    -------
    list
        The next states, the next outputs, the next glimpses and the
        costs of the next outputs, concatenated in that order.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    prev_states = dict_subset(kwargs, self._state_names)
    # Masks in contexts are optional (e.g. `attended_mask`).
    ctx = dict_subset(kwargs, self._context_names, must_have=False)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names)

    glimpse_args = dict_union(prev_states, prev_glimpses, ctx)
    new_glimpses = self.transition.take_glimpses(
        as_dict=True, **glimpse_args)

    prev_feedback = self.readout.feedback(outputs)
    readout_args = dict_union(prev_states, new_glimpses, ctx)
    readouts = self.readout.readout(feedback=prev_feedback, **readout_args)

    new_outputs = self.readout.emit(readouts)
    new_costs = self.readout.cost(readouts, new_outputs)
    new_feedback = self.readout.feedback(new_outputs)

    # Without a fork the raw feedback is the only transition input.
    if self.fork:
        new_inputs = self.fork.apply(new_feedback, as_dict=True)
    else:
        new_inputs = {'feedback': new_feedback}

    new_states = self.transition.compute_states(
        as_list=True,
        **dict_union(new_inputs, prev_states, new_glimpses, ctx))
    return (new_states + [new_outputs] +
            list(new_glimpses.values()) + [new_costs])
def costs(self, application_call, prediction, prediction_mask=None,
          groundtruth=None, groundtruth_mask=None,
          **sequences_states_contexts):
    """Run the recurrent part on `prediction` and delegate to the readout.

    Parameters
    ----------
    application_call
        Receives the states/outputs and their final values as auxiliary
        variables.
    prediction
        Fed through `self.feedback` to produce the recurrent inputs.
    prediction_mask : optional
        Passed as ``mask`` to the recurrent application and to the readout.
    groundtruth, groundtruth_mask : optional
        Forwarded unchanged to ``self.readout.costs``.
    \\*\\*sequences_states_contexts
        Additional sequences, states and contexts for the recurrent
        application.

    Returns
    -------
    Whatever ``self.readout.costs`` returns.

    """
    feedback = self.feedback.apply(prediction, as_dict=True)
    # Using dict_union gives us a free sanity check that the feedback
    # entries do not override the ones from sequences_states_contexts.
    recurrent_inputs = dict_union(feedback, sequences_states_contexts)
    states_outputs = self.recurrent.apply(
        mask=prediction_mask, return_initial_states=True, as_dict=True,
        **recurrent_inputs)

    # The last values can be used to initialize the initial states of the
    # next batch from the last states of the current batch.
    for name, variable in states_outputs.items():
        application_call.add_auxiliary_variable(
            variable[-1].copy(), name=name+"_final_value")

    # Discard the final states.
    for state_name in self.recurrent.apply.states:
        states_outputs[state_name] = states_outputs[state_name][:-1]

    # Expose all states and outputs as auxiliary variables.
    for name, variable in list(states_outputs.items()):
        application_call.add_auxiliary_variable(variable.copy(), name=name)

    # Those can potentially be used for computing the cost.
    # NOTE(review): the third positional argument below appears to bind to
    # the `pop` parameter of `dict_subset` rather than extending the key
    # list -- confirm whether `contexts + sequences` was intended.
    sequences_contexts = dict_subset(
        sequences_states_contexts,
        self.generate.contexts, self.generate.sequences)

    readout_inputs = dict_subset(
        dict_union(states_outputs, sequences_contexts),
        self.readout.costs.inputs, must_have=False)
    return self.readout.costs(
        prediction, prediction_mask, groundtruth, groundtruth_mask,
        **readout_inputs)
def take_glimpses(self, **kwargs):
    r"""Compute glimpses with the attention mechanism.

    A thin wrapper over `self.attention.take_glimpses`: it selects the
    arguments the attention needs and rejects anything it does not
    recognize.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended, previous step states and glimpses.
        Can optionally contain the attended mask and the preprocessed
        attended.

    Returns
    -------
    glimpses : list of :class:`~tensor.TensorVariable`
        Current step glimpses.

    Raises
    ------
    ValueError
        If any keyword argument is left over after the known ones have
        been consumed.

    """
    prev_states = dict_subset(kwargs, self._state_names, pop=True)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # Only some of the previous glimpses are forwarded to the attention.
    needed = dict_subset(prev_glimpses, self.previous_glimpses_needed)

    attended = kwargs.pop(self.attended_name)
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name, None)
    attended_mask = kwargs.pop(self.attended_mask_name, None)

    glimpses = self.attention.take_glimpses(
        attended, preprocessed_attended, attended_mask,
        **dict_union(prev_states, needed))

    if kwargs:
        raise ValueError("extra args to take_glimpses: {}".format(kwargs))
    return glimpses
def compute_states(self, **kwargs):
    r"""Compute current states when glimpses have already been computed.

    Combines an application of the `distribute` that alter the
    sequential inputs of the wrapped transition and an application of
    the wrapped transition. All unknown keyword arguments go to
    the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what `self.transition` needs
        and in addition the current glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    # Masks are not mandatory, that's why 'must_have=False'
    sequences = dict_subset(kwargs, self._sequence_names,
                            pop=True, must_have=False)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    if self.add_contexts:
        # These two are removed so that they are not forwarded to the
        # wrapped transition through the remaining ``kwargs``.
        kwargs.pop(self.attended_name)
        # The attended mask is optional: popping it without a default
        # used to raise KeyError whenever no mask was supplied.
        kwargs.pop(self.attended_mask_name, None)

    # Let `distribute` rewrite the sequential inputs using the glimpses.
    sequences.update(self.distribute.apply(
        as_dict=True, **dict_subset(dict_union(sequences, glimpses),
                                    self.distribute.apply.inputs)))
    current_states = self.transition.apply(
        iterate=False, as_list=True,
        **dict_union(sequences, kwargs))
    return current_states
def compute_states(self, **kwargs):
    r"""Compute current states when glimpses have already been computed.

    The sequential inputs are first updated with the output of
    `self.mixer` (applied to the sequences and glimpses it declares as
    inputs); then a single step of `self.transition` is applied.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what `self.transition` needs
        and in addition current glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    step_inputs = dict_subset(kwargs, self.sequence_names,
                              pop=True, must_have=False)
    prev_states = dict_subset(kwargs, self.state_names, pop=True)
    new_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    mixer_args = dict_subset(dict_union(step_inputs, new_glimpses),
                             self.mixer.apply.inputs)
    mixed = self.mixer.apply(return_dict=True, **mixer_args)
    step_inputs.update(mixed)

    # Anything still left in ``kwargs`` goes to the transition as is.
    transition_args = dict_union(step_inputs, prev_states, kwargs)
    return self.transition.apply(
        iterate=False, return_list=True, **transition_args)
def take_glimpses(self, **kwargs):
    r"""Compute glimpses with the attention mechanism.

    A thin wrapper over `self.attention.take_glimpses`: it selects the
    arguments the attention needs and silently ignores the rest.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended, previous step states and glimpses.
        Can optionally contain the attended mask and the preprocessed
        attended.

    Returns
    -------
    glimpses : list of :class:`~tensor.TensorVariable`
        Current step glimpses.

    """
    prev_states = dict_subset(kwargs, self._state_names, pop=True)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # Only some of the previous glimpses are forwarded to the attention.
    needed = dict_subset(prev_glimpses, self.previous_glimpses_needed)

    attended = kwargs.pop(self.attended_name)
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name, None)
    attended_mask = kwargs.pop(self.attended_mask_name, None)

    # At this point kwargs may contain additional items,
    # e.g. AttentionRecurrent.transition.apply.contexts; they are dropped.
    return self.attention.take_glimpses(
        attended, preprocessed_attended, attended_mask,
        **dict_union(prev_states, needed))
def do_apply(self, **kwargs):
    """Process one step, attending the attended context.

    Glimpses are taken first (via `take_look`), then the states are
    computed from the step inputs, the previous states and those glimpses.

    Parameters
    ----------
    **kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of Theano variables
        The current step states and glimpses.

    """
    attended = kwargs[self.attended_name]
    preprocessed = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)

    inputs = dict_subset(kwargs, self.sequence_names,
                         pop=True, must_have=False)
    old_states = dict_subset(kwargs, self.state_names, pop=True)
    old_glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    # The preprocessed attended is passed to `take_look` only; the attended
    # itself was not popped and therefore also travels on in ``kwargs``.
    look_kwargs = dict_union(
        old_states, old_glimpses,
        {self.attended_name: attended,
         self.preprocessed_attended_name: preprocessed})
    glimpses_now = self.take_look(
        mask=attended_mask, return_dict=True, **look_kwargs)

    states_now = self.compute_states(
        return_list=True,
        **dict_union(inputs, old_states, glimpses_now, kwargs))
    return states_now + list(glimpses_now.values())
def do_apply(self, **kwargs):
    r"""Process one step: take glimpses, then compute the new states.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended, the preprocessed attended, the previous
        states and the previous glimpses. May contain the attended mask,
        the sequences and the entries named in `self.add_sequences`.
        Everything else is forwarded to `compute_states`.

    Returns
    -------
    list
        The current states followed by the current glimpses.

    """
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)

    step_inputs = dict_subset(kwargs, self._sequence_names,
                              pop=True, must_have=False)
    prev_states = dict_subset(kwargs, self._state_names, pop=True)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # The additional sequences are handed to `take_glimpses` only.
    extra_sequences = dict_subset(kwargs, self.add_sequences,
                                  pop=True, must_have=False)

    attended_args = {
        self.attended_name: attended,
        self.attended_mask_name: attended_mask,
        self.preprocessed_attended_name: preprocessed_attended}
    new_glimpses = self.take_glimpses(
        as_dict=True,
        **dict_union(prev_states, prev_glimpses,
                     attended_args, extra_sequences))

    new_states = self.compute_states(
        as_list=True,
        **dict_union(step_inputs, prev_states, new_glimpses, kwargs))
    return new_states + list(new_glimpses.values())
def do_apply(self, **kwargs):
    r"""Process a sequence attending the attended context every step.

    The attended, its preprocessed version, its mask and the current
    ``'states'`` entry are first run through `self.context_transition`;
    glimpses are then taken from the result and used as the
    ``'weighted_averages'`` input of `compute_states`.

    Unknown keyword arguments are passed to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states followed by the current glimpses.

    """
    attended_list = kwargs[self.attended_name]
    preprocessed_attended_list = kwargs.pop(self.preprocessed_attended_name)
    attended_mask_list = kwargs.get(self.attended_mask_name)

    # The previous glimpses are not used here, but they still have to be
    # removed from ``kwargs`` so that they are not forwarded further.
    dict_subset(kwargs, self._glimpse_names, pop=True)
    step_inputs = dict_subset(kwargs, self._sequence_names,
                              pop=True, must_have=False)
    prev_states = dict_subset(kwargs, self._state_names, pop=True)

    hidden = prev_states['states']
    # NOTE(review): an all-ones mask built from axes 2 and 0 of the
    # attended -- confirm the intended axis layout.
    ones_mask = tensor.ones(
        [attended_list.shape[2], attended_list.shape[0]])
    utterance_attended = self.context_transition.apply(
        attended_list, preprocessed_attended_list, attended_mask_list,
        hidden, mask=ones_mask)

    # NOTE(review): the original author marked this call with '?'.
    current_glimpses = self.take_glimpses(utterance_attended)

    current_states = self.compute_states(
        as_list=True,
        **dict_union(step_inputs, prev_states,
                     {'weighted_averages': current_glimpses}, kwargs))
    return current_states + [current_glimpses]
def compute_states(self, **kwargs):
    """Compute current states when glimpses have already been computed.

    `self.mixer` is applied to the sequences and glimpses it declares as
    inputs and its outputs replace/extend the sequences; a single
    (non-iterated) step of `self.transition` then yields the states.

    Parameters
    ----------
    **kwargs
        Should contain everything what `self.transition` needs
        and in addition current glimpses.

    Returns
    -------
    current_states : list of Theano variables
        Current states computed by `self.transition`.

    """
    seqs = dict_subset(kwargs, self.sequence_names,
                       pop=True, must_have=False)
    old_states = dict_subset(kwargs, self.state_names, pop=True)
    glimpses_now = dict_subset(kwargs, self.glimpse_names, pop=True)

    mixer_inputs = dict_subset(dict_union(seqs, glimpses_now),
                               self.mixer.apply.inputs)
    seqs.update(self.mixer.apply(return_dict=True, **mixer_inputs))

    # Leftover keyword arguments are passed through to the transition.
    return self.transition.apply(
        iterate=False, return_list=True,
        **dict_union(seqs, old_states, kwargs))
def do_apply(self, **kwargs):
    r"""Process a sequence attending the attended context every step.

    In this variant the states are computed first (from the previous
    glimpses) and the new glimpses are then taken using the new states.

    In addition to the original sequence this method also requires
    its preprocessed version, the one computed by the `preprocess`
    method of the attention mechanism. Unknown keyword arguments
    are passed to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states and glimpses.

    """
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)
    if self.add_contexts:
        # Keep these two from being forwarded to the wrapped transition;
        # the mask is optional, hence the default.
        kwargs.pop(self.attended_name)
        kwargs.pop(self.attended_mask_name, None)

    step_inputs = dict_subset(kwargs, self._sequence_names,
                              pop=True, must_have=False)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # By this time **kwargs will contain the states and the contexts
    # of the transition.

    # Compute next states. Entries whose name contains 'mask' are not
    # offered to `distribute`.
    unmasked_inputs = {}
    for name, variable in step_inputs.items():
        if 'mask' not in name:
            unmasked_inputs[name] = variable
    distribute_args = dict_subset(
        dict_union(unmasked_inputs, prev_glimpses),
        self.distribute.apply.inputs)
    step_inputs.update(
        self.distribute.apply(as_dict=True, **distribute_args))
    new_states = self.transition.apply(
        iterate=False, as_dict=True, **dict_union(step_inputs, kwargs))

    # Compute next glimpses from the freshly computed states.
    needed = dict_subset(prev_glimpses, self.previous_glimpses_needed)
    attended_args = {
        self.attended_name: attended,
        self.attended_mask_name: attended_mask,
        self.preprocessed_attended_name: preprocessed_attended}
    new_glimpses = self.attention.take_glimpses(
        as_dict=True, **dict_union(new_states, needed, attended_args))

    return list(new_states.values()) + list(new_glimpses.values())
def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
    """Returns generation costs for output sequences.

    Besides the usual contexts, ``kwargs['initial_state_context']`` is
    required and is always added to the contexts.

    See Also
    --------
    :meth:`cost` : Scalar cost.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part.
    initial_states = dict_subset(kwargs, self._state_names, must_have=False)
    # Masks in contexts are optional (e.g. `attended_mask`).
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    contexts['initial_state_context'] = kwargs['initial_state_context']
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network.
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, initial_states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = {name: results[name][:-1] for name in self._state_names}
    glimpses = {name: results[name][1:] for name in self._glimpse_names}

    # Compute the cost: shift the feedback by one step in time and put the
    # feedback of the initial outputs in the first position.
    shifted = tensor.roll(feedback, 1, 0)
    shifted = tensor.set_subtensor(
        shifted[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))
    readouts = self.readout.readout(
        feedback=shifted, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    # Glimpses first, then states, as auxiliary variables.
    for name, variable in glimpses.items():
        application_call.add_auxiliary_variable(variable.copy(), name=name)
    for name, variable in states.items():
        application_call.add_auxiliary_variable(variable.copy(), name=name)

    # These variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        final_value = results[name][-1].copy()
        application_call.add_auxiliary_variable(
            final_value, name=name + "_final_value")

    return costs
def evaluate(self, application_call, outputs, mask=None, **kwargs):
    """Compute the costs of `outputs` together with the states and glimpses.

    Parameters
    ----------
    application_call
        Receives the states, glimpses and their final values as auxiliary
        variables.
    outputs
        The output sequences; assumed to have axes
        (time, batch, features, ...).
    mask : optional
        If given, passed to the transition and multiplied into the costs.
    **kwargs
        Initial states and contexts; both are optional.

    Returns
    -------
    list
        ``[costs]`` followed by the states and then the glimpses.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part
    states = dict_subset(kwargs, self._state_names, must_have=False)
    # masks in context are optional (e.g. `attended_mask`)
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = OrderedDict((name, results[name][:-1])
                         for name in self._state_names)
    glimpses = OrderedDict((name, results[name][1:])
                           for name in self._glimpse_names)

    # Compute the cost: the feedback is shifted by one step in time and
    # its first entry is the feedback of the initial outputs.
    feedback = tensor.roll(feedback, 1, 0)
    feedback = tensor.set_subtensor(
        feedback[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))

    # Run the language model; its states are prefixed with 'lm_'.
    if self.language_model:
        lm_states = self.language_model.evaluate(
            outputs=outputs, mask=mask, as_dict=True)
        lm_states = {'lm_' + name: value for name, value
                     in lm_states.items()}
    else:
        lm_states = {}

    readouts = self.readout.readout(
        feedback=feedback,
        **dict_union(lm_states, states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    for name, variable in list(glimpses.items()) + list(states.items()):
        application_call.add_auxiliary_variable(
            variable.copy(), name=name)

    # These variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names + self._glimpse_names:
        application_call.add_auxiliary_variable(
            results[name][-1].copy(), name=name+"_final_value")

    # Dict views cannot be concatenated to a list on Python 3, so they
    # are materialized first (this also works on Python 2).
    return [costs] + list(states.values()) + list(glimpses.values())
def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
    """Perform a single sequence generation step.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.
    dont_generate_new_outputs : bool, optional
        If ``True``, the previous outputs are used instead of
        generated ones. It is a temporary hack for ASRU.

    Returns
    -------
    list
        The next states, the next outputs, the next glimpses, the next
        language model states (if any) and the costs, in that order.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    prev_states = dict_subset(kwargs, self._state_names)
    # Masks in contexts are optional (e.g. `attended_mask`).
    ctx = dict_subset(kwargs, self._context_names, must_have=False)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names)
    lm_states = dict_subset(kwargs, self._lm_state_names)

    new_glimpses = self.transition.take_glimpses(
        as_dict=True, **dict_union(prev_states, prev_glimpses, ctx))

    prev_feedback = self.readout.feedback(outputs)
    readouts = self.readout.readout(
        feedback=prev_feedback,
        **dict_union(prev_states, new_glimpses, ctx, lm_states))

    if dont_generate_new_outputs:
        new_outputs = outputs
    else:
        new_outputs = self.readout.emit(readouts)
    new_costs = self.readout.cost(readouts, new_outputs)
    new_feedback = self.readout.feedback(new_outputs)

    # Without a fork the raw feedback is the only transition input.
    if self.fork:
        new_inputs = self.fork.apply(new_feedback, as_dict=True)
    else:
        new_inputs = {'feedback': new_feedback}
    new_states = self.transition.compute_states(
        as_list=True,
        **dict_union(new_inputs, prev_states, new_glimpses, ctx))

    new_lm_states = {}
    if self.language_model:
        # Drop the three-character prefix of every name (presumably the
        # 'lm_' prefix) before calling the language model.
        unmangled = {name[3:]: value for name, value in lm_states.items()}
        lm_step = self.language_model.generate(
            new_outputs, dont_generate_new_outputs=True,
            iterate=False, **unmangled)
        new_lm_states = OrderedDict(zip(self._lm_state_names, lm_step))

    return (new_states + [new_outputs] +
            list(new_glimpses.values()) +
            list(new_lm_states.values()) + [new_costs])
def do_apply(self, **kwargs):
    r"""Process a sequence attending the attended context every step.

    After the glimpses are taken, the ``'weighted_averages'`` glimpse is
    run through `self.context_transition` and replaced by the last
    element of the result.

    In addition to the original sequence this method also requires
    its preprocessed version, the one computed by the `preprocess`
    method of the attention mechanism; the same holds for the posTag
    input. Unknown keyword arguments are passed to the wrapped
    transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states and glimpses.

    """
    attended_list = kwargs[self.attended_name]
    preprocessed_attended_list = kwargs.pop(self.preprocessed_attended_name)
    attended_mask_list = kwargs.get(self.attended_mask_name)
    pos_tag = kwargs[self.posTag_name]
    preprocessed_pos_tag = kwargs.pop(self.preprocessed_posTag_name)

    step_inputs = dict_subset(kwargs, self._sequence_names,
                              pop=True, must_have=False)
    prev_states = dict_subset(kwargs, self._state_names, pop=True)
    prev_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)

    attended_args = {
        self.attended_name: attended_list,
        self.posTag_name: pos_tag,
        self.attended_mask_name: attended_mask_list,
        self.preprocessed_attended_name: preprocessed_attended_list,
        self.preprocessed_posTag_name: preprocessed_pos_tag}
    new_glimpses = self.take_glimpses(
        as_dict=True,
        **dict_union(prev_states, prev_glimpses, attended_args))

    # The weighted averages go through the context transition GRU one by
    # one; only the last element of its output is kept.
    averages = new_glimpses['weighted_averages']
    ones_mask = tensor.ones([averages.shape[1], averages.shape[0]])
    new_glimpses['weighted_averages'] = self.context_transition.apply(
        averages, ones_mask)[-1]

    new_states = self.compute_states(
        as_list=True,
        **dict_union(step_inputs, prev_states, new_glimpses, kwargs))
    return new_states + list(new_glimpses.values())
def mixed_generate(self, return_initial_states=True, **kwargs):
    """Sample one step from a mixture of the actor and the critic.

    The actor distribution (``exp`` of the actor scores) is mixed with a
    sharply peaked softmax of the critic outputs, using the readout's
    ``epsilon`` as the mixing weight. A sample is drawn from the mixture
    and both the actor's and the critic's recurrent parts are advanced
    by one step with it.

    Parameters
    ----------
    return_initial_states : bool, optional
        Not used in the body of this method.
    **kwargs
        Must contain ``'groundtruth'``, ``'groundtruth_mask'`` and
        ``'step'``, the actor's states/contexts and readout inputs, and
        the critic's counterparts under names prefixed with ``'critic_'``.

    Returns
    -------
    list
        ``[samples, step + 1]`` followed by the actor's and then the
        critic's recurrent states/outputs.

    """
    critic = self.generator.readout.critic
    groundtruth = kwargs.pop('groundtruth')
    groundtruth_mask = kwargs.pop('groundtruth_mask')
    step = kwargs.pop('step')

    sampling_inputs = dict_subset(
        kwargs, self.generator.readout.sample.inputs)
    actor_scores = self.generator.readout.scores(**sampling_inputs)

    critic_inputs = {
        name: kwargs['critic_' + name]
        for name in critic.generator.readout.merge_names}
    critic_outputs = critic.generator.readout.outputs(
        groundtruth, groundtruth_mask, **critic_inputs)

    epsilon = numpy.array(self.generator.readout.epsilon,
                          dtype=theano.config.floatX)
    actor_probs = tensor.exp(actor_scores)
    # This is a poor man's 1-hot argmax
    critic_probs = self.softmax.apply(critic_outputs * 1000)
    probs = (actor_probs * (tensor.constant(1) - epsilon)
             + critic_probs * epsilon)

    # Inverse-CDF sampling: the sample is the number of cumulative sums
    # lying strictly below the uniform draw.
    x = self.theano_rng.uniform(size=(probs.shape[0],))
    samples = (tensor.gt(x[:, None], tensor.cumsum(probs, axis=1))
               .astype(theano.config.floatX)
               .sum(axis=1)
               .astype('int64'))
    # Clip to the last valid index in case the draw exceeds every
    # cumulative sum.
    samples = tensor.minimum(samples, probs.shape[1] - 1)

    actor_feedback = self.generator.feedback.apply(samples, as_dict=True)
    actor_states_contexts = dict_subset(
        kwargs,
        self.generator.recurrent.apply.states
        + self.generator.recurrent.apply.contexts)
    actor_states_outputs = self.generator.recurrent.apply(
        as_dict=True, iterate=False,
        **dict_union(actor_feedback, actor_states_contexts))

    critic_feedback = critic.generator.feedback.apply(samples, as_dict=True)
    critic_states_contexts = {
        name: kwargs['critic_' + name]
        for name in
        critic.generator.recurrent.apply.states
        + critic.generator.recurrent.apply.contexts}
    critic_apply_kwargs = dict(
        as_dict=True, iterate=False,
        **dict_union(critic_feedback, critic_states_contexts))
    if self.generator.readout.critic_uses_actor_states:
        critic_apply_kwargs['extra_inputs'] = actor_states_outputs['states']
    critic_states_outputs = critic.generator.recurrent.apply(
        **critic_apply_kwargs)

    # Dict views cannot be concatenated to a list on Python 3, so they
    # are materialized first (this also works on Python 2).
    return ([samples, step + 1]
            + list(actor_states_outputs.values())
            + list(critic_states_outputs.values()))
def do_apply(self, **kwargs):
    r"""Process a sequence attending the attended context every step.

    The wrapped transition is stepped first, using the previous glimpses;
    the attention mechanism then takes new glimpses from the new states.

    In addition to the original sequence this method also requires
    its preprocessed version, the one computed by the `preprocess`
    method of the attention mechanism. Unknown keyword arguments
    are passed to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list of :class:`~tensor.TensorVariable`
        The current step states and glimpses.

    """
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)
    if self.add_contexts:
        # Not to be forwarded to the wrapped transition; the mask may be
        # absent, hence the default.
        kwargs.pop(self.attended_name)
        kwargs.pop(self.attended_mask_name, None)

    seqs = dict_subset(kwargs, self._sequence_names,
                       pop=True, must_have=False)
    old_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # By this time **kwargs will contain the states and the contexts
    # of the transition.

    # Compute next states; names containing 'mask' are withheld from
    # `distribute`.
    seqs_no_mask = {}
    for name, variable in seqs.items():
        if 'mask' not in name:
            seqs_no_mask[name] = variable
    distributed = self.distribute.apply(
        as_dict=True,
        **dict_subset(dict_union(seqs_no_mask, old_glimpses),
                      self.distribute.apply.inputs))
    seqs.update(distributed)
    states_now = self.transition.apply(
        iterate=False, as_dict=True, **dict_union(seqs, kwargs))

    # Compute next glimpses from the new states.
    needed = dict_subset(old_glimpses, self.previous_glimpses_needed)
    glimpses_now = self.attention.take_glimpses(
        as_dict=True,
        **dict_union(
            states_now, needed,
            {self.attended_name: attended,
             self.attended_mask_name: attended_mask,
             self.preprocessed_attended_name: preprocessed_attended}))

    return list(states_now.values()) + list(glimpses_now.values())
def cost_matrix(self, application_call, outputs, mask=None, **kwargs):
    """Returns generation costs for output sequences.

    The states, the glimpses and the final values of the states are
    attached to `application_call` as auxiliary variables.

    See Also
    --------
    :meth:`cost` : Scalar cost.

    """
    # We assume the data has axes (time, batch, features, ...)
    batch_size = outputs.shape[1]

    # Prepare input for the iterative part.
    initial_states = dict_subset(kwargs, self._state_names, must_have=False)
    # Masks in contexts are optional (e.g. `attended_mask`).
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    feedback = self.readout.feedback(outputs)
    inputs = self.fork.apply(feedback, as_dict=True)

    # Run the recurrent network.
    results = self.transition.apply(
        mask=mask, return_initial_states=True, as_dict=True,
        **dict_union(inputs, initial_states, contexts))

    # Separate the deliverables. The last states are discarded: they
    # are not used to predict any output symbol. The initial glimpses
    # are discarded because they are not used for prediction.
    # Remember, glimpses are computed _before_ output stage, states are
    # computed after.
    states = {name: results[name][:-1] for name in self._state_names}
    glimpses = {name: results[name][1:] for name in self._glimpse_names}

    # Compute the cost: shift the feedback by one step in time and put the
    # feedback of the initial outputs in the first position.
    shifted = tensor.roll(feedback, 1, 0)
    shifted = tensor.set_subtensor(
        shifted[0],
        self.readout.feedback(self.readout.initial_outputs(batch_size)))
    readouts = self.readout.readout(
        feedback=shifted, **dict_union(states, glimpses, contexts))
    costs = self.readout.cost(readouts, outputs)
    if mask is not None:
        costs *= mask

    # Glimpses first, then states, as auxiliary variables.
    for name, variable in glimpses.items():
        application_call.add_auxiliary_variable(variable.copy(), name=name)
    for name, variable in states.items():
        application_call.add_auxiliary_variable(variable.copy(), name=name)

    # These variables can be used to initialize the initial states of the
    # next batch using the last states of the current batch.
    for name in self._state_names:
        final_value = results[name][-1].copy()
        application_call.add_auxiliary_variable(
            final_value, name=name+"_final_value")

    return costs
def compute_states(self, **kwargs):
    r"""Compute current states when glimpses have already been computed.

    The sequential inputs of the wrapped transition are altered twice:
    first by `distribute` (using the glimpses), then by
    `topical_distribute` (using the topical glimpses). The wrapped
    transition is then applied for a single step. All unknown keyword
    arguments go to the wrapped transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what `self.transition` needs
        and in addition the current glimpses.

    Returns
    -------
    current_states : list of :class:`~tensor.TensorVariable`
        Current states computed by `self.transition`.

    """
    # Make sure we are not popping the mask: it stays in ``kwargs``.
    non_mask_names = []
    for name in self._sequence_names:
        if 'mask' not in name:
            non_mask_names.append(name)
    step_inputs = dict_subset(kwargs, non_mask_names, pop=True)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    topical_glimpses = dict_subset(kwargs, self._topical_glimpse_names,
                                   pop=True)

    if self.add_contexts:
        kwargs.pop(self.attended_name)
        # attended_mask_name can be optional
        kwargs.pop(self.attended_mask_name, None)
        kwargs.pop(self.topical_attended_name)
        kwargs.pop(self.topical_attended_mask_name, None)

    distribute_args = dict_subset(dict_union(step_inputs, glimpses),
                                  self.distribute.apply.inputs)
    step_inputs.update(
        self.distribute.apply(as_dict=True, **distribute_args))

    # The topical distribution sees the already updated inputs.
    topical_args = dict_subset(dict_union(step_inputs, topical_glimpses),
                               self.topical_distribute.apply.inputs)
    step_inputs.update(
        self.topical_distribute.apply(as_dict=True, **topical_args))

    return self.transition.apply(
        iterate=False, as_list=True, **dict_union(step_inputs, kwargs))
def generate(self, outputs, dont_generate_new_outputs=False, **kwargs):
    """A sequence generation step, optionally with a language model.

    Parameters
    ----------
    outputs : :class:`~tensor.TensorVariable`
        The outputs from the previous step.
    dont_generate_new_outputs : bool, optional
        If ``True``, the previous outputs are used instead of
        generated ones. It is a temporary hack for ASRU.

    Returns
    -------
    list
        Next states, next outputs, next glimpses, next language model
        states (empty without a language model) and the costs.

    Notes
    -----
    The contexts, previous states and glimpses are expected as keyword
    arguments.

    """
    states = dict_subset(kwargs, self._state_names)
    # Masks in contexts are optional (e.g. `attended_mask`).
    contexts = dict_subset(kwargs, self._context_names, must_have=False)
    glimpses = dict_subset(kwargs, self._glimpse_names)
    lm_states = dict_subset(kwargs, self._lm_state_names)

    glimpse_kwargs = dict_union(states, glimpses, contexts)
    next_glimpses = self.transition.take_glimpses(
        as_dict=True, **glimpse_kwargs)

    feedback_in = self.readout.feedback(outputs)
    readout_kwargs = dict_union(states, next_glimpses, contexts, lm_states)
    next_readouts = self.readout.readout(
        feedback=feedback_in, **readout_kwargs)

    if not dont_generate_new_outputs:
        next_outputs = self.readout.emit(next_readouts)
    else:
        next_outputs = outputs
    next_costs = self.readout.cost(next_readouts, next_outputs)
    next_feedback = self.readout.feedback(next_outputs)

    # Without a fork the raw feedback is the only transition input.
    next_inputs = {'feedback': next_feedback}
    if self.fork:
        next_inputs = self.fork.apply(next_feedback, as_dict=True)
    next_states = self.transition.compute_states(
        as_list=True,
        **dict_union(next_inputs, states, next_glimpses, contexts))

    next_lm_states = {}
    if self.language_model:
        # Names lose their first three characters (presumably the 'lm_'
        # prefix) before being given to the language model.
        plain_lm_states = {}
        for name in lm_states:
            plain_lm_states[name[3:]] = lm_states[name]
        lm_results = self.language_model.generate(
            next_outputs, dont_generate_new_outputs=True,
            iterate=False, **plain_lm_states)
        next_lm_states = OrderedDict(
            zip(self._lm_state_names, lm_results))

    result = next_states + [next_outputs]
    result = result + list(next_glimpses.values())
    result = result + list(next_lm_states.values())
    return result + [next_costs]
def apply(self, application, *args, **kwargs):
    """Call the wrapped application on inputs with extra leading axes.

    When ``extra_ndim`` is positive, the first ``extra_ndim + 1`` axes of
    every input are merged into one before the wrapped application is
    called, and the first axis of every output is split back afterwards.
    With ``extra_ndim == 0`` the wrapped application is called directly
    and its result is returned untouched.

    Parameters
    ----------
    application
        Its ``inputs`` attribute names the positional arguments.
    *args
        Positional inputs, matched to ``application.inputs`` in order.
    **kwargs
        Keyword inputs; may also carry ``extra_ndim`` (default 0).

    Returns
    -------
    The outputs of the wrapped application, or a list of reshaped
    outputs when ``extra_ndim`` is non-zero.

    """
    # extra_ndim is a mandatory parameter, but in order not to
    # confuse with positional inputs, it has to be extracted from
    # **kwargs
    extra_ndim = kwargs.get("extra_ndim", 0)

    call_inputs = dict(zip(application.inputs, args))
    call_inputs.update(
        dict_subset(kwargs, application.inputs, must_have=False))

    # Reshape only when needed, to prevent pollution of the computation
    # graph with no-ops. Existing keys are overwritten in place.
    if extra_ndim > 0:
        for name, value in call_inputs.items():
            full_shape = value.shape
            # Remember extra_dims for reshaping the outputs correctly.
            # Does not matter from which input, since we assume
            # extra dimension match for all inputs.
            extra_dims = full_shape[:extra_ndim]
            merged_dim = tensor.prod(full_shape[: extra_ndim + 1])
            flat_shape = tensor.join(
                0, merged_dim[None], full_shape[extra_ndim + 1 :])
            call_inputs[name] = value.reshape(
                flat_shape, ndim=value.ndim - extra_ndim)

    outputs = wrapped.__get__(self, None)(**call_inputs)
    if extra_ndim == 0:
        return outputs

    restored = []
    for output in pack(outputs):
        flat_shape = output.shape
        leading = (flat_shape[0] // tensor.prod(extra_dims))[None]
        full_shape = tensor.join(0, extra_dims, leading, flat_shape[1:])
        restored.append(
            output.reshape(full_shape, ndim=output.ndim + extra_ndim))
    return restored
def apply(self, application, *args, **kwargs):
    """Apply the wrapped application to inputs with extra leading axes.

    For a positive ``extra_ndim`` the leading ``extra_ndim + 1`` axes of
    each input are collapsed into a single axis, the wrapped application
    is called, and the leading axis of each output is expanded back. For
    ``extra_ndim == 0`` the call is a plain pass-through.

    Parameters
    ----------
    application
        Its ``inputs`` attribute names the positional arguments.
    *args
        Positional inputs, matched to ``application.inputs`` in order.
    **kwargs
        Keyword inputs; may also carry ``extra_ndim`` (default 0).

    Returns
    -------
    The outputs of the wrapped application, or a list of reshaped
    outputs when ``extra_ndim`` is non-zero.

    """
    # extra_ndim is a mandatory parameter, but in order not to
    # confuse with positional inputs, it has to be extracted from
    # **kwargs
    extra_ndim = kwargs.get('extra_ndim', 0)

    named_inputs = dict(zip(application.inputs, args))
    keyword_inputs = dict_subset(kwargs, application.inputs,
                                 must_have=False)
    named_inputs.update(keyword_inputs)

    # Skip the reshapes entirely when there are no extra dimensions, to
    # prevent pollution of the computation graph with no-ops.
    if extra_ndim > 0:
        for key, tensor_in in named_inputs.items():
            in_shape, in_ndim = tensor_in.shape, tensor_in.ndim
            # Remember extra_dims for reshaping the outputs correctly.
            # Does not matter from which input, since we assume
            # extra dimension match for all inputs.
            extra_dims = in_shape[:extra_ndim]
            collapsed = tensor.prod(in_shape[:extra_ndim + 1])
            target_shape = tensor.join(
                0, collapsed[None], in_shape[extra_ndim + 1:])
            named_inputs[key] = tensor_in.reshape(
                target_shape, ndim=in_ndim - extra_ndim)

    bound_application = wrapped.__get__(self, None)
    outputs = bound_application(**named_inputs)
    if extra_ndim == 0:
        return outputs

    reshaped_outputs = []
    for tensor_out in pack(outputs):
        out_shape, out_ndim = tensor_out.shape, tensor_out.ndim
        remaining = (out_shape[0] // tensor.prod(extra_dims))[None]
        target_shape = tensor.join(0, extra_dims, remaining, out_shape[1:])
        reshaped_outputs.append(
            tensor_out.reshape(target_shape, ndim=out_ndim + extra_ndim))
    return reshaped_outputs
def costs(self, prediction, prediction_mask, groundtruth, groundtruth_mask,
          **inputs):
    r"""Return the negated, masked sum over axis 0 of the prediction scores.

    Parameters
    ----------
    prediction
        Passed to ``self.all_scores`` together with the merged inputs.
    prediction_mask
        Multiplied elementwise with the scores. ``None`` means "no mask"
        and is replaced by the scalar 1.
    groundtruth, groundtruth_mask
        Not used by this method.
    \*\*inputs
        Must contain the names listed in ``self.merge_names``.

    Returns
    -------
    The value of ``-(scores * prediction_mask).sum(axis=0)``.

    """
    merged = self.merge(**dict_subset(inputs, self.merge_names))
    log_probs = self.all_scores(prediction, merged)
    # Compare with None explicitly: truth-testing an array-like mask is
    # either ambiguous (multi-element numpy arrays raise) or meaningless
    # (symbolic variables), and a legitimate falsy mask must not be
    # silently replaced by 1.
    if prediction_mask is None:
        prediction_mask = 1
    return -(log_probs * prediction_mask).sum(axis=0)
def all_outputs(self, application_call, groundtruth, groundtruth_mask,
                **inputs):
    r"""Merge the inputs and post-process the result along the last axis.

    Each stage is enabled by the corresponding flag on ``self`` and they
    are applied in this order: value softmax, same value for wrong
    actions, groundtruth word bonus, dueling outputs. The indexing used
    here requires the merged outputs to have three axes.

    Parameters
    ----------
    application_call
        Receives the ``wrong_mask`` auxiliary variable when
        ``self.same_value_for_wrong`` is set.
    groundtruth
        Indexed as a matrix; its transposed, flattened values select the
        mask entries that are set to zero.
    groundtruth_mask
        Not used by this method.
    \*\*inputs
        Must contain ``self.merge_names``, and ``'states'`` when
        ``self.groundtruth_word_bonus`` is set.

    Returns
    -------
    The post-processed outputs.

    """
    result = self.merge(**dict_subset(inputs, self.merge_names))
    # First index of every mask entry to clear; paired below with
    # ``groundtruth.T.flatten()`` as the second index.
    row_ids = tensor.repeat(
        tensor.arange(groundtruth.shape[1]), groundtruth.shape[0])

    def wrong_mask_like(template):
        # Ones shaped like `template`, zeroed at the groundtruth entries.
        ones = tensor.ones_like(template)
        return tensor.set_subtensor(
            ones[row_ids, groundtruth.T.flatten()], 0)

    if self.value_softmax:
        logger.debug('Applying value softmax')
        head = tensor.addbroadcast(result[:, :, :1], 2)
        tail = self.softmax.apply(result[:, :, 1:], extra_ndim=1)
        result = head + tail

    if self.same_value_for_wrong:
        logger.debug('Same value for apriori wrong actions')
        # Channel 0 holds the value shared by all wrong actions.
        shared_value = result[:, :, 0]
        result = result[:, :, 1:]
        wrong_mask = wrong_mask_like(result[0])
        result = (result * (1 - wrong_mask) +
                  shared_value[:, :, None] * wrong_mask)
        application_call.add_auxiliary_variable(
            wrong_mask, name='wrong_mask')

    if self.groundtruth_word_bonus:
        logger.debug('Bonus for grondtruth words')
        wrong_mask = wrong_mask_like(result[0])
        (weights,) = self.parameters
        bonuses = inputs['states'].dot(weights)
        result = result + (bonuses[:, :, None] *
                           (1 - wrong_mask)[None, :, :])

    if self.dueling_outputs:
        logger.debug('Dueling outputs a-la dueling networks')
        base = result[:, :, [0]]
        advantages = result[:, :, 1:]
        result = base + advantages - advantages.mean(
            axis=2, keepdims=True)

    return result
def outputs(self, groundtruth, groundtruth_mask, **inputs):
    r"""Merge the inputs and post-process the result along axis 1.

    This is the two-axis counterpart of ``all_outputs``; the logic is
    duplicated rather than shared because the indexing cannot be written
    with an ellipsis in Theano. Stages are enabled by flags on ``self``
    and run in this order: value softmax, same value for wrong actions,
    groundtruth word bonus, dueling outputs.

    Parameters
    ----------
    groundtruth
        Indexed as a matrix; its transposed, flattened values select the
        mask entries that are set to zero.
    groundtruth_mask
        Not used by this method.
    \*\*inputs
        Must contain ``self.merge_names``, and ``'states'`` when
        ``self.groundtruth_word_bonus`` is set.

    Returns
    -------
    The post-processed outputs.

    """
    result = self.merge(**dict_subset(inputs, self.merge_names))
    # First index of every mask entry to clear; paired below with
    # ``groundtruth.T.flatten()`` as the second index.
    row_ids = tensor.repeat(
        tensor.arange(groundtruth.shape[1]), groundtruth.shape[0])

    def wrong_mask_like(template):
        # Ones shaped like `template`, zeroed at the groundtruth entries.
        ones = tensor.ones_like(template)
        return tensor.set_subtensor(
            ones[row_ids, groundtruth.T.flatten()], 0)

    if self.value_softmax:
        logger.debug('Applying value softmax')
        head = tensor.addbroadcast(result[:, :1], 1)
        tail = self.softmax.apply(result[:, 1:])
        result = head + tail

    if self.same_value_for_wrong:
        logger.debug('Same value for apriori wrong actions')
        # Column 0 holds the value shared by all wrong actions.
        shared_value = result[:, 0]
        result = result[:, 1:]
        wrong_mask = wrong_mask_like(result)
        result = (result * (1 - wrong_mask) +
                  shared_value[:, None] * wrong_mask)

    if self.groundtruth_word_bonus:
        logger.debug('Bonus for grondtruth words')
        wrong_mask = wrong_mask_like(result)
        (weights,) = self.parameters
        bonuses = inputs['states'].dot(weights)
        result = result + bonuses[:, None] * (1 - wrong_mask)

    if self.dueling_outputs:
        logger.debug('Dueling outputs a-la dueling networks')
        base = result[:, [0]]
        advantages = result[:, 1:]
        result = base + advantages - advantages.mean(
            axis=1, keepdims=True)

    return result
def _push_allocation_config(self):
    """Propagate dimensions from the transition to attention and mixer.

    The attention receives the transition's state dimensions and the
    dimension of the attended input. The mixer's ``channel_dims`` is the
    union of the transition's sequence dimensions and the attention's
    glimpse dimensions, restricted to the inputs of ``self.mixer.apply``.
    """
    state_dims = self.transition.get_dims(self.state_names)
    self.attention.state_dims = state_dims

    attended_dim = self.transition.get_dim(self.attended_name)
    self.attention.sequence_dim = attended_dim

    candidate_dims = dict_union(
        self.transition.get_dims(self.sequence_names),
        self.attention.get_dims(self.glimpse_names))
    self.mixer.channel_dims = dict_subset(
        candidate_dims, self.mixer.apply.inputs)
def compute_steps(self, previous_steps):
    """Apply the wrapped step rule to the selected variables only.

    Parameters
    ----------
    previous_steps : dict-like
        Maps parameters to their previous steps.

    Returns
    -------
    actual : OrderedDict
        One entry per key of `previous_steps`, in the same order. The
        value is the step computed by ``self.step_rule`` when it returned
        one for that parameter, else the unchanged previous step.
    updates
        The updates returned by ``self.step_rule.compute_steps``.

    """
    selected = dict_subset(previous_steps, self.variables)
    steps, updates = self.step_rule.compute_steps(selected)

    actual = OrderedDict()
    for parameter in previous_steps:
        if parameter in steps:
            actual[parameter] = steps[parameter]
        else:
            # Not handled by the wrapped rule: pass through untouched.
            actual[parameter] = previous_steps[parameter]
    return actual, updates
def compute_steps(self, previous_steps):
    """Run ``self.step_rule`` on a subset of the steps, keep the others.

    Only the entries of `previous_steps` whose keys are in
    ``self.variables`` are handed to the wrapped step rule.

    Parameters
    ----------
    previous_steps : dict-like
        Maps parameters to their previous steps.

    Returns
    -------
    actual : OrderedDict
        Same keys and order as `previous_steps`; each value comes from
        the step rule's result when present there, otherwise from
        `previous_steps`.
    updates
        Whatever updates the wrapped step rule returned.

    """
    restricted = dict_subset(previous_steps, self.variables)
    new_steps, updates = self.step_rule.compute_steps(restricted)

    def resolve(parameter):
        source = new_steps if parameter in new_steps else previous_steps
        return source[parameter]

    actual = OrderedDict(
        (parameter, resolve(parameter)) for parameter in previous_steps)
    return actual, updates
def _push_allocation_config(self):
    """Propagate dimensions to the attention and distribute children.

    The attention gets the transition's state dimensions and this
    brick's dimension for the attended input. The distribute child gets
    its source dimension from the attention and its target dimensions
    from the transition's sequence dimensions, restricted to
    ``self.distribute.target_names``.
    """
    state_dims = self.transition.get_dims(self._state_names)
    self.attention.state_dims = state_dims
    self.attention.sequence_dim = self.get_dim(self.attended_name)

    source_dim = self.attention.get_dim(self.distribute.source_name)
    self.distribute.source_dim = source_dim

    sequence_dims = self.transition.get_dims(self._sequence_names)
    self.distribute.target_dims = dict_subset(
        sequence_dims, self.distribute.target_names)
def _push_allocation_config(self):
    """Propagate dimensions to the attention and distribute children.

    The attention gets the transition's state dimensions and this
    brick's dimension for the attended input. The distribute child gets
    its source dimension from the attention and its target dimensions
    from the transition's sequence dimensions, restricted to
    ``self.distribute.target_names``.
    """
    state_dims = self.transition.get_dims(self.state_names)
    self.attention.state_dims = state_dims
    self.attention.sequence_dim = self.get_dim(self.attended_name)

    source_dim = self.attention.get_dim(self.distribute.source_name)
    self.distribute.source_dim = source_dim

    sequence_dims = self.transition.get_dims(self.sequence_names)
    self.distribute.target_dims = dict_subset(
        sequence_dims, self.distribute.target_names)
def get_stream(self, part, batches=True, shuffle=True, add_sources=(),
               num_examples=None, rng=None, seed=None):
    """Build the data stream for one part of the dataset.

    Parameters
    ----------
    part : str
        Passed to ``self.get_dataset``; ``'train'`` selects
        ``self.batch_size`` for the final batching, any other value
        selects ``self.validation_batch_size``.
    batches : bool
        If ``False``, the stream of single examples is returned before
        batching and padding.
    shuffle : bool
        Use a shuffled instead of a sequential example scheme.
    add_sources : tuple
        Extra sources, requested from the dataset and kept in the
        rearranged stream in addition to ``self.default_sources``.
    num_examples : int, optional
        Number of examples to iterate over; defaults to the size of the
        dataset.
    rng : optional
        Random number generator for shuffling. Takes precedence over
        `seed`.
    seed : int, optional
        Seed for shuffling, used only when `rng` is not given.

    Returns
    -------
    The assembled stream.

    Raises
    ------
    ValueError
        If ``self.add_bos`` is set but ``self.bos_label`` is ``None``.

    """
    dataset = self.get_dataset(part, add_sources=add_sources)
    if num_examples is None:
        num_examples = dataset.num_examples

    if shuffle:
        # `seed` used to be accepted and silently ignored. Honour it when
        # the caller did not supply a generator of their own.
        if rng is None and seed is not None:
            import numpy
            rng = numpy.random.RandomState(seed)
        iteration_scheme = ShuffledExampleScheme(num_examples, rng=rng)
    else:
        iteration_scheme = SequentialExampleScheme(num_examples)

    stream = DataStream(dataset, iteration_scheme=iteration_scheme)

    if self.add_eos:
        stream = Mapping(stream, _AddLabel(
            self.eos_label,
            index=stream.sources.index(self.sources_map['labels'])))
    if self.add_bos:
        if self.bos_label is None:
            # ValueError is a subclass of Exception, so existing handlers
            # keep working.
            raise ValueError('No bos label given')
        stream = Mapping(stream, _AddLabel(
            self.bos_label, append=False, times=self.add_bos,
            index=stream.sources.index(self.sources_map['labels'])))

    if self.max_length:
        stream = Filter(stream, self.length_filter)

    if self.sort_k_batches and batches:
        stream = Batch(stream, iteration_scheme=ConstantScheme(
            self.batch_size * self.sort_k_batches))
        # Hardcode 0 for source on which to sort. This will be good, as
        # most source lengths are correlated and, furthermore, the
        # labels will typically be the last source, thus in a single-input
        # case this sorts on input lengths
        stream = Mapping(stream, SortMapping(_Length(index=0)))
        stream = Unpack(stream)

    if self.normalization:
        stream = self.normalization.wrap_stream(stream)
    stream = ForceFloatX(stream)
    stream = Rearrange(
        stream,
        dict_subset(self.sources_map,
                    self.default_sources + list(add_sources)))
    if not batches:
        return stream

    final_batch_size = (self.batch_size if part == 'train'
                        else self.validation_batch_size)
    stream = Batch(stream, iteration_scheme=ConstantScheme(final_batch_size))
    stream = Padding(stream)
    stream = Mapping(stream, switch_first_two_axes)
    stream = ForceCContiguous(stream)
    return stream
def take_glimpses(self, **kwargs):
    r"""Compute glimpses by delegating to ``self.attention.take_glimpses``.

    Parameters
    ----------
    \*\*kwargs
        Must contain the states, the glimpses and the attended input.
        The preprocessed attended input, the attended mask and the
        additional sequences are optional. Anything else is ignored.

    Returns
    -------
    Whatever ``self.attention.take_glimpses`` returns.

    """
    states = dict_subset(kwargs, self._state_names, pop=True)
    all_glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    # Only part of the previous glimpses is forwarded to the attention.
    needed_glimpses = dict_subset(
        all_glimpses, self.previous_glimpses_needed)
    extra_sequences = dict_subset(
        kwargs, self.add_sequences, pop=True, must_have=False)

    attended = kwargs.pop(self.attended_name)
    preprocessed_attended = kwargs.pop(
        self.preprocessed_attended_name, None)
    attended_mask = kwargs.pop(self.attended_mask_name, None)

    # At this point kwargs may still contain additional items,
    # e.g. AttentionRecurrent.transition.apply.contexts
    return self.attention.take_glimpses(
        attended, preprocessed_attended, attended_mask,
        **dict_union(states, needed_glimpses, extra_sequences))
def process_batch(self, batch):
    """Feed the required sources of `batch` to the accumulation function.

    Parameters
    ----------
    batch : dict
        Maps source names to data. It is restricted to
        ``self.buffer_.input_names`` before use; a missing source is
        reported through ``reraise_as`` with an explanatory message.

    """
    try:
        batch = dict_subset(batch, self.buffer_.input_names)
    except KeyError:
        message = ("Not all data sources required for monitoring were"
                   " provided. The list of required data sources:"
                   " {}.").format(self.buffer_.input_names)
        reraise_as(message)
    accumulate = self._accumulate_fun
    if accumulate is not None:
        accumulate(**batch)
def _push_allocation_config(self):
    """Propagate dimensions from the transition to attention and mixer.

    The attention receives the transition's state dimensions and the
    dimension of the attended input. The mixer's ``channel_dims`` is the
    union of the transition's sequence dimensions and the attention's
    glimpse dimensions, restricted to the inputs of ``self.mixer.apply``.
    """
    self.attention.state_dims = self.transition.get_dims(self.state_names)
    self.attention.sequence_dim = self.transition.get_dim(
        self.attended_name)

    sequence_dims = self.transition.get_dims(self.sequence_names)
    glimpse_dims = self.attention.get_dims(self.glimpse_names)
    all_dims = dict_union(sequence_dims, glimpse_dims)
    self.mixer.channel_dims = dict_subset(
        all_dims, self.mixer.apply.inputs)
def generate(self, **sequences_states_contexts):
    r"""Perform one generation step: sample, feed back, advance states.

    Parameters
    ----------
    \*\*sequences_states_contexts
        Must contain the inputs of ``self.readout.sample``; all of them
        are also forwarded to ``self.recurrent.apply``.

    Returns
    -------
    list
        The samples, their scores, and then the outputs of the recurrent
        step.

    """
    readout_inputs = dict_subset(
        sequences_states_contexts, self.readout.sample.inputs)
    samples, scores = self.readout.sample(**readout_inputs)

    feedback = self.feedback.apply(samples, as_dict=True)
    recurrent_inputs = dict_union(feedback, **sequences_states_contexts)
    next_states_outputs = self.recurrent.apply(
        as_list=True, iterate=False, **recurrent_inputs)

    return [samples, scores] + next_states_outputs
def apply(self, **kwargs):
    r"""Distribute the extra input over the normal inputs, then recur.

    Works the same way for ``iterate=True`` and ``iterate=False``,
    because the remaining keyword arguments are forwarded untouched.

    Parameters
    ----------
    \*\*kwargs
        Must contain ``self.extra_input_name`` and the names in
        ``self._normal_inputs``. ``mask`` is optional. Everything else
        goes to ``self.recurrent.apply``.

    Returns
    -------
    Whatever ``self.recurrent.apply`` returns.

    """
    extra_input = kwargs.pop(self.extra_input_name)
    mask = kwargs.pop('mask', None)

    plain_inputs = dict_subset(kwargs, self._normal_inputs, pop=True)
    distribute_inputs = dict_union(
        plain_inputs, {self.extra_input_name: extra_input})
    distributed = self.distribute.apply(as_dict=True, **distribute_inputs)

    return self.recurrent.apply(
        mask=mask, **dict_union(distributed, kwargs))
def generate(self, **sequences_states_contexts):
    r"""Sample from the readout and advance the recurrent part one step.

    Parameters
    ----------
    \*\*sequences_states_contexts
        Must contain the inputs of ``self.readout.sample``; all of them
        are also forwarded to ``self.recurrent.apply``.

    Returns
    -------
    list
        ``[samples, scores]`` followed by the outputs of the recurrent
        step.

    """
    samples, scores = self.readout.sample(
        **dict_subset(sequences_states_contexts,
                      self.readout.sample.inputs))
    feedback = self.feedback.apply(samples, as_dict=True)
    step_inputs = dict_union(feedback, **sequences_states_contexts)
    step_outputs = self.recurrent.apply(
        as_list=True, iterate=False, **step_inputs)
    return [samples, scores] + step_outputs
def process_batch(self, batch):
    """Run the accumulation function on the required part of `batch`.

    Parameters
    ----------
    batch : dict
        Maps source names to data. Only the sources named in
        ``self.buffer_.input_names`` are used; if one is missing, the
        ``KeyError`` is passed to ``reraise_as`` with a message listing
        the required sources.

    """
    try:
        required_batch = dict_subset(batch, self.buffer_.input_names)
    except KeyError:
        reraise_as(
            "Not all data sources required for monitoring were"
            " provided. The list of required data sources:"
            " {}.".format(self.buffer_.input_names))
    else:
        batch = required_batch
    if self._accumulate_fun is None:
        return
    self._accumulate_fun(**batch)
def do_apply(self, **kwargs):
    r"""Process a sequence attending the attended context every step.

    Besides the attended context itself, its preprocessed version is
    required as well. Keyword arguments that are not recognized here are
    passed on to ``self.compute_states``.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list
        The current step states followed by the current glimpses.

    """
    # The attended context and its mask stay in kwargs; the preprocessed
    # version is consumed here.
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)

    sequences = dict_subset(
        kwargs, self.sequence_names, pop=True, must_have=False)
    states = dict_subset(kwargs, self.state_names, pop=True)
    glimpses = dict_subset(kwargs, self.glimpse_names, pop=True)

    attention_inputs = {
        self.attended_name: attended,
        self.attended_mask_name: attended_mask,
        self.preprocessed_attended_name: preprocessed_attended,
    }
    current_glimpses = self.take_glimpses(
        return_dict=True,
        **dict_union(states, glimpses, attention_inputs))

    transition_inputs = dict_union(
        sequences, states, current_glimpses, kwargs)
    current_states = self.compute_states(
        return_list=True, **transition_inputs)

    return current_states + list(current_glimpses.values())
def compute_states(self, **kwargs):
    r"""Compute current states when glimpses have already been computed.

    The sequential inputs are first updated by ``self.distribute`` using
    the glimpses and then by ``self.topical_distribute`` using the
    topical glimpses; the wrapped transition is applied to the result.
    All keyword arguments not consumed here go to the transition.

    Parameters
    ----------
    \*\*kwargs
        Should contain everything what ``self.transition`` needs and in
        addition the current glimpses and topical glimpses.

    Returns
    -------
    current_states : list
        Current states computed by ``self.transition``.

    """
    # Mask sequences are left in kwargs on purpose.
    non_mask_names = [
        name for name in self._sequence_names if 'mask' not in name]
    sequences = dict_subset(kwargs, non_mask_names, pop=True)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)
    topical_glimpses = dict_subset(
        kwargs, self._topical_glimpse_names, pop=True)

    if self.add_contexts:
        kwargs.pop(self.attended_name)
        # attended_mask_name can be optional
        kwargs.pop(self.attended_mask_name, None)
        # NOTE(review): the topical pops are assumed to belong to this
        # branch like the two pops above - confirm.
        kwargs.pop(self.topical_attended_name)
        kwargs.pop(self.topical_attended_mask_name, None)

    distribute_inputs = dict_subset(
        dict_union(sequences, glimpses), self.distribute.apply.inputs)
    sequences.update(
        self.distribute.apply(as_dict=True, **distribute_inputs))

    # The topical pass sees the sequences already updated above.
    topical_inputs = dict_subset(
        dict_union(sequences, topical_glimpses),
        self.topical_distribute.apply.inputs)
    sequences.update(
        self.topical_distribute.apply(as_dict=True, **topical_inputs))

    return self.transition.apply(
        iterate=False, as_list=True, **dict_union(sequences, kwargs))
def process_batch(self, batch):
    """Aggregate the monitored quantities for one batch.

    Parameters
    ----------
    batch : dict
        Maps source names to data. It is restricted to the names of
        ``self.unique_inputs``; a missing source is reported through
        ``reraise_as`` with a message listing the required sources.

    """
    try:
        input_names = [variable.name for variable in self.unique_inputs]
        batch = dict_subset(batch, input_names)
    except KeyError:
        message = ("Not all data sources required for monitoring were"
                   " provided. The list of required data sources:"
                   " {}.").format(input_names)
        reraise_as(message)
    if self._aggregate_fun is None:
        return
    numerical_values = self._aggregate_fun(**batch)
    self.monitored_quantities_buffer.aggregate_quantities(numerical_values)
def prefix_generate(self, return_initial_states=True, **kwargs):
    r"""Perform one generation step, forcing the prefix labels.

    A sample is drawn from the generator's readout. Where ``step`` is
    less than ``self.prefix_steps``, the sample is replaced by
    ``self.prefix_labels[step[0]]``. The generator's recurrent part is
    then advanced one step with the feedback for the resulting samples.

    Parameters
    ----------
    return_initial_states : bool
        Not used by this method.
    \*\*kwargs
        Must contain ``step``, the inputs of the readout's ``sample``,
        and the states and contexts of the recurrent ``apply``.

    Returns
    -------
    list
        The samples, ``step + 1``, and then the values of the recurrent
        step's output dictionary.

    """
    step = kwargs.pop('step')

    sampling_inputs = dict_subset(
        kwargs, self.generator.readout.sample.inputs)
    # The scores of the samples are not needed here.
    samples, _ = self.generator.readout.sample(**sampling_inputs)

    prefix_mask = tensor.lt(step, self.prefix_steps)
    samples = (prefix_mask * self.prefix_labels[step[0]] +
               (1 - prefix_mask) * samples)

    feedback = self.generator.feedback.apply(samples, as_dict=True)
    recurrent_apply = self.generator.recurrent.apply
    states_contexts = dict_subset(
        kwargs, recurrent_apply.states + recurrent_apply.contexts)
    states_outputs = recurrent_apply(
        as_dict=True, iterate=False,
        **dict_union(feedback, states_contexts))

    # `dict.values()` is a view on Python 3 and cannot be added to a
    # list, so it has to be materialised first.
    return [samples, step + 1] + list(states_outputs.values())
def process_batch(self, batch, accumulate_dict):
    """Evaluate the outputs on one batch and append them to the lists.

    Parameters
    ----------
    batch : dict
        Maps source names to data. It is restricted to the names of
        ``self.inputs``; a missing source is reported through
        ``reraise_as`` with a message listing the required sources.
    accumulate_dict : dict
        Maps output names to lists; each result is appended to the list
        stored under the name of the corresponding output.

    """
    try:
        input_names = [variable.name for variable in self.inputs]
        batch = dict_subset(batch, input_names)
    except KeyError:
        message = ("Not all data sources required for monitoring were"
                   " provided. The list of required data sources:"
                   " {}.").format(input_names)
        reraise_as(message)

    results = self._func(**batch)
    for output, value in zip(self.outputs, results):
        accumulate_dict[output.name].append(value)
def process_batch(self, batch):
    """Pass one batch through the aggregation function.

    Parameters
    ----------
    batch : dict
        Maps source names to data. Only the sources named after
        ``self.unique_inputs`` are used; if one is missing, the
        ``KeyError`` is handed to ``reraise_as`` with a message listing
        the required sources.

    """
    try:
        input_names = [var.name for var in self.unique_inputs]
        batch = dict_subset(batch, input_names)
    except KeyError:
        reraise_as(
            "Not all data sources required for monitoring were"
            " provided. The list of required data sources:"
            " {}.".format(input_names))
    aggregate = self._aggregate_fun
    if aggregate is not None:
        values = aggregate(**batch)
        self.monitored_quantities_buffer.aggregate_quantities(values)
def process_batch(self, batch):
    """Evaluate the accumulation function and store its values.

    Parameters
    ----------
    batch : dict
        Maps source names to data. It is restricted to the names of
        ``self.unique_inputs``; a missing source is reported through
        ``reraise_as`` with a message listing the required sources.

    Notes
    -----
    Each returned value is appended to ``self.data`` under the name of
    the variable at the same position in ``self.theano_variables``.

    """
    try:
        input_names = [variable.name for variable in self.unique_inputs]
        batch = dict_subset(batch, input_names)
    except KeyError:
        message = ("Not all data sources required for monitoring were"
                   " provided. The list of required data sources:"
                   " {}.").format(input_names)
        reraise_as(message)
    if self._accumulate_fun is None:
        return
    numerical_values = self._accumulate_fun(**batch)
    for variable, value in zip(self.theano_variables, numerical_values):
        self.data[variable.name].append(value)
def do_apply(self, **kwargs):
    r"""Process a sequence attending the attended context every step.

    Besides the attended context itself, its preprocessed version is
    required as well. Keyword arguments that are not recognized here are
    passed on to ``self.compute_states``.

    Parameters
    ----------
    \*\*kwargs
        Should contain current inputs, previous step states, contexts,
        the preprocessed attended context, previous step glimpses.

    Returns
    -------
    outputs : list
        The current step states followed by the current glimpses.

    """
    # The attended context and its mask stay in kwargs; the preprocessed
    # version is consumed here.
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.pop(self.preprocessed_attended_name)
    attended_mask = kwargs.get(self.attended_mask_name)

    sequences = dict_subset(
        kwargs, self._sequence_names, pop=True, must_have=False)
    states = dict_subset(kwargs, self._state_names, pop=True)
    glimpses = dict_subset(kwargs, self._glimpse_names, pop=True)

    attention_inputs = {
        self.attended_name: attended,
        self.attended_mask_name: attended_mask,
        self.preprocessed_attended_name: preprocessed_attended,
    }
    current_glimpses = self.take_glimpses(
        as_dict=True,
        **dict_union(states, glimpses, attention_inputs))

    transition_inputs = dict_union(
        sequences, states, current_glimpses, kwargs)
    current_states = self.compute_states(as_list=True, **transition_inputs)

    return current_states + list(current_glimpses.values())
def process_batch(self, batch, accumulate_dict):
    """Compute the outputs for `batch` and accumulate them by name.

    Parameters
    ----------
    batch : dict
        Maps source names to data. Only the sources named after
        ``self.inputs`` are used; if one is missing, the ``KeyError`` is
        handed to ``reraise_as`` with a message listing the required
        sources.
    accumulate_dict : dict
        Maps output names to lists that receive one result per call.

    """
    try:
        input_names = [var.name for var in self.inputs]
        batch = dict_subset(batch, input_names)
    except KeyError:
        reraise_as(
            "Not all data sources required for monitoring were"
            " provided. The list of required data sources:"
            " {}.".format(input_names))

    results_list = self._func(**batch)
    output_names = (var.name for var in self.outputs)
    for name, result in zip(output_names, results_list):
        accumulate_dict[name].append(result)
def extract_sample(activations, data_stream, n=2000):
    """Evaluate `activations` on a stream and return the first `n` rows.

    Parameters
    ----------
    activations
        Variable to evaluate; the inputs it needs are found through a
        ``ComputationGraph`` and matched to the batch sources by name.
    data_stream
        Provides ``get_epoch_iterator(as_dict=True)``.
    n : int
        Number of rows (along the first axis) to collect.

    Returns
    -------
    numpy.ndarray or None
        The values of all evaluated batches concatenated along the first
        axis and cut to `n` rows. Fewer rows are returned if the stream
        ends first; ``None`` is returned if it yields no batch at all.

    """
    cg = ComputationGraph(activations)
    input_names = [variable.name for variable in cg.inputs]
    fn = theano.function(cg.inputs, [activations])

    # Collect the per-batch values and concatenate once at the end,
    # instead of re-copying the growing result for every batch.
    chunks = []
    collected = 0
    for batch in data_stream.get_epoch_iterator(as_dict=True):
        values = fn(**dict_subset(batch, input_names))[0]
        chunks.append(values)
        collected += values.shape[0]
        if collected >= n:
            # Enough rows: do not evaluate the rest of the epoch.
            break

    if not chunks:
        return None
    return numpy.concatenate(chunks)[:n]
def take_look(self, **kwargs):
    r"""Compute glimpses with the attention mechanism.

    Parameters
    ----------
    \*\*kwargs
        Must contain the attended context, the states and the previous
        glimpses listed in ``self.previous_glimpses_needed``. The
        preprocessed attended context is optional.

    Returns
    -------
    Whatever ``self.attention.take_look`` returns.

    """
    attended = kwargs[self.attended_name]
    preprocessed_attended = kwargs.get(self.preprocessed_attended_name)
    forwarded_names = self.state_names + self.previous_glimpses_needed
    forwarded = dict_subset(kwargs, forwarded_names)
    return self.attention.take_look(
        attended, preprocessed_attended, **forwarded)
def _evaluate(self):
    """Call ``self._fun`` on every batch of one epoch of the data stream.

    Each batch is restricted to the sources in ``self.inputs_names``
    before being passed on as keyword arguments. The return values of
    ``self._fun`` are discarded.
    """
    epoch_iterator = self.data_stream.get_epoch_iterator(as_dict=True)
    for full_batch in epoch_iterator:
        required = dict_subset(full_batch, self.inputs_names)
        self._fun(**required)
def search(config, params, load_path, part, decode_only, report,
           decoded_save, nll_only, seed):
    """Decode a part of the dataset with beam search and report the errors.

    For every example the groundtruth is analyzed first. Unless
    `nll_only` is set, beam search is run, the best candidate is scored
    against the groundtruth and running averages are printed.

    Parameters
    ----------
    config : dict
        Reads ``config['data']``, ``config['monitoring']['search']`` and
        the optional ``config['vocabulary']`` (path of a file with one
        whitespace-separated pair per line, enables the WER report).
    params
        Not used by this function.
    load_path
        Passed to ``create_model``.
    part : str
        Dataset part to decode. For ``'train'`` the stream is shuffled
        and limited to 500 examples.
    decode_only : str or None
        Python expression evaluating to a container of example numbers;
        when given and non-empty, all other examples are skipped.
    report : str
        Directory for ``report.txt`` and the alignment pictures. If
        empty, the report goes to standard output and no pictures are
        saved.
    decoded_save : str
        If non-empty, path of a file receiving one line per decoded
        example.
    nll_only : bool
        Only compute the groundtruth costs, skip the beam search.
    seed
        Passed to ``data.get_stream``.

    """
    import matplotlib
    matplotlib.use("Agg")
    from matplotlib import pyplot
    from lvsr.notebook import show_alignment

    data = Data(**config['data'])
    search_conf = config['monitoring']['search']

    logger.info("Recognizer initialization started")
    recognizer = create_model(config, data, load_path)
    recognizer.init_beam_search(search_conf['beam_size'])
    logger.info("Recognizer is initialized")

    has_uttids = 'uttids' in data.info_dataset.provides_sources
    add_sources = ('uttids',) if has_uttids else ()
    dataset = data.get_dataset(part, add_sources)
    stream = data.get_stream(
        part, batches=False,
        shuffle=part == 'train',
        add_sources=add_sources,
        num_examples=500 if part == 'train' else None,
        seed=seed)
    it = stream.get_epoch_iterator(as_dict=True)
    if decode_only is not None:
        # SECURITY NOTE(review): `eval` executes arbitrary code. This is
        # only acceptable while `decode_only` comes from a trusted
        # command line; consider `ast.literal_eval` if plain literals
        # are enough for the callers.
        decode_only = eval(decode_only)

    weights = tensor.matrix('weights')
    weight_statistics = theano.function(
        [weights],
        [weights_std(weights.dimshuffle(0, 'x', 1)),
         monotonicity_penalty(weights.dimshuffle(0, 'x', 1))])

    print_to = sys.stdout
    decoded_file = None
    try:
        if report:
            alignments_path = os.path.join(report, "alignments")
            # Also covers a report directory that already exists but has
            # no "alignments" subdirectory yet.
            if not os.path.exists(alignments_path):
                os.makedirs(alignments_path)
            print_to = open(os.path.join(report, "report.txt"), 'w')
        if decoded_save:
            decoded_file = open(decoded_save, 'w')

        num_examples = .0
        total_nll = .0
        total_errors = .0
        total_length = .0
        total_wer_errors = .0
        total_word_length = 0.

        if config.get('vocabulary'):
            with open(os.path.expandvars(config['vocabulary'])) as f:
                vocabulary = dict(line.split() for line in f.readlines())

            def to_words(chars):
                words = chars.split()
                words = [vocabulary[word] if word in vocabulary
                         else vocabulary['<UNK>'] for word in words]
                return words

        for number, example in enumerate(it):
            if decode_only and number not in decode_only:
                continue
            uttids = example.pop('uttids', None)
            raw_groundtruth = example.pop('labels')
            required_inputs = dict_subset(example, recognizer.inputs.keys())

            print("Utterance {} ({})".format(number, uttids), file=print_to)

            groundtruth = dataset.decode(raw_groundtruth)
            groundtruth_text = dataset.pretty_print(raw_groundtruth, example)
            costs_groundtruth, weights_groundtruth = recognizer.analyze(
                inputs=required_inputs,
                groundtruth=raw_groundtruth,
                prediction=raw_groundtruth)[:2]
            weight_std_groundtruth, mono_penalty_groundtruth = (
                weight_statistics(weights_groundtruth))
            total_nll += costs_groundtruth.sum()
            num_examples += 1
            print("Groundtruth:", groundtruth_text, file=print_to)
            print("Groundtruth cost:", costs_groundtruth.sum(),
                  file=print_to)
            print("Groundtruth weight std:", weight_std_groundtruth,
                  file=print_to)
            print("Groundtruth monotonicity penalty:",
                  mono_penalty_groundtruth, file=print_to)
            print("Average groundtruth cost: {}".format(
                total_nll / num_examples), file=print_to)
            if nll_only:
                print_to.flush()
                continue

            before = time.time()
            try:
                search_kwargs = dict(
                    char_discount=search_conf.get('char_discount'),
                    round_to_inf=search_conf.get('round_to_inf'),
                    stop_on=search_conf.get('stop_on'),
                    validate_solution_function=getattr(
                        data.info_dataset, 'validate_solution', None))
                # Options that are not configured are left to the
                # defaults of `beam_search`.
                search_kwargs = {k: v for k, v in search_kwargs.items()
                                 if v}
                outputs, search_costs = recognizer.beam_search(
                    required_inputs, **search_kwargs)
            except CandidateNotFoundError:
                logger.error('Candidate not found!')
                outputs = [[]]
                # `numpy.NaN` was removed in NumPy 2.0; `numpy.nan`
                # exists in every version.
                search_costs = [[numpy.nan]]
            took = time.time() - before

            recognized = dataset.decode(outputs[0])
            recognized_text = dataset.pretty_print(outputs[0], example)
            if recognized:
                # Theano scan doesn't work with 0 length sequences
                costs_recognized, weights_recognized = recognizer.analyze(
                    inputs=required_inputs,
                    groundtruth=raw_groundtruth,
                    prediction=outputs[0])[:2]
                weight_std_recognized, mono_penalty_recognized = (
                    weight_statistics(weights_recognized))
                error = min(1, wer(groundtruth, recognized))
            else:
                error = 1
            total_errors += len(groundtruth) * error
            total_length += len(groundtruth)

            if config.get('vocabulary'):
                wer_error = min(1, wer(to_words(groundtruth_text),
                                       to_words(recognized_text)))
                total_wer_errors += len(groundtruth) * wer_error
                total_word_length += len(groundtruth)

            if report and recognized:
                show_alignment(weights_groundtruth, groundtruth,
                               bos_symbol=True)
                pyplot.savefig(os.path.join(
                    alignments_path, "{}.groundtruth.png".format(number)))
                show_alignment(weights_recognized, recognized,
                               bos_symbol=True)
                pyplot.savefig(os.path.join(
                    alignments_path, "{}.recognized.png".format(number)))

            if decoded_file is not None:
                print("{} {}".format(uttids, ' '.join(recognized)),
                      file=decoded_file)

            print("Decoding took:", took, file=print_to)
            print("Beam search cost:", search_costs[0], file=print_to)
            print("Recognized:", recognized_text, file=print_to)
            if recognized:
                print("Recognized cost:", costs_recognized.sum(),
                      file=print_to)
                print("Recognized weight std:", weight_std_recognized,
                      file=print_to)
                print("Recognized monotonicity penalty:",
                      mono_penalty_recognized, file=print_to)
            print("CER:", error, file=print_to)
            print("Average CER:", total_errors / total_length,
                  file=print_to)
            if config.get('vocabulary'):
                print("WER:", wer_error, file=print_to)
                print("Average WER:", total_wer_errors / total_word_length,
                      file=print_to)
            print_to.flush()
    finally:
        # Close the files opened above, also when decoding fails
        # midway. Standard output is left alone.
        if decoded_file is not None:
            decoded_file.close()
        if print_to is not sys.stdout:
            print_to.close()
def recurrent_apply(brick, application, application_call, *args, **kwargs):
    """Iterate a transition function with :func:`theano.scan`.

    Parameters
    ----------
    iterate : bool
        If ``True`` iteration is made. By default ``True``. With
        ``False`` the call is relayed to the wrapped function directly.
    reverse : bool
        If ``True``, the sequences are processed in backward direction.
        ``False`` by default.
    return_initial_states : bool
        If ``True``, initial states are included in the returned state
        tensors. ``False`` by default.

    Notes
    -----
    When no sequence is given, ``n_steps`` and ``batch_size`` must be
    passed as keyword arguments.

    .. todo::

        * Handle `updates` returned by the :func:`theano.scan` routine.
        * ``kwargs`` has a random order; check if this is a problem.

    """
    # Without iteration there is nothing to do here: relay the call.
    if not kwargs.pop('iterate', True):
        return application_function(brick, *args, **kwargs)
    reverse = kwargs.pop('reverse', False)
    return_initial_states = kwargs.pop('return_initial_states', False)

    # From here on every argument is addressed by name.
    kwargs.update(zip(arg_names, args))

    # Everything handed to scan must be a tensor variable; arguments
    # explicitly set to None count as not given.
    scan_arguments = (application.sequences + application.states +
                      application.contexts)
    for name in scan_arguments:
        if name not in kwargs:
            continue
        if kwargs[name] is None:
            del kwargs[name]
        else:
            kwargs[name] = tensor.as_tensor_variable(kwargs[name])

    sequences_given = dict_subset(
        kwargs, application.sequences, must_have=False)
    contexts_given = dict_subset(
        kwargs, application.contexts, must_have=False)

    # Determine number of steps and batch size.
    if len(sequences_given):
        # TODO Assumes 1 time dim!
        first_shape = list(sequences_given.values())[0].shape
        n_steps = first_shape[0]
        batch_size = first_shape[1]
    else:
        # TODO Raise error if n_steps and batch_size not found?
        n_steps = kwargs.pop('n_steps')
        batch_size = kwargs.pop('batch_size')

    # Whatever is left is passed to every step unchanged.
    rest_kwargs = {}
    for key, value in kwargs.items():
        if key not in scan_arguments:
            rest_kwargs[key] = value
    for value in rest_kwargs.values():
        if isinstance(value, Variable) and not is_shared_variable(value):
            logger.warning("unknown input {}".format(value) +
                           unknown_scan_input)

    # Ensure that all initial states are available.
    for state_name in application.states:
        dim = brick.get_dim(state_name)
        if state_name in kwargs:
            given = kwargs[state_name]
            if isinstance(given, NdarrayInitialization):
                kwargs[state_name] = tensor.alloc(
                    given.generate(brick.rng, (1, dim)), batch_size, dim)
            elif isinstance(given, Application):
                kwargs[state_name] = given(
                    state_name, batch_size, *args, **kwargs)
        else:
            # TODO init_func returns 2D-tensor, fails for iterate=False
            kwargs[state_name] = brick.initial_state(
                state_name, batch_size, *args, **kwargs)
            assert kwargs[state_name]
    states_given = dict_subset(kwargs, application.states)

    # Theano issue 1772
    for name in list(states_given):
        state = states_given[name]
        states_given[name] = tensor.unbroadcast(state, *range(state.ndim))

    def scan_function(*args):
        args = list(args)
        # Scan passes sequences first, then the recurrent outputs, then
        # the non-sequences.
        state_outputs = [output for output in application.outputs
                         if output in application.states]
        step_names = (list(sequences_given) + state_outputs +
                      list(contexts_given))
        step_kwargs = dict(equizip(step_names, args))
        step_kwargs.update(rest_kwargs)
        outputs = application(iterate=False, **step_kwargs)
        # We want to save the computation graph returned by the
        # `application_function` when it is called inside the
        # `theano.scan`.
        application_call.inner_inputs = args
        application_call.inner_outputs = pack(outputs)
        return outputs

    outputs_info = []
    for name in application.outputs:
        if name in application.states:
            outputs_info.append(states_given[name])
        else:
            outputs_info.append(None)

    result, updates = theano.scan(
        scan_function,
        sequences=list(sequences_given.values()),
        outputs_info=outputs_info,
        non_sequences=list(contexts_given.values()),
        n_steps=n_steps,
        go_backwards=reverse)
    result = pack(result)

    if return_initial_states:
        # Undo Subtensor
        for index in range(len(states_given)):
            assert isinstance(result[index].owner.op,
                              tensor.subtensor.Subtensor)
            result[index] = result[index].owner.inputs[0]

    if updates:
        application_call.updates = dict_union(
            application_call.updates, updates)

    return result
def scores(self, **inputs):
    r"""Return log-probabilities of the merged inputs.

    Parameters
    ----------
    \*\*inputs
        Must contain the names listed in ``self.merge_names``; only
        those are used.

    Returns
    -------
    The result of ``self.softmax.log_probabilities`` applied to the
    merged inputs.

    """
    merge_inputs = dict_subset(inputs, self.merge_names)
    merged = self.merge(**merge_inputs)
    return self.softmax.log_probabilities(merged)