def _create_start_trackers_for_augmentation(self, story_end_trackers):
    # type: (List[TrackerWithCachedStates]) -> TrackerLookupDict
    """Build the start trackers for the next augmentation phase.

    Instead of beginning each further phase with a fresh tracker, a
    subsample of the trackers that already reached a story end is reused
    as the starting point for another traversal of the story steps.

    Args:
        story_end_trackers: trackers that made their way to a story end.

    Returns:
        A ``defaultdict(list)``. When story concatenation is enabled and
        at least one tracker was sampled, it maps ``STORY_START`` to
        reverted copies of the sampled trackers; otherwise it is empty.
    """
    start_trackers = defaultdict(list)

    if not self.config.use_story_concatenation:
        # concatenation is switched off, nothing is carried over
        return start_trackers

    sampled = utils.subsample_array(story_end_trackers,
                                    self.config.augmentation_factor,
                                    rand=self.config.rand)

    for finished in sampled:
        # Every story ends with an action listen and every story starts
        # with one as well. Replaying the steps on top of a finished
        # tracker would therefore log two action listens in a row, so the
        # trailing one is reverted first.
        # The revert is applied to a copy, otherwise the original tracker
        # would be updated as well.
        restart = finished.copy()
        restart.update(ActionReverted())
        start_trackers[STORY_START].append(restart)

    return start_trackers
def test_subsample_array():
    """A subsample has the requested size and only contains input items."""
    values = list(range(1, 11))

    # called without `can_modify_incoming_array=False`, so this call is
    # expected to shuffle `values` itself
    sample = utils.subsample_array(values, 5)

    assert len(sample) == 5
    assert set(sample).issubset(values)
def _prepare_next_phase(
        active_trackers,  # type: TrackerLookupDict
        augmentation_factor,  # type: int
        rand  # type: Random
):
    # type: (...) -> Dict[Optional[Text], List[FeaturizedTracker]]
    """Set up the trackers that start the next phase.

    One phase is one traversal of all story steps. Before the steps are
    processed again, the trackers that reached the end checkpoint ``None``
    (the end of a story) are subsampled and reused as start trackers, so
    the second and all following phases continue from finished stories
    instead of from a fresh tracker.

    Args:
        active_trackers: lookup from checkpoint name to the trackers that
            reached it; only the entry for ``None`` is read.
        augmentation_factor: maximum number of ending trackers to reuse.
        rand: random number generator handed to the subsampling helper.

    Returns:
        A new dict with the single key ``STORY_START`` mapping to the
        reused trackers (an empty list if no tracker reached a story end).

    Note:
        The sampled trackers are modified in place (their last action is
        undone); they are not copied.
    """
    ending_trackers = active_trackers.get(None, [])

    # `rand` is passed by keyword: `subsample_array` also accepts a
    # `can_modify_incoming_array` flag, so a positional third argument
    # could presumably end up bound to that flag instead of the generator.
    subsampled_trackers = utils.subsample_array(ending_trackers,
                                                augmentation_factor,
                                                rand=rand)

    next_trackers = {STORY_START: []}

    for t in subsampled_trackers:
        # All stories end and start with action listen, so after logging
        # the first actions of the next phase a tracker would contain
        # action listen followed by action listen. Undoing the last action
        # listen avoids that.
        t.undo_last_action()
        next_trackers[STORY_START].append(t)

    return next_trackers
def test_subsample_array_read_only():
    """Subsampling in read-only mode must leave the input list untouched."""
    t = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    untouched = list(t)

    r = utils.subsample_array(t, 5, can_modify_incoming_array=False)

    assert len(r) == 5
    assert set(r).issubset(t)
    # this is what distinguishes the read-only mode: neither the order nor
    # the content of the incoming list may change
    assert t == untouched
def _subsample_trackers(incoming_trackers, max_number_of_trackers,
                        augmentation_factor, phase_idx, rand):
    """Limit the number of trackers that enter a story step.

    If flows get very long and have a lot of forks, too many trackers are
    collected, hence the subsampling.

    Args:
        incoming_trackers: trackers that reached the current story step.
        max_number_of_trackers: cap used during the first phase; ``None``
            disables subsampling in that phase.
        augmentation_factor: cap used in every phase after the first.
        phase_idx: index of the current phase, ``0`` being the first.
        rand: random number generator handed to the subsampling helper.

    Returns:
        ``incoming_trackers`` itself when no cap applies, otherwise the
        result of ``utils.subsample_array``.
    """
    if phase_idx == 0:
        if max_number_of_trackers is None:
            return incoming_trackers
        limit = max_number_of_trackers
    else:
        # after the first phase we always sample at most
        # `augmentation_factor` trackers
        limit = augmentation_factor

    # `rand` is passed by keyword: `subsample_array` also accepts a
    # `can_modify_incoming_array` flag, so a positional third argument
    # could presumably end up bound to that flag instead of the generator.
    return utils.subsample_array(incoming_trackers, limit, rand=rand)
def _create_start_trackers(self, active_trackers):
    # type: (TrackerLookupDict) -> TrackerLookupDict
    """Set up the trackers that start the next phase.

    One phase is one traversal of all story steps. Before the steps are
    processed again, trackers that reached an end checkpoint are moved to
    the start checkpoint they are glued to.

    Args:
        active_trackers: lookup from checkpoint name to the trackers that
            reached it during the previous phase.

    Returns:
        A ``defaultdict(list)`` mapping start checkpoint names to the
        trackers that continue from there.

    Note:
        Trackers that restart at ``STORY_START`` are modified in place
        (their last action is undone); they are not copied.
    """
    # Work on a copy: the mapping belongs to the story graph and adding
    # the concatenation entry below must not leak into it.
    glue_mapping = dict(self.story_graph.story_end_checkpoints)
    if self.config.use_story_concatenation:
        glue_mapping[STORY_END] = STORY_START

    next_active_trackers = defaultdict(list)

    for end, start in glue_mapping.items():
        ending_trackers = active_trackers.get(end, [])
        restarts_story = start == STORY_START

        if restarts_story:
            # This is where the augmentation magic happens. Trackers that
            # reached the end of a story are reused to process all steps
            # again, so the second and all following phases continue from
            # a couple of finished stories instead of a fresh tracker.
            # `rand` is passed by keyword: `subsample_array` also accepts
            # a `can_modify_incoming_array` flag, so a positional third
            # argument could presumably be bound to that flag instead.
            ending_trackers = utils.subsample_array(
                ending_trackers,
                self.config.augmentation_factor,
                rand=self.config.rand)

        for t in ending_trackers:
            if restarts_story:
                # All stories end and start with action listen, so after
                # logging the first actions of the next phase a tracker
                # would contain action listen followed by action listen.
                # Undoing the last action listen avoids that.
                t.undo_last_action()
            next_active_trackers[start].append(t)

    return next_active_trackers
def _create_start_trackers_for_augmentation(self, story_end_trackers):
    # type: (List[TrackerWithCachedStates]) -> TrackerLookupDict
    """Turn finished trackers into start trackers for another phase.

    This is the augmentation step: rather than starting the second and
    all following phases from a fresh tracker, a few of the trackers that
    reached the end of a story are picked and all steps are processed
    again on top of them.

    Args:
        story_end_trackers: trackers that reached a story end.

    Returns:
        A ``defaultdict(list)`` that holds reverted copies of the picked
        trackers under ``STORY_START``. It stays empty when story
        concatenation is disabled or no tracker was picked.
    """
    carried_over = defaultdict(list)
    config = self.config

    if not config.use_story_concatenation:
        return carried_over

    picked = utils.subsample_array(
        story_end_trackers,
        config.augmentation_factor,
        rand=config.rand
    )

    for tracker in picked:
        # A finished story ends on action listen and the next story opens
        # with action listen again; without a fix the combined tracker
        # would contain the two back to back. Reverting the last action
        # listen removes the duplicate.
        # Revert on a copy so the original tracker is left as it was.
        reverted = tracker.copy()
        reverted.update(ActionReverted())
        carried_over[STORY_START].append(reverted)

    return carried_over
def _subsample_trackers(self, incoming_trackers):
    # type: (List[DialogueStateTracker]) -> List[DialogueStateTracker]
    """Return a random subset of the trackers if a limit is configured.

    Long flows with a lot of forks produce too many trackers to collect,
    which is why they are subsampled.

    Args:
        incoming_trackers: trackers to choose from.

    Returns:
        ``incoming_trackers`` itself when
        ``self.config.max_number_of_trackers`` is ``None``, otherwise the
        result of ``utils.subsample_array`` with that limit.
    """
    limit = self.config.max_number_of_trackers
    if limit is None:
        # no cap configured, pass everything through
        return incoming_trackers

    return utils.subsample_array(incoming_trackers,
                                 limit,
                                 rand=self.config.rand)
def _subsample_trackers(
        self,
        incoming_trackers: List[TrackerWithCachedStates],
        max_number_of_trackers: int) -> List[TrackerWithCachedStates]:
    """Return a random subset of the trackers, capped at the given limit.

    Long flows with a lot of forks produce too many trackers to collect,
    which is why they are subsampled.

    Args:
        incoming_trackers: trackers to choose from.
        max_number_of_trackers: maximum size of the subset. Although it
            is annotated as ``int``, ``None`` is accepted and turns
            subsampling off.

    Returns:
        ``incoming_trackers`` itself when ``max_number_of_trackers`` is
        ``None``, otherwise the result of ``utils.subsample_array``.
    """
    if max_number_of_trackers is None:
        # no cap requested, pass everything through
        return incoming_trackers

    return utils.subsample_array(incoming_trackers,
                                 max_number_of_trackers,
                                 rand=self.config.rand)
def build_stories(self, domain, max_number_of_trackers=2000):
    # type: (Domain, int) -> List[Story]
    """Build the stories of a graph.

    The ordered story steps are walked once. Every step extends the
    stories that reached its start checkpoint and files the extended
    stories under its end checkpoint.

    Args:
        domain: domain passed to each step to obtain its explicit events.
        max_number_of_trackers: cap on the number of stories entering a
            single step; ``None`` disables subsampling.

    Returns:
        The stories that reached the end checkpoint ``None``, or an empty
        list if no step ended there.
    """
    from rasa_core.training_utils.dsl import STORY_START, Story

    active_trackers = {STORY_START: [Story()]}
    # fixed seed, so the same subsample is drawn on every run
    rand = random.Random(42)

    for step in self.ordered_steps():
        start_name = step.start_checkpoint_name()
        if start_name not in active_trackers:
            continue

        # these are the trackers that reached this story step
        # and that need to handle all events of the step
        incoming_trackers = active_trackers[start_name]

        # TODO: we can't use tracker filter here to filter for
        #   checkpoint conditions since we don't have trackers.
        #   this code should rather use the code from the dsl.
        if max_number_of_trackers is not None:
            # `rand` is passed by keyword: `subsample_array` also accepts
            # a `can_modify_incoming_array` flag, so a positional third
            # argument could presumably be bound to that flag instead.
            incoming_trackers = utils.subsample_array(
                incoming_trackers, max_number_of_trackers, rand=rand)

        events = step.explicit_events(domain)

        # a new story is created per incoming tracker, as multiple story
        # steps might start with the same checkpoint and all of them
        # will use the same set of incoming trackers
        if events:
            trackers = [Story(tracker.story_steps + [step])
                        for tracker in incoming_trackers]
        else:
            trackers = []  # small optimization

        # update our tracker dictionary with the trackers that handled
        # the events of the step and that can now be used for further
        # story steps that start with the checkpoint this step ended on
        active_trackers.setdefault(step.end_checkpoint_name(),
                                   []).extend(trackers)

    # no step may have ended a story (e.g. a graph without steps); return
    # an empty result instead of failing on the missing key
    return active_trackers.get(None, [])