Example #1
0
    def _create_start_trackers_for_augmentation(self, story_end_trackers):
        # type: (List[TrackerWithCachedStates]) -> TrackerLookupDict
        """Build the start trackers for the next augmentation phase.

        Trackers that reached the end of a story are subsampled (using
        `config.augmentation_factor` and `config.rand`) and queued again
        under `STORY_START`. The second and all following phases thereby
        continue from finished stories instead of from a fresh tracker.

        If story concatenation is disabled in the config, an empty
        lookup is returned.
        """
        start_trackers = defaultdict(list)
        config = self.config

        if not config.use_story_concatenation:
            return start_trackers

        sampled = utils.subsample_array(story_end_trackers,
                                        config.augmentation_factor,
                                        rand=config.rand)
        for finished in sampled:
            # work on a copy - otherwise the tracker that ended the
            # story would be modified as well
            restarted = finished.copy()
            # all stories end and start with action listen, so the next
            # phase would log action listen twice in a row. reverting
            # the trailing action listen avoids that
            restarted.update(ActionReverted())
            start_trackers[STORY_START].append(restarted)

        return start_trackers
Example #2
0
def test_subsample_array():
    """Subsampling ten items down to five yields five of the original items."""
    values = list(range(1, 11))
    # with the default arguments the incoming list may be shuffled in place
    sample = utils.subsample_array(values, 5)

    assert len(sample) == 5
    assert set(sample) <= set(values)
Example #3
0
    def _prepare_next_phase(
            active_trackers,  # type: TrackerLookupDict
            augmentation_factor,  # type: int
            rand  # type: Random
    ):
        # type: (...) -> Dict[Optional[Text], List[FeaturizedTracker]]
        """Set up the trackers for the next phase.

        One phase is one traversal of all story steps. This is where the
        augmentation happens: trackers that reached the end checkpoint
        `None` (the end of a story) are subsampled and become the start
        trackers of the next phase, so the second and all following
        phases do not begin with a fresh tracker.

        Returns a dict with the single key `STORY_START` holding the
        reused trackers. The reused trackers are modified in place.
        """
        finished = active_trackers.get(None, [])

        restart_pool = []
        for tracker in utils.subsample_array(finished,
                                             augmentation_factor, rand):
            # all stories end and start with action listen - without
            # this cleanup the next phase would log action listen
            # directly after action listen, so the last one is undone
            tracker.undo_last_action()
            restart_pool.append(tracker)

        return {STORY_START: restart_pool}
Example #4
0
def test_subsample_array_read_only():
    """Subsampling with `can_modify_incoming_array=False` yields five known items."""
    source = list(range(1, 11))
    picked = utils.subsample_array(source, 5, can_modify_incoming_array=False)

    assert len(picked) == 5
    assert set(picked) <= set(source)
Example #5
0
def test_subsample_array():
    """A sample of size five drawn from ten items is a subset of those items."""
    population = list(range(1, 11))
    # the call is allowed to shuffle `population` in place
    drawn = utils.subsample_array(population, 5)

    assert len(drawn) == 5
    assert set(drawn) <= set(population)
Example #6
0
 def _subsample_trackers(incoming_trackers, max_number_of_trackers,
                         augmentation_factor, phase_idx, rand):
     """Limit the number of trackers that are processed further.

     If flows get very long and have a lot of forks, too many trackers
     are collected - hence the sub sampling.

     In the first phase (`phase_idx == 0`) the trackers are subsampled
     with `max_number_of_trackers`, or returned unchanged if that limit
     is `None`. In every later phase they are subsampled with
     `augmentation_factor`.
     """
     is_first_phase = phase_idx == 0

     if not is_first_phase:
         # after the first phase the augmentation factor is the limit
         return utils.subsample_array(incoming_trackers,
                                      augmentation_factor, rand)

     if max_number_of_trackers is None:
         # no limit configured - keep every tracker
         return incoming_trackers

     return utils.subsample_array(incoming_trackers,
                                  max_number_of_trackers, rand)
Example #7
0
    def _create_start_trackers(self, active_trackers):
        # type: (TrackerLookupDict) -> TrackerLookupDict
        """Create the start trackers for the next phase.

        One phase is one traversal of all story steps. Trackers that
        ended on a checkpoint listed in the story graph's
        `story_end_checkpoints` are handed over to the mapped start
        checkpoint. If story concatenation is enabled, trackers that
        reached `STORY_END` are additionally glued to `STORY_START`.

        Trackers that restart at `STORY_START` are subsampled with the
        configured augmentation factor and get their last action undone
        (in place) before they are reused.

        Returns a lookup from start checkpoint to the trackers that
        begin the next phase there.
        """

        # copy the mapping - the concatenation entry added below is a
        # detail of this generator and must not leak into the story
        # graph's own checkpoint table
        glue_mapping = dict(self.story_graph.story_end_checkpoints)
        if self.config.use_story_concatenation:
            glue_mapping[STORY_END] = STORY_START

        next_active_trackers = defaultdict(list)
        for end, start in glue_mapping.items():
            ending_trackers = active_trackers.get(end, [])
            restarts_story = start == STORY_START
            if restarts_story:
                ending_trackers = utils.subsample_array(
                    ending_trackers, self.config.augmentation_factor,
                    self.config.rand)

            # This is where the augmentation magic happens. We
            # will reuse all the trackers that reached the
            # end checkpoint `None` (which is the end of a
            # story) and start processing all steps again. So instead
            # of starting with a fresh tracker, the second and
            # all following phases will reuse a couple of the trackers
            # that made their way to a story end.
            for t in ending_trackers:
                # this is a nasty thing - all stories end and
                # start with action listen - so after logging the first
                # actions in the next phase the trackers would
                # contain action listen followed by action listen.
                # to fix this we are going to "undo" the last action listen
                if restarts_story:
                    t.undo_last_action()
                next_active_trackers[start].append(t)
        return next_active_trackers
Example #8
0
def test_subsample_array_read_only():
    """Five items drawn with `can_modify_incoming_array=False` come from the input."""
    items = list(range(1, 11))
    subset = utils.subsample_array(items, 5, can_modify_incoming_array=False)

    assert len(subset) == 5
    assert set(subset) <= set(items)
Example #9
0
    def _create_start_trackers_for_augmentation(self, story_end_trackers):
        # type: (List[TrackerWithCachedStates]) -> TrackerLookupDict
        """Create the trackers the next augmentation phase starts with.

        Instead of starting with a fresh tracker, the second and all
        following phases reuse a subsample of the trackers that made
        their way to a story end. Each reused tracker is copied, cleaned
        up and stored under `STORY_START`.

        The returned lookup is empty if the config disables story
        concatenation.
        """
        lookup = defaultdict(list)
        settings = self.config

        if not settings.use_story_concatenation:
            return lookup

        reusable = utils.subsample_array(story_end_trackers,
                                         settings.augmentation_factor,
                                         rand=settings.rand)
        for source_tracker in reusable:
            # the tracker is copied so the original one is not updated
            clone = source_tracker.copy()
            # stories end and start with action listen; to avoid two
            # consecutive action listens in the next phase the last
            # one is reverted here
            clone.update(ActionReverted())
            lookup[STORY_START].append(clone)

        return lookup
Example #10
0
    def _subsample_trackers(self, incoming_trackers):
        # type: (List[DialogueStateTracker]) -> List[DialogueStateTracker]
        """Return a random subset of the trackers if a limit is configured.

        Long flows with a lot of forks produce too many trackers, which
        is why they are subsampled with `config.max_number_of_trackers`.
        Without a configured limit (`None`) the incoming list is
        returned as is.
        """
        limit = self.config.max_number_of_trackers
        if limit is None:
            return incoming_trackers

        return utils.subsample_array(incoming_trackers,
                                     limit,
                                     rand=self.config.rand)
Example #11
0
    def _subsample_trackers(self, incoming_trackers):
        # type: (List[DialogueStateTracker]) -> List[DialogueStateTracker]
        """Subsample the trackers with the configured maximum, if any.

        Very long flows with many forks would otherwise collect too
        many trackers. If `config.max_number_of_trackers` is `None`,
        no subsampling takes place and the input is returned unchanged.
        """
        max_trackers = self.config.max_number_of_trackers
        unlimited = max_trackers is None

        if unlimited:
            return incoming_trackers

        return utils.subsample_array(incoming_trackers,
                                     max_trackers,
                                     rand=self.config.rand)
Example #12
0
    def _subsample_trackers(
            self, incoming_trackers: List[TrackerWithCachedStates],
            max_number_of_trackers: int) -> List[TrackerWithCachedStates]:
        """Return a random subset of the trackers, limited by the given maximum.

        Flows that get very long and fork a lot collect too many
        trackers, hence the sub sampling. Passing `None` as
        `max_number_of_trackers` skips the sub sampling and returns the
        incoming list unchanged.
        """
        if max_number_of_trackers is None:
            return incoming_trackers

        return utils.subsample_array(incoming_trackers,
                                     max_number_of_trackers,
                                     rand=self.config.rand)
Example #13
0
    def build_stories(self, domain, max_number_of_trackers=2000):
        # type: (Domain, int) -> List[Story]
        """Build the stories of a graph.

        Walks over the ordered story steps and extends every story that
        reached a step's start checkpoint by that step. Steps without
        explicit events do not produce any extended stories.

        Args:
            domain: passed to each step to retrieve its explicit events.
            max_number_of_trackers: limit used to subsample the stories
                entering a step; `None` disables the subsampling.

        Returns:
            The stories that reached the end checkpoint `None`, or an
            empty list if no story reached it.
        """
        from rasa_core.training_utils.dsl import STORY_START, Story

        active_trackers = {STORY_START: [Story()]}
        # fixed seed, so the subsampling is reproducible between runs
        rand = random.Random(42)

        for step in self.ordered_steps():
            start_name = step.start_checkpoint_name()
            if start_name not in active_trackers:
                continue

            # these are the trackers that reached this story step
            # and that need to handle all events of the step
            incoming_trackers = active_trackers[start_name]

            # TODO: we can't use tracker filter here to filter for
            #       checkpoint conditions since we don't have trackers.
            #       this code should rather use the code from the dsl.

            if max_number_of_trackers is not None:
                incoming_trackers = utils.subsample_array(
                    incoming_trackers, max_number_of_trackers, rand)

            events = step.explicit_events(domain)
            # need to copy the tracker as multiple story steps might
            # start with the same checkpoint and all of them
            # will use the same set of incoming trackers
            if events:
                trackers = [
                    Story(tracker.story_steps + [step])
                    for tracker in incoming_trackers
                ]
            else:
                trackers = []  # small optimization

            # update our tracker dictionary with the trackers that handled
            # the events of the step and that can now be used for further
            # story steps that start with the checkpoint this step ended on
            active_trackers.setdefault(
                step.end_checkpoint_name(), []).extend(trackers)

        # a graph without any step that ends a story has no `None` entry;
        # return an empty result instead of failing with a KeyError
        return active_trackers.get(None, [])