Ejemplo n.º 1
0
def load_data(
    resource_name: Union[Text, "TrainingDataImporter"],
    domain: "Domain",
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 50,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> List["TrackerWithCachedStates"]:
    """Build training trackers from a story resource.

    The story graph is obtained either from an importer (via its
    ``get_stories`` method) or, for any other truthy resource, from
    ``extract_story_graph``. The graph is then handed to a
    ``TrainingDataGenerator`` whose ``generate()`` result is returned.

    Args:
        resource_name: Where to load the data from; either a path or a
            ``TrainingDataImporter`` instance.
        domain: Domain used for loading.
        remove_duplicates: Whether duplicated training examples should be
            removed.
        unique_last_num_states: Number of states in a conversation that make
            a tracker unique (used to identify duplicates).
        augmentation_factor: By how much the story training data should be
            augmented.
        tracker_limit: Maximum number of trackers to generate during
            augmentation.
        use_story_concatenation: Whether stories should be concatenated when
            doing data augmentation.
        debug_plots: Whether to generate debug plots during loading.
        exclusion_percentage: How much data to exclude.

    Returns:
        The list of loaded trackers, or an empty list when ``resource_name``
        is falsy.
    """
    # Function-local imports, executed on every call before anything else.
    from rasa.shared.core.generator import TrainingDataGenerator
    from rasa.shared.importers.importer import TrainingDataImporter

    # Nothing to load from: return an empty tracker list.
    if not resource_name:
        return []

    if isinstance(resource_name, TrainingDataImporter):
        # An importer already knows how to produce its own story graph.
        story_graph = resource_name.get_stories(
            exclusion_percentage=exclusion_percentage
        )
    else:
        story_graph = extract_story_graph(
            resource_name, domain, exclusion_percentage=exclusion_percentage
        )

    # Generator options are forwarded positionally, in the same order as
    # they appear in this function's signature.
    generator = TrainingDataGenerator(
        story_graph,
        domain,
        remove_duplicates,
        unique_last_num_states,
        augmentation_factor,
        tracker_limit,
        use_story_concatenation,
        debug_plots,
    )
    return generator.generate()
Ejemplo n.º 2
0
def visualize_stories(
    story_steps: List[StoryStep],
    domain: Domain,
    output_file: Optional[Text],
    max_history: int,
    nlu_training_data: Optional["TrainingData"] = None,
    should_merge_nodes: bool = True,
    fontsize: int = 12,
) -> "networkx.MultiDiGraph":
    """Build a graph that visualizes the conversation flows of the given stories.

    Visualization is a trade-off between keeping the graph as small as
    possible and making sure its meaning does not change too much. The graph
    generated from the stories is compressed by merging similar nodes, so it
    may contain paths that are not actually specified in the stories; the
    aim is to keep those to a minimum.

    Args:
        story_steps: The story steps to visualize; they are wrapped in a
            ``StoryGraph`` before trackers are generated.
        domain: The domain passed to the ``TrainingDataGenerator``.
        output_file: Defines if and where a file containing the plotted graph
            should be stored.
        max_history: How much "memory" the graph has. Nodes are only merged
            if they are equal within the history, so the larger the history,
            the less likely it is that any nodes get merged.
        nlu_training_data: Optional Rasa NLU training data instance. It is
            used to replace the user messages from the story file with actual
            messages from the training data.
        should_merge_nodes: Forwarded unchanged to ``visualize_neighborhood``.
        fontsize: Forwarded unchanged to ``visualize_neighborhood``.

    Returns:
        The graph produced by ``visualize_neighborhood``.
    """
    # Trackers are generated without story concatenation or augmentation and
    # with a tracker limit of 100.
    generator = TrainingDataGenerator(
        StoryGraph(story_steps),
        domain,
        use_story_concatenation=False,
        tracker_limit=100,
        augmentation_factor=0,
    )
    event_sequences = [tracker.events for tracker in generator.generate()]

    # ``None`` is passed for the first positional parameter of
    # ``visualize_neighborhood``.
    return visualize_neighborhood(
        None,
        event_sequences,
        output_file,
        max_history,
        nlu_training_data,
        should_merge_nodes,
        max_distance=1,
        fontsize=fontsize,
    )
Ejemplo n.º 3
0
    def provide(
        self, story_graph: StoryGraph, domain: Domain
    ) -> List[TrackerWithCachedStates]:
        """Generate trackers from a story graph.

        A ``TrainingDataGenerator`` is built from the story graph, the domain
        and the keyword options stored in ``self._config``; the result of its
        ``generate()`` call is returned.

        Args:
            story_graph: The story graph to generate trackers from.
            domain: The domain of the model.

        Returns:
            The trackers produced by the generator.
        """
        return TrainingDataGenerator(story_graph, domain, **self._config).generate()