Ejemplo n.º 1
0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Round-trip the test stories through ``Story.dump_to_file``.

    The story steps of the extracted graph are written to a temporary file,
    loaded again, and every tracker recovered from that file must export to
    a story string that the original story file produces as well.
    """
    source_file = "data/test_stories/stories.md"
    # Both loads must use identical generator settings to be comparable.
    loader_options = dict(use_story_concatenation=False,
                          tracker_limit=1000,
                          remove_duplicates=False)

    story_graph = training.extract_story_graph(source_file, default_domain)
    persisted_file = tmpdir.join("persisted_story.md").strpath
    Story(story_graph.story_steps).dump_to_file(persisted_file)

    recovered_trackers = training.load_data(persisted_file, default_domain,
                                            **loader_options)
    existing_trackers = training.load_data(source_file, default_domain,
                                           **loader_options)

    # An original story is removed from the pool once it has been matched,
    # so it cannot be matched by a second recovered tracker.
    unmatched = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        assert exported in unmatched
        unmatched.discard(exported)
Ejemplo n.º 2
0
def load_data(
        resource_name,  # type: Text
        domain,  # type: Domain
        remove_duplicates=True,  # type: bool
        unique_last_num_states=None,  # type: Optional[int]
        augmentation_factor=20,  # type: int
        max_number_of_trackers=None,  # deprecated
        tracker_limit=None,  # type: Optional[int]
        use_story_concatenation=True,  # type: bool
        debug_plots=False  # type: bool
):
    # type: (...) -> List[DialogueStateTracker]
    """Generate trackers from the stories found at ``resource_name``.

    The story graph is extracted for ``domain`` and handed, together with
    all remaining options, to a ``TrainingDataGenerator``; the result of its
    ``generate()`` call is returned. A falsy ``resource_name`` yields an
    empty list without touching the generator.
    """
    # Imported at call time rather than module level, as in the original.
    from rasa_core.training import extract_story_graph
    from rasa_core.training.generator import TrainingDataGenerator

    if not resource_name:
        return []

    story_graph = extract_story_graph(resource_name, domain)
    generator = TrainingDataGenerator(
        story_graph,
        domain,
        remove_duplicates,
        unique_last_num_states,
        augmentation_factor,
        max_number_of_trackers,
        tracker_limit,
        use_story_concatenation,
        debug_plots,
    )
    return generator.generate()
Ejemplo n.º 3
0
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """Round-trip the test stories through ``graph.as_story_string()``.

    The whole graph is serialised to a UTF-8 file, loaded again, and every
    tracker recovered from that file must export to a story string that the
    original story file produces as well.
    """
    source_file = "data/test_stories/stories.md"
    persisted_file = tmpdir.join("persisted_story.md").strpath

    story_graph = training.extract_story_graph(source_file, default_domain)
    serialised = story_graph.as_story_string()
    with io.open(persisted_file, "w", encoding="utf-8") as story_file:
        story_file.write(serialised)

    def load_trackers(path):
        # Identical generator settings for both files keep them comparable.
        return training.load_data(
            path,
            default_domain,
            use_story_concatenation=False,
            tracker_limit=1000,
            remove_duplicates=False
        )

    recovered_trackers = load_trackers(persisted_file)
    existing_trackers = load_trackers(source_file)

    # Matched stories are dropped from the pool so each is used only once.
    unmatched = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        assert exported in unmatched
        unmatched.discard(exported)
Ejemplo n.º 4
0
def load_data(
    resource_name: Text,
    domain: 'Domain',
    remove_duplicates: bool = True,
    unique_last_num_states: Optional[int] = None,
    augmentation_factor: int = 20,
    tracker_limit: Optional[int] = None,
    use_story_concatenation: bool = True,
    debug_plots: bool = False,
    exclusion_percentage: Optional[int] = None
) -> List['DialogueStateTracker']:
    """Generate trackers from the stories found at ``resource_name``.

    Args:
        resource_name: Location of the story data. A falsy value (``None``
            or an empty string) short-circuits to an empty list.
        domain: Domain passed to both the story graph extraction and the
            training data generator.
        remove_duplicates: Forwarded to ``TrainingDataGenerator``.
        unique_last_num_states: Forwarded to ``TrainingDataGenerator``.
        augmentation_factor: Forwarded to ``TrainingDataGenerator``.
        tracker_limit: Forwarded to ``TrainingDataGenerator``.
        use_story_concatenation: Forwarded to ``TrainingDataGenerator``.
        debug_plots: Forwarded to ``TrainingDataGenerator``.
        exclusion_percentage: Forwarded to ``extract_story_graph``; ``None``
            is an accepted value, hence the explicit ``Optional``.

    Returns:
        Whatever ``TrainingDataGenerator.generate()`` returns, or ``[]``
        when there is no resource to read.
    """
    if not resource_name:
        # Nothing to load, so return before the deferred training imports.
        return []

    # Imported at call time rather than module level, as in the original.
    from rasa_core.training import extract_story_graph
    from rasa_core.training.generator import TrainingDataGenerator

    graph = extract_story_graph(resource_name, domain,
                                exclusion_percentage=exclusion_percentage)

    g = TrainingDataGenerator(graph, domain,
                              remove_duplicates,
                              unique_last_num_states,
                              augmentation_factor,
                              tracker_limit,
                              use_story_concatenation,
                              debug_plots)
    return g.generate()
Ejemplo n.º 5
0
def load_data(
        resource_name,  # type: Text
        domain,  # type: Domain
        remove_duplicates=True,  # type: bool
        unique_last_num_states=None,  # type: Optional[int]
        augmentation_factor=20,  # type: int
        tracker_limit=None,  # type: Optional[int]
        use_story_concatenation=True,  # type: bool
        debug_plots=False  # type: bool
):
    # type: (...) -> List[DialogueStateTracker]
    """Build trackers from the story data at ``resource_name``.

    Extracts the story graph for ``domain``, feeds it with the remaining
    options into a ``TrainingDataGenerator`` and returns the outcome of
    ``generate()``. Returns an empty list when ``resource_name`` is falsy.
    """
    # Imported at call time rather than module level, as in the original.
    from rasa_core.training import extract_story_graph
    from rasa_core.training.generator import TrainingDataGenerator

    trackers = []
    if resource_name:
        story_graph = extract_story_graph(resource_name, domain)
        generator_args = (
            story_graph,
            domain,
            remove_duplicates,
            unique_last_num_states,
            augmentation_factor,
            tracker_limit,
            use_story_concatenation,
            debug_plots,
        )
        trackers = TrainingDataGenerator(*generator_args).generate()
    return trackers
Ejemplo n.º 6
0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Check that stories survive a dump-to-file / load-from-file cycle.

    Trackers loaded from the file written by ``Story.dump_to_file`` must
    each export a story string that the original story file also yields.
    """
    original_path = "data/test_stories/stories.md"
    dumped_path = tmpdir.join("persisted_story.md").strpath

    graph = training.extract_story_graph(original_path, default_domain)
    Story(graph.story_steps).dump_to_file(dumped_path)

    def trackers_from(path):
        # The same generator settings are applied to both story files.
        return training.load_data(
            path,
            default_domain,
            use_story_concatenation=False,
            tracker_limit=1000,
            remove_duplicates=False
        )

    recovered_trackers = trackers_from(dumped_path)
    existing_trackers = trackers_from(original_path)

    # Each original story can be consumed by one recovered tracker only.
    pool = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        assert exported in pool
        pool.discard(exported)
Ejemplo n.º 7
0
def _generate_trackers(resource_name, agent, max_stories=None, use_e2e=False):
    """Generate trackers for the stories at ``resource_name``.

    The story graph is extracted with the agent's domain and interpreter
    (plus the ``use_e2e`` flag); trackers are then generated without story
    concatenation or augmentation, with ``max_stories`` as tracker limit.
    """
    story_graph = training.extract_story_graph(
        resource_name, agent.domain, agent.interpreter, use_e2e)

    generator_options = {
        "use_story_concatenation": False,
        "augmentation_factor": 0,
        "tracker_limit": max_stories,
    }
    generator = TrainingDataGenerator(story_graph, agent.domain,
                                      **generator_options)
    return generator.generate()
Ejemplo n.º 8
0
def test_read_story_file_with_cycles(tmpdir, default_domain):
    """Removing cycles must keep all story steps but clear the cyclic edges."""
    expected_step_count = 5

    cyclic_graph = training.extract_story_graph(
        "data/test_stories/stories_with_cycle.md", default_domain)
    assert len(cyclic_graph.story_steps) == expected_step_count

    acyclic_graph = cyclic_graph.with_cycles_removed()

    # Only the derived graph is expected to be cycle free; the graph it was
    # derived from must still report its cyclic edges.
    assert cyclic_graph.cyclic_edge_ids != set()
    assert acyclic_graph.cyclic_edge_ids == set()

    assert (len(cyclic_graph.story_steps)
            == len(acyclic_graph.story_steps)
            == expected_step_count)

    assert len(acyclic_graph.story_end_checkpoints) == 2
Ejemplo n.º 9
0
def test_read_story_file_with_cycles(tmpdir, default_domain):
    """Verify cycle removal on the cyclic example story file.

    The source graph has five steps and at least one cyclic edge; the graph
    returned by ``with_cycles_removed()`` keeps the five steps, has no
    cyclic edges and exposes two story end checkpoints.
    """
    story_file = "data/test_stories/stories_with_cycle.md"
    graph = training.extract_story_graph(story_file, default_domain)

    assert len(graph.story_steps) == 5

    without_cycles = graph.with_cycles_removed()
    no_edges = set()

    assert graph.cyclic_edge_ids != no_edges
    assert without_cycles.cyclic_edge_ids == no_edges

    # Cycle removal must neither drop nor add story steps.
    assert len(graph.story_steps) == len(without_cycles.story_steps) == 5

    assert len(without_cycles.story_end_checkpoints) == 2
Ejemplo n.º 10
0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Round-trip the test stories through ``Story.dump_to_file``.

    Trackers extracted from the dumped file must each export a story string
    that is also produced by trackers extracted from the original file.
    """
    stories_path = "data/test_stories/stories.md"
    dumped_path = tmpdir.join("persisted_story.md").strpath

    story_graph = training.extract_story_graph(stories_path, default_domain)
    Story(story_graph.story_steps).dump_to_file(dumped_path)

    # A fresh featurizer instance is used for each extraction.
    recovered_trackers = training.extract_trackers(
        dumped_path, default_domain, BinaryFeaturizer())
    existing_trackers = training.extract_trackers(
        stories_path, default_domain, BinaryFeaturizer())

    # Matched stories leave the pool so that each one is used at most once.
    unmatched = {tracker.export_stories() for tracker in existing_trackers}
    for tracker in recovered_trackers:
        exported = tracker.export_stories()
        assert exported in unmatched
        unmatched.discard(exported)
Ejemplo n.º 11
0
def test_visualize_training_data_graph(tmpdir, default_domain):
    """Plot the cycle-free story graph and sanity check the result."""
    story_graph = training.extract_story_graph(
        "data/test_stories/stories_with_cycle.md", default_domain)
    acyclic_graph = story_graph.with_cycles_removed()

    image_path = tmpdir.join("graph.png").strpath

    # visualize() returns the networkx graph that was plotted
    plotted = acyclic_graph.visualize(image_path)

    assert os.path.exists(image_path)

    # The exact topology cannot be verified here; matching node ids and the
    # edge count are taken as sufficient evidence of a sane graph.
    expected_nodes = set(range(-1, 14))
    expected_edge_count = 16
    assert set(plotted.nodes()) == expected_nodes
    assert len(plotted.edges()) == expected_edge_count
Ejemplo n.º 12
0
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """Round-trip the test stories through ``graph.as_story_string()``.

    The serialised graph is written to a temporary file, read back, and
    every tracker recovered from that file must export a story string that
    is also produced by trackers extracted from the original file.
    """
    graph = training.extract_story_graph("data/test_stories/stories.md",
                                         default_domain)
    out_path = tmpdir.join("persisted_story.md")
    # Write with an explicit encoding: without it the file is encoded with
    # the platform default, which makes the round trip locale dependent.
    with io.open(out_path.strpath, "w", encoding="utf-8") as f:
        f.write(graph.as_story_string())

    recovered_trackers = training.extract_trackers(out_path.strpath,
                                                   default_domain,
                                                   BinaryFeaturizer())
    existing_trackers = training.extract_trackers(
        "data/test_stories/stories.md", default_domain, BinaryFeaturizer())

    # Matched stories are discarded so that each one is consumed only once.
    existing_stories = {t.export_stories() for t in existing_trackers}
    for t in recovered_trackers:
        story_str = t.export_stories()
        assert story_str in existing_stories
        existing_stories.discard(story_str)
Ejemplo n.º 13
0
def test_visualize_training_data_graph(tmpdir, default_domain):
    """Plot the cycle-free story graph and sanity check the result.

    Two outcomes are accepted: node ids ``-1..12`` with 14 edges, or node
    ids ``-1..13`` with 16 edges.
    """
    story_graph = training.extract_story_graph(
        "data/test_stories/stories_with_cycle.md", default_domain)
    acyclic_graph = story_graph.with_cycles_removed()

    image_path = tmpdir.join("graph.png").strpath

    # visualize() returns the networkx graph that was plotted
    plotted = acyclic_graph.visualize(image_path)

    assert os.path.exists(image_path)

    # The exact topology cannot be verified here; a known node set paired
    # with its edge count is taken as sufficient evidence of a sane graph.
    node_ids = set(plotted.nodes())
    smaller_nodes = set(range(-1, 13))
    larger_nodes = set(range(-1, 14))
    assert node_ids == smaller_nodes or node_ids == larger_nodes

    expected_edge_count = 14 if node_ids == smaller_nodes else 16
    assert len(plotted.edges()) == expected_edge_count
Ejemplo n.º 14
0
def collect_story_predictions(resource_name, policy_model_path, nlu_model_path,
                              max_stories):
    """Test the stories from a file, running them through the stored model.

    Args:
        resource_name: Location of the stories to evaluate.
        policy_model_path: Path the agent is loaded from.
        nlu_model_path: Model directory for a ``RasaNLUInterpreter``; when
            ``None`` a ``RegexInterpreter`` is used instead.
        max_stories: Passed as ``tracker_limit`` to the generator.

    Returns:
        A tuple ``(actual, preds, failed_stories)``. ``actual`` and
        ``preds`` are flat, index-aligned lists of action names over all
        evaluated stories, padded with the string ``"None"`` where one side
        has fewer actions. ``failed_stories`` holds one
        ``{"predicted": [...], "actual": [...]}`` dict for every story
        whose two lists differ.
    """

    if nlu_model_path is not None:
        interpreter = RasaNLUInterpreter(model_directory=nlu_model_path)
    else:
        interpreter = RegexInterpreter()

    agent = Agent.load(policy_model_path, interpreter=interpreter)
    story_graph = training.extract_story_graph(resource_name, agent.domain,
                                               interpreter)
    preds = []
    actual = []

    g = TrainingDataGenerator(story_graph,
                              agent.domain,
                              use_story_concatenation=False,
                              tracker_limit=max_stories)
    completed_trackers = g.generate()

    failed_stories = []

    logger.info("Evaluating {} stories\nProgress:"
                "".format(len(completed_trackers)))

    for tracker in tqdm(completed_trackers):
        # A fresh sender id keeps each replayed story in its own conversation.
        sender_id = "default-" + uuid.uuid4().hex
        story = {"predicted": [], "actual": []}
        events = list(tracker.events)
        actions_between_utterances = []
        last_prediction = []

        # The first recorded event is deliberately skipped, as before.
        for event in events[1:]:
            if isinstance(event, UserUttered):
                # A new utterance closes the previous turn: align what was
                # predicted for it with what the story actually contains.
                p, a = align_lists(last_prediction, actions_between_utterances)
                story["predicted"].extend(p)
                story["actual"].extend(a)
                actions_between_utterances = []
                agent.handle_message(event.text, sender_id=sender_id)
                live_tracker = agent.tracker_store.retrieve(sender_id)
                last_prediction = actions_since_last_utterance(live_tracker)

            elif isinstance(event, ActionExecuted):
                actions_between_utterances.append(event.action_name)

        if last_prediction:
            # The final turn is not followed by an utterance, so it has to
            # be flushed here. Its actions are appended to the story lists
            # first and the shorter side is padded afterwards; multiplying
            # a list by a negative number yields an empty list, so only one
            # side ever receives padding. Adding the final turn to the
            # story keeps `preds`/`actual` index-aligned and lets a wrong
            # final prediction mark the story as failed.
            preds_padding = (len(actions_between_utterances) -
                             len(last_prediction))
            actual_padding = (len(last_prediction) -
                              len(actions_between_utterances))

            story["predicted"].extend(last_prediction)
            story["predicted"].extend(["None"] * preds_padding)

            story["actual"].extend(actions_between_utterances)
            story["actual"].extend(["None"] * actual_padding)

            preds.extend(story["predicted"])
            actual.extend(story["actual"])

        if story["predicted"] != story["actual"]:
            failed_stories.append(story)

    return actual, preds, failed_stories
Ejemplo n.º 15
0
def collect_story_predictions(resource_name,
                              policy_model_path,
                              nlu_model_path,
                              max_stories=None,
                              shuffle_stories=True):
    """Test the stories from a file, running them through the stored model.

    Args:
        resource_name: Location of the stories to evaluate.
        policy_model_path: Path the agent is loaded from.
        nlu_model_path: Model directory for a ``RasaNLUInterpreter``; when
            ``None`` a ``RegexInterpreter`` is used instead.
        max_stories: Tracker limit handed to the generator. ``None`` keeps
            the previous fixed limit of 100.
        shuffle_stories: Accepted for interface compatibility; not used by
            this function.

    Returns:
        A tuple ``(actual, preds)`` of flat, index-aligned lists of action
        names over all evaluated stories, padded with the string ``"None"``
        where one side has fewer actions.
    """

    if nlu_model_path is not None:
        interpreter = RasaNLUInterpreter(model_directory=nlu_model_path)
    else:
        interpreter = RegexInterpreter()

    agent = Agent.load(policy_model_path, interpreter=interpreter)
    story_graph = training.extract_story_graph(resource_name, agent.domain,
                                               interpreter)
    preds = []
    actual = []

    # The history length is taken from the first policy of the ensemble.
    max_history = agent.policy_ensemble.policies[0].max_history

    # Honour the caller's limit instead of silently ignoring it; fall back
    # to the former hard-coded value so the default behaviour is unchanged.
    tracker_limit = 100 if max_stories is None else max_stories

    g = TrainingsDataGenerator(story_graph,
                               agent.domain,
                               agent.featurizer,
                               max_history=max_history,
                               use_story_concatenation=False,
                               tracker_limit=tracker_limit)
    data = g.generate()

    completed_trackers = data.metadata["trackers"]
    logger.info("Evaluating {} stories\nProgress:".format(
        len(completed_trackers)))

    for tracker in tqdm(completed_trackers):
        # A fresh sender id keeps each replayed story in its own conversation.
        sender_id = "default-" + uuid.uuid4().hex

        events = list(tracker.events)
        actions_between_utterances = []
        last_prediction = []

        # The first recorded event is deliberately skipped, as before.
        for event in events[1:]:
            if isinstance(event, UserUttered):
                # A new utterance closes the previous turn: align what was
                # predicted for it with what the story actually contains.
                p, a = align_lists(last_prediction, actions_between_utterances)
                preds.extend(p)
                actual.extend(a)

                actions_between_utterances = []
                agent.handle_message(event.text, sender_id=sender_id)
                live_tracker = agent.tracker_store.retrieve(sender_id)
                last_prediction = actions_since_last_utterance(live_tracker)

            elif isinstance(event, ActionExecuted):
                actions_between_utterances.append(event.action_name)

        if last_prediction:
            # Flush the final turn, padding the shorter side with "None".
            # Multiplying a list by a negative number yields an empty list,
            # so only one of the two sides ever receives padding.
            preds.extend(last_prediction)
            preds_padding = (len(actions_between_utterances) -
                             len(last_prediction))
            preds.extend(["None"] * preds_padding)

            actual.extend(actions_between_utterances)
            actual_padding = (len(last_prediction) -
                              len(actions_between_utterances))
            actual.extend(["None"] * actual_padding)

    return actual, preds
Ejemplo n.º 16
0
def collect_story_predictions(resource_name, policy_model_path, nlu_model_path,
                              max_stories):
    """Test the stories from a file, running them through the stored model.

    Args:
        resource_name: Location of the stories to evaluate.
        policy_model_path: Path the agent is loaded from.
        nlu_model_path: Model directory for a ``RasaNLUInterpreter``; when
            ``None`` a ``RegexInterpreter`` is used instead.
        max_stories: Passed as ``tracker_limit`` to the generator.

    Returns:
        A tuple ``(actual, preds, failed_stories)``. ``actual`` and
        ``preds`` are flat, index-aligned lists of action names over all
        evaluated stories, padded with the string ``"None"`` where one side
        has fewer actions. ``failed_stories`` holds one
        ``{"predicted": [...], "actual": [...]}`` dict for every story
        whose two lists differ.
    """

    if nlu_model_path is not None:
        interpreter = RasaNLUInterpreter(model_directory=nlu_model_path)
    else:
        interpreter = RegexInterpreter()

    agent = Agent.load(policy_model_path, interpreter=interpreter)
    story_graph = training.extract_story_graph(resource_name, agent.domain,
                                               interpreter)
    preds = []
    actual = []

    g = TrainingDataGenerator(story_graph, agent.domain,
                              use_story_concatenation=False,
                              tracker_limit=max_stories)
    completed_trackers = g.generate()

    failed_stories = []

    logger.info("Evaluating {} stories\nProgress:"
                "".format(len(completed_trackers)))

    for tracker in tqdm(completed_trackers):
        # A fresh sender id keeps each replayed story in its own conversation.
        sender_id = "default-" + uuid.uuid4().hex
        story = {"predicted": [], "actual": []}
        events = list(tracker.events)
        actions_between_utterances = []
        last_prediction = []

        # The first recorded event is deliberately skipped, as before.
        for event in events[1:]:
            if isinstance(event, UserUttered):
                # A new utterance closes the previous turn: align what was
                # predicted for it with what the story actually contains.
                p, a = align_lists(last_prediction, actions_between_utterances)
                story["predicted"].extend(p)
                story["actual"].extend(a)
                actions_between_utterances = []
                agent.handle_message(event.text, sender_id=sender_id)
                live_tracker = agent.tracker_store.retrieve(sender_id)
                last_prediction = actions_since_last_utterance(live_tracker)

            elif isinstance(event, ActionExecuted):
                actions_between_utterances.append(event.action_name)

        if last_prediction:
            # The final turn is not followed by an utterance, so it has to
            # be flushed here. Its actions are appended to the story lists
            # first and the shorter side is padded afterwards; multiplying
            # a list by a negative number yields an empty list, so only one
            # side ever receives padding. Adding the final turn to the
            # story keeps `preds`/`actual` index-aligned and lets a wrong
            # final prediction mark the story as failed.
            preds_padding = (len(actions_between_utterances) -
                             len(last_prediction))
            actual_padding = (len(last_prediction) -
                              len(actions_between_utterances))

            story["predicted"].extend(last_prediction)
            story["predicted"].extend(["None"] * preds_padding)

            story["actual"].extend(actions_between_utterances)
            story["actual"].extend(["None"] * actual_padding)

            preds.extend(story["predicted"])
            actual.extend(story["actual"])

        if story["predicted"] != story["actual"]:
            failed_stories.append(story)

    return actual, preds, failed_stories