def test_create_train_data_no_history(default_domain):
    """With ``max_history=1`` every training sample is a single featurized
    turn; decoding the binary features must reproduce the expected
    intent / previous-action combinations (``None`` = padding)."""
    featurizer = BinaryFeaturizer()
    X, y = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        augmentation_factor=0,
        domain=default_domain,
        featurizer=featurizer,
        max_history=1
    )
    # 11 samples, history length 1, 10 binary input features
    assert X.shape == (11, 1, 10)
    decoded = [featurizer.decode(X[i, :, :], default_domain.input_features)
               for i in range(0, 11)]
    # each entry is one sample's (single-turn) active features
    assert decoded == [
        [None],
        [[('intent_goodbye', 1), ('prev_utter_goodbye', 1)]],
        [[('intent_goodbye', 1), ('prev_action_listen', 1)]],
        [[('intent_default', 1), ('prev_utter_default', 1)]],
        [[('intent_default', 1), ('prev_action_listen', 1)]],
        [[('intent_default', 1), ('slot_name_0', 1),
          ('prev_utter_default', 1)]],
        [[('intent_default', 1), ('slot_name_0', 1),
          ('prev_action_listen', 1)]],
        [[('intent_greet', 1), ('prev_utter_greet', 1)]],
        [[('intent_greet', 1), ('prev_action_listen', 1)]],
        [[('intent_greet', 1), ('entity_name', 1), ('slot_name_0', 1),
          ('prev_utter_greet', 1)]],
        [[('intent_greet', 1), ('entity_name', 1), ('slot_name_0', 1),
          ('prev_action_listen', 1)]]]
def test_binary_featurizer_uses_correct_dtype_float():
    """Encoding a state containing float feature values must produce a
    float64 array."""
    featurizer = BinaryFeaturizer()
    state = {"a": 1.0, "b": 0.2, "c": 0.0}
    feature_lookup = {"a": 0, "b": 3, "c": 2, "d": 1}
    encoded = featurizer.encode(state, feature_lookup)
    assert encoded.dtype == np.float64
def test_binary_featurizer_handles_on_non_existing_features():
    """Active state features missing from the lookup (here ``"e"``) are
    silently ignored by the encoder."""
    featurizer = BinaryFeaturizer()
    state = {"a": 1.0, "b": 1.0, "c": 0.0, "e": 1.0}
    feature_lookup = {"a": 0, "b": 3, "c": 2, "d": 1}
    encoded = featurizer.encode(state, feature_lookup)
    expected = np.array([1, 0, 0, 1])
    assert (encoded == expected).all()
def test_can_read_test_story(default_domain):
    """Reading the markdown stories must yield the expected trackers and
    event sequence for the 5-event story."""
    trackers = extract_trackers_from_file("data/test_stories/stories.md",
                                          default_domain,
                                          featurizer=BinaryFeaturizer())
    assert len(trackers) == 7
    # this should be the story simple_story_with_only_end -> show_it_all
    # the generated stories are in a non stable order - therefore we need to
    # do some trickery to find the one we want to test
    five_event_trackers = [t for t in trackers if len(t.events) == 5]
    tracker = five_event_trackers[0]
    expected_parse_data = {
        'text': 'simple',
        'intent_ranking': [{'confidence': 1.0, 'name': 'simple'}],
        'intent': {'confidence': 1.0, 'name': 'simple'},
        'entities': []
    }
    assert tracker.events[0] == ActionExecuted("action_listen")
    assert tracker.events[1] == UserUttered(
        "simple",
        intent={"name": "simple", "confidence": 1.0},
        parse_data=expected_parse_data)
    assert tracker.events[2] == ActionExecuted("utter_default")
    assert tracker.events[3] == ActionExecuted("utter_greet")
    assert tracker.events[4] == ActionExecuted("action_listen")
def test_load_multi_file_training_data(default_domain):
    # the stories file in `data/test_multifile_stories` is the same as in
    # `data/test_stories/stories.md`, but split across multiple files
    single_file_data = training.extract_training_data(
        "data/test_stories/stories.md", default_domain,
        featurizer=BinaryFeaturizer(), max_history=2)
    multi_file_data = training.extract_training_data(
        "data/test_multifile_stories", default_domain,
        featurizer=BinaryFeaturizer(), max_history=2)
    # both layouts must produce identical training matrices
    assert np.all(single_file_data.X == multi_file_data.X)
    assert np.all(single_file_data.y == multi_file_data.y)
def test_missing_classes_filled_correctly(self, default_domain, data, tracker):
    # Pretend that a couple of classes are missing and check that
    # those classes are predicted as 0, while the other class
    # probabilities are predicted normally.
    policy = self.create_policy(featurizer=BinaryFeaturizer(),
                                max_history=self.max_history,
                                cv=None)
    classes = [3, 4, 7]
    labels = np.asarray([np.random.choice(classes) for _ in data.X])
    policy.train(DialogueTrainingData(data.X, labels), domain=default_domain)
    probabilities = policy.predict_action_probabilities(tracker,
                                                        default_domain)
    assert len(probabilities) == 8
    assert np.allclose(sum(probabilities), 1.0)
    for index, probability in enumerate(probabilities):
        if index in classes:
            assert probability >= 0.0
        else:
            assert probability == 0.0
def test_persist_and_read_test_story(tmpdir, default_domain):
    """Dumping a story graph to disk and re-reading it must only yield
    stories that also exist in the original data."""
    graph = extract_story_graph_from_file("data/test_stories/stories.md",
                                          default_domain)
    out_path = tmpdir.join("persisted_story.md")
    Story(graph.story_steps).dump_to_file(out_path.strpath)
    recovered_trackers = extract_trackers_from_file(out_path.strpath,
                                                    default_domain,
                                                    BinaryFeaturizer())
    existing_trackers = extract_trackers_from_file(
        "data/test_stories/stories.md", default_domain, BinaryFeaturizer())
    existing_stories = {t.export_stories() for t in existing_trackers}
    for recovered in recovered_trackers:
        story = recovered.export_stories()
        assert story in existing_stories
        # each original story may only be matched once
        existing_stories.discard(story)
def train_data(max_history, domain):
    """Deduplicated, binary-featurized training data from the default
    domain's stories file."""
    featurizer = BinaryFeaturizer()
    return extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        domain=domain,
        max_history=max_history,
        remove_duplicates=True,
        featurizer=featurizer)
def test_persist_and_load_empty_policy(self, tmpdir):
    """An untrained policy must survive a persist/load round trip."""
    empty_policy = self.create_policy()
    empty_policy.persist(tmpdir.strpath)
    policy_cls = empty_policy.__class__
    loaded = policy_cls.load(tmpdir.strpath,
                             BinaryFeaturizer(),
                             empty_policy.max_history)
    assert loaded is not None
def test_create_train_data_with_history(default_domain):
    """With ``max_history=4`` each sample stacks four turns of binary
    features; compare the raw feature matrix against a hand-written
    reference (rows of ``-1`` appear to mark padding before the start of
    a dialogue -- NOTE(review): confirm against the featurizer)."""
    featurizer = BinaryFeaturizer()
    X, y = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=featurizer,
        max_history=4)
    # one 4x9 matrix per training sample: 4 history turns x 9 features
    reference = np.array([
        [[0, 0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 0, 1, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 1, 0, 0, 1, 0, 0, 0, 0]],
        [[0, 1, 0, 0, 1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 1, 0, 0, 0, 0],
         [0, 0, 1, 0, 0, 0, 0, 0, 1]],
        [[1, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 1, 0, 0, 1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 1, 0, 1, 0, 0, 0, 0]],
        [[1, 0, 0, 0, 1, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 0, 1, 0],
         [0, 1, 0, 0, 1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0, 0, 1, 0, 0]],
        [[-1, -1, -1, -1, -1, -1, -1, -1, -1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 0, 1, 0, 0, 0, 0],
         [1, 0, 0, 0, 0, 0, 0, 1, 0]],
        [[-1, -1, -1, -1, -1, -1, -1, -1, -1],
         [-1, -1, -1, -1, -1, -1, -1, -1, -1],
         [-1, -1, -1, -1, -1, -1, -1, -1, -1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0]],
        [[-1, -1, -1, -1, -1, -1, -1, -1, -1],
         [-1, -1, -1, -1, -1, -1, -1, -1, -1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0],
         [1, 0, 0, 0, 1, 0, 0, 0, 0]],
    ])
    assert X.shape == reference.shape
    assert np.array_equal(X, reference)
def test_train_kwargs_are_set_on_model(self, default_domain, data):
    """Extra keyword arguments passed to ``train`` must be forwarded to
    the underlying model."""
    policy = self.create_policy(featurizer=BinaryFeaturizer(),
                                max_history=self.max_history,
                                cv=None)
    policy.train(data, domain=default_domain, C=123)
    assert policy.model.C == 123
def trained_policy(self):
    """Policy trained on the default domain's story data."""
    loaded_domain = TemplateDomain.load(DEFAULT_DOMAIN_PATH)
    policy = self.create_policy()
    X, y = train_data(self.max_history, loaded_domain)
    policy.max_history = self.max_history
    policy.featurizer = BinaryFeaturizer()
    policy.train(X, y, loaded_domain)
    return policy
def trained_policy(self):
    """Policy trained on the example default domain's story data."""
    loaded_domain = TemplateDomain.load("examples/default_domain.yml")
    policy = self.create_policy()
    X, y = train_data(self.max_history, loaded_domain)
    policy.max_history = self.max_history
    policy.featurizer = BinaryFeaturizer()
    policy.train(X, y, loaded_domain)
    return policy
def test_persist_and_read_test_story_graph(tmpdir, default_domain):
    """Serializing a story graph as a story string and re-reading it must
    only yield stories that also exist in the original data."""
    graph = training.extract_story_graph("data/test_stories/stories.md",
                                         default_domain)
    out_path = tmpdir.join("persisted_story.md")
    with io.open(out_path.strpath, "w") as f:
        f.write(graph.as_story_string())
    recovered_trackers = training.extract_trackers(out_path.strpath,
                                                   default_domain,
                                                   BinaryFeaturizer())
    existing_trackers = training.extract_trackers(
        "data/test_stories/stories.md", default_domain, BinaryFeaturizer())
    existing_stories = {t.export_stories() for t in existing_trackers}
    for recovered in recovered_trackers:
        story = recovered.export_stories()
        assert story in existing_stories
        # each original story may only be matched once
        existing_stories.discard(story)
def default_processor(default_domain):
    """Message processor backed by a scoring-policy ensemble trained on
    the default stories."""
    ensemble = SimplePolicyEnsemble([ScoringPolicy()])
    interpreter = RegexInterpreter()
    trainer = PolicyTrainer(ensemble, default_domain, BinaryFeaturizer())
    trainer.train(DEFAULT_STORIES_FILE, max_history=3)
    store = InMemoryTrackerStore(default_domain)
    return MessageProcessor(interpreter, ensemble, default_domain, store)
def test_tracker_write_to_story(tmpdir, default_domain):
    """A tracker exported as a story and re-read must keep its events."""
    tracker = tracker_from_dialogue_file(
        "data/test_dialogues/enter_name.json", default_domain)
    export_path = tmpdir.join("export.md")
    tracker.export_stories_to_file(export_path.strpath)
    trackers = extract_trackers_from_file(export_path.strpath,
                                          default_domain,
                                          BinaryFeaturizer())
    assert len(trackers) == 1
    recovered = trackers[0]
    assert len(recovered.events) == 8
    assert recovered.events[6] == SlotSet("location", "central")
def test_cv_none_does_not_trigger_search(self, mock_search, default_domain,
                                         data):
    """With ``cv=None`` training must not run a hyperparameter search."""
    policy = self.create_policy(featurizer=BinaryFeaturizer(),
                                max_history=self.max_history,
                                cv=None)
    policy.train(data, domain=default_domain)
    assert mock_search.call_count == 0
    assert policy.model != 'mockmodel'
def test_tracker_write_to_story(tmpdir, default_domain):
    """A tracker exported as a story and re-read must keep its events;
    the seventh event must be one of the expected slot sets."""
    tracker = tracker_from_dialogue_file(
        "data/test_dialogues/enter_name.json", default_domain)
    export_path = tmpdir.join("export.md")
    tracker.export_stories_to_file(export_path.strpath)
    trackers = training.extract_trackers(export_path.strpath,
                                         default_domain,
                                         BinaryFeaturizer())
    assert len(trackers) == 1
    recovered = trackers[0]
    assert len(recovered.events) == 8
    slot_event = recovered.events[6]
    assert slot_event.type_name == "slot"
    assert slot_event.key in {"location", "name"}
    assert slot_event.value in {"central", "holger"}
def test_cv_not_none_param_grid_none_triggers_search_without_params(
        self, mock_search, default_domain, data):
    """With ``cv`` set but no parameter grid, training must run a search
    with an empty grid."""
    policy = self.create_policy(featurizer=BinaryFeaturizer(),
                                max_history=self.max_history,
                                cv=3)
    policy.train(data, domain=default_domain)
    assert mock_search.call_count > 0
    first_call_kwargs = mock_search.call_args_list[0][1]
    assert first_call_kwargs['cv'] == 3
    assert first_call_kwargs['param_grid'] == {}
    assert policy.model == 'mockmodel'
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Stories containing cycles must still produce the expected number
    of examples and label sequence."""
    featurizer = BinaryFeaturizer()
    training_data = extract_training_data_from_file(
        "data/test_stories/stories_with_cycle.md", default_domain,
        featurizer, augmentation_factor=0, max_history=4)
    assert training_data.num_examples() == 15
    expected_labels = [2, 4, 0, 2, 4, 0, 1, 0, 2, 4, 0, 1, 0, 0, 3]
    np.testing.assert_array_equal(training_data.y, expected_labels)
def test_persist_and_load(self, trained_policy, default_domain, tmpdir):
    """A persisted policy, once re-loaded, must predict the same action
    probabilities as the original on every story tracker."""
    trained_policy.persist(tmpdir.strpath)
    policy_cls = trained_policy.__class__
    loaded = policy_cls.load(tmpdir.strpath,
                             trained_policy.featurizer,
                             trained_policy.max_history)
    trackers = extract_trackers(DEFAULT_STORIES_FILE, default_domain,
                                BinaryFeaturizer())
    for tracker in trackers:
        from_loaded = loaded.predict_action_probabilities(
            tracker, default_domain)
        from_original = trained_policy.predict_action_probabilities(
            tracker, default_domain)
        assert from_loaded == from_original
def test_continue_training_with_unsuitable_model_raises(
        self, default_domain, data):
    """Continuing training on a model without ``partial_fit`` support
    must raise a descriptive ``TypeError``."""
    policy = self.create_policy(featurizer=BinaryFeaturizer(),
                                max_history=self.max_history,
                                cv=None)
    policy.train(data, domain=default_domain)
    with pytest.raises(TypeError) as exc:
        policy.continue_training(data, domain=default_domain)
    expected_message = ("Continuing training is only possible with "
                        "sklearn models that support 'partial_fit'.")
    assert exc.value.args[0] == expected_message
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Dot-prefixed (hidden) files in a training directory must be
    skipped; with only empty files present no data is produced."""
    # create a hidden file
    open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close()
    # create a normal file
    open(os.path.join(tmpdir.strpath, "normal_file"), 'a').close()
    data = training.extract_training_data(tmpdir.strpath,
                                          default_domain,
                                          featurizer=BinaryFeaturizer(),
                                          max_history=2)
    assert len(data.X) == 0
    assert len(data.y) == 0
def test_message_processor(default_domain, capsys):
    """Handling a greet message must print the greeting template to the
    console output channel."""
    ensemble = SimplePolicyEnsemble([ScoringPolicy()])
    interpreter = RegexInterpreter()
    trainer = PolicyTrainer(ensemble, default_domain, BinaryFeaturizer())
    trainer.train("data/dsl_stories/stories_defaultdomain.md",
                  max_history=3)
    store = InMemoryTrackerStore(default_domain)
    processor = MessageProcessor(interpreter, ensemble, default_domain,
                                 store)
    processor.handle_message(UserMessage("_greet", ConsoleOutputChannel()))
    out, _ = capsys.readouterr()
    assert "hey there!" in out
def test_message_processor(default_domain, capsys):
    """Handling a greet message with an entity must produce the
    personalized greeting on the collecting channel."""
    ensemble = SimplePolicyEnsemble([ScoringPolicy()])
    interpreter = RegexInterpreter()
    trainer = PolicyTrainer(ensemble, default_domain, BinaryFeaturizer())
    trainer.train("data/dsl_stories/stories_defaultdomain.md",
                  max_history=3)
    store = InMemoryTrackerStore(default_domain)
    processor = MessageProcessor(interpreter, ensemble, default_domain,
                                 store)
    channel = CollectingOutputChannel()
    processor.handle_message(UserMessage("_greet[name=Core]", channel))
    assert ("default", "hey there Core!") == channel.latest_output()
def test_tracker_state_regression(default_domain):
    """Regression test: the tracker must accumulate events across
    consecutive messages instead of being reset between them."""

    class HelloInterpreter(NaturalLanguageInterpreter):
        # minimal interpreter: "greet" if the text mentions hello,
        # otherwise the fallback intent "nlu"
        def parse(self, text):
            intent = "greet" if 'hello' in text else "nlu"
            return {"text": text,
                    "intent": {"name": intent},
                    "entities": []}

    # Fix: use the `default_domain` fixture; the previous bare `domain`
    # reference was a NameError (or picked up an unrelated module-level
    # object). Also dropped the unused `n_actions` accumulator.
    agent = Agent(default_domain, [SimplePolicy()], BinaryFeaturizer(),
                  interpreter=HelloInterpreter())
    for _ in range(2):
        agent.handle_message("hello")
    tracker = agent.tracker_store.get_or_create_tracker('nlu')
    # Ensures that the tracker has changed between the utterances
    # (and wasn't reset in between them)
    expected = ("action_listen;"
                "_greet;utter_greet;action_listen;"
                "_greet;utter_greet;action_listen")
    assert ";".join([e.as_story_string() for e in tracker.events]) == expected
def train_data(max_history, domain):
    """Deduplicated, binary-featurized training data from the default
    stories file."""
    featurizer = BinaryFeaturizer()
    return extract_training_data_from_file(DEFAULT_STORIES_FILE,
                                           domain,
                                           featurizer,
                                           max_history=max_history,
                                           remove_duplicates=True)
def test_create_train_data_with_history(default_domain):
    """With ``max_history=4`` each sample carries up to four past turns;
    decoding the features must reproduce the expected per-turn feature
    lists (``None`` = padding before the start of a dialogue)."""
    featurizer = BinaryFeaturizer()
    training_data = extract_training_data_from_file(DEFAULT_STORIES_FILE,
                                                    default_domain,
                                                    featurizer,
                                                    augmentation_factor=0,
                                                    max_history=4)
    # 11 samples, history length 4, 10 binary input features
    assert training_data.X.shape == (11, 4, 10)
    decoded = [
        featurizer.decode(training_data.X[i, :, :],
                          default_domain.input_features)
        for i in range(0, 11)
    ]
    # each entry is one sample: a list of 4 history turns (oldest first)
    assert decoded == [[
        None,
        [(u'intent_greet', 1), (u'prev_action_listen', 1)],
        [(u'intent_greet', 1), (u'prev_utter_greet', 1)],
        [(u'intent_default', 1), (u'prev_action_listen', 1)]
    ], [
        None,
        [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
         (u'prev_action_listen', 1)],
        [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
         (u'prev_utter_greet', 1)],
        [(u'intent_default', 1), (u'slot_name_0', 1),
         (u'prev_action_listen', 1)]
    ], [
        [(u'intent_default', 1), (u'prev_action_listen', 1)],
        [(u'intent_default', 1), (u'prev_utter_default', 1)],
        [(u'intent_goodbye', 1), (u'prev_action_listen', 1)],
        [(u'intent_goodbye', 1), (u'prev_utter_goodbye', 1)]
    ], [
        [(u'intent_greet', 1), (u'prev_utter_greet', 1)],
        [(u'intent_default', 1), (u'prev_action_listen', 1)],
        [(u'intent_default', 1), (u'prev_utter_default', 1)],
        [(u'intent_goodbye', 1), (u'prev_action_listen', 1)]
    ], [
        [(u'intent_greet', 1), (u'prev_action_listen', 1)],
        [(u'intent_greet', 1), (u'prev_utter_greet', 1)],
        [(u'intent_default', 1), (u'prev_action_listen', 1)],
        [(u'intent_default', 1), (u'prev_utter_default', 1)]
    ], [
        [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
         (u'prev_action_listen', 1)],
        [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
         (u'prev_utter_greet', 1)],
        [(u'intent_default', 1), (u'slot_name_0', 1),
         (u'prev_action_listen', 1)],
        [(u'intent_default', 1), (u'slot_name_0', 1),
         (u'prev_utter_default', 1)]
    ], [
        None,
        None,
        [(u'intent_greet', 1), (u'prev_action_listen', 1)],
        [(u'intent_greet', 1), (u'prev_utter_greet', 1)]
    ], [
        None,
        None,
        [(u'intent_greet', 1), (u'entity_name', 1),
         (u'slot_name_0', 1), (u'prev_action_listen', 1)],
        [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
         (u'prev_utter_greet', 1)]
    ], [None, None, None, None], [
        None,
        None,
        None,
        [(u'intent_greet', 1), (u'prev_action_listen', 1)]
    ], [
        None,
        None,
        None,
        [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
         (u'prev_action_listen', 1)]
    ]]
def test_create_train_data_with_history(default_domain):
    """With ``max_history=4`` each sample carries up to four past turns;
    decoding the features must reproduce the expected per-turn feature
    lists (``None`` = padding before the start of a dialogue)."""
    featurizer = BinaryFeaturizer()
    X, y = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=featurizer,
        max_history=4
    )
    # 11 samples, history length 4, 10 binary input features
    assert X.shape == (11, 4, 10)
    decoded = [featurizer.decode(X[i, :, :], default_domain.input_features)
               for i in range(0, 11)]
    # each entry is one sample: a list of 4 history turns (oldest first)
    assert decoded == [
        [
            None,
            [(u'intent_greet', 1), (u'prev_action_listen', 1)],
            [(u'intent_greet', 1), (u'prev_utter_greet', 1)],
            [(u'intent_default', 1), (u'prev_action_listen', 1)]],
        [
            None,
            [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
             (u'prev_action_listen', 1)],
            [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
             (u'prev_utter_greet', 1)],
            [(u'intent_default', 1), (u'slot_name_0', 1),
             (u'prev_action_listen', 1)]],
        [
            [(u'intent_default', 1), (u'prev_action_listen', 1)],
            [(u'intent_default', 1), (u'prev_utter_default', 1)],
            [(u'intent_goodbye', 1), (u'prev_action_listen', 1)],
            [(u'intent_goodbye', 1), (u'prev_utter_goodbye', 1)]],
        [
            [(u'intent_greet', 1), (u'prev_utter_greet', 1)],
            [(u'intent_default', 1), (u'prev_action_listen', 1)],
            [(u'intent_default', 1), (u'prev_utter_default', 1)],
            [(u'intent_goodbye', 1), (u'prev_action_listen', 1)]],
        [
            [(u'intent_greet', 1), (u'prev_action_listen', 1)],
            [(u'intent_greet', 1), (u'prev_utter_greet', 1)],
            [(u'intent_default', 1), (u'prev_action_listen', 1)],
            [(u'intent_default', 1), (u'prev_utter_default', 1)]],
        [
            [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
             (u'prev_action_listen', 1)],
            [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
             (u'prev_utter_greet', 1)],
            [(u'intent_default', 1), (u'slot_name_0', 1),
             (u'prev_action_listen', 1)],
            [(u'intent_default', 1), (u'slot_name_0', 1),
             (u'prev_utter_default', 1)]],
        [
            None,
            None,
            [(u'intent_greet', 1), (u'prev_action_listen', 1)],
            [(u'intent_greet', 1), (u'prev_utter_greet', 1)]],
        [
            None,
            None,
            [(u'intent_greet', 1), (u'entity_name', 1),
             (u'slot_name_0', 1), (u'prev_action_listen', 1)],
            [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
             (u'prev_utter_greet', 1)]],
        [
            None,
            None,
            None,
            None],
        [
            None,
            None,
            None,
            [(u'intent_greet', 1), (u'prev_action_listen', 1)]],
        [
            None,
            None,
            None,
            [(u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
             (u'prev_action_listen', 1)]]]
def _create_featurizer(cls, featurizer): return featurizer if featurizer is not None else BinaryFeaturizer()