# NOTE: the imports below assume the Rasa 2.x source layout these tests are
# written against.
from pathlib import Path
from typing import Any, Dict, Text
from unittest.mock import Mock

import pytest
from _pytest.monkeypatch import MonkeyPatch

import rasa.nlu.train
from rasa.nlu.components import ComponentBuilder
from rasa.nlu.config import RasaNLUModelConfig
from rasa.nlu.model import Interpreter
from rasa.utils import train_utils
from tests.nlu.classifiers.test_diet_classifier import as_pipeline


async def test_softmax_ranking(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, int],
    data_path: Text,
    output_length: int,
):
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline})
    (trained_model, _, persisted_path) = await rasa.nlu.train.train(
        _config,
        path=str(tmp_path),
        data=data_path,
        component_builder=component_builder,
    )
    loaded = Interpreter.load(persisted_path, component_builder)

    parse_data = loaded.parse("hello")
    response_ranking = (
        parse_data.get("response_selector").get("default").get("ranking")
    )

    # check that the output was correctly truncated after normalization
    assert len(response_ranking) == output_length
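# The classifier_params, data_path and output_length arguments above are
# supplied by pytest parametrization, which is not part of this excerpt.
# A minimal, hypothetical sketch of such a case table (the values are
# illustrative assumptions, not the original test matrix); it would be
# attached to the test as:
#     @pytest.mark.parametrize(
#         "classifier_params, data_path, output_length",
#         SOFTMAX_RANKING_CASES,
#     )
SOFTMAX_RANKING_CASES = [
    # one softmax configuration trained on the selector test data, expecting
    # the ranking to be truncated to 9 entries (hypothetical values)
    ({"loss_type": "softmax", "epochs": 2}, "data/test_selectors", 9),
]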
async def test_margin_loss_is_not_normalized(
    monkeypatch: MonkeyPatch,
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, int],
):
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    mock = Mock()
    monkeypatch.setattr(train_utils, "normalize", mock.normalize)

    _config = RasaNLUModelConfig({"pipeline": pipeline})
    (trained_model, _, persisted_path) = await rasa.nlu.train.train(
        _config,
        path=str(tmp_path),
        data="data/test_selectors",
        component_builder=component_builder,
    )
    loaded = Interpreter.load(persisted_path, component_builder)

    parse_data = loaded.parse("hello")
    response_ranking = (
        parse_data.get("response_selector").get("default").get("ranking")
    )

    # check that the output was not normalized
    mock.normalize.assert_not_called()

    # check that the output was correctly truncated
    assert len(response_ranking) == 9
async def test_cross_entropy_without_normalization(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, Any],
    prediction_min: float,
    prediction_max: float,
    output_length: int,
    monkeypatch: MonkeyPatch,
):
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline})
    (trained_model, _, persisted_path) = await rasa.nlu.train.train(
        _config,
        path=str(tmp_path),
        data="data/test_selectors",
        component_builder=component_builder,
    )
    loaded = Interpreter.load(persisted_path, component_builder)

    # track calls to the normalization helper during inference
    mock = Mock()
    monkeypatch.setattr(train_utils, "normalize", mock.normalize)

    parse_data = loaded.parse("hello")
    response_ranking = (
        parse_data.get("response_selector").get("default").get("ranking")
    )

    # check that the output was correctly truncated
    assert len(response_ranking) == output_length

    response_confidences = [
        response.get("confidence") for response in response_ranking
    ]

    # check each confidence is in range
    confidence_in_range = [
        prediction_min <= confidence <= prediction_max
        for confidence in response_confidences
    ]
    assert all(confidence_in_range)

    # normalize shouldn't have been called
    mock.normalize.assert_not_called()
async def test_cross_entropy_with_linear_norm(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, Any],
    output_length: int,
    monkeypatch: MonkeyPatch,
):
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    _config = RasaNLUModelConfig({"pipeline": pipeline})
    (trained_model, _, persisted_path) = await rasa.nlu.train.train(
        _config,
        path=str(tmp_path),
        data="data/test_selectors",
        component_builder=component_builder,
    )
    loaded = Interpreter.load(persisted_path, component_builder)

    mock = Mock()
    monkeypatch.setattr(train_utils, "normalize", mock.normalize)

    parse_data = loaded.parse("hello")
    response_ranking = (
        parse_data.get("response_selector").get("default").get("ranking")
    )

    # check that the output was correctly truncated
    assert len(response_ranking) == output_length

    response_confidences = [
        response.get("confidence") for response in response_ranking
    ]

    # check whether normalization had the expected effect
    output_sums_to_1 = sum(response_confidences) == pytest.approx(1)
    assert output_sums_to_1

    # normalize shouldn't have been called
    mock.normalize.assert_not_called()
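# For context on what the tests above mock out: a minimal sketch of a
# confidence-normalization helper in the spirit of train_utils.normalize,
# assuming it keeps only the top `ranking_length` values and rescales them
# to sum to 1 (an assumption about the library's behaviour, not a copy of it).
import numpy as np


def normalize_sketch(values: np.ndarray, ranking_length: int = 0) -> np.ndarray:
    """Zero out all but the `ranking_length` largest values, then rescale."""
    new_values = values.copy()
    if 0 < ranking_length < len(new_values):
        # everything below the ranking_length-th largest value is dropped
        cutoff = sorted(new_values, reverse=True)[ranking_length - 1]
        new_values[new_values < cutoff] = 0.0
    total = np.sum(new_values)
    if total > 0:
        new_values = new_values / total
    return new_values


# e.g. normalize_sketch(np.array([0.5, 0.3, 0.2]), ranking_length=2)
# -> array([0.625, 0.375, 0.   ])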