Example no. 1
0
async def test_softmax_ranking(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, int],
    data_path: Text,
    output_length: int,
):
    """Train a ResponseSelector pipeline and verify the ranking length.

    After softmax normalization the response ranking should contain
    exactly `output_length` entries.
    """
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    # The selector is the third component; apply the parametrized settings to it.
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    nlu_config = RasaNLUModelConfig({"pipeline": pipeline})
    trained_model, _, persisted_path = await rasa.nlu.train.train(
        nlu_config,
        path=str(tmp_path),
        data=data_path,
        component_builder=component_builder,
    )
    interpreter = Interpreter.load(persisted_path, component_builder)

    result = interpreter.parse("hello")
    ranking = result.get("response_selector").get("default").get("ranking")

    # check that the output was correctly truncated after normalization
    assert len(ranking) == output_length
Example no. 2
0
async def test_margin_loss_is_not_normalized(
    monkeypatch: MonkeyPatch,
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, int],
):
    """Verify that margin loss skips normalization of the ranking.

    `train_utils.normalize` is patched with a mock so that any call to it
    during parsing would be recorded; with margin loss it must never fire.
    """
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    # Apply the margin-loss parameters to the selector component.
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    normalize_spy = Mock()
    monkeypatch.setattr(train_utils, "normalize", normalize_spy.normalize)

    nlu_config = RasaNLUModelConfig({"pipeline": pipeline})
    trained_model, _, persisted_path = await rasa.nlu.train.train(
        nlu_config,
        path=str(tmp_path),
        data="data/test_selectors",
        component_builder=component_builder,
    )
    interpreter = Interpreter.load(persisted_path, component_builder)

    result = interpreter.parse("hello")
    ranking = result.get("response_selector").get("default").get("ranking")

    # check that the output was not normalized
    normalize_spy.normalize.assert_not_called()

    # check that the output was correctly truncated
    assert len(ranking) == 9
Example no. 3
0
async def test_cross_entropy_without_normalization(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, Any],
    prediction_min: float,
    prediction_max: float,
    output_length: int,
    monkeypatch: MonkeyPatch,
):
    """Check cross-entropy output without normalization.

    The ranking must be truncated to `output_length`, every confidence must
    lie within [`prediction_min`, `prediction_max`], and the patched
    `train_utils.normalize` must never be invoked during parsing.
    """
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    # Apply the parametrized classifier settings to the selector component.
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    nlu_config = RasaNLUModelConfig({"pipeline": pipeline})
    trained_model, _, persisted_path = await train(
        nlu_config,
        path=str(tmp_path),
        data="data/test_selectors",
        component_builder=component_builder,
    )
    interpreter = Interpreter.load(persisted_path, component_builder)

    # Patch normalize only after training so only parse-time calls are tracked.
    normalize_spy = Mock()
    monkeypatch.setattr(train_utils, "normalize", normalize_spy.normalize)

    result = interpreter.parse("hello")
    ranking = result.get("response_selector").get("default").get("ranking")

    # check that the output was correctly truncated
    assert len(ranking) == output_length

    confidences = [entry.get("confidence") for entry in ranking]

    # check each confidence is in range
    assert all(
        prediction_min <= confidence <= prediction_max
        for confidence in confidences
    )

    # normalize shouldn't have been called
    normalize_spy.normalize.assert_not_called()
Example no. 4
0
async def test_cross_entropy_with_linear_norm(
    component_builder: ComponentBuilder,
    tmp_path: Path,
    classifier_params: Dict[Text, Any],
    output_length: int,
    monkeypatch: MonkeyPatch,
):
    """Check cross-entropy output with linear normalization.

    The ranking must be truncated to `output_length`, the confidences must
    sum to 1, and the patched `train_utils.normalize` must never be invoked
    during parsing (linear norm uses its own path).
    """
    pipeline = as_pipeline(
        "WhitespaceTokenizer", "CountVectorsFeaturizer", "ResponseSelector"
    )
    # Apply the parametrized classifier settings to the selector component.
    assert pipeline[2]["name"] == "ResponseSelector"
    pipeline[2].update(classifier_params)

    nlu_config = RasaNLUModelConfig({"pipeline": pipeline})
    trained_model, _, persisted_path = await rasa.nlu.train.train(
        nlu_config,
        path=str(tmp_path),
        data="data/test_selectors",
        component_builder=component_builder,
    )
    interpreter = Interpreter.load(persisted_path, component_builder)

    # Patch normalize only after training so only parse-time calls are tracked.
    normalize_spy = Mock()
    monkeypatch.setattr(train_utils, "normalize", normalize_spy.normalize)

    result = interpreter.parse("hello")
    ranking = result.get("response_selector").get("default").get("ranking")

    # check that the output was correctly truncated
    assert len(ranking) == output_length

    confidences = [entry.get("confidence") for entry in ranking]

    # check whether normalization had the expected effect
    assert sum(confidences) == pytest.approx(1)

    # normalize shouldn't have been called
    normalize_spy.normalize.assert_not_called()