Beispiel #1
0
def test_entity_type(payload) -> None:
    """
    Check the `entity_type` of each entity produced for one fixture case.

    The payload provides the utterances (`input`), the data handed to
    `request_builder` for the mocked POST endpoint (`mock_entity_json`) and
    either the `expected` entities or the `EXCEPTIONS` key (`exception`) of
    the error that running the workflow should raise.
    """
    utterances = payload["input"]
    mocked_response = payload["mock_entity_json"]
    expected_entities = payload.get("expected")
    exception_key = payload.get("exception")

    plugin = DucklingPlugin(
        dest="output.entities",
        dimensions=["people", "time", "date", "duration"],
        locale="en_IN",
        timezone="Asia/Kolkata",
    )

    # Register the mocked POST handler built from the fixture data.
    httpretty.register_uri(
        httpretty.POST,
        "http://0.0.0.0:8000/parse",
        body=request_builder(mocked_response),
    )

    workflow = Workflow([plugin])

    if expected_entities:
        _, output = workflow.run(Input(utterances=utterances))
        # Entities are compared positionally against the expected list.
        for position, entity in enumerate(output["entities"]):
            assert (
                entity["entity_type"]
                == expected_entities[position]["entity_type"]
            )
        return

    if exception_key:
        # `Input` is built inside the context manager so an error raised
        # while constructing it is accepted as well.
        with pytest.raises(EXCEPTIONS[exception_key]):
            workflow.run(Input(utterances=utterances))
def test_plugin_no_transform():
    """
    Assert that `transform` yields a frame without an `entities` column when
    the plugin is created with `use_transform=False`.
    """
    timestamp = "2021-09-14T00:00:00.000+05:30"
    time_value = {"value": timestamp, "grain": "day", "type": "value"}
    mocked_response = [
        {
            "body": "today",
            "start": 0,
            # Same key order as a literal: "values" first, then the
            # value/grain/type triple.
            "value": {"values": [dict(time_value)], **time_value},
            "end": 5,
            "dim": "time",
            "latent": False,
        }
    ]

    httpretty.register_uri(
        httpretty.POST,
        "http://0.0.0.0:8000/parse",
        body=request_builder(mocked_response, response_code=200),
    )

    # Two rows sharing the same reference time; neither has entities.
    records = [
        {"data": [text], "reftime": timestamp}
        for text in ("today", "no")
    ]
    frame = pd.DataFrame(records, columns=["data", "reftime"])

    plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        dest="output.entities",
        threshold=0.2,
        timeout=0.01,
        use_transform=False,
        input_column="data",
        output_column="entities",
    )

    transformed = plugin.transform(frame)
    assert "entities" not in transformed.columns
def test_plugin_working_cases(payload) -> None:
    """
    Run one fixture case through a `Workflow` holding a `DucklingPlugin`.

    When the payload has an `expected` value, the workflow output's entities
    are compared positionally by `entity_type`. Otherwise running the
    workflow must raise the exception registered in `EXCEPTIONS` under the
    payload's `exception` key.
    """
    utterances = payload["input"]
    mocked_response = payload["mock_entity_json"]
    expected_entities = payload.get("expected")
    exception_key = payload.get("exception")
    plugin_options = payload.get("duckling")
    status_code = payload.get("response_code", 200)
    locale = payload.get("locale")
    reference_time = payload.get("reference_time")
    use_latent = payload.get("use_latent")

    plugin = DucklingPlugin(dest="output.entities", **plugin_options)

    httpretty.register_uri(
        httpretty.POST,
        "http://0.0.0.0:8000/parse",
        body=request_builder(mocked_response, response_code=status_code),
    )

    workflow = Workflow([plugin])

    # String reference times are converted with `make_unix_ts` first.
    if isinstance(reference_time, str):
        reference_time = make_unix_ts("Asia/Kolkata")(reference_time)

    input_fields = {
        "utterances": utterances,
        "locale": locale,
        "reference_time": reference_time,
        "latent_entities": use_latent,
    }

    if expected_entities is None:
        # `Input` is built inside the context manager so an error raised
        # while constructing it is accepted as well.
        with pytest.raises(EXCEPTIONS[exception_key]):
            workflow.run(Input(**input_fields))
        return

    _, output = workflow.run(Input(**input_fields))
    found = output["entities"]

    # A falsy result must be exactly an empty list (not e.g. None).
    if not found:
        assert found == []

    for position, entity in enumerate(found):
        wanted_type = expected_entities[position]["entity_type"]
        assert entity["entity_type"] == wanted_type
def test_plugin_transform_existing_entity():
    """
    Run `transform` on a frame where some rows already carry entities.

    Asserts that the first entity of row 0 (which starts without entities)
    matches the expected time entity's `value` and `type`, and that the first
    entity of row 1 still has the value "apple".
    """
    timestamp = "2021-09-14T00:00:00.000+05:30"
    time_value = {"value": timestamp, "grain": "day", "type": "value"}
    mocked_response = [
        {
            "body": "today",
            "start": 0,
            # Same key order as a literal: "values" first, then the
            # value/grain/type triple.
            "value": {"values": [dict(time_value)], **time_value},
            "end": 5,
            "dim": "time",
            "latent": False,
        }
    ]

    httpretty.register_uri(
        httpretty.POST,
        "http://0.0.0.0:8000/parse",
        body=request_builder(mocked_response, response_code=200),
    )

    def make_fruit():
        # A fresh entity per row so that rows never share an instance.
        return KeywordEntity(
            range={"start": 0, "end": 0},
            value="apple",
            entity_type="fruits",
            body="apple",
        )

    records = [
        # Row 0: no pre-existing entities.
        {"data": '["today"]', "reftime": timestamp},
        # Row 1: already holds a keyword entity.
        {"data": '["no"]', "reftime": timestamp, "entities": [make_fruit()]},
        # Row 2: missing reference time; no assertion below inspects it.
        {"data": '["no"]', "reftime": pd.NA, "entities": [make_fruit()]},
    ]
    frame = pd.DataFrame(records, columns=["data", "reftime", "entities"])

    plugin = DucklingPlugin(
        locale="en_IN",
        dimensions=["time"],
        timezone="Asia/Kolkata",
        dest="output.entities",
        threshold=0.2,
        timeout=0.01,
        use_transform=True,
        input_column="data",
        output_column="entities",
    )

    transformed = plugin.transform(frame)

    expected_today = TimeEntity(
        entity_type="date",
        body="today",
        parsers=["DucklingPlugin"],
        range={"start": 0, "end": 5},
        score=1.0,
        alternative_index=0,
        latent=False,
        value=timestamp,
        origin="value",
        grain="day",
    )

    first_parsed = transformed["entities"].iloc[0][0]
    assert first_parsed.value == expected_today.value
    assert first_parsed.type == expected_today.type
    assert transformed["entities"].iloc[1][0].value == "apple"