Example #1
def test_data_merging(files):
    td_reference = load_data(files[0])
    td = load_data(files[1])
    assert len(td.entity_examples) == len(td_reference.entity_examples)
    assert len(td.intent_examples) == len(td_reference.intent_examples)
    assert len(td.training_examples) == len(td_reference.training_examples)
    assert td.intents == td_reference.intents
    assert td.entities == td_reference.entities
    assert td.entity_synonyms == td_reference.entity_synonyms
    assert td.regex_features == td_reference.regex_features
Example #2
def test_get_nlu_file(project):
    data_file = os.path.join(project, "data/nlu.yml")
    nlu_directory = rasa.shared.data.get_nlu_directory(data_file)

    nlu_files = os.listdir(nlu_directory)

    original = load_data(data_file)
    copied = load_data(nlu_directory)

    assert nlu_files[0].endswith("nlu.yml")
    assert original.intent_examples == copied.intent_examples
Example #3
def test_markdown_single_sections():
    td_regex_only = load_data(
        "data/test/markdown_single_sections/regex_only.md")
    assert td_regex_only.regex_features == [{
        "name": "greet",
        "pattern": r"hey[^\s]*"
    }]

    td_syn_only = load_data(
        "data/test/markdown_single_sections/synonyms_only.md")
    assert td_syn_only.entity_synonyms == {
        "Chines": "chinese",
        "Chinese": "chinese"
    }
Example #4
def test_predict():
    model_path = train_nlu(
        nlu_data=NLU_DATA_PATH,
        config="tests/configs/sparse-naive-bayes-intent-classifier-config.yml",
        output="models",
    )

    interpreter = load_interpreter(model_path)

    # Get features from the pipeline and prepare data in the format sklearn
    # expects.
    training_data = load_data(NLU_DATA_PATH)
    for example in training_data.intent_examples:
        interpreter.featurize_message(example)
    model = interpreter.interpreter.pipeline[-1]
    X, y = model.prepare_data(training_data)

    # Fit the equivalent sklearn classifier.
    from sklearn.naive_bayes import BernoulliNB
    clf = BernoulliNB(alpha=0.1, binarize=0.0, fit_prior=True)
    clf.fit(X, y)

    # Check that predictions agree.
    assert (clf.predict_proba(X) == model.predict_prob(X)).all()
    assert (clf.predict(X) == model.predict(X)[0][:, 0]).all()
Example #5
def test_repeated_entities(tmp_path):
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "book a table today from 3 to 6 for 3 people",
        "intent": "unk",
        "entities": [
          {
            "entity": "description",
            "start": 35,
            "end": 36,
            "value": "3"
          }
        ]
      }
    ]
  }
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, rasa.shared.utils.io.DEFAULT_ENCODING)
    td = load_data(str(f))
    assert len(td.entity_examples) == 1
    example = td.entity_examples[0]
    entities = example.get("entities")
    assert len(entities) == 1
    tokens = WhitespaceTokenizer().tokenize(example, attribute=TEXT)
    start, end = MitieEntityExtractor.find_entity(entities[0],
                                                  example.get(TEXT), tokens)
    assert start == 9
    assert end == 10
Example #6
def test_multiword_entities(tmp_path):
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "New York City"
          }
        ]
      }
    ]
  }
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, rasa.shared.utils.io.DEFAULT_ENCODING)
    td = load_data(str(f))
    assert len(td.entity_examples) == 1
    example = td.entity_examples[0]
    entities = example.get("entities")
    assert len(entities) == 1
    tokens = WhitespaceTokenizer().tokenize(example, attribute=TEXT)
    start, end = MitieEntityExtractor.find_entity(entities[0],
                                                  example.get(TEXT), tokens)
    assert start == 4
    assert end == 7
Example #7
def test_nonascii_entities(tmp_path):
    data = """
{
  "luis_schema_version": "5.0",
  "utterances" : [
    {
      "text": "I am looking for a ßäæ ?€ö) item",
      "intent": "unk",
      "entities": [
        {
          "entity": "description",
          "startPos": 19,
          "endPos": 26
        }
      ]
    }
  ]
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, rasa.shared.utils.io.DEFAULT_ENCODING)
    td = load_data(str(f))
    assert len(td.entity_examples) == 1
    example = td.entity_examples[0]
    entities = example.get("entities")
    assert len(entities) == 1
    entity = entities[0]
    assert entity["value"] == "ßäæ ?€ö)"
    assert entity["start"] == 19
    assert entity["end"] == 27
    assert entity["entity"] == "description"
Example #8
def test_dialogflow_data():
    td = load_data("data/examples/dialogflow/")
    assert not td.is_empty()
    assert len(td.entity_examples) == 5
    assert len(td.intent_examples) == 24
    assert len(td.training_examples) == 24
    assert len(td.lookup_tables) == 2
    assert td.intents == {"affirm", "goodbye", "hi", "inform"}
    assert td.entities == {"cuisine", "location"}
    non_trivial_synonyms = {
        k: v
        for k, v in td.entity_synonyms.items() if k != v
    }
    assert non_trivial_synonyms == {
        "mexico": "mexican",
        "china": "chinese",
        "india": "indian",
    }
    # The order can differ between machines, hence the set-based comparison
    assert {td.lookup_tables[0]["name"], td.lookup_tables[1]["name"]} == {
        "location",
        "cuisine",
    }
    assert {
        len(td.lookup_tables[0]["elements"]),
        len(td.lookup_tables[1]["elements"]),
    } == {4, 6}
Example #9
def test_lookup_table_md():
    lookup_fname = "data/test/lookup_tables/plates.txt"
    td_lookup = load_data("data/test/lookup_tables/lookup_table.md")
    assert not td_lookup.is_empty()
    assert len(td_lookup.lookup_tables) == 1
    assert td_lookup.lookup_tables[0]["name"] == "plates"
    assert td_lookup.lookup_tables[0]["elements"] == lookup_fname
Example #10
def training_data_from_paths(paths: Iterable[Text],
                             language: Text) -> TrainingData:
    from rasa.shared.nlu.training_data import loading

    training_data_sets = [
        loading.load_data(nlu_file, language) for nlu_file in paths
    ]
    return TrainingData().merge(*training_data_sets)
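A minimal usage sketch for the helper above; the file paths are hypothetical placeholders, and only attributes shown elsewhere in these examples (training_examples, intents) are used.

# Hypothetical call of training_data_from_paths; the paths are placeholders.
merged = training_data_from_paths(
    ["data/nlu_part_1.yml", "data/nlu_part_2.yml"], language="en"
)
print(len(merged.training_examples), merged.intents)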
Example #11
def test_markdown_empty_section():
    data = load_data("data/test/markdown_single_sections/empty_section.md")
    assert data.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"}]

    assert not data.entity_synonyms
    assert len(data.lookup_tables) == 1
    assert data.lookup_tables[0]["name"] == "chinese"
    assert "Chinese" in data.lookup_tables[0]["elements"]
    assert "Chines" in data.lookup_tables[0]["elements"]
Example #12
def test_wit_data():
    td = load_data("data/examples/wit/demo-flights.json")
    assert not td.is_empty()
    assert len(td.entity_examples) == 4
    assert len(td.intent_examples) == 1
    assert len(td.training_examples) == 4
    assert td.entity_synonyms == {}
    assert td.intents == {"flight_booking"}
    assert td.entities == {"location", "datetime"}
Example #13
def getUtterance(intent_):
    # `dataFile` (the NLU training data path) as well as `load_data` and
    # `OrderedDict` are assumed to be defined or imported at module level
    # in the project this snippet comes from.
    train_data = load_data(dataFile)
    training_examples = OrderedDict()
    INTENT = 'intent'
    for example in [e.as_dict_nlu() for e in train_data.training_examples]:
        intent = example[INTENT]
        training_examples.setdefault(intent, [])
        training_examples[intent].append(example)
    return training_examples[intent_][0]['text']
Example #14
def test_luis_data():
    td = load_data("data/examples/luis/demo-restaurants_v5.json")

    assert not td.is_empty()
    assert len(td.entity_examples) == 8
    assert len(td.intent_examples) == 28
    assert len(td.training_examples) == 28
    assert td.entity_synonyms == {}
    assert td.intents == {"affirm", "goodbye", "greet", "inform"}
    assert td.entities == {"location", "cuisine"}
Example #15
def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    out_path = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, out_path.strpath, output_format, language)
    td = load_data(out_path.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # converting the converted file back to original
    # file format and performing the same tests
    rto_path = tmpdir.join("data_in_original_format.txt")
    convert_training_data(out_path.strpath, rto_path.strpath, "json", language)
    rto = load_data(rto_path.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
Example #16
async def train(
    nlu_config: Union[Text, Dict, RasaNLUModelConfig],
    data: Union[Text, "TrainingDataImporter"],
    path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    storage: Optional[Text] = None,
    component_builder: Optional[ComponentBuilder] = None,
    training_data_endpoint: Optional[EndpointConfig] = None,
    persist_nlu_training_data: bool = False,
    model_to_finetune: Optional[Interpreter] = None,
    **kwargs: Any,
) -> Tuple[Trainer, Interpreter, Optional[Text]]:
    """Loads the trainer and the data and runs the training of the model."""
    from rasa.shared.importers.importer import TrainingDataImporter

    if not isinstance(nlu_config, RasaNLUModelConfig):
        nlu_config = config.load(nlu_config)

    # Ensure we are training a model that we can save in the end
    # WARN: there is still a race condition if a model with the same name is
    # trained in another subprocess
    trainer = Trainer(
        nlu_config, component_builder, model_to_finetune=model_to_finetune
    )
    persistor = create_persistor(storage)
    if training_data_endpoint is not None:
        training_data = await load_data_from_endpoint(
            training_data_endpoint, nlu_config.language
        )
    elif isinstance(data, TrainingDataImporter):
        training_data = await data.get_nlu_data(nlu_config.language)
    else:
        training_data = load_data(data, nlu_config.language)

    training_data.print_stats()
    if training_data.entity_roles_groups_used():
        rasa.shared.utils.common.mark_as_experimental_feature(
            "Entity Roles and Groups feature"
        )

    interpreter = trainer.train(training_data, **kwargs)

    if path:
        persisted_path = trainer.persist(
            path, persistor, fixed_model_name, persist_nlu_training_data
        )
    else:
        persisted_path = None

    return trainer, interpreter, persisted_path
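A hedged sketch of driving this coroutine directly; the config and data paths and the asyncio.run driver are assumptions, while the keyword arguments and return tuple follow the signature above.

import asyncio

# Hypothetical driver; paths are placeholders. `train` is the coroutine above
# and returns (Trainer, Interpreter, Optional[persisted path]).
async def _train_nlu_example():
    trainer, interpreter, persisted_path = await train(
        nlu_config="config.yml",
        data="data/nlu.yml",
        path="models",
        fixed_model_name="nlu-example",
    )
    return persisted_path

model_path = asyncio.run(_train_nlu_example())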
Example #17
def test_spacy_intent_featurizer(spacy_nlp_component: SpacyNLP,
                                 spacy_model: SpacyModel):
    td = loading.load_data("data/examples/rasa/demo-rasa.json")
    spacy_nlp_component.process_training_data(td, spacy_model)
    spacy_featurizer = create_spacy_featurizer({})
    spacy_featurizer.process_training_data(td)

    intent_features_exist = np.array([
        True if example.get("intent_features") is not None else False
        for example in td.intent_examples
    ])

    # no intent features should have been set
    assert not any(intent_features_exist)
Example #18
def test_composite_entities_data():
    td = load_data("data/test/demo-rasa-composite-entities.yml")
    assert not td.is_empty()
    assert len(td.entity_examples) == 11
    assert len(td.intent_examples) == 29
    assert len(td.training_examples) == 29
    assert td.entity_synonyms == {"SF": "San Fransisco"}
    assert td.intents == {"order_pizza", "book_flight", "chitchat", "affirm"}
    assert td.entities == {"location", "topping", "size"}
    assert td.entity_groups == {"1", "2"}
    assert td.entity_roles == {"to", "from"}
    assert td.number_of_examples_per_entity["entity 'location'"] == 8
    assert td.number_of_examples_per_entity["group '1'"] == 9
    assert td.number_of_examples_per_entity["role 'from'"] == 3
Example #19
async def _convert_nlu_training_data(
    in_path: Text, out_path: Text, language: Text,
):
    if rasa.shared.data.is_likely_yaml_file(out_path):
        from rasa.shared.nlu.training_data.loading import load_data
        from rasa.shared.nlu.training_data.formats.rasa_yaml import RasaYAMLWriter

        training_data = load_data(in_path, language)
        RasaYAMLWriter().dump(out_path, training_data)
    else:
        from rasa.nlu.convert import convert_training_data

        convert_training_data(
            in_path, out_path, Path(out_path).suffix.replace('.', ''), language,
        )
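For the YAML branch, the conversion reduces to two calls that appear verbatim above; a standalone sketch with placeholder input and output paths:

from rasa.shared.nlu.training_data.loading import load_data
from rasa.shared.nlu.training_data.formats.rasa_yaml import RasaYAMLWriter

# Load an NLU file in any supported format and write it back out as YAML.
# Both paths are placeholders.
training_data = load_data("data/nlu.md", "en")
RasaYAMLWriter().dump("data/nlu_converted.yml", training_data)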
Example #20
async def load_data_from_endpoint(
    data_endpoint: EndpointConfig, language: Optional[Text] = "en"
) -> "TrainingData":
    """Load training data from a URL."""
    import requests

    if not utils.is_url(data_endpoint.url):
        raise requests.exceptions.InvalidURL(data_endpoint.url)
    try:
        response = await data_endpoint.request("get")
        response.raise_for_status()
        temp_data_file = io_utils.create_temporary_file(response.content, mode="w+b")
        training_data = load_data(temp_data_file, language)

        return training_data
    except Exception as e:
        logger.warning(f"Could not retrieve training data from URL:\n{e}")
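A hedged usage sketch, assuming EndpointConfig can be imported from rasa.utils.endpoints and constructed from a plain URL (the URL itself is a placeholder):

import asyncio

from rasa.utils.endpoints import EndpointConfig

# Hypothetical endpoint; it must serve NLU data in a format load_data accepts.
endpoint = EndpointConfig("https://example.com/nlu-training-data.json")
training_data = asyncio.run(load_data_from_endpoint(endpoint, language="en"))
if training_data is not None:  # the function logs a warning and returns None on errors
    training_data.print_stats()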
Example #21
def test_spacy_intent_featurizer(spacy_nlp_component):
    from rasa.nlu.featurizers.dense_featurizer.spacy_featurizer import SpacyFeaturizer

    td = loading.load_data("data/examples/rasa/demo-rasa.json")
    spacy_nlp_component.train(td, config=None)
    spacy_featurizer = SpacyFeaturizer()
    spacy_featurizer.train(td, config=None)

    intent_features_exist = np.array(
        [
            True if example.get("intent_features") is not None else False
            for example in td.intent_examples
        ]
    )

    # no intent features should have been set
    assert not any(intent_features_exist)
Example #22
def split_nlu_data(args: argparse.Namespace) -> None:
    """Load data from a file path and split the NLU data into test and train examples.

    Args:
        args: Commandline arguments
    """
    from rasa.shared.nlu.training_data.loading import load_data
    from rasa.shared.nlu.training_data.util import get_file_format

    data_path = rasa.cli.utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    data_path = rasa.shared.data.get_nlu_directory(data_path)

    nlu_data = load_data(data_path)
    fformat = get_file_format(data_path)

    train, test = nlu_data.train_test_split(args.training_fraction, args.random_seed)

    train.persist(args.out, filename=f"training_data.{fformat}")
    test.persist(args.out, filename=f"test_data.{fformat}")
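Outside the CLI wrapper, the same split can be written with the library calls used above; a minimal sketch where the data path, output directory, split fraction and seed are illustrative:

from rasa.shared.nlu.training_data.loading import load_data

# Split NLU data 80/20 with a fixed seed and persist both parts.
# The data path and output directory are placeholders.
nlu_data = load_data("data/nlu.yml")
train, test = nlu_data.train_test_split(0.8, 42)
train.persist("train_test_split", filename="training_data.yml")
test.persist("train_test_split", filename="test_data.yml")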
Example #23
def test_custom_attributes(tmp_path):
    data = """
{
  "rasa_nlu_data": {
    "common_examples" : [
      {
        "intent": "happy",
        "text": "I'm happy.",
        "sentiment": 0.8
      }
    ]
  }
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, rasa.shared.utils.io.DEFAULT_ENCODING)
    td = load_data(str(f))
    assert len(td.training_examples) == 1
    example = td.training_examples[0]
    assert example.get("sentiment") == 0.8
Example #24
def test_entities_synonyms(tmp_path):
    data = """
{
  "rasa_nlu_data": {
    "entity_synonyms": [
      {
        "value": "nyc",
        "synonyms": ["New York City", "nyc", "the big apple"]
      }
    ],
    "common_examples" : [
      {
        "text": "show me flights to New York City",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 32,
            "value": "NYC"
          }
        ]
      },
      {
        "text": "show me flights to nyc",
        "intent": "unk",
        "entities": [
          {
            "entity": "destination",
            "start": 19,
            "end": 22,
            "value": "nyc"
          }
        ]
      }
    ]
  }
}"""
    f = tmp_path / "tmp_training_data.json"
    f.write_text(data, rasa.shared.utils.io.DEFAULT_ENCODING)
    td = load_data(str(f))
    assert td.entity_synonyms["New York City"] == "nyc"
Example #25
def test_spacy_featurizer_casing(spacy_nlp):

    # If this starts failing for the default model, we should think about
    # removing the lower-casing that the spaCy NLP component applies when it
    # retrieves vectors. For compressed spaCy models (e.g. models ending
    # in _sm) this test will most likely fail.

    ftr = create_spacy_featurizer({})

    td = loading.load_data("data/examples/rasa/demo-rasa.json")
    for e in td.intent_examples:
        doc = spacy_nlp(e.get(TEXT))
        doc_capitalized = spacy_nlp(e.get(TEXT).capitalize())

        vecs = ftr._features_for_doc(doc)
        vecs_capitalized = ftr._features_for_doc(doc_capitalized)

        assert np.allclose(
            vecs, vecs_capitalized,
            atol=1e-5), "Vectors are unequal for texts '{}' and '{}'".format(
                e.text, e.text.capitalize())
Example #26
    async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
        fake_data_count = self.DEFAULT_FAKE_DATA_COUNT

        for importer in self.config["importers"]:
            if importer.get("name") == "rasam.PlaceholderImporter":
                fake_data_count = importer.get("fake_data_count", self.DEFAULT_FAKE_DATA_COUNT)

        faker_ = faker.Faker()
        faker_.seed_instance(fake_data_count)

        training_data = [loading.load_data(nlu_file, language) for nlu_file in self._nlu_files]

        new_training_data = []

        for data in training_data:
            training_examples = []
            example: Message
            for example in data.training_examples:
                if example.get("intent"):
                    matches = [i async for i in self.find_placeholders(example.data.get("text"))]
                    if matches:
                        async for new_message in self.replace_placeholders(example, faker_, matches, fake_data_count):
                            training_examples.append(new_message)
                    else:
                        training_examples.append(example)
                else:
                    training_examples.append(example)
            new_training_data.append(
                TrainingData(
                    training_examples, data.entity_synonyms, data.regex_features, data.lookup_tables, data.responses
                )
            )

        merged_training_data = TrainingData().merge(*new_training_data)
        merged_training_data._fill_response_phrases()
        return merged_training_data
Example #27
from rasa.shared.nlu.training_data.loading import load_data
from rasa.nlu.model import Trainer
from rasa.nlu import config

data_path = 'data/nlu_textual_queries.md'
# data_path = 'data/nlu.md'

# Load the NLU training data, train a pipeline from the default config,
# and persist the trained model to disk.
training_data = load_data(data_path)
trainer = Trainer(config.load())
trainer.train(training_data)

directory = '../data/models/rasa/nlu'
trainer.persist(directory)
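A hedged follow-up sketch for querying the persisted model; Interpreter.load and parse are assumed to be available from rasa.nlu.model and are not part of the script above.

from rasa.nlu.model import Interpreter

# Assumption: the directory passed to trainer.persist() contains the trained
# model; the concrete model subdirectory name is a placeholder.
interpreter = Interpreter.load('../data/models/rasa/nlu/<model-dir>')
print(interpreter.parse('some example query'))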
Example #28
async def test_adjusting_layers_incremental_training(
    create_response_selector: Callable[[Dict[Text, Any]], ResponseSelector],
    load_response_selector: Callable[[Dict[Text, Any]], ResponseSelector],
    train_and_preprocess: Callable[..., Tuple[TrainingData,
                                              List[GraphComponent]]],
    process_message: Callable[..., Message],
):
    """Tests adjusting sparse layers of `ResponseSelector` to increased sparse
    feature sizes during incremental training.

    Testing is done by checking the layer sizes.
    Checking if they were replaced correctly is also important
    and is done in `test_replace_dense_for_sparse_layers`
    in `test_rasa_layers.py`.
    """
    iter1_data_path = "data/test_incremental_training/iter1/"
    iter2_data_path = "data/test_incremental_training/"
    pipeline = [
        {"component": WhitespaceTokenizer},
        {"component": LexicalSyntacticFeaturizer},
        {"component": RegexFeaturizer},
        {"component": CountVectorsFeaturizer},
        {
            "component": CountVectorsFeaturizer,
            "analyzer": "char_wb",
            "min_ngram": 1,
            "max_ngram": 4,
        },
    ]
    training_data, loaded_pipeline = train_and_preprocess(
        pipeline, iter1_data_path)
    response_selector = create_response_selector({EPOCHS: 1})
    response_selector.train(training_data=training_data)

    old_data_signature = response_selector.model.data_signature
    old_predict_data_signature = response_selector.model.predict_data_signature

    message = Message(data={TEXT: "Rasa is great!"})
    message = process_message(loaded_pipeline, message)

    message2 = copy.deepcopy(message)

    classified_message = response_selector.process([message])[0]

    old_sparse_feature_sizes = classified_message.get_sparse_feature_sizes(
        attribute=TEXT)

    initial_rs_layers = response_selector.model._tf_layers[
        "sequence_layer.text"]._tf_layers["feature_combining"]
    initial_rs_sequence_layer = initial_rs_layers._tf_layers[
        "sparse_dense.sequence"]._tf_layers["sparse_to_dense"]
    initial_rs_sentence_layer = initial_rs_layers._tf_layers[
        "sparse_dense.sentence"]._tf_layers["sparse_to_dense"]

    initial_rs_sequence_size = initial_rs_sequence_layer.get_kernel().shape[0]
    initial_rs_sentence_size = initial_rs_sentence_layer.get_kernel().shape[0]
    assert initial_rs_sequence_size == sum(
        old_sparse_feature_sizes[FEATURE_TYPE_SEQUENCE])
    assert initial_rs_sentence_size == sum(
        old_sparse_feature_sizes[FEATURE_TYPE_SENTENCE])

    loaded_selector = load_response_selector({EPOCHS: 1})

    classified_message2 = loaded_selector.process([message2])[0]

    assert classified_message2.fingerprint() == classified_message.fingerprint()

    training_data2, loaded_pipeline2 = train_and_preprocess(
        pipeline, iter2_data_path)

    response_selector.train(training_data=training_data2)

    new_message = Message.build(text="Rasa is great!")
    new_message = process_message(loaded_pipeline2, new_message)

    classified_new_message = response_selector.process([new_message])[0]
    new_sparse_feature_sizes = classified_new_message.get_sparse_feature_sizes(
        attribute=TEXT)

    final_rs_layers = response_selector.model._tf_layers[
        "sequence_layer.text"]._tf_layers["feature_combining"]
    final_rs_sequence_layer = final_rs_layers._tf_layers[
        "sparse_dense.sequence"]._tf_layers["sparse_to_dense"]
    final_rs_sentence_layer = final_rs_layers._tf_layers[
        "sparse_dense.sentence"]._tf_layers["sparse_to_dense"]

    final_rs_sequence_size = final_rs_sequence_layer.get_kernel().shape[0]
    final_rs_sentence_size = final_rs_sentence_layer.get_kernel().shape[0]
    assert final_rs_sequence_size == sum(
        new_sparse_feature_sizes[FEATURE_TYPE_SEQUENCE])
    assert final_rs_sentence_size == sum(
        new_sparse_feature_sizes[FEATURE_TYPE_SENTENCE])
    # check if the data signatures were correctly updated
    new_data_signature = response_selector.model.data_signature
    new_predict_data_signature = response_selector.model.predict_data_signature
    iter2_data = load_data(iter2_data_path)
    expected_sequence_lengths = len([
        message for message in iter2_data.training_examples
        if message.get(INTENT_RESPONSE_KEY)
    ])

    def test_data_signatures(
        new_signature: Dict[Text, Dict[Text, List[FeatureArray]]],
        old_signature: Dict[Text, Dict[Text, List[FeatureArray]]],
    ):
        # Wherever attribute / feature_type signature is not
        # expected to change, directly compare it to old data signature.
        # Else compute its expected signature and compare
        attributes_expected_to_change = [TEXT]
        feature_types_expected_to_change = [
            FEATURE_TYPE_SEQUENCE,
            FEATURE_TYPE_SENTENCE,
        ]

        for attribute, signatures in new_signature.items():

            for feature_type, feature_signatures in signatures.items():

                if feature_type == "sequence_lengths":
                    assert feature_signatures[
                        0].units == expected_sequence_lengths

                elif feature_type not in feature_types_expected_to_change:
                    assert feature_signatures == old_signature.get(
                        attribute).get(feature_type)
                else:
                    for index, feature_signature in enumerate(
                            feature_signatures):
                        if (feature_signature.is_sparse and attribute
                                in attributes_expected_to_change):
                            assert feature_signature.units == sum(
                                new_sparse_feature_sizes.get(feature_type))
                        else:
                            # dense signature or attributes that are not
                            # expected to change can be compared directly
                            assert (
                                feature_signature.units == old_signature.get(
                                    attribute).get(feature_type)[index].units)

    test_data_signatures(new_data_signature, old_data_signature)
    test_data_signatures(new_predict_data_signature,
                         old_predict_data_signature)
Example #29
def test_lookup_table_yaml():
    td_lookup = load_data("data/test/lookup_tables/lookup_table.yml")
    assert not td_lookup.is_empty()
    assert len(td_lookup.lookup_tables) == 1
    assert td_lookup.lookup_tables[0]["name"] == "plates"
    assert len(td_lookup.lookup_tables[0]["elements"]) == 5
Example #30
def test_nlu_training_data_provider(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    config_path: Text,
    nlu_data_path: Text,
):
    # create a resource and an importer
    resource = Resource("xy")
    importer = TrainingDataImporter.load_from_config(
        config_path=config_path, training_data_paths=[nlu_data_path])

    # check the default configuration is as expected
    config_1 = NLUTrainingDataProvider.get_default_config()
    assert config_1["language"] is None
    assert config_1["persist"] is False

    # create a provider with persist == True
    provider_1 = NLUTrainingDataProvider.create(
        {"language": "en", "persist": True},
        default_model_storage,
        resource,
        default_execution_context,
    )
    assert isinstance(provider_1, NLUTrainingDataProvider)

    # check the data provided is as expected
    data_0 = provider_1.provide(importer)
    data_1 = importer.get_nlu_data(language="en")
    assert data_0.fingerprint() == data_1.fingerprint()

    # check the data was persisted
    with default_model_storage.read_from(resource) as resource_directory:
        data_file = os.path.join(str(resource_directory),
                                 DEFAULT_TRAINING_DATA_OUTPUT_PATH)
        data = load_data(resource_name=data_file, language="en")
        assert os.path.isfile(data_file)
        assert isinstance(data, TrainingData)

        # delete the persisted data
        os.remove(data_file)
        assert not os.path.isfile(data_file)

    # create a provider with persist == False
    provider_2 = NLUTrainingDataProvider.create(
        {"language": "en", "persist": False},
        default_model_storage,
        resource,
        default_execution_context,
    )
    provider_2.provide(importer)

    # check the data was not persisted
    with default_model_storage.read_from(resource) as resource_directory:
        data_file = os.path.join(str(resource_directory),
                                 DEFAULT_TRAINING_DATA_OUTPUT_PATH)
        assert not os.path.isfile(data_file)