def test_duckling_entity_extractor_and_synonyms(component_builder):
    """Feed duckling's output through the synonym mapper and make sure it survives."""
    pipeline_config = RasaNLUModelConfig({"pipeline": [{"name": "ner_duckling"}]})
    pipeline_config.set_component_attr("ner_duckling", dimensions=["number"])

    extractor = component_builder.create_component("ner_duckling",
                                                   pipeline_config)
    synonym_mapper = component_builder.create_component("ner_synonyms",
                                                        pipeline_config)

    parsed = Message("He was 6 feet away")
    extractor.process(parsed)

    # checks that the synonym processor can handle entities that have int values
    synonym_mapper.process(parsed)
    assert parsed is not None
def test_duckling_entity_extractor(component_builder):
    """Check time extraction with `ner_duckling`, with and without a reference time."""
    time_config = RasaNLUModelConfig({"pipeline": [{"name": "ner_duckling"}]})
    time_config.set_component_attr("ner_duckling", dimensions=["time"])
    extractor = component_builder.create_component("ner_duckling", time_config)

    undated = Message("Today is the 5th of May. Let us meet tomorrow.")
    extractor.process(undated)
    assert len(undated.get("entities")) == 3

    # Test duckling with a defined date
    # 1381536182000 == 2013/10/12 02:03:02
    dated = Message("Let us meet tomorrow.", time="1381536182000")
    extractor.process(dated)
    found = dated.get("entities")
    assert len(found) == 1
    assert found[0]["text"] == "tomorrow"
    assert found[0]["value"] == "2013-10-13T00:00:00.000Z"
def create_component(self,
                     component_config: Dict[Text, Any],
                     cfg: RasaNLUModelConfig) -> Component:
    """Return the component described by `component_config`, using the cache.

    The cache is consulted first; on a miss the component is built through
    `registry.create_component_by_config` and stored under the cache key
    that the lookup returned.

    Args:
        component_config: configuration of the single component to create;
            its `name` entry is used in the error message.
        cfg: the complete model configuration the component belongs to.

    Returns:
        The cached or newly created component.

    Raises:
        Exception: if a `MissingArgumentError` is raised while looking up
            or creating the component. The original error is attached as
            the cause so its traceback is not lost.
    """
    # Imported here rather than at module level, as in the original code.
    from rasa_nlu import registry
    from rasa_nlu.model import Metadata

    try:
        component, cache_key = self.__get_cached_component(
            component_config, Metadata(cfg.as_dict(), None))
        if component is None:
            component = registry.create_component_by_config(
                component_config, cfg)
            self.__add_to_cache(component, cache_key)
        return component
    except MissingArgumentError as e:  # pragma: no cover
        # Chain explicitly so the root cause stays visible in the traceback.
        raise Exception("Failed to create component `{}`. "
                        "{}".format(component_config['name'], e)) from e
def test_spacy_ner_extractor(component_builder, spacy_nlp):
    """Check `SpacyEntityExtractor` output, unfiltered and restricted to PERSON."""
    default_config = RasaNLUModelConfig(
        {"pipeline": [{"name": "SpacyEntityExtractor"}]})
    extractor = component_builder.create_component(
        default_config.for_component(0), default_config)

    west_message = Message(
        "anywhere in the West",
        {
            "intent": "restaurant_search",
            "entities": [],
            "spacy_doc": spacy_nlp("anywhere in the west"),
        })
    extractor.process(west_message, spacy_nlp=spacy_nlp)

    expected_location = {
        "start": 16,
        "extractor": "SpacyEntityExtractor",
        "end": 20,
        "value": "West",
        "entity": "LOC",
        "confidence": None,
    }
    assert len(west_message.get("entities", [])) == 1
    assert west_message.get("entities")[0] == expected_location

    # Test dimension filtering includes only specified dimensions
    person_message = Message(
        "anywhere in the West with Sebastian Thrun",
        {
            "intent": "example_intent",
            "entities": [],
            "spacy_doc": spacy_nlp("anywhere in the West with Sebastian Thrun"),
        })
    person_config = RasaNLUModelConfig(
        {"pipeline": [{"name": "SpacyEntityExtractor"}]})
    person_config.set_component_attr(0, dimensions=["PERSON"])
    person_extractor = component_builder.create_component(
        person_config.for_component(0), person_config)
    person_extractor.process(person_message, spacy_nlp=spacy_nlp)

    expected_person = {
        "start": 26,
        "extractor": "SpacyEntityExtractor",
        "end": 41,
        "value": "Sebastian Thrun",
        "entity": "PERSON",
        "confidence": None,
    }
    assert len(person_message.get("entities", [])) == 1
    assert person_message.get("entities")[0] == expected_person
import logging from rasa_core.agent import Agent from rasa_core.policies.keras_policy import KerasPolicy from rasa_core.policies.memoization import MemoizationPolicy from rasa_core.policies.form_policy import FormPolicy import warnings import ruamel.yaml as yaml warnings.simplefilter('ignore', yaml.error.UnsafeLoaderWarning) logging.basicConfig(level='INFO') ''' training the nlu ''' args1 = {"pipeline": "tensorflow_embedding"} conf1 = RasaNLUModelConfig(args1) trainer1 = Trainer(conf1) #nlu for agent 1 training_data1 = load_data("./data2/nlu.md") Interpreter1 = trainer1.train(training_data1) model_directory1 = trainer1.persist('./models', fixed_model_name="ner_a2") #core for agent1 domain_file = "domain2.yml" training_data_file = './data2/stories.md' model_path = './models/dialogue_agent_2' agent = Agent(domain_file, policies=[ MemoizationPolicy(max_history=3), KerasPolicy(max_history=3, epochs=500, batch_size=10),
import json
import random

from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer

# Pipeline components, listed in the order they are configured.
component_names = (
    "nlp_spacy",
    "tokenizer_spacy",
    "intent_entity_featurizer_regex",
    "intent_featurizer_spacy",
    "ner_crf",
    "ner_synonyms",
    "intent_classifier_sklearn",
    "ner_spacy",
)
config = RasaNLUModelConfig(
    configuration_values={
        "pipeline": [{"name": component} for component in component_names],
    })

# Train an interpreter on the bundled training data.
trainer = Trainer(config)
training_data = load_data("training_data.json")
interpreter = trainer.train(training_data)

# Initial empty values for the module-level state names.
response, adr = '', ''
params = {}
suggestions, excluded = [], []
def test_crf_extractor(spacy_nlp, ner_crf_pos_feature_config):
    """Train a CRF extractor on two examples and check its feature and filter helpers."""
    from rasa_nlu.extractors.crf_entity_extractor import CRFEntityExtractor

    extractor = CRFEntityExtractor(component_config=ner_crf_pos_feature_config)

    west_example = Message(
        "anywhere in the west",
        {
            "intent": "restaurant_search",
            "entities": [
                {"start": 16, "end": 20, "value": "west", "entity": "location"},
            ],
            "spacy_doc": spacy_nlp("anywhere in the west"),
        })
    restaurant_example = Message(
        "central indian restaurant",
        {
            "intent": "restaurant_search",
            "entities": [
                {
                    "start": 0,
                    "end": 7,
                    "value": "central",
                    "entity": "location",
                    "extractor": "random_extractor",
                },
                {
                    "start": 8,
                    "end": 14,
                    "value": "indian",
                    "entity": "cuisine",
                    "extractor": "CRFEntityExtractor",
                },
            ],
            "spacy_doc": spacy_nlp("central indian restaurant"),
        })
    examples = [west_example, restaurant_example]

    # uses BILOU and the default features
    extractor.train(TrainingData(training_examples=examples),
                    RasaNLUModelConfig())

    sentence = 'anywhere in the west'
    message_attrs = {"spacy_doc": spacy_nlp(sentence)}
    crf_tokens = extractor._from_text_to_crf(Message(sentence, message_attrs))
    assert [token[0] for token in crf_tokens] == ['anywhere', 'in', 'the', 'west']

    features = extractor._sentence_to_features(crf_tokens)
    assert 'BOS' in features[0]
    assert 'EOS' in features[-1]
    assert features[1]['0:low'] == "in"

    sentence = 'anywhere in the west'
    extractor.extract_entities(
        Message(sentence, {"spacy_doc": spacy_nlp(sentence)}))

    trainable = extractor.filter_trainable_entities(examples)
    assert trainable[0].get('entities') == [{
        "start": 16,
        "end": 20,
        "value": "west",
        "entity": "location"
    }], 'Entity without extractor remains'
    assert trainable[1].get('entities') == [{
        "start": 8,
        "end": 14,
        "value": "indian",
        "entity": "cuisine",
        "extractor": "CRFEntityExtractor"
    }], 'Only CRFEntityExtractor entity annotation remains'
    assert examples[1].get('entities')[0] == {
        "start": 0,
        "end": 7,
        "value": "central",
        "entity": "location",
        "extractor": "random_extractor"
    }, 'Original examples are not mutated'
def test_duckling_entity_extractor(component_builder):
    """Exercise `DucklingHTTPExtractor` against a mocked duckling `/parse` endpoint.

    Three scenarios run in sequence, each with its own canned response:
    time extraction from free text, time extraction for a message that
    carries an explicit `time`, and filtering by the configured
    `dimensions`.

    httpretty is always disabled and reset on exit so the mocked endpoint
    cannot leak into other tests, even when an assertion fails.
    """
    # Canned duckling response for the two-sentence message (four time hits).
    four_times_body = (
        '[{"body":"Today","start":0,"value":{"values":[{ '
        '"value":"2018-11-13T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}],"value":"2018-11-13T00:00:00.000-08:00", '
        '"grain":"day","type":"value"},"end":5, '
        '"dim":"time","latent":false},{"body":"the 5th","start":9, '
        '"value":{"values":[{ '
        '"value":"2018-12-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2019-01-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2019-02-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}], '
        '"value":"2018-12-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"},"end":16,"dim":"time", '
        '"latent":false},{"body":"5th of May","start":13,"value":{ '
        '"values":[{ '
        '"value":"2019-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2020-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2021-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"}], '
        '"value":"2019-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"},"end":23,"dim":"time", '
        '"latent":false},{"body":"tomorrow","start":37,"value":{ '
        '"values":[{ '
        '"value":"2018-11-14T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}], '
        '"value":"2018-11-14T00:00:00.000-08:00","grain":"day", '
        '"type":"value"},"end":45,"dim":"time", '
        '"latent":false}]'
    )
    # Canned response for the message with a defined reference time.
    tomorrow_body = (
        '[{"body":"tomorrow","start":12,"value":{"values":[{ '
        '"value":"2013-10-13T00:00:00.000Z","grain":"day", '
        '"type":"value"}],"value":"2013-10-13T00:00:00.000Z", '
        '"grain":"day","type":"value"},"end":20, '
        '"dim":"time","latent":false}]'
    )
    # Canned response holding one time hit and one number hit.
    time_and_number_body = (
        '[{"body":"Yesterday","start":0,"value":{"values":[{ '
        '"value":"2019-02-28T00:00:00.000+01:00","grain":"day", '
        '"type":"value"}],"value":"2019-02-28T00:00:00.000+01:00", '
        '"grain":"day","type":"value"},"end":9,"dim":"time"}, '
        '{"body":"5","start":21,"value":{"value":5,"type":"value"}, '
        '"end":22,"dim":"number"}]'
    )

    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8000/parse",
        body=four_times_body)
    httpretty.enable()
    try:
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]})
        _config.set_component_attr(0,
                                   dimensions=["time"],
                                   timezone="UTC",
                                   url="http://localhost:8000")
        duckling = component_builder.create_component(
            _config.for_component(0), _config)

        # One space between the sentences: the mocked offsets above
        # (e.g. "tomorrow" starting at 37) rely on it.
        message = Message("Today is the 5th of May. Let us meet tomorrow.")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 4

        # Test duckling with a defined date
        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body=tomorrow_body)

        # 1381536182 == 2013/10/12 02:03:02
        message = Message("Let us meet tomorrow.", time="1381536182")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "tomorrow"
        assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"

        # Test dimension filtering includes only specified dimensions
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]})
        _config.set_component_attr(0,
                                   dimensions=["number"],
                                   url="http://localhost:8000")
        duckling_number = component_builder.create_component(
            _config.for_component(0), _config)

        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body=time_and_number_body)

        message = Message("Yesterday there were 5 people in a room")
        duckling_number.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "5"
        assert entities[0]["value"] == 5
    finally:
        # Undo the HTTP mocking and drop the registered URIs so later tests
        # talk to the real network stack again.
        httpretty.disable()
        httpretty.reset()
def train_nlu(data, config, model_dir):
    """Train an NLU model and persist it under the fixed name `weathernlu`.

    Args:
        data: training data location, passed to `load_data`.
        config: location of the JSON pipeline configuration, passed to
            `load_json`.
        model_dir: directory handed to `Trainer.persist`.

    Returns:
        Whatever `Trainer.persist` returns for the stored model. Earlier
        versions of this function computed this value and discarded it;
        callers that ignore the return value are unaffected.
    """
    training_data = load_data(data)
    rasa_model_config = RasaNLUModelConfig(load_json(config))
    trainer = Trainer(rasa_model_config)
    trainer.train(training_data)
    model_directory = trainer.persist(model_dir,
                                      fixed_model_name='weathernlu')
    return model_directory
def create(cls, config: RasaNLUModelConfig) -> 'DucklingHTTPExtractor':
    """Build an extractor from the model config.

    The component's own settings are looked up with `cls.name` and
    `cls.defaults`, and passed to the constructor together with the
    configured language.
    """
    component_config = config.for_component(cls.name, cls.defaults)
    language = config.language
    return cls(component_config, language)
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer

# Load the markdown training examples first.
training_data = load_data('nlu.md')

# spaCy-based intent classification pipeline, in execution order.
pipeline = [
    {"name": component}
    for component in (
        "nlp_spacy",
        "tokenizer_spacy",
        "intent_featurizer_spacy",
        "intent_classifier_sklearn",
    )
]

# Train on the loaded data and store the resulting model.
model_config = RasaNLUModelConfig({"pipeline": pipeline})
trainer = Trainer(model_config)
interpreter = trainer.train(training_data)
model_directory = trainer.persist('./projects/default/')
"name": "tokenizer_bert" }, { "name": "intent_featurizer_bert", "lm_spell_checker": True, "mask_spell_checker": False, "mul_similar_matrix": True, "spell_checker_score": 1 }] # pipeline = [{"name": "tokenizer_bert"}, # {"name": "intent_featurizer_bert", # "lm_spell_checker": False, # "mask_spell_checker": True, # "mul_similar_matrix": True, # "spell_checker_score": 1} # ] training_data = load_data('./data/examples/rasa/demo-rasa_zh.json') trainer = Trainer( RasaNLUModelConfig({ "pipeline": pipeline, "language": "zh" })) interpreter = trainer.train(training_data) model = interpreter # model = Interpreter.load('./projects/spell_checker/default/model_20190115-163425') print('loaded') tornado.options.parse_command_line() http_server = tornado.httpserver.HTTPServer(Application()) http_server.listen(options.port) tornado.ioloop.IOLoop.instance().start()
from rasa_nlu.train import load_data from rasa_nlu.config import RasaNLUModelConfig from rasa_nlu.utils.spacy_utils import SpacyNLP from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer from rasa_nlu.featurizers.spacy_featurizer import SpacyFeaturizer import numpy as np, spacy training_data = load_data("data/examples/rasa/demo-rasa.json") config = RasaNLUModelConfig() SpacyNLP(nlp=spacy.load("en")).train(training_data, config) SpacyTokenizer().train(training_data, config) SpacyFeaturizer().train(training_data, config) from sklearn.preprocessing import LabelEncoder from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC labels = [e.get("intent") for e in training_data.intent_examples] le = LabelEncoder() y = le.fit_transform(labels) X = np.stack([ example.get("text_features") for example in training_data.intent_examples ]) defaults = { # C parameter of the svm - cross validation will select the best value "C": [1, 2, 5, 10, 20, 100], # the kernels to use for the svm training - cross validation will # decide which one of them performs best
def test_duckling_entity_extractor(component_builder):
    """Exercise `DucklingHTTPExtractor` against a mocked duckling `/parse` endpoint.

    Three scenarios run in sequence, each with its own canned response:
    time extraction from free text, time extraction for a message that
    carries an explicit `time`, and filtering by the configured
    `dimensions`.

    httpretty is always disabled and reset on exit so the mocked endpoint
    cannot leak into other tests, even when an assertion fails.
    """
    # Canned duckling response for the two-sentence message (four time hits).
    four_times_body = (
        '[{"body":"Today","start":0,"value":{"values":[{ '
        '"value":"2018-11-13T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}],"value":"2018-11-13T00:00:00.000-08:00", '
        '"grain":"day","type":"value"},"end":5, '
        '"dim":"time","latent":false},{"body":"the 5th","start":9, '
        '"value":{"values":[{ '
        '"value":"2018-12-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2019-01-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2019-02-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}], '
        '"value":"2018-12-05T00:00:00.000-08:00","grain":"day", '
        '"type":"value"},"end":16,"dim":"time", '
        '"latent":false},{"body":"5th of May","start":13,"value":{ '
        '"values":[{ '
        '"value":"2019-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2020-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"}, '
        '{"value":"2021-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"}], '
        '"value":"2019-05-05T00:00:00.000-07:00","grain":"day", '
        '"type":"value"},"end":23,"dim":"time", '
        '"latent":false},{"body":"tomorrow","start":37,"value":{ '
        '"values":[{ '
        '"value":"2018-11-14T00:00:00.000-08:00","grain":"day", '
        '"type":"value"}], '
        '"value":"2018-11-14T00:00:00.000-08:00","grain":"day", '
        '"type":"value"},"end":45,"dim":"time", '
        '"latent":false}]'
    )
    # Canned response for the message with a defined reference time.
    tomorrow_body = (
        '[{"body":"tomorrow","start":12,"value":{"values":[{ '
        '"value":"2013-10-13T00:00:00.000Z","grain":"day", '
        '"type":"value"}],"value":"2013-10-13T00:00:00.000Z", '
        '"grain":"day","type":"value"},"end":20, '
        '"dim":"time","latent":false}]'
    )
    # Canned response holding one time hit and one number hit.
    time_and_number_body = (
        '[{"body":"Yesterday","start":0,"value":{"values":[{ '
        '"value":"2019-02-28T00:00:00.000+01:00","grain":"day", '
        '"type":"value"}],"value":"2019-02-28T00:00:00.000+01:00", '
        '"grain":"day","type":"value"},"end":9,"dim":"time"}, '
        '{"body":"5","start":21,"value":{"value":5,"type":"value"}, '
        '"end":22,"dim":"number"}]'
    )

    httpretty.register_uri(
        httpretty.POST,
        "http://localhost:8000/parse",
        body=four_times_body)
    httpretty.enable()
    try:
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]})
        _config.set_component_attr(0,
                                   dimensions=["time"],
                                   timezone="UTC",
                                   url="http://localhost:8000")
        duckling = component_builder.create_component(
            _config.for_component(0), _config)

        # One space between the sentences: the mocked offsets above
        # (e.g. "tomorrow" starting at 37) rely on it.
        message = Message("Today is the 5th of May. Let us meet tomorrow.")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 4

        # Test duckling with a defined date
        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body=tomorrow_body)

        # 1381536182 == 2013/10/12 02:03:02
        message = Message("Let us meet tomorrow.", time="1381536182")
        duckling.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "tomorrow"
        assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"

        # Test dimension filtering includes only specified dimensions
        _config = RasaNLUModelConfig(
            {"pipeline": [{"name": "DucklingHTTPExtractor"}]})
        _config.set_component_attr(0,
                                   dimensions=["number"],
                                   url="http://localhost:8000")
        duckling_number = component_builder.create_component(
            _config.for_component(0), _config)

        httpretty.register_uri(
            httpretty.POST,
            "http://localhost:8000/parse",
            body=time_and_number_body)

        message = Message("Yesterday there were 5 people in a room")
        duckling_number.process(message)
        entities = message.get("entities")
        assert len(entities) == 1
        assert entities[0]["text"] == "5"
        assert entities[0]["value"] == 5
    finally:
        # Undo the HTTP mocking and drop the registered URIs so later tests
        # talk to the real network stack again.
        httpretty.disable()
        httpretty.reset()