def test_override_defaults_supervised_embeddings_pipeline():
    """Components built from per-component configs must pick up overridden defaults."""
    model_config = config.load("data/test/config_embedding_test.yml")
    component_builder = ComponentBuilder()

    # First pipeline component: the config overrides its ngram upper bound.
    first_component = component_builder.create_component(
        model_config.for_component(0), model_config)
    assert first_component.max_ngram == 3

    # Second pipeline component: the config overrides its epoch count.
    second_component = component_builder.create_component(
        model_config.for_component(1), model_config)
    assert second_component.epochs == 10
def test_override_defaults_tensorflow_embedding_pipeline():
    """Components created by name must honour defaults overridden in the config."""
    model_config = config.load("data/test/config_embedding_test.yml")
    component_builder = ComponentBuilder()

    # The count-vectors featurizer's ngram range is overridden by the test config.
    featurizer = component_builder.create_component(
        "intent_featurizer_count_vectors", model_config)
    assert featurizer.max_ngram == 3

    # The embedding classifier's epoch count is overridden as well.
    classifier = component_builder.create_component(
        "intent_classifier_tensorflow_embedding", model_config)
    assert classifier.epochs == 10
def test_override_defaults_supervised_embeddings_pipeline():
    """Per-component config overrides must reach the instantiated components."""
    cfg = config.load("data/test/config_embedding_test.yml")
    builder = ComponentBuilder()

    # (component index, attribute expected to be overridden, expected value)
    expectations = [(0, "max_ngram", 3), (1, "epochs", 10)]
    for index, attribute, expected in expectations:
        component = builder.create_component(cfg.for_component(index), cfg)
        assert getattr(component, attribute) == expected
def __init__(self,
             project_dir=None,
             max_training_processes=1,
             response_log=None,
             emulation_mode=None,
             remote_storage=None,
             component_builder=None,
             model_server=None,
             wait_time_between_pulls=None):
    """Set up query logging, the emulator, project storage and the training pool."""
    # Never allow fewer than one training process.
    self._training_processes = max(max_training_processes, 1)
    self._current_training_processes = 0
    self.responses = self._create_query_logger(response_log)
    self.project_dir = config.make_path_absolute(project_dir)
    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    self.model_server = model_server
    self.wait_time_between_pulls = wait_time_between_pulls
    # Reuse the caller's builder when given; otherwise build a caching one.
    self.component_builder = (component_builder
                              if component_builder
                              else ComponentBuilder(use_cache=True))
    self.project_store = self._create_project_store(project_dir)
    self.pool = ProcessPool(self._training_processes)
def train_nlu():
    """Train an NLU model from the robot config, print the elapsed time,
    and return the directory the model was persisted to."""
    import datetime

    from rasa_nlu import config
    from rasa_nlu.components import ComponentBuilder
    from rasa_nlu.model import Trainer
    from rasa_nlu.training_data import load_data

    started_at = datetime.datetime.now()

    model_config = config.load("robot/config/nlu_model_config.yml")
    project_name = model_config.get("project")
    fixed_model_name = model_config.get("fixed_model_name")
    persist_path = model_config.get("path")
    thread_count = model_config.get('num_threads')
    nlu_data_file = str(model_config.get("data"))

    examples = load_data(nlu_data_file)
    # Cache components between pipelines (where possible).
    component_builder = ComponentBuilder(use_cache=True)
    trainer = Trainer(model_config, component_builder=component_builder)
    trainer.train(examples, num_threads=thread_count)
    model_directory = trainer.persist(path=persist_path,
                                      project_name=project_name,
                                      fixed_model_name=fixed_model_name)

    finished_at = datetime.datetime.now()
    print('一共耗时', (finished_at - started_at).seconds, 's')
    return model_directory
def __init__(self,
             project_dir=None,
             max_training_processes=1,
             response_log=None,
             emulation_mode=None,
             remote_storage=None,
             component_builder=None,
             model_server=None,
             wait_time_between_pulls=None):
    """Set up logging, project storage and a spawn-based training process pool."""
    # At least one training process, regardless of the requested count.
    self._training_processes = max(max_training_processes, 1)
    self._current_training_processes = 0
    self.responses = self._create_query_logger(response_log)
    self.project_dir = config.make_path_absolute(project_dir)
    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    self.model_server = model_server
    self.wait_time_between_pulls = wait_time_between_pulls
    # Fall back to a caching builder when the caller did not supply one.
    self.component_builder = (component_builder
                              if component_builder
                              else ComponentBuilder(use_cache=True))
    self.project_store = self._create_project_store(project_dir)
    if six.PY3:
        # tensorflow sessions are not fork-safe, and training processes
        # have to be spawned instead of forked. See
        # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
        multiprocessing.set_start_method('spawn', force=True)
    self.pool = ProcessPool(self._training_processes)
def __init__(self,
             project_dir=None,
             max_training_processes=1,
             response_log=None,
             emulation_mode=None,
             remote_storage=None,
             component_builder=None):
    """Set up query logging, the emulator, the project manager and the
    training process pool.

    ``component_builder`` is optional; a caching builder is created when
    it is not supplied.
    """
    # Never allow fewer than one training process.
    self._training_processes = max(max_training_processes, 1)
    self.responses = self._create_query_logger(response_log)
    self.project_dir = config.make_path_absolute(project_dir)
    self.emulator = self._create_emulator(emulation_mode)
    self.remote_storage = remote_storage
    if component_builder:
        self.component_builder = component_builder
    else:
        self.component_builder = ComponentBuilder(use_cache=True)
    # FIX: pass the resolved builder (not the raw argument, which may be
    # None) so the project manager shares this router's component cache
    # instead of potentially creating a second builder of its own.
    self.project_manager = ProjectManager.create(project_dir,
                                                 remote_storage,
                                                 self.component_builder)
    self.project_store = self.project_manager.get_projects()
    self.pool = ProcessPool(self._training_processes)
def __init__(self):
    """Build the shared, caching component builder and load the chatbot config.

    Logs and re-raises any failure so callers see construction errors.
    """
    try:
        # Cache components between pipelines where possible.
        self.builder = ComponentBuilder(use_cache=True)
        self.config = ChatBotConfigParser().parser
    except Exception as error:
        logger.error("Unable to build NluModel Obj, exception : %s" % (str(error)))
        raise (error)
def __init__(self, config, component_builder):
    """Wire up the router from a raw config dict and an optional builder."""
    self.config = config
    self.responses = DataRouter._create_query_logger(config['response_log'])
    self._train_procs = []
    self.model_dir = config['path']
    self.token = config['token']
    self.emulator = self.__create_emulator()
    # Fall back to a caching builder when none was supplied.
    if component_builder:
        self.component_builder = component_builder
    else:
        self.component_builder = ComponentBuilder(use_cache=True)
    self.model_store = self.__create_model_store()
def __init__(self, config, component_builder):
    """Initialise the router: logging, model dir, emulator, builder and pool."""
    # Guard against a non-positive configured process count.
    requested_processes = config['max_training_processes']
    self._training_processes = requested_processes if requested_processes > 0 else 1
    self.config = config
    self.responses = self._create_query_logger(config)
    self.model_dir = config['path']
    self.emulator = self._create_emulator()
    # Fall back to a caching builder when none was supplied.
    if component_builder:
        self.component_builder = component_builder
    else:
        self.component_builder = ComponentBuilder(use_cache=True)
    self.project_store = self._create_project_store()
    self.pool = ProcessPool(self._training_processes)
def train_nlu(self):
    """Train the NLU model from data/nlu.md and return the persisted directory."""
    # A fresh, non-caching builder for this training run.
    component_builder = ComponentBuilder(use_cache=False)
    self.__trainer_data = load_data("data/nlu.md")
    self.__trainer = Trainer(config.load("config/config.yml"), component_builder)
    self.__trainer.train(self.__trainer_data)
    self.__model_directory = self.__trainer.persist(
        'model/', fixed_model_name='Jarvis')
    return self.__model_directory
def __init__(self, config, component_builder):
    """Initialise the router from a raw config dict; builds the process pool."""
    # Guard against a non-positive configured process count.
    requested_processes = config['max_training_processes']
    self._training_processes = requested_processes if requested_processes > 0 else 1
    self.config = config
    self.responses = DataRouter._create_query_logger(config['response_log'])
    self._trainings_queued = 0
    self.model_dir = config['path']
    self.token = config['token']
    self.emulator = self.__create_emulator()
    # Fall back to a caching builder when none was supplied.
    if component_builder:
        self.component_builder = component_builder
    else:
        self.component_builder = ComponentBuilder(use_cache=True)
    self.model_store = self.__create_model_store()
    self.pool = ProcessPool(self._training_processes)
def __init__(self, project_dir, remote_storage, component_builder=None):
    # type: (str, str, ComponentBuilder) -> None
    """Remember storage locations and ensure a component builder exists."""
    self.project_dir = project_dir
    self.remote_storage = remote_storage
    # A caching builder is substituted when the caller supplies none.
    self.component_builder = (component_builder
                              or ComponentBuilder(use_cache=True))
    # Populated later; no projects are loaded at construction time.
    self.project_store = None
def _build_pipeline(
        cfg: RasaNLUModelConfig,
        component_builder: ComponentBuilder) -> List[Component]:
    """Transform the passed names of the pipeline components into classes"""
    # One component instance per configured name, in pipeline order.
    return [
        component_builder.create_component(component_name, cfg)
        for component_name in cfg.component_names
    ]
def _build_pipeline(cfg: RasaNLUModelConfig,
                    component_builder: ComponentBuilder
                    ) -> List[Component]:
    """Transform the passed names of the pipeline components into classes.

    Builds each component from its per-component config, preserving
    pipeline order.
    """
    pipeline = []
    # Idiom fix: enumerate the pipeline instead of `for i in range(len(...))`.
    for index, _ in enumerate(cfg.pipeline):
        component_cfg = cfg.for_component(index)
        pipeline.append(component_builder.create_component(component_cfg, cfg))
    return pipeline
def train_nlu_model():
    """Train the NLU model and return the directory it was persisted to.

    On failure the exception is printed and None is returned (best-effort).
    """
    try:
        # Cache components between pipelines (where possible).
        builder = ComponentBuilder(use_cache=True)
        examples = load_data(RASA_NLU_TRAINING_DATA_PATH)
        trainer = Trainer(RasaNLUConfig(RASA_NLU_CONFIG_PATH), builder)
        trainer.train(examples)
        return trainer.persist(RASA_NLU_MODEL_PATH,
                               fixed_model_name=RASA_NLU_MODEL_NAME)
    except Exception as e:
        print('Exception in train nlu', e)
def call():
    """Train the API's NLU model and return the persisted model directory."""
    from rasa_nlu import config
    from rasa_nlu.components import ComponentBuilder
    from rasa_nlu.model import Trainer
    from rasa_nlu.training_data import load_data

    # Cache components between pipelines where possible.
    component_builder = ComponentBuilder(use_cache=True)
    examples = load_data('./rasa_nlu_api/dataset.json')
    trainer = Trainer(config.load("./rasa_nlu_api/config_pipeline.yml"),
                      component_builder)
    trainer.train(examples)
    model_directory = trainer.persist('./rasa_nlu_api/', fixed_model_name="model")
    print('done')
    return model_directory
def train_nlu_mode(nlu_train_file="/nlu_train_data/testData.json",
                   nlu_config_file="/nlu_model/config_spacy.yml",
                   nlu_persist_dir="/nlu_model",
                   nlu_model_name="evanlu"):
    """Train an NLU model and persist it.

    :param nlu_train_file: training data file, relative to the current dir
    :param nlu_config_file: pipeline config file, relative to the current dir
    :param nlu_persist_dir: directory to persist the trained model into
    :param nlu_model_name: fixed name for the persisted model
    :return: the directory the model was persisted to
    """
    # will cache components between pipelines (where possible)
    builder = ComponentBuilder(use_cache=True)
    training_data = load_data(add_cur_dir(nlu_train_file))
    trainer = Trainer(config.load(add_cur_dir(nlu_config_file)), builder)
    trainer.train(training_data)
    model_directory = trainer.persist(add_cur_dir(nlu_persist_dir),
                                      fixed_model_name=nlu_model_name)
    # Removed: unreachable `pass` after return and commented-out dead code.
    return model_directory
def train_update(update, by):
    """Train and persist an NLU model for a repository ``update``.

    ``update`` appears to be a Django model with examples, a language and a
    repository — TODO confirm against the caller. ``by`` identifies who
    triggered the training (passed straight to ``update.start_training``).
    """
    # Mark the update as in-training, recording who triggered it.
    update.start_training(by)
    # One Rasa Message per example (text + intent + annotated entities),
    # all resolved in the update's language.
    examples = [
        Message.build(
            text=example.get_text(update.language),
            intent=example.intent,
            entities=[
                example_entity.rasa_nlu_data
                for example_entity in example.get_entities(update.language)
            ])
        for example in update.examples
    ]
    # Only examples that have at least one labelled entity.
    label_examples_query = update.examples \
        .filter(entities__entity__label__isnull=False) \
        .annotate(entities_count=models.Count('entities')) \
        .filter(entities_count__gt=0)
    # Second view of those examples where each entity's *label* is exported
    # as if it were the entity itself (label_as_entity=True).
    label_examples = [
        Message.build(
            text=example.get_text(update.language),
            entities=[
                example_entity.get_rasa_nlu_data(label_as_entity=True)
                for example_entity in filter(
                    lambda ee: ee.entity.label,
                    example.get_entities(update.language))
            ])
        for example in label_examples_query
    ]
    rasa_nlu_config = get_rasa_nlu_config_from_update(update)
    # Fresh, non-caching builder per training run.
    trainer = Trainer(rasa_nlu_config, ComponentBuilder(use_cache=False))
    training_data = BothubTrainingData(
        label_training_examples=label_examples, training_examples=examples)
    trainer.train(training_data)
    # Persist via the Bothub persistor into a throwaway temp dir; the
    # persistor is responsible for the durable copy.
    persistor = BothubPersistor(update)
    trainer.persist(
        mkdtemp(),
        persistor=persistor,
        project_name=str(update.repository.uuid),
        fixed_model_name=str(update.id))
#!/usr/bin/env python3 import twitter import twitter_client import eliteprospect import parser import engine import logging import tasks import os from rasa_nlu.components import ComponentBuilder from rasa_nlu.model import Metadata, Interpreter builder = ComponentBuilder(use_cache=False) try: model_dir = os.environ["MODEL_DIR"] twitter_consumer_key = os.environ["TWITTER_CONSUMER_KEY"] twitter_consumer_secret = os.environ["TWITTER_CONSUMER_SECRET"] twitter_access_token_key = os.environ["TWITTER_ACCESS_TOKEN_KEY"] twitter_access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"] eliteprospect_key = os.environ["EP_API_KEY"] except KeyError: print "Please set all the env variables" sys.exit(1) # where `model_directory points to the folder the mo interpreter = Interpreter.load(model_dir) api = twitter.Api(consumer_key=twitter_consumer_key, consumer_secret=twitter_consumer_secret, access_token_key=twitter_access_token_key,
class NLU_DE(object):
    """Intent parser combining a rule-based first pass with a Rasa NLU model.

    NOTE(review): the class is named *_DE (German) but is trained on the
    ``trainingdataEN``/``configfileEN`` files — confirm this is intended.
    """

    # Model is trained once at class-definition time; all instances share it.
    builder = ComponentBuilder(use_cache=True)
    training_data = load_data(trainingdataEN)
    trainer = Trainer(config.load(configfileEN), builder)
    trainer.train(training_data)
    interpreter = Interpreter.load(trainer.persist('model/default'))
    intent = ""
    confidence = 0
    dictionary = Woerterbuch()  # word list used for spellcheck/profanity below
    sNLU = simpleNLU_DE()  # rule-based NLU consulted before the model

    def __init__(self):
        return

    def parse_thema(self, eingabe):
        """Map a (possibly misspelled) German topic word to its English topic id.

        Matches each word with Levenshtein distance <= 2; returns "" when no
        topic matches.
        """
        String = ""
        parsetext = eingabe.split(' ')
        for word in parsetext:
            String = word
            if (levenshtein(String, "Kontrollstrukturen") <= 2):
                return "controll structures"
            elif (levenshtein(String, "Programmstruktur") <= 2):
                return "programm structure"
            elif (levenshtein(String, "Grundlagen") <= 2):
                return "basics"
            elif (levenshtein(String, "Arrays") <= 2):
                return "arrays"
            elif (levenshtein(String, "Operatoren") <= 2):
                return "operators"
            elif (levenshtein(String, "Anweisungen") <= 2):
                return "statements"
            elif (levenshtein(String, "Methoden") <= 2):
                return "methods"
            elif (levenshtein(String, "Klassen") <= 2):
                return "classes"
        return ""

    def parse(self, Eingabe):
        """Classify the intent of a user utterance.

        Returns the intent name, "profanity" for profane input, or "" when
        the input fails spellcheck or the classifier confidence is < 0.40.
        """
        #print("trying to parse: \"" + Eingabe + "\"")
        # Strip punctuation and lowercase before classification.
        eingabe = Eingabe.lower()
        eingabe = eingabe.replace(",", "")
        eingabe = eingabe.replace(".", "")
        eingabe = eingabe.replace(";", "")
        eingabe = eingabe.replace("!", "")
        eingabe = eingabe.replace("?", "")
        # Rule-based first pass: if the simple NLU recognises the raw input,
        # trust it outright.
        if (self.sNLU.toleranzpruefung(Eingabe) != ""):
            print("nlu: eingabe: \"" + eingabe + "\", intent: \"" + self.sNLU.toleranzpruefung(Eingabe) + "\", with confidence 100%")
            return self.sNLU.toleranzpruefung(Eingabe)
        # Collapse runs of spaces and strip a leading space.
        # NOTE(review): these two literals were garbled by whitespace
        # collapsing in the source; reconstructed as double-space ->
        # single-space — confirm against the original file.
        while "  " in eingabe:
            eingabe = eingabe.replace("  ", " ")
        if eingabe.startswith(" "):
            eingabe = eingabe[1:]
        for word in eingabe.split(' '):
            if (self.dictionary.profanity_check(word) == True):
                return "profanity"
        for word in eingabe.split(' '):
            if (self.dictionary.spellcheck(word) == False):
                return ""
        try:
            intent = ""
            parse = self.interpreter.parse((eingabe))
            intent = parse['intent']['name']
            confidence = str(parse['intent']['confidence'])
            print("nlu: eingabe: \"" + eingabe + "\", intent: \"" + intent + "\", with confidence " + str(float(confidence)))
            if (intent == "name"):
                1 + 1  # no-op placeholder; name extraction is disabled below
                #print(str(self.parseName(eingabe)))
            # Reject low-confidence classifications.
            if float(confidence) < 0.40:
                intent = ""
            else:
                return intent
        except Exception as e:
            print('Generic exception: {}'.format(e))
            return intent
        return intent
        #except:
        #    print("Fehler: keine Exeception!")
        #    return ""

    def parseName(self, String):
        """Extract the name following "my name is" / "im called"; "" if absent."""
        patterns = {
            r"my name is ([a-z])+": "my name is ",
            r"im called ([a-z])+": "im called "
        }
        for pattern in patterns:
            if (re.search(pattern, String)):
                # Keep only the matched phrase, then drop its fixed prefix.
                String = String.replace(String, re.search(pattern, String).group(0))
                String = String.replace(
                    re.search(patterns[pattern], String).group(0), "")
                return String
        return ""
        #self.ner.name_extraction(String)
class RasaClassifier:
    """Trains and serves Rasa NLU interpreters for fact, problem-category
    and acknowledgement classification."""

    # Directories & Files
    config_file = "rasa/config/rasa_config.json"
    model_dir = "rasa/projects/justiceai/"
    fact_data_dir = "rasa/data/fact/"
    category_data_dir = "rasa/data/category/"
    acknowledgement_data_dir = "rasa/data/acknowledgement/"

    # Dicts (class-level: shared across all instances)
    category_interpreters = {}
    fact_interpreters = {}
    acknowledgement_interpreters = {}

    # RASA Caching
    builder = ComponentBuilder(use_cache=True)

    def __init__(self):
        # One shared trainer configured from the JSON config above.
        self.rasa_config = RasaNLUConfig(self.config_file)
        self.trainer = Trainer(self.rasa_config, self.builder)

    def train(self, force_train=False, initialize_interpreters=True):
        """
        Trains the data sets from facts and problem categories separately
        :param force_train: If False will use saved models
        :param initialize_interpreters: If True the interpreters get initialized with models already present
        """
        # Train fact classifier
        self.__train_interpreter(self.fact_data_dir, self.fact_interpreters,
                                 force_train=force_train,
                                 initialize_interpreters=initialize_interpreters)
        # Train problem category classifier
        self.__train_interpreter(self.category_data_dir, self.category_interpreters,
                                 force_train=force_train,
                                 initialize_interpreters=initialize_interpreters)
        # Train acknowledgement classifier
        self.__train_interpreter(self.acknowledgement_data_dir,
                                 self.acknowledgement_interpreters,
                                 force_train=force_train,
                                 initialize_interpreters=initialize_interpreters)

    def classify_problem_category(self, message, person_type):
        """
        Classifies a claim category based on a message and person type
        :param message: Message received from user
        :param person_type: The person type of the user AS A STRING, ie: "TENANT".
            If passing PersonType, use .value - Ex: PersonType.TENANT.value
        :return: The classified claim category dict from RASA
        """
        # NOTE(review): implicitly returns None for any other person type.
        if person_type.lower() == "tenant":
            return self.category_interpreters['category_tenant'].parse(message.lower())
        elif person_type.lower() == "landlord":
            return self.category_interpreters['category_landlord'].parse(message.lower())

    def classify_fact(self, fact_name, message):
        """
        Classifies a fact based on a message
        :param fact_name: Name of the fact being classified i.e. tenant_owes_rent
        :param message: Message received from user
        :return: The classified fact dict from RASA, or None for unknown facts
        """
        if fact_name in self.fact_interpreters:
            return self.fact_interpreters[fact_name].parse(message.lower())
        return None

    def classify_acknowledgement(self, message):
        """
        Classifies a true/false acknowledgement based on a message. Ie: "sure thing", "yeah ok", "nah"
        :param message: Message received from use
        :return: The classified fact dict from RASA
        """
        return self.acknowledgement_interpreters['additional_fact_acknowledgement'].parse(message.lower())

    def __train_interpreter(self, training_data_dir, interpreter_dict, force_train, initialize_interpreters):
        """
        Trains the interpreters for fact and claim category classification
        :param training_data_dir: Directory where data is stores
        :param interpreter_dict: Dictionary will contain the interpreters
        :param force_train: If True will retrain model data
        :param initialize_interpreters: If True will initialize the interpreters
        """
        print("~~Starting training with data directory {}~~".format(training_data_dir))
        if force_train is False:
            print("->No force train, using saved models.".format(training_data_dir))
        if initialize_interpreters is False:
            print("->No interpreter initialization. Will only create model data.".format(training_data_dir))
        training_start = timeit.default_timer()
        fact_files = os.listdir(training_data_dir)
        # One model (and optionally one loaded interpreter) per data file;
        # the file's base name is the fact/category key.
        for filename in fact_files:
            fact_key = os.path.splitext(filename)[0]
            if force_train:
                training_data = load_data(training_data_dir + filename)
                self.trainer.train(training_data)
                model_directory = self.trainer.persist(path=self.model_dir, fixed_model_name=fact_key)
            else:
                # Reuse the previously persisted model directory.
                model_directory = self.model_dir + "default/" + fact_key
            print("Model data directory for fact {}: {}".format(fact_key, model_directory))
            if initialize_interpreters:
                interpreter_dict[fact_key] = Interpreter.load(model_directory, self.rasa_config, self.builder)
        training_end = timeit.default_timer()
        total_training_time = round(training_end - training_start, 2)
        print("~~Training Finished. Took {}s for {} facts ~".format(total_training_time, len(fact_files)))
class Parsing:
    """Intent/answer parser backed by a Rasa NLU model trained at import time."""

    # Model is trained once at class-definition time; all instances share it.
    builder = ComponentBuilder(use_cache=True)
    training_data = load_data('liza/rasa/training.json')
    trainer = Trainer(config.load("liza/rasa/config_spacy.yml"), builder)
    trainer.train(training_data)
    interpreter = Interpreter.load(trainer.persist('model/default'))
    # interpreter = Interpreter.load('model\default\default\model_20180607-104830')

    def __init__(self):
        self.ui = None
        print ("--parser initialised--")

    def setui(self,ui):
        # UI object used for re-prompting; must provide tell/prompt/listen.
        self.ui = ui

    def train(self):
        print("trained.")

    def getAnswer(self):
        # Random yes/no answer.
        return random.choice(["yes","no"])

    def askAgain(self):
        """Re-prompt the user and parse the new answer.

        NOTE(review): the recursive branch assigns the *string* returned by
        askAgain() to ``parse`` and then indexes it like a dict — this looks
        like a latent bug; confirm before relying on deep re-prompts.
        """
        self.ui.tell("Sorry, I didn't get that. Can you rephrase?")
        self.ui.prompt()
        answer = self.ui.listen()
        parse = self.interpreter.parse(answer)
        print(" rasa nlu " + parse['intent']['name'] + ", with confidence " + str(parse['intent']['confidence']) + "%\n")
        if parse['intent']['confidence'] < 0.4:
            parse = self.askAgain()
        return parse['intent']['name']

    def parsePercent(self,string):
        """Extract a 0-100 confidence value from free text."""
        #numbers outside of the range are not parsed correctly. e.g. 105 would be parsed as "1".
        #print(string)
        if str(100) in string:
            return 100
        # Two-digit numbers first, then single digits.
        for i in range(10,100):
            if str(i) in string:
                return i
        for i in range(1,11):
            if str(i) in string:
                return i
        if "0" in string:
            return 0
        # Phrase heuristics, checked in order (first match wins).
        # NOTE(review): the second "no idea" and "very high probability"
        # branches are unreachable (shadowed by earlier matches).
        if "totally sure" in string: return 90
        if "unsure" in string: return 30
        if "no idea" in string: return 20
        if "i guess" in string: return 60
        if "no really" in string: return 30
        if "no idea" in string: return 40
        if "not very" in string: return 10
        if "not high" in string: return 25
        if "very sure" in string: return 90
        if "quite" in string: return 75
        if "highly likely" in string: return 85
        if "confident" in string: return 95
        if "extremely" in string: return 99
        if "absolutely" in string: return 99
        if "sure" in string: return 80
        if "likely" in string: return 80
        if "high probability" in string: return 80
        if "low probability" in string: return 30
        if "very high probability" in string: return 90
        if "tiny probability" in string: return 10
        if "high" in string: return 80
        if "low" in string: return 30
        # Neutral default when nothing matched.
        return 50

    def parse(self,String):
        """Classify an utterance; re-prompts below 0.4 confidence; "" on error."""
        #print("trying to parse: " + String + ".")
        try:
            # Strip punctuation and lowercase before classification.
            String = String.replace(".","")
            String = String.replace(",","")
            String = String.replace("!","")
            String = String.lower()
            #print(String + " is being parsed")
            parse = self.interpreter.parse((String))
            print(" rasa nlu " + parse['intent']['name'] + ", with confidence " + str(parse['intent']['confidence']))
            if parse['intent']['confidence'] < 0.4:
                parse = self.askAgain()
                #print(parse['intent']['name'])
            return parse['intent']['name']
        except:
            print("no input")
            return ""

    def parseQuiz(self,String,Story):
        """Judge a quiz answer against Story.correct / Story.incorrect.

        Returns "explain", "correct", "incorrect" or "" (unparseable input).
        """
        if String == None:
            return ""
        print("trying to parse: " + String + ".")
        parse = self.interpreter.parse(String)
        if parse['intent']['name'] == "explain" and parse['intent']['confidence'] > 0.4:
            return "explain"
        if (Story.correct.isdigit() and len(Story.incorrect)==0):
            #it's a story which expects a percentage or a likelihood as an answer!
            #print("we need to parse the input (" + String + ") as percent!")
            percent = self.parsePercent(String)
            correct = int(Story.correct)
            print("Answer was parsed interpreted as " + str(percent) + " and the correct percentage is " + str(correct))
            # Within 20 points of the expected percentage counts as correct.
            if (abs(correct-percent) > 20):
                return "incorrect"
            return "correct"
        try:
            String = String.replace(".","")
            String = String.replace("!","")
            String = String.lower()
            String = "~"+String+"~"
            corrects = Story.correct.split(", ")
            incorrects = Story.incorrect.split(", ")
            # print ("\n")
            # print (String)
            # print(corrects)
            # print(incorrects)
            for c in corrects:
                if c.lower() in String:
                    #print(c)
                    #print("correct! " + c)
                    return "correct"
                #else:
                #    print("no " + c)
            # for/else: runs only when no incorrect keyword matched either,
            # in which case the user is re-prompted recursively.
            for i in incorrects:
                if i.lower() in String:
                    #print(i)
                    #print("incorrect! " + i)
                    return "incorrect"
                #else:
                #    print("no " + i)
            else:
                self.ui.tell("Sorry, I didn't get that. Can you rephrase?")
                answer = self.ui.listen()
                return self.parseQuiz(answer,Story)
        except:
            print("no input")
            return ""
# Script fragment: translate non-English input to English, then train a
# Rasa NLU model and classify the translated input.
# NOTE(review): detected_lang, out_list and translator are defined outside
# this fragment — presumably a googletrans-style Translator; confirm.
if detected_lang == 'en':
    trans_input = out_list
else:
    # Pivot translation: Hindi -> Urdu -> English.
    mid_trans = translator.translate(out_list, src="hi", dest = "ur").text
    trans_input = translator.translate(mid_trans, src = "ur", dest = "en").text
print("Translated Input: ", trans_input)

#----- ASSIGN TO RASA_NLU -----#
#Training Time
from rasa_nlu.converters import load_data
from rasa_nlu.config import RasaNLUConfig
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.model import Trainer
builder = ComponentBuilder(use_cache=True) #will cache components between pipelines (where possible)
training_data = load_data('testData.json')
trainer = Trainer(RasaNLUConfig("config_spacy.json"), builder)
trainer.train(training_data)
model_directory = trainer.persist('./models/') #returns the directory the model is stored in

#Prediction Time
from rasa_nlu.model import Metadata, Interpreter
config = RasaNLUConfig("config_spacy.json")
metad = Metadata.load(model_directory) #loads metadata.json
interpreter = Interpreter.load(metad, config, builder)
interpreter.parse(trans_input) #output
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu.model import Metadata, Interpreter
from rasa_nlu import config
from rasa_nlu.components import ComponentBuilder

# Shared builder so components are cached between training and inference.
builder = ComponentBuilder(use_cache=True)


def train_nlu(data, config_file, model_dir):
    """Train the restaurant NLU model.

    :param data: path to the training data file
    :param config_file: path to the pipeline config file
    :param model_dir: directory to persist the model into
    :return: the directory the model was persisted to
    """
    training_data = load_data(data)
    trainer = Trainer(config.load(config_file), builder)
    trainer.train(training_data)
    # FIX: return the persisted directory (previously computed but discarded).
    return trainer.persist(model_dir, fixed_model_name='restaurantnlu')


def run_nlu():
    """Load the persisted model and print a sample parse."""
    interpreter = Interpreter.load('./models/nlu/default/restaurantnlu', builder)
    print(interpreter.parse("below 300"))


if __name__ == '__main__':
    train_nlu('./data/data.json', 'config_spacy.json', './models/nlu')
    run_nlu()
def component_builder():
    """Provide a fresh ComponentBuilder (default settings) — likely a test fixture."""
    builder = ComponentBuilder()
    return builder