def command(languages: tuple, path: str):
    """Check for differences in bots under the same multi-lingual structure.

    Loads each language bot's domain.yml and diffs every pair of bots,
    reporting per-pair results and a final success message.

    Args:
        languages: language codes of the bots to be checked.
        path (str): optional argument stating the root directory of the bot.
    """
    languages_paths = get_all_languages(path=path, languages=languages)
    comparisons_list = itertools.combinations(languages_paths, 2)
    print_info(
        f"Starting diff comparison for the following bots: \n{languages_paths}\n\n *** *** ***\n"
    )
    for path_a, path_b in comparisons_list:
        domain_a_json = load_yaml(join(path_a, "domain.yml"))
        domain_b_json = load_yaml(join(path_b, "domain.yml"))
        # Hoist the basename calls: they were repeated in every message below.
        name_a = os.path.basename(path_a)
        name_b = os.path.basename(path_b)
        print_info(f"Comparing {name_a} and {name_b} bots:\n ------")
        is_diff = diff(domain_a_json, domain_b_json)
        # Single-line literals: the original backslash-continued f-strings
        # embedded the continuation line's indentation into the printed text.
        print_info(f" ------\nEnd of {name_a} and {name_b} bots comparison.\n *** \n")
        if not is_diff:
            print_info(f"No differences found between {name_a} and {name_b} bots.\n")
    # Fixed typo: "succeded" -> "succeeded".
    print_success("*** Difference comparison succeeded! ***")
def generate_conversation_md_from_domain(path_to_language: str):
    """Generate test stories based on the intents available in the domain.

    Complex stories are not generated.

    Args:
        path_to_language (str): path to language folder. If it's a
            single-language bot this will be the bot root's folder.
    """
    domain = load_yaml(join(path_to_language, "domain.yml"))
    intents_list = domain.get("intents", None)
    data_dir = join(path_to_language, "data")
    # Gather every NLU file (anything that is not a stories file) and flatten
    # their lines into a single list for example lookup.
    nlu_paths = [
        join(data_dir, fname)
        for fname in listdir(data_dir)
        if isfile(join(data_dir, fname)) and "stories" not in fname
    ]
    all_nlu = [line for nlu_path in nlu_paths for line in load_md(nlu_path)]
    if not intents_list:
        print_error("No intents were found.")
        exit(0)
    tests_dir = join(path_to_language, "tests")
    if not exists(tests_dir):
        mkdir(tests_dir)
    output_path = join(tests_dir, "conversation_tests.md")
    with open(output_path, "w", encoding="utf-8") as out_f:
        for intent in intents_list:
            out_f.write(f"## {intent}\n")
            out_f.write(f"* {intent}: {get_intent_example(intent, all_nlu)}\n")
            out_f.write(f" - utter_{intent}\n")
            out_f.write("\n")
def language_detection_setup(path: str) -> None:
    """Set up the language detection module.

    Args:
        path (str): path of the project given by the user.
    """
    # One config.yml / domain.yml pair per language folder under 'languages'.
    languages_root = join(path, "languages")
    folder_list = [
        join(languages_root, name)
        for name in os.listdir(languages_root)
        if os.path.isdir(os.path.join(languages_root, name))
    ]
    for folder in folder_list:
        # Append the Language Detection Policy as the last policy in config.yml.
        cfg_path = join(folder, "config.yml")
        cfg_yaml = load_yaml(cfg_path)
        cfg_yaml["policies"].append({
            "name": "custom.policies.language_detection.lang_change_policy.LangChangePolicy",
            "lang_detect_threshold": 0.8,
            "fallback_action_name": "utter_bot_languages",
            "model_path": "./custom/policies/language_detection/lid.176.ftz",
        })
        save_yaml(path=cfg_path, yaml_dict=cfg_yaml)
        # Add an utter example for the Language Detection Policy fallback in domain.yml.
        domain_path = join(folder, "domain.yml")
        domain_yaml = load_yaml(domain_path)
        domain_yaml["responses"]["utter_bot_languages"] = [
            {"text": "Do you want to speak with me in another language?"}
        ]
        save_yaml(path=domain_path, yaml_dict=domain_yaml)
    # Record the runtime dependency of the detection model.
    write_requirements(join(path, "requirements.txt"), ["fasttext==0.9.2"])
    print_success("Language detection successfully integrated!")
def chit_chat_setup(path: str) -> None:
    """Set up the chit-chat module.

    Adds a ParlAI fallback policy to the 'en' bot's config.yml and the
    fallback action plus its required slots to the 'en' bot's domain.yml.
    Does nothing if there is no 'en' language folder.

    Args:
        path (str): path of the project given by the user.
    """
    folder_list = [
        join(path, "languages", folder)
        for folder in os.listdir(join(path, "languages"))
        if os.path.isdir(os.path.join(path, "languages", folder))
    ]
    en_path = join(path, "languages", "en")
    if en_path not in folder_list:
        return
    # add chitchat fallback action in config.yml
    cfg_yaml = load_yaml(join(en_path, "config.yml"))
    policies_list = cfg_yaml["policies"]
    policies_list.append({
        "name": "FallbackPolicy",
        "nlu_threshold": 0.55,
        "core_threshold": 0.3,
        "fallback_action_name": "action_parlai_fallback",
    })
    cfg_yaml["policies"] = policies_list
    save_yaml(path=join(en_path, "config.yml"), yaml_dict=cfg_yaml)
    # add chitchat fallback action and needed slots in domain.yml
    domain_yaml = load_yaml(join(en_path, "domain.yml"))
    # Bug fix: the original assigned a fresh list, clobbering any actions
    # already declared in domain.yml. Merge instead, like slots/responses.
    actions = domain_yaml.get("actions") or []
    if "action_parlai_fallback" not in actions:
        actions.append("action_parlai_fallback")
    domain_yaml["actions"] = actions
    slots = domain_yaml["slots"]
    slots.update({
        "parlai_world_created": {"type": "bool", "initial_value": False},
        "parlai_world": {"type": "text"},
    })
    domain_yaml["slots"] = slots
    save_yaml(path=join(en_path, "domain.yml"), yaml_dict=domain_yaml)
    print_success(
        "Chit-chat fallback action successfully integrated into 'en' bot!")
def coref_resolution_setup(path: str) -> None:
    """Set up the coreference resolution module in the bot pipeline.

    Replaces the WhitespaceTokenizer in the 'en' bot's NLU pipeline with the
    neuralcoref-aware spaCy NLP + tokenizer components and records the extra
    pip requirements. Does nothing if there is no 'en' language folder.

    Args:
        path (str): path of the project given by the user.
    """
    folder_list = [
        join(path, "languages", folder)
        for folder in os.listdir(join(path, "languages"))
        if os.path.isdir(os.path.join(path, "languages", folder))
    ]
    if join(path, "languages", "en") in folder_list:
        en_path = join(path, "languages", "en")
        # add coref resolution to config.yml
        cfg_yaml = load_yaml(join(en_path, "config.yml"))
        components_list = cfg_yaml["pipeline"]
        # remove whitespace tokenizer from the pipeline
        whitespace_idx = get_index(components_list, "WhitespaceTokenizer")
        components_list.pop(whitespace_idx)
        # Add spaCy model and tokenizer at the front of the pipeline.
        # Bug fix: the original used backslash-continued string literals,
        # which embedded the continuation line's indentation whitespace into
        # the component module paths and the requirement URL below, producing
        # unloadable names. They are now single unbroken literals.
        components_list.insert(
            0,
            {
                "name": "custom.components.spacy_nlp.spacy_tokenizer_neuralcoref.SpacyTokenizerNeuralCoref"
            },
        )
        components_list.insert(
            0,
            {
                "name": "custom.components.spacy_nlp.spacy_nlp_neuralcoref.SpacyNLPNeuralCoref",
                "model": "en_neuralcoref",
            },
        )
        cfg_yaml["pipeline"] = components_list
        save_yaml(path=join(en_path, "config.yml"), yaml_dict=cfg_yaml)
        print_success(
            "Coreference resolution model successfully integrated into 'en' bot!"
        )
        # add requirements to requirements.txt
        write_requirements(
            join(path, "requirements.txt"),
            [
                "neuralcoref",
                "en-neuralcoref",
                "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.1.0/en_core_web_sm-2.1.0.tar.gz",
            ],
        )
def generate_stories_md(path_to_language: str, multi_language_bot: bool):
    """Write a stories.md skeleton with one simple story per domain intent.

    Args:
        path_to_language (str): path to the language folder.
        multi_language_bot (bool): when True, stories.md is written to the
            shared 'languages' folder; otherwise to the bot's 'data' folder.
    """
    domain = load_yaml(join(path_to_language, "domain.yml"))
    intents_list = domain.get("intents", None)
    if not intents_list:
        print_error("No intents were found.")
        exit(0)
    intents_list = clean_intents(intents_list)
    if multi_language_bot:
        output_path = join(abspath("."), "languages", "stories.md")
    else:
        output_path = join(path_to_language, "data", "stories.md")
    with open(output_path, "w", encoding="utf-8") as out_f:
        # One minimal "intent -> utter_intent" story per intent.
        out_f.writelines(
            f"## {intent}\n* {intent}\n - utter_{intent}\n\n"
            for intent in intents_list
        )
def check_covered_intents(language_path: list, multi_language_bot: bool):
    """Report domain intents that never appear in any story file.

    For each language folder, loads its domain intents, strips out every
    intent mentioned in a stories file, and prints whatever is left over.

    Args:
        language_path (list): paths of the language folders to check.
        multi_language_bot (bool): when True, stories are looked up in the
            shared 'languages' folder (plus the language's own 'data' folder);
            otherwise only in the language's 'data' folder.
    """
    for language in language_path:
        intents = load_yaml(join(language, "domain.yml")).get("intents", None)
        if intents is None:
            print_error("No intents were found.\n")
            exit(0)
        intents = clean_intents(intents)
        check_specific_stories = False
        if multi_language_bot:
            stories_dir_path = join(abspath("."), "languages")
            check_specific_stories = True
        else:
            stories_dir_path = join(language, "data")
        # The same scan ran twice as duplicated inline loops; it is now a
        # single helper applied to each stories directory.
        _remove_intents_covered_in_stories(stories_dir_path, intents)
        if check_specific_stories:
            lang = basename(language)
            _remove_intents_covered_in_stories(
                join(stories_dir_path, lang, "data"), intents
            )
        if intents:
            print("The following intents are not covered in your stories:")
            print(*intents, sep="\n")


def _remove_intents_covered_in_stories(stories_dir: str, intents: list) -> None:
    """Remove from *intents* (in place) every intent mentioned in a stories file under *stories_dir*."""
    for filename in listdir(stories_dir):
        if isfile(join(stories_dir, filename)) and "stories" in filename:
            for line in load_md(join(stories_dir, filename)):
                # At most one intent is removed per line (break mirrors the
                # original behaviour and keeps the in-place removal safe).
                for intent in intents:
                    if intent in line:
                        intents.remove(intent)
                        break
def check_covered_intents(language_path: str) -> bool:
    """Check that every domain intent appears in the test stories.

    Prints any uncovered intents and asks the user whether testing should
    proceed anyway.

    Args:
        language_path (str): path to the language folder.

    Returns:
        bool: True when every intent is covered, or when the user chose to
        continue despite uncovered intents.
    """
    intents = load_yaml(join(language_path, "domain.yml")).get("intents", None)
    if intents is None:
        print_error("No intents were found.\n")
        exit(0)
    intents = clean_intents(intents)
    tests_dir = join(language_path, "tests")
    for filename in listdir(tests_dir):
        if not filename.endswith(".md"):
            continue
        for line in load_md(join(tests_dir, filename)):
            # Remove at most one intent per line, matching each intent once.
            for intent in intents:
                if intent in line:
                    intents.remove(intent)
                    break
    if not intents:
        return True
    print("The following intents are not covered in your test stories:")
    print(*intents, sep="\n")
    return proceed_with_test("Continue testing?\n")