def train_rasa():
    """Generate training data with chatito and/or train the RASA model.

    Controlled by the module-level ``ARGS`` namespace:
      * ``ARGS.generate`` -- run chatito and convert its JSON output to
        RASA markdown.
      * ``ARGS.train`` -- train a model from the static and generated data.
    """
    print('TRAIN RASA')
    # All paths are resolved relative to this file, not the current directory.
    base = os.path.dirname(__file__)
    if ARGS.generate:
        # shell=True expects a single command string; the original passed a
        # one-element list, which only works by accident on POSIX (the first
        # element becomes the command, the rest arguments to /bin/sh).
        call('npx chatito --format rasa data/',
             shell=True,
             cwd=os.path.join(base, '../rasa/chatito'))
        print('CONVERT TO RASA MD')
        convert_training_data(
            data_file=os.path.join(base, '../rasa/chatito/rasa_dataset_training.json'),
            out_file=os.path.join(base, '../rasa/chatito/nlu.md'),
            output_format="md",
            language="")
        print('DONE CONVERT TO RASA MD')
    if ARGS.train:
        train(domain=os.path.join(base, '../rasa/domain.yml'),
              config=os.path.join(base, '../rasa/config.yml'),
              training_files=[
                  os.path.join(base, '../rasa/data/nlu.md'),
                  os.path.join(base, '../rasa/data/stories.md'),
                  os.path.join(base, '../rasa/chatito/nlu.md'),
              ],
              output=os.path.join(base, '../rasa/models'))
def nlu_path_to_dataframe(path):
    """Convert a single nlu file with intents into a pandas DataFrame.

    Usage:

    ```python
    from rasa_nlu_examples.scikit import nlu_path_to_dataframe

    df = nlu_path_to_dataframe("path/to/nlu/nlu.yml")
    ```
    """
    from rasa.nlu.convert import convert_training_data

    src = pathlib.Path(path)
    # Temp-file stem: filename up to the first dot.  The original used
    # name[:name.find(".")], which silently drops the last character when
    # the filename contains no dot (find returns -1).
    stem = src.name.split(".", 1)[0]
    tmp_json = pathlib.Path(f"{stem}.json")
    convert_training_data(str(src), str(tmp_json),
                          output_format="json", language="en")
    try:
        blob = json.loads(tmp_json.read_text())
        data = [
            {"text": d["text"], "label": d["intent"]}
            for d in blob["rasa_nlu_data"]["common_examples"]
        ]
    finally:
        # Always remove the temp file, even if parsing raises (the original
        # leaked it on error).
        if tmp_json.exists():
            tmp_json.unlink()
    return pd.DataFrame(data)
def train_rasa():
    """Generate chatito training data, convert it to RASA markdown, and train."""
    print('TRAIN RASA')
    # Resolve every path relative to this file, not the current directory.
    base = os.path.dirname(__file__)
    # shell=True expects a single command string; the original passed a
    # one-element list (only works by accident on POSIX) and bound the
    # return code to an unused variable.
    call('npx chatito --format rasa data/',
         shell=True,
         cwd=os.path.join(base, '../rasa/chatito'))
    convert_training_data(
        data_file=os.path.join(base, '../rasa/chatito/rasa_dataset_training.json'),
        out_file=os.path.join(base, '../rasa/chatito/nlu.md'),
        output_format="md",
        language="")
    train(domain=os.path.join(base, '../rasa/domain.yml'),
          config=os.path.join(base, '../rasa/config.yml'),
          training_files=[
              os.path.join(base, '../rasa/data/nlu.md'),
              os.path.join(base, '../rasa/data/stories.md'),
              os.path.join(base, '../rasa/chatito/nlu.md'),
          ],
          output=os.path.join(base, '../rasa/models'))
def _convert_nlu_data(args: argparse.Namespace) -> None:
    """Dispatch NLU training-data conversion based on the requested format."""
    fmt = args.format
    if fmt == "yaml":
        _convert_to_yaml(args, True)
    elif fmt in ("json", "md"):
        convert_training_data(args.data, args.out, fmt, args.language)
    else:
        # Unknown format: report the supported ones and abort.
        print_error_and_exit(
            "Could not recognize output format. Supported output formats: 'json', "
            "'md', 'yaml'. Specify the desired output format with '--format'.")
def _convert_nlu_data(args: argparse.Namespace) -> None:
    """Convert NLU data to json/md directly, or to YAML via the md converter."""
    from rasa.nlu.training_data.converters.nlu_markdown_to_yaml_converter import (
        NLUMarkdownToYamlConverter,
    )

    fmt = args.format
    if fmt in ("json", "md"):
        convert_training_data(args.data, args.out, fmt, args.language)
    elif fmt == "yaml":
        _convert_to_yaml(args, NLUMarkdownToYamlConverter())
    else:
        # Unknown format: report the supported ones and abort.
        print_error_and_exit(
            "Could not recognize output format. Supported output formats: 'json', "
            "'md', 'yaml'. Specify the desired output format with '--format'.")
async def post_data_convert(request: Request):
    """Converts the NLU training data in the request body to md or json.

    Expects a JSON body with 'data', 'output_format' ('md' or 'json') and
    'language' keys; returns the converted data under the 'data' key.
    """
    validate_request_body(
        request,
        "You must provide training data in the request body in order to "
        "train your model.",
    )
    rjs = request.json
    if "data" not in rjs:
        raise ErrorResponse(
            400, "BadRequest", "Must provide training data in 'data' property")
    if "output_format" not in rjs or rjs["output_format"] not in ["json", "md"]:
        # Fixed unbalanced quote in the original message ("'json").
        raise ErrorResponse(
            400,
            "BadRequest",
            "'output_format' is required and must be either 'md' or 'json'",
        )
    if "language" not in rjs:
        raise ErrorResponse(400, "BadRequest", "'language' is required")

    temp_dir = tempfile.mkdtemp()
    out_dir = tempfile.mkdtemp()
    nlu_data_path = os.path.join(temp_dir, "nlu_data")
    output_path = os.path.join(out_dir, "output")

    # isinstance instead of `type(...) is dict`: idiomatic and subclass-safe.
    if isinstance(rjs["data"], dict):
        rasa.utils.io.dump_obj_as_json_to_file(nlu_data_path, rjs["data"])
    else:
        rasa.utils.io.write_text_file(rjs["data"], nlu_data_path)

    from rasa.nlu.convert import convert_training_data

    convert_training_data(nlu_data_path, output_path, rjs["output_format"],
                          rjs["language"])

    with open(output_path, encoding="utf-8") as f:
        data = f.read()

    if rjs["output_format"] == "json":
        import json

        # json.loads takes no `encoding` argument on Python >= 3.9 (it was
        # removed); the file was already decoded as UTF-8 above.
        data = json.loads(data)

    return response.json({"data": data})
async def _convert_nlu_training_data(
    in_path: Text,
    out_path: Text,
    language: Text,
):
    """Convert NLU training data, choosing the writer from the output path."""
    if not rasa.shared.data.is_likely_yaml_file(out_path):
        from rasa.nlu.convert import convert_training_data

        # Derive the output format name from the file extension (".md" -> "md").
        target_format = Path(out_path).suffix.replace('.', '')
        convert_training_data(in_path, out_path, target_format, language)
    else:
        from rasa.shared.nlu.training_data.loading import load_data
        from rasa.shared.nlu.training_data.formats.rasa_yaml import RasaYAMLWriter

        RasaYAMLWriter().dump(out_path, load_data(in_path, language))
def test_training_data_conversion(tmpdir, data_file, gold_standard_file,
                                  output_format, language):
    """Round-trip check: convert, compare with the gold standard, convert back."""
    converted = tmpdir.join("rasa_nlu_data.json")
    convert_training_data(data_file, converted.strpath, output_format, language)
    td = load_data(converted.strpath, language)
    assert td.entity_examples != []
    assert td.intent_examples != []

    gold_standard = load_data(gold_standard_file, language)
    cmp_message_list(td.entity_examples, gold_standard.entity_examples)
    cmp_message_list(td.intent_examples, gold_standard.intent_examples)
    assert td.entity_synonyms == gold_standard.entity_synonyms

    # Convert the converted file once more and run the same comparisons
    # against the gold standard.
    roundtrip = tmpdir.join("data_in_original_format.txt")
    convert_training_data(converted.strpath, roundtrip.strpath, "json", language)
    rto = load_data(roundtrip.strpath, language)
    cmp_message_list(gold_standard.entity_examples, rto.entity_examples)
    cmp_message_list(gold_standard.intent_examples, rto.intent_examples)
    assert gold_standard.entity_synonyms == rto.entity_synonyms
# Convert the COVID-19 RASA JSON dataset into markdown NLU format.
from rasa.nlu.convert import convert_training_data

SOURCE_FILE = "covid19-dataset.json"
TARGET_FILE = "training.md"

convert_training_data(data_file=SOURCE_FILE,
                      out_file=TARGET_FILE,
                      output_format="md",
                      language="")
# Convert ./input.json (RASA JSON training data) to ./nlu.md markdown.
from rasa.nlu.convert import convert_training_data

SOURCE_JSON = "./input.json"
TARGET_MD = "./nlu.md"

convert_training_data(data_file=SOURCE_JSON,
                      out_file=TARGET_MD,
                      output_format="md",
                      language="")
# Convert testing.json (RASA JSON training data) to markdown.
#
# Removed dead code from the original: a commented-out legacy
# `rasa_nlu.converters.load_data(...).as_markdown()` approach, and a
# `json.load` of the input file whose result was never used.
from rasa.nlu.convert import convert_training_data

convert_training_data(data_file="testing.json",
                      out_file="out_file.md",
                      output_format="md",
                      language="")
#!/usr/local/bin/python
"""Generate RASA training data with chatito and convert it to markdown."""
from rasa.nlu.convert import convert_training_data
from subprocess import call, run
import os

# shell=True expects a single command string; the original passed a
# one-element list, which only works by accident on POSIX (the first
# element becomes the command).
exit_code = call('npx chatito --format rasa data/',
                 shell=True,
                 cwd=os.path.dirname(__file__))
# The original bound the return code but never checked it; conversion
# cannot succeed if chatito failed to produce its output file.
if exit_code != 0:
    raise SystemExit(f"chatito failed with exit code {exit_code}")

convert_training_data(data_file="rasa_dataset_training.json",
                      out_file="nlu.md",
                      output_format="md",
                      language="")
# Convert the wilson project's NLU JSON training data to RASA markdown.
from rasa.nlu.convert import convert_training_data

source_json = '/home/emilio/wilson/data/nlu.json'
target_md = '/home/emilio/wilson/data/nlu.md'

convert_training_data(data_file=source_json,
                      out_file=target_md,
                      output_format="md",
                      language="")
# CLI helper: convert the NLU file given as argv[1] to markdown at argv[2].
import sys

from rasa.nlu.convert import convert_training_data

source_path = sys.argv[1]
target_path = sys.argv[2]

convert_training_data(data_file=source_path,
                      out_file=target_path,
                      output_format="md",
                      language="")