def split_nlu_data(args: argparse.Namespace) -> None:
    """Split NLU data into a training and a test portion and persist both.

    The data location is taken from ``args.nlu`` (validated against
    ``DEFAULT_DATA_PATH``), narrowed to the NLU directory, loaded, and split
    using ``args.training_fraction`` and ``args.random_seed``. Both parts are
    written to ``args.out`` as ``training_data.<format>`` and
    ``test_data.<format>``, where ``<format>`` is whatever
    ``get_file_format`` reports for the NLU directory.

    Args:
        args: Parsed command line arguments; this function reads ``nlu``,
            ``training_fraction``, ``random_seed`` and ``out``.
    """
    # Kept as function-local imports, exactly like the original block.
    from rasa.nlu.training_data.loading import load_data
    from rasa.nlu.training_data.util import get_file_format

    validated_path = rasa.cli.utils.get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH
    )
    nlu_directory = data.get_nlu_directory(validated_path)

    training_data = load_data(nlu_directory)
    extension = get_file_format(nlu_directory)

    train_split, test_split = training_data.train_test_split(
        args.training_fraction, args.random_seed
    )

    train_split.persist(args.out, filename=f"training_data.{extension}")
    test_split.persist(args.out, filename=f"test_data.{extension}")
def split_nlu_data(args):
    """Split NLU data into train and test sets and write both to disk.

    Reads the data location from ``args.nlu``, splits the loaded data by
    ``args.training_fraction`` and persists the two parts into ``args.out``
    as ``training_data.<format>`` and ``test_data.<format>``.

    Args:
        args: Parsed command line arguments; this function reads ``nlu``,
            ``training_fraction`` and ``out``.
    """
    # Function-local imports are preserved from the original block.
    from rasa.nlu.training_data.loading import load_data
    from rasa.nlu.training_data.util import get_file_format

    source_path = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    source_path = data.get_nlu_directory(source_path)

    loaded = load_data(source_path)
    file_ext = get_file_format(source_path)

    # NOTE(review): no seed is passed here, so the split uses whatever
    # default `train_test_split` applies — confirm this is intended.
    split_result = loaded.train_test_split(args.training_fraction)
    train_part, test_part = split_result

    train_part.persist(args.out, filename=f"training_data.{file_ext}")
    test_part.persist(args.out, filename=f"test_data.{file_ext}")
def test_get_file_format():
    """Check `get_file_format` for known paths and for invalid inputs."""
    # (path, expected format) pairs, checked in the original order.
    expected_formats = (
        ("data/examples/luis/demo-restaurants_v5.json", "json"),
        ("data/examples", "json"),
        ("examples/restaurantbot/data/nlu.md", "md"),
    )
    for path, expected in expected_formats:
        assert get_file_format(path) == expected

    # A non-existing path and `None` are both expected to raise.
    for invalid_input in ("path-does-not-exists", None):
        with pytest.raises(AttributeError):
            get_file_format(invalid_input)
def split_nlu_data(args: argparse.Namespace) -> None:
    """Load NLU data from a path and persist a train/test split of it.

    The output files are written to ``args.out`` and are named
    ``training_data.<format>`` and ``test_data.<format>``, with ``<format>``
    being the value returned by ``get_file_format`` for the NLU directory.

    Args:
        args: Commandline arguments; this function reads ``nlu``,
            ``training_fraction``, ``random_seed`` and ``out``.
    """
    # Function-local imports, as in the original block.
    from rasa.nlu.training_data.loading import load_data
    from rasa.nlu.training_data.util import get_file_format

    nlu_path = data.get_nlu_directory(
        rasa.cli.utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    )

    examples = load_data(nlu_path)
    suffix = get_file_format(nlu_path)

    fraction, seed = args.training_fraction, args.random_seed
    train_examples, test_examples = examples.train_test_split(fraction, seed)

    train_examples.persist(args.out, filename=f"training_data.{suffix}")
    test_examples.persist(args.out, filename=f"test_data.{suffix}")
def test_get_non_existing_file_format_raises(data_file: Text):
    """Check that `get_file_format` raises `AttributeError` for `data_file`."""
    # Callable form of `pytest.raises`; equivalent to the context-manager form.
    pytest.raises(AttributeError, get_file_format, data_file)
def test_get_supported_file_format(data_file: Text, expected_format: Text):
    """Check that `get_file_format(data_file)` equals `expected_format`."""
    assert get_file_format(data_file) == expected_format