def test_split_data_by_label(model_data: RasaModelData):
    """Splitting on intent ids 0 and 1 gives two single-label partitions."""
    intent_ids = model_data.get("intent", "ids")[0]
    requested_label_ids = np.array([0, 1])

    partitions = model_data._split_by_label_ids(
        model_data.data, intent_ids, requested_label_ids
    )

    # One partition is expected per requested label id.
    assert len(partitions) == 2

    for partition in partitions:
        # Every partition must hold exactly one distinct intent id.
        distinct_ids = set(partition.get("intent", "ids")[0])
        assert len(distinct_ids) == 1
def test_session_data_for_ids(model_data: RasaModelData):
    """Selecting ids 0 and 1 keeps two rows per feature, equal to the source rows."""
    selected_ids = np.array([0, 1])
    subset = model_data._data_for_ids(model_data.data, selected_ids)

    # Every feature array of every key/sub-key must have exactly two rows.
    for attribute_data in subset.values():
        for feature_arrays in attribute_data.values():
            for feature_array in feature_arrays:
                assert feature_array.shape[0] == 2

    first_key = model_data.keys()[0]
    first_sub_key = model_data.keys(first_key)[0]

    # Rows 0 and 1 of the first feature must match the unfiltered data.
    for row in (0, 1):
        actual = np.array(subset[first_key][first_sub_key][0][row])
        expected = np.array(model_data.get(first_key, first_sub_key)[0][row])
        assert np.all(actual == expected)
def test_not_balance_model_data(model_data: RasaModelData):
    """With entity tag ids as the label, ``_balanced_data`` returns them unchanged."""
    tagged_model_data = RasaModelData(
        label_key="entities",
        label_sub_key="tag_ids",
        data=model_data.data,
    )

    # NOTE(review): 2 and False are presumably batch size and shuffle flag - confirm.
    result = tagged_model_data._balanced_data(tagged_model_data.data, 2, False)

    actual_tag_ids = result["entities"]["tag_ids"]
    expected_tag_ids = tagged_model_data.get("entities", "tag_ids")
    assert np.all(actual_tag_ids == expected_tag_ids)
def test_session_data_for_ids(model_data: RasaModelData):
    """Selecting ids 0 and 1 keeps two rows per feature, equal to the source rows."""
    selected_ids = np.array([0, 1])
    subset = model_data._data_for_ids(model_data.data, selected_ids)

    # Every feature array stored under every key must have exactly two rows.
    for feature_arrays in subset.values():
        for feature_array in feature_arrays:
            assert feature_array.shape[0] == 2

    first_key = list(model_data.keys())[0]

    # Rows 0 and 1 of the first feature must match the unfiltered data.
    for row in (0, 1):
        actual = np.array(subset[first_key][0][row])
        expected = np.array(model_data.get(first_key)[0][row])
        assert np.all(actual == expected)
def test_split_data_by_label(model_data: RasaModelData):
    """Splitting on label ids 0 and 1 gives two single-label partitions.

    Also checks that the first partition has as many features per key/sub-key
    as the source data, and that its first feature has length two.
    """
    requested_label_ids = np.array([0, 1])
    partitions = model_data._split_by_label_ids(
        model_data.data,
        model_data.get("label", "ids")[0],
        requested_label_ids,
    )

    # One partition is expected per requested label id.
    assert len(partitions) == 2

    for partition in partitions:
        # Every partition must hold exactly one distinct label id.
        distinct_ids = set(partition.get("label", "ids")[0])
        assert len(distinct_ids) == 1

    first_partition = partitions[0]
    for key, attribute_data in first_partition.items():
        for sub_key, features in attribute_data.items():
            source_features = model_data.data[key][sub_key]
            assert len(features) == len(source_features)
            assert len(features[0]) == 2
def _check_enough_labels(model_data: RasaModelData) -> bool:
    """Return True when the data stored under ``LABEL_IDS`` has >= 2 distinct values."""
    label_ids = model_data.get(LABEL_IDS)
    distinct_label_ids = np.unique(label_ids)
    return len(distinct_label_ids) >= 2
def test_not_balance_model_data(model_data: RasaModelData):
    """With ``tag_ids`` as the label key, ``_balanced_data`` returns them unchanged."""
    tagged_model_data = RasaModelData(label_key="tag_ids", data=model_data.data)

    # NOTE(review): 2 and False are presumably batch size and shuffle flag - confirm.
    result = tagged_model_data._balanced_data(tagged_model_data.data, 2, False)

    actual_tag_ids = result.get("tag_ids")
    expected_tag_ids = tagged_model_data.get("tag_ids")
    assert np.all(actual_tag_ids == expected_tag_ids)