async def test_load_multi_file_training_data(default_domain):
    """Check that multi-file stories featurize like the single-file stories.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files. Both
    sources are loaded with ``augmentation_factor=0`` and featurized with an
    identically configured featurizer; the resulting states/actions and the
    feature arrays must match irrespective of tracker order.

    Args:
        default_domain: domain fixture passed to the loader and featurizer.
    """

    def _sorted_hashes(tracker_featurizer, trackers):
        # Serialise every (states + actions) pair to JSON so the pairs can be
        # sorted and compared independently of the order trackers were read.
        states, actions = tracker_featurizer.training_states_and_actions(
            trackers, default_domain
        )
        return sorted(
            (json.dumps(sts + acts, sort_keys=True) for sts, acts in zip(states, actions)),
            reverse=True,
        )

    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    trackers = await training.load_data(
        "data/test_stories/stories.md", default_domain, augmentation_factor=0
    )
    hashed = _sorted_hashes(featurizer, trackers)
    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    trackers_mul = await training.load_data(
        "data/test_multifile_stories", default_domain, augmentation_factor=0
    )
    # Use the multi-file featurizer here as well, so both pipelines are kept
    # fully separate (the original accidentally reused `featurizer`).
    hashed_mul = _sorted_hashes(featurizer_mul, trackers_mul)
    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul

    # `ndarray.sort` sorts in place and returns None, so comparing its return
    # values is always true. `np.sort` returns a sorted copy, which makes the
    # comparison meaningful and leaves the featurized data untouched.
    # NOTE(review): assumes `X` and `y` are array-like — confirm against the
    # featurizer's return type.
    assert np.array_equal(np.sort(data.X, axis=0), np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0), np.sort(data_mul.y, axis=0))
async def test_create_train_data_with_history(default_domain):
    """Check the training states produced with ``max_history=4``.

    Loads ``DEFAULT_STORIES_FILE`` with ``augmentation_factor=0``, expects
    three trackers, and compares the JSON-serialised, sorted states against
    a fixed list.

    Args:
        default_domain: domain fixture passed to the loader and featurizer.
    """
    featurizer = MaxHistoryTrackerFeaturizer(max_history=4)
    training_trackers = await training.load_data(
        DEFAULT_STORIES_FILE, default_domain, augmentation_factor=0
    )
    assert len(training_trackers) == 3

    state_windows, _ = featurizer.training_states_and_actions(
        training_trackers, default_domain
    )

    # The decoded states need to be sorted to get a stable order to compare.
    serialised = sorted(
        json.dumps(window, sort_keys=True) for window in state_windows
    )

    expected = [
        "[null, null, null, {}]",
        "[null, null, {}, "
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]',
        "[null, null, {}, " '{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        "[null, {}, "
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        "[null, {}, "
        '{"intent_greet": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}, '
        '{"intent_default": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"intent_default": 1.0, '
        '"prev_utter_default": 1.0, "slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_default": 1.0, "prev_utter_default": 1.0}, '
        '{"intent_goodbye": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_greet": 1.0, "prev_utter_greet": 1.0}, '
        '{"intent_default": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}, '
        '{"intent_default": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_default": 1.0, "prev_utter_default": 1.0}, '
        '{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{}, {"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}, '
        '{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}, '
        '{"intent_default": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]',
        '[{}, {"intent_greet": 1.0, "prev_action_listen": 1.0}, '
        '{"intent_greet": 1.0, "prev_utter_greet": 1.0}, '
        '{"intent_default": 1.0, "prev_action_listen": 1.0}]',
    ]
    assert serialised == expected
async def test_create_train_data_unfeaturized_entities():
    """Check the training states for the unfeaturized-entities domain.

    Loads the domain from ``default_unfeaturized_entities.yml`` and the
    stories from ``stories_unfeaturized_entities.md`` with
    ``augmentation_factor=0``, expects two trackers, and compares the
    JSON-serialised states (``max_history=1``), sorted in descending order,
    against a fixed list.
    """
    domain = Domain.load("data/test_domains/default_unfeaturized_entities.yml")
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)

    training_trackers = await training.load_data(
        "data/test_stories/stories_unfeaturized_entities.md",
        domain,
        augmentation_factor=0,
    )
    assert len(training_trackers) == 2

    state_windows, _ = featurizer.training_states_and_actions(
        training_trackers, domain
    )

    # The decoded states need to be sorted to get a stable order to compare.
    serialised = sorted(
        (json.dumps(window, sort_keys=True) for window in state_windows),
        reverse=True,
    )

    expected = [
        "[{}]",
        '[{"intent_why": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_why": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_thank": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_thank": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "entity_unrelated_recognized_entity": 1.0, "intent_ask": 1.0, "prev_utter_default": 1.0}]',
        '[{"entity_name": 1.0, "entity_other": 1.0, "entity_unrelated_recognized_entity": 1.0, "intent_ask": 1.0, "prev_action_listen": 1.0}]',
    ]
    assert serialised == expected
async def test_MaxHistoryTrackerFeaturizer():
    """Load the project's stories, visualise them and featurize them.

    Reads the domain and stories from ``prj_dir``, loads the trackers with
    ``augmentation_factor=0`` and ``debug_plots=True``, passes them to
    ``viz_trackers`` and then extracts states and actions with
    ``max_history=5``.
    """
    # viz_domain(default_domain)
    domain_file = "{}/domain_with_slots.yml".format(prj_dir)
    default_domain = Domain.load(domain_file)
    stories_file = "{}/data/stories.md".format(prj_dir)

    trackers = await training.load_data(
        stories_file,
        default_domain,
        augmentation_factor=0,
        debug_plots=True,
    )
    viz_trackers(trackers)

    featurizer = MaxHistoryTrackerFeaturizer(max_history=5)
    decoded, actions = featurizer.training_states_and_actions(
        trackers, default_domain
    )