def test_load_multi_file_training_data(default_domain):
    """Check that stories split across several files load like one file.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files. Both
    sources are loaded without augmentation and must yield the same
    states/actions and the same featurized `X` / `y`, ignoring order.

    NOTE(review): this file appears to define a second function with the
    same name further down; if so, that later definition shadows this one
    and only one of them is collected - confirm and drop the duplicate.
    """
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data("data/test_stories/stories.md",
                                  default_domain, augmentation_factor=0)
    tr_as_sts, tr_as_acts = featurizer.training_states_and_actions(
        trackers, default_domain)
    # Serialise every (states, actions) pair and sort the strings so the
    # comparison does not depend on the order the stories were read in.
    hashed = sorted(
        (json.dumps(sts + acts, sort_keys=True)
         for sts, acts in zip(tr_as_sts, tr_as_acts)),
        reverse=True)
    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                 max_history=2)
    trackers_mul = training.load_data("data/test_multifile_stories",
                                      default_domain, augmentation_factor=0)
    # Use the featurizer created for the multi-file data consistently
    # (the states/actions were previously taken from `featurizer`).
    tr_as_sts_mul, tr_as_acts_mul = (
        featurizer_mul.training_states_and_actions(trackers_mul,
                                                   default_domain))
    hashed_mul = sorted(
        (json.dumps(sts_mul + acts_mul, sort_keys=True)
         for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul)),
        reverse=True)
    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul
    # `array.sort(axis=0)` sorts in place and returns None, so comparing its
    # return values was always true. `np.sort` returns sorted copies, and
    # `np.array_equal` also fails cleanly when the shapes differ.
    assert np.array_equal(np.sort(data.X, axis=0),
                          np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0),
                          np.sort(data_mul.y, axis=0))
def test_load_multi_file_training_data(default_domain):
    """Verify that multi-file story data matches the single-file story data.

    The stories in `data/test_multifile_stories` are the same as in
    `data/test_stories/stories.md`, but split across multiple files. Each
    source is loaded with `augmentation_factor=0` and featurized with its own
    `MaxHistoryTrackerFeaturizer(max_history=2)`; the resulting
    states/actions and feature arrays must match up to ordering.
    """
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers = training.load_data(
        "data/test_stories/stories.md",
        default_domain,
        augmentation_factor=0
    )
    tr_as_sts, tr_as_acts = featurizer.training_states_and_actions(
        trackers, default_domain)
    # JSON-encode each states+actions pair; sorting the encoded strings makes
    # the equality check below independent of loading order.
    hashed = sorted(
        (json.dumps(sts + acts, sort_keys=True)
         for sts, acts in zip(tr_as_sts, tr_as_acts)),
        reverse=True)
    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers_mul = training.load_data(
        "data/test_multifile_stories",
        default_domain,
        augmentation_factor=0
    )
    # The multi-file trackers go through `featurizer_mul` for both steps;
    # the original mixed in `featurizer` for the states/actions.
    tr_as_sts_mul, tr_as_acts_mul = (
        featurizer_mul.training_states_and_actions(
            trackers_mul, default_domain))
    hashed_mul = sorted(
        (json.dumps(sts_mul + acts_mul, sort_keys=True)
         for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul)),
        reverse=True)
    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul
    # The in-place `.sort(axis=0)` method returns None, which made the old
    # `np.all(None == None)` checks vacuous. Compare sorted copies instead.
    assert np.array_equal(np.sort(data.X, axis=0),
                          np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0),
                          np.sort(data_mul.y, axis=0))
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Loading a directory of empty files must produce no training examples.

    The temporary directory gets one hidden file (`.hidden`) and one regular
    file (`normal_file`), both empty. Featurizing whatever `load_data`
    returns for that directory has to give empty `X` and `y`.

    NOTE(review): this file appears to define a second function with the
    same name further down; if so, that later definition shadows this one -
    confirm and drop the duplicate.
    """
    story_dir = tmpdir.strpath

    # Create a hidden file and a normal file, leaving both empty.
    for file_name in (".hidden", "normal_file"):
        with open(os.path.join(story_dir, file_name), 'a'):
            pass

    state_featurizer = BinarySingleStateFeaturizer()
    featurizer = MaxHistoryTrackerFeaturizer(state_featurizer, max_history=2)

    loaded_trackers = training.load_data(story_dir, default_domain)
    featurized = featurizer.featurize_trackers(loaded_trackers,
                                               default_domain)

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Ensure empty hidden and regular files yield no featurized data.

    Two empty files are created in `tmpdir` - `.hidden` and `normal_file` -
    and the directory is passed to `training.load_data`. The featurized
    result is expected to contain zero rows in both `X` and `y`.
    """
    hidden_path = os.path.join(tmpdir.strpath, ".hidden")
    normal_path = os.path.join(tmpdir.strpath, "normal_file")

    # Opening in append mode and closing right away just creates the file.
    with open(hidden_path, 'a'):
        pass
    with open(normal_path, 'a'):
        pass

    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(),
        max_history=2,
    )
    trackers_from_dir = training.load_data(tmpdir.strpath, default_domain)
    result = featurizer.featurize_trackers(trackers_from_dir, default_domain)

    assert len(result.X) == 0
    assert len(result.y) == 0
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Featurize the cyclic story file and check the label distribution.

    Loads `data/test_stories/stories_with_cycle.md` without augmentation,
    featurizes it with `max_history=4`, and compares the count of each
    predicted label index against the expected counts.

    NOTE(review): this file appears to define a second function with the
    same name further down whose expected label counts differ from the ones
    here; if so, that later definition shadows this one - confirm which
    expectation is current and drop the other.
    """
    state_featurizer = BinarySingleStateFeaturizer()
    featurizer = MaxHistoryTrackerFeaturizer(state_featurizer, max_history=4)

    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0,
    )
    training_data = featurizer.featurize_trackers(training_trackers,
                                                  default_domain)
    # Index of the largest entry along the last axis of `y` for each example.
    y = training_data.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    tracker_count = len(training_trackers)
    assert tracker_count in (3, 4)

    # if we have 4 trackers, there is going to be one example more for label 2
    num_twos = tracker_count - 1
    expected_counts = {0: 6, 1: 2, 2: num_twos, 3: 1, 4: 3}
    assert Counter(y) == expected_counts
def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Check label counts produced from stories that contain a cycle.

    The trackers come from `data/test_stories/stories_with_cycle.md`
    (no augmentation) and are featurized with a history of 4. The count of
    each label index in the featurized `y` must match the expected mapping,
    where the count for label 3 depends on how many trackers were generated.
    """
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(),
        max_history=4,
    )
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0
    )
    featurized = featurizer.featurize_trackers(training_trackers,
                                               default_domain)
    # Collapse each row of `y` to the index of its largest entry.
    y = featurized.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    n_trackers = len(training_trackers)
    assert n_trackers in (3, 4)

    # if we have 4 trackers, there is going to be one example more for label 3
    num_threes = n_trackers - 1
    label_counts = Counter(y)
    assert label_counts == {0: 6, 1: 2, 3: num_threes, 4: 1, 5: 3}