예제 #1
0
async def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Load a directory holding only empty files and expect no training data.

    The directory contains one dot-prefixed (hidden) file and one regular
    file, both empty; featurizing whatever gets loaded must give empty
    ``X`` and ``y``.
    """
    # Hidden file first, then the regular one -- both are created empty.
    for file_name in (".hidden", "normal_file"):
        Path(tmpdir / file_name).touch()

    state_featurizer = BinarySingleStateFeaturizer()
    featurizer = MaxHistoryTrackerFeaturizer(state_featurizer, max_history=2)

    loaded_trackers = await training.load_data(tmpdir.strpath, default_domain)
    featurized = featurizer.featurize_trackers(loaded_trackers, default_domain)

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
예제 #2
0
async def test_FullDialogueTrackerFeaturizer():
    """Print the training states/actions of a full-dialogue featurizer.

    Loads the slot domain and the stories file from ``prj_dir``, converts
    the resulting trackers to states and actions, and prints them. The test
    makes no assertions; it only exercises the code path.
    """
    domain_path = "{}/data/domain_with_slots.yml".format(prj_dir)
    domain = Domain.load(domain_path)
    stories_path = "{}/data/stories.md".format(prj_dir)

    # No augmentation and no debug plots while loading the stories.
    loaded_trackers = await training.load_data(
        stories_path, domain, augmentation_factor=0, debug_plots=False
    )

    dialogue_featurizer = FullDialogueTrackerFeaturizer(
        state_featurizer=BinarySingleStateFeaturizer()
    )
    states, actions = dialogue_featurizer.training_states_and_actions(
        loaded_trackers, domain
    )
    print_data_training(states, actions)
예제 #3
0
async def test_load_multi_file_training_data(default_domain):
    """Check that a stories file and its multi-file split give the same data.

    The same stories are loaded once from a single file and once from a
    directory of several files. Both the intermediate states/actions and the
    featurized ``X``/``y`` arrays must match, irrespective of example order.
    """
    # the stories file in `data/test_multifile_stories` is the same as in
    # `data/test_stories/stories.md`, but split across multiple files
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    trackers = await training.load_data(
        "data/test_stories/stories.md", default_domain, augmentation_factor=0
    )
    tr_as_sts, tr_as_acts = featurizer.training_states_and_actions(
        trackers, default_domain
    )
    # Serialise every (states + actions) example to a canonical JSON string
    # and sort, so the comparison does not depend on loading order.
    hashed = sorted(
        (
            json.dumps(sts + acts, sort_keys=True)
            for sts, acts in zip(tr_as_sts, tr_as_acts)
        ),
        reverse=True,
    )

    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2
    )
    trackers_mul = await training.load_data(
        "data/test_multifile_stories", default_domain, augmentation_factor=0
    )
    # Use the featurizer created for the multi-file data (the original code
    # reused `featurizer` here; both are configured identically).
    tr_as_sts_mul, tr_as_acts_mul = featurizer_mul.training_states_and_actions(
        trackers_mul, default_domain
    )
    hashed_mul = sorted(
        (
            json.dumps(sts_mul + acts_mul, sort_keys=True)
            for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul)
        ),
        reverse=True,
    )

    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul

    # `ndarray.sort()` sorts in place and returns None, so comparing its
    # return values would always pass. `np.sort` returns sorted copies, and
    # `np.array_equal` also fails cleanly when the shapes differ.
    assert np.array_equal(np.sort(data.X, axis=0), np.sort(data_mul.X, axis=0))
    assert np.array_equal(np.sort(data.y, axis=0), np.sort(data_mul.y, axis=0))
예제 #4
0
async def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    """Load a directory holding only empty files and expect no training data.

    One dot-prefixed (hidden) file and one regular file are created in the
    temporary directory; featurizing the loaded trackers must produce empty
    ``X`` and ``y``.
    """
    directory = tmpdir.strpath

    # Opening in append mode creates the file if it is missing; the hidden
    # file is created first, then the regular one.
    for file_name in (".hidden", "normal_file"):
        open(os.path.join(directory, file_name), "a").close()

    state_featurizer = BinarySingleStateFeaturizer()
    featurizer = MaxHistoryTrackerFeaturizer(state_featurizer, max_history=2)

    loaded_trackers = await training.load_data(directory, default_domain)
    featurized = featurizer.featurize_trackers(loaded_trackers, default_domain)

    assert len(featurized.X) == 0
    assert len(featurized.y) == 0
def test_BinarySingleStateFeaturizer():
    """Encode two states with a hand-wired feature map and check the vectors."""
    state_featurizer = BinarySingleStateFeaturizer()
    # Feature name -> index in the encoded vector, i.e. positions 0..3 hold
    # "a", "d", "c", "b" in that order.
    state_featurizer.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    state_featurizer.num_features = len(state_featurizer.input_state_map)

    # "e" has no entry in the feature map.
    binary_state = {"a": 1.0, "b": 1.0, "c": 0.0, "e": 1.0}
    binary_vector = state_featurizer.encode(binary_state)
    assert is_numpy(binary_vector)
    assert list(binary_vector) == [1, 0, 0, 1]

    fractional_state = {"a": 1.0, "b": 0.1, "c": 0.2, "e": 1.0}
    fractional_vector = state_featurizer.encode(fractional_state)
    assert is_numpy(fractional_vector)
    assert list(fractional_vector) == [1.0, 0.0, 0.2, 0.1]
예제 #6
0
async def test_generate_training_data_with_cycles(tmpdir, default_domain):
    """Featurize stories containing a cycle and check the label histogram."""
    state_featurizer = BinarySingleStateFeaturizer()
    featurizer = MaxHistoryTrackerFeaturizer(state_featurizer, max_history=4)

    training_trackers = await training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0,
    )
    training_data = featurizer.featurize_trackers(
        training_trackers, default_domain
    )
    # Collapse the last axis of `y` to the index of its largest entry.
    labels = training_data.y.argmax(axis=-1)

    # The number of trackers depends on the story graph, which is not built
    # deterministically, but it should always be 3 or 4.
    tracker_count = len(training_trackers)
    assert tracker_count in (3, 4)

    # With 4 trackers there is one more example for the tracker-dependent
    # label (key 8 below) than with 3.
    tracker_dependent_count = tracker_count - 1
    # If new default actions are added, the action keys below will change.
    expected_histogram = {
        0: 6,
        1: 2,
        8: tracker_dependent_count,
        9: 1,
        10: 3,
    }
    assert Counter(labels) == expected_histogram
예제 #7
0
 def _standard_featurizer():
     """Build a max-history tracker featurizer over binary state features."""
     state_featurizer = BinarySingleStateFeaturizer()
     return MaxHistoryTrackerFeaturizer(state_featurizer)
예제 #8
0
def test_binary_featurizer_handles_on_non_existing_features():
    """Encode a state that includes a key ("e") missing from the feature map."""
    state_featurizer = BinarySingleStateFeaturizer()
    state_featurizer.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    state_featurizer.num_features = len(state_featurizer.input_state_map)

    state = {"a": 1.0, "b": 1.0, "c": 0.0, "e": 1.0}
    vector = state_featurizer.encode(state)

    # Only "a" (index 0) and "b" (index 3) are set in the expected vector.
    expected = np.array([1, 0, 0, 1])
    assert (vector == expected).all()
예제 #9
0
def test_binary_featurizer_handles_probabilistic_intents():
    """Encode fractional feature values and expect them kept as given."""
    state_featurizer = BinarySingleStateFeaturizer()
    state_featurizer.input_state_map = {
        "intent_a": 0,
        "b": 3,
        "intent_c": 2,
        "d": 1,
    }
    state_featurizer.num_features = len(state_featurizer.input_state_map)

    state = {"intent_a": 0.5, "b": 0.2, "intent_c": 1.0}
    vector = state_featurizer.encode(state)

    # "d" (index 1) is absent from the state, so its expected slot is 0.
    expected = np.array([0.5, 0, 1.0, 0.2])
    assert (vector == expected).all()
예제 #10
0
def test_binary_featurizer_uses_correct_dtype_float():
    """Encode a state with a fractional value and expect a float64 vector."""
    state_featurizer = BinarySingleStateFeaturizer()
    state_featurizer.input_state_map = {"a": 0, "b": 3, "c": 2, "d": 1}
    state_featurizer.num_features = len(state_featurizer.input_state_map)

    state = {"a": 1.0, "b": 0.2, "c": 0.0}
    vector = state_featurizer.encode(state)

    assert vector.dtype == np.float64
예제 #11
0
 def featurizer(self):
     """Return a max-history featurizer limited to ``self.max_history``."""
     return MaxHistoryTrackerFeaturizer(
         BinarySingleStateFeaturizer(), max_history=self.max_history
     )
 def _standard_featurizer(max_history=None) -> MaxHistoryTrackerFeaturizer:
     """Build a max-history featurizer over binary state features.

     ``max_history`` is passed through to the featurizer unchanged.
     """
     state_featurizer = BinarySingleStateFeaturizer()
     return MaxHistoryTrackerFeaturizer(
         state_featurizer, max_history=max_history
     )