Esempio n. 1
0
def test_create_train_data_with_history(default_domain):
    """Check the featurized windows produced with ``max_history=4``.

    The default-domain stories are featurized with a ``BinaryFeaturizer``
    and ``augmentation_factor=0``; the resulting array must match the
    hand-written reference below in both shape and content.
    """
    features, _ = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=BinaryFeaturizer(),
        max_history=4)

    # Rows of -1 fill the leading slots of the shorter windows
    # (presumably history padding -- confirm against the featurizer).
    pad = [-1] * 9
    # The seven distinct 9-feature rows; each window below is a run of
    # four consecutive entries of pad, pad, pad, s0, s1, ..., s6.
    s0 = [0, 0, 0, 0, 0, 0, 0, 0, 0]
    s1 = [1, 0, 0, 0, 1, 0, 0, 0, 0]
    s2 = [1, 0, 0, 0, 0, 0, 0, 1, 0]
    s3 = [0, 1, 0, 0, 1, 0, 0, 0, 0]
    s4 = [0, 1, 0, 0, 0, 0, 1, 0, 0]
    s5 = [0, 0, 1, 0, 1, 0, 0, 0, 0]
    s6 = [0, 0, 1, 0, 0, 0, 0, 0, 1]

    expected = np.array([
        [s0, s1, s2, s3],
        [s3, s4, s5, s6],
        [s2, s3, s4, s5],
        [s1, s2, s3, s4],
        [pad, s0, s1, s2],
        [pad, pad, pad, s0],
        [pad, pad, s0, s1],
    ])

    assert features.shape == expected.shape
    assert np.array_equal(features, expected)
Esempio n. 2
0
def train_data(max_history, domain):
    """Featurize the default-domain stories for the given history length.

    Calls ``extract_training_data_from_file`` with a fresh
    ``BinaryFeaturizer`` and ``remove_duplicates=True`` and returns its
    result unchanged.
    """
    stories_path = "data/dsl_stories/stories_defaultdomain.md"
    binary_featurizer = BinaryFeaturizer()
    training_data = extract_training_data_from_file(
        stories_path,
        domain=domain,
        max_history=max_history,
        remove_duplicates=True,
        featurizer=binary_featurizer)
    return training_data
Esempio n. 3
0
def test_create_train_data_no_history(default_domain):
    """Check the decoded training samples produced with ``max_history=1``.

    The stories in ``DEFAULT_STORIES_FILE`` are featurized without
    augmentation; every one of the 11 single-step samples is decoded back
    into ``(feature_name, 1)`` pairs and compared with the expected list.
    """
    def active(*names):
        # Decoded form of one state: each listed feature paired with 1.
        return [(name, 1) for name in names]

    binary = BinaryFeaturizer()
    X, _ = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        augmentation_factor=0,
        domain=default_domain,
        featurizer=binary,
        max_history=1)
    assert X.shape == (11, 1, 10)

    decoded = []
    for idx in range(11):
        decoded.append(
            binary.decode(X[idx, :, :], default_domain.input_features))

    expected = [
        [None],
        [active('intent_goodbye', 'prev_utter_goodbye')],
        [active('intent_goodbye', 'prev_action_listen')],
        [active('intent_default', 'prev_utter_default')],
        [active('intent_default', 'prev_action_listen')],
        [active('intent_default', 'slot_name_0', 'prev_utter_default')],
        [active('intent_default', 'slot_name_0', 'prev_action_listen')],
        [active('intent_greet', 'prev_utter_greet')],
        [active('intent_greet', 'prev_action_listen')],
        [active('intent_greet', 'entity_name', 'slot_name_0',
                'prev_utter_greet')],
        [active('intent_greet', 'entity_name', 'slot_name_0',
                'prev_action_listen')],
    ]
    assert decoded == expected
Esempio n. 4
0
    def _prepare_training_data(self,
                               filename,
                               max_history,
                               augmentation_factor,
                               max_training_samples=None,
                               max_number_of_trackers=2000,
                               remove_duplicates=True):
        """Load featurized training data from a story file.

        If ``filename`` is falsy no file is read and zero-filled arrays
        sized from ``self.domain`` are returned. Otherwise the stories are
        featurized through ``extract_training_data_from_file`` using this
        object's domain and featurizer together with a ``RegexInterpreter``.
        When ``max_training_samples`` is not ``None`` both arrays are cut
        down to that many leading entries.

        Returns the tuple ``(X, y)``.
        """
        # Imported inside the method rather than at module level; the
        # import runs on every call, including the no-file path.
        from rasa_core.training_utils import extract_training_data_from_file

        if not filename:
            # NOTE(review): X has zero rows here while y has num_actions
            # entries, unlike the file branch where both are sliced by
            # sample -- confirm that callers rely on this shape.
            return (np.zeros((0, self.domain.num_features)),
                    np.zeros(self.domain.num_actions))

        X, y = extract_training_data_from_file(
            filename,
            augmentation_factor=augmentation_factor,
            max_history=max_history,
            remove_duplicates=remove_duplicates,
            domain=self.domain,
            featurizer=self.featurizer,
            interpreter=RegexInterpreter(),
            max_number_of_trackers=max_number_of_trackers)

        if max_training_samples is None:
            return X, y
        return X[:max_training_samples, :], y[:max_training_samples]
def test_create_train_data_with_history(default_domain):
    """Check the decoded training windows produced with ``max_history=4``.

    The default-domain stories are featurized without augmentation; each
    of the 11 four-step samples is decoded back into ``(feature_name, 1)``
    pairs (``None`` where the decoder yields no state) and compared with
    the expected windows.
    """
    binary = BinaryFeaturizer()
    X, _ = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=binary,
        max_history=4)
    assert X.shape == (11, 4, 10)

    decoded = []
    for idx in range(11):
        decoded.append(
            binary.decode(X[idx, :, :], default_domain.input_features))

    # Decoded states that occur in the expected windows. Suffix "_listen"
    # marks a state whose previous action is action_listen, "_utter" one
    # whose previous action is the matching utter_* action.
    greet_listen = [(u'intent_greet', 1), (u'prev_action_listen', 1)]
    greet_utter = [(u'intent_greet', 1), (u'prev_utter_greet', 1)]
    default_listen = [(u'intent_default', 1), (u'prev_action_listen', 1)]
    default_utter = [(u'intent_default', 1), (u'prev_utter_default', 1)]
    goodbye_listen = [(u'intent_goodbye', 1), (u'prev_action_listen', 1)]
    goodbye_utter = [(u'intent_goodbye', 1), (u'prev_utter_goodbye', 1)]
    # The same greet/default states with the name entity and slot set.
    named_greet_listen = [
        (u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
        (u'prev_action_listen', 1)]
    named_greet_utter = [
        (u'intent_greet', 1), (u'entity_name', 1), (u'slot_name_0', 1),
        (u'prev_utter_greet', 1)]
    slot_default_listen = [
        (u'intent_default', 1), (u'slot_name_0', 1),
        (u'prev_action_listen', 1)]
    slot_default_utter = [
        (u'intent_default', 1), (u'slot_name_0', 1),
        (u'prev_utter_default', 1)]

    expected = [
        [None, greet_listen, greet_utter, default_listen],
        [None, named_greet_listen, named_greet_utter, slot_default_listen],
        [default_listen, default_utter, goodbye_listen, goodbye_utter],
        [greet_utter, default_listen, default_utter, goodbye_listen],
        [greet_listen, greet_utter, default_listen, default_utter],
        [named_greet_listen, named_greet_utter, slot_default_listen,
         slot_default_utter],
        [None, None, greet_listen, greet_utter],
        [None, None, named_greet_listen, named_greet_utter],
        [None, None, None, None],
        [None, None, None, greet_listen],
        [None, None, None, named_greet_listen],
    ]
    assert decoded == expected
Esempio n. 6
0
def train_data(max_history, domain):
    """Featurize the stories in ``DEFAULT_STORIES_FILE``.

    Calls ``extract_training_data_from_file`` with a fresh
    ``BinaryFeaturizer`` and ``remove_duplicates=True`` and returns its
    result unchanged.
    """
    binary_featurizer = BinaryFeaturizer()
    training_data = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        domain=domain,
        max_history=max_history,
        remove_duplicates=True,
        featurizer=binary_featurizer)
    return training_data