def test_create_train_data_with_history(default_domain):
    """Check the raw features extracted with a history of four steps.

    The default-domain stories are featurized with a ``BinaryFeaturizer``
    (``augmentation_factor=0``, ``max_history=4``) and the resulting array
    is compared, first by shape and then element-wise, against a
    hand-written reference.
    """
    # The eight distinct 9-value rows that occur in the reference.
    minus_ones = [-1, -1, -1, -1, -1, -1, -1, -1, -1]
    zeros = [0, 0, 0, 0, 0, 0, 0, 0, 0]
    row_a = [1, 0, 0, 0, 1, 0, 0, 0, 0]
    row_b = [1, 0, 0, 0, 0, 0, 0, 1, 0]
    row_c = [0, 1, 0, 0, 1, 0, 0, 0, 0]
    row_d = [0, 1, 0, 0, 0, 0, 1, 0, 0]
    row_e = [0, 0, 1, 0, 1, 0, 0, 0, 0]
    row_f = [0, 0, 1, 0, 0, 0, 0, 0, 1]

    # Every window below is a 4-row slice of the sequence
    # zeros, a, b, c, d, e, f; windows that start before that sequence
    # are left-filled with rows of -1.
    expected = np.array([
        [zeros, row_a, row_b, row_c],
        [row_c, row_d, row_e, row_f],
        [row_b, row_c, row_d, row_e],
        [row_a, row_b, row_c, row_d],
        [minus_ones, zeros, row_a, row_b],
        [minus_ones, minus_ones, minus_ones, zeros],
        [minus_ones, minus_ones, zeros, row_a],
    ])

    binary = BinaryFeaturizer()
    features, _ = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=binary,
        max_history=4)

    # Compare shapes first so a size mismatch is reported as such.
    assert features.shape == expected.shape
    assert np.array_equal(features, expected)
def train_data(max_history, domain):
    """Extract training data from the default-domain stories file.

    The stories are read with duplicate removal enabled and a freshly
    created ``BinaryFeaturizer``.

    :param max_history: forwarded as ``max_history`` to the extractor.
    :param domain: forwarded as ``domain`` to the extractor.
    :return: whatever ``extract_training_data_from_file`` returns.
    """
    stories_path = "data/dsl_stories/stories_defaultdomain.md"
    binary = BinaryFeaturizer()
    extracted = extract_training_data_from_file(
        stories_path,
        domain=domain,
        max_history=max_history,
        remove_duplicates=True,
        featurizer=binary)
    return extracted
def test_create_train_data_no_history(default_domain):
    """Check the decoded features extracted with a history of one step.

    The stories in ``DEFAULT_STORIES_FILE`` are featurized with a
    ``BinaryFeaturizer`` (``augmentation_factor=0``, ``max_history=1``).
    The result must have shape ``(11, 1, 10)``, and decoding each sample
    with the domain's input features must give the expected states.
    """
    def on(*names):
        # One decoded state: each listed feature paired with the value 1.
        return [(name, 1) for name in names]

    n_samples = 11

    binary = BinaryFeaturizer()
    features, _ = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        augmentation_factor=0,
        domain=default_domain,
        featurizer=binary,
        max_history=1
    )
    assert features.shape == (n_samples, 1, 10)

    decoded = [
        binary.decode(features[i, :, :], default_domain.input_features)
        for i in range(0, n_samples)
    ]

    # One single-slot history per sample; the first is expected to decode
    # to None.
    expected = [
        [None],
        [on('intent_goodbye', 'prev_utter_goodbye')],
        [on('intent_goodbye', 'prev_action_listen')],
        [on('intent_default', 'prev_utter_default')],
        [on('intent_default', 'prev_action_listen')],
        [on('intent_default', 'slot_name_0', 'prev_utter_default')],
        [on('intent_default', 'slot_name_0', 'prev_action_listen')],
        [on('intent_greet', 'prev_utter_greet')],
        [on('intent_greet', 'prev_action_listen')],
        [on('intent_greet', 'entity_name', 'slot_name_0',
            'prev_utter_greet')],
        [on('intent_greet', 'entity_name', 'slot_name_0',
            'prev_action_listen')],
    ]
    assert decoded == expected
def _prepare_training_data(self, filename, max_history, augmentation_factor,
                           max_training_samples=None,
                           max_number_of_trackers=2000,
                           remove_duplicates=True):
    """Read training data from a story file and prepare it for training.

    :param filename: story file handed to
        ``extract_training_data_from_file``; if falsy, nothing is read
        and zero-filled arrays are returned instead.
    :param max_history: forwarded unchanged to the extractor.
    :param augmentation_factor: forwarded unchanged to the extractor.
    :param max_training_samples: when not ``None``, both ``X`` and ``y``
        are cut down to their first ``max_training_samples`` entries.
    :param max_number_of_trackers: forwarded unchanged to the extractor.
    :param remove_duplicates: forwarded unchanged to the extractor.
    :return: tuple ``(X, y)``.
    """
    # Imported inside the method, exactly as in the original code.
    from rasa_core.training_utils import extract_training_data_from_file

    if not filename:
        # No story file: X has zero rows of ``num_features`` columns and
        # y is a zero vector with ``num_actions`` entries.
        empty_X = np.zeros((0, self.domain.num_features))
        empty_y = np.zeros(self.domain.num_actions)
        return empty_X, empty_y

    X, y = extract_training_data_from_file(
        filename,
        augmentation_factor=augmentation_factor,
        max_history=max_history,
        remove_duplicates=remove_duplicates,
        domain=self.domain,
        featurizer=self.featurizer,
        interpreter=RegexInterpreter(),
        max_number_of_trackers=max_number_of_trackers)

    if max_training_samples is None:
        return X, y

    # Keep only the leading samples, truncating X and y in step.
    return X[:max_training_samples, :], y[:max_training_samples]
def test_create_train_data_with_history(default_domain):
    """Check the decoded features extracted with a history of four steps.

    The default-domain stories are featurized with a ``BinaryFeaturizer``
    (``augmentation_factor=0``, ``max_history=4``). The result must have
    shape ``(11, 4, 10)``, and decoding each sample with the domain's
    input features must give the expected four-slot histories.
    """
    # The ten distinct decoded states that occur in the expected histories.
    greet_listen = [(u'intent_greet', 1), (u'prev_action_listen', 1)]
    greet_utter = [(u'intent_greet', 1), (u'prev_utter_greet', 1)]
    default_listen = [(u'intent_default', 1), (u'prev_action_listen', 1)]
    default_utter = [(u'intent_default', 1), (u'prev_utter_default', 1)]
    goodbye_listen = [(u'intent_goodbye', 1), (u'prev_action_listen', 1)]
    goodbye_utter = [(u'intent_goodbye', 1), (u'prev_utter_goodbye', 1)]
    named_greet_listen = [(u'intent_greet', 1), (u'entity_name', 1),
                          (u'slot_name_0', 1), (u'prev_action_listen', 1)]
    named_greet_utter = [(u'intent_greet', 1), (u'entity_name', 1),
                         (u'slot_name_0', 1), (u'prev_utter_greet', 1)]
    named_default_listen = [(u'intent_default', 1), (u'slot_name_0', 1),
                            (u'prev_action_listen', 1)]
    named_default_utter = [(u'intent_default', 1), (u'slot_name_0', 1),
                           (u'prev_utter_default', 1)]

    n_samples = 11

    binary = BinaryFeaturizer()
    features, _ = extract_training_data_from_file(
        "data/dsl_stories/stories_defaultdomain.md",
        augmentation_factor=0,
        domain=default_domain,
        featurizer=binary,
        max_history=4
    )
    assert features.shape == (n_samples, 4, 10)

    decoded = [
        binary.decode(features[i, :, :], default_domain.input_features)
        for i in range(0, n_samples)
    ]

    # One four-slot history per sample; None marks a slot whose decoded
    # value is expected to be None.
    expected = [
        [None, greet_listen, greet_utter, default_listen],
        [None, named_greet_listen, named_greet_utter, named_default_listen],
        [default_listen, default_utter, goodbye_listen, goodbye_utter],
        [greet_utter, default_listen, default_utter, goodbye_listen],
        [greet_listen, greet_utter, default_listen, default_utter],
        [named_greet_listen, named_greet_utter,
         named_default_listen, named_default_utter],
        [None, None, greet_listen, greet_utter],
        [None, None, named_greet_listen, named_greet_utter],
        [None, None, None, None],
        [None, None, None, greet_listen],
        [None, None, None, named_greet_listen],
    ]
    assert decoded == expected
def train_data(max_history, domain):
    """Extract training data from ``DEFAULT_STORIES_FILE``.

    The stories are read with duplicate removal enabled and a freshly
    created ``BinaryFeaturizer``.

    :param max_history: forwarded as ``max_history`` to the extractor.
    :param domain: forwarded as ``domain`` to the extractor.
    :return: whatever ``extract_training_data_from_file`` returns.
    """
    binary = BinaryFeaturizer()
    extracted = extract_training_data_from_file(
        DEFAULT_STORIES_FILE,
        domain=domain,
        max_history=max_history,
        remove_duplicates=True,
        featurizer=binary)
    return extracted