async def test_create_train_data_no_history(default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = await training.load_data(
        DEFAULT_STORIES_FILE, default_domain, augmentation_factor=0
    )
    assert len(training_trackers) == 3
    (decoded, _) = featurizer.training_states_and_actions(
        training_trackers, default_domain)

    # decoded needs to be sorted
    hashed = []
    for states in decoded:
        hashed.append(json.dumps(states, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    assert hashed == [
        '[{}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]'
    ]

def test_create_train_data_no_history(default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(max_history=1)
    training_trackers = training.load_data(
        DEFAULT_STORIES_FILE,
        default_domain,
        augmentation_factor=0
    )
    assert len(training_trackers) == 3
    (decoded, _) = featurizer.training_states_and_actions(
        training_trackers, default_domain)

    # decoded needs to be sorted
    hashed = []
    for states in decoded:
        hashed.append(json.dumps(states, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    assert hashed == [
        '[{}]',
        '[{"intent_greet": 1.0, "prev_utter_greet": 1.0}]',
        '[{"intent_greet": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_utter_goodbye": 1.0}]',
        '[{"intent_goodbye": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0}]',
        '[{"intent_default": 1.0, "prev_utter_default": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0}]',
        '[{"intent_default": 1.0, "prev_action_listen": 1.0, '
        '"slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_utter_greet": 1.0, "slot_name_0": 1.0}]',
        '[{"entity_name": 1.0, "intent_greet": 1.0, '
        '"prev_action_listen": 1.0, "slot_name_0": 1.0}]'
    ]

def test_load_multi_file_training_data(default_domain):
    # the stories file in `data/test_multifile_stories` is the same as in
    # `data/test_stories/stories.md`, but split across multiple files
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data("data/test_stories/stories.md",
                                  default_domain,
                                  augmentation_factor=0)
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
        trackers, default_domain)
    hashed = []
    for sts, acts in zip(tr_as_sts, tr_as_acts):
        hashed.append(json.dumps(sts + acts, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                 max_history=2)
    trackers_mul = training.load_data("data/test_multifile_stories",
                                      default_domain,
                                      augmentation_factor=0)
    (tr_as_sts_mul, tr_as_acts_mul) = featurizer.training_states_and_actions(
        trackers_mul, default_domain)
    hashed_mul = []
    for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul):
        hashed_mul.append(json.dumps(sts_mul + acts_mul, sort_keys=True))
    hashed_mul = sorted(hashed_mul, reverse=True)

    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul
    assert np.all(data.X.sort(axis=0) == data_mul.X.sort(axis=0))
    assert np.all(data.y.sort(axis=0) == data_mul.y.sort(axis=0))

def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    # create a hidden file
    open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close()
    # create a normal file
    normal_file = os.path.join(tmpdir.strpath, "normal_file")
    open(normal_file, 'a').close()

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data(tmpdir.strpath, default_domain)
    data = featurizer.featurize_trackers(trackers, default_domain)

    assert len(data.X) == 0
    assert len(data.y) == 0

def train_bot():
    logging.basicConfig(level='INFO')

    training_data_file = './data/stories'
    model_path = './models/dialogue'

    fallback = FallbackPolicy(fallback_action_name="utter_not_understood",
                              core_threshold=0.3,
                              nlu_threshold=0.6)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent('./data/domain.yml',
                  policies=[
                      MemoizationPolicy(max_history=5),
                      KerasPolicy(featurizer),
                      fallback
                  ])

    training_data = agent.load_data(training_data_file)

    agent.train(training_data,
                augmentation_factor=50,
                epochs=500,
                batch_size=10,
                validation_split=0.2)

    agent.persist(model_path)

def train_dialogue(domain_file='restaurant_domain.yml',
                   model_path='./models/dialogue',
                   training_data_file='./data/stories.md'):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    fallback = FallbackPolicy(fallback_action_name="action_default_fallback",
                              core_threshold=0.3,
                              nlu_threshold=0.3)
    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=5),
                      KerasPolicy(featurizer),
                      fallback
                  ])
    agent.train(
        training_data_file,
        # max_history=3,
        epochs=300,
        batch_size=50,
        validation_split=0.2,
        augmentation_factor=50)
    agent.persist(model_path)
    return agent

def train_bot():
    training_data_file = './data/stories'
    model_path = './models/dialogue'
    domain_file = './data/domain.yml'

    # core_threshold: min confidence needed to accept an action predicted by Rasa Core
    # nlu_threshold: min confidence needed to accept an intent predicted by the interpreter (NLU)
    fallback = FallbackPolicy(fallback_action_name="action_not_understood",
                              core_threshold=0.5,
                              nlu_threshold=0.35)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=3)
    agent = Agent(domain=domain_file,
                  policies=[
                      MemoizationPolicy(max_history=2),
                      KerasPolicy(featurizer),
                      fallback
                  ])

    training_data = agent.load_data(training_data_file)

    agent.train(training_data,
                augmentation_factor=50,
                epochs=400,
                batch_size=50,
                validation_split=0.2)

    agent.persist(model_path)

def train_dialogue(self, domain_file, model_path, training_data_file):
    fallback = FallbackPolicy(fallback_action_name="utter_default",
                              core_threshold=0.2,
                              nlu_threshold=0.5)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=10)
    self.agent = Agent(domain_file,
                       policies=[MemoizationPolicy(max_history=10),
                                 KerasPolicy(epochs=90,
                                             batch_size=20,
                                             validation_split=0.1),
                                 fallback])
    data = self.agent.load_data(training_data_file)
    self.agent.train(data)
    self.agent.persist(model_path)

def train_core(domain_file="robot/config/domain.yml", model_path="robot/models/dialogue", training_data_file="robot/config/stories.md"): from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer, BinarySingleStateFeaturizer) # fallback = FallbackPolicy(fallback_action_name="action_default_fallback", # core_threshold=0.9, # nlu_threshold=0.9) fallback = FallbackPolicy(fallback_action_name="action_default_custom", core_threshold=0.8, nlu_threshold=0.8) agent = Agent( domain_file, policies=[ MemoizationPolicy(max_history=5), KerasPolicy( MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(), max_history=5)), fallback ]) training_data = agent.load_data(training_data_file) # 训练agent的策略policy agent.train(training_data, epochs=500) agent.persist(model_path) return agent
def _standard_featurizer(cls, max_history=None):
    max_history = max_history or cls.MAX_HISTORY_DEFAULT
    # Memoization policy always uses MaxHistoryTrackerFeaturizer
    # without state_featurizer
    return MaxHistoryTrackerFeaturizer(state_featurizer=None,
                                       max_history=max_history,
                                       use_intent_probabilities=False)

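# Illustrative sketch (not from the source): how the memoization-style default
# featurizer above differs from the Keras-style setup used in the snippets
# below. The classes and keyword arguments are the real rasa_core ones; the
# variable names and max_history value are assumptions for the example only.
memo_featurizer = MaxHistoryTrackerFeaturizer(state_featurizer=None,
                                              max_history=5,
                                              use_intent_probabilities=False)
keras_featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                               max_history=5)
# MemoizationPolicy memorises exact state sequences, so it needs no state
# featurizer; KerasPolicy needs one to turn tracker states into numeric vectors.
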
def run_bot_cli(input_channel, interpreter,
                domain_file="./data/student_info_domain.yml",
                training_data_file='./data/stories.md'):
    # Featurizer generation
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    # Build the agent with a memoization policy and a Keras policy that uses
    # the featurizer above
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer)],
                  interpreter=interpreter)

    # This is where our training data file is loaded in for training
    training_data = agent.load_data(training_data_file)

    # training_data - the training data object created in the line above
    # input_channel - how the trainer receives its input
    # batch_size - number of training examples per model update
    # epochs - number of training passes
    # validation_split - fraction of the training data to be used as validation data
    # augmentation_factor - how many of the dialogue stories are randomly glued together;
    #                       the more stories you have, the higher the augmentation factor you want
    agent.train_online(training_data,
                       input_channel=input_channel,
                       batch_size=35,
                       epochs=400,
                       max_training_samples=200,
                       validation_split=0.2,
                       augmentation_factor=20)
    return agent

def train_dialogue(domain_file='restaurant_domain.yml',
                   model_path='./models/dialogue',
                   training_data_file='./data/stories.md'):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer)])
    data = agent.load_data(training_data_file, augmentation_factor=50)
    agent.train(data,
                epochs=500,
                batch_size=30,
                validation_split=0.2)
    # agent.train(
    #     training_data_file,
    #     # max_history=3,
    #     epochs=300,
    #     batch_size=50,
    #     validation_split=0.2,
    #     augmentation_factor=50)
    agent.persist(model_path)
    return agent

def test_generate_training_data_with_cycles(tmpdir, default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=4)
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0)
    training_data = featurizer.featurize_trackers(training_trackers,
                                                  default_domain)
    y = training_data.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    assert len(training_trackers) == 3 or len(training_trackers) == 4

    # if we have 4 trackers, there is going to be one example more for label 2
    num_twos = len(training_trackers) - 1
    assert Counter(y) == {0: 6, 1: 2, 2: num_twos, 3: 1, 4: 3}

def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
    # create a hidden file
    open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close()
    # create a normal file
    normal_file = os.path.join(tmpdir.strpath, "normal_file")
    open(normal_file, 'a').close()

    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=2)
    trackers = training.load_data(
        tmpdir.strpath,
        default_domain
    )
    data = featurizer.featurize_trackers(trackers, default_domain)

    assert len(data.X) == 0
    assert len(data.y) == 0

def test_generate_training_data_with_cycles(tmpdir, default_domain):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=4)
    training_trackers = training.load_data(
        "data/test_stories/stories_with_cycle.md",
        default_domain,
        augmentation_factor=0
    )
    training_data = featurizer.featurize_trackers(training_trackers,
                                                  default_domain)
    y = training_data.y.argmax(axis=-1)

    # how many there are depends on the graph which is not created in a
    # deterministic way but should always be 3 or 4
    assert len(training_trackers) == 3 or len(training_trackers) == 4

    # if we have 4 trackers, there is going to be one example more for label 3
    num_threes = len(training_trackers) - 1
    assert Counter(y) == {0: 6, 1: 2, 3: num_threes, 4: 1, 5: 3}

def train_core(domain_file="config/domain.yml", model_path="models/dialogue", training_data_file="config/stories.md"): from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer, BinarySingleStateFeaturizer) agent = Agent(domain_file, policies=[MemoizationPolicy(max_history=6), KerasPolicy(MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(), max_history=6))]) training_data = agent.load_data(training_data_file) # 训练agent的策略policy agent.train(training_data, epochs=800) agent.persist(model_path) return agent
def graph(domain_file="robot/config/domain.yml", training_data_file="robot/config/stories.md"): from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer, BinarySingleStateFeaturizer) agent = Agent(domain_file, policies=[ MemoizationPolicy(max_history=6), KerasPolicy( MaxHistoryTrackerFeaturizer( BinarySingleStateFeaturizer(), max_history=6)) ]) agent.visualize(training_data_file, output_file='graph.png', max_history=6)
def train_dialogue(domain_file, model_path, training_folder):
    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=5),
                      KerasPolicy(MaxHistoryTrackerFeaturizer(
                                      BinarySingleStateFeaturizer(),
                                      max_history=5),
                                  epochs=300),
                      fallback
                  ])
    training_data = agent.load_data(training_folder)
    agent.train(training_data)
    agent.persist(model_path)

def train_dialogue(domain_file="mobile_domain.yml", model_path="models/dialogue", training_data_file="data/mobile_story.md"): agent = Agent(domain_file, policies=[ MemoizationPolicy(max_history=6), KerasPolicy( MaxHistoryTrackerFeaturizer( BinarySingleStateFeaturizer(), max_history=6)) ]) training_data = agent.load_data(training_data_file) agent.train(training_data, epochs=100) agent.persist(model_path) return agent
def train_dialogue(domain_file, dia_data_file, nlu_model_dir, dia_model_dir):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer),
                            policy.fallback],
                  interpreter=RasaNLUInterpreter(nlu_model_dir))
    training_data = agent.load_data(dia_data_file)
    agent.train(training_data,
                epochs=400,
                batch_size=100,
                # max_history=5,
                validation_split=0.2)
    # augmentation_factor=50,
    agent.persist(dia_model_dir)
    return agent

def train_dialogue(domain_file, model_path, training_folder):
    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=6),
                      KerasPolicy(
                          MaxHistoryTrackerFeaturizer(
                              BinarySingleStateFeaturizer(),
                              max_history=6)),
                      FallbackPolicy(nlu_threshold=0.8,
                                     core_threshold=0.3)
                  ])
    training_data = agent.load_data(training_folder)
    agent.train(training_data, epochs=100)
    agent.persist(model_path)

def train_dialogue(domain_file='restaurant_domain.yml',
                   model_path="models/dialogue",
                   training_data_file='data/babi_stories.md'):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=5),
                  KerasPolicy(featurizer)])
    training_data = agent.load_data(training_data_file)
    agent.train(training_data,
                epochs=400,
                batch_size=100,
                validation_split=0.2)
    agent.persist(model_path)
    return agent

def train_dialog(domain_file, training_data_file, model_dir, interpreter):
    _agent = Agent(domain_file,
                   policies=[
                       MemoizationPolicy(max_history=6),
                       KerasPolicy(MaxHistoryTrackerFeaturizer(
                                       BinarySingleStateFeaturizer(),
                                       max_history=6),
                                   augmentation_factor=50,
                                   epochs=300,
                                   batch_size=50,
                                   validation_split=0.2)
                   ],
                   interpreter=interpreter)
    _training_data = _agent.load_data(training_data_file)
    _agent.train(_training_data)
    _agent.persist(model_dir)
    return _agent

def train_dialogue_model(domain_file, stories_file, output_path,
                         nlu_model_path=None,
                         endpoints=None,
                         max_history=None,
                         dump_flattened_stories=False,
                         kwargs=None):
    if not kwargs:
        kwargs = {}

    action_endpoint = utils.read_endpoint_config(endpoints, "action_endpoint")

    fallback_args, kwargs = utils.extract_args(kwargs,
                                               {"nlu_threshold",
                                                "core_threshold",
                                                "fallback_action_name"})

    policies = [
        FallbackPolicy(
            fallback_args.get("nlu_threshold",
                              DEFAULT_NLU_FALLBACK_THRESHOLD),
            fallback_args.get("core_threshold",
                              DEFAULT_CORE_FALLBACK_THRESHOLD),
            fallback_args.get("fallback_action_name",
                              DEFAULT_FALLBACK_ACTION)),
        MemoizationPolicy(max_history=max_history),
        KerasPolicy(
            MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                        max_history=max_history))]

    agent = Agent(domain_file,
                  action_endpoint=action_endpoint,
                  interpreter=nlu_model_path,
                  policies=policies)

    data_load_args, kwargs = utils.extract_args(kwargs,
                                                {"use_story_concatenation",
                                                 "unique_last_num_states",
                                                 "augmentation_factor",
                                                 "remove_duplicates",
                                                 "debug_plots"})

    training_data = agent.load_data(stories_file, **data_load_args)
    agent.train(training_data, **kwargs)
    agent.persist(output_path, dump_flattened_stories)

    return agent

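# Hedged usage sketch (not part of the source): one way train_dialogue_model
# above might be invoked. The file paths and numeric values are hypothetical;
# the kwargs keys mirror the ones extract_args pulls out in the function body
# (fallback settings and data-loading settings), with the remainder ("epochs")
# forwarded to agent.train.
if __name__ == "__main__":
    train_dialogue_model(domain_file="domain.yml",
                         stories_file="data/stories.md",
                         output_path="models/dialogue",
                         max_history=3,
                         kwargs={"nlu_threshold": 0.4,
                                 "core_threshold": 0.3,
                                 "augmentation_factor": 20,
                                 "epochs": 200})
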
def train_dialogue(domain_file='foodie.yml',
                   model_path='./models/dialogue',
                   training_data_file='./data/stories.md'):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(domain_file,
                  policies=[MemoizationPolicy(max_history=5),
                            KerasPolicy(featurizer)])
    agent.train(
        training_data_file,
        # max_history=3,
        epochs=300,
        batch_size=50,
        validation_split=0.2,
        augmentation_factor=50)
    agent.persist(model_path)
    return agent

def default_policies(cls, fallback_args, max_history):
    # type: (Dict[Text, Any], int) -> List[Policy]
    """Load the default policy setup consisting of
    FallbackPolicy, MemoizationPolicy and KerasPolicy."""

    return [
        FallbackPolicy(
            fallback_args.get("nlu_threshold",
                              DEFAULT_NLU_FALLBACK_THRESHOLD),
            fallback_args.get("core_threshold",
                              DEFAULT_CORE_FALLBACK_THRESHOLD),
            fallback_args.get("fallback_action_name",
                              DEFAULT_FALLBACK_ACTION)),
        MemoizationPolicy(max_history=max_history),
        KerasPolicy(
            MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                        max_history=max_history))
    ]

def from_dict(cls, dictionary):
    # type: (Dict[Text, Any]) -> List[Policy]
    policies = []

    for policy in dictionary.get('policies', []):
        policy_name = policy.pop('name')

        if policy_name == 'KerasPolicy':
            policy_object = KerasPolicy(MaxHistoryTrackerFeaturizer(
                BinarySingleStateFeaturizer(),
                max_history=policy.get('max_history', 3)))
        else:
            constr_func = utils.class_from_module_path(policy_name)
            policy_object = constr_func(**policy)

        policies.append(policy_object)

    return policies

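# Hedged example (not from the source): the shape of dictionary that from_dict
# above expects. Each entry's 'name' selects the policy class; for KerasPolicy
# the branch above builds it with a MaxHistoryTrackerFeaturizer using the
# 'max_history' value, while other entries pass their remaining keys to the
# class constructor. The values, and the PolicyEnsemble class name in the
# commented call, are assumptions for illustration.
example_config = {
    "policies": [
        {"name": "MemoizationPolicy", "max_history": 5},
        {"name": "KerasPolicy", "max_history": 3},
        {"name": "FallbackPolicy", "nlu_threshold": 0.4, "core_threshold": 0.3},
    ]
}
# policies = PolicyEnsemble.from_dict(example_config)  # class name assumed
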
def trainingBot(to_bot_queue, to_human_queue, base_model, output_model,
                nlu_model, training_data):
    utils.configure_colored_logging(loglevel="INFO")
    max_history = None
    interactive_learning_on = True
    channel = TrainingInputChannel(to_bot_queue, to_human_queue)
    preloaded_model = True

    if preloaded_model:
        agent = CustomAgent.load(base_model,
                                 NaturalLanguageInterpreter.create(nlu_model))
        training_data = agent.load_data(training_data)
        agent.train_online_preloaded_model(training_data,
                                           input_channel=channel,
                                           model_path=output_model)
    else:
        agent = CustomAgent(
            "domain.yml",
            policies=[
                MemoizationPolicy(max_history=max_history),
                KerasPolicy(
                    MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                                max_history=max_history)),
                FallbackPolicy(fallback_action_name="utter_fallback",
                               nlu_threshold=0.3)
            ])
        training_data = agent.load_data(training_data)
        agent.interpreter = NaturalLanguageInterpreter.create(nlu_model)
        agent.train_online(training_data,
                           input_channel=channel,
                           model_path=output_model,
                           augmentation_factor=50,
                           epochs=250,
                           batch_size=10,
                           validation_split=0.2)
        agent.persist(output_model)

def trainRasaCore(self):
    try:
        training_data_file = "./" + self.config.get('inputData', 'stories')
        domain_yml = "./" + self.config.get('inputData', 'coreyml')
        logger.info(
            "Building RASA Core model with stories : %s, domain_yml : %s"
            % (training_data_file, domain_yml))
        model_name = "model_" + datetime.now().strftime("%Y%m%dT%H%M%S")
        model_location = "./models/ourgroup/dialogue/" + model_name
        featurizer = MaxHistoryTrackerFeaturizer(
            BinarySingleStateFeaturizer(), max_history=5)
        agent = Agent(domain_yml,
                      policies=[
                          MemoizationPolicy(max_history=4),
                          KerasPolicy(featurizer)
                      ])
        agent.train(
            training_data_file,
            augmentation_factor=50,
            # max_history=4,
            epochs=500,
            batch_size=30,
            validation_split=0.2)
        agent.persist(model_location)
        model_location = os.path.realpath(model_location)
        logger.info("RASA Core model_location : %s" % (str(model_location)))
        self.config.set('coreModel', 'model_location', value=model_location)
        with open("./etc/config.ini", "w+") as f:
            self.config.write(f)
        return ("RASA core model training completed, see details above")
    except Exception as e:
        logger.error("unable to train rasa core model, exception : %s"
                     % (str(e)))
        raise (e)

def train_dialogue(domain_file="data/domain.yml", model_path="models/dialogue", training_data_file="data/stories.md"): from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer, BinarySingleStateFeaturizer) featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(), max_history=5) agent = Agent( domain_file, policies=[MemoizationPolicy(max_history=5), KerasPolicy(featurizer)]) agent.train(training_data_file, epochs=200, batch_size=16, augmentation_factor=50, validation_split=0.2) agent.persist(model_path) return agent
def run_ivrbot_online(input_channel=ConsoleInputChannel(),
                      interpreter=RasaNLUInterpreter("models/ivr/demo"),
                      domain_file="data/domain.yml",
                      training_data_file="data/stories.md"):
    from rasa_core.featurizers import (MaxHistoryTrackerFeaturizer,
                                       BinarySingleStateFeaturizer)
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=5)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=5),
                  KerasPolicy(featurizer)],
        interpreter=interpreter)
    agent.train_online(training_data_file,
                       input_channel=input_channel,
                       batch_size=50,
                       epochs=200,
                       max_training_samples=300)
    return agent

def run_foodie_online(input_channel, interpreter,
                      domain_file="foodie_domain.yml",
                      training_data_file='data/stories.md'):
    featurizer = MaxHistoryTrackerFeaturizer(BinarySingleStateFeaturizer(),
                                             max_history=10)
    agent = Agent(
        domain_file,
        policies=[MemoizationPolicy(max_history=10),
                  KerasPolicy(featurizer)],
        interpreter=interpreter)
    agent.train_online(training_data_file,
                       input_channel=input_channel,
                       max_history=2,
                       batch_size=50,
                       epochs=200,
                       max_training_samples=300)
    return agent

def train_dialogue_model(domain_file, stories_file, output_path,
                         use_online_learning=False,
                         nlu_model_path=None,
                         max_history=None,
                         dump_flattened_stories=False,
                         kwargs=None):
    if not kwargs:
        kwargs = {}

    agent = Agent(domain_file,
                  policies=[
                      MemoizationPolicy(max_history=max_history),
                      KerasPolicy(
                          MaxHistoryTrackerFeaturizer(
                              BinarySingleStateFeaturizer(),
                              max_history=max_history))
                  ])

    data_load_args, kwargs = utils.extract_args(
        kwargs,
        {"use_story_concatenation", "unique_last_num_states",
         "augmentation_factor", "remove_duplicates", "debug_plots"})

    training_data = agent.load_data(stories_file, **data_load_args)

    if use_online_learning:
        if nlu_model_path:
            agent.interpreter = RasaNLUInterpreter(nlu_model_path)
        else:
            agent.interpreter = RegexInterpreter()
        agent.train_online(training_data,
                           input_channel=ConsoleInputChannel(),
                           model_path=output_path,
                           **kwargs)
    else:
        agent.train(training_data, **kwargs)

    agent.persist(output_path, dump_flattened_stories)

def test_load_multi_file_training_data(default_domain):
    # the stories file in `data/test_multifile_stories` is the same as in
    # `data/test_stories/stories.md`, but split across multiple files
    featurizer = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers = training.load_data(
        "data/test_stories/stories.md",
        default_domain,
        augmentation_factor=0
    )
    (tr_as_sts, tr_as_acts) = featurizer.training_states_and_actions(
        trackers, default_domain)
    hashed = []
    for sts, acts in zip(tr_as_sts, tr_as_acts):
        hashed.append(json.dumps(sts + acts, sort_keys=True))
    hashed = sorted(hashed, reverse=True)

    data = featurizer.featurize_trackers(trackers, default_domain)

    featurizer_mul = MaxHistoryTrackerFeaturizer(
        BinarySingleStateFeaturizer(), max_history=2)
    trackers_mul = training.load_data(
        "data/test_multifile_stories",
        default_domain,
        augmentation_factor=0
    )
    (tr_as_sts_mul, tr_as_acts_mul) = featurizer.training_states_and_actions(
        trackers_mul, default_domain)
    hashed_mul = []
    for sts_mul, acts_mul in zip(tr_as_sts_mul, tr_as_acts_mul):
        hashed_mul.append(json.dumps(sts_mul + acts_mul, sort_keys=True))
    hashed_mul = sorted(hashed_mul, reverse=True)

    data_mul = featurizer_mul.featurize_trackers(trackers_mul, default_domain)

    assert hashed == hashed_mul
    assert np.all(data.X.sort(axis=0) == data_mul.X.sort(axis=0))
    assert np.all(data.y.sort(axis=0) == data_mul.y.sort(axis=0))