def test_featurize_trackers_raises_on_missing_state_featurizer( default_domain: Domain):
    """A `TrackerFeaturizer` without a state featurizer must refuse to featurize."""
    featurizer_under_test = TrackerFeaturizer()
    # No state featurizer was supplied, so featurization has nothing to encode
    # states with and is expected to fail loudly.
    with pytest.raises(ValueError):
        featurizer_under_test.featurize_trackers([], default_domain, RegexInterpreter())
def test_convert_labels_to_ids(domain: Domain):
    """`_convert_labels_to_ids` maps action names to their integer domain indices."""
    action_name_sequences = [
        ["utter_greet", "utter_channel"],
        ["utter_greet", "utter_default", "utter_goodbye"],
    ]
    featurizer = TrackerFeaturizer()
    ids = featurizer._convert_labels_to_ids(action_name_sequences, domain)
    # Ragged result: one id array per input sequence, indices taken from `domain`.
    expected = np.array([np.array([14, 11]), np.array([14, 12, 13])])
    assert expected.size == ids.size
    for expected_ids, actual_ids in zip(expected, ids):
        assert np.all(expected_ids == actual_ids)
def load(cls, path: Text) -> Policy:
    """Restore a persisted sklearn policy from ``path``.

    Reads the featurizer, the policy metadata, the cached zero-state
    features and the pickled estimator state, then rebuilds the policy.
    """
    filename = os.path.join(path, "sklearn_model.pkl")
    zero_features_filename = os.path.join(path, "zero_state_features.pkl")
    # NOTE(review): existence is checked on the directory while the message
    # names the model file inside it — kept as-is to preserve behavior.
    if not os.path.exists(path):
        raise OSError("Failed to load dialogue model. Path {} "
                      "doesn't exist".format(os.path.abspath(filename)))

    featurizer = TrackerFeaturizer.load(path)
    assert isinstance(featurizer, MaxHistoryTrackerFeaturizer), (
        "Loaded featurizer of type {}, should be "
        "MaxHistoryTrackerFeaturizer.".format(type(featurizer).__name__))

    meta = json.loads(
        rasa.shared.utils.io.read_file(os.path.join(path, "sklearn_policy.json")))
    zero_state_features = io_utils.pickle_load(zero_features_filename)
    policy = cls(
        featurizer=featurizer,
        priority=meta["priority"],
        zero_state_features=zero_state_features,
    )
    # Re-apply the pickled attributes (trained estimator etc.) onto the
    # freshly constructed instance.
    vars(policy).update(io_utils.pickle_load(filename))
    logger.info("Loaded sklearn model")
    return policy
def load(cls, path: Union[Text, Path]) -> Policy:
    """Restore a persisted sklearn policy from ``path`` (pathlib variant)."""
    base = Path(path)
    filename = base / "sklearn_model.pkl"
    zero_features_filename = base / "zero_state_features.pkl"
    if not base.exists():
        raise OSError(
            f"Failed to load dialogue model. Path {filename.absolute()} "
            f"doesn't exist.")

    featurizer = TrackerFeaturizer.load(path)
    assert isinstance(featurizer, MaxHistoryTrackerFeaturizer), (
        f"Loaded featurizer of type {type(featurizer).__name__}, should be "
        f"MaxHistoryTrackerFeaturizer.")

    meta = json.loads(
        rasa.shared.utils.io.read_file(base / "sklearn_policy.json"))
    zero_state_features = io_utils.pickle_load(zero_features_filename)
    policy = cls(
        featurizer=featurizer,
        priority=meta["priority"],
        zero_state_features=zero_state_features,
    )
    # Re-apply the pickled attributes (trained estimator etc.) onto the
    # freshly constructed instance.
    vars(policy).update(io_utils.pickle_load(filename))
    logger.info("Loaded sklearn model")
    return policy
def load(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    **kwargs: Any,
) -> Policy:
    """Loads a trained policy (see parent class for full docstring)."""
    featurizer = None
    try:
        with model_storage.read_from(resource) as path:
            featurizer_file = Path(path) / FEATURIZER_FILE
            if featurizer_file.is_file():
                featurizer = TrackerFeaturizer.load(path)
            # Contextual kwargs override the persisted configuration.
            config.update(kwargs)
    except (ValueError, FileNotFoundError, FileIOException):
        # Missing/corrupt persisted data is not fatal: fall back to a
        # policy without a restored featurizer.
        logger.debug(
            f"Couldn't load metadata for policy '{cls.__name__}' as the persisted "
            f"metadata couldn't be loaded."
        )
    return cls(
        config,
        model_storage,
        resource,
        execution_context,
        featurizer=featurizer,
    )
def load(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    **kwargs: Any,
) -> MemoizationPolicy:
    """Loads a trained policy (see parent class for full docstring)."""
    featurizer = None
    lookup = None
    try:
        with model_storage.read_from(resource) as path:
            # The lookup table holds the policy's memorized state history
            # -> action mapping; it is stored in the metadata JSON file.
            metadata = rasa.shared.utils.io.read_json_file(
                Path(path) / cls._metadata_filename()
            )
            lookup = metadata["lookup"]
            if (Path(path) / FEATURIZER_FILE).is_file():
                featurizer = TrackerFeaturizer.load(path)
    except (ValueError, FileNotFoundError, FileIOException):
        # Best-effort: a policy with no lookup/featurizer is still usable.
        logger.warning(
            f"Couldn't load metadata for policy '{cls.__name__}' as the persisted "
            f"metadata couldn't be loaded.")
    return cls(
        config,
        model_storage,
        resource,
        execution_context,
        featurizer=featurizer,
        lookup=lookup,
    )
def load(
    cls,
    path: Union[Text, Path],
    should_finetune: bool = False,
    epoch_override: int = defaults[EPOCHS],
    **kwargs: Any,
) -> "TEDPolicy":
    """Loads a policy from the storage.

    Args:
        path: Path on disk where policy is persisted.
        should_finetune: Whether to load the policy for finetuning.
        epoch_override: Override the number of epochs in persisted
            configuration for further finetuning.
        **kwargs: Any other arguments

    Returns:
        Loaded policy. If the model cannot be found at `path`, a warning is
        logged and a fresh, untrained policy is returned instead.
    """
    model_path = Path(path)
    if not model_path.exists():
        # BUG FIX: `cls` is already a class inside a classmethod, so
        # `cls.__class__.__name__` named the *metaclass* (e.g. "ABCMeta"),
        # not the policy class. `cls.__name__` gives the intended name.
        logger.warning(
            f"Failed to load {cls.__name__} model. Path "
            f"'{model_path.absolute()}' doesn't exist.")
        return cls()

    featurizer = TrackerFeaturizer.load(path)
    # Without a persisted data example there is no trained model to restore;
    # return a policy that only carries the featurizer.
    if not (model_path / f"{cls._metadata_filename()}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    model_utilities = cls._load_model_utilities(model_path)
    model_utilities["meta"] = cls._update_loaded_params(model_utilities["meta"])
    if should_finetune:
        # Only finetuning is allowed to override the persisted epoch count.
        model_utilities["meta"][EPOCHS] = epoch_override

    (
        model_data_example,
        predict_data_example,
    ) = cls._construct_model_initialization_data(model_utilities["loaded_data"])

    model = cls._load_tf_model(
        model_utilities,
        model_data_example,
        predict_data_example,
        featurizer,
        should_finetune,
    )

    return cls._load_policy_with_model(model, featurizer, model_utilities,
                                       should_finetune)
def load(cls, path: Union[Text, Path]) -> "Policy":
    """Loads a policy from path.

    Args:
        path: Path to load policy from.

    Returns:
        An instance of `Policy`.
    """
    metadata_file = Path(path) / cls._metadata_filename()
    # Guard clause: without metadata there is nothing to restore.
    if not metadata_file.is_file():
        logger.info(
            f"Couldn't load metadata for policy '{cls.__name__}'. "
            f"File '{metadata_file}' doesn't exist."
        )
        return cls()

    data = json.loads(rasa.shared.utils.io.read_file(metadata_file))
    if (Path(path) / FEATURIZER_FILE).is_file():
        # Restore the featurizer alongside the policy's metadata.
        data["featurizer"] = TrackerFeaturizer.load(path)
    return cls(**data)
def test_persist_and_load_tracker_featurizer(tmp_path: Text, moodbot_domain: Domain):
    """Round trip: a persisted featurizer is loadable with its state featurizer."""
    # NOTE(review): `tmp_path` is pytest's pathlib.Path fixture despite the
    # `Text` annotation — presumably persist/load accept path-likes; confirm.
    state_featurizer = SingleStateFeaturizer()
    state_featurizer.prepare_for_training(moodbot_domain, RegexInterpreter())
    featurizer = MaxHistoryTrackerFeaturizer(state_featurizer)

    featurizer.persist(tmp_path)
    restored = TrackerFeaturizer.load(tmp_path)

    assert restored is not None
    assert restored.state_featurizer is not None
def load(cls, path: Text) -> "MemoizationPolicy":
    """Restore a memoization policy (priority + lookup table) from ``path``."""
    featurizer = TrackerFeaturizer.load(path)
    memorized_file = os.path.join(path, "memorized_turns.json")

    # Guard clause: without the memorization file, fall back to an empty policy.
    if not os.path.isfile(memorized_file):
        logger.info("Couldn't load memoization for policy. "
                    "File '{}' doesn't exist. Falling back to empty "
                    "turn memory.".format(memorized_file))
        return cls()

    data = json.loads(rasa.shared.utils.io.read_file(memorized_file))
    return cls(featurizer=featurizer,
               priority=data["priority"],
               lookup=data["lookup"])
def load(cls, path: Union[Text, Path], **kwargs: Any) -> "Policy":
    """Loads a policy from path.

    Args:
        path: Path to load policy from.

    Returns:
        An instance of `Policy`.
    """
    metadata_file = Path(path) / cls._metadata_filename()
    # Guard clause: without metadata there is nothing to restore.
    if not metadata_file.is_file():
        logger.info(
            f"Couldn't load metadata for policy '{cls.__name__}'. "
            f"File '{metadata_file}' doesn't exist."
        )
        return cls()

    data = json.loads(rasa.shared.utils.io.read_file(metadata_file))
    if (Path(path) / FEATURIZER_FILE).is_file():
        data["featurizer"] = TrackerFeaturizer.load(path)
    # Contextual kwargs (e.g. `should_finetune`) override persisted values.
    data.update(kwargs)

    constructor_args = rasa.shared.utils.common.arguments_of(cls)
    if "kwargs" not in constructor_args:
        if set(data.keys()).issubset(set(constructor_args)):
            # Constructor can still absorb everything by name: warn only.
            rasa.shared.utils.io.raise_deprecation_warning(
                f"`{cls.__name__}.__init__` does not accept `**kwargs` "
                f"This is required for contextual information e.g. the flag "
                f"`should_finetune`.",
                warn_until_version="3.0.0",
            )
        else:
            raise UnsupportedDialogueModelError(
                f"`{cls.__name__}.__init__` does not accept `**kwargs`. "
                f"Attempting to pass {data} to the policy. "
                f"This argument should be added to all policies by "
                f"Rasa Open Source 3.0.0."
            )
    return cls(**data)
def load(cls, path: Text) -> "TEDPolicy":
    """Loads a policy from the storage.
    **Needs to load its featurizer**
    """
    if not os.path.exists(path):
        raise Exception(
            f"Failed to load TED policy model. Path "
            f"'{os.path.abspath(path)}' doesn't exist."
        )

    model_path = Path(path)
    tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

    featurizer = TrackerFeaturizer.load(path)

    # Without a persisted data example there is no trained TF model to
    # restore; return a policy that only carries the featurizer.
    if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    # Load the persisted artifacts: an example of the training data layout,
    # the label data, and the zero-state feature cache.
    loaded_data = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl"
    )
    label_data = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl"
    )
    zero_state_features = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl"
    )
    label_data = RasaModelData(data=label_data)
    meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
    priority = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl"
    )

    # The data example defines the signature the TF model is rebuilt against.
    model_data_example = RasaModelData(
        label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
    )
    meta = train_utils.update_similarity_type(meta)

    model = TED.load(
        str(tf_model_file),
        model_data_example,
        data_signature=model_data_example.get_signature(),
        config=meta,
        # Max-history featurization changes how the model consumes dialogue
        # turns, so the loaded model must know which featurizer was used.
        max_history_tracker_featurizer_used=isinstance(
            featurizer, MaxHistoryTrackerFeaturizer
        ),
        label_data=label_data,
    )

    # build the graph for prediction
    predict_data_example = RasaModelData(
        label_key=LABEL_KEY,
        label_sub_key=LABEL_SUB_KEY,
        data={
            feature_name: features
            for feature_name, features in model_data_example.items()
            if feature_name
            in STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE]
        },
    )
    model.build_for_predict(predict_data_example)

    return cls(
        featurizer=featurizer,
        priority=priority,
        model=model,
        zero_state_features=zero_state_features,
        **meta,
    )
def load(
    cls,
    path: Union[Text, Path],
    should_finetune: bool = False,
    epoch_override: int = defaults[EPOCHS],
    **kwargs: Any,
) -> "TEDPolicy":
    """Loads a policy from the storage.
    **Needs to load its featurizer**
    """
    model_path = Path(path)
    if not model_path.exists():
        raise Exception(f"Failed to load TED policy model. Path "
                        f"'{model_path.absolute()}' doesn't exist.")

    tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

    featurizer = TrackerFeaturizer.load(path)

    # Without a persisted data example there is no trained TF model to
    # restore; return a policy that only carries the featurizer.
    if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    # Load the persisted artifacts: an example of the training data layout,
    # the label data, and the zero-state feature cache.
    loaded_data = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl")
    label_data = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl")
    zero_state_features = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl")
    label_data = RasaModelData(data=label_data)
    meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
    priority = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl")

    # The data example defines the signature the TF model is rebuilt against.
    model_data_example = RasaModelData(label_key=LABEL_KEY,
                                       label_sub_key=LABEL_SUB_KEY,
                                       data=loaded_data)
    meta = train_utils.update_similarity_type(meta)
    # NOTE(review): the epoch override is applied unconditionally here (its
    # default is the configured EPOCHS default), unlike variants that gate
    # it on `should_finetune` — confirm this is intended.
    meta[EPOCHS] = epoch_override

    model = TED.load(
        str(tf_model_file),
        model_data_example,
        data_signature=model_data_example.get_signature(),
        config=meta,
        # Max-history featurization changes how the model consumes dialogue
        # turns, so the loaded model must know which featurizer was used.
        max_history_tracker_featurizer_used=isinstance(
            featurizer, MaxHistoryTrackerFeaturizer),
        label_data=label_data,
        finetune_mode=should_finetune,
    )

    if not should_finetune:
        # build the graph for prediction
        features_to_select = STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [
            DIALOGUE
        ]
        predict_data_example = RasaModelData(
            label_key=LABEL_KEY,
            label_sub_key=LABEL_SUB_KEY,
            data={
                feature_name: features
                for feature_name, features in model_data_example.items()
                if feature_name in features_to_select
            },
        )
        model.build_for_predict(predict_data_example)

    return cls(
        featurizer=featurizer,
        priority=priority,
        model=model,
        zero_state_features=zero_state_features,
        should_finetune=should_finetune,
        **meta,
    )
def test_fail_to_load_non_existent_featurizer():
    """Loading from a missing path yields `None` rather than raising."""
    result = TrackerFeaturizer.load("non_existent_class")
    assert result is None