def _load_from_files(cls, meta: Dict[Text, Any], model_dir: Text):
    file_name = meta.get("file")
    model_dir = Path(model_dir)

    data_example = io_utils.pickle_load(model_dir / f"{file_name}.data_example.pkl")
    label_data = io_utils.pickle_load(model_dir / f"{file_name}.label_data.pkl")
    index_label_id_mapping = io_utils.json_unpickle(
        model_dir / f"{file_name}.index_label_id_mapping.pkl"
    )
    index_tag_id_mapping = io_utils.json_unpickle(
        model_dir / f"{file_name}.index_tag_id_mapping.pkl"
    )

    # jsonpickle converts dictionary keys to strings
    index_label_id_mapping = {
        int(key): value for key, value in index_label_id_mapping.items()
    }
    if index_tag_id_mapping is not None:
        index_tag_id_mapping = {
            int(key): value for key, value in index_tag_id_mapping.items()
        }

    return (
        index_label_id_mapping,
        index_tag_id_mapping,
        label_data,
        meta,
        data_example,
    )
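# Why the int(key) cast above is needed: JSON object keys are always strings,
# so jsonpickle (which io_utils.json_unpickle is assumed to wrap here)
# stringifies integer keys on serialization. A minimal standalone sketch:
import jsonpickle

mapping = {0: "greet", 1: "goodbye"}
restored = jsonpickle.decode(jsonpickle.encode(mapping))
assert list(restored.keys()) == ["0", "1"]  # integer keys came back as strings
assert {int(key): value for key, value in restored.items()} == mapping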
def load(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    **kwargs: Any,
) -> SklearnIntentClassifier:
    """Loads trained component (see parent class for full docstring)."""
    from sklearn.preprocessing import LabelEncoder

    try:
        with model_storage.read_from(resource) as model_dir:
            file_name = cls.__name__
            classifier_file = model_dir / f"{file_name}_classifier.pkl"

            if classifier_file.exists():
                classifier = io_utils.json_unpickle(classifier_file)

                encoder_file = model_dir / f"{file_name}_encoder.pkl"
                classes = io_utils.json_unpickle(encoder_file)
                encoder = LabelEncoder()
                encoder.classes_ = classes

                return cls(config, model_storage, resource, classifier, encoder)
    except ValueError:
        logger.debug(
            f"Failed to load '{cls.__name__}' from model storage. Resource "
            f"'{resource.name}' doesn't exist."
        )

    return cls(config, model_storage, resource)
def load(cls, path: Text) -> "TEDPolicy":
    """Loads a policy from the storage.

    **Needs to load its featurizer**
    """
    if not os.path.exists(path):
        raise Exception(
            f"Failed to load TED policy model. Path "
            f"'{os.path.abspath(path)}' doesn't exist."
        )

    model_path = Path(path)
    tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

    featurizer = TrackerFeaturizer.load(path)

    if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    loaded_data = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl"
    )
    label_data = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl"
    )
    meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
    priority = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl"
    )

    model_data_example = RasaModelData(label_key=LABEL_IDS, data=loaded_data)
    meta = train_utils.update_similarity_type(meta)

    model = TED.load(
        str(tf_model_file),
        model_data_example,
        data_signature=model_data_example.get_signature(),
        config=meta,
        max_history_tracker_featurizer_used=isinstance(
            featurizer, MaxHistoryTrackerFeaturizer
        ),
        label_data=label_data,
    )

    # build the graph for prediction
    predict_data_example = RasaModelData(
        label_key=LABEL_IDS,
        data={
            feature_name: features
            for feature_name, features in model_data_example.items()
            if DIALOGUE in feature_name
        },
    )
    model.build_for_predict(predict_data_example)

    return cls(featurizer=featurizer, priority=priority, model=model, **meta)
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Text,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["CountVectorsFeaturizer"] = None,
    should_finetune: bool = False,
    **kwargs: Any,
) -> "CountVectorsFeaturizer":
    """Loads trained component (see parent class for full docstring)."""
    file_name = meta.get("file")
    featurizer_file = os.path.join(model_dir, file_name)

    if not os.path.exists(featurizer_file):
        return cls(meta)

    vocabulary = io_utils.json_unpickle(featurizer_file)

    share_vocabulary = meta["use_shared_vocab"]

    if share_vocabulary:
        vectorizers = cls._create_shared_vocab_vectorizers(
            meta, vocabulary=vocabulary
        )
    else:
        vectorizers = cls._create_independent_vocab_vectorizers(
            meta, vocabulary=vocabulary
        )

    ftr = cls(meta, vectorizers, should_finetune)

    # make sure the vocabulary has been loaded correctly
    for attribute in vectorizers:
        ftr.vectorizers[attribute]._validate_vocabulary()

    return ftr
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["ResponseSelector"] = None,
    **kwargs: Any,
) -> "ResponseSelector":
    """Loads the trained model from the provided directory."""
    model = super().load(
        meta, model_dir, model_metadata, cached_component, **kwargs
    )
    if model == cls(component_config=meta):
        model.retrieval_intent_mapping = {}
        return model  # pytype: disable=bad-return-type

    file_name = meta.get("file")
    model_dir = Path(model_dir)

    retrieval_intent_mapping = io_utils.json_unpickle(
        model_dir / f"{file_name}.retrieval_intent_mapping.pkl"
    )

    model.retrieval_intent_mapping = retrieval_intent_mapping

    return model  # pytype: disable=bad-return-type
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["CountVectorsFeaturizer"] = None,
    **kwargs: Any,
) -> "CountVectorsFeaturizer":
    file_name = meta.get("file")
    featurizer_file = os.path.join(model_dir, file_name)

    if not os.path.exists(featurizer_file):
        return cls(meta)

    vocabulary = io_utils.json_unpickle(featurizer_file)

    share_vocabulary = meta["use_shared_vocab"]

    if share_vocabulary:
        vectorizers = cls._create_shared_vocab_vectorizers(
            meta, vocabulary=vocabulary
        )
    else:
        vectorizers = cls._create_independent_vocab_vectorizers(
            meta, vocabulary=vocabulary
        )

    return cls(meta, vectorizers)
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["SklearnIntentClassifier"] = None,
    **kwargs: Any,
) -> "SklearnIntentClassifier":
    class_encoder_file = os.path.join(model_dir, meta.get("class_encoder"))

    if os.path.exists(class_encoder_file):
        class_encoder = io_utils.json_unpickle(class_encoder_file)
        return cls(meta, class_encoder)
    else:
        return cls(meta)
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir=None,
    model_metadata=None,
    cached_component=None,
    **kwargs,
):
    file_name = meta.get("classifier_file")
    classifier_file = os.path.join(model_dir, file_name)
    # the pickled file is expected to contain the entire component,
    # so unpickling it directly yields the loaded classifier
    return json_unpickle(classifier_file)
def load(
    cls,
    config: Dict[Text, Any],
    model_storage: ModelStorage,
    resource: Resource,
    execution_context: ExecutionContext,
    **kwargs: Any,
) -> CountVectorsFeaturizer:
    """Loads trained component (see parent class for full docstring)."""
    try:
        with model_storage.read_from(resource) as model_dir:
            featurizer_file = model_dir / "vocabularies.pkl"
            vocabulary = io_utils.json_unpickle(featurizer_file)

            share_vocabulary = config["use_shared_vocab"]

            if share_vocabulary:
                vectorizers = cls._create_shared_vocab_vectorizers(
                    config, vocabulary=vocabulary
                )
            else:
                vectorizers = cls._create_independent_vocab_vectorizers(
                    config, vocabulary=vocabulary
                )

            oov_words = rasa.shared.utils.io.read_json_file(
                model_dir / "oov_words.json"
            )

            ftr = cls(
                config,
                model_storage,
                resource,
                execution_context,
                vectorizers=vectorizers,
                oov_token=config["OOV_token"],
                oov_words=oov_words,
            )

            # make sure the vocabulary has been loaded correctly
            for attribute in vectorizers:
                ftr.vectorizers[attribute]._validate_vocabulary()

            return ftr
    except (ValueError, FileNotFoundError, FileIOException):
        logger.debug(
            f"Failed to load `{cls.__name__}` from model storage. "
            f"Resource '{resource.name}' doesn't exist."
        )
        return cls(
            config=config,
            model_storage=model_storage,
            resource=resource,
            execution_context=execution_context,
        )
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["SklearnIntentClassifier"] = None,
    **kwargs: Any,
) -> "SklearnIntentClassifier":
    from sklearn.preprocessing import LabelEncoder

    classifier_file = os.path.join(model_dir, meta.get("classifier"))
    encoder_file = os.path.join(model_dir, meta.get("encoder"))

    if os.path.exists(classifier_file):
        classifier = io_utils.json_unpickle(classifier_file)
        classes = io_utils.json_unpickle(encoder_file)
        encoder = LabelEncoder()
        encoder.classes_ = classes
        return cls(meta, classifier, encoder)
    else:
        return cls(meta)
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Text,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["SklearnIntentClassifier"] = None,
    **kwargs: Any,
) -> "SklearnIntentClassifier":
    """Loads trained component (see parent class for full docstring)."""
    from sklearn.preprocessing import LabelEncoder

    classifier_file = os.path.join(model_dir, meta["classifier"])
    encoder_file = os.path.join(model_dir, meta["encoder"])

    if os.path.exists(classifier_file):
        classifier = io_utils.json_unpickle(classifier_file)
        classes = io_utils.json_unpickle(encoder_file)
        encoder = LabelEncoder()
        encoder.classes_ = classes
        return cls(meta, classifier, encoder)
    else:
        return cls(meta)
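# For context, a minimal sketch of the persist counterpart that the two
# loaders above assume: only the fitted estimator and the encoder's classes_
# array are serialized, and the LabelEncoder is rebuilt on load. The helper
# io_utils.json_pickle and the exact meta keys are assumptions mirroring the
# load code, not necessarily the original implementation.
def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
    classifier_file_name = file_name + "_classifier.pkl"
    encoder_file_name = file_name + "_encoder.pkl"
    if self.clf and self.le:
        io_utils.json_pickle(
            os.path.join(model_dir, encoder_file_name), self.le.classes_
        )
        io_utils.json_pickle(
            os.path.join(model_dir, classifier_file_name), self.clf.best_estimator_
        )
    return {"classifier": classifier_file_name, "encoder": encoder_file_name}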
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["LexicalSyntacticFeaturizer"] = None,
    **kwargs: Any,
) -> "LexicalSyntacticFeaturizer":
    file_name = meta.get("file")

    feature_to_idx_file = Path(model_dir) / f"{file_name}.feature_to_idx_dict.pkl"
    feature_to_idx_dict = io_utils.json_unpickle(feature_to_idx_file)

    return LexicalSyntacticFeaturizer(meta, feature_to_idx_dict=feature_to_idx_dict)
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Text,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional["LexicalSyntacticFeaturizer"] = None,
    **kwargs: Any,
) -> "LexicalSyntacticFeaturizer":
    """Loads trained component (see parent class for full docstring)."""
    file_name = meta.get("file")

    feature_to_idx_file = Path(model_dir) / f"{file_name}.feature_to_idx_dict.pkl"
    feature_to_idx_dict = io_utils.json_unpickle(feature_to_idx_file)

    return LexicalSyntacticFeaturizer(meta, feature_to_idx_dict=feature_to_idx_dict)
def _load_model_utilities(cls, model_path: Path) -> Dict[Text, Any]:
    """Loads model's utility attributes.

    Args:
        model_path: Path from which the model is loaded.
    """
    tf_model_file = model_path / f"{cls._metadata_filename()}.tf_model"
    loaded_data = io_utils.pickle_load(
        model_path / f"{cls._metadata_filename()}.data_example.pkl"
    )
    label_data = io_utils.pickle_load(
        model_path / f"{cls._metadata_filename()}.label_data.pkl"
    )
    fake_features = io_utils.pickle_load(
        model_path / f"{cls._metadata_filename()}.fake_features.pkl"
    )
    label_data = RasaModelData(data=label_data)
    meta = io_utils.pickle_load(model_path / f"{cls._metadata_filename()}.meta.pkl")
    priority = io_utils.json_unpickle(
        model_path / f"{cls._metadata_filename()}.priority.pkl"
    )
    entity_tag_specs = rasa.shared.utils.io.read_json_file(
        model_path / f"{cls._metadata_filename()}.entity_tag_specs.json"
    )
    entity_tag_specs = [
        EntityTagSpec(
            tag_name=tag_spec["tag_name"],
            ids_to_tags={
                int(key): value for key, value in tag_spec["ids_to_tags"].items()
            },
            tags_to_ids={
                key: int(value) for key, value in tag_spec["tags_to_ids"].items()
            },
            num_tags=tag_spec["num_tags"],
        )
        for tag_spec in entity_tag_specs
    ]

    return {
        "tf_model_file": tf_model_file,
        "loaded_data": loaded_data,
        "fake_features": fake_features,
        "label_data": label_data,
        "meta": meta,
        "priority": priority,
        "entity_tag_specs": entity_tag_specs,
    }
def load(cls, path: Union[Text, Path]) -> "TEDPolicy":
    """Loads a policy from the storage.

    **Needs to load its featurizer**
    """
    model_path = Path(path)

    if not model_path.exists():
        logger.error(
            f"Failed to load TED policy model. Path "
            f"'{model_path.absolute()}' doesn't exist."
        )
        return

    tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

    featurizer = TrackerFeaturizer.load(path)

    if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    loaded_data = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl"
    )
    label_data = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl"
    )
    zero_state_features = io_utils.pickle_load(
        model_path / f"{SAVE_MODEL_FILE_NAME}.zero_state_features.pkl"
    )
    label_data = RasaModelData(data=label_data)
    meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
    priority = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl"
    )

    model_data_example = RasaModelData(
        label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
    )
    meta = train_utils.update_similarity_type(meta)

    model = TED.load(
        str(tf_model_file),
        model_data_example,
        data_signature=model_data_example.get_signature(),
        config=meta,
        max_history_tracker_featurizer_used=isinstance(
            featurizer, MaxHistoryTrackerFeaturizer
        ),
        label_data=label_data,
    )

    # build the graph for prediction
    predict_data_example = RasaModelData(
        label_key=LABEL_KEY,
        label_sub_key=LABEL_SUB_KEY,
        data={
            feature_name: features
            for feature_name, features in model_data_example.items()
            if feature_name
            in STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE]
        },
    )
    model.build_for_predict(predict_data_example)

    return cls(
        featurizer=featurizer,
        priority=priority,
        model=model,
        zero_state_features=zero_state_features,
        **meta,
    )
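# Usage sketch with a hypothetical model directory. Note the two fallback
# shapes above: a missing path logs an error and implicitly returns None,
# while a directory without a .data_example.pkl yields an untrained policy
# that carries only its featurizer.
policy = TEDPolicy.load("models/core/policy_1_TEDPolicy")
if policy is not None and policy.model is not None:
    ...  # ready for prediction via the graph built in build_for_predict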