def test_train_val_split(model_data: RasaModelData):
    """Splitting must keep keys, sub-keys and dtypes, and yield 3/2 row counts."""
    train_split, val_split = model_data.split(2, 42)

    for top_key, sub_dict in model_data.items():
        # each split keeps every top-level key with the same number of sub-keys
        assert len(sub_dict) == len(train_split.get(top_key))
        assert len(sub_dict) == len(val_split.get(top_key))

        for sub_key, feature_arrays in sub_dict.items():
            assert len(feature_arrays) == len(train_split.get(top_key, sub_key))
            assert len(feature_arrays) == len(val_split.get(top_key, sub_key))

            for idx, arr in enumerate(feature_arrays):
                split_arr = train_split.get(top_key, sub_key)[idx]
                if isinstance(arr[0], list):
                    # ragged features nest one level deeper; compare the
                    # innermost array's dtype
                    assert arr[0][0].dtype == split_arr[0][0].dtype
                else:
                    assert arr[0].dtype == split_arr[0].dtype

    # the split was requested with 2 validation examples, leaving 3 for training
    for sub_dict in train_split.values():
        for feature_arrays in sub_dict.values():
            for arr in feature_arrays:
                assert np.array(arr).shape[0] == 3

    for sub_dict in val_split.values():
        for feature_arrays in sub_dict.values():
            for arr in feature_arrays:
                assert np.array(arr).shape[0] == 2
def _load_model_class(
    cls,
    tf_model_file: Text,
    model_data_example: RasaModelData,
    label_data: RasaModelData,
    entity_tag_specs: List[EntityTagSpec],
    config: Dict[Text, Any],
    finetune_mode: bool = False,
) -> "RasaModel":
    """Restore the persisted TF model from `tf_model_file`.

    Builds a prediction data example that keeps only the TEXT features of
    the training data example, then delegates to the model class' `load`.
    """
    # at prediction time only TEXT features are fed to the graph
    text_features = {
        name: feats for name, feats in model_data_example.items() if TEXT in name
    }
    predict_data_example = RasaModelData(
        label_key=model_data_example.label_key, data=text_features
    )

    model_class = cls.model_class(config[USE_TEXT_AS_LABEL])
    return model_class.load(
        tf_model_file,
        model_data_example,
        predict_data_example,
        data_signature=model_data_example.get_signature(),
        label_data=label_data,
        entity_tag_specs=entity_tag_specs,
        # deep copy so the loaded model cannot mutate the caller's config
        config=copy.deepcopy(config),
        finetune_mode=finetune_mode,
    )
def load(cls, path: Text) -> "TEDPolicy":
    """Loads a policy from the storage.

    **Needs to load its featurizer**

    Args:
        path: Directory the policy was persisted to.

    Returns:
        A restored `TEDPolicy`; if no data example was persisted the policy
        is returned untrained (featurizer only).

    Raises:
        FileNotFoundError: If `path` does not exist.
    """
    model_path = Path(path)
    if not model_path.exists():
        # Use the specific FileNotFoundError instead of a generic Exception;
        # it is a subclass of Exception, so existing callers still catch it.
        raise FileNotFoundError(
            f"Failed to load TED policy model. Path "
            f"'{model_path.absolute()}' doesn't exist."
        )

    tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

    featurizer = TrackerFeaturizer.load(path)

    # no persisted data example -> the policy was never trained
    if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    loaded_data = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl"
    )
    label_data = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.label_data.pkl"
    )
    meta = io_utils.pickle_load(model_path / f"{SAVE_MODEL_FILE_NAME}.meta.pkl")
    priority = io_utils.json_unpickle(
        model_path / f"{SAVE_MODEL_FILE_NAME}.priority.pkl"
    )

    model_data_example = RasaModelData(label_key=LABEL_IDS, data=loaded_data)
    meta = train_utils.update_similarity_type(meta)

    model = TED.load(
        str(tf_model_file),
        model_data_example,
        data_signature=model_data_example.get_signature(),
        config=meta,
        max_history_tracker_featurizer_used=isinstance(
            featurizer, MaxHistoryTrackerFeaturizer
        ),
        label_data=label_data,
    )

    # build the graph for prediction: only DIALOGUE features are fed
    predict_data_example = RasaModelData(
        label_key=LABEL_IDS,
        data={
            feature_name: features
            for feature_name, features in model_data_example.items()
            if DIALOGUE in feature_name
        },
    )
    model.build_for_predict(predict_data_example)

    return cls(featurizer=featurizer, priority=priority, model=model, **meta)
def test_train_val_split(model_data: RasaModelData):
    """Splitting must keep keys and dtypes, and yield 3 train / 2 val rows."""
    train_split, val_split = model_data.split(2, 42)

    for top_key, feature_arrays in model_data.items():
        # both splits keep every key with the same number of feature arrays
        assert len(feature_arrays) == len(train_split.get(top_key))
        assert len(feature_arrays) == len(val_split.get(top_key))
        for idx, arr in enumerate(feature_arrays):
            # splitting must not change the dtype of any feature array
            assert arr[0].dtype == train_split.get(top_key)[idx][0].dtype

    # 2 examples were requested for validation, leaving 3 for training
    for feature_arrays in train_split.values():
        for arr in feature_arrays:
            assert arr.shape[0] == 3

    for feature_arrays in val_split.values():
        for arr in feature_arrays:
            assert arr.shape[0] == 2
def _construct_model_initialization_data(
    cls, loaded_data: Dict[Text, Dict[Text, List[FeatureArray]]]
) -> Tuple[RasaModelData, RasaModelData]:
    """Wrap the loaded data for training and derive a prediction-only subset.

    Args:
        loaded_data: Persisted feature arrays keyed by name and sub-key.

    Returns:
        A tuple of (full training data example, prediction data example).
    """
    model_data_example = RasaModelData(
        label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
    )

    # we need to remove label features for prediction if they are present,
    # so only feature names listed in PREDICTION_FEATURES are kept
    prediction_data = {
        name: feats
        for name, feats in model_data_example.items()
        if name in PREDICTION_FEATURES
    }
    predict_data_example = RasaModelData(
        label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=prediction_data
    )

    return model_data_example, predict_data_example
def _load_model(
    cls,
    index_label_id_mapping: Dict[int, Text],
    index_tag_id_mapping: Dict[int, Text],
    label_data: RasaModelData,
    meta: Dict[Text, Any],
    data_example: Dict[Text, List[np.ndarray]],
    model_dir: Text,
):
    """Restore the persisted TF model and prepare its prediction graph."""
    file_name = meta.get("file")
    tf_model_file = os.path.join(model_dir, file_name + ".tf_model")

    # models trained without intent classification have no label key
    label_key = LABEL_IDS if meta[INTENT_CLASSIFICATION] else None
    model_data_example = RasaModelData(label_key=label_key, data=data_example)

    model = cls.model_class(meta).load(
        tf_model_file,
        model_data_example,
        data_signature=model_data_example.get_signature(),
        label_data=label_data,
        index_label_id_mapping=index_label_id_mapping,
        index_tag_id_mapping=index_tag_id_mapping,
        config=meta,
    )

    # build the graph for prediction: only TEXT features are fed at inference
    text_features = {
        name: feats for name, feats in model_data_example.items() if TEXT in name
    }
    predict_data_example = RasaModelData(label_key=label_key, data=text_features)
    model.build_for_predict(predict_data_example)

    return model
def load(cls, path: Union[Text, Path]) -> "TEDPolicy":
    """Loads a policy from the storage.

    **Needs to load its featurizer**
    """
    model_path = Path(path)
    if not model_path.exists():
        logger.error(
            f"Failed to load TED policy model. Path "
            f"'{model_path.absolute()}' doesn't exist."
        )
        return

    tf_model_file = model_path / f"{SAVE_MODEL_FILE_NAME}.tf_model"

    featurizer = TrackerFeaturizer.load(path)

    # no persisted data example -> the policy was never trained
    if not (model_path / f"{SAVE_MODEL_FILE_NAME}.data_example.pkl").is_file():
        return cls(featurizer=featurizer)

    def _artifact(suffix: Text) -> Path:
        # all persisted artifacts share the model file-name prefix
        return model_path / f"{SAVE_MODEL_FILE_NAME}.{suffix}"

    loaded_data = io_utils.pickle_load(_artifact("data_example.pkl"))
    label_data = RasaModelData(
        data=io_utils.pickle_load(_artifact("label_data.pkl"))
    )
    zero_state_features = io_utils.pickle_load(_artifact("zero_state_features.pkl"))
    meta = io_utils.pickle_load(_artifact("meta.pkl"))
    priority = io_utils.json_unpickle(_artifact("priority.pkl"))

    model_data_example = RasaModelData(
        label_key=LABEL_KEY, label_sub_key=LABEL_SUB_KEY, data=loaded_data
    )
    meta = train_utils.update_similarity_type(meta)

    model = TED.load(
        str(tf_model_file),
        model_data_example,
        data_signature=model_data_example.get_signature(),
        config=meta,
        max_history_tracker_featurizer_used=isinstance(
            featurizer, MaxHistoryTrackerFeaturizer
        ),
        label_data=label_data,
    )

    # build the graph for prediction
    prediction_feature_names = (
        STATE_LEVEL_FEATURES + FEATURES_TO_ENCODE + [DIALOGUE]
    )
    predict_data_example = RasaModelData(
        label_key=LABEL_KEY,
        label_sub_key=LABEL_SUB_KEY,
        data={
            name: feats
            for name, feats in model_data_example.items()
            if name in prediction_feature_names
        },
    )
    model.build_for_predict(predict_data_example)

    return cls(
        featurizer=featurizer,
        priority=priority,
        model=model,
        zero_state_features=zero_state_features,
        **meta,
    )