async def get_stories_hash(self):
    """Return a hash identifying the stories, for the Core fingerprint.

    The hash is taken from the contents of the first entry in
    ``self._story_files`` rather than from the story graph object, because
    the object hash is unstable.

    Returns:
        The result of ``get_file_hash`` for the first story file, or ``0``
        when ``self._story_files`` is empty.
    """
    story_files = self._story_files
    # Nothing to hash: fall back to the constant 0.
    if len(story_files) == 0:
        return 0
    return get_file_hash(story_files[0])
def model_fingerprint(
    config_files: Dict[Text, Text],
    domain: Optional[Union[Domain, Text]] = None,
    nlu_data: Optional[Text] = None,
    stories: Optional[Text] = None,
) -> Fingerprint:
    """Creates a model fingerprint from its used configuration and training data.

    Args:
        config_files: Paths to the configuration files, keyed by language code.
            The first entry is used for the general and Core config hashes.
        domain: The domain object, or the path to the model's domain file.
        nlu_data: Path to the directory holding the NLU training data files.
            The language of a file is the last two characters of its name
            before the first dot. Entries whose name starts with "." are
            ignored.
        stories: Path to the used story training data.

    Returns:
        The fingerprint.

    Raises:
        KeyError: If an NLU data file's language has no entry in
            `config_files`.
        IndexError: If `config_files` is empty.
    """
    import rasa
    import time
    from rasa.core.utils import get_file_hash

    if isinstance(domain, Domain):
        domain_hash = hash(domain)
    else:
        domain_hash = _get_hashes_for_paths(domain)

    # botfront: multilingual fingerprints
    # nlu config and data have per language hash (dict)
    nlu_files_by_language = {}
    if nlu_data:
        # Sort so that, should two files map to the same language code, the
        # one that ends up in the fingerprint does not depend on the
        # arbitrary order in which the OS lists the directory.
        for file_name in sorted(os.listdir(nlu_data)):
            # Skip hidden entries (e.g. ".DS_Store"), like
            # `_get_hashes_for_paths` does for the other training data.
            if file_name.startswith("."):
                continue
            # Derive the language from the file name only: splitting the full
            # path on "." breaks as soon as a directory contains a dot
            # (e.g. "./data/nlu" or "/tmp/tmp.abc/nlu").
            language = file_name.split(".")[0][-2:]
            nlu_files_by_language[language] = os.path.join(nlu_data, file_name)

    nlu_configs = {
        language: config_files[language] for language in nlu_files_by_language
    }

    # The general and Core parts of the config are read from the first
    # configuration file; indexing a list keeps the IndexError on empty input.
    first_config = list(config_files.values())[0]

    return {
        FINGERPRINT_CONFIG_KEY: _get_hash_of_config(
            first_config, exclude_keys=CONFIG_MANDATORY_KEYS
        ),
        FINGERPRINT_CONFIG_CORE_KEY: _get_hash_of_config(
            first_config, include_keys=CONFIG_MANDATORY_KEYS_CORE
        ),
        FINGERPRINT_CONFIG_NLU_KEY: {
            language: _get_hash_of_config(
                config_path, include_keys=CONFIG_MANDATORY_KEYS_NLU
            )
            for language, config_path in nlu_configs.items()
        },
        FINGERPRINT_DOMAIN_KEY: domain_hash,
        FINGERPRINT_NLU_DATA_KEY: {
            language: get_file_hash(file_path)
            for language, file_path in nlu_files_by_language.items()
        },
        FINGERPRINT_STORIES_KEY: _get_hashes_for_paths(stories),
        FINGERPRINT_TRAINED_AT_KEY: time.time(),
        FINGERPRINT_RASA_VERSION_KEY: rasa.__version__,
    }
def _get_hashes_for_paths(path: Text) -> List[Text]:
    """Hash the file at `path`, or the files directly inside it.

    When `path` is a directory, every entry whose name does not start with
    "." is hashed; when it is a file, only that file is hashed.

    Returns:
        The hashes in sorted order. The list is empty when `path` is falsy
        or is neither an existing directory nor an existing file.
    """
    from rasa.core.utils import get_file_hash

    targets = []
    if path:
        if os.path.isdir(path):
            for entry in os.listdir(path):
                if entry.startswith("."):
                    continue
                targets.append(os.path.join(path, entry))
        elif os.path.isfile(path):
            targets.append(path)

    hashes = [get_file_hash(target) for target in targets]
    hashes.sort()
    return hashes
async def get_stories_hash(self):
    """Return a hash identifying the stories, for the Core fingerprint.

    The hash is computed from the file at ``self._stories_path`` instead of
    from the story graph object, because the object hash is unstable.
    """
    stories_path = self._stories_path
    return get_file_hash(stories_path)