def get_data(self, language):
    lookup_tables = []
    composite_entities = []
    cmdline_args = create_argument_parser().parse_args()
    files = utils.list_files(cmdline_args.data)
    for file in files:
        fformat = _guess_format(file)
        file_content = utils.read_json_file(file)
        if fformat == DIALOGFLOW_ENTITIES:
            entity = file_content['name']
            dialogflow_reader = DialogflowReader()
            examples_js = dialogflow_reader._read_examples_js(
                fn=file, language=language, fformat=fformat)
            lookup_table = self._extract_lookup_tables(entity, examples_js)
            if lookup_table:
                lookup_tables.append(lookup_table)
            composite_entity = self._extract_composite_entities(
                entity, examples_js)
            if composite_entity:
                composite_entities.append(composite_entity)
        if fformat == RASA_NLU:
            rasa_nlu_data = file_content['rasa_nlu_data']
            # A Rasa NLU file replaces anything collected so far rather
            # than extending it.
            composite_entities = rasa_nlu_data['composite_entities']
            lookup_tables = rasa_nlu_data['lookup_tables']
    return lookup_tables, composite_entities

def load(cls,
         model_dir=None,  # type: Optional[Text]
         model_metadata=None,  # type: Optional[Metadata]
         cached_component=None,  # type: Optional[CompositeEntitiesMapper]
         **kwargs  # type: **Any
         ):
    # type: (...) -> CompositeEntitiesMapper
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("composite_entities_file",
                         COMPOSITE_ENTITIES_FILE_NAME)
    composite_entities_file = os.path.join(model_dir, file_name)
    if os.path.isfile(composite_entities_file):
        composite_entities = utils.read_json_file(composite_entities_file)
    else:
        composite_entities = {
            'lookup_tables': [],
            'composite_entities': []
        }
        warnings.warn("Failed to load composite entities file from '{}'"
                      "".format(composite_entities_file))
    return cls(meta, composite_entities)

def __init__(self, filename=None, env_vars=None, cmdline_args=None):
    if filename is None and os.path.isfile(DEFAULT_CONFIG_LOCATION):
        filename = DEFAULT_CONFIG_LOCATION

    # Precedence (lowest to highest): built-in defaults, config file,
    # environment variables, command line arguments.
    self.override(DEFAULT_CONFIG)
    if filename is not None:
        try:
            file_config = utils.read_json_file(filename)
        except ValueError as e:
            raise InvalidConfigError("Failed to read configuration file "
                                     "'{}'. Error: {}".format(filename, e))
        self.override(file_config)

    if env_vars is not None:
        env_config = self.create_env_config(env_vars)
        self.override(env_config)

    if cmdline_args is not None:
        cmdline_config = self.create_cmdline_config(cmdline_args)
        self.override(cmdline_config)

    if isinstance(self.__dict__['pipeline'], six.string_types):
        from rasa_nlu import registry
        template_name = self.__dict__['pipeline']
        if template_name in registry.registered_pipeline_templates:
            self.__dict__['pipeline'] = \
                registry.registered_pipeline_templates[template_name]
        else:
            raise InvalidConfigError(
                "No pipeline specified and unknown pipeline template "
                "'{}' passed. Known pipeline templates: {}".format(
                    template_name,
                    ", ".join(registry.registered_pipeline_templates.keys())))

    for key, value in self.items():
        setattr(self, key, value)

def test_url_data_format():
    data = u"""
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples": [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    fname = utils.create_temporary_file(data.encode("utf-8"),
                                        suffix="_tmp_training_data.json",
                                        mode="w+b")
    data = utils.read_json_file(fname)
    assert data is not None
    validate_rasa_nlu_data(data)

def _read_examples_js(self, fn, language, fformat):
    """Infer and load the example file based on the root
    filename and root format."""
    examples_type = "usersays" if fformat == DIALOGFLOW_INTENT else "entries"
    examples_fn_ending = "_{}_{}.json".format(examples_type, language)
    examples_fn = fn.replace(".json", examples_fn_ending)
    if os.path.isfile(examples_fn):
        return utils.read_json_file(examples_fn)
    else:
        return None

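# For illustration only: the naming scheme inferred by `_read_examples_js`
# above, shown for a Dialogflow intent file in English. The file name below
# is hypothetical, chosen just to make the mapping concrete.
assert "intent_greet.json".replace(".json", "_usersays_en.json") \
    == "intent_greet_usersays_en.json"
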
def load(model_dir):
    # type: (Text) -> 'Metadata'
    """Loads the metadata from a models directory."""
    try:
        metadata_file = os.path.join(model_dir, 'metadata.json')
        data = utils.read_json_file(metadata_file)
        return Metadata(data, model_dir)
    except Exception as e:
        abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
        raise InvalidProjectError("Failed to load model metadata "
                                  "from '{}'. {}".format(abspath, e))

def load(cls, model_dir, model_metadata, cached_component, **kwargs):
    # type: (Text, Metadata, Optional[EntitySynonymMapper], **Any) -> EntitySynonymMapper
    if model_dir and model_metadata.get("entity_synonyms"):
        entity_synonyms_file = os.path.join(
            model_dir, model_metadata.get("entity_synonyms"))
        if os.path.isfile(entity_synonyms_file):
            synonyms = utils.read_json_file(entity_synonyms_file)
            return EntitySynonymMapper(synonyms)
        else:
            warnings.warn("Failed to load synonyms file from '{}'".format(
                entity_synonyms_file))
    return EntitySynonymMapper()

def load(cls, model_dir=None, model_metadata=None, cached_component=None,
         **kwargs):
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get('regex_file', REGEX_FEATURIZER_FILE_NAME)
    regex_file = os.path.join(model_dir, file_name)
    if os.path.exists(regex_file):
        known_patterns = utils.read_json_file(regex_file)
        return cls(meta, known_patterns=known_patterns)
    else:
        return cls(meta)

def load(cls,
         model_dir: Optional[Text] = None,
         model_metadata: Optional['Metadata'] = None,
         cached_component: Optional['RegexFeaturizer'] = None,
         **kwargs: Any) -> 'RegexFeaturizer':
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME)
    regex_file = os.path.join(model_dir, file_name)
    if os.path.exists(regex_file):
        known_patterns = utils.read_json_file(regex_file)
        return RegexFeaturizer(meta, known_patterns=known_patterns)
    else:
        return RegexFeaturizer(meta)

def load(cls,
         meta: Dict[Text, Any],
         model_dir: Optional[Text] = None,
         model_metadata: Optional['Metadata'] = None,
         cached_component: Optional['RegexFeaturizer'] = None,
         **kwargs: Any) -> 'RegexFeaturizer':
    file_name = meta.get("file")
    regex_file = os.path.join(model_dir, file_name)
    if os.path.exists(regex_file):
        known_patterns = utils.read_json_file(regex_file)
        return RegexFeaturizer(meta, known_patterns=known_patterns)
    else:
        return RegexFeaturizer(meta)

def load(cls, model_dir=None, model_metadata=None, cached_component=None,
         **kwargs):
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get('composite_entities_file',
                         COMPOSITE_ENTITIES_FILE_NAME)
    composite_entities_file = os.path.join(model_dir, file_name)
    if os.path.isfile(composite_entities_file):
        composite_entities = utils.read_json_file(composite_entities_file)
    else:
        composite_entities = []
        warnings.warn('Failed to load composite entities '
                      'file from "{}"'.format(composite_entities_file))
    return cls(meta, composite_entities)

def load(model_dir: Text):
    """Loads the metadata from a models directory.

    Args:
        model_dir (str): the directory where the model is saved.

    Returns:
        Metadata: A metadata object describing the model.
    """
    try:
        metadata_file = os.path.join(model_dir, 'metadata.json')
        data = utils.read_json_file(metadata_file)
        return Metadata(data, model_dir)
    except Exception as e:
        abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
        raise InvalidProjectError("Failed to load model metadata "
                                  "from '{}'. {}".format(abspath, e))

def load(model_dir):
    # type: (Text) -> 'Metadata'
    """Loads the metadata from a models directory.

    Args:
        model_dir (str): the directory where the model is saved.

    Returns:
        Metadata: A metadata object describing the model.
    """
    try:
        metadata_file = os.path.join(model_dir, 'metadata.json')
        data = utils.read_json_file(metadata_file)
        return Metadata(data, model_dir)
    except Exception as e:
        abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
        raise InvalidProjectError("Failed to load model metadata "
                                  "from '{}'. {}".format(abspath, e))

def load(cls,
         model_dir=None,  # type: Optional[Text]
         model_metadata=None,  # type: Optional[Metadata]
         cached_component=None,  # type: Optional[RegexFeaturizer]
         **kwargs  # type: **Any
         ):
    # type: (...) -> RegexFeaturizer
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME)
    regex_file = os.path.join(model_dir, file_name)
    if os.path.exists(regex_file):
        known_patterns = utils.read_json_file(regex_file)
        return RegexFeaturizer(meta, known_patterns=known_patterns)
    else:
        return RegexFeaturizer(meta)

def load(cls,
         model_dir=None,  # type: Optional[Text]
         model_metadata=None,  # type: Optional[Metadata]
         cached_component=None,  # type: Optional[RegexFeaturizer]
         **kwargs  # type: **Any
         ):
    # type: (...) -> RegexFeaturizer
    if model_dir and model_metadata.get("regex_featurizer"):
        regex_file = os.path.join(model_dir,
                                  model_metadata.get("regex_featurizer"))
        if os.path.isfile(regex_file):
            known_patterns = utils.read_json_file(regex_file)
            return RegexFeaturizer(known_patterns)
        else:
            warnings.warn("Failed to load regex pattern file "
                          "'{}'".format(regex_file))
    return RegexFeaturizer()

def load(cls,
         model_dir: Optional[Text] = None,
         model_metadata: Optional[Metadata] = None,
         cached_component: Optional['EntitySynonymMapper'] = None,
         **kwargs: Any) -> 'EntitySynonymMapper':
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("synonyms_file")
    if not file_name:
        synonyms = None
        return cls(meta, synonyms)

    entity_synonyms_file = os.path.join(model_dir, file_name)
    if os.path.isfile(entity_synonyms_file):
        synonyms = utils.read_json_file(entity_synonyms_file)
    else:
        synonyms = None
        warnings.warn("Failed to load synonyms file from '{}'"
                      "".format(entity_synonyms_file))
    return cls(meta, synonyms)

def load(cls,
         model_dir=None,  # type: Text
         model_metadata=None,  # type: Metadata
         cached_component=None,  # type: Optional[DucklingHTTPExtractor]
         **kwargs  # type: **Any
         ):
    # type: (...) -> DucklingHTTPExtractor
    persisted = os.path.join(model_dir, model_metadata.get(cls.name))
    config = kwargs.get("config", {})
    dimensions = None
    if os.path.isfile(persisted):
        persisted_data = utils.read_json_file(persisted)
        dimensions = persisted_data["dimensions"]
    return DucklingHTTPExtractor(config.get("duckling_http_url"),
                                 model_metadata.get("language"),
                                 dimensions)

def read(self, fn, **kwargs):
    # type: (Text, **Any) -> TrainingData
    """Loads training data stored in the Dialogflow data format."""
    language = kwargs["language"]
    fformat = kwargs["fformat"]
    if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
        raise ValueError("fformat must be either {}, or {}"
                         "".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES))
    root_js = utils.read_json_file(fn)
    examples_js = self._read_examples_js(fn, language, fformat)
    if not examples_js:
        logger.warning("No training examples found for dialogflow file {}!"
                       "".format(fn))
        return TrainingData()
    elif fformat == DIALOGFLOW_INTENT:
        return self._read_intent(root_js, examples_js)
    elif fformat == DIALOGFLOW_ENTITIES:
        return self._read_entities(examples_js)

def load(cls,
         model_dir=None,  # type: Optional[Text]
         model_metadata=None,  # type: Optional[Metadata]
         cached_component=None,  # type: Optional[EntitySynonymMapper]
         **kwargs  # type: **Any
         ):
    # type: (...) -> EntitySynonymMapper
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("synonyms_file", ENTITY_SYNONYMS_FILE_NAME)
    entity_synonyms_file = os.path.join(model_dir, file_name)
    if os.path.isfile(entity_synonyms_file):
        synonyms = utils.read_json_file(entity_synonyms_file)
    else:
        synonyms = None
        warnings.warn("Failed to load synonyms file from '{}'"
                      "".format(entity_synonyms_file))
    return EntitySynonymMapper(meta, synonyms)

def load(cls,
         model_dir=None,  # type: Optional[Text]
         model_metadata=None,  # type: Optional[Metadata]
         cached_component=None,  # type: Optional[Component]
         **kwargs  # type: **Any
         ):
    from rasa_nlu.utils import read_json_file

    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("gazette_file", FUZZY_GAZETTE_FILE)
    path = os.path.join(model_dir, file_name)

    if os.path.isfile(path):
        gazette = read_json_file(path)
    else:
        gazette = None
        warnings.warn("Failed to load gazette file from '{}'"
                      "".format(path))

    return FuzzyGazette(meta, gazette)

def load(cls,
         model_dir=None,  # type: Text
         model_metadata=None,  # type: Metadata
         cached_component=None,  # type: Optional[DucklingExtractor]
         **kwargs  # type: **Any
         ):
    # type: (...) -> DucklingExtractor
    persisted = os.path.join(model_dir,
                             model_metadata.get("ner_duckling_persisted"))
    if cached_component:
        duckling = cached_component.duckling
    else:
        language = model_metadata.get("language")
        duckling = cls.create_duckling_wrapper(language)

    if os.path.isfile(persisted):
        persisted_data = utils.read_json_file(persisted)
        return DucklingExtractor(duckling, persisted_data["dimensions"])
    else:
        return DucklingExtractor(duckling)

def load(cls,
         meta: Dict[Text, Any],
         model_dir: Optional[Text] = None,
         model_metadata: Optional[Metadata] = None,
         cached_component: Optional['EntitySynonymMapper'] = None,
         **kwargs: Any) -> 'EntitySynonymMapper':
    file_name = meta.get("file")
    if not file_name:
        synonyms = None
        return cls(meta, synonyms)

    entity_synonyms_file = os.path.join(model_dir, file_name)
    if os.path.isfile(entity_synonyms_file):
        synonyms = utils.read_json_file(entity_synonyms_file)
    else:
        synonyms = None
        warnings.warn("Failed to load synonyms file from '{}'"
                      "".format(entity_synonyms_file))
    return cls(meta, synonyms)

def read(self, fn: Text, **kwargs: Any) -> 'TrainingData':
    """Loads training data stored in the Dialogflow data format."""
    from rasa_nlu.training_data import TrainingData

    language = kwargs["language"]
    fformat = kwargs["fformat"]
    if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
        raise ValueError("fformat must be either {}, or {}"
                         "".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES))

    root_js = utils.read_json_file(fn)
    examples_js = self._read_examples_js(fn, language, fformat)

    if not examples_js:
        logger.warning("No training examples found for dialogflow file {}!"
                       "".format(fn))
        return TrainingData()
    elif fformat == DIALOGFLOW_INTENT:
        return self._read_intent(root_js, examples_js)
    elif fformat == DIALOGFLOW_ENTITIES:
        return self._read_entities(root_js, examples_js)

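# A hedged usage sketch for the reader above (the file path is hypothetical;
# `read` pops `language` and `fformat` from its keyword arguments):
#
#     reader = DialogflowReader()
#     training_data = reader.read("data/dialogflow/intent_greet.json",
#                                 language="en", fformat=DIALOGFLOW_INTENT)
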
def _read_composite_entities(self):
    """Read the defined composite patterns from the train file.

    We have to manually load the file, as rasa strips our custom
    information.
    """
    try:
        file = self._get_train_file_cmd()
    except Exception:
        try:
            file = self._get_train_file_http()
        except Exception:
            warnings.warn('The CompositeEntityExtractor could not load '
                          'the train file.')
            return []
    file_content = utils.read_json_file(file)
    rasa_nlu_data = file_content['rasa_nlu_data']
    try:
        composite_entities = rasa_nlu_data['composite_entities']
    except KeyError:
        composite_entities = []
    if not composite_entities:
        warnings.warn('CompositeEntityExtractor was added to the '
                      'pipeline but no composite entities have been '
                      'defined.')
    return composite_entities

def test_example_training_data_is_valid():
    demo_json = 'data/examples/rasa/demo-rasa.json'
    data = utils.read_json_file(demo_json)
    validate_rasa_nlu_data(data)
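
# Every snippet in this collection relies on rasa_nlu's `read_json_file`
# helper. A minimal sketch of what it is assumed to do, using only the
# standard library (the real helper in `rasa_nlu.utils` may differ, e.g. by
# using simplejson); treat this as an approximation, not the library code:
import io
import json


def read_json_file(filename):
    """Read `filename` as UTF-8 and parse its contents as JSON."""
    with io.open(filename, encoding="utf-8") as f:
        return json.load(f)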