def get_data(self, train_data, language):
    lookup_tables = []
    composite_entities = []
    # create_argument_parser comes from the surrounding training script;
    # its parsed --data argument points at the training data directory.
    cmdline_args = create_argument_parser().parse_args()
    files = utils.list_files(cmdline_args.data)
    for file in files:
        fformat = _guess_format(file)
        file_content = utils.read_json_file(file)
        if fformat == DIALOGFLOW_ENTITIES:
            entity = file_content['name']
            dialogflow_reader = DialogflowReader()
            examples_js = dialogflow_reader._read_examples_js(
                fn=file, language=language, fformat=fformat)
            lookup_table = self._extract_lookup_tables(entity, examples_js)
            if lookup_table:
                lookup_tables.append(lookup_table)
            composite_entity = self._extract_composite_entities(
                entity, examples_js)
            if composite_entity:
                composite_entities.append(composite_entity)
        if fformat == "rasa_nlu":
            rasa_nlu_data = file_content['rasa_nlu_data']
            composite_entities.extend(
                rasa_nlu_data.get('composite_entities', []))
            lookup_tables.extend(rasa_nlu_data.get('lookup_tables', []))
    return lookup_tables, composite_entities

def _read_composite_entities(self):
    """Read the defined composite patterns from the train file.

    We have to manually load the file, as rasa strips our custom
    information.
    """
    try:
        files = self._get_train_files_cmd()
    except Exception:
        try:
            files = self._get_train_files_http()
        except Exception:
            warnings.warn("The CompositeEntityExtractor could not load "
                          "the train file.")
            return []
    composite_entities = []
    for file in files:
        file_content = utils.read_json_file(file)
        rasa_nlu_data = file_content["rasa_nlu_data"]
        try:
            composite_entities_in_file = rasa_nlu_data["composite_entities"]
        except KeyError:
            pass
        else:
            composite_entities.extend(composite_entities_in_file)
    if not composite_entities:
        warnings.warn(
            "CompositeEntityExtractor was added to the "
            "pipeline but no composite entities have been defined.")
    return composite_entities

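# Illustrative sketch (not from the source above): the assumed shape of the
# "composite_entities" block that _read_composite_entities() aggregates from
# each training file. The "name"/"patterns" keys are assumptions for
# illustration only.
EXAMPLE_TRAINING_FILE = {
    "rasa_nlu_data": {
        "common_examples": [],
        "composite_entities": [
            {"name": "address", "patterns": ["@street @house_number"]},
        ],
    }
}

def _collect_composite_entities(file_contents):
    # Mirrors the aggregation loop above: skip files without the key,
    # extend with the ones that define it.
    collected = []
    for content in file_contents:
        collected.extend(
            content["rasa_nlu_data"].get("composite_entities", []))
    return collected

assert _collect_composite_entities(
    [EXAMPLE_TRAINING_FILE])[0]["name"] == "address"
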
def read(self, fn: Text, **kwargs: Any) -> "TrainingData":
    """Loads training data stored in the Dialogflow data format."""
    from rasa.nlu.training_data import TrainingData

    language = kwargs["language"]
    fformat = kwargs["fformat"]

    if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
        raise ValueError(
            "fformat must be either {} or {}"
            "".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES)
        )

    root_js = utils.read_json_file(fn)
    examples_js = self._read_examples_js(fn, language, fformat)

    if not examples_js:
        logger.warning(
            "No training examples found for dialogflow file {}!".format(fn)
        )
        return TrainingData()
    elif fformat == DIALOGFLOW_INTENT:
        return self._read_intent(root_js, examples_js)
    elif fformat == DIALOGFLOW_ENTITIES:
        return self._read_entities(root_js, examples_js)

def test_url_data_format():
    data = """
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples": [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    fname = utils.create_temporary_file(
        data.encode("utf-8"),
        suffix="_tmp_training_data.json",
        mode="w+b")
    data = utils.read_json_file(fname)
    assert data is not None
    validate_rasa_nlu_data(data)

def load(
    cls,
    model_dir=None,  # type: Optional[Text]
    model_metadata=None,  # type: Optional[Metadata]
    cached_component=None,  # type: Optional[CompositeEntitiesMapper]
    **kwargs  # type: Any
):
    # type: (...) -> CompositeEntitiesMapper
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("composite_entities_file",
                         COMPOSITE_ENTITIES_FILE_NAME)
    composite_entities_file = os.path.join(model_dir, file_name)
    if os.path.isfile(composite_entities_file):
        composite_entities = utils.read_json_file(composite_entities_file)
    else:
        composite_entities = {
            'lookup_tables': [],
            'composite_entities': []
        }
        warnings.warn("Failed to load composite entities file from '{}'"
                      "".format(composite_entities_file))
    return cls(meta, composite_entities)

def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: "Metadata" = None,
    cached_component: Optional["KeywordIntentClassifier"] = None,
    **kwargs: Any,
) -> "KeywordIntentClassifier":
    if model_dir and meta.get("file"):
        file_name = meta.get("file")
        keyword_file = os.path.join(model_dir, file_name)
        if os.path.exists(keyword_file):
            intent_keyword_map = utils.read_json_file(keyword_file)
        else:
            raise_warning(
                f"Failed to load key word file for `IntentKeywordClassifier`, "
                f"maybe {keyword_file} does not exist?",
            )
            intent_keyword_map = None
        return cls(meta, intent_keyword_map)
    else:
        raise Exception(
            f"Failed to load keyword intent classifier model. "
            f"Path {os.path.abspath(meta.get('file'))} doesn't exist."
        )

def load(
    cls, meta, model_dir=None, model_metadata=None, cached_component=None,
    **kwargs
):
    file_name = meta.get("file")
    regex_file = os.path.join(model_dir, file_name)

    if os.path.exists(regex_file):
        known_patterns = utils.read_json_file(regex_file)
        return cls(meta, known_patterns=known_patterns)
    else:
        return cls(meta)

def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: Optional[Metadata] = None,
    cached_component: Optional[Component] = None,
    **kwargs: Any,
) -> "EntityExtractor":
    file_name = meta.get("file")
    regex_features = []
    if file_name:
        regex_features = utils.read_json_file(
            os.path.join(model_dir, file_name))  # type: ignore
    meta["regex_features"] = regex_features
    return cls(meta)

def _read_examples_js(fn, language, fformat):
    """Infer and load the example file based on the root filename
    and root format."""
    if fformat == DIALOGFLOW_INTENT:
        examples_type = "usersays"
    else:
        examples_type = "entries"
    examples_fn_ending = "_{}_{}.json".format(examples_type, language)
    examples_fn = fn.replace(".json", examples_fn_ending)
    if os.path.isfile(examples_fn):
        return utils.read_json_file(examples_fn)
    else:
        return None

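# Quick sketch of the filename convention _read_examples_js relies on; the
# paths and the "en" language code are made up for illustration.
#   entities/city.json -> entities/city_entries_en.json   (entities file)
#   intents/greet.json -> intents/greet_usersays_en.json  (intent file)
fn = "entities/city.json"
examples_fn = fn.replace(".json", "_{}_{}.json".format("entries", "en"))
assert examples_fn == "entities/city_entries_en.json"
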
def load(cls,
         meta: Dict[Text, Any],
         model_dir: Optional[Text] = None,
         model_metadata: Optional["Metadata"] = None,
         cached_component: Optional["RegexFeaturizer"] = None,
         **kwargs: Any) -> "RegexFeaturizer":
    file_name = meta.get("file")
    regex_file = os.path.join(model_dir, file_name)

    if os.path.exists(regex_file):
        known_patterns = utils.read_json_file(regex_file)
        return RegexFeaturizer(meta, known_patterns=known_patterns)
    else:
        return RegexFeaturizer(meta)

def load(cls,
         meta: Dict[Text, Any],
         model_dir: Optional[Text] = None,
         model_metadata: Optional["Metadata"] = None,
         cached_component: Optional["NGramFeaturizer"] = None,
         **kwargs: Any) -> "NGramFeaturizer":
    file_name = meta.get("file")
    featurizer_file = os.path.join(model_dir, file_name)

    if os.path.exists(featurizer_file):
        data = utils.read_json_file(featurizer_file)
        return NGramFeaturizer(meta, data["all_ngrams"],
                               data["best_num_ngrams"])
    else:
        return NGramFeaturizer(meta)

def load(model_dir: Text):
    """Loads the metadata from a models directory.

    Args:
        model_dir: the directory where the model is saved.

    Returns:
        Metadata: A metadata object describing the model
    """
    try:
        metadata_file = os.path.join(model_dir, 'metadata.json')
        data = utils.read_json_file(metadata_file)
        return Metadata(data, model_dir)
    except Exception as e:
        abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
        raise InvalidProjectError("Failed to load model metadata "
                                  "from '{}'. {}".format(abspath, e))

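# Hedged usage sketch for the metadata loader: "models/current" is a
# placeholder directory, load is assumed to be exposed as a static method on
# Metadata (as its body suggests), and for_component() is used the same way
# as in the component loaders above.
try:
    metadata = Metadata.load("models/current")
    print(metadata.for_component("EntitySynonymMapper"))
except InvalidProjectError as e:
    print("No valid model found: {}".format(e))
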
def load(cls, component_meta=None, model_dir=None, model_metadata=None,
         cached_component=None, **kwargs):
    file_name = component_meta.get("composite_entities_file",
                                   COMPOSITE_ENTITIES_FILE_NAME)
    composite_entities_file = os.path.join(model_dir, file_name)
    if os.path.isfile(composite_entities_file):
        composite_entities = utils.read_json_file(composite_entities_file)
    else:
        composite_entities = []
        warnings.warn("Failed to load composite entities file "
                      "from '{}'".format(composite_entities_file))
    return cls(component_meta, composite_entities)

def load(cls,
         component_meta: Dict[Text, Any],
         model_dir: Text = None,
         model_metadata: Metadata = None,
         cached_component: Optional['Gazette'] = None,
         **kwargs: Any) -> 'Gazette':
    from rasa.nlu.utils import read_json_file

    td = read_json_file(os.path.join(model_dir, "training_data.json"))
    if "gazette" in td["rasa_nlu_data"]:
        gazette = cls._load_gazette_list(td["rasa_nlu_data"]["gazette"])
    else:
        gazette = None
        warnings.warn(
            "Could not find Gazette in persisted training data file.")

    return Gazette(component_meta, gazette)

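# Illustrative sketch of the "gazette" block that Gazette.load looks up in
# training_data.json before handing it to cls._load_gazette_list. The entry
# keys are assumptions, not confirmed by the source.
example_td = {
    "rasa_nlu_data": {
        "gazette": [
            {"value": "city", "gazette": ["Berlin", "Amsterdam", "New York"]}
        ]
    }
}
assert "gazette" in example_td["rasa_nlu_data"]
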
def load(
    cls,
    meta: Dict[Text, Any],
    model_dir: Optional[Text] = None,
    model_metadata: "Metadata" = None,
    cached_component: Optional["KeywordIntentClassifier"] = None,
    **kwargs: Any,
) -> "KeywordIntentClassifier":
    # Initialize up front so the map is never unbound when the file is
    # missing or no model dir was given.
    intent_keyword_map = None
    if model_dir and meta.get("file"):
        file_name = meta.get("file")
        keyword_file = os.path.join(model_dir, file_name)
        if os.path.exists(keyword_file):
            intent_keyword_map = utils.read_json_file(keyword_file)
        else:
            warnings.warn(f"Failed to load IntentKeywordClassifier, maybe "
                          f"{keyword_file} does not exist.")
    return cls(meta, intent_keyword_map)

def load(cls,
         model_dir: Optional[Text] = None,
         model_metadata: Optional[Metadata] = None,
         cached_component: Optional['EntitySynonymMapper'] = None,
         **kwargs: Any) -> 'EntitySynonymMapper':
    meta = model_metadata.for_component(cls.name)
    file_name = meta.get("synonyms_file")
    if not file_name:
        synonyms = None
        return cls(meta, synonyms)

    entity_synonyms_file = os.path.join(model_dir, file_name)
    if os.path.isfile(entity_synonyms_file):
        synonyms = utils.read_json_file(entity_synonyms_file)
    else:
        synonyms = None
        warnings.warn("Failed to load synonyms file from '{}'"
                      "".format(entity_synonyms_file))
    return cls(meta, synonyms)

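# Sketch of a synonyms mapping as EntitySynonymMapper.load would read it back
# with utils.read_json_file: surface forms mapped to a canonical value. The
# layout is inferred from the "entity_synonyms" example in the test data
# above, not confirmed by the source.
example_synonyms = {"New York City": "nyc", "the big apple": "nyc"}
assert example_synonyms["the big apple"] == "nyc"
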
def load(cls,
         meta: Dict[Text, Any],
         model_dir: Text = None,
         model_metadata: Metadata = None,
         cached_component: Optional['CustomizedEntityExtractor'] = None,
         **kwargs: Any):
    # type: (...) -> CustomizedEntityExtractor
    file_name = meta.get("customize_file")
    if not file_name:
        entity_customize = None
        return cls(meta, entity_customize)

    entity_customize_file = os.path.join(model_dir, file_name)
    if os.path.isfile(entity_customize_file):
        entity_customize = read_json_file(entity_customize_file)
    else:
        entity_customize = None
        warnings.warn("Failed to load customize file from '{}'"
                      "".format(entity_customize_file))
    return cls(meta, entity_customize)

def test_example_training_data_is_valid():
    demo_json = "data/examples/rasa/demo-rasa.json"
    data = utils.read_json_file(demo_json)
    validate_rasa_nlu_data(data)