Beispiel #1
0
    def get_data(self, language):
        """Collect lookup tables and composite entities from the training
        data files passed on the command line.

        Dialogflow entity files are parsed individually; a Rasa NLU file
        contributes its ``lookup_tables`` and ``composite_entities`` lists
        wholesale.

        :param language: language code used to locate Dialogflow example
            files (e.g. ``"en"``).
        :return: tuple ``(lookup_tables, composite_entities)``.
        """
        lookup_tables = []
        composite_entities = []

        cmdline_args = create_argument_parser().parse_args()
        files = utils.list_files(cmdline_args.data)

        for file in files:
            fformat = _guess_format(file)
            file_content = utils.read_json_file(file)

            if fformat == DIALOGFLOW_ENTITIES:
                entity = file_content['name']
                dialogflowReader = DialogflowReader()
                # NOTE(review): relies on a private DialogflowReader method.
                examples_js = dialogflowReader._read_examples_js(
                    fn=file, language=language, fformat=fformat)
                lookup_table = self._extract_lookup_tables(entity, examples_js)
                if lookup_table:
                    lookup_tables.append(lookup_table)
                composite_entity = self._extract_composite_entities(
                    entity, examples_js)
                if composite_entity:
                    composite_entities.append(composite_entity)

            if fformat == RASA_NLU:
                rasa_nlu_data = file_content['rasa_nlu_data']
                # Bug fix: these assignments previously *replaced* the
                # accumulator lists, discarding everything collected from
                # earlier files. Extend instead so results accumulate, and
                # tolerate files that omit either key.
                composite_entities.extend(
                    rasa_nlu_data.get('composite_entities', []))
                lookup_tables.extend(
                    rasa_nlu_data.get('lookup_tables', []))

        return lookup_tables, composite_entities
Beispiel #2
0
    def load(
            cls,
            model_dir=None,  # type: Optional[Text]
            model_metadata=None,  # type: Optional[Metadata]
            cached_component=None,  # type: Optional[CompositeEntitiesMapper]
            **kwargs  # type: **Any
    ):
        # type: (...) -> CompositeEntitiesMapper
        """Restore the persisted composite-entities data for this component.

        Falls back to empty lookup tables and composite entities (with a
        warning) when the persisted file is missing.
        """
        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("composite_entities_file",
                             COMPOSITE_ENTITIES_FILE_NAME)
        composite_entities_file = os.path.join(model_dir, file_name)

        if not os.path.isfile(composite_entities_file):
            warnings.warn("Failed to load composite entities file from '{}'"
                          "".format(composite_entities_file))
            return cls(meta, {
                'lookup_tables': [],
                'composite_entities': []
            })

        return cls(meta, utils.read_json_file(composite_entities_file))
Beispiel #3
0
    def __init__(self, filename=None, env_vars=None, cmdline_args=None):
        """Build the configuration by layering sources.

        Later layers override earlier ones: built-in defaults < JSON
        config file < environment variables < command-line arguments.

        :param filename: path to a JSON config file; when None, falls back
            to DEFAULT_CONFIG_LOCATION if that file exists.
        :param env_vars: mapping used to derive overrides via
            create_env_config (presumably os.environ-like — verify callers).
        :param cmdline_args: parsed CLI arguments used to derive overrides
            via create_cmdline_config.
        :raises InvalidConfigError: if the config file cannot be read or an
            unknown pipeline template name is given.
        """

        if filename is None and os.path.isfile(DEFAULT_CONFIG_LOCATION):
            filename = DEFAULT_CONFIG_LOCATION

        # Start from the built-in defaults, then apply each source in turn.
        self.override(DEFAULT_CONFIG)
        if filename is not None:
            try:
                file_config = utils.read_json_file(filename)
            except ValueError as e:
                raise InvalidConfigError("Failed to read configuration file "
                                         "'{}'. Error: {}".format(filename, e))
            self.override(file_config)

        if env_vars is not None:
            env_config = self.create_env_config(env_vars)
            self.override(env_config)

        if cmdline_args is not None:
            cmdline_config = self.create_cmdline_config(cmdline_args)
            self.override(cmdline_config)

        # A pipeline given as a plain string names a registered template;
        # expand it to the template's component list or fail loudly.
        if isinstance(self.__dict__['pipeline'], six.string_types):
            from rasa_nlu import registry
            if self.__dict__['pipeline'] in registry.registered_pipeline_templates:
                self.__dict__['pipeline'] = registry.registered_pipeline_templates[self.__dict__['pipeline']]
            else:
                raise InvalidConfigError("No pipeline specified and unknown pipeline template " +
                                         "'{}' passed. Known pipeline templates: {}".format(
                                                 self.__dict__['pipeline'],
                                                 ", ".join(registry.registered_pipeline_templates.keys())))

        # Mirror every configuration key as an instance attribute.
        for key, value in self.items():
            setattr(self, key, value)
Beispiel #4
0
def test_url_data_format():
    """Round-trip a minimal rasa_nlu_data payload through a temp file and
    validate it against the training-data schema."""
    data = u"""
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples" : [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    temp_path = utils.create_temporary_file(data.encode("utf-8"),
                                            suffix="_tmp_training_data.json",
                                            mode="w+b")
    loaded = utils.read_json_file(temp_path)
    assert loaded is not None
    validate_rasa_nlu_data(loaded)
def test_url_data_format():
    """Write a minimal rasa_nlu_data example to a temporary file, re-read
    it, and check it passes schema validation."""
    payload = u"""
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples" : [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    encoded = payload.encode("utf-8")
    fname = utils.create_temporary_file(encoded,
                                        suffix="_tmp_training_data.json",
                                        mode="w+b")
    parsed = utils.read_json_file(fname)
    assert parsed is not None
    validate_rasa_nlu_data(parsed)
Beispiel #6
0
 def _read_examples_js(self, fn, language, fformat):
     """Infer and load the example file based on the root filename and root
     format.

     Returns the parsed JSON examples, or None when the inferred file does
     not exist on disk.
     """
     suffix = "usersays" if fformat == DIALOGFLOW_INTENT else "entries"
     examples_fn = fn.replace(".json", "_{}_{}.json".format(suffix, language))
     if not os.path.isfile(examples_fn):
         return None
     return utils.read_json_file(examples_fn)
Beispiel #7
0
 def _read_examples_js(self, fn, language, fformat):
     """Infer and load the example file based on the root filename and root
     format.

     :param fn: path of the root Dialogflow JSON file.
     :param language: language code embedded in the examples filename.
     :param fformat: DIALOGFLOW_INTENT or DIALOGFLOW_ENTITIES.
     :return: parsed JSON examples, or None if the file does not exist.
     """
     examples_type = "usersays" if fformat == DIALOGFLOW_INTENT else "entries"
     examples_fn_ending = "_{}_{}.json".format(examples_type, language)
     # Fix: str.replace swaps *every* ".json" occurrence (e.g. one inside a
     # parent directory name); only the trailing extension may be rewritten.
     if fn.endswith(".json"):
         examples_fn = fn[:-len(".json")] + examples_fn_ending
     else:
         examples_fn = fn.replace(".json", examples_fn_ending)
     if os.path.isfile(examples_fn):
         return utils.read_json_file(examples_fn)
     else:
         return None
Beispiel #8
0
 def load(model_dir):
     # type: (Text) -> 'Metadata'
     """Loads the metadata from a models directory.

     :param model_dir: directory expected to contain ``metadata.json``.
     :return: a Metadata instance wrapping the parsed file.
     :raises InvalidProjectError: if the file is missing or unreadable.
     """
     # Build the path once; previously it was constructed a second time in
     # the error handler.
     metadata_file = os.path.join(model_dir, 'metadata.json')
     try:
         data = utils.read_json_file(metadata_file)
         return Metadata(data, model_dir)
     except Exception as e:
         raise InvalidProjectError("Failed to load model metadata "
                                   "from '{}'. {}".format(
                                       os.path.abspath(metadata_file), e))
Beispiel #9
0
 def load(model_dir):
     # type: (Text) -> 'Metadata'
     """Loads the metadata from a models directory."""
     metadata_path = os.path.join(model_dir, 'metadata.json')
     try:
         return Metadata(utils.read_json_file(metadata_path), model_dir)
     except Exception as e:
         abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
         raise InvalidProjectError("Failed to load model metadata "
                                   "from '{}'. {}".format(abspath, e))
Beispiel #10
0
    def load(cls, model_dir, model_metadata, cached_component, **kwargs):
        # type: (Text, Metadata, Optional[EntitySynonymMapper], **Any) -> EntitySynonymMapper
        """Restore persisted entity synonyms, or return an empty mapper.

        Warns (without raising) when metadata names a synonyms file that is
        missing on disk.
        """
        if model_dir and model_metadata.get("entity_synonyms"):
            entity_synonyms_file = os.path.join(
                model_dir, model_metadata.get("entity_synonyms"))
            if not os.path.isfile(entity_synonyms_file):
                warnings.warn("Failed to load synonyms file from '{}'".format(
                    entity_synonyms_file))
            else:
                return EntitySynonymMapper(
                    utils.read_json_file(entity_synonyms_file))
        return EntitySynonymMapper()
Beispiel #11
0
    def load(cls,
             model_dir=None,
             model_metadata=None,
             cached_component=None,
             **kwargs):
        """Restore persisted regex patterns for this component, if any."""
        meta = model_metadata.for_component(cls.name)
        regex_file = os.path.join(
            model_dir, meta.get('regex_file', REGEX_FEATURIZER_FILE_NAME))
        if not os.path.exists(regex_file):
            return cls(meta)
        return cls(meta, known_patterns=utils.read_json_file(regex_file))
Beispiel #12
0
    def load(cls,
             model_dir: Optional[Text] = None,
             model_metadata: Optional['Metadata'] = None,
             cached_component: Optional['RegexFeaturizer'] = None,
             **kwargs: Any) -> 'RegexFeaturizer':
        """Restore persisted regex patterns for this component.

        :return: a featurizer initialised with the persisted patterns, or
            without patterns when no regex file exists.
        """
        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME)
        regex_file = os.path.join(model_dir, file_name)

        # Instantiate via cls rather than the hard-coded class name so
        # subclasses load correctly (consistent with sibling load methods).
        if os.path.exists(regex_file):
            known_patterns = utils.read_json_file(regex_file)
            return cls(meta, known_patterns=known_patterns)
        else:
            return cls(meta)
Beispiel #13
0
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Optional[Text] = None,
             model_metadata: Optional['Metadata'] = None,
             cached_component: Optional['RegexFeaturizer'] = None,
             **kwargs: Any) -> 'RegexFeaturizer':
        """Restore persisted regex patterns for this component.

        :param meta: this component's metadata; ``meta["file"]`` names the
            persisted regex file when one exists.
        :return: a featurizer with the persisted patterns, or an empty one.
        """
        file_name = meta.get("file")
        # Guard: meta.get("file") is None when nothing was persisted;
        # os.path.join(model_dir, None) would raise TypeError.
        if not file_name:
            return RegexFeaturizer(meta)

        regex_file = os.path.join(model_dir, file_name)
        if os.path.exists(regex_file):
            known_patterns = utils.read_json_file(regex_file)
            return RegexFeaturizer(meta, known_patterns=known_patterns)
        else:
            return RegexFeaturizer(meta)
Beispiel #14
0
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Optional[Text] = None,
             model_metadata: Optional['Metadata'] = None,
             cached_component: Optional['RegexFeaturizer'] = None,
             **kwargs: Any
             ) -> 'RegexFeaturizer':
        """Restore a RegexFeaturizer from its persisted pattern file."""
        regex_file = os.path.join(model_dir, meta.get("file"))
        if not os.path.exists(regex_file):
            return RegexFeaturizer(meta)
        return RegexFeaturizer(
            meta, known_patterns=utils.read_json_file(regex_file))
Beispiel #15
0
 def load(cls,
          model_dir=None,
          model_metadata=None,
          cached_component=None,
          **kwargs):
     """Restore the persisted composite-entities definitions.

     Falls back to an empty list (with a warning) when the persisted file
     is missing.
     """
     meta = model_metadata.for_component(cls.name)
     file_name = meta.get('composite_entities_file',
                          COMPOSITE_ENTITIES_FILE_NAME)
     composite_entities_file = os.path.join(model_dir, file_name)
     if os.path.isfile(composite_entities_file):
         composite_entities = utils.read_json_file(composite_entities_file)
     else:
         composite_entities = []
         # Fix: the adjacent literals previously concatenated to
         # "...entitiesfile from..." — the separating space was missing.
         warnings.warn('Failed to load composite entities '
                       'file from "{}"'.format(composite_entities_file))
     return cls(meta, composite_entities)
Beispiel #16
0
    def load(model_dir: Text):
        """Loads the metadata from a models directory.

        Args:
            model_dir (str): the directory where the model is saved.
        Returns:
            Metadata: A metadata object describing the model
        """
        try:
            metadata_path = os.path.join(model_dir, 'metadata.json')
            return Metadata(utils.read_json_file(metadata_path), model_dir)
        except Exception as e:
            abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
            raise InvalidProjectError("Failed to load model metadata "
                                      "from '{}'. {}".format(abspath, e))
Beispiel #17
0
    def load(model_dir):
        # type: (Text) -> 'Metadata'
        """Loads the metadata from a models directory.

        Args:
            model_dir (str): the directory where the model is saved.
        Returns:
            Metadata: A metadata object describing the model
        """
        try:
            return Metadata(
                utils.read_json_file(os.path.join(model_dir, 'metadata.json')),
                model_dir)
        except Exception as e:
            abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
            raise InvalidProjectError("Failed to load model metadata "
                                      "from '{}'. {}".format(abspath, e))
Beispiel #18
0
    def load(cls,
             model_dir=None,   # type: Optional[Text]
             model_metadata=None,   # type: Optional[Metadata]
             cached_component=None,   # type: Optional[RegexFeaturizer]
             **kwargs  # type: **Any
             ):
        # type: (...) -> RegexFeaturizer
        """Load persisted regex patterns into a new RegexFeaturizer."""
        meta = model_metadata.for_component(cls.name)
        regex_file = os.path.join(
            model_dir, meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME))
        if os.path.exists(regex_file):
            patterns = utils.read_json_file(regex_file)
            return RegexFeaturizer(meta, known_patterns=patterns)
        return RegexFeaturizer(meta)
Beispiel #19
0
    def load(cls,
             model_dir=None,   # type: Optional[Text]
             model_metadata=None,   # type: Optional[Metadata]
             cached_component=None,   # type: Optional[RegexFeaturizer]
             **kwargs  # type: **Any
             ):
        # type: (...) -> RegexFeaturizer
        """Restore persisted regex patterns, or return an empty featurizer.

        Warns (without raising) when metadata names a pattern file that is
        missing on disk.
        """
        if model_dir and model_metadata.get("regex_featurizer"):
            regex_file = os.path.join(model_dir,
                                      model_metadata.get("regex_featurizer"))
            if not os.path.isfile(regex_file):
                warnings.warn("Failed to load regex pattern file "
                              "'{}'".format(regex_file))
            else:
                return RegexFeaturizer(utils.read_json_file(regex_file))
        return RegexFeaturizer()
Beispiel #20
0
    def load(
            cls,
            model_dir=None,  # type: Optional[Text]
            model_metadata=None,  # type: Optional[Metadata]
            cached_component=None,  # type: Optional[RegexFeaturizer]
            **kwargs  # type: **Any
    ):
        # type: (...) -> RegexFeaturizer
        """Restore the RegexFeaturizer from its persisted pattern file."""
        meta = model_metadata.for_component(cls.name)
        regex_file = os.path.join(
            model_dir, meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME))
        if not os.path.exists(regex_file):
            return RegexFeaturizer(meta)
        return RegexFeaturizer(
            meta, known_patterns=utils.read_json_file(regex_file))
Beispiel #21
0
    def load(cls,
             model_dir: Optional[Text] = None,
             model_metadata: Optional[Metadata] = None,
             cached_component: Optional['EntitySynonymMapper'] = None,
             **kwargs: Any) -> 'EntitySynonymMapper':
        """Restore persisted entity synonyms for this component.

        Returns a mapper without synonyms when nothing was persisted, or
        (with a warning) when the persisted file is missing.
        """
        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("synonyms_file")
        if not file_name:
            return cls(meta, None)

        entity_synonyms_file = os.path.join(model_dir, file_name)
        if not os.path.isfile(entity_synonyms_file):
            warnings.warn("Failed to load synonyms file from '{}'"
                          "".format(entity_synonyms_file))
            return cls(meta, None)
        return cls(meta, utils.read_json_file(entity_synonyms_file))
    def load(
            cls,
            model_dir=None,  # type: Text
            model_metadata=None,  # type: Metadata
            cached_component=None,  # type: Optional[DucklingHTTPExtractor]
            **kwargs  # type: **Any
    ):
        # type: (...) -> DucklingHTTPExtractor
        """Recreate a DucklingHTTPExtractor, restoring persisted dimensions
        when a persisted file is present."""
        persisted = os.path.join(model_dir, model_metadata.get(cls.name))
        config = kwargs.get("config", {})

        dimensions = None
        if os.path.isfile(persisted):
            dimensions = utils.read_json_file(persisted)["dimensions"]

        return DucklingHTTPExtractor(config.get("duckling_http_url"),
                                     model_metadata.get("language"),
                                     dimensions)
Beispiel #23
0
    def read(self, fn, **kwargs):
        # type: ([Text]) -> TrainingData
        """Loads training data stored in the Dialogflow data format."""
        language = kwargs["language"]
        fformat = kwargs["fformat"]

        if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
            raise ValueError("fformat must be either {}, or {}".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES))

        root_js = utils.read_json_file(fn)
        examples_js = self._read_examples_js(fn, language, fformat)

        # Without examples there is nothing to train on.
        if not examples_js:
            logger.warning("No training examples found for dialogflow file {}!".format(fn))
            return TrainingData()
        if fformat == DIALOGFLOW_INTENT:
            return self._read_intent(root_js, examples_js)
        # fformat was validated above, so this must be DIALOGFLOW_ENTITIES.
        return self._read_entities(examples_js)
Beispiel #24
0
    def load(cls,
             model_dir=None,  # type: Optional[Text]
             model_metadata=None,  # type: Optional[Metadata]
             cached_component=None,  # type: Optional[EntitySynonymMapper]
             **kwargs  # type: **Any
             ):
        # type: (...) -> EntitySynonymMapper
        """Restore persisted entity synonyms, warning when the file is
        missing on disk."""
        meta = model_metadata.for_component(cls.name)
        entity_synonyms_file = os.path.join(
            model_dir, meta.get("synonyms_file", ENTITY_SYNONYMS_FILE_NAME))

        synonyms = None
        if os.path.isfile(entity_synonyms_file):
            synonyms = utils.read_json_file(entity_synonyms_file)
        else:
            warnings.warn("Failed to load synonyms file from '{}'"
                          "".format(entity_synonyms_file))

        return EntitySynonymMapper(meta, synonyms)
    def load(
            cls,
            model_dir=None,  # type: Optional[Text]
            model_metadata=None,  # type: Optional[Metadata]
            cached_component=None,  # type: Optional[Component]
            **kwargs  # type: **Any
    ):
        """Restore the persisted fuzzy gazette for this component, warning
        when the persisted file is missing."""
        from rasa_nlu.utils import read_json_file

        meta = model_metadata.for_component(cls.name)
        path = os.path.join(model_dir,
                            meta.get("gazette_file", FUZZY_GAZETTE_FILE))

        if not os.path.isfile(path):
            warnings.warn("Failed to load gazette file from '{}'"
                          "".format(path))
            return FuzzyGazette(meta, None)
        return FuzzyGazette(meta, read_json_file(path))
    def load(cls,
             model_dir=None,  # type: Text
             model_metadata=None,  # type: Metadata
             cached_component=None,  # type:Optional[DucklingExtractor]
             **kwargs  # type: **Any
             ):
        # type: (...) -> DucklingExtractor
        """Rebuild a DucklingExtractor, reusing the cached duckling wrapper
        when one is available and restoring persisted dimensions."""
        persisted = os.path.join(model_dir,
                                 model_metadata.get("ner_duckling_persisted"))
        if cached_component:
            duckling = cached_component.duckling
        else:
            duckling = cls.create_duckling_wrapper(
                model_metadata.get("language"))

        if not os.path.isfile(persisted):
            return DucklingExtractor(duckling)
        persisted_data = utils.read_json_file(persisted)
        return DucklingExtractor(duckling, persisted_data["dimensions"])
Beispiel #27
0
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Optional[Text] = None,
             model_metadata: Optional[Metadata] = None,
             cached_component: Optional['EntitySynonymMapper'] = None,
             **kwargs: Any
             ) -> 'EntitySynonymMapper':
        """Restore persisted synonyms; returns a mapper without synonyms
        when none were persisted, or (with a warning) when the persisted
        file is missing."""
        file_name = meta.get("file")
        if not file_name:
            return cls(meta, None)

        entity_synonyms_file = os.path.join(model_dir, file_name)
        if not os.path.isfile(entity_synonyms_file):
            warnings.warn("Failed to load synonyms file from '{}'"
                          "".format(entity_synonyms_file))
            return cls(meta, None)
        return cls(meta, utils.read_json_file(entity_synonyms_file))
Beispiel #28
0
    def read(self, fn: Text, **kwargs: Any) -> 'TrainingData':
        """Loads training data stored in the Dialogflow data format."""
        from rasa_nlu.training_data import TrainingData

        language = kwargs["language"]
        fformat = kwargs["fformat"]

        if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
            raise ValueError("fformat must be either {}, or {}"
                             "".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES))

        root_js = utils.read_json_file(fn)
        examples_js = self._read_examples_js(fn, language, fformat)

        # Without examples there is nothing to train on.
        if not examples_js:
            logger.warning("No training examples found for dialogflow file {}!"
                           "".format(fn))
            return TrainingData()
        if fformat == DIALOGFLOW_INTENT:
            return self._read_intent(root_js, examples_js)
        # fformat was validated above, so this must be DIALOGFLOW_ENTITIES.
        return self._read_entities(root_js, examples_js)
Beispiel #29
0
 def _read_composite_entities(self):
     """Read the defined composite patterns from the train file. We have
     to manually load the file, as rasa strips our custom information.

     :return: the list of composite entity definitions, or [] when the
         train file cannot be located or defines none.
     """
     # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt;
     # catch Exception instead so the process stays interruptible.
     try:
         file = self._get_train_file_cmd()
     except Exception:
         try:
             file = self._get_train_file_http()
         except Exception:
             warnings.warn('The CompositeEntityExtractor could not load '
                           'the train file.')
             return []
     file_content = utils.read_json_file(file)
     rasa_nlu_data = file_content['rasa_nlu_data']
     # dict.get replaces the try/except KeyError with identical behavior.
     composite_entities = rasa_nlu_data.get('composite_entities', [])
     if not composite_entities:
         # Fix: "entites" typo corrected in the user-facing warning.
         warnings.warn(
             'CompositeEntityExtractor was added to the '
             'pipeline but no composite entities have been defined.')
     return composite_entities
Beispiel #30
0
def test_example_training_data_is_valid():
    """The bundled demo training data must pass schema validation."""
    data = utils.read_json_file('data/examples/rasa/demo-rasa.json')
    validate_rasa_nlu_data(data)
Beispiel #31
0
def test_example_training_data_is_valid():
    """Ensure the shipped demo-rasa example passes rasa_nlu validation."""
    demo_path = 'data/examples/rasa/demo-rasa.json'
    validate_rasa_nlu_data(utils.read_json_file(demo_path))