Example #1
    def get_data(self, train_data, language):
        lookup_tables = []
        composite_entities = []

        cmdline_args = create_argument_parser().parse_args()
        files = utils.list_files(cmdline_args.data)

        for file in files:
            fformat = _guess_format(file)
            file_content = utils.read_json_file(file)
            if fformat == DIALOGFLOW_ENTITIES:
                entity = file_content['name']
                dialogflow_reader = DialogflowReader()
                examples_js = dialogflow_reader._read_examples_js(
                    fn=file, language=language, fformat=fformat)
                lookup_table = self._extract_lookup_tables(entity, examples_js)
                if lookup_table:
                    lookup_tables.append(lookup_table)
                composite_entity = self._extract_composite_entities(
                    entity, examples_js)
                if composite_entity:
                    composite_entities.append(composite_entity)

            # rasa_nlu is assumed to be the format constant for rasa NLU
            # JSON files, defined alongside DIALOGFLOW_ENTITIES.
            if fformat == rasa_nlu:
                rasa_nlu_data = file_content['rasa_nlu_data']
                composite_entities = rasa_nlu_data['composite_entities']
                lookup_tables = rasa_nlu_data['lookup_tables']

        return lookup_tables, composite_entities
Example #2
 def _read_composite_entities(self):
     """Read the defined composite patterns from the train file. We have
     to manually load the file, as rasa strips our custom information.
     """
     try:
         files = self._get_train_files_cmd()
     except Exception:
         try:
             files = self._get_train_files_http()
         except Exception:
             warnings.warn("The CompositeEntityExtractor could not load "
                           "the train file.")
             return []
     composite_entities = []
     for file in files:
         file_content = utils.read_json_file(file)
         rasa_nlu_data = file_content["rasa_nlu_data"]
         try:
             composite_entities_in_file = rasa_nlu_data[
                 "composite_entities"]
         except KeyError:
             pass
         else:
             composite_entities.extend(composite_entities_in_file)
     if not composite_entities:
         warnings.warn(
             "CompositeEntityExtractor was added to the "
             "pipeline but no composite entites have been defined.")
     return composite_entities
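
For reference, a minimal sketch of what such a composite pattern block might look like inside the train file. The "rasa_nlu_data" and "composite_entities" keys come from the code above; the pattern shape and values are hypothetical:

train_file_content = {
    "rasa_nlu_data": {
        "composite_entities": [
            # Assumed shape: a composite name plus patterns that
            # reference other entities by name.
            {"name": "trip", "patterns": ["from @city to @city"]}
        ]
    }
}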
Example #3
    def read(self, fn: Text, **kwargs: Any) -> "TrainingData":
        """Loads training data stored in the Dialogflow data format."""
        from rasa.nlu.training_data import TrainingData

        language = kwargs["language"]
        fformat = kwargs["fformat"]

        if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}:
            raise ValueError(
                "fformat must be either {}, or {}"
                "".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES)
            )

        root_js = utils.read_json_file(fn)
        examples_js = self._read_examples_js(fn, language, fformat)

        if not examples_js:
            logger.warning(
                "No training examples found for dialogflow file {}!".format(fn)
            )
            return TrainingData()
        elif fformat == DIALOGFLOW_INTENT:
            return self._read_intent(root_js, examples_js)
        elif fformat == DIALOGFLOW_ENTITIES:
            return self._read_entities(root_js, examples_js)
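
A minimal usage sketch for this reader. The export path is hypothetical; the "language" and "fformat" kwargs are required, as read() pulls them from **kwargs:

reader = DialogflowReader()
training_data = reader.read("entities/city.json",  # hypothetical Dialogflow export
                            language="en",
                            fformat=DIALOGFLOW_ENTITIES)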
Example #4
def test_url_data_format():
    data = """
    {
      "rasa_nlu_data": {
        "entity_synonyms": [
          {
            "value": "nyc",
            "synonyms": ["New York City", "nyc", "the big apple"]
          }
        ],
        "common_examples" : [
          {
            "text": "show me flights to New York City",
            "intent": "unk",
            "entities": [
              {
                "entity": "destination",
                "start": 19,
                "end": 32,
                "value": "NYC"
              }
            ]
          }
        ]
      }
    }"""
    fname = utils.create_temporary_file(data.encode("utf-8"),
                                        suffix="_tmp_training_data.json",
                                        mode="w+b")
    data = utils.read_json_file(fname)
    assert data is not None
    validate_rasa_nlu_data(data)
Example #5
    def load(
            cls,
            model_dir=None,  # type: Optional[Text]
            model_metadata=None,  # type: Optional[Metadata]
            cached_component=None,  # type: Optional[CompositeEntitiesMapper]
            **kwargs  # type: Any
    ):
        # type: (...) -> CompositeEntitiesMapper

        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("composite_entities_file",
                             COMPOSITE_ENTITIES_FILE_NAME)
        composite_entities_file = os.path.join(model_dir, file_name)

        if os.path.isfile(composite_entities_file):
            composite_entities = utils.read_json_file(composite_entities_file)
        else:
            composite_entities = {
                'lookup_tables': [],
                'composite_entities': []
            }
            warnings.warn("Failed to load composite entities file from '{}'"
                          "".format(composite_entities_file))

        return cls(meta, composite_entities)
Example #6
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: "Metadata" = None,
        cached_component: Optional["KeywordIntentClassifier"] = None,
        **kwargs: Any,
    ) -> "KeywordIntentClassifier":

        if model_dir and meta.get("file"):
            file_name = meta.get("file")
            keyword_file = os.path.join(model_dir, file_name)
            if os.path.exists(keyword_file):
                intent_keyword_map = utils.read_json_file(keyword_file)
            else:
                raise_warning(
                    f"Failed to load key word file for `IntentKeywordClassifier`, "
                    f"maybe {keyword_file} does not exist?",
                )
                intent_keyword_map = None
            return cls(meta, intent_keyword_map)
        else:
            raise Exception(
                f"Failed to load keyword intent classifier model. "
                f"Path {os.path.abspath(meta.get('file'))} doesn't exist."
            )
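
The keyword file loaded here is plain JSON. A hedged sketch of its expected content, assuming a keyword-to-intent mapping (the values are hypothetical):

# Assumed content of the persisted keyword file (keyword -> intent).
intent_keyword_map = {
    "hello": "greet",
    "goodbye": "bye",
}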
Example #7
    def load(
        cls, meta, model_dir=None, model_metadata=None, cached_component=None, **kwargs
    ):
        file_name = meta.get("file")
        regex_file = os.path.join(model_dir, file_name)

        if os.path.exists(regex_file):
            known_patterns = utils.read_json_file(regex_file)
            return cls(meta, known_patterns=known_patterns)
        else:
            return cls(meta)
Example #8
File: regex.py Project: amedat04/rasam
 def load(
     cls,
     meta: Dict[Text, Any],
     model_dir: Optional[Text] = None,
     model_metadata: Optional[Metadata] = None,
     cached_component: Optional[Component] = None,
     **kwargs: Any,
 ) -> "EntityExtractor":
     file_name = meta.get("file")
     regex_features = []
     if file_name:
         regex_features = utils.read_json_file(os.path.join(model_dir, file_name))  # type: ignore
     meta["regex_features"] = regex_features
     return cls(meta)
Example #9
    @staticmethod
    def _read_examples_js(fn, language, fformat):
        """Infer and load the example file based on the root
        filename and root format."""

        if fformat == DIALOGFLOW_INTENT:
            examples_type = "usersays"
        else:
            examples_type = "entries"
        examples_fn_ending = "_{}_{}.json".format(examples_type, language)
        examples_fn = fn.replace(".json", examples_fn_ending)
        if os.path.isfile(examples_fn):
            return utils.read_json_file(examples_fn)
        else:
            return None
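
To illustrate the filename inference, here is a hypothetical intent file and the examples file derived from it:

fn = "intents/greet.json"  # hypothetical root file
# For DIALOGFLOW_INTENT the suffix is "_usersays_<language>.json":
examples_fn = fn.replace(".json", "_usersays_en.json")
assert examples_fn == "intents/greet_usersays_en.json"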
Example #10
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Optional[Text] = None,
             model_metadata: Optional["Metadata"] = None,
             cached_component: Optional["RegexFeaturizer"] = None,
             **kwargs: Any) -> "RegexFeaturizer":

        file_name = meta.get("file")
        regex_file = os.path.join(model_dir, file_name)

        if os.path.exists(regex_file):
            known_patterns = utils.read_json_file(regex_file)
            return RegexFeaturizer(meta, known_patterns=known_patterns)
        else:
            return RegexFeaturizer(meta)
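
The regex file read above is also plain JSON. A sketch of the known_patterns it might hold, assuming name/pattern pairs (the values are hypothetical):

# Assumed content of the persisted regex file.
known_patterns = [
    {"name": "zipcode", "pattern": "[0-9]{5}"},
]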
Example #11
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Optional[Text] = None,
             model_metadata: Optional["Metadata"] = None,
             cached_component: Optional["NGramFeaturizer"] = None,
             **kwargs: Any) -> "NGramFeaturizer":

        file_name = meta.get("file")
        featurizer_file = os.path.join(model_dir, file_name)

        if os.path.exists(featurizer_file):
            data = utils.read_json_file(featurizer_file)
            return NGramFeaturizer(meta, data["all_ngrams"],
                                   data["best_num_ngrams"])
        else:
            return NGramFeaturizer(meta)
Example #12
    @staticmethod
    def load(model_dir: Text):
        """Loads the metadata from a models directory.

        Args:
            model_dir: the directory where the model is saved.
        Returns:
            Metadata: A metadata object describing the model
        """
        try:
            metadata_file = os.path.join(model_dir, 'metadata.json')
            data = utils.read_json_file(metadata_file)
            return Metadata(data, model_dir)
        except Exception as e:
            abspath = os.path.abspath(os.path.join(model_dir, 'metadata.json'))
            raise InvalidProjectError("Failed to load model metadata "
                                      "from '{}'. {}".format(abspath, e))
Example #13
 def load(cls,
          component_meta=None,
          model_dir=None,
          model_metadata=None,
          cached_component=None,
          **kwargs):
     file_name = component_meta.get("composite_entities_file",
                                    COMPOSITE_ENTITIES_FILE_NAME)
     composite_entities_file = os.path.join(model_dir, file_name)
     if os.path.isfile(composite_entities_file):
         composite_entities = utils.read_json_file(composite_entities_file)
     else:
         composite_entities = []
         warnings.warn("Failed to load composite entities"
                       'file from "{}"'.format(composite_entities_file))
     return cls(component_meta, composite_entities)
Example #14
    def load(cls,
             component_meta: Dict[Text, Any],
             model_dir: Text = None,
             model_metadata: Metadata = None,
             cached_component: Optional['Gazette'] = None,
             **kwargs: Any) -> 'Gazette':
        from rasa.nlu.utils import read_json_file

        td = read_json_file(os.path.join(model_dir, "training_data.json"))
        if "gazette" in td["rasa_nlu_data"]:
            gazette = cls._load_gazette_list(td["rasa_nlu_data"]["gazette"])
        else:
            gazette = None
            warnings.warn(
                "Could not find Gazette in persisted training data file.")

        return Gazette(component_meta, gazette)
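
A hedged sketch of the "gazette" block this loader looks for in the persisted training data (the shape is an assumption, the values hypothetical):

training_data = {
    "rasa_nlu_data": {
        "gazette": [
            # Assumed shape: an entity value with its gazette entries.
            {"value": "city", "gazette": ["berlin", "amsterdam", "london"]}
        ]
    }
}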
Example #15
    def load(
        cls,
        meta: Dict[Text, Any],
        model_dir: Optional[Text] = None,
        model_metadata: "Metadata" = None,
        cached_component: Optional["KeywordIntentClassifier"] = None,
        **kwargs: Any,
    ) -> "KeywordIntentClassifier":

        if model_dir and meta.get("file"):
            file_name = meta.get("file")
            keyword_file = os.path.join(model_dir, file_name)
            if os.path.exists(keyword_file):
                intent_keyword_map = utils.read_json_file(keyword_file)
            else:
                intent_keyword_map = None
                warnings.warn(f"Failed to load IntentKeywordClassifier, maybe "
                              f"{keyword_file} does not exist.")
        else:
            intent_keyword_map = None
        return cls(meta, intent_keyword_map)
Example #16
    def load(cls,
             model_dir: Optional[Text] = None,
             model_metadata: Optional[Metadata] = None,
             cached_component: Optional['EntitySynonymMapper'] = None,
             **kwargs: Any) -> 'EntitySynonymMapper':

        meta = model_metadata.for_component(cls.name)
        file_name = meta.get("synonyms_file")
        if not file_name:
            synonyms = None
            return cls(meta, synonyms)

        entity_synonyms_file = os.path.join(model_dir, file_name)
        if os.path.isfile(entity_synonyms_file):
            synonyms = utils.read_json_file(entity_synonyms_file)
        else:
            synonyms = None
            warnings.warn("Failed to load synonyms file from '{}'"
                          "".format(entity_synonyms_file))
        return cls(meta, synonyms)
Example #17
    def load(cls,
             meta: Dict[Text, Any],
             model_dir: Text = None,
             model_metadata: Metadata = None,
             cached_component: Optional['CustomizedEntityExtractor'] = None,
             **kwargs: Any):
        # type: (...) -> CustomizedEntityExtractor

        file_name = meta.get("customize_file")
        if not file_name:
            entity_customize = None
            return cls(meta, entity_customize)

        entity_customize_file = os.path.join(model_dir, file_name)
        if os.path.isfile(entity_customize_file):
            entity_customize = read_json_file(entity_customize_file)
        else:
            entity_customize = None
            warnings.warn("Failed to load synonyms file from '{}'"
                          "".format(entity_customize_file))
        return cls(meta, entity_customize)
Example #18
def test_example_training_data_is_valid():
    demo_json = "data/examples/rasa/demo-rasa.json"
    data = utils.read_json_file(demo_json)
    validate_rasa_nlu_data(data)