def parse(self, text, scope=None):
        """Extract builtin entities from *text*

        Args:
            text (str): Input
            scope (list of str, optional): List of builtin entity labels. If
                defined, the parser will extract entities using the provided
                scope instead of the entire scope of all available entities.
                This allows looking for specific builtin entity kinds.

        Returns:
            list of dict: The list of extracted entities, decoded from the
                JSON string produced by the native library

        Raises:
            TypeError: If *text* is not a ``str`` or if *scope* contains an
                item which is not a ``str``
        """
        if not isinstance(text, str):
            # The validated argument is *text*; the message must name it so
            # that callers are not pointed at a non-existent "language" arg.
            raise TypeError("Expected text to be of type 'str' but found: "
                            "%s" % type(text))
        if scope is not None:
            if not all(isinstance(e, str) for e in scope):
                raise TypeError(
                    "Expected scope to contain objects of type 'str'")
            # Marshal the labels into the C string array expected by the
            # native call; when scope is None, None is passed through as is.
            scope = [e.encode("utf8") for e in scope]
            arr = CStringArray()
            arr.size = c_int(len(scope))
            arr.data = (c_char_p * len(scope))(*scope)
            scope = byref(arr)

        # NOTE(review): string_pointer presumably releases the native string
        # when the block exits, hence the result is read and decoded inside
        # the `with` body -- confirm against its definition.
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_ontology_extract_builtin_entities_json(
                self._parser, text.encode("utf8"), scope, byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when extracting "
                "builtin entities")
            result = string_at(ptr)
            return json.loads(result.decode("utf8"))
    def build(cls, language, gazetteer_entity_parser_path=None):
        """Create a builtin entity parser for the given language

        Args:
            language (str): Language identifier; it is upper-cased before
                being handed to the native library
            gazetteer_entity_parser_path (str or Path, optional): Location of
                a gazetteer entity parser. A ``Path`` is converted to ``str``.
                If None, the builtin entity parser will only use grammar
                entities.

        Returns:
            An instance of *cls* wrapping the native parser pointer

        Raises:
            TypeError: If *language* is not a ``str``
        """
        if not isinstance(language, str):
            raise TypeError("Expected language to be of type 'str' but found:"
                            " %s" % type(language))

        gazetteer_path = gazetteer_entity_parser_path
        if isinstance(gazetteer_path, Path):
            gazetteer_path = str(gazetteer_path)

        # The native constructor takes its configuration as a JSON document
        serialized_config = json.dumps(dict(
            language=language.upper(),
            gazetteer_parser_path=gazetteer_path))
        parser_handle = pointer(c_void_p())
        exit_code = lib.snips_nlu_ontology_create_builtin_entity_parser(
            byref(parser_handle), bytes(serialized_config, encoding="utf8"))
        check_ffi_error(
            exit_code, "Something went wrong while creating the "
            "builtin entity parser")
        return cls(parser_handle)
Exemple #3
0
def get_builtin_entity_examples(builtin_entity_kind, language):
    """Return example values of a builtin entity in the specified language

    Results are memoized in the module-level ``_ENTITIES_EXAMPLES`` mapping,
    keyed first by entity kind and then by language, so the native library is
    only queried the first time a given pair is requested.

    Args:
        builtin_entity_kind (str): Label of the builtin entity
        language (str): Language identifier

    Returns:
        list of str: The examples; this is the cached list object itself

    Raises:
        TypeError: If either argument is not a ``str``
    """
    if not isinstance(builtin_entity_kind, str):
        raise TypeError("Expected `builtin_entity_kind` to be of type 'str' "
                        "but found: %s" % type(builtin_entity_kind))
    if not isinstance(language, str):
        raise TypeError(
            "Expected `language` to be of type 'str' but found: %s" %
            type(language))

    examples_by_language = _ENTITIES_EXAMPLES.setdefault(
        builtin_entity_kind, dict())
    if language in examples_by_language:
        return examples_by_language[language]

    with string_array_pointer(pointer(CStringArray())) as ptr:
        exit_code = lib.snips_nlu_ontology_builtin_entity_examples(
            builtin_entity_kind.encode("utf8"), language.encode("utf8"),
            byref(ptr))
        check_ffi_error(
            exit_code, "Something went wrong when retrieving "
            "builtin entity examples")
        c_array = ptr.contents
        # Decode inside the `with` body, while the native array is in scope
        examples_by_language[language] = [
            c_array.data[idx].decode("utf8") for idx in range(c_array.size)]
    return examples_by_language[language]
 def persist(self, path):
     """Persist the builtin entity parser on disk at the provided path

     Args:
         path (str or Path): Destination; a ``Path`` is converted to ``str``
             and the result is passed UTF-8 encoded to the native library
     """
     destination = str(path) if isinstance(path, Path) else path
     exit_code = lib.snips_nlu_ontology_persist_builtin_entity_parser(
         self._parser, destination.encode("utf8"))
     # NOTE(review): check_ffi_error presumably raises when exit_code signals
     # a failure -- confirm against its definition.
     check_ffi_error(
         exit_code, "Something went wrong when persisting the "
         "builtin entity parser")
 def from_path(cls, parser_path):
     """Create an instance of *cls* from a parser persisted on disk

     The parser is loaded through the native
     ``snips_nlu_ontology_load_builtin_entity_parser`` call.

     Args:
         parser_path (str or Path): Location of the persisted parser; a
             ``Path`` is converted to ``str`` before being UTF-8 encoded

     Returns:
         An instance of *cls* wrapping the loaded native parser pointer
     """
     location = parser_path
     if isinstance(location, Path):
         location = str(location)
     encoded_location = bytes(location, encoding="utf8")

     parser_handle = pointer(c_void_p())
     exit_code = lib.snips_nlu_ontology_load_builtin_entity_parser(
         byref(parser_handle), encoded_location)
     check_ffi_error(
         exit_code, "Something went wrong when loading the "
         "builtin entity parser")
     return cls(parser_handle)
Exemple #6
0
    def build(cls, build_config):
        """Build a new gazetteer entity parser from a build configuration

        The configuration is serialized to JSON and handed to the native
        library. It must have the following format:

            {
                "entity_parsers": [
                    {
                        "entity_identifier": "my_first_entity",
                        "entity_parser": {
                            "gazetteer": [
                                {
                                    "raw_value": "foo bar",
                                    "resolved_value": "Foo Bar"
                                },
                                {
                                    "raw_value": "yolo",
                                    "resolved_value": "Yala"
                                }
                            ],
                            "threshold": 0.6,
                            "n_gazetteer_stop_words": 10,
                            "additional_stop_words": ["the", "a"]
                        }
                    },
                    {
                        "entity_identifier": "my_second_entity",
                        "entity_parser": {
                            "gazetteer": [
                                {
                                    "raw_value": "the stones",
                                    "resolved_value": "The Rolling Stones"
                                }
                            ],
                            "threshold": 0.6,
                            "n_gazetteer_stop_words": None,
                            "additional_stop_words": None
                        }
                    },
                ]
            }

        Args:
            build_config (dict): JSON-serializable build configuration

        Returns:
            An instance of *cls* wrapping the native parser pointer
        """
        serialized_config = json.dumps(build_config)
        parser_handle = pointer(c_void_p())
        exit_code = lib.snips_nlu_ontology_build_gazetteer_entity_parser(
            byref(parser_handle), bytes(serialized_config, encoding="utf8"))
        check_ffi_error(
            exit_code, "Something went wrong when building the "
            "gazetteer entity parser")
        return cls(parser_handle)
Exemple #7
0
def get_builtin_entity_shortname(entity):
    """Return the short name of a builtin entity

    The value is fetched from the native library on the first request for
    *entity* and memoized in the module-level
    ``_BUILTIN_ENTITIES_SHORTNAMES`` mapping afterwards.

    Examples:

    >>> get_builtin_entity_shortname(u"snips/amountOfMoney")
    'AmountOfMoney'
    """
    if entity in _BUILTIN_ENTITIES_SHORTNAMES:
        return _BUILTIN_ENTITIES_SHORTNAMES[entity]

    with string_pointer(c_char_p()) as ptr:
        exit_code = lib.snips_nlu_ontology_entity_shortname(
            entity.encode("utf8"), byref(ptr))
        check_ffi_error(
            exit_code, "Something went wrong when retrieving "
            "builtin entity shortname")
        # Copy and decode inside the `with` body, while the pointer is in
        # scope
        shortname = string_at(ptr).decode("utf8")
        _BUILTIN_ENTITIES_SHORTNAMES[entity] = shortname
    return _BUILTIN_ENTITIES_SHORTNAMES[entity]
Exemple #8
0
def get_supported_grammar_entities(language):
    """Return the grammar entities supported in the specified *language*

    The result is memoized per language in the module-level
    ``_SUPPORTED_GRAMMAR_ENTITIES`` mapping.

    Args:
        language (str): Language identifier

    Returns:
          set of str: the entity labels; this is the cached set object itself

    Raises:
        TypeError: If *language* is not a ``str``
    """
    if not isinstance(language, str):
        raise TypeError("Expected language to be of type 'str' but found: %s" %
                        type(language))

    if language in _SUPPORTED_GRAMMAR_ENTITIES:
        return _SUPPORTED_GRAMMAR_ENTITIES[language]

    with string_array_pointer(pointer(CStringArray())) as ptr:
        exit_code = lib.snips_nlu_ontology_supported_grammar_entities(
            language.encode("utf8"), byref(ptr))
        check_ffi_error(
            exit_code, "Something went wrong when retrieving "
            "supported grammar entities")
        c_array = ptr.contents
        _SUPPORTED_GRAMMAR_ENTITIES[language] = {
            c_array.data[idx].decode("utf8") for idx in range(c_array.size)}
    return _SUPPORTED_GRAMMAR_ENTITIES[language]