Example #1
0
    def parse(self, text, scope=None):
        """Run the gazetteer entity parser on *text*

        Args:
            text (str): The input to extract entities from
            scope (list of str, optional): Entity labels to restrict the
                extraction to. When omitted, ``None`` is forwarded to the
                native call in place of a scope array.

        Returns:
            list of dict: The decoded JSON payload produced by the native
            parser

        Raises:
            TypeError: If *text* is not a ``str`` or if *scope* holds an
                element that is not a ``str``
        """
        if not isinstance(text, str):
            raise TypeError(
                "Expected text to be of type 'str' but found: %s" % type(text))

        # Stays None when no scope is given, which is what gets passed on.
        scope_ref = None
        if scope is not None:
            if any(not isinstance(label, str) for label in scope):
                raise TypeError(
                    "Expected scope to contain objects of type 'str'")
            encoded_labels = [label.encode("utf8") for label in scope]
            nb_labels = len(encoded_labels)
            labels_array = CStringArray()
            labels_array.size = c_int(nb_labels)
            labels_array.data = (c_char_p * nb_labels)(*encoded_labels)
            scope_ref = byref(labels_array)

        with string_pointer(c_char_p()) as out_ptr:
            status = lib.snips_nlu_parsers_extract_gazetteer_entities_json(
                self._parser, text.encode("utf8"), scope_ref, byref(out_ptr))
            check_ffi_error(
                status,
                "Something went wrong when extracting gazetteer entities")
            # Copy the bytes out while still inside the context manager.
            raw_json = string_at(out_ptr)
            return json.loads(raw_json.decode("utf8"))
Example #2
0
def get_complete_entity_ontology():
    """Return the complete entity ontology for all languages.

    The ontology is requested from the native library as a JSON string,
    parsed with ``json.loads`` and stored in the module-level
    ``_COMPLETE_ENTITY_ONTOLOGY``. While that global is ``None`` the native
    call is made; afterwards the cached object is returned directly.

    Returns:
        The Python object obtained by parsing the JSON ontology. The same
        cached object is returned on every call, so callers should not
        mutate it.

    Raises:
        Whatever ``check_ffi_error`` raises for the returned exit code
        (presumably on a non-success code -- confirm against its
        definition).
    """
    global _COMPLETE_ENTITY_ONTOLOGY
    if _COMPLETE_ENTITY_ONTOLOGY is None:
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_complete_entity_ontology_json(
                byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when retrieving "
                "complete entity ontology")
            json_str = string_at(ptr).decode("utf8")
            # json_str is already decoded text. The ``encoding`` keyword of
            # json.loads was ignored on Python 3 and removed in 3.9, where
            # passing it raises TypeError, so it is not passed here.
            _COMPLETE_ENTITY_ONTOLOGY = json.loads(json_str)
    return _COMPLETE_ENTITY_ONTOLOGY
Example #3
0
def get_language_entity_ontology(language):
    """Return the entity ontology for the specified language.

    Results are cached per language in the module-level
    ``_LANGUAGE_ENTITY_ONTOLOGY`` dict, so the native library is only
    queried the first time a given language is requested.

    Args:
        language (str): Language identifier; it is UTF-8 encoded before
            being handed to the native library.

    Returns:
        The Python object obtained by parsing the JSON ontology for
        *language*. The same cached object is returned on every call for
        that language, so callers should not mutate it.

    Raises:
        Whatever ``check_ffi_error`` raises for the returned exit code
        (presumably on a non-success code -- confirm against its
        definition).
    """
    # No ``global`` statement needed: the cache dict is mutated, not rebound.
    if language not in _LANGUAGE_ENTITY_ONTOLOGY:
        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_language_entity_ontology_json(
                language.encode("utf8"), byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when retrieving "
                "language entity ontology")
            json_str = string_at(ptr).decode("utf8")
            # json_str is already decoded text. The ``encoding`` keyword of
            # json.loads was ignored on Python 3 and removed in 3.9, where
            # passing it raises TypeError, so it is not passed here.
            _LANGUAGE_ENTITY_ONTOLOGY[language] = json.loads(json_str)
    return _LANGUAGE_ENTITY_ONTOLOGY[language]
Example #4
0
def get_builtin_entity_shortname(entity):
    """Return the short name of a builtin entity

    Short names are memoized in the module-level
    ``_BUILTIN_ENTITIES_SHORTNAMES`` dict, so the native library is only
    queried the first time a given entity is requested.

    Examples:

    >>> get_builtin_entity_shortname(u"snips/amountOfMoney")
    'AmountOfMoney'
    """
    known_shortnames = _BUILTIN_ENTITIES_SHORTNAMES
    if entity in known_shortnames:
        return known_shortnames[entity]

    with string_pointer(c_char_p()) as out_ptr:
        status = lib.snips_nlu_ontology_entity_shortname(
            entity.encode("utf8"), byref(out_ptr))
        check_ffi_error(
            status,
            "Something went wrong when retrieving builtin entity shortname")
        # Decode while still inside the context manager, then memoize.
        known_shortnames[entity] = string_at(out_ptr).decode("utf8")
    return known_shortnames[entity]
Example #5
0
    def parse(self, text, scope=None, max_alternative_resolved_values=5):
        """Extracts builtin entities from *text*

        Args:
            text (str): Input
            scope (list of str, optional): List of builtin entity labels. If
                defined, the parser will extract entities using the provided
                scope instead of the entire scope of all available entities.
                This allows looking for specific builtin entity kinds.
            max_alternative_resolved_values (int, optional): Maximum number of
                alternative resolved values to return in addition to the top
                one (default 5).

        Returns:
            list of dict: The list of extracted entities, decoded from the
            JSON string produced by the native parser

        Raises:
            TypeError: If *text* is not a ``str`` or if *scope* contains an
                element that is not a ``str``
        """
        if not isinstance(text, str):
            # The message names ``text``, the argument actually validated
            # (it previously said "language").
            raise TypeError("Expected text to be of type 'str' but found: "
                            "%s" % type(text))
        if scope is not None:
            if not all(isinstance(e, str) for e in scope):
                raise TypeError(
                    "Expected scope to contain objects of type 'str'")
            # Build the C array of UTF-8 labels that the native call expects.
            scope = [e.encode("utf8") for e in scope]
            arr = CStringArray()
            arr.size = c_int(len(scope))
            arr.data = (c_char_p * len(scope))(*scope)
            scope = byref(arr)

        with string_pointer(c_char_p()) as ptr:
            exit_code = lib.snips_nlu_parsers_extract_builtin_entities_json(
                self._parser, text.encode("utf8"), scope,
                max_alternative_resolved_values, byref(ptr))
            check_ffi_error(
                exit_code, "Something went wrong when extracting "
                "builtin entities")
            # Copy the bytes out while still inside the context manager.
            result = string_at(ptr)
            return json.loads(result.decode("utf8"))