Ejemplo n.º 1
0
def validate_and_format_dataset(dataset):
    """Check that the dataset is valid and return a formatted copy of it.

    The caller's object is never modified: all validation and formatting is
    applied to a JSON round-tripped copy. A dataset that already carries a
    truthy ``VALIDATED`` flag is returned as is, which makes this function
    idempotent.

    Args:
        dataset: dict expected to contain the ``INTENTS``, ``ENTITIES`` and
            ``LANGUAGE`` keys; ``INTENTS`` and ``ENTITIES`` must be dicts
            and ``LANGUAGE`` must be a ``str``.

    Returns:
        The input object itself when it is already flagged as validated,
        otherwise a new dict in which every entity has been replaced by the
        result of ``validate_and_format_builtin_entity`` or
        ``validate_and_format_custom_entity`` and ``VALIDATED`` is True.

    Raises:
        ValueError: if the language is not in ``get_all_languages()``.
        Any error raised by ``validate_type``, ``validate_key`` or the
        per-intent / per-entity helpers on invalid input.
    """
    # Check the type first: calling ``.get`` on a non-dict would otherwise
    # fail with an AttributeError before validation could report the problem.
    validate_type(dataset, dict)
    # Make this function idempotent
    if dataset.get(VALIDATED, False):
        return dataset
    # The JSON round trip builds a fully independent copy on its own (so no
    # separate deepcopy is needed) and normalizes the data to JSON types.
    dataset = json.loads(json.dumps(dataset))
    for key in (INTENTS, ENTITIES, LANGUAGE):
        validate_key(dataset, key, object_label="dataset")
    entities = dataset[ENTITIES]
    intents = dataset[INTENTS]
    validate_type(entities, dict)
    validate_type(intents, dict)
    language = dataset[LANGUAGE]
    validate_type(language, str)
    if language not in get_all_languages():
        raise ValueError("Unknown language: '%s'" % language)

    for intent in itervalues(intents):
        validate_and_format_intent(intent, entities)

    queries_entities_values = extract_queries_entities(dataset)

    # Only values of existing keys are replaced, so the dict keeps its size
    # and can safely be updated while it is being iterated.
    for entity_name, entity in iteritems(entities):
        queries_entities = queries_entities_values[entity_name]
        if is_builtin_entity(entity_name):
            entities[entity_name] = validate_and_format_builtin_entity(
                entity, queries_entities)
        else:
            entities[entity_name] = validate_and_format_custom_entity(
                entity, queries_entities, language)
    dataset[VALIDATED] = True
    return dataset
Ejemplo n.º 2
0
def validate_and_format_dataset(dataset):
    """Check that the dataset is valid and return a formatted copy of it.

    The caller's object is never modified: all validation and formatting is
    applied to a JSON round-tripped copy. A dataset that already carries a
    truthy ``VALIDATED`` flag is returned as is, which makes this function
    idempotent.

    Args:
        dataset: dict expected to contain the ``INTENTS``, ``ENTITIES`` and
            ``LANGUAGE`` keys; ``INTENTS`` and ``ENTITIES`` must be dicts
            and ``LANGUAGE`` must be a ``str``.

    Returns:
        The input object itself when it is already flagged as validated,
        otherwise a new dict in which every entity has been replaced by the
        result of ``validate_and_format_builtin_entity`` or
        ``validate_and_format_custom_entity`` and ``VALIDATED`` is True.

    Raises:
        ValueError: if the language is not in ``get_all_languages()``.
        Any error raised by ``validate_type``, ``validate_key`` or the
        per-intent / per-entity helpers on invalid input.
    """
    # Check the type first: calling ``.get`` on a non-dict would otherwise
    # fail with an AttributeError before validation could report the problem.
    validate_type(dataset, dict)
    # Make this function idempotent
    if dataset.get(VALIDATED, False):
        return dataset
    # The JSON round trip builds a fully independent copy on its own (so no
    # separate deepcopy is needed) and normalizes the data to JSON types.
    dataset = json.loads(json.dumps(dataset))
    for key in (INTENTS, ENTITIES, LANGUAGE):
        validate_key(dataset, key, object_label="dataset")
    entities = dataset[ENTITIES]
    intents = dataset[INTENTS]
    validate_type(entities, dict)
    validate_type(intents, dict)
    language = dataset[LANGUAGE]
    validate_type(language, str)
    if language not in get_all_languages():
        raise ValueError("Unknown language: '%s'" % language)

    for intent in itervalues(intents):
        validate_and_format_intent(intent, entities)

    queries_entities_values = extract_queries_entities(dataset)

    # Only values of existing keys are replaced, so the dict keeps its size
    # and can safely be updated while it is being iterated.
    for entity_name, entity in iteritems(entities):
        queries_entities = queries_entities_values[entity_name]
        if is_builtin_entity(entity_name):
            entities[entity_name] = validate_and_format_builtin_entity(
                entity, queries_entities)
        else:
            entities[entity_name] = validate_and_format_custom_entity(
                entity, queries_entities, language)
    dataset[VALIDATED] = True
    return dataset
Ejemplo n.º 3
0
def validate_and_format_intent(intent, entities):
    """Validate the structure of a single intent and return it.

    The intent must be a dict holding a list under ``UTTERANCES``; each
    utterance must be a dict holding a list under ``DATA``; each chunk of
    that list must be a dict with a ``TEXT`` key. A chunk carrying either
    ``ENTITY`` or ``SLOT_NAME`` must carry both, and its entity must be a
    key of ``entities`` unless ``is_builtin_entity`` accepts it.

    Args:
        intent: the intent to check.
        entities: the entities declared in the dataset, looked up by name.

    Returns:
        The same ``intent`` object that was passed in.
    """
    validate_type(intent, dict)
    validate_key(intent, UTTERANCES, object_label="intent dict")
    utterances = intent[UTTERANCES]
    validate_type(utterances, list)
    for utterance in utterances:
        validate_type(utterance, dict)
        validate_key(utterance, DATA, object_label="utterance")
        chunks = utterance[DATA]
        validate_type(chunks, list)
        for chunk in chunks:
            validate_type(chunk, dict)
            validate_key(chunk, TEXT, object_label="chunk")
            # Plain text chunk: nothing slot-related to check.
            if ENTITY not in chunk and SLOT_NAME not in chunk:
                continue
            validate_keys(chunk, [ENTITY, SLOT_NAME], object_label="chunk")
            entity_name = chunk[ENTITY]
            # Builtin entities are exempt from the declaration check.
            if not is_builtin_entity(entity_name):
                validate_key(entities, entity_name, object_label=ENTITIES)
    return intent
Ejemplo n.º 4
0
def validate_and_format_intent(intent, entities):
    """Validate the structure of a single intent and return it.

    The intent must be a dict holding a list under ``UTTERANCES``; each
    utterance must be a dict holding a list under ``DATA``; each chunk of
    that list must be a dict with a ``TEXT`` key. A chunk carrying either
    ``ENTITY`` or ``SLOT_NAME`` must carry both, and its entity must be a
    key of ``entities`` unless ``is_builtin_entity`` accepts it.

    Args:
        intent: the intent to check.
        entities: the entities declared in the dataset, looked up by name.

    Returns:
        The same ``intent`` object that was passed in.
    """
    validate_type(intent, dict)
    validate_key(intent, UTTERANCES, object_label="intent dict")
    utterances = intent[UTTERANCES]
    validate_type(utterances, list)
    for utterance in utterances:
        validate_type(utterance, dict)
        validate_key(utterance, DATA, object_label="utterance")
        chunks = utterance[DATA]
        validate_type(chunks, list)
        for chunk in chunks:
            validate_type(chunk, dict)
            validate_key(chunk, TEXT, object_label="chunk")
            # Plain text chunk: nothing slot-related to check.
            if ENTITY not in chunk and SLOT_NAME not in chunk:
                continue
            validate_keys(chunk, [ENTITY, SLOT_NAME], object_label="chunk")
            entity_name = chunk[ENTITY]
            # Builtin entities are exempt from the declaration check.
            if not is_builtin_entity(entity_name):
                validate_key(entities, entity_name, object_label=ENTITIES)
    return intent