Exemple #1
0
def check_language_priority(language, repository_authorization, repository_version):
    """Fetch repository data for a language, honouring DEFAULT_LANGS_PRIORITY.

    A truthy ``language`` is lower-cased, reduced to the part before its first
    ``-`` or ``_`` and passed to ``language_validation``. When the resulting
    language is a key of ``DEFAULT_LANGS_PRIORITY``, every language in the
    mapped sequence is requested in order until a response carries a truthy
    ``"total_training_end"``. Otherwise a single request is made for the
    language itself.

    Returns:
        The last response obtained from ``request_backend_parse``; ``{}`` if
        that request raised or if no request was made at all.
    """
    if language:
        language = re.split(r"[-_]", str(language).lower())[0]
        language_validation(language)

    def _fetch(lang):
        # Any backend failure is deliberately reduced to an empty result.
        try:
            return backend().request_backend_parse(
                repository_authorization, lang, repository_version
            )
        except Exception:
            return {}

    # No hard-coded priority list: ask only for the generic language.
    if language not in DEFAULT_LANGS_PRIORITY:
        return _fetch(language)

    # Walk the hard-coded priority list, stopping at the first trained entry.
    repository = {}
    for candidate in DEFAULT_LANGS_PRIORITY.get(language):
        repository = _fetch(candidate)
        if repository.get("total_training_end"):
            break

    return repository
def qa_handler(
    authorization,
    knowledge_base_id,
    question,
    language,
    from_backend=False,
    user_agent=None,
):
    """Answer ``question`` against a knowledge base text and log the outcome.

    The language and authorization are validated, the question and the
    knowledge base text are checked against their size limits, and
    ``request_torchserve`` is asked for answers. A background thread is then
    started to send the log entry through ``backend().send_log_qa_nlp_parse``.

    Returns:
        The value returned by ``request_torchserve``, unchanged.

    Raises:
        EmptyInputException: if ``question`` is falsy or not exactly a ``str``.
        LargeQuestionException: if the question exceeds
            ``BOTHUB_NLP_API_QA_QUESTION_LIMIT``.
        EmptyBaseException: if the knowledge base response has no text.
        LargeContextException: if the text exceeds
            ``BOTHUB_NLP_API_QA_TEXT_LIMIT``.
    """
    language_validation(language)
    user_base_authorization = repository_authorization_validation(authorization)

    if not question or type(question) != str:
        raise EmptyInputException()
    question_length = len(question)
    if question_length > BOTHUB_NLP_API_QA_QUESTION_LIMIT:
        raise LargeQuestionException(
            question_length, limit=BOTHUB_NLP_API_QA_QUESTION_LIMIT
        )

    knowledge_base = backend().request_backend_knowledge_bases(
        user_base_authorization, knowledge_base_id, language
    )
    text = knowledge_base.get("text")

    if not text:
        raise EmptyBaseException()
    text_length = len(text)
    if text_length > BOTHUB_NLP_API_QA_TEXT_LIMIT:
        raise LargeContextException(
            text_length, limit=BOTHUB_NLP_API_QA_TEXT_LIMIT
        )

    result = request_torchserve(text, question, language)

    # Only the first answer is reported in the log entry.
    answers = result["answers"]
    if len(answers) == 0:
        answer, confidence = "", 0.0
    else:
        first_answer = answers[0]
        answer = first_answer["text"]
        confidence = float(first_answer["confidence"])

    send_log = backend().send_log_qa_nlp_parse
    log_data = {
        "answer": answer,
        "confidence": confidence,
        "question": question,
        "user_agent": user_agent,
        "nlp_log": json.dumps(result),
        "user": str(user_base_authorization),
        "knowledge_base": int(knowledge_base_id),
        "language": language,
        "from_backend": from_backend,
    }
    # Logging runs on its own thread so the caller is not blocked by it.
    log = threading.Thread(target=send_log, kwargs={"data": log_data})
    log.start()

    return result
Exemple #3
0
def _debug_parse(authorization, text, language, repository_version=None):
    """Run ``text`` through the Celery debug-parse task.

    The language, authorization and text are validated first. The repository
    update is then looked up for ``language``; if that response has no truthy
    ``"version"``, each language listed for it in ``DEFAULT_LANGS_PRIORITY``
    is tried in order. A lookup that raises is treated as an empty response,
    so one failing fallback language does not stop the remaining ones from
    being tried.

    Returns:
        The task result, extended with ``"text"``, ``"repository_version"``
        and ``"language"``.

    Raises:
        ValidationError: if ``text`` is empty or not a string, or if no lookup
            returned a truthy ``"version"``.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.
    """
    from ..utils import DEFAULT_LANGS_PRIORITY

    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    if not isinstance(text, str) or not text:
        raise ValidationError("Text required.")

    def _request_update(lang):
        # Best-effort lookup: a backend failure counts as "nothing found".
        try:
            return backend().request_backend_parse(
                repository_authorization, lang, repository_version
            )
        except Exception:
            return {}

    update = _request_update(language)

    if not update.get("version"):
        for next_language in DEFAULT_LANGS_PRIORITY.get(language, []):
            update = _request_update(next_language)
            if update.get("version"):
                break

    if not update.get("version"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(update)
    try:
        answer_task = celery_app.send_task(
            TASK_NLU_DEBUG_PARSE_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                text,
            ],
            queue=queue_name(update.get("language"), ACTION_DEBUG_PARSE, model),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Report the language that was actually resolved, not the requested one.
    answer.update({
        "text": text,
        "repository_version": update.get("repository_version"),
        "language": update.get("language"),
    })
    return answer
Exemple #4
0
def crossvalidation_evaluate_handler(authorization,
                                     language,
                                     repository_version=None):
    """Submit a cross-validation evaluate job and report its status.

    After validating the authorization and language, the backend is asked to
    start an automatic evaluate. A failed request is treated as an empty
    response. The job is then submitted through ``send_job_train_ai_platform``
    and its id is saved through ``request_backend_save_queue_id``.

    Returns:
        A report dict with status ``EVALUATE_STATUS_PROCESSING`` on success,
        or ``{"status": EVALUATE_STATUS_FAILED, "error": ...}`` if anything
        raised while submitting the job.

    Raises:
        ValidationError: if the backend response has no truthy
            ``"can_run_automatic_evaluate"``.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    try:
        repository = backend().request_backend_start_automatic_evaluate(
            repository_authorization, repository_version, language)
    except Exception:
        repository = {}

    if not repository.get("can_run_automatic_evaluate"):
        raise ValidationError("Validation error")

    model = get_language_model(repository)

    # Everything below is best-effort: any failure becomes a FAILED report.
    try:
        job_id = f'bothub_{settings.ENVIRONMENT}_evaluate_{repository.get("repository_version_language_id")}_{language}_{str(int(time.time()))}'
        version_language_id = repository.get("repository_version_language_id")
        authorization_text = str(repository_authorization)

        send_job_train_ai_platform(
            jobId=job_id,
            repository_version=str(version_language_id),
            by_id=str(repository.get("user_id")),
            repository_authorization=authorization_text,
            language=language,
            type_model=model,
            operation="evaluate",
        )
        backend().request_backend_save_queue_id(
            update_id=str(version_language_id),
            repository_authorization=authorization_text,
            task_id=job_id,
            from_queue=0,
            type_processing=2,
        )
        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": version_language_id,
            "evaluate_id": None,
            "evaluate_version": None,
            "cross_validation": True,
        }
    except Exception as e:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(e)}
Exemple #5
0
def _parse(authorization, text, language, rasa_format=False):
    """Parse ``text`` with the trained update of a repository.

    The update is looked up for ``language``; if that response has no truthy
    ``"update"``, each language listed for it in ``NEXT_LANGS`` is tried in
    order. A lookup that raises is treated as an empty response, so one
    failing fallback language does not stop the remaining ones from being
    tried.

    Returns:
        The parse task result, extended with ``"text"``, ``"update_id"`` and
        ``"language"``.

    Raises:
        ValidationError: if ``language`` is truthy but is a key of neither
            ``settings.SUPPORTED_LANGUAGES`` nor ``NEXT_LANGS``, or if no
            lookup returned a truthy ``"update"``.
        AuthorizationIsRequired: if no repository authorization could be
            extracted from ``authorization``.
    """
    from ..utils import NEXT_LANGS

    if language and (
        language not in settings.SUPPORTED_LANGUAGES
        and language not in NEXT_LANGS
    ):
        raise ValidationError(f"Language '{language}' not supported by now.")

    repository_authorization = get_repository_authorization(authorization)
    if not repository_authorization:
        raise AuthorizationIsRequired()

    def _request_update(lang):
        # Best-effort lookup: a backend failure counts as "nothing found".
        try:
            return backend().request_backend_parse(
                "parse", repository_authorization, lang
            )
        except Exception:
            return {}

    update = _request_update(language)

    if not update.get("update"):
        for next_language in NEXT_LANGS.get(language, []):
            update = _request_update(next_language)
            if update.get("update"):
                break

    if not update.get("update"):
        raise ValidationError("This repository has never been trained")

    answer_task = celery_app.send_task(
        TASK_NLU_PARSE_TEXT,
        args=[update.get("update_id"), repository_authorization, text],
        kwargs={"rasa_format": rasa_format},
        queue=queue_name(ACTION_PARSE, update.get("language")),
    )
    answer_task.wait()

    answer = answer_task.result
    # Report the language that was actually resolved, not the requested one.
    answer.update(
        {
            "text": text,
            "update_id": update.get("update_id"),
            "language": update.get("language"),
        }
    )

    return answer
Exemple #6
0
def _words_distribution(authorization, language, repository_version=None):
    """Run the words-distribution Celery task and return its result.

    The language and authorization are validated, the backend is asked for
    the training data of the language, and the task is dispatched with the
    ``"current_version_id"`` taken from that response.

    Raises:
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.
    """
    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    train_info = backend().request_backend_train(
        repository_authorization, language, repository_version
    )

    try:
        task = celery_app.send_task(
            TASK_NLU_WORDS_DISTRIBUTION,
            args=[
                train_info.get("current_version_id"),
                language,
                repository_authorization,
            ],
            queue=queue_name(language, ACTION_WORDS_DISTIRBUTION),
        )
        task.wait()
        return task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
Exemple #7
0
async def info_handler(
        request: Request = Depends(AuthorizationRequired()),
        Authorization: str = Header(..., description="Bearer your_key"),
):
    """Return the backend "info" response with ``intents_list`` renamed.

    The value stored under ``"intents_list"`` is moved to ``"intents"``;
    a ``KeyError`` is raised if the response lacks ``"intents_list"``.
    """
    repository_authorization = get_repository_authorization(Authorization)
    repository_info = backend().request_backend_parse(
        "info", repository_authorization
    )
    # Move the list to the key name exposed to callers.
    repository_info["intents"] = repository_info.pop("intents_list")
    return repository_info
Exemple #8
0
async def info_handler(
        request: Request = Depends(AuthorizationRequired()),
        Authorization: str = Header(..., description="Bearer your_key"),
):
    """Return the backend info for the authorized repository.

    Raises:
        HTTPException: with status 400 and the whole response as detail when
            the response carries a truthy ``"detail"`` entry.
    """
    repository_authorization = repository_authorization_validation(Authorization)
    repository_info = backend().request_backend_info(repository_authorization)

    if not repository_info.get("detail"):
        return repository_info

    raise HTTPException(status_code=400, detail=repository_info)
Exemple #9
0
def evaluate_handler(authorization, language, repository_version=None):
    """Run the evaluate Celery task for a repository and report its status.

    After validating the authorization and language, the backend is asked
    for the evaluate data of the language. A failed request is treated as an
    empty response. The task is dispatched with cross validation disabled.

    Returns:
        A report dict with status ``EVALUATE_STATUS_PROCESSING`` on success,
        or ``{"status": EVALUATE_STATUS_FAILED, "error": ...}`` if anything
        other than a time limit raised while running the task.

    Raises:
        ValidationError: if the backend response has no truthy ``"update"``.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    try:
        repository = backend().request_backend_evaluate(
            repository_authorization, language, repository_version)
    except Exception:
        repository = {}

    if not repository.get("update"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(repository)
    cross_validation = False

    try:
        task_args = [
            repository_version,
            repository.get("repository_version"),  # repository_version_language_id
            repository_authorization,
            cross_validation,
            repository.get("language"),
        ]
        evaluate_task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=task_args,
            queue=queue_name(repository.get("language"), ACTION_EVALUATE, model),
        )
        evaluate_task.wait()
        evaluate = evaluate_task.result

        # The task may yield no result; the ids are then reported as None.
        if evaluate is None:
            evaluate_id = None
            evaluate_version = None
        else:
            evaluate_id = evaluate.get("id")
            evaluate_version = evaluate.get("version")

        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": repository.get("repository_version"),
            "evaluate_id": evaluate_id,
            "evaluate_version": evaluate_version,
            "cross_validation": cross_validation,
        }
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
    except Exception as e:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(e)}
Exemple #10
0
def _intent_sentence_suggestion(
    authorization,
    language,
    intent,
    n_sentences_to_generate,
    percentage_to_replace,
    repository_version=None,
):
    """Run the intent sentence suggestion Celery task.

    The authorization, language and the three generation parameters are
    validated. The repository update is then looked up, with a failed lookup
    treated as an empty response, and the task is dispatched on the "SPACY"
    queue.

    Returns:
        The task result, extended with the language, the two generation
        parameters and the intent.

    Raises:
        ValidationError: if ``intent`` is falsy or not exactly a ``str``;
            if ``n_sentences_to_generate`` is not exactly an ``int`` in
            1..50; or if ``percentage_to_replace`` is falsy, not exactly a
            ``float``, ``<= 0`` or ``> 1``.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    if not intent or type(intent) != str:
        raise ValidationError("Invalid intent")

    bad_sentence_count = (
        not n_sentences_to_generate
        or type(n_sentences_to_generate) != int
        or n_sentences_to_generate <= 0
        or n_sentences_to_generate > 50
    )
    if bad_sentence_count:
        raise ValidationError("Invalid number of sentences to generate")

    bad_percentage = (
        not percentage_to_replace
        or type(percentage_to_replace) != float
        or percentage_to_replace <= 0
        or percentage_to_replace > 1
    )
    if bad_percentage:
        raise ValidationError("Invalid percentage to replace")

    try:
        update = backend().request_backend_parse(
            repository_authorization, language, repository_version
        )
    except Exception:
        update = {}

    try:
        task_args = [
            update.get("repository_version"),
            repository_authorization,
            intent,
            percentage_to_replace,
            n_sentences_to_generate,
        ]
        answer_task = celery_app.send_task(
            TASK_NLU_INTENT_SENTENCE_SUGGESTION_TEXT,
            args=task_args,
            queue=queue_name(
                language, ACTION_INTENT_SENTENCE_SUGGESTION, "SPACY"
            ),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Echo the request parameters back alongside the task output.
    answer.update(
        language=language,
        n_sentences_to_generate=n_sentences_to_generate,
        percentage_to_replace=percentage_to_replace,
        intent=intent,
    )
    return answer
Exemple #11
0
def train_handler(authorization, repository_version=None, language=None):
    """Start training for the languages of a repository that are ready.

    With a ``language``, only that language is considered, and only if its
    backend status has a truthy ``"ready_for_train"``. Without one, the
    backend is asked for every ready-to-train language. Each selected entry
    is dispatched according to ``settings.BOTHUB_SERVICE_TRAIN``: as a Celery
    task for "celery", or as an AI Platform job for "ai-platform".

    Returns:
        A dict with the supported language codes under
        ``"SUPPORTED_LANGUAGES"`` and, under ``"languages_report"``, a
        ``TRAIN_STATUS_PROCESSING`` entry for every language iterated.
    """
    repository_authorization = repository_authorization_validation(authorization)

    languages_report = {}
    train_tasks = []

    if not language:
        ready_to_train_languages = backend().request_all_readytotrain_languages(
            repository_authorization, repository_version
        )
    else:
        language_validation(language)
        language_status = backend().request_backend_train(
            repository_authorization, language, repository_version
        )
        if language_status.get("ready_for_train"):
            ready_to_train_languages = [language_status]
        else:
            ready_to_train_languages = []

    for repository in ready_to_train_languages:
        model = get_language_model(repository)
        train_service = settings.BOTHUB_SERVICE_TRAIN
        repository_language = repository.get("language")
        current_version_id = repository.get("current_version_id")
        authorization_user_id = repository.get("repository_authorization_user_id")

        if train_service == "celery":
            train_task = celery_app.send_task(
                TASK_NLU_TRAIN_UPDATE,
                args=[
                    current_version_id,
                    authorization_user_id,
                    repository_authorization,
                ],
                queue=queue_name(repository_language, ACTION_TRAIN, model),
            )
            train_tasks.append(
                {"task": train_task, "language": repository_language}
            )
        elif train_service == "ai-platform":
            job_id = f'bothub_{settings.ENVIRONMENT}_train_{str(repository.get("current_version_id"))}_{repository.get("language")}_{str(int(time.time()))}'
            authorization_text = str(repository_authorization)
            send_job_train_ai_platform(
                jobId=job_id,
                repository_version=str(current_version_id),
                by_id=str(authorization_user_id),
                repository_authorization=authorization_text,
                language=repository_language,
                type_model=model,
                operation="train",
            )
            backend().request_backend_save_queue_id(
                update_id=str(current_version_id),
                repository_authorization=authorization_text,
                task_id=job_id,
                from_queue=0,
                type_processing=0,
            )

        # Reported as processing whichever service branch (if any) ran.
        languages_report[repository_language] = {
            "status": TRAIN_STATUS_PROCESSING
        }

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }
Exemple #12
0
def _parse(
    authorization,
    text,
    language,
    rasa_format=False,
    repository_version=None,
    user_agent=None,
    from_backend=False,
):
    """Parse ``text`` with a trained repository version and log the result.

    The repository is resolved through ``check_language_priority``, the parse
    task is dispatched through Celery, and the answer is extended with the
    text, repository version, language and entity groups. A missing or
    ``None`` ``"intent_ranking"`` is replaced by an empty list. A background
    thread is then started to send the log entry through
    ``backend().send_log_nlp_parse``.

    Returns:
        The extended task result.

    Raises:
        ValidationError: if ``text`` is empty or not exactly a ``str``, or if
            the resolved repository has no truthy ``"version"``.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.
    """
    repository_authorization = repository_authorization_validation(authorization)

    if type(text) != str or not text:
        raise ValidationError("Invalid text.")

    repository = check_language_priority(
        language, repository_authorization, repository_version
    )
    if not repository.get("version"):
        raise ValidationError("This repository has never been trained.")

    model = get_language_model(repository)

    try:
        task_args = [
            repository.get("repository_version"),
            repository_authorization,
            text,
        ]
        answer_task = celery_app.send_task(
            TASK_NLU_PARSE_TEXT,
            args=task_args,
            kwargs={"rasa_format": rasa_format},
            queue=queue_name(repository.get("language"), ACTION_PARSE, model),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    entities_dict = get_entities_dict(answer)
    answer.update(
        text=text,
        repository_version=repository.get("repository_version"),
        language=repository.get("language"),
        group_list=list(entities_dict.keys()),
        entities=entities_dict,
    )

    # Guarantee a list so the log entry below can always iterate it.
    if answer.get("intent_ranking") is None:
        answer["intent_ranking"] = []

    send_log = backend().send_log_nlp_parse
    log_data = {
        "text": text,
        "from_backend": from_backend,
        "user_agent": user_agent,
        "user": str(repository_authorization),
        "repository_version_language": int(repository.get("repository_version")),
        "nlp_log": json.dumps(answer),
        "log_intent": [
            {
                "intent": ranked["name"],
                "is_default": ranked["name"] == answer["intent"]["name"],
                "confidence": ranked["confidence"],
            }
            for ranked in answer.get("intent_ranking", [])
        ],
    }
    # Logging runs on its own thread so the caller is not blocked by it.
    log = threading.Thread(target=send_log, kwargs={"data": log_data})
    log.start()

    return answer