예제 #1
0
def _word_suggestion(text, language, n_words_to_generate):
    """Dispatch the word-suggestion task for ``text`` and return its result.

    The task ``TASK_NLU_WORD_SUGGESTION_TEXT`` is sent to the queue resolved
    by ``queue_name(language, ACTION_WORD_SUGGESTION, "SPACY")`` and this
    function blocks until it completes.

    Raises:
        ValidationError: if ``text`` is not a non-empty ``str`` or
            ``n_words_to_generate`` is not an ``int`` between 1 and 50.
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.

    Returns:
        The task result, updated in place with the requested ``text``,
        ``language`` and ``n_words_to_generate``.
    """
    language_validation(language)

    text_is_valid = text and type(text) is str
    if not text_is_valid:
        raise ValidationError("Invalid text")

    # The exact type check also rejects bool, which is a subclass of int.
    count_is_valid = (
        n_words_to_generate
        and type(n_words_to_generate) is int
        and 0 < n_words_to_generate <= 50
    )
    if not count_is_valid:
        raise ValidationError("Invalid number of words to generate")

    try:
        target_queue = queue_name(language, ACTION_WORD_SUGGESTION, "SPACY")
        task = celery_app.send_task(
            TASK_NLU_WORD_SUGGESTION_TEXT,
            args=[text, n_words_to_generate],
            queue=target_queue,
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Echo the request parameters back alongside the worker's answer.
    answer.update(
        text=text,
        language=language,
        n_words_to_generate=n_words_to_generate,
    )
    return answer
예제 #2
0
def _words_distribution(authorization, language, repository_version=None):
    """Dispatch the words-distribution task and return its result.

    The backend is asked (``request_backend_train``) for the repository state
    of ``language``; its ``current_version_id`` is forwarded to the
    ``TASK_NLU_WORDS_DISTRIBUTION`` task together with the language and the
    validated authorization. The call blocks until the task completes.

    Raises:
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.
        Whatever ``language_validation`` or
        ``repository_authorization_validation`` raise for invalid input.
    """
    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    train_state = backend().request_backend_train(
        repository_authorization, language, repository_version
    )
    task_args = [
        train_state.get("current_version_id"),
        language,
        repository_authorization,
    ]

    try:
        task = celery_app.send_task(
            TASK_NLU_WORDS_DISTRIBUTION,
            args=task_args,
            queue=queue_name(language, ACTION_WORDS_DISTIRBUTION),
        )
        task.wait()
        return task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
예제 #3
0
def evaluate_handler(authorization, language, repository_version=None):
    """Dispatch an evaluation task and return a status report for it.

    Raises:
        ValidationError: if the backend lookup fails or its response has no
            truthy ``"update"`` entry.
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.

    Returns:
        A dict with ``status`` set to ``EVALUATE_STATUS_PROCESSING`` plus the
        evaluation identifiers on success, or a dict with ``status`` set to
        ``EVALUATE_STATUS_FAILED`` and the error text for any other failure.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    try:
        repository = backend().request_backend_evaluate(
            repository_authorization, language, repository_version
        )
    except Exception:
        # A failed backend lookup is handled like an untrained repository.
        repository = {}

    if not repository.get("update"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(repository)
    cross_validation = False

    try:
        repository_language = repository.get("language")
        # Passed to the task as the repository_version_language_id argument.
        version_language_id = repository.get("repository_version")

        task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=[
                repository_version,
                version_language_id,
                repository_authorization,
                cross_validation,
                repository_language,
            ],
            queue=queue_name(repository_language, ACTION_EVALUATE, model),
        )
        task.wait()
        evaluate = task.result

        # The task may yield no result; report empty identifiers then.
        if evaluate is None:
            evaluate_id = evaluate_version = None
        else:
            evaluate_id = evaluate.get("id")
            evaluate_version = evaluate.get("version")

        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": version_language_id,
            "evaluate_id": evaluate_id,
            "evaluate_version": evaluate_version,
            "cross_validation": cross_validation,
        }
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
    except Exception as error:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
예제 #4
0
def _intent_sentence_suggestion(
    authorization,
    language,
    intent,
    n_sentences_to_generate,
    percentage_to_replace,
    repository_version=None,
):
    """Dispatch the intent sentence-suggestion task and return its result.

    Args:
        authorization: value checked by ``repository_authorization_validation``.
        language: language code checked by ``language_validation``.
        intent: non-empty ``str`` naming the intent.
        n_sentences_to_generate: ``int`` between 1 and 50.
        percentage_to_replace: ``float`` greater than 0 and at most 1.
        repository_version: optional version forwarded to the backend lookup.

    Raises:
        ValidationError: if any of the arguments above is out of contract.
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.

    Returns:
        The task result, updated in place with the requested ``language``,
        ``n_sentences_to_generate``, ``percentage_to_replace`` and ``intent``.
    """
    repository_authorization = repository_authorization_validation(
        authorization)
    language_validation(language)

    if not intent or type(intent) is not str:
        raise ValidationError("Invalid intent")

    # The exact type check also rejects bool, which is a subclass of int.
    if (type(n_sentences_to_generate) is not int
            or not (0 < n_sentences_to_generate <= 50)):
        raise ValidationError("Invalid number of sentences to generate")

    # The range is tested positively so that NaN is rejected: every ordering
    # comparison with NaN is False, so a negative test such as
    # "value <= 0 or value > 1" lets NaN slip through to the worker.
    if (type(percentage_to_replace) is not float
            or not (0 < percentage_to_replace <= 1)):
        raise ValidationError("Invalid percentage to replace")

    try:
        update = backend().request_backend_parse(repository_authorization,
                                                 language, repository_version)
    except Exception:
        # Best effort: on backend failure the task is still dispatched, with
        # ``None`` as the repository version.
        update = {}

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_INTENT_SENTENCE_SUGGESTION_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                intent,
                percentage_to_replace,
                n_sentences_to_generate,
            ],
            queue=queue_name(language, ACTION_INTENT_SENTENCE_SUGGESTION,
                             "SPACY"),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Echo the request parameters back alongside the worker's answer.
    answer.update({
        "language": language,
        "n_sentences_to_generate": n_sentences_to_generate,
        "percentage_to_replace": percentage_to_replace,
        "intent": intent,
    })
    return answer
예제 #5
0
def _debug_parse(authorization, text, language, repository_version=None):
    """Dispatch the debug-parse task for ``text`` and return its result.

    The backend is queried for ``language`` first. If that response has no
    truthy ``"version"``, the languages listed for ``language`` in
    ``DEFAULT_LANGS_PRIORITY`` are tried in order until one does.

    Raises:
        ValidationError: if ``text`` is not a non-empty ``str`` or no queried
            language yields a response with a truthy ``"version"``.
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.

    Returns:
        The task result, updated in place with ``text`` and with the
        ``repository_version`` and ``language`` of the backend response used.
    """
    from ..utils import DEFAULT_LANGS_PRIORITY

    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    if not (type(text) is str and text):
        raise ValidationError("Text required.")

    try:
        update = backend().request_backend_parse(
            repository_authorization, language, repository_version
        )
    except Exception:
        # A failed lookup for the requested language falls through to the
        # fallback languages below.
        update = {}

    if not update.get("version"):
        for fallback_language in DEFAULT_LANGS_PRIORITY.get(language, []):
            update = backend().request_backend_parse(
                repository_authorization, fallback_language, repository_version
            )
            if update.get("version"):
                break

    if not update.get("version"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(update)
    try:
        task = celery_app.send_task(
            TASK_NLU_DEBUG_PARSE_TEXT,
            args=[update.get("repository_version"), repository_authorization, text],
            queue=queue_name(update.get("language"), ACTION_DEBUG_PARSE, model),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    answer.update(
        text=text,
        repository_version=update.get("repository_version"),
        language=update.get("language"),
    )
    return answer
예제 #6
0
def _parse(authorization, text, language, rasa_format=False):
    """Dispatch the parse task for ``text`` and return its result.

    The backend is queried for ``language`` first. If that response has no
    truthy ``"update"``, the languages listed for ``language`` in
    ``NEXT_LANGS`` are tried in order until one does.

    Raises:
        ValidationError: if a truthy ``language`` is in neither
            ``settings.SUPPORTED_LANGUAGES`` nor ``NEXT_LANGS``, or if no
            queried language yields a response with a truthy ``"update"``.
        AuthorizationIsRequired: if no repository authorization is resolved.

    Returns:
        The task result, updated in place with ``text`` and with the
        ``update_id`` and ``language`` of the backend response used.
    """
    from ..utils import NEXT_LANGS

    if language:
        language_is_known = (
            language in settings.SUPPORTED_LANGUAGES or language in NEXT_LANGS
        )
        if not language_is_known:
            raise ValidationError(
                "Language '{}' not supported by now.".format(language)
            )

    repository_authorization = get_repository_authorization(authorization)
    if not repository_authorization:
        raise AuthorizationIsRequired()

    try:
        update = backend().request_backend_parse(
            "parse", repository_authorization, language
        )
    except Exception:
        # A failed lookup for the requested language falls through to the
        # fallback languages below.
        update = {}

    if not update.get("update"):
        for fallback_language in NEXT_LANGS.get(language, []):
            update = backend().request_backend_parse(
                "parse", repository_authorization, fallback_language
            )
            if update.get("update"):
                break

    if not update.get("update"):
        raise ValidationError("This repository has never been trained")

    task = celery_app.send_task(
        TASK_NLU_PARSE_TEXT,
        args=[update.get("update_id"), repository_authorization, text],
        kwargs={"rasa_format": rasa_format},
        queue=queue_name(ACTION_PARSE, update.get("language")),
    )
    task.wait()

    answer = task.result
    answer.update(
        text=text,
        update_id=update.get("update_id"),
        language=update.get("language"),
    )
    return answer
예제 #7
0
def evaluate_handler(authorization, language):
    """Run the evaluation task for a repository and return a status report.

    Raises:
        ValidationError: if a truthy ``language`` is in neither
            ``settings.SUPPORTED_LANGUAGES`` nor ``NEXT_LANGS``, or if the
            backend lookup fails or has no truthy ``"update"`` entry.
        AuthorizationIsRequired: if no repository authorization is resolved.

    Returns:
        A dict with ``status`` set to ``EVALUATE_STATUS_EVALUATED`` plus the
        evaluation identifiers on success, or a dict with ``status`` set to
        ``EVALUATE_STATUS_FAILED`` and the error text if dispatching, waiting
        for, or reading the task result raises.
    """
    if language:
        language_is_known = (
            language in settings.SUPPORTED_LANGUAGES or language in NEXT_LANGS
        )
        if not language_is_known:
            raise ValidationError(
                "Language '{}' not supported by now.".format(language))

    repository_authorization = get_repository_authorization(authorization)
    if not repository_authorization:
        raise AuthorizationIsRequired()

    try:
        update = backend().request_backend_parse(
            "evaluate", repository_authorization, language)
    except Exception:
        # A failed backend lookup is handled like an untrained repository.
        update = {}

    if not update.get("update"):
        raise ValidationError("This repository has never been trained")

    try:
        update_id = update.get("update_id")
        task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=[update_id, update.get("user_id"), repository_authorization],
            queue=queue_name(ACTION_EVALUATE, update.get("language")),
        )
        task.wait()
        evaluate = task.result
        return {
            "language": language,
            "status": EVALUATE_STATUS_EVALUATED,
            "update_id": update_id,
            "evaluate_id": evaluate.get("id"),
            "evaluate_version": evaluate.get("version"),
        }
    except Exception as error:
        # Failures are reported in the returned dict rather than raised.
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
예제 #8
0
def train_handler(authorization):
    """Train every supported language that is ready and report per language.

    For each key of ``settings.SUPPORTED_LANGUAGES`` the backend is queried;
    when the response has a truthy ``"ready_for_train"`` the
    ``TASK_NLU_TRAIN_UPDATE`` task is sent and awaited before moving on to the
    next language.

    Returns:
        A dict with the list of supported languages and a
        ``languages_report`` mapping each language to a status dict
        (not ready, trained, or failed with the error text).
    """
    repository_authorization = get_repository_authorization(authorization)
    languages_report = {}

    for language in settings.SUPPORTED_LANGUAGES:
        current_update = backend().request_backend_parse(
            "train", repository_authorization, language)

        if current_update.get("ready_for_train"):
            try:
                task = celery_app.send_task(
                    TASK_NLU_TRAIN_UPDATE,
                    args=[
                        current_update.get("current_update_id"),
                        current_update.get("repository_authorization_user_id"),
                        repository_authorization,
                    ],
                    queue=queue_name(ACTION_TRAIN,
                                     current_update.get("language")),
                )
                task.wait()
                language_status = {"status": TRAIN_STATUS_TRAINED}
            except Exception as error:
                # A failure for one language is recorded in the report and
                # does not stop the remaining languages from being processed.
                language_status = {
                    "status": TRAIN_STATUS_FAILED,
                    "error": str(error),
                }
        else:
            language_status = {"status": TRAIN_STATUS_NOT_READY_FOR_TRAIN}

        languages_report[language] = language_status

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }
예제 #9
0
def _sentence_suggestion(
    text, language, n_sentences_to_generate, percentage_to_replace
):
    """Dispatch the sentence-suggestion task for ``text`` and return its result.

    Args:
        text: non-empty ``str`` to send to the task.
        language: language code checked by ``language_validation``.
        n_sentences_to_generate: ``int`` between 1 and 50.
        percentage_to_replace: ``float`` greater than 0 and at most 1.

    Raises:
        ValidationError: if any of the arguments above is out of contract.
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.

    Returns:
        The task result, updated in place with the requested ``text``,
        ``language``, ``n_sentences_to_generate`` and
        ``percentage_to_replace``.
    """
    language_validation(language)

    if not text or type(text) is not str:
        raise ValidationError("Invalid text")

    # The exact type check also rejects bool, which is a subclass of int.
    if (
        type(n_sentences_to_generate) is not int
        or not (0 < n_sentences_to_generate <= 50)
    ):
        raise ValidationError("Invalid number of sentences to generate")

    # The range is tested positively so that NaN is rejected: every ordering
    # comparison with NaN is False, so a negative test such as
    # "value <= 0 or value > 1" lets NaN slip through to the worker.
    if (
        type(percentage_to_replace) is not float
        or not (0 < percentage_to_replace <= 1)
    ):
        raise ValidationError("Invalid percentage to replace")

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_SENTENCE_SUGGESTION_TEXT,
            args=[text, percentage_to_replace, n_sentences_to_generate],
            queue=queue_name(language, ACTION_SENTENCE_SUGGESTION, "SPACY"),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Echo the request parameters back alongside the worker's answer.
    answer.update(
        {
            "text": text,
            "language": language,
            "n_sentences_to_generate": n_sentences_to_generate,
            "percentage_to_replace": percentage_to_replace,
        }
    )
    return answer
예제 #10
0
def train_handler(authorization, repository_version=None, language=None):
    """Start training for the ready languages of a repository.

    With a truthy ``language`` only that language is considered, and only if
    the backend response has a truthy ``"ready_for_train"``. Otherwise the
    backend is asked for all ready-to-train languages. Each one is dispatched
    according to ``settings.BOTHUB_SERVICE_TRAIN``: as a Celery task
    (``"celery"``) or through ``send_job_train_ai_platform`` followed by
    ``request_backend_save_queue_id`` (``"ai-platform"``). Training is not
    awaited here.

    Returns:
        A dict with the list of supported languages and a
        ``languages_report`` mapping every iterated language to
        ``TRAIN_STATUS_PROCESSING``.
    """
    repository_authorization = repository_authorization_validation(authorization)

    languages_report = {}
    train_tasks = []

    if not language:
        ready_to_train_languages = backend().request_all_readytotrain_languages(
            repository_authorization, repository_version
        )
    else:
        language_validation(language)
        language_status = backend().request_backend_train(
            repository_authorization, language, repository_version
        )
        if language_status.get("ready_for_train"):
            ready_to_train_languages = [language_status]
        else:
            ready_to_train_languages = []

    for repository in ready_to_train_languages:
        model = get_language_model(repository)
        repository_language = repository.get("language")
        train_service = settings.BOTHUB_SERVICE_TRAIN

        if train_service == "celery":
            task = celery_app.send_task(
                TASK_NLU_TRAIN_UPDATE,
                args=[
                    repository.get("current_version_id"),
                    repository.get("repository_authorization_user_id"),
                    repository_authorization,
                ],
                queue=queue_name(repository_language, ACTION_TRAIN, model),
            )
            train_tasks.append({"task": task, "language": repository_language})
        elif train_service == "ai-platform":
            version_id = str(repository.get("current_version_id"))
            # Job id is built from environment, version, language and the
            # current Unix timestamp in whole seconds.
            job_id = f'bothub_{settings.ENVIRONMENT}_train_{str(repository.get("current_version_id"))}_{repository.get("language")}_{str(int(time.time()))}'
            send_job_train_ai_platform(
                jobId=job_id,
                repository_version=version_id,
                by_id=str(repository.get("repository_authorization_user_id")),
                repository_authorization=str(repository_authorization),
                language=repository_language,
                type_model=model,
                operation="train",
            )
            backend().request_backend_save_queue_id(
                update_id=version_id,
                repository_authorization=str(repository_authorization),
                task_id=job_id,
                from_queue=0,
                type_processing=0,
            )

        # Recorded for every iterated language, whichever branch ran above.
        languages_report[repository_language] = {"status": TRAIN_STATUS_PROCESSING}

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }
예제 #11
0
def _parse(
    authorization,
    text,
    language,
    rasa_format=False,
    repository_version=None,
    user_agent=None,
    from_backend=False,
):
    """Dispatch the parse task for ``text``, log the outcome and return it.

    The repository is resolved through ``check_language_priority``. After the
    task completes, the answer is enriched with request and entity
    information, and a log entry is sent through
    ``backend().send_log_nlp_parse`` on a separate thread.

    Raises:
        ValidationError: if ``text`` is not a non-empty ``str`` or the
            resolved repository has no truthy ``"version"``.
        CeleryTimeoutException: if ``TimeLimitExceeded`` is raised while
            sending or waiting for the task.

    Returns:
        The task result, updated in place with ``text``,
        ``repository_version``, ``language``, ``group_list``, ``entities``
        and an ``intent_ranking`` that is never ``None``.
    """
    repository_authorization = repository_authorization_validation(authorization)

    if not (type(text) is str and text):
        raise ValidationError("Invalid text.")

    repository = check_language_priority(
        language, repository_authorization, repository_version
    )
    if not repository.get("version"):
        raise ValidationError("This repository has never been trained.")

    model = get_language_model(repository)

    try:
        task = celery_app.send_task(
            TASK_NLU_PARSE_TEXT,
            args=[repository.get("repository_version"), repository_authorization, text],
            kwargs={"rasa_format": rasa_format},
            queue=queue_name(repository.get("language"), ACTION_PARSE, model),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    entities_dict = get_entities_dict(answer)
    answer.update(
        text=text,
        repository_version=repository.get("repository_version"),
        language=repository.get("language"),
        group_list=list(entities_dict.keys()),
        entities=entities_dict,
    )

    # Normalise a missing or None ranking to an empty list.
    if answer.get("intent_ranking") is None:
        answer.update(intent_ranking=[])

    send_log = backend().send_log_nlp_parse
    version_language_id = int(repository.get("repository_version"))
    serialized_answer = json.dumps(answer)
    # One entry per ranked intent; is_default marks the one matching the
    # answer's own intent name.
    log_intent = [
        {
            "intent": result["name"],
            "is_default": result["name"] == answer["intent"]["name"],
            "confidence": result["confidence"],
        }
        for result in answer.get("intent_ranking", [])
    ]
    log_data = {
        "text": text,
        "from_backend": from_backend,
        "user_agent": user_agent,
        "user": str(repository_authorization),
        "repository_version_language": version_language_id,
        "nlp_log": serialized_answer,
        "log_intent": log_intent,
    }

    # The log call runs on its own thread, so the answer is returned without
    # waiting for it.
    log_thread = threading.Thread(target=send_log, kwargs={"data": log_data})
    log_thread.start()

    return answer
예제 #12
0
import subprocess
from bothub_nlp_celery.actions import queue_name
from bothub_nlp_celery import settings

# When BOTHUB_LANGUAGE_MODEL is set, build the queue name with queue_name()
# from the language queue and that model; otherwise use the language queue
# setting unchanged.
queue = (
    queue_name(
        settings.BOTHUB_NLP_LANGUAGE_QUEUE,
        model_name=settings.BOTHUB_LANGUAGE_MODEL,
    )
    if settings.BOTHUB_LANGUAGE_MODEL
    else settings.BOTHUB_NLP_LANGUAGE_QUEUE
)

# Run a Celery worker for the "celery_app" application consuming from the
# resolved queue; subprocess.run blocks until the worker process exits.
subprocess.run(
    [
        "celery", "-A", "celery_app", "worker",
        "-O", "fair",
        "-c", "1",
        "-l", "INFO",
        "-E",
        "--pool", "gevent",
        "-Q", queue,
    ]
)