def _word_suggestion(text, language, n_words_to_generate):
    """Request word suggestions for ``text`` and echo the request back.

    Validates the arguments, sends ``TASK_NLU_WORD_SUGGESTION_TEXT`` to the
    queue chosen for ``language`` and waits for the task to finish.

    Raises:
        ValidationError: if ``text`` is empty or not exactly a ``str``, or
            if ``n_words_to_generate`` is not exactly an ``int`` in 1..50.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        The task result, updated in place with the ``text``, ``language``
        and ``n_words_to_generate`` of this request.
    """
    language_validation(language)

    if not text or type(text) is not str:
        raise ValidationError("Invalid text")

    # Exact-type check on purpose: a bool is not accepted as a count.
    count_is_valid = (
        bool(n_words_to_generate)
        and type(n_words_to_generate) is int
        and 0 < n_words_to_generate <= 50
    )
    if not count_is_valid:
        raise ValidationError("Invalid number of words to generate")

    try:
        task = celery_app.send_task(
            TASK_NLU_WORD_SUGGESTION_TEXT,
            args=[text, n_words_to_generate],
            queue=queue_name(language, ACTION_WORD_SUGGESTION, "SPACY"),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    request_echo = {
        "text": text,
        "language": language,
        "n_words_to_generate": n_words_to_generate,
    }
    answer.update(request_echo)
    return answer
def _words_distribution(authorization, language, repository_version=None):
    """Run the words-distribution task for a repository and return its result.

    The language and the authorization are validated first. The backend is
    then asked (``request_backend_train``) for the current update, whose
    ``current_version_id`` is passed to ``TASK_NLU_WORDS_DISTRIBUTION``.

    Raises:
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        Whatever the task produced (``AsyncResult.result``), unmodified.
    """
    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    current_update = backend().request_backend_train(
        repository_authorization, language, repository_version
    )

    try:
        task_args = [
            current_update.get("current_version_id"),
            language,
            repository_authorization,
        ]
        task = celery_app.send_task(
            TASK_NLU_WORDS_DISTRIBUTION,
            args=task_args,
            queue=queue_name(language, ACTION_WORDS_DISTIRBUTION),
        )
        task.wait()
        return task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
def evaluate_handler(authorization, language, repository_version=None):
    """Dispatch the evaluate task for a repository and report the outcome.

    Raises:
        ValidationError: if the backend lookup fails or its answer has no
            truthy ``"update"`` entry.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        A report dict. On success it carries the requested ``language``,
        ``EVALUATE_STATUS_PROCESSING``, the repository version and the
        ``id``/``version`` of the task result (``None`` when the task
        returned ``None``). Any other exception raised while dispatching
        or waiting yields ``{"status": EVALUATE_STATUS_FAILED, "error": ...}``
        instead of propagating.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    try:
        repository = backend().request_backend_evaluate(
            repository_authorization, language, repository_version
        )
    except Exception:
        # A failed lookup is handled like an untrained repository below.
        repository = {}

    if not repository.get("update"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(repository)
    cross_validation = False

    try:
        task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=[
                repository_version,
                # repository_version_language_id
                repository.get("repository_version"),
                repository_authorization,
                cross_validation,
                repository.get("language"),
            ],
            queue=queue_name(repository.get("language"), ACTION_EVALUATE, model),
        )
        task.wait()
        evaluate = task.result

        if evaluate is None:
            evaluate_id = None
            evaluate_version = None
        else:
            evaluate_id = evaluate.get("id")
            evaluate_version = evaluate.get("version")

        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": repository.get("repository_version"),
            "evaluate_id": evaluate_id,
            "evaluate_version": evaluate_version,
            "cross_validation": cross_validation,
        }
    except TimeLimitExceeded:
        # Must precede the generic handler so timeouts are not reported
        # as an ordinary failed evaluation.
        raise CeleryTimeoutException()
    except Exception as error:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
def _intent_sentence_suggestion(
    authorization,
    language,
    intent,
    n_sentences_to_generate,
    percentage_to_replace,
    repository_version=None,
):
    """Request generated sentence suggestions for one intent of a repository.

    Validates the arguments, asks the backend (``request_backend_parse``)
    for the repository version, sends
    ``TASK_NLU_INTENT_SENTENCE_SUGGESTION_TEXT`` and waits for the task.

    Args:
        authorization: value handed to ``repository_authorization_validation``.
        language: language code, checked by ``language_validation``.
        intent: non-empty ``str`` naming the intent.
        n_sentences_to_generate: ``int`` in 1..50 (``bool`` is rejected).
        percentage_to_replace: ``float`` with ``0 < value <= 1``.
        repository_version: optional version forwarded to the backend.

    Raises:
        ValidationError: if any argument fails the checks above.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        The task result, updated in place with ``language``,
        ``n_sentences_to_generate``, ``percentage_to_replace`` and ``intent``.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    if not isinstance(intent, str) or not intent:
        raise ValidationError("Invalid intent")

    # bool is a subclass of int; reject it explicitly so True/False are
    # never read as 1/0.
    if (
        isinstance(n_sentences_to_generate, bool)
        or not isinstance(n_sentences_to_generate, int)
        or not 0 < n_sentences_to_generate <= 50
    ):
        raise ValidationError("Invalid number of sentences to generate")

    # Written as "not 0 < p <= 1" rather than "p <= 0 or p > 1" so that NaN
    # is rejected too: every ordering comparison with NaN is False.
    if (
        not isinstance(percentage_to_replace, float)
        or not 0 < percentage_to_replace <= 1
    ):
        raise ValidationError("Invalid percentage to replace")

    try:
        update = backend().request_backend_parse(
            repository_authorization, language, repository_version
        )
    except Exception:
        # Deliberate best effort, kept from the original behaviour.
        # NOTE(review): after a failed lookup the task is still sent with a
        # None repository version - confirm this is intended.
        update = {}

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_INTENT_SENTENCE_SUGGESTION_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                intent,
                percentage_to_replace,
                n_sentences_to_generate,
            ],
            queue=queue_name(language, ACTION_INTENT_SENTENCE_SUGGESTION, "SPACY"),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded as err:
        raise CeleryTimeoutException() from err

    answer.update(
        {
            "language": language,
            "n_sentences_to_generate": n_sentences_to_generate,
            "percentage_to_replace": percentage_to_replace,
            "intent": intent,
        }
    )
    return answer
def _debug_parse(authorization, text, language, repository_version=None):
    """Run the debug-parse task on ``text`` for the first usable language.

    The backend is queried for ``language`` first. If that lookup fails or
    its answer has no truthy ``"version"``, the languages listed for
    ``language`` in ``DEFAULT_LANGS_PRIORITY`` are tried in order.

    Raises:
        ValidationError: if ``text`` is empty or not exactly a ``str``, or
            if no queried language yields a truthy ``"version"``.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        The task result, updated in place with ``text`` plus the
        ``repository_version`` and ``language`` of the update actually used.
    """
    from ..utils import DEFAULT_LANGS_PRIORITY

    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    if type(text) is not str or not text:
        raise ValidationError("Text required.")

    try:
        update = backend().request_backend_parse(
            repository_authorization, language, repository_version
        )
    except Exception:
        update = {}

    if not update.get("version"):
        # Only the first lookup is guarded; errors raised while probing the
        # fallback languages propagate to the caller.
        for fallback_language in DEFAULT_LANGS_PRIORITY.get(language, []):
            update = backend().request_backend_parse(
                repository_authorization, fallback_language, repository_version
            )
            if update.get("version"):
                break
        else:
            # Reached when there are no fallbacks or none of them is trained.
            raise ValidationError("This repository has never been trained")

    model = get_language_model(update)

    try:
        task = celery_app.send_task(
            TASK_NLU_DEBUG_PARSE_TEXT,
            args=[update.get("repository_version"), repository_authorization, text],
            queue=queue_name(update.get("language"), ACTION_DEBUG_PARSE, model),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    parse_context = {
        "text": text,
        "repository_version": update.get("repository_version"),
        "language": update.get("language"),
    }
    answer.update(parse_context)
    return answer
def _parse(authorization, text, language, rasa_format=False):
    """Parse ``text`` with the repository's update for ``language``.

    If the backend lookup for ``language`` fails or its answer has no truthy
    ``"update"``, the languages listed for it in ``NEXT_LANGS`` are tried in
    order. The parse task is then sent and waited for.

    Raises:
        ValidationError: if a truthy ``language`` is in neither
            ``settings.SUPPORTED_LANGUAGES`` nor ``NEXT_LANGS``, or if no
            queried language yields a truthy ``"update"``.
        AuthorizationIsRequired: if no repository authorization is obtained
            from ``authorization``.

    Returns:
        The task result, updated in place with ``text`` plus the
        ``update_id`` and ``language`` of the update actually used.
    """
    from ..utils import NEXT_LANGS

    if language:
        language_is_known = (
            language in settings.SUPPORTED_LANGUAGES.keys()
            or language in NEXT_LANGS.keys()
        )
        if not language_is_known:
            raise ValidationError(
                "Language '{}' not supported by now.".format(language)
            )

    repository_authorization = get_repository_authorization(authorization)
    if not repository_authorization:
        raise AuthorizationIsRequired()

    try:
        update = backend().request_backend_parse(
            "parse", repository_authorization, language
        )
    except Exception:
        update = {}

    if not update.get("update"):
        # Only the first lookup is guarded; errors raised while probing the
        # fallback languages propagate to the caller.
        for fallback_language in NEXT_LANGS.get(language, []):
            update = backend().request_backend_parse(
                "parse", repository_authorization, fallback_language
            )
            if update.get("update"):
                break
        else:
            raise ValidationError("This repository has never been trained")

    task = celery_app.send_task(
        TASK_NLU_PARSE_TEXT,
        args=[update.get("update_id"), repository_authorization, text],
        kwargs={"rasa_format": rasa_format},
        queue=queue_name(ACTION_PARSE, update.get("language")),
    )
    task.wait()

    answer = task.result
    parse_context = {
        "text": text,
        "update_id": update.get("update_id"),
        "language": update.get("language"),
    }
    answer.update(parse_context)
    return answer
def evaluate_handler(authorization, language):
    """Run the evaluate task for a repository update and report the outcome.

    Raises:
        ValidationError: if a truthy ``language`` is in neither
            ``settings.SUPPORTED_LANGUAGES`` nor ``NEXT_LANGS``, or if the
            backend lookup fails or has no truthy ``"update"`` entry.
        AuthorizationIsRequired: if no repository authorization is obtained
            from ``authorization``.

    Returns:
        A report dict: on success the requested ``language``,
        ``EVALUATE_STATUS_EVALUATED``, the ``update_id`` and the
        ``id``/``version`` of the task result. Any exception raised while
        dispatching, waiting or reading the result yields
        ``{"status": EVALUATE_STATUS_FAILED, "error": ...}`` instead.
    """
    if language:
        language_is_known = (
            language in settings.SUPPORTED_LANGUAGES.keys()
            or language in NEXT_LANGS.keys()
        )
        if not language_is_known:
            raise ValidationError(
                "Language '{}' not supported by now.".format(language)
            )

    repository_authorization = get_repository_authorization(authorization)
    if not repository_authorization:
        raise AuthorizationIsRequired()

    try:
        update = backend().request_backend_parse(
            "evaluate", repository_authorization, language
        )
    except Exception:
        # A failed lookup is handled like an untrained repository below.
        update = {}

    if not update.get("update"):
        raise ValidationError("This repository has never been trained")

    try:
        task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=[
                update.get("update_id"),
                update.get("user_id"),
                repository_authorization,
            ],
            queue=queue_name(ACTION_EVALUATE, update.get("language")),
        )
        task.wait()
        evaluate = task.result
        return {
            "language": language,
            "status": EVALUATE_STATUS_EVALUATED,
            "update_id": update.get("update_id"),
            "evaluate_id": evaluate.get("id"),
            "evaluate_version": evaluate.get("version"),
        }
    except Exception as error:
        # The failure is reported to the caller, not raised or logged here.
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
def train_handler(authorization):
    """Train every supported language that is ready and report per language.

    For each key of ``settings.SUPPORTED_LANGUAGES`` the backend is asked for
    the current update. Languages whose answer has no truthy
    ``"ready_for_train"`` are reported as ``TRAIN_STATUS_NOT_READY_FOR_TRAIN``.
    For the others the train task is sent and waited for, one language at a
    time.

    Returns:
        A dict with ``"SUPPORTED_LANGUAGES"`` (list of language keys) and
        ``"languages_report"`` (language -> status dict; failed languages
        also carry an ``"error"`` string).
    """
    repository_authorization = get_repository_authorization(authorization)
    languages_report = {}

    for language in settings.SUPPORTED_LANGUAGES.keys():
        current_update = backend().request_backend_parse(
            "train", repository_authorization, language
        )

        if current_update.get("ready_for_train"):
            try:
                task = celery_app.send_task(
                    TASK_NLU_TRAIN_UPDATE,
                    args=[
                        current_update.get("current_update_id"),
                        current_update.get("repository_authorization_user_id"),
                        repository_authorization,
                    ],
                    queue=queue_name(ACTION_TRAIN, current_update.get("language")),
                )
                task.wait()
                outcome = {"status": TRAIN_STATUS_TRAINED}
            except Exception as error:
                # One language failing must not stop the others; the error
                # is recorded in the report instead of being raised.
                outcome = {"status": TRAIN_STATUS_FAILED, "error": str(error)}
        else:
            outcome = {"status": TRAIN_STATUS_NOT_READY_FOR_TRAIN}

        languages_report[language] = outcome

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }
def _sentence_suggestion(
    text, language, n_sentences_to_generate, percentage_to_replace
):
    """Request generated sentence suggestions for ``text``.

    Validates the arguments, sends ``TASK_NLU_SENTENCE_SUGGESTION_TEXT`` to
    the queue chosen for ``language`` and waits for the task to finish.

    Args:
        text: non-empty ``str`` to build suggestions from.
        language: language code, checked by ``language_validation``.
        n_sentences_to_generate: ``int`` in 1..50 (``bool`` is rejected).
        percentage_to_replace: ``float`` with ``0 < value <= 1``.

    Raises:
        ValidationError: if any argument fails the checks above.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        The task result, updated in place with ``text``, ``language``,
        ``n_sentences_to_generate`` and ``percentage_to_replace``.
    """
    language_validation(language)

    if not isinstance(text, str) or not text:
        raise ValidationError("Invalid text")

    # bool is a subclass of int; reject it explicitly so True/False are
    # never read as 1/0.
    if (
        isinstance(n_sentences_to_generate, bool)
        or not isinstance(n_sentences_to_generate, int)
        or not 0 < n_sentences_to_generate <= 50
    ):
        raise ValidationError("Invalid number of sentences to generate")

    # Written as "not 0 < p <= 1" rather than "p <= 0 or p > 1" so that NaN
    # is rejected too: every ordering comparison with NaN is False.
    if (
        not isinstance(percentage_to_replace, float)
        or not 0 < percentage_to_replace <= 1
    ):
        raise ValidationError("Invalid percentage to replace")

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_SENTENCE_SUGGESTION_TEXT,
            args=[text, percentage_to_replace, n_sentences_to_generate],
            queue=queue_name(language, ACTION_SENTENCE_SUGGESTION, "SPACY"),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded as err:
        raise CeleryTimeoutException() from err

    answer.update(
        {
            "text": text,
            "language": language,
            "n_sentences_to_generate": n_sentences_to_generate,
            "percentage_to_replace": percentage_to_replace,
        }
    )
    return answer
def train_handler(authorization, repository_version=None, language=None):
    """Dispatch training for one language or for every ready language.

    With ``language`` given, that language is validated and trained only if
    the backend reports it as ``"ready_for_train"``. Otherwise the backend's
    list of ready-to-train languages is used. Training is dispatched via
    Celery or AI Platform depending on ``settings.BOTHUB_SERVICE_TRAIN`` and
    is not waited for.

    Returns:
        A dict with ``"SUPPORTED_LANGUAGES"`` (list of language keys) and
        ``"languages_report"``, which maps each handled language to
        ``{"status": TRAIN_STATUS_PROCESSING}``.
    """
    repository_authorization = repository_authorization_validation(authorization)
    languages_report = {}
    # Collected for the Celery branch; not consumed within this function.
    train_tasks = []

    if language:
        language_validation(language)
        language_status = backend().request_backend_train(
            repository_authorization, language, repository_version
        )
        if language_status.get("ready_for_train"):
            ready_to_train_languages = [language_status]
        else:
            ready_to_train_languages = []
    else:
        ready_to_train_languages = backend().request_all_readytotrain_languages(
            repository_authorization, repository_version
        )

    for repository in ready_to_train_languages:
        model = get_language_model(repository)
        service = settings.BOTHUB_SERVICE_TRAIN

        if service == "celery":
            train_task = celery_app.send_task(
                TASK_NLU_TRAIN_UPDATE,
                args=[
                    repository.get("current_version_id"),
                    repository.get("repository_authorization_user_id"),
                    repository_authorization,
                ],
                queue=queue_name(repository.get("language"), ACTION_TRAIN, model),
            )
            train_tasks.append(
                {"task": train_task, "language": repository.get("language")}
            )
        elif service == "ai-platform":
            version_id = str(repository.get("current_version_id"))
            timestamp = str(int(time.time()))
            job_id = (
                f"bothub_{settings.ENVIRONMENT}_train_{version_id}"
                f'_{repository.get("language")}_{timestamp}'
            )
            send_job_train_ai_platform(
                jobId=job_id,
                repository_version=version_id,
                by_id=str(repository.get("repository_authorization_user_id")),
                repository_authorization=str(repository_authorization),
                language=repository.get("language"),
                type_model=model,
                operation="train",
            )
            backend().request_backend_save_queue_id(
                update_id=version_id,
                repository_authorization=str(repository_authorization),
                task_id=job_id,
                from_queue=0,
                type_processing=0,
            )

        # Recorded for every ready repository, including when the configured
        # service matched neither branch above and nothing was dispatched.
        languages_report[repository.get("language")] = {
            "status": TRAIN_STATUS_PROCESSING
        }

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }
def _parse(
    authorization,
    text,
    language,
    rasa_format=False,
    repository_version=None,
    user_agent=None,
    from_backend=False,
):
    """Parse ``text`` with the repository model and log the result.

    The repository is resolved by ``check_language_priority``. The parse
    task is sent and waited for, and its answer is enriched with the request
    context and the entities from ``get_entities_dict``. A background thread
    then forwards a log record to the backend; ``user_agent`` and
    ``from_backend`` are used only in that record.

    Raises:
        ValidationError: if ``text`` is empty or not exactly a ``str``, or
            if the resolved repository has no truthy ``"version"``.
        CeleryTimeoutException: if the task raises ``TimeLimitExceeded``.

    Returns:
        The enriched task result; ``"intent_ranking"`` is always present
        (an empty list when the task gave none).
    """
    repository_authorization = repository_authorization_validation(authorization)

    if type(text) is not str or not text:
        raise ValidationError("Invalid text.")

    repository = check_language_priority(
        language, repository_authorization, repository_version
    )
    if not repository.get("version"):
        raise ValidationError("This repository has never been trained.")

    model = get_language_model(repository)

    try:
        task = celery_app.send_task(
            TASK_NLU_PARSE_TEXT,
            args=[
                repository.get("repository_version"),
                repository_authorization,
                text,
            ],
            kwargs={"rasa_format": rasa_format},
            queue=queue_name(repository.get("language"), ACTION_PARSE, model),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    entities_dict = get_entities_dict(answer)
    parse_context = {
        "text": text,
        "repository_version": repository.get("repository_version"),
        "language": repository.get("language"),
        "group_list": list(entities_dict.keys()),
        "entities": entities_dict,
    }
    answer.update(parse_context)

    # Normalise a missing or None ranking so callers always get a list.
    if answer.get("intent_ranking") is None:
        answer.update({"intent_ranking": []})

    log_target = backend().send_log_nlp_parse
    log_data = {
        "text": text,
        "from_backend": from_backend,
        "user_agent": user_agent,
        "user": str(repository_authorization),
        "repository_version_language": int(repository.get("repository_version")),
        "nlp_log": json.dumps(answer),
    }

    intent_log = []
    for ranked in answer.get("intent_ranking", []):
        intent_log.append(
            {
                "intent": ranked["name"],
                "is_default": ranked["name"] == answer["intent"]["name"],
                "confidence": ranked["confidence"],
            }
        )
    log_data["log_intent"] = intent_log

    # Logging runs on its own thread so the response does not wait for it.
    log = threading.Thread(target=log_target, kwargs={"data": log_data})
    log.start()

    return answer
import subprocess

from bothub_nlp_celery.actions import queue_name
from bothub_nlp_celery import settings

# Queue this worker consumes from: the bare language queue, or the name
# derived by queue_name() when a language model is configured.
queue = (
    queue_name(
        settings.BOTHUB_NLP_LANGUAGE_QUEUE,
        model_name=settings.BOTHUB_LANGUAGE_MODEL,
    )
    if settings.BOTHUB_LANGUAGE_MODEL
    else settings.BOTHUB_NLP_LANGUAGE_QUEUE
)

# Launch the Celery worker as a child process and block until it exits.
# The exit status is not inspected.
subprocess.run(
    [
        "celery", "-A", "celery_app", "worker",
        "-O", "fair",
        "-c", "1",
        "-l", "INFO",
        "-E",
        "--pool", "gevent",
        "-Q", queue,
    ]
)