def check_language_priority(language, repository_authorization, repository_version):
    """Fetch the repository payload for ``language``, following priority rules.

    A truthy ``language`` is lower-cased and cut at the first ``-`` or ``_``
    before being validated. When the resulting code has an entry in
    ``DEFAULT_LANGS_PRIORITY``, each language of that entry is requested in
    order until one answers with a truthy ``"total_training_end"``; otherwise
    only the generic language itself is requested.

    Args:
        language: Language code supplied by the caller (may be falsy).
        repository_authorization: Authorization forwarded to the backend.
        repository_version: Repository version forwarded to the backend.

    Returns:
        Whatever the backend answered for the last language requested, or
        ``{}`` when that request raised.
    """
    if language:
        lowered = str(language).lower()
        language = re.split(r"[-_]", lowered)[0]

    language_validation(language)

    def fetch(candidate):
        # Any backend failure counts as "nothing available for this language".
        try:
            return backend().request_backend_parse(
                repository_authorization, candidate, repository_version
            )
        except Exception:
            return {}

    if language not in DEFAULT_LANGS_PRIORITY:
        # No hard-coded exception: try the most generic repository ('LANG' only).
        return fetch(language)

    # Hard-coded exceptions: walk the priority list until a trained one shows up.
    repository = {}
    for candidate in DEFAULT_LANGS_PRIORITY.get(language):
        repository = fetch(candidate)
        if repository.get("total_training_end"):
            break

    return repository
def qa_handler(
    authorization,
    knowledge_base_id,
    question,
    language,
    from_backend=False,
    user_agent=None,
):
    """Answer ``question`` against a knowledge base text and log the outcome.

    Args:
        authorization: Raw authorization value, validated before use.
        knowledge_base_id: Identifier of the knowledge base to query; it is
            converted with ``int()`` when the log payload is built.
        question: Question text; must be a non-empty ``str``.
        language: Language code, validated before use.
        from_backend: Flag copied into the log payload.
        user_agent: Value copied into the log payload.

    Returns:
        The object returned by ``request_torchserve``.

    Raises:
        EmptyInputException: ``question`` is falsy or not exactly a ``str``.
        LargeQuestionException: ``question`` exceeds
            ``BOTHUB_NLP_API_QA_QUESTION_LIMIT`` characters.
        EmptyBaseException: the knowledge base has no ``"text"``.
        LargeContextException: the knowledge base text exceeds
            ``BOTHUB_NLP_API_QA_TEXT_LIMIT`` characters.
    """
    language_validation(language)
    user_base_authorization = repository_authorization_validation(authorization)

    if not question or type(question) is not str:
        raise EmptyInputException()
    if len(question) > BOTHUB_NLP_API_QA_QUESTION_LIMIT:
        raise LargeQuestionException(
            len(question), limit=BOTHUB_NLP_API_QA_QUESTION_LIMIT
        )

    knowledge_base = backend().request_backend_knowledge_bases(
        user_base_authorization, knowledge_base_id, language
    )
    text = knowledge_base.get("text")

    if not text:
        raise EmptyBaseException()
    if len(text) > BOTHUB_NLP_API_QA_TEXT_LIMIT:
        raise LargeContextException(len(text), limit=BOTHUB_NLP_API_QA_TEXT_LIMIT)

    result = request_torchserve(text, question, language)

    # Only the first answer (when there is one) is reported in the log.
    answer, confidence = "", .0
    if len(result["answers"]) > 0:
        first_answer = result["answers"][0]
        answer = first_answer["text"]
        confidence = float(first_answer["confidence"])

    # The log is sent from a separate thread; this function does not join it.
    send_log = backend().send_log_qa_nlp_parse
    log_data = {
        "answer": answer,
        "confidence": confidence,
        "question": question,
        "user_agent": user_agent,
        "nlp_log": json.dumps(result),
        "user": str(user_base_authorization),
        "knowledge_base": int(knowledge_base_id),
        "language": language,
        "from_backend": from_backend,
    }
    threading.Thread(target=send_log, kwargs={"data": log_data}).start()

    return result
def _debug_parse(authorization, text, language, repository_version=None):
    """Run the debug-parse task for ``text`` and return its annotated answer.

    The repository update for ``language`` is requested first. When that
    answer has no truthy ``"version"``, the languages listed for ``language``
    in ``DEFAULT_LANGS_PRIORITY`` are requested in order until one has.

    Every backend request is guarded the same way: a request that raises is
    treated as "no update for that language", so one failing fallback no
    longer prevents the remaining fallbacks from being tried.

    Args:
        authorization: Raw authorization value, validated before use.
        text: Text to parse; must be a non-empty ``str``.
        language: Language code, validated before use.
        repository_version: Optional repository version forwarded to the
            backend.

    Returns:
        The task result, updated with ``"text"``, ``"repository_version"``
        and ``"language"``.

    Raises:
        ValidationError: ``text`` is empty or not a ``str``, or no requested
            language produced an update with a truthy ``"version"``.
        CeleryTimeoutException: ``TimeLimitExceeded`` was raised while sending
            or waiting for the task.
    """
    from ..utils import DEFAULT_LANGS_PRIORITY

    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    if not isinstance(text, str) or not text:
        raise ValidationError("Text required.")

    def fetch_update(candidate_language):
        # A failing backend request means "nothing usable for this language".
        try:
            return backend().request_backend_parse(
                repository_authorization, candidate_language, repository_version
            )
        except Exception:
            return {}

    update = fetch_update(language)

    if not update.get("version"):
        for next_language in DEFAULT_LANGS_PRIORITY.get(language, []):
            update = fetch_update(next_language)
            if update.get("version"):
                break

    if not update.get("version"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(update)

    try:
        answer_task = celery_app.send_task(
            TASK_NLU_DEBUG_PARSE_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                text,
            ],
            queue=queue_name(update.get("language"), ACTION_DEBUG_PARSE, model),
        )
        answer_task.wait()
        answer = answer_task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Report the language/version actually used, which may be a fallback.
    answer.update(
        {
            "text": text,
            "repository_version": update.get("repository_version"),
            "language": update.get("language"),
        }
    )
    return answer
def crossvalidation_evaluate_handler(authorization, language, repository_version=None):
    """Submit a cross-validation evaluate job and report its initial status.

    Args:
        authorization: Raw authorization value, validated before use.
        language: Language code, validated before use.
        repository_version: Optional repository version forwarded to the
            backend.

    Returns:
        A report dict. On success it carries ``EVALUATE_STATUS_PROCESSING``
        with ``"cross_validation": True``; if anything raised while the job
        was being submitted or registered, it carries
        ``EVALUATE_STATUS_FAILED`` and the error text instead.

    Raises:
        ValidationError: the backend answer has no truthy
            ``"can_run_automatic_evaluate"`` (including when the backend
            request itself raised).
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    try:
        repository = backend().request_backend_start_automatic_evaluate(
            repository_authorization, repository_version, language
        )
    except Exception:
        # A failing backend request is handled like a refusal below.
        repository = {}

    if not repository.get("can_run_automatic_evaluate"):
        raise ValidationError("Validation error")

    model = get_language_model(repository)

    try:
        job_id = f'bothub_{settings.ENVIRONMENT}_evaluate_{repository.get("repository_version_language_id")}_{language}_{str(int(time.time()))}'
        version_language_id = str(repository.get("repository_version_language_id"))
        user_id = str(repository.get("user_id"))
        authorization_text = str(repository_authorization)

        send_job_train_ai_platform(
            jobId=job_id,
            repository_version=version_language_id,
            by_id=user_id,
            repository_authorization=authorization_text,
            language=language,
            type_model=model,
            operation="evaluate",
        )
        # Register the submitted job id with the backend.
        backend().request_backend_save_queue_id(
            update_id=version_language_id,
            repository_authorization=authorization_text,
            task_id=job_id,
            from_queue=0,
            type_processing=2,
        )
        return {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": repository.get("repository_version_language_id"),
            "evaluate_id": None,
            "evaluate_version": None,
            "cross_validation": True,
        }
    except Exception as error:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
def _parse(authorization, text, language, rasa_format=False):
    """Parse ``text`` with the trained update for ``language`` or a fallback.

    The update for ``language`` is requested first. When that answer has no
    truthy ``"update"``, the languages listed for ``language`` in
    ``NEXT_LANGS`` are requested in order until one has.

    Every backend request is guarded the same way: a request that raises is
    treated as "no update for that language", so one failing fallback no
    longer prevents the remaining fallbacks from being tried.

    Args:
        authorization: Raw authorization value; the repository authorization
            is extracted from it.
        text: Text handed to the parse task.
        language: Language code; when truthy it must be a key of
            ``settings.SUPPORTED_LANGUAGES`` or of ``NEXT_LANGS``.
        rasa_format: Forwarded to the task as the ``rasa_format`` keyword.

    Returns:
        The task result, updated with ``"text"``, ``"update_id"`` and
        ``"language"``.

    Raises:
        ValidationError: the language is not supported, or no requested
            language produced an answer with a truthy ``"update"``.
        AuthorizationIsRequired: no repository authorization was extracted.
    """
    from ..utils import NEXT_LANGS

    if language and (
        language not in settings.SUPPORTED_LANGUAGES.keys()
        and language not in NEXT_LANGS
    ):
        raise ValidationError("Language '{}' not supported by now.".format(language))

    repository_authorization = get_repository_authorization(authorization)
    if not repository_authorization:
        raise AuthorizationIsRequired()

    def fetch_update(candidate_language):
        # A failing backend request means "nothing usable for this language".
        try:
            return backend().request_backend_parse(
                "parse", repository_authorization, candidate_language
            )
        except Exception:
            return {}

    update = fetch_update(language)

    if not update.get("update"):
        for next_language in NEXT_LANGS.get(language, []):
            update = fetch_update(next_language)
            if update.get("update"):
                break

    if not update.get("update"):
        raise ValidationError("This repository has never been trained")

    answer_task = celery_app.send_task(
        TASK_NLU_PARSE_TEXT,
        args=[update.get("update_id"), repository_authorization, text],
        kwargs={"rasa_format": rasa_format},
        queue=queue_name(ACTION_PARSE, update.get("language")),
    )
    answer_task.wait()
    answer = answer_task.result

    # Report the update/language actually used, which may be a fallback.
    answer.update(
        {
            "text": text,
            "update_id": update.get("update_id"),
            "language": update.get("language"),
        }
    )
    return answer
def _words_distribution(authorization, language, repository_version=None):
    """Run the words-distribution task and return its result.

    Args:
        authorization: Raw authorization value, validated before use.
        language: Language code, validated before use; also selects the queue.
        repository_version: Optional repository version forwarded to the
            backend.

    Returns:
        The result of the ``TASK_NLU_WORDS_DISTRIBUTION`` task.

    Raises:
        CeleryTimeoutException: ``TimeLimitExceeded`` was raised while sending
            or waiting for the task.
    """
    language_validation(language)
    repository_authorization = repository_authorization_validation(authorization)

    current_update = backend().request_backend_train(
        repository_authorization, language, repository_version
    )

    try:
        task = celery_app.send_task(
            TASK_NLU_WORDS_DISTRIBUTION,
            args=[
                current_update.get("current_version_id"),
                language,
                repository_authorization,
            ],
            queue=queue_name(language, ACTION_WORDS_DISTIRBUTION),
        )
        task.wait()
        return task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
async def info_handler(
    request: Request = Depends(AuthorizationRequired()),
    Authorization: str = Header(..., description="Bearer your_key"),
):
    """Return the backend "info" payload with ``intents_list`` renamed.

    The repository authorization is extracted from the ``Authorization``
    header and used for the backend ``"info"`` request.

    Returns:
        The backend answer, with its ``"intents_list"`` entry moved to the
        ``"intents"`` key.
    """
    repository_authorization = get_repository_authorization(Authorization)
    info = backend().request_backend_parse("info", repository_authorization)

    # Move the value over to the "intents" key; the old key is removed.
    info["intents"] = info.pop("intents_list")
    return info
async def info_handler(
    request: Request = Depends(AuthorizationRequired()),
    Authorization: str = Header(..., description="Bearer your_key"),
):
    """Return the backend info payload for the authorized repository.

    Returns:
        The backend answer, when it carries no truthy ``"detail"`` entry.

    Raises:
        HTTPException: status 400, with the whole backend answer as detail,
            when that answer carries a truthy ``"detail"`` entry.
    """
    repository_authorization = repository_authorization_validation(Authorization)
    info = backend().request_backend_info(repository_authorization)

    if not info.get("detail"):
        return info

    raise HTTPException(status_code=400, detail=info)
def evaluate_handler(authorization, language, repository_version=None):
    """Run the evaluate task for a repository language and report the outcome.

    Args:
        authorization: Raw authorization value, validated before use.
        language: Language code, validated before use.
        repository_version: Optional repository version; forwarded to the
            backend and passed as the first task argument.

    Returns:
        A report dict. After the task finished it carries
        ``EVALUATE_STATUS_PROCESSING`` plus the evaluate id/version read from
        the task result (``None`` when the result is ``None``); on any other
        error it carries ``EVALUATE_STATUS_FAILED`` and the error text.

    Raises:
        ValidationError: the backend answer has no truthy ``"update"``
            (including when the backend request itself raised).
        CeleryTimeoutException: ``TimeLimitExceeded`` was raised while sending
            or waiting for the task.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    try:
        repository = backend().request_backend_evaluate(
            repository_authorization, language, repository_version
        )
    except Exception:
        # A failing backend request is handled like an untrained repository.
        repository = {}

    if not repository.get("update"):
        raise ValidationError("This repository has never been trained")

    model = get_language_model(repository)
    cross_validation = False

    try:
        task = celery_app.send_task(
            TASK_NLU_EVALUATE_UPDATE,
            args=[
                repository_version,
                # repository_version_language_id
                repository.get("repository_version"),
                repository_authorization,
                cross_validation,
                repository.get("language"),
            ],
            queue=queue_name(repository.get("language"), ACTION_EVALUATE, model),
        )
        task.wait()
        evaluate = task.result

        report = {
            "language": language,
            "status": EVALUATE_STATUS_PROCESSING,
            "repository_version": repository.get("repository_version"),
            "evaluate_id": None,
            "evaluate_version": None,
            "cross_validation": cross_validation,
        }
        if evaluate is not None:
            report["evaluate_id"] = evaluate.get("id")
            report["evaluate_version"] = evaluate.get("version")
        return report
    except TimeLimitExceeded:
        raise CeleryTimeoutException()
    except Exception as error:
        return {"status": EVALUATE_STATUS_FAILED, "error": str(error)}
def _intent_sentence_suggestion(
    authorization,
    language,
    intent,
    n_sentences_to_generate,
    percentage_to_replace,
    repository_version=None,
):
    """Run the intent sentence-suggestion task and return its annotated answer.

    Args:
        authorization: Raw authorization value, validated before use.
        language: Language code, validated before use; also selects the queue.
        intent: Intent name; must be a non-empty ``str``.
        n_sentences_to_generate: Must be exactly an ``int`` with
            ``0 < n <= 50``.
        percentage_to_replace: Must be exactly a ``float`` greater than 0 and
            not greater than 1.
        repository_version: Optional repository version forwarded to the
            backend.

    Returns:
        The task result, updated with ``"language"``,
        ``"n_sentences_to_generate"``, ``"percentage_to_replace"`` and
        ``"intent"``.

    Raises:
        ValidationError: one of the arguments fails the checks above.
        CeleryTimeoutException: ``TimeLimitExceeded`` was raised while sending
            or waiting for the task.
    """
    repository_authorization = repository_authorization_validation(authorization)
    language_validation(language)

    if not intent or type(intent) is not str:
        raise ValidationError("Invalid intent")

    invalid_count = (
        not n_sentences_to_generate
        or type(n_sentences_to_generate) is not int
        or n_sentences_to_generate <= 0
        or n_sentences_to_generate > 50
    )
    if invalid_count:
        raise ValidationError("Invalid number of sentences to generate")

    invalid_percentage = (
        not percentage_to_replace
        or type(percentage_to_replace) is not float
        or percentage_to_replace <= 0
        or percentage_to_replace > 1
    )
    if invalid_percentage:
        raise ValidationError("Invalid percentage to replace")

    try:
        update = backend().request_backend_parse(
            repository_authorization, language, repository_version
        )
    except Exception:
        # The task is still sent; its first argument is then None.
        update = {}

    try:
        task = celery_app.send_task(
            TASK_NLU_INTENT_SENTENCE_SUGGESTION_TEXT,
            args=[
                update.get("repository_version"),
                repository_authorization,
                intent,
                percentage_to_replace,
                n_sentences_to_generate,
            ],
            queue=queue_name(language, ACTION_INTENT_SENTENCE_SUGGESTION, "SPACY"),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    # Echo the request parameters back alongside the task result.
    request_echo = {
        "language": language,
        "n_sentences_to_generate": n_sentences_to_generate,
        "percentage_to_replace": percentage_to_replace,
        "intent": intent,
    }
    answer.update(request_echo)
    return answer
def train_handler(authorization, repository_version=None, language=None):
    """Start training for every ready language and report what was started.

    With a truthy ``language`` only that language is considered, and only if
    the backend marks it ``"ready_for_train"``; otherwise the backend is asked
    for all ready-to-train languages. Each one is dispatched according to
    ``settings.BOTHUB_SERVICE_TRAIN`` (``"celery"`` or ``"ai-platform"``).

    Args:
        authorization: Raw authorization value, validated before use.
        repository_version: Optional repository version forwarded to the
            backend.
        language: Optional language code; validated when given.

    Returns:
        A dict with ``"SUPPORTED_LANGUAGES"`` (list of supported language
        keys) and ``"languages_report"`` (language -> status dict).
    """
    repository_authorization = repository_authorization_validation(authorization)

    languages_report = {}
    # Filled for celery dispatches; not read anywhere else in this function.
    train_tasks = []

    if not language:
        ready_to_train_languages = backend().request_all_readytotrain_languages(
            repository_authorization, repository_version
        )
    else:
        language_validation(language)
        language_status = backend().request_backend_train(
            repository_authorization, language, repository_version
        )
        ready_to_train_languages = []
        if language_status.get("ready_for_train"):
            ready_to_train_languages = [language_status]

    for repository in ready_to_train_languages:
        model = get_language_model(repository)

        if settings.BOTHUB_SERVICE_TRAIN == "celery":
            train_task = celery_app.send_task(
                TASK_NLU_TRAIN_UPDATE,
                args=[
                    repository.get("current_version_id"),
                    repository.get("repository_authorization_user_id"),
                    repository_authorization,
                ],
                queue=queue_name(repository.get("language"), ACTION_TRAIN, model),
            )
            train_tasks.append(
                {"task": train_task, "language": repository.get("language")}
            )
        elif settings.BOTHUB_SERVICE_TRAIN == "ai-platform":
            job_id = f'bothub_{settings.ENVIRONMENT}_train_{str(repository.get("current_version_id"))}_{repository.get("language")}_{str(int(time.time()))}'
            send_job_train_ai_platform(
                jobId=job_id,
                repository_version=str(repository.get("current_version_id")),
                by_id=str(repository.get("repository_authorization_user_id")),
                repository_authorization=str(repository_authorization),
                language=repository.get("language"),
                type_model=model,
                operation="train",
            )
            # Register the submitted job id with the backend.
            backend().request_backend_save_queue_id(
                update_id=str(repository.get("current_version_id")),
                repository_authorization=str(repository_authorization),
                task_id=job_id,
                from_queue=0,
                type_processing=0,
            )

        # Recorded for every ready language, whichever branch ran above.
        languages_report[repository.get("language")] = {
            "status": TRAIN_STATUS_PROCESSING
        }

    return {
        "SUPPORTED_LANGUAGES": list(settings.SUPPORTED_LANGUAGES.keys()),
        "languages_report": languages_report,
    }
def _parse(
    authorization,
    text,
    language,
    rasa_format=False,
    repository_version=None,
    user_agent=None,
    from_backend=False,
):
    """Parse ``text`` with the repository chosen for ``language`` and log it.

    Args:
        authorization: Raw authorization value, validated before use.
        text: Text to parse; must be a non-empty ``str``.
        language: Language code handed to ``check_language_priority``.
        rasa_format: Forwarded to the task as the ``rasa_format`` keyword.
        repository_version: Optional repository version used when choosing
            the repository.
        user_agent: Value copied into the log payload.
        from_backend: Flag copied into the log payload.

    Returns:
        The task result, updated with ``"text"``, ``"repository_version"``,
        ``"language"``, ``"group_list"`` and ``"entities"``, and with
        ``"intent_ranking"`` set to ``[]`` when it was missing or ``None``.

    Raises:
        ValidationError: ``text`` is empty or not exactly a ``str``, or the
            chosen repository has no truthy ``"version"``.
        CeleryTimeoutException: ``TimeLimitExceeded`` was raised while sending
            or waiting for the task.
    """
    repository_authorization = repository_authorization_validation(authorization)

    if type(text) is not str or not text:
        raise ValidationError("Invalid text.")

    repository = check_language_priority(
        language, repository_authorization, repository_version
    )
    if not repository.get("version"):
        raise ValidationError("This repository has never been trained.")

    model = get_language_model(repository)

    try:
        task = celery_app.send_task(
            TASK_NLU_PARSE_TEXT,
            args=[
                repository.get("repository_version"),
                repository_authorization,
                text,
            ],
            kwargs={"rasa_format": rasa_format},
            queue=queue_name(repository.get("language"), ACTION_PARSE, model),
        )
        task.wait()
        answer = task.result
    except TimeLimitExceeded:
        raise CeleryTimeoutException()

    entities_dict = get_entities_dict(answer)
    extra_fields = {
        "text": text,
        "repository_version": repository.get("repository_version"),
        "language": repository.get("language"),
        "group_list": list(entities_dict.keys()),
        "entities": entities_dict,
    }
    answer.update(extra_fields)

    # A missing or None ranking is normalised to an empty list.
    if answer.get("intent_ranking") is None:
        answer["intent_ranking"] = []

    # The log is sent from a separate thread; this function does not join it.
    send_log = backend().send_log_nlp_parse
    log_data = {
        "text": text,
        "from_backend": from_backend,
        "user_agent": user_agent,
        "user": str(repository_authorization),
        "repository_version_language": int(repository.get("repository_version")),
        "nlp_log": json.dumps(answer),
        "log_intent": [
            {
                "intent": ranked["name"],
                "is_default": ranked["name"] == answer["intent"]["name"],
                "confidence": ranked["confidence"],
            }
            for ranked in answer.get("intent_ranking", [])
        ],
    }
    threading.Thread(target=send_log, kwargs={"data": log_data}).start()

    return answer