def post(self):
    """Handle POST for the v1/translate API: batch translation with an explicit model.

    The JSON request body must be an object containing ``src_list`` (a list
    of objects, each read through ``.get('src')``) and ``model_id``. Every
    item of ``src_list`` is updated in place with the ``tgt``,
    ``tagged_tgt`` and ``tagged_src`` values returned by
    ``FairseqDocumentTranslateService.batch_translator`` and echoed back.

    Returns:
        ``CustomResponse(...).jsonify_res()`` built from
        ``Status.SUCCESS`` on success, ``Status.SYSTEM_ERR`` (message set to
        the exception text) when translation fails or the batch limit is
        exceeded, or ``Status.INVALID_API_REQUEST`` when mandatory keys are
        missing.
    """
    inputs = request.get_json(force=True)

    # A body that is not a JSON object (null, number, list, ...) cannot carry
    # the mandatory keys; treat it as an invalid request instead of crashing
    # on len()/membership tests.
    if not isinstance(inputs, dict) or not all(
            v in inputs for v in ['src_list', 'model_id']):
        log_info(
            "API input missing mandatory data ('src_list','model_id')",
            MODULE_CONTEXT)
        # Copy before customising the message: the enum member's value is
        # shared by every request (assumes it is dict-like, as the item
        # assignment in the original code implies).
        status = dict(Status.INVALID_API_REQUEST.value)
        status['message'] = "Missing mandatory data ('src_list','model_id')"
        out = CustomResponse(status, inputs)
        return out.jsonify_res()

    response_body = []
    try:
        log_info("Making v1/translate API call", MODULE_CONTEXT)
        log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
        input_src_list = inputs.get('src_list')
        src_list = [i.get('src') for i in input_src_list]
        if len(src_list) > translation_batch_limit:
            raise Exception(
                f"Number of sentences per request exceeded the limit of:{translation_batch_limit} sentences per batch"
            )

        translation_batch = {
            'id': inputs.get('model_id'),
            'src_list': src_list
        }
        output_batch = FairseqDocumentTranslateService.batch_translator(
            translation_batch)

        # Index-based on purpose: a service result shorter than the input
        # must raise (and be reported as SYSTEM_ERR) rather than be silently
        # truncated, and nothing is merged until every row has been built.
        output_batch_dict_list = [{
            'tgt': output_batch['tgt_list'][i],
            'tagged_tgt': output_batch['tagged_tgt_list'][i],
            'tagged_src': output_batch['tagged_src_list'][i]
        } for i in range(len(input_src_list))]

        for item, translated in zip(input_src_list, output_batch_dict_list):
            item.update(translated)
            response_body.append(item)

        out = CustomResponse(Status.SUCCESS.value, response_body)
        log_info(
            "Final output from v1/translate API: {}".format(
                out.get_res_json()), MODULE_CONTEXT)
    except Exception as e:
        # Copy so the per-request error text does not leak into the shared
        # Status.SYSTEM_ERR value.
        status = dict(Status.SYSTEM_ERR.value)
        status['message'] = str(e)
        log_exception(
            "Exception caught in batch_translator child block: {}".format(e),
            MODULE_CONTEXT, e)
        out = CustomResponse(status, inputs)
    return out.jsonify_res()
def post(self):
    '''
    End point when only src and tgt language information is available

    The JSON request body must be an object containing ``src_list`` (a list
    of objects, each read through ``.get('src')``),
    ``source_language_code`` and ``target_language_code``. The model id is
    resolved with ``get_model_id`` and each ``src_list`` item is updated in
    place with the ``tgt`` value returned by
    ``FairseqDocumentTranslateService.indic_to_indic_translator``.

    Returns a ``(body, http_status, headers)`` tuple:
        200 on success,
        406 when the request content type is not ``application/json``,
        400 for an unsupported language code or identical source/target,
        401 when mandatory keys are missing,
        500 when translation raises.
    '''
    content_type = 'application/json'
    response_headers = {
        'Content-Type': content_type,
        'X-Content-Type-Options': 'nosniff'
    }
    inputs = request.get_json(force=True)

    if request.content_type != content_type:
        status = Status.INVALID_CONTENT_TYPE.value
        log_exception("v1.1 translate API | Invalid content type",
                      MODULE_CONTEXT, status['message'])
        out = CustomResponse(status, html_encode(inputs))
        return out.get_res_json(), 406, response_headers

    mandatory_keys = [
        'src_list', 'source_language_code', 'target_language_code'
    ]
    # A body that is not a JSON object cannot carry the mandatory keys;
    # report it as an invalid request instead of crashing on len().
    if not isinstance(inputs, dict) or not all(
            v in inputs for v in mandatory_keys):
        # Copy before customising the message: the enum member's value is
        # shared by every request (assumes it is dict-like, as the item
        # assignment in the original code implies).
        status = dict(Status.INVALID_API_REQUEST.value)
        status[
            'message'] = "Missing mandatory data ('src_list','source_language_code','target_language_code')"
        log_exception(
            "v1.1 translate API | input missing mandatory data ('src_list','source_language_code','target_language_code')",
            MODULE_CONTEXT, status['message'])
        out = CustomResponse(status, html_encode(inputs))
        # NOTE(review): 401 is kept for backward compatibility with existing
        # callers; 400 would be the conventional code for a malformed body.
        return out.get_res_json(), 401, response_headers

    source_language = inputs.get('source_language_code')
    target_language = inputs.get('target_language_code')

    if (source_language not in supported_languages
            or target_language not in supported_languages):
        status = Status.UNSUPPORTED_LANGUAGE.value
        log_exception(
            "v1.1 translate API | Unsupported input language code",
            MODULE_CONTEXT, status['message'])
        out = CustomResponse(status, html_encode(inputs))
        return out.get_res_json(), 400, response_headers

    if source_language == target_language:
        status = Status.SAME_LANGUAGE_VALUE.value
        log_exception(
            "v1.1 translate API | src and tgt code can't be same",
            MODULE_CONTEXT, status['message'])
        out = CustomResponse(status, html_encode(inputs))
        return out.get_res_json(), 400, response_headers

    response_body = []
    try:
        log_info("Making translate v1.1 API call", MODULE_CONTEXT)
        log_info("v1.1 translate API | input--- {}".format(inputs),
                 MODULE_CONTEXT)
        input_src_list = inputs.get('src_list')
        src_list = [i.get('src') for i in input_src_list]
        m_id = get_model_id(source_language, target_language)
        translation_batch = {
            'id': m_id,
            'src_lang': source_language,
            'tgt_lang': target_language,
            'src_list': src_list
        }
        output_batch = FairseqDocumentTranslateService.indic_to_indic_translator(
            translation_batch)

        # Index-based on purpose: a service result shorter than the input
        # must raise (and be reported as a 500) rather than be silently
        # truncated, and nothing is merged until every row has been built.
        output_batch_dict_list = [{
            'tgt': output_batch['tgt_list'][i]
        } for i in range(len(input_src_list))]

        for item, translated in zip(input_src_list, output_batch_dict_list):
            item.update(translated)
            response_body.append(item)

        out = CustomResponse(Status.SUCCESS.value, response_body)
        log_info("Final output v1.1 API | {}".format(out.get_res_json()),
                 MODULE_CONTEXT)
        return out.get_res_json(), 200, response_headers
    except Exception as e:
        # Copy so the per-request error text does not leak into the shared
        # Status.SYSTEM_ERR value.
        status = dict(Status.SYSTEM_ERR.value)
        status['message'] = str(e)
        log_exception(
            "Exception caught in v1.1 translate API resource child block: {}"
            .format(e), MODULE_CONTEXT, e)
        out = CustomResponse(status, html_encode(inputs))
        return out.get_res_json(), 500, response_headers
def post(self):
    '''
    ULCA end point

    The JSON request body must be an object containing ``input`` (a list of
    objects, each read through ``.get('source')``) and ``config`` (an object
    that contains ``modelId``; ``config['language']`` is read as well and
    its ``sourceLanguage`` / ``targetLanguage`` entries are used when
    ``modelId`` is 144). Each ``input`` item is updated in place with a
    ``target`` value and returned together with ``config``.

    Returns:
        ``out.jsonify_data()`` on success,
        ``(out.get_res_json_data(), 500)`` when translation raises or the
        batch limit is exceeded,
        ``(out.get_res_json_data(), 400)`` when mandatory data is missing.
    '''
    inputs = request.get_json(force=True)
    config = inputs.get('config') if isinstance(inputs, dict) else None

    # Both the body and its 'config' entry must be JSON objects; anything
    # else (null, list, number, ...) is reported as an invalid request
    # instead of raising before the error handling below is reached.
    request_is_valid = (isinstance(inputs, dict) and 'input' in inputs
                        and isinstance(config, dict)
                        and "modelId" in config)
    if not request_is_valid:
        log_info(
            "ULCA API input missing mandatory data ('input','config,'modelId')",
            MODULE_CONTEXT)
        # Copy before customising the message: the enum member's value is
        # shared by every request (assumes it is dict-like, as the item
        # assignment in the original code implies).
        status = dict(Status.INVALID_API_REQUEST.value)
        status[
            'message'] = "Missing mandatory data ('input','config','modelId)"
        out = CustomResponse(status, inputs)
        return out.get_res_json_data(), 400

    output = []
    try:
        log_info("Making API call for ULCA endpoint", MODULE_CONTEXT)
        log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
        input_src_list = inputs.get('input')
        language = config.get('language')
        model_id = config.get('modelId')
        src_list = [i.get('source') for i in input_src_list]
        if len(src_list) > translation_batch_limit:
            raise Exception(
                f"Number of sentences per request exceeded the limit of: {translation_batch_limit} sentences per batch"
            )

        # NOTE(review): model id 144 is routed to the indic-to-indic
        # translator, which additionally needs the language pair; presumably
        # it identifies that model - confirm against the model registry.
        if model_id == 144:
            translation_batch = {
                'id': model_id,
                'src_lang': language['sourceLanguage'],
                'tgt_lang': language['targetLanguage'],
                'src_list': src_list
            }
            output_batch = FairseqDocumentTranslateService.indic_to_indic_translator(
                translation_batch)
        else:
            translation_batch = {'id': model_id, 'src_list': src_list}
            output_batch = FairseqDocumentTranslateService.batch_translator(
                translation_batch)

        # Index-based on purpose: a service result shorter than the input
        # must raise (and be reported as a 500) rather than be silently
        # truncated, and nothing is merged until every row has been built.
        output_batch_dict_list = [{
            'target': output_batch['tgt_list'][i]
        } for i in range(len(input_src_list))]

        for item, translated in zip(input_src_list, output_batch_dict_list):
            item.update(translated)
            output.append(item)

        final_output = {'config': config, 'output': output}
        out = CustomResponse(Status.SUCCESS.value, final_output)
        log_info(
            "Final output from ULCA API: {}".format(out.get_res_json()),
            MODULE_CONTEXT)
        return out.jsonify_data()
    except Exception as e:
        # Copy so the per-request error text does not leak into the shared
        # Status.SYSTEM_ERR value.
        status = dict(Status.SYSTEM_ERR.value)
        status['message'] = str(e)
        log_exception(
            "Exception caught in ULCA API child block: {}".format(e),
            MODULE_CONTEXT, e)
        out = CustomResponse(status, inputs)
        return out.get_res_json_data(), 500
def batch_translator(c_topic):
    '''
    New method for batch translation

    Consumes messages from the consumer created for ``c_topic``, runs
    ``FairseqDocumentTranslateService.batch_translator`` on each message and
    sends ``{'out': <response>}`` to the producer topic that
    ``config.kafka_topic`` pairs with the message's topic, on the same
    partition the message arrived on.

    A message value is expected to contain ``message`` (a list of objects,
    each read through ``.get('src')``), ``record_id`` and ``id``; anything
    else is answered with ``Status.KAFKA_INVALID_REQUEST``.

    If consuming or producing raises, the error is logged and a fresh
    consumer/producer pair is created. The function returns only when the
    consumer iterator ends without an error.
    '''
    # Reconnect with a loop rather than by calling this function again from
    # the exception handlers: repeated failures then cannot grow the call
    # stack until RecursionError.
    while True:
        log_info('KafkaTranslate: batch_translator', MODULE_CONTEXT)
        msg_count, msg_sent = 0, 0
        consumer = get_consumer(c_topic)
        producer = get_producer()
        try:
            for msg in consumer:
                producer_topic = [
                    topic["producer"] for topic in config.kafka_topic
                    if topic["consumer"] == msg.topic
                ][0]
                log_info("Producer for current consumer:{} is-{}".format(msg.topic, producer_topic), MODULE_CONTEXT)
                msg_count += 1
                log_info("*******************msg received count: {}; at {} ************".format(msg_count, datetime.datetime.now()), MODULE_CONTEXT)
                inputs = msg.value
                partition = msg.partition
                response_body = []

                # Value comparison (!=), not identity (is not), for the
                # emptiness check.
                if inputs is not None and all(v in inputs for v in ['message', 'record_id', 'id']) and len(inputs) != 0:
                    # Bound before the try block so the bookkeeping after it
                    # can always use them, whatever fails inside.
                    input_time = datetime.datetime.now()
                    record_id = inputs.get('record_id')
                    message = inputs.get('message')
                    try:
                        log_info("Input for Record Id:{} at {}".format(record_id, input_time), MODULE_CONTEXT)
                        log_info("Running batch-translation on {}".format(inputs), MODULE_CONTEXT)
                        src_list = [i.get('src') for i in message]
                        translation_batch = {'id': inputs.get('id'), 'src_list': src_list}
                        output_batch = FairseqDocumentTranslateService.batch_translator(translation_batch)
                        log_info("Output of translation batch service at :{}".format(datetime.datetime.now()), MODULE_CONTEXT)
                        # Index-based on purpose: a result shorter than the
                        # input must raise rather than be silently truncated.
                        output_batch_dict_list = [{
                            'tgt': output_batch['tgt_list'][i],
                            'tagged_tgt': output_batch['tagged_tgt_list'][i],
                            'tagged_src': output_batch['tagged_src_list'][i]
                        } for i in range(len(message))]
                        for item, translated in zip(message, output_batch_dict_list):
                            item.update(translated)
                            response_body.append(item)
                        log_info("Record Id:{}; Final response body of current batch translation:{}".format(record_id, response_body), MODULE_CONTEXT)
                        out = CustomResponse(Status.SUCCESS.value, response_body)
                    except Exception as e:
                        # Copy so the per-message error text does not leak
                        # into the shared Status.SYSTEM_ERR value (assumes
                        # the value is dict-like, as the item assignment in
                        # the original code implies).
                        status = dict(Status.SYSTEM_ERR.value)
                        status['message'] = str(e)
                        log_exception("Exception caught in batch_translator child block: {}".format(e), MODULE_CONTEXT, e)
                        out = CustomResponse(status, message)
                    out = out.get_res_json()
                    out['record_id'] = record_id
                    log_info("Output for Record Id:{} at {}".format(record_id, datetime.datetime.now()), MODULE_CONTEXT)
                    log_info("Total time for processing Record Id:{} is: {}".format(record_id, (datetime.datetime.now() - input_time).total_seconds()), MODULE_CONTEXT)
                else:
                    # inputs may be None or not a mapping here, so read from
                    # it defensively instead of raising and forcing a
                    # reconnect for one bad message.
                    is_mapping = isinstance(inputs, dict)
                    message = inputs.get('message') if is_mapping else None
                    record_id = inputs.get('record_id') if is_mapping else None
                    status = Status.KAFKA_INVALID_REQUEST.value
                    out = CustomResponse(status, message)
                    out = out.get_res_json()
                    if record_id:
                        out['record_id'] = record_id
                    log_info("Empty input request or key parameter missing in Batch translation request: batch_translator", MODULE_CONTEXT)

                producer.send(producer_topic, value={'out': out}, partition=partition)
                producer.flush()
                msg_sent += 1
                log_info("*******************msg sent count: {}; at {} **************".format(msg_sent, datetime.datetime.now()), MODULE_CONTEXT)
        except ValueError as e:
            # includes simplejson.decoder.JSONDecodeError
            log_exception("JSON decoding failed in KafkaTranslate-batch_translator method: {}".format(e), MODULE_CONTEXT, e)
            log_info("Reconnecting kafka c/p after exception handling", MODULE_CONTEXT)
        except Exception as e:
            log_exception("Exception caught in KafkaTranslate-batch_translator method: {}".format(e), MODULE_CONTEXT, e)
            log_info("Reconnecting kafka c/p after exception handling", MODULE_CONTEXT)
        else:
            # The consumer iterator finished without error: nothing left to do.
            return