예제 #1
0
 def post(self):
     '''
     v1/translate end point: translate a batch of sentences with a given model.

     The JSON body is parsed with ``force=True`` and must be an object
     holding ``src_list`` (a list of objects, each read through
     ``.get('src')``) and ``model_id``. Every ``src_list`` item is updated
     in place with the ``tgt``, ``tagged_tgt`` and ``tagged_src`` values
     found at the same index of the translator output, and the updated
     items form the response body.

     Returns:
         ``CustomResponse(...).jsonify_res()`` built with one of:
         SUCCESS, SYSTEM_ERR (any exception raised while translating,
         including a batch larger than ``translation_batch_limit``) or
         INVALID_API_REQUEST (body is not an object or a mandatory key is
         missing).
     '''
     inputs = request.get_json(force=True)
     required_keys = ('src_list', 'model_id')

     # Reject anything that is not a JSON object up front; a null/array/string
     # body would otherwise fail on len()/.get() instead of yielding a clean
     # "invalid request" response.
     if not (isinstance(inputs, dict)
             and all(key in inputs for key in required_keys)):
         log_info(
             "API input missing mandatory data ('src_list','model_id')",
             MODULE_CONTEXT)
         # Copy before editing: the enum member's value is shared by every
         # request, so writing the message into it would leak across calls.
         status = dict(Status.INVALID_API_REQUEST.value)
         status['message'] = "Missing mandatory data ('src_list','model_id')"
         out = CustomResponse(status, inputs)
         return out.jsonify_res()

     try:
         log_info("Making v1/translate API call", MODULE_CONTEXT)
         log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
         input_src_list = inputs.get('src_list')
         src_list = [item.get('src') for item in input_src_list]
         if len(src_list) > translation_batch_limit:
             raise Exception(
                 f"Number of sentences per request exceeded the limit of:{translation_batch_limit} sentences per batch"
             )
         translation_batch = {
             'id': inputs.get('model_id'),
             'src_list': src_list
         }
         output_batch = FairseqDocumentTranslateService.batch_translator(
             translation_batch)
         # Build all per-sentence results first so that a short translator
         # output raises before any input item has been modified.
         translated_fields = [{
             'tgt': output_batch['tgt_list'][idx],
             'tagged_tgt': output_batch['tagged_tgt_list'][idx],
             'tagged_src': output_batch['tagged_src_list'][idx]
         } for idx in range(len(input_src_list))]
         response_body = []
         for item, fields in zip(input_src_list, translated_fields):
             item.update(fields)
             response_body.append(item)
         out = CustomResponse(Status.SUCCESS.value, response_body)
         log_info(
             "Final output from v1/translate API: {}".format(
                 out.get_res_json()), MODULE_CONTEXT)
     except Exception as e:
         # Copy before editing so the exception text of this request does
         # not stay attached to the shared SYSTEM_ERR status.
         status = dict(Status.SYSTEM_ERR.value)
         status['message'] = str(e)
         log_exception(
             "Exception caught in batch_translator child block: {}".
             format(e), MODULE_CONTEXT, e)
         out = CustomResponse(status, inputs)
     return out.jsonify_res()
예제 #2
0
    def post(self):
        '''
        End point when only src and tgt language information is available.

        The JSON body is parsed with ``force=True`` and must be an object
        holding ``src_list`` (a list of objects, each read through
        ``.get('src')``), ``source_language_code`` and
        ``target_language_code``. The model id is resolved with
        ``get_model_id`` and every ``src_list`` item is updated in place
        with the ``tgt`` value found at the same index of the translator
        output.

        Returns:
            A ``(body, status_code, headers)`` tuple where ``body`` is
            ``CustomResponse(...).get_res_json()`` and the status code is:
            200 on success, 406 for a content type other than
            ``application/json``, 400 for an unsupported or identical
            language pair, 401 when mandatory data is missing and 500 when
            translation raises.
        '''
        content_type = 'application/json'
        # Every response of this end point carries the same headers.
        headers = {
            'Content-Type': content_type,
            'X-Content-Type-Options': 'nosniff'
        }
        required_keys = ('src_list', 'source_language_code',
                         'target_language_code')
        inputs = request.get_json(force=True)

        # NOTE(review): this is an exact comparison, so a header such as
        # "application/json; charset=utf-8" is rejected as well - confirm
        # that this is intended.
        if request.content_type != content_type:
            status = Status.INVALID_CONTENT_TYPE.value
            log_exception("v1.1 translate API | Invalid content type",
                          MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 406, headers

        # Reject anything that is not a JSON object holding all mandatory
        # keys; a null/array/string body would otherwise fail on len()/.get().
        if not (isinstance(inputs, dict)
                and all(key in inputs for key in required_keys)):
            # Copy before editing: the enum member's value is shared by every
            # request, so writing the message into it would leak across calls.
            status = dict(Status.INVALID_API_REQUEST.value)
            status[
                'message'] = "Missing mandatory data ('src_list','source_language_code','target_language_code')"
            log_exception(
                "v1.1 translate API | input missing mandatory data ('src_list','source_language_code','target_language_code')",
                MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            # NOTE(review): 401 normally signals an authentication failure;
            # 400 looks more appropriate here. Kept unchanged because
            # existing clients may depend on it - confirm before changing.
            return out.get_res_json(), 401, headers

        source_language = inputs.get('source_language_code')
        target_language = inputs.get('target_language_code')

        if (source_language not in supported_languages
                or target_language not in supported_languages):
            status = Status.UNSUPPORTED_LANGUAGE.value
            log_exception(
                "v1.1 translate API | Unsupported input language code",
                MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 400, headers

        if source_language == target_language:
            status = Status.SAME_LANGUAGE_VALUE.value
            log_exception(
                "v1.1 translate API | src and tgt code can't be same",
                MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 400, headers

        try:
            log_info("Making translate v1.1 API call", MODULE_CONTEXT)
            log_info("v1.1 translate API | input--- {}".format(inputs),
                     MODULE_CONTEXT)
            input_src_list = inputs.get('src_list')
            src_list = [item.get('src') for item in input_src_list]
            m_id = get_model_id(source_language, target_language)
            translation_batch = {
                'id': m_id,
                'src_lang': source_language,
                'tgt_lang': target_language,
                'src_list': src_list
            }
            output_batch = FairseqDocumentTranslateService.indic_to_indic_translator(
                translation_batch)
            # Build all per-sentence results first so that a short translator
            # output raises before any input item has been modified.
            translated_fields = [{
                'tgt': output_batch['tgt_list'][idx]
            } for idx in range(len(input_src_list))]
            response_body = []
            for item, fields in zip(input_src_list, translated_fields):
                item.update(fields)
                response_body.append(item)
            out = CustomResponse(Status.SUCCESS.value, response_body)
            log_info(
                "Final output v1.1 API | {}".format(out.get_res_json()),
                MODULE_CONTEXT)
            return out.get_res_json(), 200, headers
        except Exception as e:
            # Copy before editing so the exception text of this request does
            # not stay attached to the shared SYSTEM_ERR status.
            status = dict(Status.SYSTEM_ERR.value)
            status['message'] = str(e)
            log_exception(
                "Exception caught in v1.1 translate API resource child block: {}"
                .format(e), MODULE_CONTEXT, e)
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 500, headers
예제 #3
0
    def post(self):
        '''
        ULCA end point.

        The JSON body is parsed with ``force=True`` and must be an object
        holding ``input`` (a list of objects, each read through
        ``.get('source')``) and ``config`` (an object containing
        ``modelId``; ``config['language']`` is read only for model id 144).
        Every ``input`` item is updated in place with the ``target`` value
        found at the same index of the translator output.

        Returns:
            ``CustomResponse(...).jsonify_data()`` on success,
            ``(get_res_json_data(), 500)`` when translation raises
            (including a batch larger than ``translation_batch_limit``) and
            ``(get_res_json_data(), 400)`` when mandatory data is missing.
        '''
        inputs = request.get_json(force=True)
        config = inputs.get('config') if isinstance(inputs, dict) else None

        # ``config`` must be an object: testing "modelId" in a null config
        # raises TypeError, and a string/list config would pass the
        # membership test only to fail later on ``.get``.
        if not (isinstance(config, dict) and 'input' in inputs
                and 'modelId' in config):
            log_info(
                "ULCA API input missing mandatory data ('input','config','modelId')",
                MODULE_CONTEXT)
            # Copy before editing: the enum member's value is shared by every
            # request, so writing the message into it would leak across calls.
            status = dict(Status.INVALID_API_REQUEST.value)
            status[
                'message'] = "Missing mandatory data ('input','config','modelId')"
            out = CustomResponse(status, inputs)
            return out.get_res_json_data(), 400

        try:
            log_info("Making API call for ULCA endpoint", MODULE_CONTEXT)
            log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
            input_src_list = inputs.get('input')
            language = config.get('language')
            model_id = config.get('modelId')
            src_list = [item.get('source') for item in input_src_list]
            if len(src_list) > translation_batch_limit:
                raise Exception(
                    f"Number of sentences per request exceeded the limit of: {translation_batch_limit} sentences per batch"
                )

            # Model 144 is routed to the indic-to-indic translator, which
            # additionally needs the language pair; every other model goes
            # through the plain batch translator.
            if model_id == 144:
                translation_batch = {
                    'id': model_id,
                    'src_lang': language['sourceLanguage'],
                    'tgt_lang': language['targetLanguage'],
                    'src_list': src_list
                }
                output_batch = FairseqDocumentTranslateService.indic_to_indic_translator(
                    translation_batch)
            else:
                translation_batch = {'id': model_id, 'src_list': src_list}
                output_batch = FairseqDocumentTranslateService.batch_translator(
                    translation_batch)

            # Build all per-sentence results first so that a short translator
            # output raises before any input item has been modified.
            translated_fields = [{
                'target': output_batch['tgt_list'][idx]
            } for idx in range(len(input_src_list))]
            output = []
            for item, fields in zip(input_src_list, translated_fields):
                item.update(fields)
                output.append(item)
            final_output = {'config': config, 'output': output}
            out = CustomResponse(Status.SUCCESS.value, final_output)
            log_info(
                "Final output from ULCA API: {}".format(
                    out.get_res_json()), MODULE_CONTEXT)
            return out.jsonify_data()
        except Exception as e:
            # Copy before editing so the exception text of this request does
            # not stay attached to the shared SYSTEM_ERR status.
            status = dict(Status.SYSTEM_ERR.value)
            status['message'] = str(e)
            log_exception(
                "Exception caught in  ULCA API child block: {}".format(e),
                MODULE_CONTEXT, e)
            out = CustomResponse(status, inputs)
            return out.get_res_json_data(), 500
예제 #4
0
    def batch_translator(c_topic):
        '''
        Consume batch translation requests and publish the results.

        Iterates over the consumer returned by ``get_consumer(c_topic)``.
        For every message the matching producer topic is looked up in
        ``config.kafka_topic``; the message value must be an object holding
        ``message`` (a list of objects, each read through ``.get('src')``),
        ``record_id`` and ``id`` (the model id). The translated items, or an
        error status, are sent as ``{'out': ...}`` to the producer topic on
        the partition the request arrived on.

        Any exception escaping the consumer loop is logged and followed by
        a fresh consumer/producer pair. The method returns only when the
        consumer stops yielding messages without raising.
        '''
        required_keys = ('message', 'record_id', 'id')

        # Reconnect in a loop instead of recursing: each recursive retry
        # would add a stack frame and keep the previous consumer/producer
        # referenced for the lifetime of the process.
        # NOTE(review): the replaced consumer/producer are not closed
        # explicitly - confirm whether get_consumer/get_producer need that.
        while True:
            log_info('KafkaTranslate: batch_translator',MODULE_CONTEXT)
            msg_count,msg_sent = 0,0
            consumer = get_consumer(c_topic)
            producer = get_producer()
            try:
                for msg in consumer:
                    producer_topic = [topic["producer"] for topic in config.kafka_topic if topic["consumer"] == msg.topic][0]
                    log_info("Producer for current consumer:{} is-{}".format(msg.topic,producer_topic),MODULE_CONTEXT)
                    msg_count +=1
                    log_info("*******************msg received count: {}; at {} ************".format(msg_count,datetime.datetime.now()),MODULE_CONTEXT)
                    inputs = msg.value
                    partition = msg.partition

                    if isinstance(inputs, dict) and all(key in inputs for key in required_keys):
                        # Bound before the try so the bookkeeping after it
                        # can always read them, whatever fails inside.
                        record_id = inputs.get('record_id')
                        input_time = datetime.datetime.now()
                        try:
                            log_info("Input for Record Id:{} at {}".format(record_id,input_time),MODULE_CONTEXT)
                            log_info("Running batch-translation on  {}".format(inputs),MODULE_CONTEXT)
                            message = inputs.get('message')
                            src_list = [item.get('src') for item in message]
                            translation_batch = {'id':inputs.get('id'),'src_list': src_list}
                            output_batch = FairseqDocumentTranslateService.batch_translator(translation_batch)
                            log_info("Output of translation batch service at :{}".format(datetime.datetime.now()),MODULE_CONTEXT)
                            # Build all per-sentence results first so that a
                            # short translator output raises before any
                            # request item has been modified.
                            translated_fields = [{'tgt': output_batch['tgt_list'][idx],
                                                  'tagged_tgt': output_batch['tagged_tgt_list'][idx],
                                                  'tagged_src': output_batch['tagged_src_list'][idx]}
                                                 for idx in range(len(message))]
                            response_body = []
                            for item, fields in zip(message, translated_fields):
                                item.update(fields)
                                response_body.append(item)

                            log_info("Record Id:{}; Final response body of current batch translation:{}".format(record_id,response_body),MODULE_CONTEXT)
                            out = CustomResponse(Status.SUCCESS.value,response_body)
                        except Exception as e:
                            # Copy before editing: the enum member's value is
                            # shared, so writing the message into it would
                            # leak this error text into later responses.
                            status = dict(Status.SYSTEM_ERR.value)
                            status['message'] = str(e)
                            log_exception("Exception caught in batch_translator child block: {}".format(e),MODULE_CONTEXT,e)
                            out = CustomResponse(status, inputs.get('message'))

                        out = out.get_res_json()
                        out['record_id'] = record_id
                        log_info("Output for Record Id:{} at {}".format(record_id,datetime.datetime.now()),MODULE_CONTEXT)
                        log_info("Total time for processing Record Id:{} is: {}".format(record_id,(datetime.datetime.now()- input_time).total_seconds()),MODULE_CONTEXT)
                    else:
                        # The value may be None or a non-object here; fall
                        # back to an empty mapping so an invalid-request
                        # reply is still produced instead of crashing.
                        payload = inputs if isinstance(inputs, dict) else {}
                        status = Status.KAFKA_INVALID_REQUEST.value
                        out = CustomResponse(status, payload.get('message'))
                        out = out.get_res_json()
                        if payload.get('record_id'):
                            out['record_id'] = payload.get('record_id')
                        log_info("Empty input request or key parameter missing in Batch translation request: batch_translator",MODULE_CONTEXT)

                    producer.send(producer_topic, value={'out':out},partition=partition)
                    producer.flush()
                    msg_sent += 1
                    log_info("*******************msg sent count: {}; at {} **************".format(msg_sent,datetime.datetime.now()),MODULE_CONTEXT)
                # The consumer stopped yielding messages without an error.
                return
            except ValueError as e:
                # Includes simplejson.decoder.JSONDecodeError.
                log_exception("JSON decoding failed in KafkaTranslate-batch_translator method: {}".format(e),MODULE_CONTEXT,e)
                log_info("Reconnecting kafka c/p after exception handling",MODULE_CONTEXT)
            except Exception as e:
                log_exception("Exception caught in KafkaTranslate-batch_translator method: {}".format(e),MODULE_CONTEXT,e)
                log_info("Reconnecting kafka c/p after exception handling",MODULE_CONTEXT)