def model_conversion(inputs):
    """Convert an OpenNMT-py checkpoint to a CTranslate2 model directory.

    Expects ``inputs`` to carry 'inp_model_path' (source checkpoint) and
    'out_dir' (directory name under the configured models root).  Returns a
    CustomResponse: SUCCESS on conversion, INCOMPLETE_API_REQUEST when a
    mandatory key is absent, SYSTEM_ERR on any conversion failure.
    """
    required = ['inp_model_path', 'out_dir']
    if any(key not in inputs for key in required):
        log_info("Missing either inp_model_path,out_dir in model conversion request", MODULE_CONTEXT)
        return CustomResponse(Status.INCOMPLETE_API_REQUEST.value, [])
    with open(config.ICONFG_FILE) as conf_file:
        conf_data = json.load(conf_file)
    # destination lives under the configured models root
    target_dir = os.path.join(conf_data['models_root'], inputs['out_dir'])
    try:
        log_info("Inside model_conversion-interactive_translate function", MODULE_CONTEXT)
        converter = ctranslate2.converters.OpenNMTPyConverter(inputs['inp_model_path'])
        saved_path = converter.convert(
            target_dir,          # output directory
            "TransformerBase",   # model specification name
            vmap=None,           # no vocabulary mapping file
            quantization=None,   # keep full-precision weights
            force=False)         # do not overwrite an existing conversion
        log_info("Interactive model converted and saved at: {}".format(saved_path), MODULE_CONTEXT)
        return CustomResponse(Status.SUCCESS.value, None)
    except Exception as e:
        log_exception("Error in model_conversion interactive translate: {} and {}".format(sys.exc_info()[0], e), MODULE_CONTEXT, e)
        err_status = Status.SYSTEM_ERR.value
        err_status['why'] = str(e)
        return CustomResponse(err_status, None)
def post(self):
    """Update a batch of words for a user (DigitalDocumentUpdateWordResource).

    Reads the user id from the 'userID' header (falling back to 'x-user-id')
    and a 'words' list from the JSON body.  Returns the standard success
    response with the words on success, or a 400 error payload otherwise.
    """
    userID = request.headers.get('userID')
    if userID is None:
        userID = request.headers.get('x-user-id')
    body = request.get_json()
    # BUG FIX: the original condition was
    #   if 'words' not in body and not body['words']:
    # which raises KeyError when 'words' is absent (the second operand is
    # still evaluated) and never rejects an empty list. 'or' is intended.
    if 'words' not in body or not body['words']:
        return post_error("Data Missing", "words are required", None), 400
    words = body['words']
    AppContext.adduserID(userID)
    log_info(
        "DigitalDocumentUpdateWordResource for user {}, number words to update {} request {}"
        .format(userID, len(words), body), AppContext.getContext())
    try:
        result = digitalRepo.update_words(userID, words)
        if result == True:
            res = CustomResponse(Status.SUCCESS.value, words)
            return res.getres()
        # repo returned an error payload (not True): surface it as a 400
        return result, 400
    except Exception as e:
        log_exception(
            "Exception in DigitalDocumentUpdateWordResource |{}".format(
                str(e)), AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to update word since data is missing",
                          None), 400
def post(self):
    """Store a digital document (DigitalDocumentSaveResource).

    Validates 'files', 'recordID' and metadata.userID from the JSON body,
    then delegates to digitalRepo.store.  Result protocol (as implemented
    below): None means success, False means missing data, anything else is
    returned as an error payload with HTTP 400.
    """
    body = request.get_json()
    if 'files' not in body or not body['files']:
        return post_error("Data Missing", "files is required", None), 400
    if 'recordID' not in body or not body['recordID']:
        return post_error("Data Missing", "recordID is required", None), 400
    # if 'jobID' not in body or not body['jobID']:
    #     return post_error("Data Missing","jobID is required",None), 400
    files = body['files']
    # assumes body['metadata'] exists and carries 'userID' — TODO confirm;
    # a request without 'metadata' would raise KeyError here (HTTP 500).
    userID = body['metadata']['userID']
    recordID = body['recordID']
    if not userID:
        return post_error("Data Missing", "userID is required", None), 400
    AppContext.addRecordID(recordID)
    # NOTE(review): this logs 'Missing params' on every request, even valid
    # ones — the message looks copy-pasted from the failure branches below.
    log_info(
        'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
        .format(body, userID, recordID), AppContext.getContext())
    try:
        AppContext.addRecordID(recordID)
        log_info(
            'DigitalDocumentSaveResource request received, user_id:{}, record_id:{}'
            .format(userID, recordID), AppContext.getContext())
        result = digitalRepo.store(userID, recordID, files)
        if result == False:
            log_info(
                'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                .format(body, userID, recordID), AppContext.getContext())
            return post_error("Data Missing",
                              "Failed to store doc since data is missing",
                              None), 400
        elif result is None:
            # None (not True) is the success signal from digitalRepo.store
            AppContext.addRecordID(recordID)
            log_info(
                'DigitalDocumentSaveResource request completed, user_id:{}, record_id:{}'
                .format(userID, recordID), AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, None)
            return res.getres()
        else:
            # any other truthy result is treated as an error payload
            log_info(
                'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                .format(body, userID, recordID), AppContext.getContext())
            return result, 400
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            "Exception on save document | DigitalDocumentSaveResource :{}".
            format(str(e)), AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to store doc since data is missing",
                          None), 400
def get(self):
    """Return a page range of a digital document (DigitalDocumentGetResource).

    Query args: start_page, end_page (both 0 selects the whole document) and
    recordID.  Responds with the pages and total page count, or a 400 error
    payload when the lookup fails.
    """
    parser = reqparse.RequestParser()
    parser.add_argument(
        'start_page', type=int, location='args', required=True,
        help='start_page can be 0, set start_page & end_page as 0 to get entire document')
    parser.add_argument(
        'end_page', type=int, location='args', required=True,
        help='end_page can be 0, set start_page & end_page as 0 to get entire document')
    parser.add_argument(
        'recordID', type=str, location='args', required=True,
        help='record_id is required')
    args = parser.parse_args()
    record_id = args['recordID']
    AppContext.addRecordID(record_id)
    log_info("DigitalDocumentGetResource record_id {} ".format(record_id),
             AppContext.getContext())
    try:
        result = digitalRepo.get_pages(record_id, args['start_page'],
                                       args['end_page'])
        if result == False:
            return post_error("Data Missing",
                              "Failed to get pages since data is missing",
                              None), 400
        AppContext.addRecordID(record_id)
        log_info(
            "DigitalDocumentGetResource record_id {} has {} pages".format(
                record_id, result['total']), AppContext.getContext())
        response = CustomResponse(Status.SUCCESS.value, result['pages'],
                                  result['total'])
        return response.getres()
    except Exception as e:
        AppContext.addRecordID(record_id)
        log_exception(
            "Exception in DigitalDocumentGetResource |{}".format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing",
                          "Failed to get pages since data is missing",
                          None), 400
def get(self):
    """Return a page range of stored file content (FileContentGetResource).

    Requires start_page/end_page/record_id query args and an 'ad-userid'
    header; 'job_id' is accepted but optional.  Responds with the pages and
    total count, or a 400 missing-parameters payload on failure.
    """
    parser = reqparse.RequestParser()
    parser.add_argument(
        'start_page', type=int, location='args', required=True,
        help='start_page can be 0, set start_page & end_page as 0 to get entire document')
    parser.add_argument(
        'end_page', type=int, location='args', required=True,
        help='end_page can be 0, set start_page & end_page as 0 to get entire document')
    parser.add_argument('ad-userid', type=str, location='headers',
                        required=True, help='userid cannot be empty')
    parser.add_argument('job_id', type=str, location='args',
                        required=False, help='Job Id is required')
    parser.add_argument('record_id', type=str, location='args',
                        required=True, help='record_id is required')
    args = parser.parse_args()
    user_id = args['ad-userid']
    record_id = args['record_id']
    AppContext.addRecordID(record_id)
    log_info(
        "FileContentGetResource record_id {} for user {}".format(
            record_id, user_id), AppContext.getContext())
    try:
        result = fileContentRepo.get(user_id, record_id,
                                     args['start_page'], args['end_page'])
        if result == False:
            failure = CustomResponse(
                Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return failure.getresjson(), 400
        log_info(
            "FileContentGetResource record_id {} for user {} has {} pages".
            format(record_id, user_id, result['total']),
            AppContext.getContext())
        response = CustomResponse(Status.SUCCESS.value, result['pages'],
                                  result['total'])
        return response.getres()
    except Exception as e:
        log_exception("FileContentGetResource ", AppContext.getContext(), e)
        failure = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
        return failure.getresjson(), 400
def post(self):
    """Trigger interactive model conversion via ModelConvertService."""
    payload = request.get_json(force=True)
    if len(payload) == 0:
        # empty JSON body: reject without touching the service
        log_info("null inputs in request in interactive-translation API",
                 MODULE_CONTEXT)
        return CustomResponse(Status.INVALID_API_REQUEST.value, None).getres()
    log_info("Making interactive-model-convert API call", MODULE_CONTEXT)
    return ModelConvertService.model_conversion(payload).getres()
def post(self):
    """v3/translate-anuvaad endpoint: delegate to OpenNMTTranslateService."""
    payload = request.get_json(force=True)
    if len(payload) == 0:
        log_info("null inputs in request in translate-anuvaad API",
                 MODULE_CONTEXT)
        return CustomResponse(Status.INVALID_API_REQUEST.value, None).getres()
    log_info("Making v3/translate-anuvaad API call", MODULE_CONTEXT)
    log_info("inputs---{}".format(payload), MODULE_CONTEXT)
    result = OpenNMTTranslateService.translate_func(payload)
    log_info(
        "Final output from v3/translate-anuvaad API: {}".format(
            result.getresjson()), MODULE_CONTEXT)
    return result.getres()
def post(self):
    """v2/interactive-translation endpoint: delegate to TranslateService."""
    payload = request.get_json(force=True)
    if len(payload) == 0:
        log_info("null inputs in request in v2/interactive-translation API",
                 MODULE_CONTEXT)
        return CustomResponse(Status.INVALID_API_REQUEST.value, None).getres()
    log_info("Making v2/interactive-translation API call", MODULE_CONTEXT)
    log_info("inputs---{}".format(payload), MODULE_CONTEXT)
    result = TranslateService.interactive_translation(payload)
    log_info(
        "out from v2/interactive-translation done: {}".format(
            result.getresjson()), MODULE_CONTEXT)
    return result.getres()
def post(self):
    """v1/translate endpoint: batch-translate a list of sentences.

    Expects JSON with 'src_list' (list of dicts each carrying 'src') and
    'model_id'.  Each input dict is echoed back enriched with 'tgt',
    'tagged_tgt' and 'tagged_src' from the translator output.
    """
    translation_batch = {}
    src_list, response_body = list(), list()
    inputs = request.get_json(force=True)
    if len(inputs) > 0 and all(v in inputs for v in ['src_list', 'model_id']):
        try:
            log_info("Making v1/translate API call", MODULE_CONTEXT)
            log_info("inputs---{}".format(inputs), MODULE_CONTEXT)
            input_src_list = inputs.get('src_list')
            src_list = [i.get('src') for i in input_src_list]
            # hard cap on batch size; translation_batch_limit is module-level
            if len(src_list) > translation_batch_limit:
                raise Exception(
                    f"Number of sentences per request exceeded the limit of:{translation_batch_limit} sentences per batch"
                )
            translation_batch = {
                'id': inputs.get('model_id'),
                'src_list': src_list
            }
            output_batch = FairseqDocumentTranslateService.batch_translator(
                translation_batch)
            # assumes the three output lists are index-aligned with
            # input_src_list — TODO confirm against batch_translator
            output_batch_dict_list = [{
                'tgt': output_batch['tgt_list'][i],
                'tagged_tgt': output_batch['tagged_tgt_list'][i],
                'tagged_src': output_batch['tagged_src_list'][i]
            } for i in range(len(input_src_list))]
            for j, k in enumerate(input_src_list):
                # mutate each input dict in place with its translation
                k.update(output_batch_dict_list[j])
                response_body.append(k)
            out = CustomResponse(Status.SUCCESS.value, response_body)
            log_info(
                "Final output from v1/translate API: {}".format(
                    out.get_res_json()), MODULE_CONTEXT)
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            # NOTE(review): Status.SYSTEM_ERR.value looks like a shared dict;
            # mutating 'message' here may leak across requests — confirm.
            status['message'] = str(e)
            log_exception(
                "Exception caught in batch_translator child block: {}".
                format(e), MODULE_CONTEXT, e)
            out = CustomResponse(status, inputs)
        return out.jsonify_res()
    else:
        log_info(
            "API input missing mandatory data ('src_list','model_id')",
            MODULE_CONTEXT)
        status = Status.INVALID_API_REQUEST.value
        status['message'] = "Missing mandatory data ('src_list','model_id')"
        out = CustomResponse(status, inputs)
        return out.jsonify_res()
def post(self):
    """v0/interactive-translation endpoint: constrained (auto-complete)
    translation via FairseqAutoCompleteTranslateService."""
    payload = request.get_json(force=True)
    if len(payload) == 0:
        log_info(
            "null inputs in request in v0/interactive-translation API",
            MODULE_CONTEXT)
        out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
        return out.jsonify_res()
    log_info("Making v0/interactive-translation API call", MODULE_CONTEXT)
    log_info("inputs---{}".format(payload), MODULE_CONTEXT)
    result = FairseqAutoCompleteTranslateService.constrained_translation(
        payload)
    log_info(
        "out from v0/interactive-translation done: {}".format(
            result.getresjson()), MODULE_CONTEXT)
    return result.jsonify_res()
def post(self):
    """Legacy interactive-translation endpoint.

    Runs TranslateService.interactive_translation and then flattens each
    result: the first candidate of 'tgt'/'tagged_tgt' is promoted from a
    list to a scalar before the response is jsonified.
    """
    payload = request.get_json(force=True)
    if len(payload) == 0:
        log_info("null inputs in request in interactive-translation API",
                 MODULE_CONTEXT)
        return CustomResponse(Status.INVALID_API_REQUEST.value, None).getres()
    log_info("Making interactive-translation API call", MODULE_CONTEXT)
    log_info("inputs---{}".format(payload), MODULE_CONTEXT)
    result_json = TranslateService.interactive_translation(payload).getresjson()
    flattened = []
    for item in result_json['response_body']:
        flattened.append({
            "tgt": item['tgt'][0],
            "tagged_tgt": item['tagged_tgt'][0],
            "tagged_src": item['tagged_src'],
            "s_id": item['s_id'],
            "src": item["src"]
        })
    result_json['response_body'] = flattened
    log_info("out from interactive-translation done: {}".format(result_json),
             MODULE_CONTEXT)
    return CustomResponse.jsonify(result_json)
def find_performance_pipeline(input_text_file, model_id, batch_size, max_batch_size, batch_type):
    '''
    Given an input english text file (with one sentence per line)
    returns average time taken per word for each step in the pipeline of
    translation: an 8-tuple of per-word times for (model loading,
    preprocessing, tokenizing, encoding, translating, decoding,
    detokenizing, postprocessing).

    NOTE(review): on exception this returns a CustomResponse instead of the
    8-tuple — callers must handle both shapes; confirm this is intended.
    '''
    try:
        with open(input_text_file, 'r') as f:
            input_text_array = f.readlines()
        # drop the trailing newline of each line
        input_text_array = [sent[:-1] for sent in input_text_array]
        # input_text_array = input_text_array[:500]
        word_count = 0
        for sentence in input_text_array:
            word_count += len(sentence.split())
        # split sentences into batches; the last batch absorbs the remainder
        batch_input_array = []
        num_of_batch = len(input_text_array) // batch_size
        for i in range(num_of_batch):
            prev_index = i * batch_size
            if (prev_index + batch_size) < len(input_text_array):
                input_batch = {'id': model_id, 'src_list': input_text_array[prev_index: prev_index + batch_size]}
            else:
                input_batch = {'id': model_id, 'src_list': input_text_array[prev_index:]}
            batch_input_array.append(input_batch)
        # one accumulator per pipeline stage
        time_model_loading_array, time_preprocessing_array, time_tokenizing_array, time_encoding_array, \
        time_translating_array, time_decoding_array, time_detokenizing_array, time_postprocessing_array = [], [], [], [], [], [], [], []
        for batch_input in batch_input_array:
            time_taken_dict = NMTTranslatePerformanceService.batch_translator(batch_input, max_batch_size, batch_type)
            time_model_loading_array.append(time_taken_dict["time_model_loading"])
            time_preprocessing_array.append(time_taken_dict["time_preprocessing"])
            time_tokenizing_array.append(time_taken_dict["time_tokenizing"])
            time_encoding_array.append(time_taken_dict["time_encoding"])
            time_translating_array.append(time_taken_dict["time_translating"])
            time_decoding_array.append(time_taken_dict["time_decoding"])
            time_detokenizing_array.append(time_taken_dict["time_detokenizing"])
            time_postprocessing_array.append(time_taken_dict["time_postprocessing"])
        # normalize each stage's total time by the corpus word count
        return sum(time_model_loading_array) / word_count, sum(time_preprocessing_array) / word_count, \
            sum(time_tokenizing_array) / word_count, sum(time_encoding_array) / word_count, \
            sum(time_translating_array) / word_count, sum(time_decoding_array) / word_count, \
            sum(time_detokenizing_array) / word_count, sum(time_postprocessing_array) / word_count
    except Exception as e:
        status = Status.SYSTEM_ERR.value
        log_exception("Exception caught in performance check: {} ".format(e), MODULE_CONTEXT, e)
        out = CustomResponse(status, [])
        return out
def find_performance(input_text_file, model_id, batch_size):
    '''
    Given an input english text file (with one sentence per line)
    returns average number of words translated per second by the document
    translator, together with the flat list of translated sentences.
    On failure a CustomResponse carrying SYSTEM_ERR is returned instead.
    '''
    try:
        with open(input_text_file, 'r') as handle:
            raw_lines = handle.readlines()
        # strip the trailing newline of each line
        sentences = [line[:-1] for line in raw_lines]
        word_count = sum(len(sentence.split()) for sentence in sentences)
        # chunk into batches; the final batch absorbs any remainder
        total = len(sentences)
        batches = []
        for idx in range(total // batch_size):
            start = idx * batch_size
            if (start + batch_size) < total:
                chunk = sentences[start: start + batch_size]
            else:
                chunk = sentences[start:]
            batches.append({'id': model_id, 'src_list': chunk})
        durations = []
        translated_groups = []
        for batch in batches:
            began = time.time()
            out_batch = NMTTranslateService.batch_translator(batch)
            durations.append(time.time() - began)
            translated_groups.append(out_batch['tgt_list'])
        avg_words_per_sec = word_count / sum(durations)
        flat_translations = [
            sentence for group in translated_groups for sentence in group
        ]
        return avg_words_per_sec, flat_translations
    except Exception as e:
        status = Status.SYSTEM_ERR.value
        log_exception("Exception caught in performance check: {} ".format(e), MODULE_CONTEXT, e)
        return CustomResponse(status, [])
def get(self, user_id, s_id):
    """Fetch one sentence block (SentenceBlockGetResource).

    Looks up the block by user id and sentence id and returns it wrapped in
    the standard success envelope, or a 400 missing-parameters payload on
    failure.
    """
    AppContext.addRecordID(None)
    log_info(
        "SentenceBlockGetResource {} for user {}".format(s_id, user_id),
        AppContext.getContext())
    try:
        result = SentenceRepositories.get_sentence_block(user_id, s_id)
        if result == False:
            res = CustomResponse(
                Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return res.getresjson(), 400
        # BUG FIX: the original constructed this CustomResponse and then
        # returned the raw `result`, leaving the wrapper dead — return the
        # wrapped response like every sibling resource does.
        res = CustomResponse(Status.SUCCESS.value, result)
        return res.getres()
    except Exception as e:
        log_exception("SentenceBlockGetResource ", AppContext.getContext(),
                      e)
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                             None)
        return res.getresjson(), 400
def get(self):
    """Return the configured model list (FetchModelsResource).

    Reads the 'data' section of the fetch-models config file and returns it
    in the standard success envelope; SYSTEM_ERR on any failure.
    """
    log_info("FetchModelsResource api called", MODULE_CONTEXT)
    try:
        with open(config.FETCH_MODEL_CONFG) as conf_file:
            conf_data = json.load(conf_file)
        model_list = conf_data['data']
        return CustomResponse(Status.SUCCESS.value, model_list).getres()
    except Exception as e:
        log_exception("Error in FetchModelsResource: {}".format(e),
                      MODULE_CONTEXT, e)
        err_status = Status.SYSTEM_ERR.value
        err_status['why'] = str(e)
        return CustomResponse(err_status, None).getres()
def get(self):
    """Liveness probe for the NMT service: always reports success."""
    log_info("NMT Health api called", MODULE_CONTEXT)
    response = CustomResponse(Status.SUCCESS.value, [])
    return response.getres()
def post(self):
    '''
    End point when only src and tgt language information is available.

    Validates content type, presence of 'src_list'/'source_language_code'/
    'target_language_code', language support and src != tgt, resolves a
    model id, and batch-translates via
    FairseqDocumentTranslateService.indic_to_indic_translator.
    '''
    translation_batch = {}
    src_list, response_body = list(), list()
    content_type = 'application/json'
    inputs = request.get_json(force=True)
    # only JSON requests are accepted; anything else is 406
    if request.content_type != content_type:
        status = Status.INVALID_CONTENT_TYPE.value
        log_exception("v1.1 translate API | Invalid content type",
                      MODULE_CONTEXT, status['message'])
        out = CustomResponse(status, html_encode(inputs))
        return out.get_res_json(), 406, {
            'Content-Type': content_type,
            'X-Content-Type-Options': 'nosniff'
        }
    if len(inputs) > 0 and all(
            v in inputs for v in
        ['src_list', 'source_language_code', 'target_language_code']):
        if (inputs.get('source_language_code') not in supported_languages
            ) or (inputs.get('target_language_code')
                  not in supported_languages):
            status = Status.UNSUPPORTED_LANGUAGE.value
            log_exception(
                "v1.1 translate API | Unsupported input language code",
                MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 400, {
                'Content-Type': content_type,
                'X-Content-Type-Options': 'nosniff'
            }
        elif inputs.get('source_language_code') == inputs.get(
                'target_language_code'):
            status = Status.SAME_LANGUAGE_VALUE.value
            log_exception(
                "v1.1 translate API | src and tgt code can't be same",
                MODULE_CONTEXT, status['message'])
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 400, {
                'Content-Type': content_type,
                'X-Content-Type-Options': 'nosniff'
            }
        try:
            log_info("Making translate v1.1 API call", MODULE_CONTEXT)
            log_info("v1.1 translate API | input--- {}".format(inputs),
                     MODULE_CONTEXT)
            input_src_list = inputs.get('src_list')
            src_list = [i.get('src') for i in input_src_list]
            # resolve the model id from the language pair
            m_id = get_model_id(inputs.get('source_language_code'),
                                inputs.get('target_language_code'))
            translation_batch = {
                'id': m_id,
                'src_lang': inputs.get('source_language_code'),
                'tgt_lang': inputs.get('target_language_code'),
                'src_list': src_list
            }
            output_batch = FairseqDocumentTranslateService.indic_to_indic_translator(
                translation_batch)
            # assumes tgt_list is index-aligned with input_src_list — TODO confirm
            output_batch_dict_list = [{
                'tgt': output_batch['tgt_list'][i]
            } for i in range(len(input_src_list))]
            for j, k in enumerate(input_src_list):
                # each input dict is mutated in place with its translation
                k.update(output_batch_dict_list[j])
                response_body.append(k)
            out = CustomResponse(Status.SUCCESS.value, response_body)
            log_info(
                "Final output v1.1 API | {}".format(out.get_res_json()),
                MODULE_CONTEXT)
            return out.get_res_json(), 200, {
                'Content-Type': content_type,
                'X-Content-Type-Options': 'nosniff'
            }
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            # NOTE(review): Status.SYSTEM_ERR.value appears to be a shared
            # dict; mutating 'message' may leak across requests — confirm.
            status['message'] = str(e)
            log_exception(
                "Exception caught in v1.1 translate API resource child block: {}"
                .format(e), MODULE_CONTEXT, e)
            out = CustomResponse(status, html_encode(inputs))
            return out.get_res_json(), 500, {
                'Content-Type': content_type,
                'X-Content-Type-Options': 'nosniff'
            }
    else:
        status = Status.INVALID_API_REQUEST.value
        status[
            'message'] = "Missing mandatory data ('src_list','source_language_code','target_language_code')"
        log_exception(
            "v1.1 translate API | input missing mandatory data ('src_list','source_language_code','target_language_code')",
            MODULE_CONTEXT, status['message'])
        out = CustomResponse(status, html_encode(inputs))
        # NOTE(review): 401 (unauthorized) for missing body fields is
        # unusual — 400 would be conventional; confirm clients rely on it.
        return out.get_res_json(), 401, {
            'Content-Type': content_type,
            'X-Content-Type-Options': 'nosniff'
        }
def post(self):
    """Update content blocks for a user (FileContentUpdateResource).

    Requires a 'blocks' list in the JSON body and a user id in the 'userid'
    (or fallback 'ad-userid') header; 'workflowCode' is optional.  Returns
    the updated blocks, or a 400 missing-parameters payload.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('ad-userid')
    workflowCode = None
    if 'blocks' not in body or user_id is None:
        failure = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
        return failure.getresjson(), 400
    if 'workflowCode' in body:
        workflowCode = body['workflowCode']
    blocks = body['blocks']
    AppContext.addRecordID(None)
    log_info(
        "FileContentUpdateResource for user ({}), to update ({}) blocks".
        format(user_id, len(blocks)), AppContext.getContext())
    try:
        result, updated_blocks = fileContentRepo.update(
            user_id, blocks, workflowCode)
        if result == False:
            failure = CustomResponse(
                Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return failure.getresjson(), 400
        log_info(
            "FileContentUpdateResource for user ({}) updated".format(
                user_id), AppContext.getContext())
        payload = {'blocks': updated_blocks, 'workflowCode': workflowCode}
        response = CustomResponse(Status.SUCCESS.value, payload,
                                  len(updated_blocks))
        return response.getres()
    except Exception as e:
        log_exception("FileContentUpdateResource ", AppContext.getContext(),
                      e)
        failure = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                 None)
        return failure.getresjson(), 400
def post(self):
    """Store file content pages (FileContentSaveResource).

    Requires 'pages', 'record_id', 'src_lang', 'tgt_lang' in the JSON body
    and a user id in the 'userid' (or fallback 'ad-userid') header;
    'file_locale' and 'job_id' are optional.  Returns the standard success
    envelope, or a 400 missing-parameters payload.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('ad-userid')
    # BUG FIX: the original read body['pages'] before validating the body,
    # so a request without 'pages' raised KeyError (HTTP 500) instead of
    # returning the intended 400. Validate first, then read.
    record_id = body.get('record_id')
    src_lang = body.get('src_lang')
    tgt_lang = body.get('tgt_lang')
    if ('pages' not in body or user_id is None or record_id is None
            or src_lang is None or tgt_lang is None):
        log_info(
            'Missing params in FileContentSaveResource {}, user_id:{}'.
            format(body, user_id), AppContext.getContext())
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                             None)
        return res.getresjson(), 400
    pages = body['pages']
    file_locale = body.get('file_locale', '')
    job_id = body.get('job_id', '')  # accepted but unused downstream
    AppContext.addRecordID(record_id)
    log_info(
        "FileContentSaveResource record_id ({}) for user ({})".format(
            record_id, user_id), AppContext.getContext())
    try:
        if fileContentRepo.store(user_id, file_locale, record_id, pages,
                                 src_lang, tgt_lang) == False:
            res = CustomResponse(
                Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return res.getresjson(), 400
        log_info(
            "FileContentSaveResource record_id ({}) for user ({}) saved".
            format(record_id, user_id), AppContext.getContext())
        res = CustomResponse(Status.SUCCESS.value, None)
        return res.getres()
    except Exception as e:
        log_exception("FileContentSaveResource ", AppContext.getContext(),
                      e)
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                             None)
        return res.getresjson(), 400
def post(self):
    """Look up a word translation (WordSearch).

    Requires 'word', 'word_locale' and 'target_locale' in the body; one of
    the two locales must be 'en'.  Tries the local word repository first and
    falls back to the external translate service, persisting any new
    translation via wordRepo.update.
    """
    body = request.json
    log_info('received request for WordSearch', AppContext.getContext())
    if 'word' not in body or 'word_locale' not in body or 'target_locale' not in body:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                             None)
        return res.getresjson(), 400
    if (body['word_locale'] == 'en') or (body['target_locale'] == 'en'):
        result = None
        if body['word_locale'] == 'en':
            # english lookups are case-insensitive: normalize before search
            body['word'] = body['word'].lower()
            result = wordRepo.search_english(body['word'],
                                             body['target_locale'])
        else:
            result = wordRepo.search_vernacular(body['word'],
                                                body['word_locale'])
        if result == None:
            '''
            - call google apis to get the translation
            - save the translation
            - return the response
            '''
            log_info(
                'checking google for the searched word ({})'.format(
                    body['word']), AppContext.getContext())
            input_word, translated_word, input_locale = translate.translate_text(
                body['target_locale'], body['word'])
            log_info(
                'google returned input ({}), translated ({})'.format(
                    input_word, translated_word), AppContext.getContext())
            if translated_word == None:
                # no translation found: success with empty payload
                res = CustomResponse(Status.SUCCESS.value, None)
                return res.getres()
            else:
                # persist as (english, vernacular) pair, argument order
                # depending on which side the input word was on
                if body['word_locale'] == 'en':
                    result = wordRepo.update(body['word'], 'en',
                                             translated_word,
                                             body['target_locale'])
                else:
                    result = wordRepo.update(translated_word,
                                             body['target_locale'],
                                             body['word'],
                                             body['word_locale'])
                if result == None:
                    res = CustomResponse(Status.SUCCESS.value, None)
                    return res.getres()
                else:
                    res = CustomResponse(Status.SUCCESS.value, result)
                    return res.getres()
        else:
            log_info('returning word search from local database',
                     AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, result)
            return res.getres()
    else:
        # neither locale is english: unsupported pair
        res = CustomResponse(Status.ERR_ENGLISH_MANDATORY.value, None)
        return res.getresjson(), 400
def interactive_translation(inputs):
    """Run interactive translation over a list of input items.

    Each item in ``inputs`` is a dict expected to carry 'src' (source
    sentence) and 'id' (model id); 's_id' is optional.  Returns a
    CustomResponse: SUCCESS with one result per sentence,
    ID_OR_SRC_MISSING if a mandatory field is absent, or SYSTEM_ERR on any
    other failure.  Model ids are dispatched through a hard-coded chain;
    the bare strings inside each branch ("english-hindi", ...) are no-op
    statements used as branch labels.
    """
    out = {}
    # parallel accumulators, one entry per input sentence
    i_src, tgt = list(), list()
    tagged_tgt = list()
    tagged_src = list()
    sentence_id = list()
    tp_tokenizer = None
    try:
        for i in inputs:
            sentence_id.append(i.get("s_id") or "NA")
            if any(v not in i for v in ['src', 'id']):
                log_info("either id or src missing in some input",
                         MODULE_CONTEXT)
                out = CustomResponse(Status.ID_OR_SRC_MISSING.value, inputs)
                return out
            log_info("input sentence:{}".format(i['src']), MODULE_CONTEXT)
            i_src.append(i['src'])
            i['src'] = i['src'].strip()
            i['src_lang'], i['tgt_lang'] = misc.get_src_tgt_langauge(
                i['id'])
            i['src'] = misc.convert_digits_preprocess(
                i['src_lang'], i['src'])
            if special_case_handler.special_case_fits(i['src']):
                # rule-based short-circuit: the model is skipped entirely
                log_info(
                    "sentence fits in special case, returning accordingly and not going to model",
                    MODULE_CONTEXT)
                translation = special_case_handler.handle_special_cases(
                    i['src'], i['id'])
                translation = [translation]
                tag_tgt, tag_src = translation, i['src']
            else:
                log_info(
                    "Performing interactive translation on:{}".format(
                        i['id']), MODULE_CONTEXT)
                # replace numbers/dates/urls with placeholder tags so the
                # model never sees raw literals; restored after decoding
                i['src'], date_original, url_original, num_array, num_map = tagger_util.tag_number_date_url(
                    i['src'])
                tag_src = i['src']
                if i['id'] == 56:
                    "english-hindi"
                    if i['src'].isupper():
                        log_info(
                            "src all Upper case hence Tital casing it",
                            MODULE_CONTEXT)
                        i['src'] = i['src'].title()
                    tp_tokenizer = sentence_processor.indic_tokenizer
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.indic_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 7:
                    "english-tamil"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 10:
                    "english-gujarati"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 15:
                    "english-kannada"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 16:
                    "english-telugu"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 17:
                    "english-malayalam"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 18:
                    "english-punjabi"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 42:
                    "english-marathi"
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                elif i['id'] == 50:
                    "telugu-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 6:
                    "hindi-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 62:
                    "marathi-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 8:
                    "tamil-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 55:
                    "punjabi-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 48:
                    "kannada-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 60:
                    "malayalam-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 52:
                    "gujarati-english"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 65:
                    "english-bengali 4th"
                    tp_tokenizer = sentence_processor.indic_tokenizer
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.indic_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 66:
                    "bengali-english 3rd"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 67:
                    "ta-en 3rd"
                    # ids 67+ route through the v2 encoder/decoder
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 68:
                    "en-ta 5th"
                    tp_tokenizer = sentence_processor.indic_tokenizer
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.indic_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 69:
                    "hi-en 3rd"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 70:
                    "en-hi 15th"
                    tp_tokenizer = sentence_processor.indic_tokenizer
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.indic_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 71:
                    "te-en 2nd"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 72:
                    "en-te 3rd"
                    tp_tokenizer = sentence_processor.indic_tokenizer
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.indic_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 73:
                    "ml-en 2nd"
                    tp_tokenizer = sentence_processor.moses_tokenizer
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.moses_detokenizer(i)
                        for i in translation
                    ]
                elif i['id'] == 74:
                    "en-ml 3rd"
                    tp_tokenizer = sentence_processor.indic_tokenizer
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation = encode_itranslate_decode_v2(
                        i, num_map, tp_tokenizer)
                    translation = [
                        sentence_processor.indic_detokenizer(i)
                        for i in translation
                    ]
                else:
                    log_info(
                        "unsupported model id: {} for given input".format(
                            i['id']), MODULE_CONTEXT)
                    raise Exception(
                        "Unsupported Model ID - id: {} for given input".
                        format(i['id']))
                # strip sentencepiece markers left by the model
                translation = [i.replace("▁", " ") for i in translation]
                translation = [
                    misc.regex_pass(i, [
                        patterns['p8'], patterns['p9'], patterns['p4'],
                        patterns['p5'], patterns['p6'], patterns['p7']
                    ]) for i in translation
                ]
                tag_tgt = translation
                # restore the literals that were replaced with tags above
                translation = [
                    tagger_util.replace_tags_with_original(
                        i, date_original, url_original, num_array, num_map)
                    for i in translation
                ]
            translation = [
                misc.convert_digits_postprocess(i['tgt_lang'], item)
                for item in translation
            ]
            log_info(
                "interactive translation-experiment-{} output: {}".format(
                    i['id'], translation), MODULE_CONTEXT)
            tgt.append(translation)
            tagged_tgt.append(tag_tgt)
            tagged_src.append(tag_src)
        out['response_body'] = [{
            "tgt": tgt[i],
            "tagged_tgt": tagged_tgt[i],
            "tagged_src": tagged_src[i],
            "s_id": sentence_id[i],
            "src": i_src[i]
        } for i in range(len(tgt))]
        out = CustomResponse(Status.SUCCESS.value, out['response_body'])
    except Exception as e:
        status = Status.SYSTEM_ERR.value
        status['why'] = str(e)
        log_exception(
            "Unexpected error:%s and %s" % (e, sys.exc_info()[0]),
            MODULE_CONTEXT, e)
        out = CustomResponse(status, inputs)
    return out
def post(self):
    """Persist a batch of updated sentences for a user and echo them back.

    Expects a JSON body with 'sentences' and 'workflowCode', and a user id in
    the 'userid' header (or the fallback 'x-user-id' header).  Responds 400
    on missing parameters or on a repository failure.
    """
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        # some clients send the id under the alternate header name
        user_id = request.headers.get('x-user-id')
    if 'sentences' not in body or user_id is None or 'workflowCode' not in body:
        log_info(
            'Missing params in SaveSentenceResource {}, user_id:{}'.format(
                body, user_id), AppContext.getContext())
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
    sentences = body['sentences']
    workflowCode = body['workflowCode']
    AppContext.addRecordID(None)
    log_info(
        "SaveSentenceResource for user {}, number sentences to update {} request {}"
        .format(user_id, len(sentences), body), AppContext.getContext())
    try:
        updated = sentenceRepo.update_sentences(user_id, sentences, workflowCode)
        if updated == False:
            return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                  None).getresjson(), 400
        if USER_TRANSLATION_ENABLED:
            # best effort: failing to archive the user translation must not
            # fail the main update, so the exception is only logged
            try:
                sentenceRepo.save_sentences(user_id, sentences)
            except Exception as e:
                log_exception("SaveSentenceResource",
                              AppContext.getContext(), e)
        return CustomResponse(Status.SUCCESS.value, sentences).getres()
    except Exception as e:
        log_exception("SaveSentenceResource ", AppContext.getContext(), e)
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
def post(self):
    """Fetch sentence records from the redis store for the requested keys.

    The JSON body must carry a non-empty 'keys' list; responds 400 when the
    keys are missing/empty or the store lookup fails.
    """
    payload = request.get_json()
    # 'keys' must be present AND non-empty
    if "keys" not in payload or not payload["keys"]:
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
    keys = payload["keys"]
    log_info("Fetching sentences from redis store", AppContext.getContext())
    try:
        sentences = sentenceRepo.get_sentences_from_store(keys)
        if sentences is None:
            return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                  None).getresjson(), 400
        return CustomResponse(Status.SUCCESS.value, sentences).getres()
    except Exception as e:
        log_exception("Exception while fetching sentences from redis store ",
                      AppContext.getContext(), e)
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
def post(self):
    """Return the sentences identified by body['sentences'] for this user.

    The user id is read from the 'userid' header with 'x-user-id' as a
    fallback.  Responds 400 when either the id or the sentence-id list is
    missing, or when the repository lookup fails.
    """
    payload = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')
    s_ids = payload['sentences'] if 'sentences' in payload else None
    if user_id is None or s_ids is None:
        log_info(
            'Missing params in FetchSentenceResource {}, user_id:{}'.
            format(payload, user_id), AppContext.getContext())
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
    AppContext.addRecordID(None)
    log_info(
        "FetchSentenceResource s_ids {} for user {}".format(
            len(s_ids), user_id), AppContext.getContext())
    try:
        fetched = sentenceRepo.get_sentence(user_id, s_ids)
        if fetched == False:
            return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                  None).getresjson(), 400
        return CustomResponse(Status.SUCCESS.value, fetched).getres()
    except Exception as e:
        log_exception("FetchSentenceResource ", AppContext.getContext(), e)
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
def post(self):
    """Return per-record sentence counts, optionally including BLEU scores.

    Requires 'record_ids' in the body and a user id in the 'userid' (or
    'x-user-id') header; 'bleu_score' is an optional flag defaulting to
    False.  Responds 400 on missing parameters or repository failure.
    """
    payload = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')
    if 'record_ids' not in payload or user_id is None:
        log_info(
            'Missing params in SentenceStatisticsCount {}, user_id:{}'.
            format(payload, user_id), AppContext.getContext())
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
    record_ids = payload['record_ids']
    # BLEU scores are computed only when the caller explicitly asks for them
    bleu_return = payload['bleu_score'] if 'bleu_score' in payload else False
    AppContext.addRecordID(None)
    log_info(
        "SentenceStatisticsCount for user {}, sentence count for record_ids {}"
        .format(user_id, record_ids), AppContext.getContext())
    try:
        counts = sentenceRepo.get_sentences_counts(record_ids, bleu_return)
        if counts == False:
            return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                                  None).getresjson(), 400
        return CustomResponse(Status.SUCCESS.value, counts).getres()
    except Exception as e:
        log_exception("SentenceStatisticsCount ", AppContext.getContext(), e)
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
def post(self):
    """Run an NMT performance benchmark and return timing statistics.

    Expects a JSON array; each element carries 'input_txt_file', 'model_id'
    and batch parameters plus a 'mode' selector:
      mode 0 -- end-to-end words/sec; translations are written to a file in
                the caller's home directory.
      mode 1 -- per-pipeline-stage average times per word.
    NOTE: only the first element of the array is ever processed -- both
    branches return from inside the loop (preserved for compatibility).
    """
    inputs = request.get_json(force=True)
    if len(inputs) > 0:
        log_info("Making performance check API call", MODULE_CONTEXT)
        try:
            for i in inputs:
                if i['mode'] == 0:
                    avg_words_per_sec, target_array = BatchNMTPerformanceService.find_performance(
                        i['input_txt_file'], i['model_id'], i['batch_size'])
                    # output file: <input-stem>_<model>_<batch>_output.txt in $HOME
                    output_file_name = os.path.basename(i['input_txt_file']).split(".")[0] + "_" + \
                        str(i['model_id']) + "_" + str(i['batch_size']) + "_" + "output" + ".txt"
                    with open(os.path.join(str(Path.home()), output_file_name), 'w') as f:
                        for sentence in target_array:
                            f.write("%s\n" % sentence)
                    out = {}
                    out['response_body'] = {"words_per_sec": avg_words_per_sec}
                    out = CustomResponse(Status.SUCCESS.value, out['response_body'])
                    log_info(
                        "out from performance check done: {}".format(
                            out.getresjson()), MODULE_CONTEXT)
                    return out.getres()
                elif i['mode'] == 1:
                    time_taken_array = BatchNMTPerformanceService.find_performance_pipeline(
                        i['input_txt_file'], i['model_id'], i['batch_size'],
                        i['max_batch_size'], i['batch_type'])
                    out = {}
                    out['response_body'] = {
                        "avg_time_loading_per_word": time_taken_array[0],
                        "avg_time_preprocessing_per_word": time_taken_array[1],
                        "avg_time_tokenizing_per_word": time_taken_array[2],
                        "avg_time_encoding_per_word": time_taken_array[3],
                        "avg_time_translating_per_word": time_taken_array[4],
                        "avg_time_decoding_per_word": time_taken_array[5],
                        "avg_time_detokenizing_per_word": time_taken_array[6],
                        "avg_time_postprocessing_per_word": time_taken_array[7]
                    }
                    out = CustomResponse(Status.SUCCESS.value, out['response_body'])
                    log_info(
                        "out from performance check done: {}".format(
                            out.getresjson()), MODULE_CONTEXT)
                    return out.getres()
                else:
                    # FIX: previously an unsupported mode fell through the loop
                    # and the method returned None, which Flask surfaced as an
                    # opaque HTTP 500; reject it explicitly instead.
                    log_info(
                        "unsupported mode in request in /v0/performance API",
                        MODULE_CONTEXT)
                    out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
                    return out.getres()
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            status['why'] = str(e)
            out = CustomResponse(status, [])
            return out.getres()
    else:
        log_info("null inputs in request in /v0/performance API",
                 MODULE_CONTEXT)
        out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
        return out.getres()
def post(self):
    """Align source phrases against a target sentence via the LaBSE aligner.

    Expects a JSON array of objects each carrying 'src_phrases' and 'tgt'.
    All inputs are validated; any element missing a mandatory field aborts
    the request with MANDATORY_PARAM_MISSING.
    """
    inputs = request.get_json(force=True)
    response_list = list()
    if len(inputs) > 0:
        log_info("Making labse-aligner(Resource) API call", MODULE_CONTEXT)
        log_info("Complete request input: {}".format(inputs), MODULE_CONTEXT)
        try:
            for i in inputs:
                if all(v in i for v in ["src_phrases", "tgt"]):
                    log_info("Making labse-aligner service call",
                             MODULE_CONTEXT)
                    res = LabseAlignerService.phrase_aligner(i)
                    response_list.append(res)
                    out = CustomResponse(Status.SUCCESS.value, response_list)
                else:
                    log_info(
                        "Missing mandatory Parameters for labse-aligner:src_phrases or tgt",
                        MODULE_CONTEXT)
                    out = CustomResponse(
                        Status.MANDATORY_PARAM_MISSING.value, [])
                    # abort immediately on the first invalid element
                    return out.getres()
            # FIX: the original had a single return covering only one of the
            # two paths, so the other path fell off the end of the method and
            # returned None (HTTP 500); both paths now return explicitly.
            return out.getres()
        except Exception as e:
            status = Status.SYSTEM_ERR.value
            status['why'] = str(e)
            out = CustomResponse(status, [])
            return out.getres()
    else:
        log_info("null inputs in request in labse-aligner API",
                 MODULE_CONTEXT)
        out = CustomResponse(Status.INVALID_API_REQUEST.value, None)
        return out.getres()
def post(self):
    """Store a batch of dictionary words; only English ('en') entries are accepted."""
    payload = request.json
    log_info('received request for WordSaveResource',
             AppContext.getContext())
    # an absent body and a body without 'words' map to the same
    # missing-parameters response, so the guards are merged
    if payload is None or 'words' not in payload:
        return CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                              None).getresjson(), 400
    for entry in payload['words']:
        if entry['locale'] != 'en':
            # saving is only allowed for English source words
            return CustomResponse(
                Status.ERR_ENGLISH_MANDATORY_WHILE_SAVING.value,
                None).getresjson(), 400
    if wordRepo.store(payload['words']) == False:
        return CustomResponse(Status.ERR_SCHEMA_VALIDATION.value,
                              None).getresjson(), 400
    return CustomResponse(Status.SUCCESS.value, None).getres()
def post(self):
    """Look up a word translation, falling back to Google Translate on a miss.

    The JSON body must carry 'word', 'word_locale' and 'target_locale', and
    one side of the pair must be English.  A local repository hit is returned
    directly; on a miss (with fallback enabled) Google Translate is queried
    and the result is stored before being returned.
    """
    body = request.json
    parser = reqparse.RequestParser()
    parser.add_argument('dict_fallback', type=int, location='args',
                        help='set 1 to invoke google transalte and 0 to not',
                        required=False, default=1)
    args = parser.parse_args()
    # NOTE(review): dict_fallback is parsed from the query string but never
    # used below -- the module-level DICTIONARY_FALLBACK flag gates the
    # Google fallback instead.  Confirm whether the arg was meant to win.
    dict_fallback = args["dict_fallback"]
    log_info('received request for WordSearch', AppContext.getContext())
    if 'word' not in body or 'word_locale' not in body or 'target_locale' not in body:
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value,
                             None)
        return res.getresjson(), 400
    # one side of the language pair must be English
    if (body['word_locale'] == 'en') or (body['target_locale'] == 'en'):
        result = None
        if body['word_locale'] == 'en':
            # English lookups are case-insensitive: normalize before search
            body['word'] = body['word'].lower()
            result = wordRepo.search_english(body['word'],
                                             body['target_locale'])
        else:
            result = wordRepo.search_vernacular(body['word'],
                                                body['word_locale'])
        if result == None and DICTIONARY_FALLBACK == True:
            translate = GoogleTranslate()
            '''
            - call google apis to get the translation
            - save the translation
            - return the response
            '''
            log_info(
                'checking google for the searched word ({})'.format(
                    body['word']), AppContext.getContext())
            input_word, translated_word, input_locale = translate.translate_text(
                body['target_locale'], body['word'])
            log_info(
                'google returned input ({}), translated ({})'.format(
                    input_word, translated_word), AppContext.getContext())
            if translated_word == None:
                # Google had no translation either: success with empty payload
                res = CustomResponse(Status.SUCCESS.value, None)
                return res.getres()
            else:
                # persist the new pair (English side always stored first)
                if body['word_locale'] == 'en':
                    result = wordRepo.update(body['word'], 'en',
                                             translated_word,
                                             body['target_locale'])
                else:
                    result = wordRepo.update(translated_word,
                                             body['target_locale'],
                                             body['word'],
                                             body['word_locale'])
                if result == None:
                    res = CustomResponse(Status.SUCCESS.value, None)
                    return res.getres()
                else:
                    res = CustomResponse(Status.SUCCESS.value, result)
                    return res.getres()
        else:
            # repository hit (or fallback disabled): return whatever we have
            log_info('returning word search from local database',
                     AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, result)
            return res.getres()
    else:
        res = CustomResponse(Status.ERR_ENGLISH_MANDATORY.value, None)
        return res.getresjson(), 400
def translate_func(inputs):
    """Translate a batch of sentences, dispatching on the model id of each.

    Each element of ``inputs`` must carry 'src' and 'id'; optional keys are
    's_id'/'n_id' (sentence/node identifiers), 's0_src'/'s0_tgt'/'save'
    (custom-input handling) and 'tmx_phrases'.  Returns a CustomResponse
    whose body is one record per input with the translation, prediction
    score, sub-words and tagging metadata.  On any failure a SYSTEM_ERR (or
    SEVER_MODEL_ERR for model errors) response wrapping the raw inputs is
    returned instead of raising.
    """
    inputs = inputs
    out = {}
    # parallel accumulators -- one entry appended per input, same order
    pred_score = list()
    sentence_id, node_id = list(), list()
    input_subwords, output_subwords = list(), list()
    i_src, tgt = list(), list()
    tagged_tgt, tagged_src = list(), list()
    # defaults used when an input carries no s_id/n_id of its own
    s_id, n_id = [0000], [0000]
    i_s0_src, i_s0_tgt, i_save = list(), list(), list()
    i_tmx_phrases = list()
    try:
        for i in inputs:
            s0_src, s0_tgt, save = "NA", "NA", False
            if all(v in i for v in ['s_id', 'n_id']):
                s_id = [i['s_id']]
                n_id = [i['n_id']]
            # 'src' and 'id' are mandatory; abort the whole batch if absent
            if any(v not in i for v in ['src', 'id']):
                log_info("either id or src missing in some input",
                         MODULE_CONTEXT)
                out = CustomResponse(Status.ID_OR_SRC_MISSING.value, inputs)
                return out
            if any(v in i for v in ['s0_src', 's0_tgt', 'save']):
                s0_src, s0_tgt, save = handle_custome_input(
                    i, s0_src, s0_tgt, save)
            i_s0_src.append(s0_src), i_s0_tgt.append(s0_tgt), i_save.append(save)
            log_info("input sentences:{}".format(i['src']), MODULE_CONTEXT)
            i_src.append(i['src'])
            i['src'] = i['src'].strip()
            src_language, tgt_language = misc.get_src_tgt_langauge(i['id'])
            # all-caps English tends to translate badly; soften to Title Case
            if src_language == 'English' and i['src'].isupper():
                i['src'] = i['src'].title()
            i['src'] = misc.convert_digits_preprocess(src_language, i['src'])
            if special_case_handler.special_case_fits(i['src']):
                # short-circuit: sentence handled by rules, model is skipped
                log_info(
                    "sentence fits in special case, returning accordingly and not going to model",
                    MODULE_CONTEXT)
                translation = special_case_handler.handle_special_cases(
                    i['src'], i['id'])
                scores = [1]
                input_sw, output_sw, tag_tgt, tag_src = "", "", translation, i['src']
            else:
                log_info("translating using NMT-model:{}".format(i['id']),
                         MODULE_CONTEXT)
                # strip any leading prefix and protect dates/URLs/numbers
                # behind placeholder tags before the model sees the text
                prefix, i['src'] = special_case_handler.prefix_handler(
                    i['src'])
                i['src'], date_original, url_original, num_array, num_map = tagger_util.tag_number_date_url(
                    i['src'])
                tag_src = (prefix + " " + i['src']).lstrip()
                i['src'], is_missing_stop_punc = special_case_handler.handle_a_sentence_wo_stop(
                    src_language, i['src'])
                # --- model-id dispatch -------------------------------------
                # Pattern per branch: tokenize for the model's source side
                # (indic_tokenizer or moses_tokenizer, or none), run
                # encode_translate_decode (v2 for newer models), then
                # detokenize for the target side.  The bare strings are
                # historical branch labels kept as-is.
                if i['id'] == 6:
                    "hi-en_exp-2 05-05-20"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 7:
                    "english-tamil"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 10:
                    "english-gujrati"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 15:
                    "english-kannada"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 16:
                    "english-telgu"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 17:
                    "english-malayalam"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 18:
                    "english-punjabi"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 42:
                    "english-marathi exp-2"
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                elif i['id'] == 56:
                    "09/12/19-Exp-5.6:"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 8:
                    "ta-en 1st"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 44:
                    "eng-mr-3rd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 47:
                    "en-kn 2nd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 48:
                    "kn-en 1st"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 49:
                    "en-tel 2nd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 50:
                    "tel-en 1st"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 51:
                    "en-guj 2nd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 52:
                    "guj-en 1st"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 53:
                    "en-punjabi 2nd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 55:
                    "punjabi-en 1st"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 57:
                    "en-bengali 3rd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 58:
                    "bengali-en 2nd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 59:
                    "en-malay 2nd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 60:
                    "malay-en 1st"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 62:
                    "mr-to-en 2nd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 65:
                    "en-bengali 4th"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 66:
                    "bengali-en 3rd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                # models 67+ use the v2 encode/translate/decode path
                elif i['id'] == 67:
                    "ta-en 3rd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 68:
                    "en-ta 5th"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 69:
                    "hi-en 3rd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 70:
                    "en-hi 15th"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 71:
                    "te-en 2nd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 72:
                    "en-te 3rd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                elif i['id'] == 73:
                    "ml-en 2nd"
                    i['src'] = sentence_processor.indic_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.moses_detokenizer(
                        translation)
                elif i['id'] == 74:
                    "en-ml 3rd"
                    i['src'] = sentence_processor.moses_tokenizer(i['src'])
                    translation, scores, input_sw, output_sw = encode_translate_decode_v2(
                        i)
                    translation = sentence_processor.indic_detokenizer(
                        translation)
                else:
                    log_info(
                        "Unsupported model id: {} for given input".format(
                            i['id']), MODULE_CONTEXT)
                    raise Exception(
                        "Unsupported Model ID - id: {} for given input".
                        format(i['id']))
                # post-processing for model output: restore sentence stop,
                # re-attach prefix, drop subword markers, run regex cleanups,
                # then swap the placeholder tags back for the original
                # dates/URLs/numbers
                translation = oc.postprocess_a_sentence_wo_stop(
                    tgt_language, translation, is_missing_stop_punc)
                translation = (prefix + " " + translation).lstrip()
                translation = translation.replace("▁", " ")
                translation = misc.regex_pass(translation, [
                    patterns['p8'], patterns['p9'], patterns['p4'],
                    patterns['p5'], patterns['p6'], patterns['p7']
                ])
                tag_tgt = translation
                translation = tagger_util.replace_tags_with_original(
                    translation, date_original, url_original, num_array,
                    num_map)
                translation = oc.cleaner(tag_src, translation, i['id'])
            translation = misc.convert_digits_postprocess(
                tgt_language, translation)
            log_info(
                "translate_function-experiment-{} output: {}".format(
                    i['id'], translation), MODULE_CONTEXT)
            tgt.append(translation)
            pred_score.append(scores)
            sentence_id.append(s_id[0]), node_id.append(n_id[0])
            input_subwords.append(input_sw), output_subwords.append(
                output_sw)
            tagged_tgt.append(tag_tgt), tagged_src.append(tag_src)
            i_tmx_phrases.append(i.get("tmx_phrases", []))
        # zip the parallel accumulators into one record per input
        out['response_body'] = [{
            "tgt": tgt[i],
            "pred_score": pred_score[i],
            "s_id": sentence_id[i],
            "input_subwords": input_subwords[i],
            "output_subwords": output_subwords[i],
            "n_id": node_id[i],
            "src": i_src[i],
            "tagged_tgt": tagged_tgt[i],
            "tagged_src": tagged_src[i],
            "save": i_save[i],
            "s0_src": i_s0_src[i],
            "s0_tgt": i_s0_tgt[i],
            "tmx_phrases": i_tmx_phrases[i]
        } for i in range(len(tgt))]
        out = CustomResponse(Status.SUCCESS.value, out['response_body'])
    except ServerModelError as e:
        status = Status.SEVER_MODEL_ERR.value
        status['why'] = str(e)
        log_exception(
            "ServerModelError error in TRANSLATE_UTIL-translate_func: {} and {}"
            .format(e, sys.exc_info()[0]), MODULE_CONTEXT, e)
        out = CustomResponse(status, inputs)
    except Exception as e:
        status = Status.SYSTEM_ERR.value
        status['why'] = str(e)
        log_exception(
            "Unexpected error:%s and %s" % (e, sys.exc_info()[0]),
            MODULE_CONTEXT, e)
        out = CustomResponse(status, inputs)
    return out