def post(self): body = request.get_json() if "keys" not in body or not body["keys"]: res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None) return res.getresjson(), 400 keys = body["keys"] log_info("Fetching sentences from redis store", AppContext.getContext()) try: result = sentenceRepo.get_sentences_from_store(keys) if result == None: res = CustomResponse( Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None) return res.getresjson(), 400 res = CustomResponse(Status.SUCCESS.value, result) return res.getres() except Exception as e: log_exception( "Exception while fetching sentences from redis store ", AppContext.getContext(), e) res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None) return res.getresjson(), 400
def get_sentence_by_s_id(self, user_id, s_id):
    try:
        collections = get_db()[DB_SCHEMA_NAME]
        docs = collections.aggregate([
            {'$match': {'data.tokenized_sentences.s_id': s_id}},
            {'$project': {
                'tokenized_sentences': {
                    '$filter': {
                        'input': '$data.tokenized_sentences',
                        'as': 'ts',
                        'cond': {'$eq': ['$$ts.s_id', s_id]}
                    }
                }
            }}
        ])
        for doc in docs:
            sentence = doc['tokenized_sentences'][0]
            if 's0_tgt' not in sentence:
                sentence['s0_tgt'] = sentence['tgt']
            if 's0_src' not in sentence:
                sentence['s0_src'] = sentence['src']
            return sentence
        return None
    except Exception as e:
        log_exception("db connection exception", AppContext.getContext(), e)
        return None
def GoogleVisionOCR(app_context, base_dir=config.BASE_DIR):
    log_debug('google vision ocr process starting {}'.format(app_context.application_context),
              app_context.application_context)
    try:
        response, langs = process_input(app_context, base_dir)
        if response is not None:
            return {'code': 200, 'message': 'request completed', 'rsp': response, 'langs': langs}
        else:
            return {'code': 400, 'message': 'Error occurred during google vision ocr', 'rsp': None}
    except Exception as e:
        log_exception("Error occurred during google vision ocr",
                      app_context.application_context, e)
        return {'code': 400, 'message': 'Error occurred during google vision ocr', 'rsp': None}
def TextDetection(app_context, base_dir=config.BASE_DIR):
    log_debug('Block merger starting processing {}'.format(app_context.application_context),
              app_context.application_context)
    try:
        words, lines, images = get_text(app_context, base_dir)
        response = get_response(app_context, words, lines, images)
        if response is not None:
            return {'code': 200, 'message': 'request completed', 'rsp': response}
        else:
            return {'code': 400, 'message': 'Error occurred during pdf to blocks conversion', 'rsp': None}
    except Exception as e:
        log_exception("Error occurred during word detection conversion" + str(e),
                      app_context.application_context, e)
        return {'code': 400, 'message': 'Error occurred during pdf to blocks conversion', 'rsp': None}
def post(self):
    body = request.get_json()
    user_id = request.headers.get('userid')
    if user_id is None:
        user_id = request.headers.get('x-user-id')
    # use .get so the missing-parameter check below can run instead of raising KeyError
    pages = body.get('pages')
    file_locale = ''
    if 'file_locale' in body:
        file_locale = body['file_locale']
    job_id = ''
    if 'job_id' in body:
        job_id = body['job_id']
    record_id = None
    if 'record_id' in body:
        record_id = body['record_id']
    src_lang = None
    if 'src_lang' in body:
        src_lang = body['src_lang']
    tgt_lang = None
    if 'tgt_lang' in body:
        tgt_lang = body['tgt_lang']
    if 'pages' not in body or user_id is None or record_id is None or src_lang is None or tgt_lang is None:
        AppContext.addRecordID(record_id)
        log_info('Missing params in FileContentSaveResource {}, user_id:{}'.format(body, user_id),
                 AppContext.getContext())
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getresjson(), 400
    AppContext.addRecordID(record_id)
    log_info("FileContentSaveResource record_id ({}) for user ({})".format(record_id, user_id),
             AppContext.getContext())
    try:
        if fileContentRepo.store(user_id, file_locale, record_id, pages, src_lang, tgt_lang) == False:
            res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
            return res.getresjson(), 400
        AppContext.addRecordID(record_id)
        log_info("FileContentSaveResource record_id ({}) for user ({}) saved".format(record_id, user_id),
                 AppContext.getContext())
        res = CustomResponse(Status.SUCCESS.value, None)
        return res.getres()
    except Exception as e:
        AppContext.addRecordID(record_id)
        log_exception("FileContentSaveResource", AppContext.getContext(), e)
        res = CustomResponse(Status.ERR_GLOBAL_MISSING_PARAMETERS.value, None)
        return res.getresjson(), 400
def register_job(self, object_in):
    job_id = util.generate_job_id()
    try:
        response = {"input": object_in, "jobID": job_id, "status": "STARTED"}
        self.update_job_details(response, True)
        prod_res = producer.push_to_queue(response, jsonalign_job_topic)
        if prod_res:
            self.update_job_status("FAILED", object_in, prod_res["message"])
            response = {"input": object_in, "jobID": job_id, "status": "FAILED", "error": prod_res}
        return response
    except Exception as e:
        log_exception("Exception while registering the alignment job: " + str(e), object_in, e)
        return None
def process_info(app_context, base_dir):
    try:
        files = get_files(app_context.application_context)
        file_images = []
        output = []
        for index, file_new in enumerate(files):
            start_time = time.time()
            file = get_json(file_new['file']['name'], base_dir)[0]
            file_properties = File(file)
            ocr_level, lang = get_ocr_config(file_new, file_properties.get_pages())
            file = preprocess_file(file_properties, lang, ocr_level)
            file['file'] = file_new['file']
            file['config'] = file_new['config']
            output.append(file)
            output[index]['status'] = {'code': 200, 'message': "tesseract ocr successful"}
            end_time = time.time()
            extraction_time = (end_time - start_time) / len(file_properties.get_pages())
            log_info('tesseract ocr per page completed in {}'.format(extraction_time),
                     app_context.application_context)
        app_context.application_context["outputs"] = output
        log_info("successfully completed tesseract ocr", None)
    except Exception as e:
        log_exception("Error occurred during tesseract ocr",
                      app_context.application_context, e)
        return None
    return app_context.application_context
def replace_tags_with_original(text, date_original, url_dict, num_array, num_map):
    '''
    Replaces dates, urls and numbers in the text with the original values
    in place of the tags.
    '''
    try:
        res = text
        if len(text) == 0:
            return ""
        for url_tag, url in url_dict.items():
            # replace in the accumulated result, not the original text,
            # so earlier substitutions are not lost
            res = res.replace(url_tag, url)
        log_info("response after url and date replacement:{}".format(res), MODULE_CONTEXT)
        if len(num_map) == 0:
            # handles the case when the model outputs a tag which is not in
            # tagged_src (i.e. src is without any number)
            for char in reversed(hindi_numbers):
                res = re.sub(r'NnUuMm' + char, "", res)
        num_map.reverse()
        for item in num_map:
            res = res.replace(item['tag'], str(item['no.']), 1)
        res = remove_extra_tags(res)
        log_info("response after tags replacement:{}".format(res), MODULE_CONTEXT)
        return res
    except Exception as e:
        log_exception("Error in parent except block of replace_tags_with_original_1 function, "
                      "returning tagged output:{}".format(e), MODULE_CONTEXT, e)
        return res
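# Minimal usage sketch (illustrative only, not part of the original module): the
# tag strings and values below are assumptions, but the num_map entries follow
# the {'tag': ..., 'no.': ...} shape the function reads, and it assumes
# remove_extra_tags leaves untagged text unchanged.
#   url_dict = {"UuRrLl1": "https://example.com"}
#   num_map = [{"tag": "NnUuMm1", "no.": 2021}]
#   replace_tags_with_original("Visit UuRrLl1 in NnUuMm1", None, url_dict, [], num_map)
#   # -> "Visit https://example.com in 2021"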
def update_sentence_by_s_id(self, record_id, user_id, sentence):
    SENTENCE_KEYS = ['n_id', 'pred_score', 's_id', 'src', 'tgt']
    try:
        collections = get_db()[DB_SCHEMA_NAME]
        results = collections.update(
            {'$and': [
                {'record_id': record_id},
                {'created_by': user_id},
                {'data.tokenized_sentences': {'$elemMatch': {'s_id': {'$eq': sentence['s_id']}}}}
            ]},
            {'$set': {
                "data.tokenized_sentences.$.n_id": sentence['n_id'],
                "data.tokenized_sentences.$.src": sentence['src'],
                "data.tokenized_sentences.$.tgt": sentence['tgt'],
                "data.tokenized_sentences.$.save": sentence['save'],
                "data.tokenized_sentences.$.bleu_score": sentence['bleu_score'],
                "data.tokenized_sentences.$.time_spent_ms": sentence['time_spent_ms'],
                "data.tokenized_sentences.$.rating_score": sentence['rating_score']
            }},
            upsert=False)
        if 'writeError' in results:
            return False
        return True
    except Exception as e:
        log_exception("db connection exception", AppContext.getContext(), e)
        return False
def predict_primanet(self, image, craft_coords):
    try:
        image = cv2.imread(image)
        height, width, channels = image.shape
        layout = model_primalaynet.detect(image)
        bbox, tag, score = self.prima_region(layout)
        # craft refinement logic
        bbox, tag, score = self.prima_craft_refinement(bbox, craft_coords, tag, score)
        layouts = self.update_box_format(bbox, tag, score)
        flag = True
        while flag:
            layouts, flag = self.merge_remove_overlap(layouts, height, width)
        layouts = cell_layout(layouts, image)
        return layouts
    except Exception as e:
        log_exception("Error occurred during prima layout detection",
                      app_context.application_context, e)
        return None
def update_num_arr(num_array, zero_prefix_num, i_zero, num_array_orignal):
    '''
    Handles zero-prefixed numbers like 09 or 000, which get converted to 9 or 0
    during processing; we want them back in their original form, i.e. 09.
    zero_prefix_num: the numbers that have to be transformed back with the zero
        prefix (from 9 to 09, or from 0 to 000 as originally)
    i_zero: indices of numbers with a zero prefix in num_array_orignal
    ind: indices of zero-prefixed numbers in num_array, descending
    Note: this function needs some fixing
    '''
    try:
        num_array_o = num_array[:]
        ind = list()
        zero_prefix_num = np.unique(np.array(zero_prefix_num))
        for i in zero_prefix_num:
            for j, m in enumerate(num_array):
                if m == i:
                    ind.append(j)
        for k, l in enumerate(ind):
            num_array[l] = num_array_orignal[i_zero[k]]
        return num_array
    except Exception as e:
        log_exception("Error in handle_date_url:update_num_arr, returning incoming num_array:{}"
                      .format(e), MODULE_CONTEXT, e)
        return num_array_o
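# Illustrative call (values are assumptions, not from the original code): the
# zero prefix lost while the numbers were tagged is restored from the original
# array using the recorded indices.
#   update_num_arr(num_array=['9', '12'], zero_prefix_num=['9'],
#                  i_zero=[0], num_array_orignal=['09', '12'])
#   # -> ['09', '12']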
def wrapper(*args, **kwargs):
    try:
        output = method(*args, **kwargs)
        return output
    except Exception as e:
        log_exception('Error in response generation {}'.format(e),
                      app_context.application_context, e)
        return None
def encode_itranslate_decode(i, src_lang, tgt_lang):
    try:
        i["src"] = [i["src"]]
        i["target_prefix"] = [i["target_prefix"]]
        translator = load_models.loaded_models[i["id"]]
        source_bpe = load_models.bpes[i["id"]][0]
        target_bpe = load_models.bpes[i["id"]][1]
        i["src"] = sentence_processor.preprocess(i["src"], src_lang)
        i["src"] = apply_bpe(i["src"], source_bpe)
        # apply bpe to the constraints with the target bpe
        prefix = apply_bpe(i["target_prefix"], target_bpe)
        i_final = sentence_processor.apply_lang_tags(i["src"], src_lang, tgt_lang)
        translation = translator.translate(i_final, constraints=prefix)
        translation = sentence_processor.postprocess(translation, tgt_lang)
        return translation
    except Exception as e:
        log_exception("Unexpected error in encode_itranslate_decode: {} and {}".format(
            e, sys.exc_info()[0]), MODULE_CONTEXT, e)
        raise
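# Expected input shape (inferred from the keys read above; the model id, source
# sentence and target prefix values are illustrative assumptions):
#   i = {"id": 56,
#        "src": "The agreement was signed today.",
#        "target_prefix": "समझौते पर"}
#   encode_itranslate_decode(i, "en", "hi")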
def predict_primanet(self, image, craft_coords):
    try:
        image = cv2.imread(image)
        image = image[..., ::-1]
        layout = model_primalaynet.detect(image)
        boxes, coords, layout_class = self.prima_region(layout, craft_coords)
        final_coord = []
        for idx, coord in enumerate(coords):
            temp_dict = {}
            vert = []
            temp_dict['identifier'] = str(uuid.uuid4())
            vert.append({'x': coord[0], 'y': coord[1]})
            vert.append({'x': coord[2], 'y': coord[1]})
            vert.append({'x': coord[2], 'y': coord[3]})
            vert.append({'x': coord[0], 'y': coord[3]})
            temp_dict['boundingBox'] = {}
            temp_dict['boundingBox']["vertices"] = vert
            temp_dict['class'] = self.class_mapping(layout_class[idx])
            final_coord.append(temp_dict)
        return final_coord
    except Exception as e:
        log_exception("Error occurred during prima layout detection",
                      app_context.application_context, e)
        return None
def get_document_total_page_count(self, record_id):
    try:
        collections = get_db()[DB_SCHEMA_NAME]
        results = collections.aggregate([
            {'$match': {'record_id': record_id}},
            {'$group': {'_id': '$record_id', 'page_count': {'$max': "$page_no"}}}
        ])
        count = 0
        for result in results:
            count = result['page_count']
            break
        return count
    except Exception as e:
        log_exception("db connection exception", AppContext.getContext(), e)
        return 0
def encode_translate_decode_v2(i):
    try:
        log_info("Inside encode_translate_decode_v2 function", MODULE_CONTEXT)
        model_path, sp_encoder, sp_decoder = get_model_path(i['id'])
        translator = load_models.loaded_models[i['id']]
        i['src'] = sp.encode_line_v2(sp_encoder, i['src'])
        log_info("SP encoded sent: %s" % str(i['src']), MODULE_CONTEXT)
        input_sw = str(i['src'])
        m_out = translator.translate_batch([i['src']], beam_size=5, num_hypotheses=1)
        output_sw = " ".join(m_out[0][0]['tokens'])
        log_info("output from model: {}".format(output_sw), MODULE_CONTEXT)
        scores = m_out[0][0]['score']
        translation = multiple_hypothesis_decoding_v2(m_out[0], sp_decoder)[0]
        log_info("SP decoded sent: %s" % str(translation), MODULE_CONTEXT)
        return translation, scores, input_sw, output_sw
    except ServerModelError as e:
        log_exception("ServerModelError error in encode_translate_decode_v2: {} and {}".format(
            e, sys.exc_info()[0]), MODULE_CONTEXT, e)
        raise
    except Exception as e:
        log_exception("Unexpected error in encode_translate_decode_v2: {} and {}".format(
            e, sys.exc_info()[0]), MODULE_CONTEXT, e)
        raise
def get_nmt_url_body(self, block_translate_input, nmt_txt):
    model = block_translate_input["input"]["model"]
    nmt_in = {
        "src_list": nmt_txt,
        "source_language_code": model["source_language_code"],
        "target_language_code": model["target_language_code"],
        "model_id": model["model_id"]
    }
    try:
        host = model["connection_details"]["translation"]["host"]
        api_host = os.environ.get(host, 'NA')
        endpoint = model["connection_details"]["translation"]["api_endpoint"]
        api_endpoint = os.environ.get(endpoint, 'NA')
        if api_host == "NA" or api_endpoint == "NA":
            log_info("Falling back to Anuvaad NMT translate URL....", block_translate_input)
            return nmt_translate_url, nmt_in
        url = api_host + api_endpoint
        return url, nmt_in
    except Exception as e:
        log_exception("Exception while fetching API conn details: {}".format(str(e)),
                      block_translate_input, None)
        log_info("Falling back to Anuvaad NMT translate URL....", block_translate_input)
        return nmt_translate_url, nmt_in
def get_segmented_regions(app_context, base_dir):
    try:
        files = get_files(app_context.application_context)
        output = []
        for index, file in enumerate(files):
            file = get_json(base_dir, file['file']['name'])
            file_properties = File(file)
            pages = file_properties.get_pages()
            page_counts = len(pages)
            start_time = time.time()
            for page_index in range(page_counts):
                print('processing for page : ', page_index)
                # page_lines = file_properties.get_lines(page_index)
                # page_regions = file_properties.get_regions(page_index)
                # page_words = file_properties.get_words(page_index)
                # font_meta = font_properties(file_properties.get_page(page_index))
                font_meta = []
                # page_regions = region_unifier.region_unifier(page_lines, page_regions)
                # file_properties.set_regions(page_index, segment_regions(page_words, page_lines, page_regions))
                file_properties.set_font_properties(page_index, font_meta)
            output.append(file_properties.get_file())
            output[index]['status'] = {'message': "block-segmenter successful"}
            end_time = time.time()
            extraction_time = (end_time - start_time) / page_counts
            log_info('block segmentation per page completed in {}'.format(extraction_time),
                     app_context.application_context)
        app_context.application_context["outputs"] = output
        log_info("successfully completed block segmentation", None)
    except Exception as e:
        log_exception("Error occurred during block segmentation",
                      app_context.application_context, e)
        return None
    return app_context.application_context
def process_tokenization_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.file_download(config.download_folder)
    # instantiation of consumer for the respective topic
    try:
        consumer_class = Consumer(config.input_topic, config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log_info("process_tokenization_kf : trying to receive value from consumer", None)
        for msg in consumer:
            data = msg.value
            log_info("process_tokenization_kf : received input json from input topic consumer", data)
            task_id = str("TOK-" + str(time.time()).replace('.', '')[0:13])
            task_starttime = eval(str(time.time()).replace('.', '')[0:13])
            input_files, workflow_id, jobid, tool_name, step_order, user_id = file_ops.json_input_format(data)
            response_gen = Response(data, DOWNLOAD_FOLDER)
            file_value_response = response_gen.workflow_response(task_id, task_starttime)
            if "errorID" not in file_value_response.keys():
                producer = Producer()
                producer.push_data_to_queue(config.output_topic, file_value_response, data, task_id)
            else:
                log_error("process_tokenization_kf : error send to error handler", data, None)
    except KafkaConsumerError as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, None, None)
        response_custom.status_code['message'] = str(e)
        file_ops.error_handler(response_custom.status_code, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_tokenization_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = e.code
        response_custom['message'] = e.message
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_tokenization_kf : response send to topic %s" % (config.output_topic), data, e)
def handle_sentences_wo_stop(language, sentence_array):
    '''
    Handles sentences in the array which do not have a sentence-ending
    punctuation by adding it. Used in batch translation.
    '''
    try:
        if language is None:
            return sentence_array, []
        log_info("Inside handle_sentences_wo_stop", MODULE_CONTEXT)
        stop_puncs = misc.get_language_stop_puncs(language)
        full_stop_or_purnviram = stop_puncs[0]
        sent_indices_wo_stop = []
        for i, sentence in enumerate(sentence_array):
            if misc.is_sentence_wo_stop(sentence, stop_puncs):
                sent_indices_wo_stop.append(i)
                sentence_array[i] = misc.add_stop_punc(sentence_array[i], full_stop_or_purnviram)
        return sentence_array, sent_indices_wo_stop
    except Exception as e:
        log_exception("Error in handle_sentences_wo_stop: {}".format(e), MODULE_CONTEXT, e)
        return sentence_array, []
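# Illustrative call (inputs are assumptions, not from the original module): a
# sentence missing its stop punctuation gets the language's stop appended, and
# its index is returned so the added stop can be handled after translation.
#   sentences, fixed_idx = handle_sentences_wo_stop('hi', ["यह एक वाक्य है", "यह दूसरा वाक्य है।"])
#   # fixed_idx -> [0]; sentences[0] now ends with the stop punctuation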
def TesseractOCR(app_context, base_dir=config.BASE_DIR):
    log_debug('tesseract ocr process starting {}'.format(app_context.application_context),
              app_context.application_context)
    try:
        response = process_info(app_context, base_dir)
        if response is not None:
            return {'code': 200, 'message': 'request completed', 'rsp': response}
        else:
            return {'code': 400, 'message': 'Error occurred during tesseract ocr', 'rsp': None}
    except Exception as e:
        log_exception("Error occurred during tesseract ocr",
                      app_context.application_context, e)
        return {'code': 400, 'message': 'Error occurred during tesseract ocr', 'rsp': None}
def consumer_instantiate(self):
    try:
        consumer = KafkaConsumer(self.topic_name,
                                 bootstrap_servers=list((self.server_address).split(",")),
                                 auto_offset_reset='latest',
                                 group_id=config.CONSUMER_GROUP,
                                 enable_auto_commit=True)
        log_info("consumer_instantiate : Consumer returned for topic: %s" % (self.topic_name), None)
        return consumer
    except Exception as e:
        log_exception("consumer_instantiate : error occurred for consumer topic: %s" % (self.topic_name), None, e)
def save_page_res(res, file_name):
    try:
        tmp_file = copy.deepcopy(res['rsp'])
        del tmp_file['input']
        tmp_file['files'] = res['rsp']['outputs']
        del tmp_file['outputs']
        json_file_name = file_name['output'][0]['outputFile']
        for file in [tmp_file]:
            recordID = file['jobID'] + '|' + json_file_name
            page_idx = 0
            total_pages = len(file['files'][0]['pages'])
            file['files'][0]['config'] = copy.deepcopy(file['files'][0]['config']['OCR'])
            save_file = copy.deepcopy(file)
            save_file['recordID'] = recordID
            while page_idx < total_pages:
                pages = file['files'][0]['pages'][page_idx:page_idx + SAVE_NO_PAGE]
                save_file['files'][0]['pages'] = pages
                page_idx = page_idx + SAVE_NO_PAGE
                log_info("started saving data to database with record id: " + str(recordID),
                         app_context.application_context)
                rsp = requests.post(SAVE_URL, json=save_file)
                log_info("successfully saved data to database with record id: " + str(recordID),
                         app_context.application_context)
    except Exception as e:
        log_exception("Error occurred during saving page response",
                      app_context.application_context, e)
def LayoutDetection(app_context, base_dir=config.BASE_DIR):
    log_debug('layout detection process starting {}'.format(app_context.application_context),
              app_context.application_context)
    try:
        response = get_layout(app_context)
        if response is not None:
            return {'code': 200, 'message': 'request completed', 'rsp': response}
        else:
            return {'code': 400, 'message': 'Error occurred during layout detection', 'rsp': None}
    except Exception as e:
        log_exception("Error occurred during layout detection",
                      app_context.application_context, e)
        return {'code': 400, 'message': 'Error occurred during layout detection', 'rsp': None}
def get_response(app_context, words, lines, images):
    output = []
    files = get_files(app_context.application_context)
    for file_index, file in enumerate(files):
        file_properties = FileOutput(file)
        try:
            for page_index, page in enumerate(images[file_index]):
                if len(words) != 0:
                    page_words = words[file_index][page_index]
                else:
                    page_words = []
                if len(lines) != 0:
                    page_lines = lines[file_index][page_index]
                else:
                    page_lines = []
                page_properties = Page(page_words, page_lines, page)
                file_properties.set_page(page_properties.get_page())
                file_properties.set_page_info(page)
            file_properties.set_staus(True)
        except Exception as e:
            file_properties.set_staus(False)
            log_exception("Error occurred during response generation" + str(e),
                          app_context.application_context, e)
        output.append(file_properties.get_file())
    app_context.application_context['outputs'] = output
    return app_context.application_context
def get_text(path, page_dict, font_info):
    try:
        if config.CLEAN_BACKGROUND:
            img = cv2.imread(path)
            img[175 < img] = 255
            masked_path = path.split('.jpg')[0] + "_watermarks.jpg"
            cv2.imwrite(masked_path, img)
        else:
            masked_path = path
        with io.open(masked_path, 'rb') as image_file:
            content = image_file.read()
        image = vision.types.Image(content=content)
        response = client.document_text_detection(image=image)
        page_dict, page_lines = get_document_bounds(response.full_text_annotation, page_dict, font_info)
        return page_dict, page_lines
    except Exception as e:
        log_exception("Error occurred during text_extraction {}".format(e),
                      app_context.application_context, e)
        return None, None
def process_input(app_context, base_dir):
    try:
        files = get_files(app_context.application_context)
        output_files = []
        langs = []
        for index, file in enumerate(files):
            file_output = {"status": {}}
            file = get_json(base_dir, file['file']['name'])[0]
            file_properties = File(file)
            if "page_info" in file.keys():
                page_paths = file_properties.get_pages()
            else:
                page_paths = doc_pre_processing(file['file']['name'], config.BASE_DIR)
            page_res = text_extraction(file_properties, page_paths, file)
            output_files.append(page_res)
            langs.append(file_properties.get_language())
        app_context.application_context["outputs"] = output_files
        log_info("successfully completed google vision ocr", None)
    except Exception as e:
        log_exception("Error occurred during google vision ocr",
                      app_context.application_context, e)
        return None, None
    return app_context.application_context, langs
def vision_ocr_request_worker():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)
    log_info("vision_ocr_request_worker : starting thread", LOG_WITHOUT_CONTEXT)
    while True:
        data = processQueue.get(block=True)
        task_id = str("vision_ocr" + str(time.time()).replace('.', ''))
        task_starttime = str(time.time()).replace('.', '')
        input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)
        log_info("vision_ocr_request_worker processing -- received message " + str(jobid), data)
        try:
            response_gen = Response(data, DOWNLOAD_FOLDER)
            file_value_response = response_gen.workflow_response(task_id, task_starttime, False)
            if file_value_response is not None:
                if "errorID" not in file_value_response.keys():
                    push_output(producer_tok, config.output_topic, file_value_response, jobid, task_id, data)
                    log_info("vision_ocr_request_worker : response send to topic %s" % (config.output_topic),
                             LOG_WITHOUT_CONTEXT)
                else:
                    log_info("vision_ocr_request_worker : error send to error handler", data)
            log_info('vision_ocr_request_worker - request in internal queue {}'.format(Queue.qsize()), data)
            processQueue.task_done()
        except Exception as e:
            log_exception("vision_ocr_request_worker", LOG_WITHOUT_CONTEXT, e)
            controlQueue.put(1)
def core_consume():
    try:
        wfmservice = WFMService()
        topics = [anu_etl_wfm_core_topic]
        consumer = instantiate(topics)
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "WFM-Core-" + "(" + rand_str + ")"
        log_info(prefix + " | Running..........", None)
        log_info(prefix + " | Topics: " + str(topics), None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    if msg:
                        data = msg.value
                        log_info(prefix + " | Received on Topic: " + msg.topic +
                                 " | Partition: " + str(msg.partition), data)
                        wfmservice.initiate_wf(data)
                except Exception as e:
                    log_exception(prefix + " | Exception while consuming: " + str(e), data, e)
                    post_error("WFM_CORE_CONSUMER_ERROR",
                               "Exception while consuming: " + str(e), None)
    except Exception as e:
        log_exception("Exception while starting the wfm core consumer: " + str(e), None, e)
        post_error("WFM_CONSUMER_ERROR",
                   "Exception while starting wfm core consumer: " + str(e), None)
def purnaviram_applier(src, tgt):
    '''
    For English to Hindi translation: ensures the target ends with the
    purnaviram (।) when the source ends with a full stop.
    '''
    try:
        if tgt is None or len(tgt.split()) == 0:
            return tgt
        if len(src.split()) < 5:
            return tgt
        if src.endswith('.') and tgt.endswith('।'):
            return tgt
        elif src.endswith('.') and tgt[-1] != '।':
            if tgt.endswith('.'):
                log_info("Replacing '.' with purnaviram", MODULE_CONTEXT)
                tgt = tgt[:-1] + str("।")
            else:
                log_info("Adding the missing purnaviram", MODULE_CONTEXT)
                tgt = tgt + str("।")
            return tgt
        else:
            return tgt
    except Exception as e:
        log_exception("Error in purnaviram applier, returning original tgt: {}".format(e),
                      MODULE_CONTEXT, e)
        return tgt
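# Illustrative calls (the example sentences are assumptions, not from the
# original module):
#   purnaviram_applier("This is a long enough sentence.", "यह पर्याप्त लंबा वाक्य है.")
#   # -> "यह पर्याप्त लंबा वाक्य है।"   ('.' replaced with the purnaviram)
#   purnaviram_applier("Short one.", "छोटा वाक्य")
#   # -> "छोटा वाक्य"                   (source has fewer than 5 words, returned unchanged)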