def consume_nmt():
    """Kafka consumer loop for NMT output messages.

    Subscribes to every model-specific topic plus the generic NMT output
    topic and hands each message to TranslatorService.process_nmt_output.
    Runs forever; per-message failures are logged and reported but do not
    stop the loop.
    """
    try:
        utils = TranslatorUtils()
        topics = utils.get_topics_from_models()
        topics.append(anu_nmt_output_topic)
        consumer = instantiate(topics)
        service = TranslatorService()
        # Random 4-letter suffix so log lines from concurrent consumers can be told apart.
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "Translator-NMT-" + "(" + rand_str + ")"
        log_info(prefix + " Running..........", None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    data = msg.value
                    if data:
                        log_info(prefix + " | Received on Topic: " + msg.topic + " | Partition: " + str(msg.partition), data)
                        service.process_nmt_output(data)
                    else:
                        # Empty payload: leave the inner for-loop; the outer
                        # while True re-enters consumption immediately.
                        break
                except Exception as e:
                    # Keep consuming after a bad message; report it instead.
                    log_exception(prefix + " Exception in translator nmt while consuming: " + str(e), data, e)
                    post_error("TRANSLATOR_CONSUMER_ERROR", "Exception in translator while consuming: " + str(e), None)
    except Exception as e:
        # Startup failure (topic discovery / consumer instantiation).
        log_exception("Exception while starting the translator nmt consumer: " + str(e), None, e)
        post_error("TRANSLATOR_CONSUMER_EXC", "Exception while starting translator consumer: " + str(e), None)
def core_consume():
    """Kafka consumer loop for the WFM core input topic.

    Each message is an initial workflow request passed to
    WFMService.initiate_wf. Runs forever; per-message failures are logged
    and reported without stopping consumption.
    """
    try:
        wfmservice = WFMService()
        topics = [anu_etl_wfm_core_topic]
        consumer = instantiate(topics)
        # Random 4-letter suffix to disambiguate log lines between consumer instances.
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "WFM-Core-" + "(" + rand_str + ")"
        log_info(prefix + " | Running..........", None)
        log_info(prefix + " | Topics: " + str(topics), None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    if msg:
                        data = msg.value
                        log_info(prefix + " | Received on Topic: " + msg.topic + " | Partition: " + str(msg.partition), data)
                        wfmservice.initiate_wf(data)
                except Exception as e:
                    # Report and continue with the next message.
                    log_exception(prefix + " | Exception while consuming: " + str(e), data, e)
                    post_error("WFM_CORE_CONSUMER_ERROR", "Exception while consuming: " + str(e), None)
    except Exception as e:
        # Startup failure (consumer instantiation).
        log_exception("Exception while starting the wfm core consumer: " + str(e), None, e)
        post_error("WFM_CONSUMER_ERROR", "Exception while starting wfm core consumer: " + str(e), None)
def validate_tool_response(self, tool_response, tool_details, wf_input):
    """Validate the response returned by one tool of a SYNC workflow.

    Returns the failure client_output (after marking the job FAILED) when
    the tool returned nothing or reported an error; returns None when the
    response is healthy and the workflow may continue.
    """
    if not tool_response:
        # No response at all from the tool -- fail the job immediately.
        log_error("Error from the tool: " + str(tool_details["name"]), wf_input, None)
        error = post_error("ERROR_FROM_TOOL", "Error from the tool: " + str(tool_details["name"]), None)
        client_output = self.get_wf_details_sync(wf_input, None, True, error)
        self.update_job_details(client_output, False)
        log_info("Job FAILED, jobID: " + str(wf_input["jobID"]), wf_input)
        return client_output
    fail_msg = None
    if 'error' in tool_response:
        if tool_response["error"]:
            fail_msg = "Error from the tool: " + str(tool_details["name"]) + " | Cause: " + str(tool_response["error"])
    elif 'http' in tool_response:
        if 'status' in tool_response["http"]:
            if tool_response["http"]["status"] != 200:
                # BUG FIX: original indexed tool_response["why"] directly; a
                # missing "why" key raised KeyError and masked the real
                # failure. .get() degrades to "None" in the message instead.
                fail_msg = "Error from the tool: " + str(tool_details["name"]) + " | Cause: " + str(tool_response.get("why"))
    if fail_msg:
        log_error(fail_msg, wf_input, None)
        error = post_error("ERROR_FROM_TOOL", fail_msg, None)
        client_output = self.get_wf_details_sync(wf_input, None, True, error)
        self.update_job_details(client_output, False)
        log_info("Job FAILED, jobID: " + str(wf_input["jobID"]), wf_input)
        return client_output
def post(self):
    """Update one or more recognized words of a digitized document.

    Expects a JSON body with a non-empty 'words' list; the user is taken
    from the 'userID' or 'x-user-id' request header. Returns a success
    payload, or a ("Data Missing", 400) error tuple.
    """
    userID = request.headers.get('userID')
    if userID is None:
        userID = request.headers.get('x-user-id')
    body = request.get_json()
    # BUG FIX: original condition was `'words' not in body and not body['words']`,
    # which raised KeyError whenever 'words' was absent (the `and` evaluated
    # body['words'] on a missing key) and never rejected an empty list.
    if 'words' not in body or not body['words']:
        return post_error("Data Missing", "words are required", None), 400
    words = body['words']
    AppContext.adduserID(userID)
    log_info(
        "DigitalDocumentUpdateWordResource for user {}, number words to update {} request {}"
        .format(userID, len(words), body), AppContext.getContext())
    try:
        result = digitalRepo.update_words(userID, words)
        if result is True:
            res = CustomResponse(Status.SUCCESS.value, words)
            return res.getres()
        # update_words returned a post_error dict describing what was missing.
        return result, 400
    except Exception as e:
        log_exception(
            "Exception in DigitalDocumentUpdateWordResource |{}".format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing", "Failed to update word since data is missing", None), 400
def post(self):
    """Persist a digitized document (its pages/blocks) for a user.

    Expects JSON with 'files', 'recordID' and 'metadata.userID'. Success is
    signalled by digitalRepo.store returning None; False or any other value
    maps to a ("Data Missing", 400) response.
    """
    body = request.get_json()
    if 'files' not in body or not body['files']:
        return post_error("Data Missing", "files is required", None), 400
    if 'recordID' not in body or not body['recordID']:
        return post_error("Data Missing", "recordID is required", None), 400
    # BUG FIX: original read body['metadata']['userID'] unguarded, raising
    # KeyError (HTTP 500) when 'metadata' was absent instead of a clean 400.
    if 'metadata' not in body or not body['metadata'] or not body['metadata'].get('userID'):
        return post_error("Data Missing", "userID is required", None), 400
    files = body['files']
    userID = body['metadata']['userID']
    recordID = body['recordID']
    # NOTE: the original also logged a copy-pasted "Missing params ..." line
    # here on every request; removed as misleading (request is valid here).
    try:
        AppContext.addRecordID(recordID)
        log_info(
            'DigitalDocumentSaveResource request received, user_id:{}, record_id:{}'
            .format(userID, recordID), AppContext.getContext())
        result = digitalRepo.store(userID, recordID, files)
        if result == False:
            log_info(
                'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                .format(body, userID, recordID), AppContext.getContext())
            return post_error("Data Missing", "Failed to store doc since data is missing", None), 400
        elif result is None:
            # None is the repo's success signal.
            AppContext.addRecordID(recordID)
            log_info(
                'DigitalDocumentSaveResource request completed, user_id:{}, record_id:{}'
                .format(userID, recordID), AppContext.getContext())
            res = CustomResponse(Status.SUCCESS.value, None)
            return res.getres()
        else:
            # Anything else is a post_error dict from the repo.
            log_info(
                'Missing params in DigitalDocumentSaveResource {}, user_id:{}, record_id:{}'
                .format(body, userID, recordID), AppContext.getContext())
            return result, 400
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            "Exception on save document | DigitalDocumentSaveResource :{}".format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing", "Failed to store doc since data is missing", None), 400
def text_translate(self, text_translate_input):
    """Translate a list of text segments synchronously.

    Looks up stored hypotheses first (get_stored_hypothesis_ch); only the
    remainder is sent to the NMT service. Mutates and returns the input
    object with status/output/error/taskEndTime populated.
    """
    def _epoch_millis():
        # 13-digit epoch-milliseconds built from str(time.time()) digits.
        # BUG FIX: originally computed with eval() on the digit string;
        # int() is the safe equivalent.
        return int(str(time.time()).replace('.', '')[0:13])

    text_translate_input["jobID"] = utils.generate_task_id()
    text_translate_input["startTime"] = _epoch_millis()
    log_info("Text Translation started....", text_translate_input)
    output = text_translate_input  # same object is mutated and returned
    output["status"], output["output"] = "FAILED", None
    try:
        text_for_nmt, ch_res = self.get_stored_hypothesis_ch(
            text_translate_input["input"]["textList"], text_translate_input)
        if text_for_nmt:
            url, body = self.get_nmt_url_body(text_translate_input, text_for_nmt)
            log_info("NMT IT URI - " + str(url), text_translate_input)
            nmt_response = utils.call_api(
                url, "POST", body, None, text_translate_input["metadata"]["userID"])
            if nmt_response:
                if 'status' in nmt_response.keys():
                    if 'statusCode' in nmt_response["status"].keys():
                        if nmt_response["status"]["statusCode"] != 200:
                            output["error"] = post_error(
                                "TRANSLATION_FAILED",
                                "Error while translating: " + str(nmt_response["status"]["message"]),
                                None)
                            return output
                # Merge cached hypotheses with fresh NMT output, de-duplicated.
                ch_res.extend(nmt_response["data"])
                nmt_predictions = self.dedup_hypothesis(ch_res)
                output["input"], output["status"] = None, "SUCCESS"
                output["taskEndTime"], output["output"] = _epoch_millis(), {"predictions": nmt_predictions}
            else:
                output["taskEndTime"] = _epoch_millis()
                output["error"] = post_error("TRANSLATION_FAILED", "Error while translating", None)
        else:
            # Everything was served from the hypothesis cache.
            ch_predictions = self.dedup_hypothesis(ch_res)
            output["input"], output["status"] = None, "SUCCESS"
            output["taskEndTime"], output["output"] = _epoch_millis(), {"predictions": ch_predictions}
        log_info("Text Translation completed!", text_translate_input)
        return output
    except Exception as e:
        # BUG FIX: original passed None instead of the exception object.
        log_exception("Exception while translating: " + str(e), text_translate_input, e)
        output["error"] = post_error(
            "TRANSLATION_FAILED", "Exception while translating: " + str(e), None)
        output["taskEndTime"] = _epoch_millis()
        return output
def get(self):
    """Fetch a page range of a digitized document by recordID.

    Query args: start_page, end_page (both 0 fetches the whole document)
    and recordID. Returns the pages plus total count, or a 400 error.
    """
    parser = reqparse.RequestParser()
    # Both page arguments share the same shape; declare them table-driven.
    page_args = (
        ('start_page', 'start_page can be 0, set start_page & end_page as 0 to get entire document'),
        ('end_page', 'end_page can be 0, set start_page & end_page as 0 to get entire document'),
    )
    for arg_name, arg_help in page_args:
        parser.add_argument(arg_name, type=int, location='args', help=arg_help, required=True)
    parser.add_argument('recordID', type=str, location='args', help='record_id is required', required=True)
    args = parser.parse_args()
    record_id = args['recordID']
    AppContext.addRecordID(record_id)
    log_info("DigitalDocumentGetResource record_id {} ".format(record_id), AppContext.getContext())
    try:
        result = digitalRepo.get_pages(record_id, args['start_page'], args['end_page'])
        if result == False:
            return post_error("Data Missing", "Failed to get pages since data is missing", None), 400
        AppContext.addRecordID(record_id)
        log_info(
            "DigitalDocumentGetResource record_id {} has {} pages".format(record_id, result['total']),
            AppContext.getContext())
        return CustomResponse(Status.SUCCESS.value, result['pages'], result['total']).getres()
    except Exception as e:
        AppContext.addRecordID(record_id)
        log_exception("Exception in DigitalDocumentGetResource |{}".format(str(e)), AppContext.getContext(), e)
        return post_error("Data Missing", "Failed to get pages since data is missing", None), 400
def common_validate(self, data):
    """Checks shared by every workflow request: payload present, workflowCode
    present, and workflowCode known to the configuration.

    Returns a post_error dict on failure, None when the input is valid.
    """
    if data is None:
        return post_error("INPUT_NOT_FOUND", "Input is empty", None)
    if 'workflowCode' not in data:
        return post_error("WOFKLOWCODE_NOT_FOUND", "workflowCode is mandatory", None)
    configs = wfmutils.get_configs()
    if data["workflowCode"] not in configs:
        return post_error(
            "WORKFLOW_NOT_FOUND",
            "There's no workflow configured against this workflowCode", None)
def glossary_create(self, object_in):
    """Validate and persist a batch of glossary translations.

    Requires 'org', 'context' and a non-empty 'translations' list where each
    entry carries 'src', 'tgt' and 'locale'. On success every translation is
    stamped (id/org/uploaded_by/created_on) and stored via repo.

    Returns a success dict, or a post_error dict on validation/storage failure.
    """
    try:
        # Flattened guard clauses (original was a deep else-pyramid).
        if 'org' not in object_in:
            return post_error("ORG_NOT_FOUND", "org is mandatory", None)
        if 'context' not in object_in:
            return post_error("CONTEXT_NOT_FOUND", "context is mandatory", None)
        if 'translations' not in object_in:
            return post_error("TRANSLATIONS_NOT_FOUND", "Translations are mandatory", None)
        if not object_in["translations"]:
            return post_error("TRANSLATIONS_EMPTY", "Translations cannot be empty", None)
        for translation in object_in["translations"]:
            if 'src' not in translation:
                return post_error("SRC_NOT_FOUND", "src is mandatory for every translation", None)
            if 'tgt' not in translation:
                return post_error("TGT_NOT_FOUND", "tgt is mandatory for every translation", None)
            if 'locale' not in translation:
                return post_error("LOCALE_NOT_FOUND", "locale is mandatory for every translation", None)
        for translation in object_in["translations"]:
            translation["id"] = uuid.uuid4()
            translation["org"] = object_in["org"]
            translation["uploaded_by"] = object_in["userID"]
            # 13-digit epoch-millis; int() replaces the original unnecessary eval().
            translation["created_on"] = int(str(time.time()).replace('.', '')[0:13])
            repo.glossary_create(translation)
        return {"message": "Glossary created successfully", "status": "SUCCESS"}
    except Exception as e:
        return post_error(
            "GLOSSARY_CREATION_FAILED",
            "Glossary creation failed due to exception: {}".format(str(e)), None)
def consume():
    """Kafka consumer loop for translator input and no-NMT topics.

    Messages on the no-NMT topic bypass validation; all other messages are
    validated (validate_wf) before file translation is started. Runs
    forever; per-message failures are logged and reported.
    """
    try:
        topics = [anu_translator_input_topic, anu_translator_nonmt_topic]
        consumer = instantiate(topics)
        service = TranslatorService()
        validator = TranslatorValidator()
        # Random 4-letter suffix to disambiguate log lines between instances.
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "Translator-Core-" + "(" + rand_str + ")"
        log_info(prefix + " Running..........", None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    data = msg.value
                    if data:
                        if msg.topic == anu_translator_nonmt_topic:
                            # No-NMT jobs skip workflow validation entirely.
                            service.process_no_nmt_jobs(data)
                        else:
                            log_info(prefix + " | Received on Topic: " + msg.topic + " | Partition: " + str(msg.partition), data)
                            error = validator.validate_wf(data, False)
                            if error is not None:
                                log_error(prefix + " | Error: " + str(error), data, error)
                                log_info(prefix + " | Input: " + str(data), data)
                                post_error_wf(error["code"], error["message"], data, None)
                                # Invalid input: leave the for-loop; the outer
                                # while True resumes consumption.
                                break
                            service.start_file_translation(data)
                    else:
                        # Empty payload: re-enter consumption via the outer loop.
                        break
                except Exception as e:
                    log_exception(prefix + " Exception in translator while consuming: " + str(e), data, e)
                    post_error("TRANSLATOR_CONSUMER_ERROR", "Exception in translator while consuming: " + str(e), None)
    except Exception as e:
        # Startup failure (consumer/service instantiation).
        log_exception("Exception while starting the translator consumer: " + str(e), None, e)
        post_error("TRANSLATOR_CONSUMER_EXC", "Exception while starting translator consumer: " + str(e), None)
def validate_wf(self, data, is_api):
    """Validate a translator workflow request.

    Requires a jobID, then delegates to validate_input_files.
    Returns a post_error dict on failure, None when valid.
    """
    if 'jobID' not in data:
        return post_error("JOBID_NOT_FOUND", "jobID is mandatory", None)
    # Propagates the file-validation error, or None when everything is fine.
    return self.validate_input_files(data, is_api)
def produce(self, object_in, topic, partition):
    """Publish object_in to the given Kafka topic.

    When partition is None a random partition is chosen. Falsy payloads are
    silently ignored. Errors are logged and reported, never raised.
    """
    kafka_producer = self.instantiate()
    try:
        if not object_in:
            return
        target_partition = partition
        if target_partition is None:
            target_partition = random.choice(list(range(0, total_no_of_partitions)))
        kafka_producer.send(topic, value=object_in, partition=target_partition)
        log_info("Pushing to topic: " + topic, object_in)
        kafka_producer.flush()
    except Exception as e:
        log_exception("Exception in translator while producing: " + str(e), object_in, e)
        post_error("TRANSLATOR_PRODUCER_EXC", "Exception in translator while producing: " + str(e), None)
def update_words(self, user_id, words):
    """Apply user corrections to recognized words of a digitized document.

    For each entry the containing region is fetched, the matched word's OCR
    text is preserved in 'ocr_text' and 'text' is replaced with the user's
    word, then the region is written back.

    Returns True on success, or a post_error dict on the first failure.
    """
    for word in words:
        validation_error = validator.update_word_validation(word)
        if validation_error is not None:
            return validation_error
        page = word['page_no']
        region_id = word['region_id']
        word_id = word['word_id']
        record_id = word['record_id']
        user_word = word['updated_word']
        AppContext.addRecordID(record_id)
        log_info("DigitalDocumentRepo update word request", AppContext.getContext())
        region_to_update = self.docModel.get_word_region(user_id, record_id, region_id, page)
        if region_to_update:
            if region_to_update['identifier'] == region_id:
                region_to_update['updated'] = True
                # BUG FIX: the inner loops originally reused the name `word`,
                # shadowing the request entry being processed; renamed so the
                # request fields above stay readable and unambiguous.
                for sub_region in region_to_update['regions']:
                    for word_region in sub_region['regions']:
                        if word_region['identifier'] == word_id:
                            # Keep the original OCR text before overwriting.
                            word_region['ocr_text'] = word_region['text']
                            word_region['text'] = user_word
                            break
                    # NOTE(review): a word_id that matches nothing is silently
                    # ignored (original behavior; its error return was commented out).
        else:
            return post_error(
                "Data Missing",
                "No record with the given user_id,record_id and region_id", None)
        AppContext.addRecordID(record_id)
        log_info(
            "DigitalDocumentRepo update word region :{}".format(str(region_to_update)),
            AppContext.getContext())
        # Removed leftover debug print(region_to_update).
        if self.docModel.update_word(user_id, record_id, region_id, region_to_update, page) == False:
            return post_error(
                "Data Missing", "Failed to update word since data is missing", None)
    return True
def error_handler(self, code, message, object_in, iswf):
    """Build an error object.

    For workflow errors (iswf), object_in is first marked as a failed
    SENTENCES-ALIGNED task and a workflow error is raised via post_error_wf;
    otherwise a plain post_error is returned.
    """
    if not iswf:
        return post_error(code, message, None)
    object_in["state"] = "SENTENCES-ALIGNED"
    object_in["status"] = "FAILED"
    return post_error_wf(code, message, object_in, None)
def update_word_validation(word):
    """Validate one word-update payload.

    All of record_id, region_id, word_id, updated_word and page_no must be
    present and truthy. Returns a post_error dict on failure, None when valid.
    """
    required_keys = {'record_id', 'region_id', 'word_id', 'updated_word', 'page_no'}
    # Single pass covers both "key missing" and "value empty/falsy" (the
    # original ran two separate scans with the same message).
    # NOTE(review): a page_no of 0 is rejected as falsy -- confirm pages are
    # 1-based at this point in the pipeline.
    if any(key not in word or not word[key] for key in required_keys):
        # BUG FIX: the original message omitted page_no even though it is
        # mandatory too.
        return post_error(
            "Data Missing",
            "record_id,region_id,word_id,updated_word,page_no are mandatory for updating the word",
            None)
def run(self):
    """Periodic job sweeper: force-fails jobs stuck in STARTED/INPROGRESS.

    Wakes every js_cron_interval_sec seconds until self.stopped is set,
    fetches in-flight jobs and marks as FAILED any (non-aligner) job idle
    longer than js_job_failure_interval_sec.
    """
    obj = {"metadata": {"module": module_name}}
    # Random 4-letter suffix to disambiguate log lines between sweeper instances.
    rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
    prefix = "WFMJobsManager(" + rand_str + ")"
    log_info(prefix + " -- AJM Deployed, WFMJobsManager running......", obj)
    wfm_utils = WFMJMCronUtils()
    run = 0
    # NOTE(review): the intervals are eval()'d strings -- presumably numeric
    # config values; int()/float() would be safer. Left as-is here.
    while not self.stopped.wait(eval(str(js_cron_interval_sec))):
        try:
            criteria, exclude = {"status": {"$in": ["STARTED", "INPROGRESS"]}}, {'_id': False}
            jobs = wfm_utils.search_job(criteria, exclude, None, None)
            no_of_jobs = 0
            if jobs:
                log_info(prefix + " -- Run: " + str(run) + " | Jobs Fetched: " + str(len(jobs)), obj)
                for job in jobs:
                    if "AL" in job["workflowCode"]:
                        continue  # Ignore Aligner jobs
                    job_start_time = job["startTime"]
                    # Elapsed ms since the job started (13-digit epoch-millis).
                    diff = eval(str(time.time()).replace('.', '')[0:13]) - job_start_time
                    if (diff / 1000) > eval(str(js_job_failure_interval_sec)):
                        job["status"] = "FAILED"
                        job["error"] = post_error("ORPHAN_JOB", "The job was failed by the system, since it was idle", None)
                        job["endTime"] = eval(str(time.time()).replace('.', '')[0:13])
                        wfm_utils.update_job(job, job["jobID"])
                        log_info(prefix + " -- JOB FAILED: Idle job, force failed. jobID: " + job["jobID"], job)
                        no_of_jobs += 1
                run += 1
                log_info(prefix + " -- Run: " + str(run) + " | Jobs Fetched: " + str(len(jobs)) + " | Jobs Processed: " + str(no_of_jobs), obj)
        except Exception as e:
            run += 1
            log_exception(prefix + " -- Run: " + str(run) + " | Exception in JobSweeper: " + str(e), obj, e)
def validate_text_translate(self, data):
    """Validate an instant text-translation request.

    Requires input.textList (non-empty; each entry with s_id, src and
    taggedPrefix) and input.model (with model_id and both language codes).
    Returns a post_error dict on failure, None when valid.
    """
    # Guard-clause style: each failed precondition returns immediately.
    if 'input' not in data:
        return post_error("INPUT_NOT_FOUND", "Input key is mandatory", None)
    api_input = data["input"]
    if 'textList' not in api_input:
        return post_error("TEXT_LIST_NOT_FOUND", "Text List is mandatory", None)
    if not api_input["textList"]:
        return post_error("TEXT_LIST_EMPTY", "Text list cannot be empty", None)
    for text in api_input["textList"]:
        if 's_id' not in text:
            return post_error("SENTENCE_ID_NOT_FOUND", "s_id is mandatory", None)
        if 'src' not in text:
            return post_error("TEXT_NOT_FOUND", "src is mandatory", None)
        if 'taggedPrefix' not in text:
            return post_error("TAGGED_PREFIX_NOT_FOUND", "taggedPrefix is mandatory", None)
    if 'model' not in api_input:
        return post_error("MODEL_NOT_FOUND", "Model details are mandatory for this wf.", None)
    model = api_input["model"]
    if 'model_id' not in model:
        return post_error("MODEL_ID_NOT_FOUND", "Model Id is mandatory.", None)
    if 'source_language_code' not in model:
        return post_error("SRC_LANG_NOT_FOUND", "Source language code is mandatory.", None)
    if 'target_language_code' not in model:
        return post_error("TGT_LANG_NOT_FOUND", "Target language code is mandatory.", None)
def error_handler(self, object_in, code, iswf):
    """Build an error object for this service.

    Workflow errors (iswf) mark object_in FAILED at config.TASK_STAT and go
    through post_error_wf using object_in['message']; non-workflow errors
    return a plain post_error with an empty message.
    """
    if iswf:
        object_in['status'] = "FAILED"
        object_in['state'] = config.TASK_STAT
        return post_error_wf(code, object_in['message'], object_in, None)
    return post_error(code, "", None)
def error_handler(self, object_in, code, iswf):
    """Build an error object for the tokeniser stage.

    Workflow errors (iswf) mark object_in as a FAILED SENTENCE-TOKENISED
    task and go through post_error_wf using object_in['message']; otherwise
    a plain post_error with an empty message is returned.
    """
    if iswf:
        object_in['status'] = "FAILED"
        object_in['state'] = "SENTENCE-TOKENISED"
        return post_error_wf(code, object_in['message'], object_in, None)
    return post_error(code, "", None)
def validate_for_annotator(self, data):
    """Validate every file of an annotator workflow request.

    Each file must carry annotationType, sourceLanguage, targetLanguage,
    fileInfo, users and description. Returns a post_error dict for the first
    missing key, None when all files are valid.
    """
    # Table-driven: (key, error code, error message), checked in order.
    required = (
        ('annotationType', "ANNOTATION_TYPE_NOT_FOUND", "annotationType is mandatory for all files for this wf"),
        ('sourceLanguage', "SRC_LANG_NOT_FOUND", "sourceLanguage is mandatory for all files for this wf"),
        ('targetLanguage', "TGT_LANG_NOT_FOUND", "targetLanguage is mandatory for all files for this wf"),
        ('fileInfo', "FILES_INFO_NOT_FOUND", "fileInfo is mandatory for all files for this wf"),
        ('users', "USERS_NOT_FOUND", "users is mandatory for all files for this wf"),
        ('description', "DESC_NOT_FOUND", "description is mandatory for all files for this wf"),
    )
    for file in data["files"]:
        for key, err_code, err_msg in required:
            if key not in file:
                return post_error(err_code, err_msg, None)
def start_file_translation(self, translate_wf_input):
    """Kick off translation for every input file of a workflow job.

    Skips duplicate jobIDs, dumps each file to the DB, then spawns a
    fire-and-forget Process per file that pushes its sentences to NMT. If
    every/any dump fails, the failure is produced to the output topic.

    Returns None for duplicates, else a {"status", "message"} summary dict.
    """
    duplicate_jobs = repo.search({"jobID": translate_wf_input["jobID"]}, {'_id': False})
    if duplicate_jobs:
        log_info("Duplicate Job, jobID: " + str(translate_wf_input["jobID"]), translate_wf_input)
        return None
    translate_wf_input["taskID"] = utils.generate_task_id()
    # 13-digit epoch-millis; int() replaces the original unnecessary eval().
    translate_wf_input["taskStartTime"] = int(str(time.time()).replace('.', '')[0:13])
    translate_wf_input["state"] = "TRANSLATED"
    log_info("Translator process initiated... jobID: " + str(translate_wf_input["jobID"]), translate_wf_input)
    error, error_list = None, []
    for file in translate_wf_input["input"]["files"]:
        try:
            dumped = self.dump_file_to_db(file["path"], translate_wf_input)
            if not dumped:
                error_list.append({
                    "inputFile": str(file["path"]),
                    "outputFile": "FAILED",
                    "error": "File is either empty or couldn't be downloaded!"
                })
                error = post_error("FILE_DUMP_FAILED", "File is either empty or couldn't be downloaded!", None)
            else:
                # Fire-and-forget worker; completion is reported via Kafka,
                # so the process is intentionally not joined here.
                translation_process = Process(
                    target=self.push_sentences_to_nmt, args=(file, translate_wf_input))
                translation_process.start()
        except Exception as e:
            log_exception("Exception while posting sentences to NMT: " + str(e), translate_wf_input, e)
            continue
    if error_list and error is not None:
        translate_wf_input["output"], translate_wf_input["status"] = error_list, "FAILED"
        translate_wf_input["error"] = error
        translate_wf_input["taskEndTime"] = int(str(time.time()).replace('.', '')[0:13])
        producer.produce(translate_wf_input, anu_translator_output_topic, None)
        return {"status": "failed", "message": "Some/All files failed"}
    return {"status": "success", "message": "Sentences sent to NMT"}
def consume_tmx():
    """Kafka consumer loop for the TMX input topic.

    Each message is pushed into the TMX store via TMXService. Runs forever;
    per-message failures are logged and reported without stopping the loop.
    """
    try:
        topics = [anu_translator_tmx_in_topic]
        consumer = instantiate(topics)
        service = TMXService()
        # Random 4-letter suffix to disambiguate log lines between instances.
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "Translator-TMX-" + "(" + rand_str + ")"
        log_info(prefix + " Running..........", None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    data = msg.value
                    if data:
                        log_info(prefix + " | Received on Topic: " + msg.topic + " | Partition: " + str(msg.partition), data)
                        service.push_to_tmx_store(data)
                except Exception as e:
                    log_exception(prefix + " Exception in translator tmx while consuming: " + str(e), data, e)
                    post_error("TRANSLATOR_CONSUMER_ERROR", "Exception in translator while consuming: " + str(e), None)
    except Exception as e:
        # BUG FIX: the startup-failure log said "nmt consumer" (copy-paste
        # from consume_nmt); corrected to identify the tmx consumer.
        log_exception("Exception while starting the translator tmx consumer: " + str(e), None, e)
        post_error("TRANSLATOR_CONSUMER_EXC", "Exception while starting translator consumer: " + str(e), None)
def consume():
    """Kafka consumer loop for all configured workflow output topics.

    Enriches each message carrying a jobID with the job's stored metadata,
    then forwards it to WFMService.manage_wf. Runs forever; per-message
    failures are logged and reported without stopping consumption.
    """
    try:
        wfmutils = WFMUtils()
        wfmservice = WFMService()
        wfmutils.read_all_configs()
        configs = wfmutils.get_configs()
        topics = wfmutils.fetch_output_topics(configs)
        consumer = instantiate(topics)
        # Random 4-letter suffix to disambiguate log lines between instances.
        rand_str = ''.join(random.choice(string.ascii_letters) for i in range(4))
        prefix = "WFM--" + "(" + rand_str + ")"
        log_info(prefix + " | Running..........", None)
        log_info(prefix + " | Topics: " + str(topics), None)
        while True:
            for msg in consumer:
                data = {}
                try:
                    if msg:
                        data = msg.value
                        # Messages without a jobID are ignored entirely.
                        if 'jobID' in data.keys():
                            job_details = wfmutils.get_job_details(data["jobID"])
                            if job_details:
                                # Attach the stored job metadata before processing.
                                data["metadata"] = job_details[0]["metadata"]
                            log_info(prefix + " | Received on Topic: " + msg.topic + " | Partition: " + str(msg.partition), data)
                            wfmservice.manage_wf(data)
                except Exception as e:
                    log_exception(prefix + " | Exception while consuming: " + str(e), data, e)
                    post_error("WFM_CONSUMER_ERROR", "Exception while consuming: " + str(e), None)
    except Exception as e:
        # Startup failure (config read / topic discovery / instantiation).
        log_exception("Exception while starting the wfm consumer: " + str(e), None, e)
        post_error("WFM_CONSUMER_ERROR", "Exception while starting wfm consumer: " + str(e), None)
def process_sync(self, wf_input):
    """Execute a SYNC workflow: run each configured tool in order, feeding
    every tool's output into the next, and return the client-facing result.

    Any tool failure (detected by validate_tool_response) or exception marks
    the job FAILED and returns the failure client_output instead.
    """
    try:
        ctx = wf_input
        order_of_execution = wfmutils.get_order_of_exc(wf_input["workflowCode"])
        tool_output = None
        previous_tool = None
        for tool_order in order_of_execution.keys():
            step_details = order_of_execution[tool_order]
            tool_details = step_details["tool"][0]
            log_info(tool_details["name"] + log_msg_start + " jobID: " + ctx["jobID"], ctx)
            if not tool_output:
                # First tool in the chain: build its input from the raw request.
                tool_input = wfmutils.get_tool_input_sync(tool_details["name"], None, None, wf_input)
            else:
                # Subsequent tools consume the previous tool's output.
                tool_input = wfmutils.get_tool_input_sync(tool_details["name"], previous_tool, tool_output, None)
            response = wfmutils.call_api(tool_details["api-details"][0]["uri"], tool_input, wf_input["metadata"]["userID"])
            error = self.validate_tool_response(response, tool_details, wf_input)
            if error:
                # validate_tool_response already failed the job; propagate its output.
                return error
            tool_output = response
            previous_tool = tool_details["name"]
            # Tag the output with this module's identity for downstream logging.
            ctx["metadata"]["module"] = module_wfm_name
            tool_output["metadata"] = ctx["metadata"]
            log_info(tool_details["name"] + log_msg_end + " jobID: " + ctx["jobID"], ctx)
        client_output = self.get_wf_details_sync(None, tool_output, True, None)
        self.update_job_details(client_output, False)
        log_info("Job COMPLETED, jobID: " + str(wf_input["jobID"]), ctx)
        return client_output
    except Exception as e:
        log_exception("Exception while processing SYNC workflow: " + str(e), wf_input, e)
        error = post_error("SYNC_WFLOW_ERROR", "Exception while processing the sync workflow: " + str(e), e)
        client_output = self.get_wf_details_sync(wf_input, None, True, error)
        self.update_job_details(client_output, False)
        log_info("Job FAILED, jobID: " + str(wf_input["jobID"]), wf_input)
        return client_output
def error_handler(self, object_in, code, iswf):
    """Build an error object.

    Workflow errors (iswf) go through post_error_wf with the incoming code
    and object_in['message']; otherwise the code/message are taken from
    object_in['error'] and returned via post_error.
    """
    if iswf:
        # Dead-code fix: the original read jobID/taskID/state/status into
        # unused locals, which only risked a KeyError on partial inputs.
        return post_error_wf(code, object_in['message'], object_in, None)
    return post_error(object_in['error']['code'], object_in['error']['message'], None)
def push_to_queue(self, object_in, topic):
    """Publish object_in to a workflow topic, rotating partitions.

    A random partition is chosen, re-rolled so the same partition is never
    used twice in a row for a topic (tracked in topic_partition_map).
    Returns None on success (or falsy payload), a post_error dict on failure.
    """
    global topic_partition_map
    producer = self.instantiate()
    partition = random.choice(list(range(0, total_no_of_partitions)))
    if topic in topic_partition_map.keys():
        # Avoid re-using the partition this topic was last pushed to.
        while partition == topic_partition_map[topic]:
            partition = random.choice(list(range(0, total_no_of_partitions)))
    topic_partition_map[topic] = partition
    try:
        if object_in:
            producer.send(topic, partition=partition, value=object_in)
            # BUG FIX: flush() originally sat AFTER `return None` and was
            # unreachable, so buffered messages were never explicitly flushed.
            producer.flush()
            object_in["metadata"]["module"] = module_wfm_name  # FOR LOGGING ONLY.
            log_info("Pushing to TOPIC: " + topic + " | PARTITION: " + str(partition), object_in)
        return None
    except Exception as e:
        log_exception("Exception while producing: " + str(e), object_in, e)
        return post_error("WFLOW_PRODUCER_ERROR", "Exception while producing: " + str(e), None)
def store(self, userID, recordID, files):
    """Persist a digitized document: one block per page, bulk-inserted.

    Returns None on success (callers treat None as the success signal),
    False when the bulk insert fails, or a post_error dict (either from
    page-block creation or the exception handler).
    """
    try:
        for file in files:
            # jobID is the first '|'-separated component of the recordID.
            jobID = recordID.split('|')[0]
            fileID = file['file']['identifier']
            file_name = file['file']['name']
            locale = file['config']['language']
            file_type = file['file']['type']
            pages = file['pages']
            log_info(
                "DigitalDocumentRepo save document for user: {}| record: {}| count of pages received: {}"
                .format(userID, recordID, str(len(pages))), AppContext.getContext())
            blocks = []
            for page in pages:
                block = self.create_regions_from_page(userID, jobID, recordID, fileID, file_name, locale, file_type, page)
                # Heuristic: a successful page block carries 10 keys, while a
                # post_error dict has fewer -- so > 5 keys means "valid block".
                if len(block.keys()) > 5:
                    blocks.append(block)
                else:
                    # Propagate the post_error produced for this page.
                    return block
            log_info(
                'DigitalDocumentRepo page blocks created for insert, user_id:{}, record_id:{}, block length:{}'
                .format(userID, recordID, str(len(blocks))), AppContext.getContext())
            result = self.docModel.store_bulk_blocks(blocks)
            if result == False:
                return False
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            'Exception on save document | DigitalDocumentRepo :{}'.format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing", "Failed to store doc since :{}".format(str(e)), None)
def create_regions_from_page(self, userID, jobID, recordID, fileID, file_name, locale, file_type, page):
    """Build the storable block dict for one page of a digitized document.

    Returns the block (file metadata + page_info + regions) on success, or
    a post_error dict when the page payload is missing required fields.
    """
    try:
        AppContext.addRecordID(recordID)
        log_info(
            'DigitalDocumentRepo page blocks creation started for record_id:{}, page_number:{}'
            .format(recordID, str(page['page_no'])), AppContext.getContext())
        page_info = {
            'page_no': page['page_no'] + 1,  # stored pages are 1-based
            'page_identifier': page['identifier'],
            'page_boundingBox': page['boundingBox'],
            'page_img_path': page['path'],
        }
        if 'resolution' in page:
            page_info['page_resolution'] = page['resolution']
        return {
            'userID': userID,
            'jobID': jobID,
            'recordID': recordID,
            'file_identifier': fileID,
            'file_name': file_name,
            'file_locale': locale,
            'file_type': file_type,
            'created_on': datetime.utcnow(),
            'page_info': page_info,
            'regions': page['regions'],
        }
    except Exception as e:
        AppContext.addRecordID(recordID)
        log_exception(
            'Exception on save document | DigitalDocumentRepo :{}'.format(str(e)),
            AppContext.getContext(), e)
        return post_error("Data Missing", "Failed to store doc since data is missing", None)
def validate_async(self, data, workflowCode):
    """Validate an ASYNC workflow request.

    Checks the flow is enabled and of ASYNC type, that files are present,
    and per-file requirements depending on which tools the workflow uses
    (annotator, translator, OCR family). Returns a post_error dict on
    failure, None when valid.
    """
    if not is_async_flow_enabled:
        return post_error(
            "WORKFLOW_TYPE_DISABLED",
            "This workflow belongs to ASYNC type, which is currently disabled.", None)
    configs = wfmutils.get_configs()
    if configs[workflowCode]["type"] != "ASYNC":
        return post_error("UNSUPPORTED_WF_CODE", "This workflow is NOT of the ASYNC type.", None)
    if 'files' not in data:
        return post_error("FILES_NOT_FOUND", "files are mandatory", None)
    if len(data["files"]) == 0:
        return post_error("FILES_NOT_FOUND", "Input files are mandatory", None)
    tools = wfmutils.get_tools_of_wf(workflowCode)
    if tool_annotator in tools:
        # BUG FIX: the original called validate_for_annotator(data) and then
        # returned None unconditionally, silently discarding any validation
        # error it produced. Propagate its result instead.
        return self.validate_for_annotator(data)
    for file in data["files"]:
        if 'path' not in file:
            return post_error("FILES_PATH_NOT_FOUND", "Path is mandatory for all files in the input", None)
        if 'type' not in file:
            return post_error("FILES_TYPE_NOT_FOUND", "Type is mandatory for all files in the input", None)
        if 'locale' not in file:
            return post_error("FILES_LOCALE_NOT_FOUND", "Locale is mandatory for all files in the input", None)
        if tool_translator in tools:
            if 'model' not in file:
                return post_error("MODEL_NOT_FOUND", "Model details are mandatory for this wf.", None)
            model = file["model"]
            if 'model_id' not in model:
                return post_error("MODEL_ID_NOT_FOUND", "Model Id is mandatory.", None)
            if 'source_language_code' not in model:
                return post_error("SRC_LANG_NOT_FOUND", "Source language code is mandatory.", None)
            if 'target_language_code' not in model:
                return post_error("TGT_LANG_NOT_FOUND", "Target language code is mandatory.", None)
        if tool_worddetector in tools or tool_layoutdetector in tools or tool_ocrgooglevision in tools \
                or tool_ocrtesseract in tools or tool_blocksegmenter in tools or tool_ocrdd10googlevision in tools \
                or tool_ocrdd15googlevision in tools:
            if 'config' not in file:
                return post_error("CONFIG_NOT_FOUND", "OCR Config details are mandatory for this wf.", None)
            config = file["config"]
            if 'OCR' not in config:
                return post_error("CONFIG_NOT_FOUND", "OCR Config details are mandatory for this wf.", None)
def validate_sync(self, data, workflowCode):
    """Validate a SYNC workflow request.

    Checks the flow is enabled and of SYNC type, that recordID, locale and
    non-empty textBlocks are present, plus model details when the workflow
    uses the translator and modifiedSentences for single-tool workflows.
    Returns a post_error dict on failure, None when valid.
    """
    if is_sync_flow_enabled:
        configs = wfmutils.get_configs()
        if configs[workflowCode]["type"] != "SYNC":
            return post_error("UNSUPPORTED_WF_CODE", "This workflow is NOT of the SYNC type.", None)
        if 'recordID' not in data.keys():
            return post_error("RECORD_ID_NOT_FOUND", "Record id is mandatory.", None)
        if 'locale' not in data.keys():
            return post_error("LOCALE_NOT_FOUND", "Locale is mandatory.", None)
        if 'textBlocks' not in data.keys():
            return post_error("TEXT_BLOCKS_NOT_FOUND", "text blocks are mandatory.", None)
        else:
            if not data["textBlocks"]:
                return post_error("TEXT_BLOCKS_NOT_FOUND", "text blocks are mandatory.", None)
        tools = wfmutils.get_tools_of_wf(workflowCode)
        if tool_translator in tools:
            if 'model' not in data.keys():
                return post_error("MODEL_NOT_FOUND", "Model details are mandatory for this wf.", None)
            else:
                model = dict(data["model"])
                if 'model_id' not in model.keys():
                    return post_error("MODEL_ID_NOT_FOUND", "Model Id is mandatory.", None)
                if 'source_language_code' not in model.keys():
                    return post_error("SRC_LANG_NOT_FOUND", "Source language code is mandatory.", None)
                if 'target_language_code' not in model.keys():
                    return post_error("TGT_LANG_NOT_FOUND", "Target language code is mandatory.", None)
        # NOTE(review): single-tool workflows additionally require the IDs of
        # the sentences the user modified -- confirm this applies regardless
        # of whether the single tool is the translator.
        if len(tools) == 1:
            if 'modifiedSentences' not in data.keys():
                return post_error("MODIFIED_SENT_NOT_FOUND", "Ids of modified sentences is mandatory", None)
            else:
                if not data["modifiedSentences"]:
                    return post_error("MODIFIED_SENT_NOT_FOUND", "Ids of modified sentences is mandatory", None)
    else:
        return post_error(
            "WORKFLOW_TYPE_DISABLED",
            "This workflow belongs to SYNC type, which is currently disabled.", None)