def multi_thred_block_merger(self, task_id, task_starttime, jobid):
    """Run the block-merger workflow and publish its response.

    Logs a start message tagged with the current thread's name, obtains the
    workflow response via ``self.workflow_response`` and, when that response
    has no ``"errorID"`` key, pushes it to ``config.output_topic`` through a
    freshly created ``Producer``. When ``"errorID"`` is present nothing is
    published; only a log line is written.

    Args:
        task_id: Identifier forwarded to ``workflow_response`` and the producer.
        task_starttime: Start timestamp forwarded to ``workflow_response``.
        jobid: Job identifier forwarded to the producer.
    """
    thread_name = threading.current_thread().name
    start_message = (
        "multi_thred_block_merger" + str(thread_name)
        + " | block-merger process started ===>"
    )
    log_info(start_message, app_context.application_context)

    response = self.workflow_response(task_id, task_starttime)

    # Guard clause: an error response is only logged, never published.
    if "errorID" in response.keys():
        log_info("process_merger_kf error send to error handler",
                 app_context.application_context)
        return

    Producer().push_data_to_queue(config.output_topic, response, jobid, task_id)
def word_detector_request_worker():
    """Process word-detector jobs from the internal ``Queue`` forever.

    For every message taken from ``Queue`` the worker builds a ``Response``
    and runs ``workflow_response``. A non-``None`` result is published to
    ``config.output_topic`` via ``push_output``; a ``None`` result causes a
    400 error object to be pushed to
    ``config.KAFKA_ANUVAAD_ETL_WF_ERROR_TOPIC``.

    Any exception raised while handling a message is logged and the loop
    continues with the next message. This function never returns.
    """
    file_ops = FileOperation()
    download_folder = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)
    log_info("word_detector_request_worker : starting thread ", LOG_WITHOUT_CONTEXT)

    while True:
        data = Queue.get(block=True)
        try:
            task_id = "word_detector" + str(time.time()).replace('.', '')
            task_starttime = str(time.time()).replace('.', '')
            # Parsing happens inside the try block so that one malformed
            # message is logged instead of terminating the worker thread.
            _, _, jobid, _, _ = file_ops.json_input_format(data)
            log_info(
                "word_detector_request_worker processing -- received message "
                + str(jobid), data)

            response_gen = Response(data, download_folder)
            file_value_response = response_gen.workflow_response(
                task_id, task_starttime, False)

            if file_value_response is not None:
                push_output(producer_tok, config.output_topic,
                            file_value_response, jobid, task_id, data)
                log_info(
                    "word_detector_request_worker : response send to topic %s"
                    % (config.output_topic), LOG_WITHOUT_CONTEXT)
            else:
                erro_obj = {
                    'code': 400,
                    'jobID': jobid,
                    'message': "Word detector failed"
                }
                producer_tok.push_data_to_queue(
                    config.KAFKA_ANUVAAD_ETL_WF_ERROR_TOPIC, erro_obj)
                log_info(
                    "word_detector_request_worker : error send to error handler",
                    data)

            log_info(
                'word_detector_request_worker - request in internal queue {}'.
                format(Queue.qsize()), data)
        except Exception as e:
            log_exception("word_detector_request_worker ",
                          LOG_WITHOUT_CONTEXT, e)
        finally:
            # Every get() needs exactly one task_done(), even on failure;
            # otherwise Queue.join() would block forever after one bad job.
            Queue.task_done()
def vision_ocr_request_worker():
    """Process vision-OCR jobs from ``processQueue`` forever.

    For every message taken from ``processQueue`` the worker builds a
    ``Response`` and runs ``workflow_response``. A non-``None`` result without
    an ``"errorID"`` key is published to ``config.output_topic`` via
    ``push_output``; a result carrying ``"errorID"`` is only logged.

    After each message, whether it succeeded or failed, a token is put on
    ``controlQueue``. This function never returns.
    """
    file_ops = FileOperation()
    download_folder = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)
    log_info("vision_ocr_request_worker : starting thread ", LOG_WITHOUT_CONTEXT)

    while True:
        data = processQueue.get(block=True)
        try:
            task_id = "vision_ocr" + str(time.time()).replace('.', '')
            task_starttime = str(time.time()).replace('.', '')
            # Parsing happens inside the try block so that one malformed
            # message is logged instead of terminating the worker thread
            # (which would also stop tokens being put on controlQueue).
            _, _, jobid, _, _ = file_ops.json_input_format(data)
            log_info(
                "vision_ocr_request_worker processing -- received message "
                + str(jobid), data)

            response_gen = Response(data, download_folder)
            file_value_response = response_gen.workflow_response(
                task_id, task_starttime, False)

            if file_value_response is not None:
                if "errorID" not in file_value_response.keys():
                    push_output(producer_tok, config.output_topic,
                                file_value_response, jobid, task_id, data)
                    log_info(
                        "vision_ocr_request_worker : response send to topic %s"
                        % (config.output_topic), LOG_WITHOUT_CONTEXT)
                else:
                    log_info(
                        "vision_ocr_request_worker : error send to error handler",
                        data)

            # Report the queue this worker actually consumes from.
            log_info(
                'vision_ocr_request_worker - request in internal queue {}'.
                format(processQueue.qsize()), data)
        except Exception as e:
            log_exception("vision_ocr_request_worker ", LOG_WITHOUT_CONTEXT, e)
        finally:
            # Every get() needs exactly one task_done(), even on failure;
            # otherwise processQueue.join() would block forever.
            processQueue.task_done()

        # Put one token on the control queue per handled message.
        # NOTE(review): presumably this lets the Kafka consumer loop fetch
        # the next message - confirm against the consumer side.
        controlQueue.put(1)
def processRequest(data):
    """Run the workflow for a single request and publish a successful result.

    Builds a ``Response`` for ``data``, runs ``workflow_response`` and, when
    the result is not ``None`` and has no ``"errorID"`` key, publishes it to
    ``config.output_topic`` via ``push_output``. A result carrying
    ``"errorID"`` is only logged. Exceptions raised while generating or
    publishing the response are logged and not re-raised.

    Args:
        data: Incoming request payload; passed to ``json_input_format``,
            ``Response`` and the logging helpers.
    """
    ops = FileOperation()
    producer = Producer(config.bootstrap_server)
    download_dir = ops.file_download(config.download_folder)

    task_id = "ANNO-" + str(time.time()).replace('.', '')
    started_at = str(time.time()).replace('.', '')

    # Only the job id is needed from the parsed input.
    _, _, jobid, _, _ = ops.json_input_format(data)
    log_info("processing -- received message " + str(jobid), data)

    try:
        result = Response(data, download_dir).workflow_response(
            task_id, started_at)

        if result is None:
            return

        if "errorID" in result.keys():
            log_info("processing failed, informed WFM", data)
            return

        push_output(producer, config.output_topic, result, jobid, task_id, data)
        log_info(
            "processing completed successfully, published at %s"
            % (config.output_topic), data)
    except Exception as exc:
        log_exception("exception encountered ", LOG_WITHOUT_CONTEXT, exc)
def process_block_segmenter_kf():
    """Consume Kafka messages and enqueue valid ones on the internal ``Queue``.

    Obtains a consumer from ``consumer_validator`` and iterates over it. Each
    message value is decoded with ``Consumer.get_json_data``; messages that
    decode to ``None`` are logged and skipped, all others are put on
    ``Queue`` for the worker thread.

    ``KafkaConsumerError`` and ``KafkaProducerError`` are reported through
    ``file_ops.error_handler`` and logged; they end the consume loop.
    """
    file_ops = FileOperation()
    # Kept for parity with the original start-up sequence: the directory
    # helper and the producer are invoked even though their results are not
    # used below.
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)

    try:
        # Instantiate the consumer for the configured topic.
        consumer = consumer_validator()
        log_info(
            "process_document_segmenter_kf : trying to receive value from consumer ",
            LOG_WITHOUT_CONTEXT)

        for msg in consumer:
            # Decode once per message instead of once for the check and
            # again for the assignment.
            data = Consumer.get_json_data(msg.value)
            if data is None:
                log_info(
                    'process_document_segmenter_kf - received invalid data {}'.
                    format(msg.value), None)
                continue

            jobid = data['jobID']
            log_info(
                'process_document_segmenter_kf - received message from kafka, dumping into internal queue',
                data)
            input_files, workflow_id, jobid, tool_name, step_order = \
                file_ops.json_input_format(data)

            Queue.put(data)
            log_info(
                'process_document_segmenter_kf - request in internal queue {}'.
                format(Queue.qsize()), data)
            # TODO: reject Kafka requests when the internal queue grows too large.

    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception(
            "process_layout_detector_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = {'message': e.message}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception(
            "process_layout_detector_kf : response send to topic %s"
            % (config.output_topic), None, e)
def process_vision_ocr_kf():
    """Feed Kafka messages to ``processQueue`` one at a time.

    Obtains a consumer from ``consumer_validator`` and then loops forever:
    it blocks on ``controlQueue.get`` until a token is available, reads
    messages from the consumer until one decodes to valid JSON, commits the
    offset, puts the payload on ``processQueue`` and goes back to waiting for
    the next token. Messages that decode to ``None`` are logged and skipped
    without consuming another token.

    ``KafkaConsumerError`` and ``KafkaProducerError`` are reported through
    ``file_ops.error_handler`` and logged; they end the loop.
    """
    file_ops = FileOperation()
    # Kept for parity with the original start-up sequence: the directory
    # helper and the producer are invoked even though their results are not
    # used below.
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)

    try:
        # Instantiate the consumer for the configured topic.
        consumer = consumer_validator()
        log_info(
            "process_vision_ocr_kf : trying to receive value from consumer ",
            LOG_WITHOUT_CONTEXT)

        while True:
            # Block until a token is available; the token value is unused.
            controlQueue.get(block=True)

            for msg in consumer:
                # Decode once per message instead of once for the check and
                # again for the assignment.
                data = Consumer.get_json_data(msg.value)
                if data is None:
                    log_info(
                        'process_vision_ocr_kf - received invalid data {}'.
                        format(msg.value), None)
                    continue

                # Commit the offset as soon as the message has been decoded,
                # before it is handed to the worker.
                consumer.commit()
                # Optional sanity check that the commit went through.
                print('New Kafka offset: %s' % consumer.committed(
                    TopicPartition(config.input_topic, msg.partition)))

                jobid = data['jobID']
                log_info(
                    'process_vision_ocr_kf - received message from kafka, dumping into internal queue',
                    data)
                input_files, workflow_id, jobid, tool_name, step_order = \
                    file_ops.json_input_format(data)

                processQueue.put(data)
                # Report the size of the queue the message was put on.
                log_info(
                    'process_vision_ocr_kf - request in internal queue {}'.
                    format(processQueue.qsize()), data)
                # One message per control token: leave the consumer loop and
                # wait for the next token.
                break
                # TODO: reject Kafka requests when the internal queue grows too large.

    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception(
            "process_vision_ocr_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = {'message': e.message}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception(
            "process_vision_ocr_kf : response send to topic %s"
            % (config.output_topic), None, e)
def block_merger_request_worker():
    """Process block-merger jobs from ``blockMergerQueue`` forever.

    For every message taken from ``blockMergerQueue`` the worker builds a
    ``Response`` and runs ``workflow_response``. A non-``None`` result without
    an ``"errorID"`` key is published to ``config.output_topic`` via
    ``push_output``; a result carrying ``"errorID"`` is only logged. After
    each message the remaining queue size is logged and the queue item is
    marked done. This function never returns.
    """
    ops = FileOperation()
    download_dir = ops.create_file_download_dir(config.download_folder)
    producer = Producer(config.bootstrap_server)

    while True:
        data = blockMergerQueue.get(block=True)

        task_id = "BM-" + str(time.time()).replace('.', '')
        started_at = str(time.time()).replace('.', '')

        # Only the job id is needed from the parsed input.
        _, _, jobid, _, _ = ops.json_input_format(data)
        log_info(
            "block_merger_request_worker processing -- received message "
            + str(jobid), data)

        response = Response(data, download_dir).workflow_response(
            task_id, started_at, False)

        if response is not None:
            if "errorID" in response.keys():
                log_info(
                    "process_block_merger_kf : error send to error handler",
                    jobid)
            else:
                push_output(producer, config.output_topic, response, jobid,
                            task_id)
                log_info(
                    "process_block_merger_kf : response send to topic %s"
                    % (config.output_topic), None)

        pending = blockMergerQueue.qsize()
        log_info(
            'block_merger_request_worker - request in internal queue {}'.
            format(pending), jobid)
        blockMergerQueue.task_done()
def process_vision_ocr_kf(): file_ops = FileOperation() DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder) producer_tok = Producer(config.bootstrap_server)