def consumer_validator():
    try:
        consumer_class = Consumer(config.input_topic, config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log_info("consumer_validator --- consumer running -----", None)
        return consumer
    except Exception:
        log_exception("consumer_validator : error in kafka operation while listening to consumer on topic %s" % (config.input_topic), None, None)
        raise KafkaConsumerError(400, "Can not connect to consumer.")
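# A minimal sketch (not from this repo) of the Consumer wrapper assumed above, built on
# kafka-python. The method names consumer_instantiate/get_json_data come from the call
# sites in this file; the constructor arguments, group id, and JSON handling are
# assumptions. Named _ConsumerSketch so it does not shadow the project's real Consumer.
import json
from kafka import KafkaConsumer

class _ConsumerSketch(object):
    def __init__(self, topic, bootstrap_server):
        self.topic = topic
        self.bootstrap_server = bootstrap_server

    def consumer_instantiate(self):
        # Iterating the returned consumer yields ConsumerRecord objects
        # (msg.value, msg.partition, ...), matching the loops below.
        return KafkaConsumer(self.topic,
                             bootstrap_servers=[self.bootstrap_server],
                             group_id='anuvaad-etl',  # assumed consumer group
                             enable_auto_commit=True)

    @staticmethod
    def get_json_data(value):
        # Returns None when the payload is not valid JSON, matching the
        # `if Consumer.get_json_data(msg.value) is None` checks below.
        try:
            return json.loads(value if isinstance(value, str) else value.decode('utf-8'))
        except Exception:
            return None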
def process_block_segmenter_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)
    # instantiation of consumer for the respective topic
    try:
        consumer = consumer_validator()
        log_info("process_block_segmenter_kf : trying to receive value from consumer", LOG_WITHOUT_CONTEXT)
        for msg in consumer:
            if Consumer.get_json_data(msg.value) is None:
                log_info('process_block_segmenter_kf - received invalid data {}'.format(msg.value), None)
                continue
            data = Consumer.get_json_data(msg.value)
            jobid = data['jobID']
            log_info('process_block_segmenter_kf - received message from kafka, dumping into internal queue', data)
            input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)
            Queue.put(data)
            log_info('process_block_segmenter_kf - request in internal queue {}'.format(Queue.qsize()), data)
            # TODO: route non-'en' locales (input_files[0]['locale']) to a separate OCR queue,
            # and reject requests when the internal queue grows too large.
    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_block_segmenter_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = {'message': e.message}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_block_segmenter_kf : response sent to topic %s" % (config.output_topic), None, e)
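# process_block_segmenter_kf above only enqueues work; this is a sketch (assumed, not
# from this repo) of the worker thread that would drain the internal queue.
# do_segmentation is a hypothetical stand-in for the real processing step, and
# push_data_to_queue's argument order is borrowed from the commented-out call in
# process_merger_kf below.
import time

def _segmenter_worker_sketch(internal_queue, producer):
    while True:
        data = internal_queue.get(block=True)  # blocks until a request is queued
        task_id = "BS-" + str(time.time()).replace('.', '')
        result = do_segmentation(data)         # hypothetical processing step
        producer.push_data_to_queue(config.output_topic, result, data['jobID'], task_id)

# Started once at service startup, e.g. from inside process_block_segmenter_kf:
# threading.Thread(target=_segmenter_worker_sketch, args=(Queue, producer_tok), daemon=True).start()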
def process_vision_ocr_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)
    # instantiation of consumer for the respective topic
    try:
        consumer = consumer_validator()
        log_info("process_vision_ocr_kf : trying to receive value from consumer", LOG_WITHOUT_CONTEXT)
        while True:
            # Block until the processing side signals it is ready for the next message.
            wait_for_control = controlQueue.get(block=True)
            for msg in consumer:
                if Consumer.get_json_data(msg.value) is None:
                    log_info('process_vision_ocr_kf - received invalid data {}'.format(msg.value), None)
                    continue
                data = Consumer.get_json_data(msg.value)
                # Commit the offset explicitly so this message is not re-delivered.
                consumer.commit()
                # Optionally, verify that the commit went through.
                # TopicPartition comes from kafka-python (from kafka import TopicPartition).
                print('New Kafka offset: %s' % consumer.committed(TopicPartition(config.input_topic, msg.partition)))
                jobid = data['jobID']
                log_info('process_vision_ocr_kf - received message from kafka, dumping into internal queue', data)
                input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)
                processQueue.put(data)
                log_info('process_vision_ocr_kf - request in internal queue {}'.format(processQueue.qsize()), data)
                # Hand exactly one message to the processing queue, then wait for the next control token.
                break
    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_vision_ocr_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = {'message': e.message}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_vision_ocr_kf : response sent to topic %s" % (config.output_topic), None, e)
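# A sketch of the flow-control contract assumed above: the OCR worker puts a token on
# controlQueue each time it is ready, so process_vision_ocr_kf fetches and commits
# exactly one Kafka message per token and parks it on processQueue. run_ocr is a
# hypothetical stand-in for the real OCR step; controlQueue and processQueue are the
# module-level queues used above.
def _ocr_worker_sketch():
    while True:
        controlQueue.put('ready')            # allow one more message to be fetched
        data = processQueue.get(block=True)  # wait for that message to arrive
        run_ocr(data)                        # hypothetical OCR step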
def process_merger_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    task_id = str("BM-" + str(time.time()).replace('.', ''))
    task_starttime = str(time.time()).replace('.', '')
    # instantiation of consumer for the respective topic
    try:
        consumer_class = Consumer(config.input_topic, config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log_info("process_merger_kf", "trying to receive value from consumer", None)
        thread_instance = 0
        jobid = None  # keep jobid bound for the inner exception handler
        for msg in consumer:
            try:
                data = msg.value
                task_id = str("BM-" + str(time.time()).replace('.', ''))
                task_starttime = str(time.time()).replace('.', '')
                input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)
                log_info("process_merger_kf", "kafka request arrived", jobid)
                response_gen = Response(data, DOWNLOAD_FOLDER)
                # One worker thread per request; Response does the actual block merging.
                t1 = threading.Thread(target=response_gen.multi_thred_block_merger,
                                      args=(task_id, task_starttime, jobid),
                                      name='BM-thread-' + str(thread_instance))
                t1.start()
                thread_instance += 1
                log_info("multithread", "block-merger running on multithread", None)
                # Synchronous alternative, kept for reference:
                # file_value_response = response_gen.workflow_response(task_id, task_starttime)
                # if "errorID" not in file_value_response.keys():
                #     producer = Producer()
                #     producer.push_data_to_queue(config.output_topic, file_value_response, jobid, task_id)
                # else:
                #     log_info("process_merger_kf", "error send to error handler", jobid)
            except Exception as e:
                log_exception("process_merger_kf", "exception while consuming the records", jobid, e)
    except KafkaConsumerError as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, None, None)
        response_custom.status_code['message'] = str(e)
        file_ops.error_handler(response_custom.status_code, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_merger_kf", "Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = e.code
        response_custom['message'] = e.message
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_merger_kf", "response sent to topic %s" % (config.output_topic), response_custom['jobID'], e)
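# A sketch of what each BM worker thread is assumed to do, mirroring the synchronous
# path kept in comments above: workflow_response and push_data_to_queue (and their
# argument order) are taken from that comment; everything else here is an assumption.
# The Producer is instantiated with the bootstrap server, as at the top of the other
# consumers in this file.
def _block_merger_thread_sketch(response_gen, task_id, task_starttime, jobid):
    file_value_response = response_gen.workflow_response(task_id, task_starttime)
    if "errorID" not in file_value_response.keys():
        producer = Producer(config.bootstrap_server)
        producer.push_data_to_queue(config.output_topic, file_value_response, jobid, task_id)
    else:
        log_info("process_merger_kf", "error send to error handler", jobid)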
def process_kf_request_payload():
    file_ops = FileOperation()
    # instantiation of consumer for the respective topic
    try:
        consumer = consumer_validator()
        log_info("trying to receive value from consumer", LOG_WITHOUT_CONTEXT)
        for msg in consumer:
            if Consumer.get_json_data(msg.value) is None:
                log_info('received invalid data {}'.format(msg.value), LOG_WITHOUT_CONTEXT)
                continue
            data = Consumer.get_json_data(msg.value)
            LOG_WITHOUT_CONTEXT['jobID'] = data['jobID']
            log_info("received input request from Kafka queue for JobID: %s" % (data['jobID']), LOG_WITHOUT_CONTEXT)
            processRequest(data)
    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = {'message': e.message}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("response sent to topic %s" % (config.output_topic), None, e)
    except Exception as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("error while consuming from topic %s" % (config.input_topic), None, e)
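# For reference, a sketch of the payload shape these consumers appear to expect.
# Only jobID (data['jobID']) and a per-file locale (input_files[0]['locale'], referenced
# in the TODO in process_block_segmenter_kf) are grounded in this file; every other key
# name is an assumption standing in for whatever json_input_format actually reads.
_example_payload_sketch = {
    "jobID": "BM-15930000000000000",
    "workflowCode": "WF_A_FCBMTKTR",                    # assumed source of workflow_id
    "tool": "BLOCK-MERGER",                             # assumed source of tool_name
    "stepOrder": 0,                                     # assumed source of step_order
    "input": [{"path": "sample.pdf", "locale": "en"}],  # assumed source of input_files
}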
def process_google_ocr_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)
    # instantiation of consumer for the respective topic
    try:
        consumer = consumer_validator()
        log_info("process_google_ocr_kf : trying to receive value from consumer", LOG_WITHOUT_CONTEXT)
        while True:
            # Block until the processing side signals it is ready for the next message.
            wait_for_control = controlQueue.get(block=True)
            for msg in consumer:
                if Consumer.get_json_data(msg.value) is None:
                    log_info('process_google_ocr_kf - received invalid data {}'.format(msg.value), None)
                    continue
                data = Consumer.get_json_data(msg.value)
                # Commit the offset explicitly so this message is not re-delivered.
                consumer.commit()
                # Optionally, verify that the commit went through:
                # print('New Kafka offset: %s' % consumer.committed(TopicPartition(config.input_topic, msg.partition)))
                jobid = data['jobID']
                log_info('process_google_ocr_kf - received message from kafka, dumping into internal queue', data)
                input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)
                Queue.put(data)
                log_info('process_google_ocr_kf - request in internal queue {}'.format(Queue.qsize()), data)
                # Hand exactly one message to the internal queue, then wait for the next control token.
                break
    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_google_ocr_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = {'message': e.message}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_google_ocr_kf : response sent to topic %s" % (config.output_topic), None, e)