Beispiel #1
0
def process_block_segmenter_kf():
    """Consume requests from Kafka and hand them to the internal queue.

    Creates the download directory and a producer, obtains a consumer from
    ``consumer_validator()``, then iterates over the consumer.  Every message
    whose payload parses via ``Consumer.get_json_data`` is passed through
    ``file_ops.json_input_format`` and put on ``Queue``; payloads that do not
    parse are logged and skipped.

    ``KafkaConsumerError`` and ``KafkaProducerError`` raised inside the
    ``try`` body are reported through ``file_ops.error_handler`` and logged;
    they are not re-raised.  The function returns ``None``.
    """
    file_ops = FileOperation()
    # Both results are unused below; the calls are kept (and kept bound) so
    # that whatever setup they perform still happens exactly as before.
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)

    # instantiation of consumer for respective topic
    try:
        consumer = consumer_validator()
        log_info(
            "process_document_segmenter_kf : trying to receive value from consumer ",
            LOG_WITHOUT_CONTEXT)

        for msg in consumer:
            # Parse the payload once and reuse the result; the original
            # decoded every message twice.
            data = Consumer.get_json_data(msg.value)
            if data is None:
                log_info(
                    'process_document_segmenter_kf - received invalid data {}'.
                    format(msg.value), None)
                continue

            # Value is overwritten by the unpack below; the lookup is kept so
            # a payload without 'jobID' still fails at this point.
            jobid = data['jobID']
            log_info(
                'process_document_segmenter_kf - received message from kafka, dumping into internal queue',
                data)
            # The unpacked values are not used here; the call is retained
            # because it may validate the payload (not visible from here).
            input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(
                data)

            Queue.put(data)
            log_info(
                'process_document_segmenter_kf - request in internal queue {}'.
                format(Queue.qsize()), data)

            # TODO: reject kafka requests if the internal queue grows too large.

    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception(
            "process_layout_detector_kf : Consumer didn't instantiate", None,
            e)
    except KafkaProducerError as e:
        # Exceptions have no guaranteed ``message`` attribute on Python 3;
        # fall back to str(e) instead of raising inside the handler.
        response_custom = {'message': getattr(e, 'message', str(e))}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception(
            "process_layout_detector_kf : response send to topic %s" %
            (config.output_topic), None, e)
Beispiel #2
0
def process_vision_ocr_kf():
    """Feed one Kafka request into ``processQueue`` per control token.

    Creates the download directory and a producer, obtains a consumer from
    ``consumer_validator()``, then loops forever: it blocks on
    ``controlQueue.get`` and, once an item arrives, reads messages from the
    consumer until one parses via ``Consumer.get_json_data``.  That message's
    offset is committed, the payload is passed through
    ``file_ops.json_input_format`` and put on ``processQueue``, and the inner
    loop is left so the next control item is awaited.

    ``KafkaConsumerError`` and ``KafkaProducerError`` raised inside the
    ``try`` body are reported through ``file_ops.error_handler`` and logged;
    they are not re-raised.
    """
    file_ops = FileOperation()
    # Both results are unused below; the calls are kept (and kept bound) so
    # that whatever setup they perform still happens exactly as before.
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
    producer_tok = Producer(config.bootstrap_server)

    # instantiation of consumer for respective topic
    try:
        consumer = consumer_validator()
        log_info("process_vision_ocr_kf : trying to receive value from consumer ", LOG_WITHOUT_CONTEXT)

        while True:
            # Block until a control item is available; its value is not used.
            controlQueue.get(block=True)

            for msg in consumer:
                # Parse the payload once and reuse the result; the original
                # decoded every message twice.
                data = Consumer.get_json_data(msg.value)
                if data is None:
                    log_info('process_vision_ocr_kf - received invalid data {}'.format(msg.value), None)
                    continue

                # Commit before handing the request over to the internal queue.
                consumer.commit()
                # Optional sanity check of the committed offset.
                print('New Kafka offset: %s' % consumer.committed(TopicPartition(config.input_topic, msg.partition)))

                # Value is overwritten by the unpack below; the lookup is kept
                # so a payload without 'jobID' still fails at this point.
                jobid = data['jobID']
                log_info('process_vision_ocr_kf - received message from kafka, dumping into internal queue', data)
                # The unpacked values are not used here; the call is retained
                # because it may validate the payload (not visible from here).
                input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)

                processQueue.put(data)
                # Report the size of the queue that was just written to; the
                # original logged ``Queue.qsize()``, i.e. a different object.
                # NOTE(review): assumes processQueue exposes qsize() - confirm.
                log_info('process_vision_ocr_kf - request in internal queue {}'.format(processQueue.qsize()),
                         data)
                # One accepted request per control item.
                break

            # TODO: reject kafka requests if the internal queue grows too large.

    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_vision_ocr_kf : Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        # Exceptions have no guaranteed ``message`` attribute on Python 3;
        # fall back to str(e) instead of raising inside the handler.
        response_custom = {'message': getattr(e, 'message', str(e))}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_vision_ocr_kf : response send to topic %s" % (config.output_topic), None, e)
Beispiel #3
0
def process_kf_request_payload():
    """Consume requests from Kafka and process each one synchronously.

    Obtains a consumer from ``consumer_validator()`` and iterates over it.
    Every message whose payload parses via ``Consumer.get_json_data`` has its
    ``jobID`` stored into ``LOG_WITHOUT_CONTEXT`` and is then passed to
    ``processRequest``; payloads that do not parse are logged and skipped.

    ``KafkaConsumerError``, ``KafkaProducerError`` and any other ``Exception``
    raised inside the ``try`` body are reported through
    ``file_ops.error_handler`` and logged; they are not re-raised.
    """
    file_ops = FileOperation()

    # instantiation of consumer for respective topic
    try:
        consumer = consumer_validator()
        log_info("trying to receive value from consumer ", LOG_WITHOUT_CONTEXT)

        for msg in consumer:
            # Parse the payload once and reuse the result; the original
            # decoded every message twice.
            data = Consumer.get_json_data(msg.value)
            if data is None:
                log_info('received invalid data {}'.format(msg.value),
                         LOG_WITHOUT_CONTEXT)
                continue

            # Mutates the shared logging context so later log lines carry
            # the job id of the request currently being handled.
            LOG_WITHOUT_CONTEXT['jobID'] = data['jobID']
            log_info(
                "received input request from Kafka queue for JobID: %s " %
                (data['jobID']), LOG_WITHOUT_CONTEXT)
            processRequest(data)

    except KafkaConsumerError as e:
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        # Exceptions have no guaranteed ``message`` attribute on Python 3;
        # fall back to str(e) instead of raising inside the handler.
        response_custom = {'message': getattr(e, 'message', str(e))}
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("response send to topic %s" % (config.output_topic),
                      None, e)
    except Exception as e:
        # The original referenced ``response_custom`` here without ever
        # assigning it on this path, so the handler itself raised
        # UnboundLocalError and hid the real failure.
        response_custom = {'message': str(e)}
        file_ops.error_handler(response_custom, "KAFKA_CONSUMER_ERROR", True)
        log_exception("response send to topic %s" % (config.output_topic),
                      None, e)
Beispiel #4
0
def process_vision_ocr_kf():
    file_ops            = FileOperation()
    DOWNLOAD_FOLDER     = file_ops.create_file_download_dir(config.download_folder)
    producer_tok        = Producer(config.bootstrap_server)
    
    # instatiation of consumer for respective topic
   try:
        consumer = consumer_validator()
        log_info("process_google_ocr_kf : trying to receive value from consumer ", LOG_WITHOUT_CONTEXT)

        while True:
            wait_for_control = controlQueue.get(block=True)

            for msg in consumer:

                if Consumer.get_json_data(msg.value) == None:
                    log_info('process_google_ocr_kf - received invalid data {}'.format(msg.value), None)
                    continue
                data            = Consumer.get_json_data(msg.value)


                consumer.commit()  # <--- This is what we need
                # Optionally, To check if everything went good
                #print('New Kafka offset: %s' % consumer.committed(TopicPartition(config.input_topic, msg.partition)))


                jobid           = data['jobID']
                log_info('process_google_ocr_kf - received message from kafka, dumping into internal queue', data)
                input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(data)

                Queue.put(data)