Example 1
def process_merger_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_download_dir(config.download_folder)
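    # task id and start time are derived from the current epoch timestamp with the dot stripped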
    task_id = str("BM-" + str(time.time()).replace('.', ''))
    task_starttime = str(time.time()).replace('.', '')
    # instantiation of the consumer for the respective topic
    try:
        consumer_class = Consumer(config.input_topic, config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log_info("process_merger_kf", "trying to receive value from consumer",
                 None)
        jobid = None
        thread_instance = 0
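        # consume records indefinitely; each message carries one block-merger workflow request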
        for msg in consumer:
            try:
                data = msg.value
                task_id = str("BM-" + str(time.time()).replace('.', ''))
                task_starttime = str(time.time()).replace('.', '')
                input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(
                    data)
                log_info("process_merger_kf", "kafka request arrived ", jobid)
                response_gen = Response(data, DOWNLOAD_FOLDER)
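                # run the block merger on a separate thread so the consumer loop keeps polling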
                t1 = threading.Thread(
                    target=response_gen.multi_thred_block_merger,
                    args=(task_id, task_starttime, jobid),
                    name='BM-thread-' + str(thread_instance))
                t1.start()
                thread_instance += 1
                log_info("multithread", "block-merger running in a separate thread",
                         None)
                '''
                file_value_response = response_gen.workflow_response(task_id, task_starttime)
                if "errorID" not in file_value_response.keys():
                    producer = Producer()
                    producer.push_data_to_queue(config.output_topic, file_value_response, jobid, task_id)
                else:
                    log_info("process_merger_kf", "error send to error handler", jobid)'''
            except Exception as e:
                log_exception("process_merger_kf",
                              "exception while consuming the records", jobid,
                              e)

    except KafkaConsumerError as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, None, None)
        response_custom.status_code['message'] = str(e)
        file_ops.error_handler(response_custom.status_code,
                               "KAFKA_CONSUMER_ERROR", True)
        log_exception("process_merger_kf", "Consumer didn't instantiate", None, e)
    except KafkaProducerError as e:
        response_custom = e.code
        response_custom['message'] = e.message
        file_ops.error_handler(response_custom, "KAFKA_PRODUCER_ERROR", True)
        log_exception("process_merger_kf",
                      "response sent to topic %s" % (config.output_topic),
                      response_custom['jobID'], e)
Example 2
def push_output(producer, topic_name, output, jobid, task_id, data):
    try:
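        # delegate to the producer wrapper; any failure is converted into a KafkaProducerError below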
        producer.push_data_to_queue(topic_name, output)
        log_info("push_output : producer flushed value on topic %s"%(topic_name), data)
    except Exception as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
        log_exception("push_output : Response can't be pushed to queue %s"%(topic_name), data, e)
        raise KafkaProducerError(response_custom, "data Not pushed to queue: %s"%(topic_name))
Example 3
def process_annotation_kf():
    file_ops = FileOperation()
    DOWNLOAD_FOLDER = file_ops.create_file_upload_dir(config.download_folder)
    # instantiation of the consumer for the respective topic
    try:
        consumer_class = Consumer(config.ner_input_topic,
                                  config.bootstrap_server)
        consumer = consumer_class.consumer_instantiate()
        log.info("--- consumer running -----")
    except Exception:
        response = Status.ERR_Consumer.value
        producer_html2json = Producer(config.bootstrap_server)
        producer = producer_html2json.producer_fn()
        producer.send(config.ner_output_topic, value=response)
        producer.flush()
        log.error(
            "error in kafka operation while listening to consumer on topic %s"
            % (config.ner_input_topic))
        log.info("response sent to topic %s" % (config.ner_output_topic))
        return  # without a consumer there is nothing to poll
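    # main consumption loop: each record is validated and its response is pushed to the NER output topic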
    try:
        log.info("trying to receive data from consumer")
        for msg in consumer:
            log.info("received data from consumer")
            data = msg.value
            task_id = str("NER-" + str(time.time()).replace('.', ''))
            task_starttime = str(time.time()).replace('.', '')
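            # validate the incoming payload and build the workflow response for this record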
            checking_response = CheckingResponse(data, task_id, task_starttime,
                                                 DOWNLOAD_FOLDER)
            file_value_response = checking_response.main_response_wf()
            try:
                producer_ner = Producer(config.bootstrap_server)
                producer = producer_ner.producer_fn()
                producer.send(config.ner_output_topic,
                              value=file_value_response)
                producer.flush()
                log.info("producer flushed for topic %s" %
                         (config.ner_output_topic))
            except Exception:
                log.error(
                    "error occurred while pushing the response to topic %s"
                    % (config.ner_output_topic))
    except Exception as e:
        log.error(
            "error occurred while running the consumer or flushing data to another queue: %s"
            % e)
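        # on failure, drain the remaining records and route each one to the error handler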
        for msg in consumer:
            log.info("value received from consumer")
            data = msg.value
            input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(
                data)
            task_id = str("NER-" + str(time.time()).replace('.', ''))
            task_starttime = str(time.time()).replace('.', '')
            response = CustomResponse(Status.ERR_Producer.value, jobid,
                                      task_id)
            file_ops.error_handler(response, True)
            log.info(
                "error in kafka operation: producer flushed value on error topic"
            )
Example 4
 def push_data_to_queue(self, topic_name, push_data, jobid, task_id):
     producer = self.producer_fn()
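     # send() is asynchronous; flush() blocks until buffered records are delivered or an error surfaces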
     try:
         producer.send(topic_name, value=push_data)
         producer.flush()
         log_info("push_data_to_queue",
                  "successfully pushed data to output queue", None)
     except Exception:
         response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                          task_id)
         log_exception(
             "push_data_to_queue",
             "Response can't be pushed to queue %s" % (topic_name), jobid,
             None)
         raise KafkaProducerError(
             response_custom.status_code,
             "data Not pushed to queue: %s" % (topic_name))
Example 5
 def workflow_response(self, task_id, task_starttime):
     input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(
         self.json_data)
     log_info("workflow_response", "started the response generation", jobid)
     error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
     try:
         error_validator.wf_keyerror(jobid, workflow_id, tool_name,
                                     step_order)
         error_validator.inputfile_list_error(input_files)
         output_file_response = list()
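         # write one block-structure JSON file and build one response entry per input file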
         for i, item in enumerate(input_files):
             input_filename, in_file_type, in_locale = file_ops.accessing_files(
                 item)
             output_json_data = DocumentStructure(jobid=jobid,
                                                  file_name=input_filename,
                                                  lang=in_locale)
             output_filename_json = file_ops.writing_json_file(
                 i, output_json_data, self.DOWNLOAD_FOLDER)
             file_res = file_ops.one_filename_response(
                 input_filename, output_filename_json, in_locale,
                 in_file_type)
             output_file_response.append(file_res)
         task_endtime = str(time.time()).replace('.', '')
         response_true = CustomResponse(Status.SUCCESS.value, jobid,
                                        task_id)
         response_success = response_true.success_response(
             workflow_id, task_starttime, task_endtime, tool_name,
             step_order, output_file_response)
         response = copy.deepcopy(response_success)
         log_info("workflow_response",
                  "successfully generated response for workflow", jobid)
         return response
     except WorkflowkeyError as e:
         response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                          task_id)
         response_custom.status_code['message'] = str(e)
         response = file_ops.error_handler(response_custom.status_code,
                                           "WORKFLOWKEY-ERROR", True)
         log_exception("workflow_response",
                       "workflow key error: key value missing", jobid, e)
         response = copy.deepcopy(response)
         return response
     except FileErrors as e:
         response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                          task_id)
         response_custom.status_code['message'] = e.message
         response = file_ops.error_handler(response_custom.status_code,
                                           e.code, True)
         log_exception("workflow_response",
                       "some error occurred while validating the file", jobid, e)
         response = copy.deepcopy(response)
         return response
     except ServiceError as e:
         response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                          task_id)
         response_custom.status_code['message'] = str(e)
         response = file_ops.error_handler(response_custom.status_code,
                                           "SERVICE_ERROR", True)
         log_exception(
             "workflow_response",
             "Something went wrong during pdf to block conversion.", jobid,
             e)
         response = copy.deepcopy(response)
         return response
Example 6
    def workflow_response(self, task_id, task_starttime, debug_flush=False):

        app_context.init()
        app_context.application_context = {}

        input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(self.json_data)
        log_info("workflow_response started the response generation", app_context.application_context)
        error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
        try:
            error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
            error_validator.inputfile_list_error(input_files)
            output_file_response = list()
            for i, item in enumerate(input_files):
                input_filename, in_file_type, in_locale = file_ops.accessing_files(item)
                self.json_data['taskID'] = task_id
                app_context.application_context = self.json_data
                
                if not debug_flush:
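                    # normal path: run the document-structure service on this file and persist its output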
                    bm_response = DocumentStructure(app_context=app_context, file_name=input_filename, lang=in_locale)
                    if bm_response['code'] == 200:
                        
                        output_filename_json = file_ops.writing_json_file(i, bm_response['rsp'], self.DOWNLOAD_FOLDER)
                        file_res = file_ops.one_filename_response(input_filename, output_filename_json, in_locale, in_file_type)
                        output_file_response.append(file_res)
                        task_endtime = str(time.time()).replace('.', '')
                        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
                        response_success = response_true.success_response(workflow_id, task_starttime, task_endtime, tool_name, step_order, output_file_response)
                        response = copy.deepcopy(response_success)
                        log_info("successfully generated response for workflow", app_context.application_context)
                        
                        return response
                    else:
                        post_error_wf(bm_response['code'], bm_response['message'], app_context.application_context, None)
                        return None
                else:
                    log_info('flushing queue data, not handling file {}'.format(input_files), app_context.application_context)
                    post_error_wf(400, 'flushing queue data, not handling file {}'.format(input_files), app_context.application_context, None)
                    return None

            
        except WorkflowkeyError as e:
            response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
            response_custom.status_code['message'] = str(e)
            response = file_ops.error_handler(response_custom.status_code, "WORKFLOWKEY-ERROR", True)
            log_exception("workflow_response workflow key error: key value missing", app_context.application_context, e)
            response = copy.deepcopy(response)
            return response
        except FileErrors as e:
            response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
            response_custom.status_code['message'] = e.message
            response = file_ops.error_handler(response_custom.status_code, e.code, True)
            log_exception("workflow_response some error occurred while validating the file", app_context.application_context, e)
            response = copy.deepcopy(response)
            return response
        except ServiceError as e:
            response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
            response_custom.status_code['message'] = str(e)
            response = file_ops.error_handler(response_custom.status_code, "SERVICE_ERROR", True)
            log_exception("workflow_response Something went wrong during pdf to block conversion.", app_context.application_context, e)
            response = copy.deepcopy(response)
            return response
Example 7
    def workflow_response(self, task_id, task_starttime):

        app_context.init()
        app_context.application_context = self.json_data

        input_params, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(
            self.json_data)
        log_info("workflow_response started the response generation",
                 app_context.application_context)
        error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)

        try:
            error_validator.wf_keyerror(jobid, workflow_id, tool_name,
                                        step_order)

            # --------------------------
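            # process the first input parameter for this job and workflow; failures surface as the exceptions handled below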

            result = process_incoming_request(app_context, input_params[0],
                                              jobid, workflow_id)

            # --------------------------

            task_endtime = int(str(time.time()).replace('.', '')[0:13])
            response_true = CustomResponse(Status.SUCCESS.value, jobid,
                                           task_id)
            response_success = response_true.success_response(
                workflow_id, task_starttime, task_endtime, tool_name,
                step_order, result)
            log_info(
                "workflow_response : successfully generated response for workflow",
                app_context.application_context)
            return response_success

        except WorkflowkeyError as e:
            response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                             task_id)
            response_custom.status_code['message'] = str(e)
            response = file_ops.error_handler(response_custom.status_code,
                                              "WORKFLOWKEY-ERROR", True)
            log_exception(
                "workflow_response workflow key error: key value missing",
                app_context.application_context, e)
            response = copy.deepcopy(response)
            return response
        except FileErrors as e:
            response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                             task_id)
            response_custom.status_code['message'] = e.message
            response = file_ops.error_handler(response_custom.status_code,
                                              e.code, True)
            log_exception(
                "workflow_response some error occurred while validating the file",
                app_context.application_context, e)
            response = copy.deepcopy(response)
            return response
        except ServiceError as e:
            response_custom = CustomResponse(Status.ERR_STATUS.value, jobid,
                                             task_id)
            response_custom.status_code['message'] = str(e)
            response = file_ops.error_handler(response_custom.status_code,
                                              "SERVICE_ERROR", True)
            log_exception("workflow_response Something went wrong during OCR.",
                          app_context.application_context, e)
            response = copy.deepcopy(response)
            return response