def workflow_response_block_tokeniser(self, task_id, task_starttime):
    """Build the workflow response for a block-tokenisation request.

    The request is read from ``self.json_data``. The workflow keys and the
    input payload are validated, the payload is passed through
    ``Tokenisation.adding_tokenised_text_blockmerger`` and the result is
    wrapped in a success response. Every exception raised inside that
    validation/tokenisation step is converted into an error response
    rather than propagated.

    Args:
        task_id: stored under ``'taskID'`` in error responses and handed to
            ``CustomResponse`` for the success response.
        task_starttime: start timestamp forwarded to ``success_response``.

    Returns:
        A deep copy of the success response, or a deep copy of whatever
        ``file_ops.error_handler`` returns for the failure.
    """
    # The last element (user id) is not needed by this method.
    input_key, workflow_id, jobid, tool_name, step_order, _user_id = \
        file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the block tokenisation response generation",
             self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data)

    def _error_response(code, message, log_message, exc):
        # NOTE(review): this annotates self.json_data in place (no copy),
        # exactly as the original handlers did.
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = message
        error = file_ops.error_handler(response_custom, code, True)
        log_exception(log_message, self.json_data, exc)
        return copy.deepcopy(error)

    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_empty(input_key)
        # Only the locale is used below; the other returned values are ignored.
        _blocks_list, _record_id, _model_id, in_locale = \
            file_ops.get_input_values_for_block_tokenise(input_key)
        input_key = tokenisation.adding_tokenised_text_blockmerger(input_key, in_locale, 0)
        # Epoch milliseconds. Computed arithmetically instead of eval()-ing
        # sliced digits of str(time.time()): the float repr drops trailing
        # zeros, so the digit-slicing form yields a value of the wrong
        # magnitude whenever the fraction has fewer than three digits.
        task_endtime = int(time.time() * 1000)
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(
            workflow_id, task_starttime, task_endtime, tool_name, step_order, input_key)
        response = copy.deepcopy(response_success)
        log_info("workflow_response : successfully generated response for workflow",
                 self.json_data)
        return response
    except WorkflowkeyError as e:
        return _error_response(
            "WORKFLOWKEY-ERROR", str(e),
            "workflow_response : workflow key error: key value missing", e)
    except FileErrors as e:
        return _error_response(
            e.code, e.message,
            "workflow_response : some error occured while validating file", e)
    except ServiceError as e:
        return _error_response(
            "SERVICE_ERROR", str(e),
            "workflow_response : Error occured during tokenisation or file writing", e)
    except Exception as e:
        # Boundary handler: anything not covered above is still reported
        # to the caller as a SERVICE_ERROR response.
        return _error_response(
            "SERVICE_ERROR", str(e),
            "workflow_response : Any random exception", e)
def workflow_response(self, task_id, task_starttime):
    """Tokenise the request in ``self.json_data`` and build the workflow response.

    When the input payload has a ``'files'`` key, every listed file is
    tokenised ("txt" and "json" file types are handled) and one file entry
    per input is returned. Otherwise the payload's ``'text'`` paragraphs are
    tokenised in memory using its ``'locale'``.

    Args:
        task_id: stored under ``'taskID'`` in error responses and handed to
            ``CustomResponse`` for the success response.
        task_starttime: start timestamp forwarded to ``success_response``.

    Returns:
        A deep copy of the success response, or a deep copy of the value
        returned by ``file_ops.error_handler`` for the handled error types.
        Exceptions of other types are not caught here.
    """
    input_key, workflow_id, jobid, tool_name, step_order = \
        file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the response generation", self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data)

    def _failure(code, message, log_message, exc):
        # The request dict itself (self.json_data) is annotated in place.
        payload = self.json_data
        payload['taskID'] = task_id
        payload['message'] = message
        error = file_ops.error_handler(payload, code, True)
        log_exception(log_message, self.json_data, exc)
        return copy.deepcopy(error)

    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_empty(input_key)

        if 'files' in input_key.keys():
            output_file_response = list()
            for index, file_item in enumerate(input_key['files']):
                input_filename, in_file_type, in_locale = file_ops.accessing_files(file_item)
                # NOTE(review): a file type other than "txt"/"json" leaves
                # output_filename unbound (first file) or carried over from
                # the previous file - confirm upstream validation rules this out.
                if in_file_type == "txt":
                    file_text = file_ops.read_txt_file(input_filename)
                    error_validator.file_encoding_error(file_text)
                    output_filename = tokenisation.tokenisation_response(
                        file_text, in_locale, index)
                elif in_file_type == "json":
                    json_content = file_ops.read_json_file(input_filename)
                    tokenised_pages = []
                    for page_id, page in enumerate(json_content['result']):
                        tokenised_pages.append(
                            tokenisation.adding_tokenised_text_blockmerger(
                                page, in_locale, page_id))
                    json_content['result'] = tokenised_pages
                    output_filename = tokenisation.writing_json_file_blockmerger(
                        index, json_content)
                output_file_response.append(
                    file_ops.one_filename_response(
                        input_filename, output_filename, in_locale, in_file_type))
        else:
            paragraphs = input_key['text']
            locale = input_key['locale']
            sentences_per_paragraph = [
                tokenisation.tokenisation_core([paragraph], locale)
                for paragraph in paragraphs
            ]
            tokenised_text = [
                {"inputText": source, "tokenisedSentences": sentences}
                for source, sentences in zip(paragraphs, sentences_per_paragraph)
            ]
            output_file_response = {'tokenisedText': tokenised_text, 'locale': locale}

        # End time is the digits of time.time() with the decimal point removed.
        task_endtime = str(time.time()).replace('.', '')
        success = CustomResponse(Status.SUCCESS.value, jobid, task_id).success_response(
            workflow_id, task_starttime, task_endtime, tool_name, step_order,
            output_file_response)
        response = copy.deepcopy(success)
        log_info("workflow_response : successfully generated response for workflow",
                 self.json_data)
        return response
    except WorkflowkeyError as e:
        return _failure(
            "WORKFLOWKEY-ERROR", str(e),
            "workflow_response : workflow key error: key value missing", e)
    except FileErrors as e:
        return _failure(
            e.code, e.message,
            "workflow_response : some error occured while validating file", e)
    except FileEncodingError as e:
        return _failure(
            "ENCODING_ERROR", str(e),
            "workflow_response : service supports only utf-16 encoded file", e)
    except ServiceError as e:
        return _failure(
            "SERVICE_ERROR", str(e),
            "workflow_response : Error occured during tokenisation or file writing", e)
def workflow_response(self, task_id, task_starttime):
    """Tokenise the request in ``self.json_data`` and build the workflow response.

    Two payload shapes are handled:

    * a non-list payload with a ``'files'`` key: each listed file is
      tokenised. "txt" files are read, checked by ``file_encoding_error``
      and passed to ``tokenisation_response``. "json" files are tokenised
      page by page, run through ``getting_incomplete_text_merging_blocks``
      and written back to the same file.
    * a list payload: each element supplies ``'text'`` (paragraphs) and
      ``'locale'`` and is tokenised in memory.

    Every exception raised inside that processing step is converted into
    an error response rather than propagated.

    Args:
        task_id: stored under ``'taskID'`` in error responses and handed to
            ``CustomResponse`` for the success response.
        task_starttime: start timestamp forwarded to ``success_response``.

    Returns:
        A deep copy of the success response, or a deep copy of whatever
        ``file_ops.error_handler`` returns for the failure.
    """
    # The last element (user id) is not needed by this method.
    input_key, workflow_id, jobid, tool_name, step_order, _user_id = \
        file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the response generation", self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data)

    def _error_response(code, message, log_message, exc):
        # NOTE(review): this annotates self.json_data in place (no copy),
        # exactly as the original handlers did.
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = message
        error = file_ops.error_handler(response_custom, code, True)
        log_exception(log_message, self.json_data, exc)
        return copy.deepcopy(error)

    try:
        # Validating workflow key-values
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        # Validating input key for files input and text-only input
        error_validator.inputfile_list_empty(input_key)

        # input key is a dictionary for files input, with "files" as a key
        if not isinstance(input_key, list):
            # NOTE(review): a non-list payload without 'files' leaves
            # output_file_response unbound; the resulting error is reported
            # through the generic handler below.
            if 'files' in input_key.keys():
                output_file_response = list()
                for i, item in enumerate(input_key['files']):
                    input_filename, in_file_type, in_locale = file_ops.accessing_files(item)
                    if in_file_type == "txt":
                        input_file_data = file_ops.read_txt_file(input_filename)
                        error_validator.file_encoding_error(input_file_data)
                        output_filename = tokenisation.tokenisation_response(
                            input_file_data, in_locale, i)
                    elif in_file_type == "json":
                        # file_write is used as a writable handle on the input
                        # file (seek/truncate/write below).
                        input_jsonfile_data, file_write = file_ops.read_json_file(
                            input_filename)
                        try:
                            input_jsonfile_data['result'] = [
                                tokenisation.adding_tokenised_text_blockmerger(
                                    page, in_locale, page_id)
                                for page_id, page in enumerate(input_jsonfile_data['result'])
                            ]
                            input_jsonfile_data['result'] = \
                                tokenisation.getting_incomplete_text_merging_blocks(
                                    input_jsonfile_data['result'])
                            input_jsonfile_data['file_locale'] = in_locale
                            json_data_write = json.dumps(input_jsonfile_data)
                            # Overwrite the file content in place.
                            file_write.seek(0)
                            file_write.truncate()
                            file_write.write(json_data_write)
                        finally:
                            # Previously the handle was never closed; closing
                            # releases it and flushes the rewritten content
                            # before the response names this file as output.
                            file_write.close()
                        output_filename = input_filename
                    else:
                        # Previously an unsupported type silently reused the
                        # previous file's output name (or hit an unbound
                        # variable on the first file).
                        raise ValueError(
                            "workflow_response : unsupported input file type: %s"
                            % in_file_type)
                    file_res = file_ops.one_filename_response(
                        input_filename, output_filename, in_locale, in_file_type)
                    output_file_response.append(file_res)
        # input key is a list of objects, each with text and language code
        else:
            output_file_response = []
            for paragraph in input_key:
                input_paragraphs = paragraph['text']
                input_locale = paragraph['locale']
                tokenised_sentences = [
                    tokenisation.tokenisation_core([input_paragraph], input_locale)
                    for input_paragraph in input_paragraphs
                ]
                output_list_text = [
                    {"inputText": x, "tokenisedSentences": y}
                    for x, y in zip(input_paragraphs, tokenised_sentences)
                ]
                output_file_response.append({
                    'tokenisedText': output_list_text,
                    'locale': input_locale,
                })

        # Epoch milliseconds. Computed arithmetically instead of eval()-ing
        # sliced digits of str(time.time()): the float repr drops trailing
        # zeros, so the digit-slicing form yields a value of the wrong
        # magnitude whenever the fraction has fewer than three digits.
        task_endtime = int(time.time() * 1000)
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(
            workflow_id, task_starttime, task_endtime, tool_name, step_order,
            output_file_response)
        response = copy.deepcopy(response_success)
        log_info("workflow_response : successfully generated response for workflow",
                 self.json_data)
        return response
    # workflow key error
    except WorkflowkeyError as e:
        return _error_response(
            "WORKFLOWKEY-ERROR", str(e),
            "workflow_response : workflow key error: key value missing", e)
    # input key data validation
    except FileErrors as e:
        return _error_response(
            e.code, e.message,
            "workflow_response : some error occured while validating file", e)
    # file data unicode / null-data check
    except FileEncodingError as e:
        return _error_response(
            "ENCODING_ERROR", str(e),
            "workflow_response : service supports only utf-16 encoded file", e)
    # tokenisation core logic and writing of tokenised output
    except ServiceError as e:
        return _error_response(
            "SERVICE_ERROR", str(e),
            "workflow_response : Error occured during tokenisation or file writing", e)
    # any other exception not covered above
    except Exception as e:
        return _error_response(
            "SERVICE_ERROR", str(e),
            "workflow_response : Any random exception", e)
def workflow_response(self, task_id, task_starttime):
    """Convert the request's input files to PDF and build the workflow response.

    For every input file in ``self.json_data``: a file whose name ends with
    ``.pdf`` is reported unchanged. Any other file is passed to
    ``convert_to`` (60 second timeout) and the path it returns is copied to
    ``<config.download_folder>/<uuid>.pdf``, which is reported as the output.

    Args:
        task_id: stored under ``'taskID'`` in error responses and handed to
            ``CustomResponse`` for the success response.
        task_starttime: start timestamp forwarded to ``success_response``.

    Returns:
        The success response, or the value returned by
        ``file_ops.error_handler`` for the handled error types. Exceptions
        of other types are not caught here.
    """
    input_files, workflow_id, jobid, tool_name, step_order = \
        file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the response generation", self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)

    def _error_response(code, message, log_message, exc):
        # NOTE(review): this annotates self.json_data in place (no copy),
        # exactly as the original handlers did.
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = message
        error = file_ops.error_handler(response_custom, code, True)
        log_exception(log_message, self.json_data, exc)
        return error

    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_empty(input_files)
        output_file_response = list()
        for item in input_files:
            # The reported file type is always 'pdf', so the input's own
            # type is not used.
            input_filename, _in_file_type, in_locale = file_ops.accessing_files(item)
            log_info("workflow_response : input filename received %s" % (input_filename),
                     self.json_data)
            if input_filename.endswith('.pdf'):
                # Already a PDF: input and output names are the same.
                output_filename = input_filename
            else:
                # The id and source path are only needed for a conversion.
                upload_id = str(uuid4())
                filepath = os.path.join(config.download_folder, input_filename)
                result = convert_to(
                    os.path.join(config.download_folder, 'pdf', upload_id),
                    filepath, timeout=60)
                output_filename = upload_id + '.pdf'
                copyfile(result, os.path.join(config.download_folder, output_filename))
            output_file_response.append(
                file_ops.one_filename_response(
                    input_filename, output_filename, in_locale, 'pdf'))

        # Epoch milliseconds. Computed arithmetically instead of eval()-ing
        # sliced digits of str(time.time()): the float repr drops trailing
        # zeros, so the digit-slicing form yields a value of the wrong
        # magnitude whenever the fraction has fewer than three digits.
        task_endtime = int(time.time() * 1000)
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(
            workflow_id, task_starttime, task_endtime, tool_name, step_order,
            output_file_response)
        log_info("workflow_response : successfully generated response for workflow",
                 self.json_data)
        return response_success
    except LibreOfficeError as e:
        return _error_response(
            "SERVICE_ERROR",
            'workflow_response : Error when converting file to PDF: LibreOfficeError',
            "workflow_response : Error when converting file to PDF", e)
    except TimeoutExpired as e:
        return _error_response(
            "SERVICE_ERROR",
            'workflow_response : Timeout when converting file to PDF: TimeoutExpired',
            "workflow_response : Timeout when converting file to PDF", e)
    except WorkflowkeyError as e:
        return _error_response(
            "WORKFLOWKEY-ERROR", str(e),
            "workflow_response : workflow key error: key value missing", e)
    except FileErrors as e:
        return _error_response(
            e.code, e.message,
            "workflow_response : some error occured while validating file", e)
    except FileEncodingError as e:
        return _error_response(
            "ENCODING_ERROR", str(e),
            "workflow_response : service supports only utf-16 encoded file", e)
    except ServiceError as e:
        return _error_response(
            "SERVICE_ERROR", str(e),
            "workflow_response : Error occured during file conversion or file writing", e)
def workflow_response(self, task_id, task_starttime):
    """Run pdf-to-html conversion for the request and build the workflow response.

    Every input file listed in ``self.json_data`` is resolved to a path via
    ``file_ops.input_path`` and passed to ``Pdf2HtmlService.pdf2html``; the
    returned html and png output paths are reported per file.

    Args:
        task_id: handed to ``CustomResponse`` for both success and error
            responses.
        task_starttime: start timestamp forwarded to ``success_response``.

    Returns:
        A deep copy of the success response, or a deep copy of the value
        returned by ``file_ops.error_handler`` for the handled error types.
        Exceptions of other types are not caught here.
    """
    input_files, workflow_id, jobid, tool_name, step_order = \
        file_ops.json_input_format(self.json_data)
    log_info("workflow_response", "started the response generation", jobid)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    pdf_html_service = Pdf2HtmlService(self.DOWNLOAD_FOLDER)

    def _convert(file_item):
        # Convert one input file and describe its outputs.
        input_filename, in_file_type, in_locale = file_ops.accessing_files(file_item)
        source_path = file_ops.input_path(input_filename)
        html_path, png_path = pdf_html_service.pdf2html(source_path, jobid)
        return file_ops.one_filename_response(
            input_filename, html_path, png_path, in_locale, in_file_type)

    def _failure(code, message, log_message, exc):
        # The error payload is the status_code dict of a fresh error CustomResponse.
        error_status = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
        error_status.status_code['message'] = message
        error = file_ops.error_handler(error_status.status_code, code, True)
        log_exception("workflow_response", log_message, jobid, exc)
        return copy.deepcopy(error)

    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_error(input_files)
        output_file_response = [_convert(file_item) for file_item in input_files]

        # End time is the digits of time.time() with the decimal point removed.
        task_endtime = str(time.time()).replace('.', '')
        success = CustomResponse(Status.SUCCESS.value, jobid, task_id).success_response(
            workflow_id, task_starttime, task_endtime, tool_name, step_order,
            output_file_response)
        response = copy.deepcopy(success)
        log_info("workflow_response", "successfully generated response for workflow", jobid)
        return response
    except WorkflowkeyError as e:
        return _failure(
            "WORKFLOWKEY-ERROR", str(e),
            "workflow key error: key value missing", e)
    except FileErrors as e:
        return _failure(
            e.code, e.message,
            "some error occured while validating file", e)
    except ServiceError as e:
        return _failure(
            "SERVICE_ERROR", str(e),
            "Something went wrong during pdf to html conversion.", e)