def post(self): log_info("Resource Pdf2HtmlConversion", "pdf2html service started", None) json_data = request.get_json(force=True) try: error_validator = ValidationResponse(DOWNLOAD_FOLDER) if error_validator.format_error(json_data) is True: response_gen = Response(json_data, DOWNLOAD_FOLDER) response = response_gen.nonwf_response() log_info("Resource Pdf2HtmlConversion", "pdf2html api response completed", None) return jsonify(response) except FormatError as e: log_error("Resource Pdf2HtmlConversion", "Input json format is not correct or dict_key is missing", None, e) return Status.ERR_request_input_format.value
def post(self): log_info("Resource Pdf2HtmlConversionWF", "pdf2html service started", None) task_id = str("PDF2HTML-" + str(time.time()).replace('.', '')) task_starttime = str(time.time()).replace('.', '') json_data = request.get_json(force = True) try: error_validator = ValidationResponse(DOWNLOAD_FOLDER) if error_validator.format_error(json_data) is True: response_gen = Response(json_data, DOWNLOAD_FOLDER) response = response_gen.workflow_response(task_id, task_starttime) log_info("Resource Pdf2HtmlConversionWF", "pdf2html api response completed", None) return jsonify(response) except FormatError as e: log_error("Resource Pdf2HtmlConversionWF", "Input json format is not correct or dict_key is missing", None, e) return Status.ERR_request_input_format.value
def post(self): task_id = str("TOK-" + str(time.time()).replace('.', '')[0:13]) task_starttime = eval(str(time.time()).replace('.', '')[0:13]) json_data = request.get_json(force=True) log_info("Resource BlockTokenize : data from api request received", json_data) try: error_validator = ValidationResponse(DOWNLOAD_FOLDER) if error_validator.format_error(json_data) is True: response_gen = Response(json_data, DOWNLOAD_FOLDER) response = response_gen.workflow_response_block_tokeniser( task_id, task_starttime) log_info( "Resource BlockTokenize : Tokenisation api response completed", json_data) return jsonify(response) except FormatError as e: log_error( "Resource BlockTokenize : Input json format is not correct or dict_key is missing", json_data, e) return Status.ERR_request_input_format.value
def nonwf_response(self): log_info("non workflow response", "started the response generation", None) input_files = self.json_data['files'] error_validator = ValidationResponse(self.DOWNLOAD_FOLDER) pdf_html_service = Pdf2HtmlService(self.DOWNLOAD_FOLDER) try: error_validator.inputfile_list_empty(input_files) output_file_response = list() for item in input_files: input_filename, in_file_type, in_locale = file_ops.accessing_files( item) output_htmlfiles_path, output_pngfiles_path = pdf_html_service.pdf2html( self.DOWNLOAD_FOLDER, input_filepath) file_res = file_ops.one_filename_response( input_filename, output_htmlfiles_path, output_pngfiles_path, in_locale, in_file_type) output_file_response.append(file_res) response_true = Status.SUCCESS.value response_true['output'] = output_file_response log_info("non workflow_response", "successfully generated response for rest server", None) return response_true except FileErrors as e: response_custom = Status.ERR_STATUS.value response_custom['message'] = e.message response = file_ops.error_handler(response_custom, e.code, False) log_exception("non workflow_response", "some error occured while validating file", None, e) return response except ServiceError as e: response_custom = Status.ERR_STATUS.value response_custom['message'] = str(e) response = file_ops.error_handler(response_custom, "SERVICE_ERROR", False) log_exception( "non workflow_response", "Something went wrong during pdf to html conversion.", None, e) return response
def workflow_response_block_tokeniser(self, task_id, task_starttime):
    input_key, workflow_id, jobid, tool_name, step_order, user_id = file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the block tokenisation response generation", self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data)
    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_empty(input_key)
        blocks_list, record_id, model_id, in_locale = file_ops.get_input_values_for_block_tokenise(input_key)
        input_key = tokenisation.adding_tokenised_text_blockmerger(input_key, in_locale, 0)
        # int(), not eval(): the truncated timestamp string is a plain integer
        task_endtime = int(str(time.time()).replace('.', '')[0:13])
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(workflow_id, task_starttime, task_endtime, tool_name, step_order, input_key)
        response = copy.deepcopy(response_success)
        log_info("workflow_response : successfully generated response for workflow", self.json_data)
        return response
    except WorkflowkeyError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "WORKFLOWKEY-ERROR", True)
        log_exception("workflow_response : workflow key error: key value missing", self.json_data, e)
        return copy.deepcopy(response)
    except FileErrors as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = e.message
        response = file_ops.error_handler(response_custom, e.code, True)
        log_exception("workflow_response : some error occurred while validating file", self.json_data, e)
        return copy.deepcopy(response)
    except ServiceError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "SERVICE_ERROR", True)
        log_exception("workflow_response : error occurred during tokenisation or file writing", self.json_data, e)
        return copy.deepcopy(response)
    except Exception as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "SERVICE_ERROR", True)
        log_exception("workflow_response : unhandled exception", self.json_data, e)
        return copy.deepcopy(response)
def nonwf_response(self): log_info("non workflow response : started the response generation", None) error_validator = ValidationResponse(self.DOWNLOAD_FOLDER) tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data) try: if 'files' in self.json_data.keys(): input_files = self.json_data['files'] error_validator.inputfile_list_empty(input_files) output_file_response = list() for i, item in enumerate(input_files): input_filename, in_file_type, in_locale = file_ops.accessing_files(item) input_file_data = file_ops.read_txt_file(input_filename) error_validator.file_encoding_error(input_file_data) output_filename = tokenisation.tokenisation_response(input_file_data, in_locale, i) file_res = file_ops.one_filename_response(input_filename, output_filename, in_locale, in_file_type) output_file_response.append(file_res) else: input_paragraphs = self.json_data['text'] input_locale = self.json_data['locale'] tokenised_sentences = [tokenisation.tokenisation_core([input_paragraph], input_locale) for input_paragraph in input_paragraphs] output_list_text = [{"inputText" : x, "tokenisedSentences" : y} for x, y in zip(input_paragraphs, tokenised_sentences)] output_file_response = {'tokenisedText' : output_list_text, 'locale':input_locale} response_true = Status.SUCCESS.value response_true['output'] = output_file_response log_info("non workflow_response : successfully generated response for rest server", None) return response_true except FileErrors as e: response_custom = Status.ERR_STATUS.value response_custom['message'] = e.message response = file_ops.error_handler(response_custom, e.code, False) log_exception("non workflow_response : some error occured while validating file", None, e) return response except FileEncodingError as e: response_custom = Status.ERR_STATUS.value response_custom['message'] = str(e) response = file_ops.error_handler(response_custom, "ENCODING_ERROR", False) log_exception("non workflow_response : service supports only utf-16 encoded file", None, e) return response except ServiceError as e: response_custom = Status.ERR_STATUS.value response_custom['message'] = str(e) response = file_ops.error_handler(response_custom, "SERVICE_ERROR", False) log_exception("non workflow_response : Error occured during tokenisation or file writing", None, e) return response
def workflow_response(self, task_id, task_starttime):
    input_key, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the response generation", self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data)
    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_empty(input_key)
        if 'files' in input_key.keys():
            output_file_response = list()
            for i, item in enumerate(input_key['files']):
                input_filename, in_file_type, in_locale = file_ops.accessing_files(item)
                if in_file_type == "txt":
                    input_file_data = file_ops.read_txt_file(input_filename)
                    error_validator.file_encoding_error(input_file_data)
                    output_filename = tokenisation.tokenisation_response(input_file_data, in_locale, i)
                elif in_file_type == "json":
                    input_jsonfile_data = file_ops.read_json_file(input_filename)
                    input_jsonfile_data['result'] = [
                        tokenisation.adding_tokenised_text_blockmerger(item, in_locale, page_id)
                        for page_id, item in enumerate(input_jsonfile_data['result'])
                    ]
                    output_filename = tokenisation.writing_json_file_blockmerger(i, input_jsonfile_data)
                file_res = file_ops.one_filename_response(input_filename, output_filename, in_locale, in_file_type)
                output_file_response.append(file_res)
        else:
            input_paragraphs = input_key['text']
            input_locale = input_key['locale']
            tokenised_sentences = [
                tokenisation.tokenisation_core([input_paragraph], input_locale)
                for input_paragraph in input_paragraphs
            ]
            output_list_text = [{"inputText": x, "tokenisedSentences": y}
                                for x, y in zip(input_paragraphs, tokenised_sentences)]
            output_file_response = {'tokenisedText': output_list_text, 'locale': input_locale}
        task_endtime = str(time.time()).replace('.', '')
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(workflow_id, task_starttime, task_endtime, tool_name, step_order, output_file_response)
        response = copy.deepcopy(response_success)
        log_info("workflow_response : successfully generated response for workflow", self.json_data)
        return response
    except WorkflowkeyError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "WORKFLOWKEY-ERROR", True)
        log_exception("workflow_response : workflow key error: key value missing", self.json_data, e)
        return copy.deepcopy(response)
    except FileErrors as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = e.message
        response = file_ops.error_handler(response_custom, e.code, True)
        log_exception("workflow_response : some error occurred while validating file", self.json_data, e)
        return copy.deepcopy(response)
    except FileEncodingError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "ENCODING_ERROR", True)
        log_exception("workflow_response : service supports only utf-16 encoded file", self.json_data, e)
        return copy.deepcopy(response)
    except ServiceError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "SERVICE_ERROR", True)
        log_exception("workflow_response : error occurred during tokenisation or file writing", self.json_data, e)
        return copy.deepcopy(response)
def workflow_response(self, task_id, task_starttime):
    input_key, workflow_id, jobid, tool_name, step_order, user_id = file_ops.json_input_format(self.json_data)
    log_info("workflow_response : started the response generation", self.json_data)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    tokenisation = Tokenisation(self.DOWNLOAD_FOLDER, self.json_data)
    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)  # validating workflow key-values
        error_validator.inputfile_list_empty(input_key)  # validating the input key for file input and text-only input
        # input_key is a dict for file input, with "files" as a key
        if not isinstance(input_key, list):
            if 'files' in input_key.keys():
                output_file_response = list()
                for i, item in enumerate(input_key['files']):
                    input_filename, in_file_type, in_locale = file_ops.accessing_files(item)
                    if in_file_type == "txt":
                        input_file_data = file_ops.read_txt_file(input_filename)
                        error_validator.file_encoding_error(input_file_data)
                        output_filename = tokenisation.tokenisation_response(input_file_data, in_locale, i)
                    elif in_file_type == "json":
                        input_jsonfile_data, file_write = file_ops.read_json_file(input_filename)
                        input_jsonfile_data['result'] = [
                            tokenisation.adding_tokenised_text_blockmerger(item, in_locale, page_id)
                            for page_id, item in enumerate(input_jsonfile_data['result'])
                        ]
                        input_jsonfile_data['result'] = tokenisation.getting_incomplete_text_merging_blocks(input_jsonfile_data['result'])
                        input_jsonfile_data['file_locale'] = in_locale
                        # tokenisation.sending_data_to_content_handler(jobid, user_id, input_jsonfile_data)
                        # rewrite the tokenised JSON in place through the open file handle
                        json_data_write = json.dumps(input_jsonfile_data)
                        file_write.seek(0)
                        file_write.truncate()
                        file_write.write(json_data_write)
                        output_filename = input_filename
                    file_res = file_ops.one_filename_response(input_filename, output_filename, in_locale, in_file_type)
                    output_file_response.append(file_res)
        # input_key is a list of objects, each containing text and a language code
        else:
            output_file_response = []
            for paragraph in input_key:
                input_paragraphs = paragraph['text']
                input_locale = paragraph['locale']
                tokenised_sentences = [
                    tokenisation.tokenisation_core([input_paragraph], input_locale)
                    for input_paragraph in input_paragraphs
                ]
                output_list_text = [{"inputText": x, "tokenisedSentences": y}
                                    for x, y in zip(input_paragraphs, tokenised_sentences)]
                output_per_para = {'tokenisedText': output_list_text, 'locale': input_locale}
                output_file_response.append(output_per_para)
        # int(), not eval(): the truncated timestamp string is a plain integer
        task_endtime = int(str(time.time()).replace('.', '')[0:13])
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(workflow_id, task_starttime, task_endtime, tool_name, step_order, output_file_response)
        response = copy.deepcopy(response_success)
        log_info("workflow_response : successfully generated response for workflow", self.json_data)
        return response
    # workflow key errors
    except WorkflowkeyError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "WORKFLOWKEY-ERROR", True)
        log_exception("workflow_response : workflow key error: key value missing", self.json_data, e)
        return copy.deepcopy(response)
    # input key data validation errors
    except FileErrors as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = e.message
        response = file_ops.error_handler(response_custom, e.code, True)
        log_exception("workflow_response : some error occurred while validating file", self.json_data, e)
        return copy.deepcopy(response)
    # file data encoding and null-data checks
    except FileEncodingError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "ENCODING_ERROR", True)
        log_exception("workflow_response : service supports only utf-16 encoded file", self.json_data, e)
        return copy.deepcopy(response)
    # errors from the tokenisation core logic and file writing of tokenised output
    except ServiceError as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "SERVICE_ERROR", True)
        log_exception("workflow_response : error occurred during tokenisation or file writing", self.json_data, e)
        return copy.deepcopy(response)
    # any other exception not covered above
    except Exception as e:
        response_custom = self.json_data
        response_custom['taskID'] = task_id
        response_custom['message'] = str(e)
        response = file_ops.error_handler(response_custom, "SERVICE_ERROR", True)
        log_exception("workflow_response : unhandled exception", self.json_data, e)
        return copy.deepcopy(response)
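# The "json" branch above writes the tokenised result back through the file
# handle returned by file_ops.read_json_file, using seek(0) + truncate() before
# writing so stale bytes from a longer original cannot survive. A self-contained
# sketch of that in-place rewrite pattern; the path and transform are illustrative.
import json

def rewrite_json_in_place(path, transform):
    # open for read+write, load, transform, then overwrite from the start
    with open(path, 'r+', encoding='utf-8') as file_write:
        data = json.load(file_write)
        data = transform(data)
        file_write.seek(0)
        file_write.truncate()
        file_write.write(json.dumps(data))

# e.g. rewrite_json_in_place("blocks.json", lambda d: {**d, "file_locale": "en"})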
def workflow_response(self, task_id, task_starttime):
    input_files, workflow_id, jobid, tool_name, step_order = file_ops.json_input_format(self.json_data)
    log_info("workflow_response", "started the response generation", jobid)
    error_validator = ValidationResponse(self.DOWNLOAD_FOLDER)
    pdf_html_service = Pdf2HtmlService(self.DOWNLOAD_FOLDER)
    try:
        error_validator.wf_keyerror(jobid, workflow_id, tool_name, step_order)
        error_validator.inputfile_list_error(input_files)
        output_file_response = list()
        for item in input_files:
            input_filename, in_file_type, in_locale = file_ops.accessing_files(item)
            input_filepath = file_ops.input_path(input_filename)
            output_htmlfiles_path, output_pngfiles_path = pdf_html_service.pdf2html(input_filepath, jobid)
            file_res = file_ops.one_filename_response(input_filename, output_htmlfiles_path, output_pngfiles_path, in_locale, in_file_type)
            output_file_response.append(file_res)
        task_endtime = str(time.time()).replace('.', '')
        response_true = CustomResponse(Status.SUCCESS.value, jobid, task_id)
        response_success = response_true.success_response(workflow_id, task_starttime, task_endtime, tool_name, step_order, output_file_response)
        response = copy.deepcopy(response_success)
        log_info("workflow_response", "successfully generated response for workflow", jobid)
        return response
    except WorkflowkeyError as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
        response_custom.status_code['message'] = str(e)
        response = file_ops.error_handler(response_custom.status_code, "WORKFLOWKEY-ERROR", True)
        log_exception("workflow_response", "workflow key error: key value missing", jobid, e)
        return copy.deepcopy(response)
    except FileErrors as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
        response_custom.status_code['message'] = e.message
        response = file_ops.error_handler(response_custom.status_code, e.code, True)
        log_exception("workflow_response", "some error occurred while validating file", jobid, e)
        return copy.deepcopy(response)
    except ServiceError as e:
        response_custom = CustomResponse(Status.ERR_STATUS.value, jobid, task_id)
        response_custom.status_code['message'] = str(e)
        response = file_ops.error_handler(response_custom.status_code, "SERVICE_ERROR", True)
        log_exception("workflow_response", "Something went wrong during pdf to html conversion.", jobid, e)
        return copy.deepcopy(response)
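# The post() handlers above follow the flask_restful Resource pattern
# (request.get_json, jsonify, per-class post). A minimal sketch of how the
# resources might be registered; the class names are taken from the log tags,
# while the route paths and port are assumptions for illustration only.
from flask import Flask
from flask_restful import Api

app = Flask(__name__)
api = Api(app)
api.add_resource(Pdf2HtmlConversion, '/api/v0/pdf2html')
api.add_resource(Pdf2HtmlConversionWF, '/api/v0/pdf2html-wf')
api.add_resource(BlockTokenize, '/api/v0/block-tokenize')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5001)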