def run_split_pdf(params):
    """Run the PDF-split step for every email message matching mode/status.

    Iterates EVRE_LEARNING_EMAIL_MSGS and invokes ``split_pdf.main`` once per
    matching submission id.

    :param params: dict with ``cos_everest_submission_bucket``, ``mode``
        (RUNTIME or TRAINING) and ``status``.
    :return: fixed ``result`` message; the SUCCESS/FAILURE flag is only
        computed locally (behavior preserved from the original flow).
    """
    try:
        cos_everest_submission_bucket = params.get("cos_everest_submission_bucket")
        # FIX: the original guard `x is None or ""` could never reject an
        # empty string (`or ""` is a constant falsy operand); `not x`
        # rejects both None and "".
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        mode = params.get("mode")
        if not mode:
            raise Exception("Pass RUNTIME or TRAINING")
        status = params.get("status")
        if not status:
            raise Exception("Pass status ")

        db_conn = db2utils.get_connection()
        print("db_conn: {}".format(db_conn))
        # FIX: bind values with parameter markers instead of f-string
        # interpolation (SQL-injection safe).  The original no-status branch
        # was unreachable once the validation above fires, so it is dropped.
        sql = ('SELECT ID, USED_FOR FROM EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS '
               'where USED_FOR = ? and status = ? order by ID')
        print("sql: {}".format(sql))
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, (mode, status))
        result = ibm_db.fetch_assoc(stmt)
        while result:
            submission_id = str(result["ID"])  # renamed: `id` shadowed the builtin
            row_mode = result["USED_FOR"]
            param = {
                'cos_everest_submission_bucket': cos_everest_submission_bucket,
                'final_pdf_folder': 'final_pdf',
                'submission_id': submission_id,
                'submissions_data_folder': 'submission_documents_data',
                'mode': row_mode,
            }
            split_pdf.main(param)
            print(f'Split PDF for : {submission_id, param}')
            result = ibm_db.fetch_assoc(stmt)
        # NOTE: computed but never returned — preserved from the original flow.
        result_dict = {"status": "SUCCESS"}
    except Exception as err:
        # FIX: `ibm_db.conn_error`/`conn_errormsg` are functions, not
        # exception classes; listing them in the except tuple made the
        # handler itself raise TypeError.  `Exception` covers the intent.
        logging.exception(err)
        result_dict = {"status": "FAILURE"}
    return {"result": "This flow should get executed"}
def run_extract_email_msg(params):
    """Run the email-extraction step for every message of *mode*.

    Iterates EVRE_LEARNING_EMAIL_MSGS (optionally capped by ``limit``) and
    calls ``extract_email_msgs.main`` once per submission id.

    :param params: dict with ``cos_everest_submission_bucket``, ``mode``
        (RUNTIME or TRAINING) and an optional integer ``limit``.
    :return: fixed ``result`` message; the SUCCESS/FAILURE flag is only
        computed locally (behavior preserved from the original flow).
    """
    try:
        cos_everest_submission_bucket = params.get("cos_everest_submission_bucket")
        # FIX: `x is None or ""` never rejected empty strings (the `or ""`
        # arm is a constant falsy value); `not x` covers None and "".
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        mode = params.get("mode")
        if not mode:
            raise Exception("Pass RUNTIME or TRAINING")
        limit = params.get("limit")

        db_conn = db2utils.get_connection()
        print("db_conn: {}".format(db_conn))
        # FIX: bind `mode` with a parameter marker (SQL-injection safe) and
        # coerce `limit` to int, since LIMIT cannot take a marker.
        if limit:
            sql = ('SELECT ID, USED_FOR FROM EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS '
                   'where USED_FOR = ? order by ID LIMIT {}'.format(int(limit)))
        else:
            sql = ('SELECT ID, USED_FOR FROM EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS '
                   'where USED_FOR = ? order by ID')
        print("sql: {}".format(sql))
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, (mode,))
        result = ibm_db.fetch_assoc(stmt)
        while result:
            submission_id = str(result["ID"])  # renamed: `id` shadowed the builtin
            row_mode = result["USED_FOR"]
            param = {
                'cos_everest_submission_bucket': cos_everest_submission_bucket,
                'submission_id': submission_id,
                'mode': row_mode,
            }
            print(f'Extracting Email message for : {submission_id, param}')
            extract_email_msgs.main(param)
            result = ibm_db.fetch_assoc(stmt)
        # NOTE: computed but never returned — preserved from the original flow.
        result_dict = {"status": "SUCCESS"}
    except Exception as err:
        # FIX: the original except tuple listed ibm_db functions, which are
        # not exception classes and break the handler.
        logging.exception(err)
        result_dict = {"status": "FAILURE"}
    return {"result": "This flow should get executed"}
def main(params):
    """Return all RUNTIME rows of EVRE_LEARNING_EMAIL_MSGS.

    :param params: unused (kept for the platform's action signature).
    :return: ``{"result": [rows...], "status": "SUCCESS"}`` on success,
        ``{"error": <message>, "status": "FAILURE"}`` on error.
    """
    logging.info('Calling fn_split_pdf.')
    try:
        db_conn = db2utils.get_connection()
        print("db_conn: {}".format(db_conn))
        # Static query, no user input interpolated.
        sql = '''SELECT ID, DOCUMENT_NAME, STATUS, TO_CHAR(FIRST_UPDATED,'YYYY-MM-DD HH.MI.SS') as FIRST_UPDATED, TO_CHAR(LAST_UPDATED,'YYYY-MM-DD HH.MI.SS') as LAST_UPDATED FROM EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS where USED_FOR = 'RUNTIME' order by ID '''
        print("sql: {}".format(sql))
        stmt = ibm_db.exec_immediate(db_conn, sql)
        result = ibm_db.fetch_assoc(stmt)
        result_list = []
        while result:
            # FIX: the original fetched the *next* row before appending, so
            # it dropped the first row and appended the terminating False
            # returned by fetch_assoc at end-of-cursor.
            result_list.append(result)
            result = ibm_db.fetch_assoc(stmt)
        json_result = {"result": result_list, "error": {}}
        print(f'json_result: {json_result}')
        return {"result": result_list, "status": "SUCCESS"}
    except Exception as err:
        # FIX: except tuple listed ibm_db functions (not exception classes);
        # store the message, not the raw Exception (not JSON-serializable).
        logging.exception(err)
        return {"error": str(err), "status": "FAILURE"}
def main(params):
    """Extract the body and attachments of one stored Outlook .msg file.

    Loads the message bytes from COS, writes the message text (headers +
    body) back to COS, records it in EVRE_LEARNING_EMAIL_ATTACHMENTS, saves
    every attachment via the custom attachment class, then advances the
    message status to CONVERT_TO_PDF.

    :param params: dict with ``cos_everest_submission_bucket``,
        ``submission_id`` and ``mode``.
    :return: ``{"result": [...], "status": "SUCCESS"}`` or
        ``{"error": <message>, "status": "FAILURE"}``.
    """
    logging.info('Calling fn_extract_email_msgs.')
    try:
        cos_everest_submission_bucket = params.get("cos_everest_submission_bucket")
        # FIX: `x is None or ""` never rejected empty strings; `not x` does.
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        submission_id = params.get("submission_id")
        if not submission_id:
            raise Exception("Pass submission_id ")
        mode = params.get("mode")
        if not mode:
            raise Exception("Pass mode ")

        object_storage_key_prefix = OBJECT_STORAGE_EMAIL_ATTACHMENTS_ROOT_FOLDER + "/" + mode
        db_conn = db2utils.get_connection()
        print("db_conn: {}".format(db_conn))
        # FIX: bind submission_id with a parameter marker (injection-safe).
        sql = ('SELECT ID, DOCUMENT_NAME, ENCODED_ID, HEX(ENCODED_ID) as MSG_DOCUMENT_ID '
               'FROM EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS where ID=?')
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, (submission_id,))
        result = ibm_db.fetch_both(stmt)
        if not result:
            raise Exception("No email message document found")
        msg_id = result["ID"]
        msg_object_storage_key = result["DOCUMENT_NAME"]
        msg_encoded_id = result["ENCODED_ID"]
        msg_document_id = result["MSG_DOCUMENT_ID"]

        email_message_bytes = cosutils.get_item(cos_everest_submission_bucket,
                                                msg_object_storage_key)
        # Parse the .msg; attachments use the project's attachment class so
        # save() can write them to object storage.
        msg = extract_msg.Message(email_message_bytes,
                                  attachmentClass=EmailAttachmentClass)

        msg_file_content = """
{msg_from}
{msg_to}
{msg_cc}
{msg_subject}
{msg_date}
{msg_body}
""".format(msg_from=msg.sender,
           msg_to=msg.to,
           msg_cc=msg.cc,
           msg_subject=msg.subject,
           msg_date=msg.date,
           msg_body=msg.body)
        print(msg_file_content)

        object_storage_key = (object_storage_key_prefix + "/" + str(msg_id) + "/"
                              + (msg_document_id + "_message.txt"))
        return_val = cosutils.save_file(cos_everest_submission_bucket,
                                        object_storage_key, msg_file_content)
        # FIX: `is "SUCCESS"` compared object identity; use equality.
        if return_val == "SUCCESS":
            print("File Uploaded to object storage successfully")
            db_conn = db2utils.get_connection()
            print("db_conn: {}".format(db_conn))
            sql = ('SELECT ID FROM FINAL TABLE (INSERT INTO '
                   'EVERESTSCHEMA.EVRE_LEARNING_EMAIL_ATTACHMENTS '
                   '(EVRE_EMAIL_MSG_ID, DOCUMENT_NAME, DOCUMENT_TYPE, '
                   'CLASSIFICATION_TYPE, STATUS, USED_FOR) '
                   "VALUES (?, ?, '.txt', 'N/A', 'CONVERT_TO_PDF', ?) )")
            stmt = ibm_db.prepare(db_conn, sql)
            ibm_db.execute(stmt, (msg_id, object_storage_key, mode))
            result = ibm_db.fetch_both(stmt)
            if result:
                attachment_id = result["ID"]
                print(f'attachment_id: {attachment_id}')
        else:
            raise Exception("File upload to object storage failed")

        attachments = msg.attachments
        count_attachments = len(attachments)
        print("count_attachments: {}", count_attachments)
        if count_attachments == 0:
            print('No Atatchments found for msg:: {}', msg_object_storage_key)
        else:
            for attachment in attachments:
                attachment.save(
                    object_storage_bucket_name=cos_everest_submission_bucket,
                    object_storage_key_prefix=object_storage_key_prefix,
                    save_to_object_storage=True,
                    msg_id=msg_id,
                    msg_encoded_id=msg_encoded_id,
                    msg_document_id=msg_document_id,
                    mode=mode)

        # Advance the workflow status of the message row.
        sql = ("SELECT ID, STATUS, TO_CHAR(FIRST_UPDATED,'YYYY-MM-DD HH.MI.SS') as FIRST_UPDATED, "
               "TO_CHAR(LAST_UPDATED,'YYYY-MM-DD HH.MI.SS') as LAST_UPDATED FROM FINAL TABLE "
               "(UPDATE EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS SET STATUS = 'CONVERT_TO_PDF' where ID = ?) ")
        print("sql: {}".format(sql))
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, (msg_id,))
        result = ibm_db.fetch_assoc(stmt)
        result_list = []
        if result:
            result_list.append(result)
        return {"result": result_list, "status": "SUCCESS"}
    except Exception as err:
        # FIX: except tuple listed ibm_db functions (not exception classes);
        # store a string, not the raw Exception object.
        logging.exception(err)
        return {"error": str(err), "status": "FAILURE"}
def main(params):
    """Split every final PDF of one submission into one-page PDFs.

    Each page is written back to COS under ``final_pdf_split`` and recorded
    in EVRE_LEARNING_SPLIT_CONTENT; finally the message status advances to
    STANDARDIZE_TO_TXT.

    :param params: dict with ``cos_everest_submission_bucket``,
        ``final_pdf_folder``, ``submissions_data_folder``, ``submission_id``
        and ``mode``.
    :return: ``{"result": [...], "status": "SUCCESS"}`` or
        ``{"error": <message>, "status": "FAILURE"}``.
    """
    logging.info('Calling fn_split_pdf.')
    try:
        cos_everest_submission_bucket = params.get("cos_everest_submission_bucket")
        # FIX: `x is None or ""` never rejected empty strings; `not x` does.
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        final_pdf_folder = params.get("final_pdf_folder")
        if not final_pdf_folder:
            raise Exception("Pass pdf folder to split files")
        submissions_data_folder = params.get("submissions_data_folder")
        if not submissions_data_folder:
            raise Exception("Pass submissions_data_folder")
        submission_id = params.get("submission_id")
        if not submission_id:
            raise Exception("Pass submission_id")
        mode = params.get("mode")
        if not mode:
            raise Exception("Pass mode")

        object_storage_key = (submissions_data_folder + "/" + mode + "/"
                              + str(submission_id) + "/")
        extensions = ('pdf',)
        # FIX: the global inline flag (?i) must lead the pattern — a
        # mid-pattern position is an error since Python 3.11.  Semantics are
        # unchanged: global flags always applied to the whole pattern.
        regex = r"(?i)^" + object_storage_key + r".*pdf.*$"
        file_keys = cosutils.get_bucket_contents(cos_everest_submission_bucket, regex)
        print(file_keys)
        for key in file_keys:
            if not key.lower().endswith(extensions):
                continue
            file_name = os.path.basename(key)
            file_name_without_ext, _file_extension = os.path.splitext(file_name)
            pdf_file_bytes = cosutils.get_item(cos_everest_submission_bucket, key)
            db_conn = db2utils.get_connection()
            print("db_conn: {}".format(db_conn))
            # FIX: bind values with parameter markers (injection-safe; the
            # document name comes from bucket listings and may contain quotes).
            sql = ('SELECT ID FROM EVERESTSCHEMA.EVRE_LEARNING_EMAIL_ATTACHMENTS '
                   'where EVRE_EMAIL_MSG_ID=? and DOCUMENT_NAME=? ')
            print("sql: {}".format(sql))
            stmt = ibm_db.prepare(db_conn, sql)
            ibm_db.execute(stmt, (submission_id, key))
            result = ibm_db.fetch_both(stmt)
            pdf_id = result["ID"] if result else -1

            pdf = PdfFileReader(BytesIO(pdf_file_bytes))
            num_of_pages = pdf.getNumPages()
            print("num_of_pages:: {} ", num_of_pages)
            for page in range(num_of_pages):
                pdf_writer = PdfFileWriter()
                pdf_writer.addPage(pdf.getPage(page))
                split_pdf_dir = "final_pdf_split"
                output_filename_key = '{}{}/{}_page_{}.pdf'.format(
                    object_storage_key, split_pdf_dir, file_name_without_ext,
                    page + 1)
                tmp = BytesIO()
                pdf_writer.write(tmp)
                tmp.seek(0)
                output_page_bytes = tmp.read()
                return_val = cosutils.save_file(cos_everest_submission_bucket,
                                                output_filename_key,
                                                output_page_bytes)
                # FIX: `is "SUCCESS"` compared identity; use equality.
                if return_val == "SUCCESS":
                    print("File Uploaded to object storage successfully:: {} ",
                          output_filename_key)
                    db_conn = db2utils.get_connection()
                    print("db_conn: {}".format(db_conn))
                    sql = ('SELECT ID FROM FINAL TABLE (INSERT INTO '
                           'EVERESTSCHEMA.EVRE_LEARNING_SPLIT_CONTENT '
                           '(EVRE_EMAIL_MSG_ID, EVRE_LEARNING_EMAIL_ATTACHMENTS_ID, '
                           'DOCUMENT_NAME, DOCUMENT_TYPE, CLASSIFICATION_TYPE, '
                           'STATUS, USED_FOR, DESCRIPTION) '
                           "VALUES (?, ?, ?, '.pdf', 'N/A', 'N', 'RUNTIME', "
                           "'STANDARDIZE_TO_TXT') )")
                    print("sql: {}".format(sql))
                    stmt = ibm_db.prepare(db_conn, sql)
                    ibm_db.execute(stmt, (submission_id, pdf_id, output_filename_key))
                    result = ibm_db.fetch_both(stmt)
                    if result:
                        attachment_id = result["ID"]

        # End of for loop — advance the submission's workflow status.
        db_conn = db2utils.get_connection()
        sql = ("SELECT ID, STATUS, TO_CHAR(FIRST_UPDATED,'YYYY-MM-DD HH.MI.SS') as FIRST_UPDATED, "
               "TO_CHAR(LAST_UPDATED,'YYYY-MM-DD HH.MI.SS') as LAST_UPDATED FROM FINAL TABLE "
               "(UPDATE EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS SET STATUS = 'STANDARDIZE_TO_TXT' where ID = ?) ")
        print("sql: {}".format(sql))
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, (submission_id,))
        result = ibm_db.fetch_assoc(stmt)
        result_list = []
        if result:
            result_list.append(result)
        return {"result": result_list, "status": "SUCCESS"}
    except Exception as err:
        # FIX: except tuple listed ibm_db functions (not exception classes);
        # store a string, not the raw Exception object.
        logging.exception(err)
        return {"error": str(err), "status": "FAILURE"}
def main(params):
    """Upload all TRAINING attachments of the selected types to Watson Discovery.

    :param params: optional overrides for ``cos_everest_submission_bucket``,
        ``cos_everest_submission_data_folder`` and ``doc_type_to_process``
        (an iterable of extensions such as ``['.pdf', '.docx']``).
    :return: always ``{"result": "Success"}`` (errors are logged only,
        preserved from the original flow).
    """
    logging.info('Calling fn_wds_operations.')
    try:
        cos_everest_submission_bucket = params.get(
            "cos_everest_submission_bucket", SUBMISSION_BUCKET)
        # FIX: `x is None or ""` never rejected empty strings; `not x` does.
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        cos_everest_submission_data_folder = params.get(
            "cos_everest_submission_data_folder", SUBMISSION_DATA_FOLDER)
        if not cos_everest_submission_data_folder:
            raise Exception("Pass cos_everest_submission_data_folder")
        doc_type_to_process = params.get("doc_type_to_process",
                                         DOC_TYPE_TO_PROCESS)
        if not doc_type_to_process:
            raise Exception("Pass doc_type_to_process")

        # Extension (lower-case) -> MIME type; replaces the if/elif ladder.
        # See the Microsoft Office MIME-type reference for the OOXML entries.
        content_types = {
            ".pdf": "application/pdf",
            ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            ".doc": "application/msword",
            ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            ".xls": "application/vnd.ms-excel",
            ".xl": "application/vnd.ms-excel",
            ".xlsm": "application/vnd.ms-excel.sheet.macroEnabled.12",
            ".xltm": "application/vnd.ms-excel.template.macroEnabled.12",
            ".xltx": "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
            ".xlsb": "application/vnd.ms-excel.sheet.binary.macroEnabled.12",
            ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            ".html": "application/xhtml+xml",
        }

        # initialize watson discovery utils
        wds = watsondiscoveryutils.inst()
        db_conn = db2utils.get_connection()
        print("db_conn: {}".format(db_conn))
        # FIX: str(tuple(...)) produced invalid SQL for one-element lists
        # ("IN ('.pdf',)"); build one parameter marker per type instead,
        # which is also injection-safe.
        placeholders = ", ".join("?" for _ in doc_type_to_process)
        sql = ('SELECT ID, DOCUMENT_TYPE, DOCUMENT_NAME FROM '
               'EVERESTSCHEMA.evre_learning_email_attachments '
               "where used_for='TRAINING' and DOCUMENT_TYPE IN ({}) "
               'order by ID'.format(placeholders))
        print("sql: {}".format(sql))
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, tuple(doc_type_to_process))
        result = ibm_db.fetch_both(stmt)
        while result:
            print("result::{}".format(result))
            doc_type = result["DOCUMENT_TYPE"]
            obj_storage_doc_path = result["DOCUMENT_NAME"]
            print(f'obj_storage_doc_path: {obj_storage_doc_path}')
            # FIX: an unknown extension previously left file_content_type
            # unbound (NameError) or reused the previous row's value; fall
            # back to a generic binary type instead.
            file_content_type = content_types.get(doc_type.lower(),
                                                  "application/octet-stream")
            wds_upload_task(wds, cos_everest_submission_bucket,
                            obj_storage_doc_path, file_content_type)
            # iterate thru the resultset
            result = ibm_db.fetch_both(stmt)
    except Exception as err:
        # FIX: except tuple listed ibm_db functions (not exception classes),
        # and json.dumps(err) raised TypeError inside the handler.
        logging.exception(err)
    return {"result": "Success"}
def save(self, contentId=False, json=False, useFileName=False, raw=False,
         customPath=None, customFilename=None, object_storage_bucket_name=None,
         object_storage_key_prefix=None, save_to_object_storage=None,
         msg_id=None, msg_encoded_id=None, msg_document_id=None):
    """Save this attachment to disk or, optionally, to object storage + DB2.

    Filename preference order: customFilename, content-id (if ``contentId``),
    long filename, short filename, then a random fallback.  When
    ``save_to_object_storage`` is true the bytes go to COS under
    ``<prefix>/<msg_id>/<msg_document_id>_<filename>`` and a row is inserted
    into EVRE_LEARNING_EMAIL_ATTACHMENTS.  Embedded messages delegate to
    ``saveEmbededMessage``.

    NOTE: the ``json`` parameter shadows the stdlib module name but is kept
    for interface compatibility with extract_msg's attachment API.

    :return: the filename chosen for the attachment.
    """
    # Check if the user has specified a custom filename
    filename = None
    if customFilename is not None and customFilename != '':
        filename = customFilename
    else:
        # Check if user wants to save the file under the Content-id
        if contentId:
            filename = self.__cid
        # Prefer the long filename, then the short one, then a random name.
        if filename is None:
            filename = self.__longFilename
        if filename is None:
            filename = self.__shortFilename
        if filename is None:
            filename = 'UnknownFilename ' + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(5)) + '.bin'

    if customPath is not None and customPath != '':
        # FIX: the original test `!= '/' or != '\\'` was always true, so a
        # separator was appended even when one was already present.
        if customPath[-1] not in ('/', '\\'):
            customPath += '/'
        filename = customPath + filename
    print("######filename::{}", filename)

    if self.__type == "data":
        if not save_to_object_storage:
            """ Write to local system """
            with open(filename, 'wb') as f:
                f.write(self.__data)
        else:
            """ Write to Object Storage """
            tmp_file_name, file_extension = os.path.splitext(filename)
            print("######msg_document_id::{}", msg_document_id)
            object_storage_key = (object_storage_key_prefix + "/" + str(msg_id)
                                  + "/" + (msg_document_id + "_" + filename))
            print("object_storage_bucket_name: {}: object_storage_key: {} ".
                  format(object_storage_bucket_name, object_storage_key))
            # Write attachments to the object storage
            return_val = cosutils.save_file(object_storage_bucket_name,
                                            object_storage_key, self.__data)
            # FIX: `is "SUCCESS"` compared identity; use equality.
            if return_val == "SUCCESS":
                print("File Uploaded to object storage successfully")
                # create entries in DB2
                db_conn = db2utils.get_connection()
                print("db_conn: {}".format(db_conn))
                # FIX: bind values with parameter markers — attachment
                # filenames may contain quotes and previously broke the SQL.
                sql = ('SELECT ID FROM FINAL TABLE (INSERT INTO '
                       'EVERESTSCHEMA.EVRE_LEARNING_EMAIL_ATTACHMENTS '
                       '(EVRE_EMAIL_MSG_ID, DOCUMENT_NAME, DOCUMENT_TYPE, '
                       'CLASSIFICATION_TYPE, STATUS) '
                       "VALUES (?, ?, ?, 'N/A', 'N') )")
                stmt = ibm_db.prepare(db_conn, sql)
                ibm_db.execute(stmt, (msg_id, object_storage_key, file_extension))
                result = ibm_db.fetch_both(stmt)
                if result:
                    attachment_id = result["ID"]
                    print(f'attachment_id: {attachment_id}')
    else:
        self.saveEmbededMessage(contentId, json, useFileName, raw, customPath,
                                customFilename)
    return filename
def main(params):
    """Convert a submission's Office documents to PDF via the convertio API.

    For every .docx/.doc/pptx object under the submission's folder, submits
    a conversion job, polls until it finishes, stores the resulting PDF in
    COS under ``final_pdf`` and records it in DB2; finally advances the
    message status to SPLIT_PDF.

    :param params: dict with ``cos_everest_submission_bucket``,
        ``submission_id``, ``submissions_data_folder`` and ``mode``.
    :return: ``{"result": [...], "status": "SUCCESS"}`` or
        ``{"error": <message>, "status": "FAILURE"}``.
    """
    logging.info('Calling fn_document_conversion_pdf.')
    try:
        cos_everest_submission_bucket = params.get("cos_everest_submission_bucket")
        # FIX: `x is None or ""` never rejected empty strings; `not x` does.
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        submission_id = params.get("submission_id")
        if not submission_id:
            raise Exception("Pass submission_id")
        submissions_data_folder = params.get("submissions_data_folder")
        if not submissions_data_folder:
            raise Exception("Pass submissions_data_folder")
        mode = params.get("mode")
        if not mode:
            raise Exception("Pass mode")

        object_storage_key = (submissions_data_folder + "/" + mode + "/"
                              + str(submission_id) + "/")
        regex = r"^" + object_storage_key + ".*$"
        file_keys = cosutils.get_bucket_contents(cos_everest_submission_bucket,
                                                 regex)
        extensions = ('.docx', '.doc', 'pptx')
        for key in file_keys:
            if not key.lower().endswith(extensions):
                continue
            file_name = os.path.basename(key)
            file_name_without_ext, _file_extension = os.path.splitext(file_name)
            url = (OBJECT_STORAGE_PUBLIC_URL + "/" + object_storage_key
                   + quote(file_name))
            request_params = {
                "apikey": convertio_api_key,
                "input": "url",
                "file": url,
                "outputformat": "pdf"
            }
            print(url)
            # Submit the conversion job.
            r = requests.post(url=CONVERT_IO_URL,
                              data=json.dumps(request_params), stream=True)
            return_val = json.loads(r.text)
            print("1......return_val::{}", return_val)
            status = return_val["status"]
            code = return_val["code"]
            if code == 200 and status == "ok":
                conversion_id = return_val["data"]["id"]
                print("converted document id::", conversion_id)
                check_status_url = CONVERT_IO_URL + "/" + conversion_id + "/status"
                print("check_status_url::", check_status_url)
                while True:
                    r = requests.get(url=check_status_url)
                    return_val = json.loads(r.text)
                    print("2.......status::return_val::", return_val)
                    status = return_val["status"]
                    code = return_val["code"]
                    if code == 200 and status == "ok":
                        step = return_val["data"]["step"]
                        step_percent = return_val["data"]["step_percent"]
                        if step == "finish" and step_percent == 100:
                            conversion_id = return_val["data"]["id"]
                            print(
                                "Get content, store in object storage and update db2 and exist"
                            )
                            # Download the converted PDF as base64.
                            get_result_url = (CONVERT_IO_URL + "/"
                                              + conversion_id + "/dl/base64")
                            print("status::get_result_url::", get_result_url)
                            r = requests.get(url=get_result_url)
                            return_val = json.loads(r.text)
                            status = return_val["status"]
                            code = return_val["code"]
                            if code == 200 and status == "ok":
                                content = return_val["data"]["content"]
                                pdf_object_storage_key = (
                                    object_storage_key + "final_pdf" + "/"
                                    + file_name_without_ext + ".pdf")
                                print(
                                    "cos_everest_submission_bucket: {}: pdf_object_storage_key: {} "
                                    .format(cos_everest_submission_bucket,
                                            pdf_object_storage_key))
                                # Write attachments to the object storage
                                cosutils.save_file(
                                    cos_everest_submission_bucket,
                                    pdf_object_storage_key,
                                    base64.b64decode(content))
                                db_conn = db2utils.get_connection()
                                print("db_conn: {}".format(db_conn))
                                # FIX: bind values with parameter markers
                                # (injection-safe document name).
                                sql = ('SELECT ID FROM FINAL TABLE (INSERT INTO '
                                       'EVERESTSCHEMA.EVRE_LEARNING_EMAIL_ATTACHMENTS '
                                       '(EVRE_EMAIL_MSG_ID, DOCUMENT_NAME, DOCUMENT_TYPE, '
                                       'CLASSIFICATION_TYPE, STATUS, USED_FOR, DESCRIPTION) '
                                       "VALUES (?, ?, '.pdf', 'N/A', 'N', 'RUNTIME', "
                                       "'SPLIT_PDF') )")
                                print("sql: {}".format(sql))
                                stmt = ibm_db.prepare(db_conn, sql)
                                ibm_db.execute(stmt, (submission_id,
                                                      pdf_object_storage_key))
                                result = ibm_db.fetch_both(stmt)
                                if result:
                                    attachment_id = result["ID"]
                            break
                        else:
                            time.sleep(2)
                    else:
                        # FIX: the original re-polled immediately on a failed
                        # status response (hot loop); back off instead.
                        time.sleep(2)

        # End of for loop for PDF conversion — advance the workflow status.
        db_conn = db2utils.get_connection()
        sql = ("SELECT ID, STATUS, TO_CHAR(FIRST_UPDATED,'YYYY-MM-DD HH.MI.SS') as FIRST_UPDATED, "
               "TO_CHAR(LAST_UPDATED,'YYYY-MM-DD HH.MI.SS') as LAST_UPDATED FROM FINAL TABLE "
               "(UPDATE EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS SET STATUS = 'SPLIT_PDF' where ID = ?) ")
        print("sql: {}".format(sql))
        stmt = ibm_db.prepare(db_conn, sql)
        ibm_db.execute(stmt, (submission_id,))
        result = ibm_db.fetch_assoc(stmt)
        result_list = []
        if result:
            result_list.append(result)
        return {"result": result_list, "status": "SUCCESS"}
    except Exception as err:
        # FIX: except tuple listed ibm_db functions (not exception classes);
        # store a string, not the raw Exception object.
        logging.exception(err)
        return {"error": str(err), "status": "FAILURE"}
def main(params):
    """Run Watson NLU over a submission's standardized text and store results.

    Feeds every ``.txt`` object of the submission to the NLU model, cleans
    and aggregates the responses, writes them to
    ``nlu_results/output.json`` in COS, then sets the message status from
    the validation outcome.

    :param params: dict with ``cos_everest_submission_bucket``,
        ``submission_id``, ``submissions_data_folder``,
        ``standardized_txt_dir``, ``mode`` and ``model_id``.
    :return: ``{"result": [...], "status": "SUCCESS"}`` or
        ``{"error": <message>, "status": "FAILURE"}``.
    """
    logging.info('Calling fn_get_submission_results')
    try:
        cos_everest_submission_bucket = params.get("cos_everest_submission_bucket")
        # FIX: `x is None or ""` never rejected empty strings; `not x` does.
        if not cos_everest_submission_bucket:
            raise Exception("Pass location of the bucket")
        submission_id = params.get("submission_id")
        if not submission_id:
            raise Exception("Pass submission_id")
        submissions_data_folder = params.get("submissions_data_folder")
        if not submissions_data_folder:
            raise Exception("Pass submissions_data_folder")
        standardized_txt_dir = params.get("standardized_txt_dir")
        if not standardized_txt_dir:
            raise Exception("Pass standardized_txt_dir")
        mode = params.get("mode")
        if not mode:
            raise Exception("Pass mode")
        model_id = params.get("model_id")
        if not model_id:
            raise Exception("Pass model_id")

        object_storage_key = (submissions_data_folder + "/" + mode + "/"
                              + str(submission_id))
        output_object_storage_key = (submissions_data_folder + "/" + mode + "/"
                                     + str(submission_id) + "/"
                                     + "nlu_results" + "/" + "output.json")
        extensions = ('txt',)
        regex = r"^" + object_storage_key + ".*txt$"
        file_keys = cosutils.get_bucket_contents(cos_everest_submission_bucket,
                                                 regex)
        nlu_service = watson_nlu_utils.inst()
        nlu_results_list = []
        for key in file_keys:
            print("Processing file:: {}", file_keys)
            if not key.endswith(extensions):
                continue
            txt_file_bytes = cosutils.get_item(cos_everest_submission_bucket, key)
            text = txt_file_bytes.decode("utf-8")
            print("text:: ", len(text.strip()))
            # Only non-empty documents are sent to NLU.
            if text.strip():
                nlu_results = watson_nlu_utils.get_result(nlu_service,
                                                          model_id, text)
                nlu_results_list.append(nlu_results)

        # get Final cleaned results
        nlu_results_dict = {}
        nlu_response = get_clean_results(nlu_results_list)
        nlu_results_dict["result"] = nlu_response
        res_bytes = str(nlu_results_dict).encode('utf-8')
        print("res_bytes::", res_bytes)
        # store in object storage
        return_val = cosutils.save_file(cos_everest_submission_bucket,
                                        output_object_storage_key, res_bytes)
        result_list = []
        # FIX: `is "SUCCESS"` compared identity; use equality.
        if return_val == "SUCCESS":
            print("File Uploaded to object storage successfully:: {} ",
                  output_object_storage_key)
            validation_status = get_validation_status(nlu_response)
            print("validation_status", validation_status["status"])
            db_conn = db2utils.get_connection()
            # FIX: bind values with parameter markers (injection-safe).
            sql = ("SELECT ID, STATUS, TO_CHAR(FIRST_UPDATED,'YYYY-MM-DD HH.MI.SS') as FIRST_UPDATED, "
                   "TO_CHAR(LAST_UPDATED,'YYYY-MM-DD HH.MI.SS') as LAST_UPDATED FROM FINAL TABLE "
                   "(UPDATE EVERESTSCHEMA.EVRE_LEARNING_EMAIL_MSGS SET STATUS = ? where ID = ?) ")
            print("sql: {}".format(sql))
            stmt = ibm_db.prepare(db_conn, sql)
            ibm_db.execute(stmt, (validation_status["status"], submission_id))
            result = ibm_db.fetch_assoc(stmt)
            if result:
                result_list.append(result)
        result_dict = {"result": result_list, "status": "SUCCESS"}
        print(result_dict)
        return result_dict
    except Exception as err:
        # FIX: except tuple listed ibm_db functions (not exception classes);
        # store a string, not the raw Exception object.
        logging.exception(err)
        return {"error": str(err), "status": "FAILURE"}