def get_transferable(content):
    """
    Tell whether the credit is marked transferable (40A, falling back to 40B).

    The field text is upper-cased and searched for the word TRANSFERABLE,
    allowing one optional space between any two letters.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    bool
        True when the word is found. False when it is not found, or when
        neither 40A nor 40B is present (a warning is logged in that case).
    """
    spaced_word = 'T ?R ?A ?N ?S ?F ?E ?R ?A ?B ?L ?E'
    # 40A wins over 40B when both are present.
    for code in ('40A', '40B'):
        if code in content:
            return re.search(spaced_word, content[code].upper()) is not None
    cmLog('[W] 可否轉讓: Missing 40A or 40B (必要欄位)')
    return False
def get_at_sight(content):
    """
    Get the draft tenor description (42C, falling back to 42P).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        'AT SIGHT' when the field text contains "at sight" (case-insensitive),
        otherwise the field text unchanged. When neither 42C nor 42P exists,
        a warning is logged and the warning text is returned.
    """
    tenor_key = next((code for code in ('42C', '42P') if code in content), None)
    if tenor_key is None:
        cmLog('[W] 是否即期: Missing 42C')
        return '[W] 是否即期: Missing 42C'

    tenor = content[tenor_key]
    if re.search('at sight', tenor, re.IGNORECASE):
        return 'AT SIGHT'
    return tenor
def get_revocable(content):
    """
    Tell whether the credit is revocable (40A, falling back to 40B).

    The field text is upper-cased and searched for the word IRREVOCABLE,
    allowing one optional space between any two letters. The credit counts as
    revocable when that word is absent.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    bool
        True when IRREVOCABLE is not found in the field. False when it is
        found, or when neither 40A nor 40B is present (a warning is logged in
        that case).
    """
    spaced_word = 'I ?R ?R ?E ?V ?O ?C ?A ?B ?L ?E'
    # 40A wins over 40B when both are present.
    for code in ('40A', '40B'):
        if code in content:
            return re.search(spaced_word, content[code].upper()) is None
    cmLog('[W] 可否撤銷: Missing 40A or 40B (必要欄位)')
    return False
def get_expiry_date(content):
    """
    Get the expiry date and place (31D), warning when the place is not Taiwan.

    All spaces are removed from the field first, because some inputs contain
    spaces between the digits of the date. The first run of digits is taken as
    the date; whatever remains after deleting that run is taken as the place.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    list
        ``[date, place]`` when 31D is present. ``date`` is an empty string
        when the field holds no digits. ``place`` is replaced by a warning
        text (also logged) when it does not contain "taiwan"
        (case-insensitive). When 31D is missing, a one-element list holding
        the warning text.
    """
    key = '31D'
    if key not in content:
        cmLog('[W] 信用狀有效期限: Missing 31D (必要欄位)')
        return ['[W] 信用狀有效期限: Missing 31D (必要欄位)']

    compact = content[key].replace(' ', '')
    digit_runs = re.findall(r'\d+', compact)
    date_text = digit_runs[0] if digit_runs else ""

    # Without a date there is nothing to cut out, so the whole text is the place.
    place = compact.replace(date_text, '') if date_text else compact
    if not re.search('taiwan', place, re.IGNORECASE):
        cmLog('[W] 地點『{}』不是台灣!'.format(place))
        place = '[W] 地點『{}』不是台灣!'.format(place)
    return [date_text, place]
def get_transshipment(content):
    """
    Get the transshipment instruction (43T).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        'NOT ALLOWED' when the field contains NOT ALLOWED, FORBIDDEN or
        PROHIBITED; 'ALLOWED' when it contains ALLOWED, PERMITTED or YES
        (all case-insensitive). Otherwise a warning text, which is also
        logged: one for an unrecognized instruction, one for a missing 43T.
    """
    if '43T' not in content:
        cmLog("[W] 可否轉運: Missing 43T, 預設『Allow』")
        return "[W] 可否轉運: Missing 43T, 預設『Allow』"

    instruction = content['43T'].upper()
    # The deny words must be tested first: 'NOT ALLOWED' contains 'ALLOWED'.
    if any(word in instruction
           for word in ('NOT ALLOWED', 'FORBIDDEN', 'PROHIBITED')):
        return 'NOT ALLOWED'
    if any(word in instruction for word in ('ALLOWED', 'PERMITTED', 'YES')):
        return 'ALLOWED'

    # This warning used to be labelled as the partial-shipment item although
    # the field read here is 43T; it now carries the transshipment label, the
    # same one the missing-field warning uses.
    message = "[W] 可否轉運: 無法辨識, 預設『Allow』"
    cmLog(message)
    return message
def get_UCP660(content):
    """
    Check whether the applicable rules (40E) refer to UCP 600.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    bool
        True when the lower-cased 40E text contains 'latest' or '600'.
        False otherwise, including when 40E is missing; a warning is logged
        in both of those cases.
    """
    if '40E' not in content:
        cmLog('[W] 是否依國際商會2007修定之UCP600: Missing 40E')
        return False

    rules_text = content['40E']
    lowered = rules_text.lower()
    if 'latest' in lowered or '600' in lowered:
        return True

    cmLog(
        '[W] 是否依國際商會2007修定之UCP600: {} is not the latest version (UCP 600)'
        .format(rules_text))
    return False
def get_nominated_bank(content):
    """
    Get the nominated bank description (41A, falling back to 41D).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        'Any bank' when the field text contains the exact upper-case phrase
        'ANY BANK', otherwise the field text unchanged. When neither 41A nor
        41D exists, a warning is logged and the warning text is returned.
    """
    for code in ('41A', '41D'):
        if code in content:
            bank_text = content[code]
            # The match is case-sensitive, as it always was.
            return 'Any bank' if 'ANY BANK' in bank_text else bank_text

    cmLog('[W] 指定押匯銀行: Missing 41A and 41D (必要欄位)')
    return '[W] 指定押匯銀行: Missing 41A and 41D (必要欄位)'
def get_nominated_agent(content):
    """
    Get the nominated agent item.

    Nothing is read from ``content``: the same warning text is logged and
    returned on every call.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information (unused)

    Returns
    -------
    str
        the fixed warning text
    """
    message = "[W] 指定代理商: (此欄位不需要看?)"
    cmLog(message)
    return message
def get_latest_negociation(content):
    """
    Get the latest negotiation date item.

    Nothing is read from ``content``: the same warning text is logged and
    returned on every call (the original note says this item is not
    important and does not need to be extracted).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information (unused)

    Returns
    -------
    str
        the fixed warning text
    """
    message = '[W] 押匯期限: 不重要,應該與信用狀有效期限相同'
    cmLog(message)
    return message
def get_quantity(content):
    """
    Get the goods quantity from the goods description (45A, 454 or 45B).

    The text is searched first for a labelled quantity ("QUANTITY: <number>")
    and, when that fails, for a number followed by a weight unit (MT / MTS /
    METRIC TONS). Matching is case-insensitive.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    list of str
        every number string matched by the winning pattern. When nothing
        matches, or when none of the keys exists, a one-element list holding
        the warning text (which is also logged).
    """
    # Lookup priority is 45A, then 454, then 45B. The 454 branch used to read
    # content['45A'], which is always a KeyError there because 45A is absent.
    # NOTE(review): 454 is presumably a mis-recognized "45A" - confirm.
    source_key = next(
        (code for code in ('45A', '454', '45B') if code in content), None)
    if source_key is None:
        cmLog('[W] 貨品數量: Missing 45A')
        return ['[W] 貨品數量: Missing 45A']

    # Repair the letter-O-for-zero confusion right before the MT unit.
    goods_text = content[source_key].replace('OOMT', '00MT')
    numbers_pat = r'(\d+[\.,]?\d{0,3}?[\.,]?\d*)'

    # 1) quantity introduced by an explicit label
    labelled = re.compile(r'QUANTITY: ?{} *\n?'.format(numbers_pat),
                          re.IGNORECASE)
    value = labelled.findall(goods_text) or None

    # 2) number followed by a unit; a later unit pattern that matches
    #    overrides an earlier one, as before
    if value is None:
        units_patterns = [r'mt[s]? ?\(?.*\)?', r'metric tons']
        prefix = '{} *'.format(numbers_pat)
        for unit in units_patterns:
            matches = re.findall(prefix + unit, goods_text, re.IGNORECASE)
            if matches:
                value = matches

    if value is None:
        cmLog('[W] 貨品數量: Not found in 45A -> ' + goods_text)
        value = ['[W] 貨品數量: Not found in 45A -> ' + goods_text]
    return value
def requestOCR(credential, jpg_paths):
    """
    Send every image to the vision API and collect the responses.

    Parameters
    ----------
    credential:
        the GCP key handed to ``visionapi.set_vision_credential``
    jpg_paths: iterable of str
        paths of the letter-of-credit JPEG images

    Returns
    -------
    dict
        the API response of each image, keyed by the image's base file name.
        Images sharing a base name overwrite each other, so every image needs
        a distinct file name. An error reported for an image is logged; its
        response is still stored.
    """
    visionapi.set_vision_credential(credential)

    responses_by_name = {}
    for image_path in jpg_paths:
        response, failure = visionapi.annotateDocument(image_path)
        responses_by_name[os.path.basename(image_path)] = response
        if failure is not None:
            cmLog('[E] Vision API Error: {}'.format(failure))
    return responses_by_name
def get_descrption(content, productnames):
    """
    Get the product name from the goods description (45A, 454 or 45B).

    Line breaks in the field are replaced by spaces, then the first entry of
    ``productnames`` that occurs in the text is returned.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information
    productnames: iterable of str
        known product names, tried in order

    Returns
    -------
    str
        the matched product name, or an empty string when none matches (a
        warning is logged). When none of the keys exists, a warning is logged
        and the warning text is returned.
    """
    # Lookup priority is 45A, then 454, then 45B. The 454 branch used to read
    # content['45A'], which is always a KeyError there because 45A is absent.
    # NOTE(review): 454 is presumably a mis-recognized "45A" - confirm.
    source_key = next(
        (code for code in ('45A', '454', '45B') if code in content), None)
    if source_key is None:
        cmLog('[W] 貨品名稱: Missing 45A')
        return '[W] 貨品名稱: Missing 45A'

    goods_text = content[source_key].replace('\n', ' ')
    value = ""

    # Find the name by pattern.
    # NOTE(review): every newline was just replaced by a space, so this
    # pattern (which requires a trailing newline) cannot match and the name
    # list below always decides. Kept as-is; running it on the raw text would
    # change which value is returned - confirm the intended behavior first.
    labelled = re.findall('GOODS DESCRIPTION: ?(.*)\n', goods_text,
                          re.IGNORECASE)
    if labelled:
        value = labelled[0]
    else:
        # Find the name by lookup in the known product names.
        for item in productnames:
            if item in goods_text:
                value = item
                break

    if value == "":
        cmLog('[W] 貨品名稱: Not found in 45A ->' + goods_text)
    return value
def get_name_correctness(content):
    """
    Check that the beneficiary (59) is FORMOSA PLASTICS CORPORATION.

    Line breaks in the field are replaced by spaces and the comparison is
    case-insensitive.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    bool
        True when the expected company name occurs in the field. False when
        it does not, or when 59 is missing (a warning is logged in that case).
    """
    if '59' not in content:
        cmLog('[W] 受益人名稱: Missing 59 (必要欄位)')
        return False

    single_line = content['59'].replace('\n', ' ')
    return 'FORMOSA PLASTICS CORPORATION' in single_line.upper()
def get_mgmt_mark_up(content):
    """
    Get the mark-up handling item (42P).

    This item always produces a warning: one that quotes the 42P text when
    the field exists, and a "missing" warning when it does not.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the warning text, which is also logged
    """
    if '42P' in content:
        message = '[W] 溢價處理: {} (目前沒看到過,有待商確)'.format(content['42P'])
    else:
        message = '[W] 溢價處理: Missing 42P (目前沒看到過,有待商確)'
    cmLog(message)
    return message
def get_interest(content):
    """
    Get the interest item (39C).

    This item always produces a warning: one that quotes the 39C text when
    the field exists, and a "missing" warning when it does not.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the warning text, which is also logged
    """
    if '39C' in content:
        message = '[W] 利息負擔: 39C -> {}'.format(content['39C'])
    else:
        message = '[W] 利息負擔: Missing 39C 預設『客戶負擔』'
    cmLog(message)
    return message
def get_amount(content):
    """
    Get the credit amount (32B), warning when it is not in USD.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the 32B text with line breaks removed when it contains "USD"
        (case-insensitive, one optional space allowed between the letters).
        Otherwise a warning text quoting the amount. When 32B is missing, the
        "missing" warning text. Warnings are also logged.
    """
    if '32B' not in content:
        cmLog('[W] 信用狀金額: Missing 32B (必要欄位)')
        return '[W] 信用狀金額: Missing 32B (必要欄位)'

    amount = content['32B'].replace('\n', '')
    if re.search('u ?s ?d', amount, re.IGNORECASE) is None:
        amount = '[W] 信用狀金額: {} is not in USD'.format(amount)
        cmLog(amount)
    return amount
def get_confirmed(content):
    """
    Get the confirmation instruction (49).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        'WITHOUT' when the field contains that word (case-insensitive),
        otherwise the field text unchanged. When 49 is missing, a warning is
        logged and the warning text is returned.
    """
    if '49' not in content:
        cmLog('[W] 是否保兌: Missing 49 (必要欄位)')
        return '[W] 是否保兌: Missing 49 (必要欄位)'

    instruction = content['49']
    return 'WITHOUT' if 'WITHOUT' in instruction.upper() else instruction
def get_latest_shipment(content):
    """
    Get the latest shipment date (44C).

    Only 44C is read; no other field is consulted.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the first run of digits in the 44C text, or an empty string when the
        text holds no digits. When 44C is missing, a warning is logged and
        the warning text is returned.
    """
    key = '44C'
    if key not in content:
        cmLog('[W] 最後裝船期限: Missing 44C')
        return '[W] 最後裝船期限: Missing 44C'

    digit_runs = re.findall(r'\d+', content[key])
    return digit_runs[0] if digit_runs else ""
def get_nominated_loading_port(content):
    """
    Get the nominated loading port (44E).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the 44E text when it contains the upper-case word 'ANY'. Otherwise a
        warning text quoting the port. When 44E is missing, the "missing"
        warning text. Warnings are also logged.
    """
    if "44E" not in content:
        cmLog('[W] 出口港: Missing 44E')
        return "[W] 出口港: Missing 44E"

    port = content['44E']
    if 'ANY' in port:
        return port

    warning = '[W] 出口港 : \'{}\' does not match expectation'.format(port)
    cmLog(warning)
    return warning
def retrieveVisionResponse(credential, jpg_paths, result_root=None):
    """
    Return the OCR response, reusing a saved one when it exists.

    Front end of ``requestOCR`` that adds logging and a file cache. When
    ``result_root`` is given, ``<result_root>/vision_result.json`` is loaded
    if present. Otherwise, or when loading yields None, the vision API is
    called and the response is written to that file.

    Parameters
    ----------
    credential:
        the GCP key passed on to ``requestOCR``
    jpg_paths: list
        paths of the letter-of-credit JPEG images
    result_root: str, optional
        directory holding the cached response; no caching when None

    Returns
    -------
    the vision API result, either loaded from the cache or freshly requested

    Raises
    ------
    AssertionError
        when ``jpg_paths`` is not a list, or ``result_root`` is neither None
        nor a string
    """
    assert isinstance(jpg_paths,
                      list), '[E] "jpg_paths" must be instance of list'

    cached = None
    cache_path = None
    if result_root is not None:
        assert isinstance(
            result_root, str), '[E] "response_path" must be instance of string'
        cache_path = os.path.join(result_root, 'vision_result.json')
        cmLog('[I] Reading existed ocr response from {} ...'.format(
            cache_path))
        if os.path.exists(cache_path):
            cached = utils.loadFileIfExisted(cache_path)

    if cached is not None:
        return cached

    cmLog('[I] Sending ocr request to Google Vision API...')
    fresh = requestOCR(credential, jpg_paths)

    # The message is logged even when there is no cache path to write to.
    cmLog('[I] Saving Google Vision API ocr response to {} ...'.format(
        cache_path))
    if cache_path is not None:
        with open(cache_path, 'w') as outfile:
            json.dump(fresh,
                      outfile,
                      ensure_ascii=False,
                      indent=2,
                      sort_keys=True)
    return fresh
def get_insurance(content):
    """
    Derive the insurance instruction from the trade term found by get_terms.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    bool or str
        False for the terms EXW, FCA, CPT, FAS, FOB and CFR. For CIP, DAT,
        DAP, DDP and CIF a warning text asking to cross-check the document
        requirements. A warning text as well when the trade term itself is a
        warning or is not one of the terms above. Warnings are also logged.
    """
    term = get_terms(content)

    if '[W]' in term:
        message = '[W] 保險指示: 交易條件有誤 無法判斷'
        cmLog(message)
        return message

    if term in ('EXW', 'FCA', 'CPT', 'FAS', 'FOB', 'CFR'):
        return False

    if term in ('CIP', 'DAT', 'DAP', 'DDP', 'CIF'):
        message = '[W] 保險指示: {} 需要再從 Document Req 中確認一致性'.format(term)
    else:
        message = '[W] 保險指示: 交易條件 {} 有誤:Unknown error'.format(term)
    cmLog(message)
    return message
def get_presented_in_7_days(content):
    """
    Get the presentation period in days (48), defaulting to 21.

    By request the number is reported whenever one is present; it is not
    compared against seven days.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str or int
        the first run of digits in the 48 text as a string, or an empty
        string when the text holds no digits. The integer 21 when 48 is
        missing (the default is logged).
    """
    key = '48'
    if key not in content:
        cmLog('[W] 應要求 預設 21 天')
        return 21

    digit_runs = re.findall(r'\d+', content[key])
    return digit_runs[0] if digit_runs else ""
def get_other_docs(content):
    """
    Get the additional conditions text (47A).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    dict
        a single-entry dictionary whose key 'original_47A' maps to the 47A
        text, or to a warning text (also logged) when 47A is missing
    """
    if '47A' in content:
        return {'original_47A': content['47A']}

    cmLog("[W] 提單: Missing 47A")
    return {'original_47A': "[W] 提單: Missing 47A"}
def get_destination(content):
    """
    Get the destination (44B and 44F).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the 44B text followed directly by the 44F text (no separator is
        inserted) when both exist, or whichever one exists. When neither
        exists, a warning is logged and the warning text is returned.
    """
    destination = content['44B'] if "44B" in content else None

    if '44F' in content:
        final_place = content['44F']
        if destination is None:
            destination = final_place
        else:
            destination += final_place

    if destination is None:
        cmLog('[W] 目的港: Missing 44B and 44F (預設空值)')
        destination = "[W] 目的港: Missing 44B and 44F (預設空值)"
    return destination
def get_beneficiary_name(content):
    """
    Get the beneficiary name (59).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        'FORMOSA PLASTICS CORPORATION' when that name occurs in the field
        (line breaks treated as spaces, case-insensitive), otherwise the raw
        field text. When 59 is missing, a warning is logged and the warning
        text is returned.
    """
    if '59' not in content:
        cmLog('[W] 受益人名稱: Missing 59 (必要欄位)')
        return '[W] 受益人名稱: Missing 59 (必要欄位)'

    expected = 'FORMOSA PLASTICS CORPORATION'
    raw_name = content['59']
    if expected in raw_name.replace('\n', ' ').upper():
        return expected
    return raw_name
def get_terms(content):
    """
    Get the trade term (Incoterm) from the goods description (45A, 454, 45B).

    When the text contains the word INCOTERMS, the first known term found in
    what follows that word is used. Otherwise the text is split into
    alphabetic words and must contain exactly one known term.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the three-letter term, or a warning text (also logged) when no term
        or more than one term is found, or when none of the keys exists
    """
    incoterms = [
        'CIP', 'DAT', 'DAP', 'DDP', 'CIF', 'EXW', 'FCA', 'CPT', 'FAS', 'FOB',
        'CFR'
    ]

    # Lookup priority is 45A, then 454, then 45B. The 454 branch used to read
    # content['45A'], which is always a KeyError there because 45A is absent.
    # NOTE(review): 454 is presumably a mis-recognized "45A" - confirm.
    source_key = next(
        (code for code in ('45A', '454', '45B') if code in content), None)
    if source_key is None:
        cmLog('[W] 交易條件: Missing 45A')
        return '[W] 交易條件: Missing 45A'

    termtext = content[source_key]
    value = None

    if 'INCOTERMS' in termtext:
        # Substring search, in list order, in the text after the first
        # occurrence of the word.
        after_keyword = termtext.split('INCOTERMS')[1]
        value = next((term for term in incoterms if term in after_keyword),
                     None)

    if value is None:
        words = [w for w in re.split('[^a-zA-Z]', termtext) if w != ""]
        found = set(incoterms).intersection(words)
        if len(found) == 1:
            value = found.pop()
        elif not found:
            cmLog('[W] 交易條件: no term are found in 45A: {}'.format(termtext))
            value = '[W] 交易條件: no term are found in 45A: {}'.format(
                termtext)
        else:
            cmLog('[W] 交易條件: More than one term are found in 45A: {}'.format(
                termtext))
            value = '[W] 交易條件: More than one term are found in 45A: {}'.format(
                termtext)
    return value
def get_movement(content):
    """
    Get the movement (shipping method) text from 45A, 45B or 454.

    Only the first of those keys that exists is inspected.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information

    Returns
    -------
    str
        the field text when it contains "contain" (case-insensitive).
        Otherwise a warning text, and a different warning text when none of
        the keys exists. Warnings are also logged.
    """
    not_found = "[W] 裝船方式: unable to find movement info in 45A (此欄位不需要看?)"

    for code in ('45A', '45B', '454'):
        if code not in content:
            continue
        goods_text = content[code]
        if re.findall('contain', goods_text, re.IGNORECASE):
            return goods_text
        # The first existing key decides; later keys are not consulted.
        cmLog(not_found)
        return not_found

    missing = "[W] 裝船方式: Missing 45A (此欄位不需要看?)"
    cmLog(missing)
    return missing
def extractSwiftsInfo(self, config, swifts_config, detail=True):
    """
    Collect the text, bounding boxes and page numbers of every swift code.

    Parameters
    ----------
    config: dict
        bank configuration; 'main_body', 'line_height' and 'swift_regex' are
        read from it
    swifts_config: dict
        configuration whose 'swift_codes' list holds items with a 'code' and
        an optional 'code2'
    detail: bool
        when False, ``self.swifts_info`` keeps only the codes whose text is
        not empty

    Returns
    -------
    dict
        one entry per configured code with the keys 'text', 'boundingbox'
        and 'page'. The unfiltered dictionary is returned regardless of
        ``detail``; only ``self.swifts_info`` is filtered.
    """
    super().extractSwiftsInfo(config)
    body_config = config['main_body']
    line_height = config['line_height']
    swift_regex = config['swift_regex']
    swifts = swifts_config['swift_codes']
    ### Initialize the swift code dictionary with "nothing found" placeholders
    swifts_result = {}
    for item in swifts:
        swifts_result[item['code']] = {
            'text': '',
            'boundingbox': [[-1, -1, -1, -1]],
            'page': [-1]
        }
        if 'code2' in item.keys():
            swifts_result[item['code2']] = {
                'text': '',
                'boundingbox': [[-1, -1, -1, -1]],
                'page': [-1]
            }
    p_index_list = [page['index'] for page in body_config]
    number_pages = self.visdoc.getNumberOfPages()
    # Carried from page to page through reformatSwiftInfo.
    # NOTE(review): presumably lets a code's content continue on the next
    # page - confirm against reformatSwiftInfo.
    last_found = None
    for p in range(0, number_pages):
        ### Get the boundingbox for the specific page index; pages without
        ### their own entry use the entry whose index is 'n'
        target_box = None
        if p in p_index_list:
            tmp_idx = p_index_list.index(p)
        else:
            tmp_idx = p_index_list.index('n')
        target_box = body_config[tmp_idx]['boundingbox']
        ### Get the word-level objects inside that box
        objectList = self.visdoc.getObjectInBoundaryInPage(
            p, target_box, depth=visionapi.VisionObject.DEPTH.WORDS)
        ### Extract swift code information from the objects
        tmp_result, last_found = self.reformatSwiftInfo(
            objectList,
            swifts_result.keys(),
            swift_regex,
            last_found,
            line_height=line_height)
        ### Merge and clean up the extracted information
        for key, value in tmp_result.items():
            texts = tmp_result[key]['text'].strip() + '\n'
            boxes = visionapi.VisionObject.fuseBoundingBox(
                tmp_result[key]['boundingbox'])
            try:
                # NOTE(review): shifts the first coordinate by 7; the reason
                # for the offset is not visible here - confirm.
                boxes[0] = boxes[0] - 7
                swifts_result[key]['text'] += texts
                # The first hit replaces the placeholder entry; later hits
                # (e.g. on following pages) are appended.
                if swifts_result[key]['boundingbox'] == [[
                        -1, -1, -1, -1
                ]] and swifts_result[key]['page'] == [-1]:
                    swifts_result[key]['boundingbox'] = [boxes]
                    swifts_result[key]['page'] = [p]
                else:
                    swifts_result[key]['boundingbox'].append(boxes)
                    swifts_result[key]['page'].append(p)
            except KeyError as e:
                # A key absent from swifts_result is logged and skipped.
                cmLog(
                    '[W] Swift code {} is not in the config file. Content: {}'
                    .format(key, texts))
    if not detail:
        # Keep only the codes for which some text was found.
        final_result = {}
        for key, value in swifts_result.items():
            if len(value['text']) != 0:
                final_result[key] = value
        self.swifts_info = final_result
    else:
        self.swifts_info = swifts_result
    return swifts_result
def _fillLcNoFromSwift(final_result):
    """Copy the L/C number found after "DOCUMENTARY CREDIT N..." in swift
    code 20 (else 21) into the header's lc_no text."""
    marker = 'DOCUMENTARYCREDITN'
    for code in ('code_20', 'code_21'):
        swift_text = final_result['swifts'][code]['text']
        if marker not in swift_text.upper().replace(" ", ""):
            continue
        pieces = swift_text.replace(':', '\n').split('\n')
        for i, piece in enumerate(pieces):
            # Spaces are stripped before comparing, so the marker must not
            # contain spaces either (the code_21 branch used to search for
            # 'DOCUMENTARY CREDIT N' and therefore never matched).
            if marker in piece.upper().replace(" ", ""):
                # The number is the piece right after the label; skip when
                # the label is the last piece instead of raising IndexError.
                if i + 1 < len(pieces):
                    final_result['header']['lc_no']['text'] = pieces[
                        i + 1].strip()
                break
        # Only the first code that carries the marker is consulted.
        return


def _applyRuleBasedFixes(final_result, bank_name):
    """Apply the rule-based touch-ups to a successful evaluation result."""
    # Prefix the swift keys for the C# application (C# cannot read keys that
    # start with "_" or a digit).
    final_result['swifts'] = {
        'code_' + key: value
        for key, value in final_result['swifts'].items()
    }

    header = final_result['header']
    # The letter O inside the L/C number is replaced by the digit 0.
    header['lc_no']['text'] = header['lc_no']['text'].replace('O', '0')

    # An empty applicant falls back to swift code 50.
    if header['applicant']['text'] == '':
        header['applicant']['text'] = final_result['swifts']['code_50']['text']

    # Specialized for mega bank.
    if bank_name == 'mega':
        header['advising_no_of_bank']['boundingbox'] = [1634, 470, 2520, 660]
        # An empty lc_no falls back to swift code 20 or 21 (mega only).
        if header['lc_no']['text'] == '':
            _fillLcNoFromSwift(final_result)


def annotateCreditLetter(credential,
                         division_code,
                         jpg_path_list,
                         result_root,
                         bank_name=None):
    """
    Run the whole letter-of-credit analysis and return the final result.

    Steps: validate the parameters, identify the bank from the first page
    when ``bank_name`` is None, enhance the images, obtain the OCR response,
    extract header and swift information with the bank's config, evaluate the
    checklist, apply the rule-based touch-ups and save ``checklist.json``.

    Parameters
    ----------
    credential:
        the GCP key used for the vision API
    division_code:
        accepted for interface compatibility; not used by this function
    jpg_path_list: list
        paths of the letter-of-credit JPEG images
    result_root: str
        output directory; ``<result_root>/tmp`` is created and removed again
        while identifying the bank
    bank_name: str, optional
        bank identifier selecting ``./configs/<bank_name>_config.yaml``;
        identified automatically when None or when it is not listed in the
        general config

    Returns
    -------
    dict
        the evaluation result with every swift key prefixed by 'code_', or
        ``{'error': ...}`` when the general config or the bank config cannot
        be loaded. The touch-ups are applied to successful results only.
    """
    # Validate all parameters
    general_path = os.path.join('./configs', 'general.yaml')
    validateAllParameters(credential, general_path, jpg_path_list, result_root)
    general = utils.loadFileIfExisted(general_path)

    # Auto-identify the bank name from the first page only
    if bank_name is None:
        jpg_path_list_n = augm.augmentBatchImages([jpg_path_list[0]],
                                                  bank_name)
        os.mkdir(result_root + '/tmp')
        vision_results = retrieveVisionResponse(credential, jpg_path_list_n,
                                                result_root + '/tmp')
        vision_doc = visionapi.VisionDocument.createWithVisionResponse(
            vision_results)
        clformatted = formatter.GeneralCLFormatter(vision_doc)
        # Uses the bank bounding boxes preset in general.yaml.
        bank_name = clformatted.identifyBankName(general)
        for path in jpg_path_list_n:
            os.remove(path)
        shutil.rmtree(result_root + '/tmp')
        cmLog('[I] Auto-identified bank name: {}'.format(bank_name))

    # Preprocess the images for enhancement
    cmLog('[I] Preprocessing image for enhencement ...')
    jpg_path_list = augm.augmentBatchImages(jpg_path_list, bank_name)

    # Send the images to the Google Vision API and save the response
    vision_results = retrieveVisionResponse(credential, jpg_path_list,
                                            result_root)

    # Organize the vision API result
    cmLog('[I] Preparing and organizing ocr response ...')
    vision_doc = visionapi.VisionDocument.createWithVisionResponse(
        vision_results)

    # Initialize a formatter
    final_result = {}
    clformatted = formatter.GeneralCLFormatter(vision_doc)

    # The general config must exist
    if general is None:
        # Log the path that failed to load (the loaded value is just None).
        cmLog('[C] Unable to find general config file at: {}'.format(
            general_path))
        final_result = {
            'error':
            '[E] Unable to find config file with bank: {} for document at: {}'.
            format(bank_name, jpg_path_list[0])
        }
    else:
        if 'bank_titles' not in general:
            print('bank_titles')
        bank_list = [b['name'] for b in general['bank_titles']]
        if bank_name.lower() not in bank_list:
            bank_name = clformatted.identifyBankName(general)
            cmLog('[I] Auto-identified bank name: {}'.format(bank_name))
        config_path = os.path.join('./configs', bank_name + '_config.yaml')
        config = utils.loadFileIfExisted(config_path)

        # The bank config must exist
        if config is None:
            cmLog(
                '[C] Unable to find config file with bank: {} for document at: {}'
                .format(bank_name, jpg_path_list[0]))
            final_result = {
                'error':
                '[E] Unable to find config file with bank: {} for document at: {}'
                .format(bank_name, jpg_path_list[0])
            }
        else:
            cmLog(
                '[I] Extracting Header and Swift codes for bank {} ...'.format(
                    bank_name))
            clformatted.extractHeaderInfo(config)
            clformatted.extractSwiftsInfo(config, general)
            cmLog('[I] Evaluating letter of credit ...')
            evaluated = evaluator.CLEvaluator(clformatted)
            evaluated.evaluate_checklist(config, general)
            final_result = evaluated.dumpToDict()
            # An error result has no 'swifts' or 'header' to touch up, so the
            # fixes run on this success path only.
            _applyRuleBasedFixes(final_result, bank_name)

    # Save checklist.json
    if result_root is not None:
        result_path = os.path.join(result_root, 'checklist.json')
        cmLog('[I] Saving evaluation result in {} ...'.format(result_path))
        with open(result_path, 'w') as outfile:
            json.dump(final_result, outfile, ensure_ascii=False, indent=2)
    return final_result
def _find_first_line_with_keyword(lines, keywords):
    """Return the first line containing any keyword (case-insensitive), or
    None when no line does."""
    upper_keywords = [k.upper() for k in keywords]
    for line in lines:
        upper_line = line.upper()
        if any(k in upper_line for k in upper_keywords):
            return line
    return None


def get_shipping_docs(content, config):
    """
    Evaluate the required shipping documents (46A, 464 or 46B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information
    config: dict
        the general config; ``config['req_docs']`` supplies 'items' (each
        with a 'name' and a list of 'keys'), 'paragraph_patterns' and
        'quantity_patterns' (with 'original', 'copy' and 'fold')

    Returns
    -------
    dict
        one entry per required document name, each a dictionary with
        'original' and 'copies' (counts), 'warn' (warning text or '') and
        'text' (the matched line, or '' when no line matched). When none of
        the keys exists, every entry has zero counts and a "missing" warning
        as its text, and that warning is logged once.
    """
    res_req_docs = {}
    req_docs = config['req_docs']
    req_items = req_docs['items']
    paragraph_pat = req_docs['paragraph_patterns']
    quantity_pat = req_docs['quantity_patterns']

    # Lookup priority is 46A, then 464, then 46B. The 464 branch used to read
    # content['46A'], which is always a KeyError there because 46A is absent.
    # NOTE(review): 464 is presumably a mis-recognized "46A" - confirm.
    source_key = next(
        (code for code in ('46A', '464', '46B') if code in content), None)

    # Without any of the keys, answer with an empty response per item.
    if source_key is None:
        for item in req_items:
            res_req_docs[item['name']] = {
                'original': 0,
                'copies': 0,
                'warn': '',
                'text': "[W] 提單: Missing key: 46A"
            }
        cmLog("[W] 提單: Missing key: 46A")
        return res_req_docs

    temp = reformatInParagraphs(content[source_key], source_key,
                                paragraph_pat)
    splitted = re.split(r'\n', temp)

    # For each required document, take the first line mentioning one of its
    # keywords and read the quantities from that line.
    for item in req_items:
        cur_key_name = item['name']
        candidate_line = _find_first_line_with_keyword(splitted, item['keys'])

        warn_str = ''
        org_res, cop_res = 0, 0
        if candidate_line is not None:
            # Bring the line into a unified format before evaluating it.
            target_line = utils.text2number(candidate_line)
            target_line = replaceFullset(target_line, cur_key_name)
            target_line = replaceDuplicates(target_line)
            target_line = replaceSpecialCase(target_line)
            org_res, cop_res = detect_quantity_with_patterns(
                target_line, quantity_pat['original'], quantity_pat['copy'],
                quantity_pat['fold'])
            # The keyword was found but no quantity could be read: assume at
            # least one original.
            if org_res + cop_res <= 0:
                org_res = 1
                cop_res = 0
                warn_str = '[W] 無法辨識數量,預設為 1 正本'

        res_req_docs[cur_key_name] = {
            'original': org_res,
            'copies': cop_res,
            'warn': warn_str,
            'text': candidate_line if candidate_line is not None else ''
        }
    return res_req_docs