예제 #1
0
def get_transferable(content):
    """
    Tell whether the credit is marked transferable (40A, else 40B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        True when the word TRANSFERABLE (optionally with single spaces
        between its letters) appears in the field, False otherwise.
        False is also returned, after logging a warning, when neither
        40A nor 40B is present.
    """
    # Single optional spaces between letters tolerate text that was split
    # inside the word.
    spaced_word = 'T ?R ?A ?N ?S ?F ?E ?R ?A ?B ?L ?E'
    # 40A takes precedence; 40B is only consulted when 40A is absent.
    for code in ('40A', '40B'):
        if code in content:
            return re.search(spaced_word, content[code].upper()) is not None

    cmLog('[W] 可否轉讓: Missing 40A or 40B (必要欄位)')
    return False
예제 #2
0
def get_at_sight(content):
    """
    Get the draft tenor description (42C, else 42P).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        'AT SIGHT' when the field mentions "at sight" (case-insensitive),
        otherwise the raw field text. A warning string is returned (and
        logged) when neither 42C nor 42P is present.
    """
    if '42C' in content:
        tenor = content['42C']
    elif '42P' in content:
        tenor = content['42P']
    else:
        cmLog('[W] 是否即期: Missing 42C')
        return '[W] 是否即期: Missing 42C'

    if re.search('at sight', tenor, re.IGNORECASE):
        return 'AT SIGHT'
    return tenor
예제 #3
0
def get_revocable(content):
    """
    Tell whether the credit is revocable (40A, else 40B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        True when the word IRREVOCABLE (optionally with single spaces
        between its letters) is NOT found in the field, False when it is.
        False is also returned, after logging a warning, when neither
        40A nor 40B is present.
    """
    spaced_word = 'I ?R ?R ?E ?V ?O ?C ?A ?B ?L ?E'
    # 40A takes precedence; 40B is only consulted when 40A is absent.
    for code in ('40A', '40B'):
        if code in content:
            return re.search(spaced_word, content[code].upper()) is None

    cmLog('[W] 可否撤銷: Missing 40A or 40B (必要欄位)')
    return False
예제 #4
0
def get_expiry_date(content):
    """
    Get the expiry date and place (31D); warn when the place is not Taiwan.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        ``[date, place]`` where ``date`` is the first run of digits found in
        the field (spaces removed first; empty string when there is none) and
        ``place`` is the remaining text, or a warning string when that text
        does not mention "taiwan". A one-element list holding a warning is
        returned when 31D is missing.
    """
    key = '31D'
    if key not in content:
        cmLog('[W] 信用狀有效期限: Missing 31D (必要欄位)')
        return ['[W] 信用狀有效期限: Missing 31D (必要欄位)']

    # Some cases contain spaces in between the digits, so remove all spaces.
    compact = content[key].replace(' ', '')

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    digit_runs = re.findall(r'\d+', compact)
    date = digit_runs[0] if digit_runs else ""

    # Whatever is left once the date digits are removed is treated as place.
    place = compact.replace(date, '')
    if not re.findall('taiwan', place, re.IGNORECASE):
        cmLog('[W] 地點『{}』不是台灣!'.format(place))
        place = '[W] 地點『{}』不是台灣!'.format(place)
    return [date, place]
예제 #5
0
def get_transshipment(content):
    """
    Get the transshipment instruction (43T).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        'NOT ALLOWED' when the field contains NOT ALLOWED / FORBIDDEN /
        PROHIBITED, 'ALLOWED' when it contains ALLOWED / PERMITTED / YES
        (case-insensitive), otherwise a warning string. A warning string is
        also returned when 43T is missing.
    """
    if '43T' not in content:
        cmLog("[W] 可否轉運: Missing 43T, 預設『Allow』")
        return "[W] 可否轉運: Missing 43T, 預設『Allow』"

    instruction = content['43T'].upper()

    # The negative keywords must be tested first: 'NOT ALLOWED' also
    # contains 'ALLOWED'.
    if any(word in instruction
           for word in ('NOT ALLOWED', 'FORBIDDEN', 'PROHIBITED')):
        return 'NOT ALLOWED'
    if any(word in instruction for word in ('ALLOWED', 'PERMITTED', 'YES')):
        return 'ALLOWED'

    # NOTE(review): this message talks about partial shipment although the
    # function handles transshipment -- looks copy-pasted; confirm before
    # changing, callers may rely on the text.
    cmLog("[W] 可否分批裝運: 無法辨識, 預設『Allow』")
    return "[W] 可否分批裝運: 無法辨識, 預設『Allow』"
예제 #6
0
def get_UCP660(content):
    """
    Tell whether the applicable rules (40E) refer to UCP 600.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        True when the field contains "latest" or "600" (case-insensitive).
        False, after logging a warning, when it does not or when 40E is
        missing.
    """
    if '40E' not in content:
        cmLog('[W] 是否依國際商會2007修定之UCP600: Missing 40E')
        return False

    rules = content['40E']
    lowered = rules.lower()
    if 'latest' in lowered or '600' in lowered:
        return True

    cmLog('[W] 是否依國際商會2007修定之UCP600: {} is not the latest version (UCP 600)'
          .format(rules))
    return False
예제 #7
0
def get_nominated_bank(content):
    """
    Get the nominated bank description (41A, else 41D).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        'Any bank' when the field contains the exact text 'ANY BANK',
        otherwise the raw field text. A warning string is returned (and
        logged) when neither 41A nor 41D is present.
    """
    # 41A takes precedence; 41D is only consulted when 41A is absent.
    for code in ('41A', '41D'):
        if code in content:
            bank = content[code]
            # The match is case-sensitive, exactly as the field is given.
            return 'Any bank' if 'ANY BANK' in bank else bank

    cmLog('[W] 指定押匯銀行: Missing 41A and 41D (必要欄位)')
    return '[W] 指定押匯銀行: Missing 41A and 41D (必要欄位)'
예제 #8
0
def get_nominated_agent(content):
    """
    Get the nominated agent.

    No swift field is read; a fixed warning is always returned and logged.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information (unused)

    Returns
    ----------
        the fixed warning string for this item
    """
    message = "[W] 指定代理商: (此欄位不需要看?)"
    cmLog(message)
    return message
예제 #9
0
def get_latest_negociation(content):
    """
    Get the latest negotiation date.

    No swift field is read; a fixed warning is always logged and returned.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information (unused)

    Returns
    ----------
        the fixed warning string for this item
    """
    message = '[W] 押匯期限: 不重要,應該與信用狀有效期限相同'
    cmLog(message)
    return message
예제 #10
0
def get_quantity(content):
    """
    Get the goods quantity from the goods field (45A, 454 or 45B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        a list of the number strings found. The "QUANTITY: <number>" form is
        tried first; if absent, numbers followed by a unit ("MT"/"MTS" or
        "metric tons") are used. A one-element list holding a warning string
        is returned when nothing is found or when the field is missing.
    """
    # Lookup order is 45A, then 454, then 45B. The 454 branch used to read
    # content['45A'], which raised KeyError whenever only 454 was present.
    source_code = next(
        (code for code in ('45A', '454', '45B') if code in content), None)
    if source_code is None:
        cmLog('[W] 貨品數量: Missing 45A')
        return ['[W] 貨品數量: Missing 45A']

    # Letter 'O's in front of the MT unit are turned back into zeros.
    text = content[source_code].replace('OOMT', '00MT')

    # Raw strings: '\d', '\.', '\(' in plain literals are invalid escapes.
    numbers_pat = r'(\d+[\.,]?\d{0,3}?[\.,]?\d*)'

    # Find the quantity by its label.
    labelled = re.findall(r'QUANTITY: ?{} *\n?'.format(numbers_pat), text,
                          re.IGNORECASE)
    if labelled:
        return labelled

    # Find the quantity by the unit that follows the number. Every unit
    # pattern is tried; a later pattern that matches overrides an earlier one.
    value = None
    for unit in (r'mt[s]? ?\(?.*\)?', 'metric tons'):
        hits = re.findall(numbers_pat + ' *' + unit, text, re.IGNORECASE)
        if hits:
            value = hits

    if value is None:
        cmLog('[W] 貨品數量: Not found in 45A -> ' + text)
        value = ['[W] 貨品數量: Not found in 45A -> ' + text]
    return value
예제 #11
0
def requestOCR(credential, jpg_paths):
    '''
    Purpose : call the vision api for every image
    Input   : 1. gcp credential 2. paths of the letter-of-credit JPEG images
    Output  : dict mapping each image's base file name to its vision api
              response
    Note    : results are keyed by base file name, so two paths that share a
              file name overwrite each other. A response is stored even when
              the api reports an error; the error is only logged.
    '''
    visionapi.set_vision_credential(credential)

    responses_by_name = {}
    for image_path in jpg_paths:
        response, api_error = visionapi.annotateDocument(image_path)
        responses_by_name[os.path.basename(image_path)] = response
        if api_error is not None:
            cmLog('[E] Vision API Error: {}'.format(api_error))
    return responses_by_name
예제 #12
0
def get_descrption(content, productnames):
    """
    Get the product description from the goods field (45A, 454 or 45B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code
    productnames: iterable of str
        known product names; the first one contained in the field text
        (newlines replaced by spaces) is returned

    Returns
    ----------
        the product name found, an empty string (after logging a warning)
        when none is found, or a warning string when the field is missing.
    """
    # Lookup order is 45A, then 454, then 45B. The 454 branch used to read
    # content['45A'], which raised KeyError whenever only 454 was present.
    source_code = next(
        (code for code in ('45A', '454', '45B') if code in content), None)
    if source_code is None:
        cmLog('[W] 貨品名稱: Missing 45A')
        return '[W] 貨品名稱: Missing 45A'

    temp = content[source_code].replace('\n', ' ')
    value = ""

    # Find the name by its label.
    # NOTE(review): every '\n' has just been replaced by a space, so this
    # pattern (which requires a trailing '\n') cannot match and the lookup
    # below is what actually decides the result. Confirm the intended order
    # before changing it.
    labelled = re.findall('GOODS DESCRIPTION: ?(.*)\n', temp, re.IGNORECASE)
    if labelled:
        value = labelled[0]
    else:
        # Find the name by looking up the known product names, in order.
        for item in productnames:
            if item in temp:
                value = item
                break

    if value == "":
        cmLog('[W] 貨品名稱: Not found in 45A ->' + temp)
    return value
예제 #13
0
def get_name_correctness(content):
    """
    Check that the beneficiary (59) is FORMOSA PLASTICS CORPORATION.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        True when the field, with newlines turned into spaces and upper-cased,
        contains 'FORMOSA PLASTICS CORPORATION'; False otherwise, and also
        (after logging a warning) when 59 is missing.
    """
    if '59' not in content:
        cmLog('[W] 受益人名稱: Missing 59 (必要欄位)')
        return False

    beneficiary = content['59'].replace('\n', ' ').upper()
    return 'FORMOSA PLASTICS CORPORATION' in beneficiary
예제 #14
0
def get_mgmt_mark_up(content):
    """
    Get the mark-up handling (42P).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        always a warning string (also logged): it embeds the 42P text when
        the field is present, or reports it as missing otherwise.
    """
    if '42P' in content:
        message = '[W] 溢價處理: {} (目前沒看到過,有待商確)'.format(content['42P'])
    else:
        message = '[W] 溢價處理: Missing 42P (目前沒看到過,有待商確)'
    cmLog(message)
    return message
예제 #15
0
def get_interest(content):
    """
    Get the interest description (39C).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        always a warning string (also logged): it embeds the 39C text when
        the field is present, or reports it as missing otherwise.
    """
    if '39C' in content:
        message = '[W] 利息負擔: 39C -> {}'.format(content['39C'])
    else:
        message = '[W] 利息負擔: Missing 39C 預設『客戶負擔』'
    cmLog(message)
    return message
예제 #16
0
def get_amount(content):
    """
    Get the credit amount (32B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the field text with newlines removed when it mentions USD
        (case-insensitive, single spaces between the letters tolerated);
        otherwise a warning string. A warning string is also returned when
        32B is missing.
    """
    if '32B' not in content:
        cmLog('[W] 信用狀金額: Missing 32B (必要欄位)')
        return '[W] 信用狀金額: Missing 32B (必要欄位)'

    amount = content['32B'].replace('\n', '')
    if re.search('u ?s ?d', amount, re.IGNORECASE):
        return amount

    message = '[W] 信用狀金額: {} is not in USD'.format(amount)
    cmLog(message)
    return message
예제 #17
0
def get_confirmed(content):
    """
    Get the confirmation instruction (49).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        'WITHOUT' when the field contains that word (case-insensitive),
        otherwise the raw field text. A warning string is returned (and
        logged) when 49 is missing.
    """
    if '49' not in content:
        cmLog('[W] 是否保兌: Missing 49 (必要欄位)')
        return '[W] 是否保兌: Missing 49 (必要欄位)'

    instruction = content['49']
    return 'WITHOUT' if 'WITHOUT' in instruction.upper() else instruction
예제 #18
0
def get_latest_shipment(content):
    """
    Get the latest shipment date (44C).

    Only 44C is read by this function.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the first run of digits in the field, an empty string when the field
        holds no digits, or a warning string (also logged) when 44C is
        missing.
    """
    key = '44C'
    if key not in content:
        cmLog('[W] 最後裝船期限: Missing 44C')
        return '[W] 最後裝船期限: Missing 44C'

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    first_digits = re.search(r'\d+', content[key])
    return first_digits.group(0) if first_digits else ""
예제 #19
0
def get_nominated_loading_port(content):
    """
    Get the nominated loading port (44E).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the raw field text when it contains 'ANY' (case-sensitive); otherwise
        a warning string embedding the text. A warning string is also
        returned when 44E is missing.
    """
    if '44E' not in content:
        cmLog('[W] 出口港: Missing 44E')
        return "[W] 出口港: Missing 44E"

    port = content['44E']
    if 'ANY' in port:
        return port

    message = "[W] 出口港 : '{}' does not match expectation".format(port)
    cmLog(message)
    return message
예제 #20
0
def retrieveVisionResponse(credential, jpg_paths, result_root=None):
    '''
    Purpose : front end of requestOCR that reuses a saved response when one
              exists and logs each step
    Input   : 1. gcp credential 2. list of letter-of-credit JPEG image paths
              3. optional folder holding / receiving 'vision_result.json'
    Output  : vision api result, loaded from the saved file when available,
              otherwise freshly requested (and saved when result_root is set)
    '''
    assert isinstance(jpg_paths,
                      list), '[E] "jpg_paths" must be instance of list'

    cache_path = None
    ocr_results = None
    if result_root is not None:
        assert isinstance(
            result_root, str), '[E] "response_path" must be instance of string'
        cache_path = os.path.join(result_root, 'vision_result.json')
        cmLog('[I] Reading existed ocr response from {} ...'.format(
            cache_path))
        if os.path.exists(cache_path):
            ocr_results = utils.loadFileIfExisted(cache_path)

    # A usable saved response short-circuits the api call.
    if ocr_results is not None:
        return ocr_results

    cmLog('[I] Sending ocr request to Google Vision API...')
    ocr_results = requestOCR(credential, jpg_paths)

    # The message is logged even when there is no path to save to.
    cmLog('[I] Saving Google Vision API ocr response to {} ...'.format(
        cache_path))
    if cache_path is not None:
        with open(cache_path, 'w') as outfile:
            json.dump(ocr_results,
                      outfile,
                      ensure_ascii=False,
                      indent=2,
                      sort_keys=True)
    return ocr_results
예제 #21
0
def get_insurance(content):
    """
    Derive the insurance instruction from the trade term found by get_terms.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        False for terms listed as not needing insurance (EXW, FCA, CPT, FAS,
        FOB, CFR); a warning string for terms listed as needing it (CIP, DAT,
        DAP, DDP, CIF), for any other term, and when get_terms itself
        returned a warning.
    """
    term = get_terms(content)

    # get_terms reports its own failures as '[W] ...' strings.
    if '[W]' in term:
        cmLog('[W] 保險指示: 交易條件有誤 無法判斷')
        return '[W] 保險指示: 交易條件有誤 無法判斷'

    if term in ('EXW', 'FCA', 'CPT', 'FAS', 'FOB', 'CFR'):
        return False

    if term in ('CIP', 'DAT', 'DAP', 'DDP', 'CIF'):
        message = '[W] 保險指示: {} 需要再從 Document Req 中確認一致性'.format(term)
    else:
        message = '[W] 保險指示: 交易條件 {} 有誤:Unknown error'.format(term)
    cmLog(message)
    return message
예제 #22
0
def get_presented_in_7_days(content):
    """
    Get the presentation period in days (48); 21 when not specified.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the first run of digits in the field as a string (shown as-is,
        whether or not it is within seven days, as requested), an empty
        string when the field holds no digits, or the integer 21 (after
        logging a warning) when 48 is missing.
    """
    key = '48'
    if key not in content:
        value = 21  # default of 21 days, as requested
        cmLog('[W] 應要求 預設 21 天')
        return value

    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    first_digits = re.search(r'\d+', content[key])
    return first_digits.group(0) if first_digits else ""
예제 #23
0
def get_other_docs(content):
    """
    Get the additional conditions text (47A).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        a dict with the single key 'original_47A' holding the raw field
        text, or a warning string (also logged) when 47A is missing.
    """
    if '47A' in content:
        return {'original_47A': content['47A']}

    missing = {'original_47A': "[W] 提單: Missing 47A"}
    cmLog("[W] 提單: Missing 47A")
    return missing
예제 #24
0
def get_destination(content):
    """
    Get the destination (44B and 44F).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the 44B text followed directly by the 44F text when both exist,
        whichever one exists otherwise, or a warning string (also logged)
        when neither is available.
    """
    destination = content['44B'] if '44B' in content else None

    if '44F' in content:
        if destination is None:
            destination = content['44F']
        else:
            # Both present: 44F is appended with no separator.
            destination += content['44F']

    if destination is not None:
        return destination

    cmLog('[W] 目的港: Missing 44B and 44F (預設空值)')
    return "[W] 目的港: Missing 44B and 44F (預設空值)"
예제 #25
0
def get_beneficiary_name(content):
    """
    Get the beneficiary name (59).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        'FORMOSA PLASTICS CORPORATION' when the field, with newlines turned
        into spaces and upper-cased, contains that name; otherwise the raw
        field text. A warning string is returned (and logged) when 59 is
        missing.
    """
    expected = 'FORMOSA PLASTICS CORPORATION'

    if '59' not in content:
        cmLog('[W] 受益人名稱: Missing 59 (必要欄位)')
        return '[W] 受益人名稱: Missing 59 (必要欄位)'

    raw = content['59']
    if expected in raw.replace('\n', ' ').upper():
        return expected
    return raw
예제 #26
0
def get_terms(content):
    """
    Get the trade term (Incoterm) from the goods field (45A, 454 or 45B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the three-letter term found, or a warning string (also logged) when
        the field is missing, holds no known term, or holds more than one.
    """
    incoterms = [
        'CIP', 'DAT', 'DAP', 'DDP', 'CIF', 'EXW', 'FCA', 'CPT', 'FAS', 'FOB',
        'CFR'
    ]

    # Lookup order is 45A, then 454, then 45B. The 454 branch used to read
    # content['45A'], which raised KeyError whenever only 454 was present.
    source_code = next(
        (code for code in ('45A', '454', '45B') if code in content), None)
    if source_code is None:
        cmLog('[W] 交易條件: Missing 45A')
        return '[W] 交易條件: Missing 45A'
    termtext = content[source_code]

    # First choice: a term appearing in the text between the first and the
    # second 'INCOTERMS' label (or the end). Terms are tried in list order
    # and matched as substrings.
    if 'INCOTERMS' in termtext:
        after_label = termtext.split('INCOTERMS')[1]
        for term in incoterms:
            if term in after_label:
                return term

    # Otherwise look for whole alphabetic words that are known terms; the
    # answer is only accepted when exactly one distinct term is present.
    words = [word for word in re.split('[^a-zA-Z]', termtext) if word != ""]
    found = set(incoterms).intersection(words)
    if len(found) == 1:
        return found.pop()

    if not found:
        message = '[W] 交易條件: no term are found in 45A: {}'.format(termtext)
    else:
        message = '[W] 交易條件: More than one term are found in 45A: {}'.format(
            termtext)
    cmLog(message)
    return message
예제 #27
0
def get_movement(content):
    """
    Get the movement (shipping method) text from 45A, else 45B, else 454.

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code

    Returns
    ----------
        the raw text of the first field present when it mentions "contain"
        (case-insensitive); otherwise a warning string. Only the first field
        present is examined. A warning string is also returned when none of
        the fields exist.
    """
    for code in ('45A', '45B', '454'):
        if code not in content:
            continue
        text = content[code]
        if re.search('contain', text, re.IGNORECASE):
            return text
        # The message names 45A whichever field was actually read.
        cmLog("[W] 裝船方式: unable to find movement info in 45A (此欄位不需要看?)")
        return "[W] 裝船方式: unable to find movement info in 45A (此欄位不需要看?)"

    cmLog("[W] 裝船方式: Missing 45A (此欄位不需要看?)")
    return "[W] 裝船方式: Missing 45A (此欄位不需要看?)"
예제 #28
0
    def extractSwiftsInfo(self, config, swifts_config, detail=True):
        """
        Collect text, bounding boxes and page numbers for every swift code.

        Every page of ``self.visdoc`` is scanned inside the main-body box
        configured for it, the words found are grouped per swift code by
        ``self.reformatSwiftInfo``, and the per-page pieces are accumulated.

        Parameters
        ----------
        config: dict
            bank config; 'main_body', 'line_height' and 'swift_regex' are read
        swifts_config: dict
            config whose 'swift_codes' list gives each item's 'code' and
            optional 'code2'
        detail: bool
            when False, ``self.swifts_info`` keeps only codes with non-empty
            text; when True it keeps every configured code

        Returns
        ----------
            the full per-code dict (always including empty entries, whatever
            ``detail`` is); each entry holds 'text', 'boundingbox' (list of
            boxes) and 'page' (list of page indices)
        """
        super().extractSwiftsInfo(config)

        body_config = config['main_body']
        line_height = config['line_height']
        swift_regex = config['swift_regex']
        swifts = swifts_config['swift_codes']

        ### Initialize swift code dictionary with placeholder entries
        ### (empty text, a single [-1, -1, -1, -1] box, page -1)
        swifts_result = {}
        for item in swifts:
            swifts_result[item['code']] = {
                'text': '',
                'boundingbox': [[-1, -1, -1, -1]],
                'page': [-1]
            }
            if 'code2' in item.keys():
                swifts_result[item['code2']] = {
                    'text': '',
                    'boundingbox': [[-1, -1, -1, -1]],
                    'page': [-1]
                }

        p_index_list = [page['index'] for page in body_config]
        number_pages = self.visdoc.getNumberOfPages()
        # Carried from page to page through reformatSwiftInfo
        # (presumably so a field can continue on the next page -- confirm).
        last_found = None
        for p in range(0, number_pages):
            ### Get the boundingbox for specific page index
            target_box = None
            if p in p_index_list:
                tmp_idx = p_index_list.index(p)
            else:
                # Pages without their own entry use the entry whose index is
                # 'n'; list.index raises ValueError if no such entry exists.
                tmp_idx = p_index_list.index('n')
            target_box = body_config[tmp_idx]['boundingbox']
            ### Get the word objects inside the main-body box of this page
            objectList = self.visdoc.getObjectInBoundaryInPage(
                p, target_box, depth=visionapi.VisionObject.DEPTH.WORDS)
            ### Extract swift code information from the word objects
            tmp_result, last_found = self.reformatSwiftInfo(
                objectList,
                swifts_result.keys(),
                swift_regex,
                last_found,
                line_height=line_height)
            # print (tmp_result, last_found)
            ### Merge and clean up extracted information
            # print (tmp_result)
            for key, value in tmp_result.items():
                texts = tmp_result[key]['text'].strip() + '\n'
                boxes = visionapi.VisionObject.fuseBoundingBox(
                    tmp_result[key]['boundingbox'])
                try:
                    # NOTE(review): shifts the first coordinate of the fused
                    # box by 7; the reason is not visible here -- confirm.
                    boxes[0] = boxes[0] - 7
                    swifts_result[key]['text'] += texts
                    # First hit for this code replaces the placeholder entry;
                    # later hits (other pages) are appended.
                    if swifts_result[key]['boundingbox'] == [[
                            -1, -1, -1, -1
                    ]] and swifts_result[key]['page'] == [-1]:
                        swifts_result[key]['boundingbox'] = [boxes]
                        swifts_result[key]['page'] = [p]
                    else:
                        swifts_result[key]['boundingbox'].append(boxes)
                        swifts_result[key]['page'].append(p)
                except KeyError as e:
                    # The code was extracted but is not one of the configured
                    # codes: its content is logged and dropped.
                    cmLog(
                        '[W] Swift code {} is not in the config file. Content: {}'
                        .format(key, texts))
                    # try:
                    #   swifts_result[key]['boundingbox'].append([-1,-1,-1,-1])
                    #   swifts_result[key]['page'].append(-1)
                    # except:pass

        if not detail:
            # Keep only the codes that actually received text.
            final_result = {}
            for key, value in swifts_result.items():
                if len(value['text']) != 0:
                    final_result[key] = value
            self.swifts_info = final_result
        else:
            self.swifts_info = swifts_result
        # The unfiltered dict is returned in both cases.
        return swifts_result
예제 #29
0
def annotateCreditLetter(credential,
                         division_code,
                         jpg_path_list,
                         result_root,
                         bank_name=None):
    '''
    Purpose : backbone of the letter-of-credit analysis; every sub function
              is called from here and its data handed back
    Input   : 1. gcp credential 2. division code (not used in this function)
              3. list of letter-of-credit JPEG image paths 4. output folder
              5. optional bank name; identified from the first page when None
    Output  : final analysis result as a dict, or a dict with a single
              'error' key when the general config or the bank config cannot
              be loaded
    Note    : when the general config is available the result (including an
              error result) is written to 'checklist.json' under result_root
    '''
    # Validate all parameters
    general_path = os.path.join('./configs', 'general.yaml')
    validateAllParameters(credential, general_path, jpg_path_list, result_root)

    general = utils.loadFileIfExisted(general_path)

    ###
    # . Auto-identified bank name (from an OCR pass over the first page only,
    #   done in a temporary folder that is removed afterwards)
    if bank_name is None:
        jpg_path_list_n = augm.augmentBatchImages([jpg_path_list[0]],
                                                  bank_name)
        os.mkdir(result_root + '/tmp')
        vision_results = retrieveVisionResponse(credential, jpg_path_list_n,
                                                result_root + '/tmp')
        vision_doc = visionapi.VisionDocument.createWithVisionResponse(
            vision_results)
        clformatted = formatter.GeneralCLFormatter(vision_doc)
        # Uses the bank bounding boxes preset in general.yaml to read the
        # bank name.
        bank_name = clformatted.identifyBankName(general)
        for path in jpg_path_list_n:
            os.remove(path)
        shutil.rmtree(result_root + '/tmp')
        cmLog('[I] Auto-identified bank name: {}'.format(bank_name))

    ###
    # . Preprocessing the image for enhancement
    cmLog('[I] Preprocessing image for enhencement ...')
    jpg_path_list = augm.augmentBatchImages(jpg_path_list, bank_name)

    ###
    # . Sending image to Google Vision API and save the response
    vision_results = retrieveVisionResponse(credential, jpg_path_list,
                                            result_root)

    ###
    # . Preparing and organizing vision api's result with config file
    cmLog('[I] Preparing and organizing ocr response ...')
    vision_doc = visionapi.VisionDocument.createWithVisionResponse(
        vision_results)

    ###
    # . Initialize a formatter
    final_result = {}
    clformatted = formatter.GeneralCLFormatter(vision_doc)

    ###
    # . General config must exist
    if general is None:
        # Fixed: the log used to format ``general`` (always None here)
        # instead of the path that could not be loaded.
        cmLog('[C] Unable to find general config file at: {}'.format(
            general_path))
        final_result = {
            'error':
            '[E] Unable to find config file with bank: {} for document at: {}'.
            format(bank_name, jpg_path_list[0])
        }
    else:
        if 'bank_titles' not in general:
            print('bank_titles')
        bank_list = [b['name'] for b in general['bank_titles']]
        if bank_name.lower() not in bank_list:
            bank_name = clformatted.identifyBankName(general)
            cmLog('[I] Auto-identified bank name: {}'.format(bank_name))

        config_path = os.path.join('./configs', bank_name + '_config.yaml')
        config = utils.loadFileIfExisted(config_path)

        ###
        # . Bank config must exist
        if config is None:
            cmLog(
                '[C] Unable to find config file with bank: {} for document at: {}'
                .format(bank_name, jpg_path_list[0]))
            final_result = {
                'error':
                '[E] Unable to find config file with bank: {} for document at: {}'
                .format(bank_name, jpg_path_list[0])
            }
        else:
            cmLog(
                '[I] Extracting Header and Swift codes for bank {} ...'.format(
                    bank_name))
            clformatted.extractHeaderInfo(config)  # analyse the header
            clformatted.extractSwiftsInfo(config,
                                          general)  # analyse the swift codes

            cmLog('[I] Evaluating letter of credit ...')
            evaluated = evaluator.CLEvaluator(clformatted)
            evaluated.evaluate_checklist(config, general)
            final_result = evaluated.dumpToDict()

            ###
            # adding prefix for C# application (C# cannot read key starting with _ or numeric value)
            final_result['swifts'] = {
                'code_' + key: value
                for key, value in final_result['swifts'].items()
            }

            # Fixed: the rule-based touch-ups used to run for the error
            # result too and crashed with KeyError on its missing 'header'.
            _applyRuleBasedFixups(final_result, bank_name)

        ###
        # saving checklist.json
        if result_root is not None:
            result_path = os.path.join(result_root, 'checklist.json')
            cmLog('[I] Saving evaluation result in {} ...'.format(result_path))
            with open(result_path, 'w') as outfile:
                json.dump(final_result, outfile, ensure_ascii=False, indent=2)

    return final_result


def _applyRuleBasedFixups(final_result, bank_name):
    '''Apply the rule-based touch-ups to an evaluated result, in place.'''
    header = final_result['header']
    swifts = final_result['swifts']

    ###
    # 0 replace O
    if 'O' in header['lc_no']['text']:
        header['lc_no']['text'] = header['lc_no']['text'].replace('O', '0')

    ###
    # if applicant is empty replace by code_50
    if header['applicant']['text'] == '':
        header['applicant']['text'] = swifts['code_50']['text']

    ###
    # specialized for mega bank
    if bank_name == 'mega':
        header['advising_no_of_bank']['boundingbox'] = [1634, 470, 2520, 660]

    ###
    # if lc_no is empty replace by code_20 or code_21 ,only for mega
    if header['lc_no']['text'] == '':
        # Label compared with all spaces removed. Fixed: the code_21 branch
        # used to look for 'DOCUMENTARY CREDIT N' (with spaces) in a string
        # whose spaces had been stripped, so it could never match.
        label = 'DOCUMENTARYCREDITN'
        for code in ('code_20', 'code_21'):
            swift_text = swifts[code]['text']
            if label not in swift_text.upper().replace(" ", ""):
                continue
            parts = swift_text.replace(':', '\n').split('\n')
            for i, part in enumerate(parts):
                if label in part.upper().replace(" ", ""):
                    # The number is the piece right after the label; guard
                    # against the label being the last piece.
                    if i + 1 < len(parts):
                        header['lc_no']['text'] = parts[i + 1].strip()
                    break
            # Only the first code carrying the label is used.
            break
예제 #30
0
def get_shipping_docs(content, config):
    """
    Evaluate the required shipping documents (46A, 464 or 46B).

    Parameters
    ----------
    content: dict
        a dictionary of swift code information, keyed by swift code
    config: dict
        general config; ``config['req_docs']`` supplies 'items' (each with a
        'name' and a list of 'keys'), 'paragraph_patterns' and
        'quantity_patterns' ('original', 'copy', 'fold')

    Returns
    ----------
        a dictionary keyed by document name; each value holds the number of
        'original' and 'copies', a 'warn' string and the matched 'text'.
    """
    res_req_docs = {}
    req_docs = config['req_docs']
    req_items = req_docs['items']
    paragraph_pat = req_docs['paragraph_patterns']
    quantity_pat = req_docs['quantity_patterns']

    ###
    # Lookup order is 46A, then 464, then 46B. The 464 branch used to read
    # content['46A'], which raised KeyError whenever only 464 was present.
    source_code = next(
        (code for code in ('46A', '464', '46B') if code in content), None)

    ###
    # If none of the fields exists then return an empty response per item
    if source_code is None:
        for item in req_items:
            res_req_docs[item['name']] = {
                'original': 0,
                'copies': 0,
                'warn': '',
                'text': "[W] 提單: Missing key: 46A"
            }
        cmLog("[W] 提單: Missing key: 46A")
        return res_req_docs

    temp = reformatInParagraphs(content[source_code], source_code,
                                paragraph_pat)
    lines = re.split(r'\n', temp)

    ###
    # Loop through all required document items and look for the first line
    # that contains one of the item's keywords (case-insensitive)
    for item in req_items:
        cur_key_name = item['name']
        cur_key_list = item['keys']

        matched_line = None
        for line in lines:
            upper_line = line.upper()
            if any(k.upper() in upper_line for k in cur_key_list):
                matched_line = line
                break

        org_res, cop_res, warn_str = 0, 0, ''
        if matched_line is not None:
            ###
            # Preprocessing candidate line so it will have unified format for evaluation
            target_line = utils.text2number(matched_line)
            target_line = replaceFullset(target_line, cur_key_name)
            target_line = replaceDuplicates(target_line)
            target_line = replaceSpecialCase(target_line)
            org_res, cop_res = detect_quantity_with_patterns(
                target_line, quantity_pat['original'], quantity_pat['copy'],
                quantity_pat['fold'])

            ###
            # if original and copies are both zero but a keyword was caught,
            # then it is assumed to have at least one original
            if org_res + cop_res <= 0:
                org_res = 1
                cop_res = 0
                warn_str = '[W] 無法辨識數量,預設為 1 正本'

        res_req_docs[cur_key_name] = {
            'original': org_res,
            'copies': cop_res,
            'warn': warn_str,
            'text': '' if matched_line is None else matched_line
        }
    return res_req_docs