Python OCR 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: pdf_scanner_ocr

메소드/함수: OCR

hotexamples.com에서의 예제들: 5

Python OCR - 5개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python pdf_scanner_ocr.OCR의 실제 사용 예제 가운데 평가가 가장 높은 것들입니다. 예제를 평가하면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

0

파일 보기

파일: regex_file.py 프로젝트: demaniou/AUEB-Invoicer

def regex_MADISON(language_tag, image_path='PDF_TEST.jpg'):
    """Extract the invoice fields of a MADISON invoice from OCR output.

    The text returned by ``pdf_scanner_ocr.OCR`` is lower-cased, stripped of
    newlines and converted into a comma-separated token stream; the fields
    are then cut out of that stream with regular expressions.

    Args:
        language_tag: Language argument forwarded unchanged as the second
            argument of ``pdf_scanner_ocr.OCR``.
        image_path: First argument forwarded to ``pdf_scanner_ocr.OCR``.
            Defaults to ``'PDF_TEST.jpg'``, the value that used to be
            hard-coded, so existing callers are unaffected.

    Returns:
        list: ``[text_VAT, company, payment, has_to_be_paid_until]`` where
        ``text_VAT`` is the text between ``vat,de`` and ``unterschrift,``
        with commas removed, ``company`` is the constant ``'MADISON'``,
        ``payment`` is the first comma-separated token found between
        ``eur,total,`` and the following ``total,``, and
        ``has_to_be_paid_until`` is ``'60 days from <text after datum,>'``.

    Raises:
        AttributeError: If any of the expected patterns is missing from the
            OCR text (``re.search`` returns ``None`` in that case).
    """
    text = pdf_scanner_ocr.OCR(image_path, language_tag)

    # Lower-case, drop newlines and turn every space into a comma.
    text = text.lower().replace('\n', '').replace(' ', ',')
    # Collapse comma runs and punctuation next to a comma. The order of the
    # replacements matters: the patterns below rely on this exact output.
    for noise in (',,,,', ',,,', ':,', ',.', '.,', ',,'):
        text = text.replace(noise, ',')

    text_VAT = re.search(r'vat,de(.*?)unterschrift,', text).group(1)
    text_VAT = text_VAT.replace(',', '')
    company = 'MADISON'

    payment = re.search(r'eur,total,(.*?)total,', text).group(1)
    # The first token is what ``pd.unique(tokens)[0]`` used to return
    # (unique keeps first-appearance order). Taking it directly avoids
    # passing a plain list to ``pd.unique``, which pandas has deprecated.
    payment = payment.split(',')[0]

    invoice_date = re.search(r'datum,(.*?),abreise', text).group(1)
    has_to_be_paid_until = '60 days from ' + invoice_date

    results_list = [text_VAT, company, payment, has_to_be_paid_until]
    return results_list

예제 #2

0

파일 보기

파일: regex_file.py 프로젝트: demaniou/AUEB-Invoicer

def regex_OTE():
    """Extract the invoice fields of an OTE invoice from OCR output.

    Runs ``pdf_scanner_ocr.OCR`` on ``'PDF_TEST.jpg'`` with the ``'ell'``
    language tag, normalises the text into a comma-separated token stream
    and pulls the fields out with regular expressions.

    Returns:
        list: ``[vat, 'OTE', payment, has_to_be_paid_until]``. ``vat`` is
        the first nine characters captured after ``αφμ,``; ``payment`` is
        the captured amount with commas turned into dots, rounded to two
        decimals and converted back to a string.

    Raises:
        AttributeError: If one of the patterns is not found in the text.
        ValueError: If the captured amount cannot be parsed as a float.
    """
    raw = pdf_scanner_ocr.OCR('PDF_TEST.jpg', 'ell')  # ell for OTE

    # Lower-case, remove newlines, then use commas instead of spaces.
    cleaned = raw.lower().replace('\n', '').replace(' ', ',')
    # Same clean-up sequence as before, applied in the same order.
    for needle in (',,,,', ',,,', ':,', ',.', '.,', ',,'):
        cleaned = cleaned.replace(needle, ',')

    vat_match = re.search(r'αφμ,(.*?),δου,', cleaned)
    vat_number = vat_match.group(1)[0:9]

    amount_match = re.search(r',λογαριασμού,\(µε,φπα\),(.*?)εποσό,πληρωμής,', cleaned)
    amount = float(amount_match.group(1).replace(',', '.'))
    amount_text = str(np.round(amount, decimals=2))

    due_match = re.search(r'ις,-30,0970ε(.*?)σας,ευχαριστούμεσύνολο,', cleaned)
    due_date = due_match.group(1)

    return [vat_number, 'OTE', amount_text, due_date]

예제 #3

0

파일 보기

파일: regex_file.py 프로젝트: demaniou/AUEB-Invoicer

def regex_HELLOWORLD(language_tag):
    """Extract the invoice fields of a HELLO WORLD invoice from OCR output.

    Args:
        language_tag: Language argument forwarded unchanged to
            ``pdf_scanner_ocr.OCR`` together with ``'PDF_TEST.jpg'``.

    Returns:
        list: ``[vat, 'HELLO WORLD', payment, has_to_be_paid_until]`` where
        the last item has the form ``'<n> days from <captured text>'``.

    Raises:
        AttributeError: If one of the patterns is not found in the text.
    """
    cleaned = pdf_scanner_ocr.OCR('PDF_TEST.jpg', language_tag)

    # Lower-case, remove newlines, then use commas instead of spaces.
    cleaned = cleaned.lower().replace('\n', '').replace(' ', ',')
    # Clean-up replacements; the order is significant for the patterns below.
    for needle in (',,,,', ',,,', ':,', ',.', '.,', ',,'):
        cleaned = cleaned.replace(needle, ',')

    def capture(pattern):
        # First capture group of the first match of ``pattern``.
        return re.search(pattern, cleaned).group(1)

    vat_number = capture(r'road,po,(.*?)\(123\),456,')
    amount = capture(r'00total,(.*?)many,thanks,for,')
    term_days = capture(r'to,be,received,within,(.*?),days.powered,by')
    start = capture(r'hello,world,invoice123,southwest,(.*?)silicon,valley,')
    due = term_days + ' days from ' + start

    return [vat_number, 'HELLO WORLD', amount, due]

예제 #4

0

파일 보기

파일: regex_file.py 프로젝트: demaniou/AUEB-Invoicer

def get_clean_text_Technicomer():
    """Return the normalised OCR text of ``'PDF_TEST.jpg'``.

    The text produced by ``pdf_scanner_ocr.OCR`` (language tag ``'ell'``) is
    lower-cased, newlines are removed, spaces become commas, and a fixed
    sequence of replacements tidies comma runs and punctuation next to a
    comma.

    Returns:
        str: The cleaned, comma-separated text.
    """
    cleaned = pdf_scanner_ocr.OCR('PDF_TEST.jpg', 'ell').lower()
    # (old, new) pairs, applied strictly in this order.
    substitutions = (
        ('\n', ''),
        (' ', ','),
        (',,,,', ','),
        (',,,', ','),
        (':,', ','),
        (',.', ','),
        ('.,', ','),
        (',,', ','),
    )
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned

예제 #5

0

파일 보기

파일: regex_file.py 프로젝트: demaniou/AUEB-Invoicer

def regex_technicomer():
    """Extract the invoice fields of a Technicomer invoice from OCR output.

    Runs ``pdf_scanner_ocr.OCR`` on ``'PDF_TEST.jpg'`` with the ``'Greek'``
    language argument, normalises the text into a comma-separated token
    stream and pulls the fields out with regular expressions.

    Returns:
        list: ``[vat, 'Technicomer', payment, has_to_be_paid_until]``.

    Raises:
        AttributeError: If one of the patterns is not found in the text.
    """
    cleaned = pdf_scanner_ocr.OCR('PDF_TEST.jpg', 'Greek')

    # Lower-case, remove newlines, then use commas instead of spaces.
    cleaned = cleaned.lower().replace('\n', '').replace(' ', ',')
    # Clean-up replacements; the order is significant for the patterns below.
    for needle in (',,,,', ',,,', ':,', ',.', '.,', ',,'):
        cleaned = cleaned.replace(needle, ',')

    # Original author's note: in Greece the VAT number is 9 digits.
    vat_number = re.search(r'αφμ,(.*?)7δ\.ο\.υ,', cleaned).group(1)
    amount = re.search(r'επληρώτεο,(.*?),ε,πληρωτεο,την,', cleaned).group(1)
    due_date = re.search(r'πληρωτεο,την,(.*?),αρ.λογαριασμων,/', cleaned).group(1)

    return [vat_number, 'Technicomer', amount, due_date]