def return_data_emirates(filename):
    """Extract Emirates ID data from *filename* and attach the Arabic fields.

    ``arabic(filename)`` supplies the CIF, the Arabic name and the Arabic
    nationality; ``emi_extract`` is run on the text returned by
    ``file_handler(filename)``.

    Returns:
        The object returned by ``emi_extract`` with the keys
        ``'arabic_name'``, ``'cif'`` and ``'nationality_arabic'`` set, or
        ``None`` when ``emi_extract`` returns ``None``.
    """
    cif, name, nat = arabic(filename)
    text2 = file_handler(filename)
    Data = emi_extract(text2)
    # Identity test: None is a singleton, and "!=" can be overridden by the
    # type of the extracted result.
    if Data is not None:
        Data['arabic_name'] = name
        Data['cif'] = cif
        Data['nationality_arabic'] = nat
    return Data
def doc_type(filename):
    """Classify the document in *filename* from the start of its extracted text.

    The text returned by ``file_handler(filename)`` is inspected: a leading
    ``'I'`` (or ``'T'``) means an Emirates ID; a leading ``'P'`` means a
    passport, and characters 2-4 select the country.

    Returns:
        ``'emirates_id'``, ``'passport_ind'``, ``'passport_pak'``,
        ``'passport_uae'`` or ``'passport_jor'``; ``None`` when the text is
        empty or matches none of these.
    """
    text2 = file_handler(filename)
    if not text2:
        # Nothing was extracted, so there is no leading character to inspect.
        return None

    leading = text2[0]
    # A leading 'T' is handled as 'I' (presumably an OCR misread - confirm).
    # str.replace returns a new string, so its result must not be discarded.
    if leading in ('I', 'T'):
        return 'emirates_id'

    if leading == 'P':
        passport_types = {
            'IND': 'passport_ind',
            'PAK': 'passport_pak',
            'ARE': 'passport_uae',
            'JOR': 'passport_jor',
        }
        return passport_types.get(text2[2:5])

    return None
def return_data(filename):
    """Parse passport fields out of the text extracted from *filename*.

    The text from ``file_handler(filename)`` is first adjusted towards 88
    characters: surplus characters are removed starting at index 40, and a
    shorter text gets one ``'<'`` inserted at index 41. Fields are then
    read from fixed positions.

    Returns:
        dict with the keys ``'Name'``, ``'Passport Number'``,
        ``'ID Number'``, ``'Date Of Birth'``, ``'Expiry Date'``,
        ``'Nationality'`` and ``'Sex'``. Dates are ``DD/MM/YYYY`` strings.

    Raises:
        ValueError: if the two-digit birth year is not numeric.
        IndexError: if the text is too short to contain the sex character.
    """
    text2 = list(file_handler(filename))
    if len(text2) > 88:
        del text2[40:40 + len(text2) - 88]
    if len(text2) < 88:
        text2.insert(41, "<")
    # Back to a str: the field slices are concatenated with strings and the
    # name parsing uses str.find, neither of which works on a list.
    text2 = "".join(text2)

    Data = {
        'Name': None,
        'Passport Number': None,
        'ID Number': None,
        'Date Of Birth': None,
        'Expiry Date': None,
        'Nationality': None,
        'Sex': None
    }

    # --- Name: up to three '<'-terminated parts starting at index 5; the
    # first part is followed by a two-character gap.
    name_string = text2[5:]
    index = name_string.find('<')
    name1 = name_string[0:index]
    forward_string = name_string[index + 2:]
    index2 = forward_string.find('<')
    name2 = forward_string[0:index2]
    final_string = forward_string[index2 + 1:]
    index3 = final_string.find('<')
    name3 = final_string[0:index3]
    Data["Name"] = name1 + " " + name2 + " " + name3

    # --- Passport number and nationality
    Data["Passport Number"] = text2[44:54]
    Data['Nationality'] = text2[54:57]

    # --- Birth date, stored as YYMMDD; two-digit years above 30 are taken
    # as 19xx, the rest as 20xx.
    year_store = text2[57:59]
    birth_month = text2[59:61]
    birth_date = text2[61:63]
    if int(year_store) > 30:
        year = '19' + year_store
    else:
        year = '20' + year_store
    Data["Date Of Birth"] = birth_date + '/' + birth_month + '/' + year

    # --- Sex
    Data['Sex'] = text2[64]

    # --- Expiry date, stored as YYMMDD; the century is always taken as 20xx.
    expiry_year = '20' + text2[65:67]
    expiry_month = text2[67:69]
    expiry_date = text2[69:71]
    Data["Expiry Date"] = expiry_date + '/' + expiry_month + '/' + expiry_year

    # --- ID number
    Data["ID Number"] = text2[72:84]

    return Data
def jord_passport(filename):
    """Read passport fields from the text extracted from *filename*.

    The text from ``file_handler(filename)`` is adjusted towards 88
    characters (surplus removed from index 40 onwards, or a single ``'<'``
    inserted at index 41 when short) and the fields are sliced from fixed
    positions. The adjusted length, the adjusted text and the two-digit
    birth year are printed along the way.

    Returns:
        dict with the keys ``'Name'``, ``'Passport Number'``,
        ``'National Number'``, ``'Date Of Birth'``, ``'Expiry Date'``,
        ``'Nationality'`` and ``'Sex'``. Dates are ``DD/MM/YYYY`` strings.
    """
    symbols = list(file_handler(filename))
    overflow = len(symbols) - 88
    if overflow > 0:
        del symbols[40:40 + overflow]
    elif overflow < 0:
        symbols.insert(41, "<")
    print(len(symbols))
    mrz = "".join(symbols)
    print(mrz)

    # Name: three '<'-terminated parts; a two-character gap follows the first.
    remainder = str(mrz[5:])
    cut = remainder.find('<')
    first_part = remainder[:cut]
    remainder = remainder[cut + 2:]
    cut = remainder.find('<')
    second_part = remainder[:cut]
    remainder = remainder[cut + 1:]
    third_part = remainder[:remainder.find('<')]
    full_name = f"{first_part} {second_part} {third_part}"

    # Birth date (YYMMDD); years above 30 are read as 19xx, otherwise 20xx.
    yy = mrz[57:59]
    print(yy)
    mm = mrz[59:61]
    dd = mrz[61:63]
    century = '19' if int(yy) > 30 else '20'
    date_of_birth = f"{dd}/{mm}/{century}{yy}"

    sex = mrz[64]

    # Expiry date (YYMMDD); the century is always taken as 20xx.
    expiry = f"{mrz[69:71]}/{mrz[67:69]}/20{mrz[65:67]}"

    return {
        'Name': full_name,
        'Passport Number': mrz[44:51],
        'National Number': mrz[72:81],
        'Date Of Birth': date_of_birth,
        'Expiry Date': expiry,
        'Nationality': mrz[54:57],
        'Sex': sex,
    }
def get_emirates_data(filename):
    """Extract Emirates ID fields, including the Arabic name and nationality.

    Steps, in order:

    1. Derive ``cif_no`` from the part of *filename* after the last ``'_'``
       and before the following ``'.'``; a random 7-digit number is used if
       *filename* cannot be split.
    2. Run ``apply_ocr`` and ``pytesseract`` (``'ara+eng'``) on the image,
       write the OCR text to ``textfile.txt`` and read it back through
       ``file_process``.
    3. Take the line before the one containing ``"Name"`` (resp.
       ``"Nationality"``) as the Arabic name (resp. nationality), keeping
       what follows the first ``':'``.
    4. Slice the remaining fields from fixed positions of the text returned
       by ``file_handler(filename)``.

    Side effects: writes ``textfile.txt`` in the working directory, sleeps
    0.5 s and prints the processed OCR lines and the extracted text.

    Returns:
        dict with the keys ``'type'``, ``'name'``, ``'card_no'``,
        ``'emirates_id'``, ``'birth_date'``, ``'expiry_date'``,
        ``'nationality'``, ``'sex'``, ``'cif_no'``, ``'arabic_name'`` and
        ``'nationality(arabic)'``. The two Arabic entries are ``None`` when
        the corresponding label line is missing or is the first line.

    Raises:
        ValueError: if the two-digit birth year is not numeric.
        IndexError: if the extracted text is too short for the sex character.
    """
    try:
        cif_no = filename.split('_')[-1].split('.')[0]
    except (AttributeError, TypeError):
        # filename does not support str-style splitting.
        cif_no = str(np.random.randint(1000000, 2000000))

    apply_ocr(filename)
    # Close the image handle as soon as OCR is done.
    with Image.open(filename) as image:
        text = pytesseract.image_to_string(image, 'ara+eng')
    with open('textfile.txt', 'w') as file:
        file.write(text)
    time.sleep(0.5)  # NOTE(review): purpose not visible here - confirm it is still needed
    content = file_process('textfile.txt')
    print(content)

    # Position of the last line mentioning each label; None when absent.
    # enumerate gives the real position even if identical lines repeat.
    name_index = None
    nat_index = None
    for position, line in enumerate(content):
        if "Name" in line:
            name_index = position
        if "Nationality" in line:
            nat_index = position

    # The Arabic value sits on the line before its label line, after the
    # first ':' (the whole line when there is no ':').
    name_in_arabic = None
    if name_index:
        arabic_name = content[name_index - 1]
        name_in_arabic = arabic_name[arabic_name.find(':') + 1:]
    nationality_in_arabic = None
    if nat_index:
        arabic_nationality = content[nat_index - 1]
        nationality_in_arabic = arabic_nationality[arabic_nationality.find(':') + 1:]

    text2 = file_handler(filename)
    print(text2)

    Data = {
        'type': 'emirates_id',
        'name': None,
        'card_no': None,
        'emirates_id': None,
        'birth_date': None,
        'expiry_date': None,
        'nationality': None,
        'sex': None,
        'cif_no': cif_no,
        # Store the values parsed from the OCR lines in their own slots.
        'arabic_name': name_in_arabic,
        'nationality(arabic)': nationality_in_arabic
    }

    Data['card_no'] = text2[5:14]
    Data['emirates_id'] = text2[15:30]

    # Birth date (YYMMDD); two-digit years above 30 are taken as 19xx,
    # the rest as 20xx.
    year_store = text2[30:32]
    birth_month = text2[32:34]
    birth_date = text2[34:36]
    if int(year_store) > 30:
        year = '19' + year_store
    else:
        year = '20' + year_store
    Data["birth_date"] = birth_date + '/' + birth_month + '/' + year

    Data['sex'] = text2[37]

    # Expiry date (YYMMDD); the century is always taken as 20xx.
    expiry_year = '20' + text2[38:40]
    expiry_month = text2[40:42]
    expiry_date = text2[42:44]
    Data["expiry_date"] = expiry_date + '/' + expiry_month + '/' + expiry_year

    Data["nationality"] = text2[45:48]

    # Name: from index 60 on, three '<'-terminated parts (last name, a
    # two-character gap, first name, middle name).
    last_line = text2[60:]
    index = last_line.find('<')
    last_name = last_line[0:index]
    next_string = last_line[index + 2:]
    index2 = next_string.find('<')
    first_name = next_string[0:index2]
    last_string = next_string[index2 + 1:]
    index3 = last_string.find('<')
    middle_name = last_string[0:index3]
    Data['name'] = first_name + ' ' + middle_name + ' ' + last_name

    return Data
def indianpassport(filename):
    """Read passport fields from the text extracted from *filename*.

    The text from ``file_handler(filename)`` is adjusted towards 88
    characters (surplus removed from index 40 onwards, or a single ``'<'``
    inserted at index 41 when short) and the fields are sliced from fixed
    positions. The adjusted length and the adjusted text are printed.

    Returns:
        dict with the keys ``'Name'``, ``'Passport Number'``,
        ``'ID Number'`` (always ``None``), ``'Date Of Birth'``,
        ``'Expiry Date'``, ``'Nationality'`` and ``'Sex'``. Dates are
        ``DD/MM/YYYY`` strings.
    """
    characters = list(file_handler(filename))
    extra = len(characters) - 88
    if extra > 0:
        del characters[40:40 + extra]
    elif extra < 0:
        characters.insert(41, "<")
    print(len(characters))
    line = "".join(characters)
    print(line)

    # Name: three '<'-terminated parts; a two-character gap follows the first.
    tail = line[5:]
    stop = tail.find('<')
    part_one = tail[:stop]
    tail = tail[stop + 2:]
    stop = tail.find('<')
    part_two = tail[:stop]
    tail = tail[stop + 1:]
    part_three = tail[:tail.find('<')]

    # Birth date (YYMMDD); years above 30 are read as 19xx, otherwise 20xx.
    birth_yy = line[57:59]
    birth_mm = line[59:61]
    birth_dd = line[61:63]
    birth_year = ('19' if int(birth_yy) > 30 else '20') + birth_yy
    born = "/".join([birth_dd, birth_mm, birth_year])

    gender = line[64]

    # Expiry date (YYMMDD); the century is always taken as 20xx.
    expires = "/".join([line[69:71], line[67:69], '20' + line[65:67]])

    return {
        'Name': " ".join([part_one, part_two, part_three]),
        'Passport Number': line[44:52],
        'ID Number': None,
        'Date Of Birth': born,
        'Expiry Date': expires,
        'Nationality': line[54:57],
        'Sex': gender,
    }