Beispiel #1
0
def make_compare_file():
    """Write the recognised characters of one sample page to two text files.

    The sample directory is ``./corrected_docs/Samp_<tno>``; ``tno`` is not
    defined here and is read from the enclosing module scope.  The first
    ``*.png`` that ``glob.glob`` returns for that directory is loaded with
    ``cv2.imread`` (flag 0), passed through ``pp.preprocess`` and segmented
    with ``pp.find_lines``.

    Two files are written into the sample directory:

    * ``compare_list_new.txt`` -- one recognised label per line.
    * ``output_file_new.txt`` -- the labels concatenated, with a space after
      every word and a newline after every line.

    If no PNG is found or it cannot be read, a message is printed and the
    process exits via ``exit()``.  The input is validated before the output
    files are opened, so a failed run does not truncate earlier results.
    """
    sample_dir = './corrected_docs/Samp_' + str(tno)

    # glob returns matches in arbitrary order and an empty list when
    # nothing matches; indexing [0] unguarded would raise IndexError.
    matches = glob.glob(sample_dir + '/*.png')
    img = cv2.imread(matches[0], 0) if matches else None

    # cv2.imread signals failure by returning None.  Use an identity test:
    # "== None" on a NumPy array is an element-wise comparison.
    if img is None:
        print('image does\'nt exist')
        exit()

    img = pp.preprocess(img)
    lines = pp.find_lines(img.copy())
    label_list = train.label_unicode()

    # Context managers close both files even if recognition fails half-way.
    with open(sample_dir + '/compare_list_new.txt', 'w') as f:
        with open(sample_dir + '/output_file_new.txt', 'w') as g:
            for l in lines:
                for w in l.word_list:
                    for c in w.char_list:
                        # c.label is the class index into label_list.
                        tup = label_list[int(c.label)]
                        f.write(tup + '\n')
                        g.write(tup)
                    g.write(' ')
                g.write('\n')
Beispiel #2
0
def make_modified_file():
    """Re-run recognition on the example page and report changed labels.

    Loads ``./Example/dc_books_page.png`` with ``cv2.imread`` (flag 0), runs
    ``pp.preprocess`` and ``pp.skew_correction`` on it and segments the
    result with ``pp.find_lines``.  Every recognised label is written to
    ``./output_file.txt`` (a space after each word, a newline after each
    line).  Each label is also compared with the line at the same position
    in ``./compare_list.txt``; labels that differ, or that have no
    reference line at all, are printed.

    If the image cannot be read, a message naming the path is printed and
    the process exits via ``exit()`` before any output file is opened.
    """
    image_path = './Example/dc_books_page.png'
    img = cv2.imread(image_path, 0)

    # cv2.imread signals failure by returning None.  Use an identity test:
    # "== None" on a NumPy array is an element-wise comparison.
    if img is None:
        # The message used to reference an undefined name and raised
        # NameError instead of reporting the missing file.
        print(image_path + ' does\'nt exist')
        exit()

    img = pp.preprocess(img)
    im, _rot = pp.skew_correction(img)
    lines = pp.find_lines(im.copy())
    label_list = train.label_unicode()

    # Strip only the line terminator so a final line without a trailing
    # newline keeps its last character.
    with open('./compare_list.txt', 'r') as f:
        expected = [entry.rstrip('\n') for entry in f]

    i = 0
    with open('./output_file.txt', 'w') as g:
        for l in lines:
            for w in l.word_list:
                for c in w.char_list:
                    # c.label is the class index into label_list.
                    tup = label_list[int(c.label)]
                    # A reference list shorter than the recognised text
                    # counts as a mismatch rather than an IndexError.
                    if i >= len(expected) or expected[i] != tup:
                        print(tup)
                    g.write(tup)
                    i += 1
                g.write(' ')
            g.write('\n')
Beispiel #3
0
def make_modified_file():
    """Compare a fresh recognition run against ``./compare_list.txt``.

    The page ``./Example/dc_books_page.png`` is read with ``cv2.imread``
    (flag 0), passed through ``pp.preprocess`` and ``pp.skew_correction``,
    and segmented by ``pp.find_lines``.  The recognised labels are written
    to ``./output_file.txt``: labels back to back, a space after each word
    and a newline after each line.  While writing, the n-th label is checked
    against the n-th line of ``./compare_list.txt`` and printed when the two
    differ or when the reference file has no n-th line.

    When the image cannot be read, its path is reported and the process
    exits via ``exit()`` without touching the output file.
    """
    image_path = './Example/dc_books_page.png'
    img = cv2.imread(image_path, 0)

    # Identity test: cv2.imread returns None on failure, and comparing a
    # NumPy array with "== None" is evaluated element-wise.
    if img is None:
        # Report the path that was actually tried; the old message used an
        # undefined variable and crashed with NameError.
        print(image_path + ' does\'nt exist')
        exit()

    img = pp.preprocess(img)
    im, _rot = pp.skew_correction(img)
    text_lines = pp.find_lines(im.copy())
    label_list = train.label_unicode()

    with open('./compare_list.txt', 'r') as f:
        # Remove just the newline; slicing off the last character would
        # damage a final line that has no terminator.
        reference = [row.rstrip('\n') for row in f]

    position = 0
    with open('./output_file.txt', 'w') as g:
        for text_line in text_lines:
            for word in text_line.word_list:
                for char in word.char_list:
                    # char.label is the class index into label_list.
                    tup = label_list[int(char.label)]
                    # Running past the end of the reference list is
                    # reported as a difference instead of raising.
                    has_reference = position < len(reference)
                    if not has_reference or reference[position] != tup:
                        print(tup)
                    g.write(tup)
                    position += 1
                g.write(' ')
            g.write('\n')
Beispiel #4
0
def make_compare_file():
    """Recognise one sample page and store the labels in two text files.

    Works inside ``./corrected_docs/Samp_<tno>``, where ``tno`` comes from
    the enclosing module scope (it is not defined in this function).  The
    first ``*.png`` returned by ``glob.glob`` is read with ``cv2.imread``
    (flag 0), run through ``pp.preprocess`` and split by ``pp.find_lines``.

    Output, both inside the sample directory:

    * ``compare_list_new.txt`` -- each recognised label on its own line.
    * ``output_file_new.txt`` -- labels back to back, a space after every
      word and a newline after every line.

    When there is no PNG or it cannot be read, a message is printed and the
    process exits via ``exit()``.  The check runs before the output files
    are opened, so existing files are left untouched on failure.
    """
    sample_dir = './corrected_docs/Samp_' + str(tno)

    # An empty match list would make [0] raise IndexError; treat it the
    # same as an unreadable image.  Note that glob order is arbitrary.
    candidates = glob.glob(sample_dir + '/*.png')
    img = cv2.imread(candidates[0], 0) if candidates else None

    # Identity test: cv2.imread returns None on failure, and comparing a
    # NumPy array with "== None" is evaluated element-wise.
    if img is None:
        print('image does\'nt exist')
        exit()

    img = pp.preprocess(img)
    text_lines = pp.find_lines(img.copy())
    label_list = train.label_unicode()

    # "with" guarantees both files are closed if recognition raises.
    with open(sample_dir + '/compare_list_new.txt', 'w') as f:
        with open(sample_dir + '/output_file_new.txt', 'w') as g:
            for text_line in text_lines:
                for word in text_line.word_list:
                    for char in word.char_list:
                        # char.label is the class index into label_list.
                        tup = label_list[int(char.label)]
                        f.write(tup + '\n')
                        g.write(tup)
                    g.write(' ')
                g.write('\n')
Beispiel #5
0
def recognize_block(im):
    """Recognise the text in ``im`` and return it as a single string.

    ``find_lines(im)`` supplies the segmented lines; each line exposes
    ``data`` and ``word_list``, and each word exposes ``char_list`` whose
    items carry a ``label`` that indexes ``train.label_unicode()``.

    The returned string has a newline before every line and a space before
    every word.  Within a word the labels are emitted in order, except:

    * two consecutive quote/comma glyphs collapse into one double quote;
    * a leading vowel-sign / ra-sign glyph is emitted after the glyph(s)
      that follow it, i.e. visual order is turned into text order.

    Side effect: every line image is written to ``temp/zline_<n>.png``.
    """
    text_lines = find_lines(im)
    label_list = train.label_unicode()

    quote_marks = ('\'', ',')
    leading_signs = ('െ', 'േ', '്ര')
    second_signs = ('െ', '്ര')

    pieces = []
    for line_no, text_line in enumerate(text_lines):
        cv2.imwrite('temp/zline_' + str(line_no) + '.png', text_line.data)
        pieces.append('\n')

        for word in text_line.word_list:
            pieces.append(' ')
            chars = word.char_list
            pos = 0

            while pos < len(chars):
                label = label_list[int(chars[pos].label)]
                try:
                    if label in quote_marks:
                        # A pair of quote/comma glyphs forms one '"'.
                        follower = chars[pos + 1]
                        if label_list[int(follower.label)] in quote_marks:
                            pieces.append('\"')
                            pos += 1
                        else:
                            pieces.append(label)
                    elif label in leading_signs:
                        follower = chars[pos + 1]
                        follower_label = label_list[int(follower.label)]
                        if follower_label in second_signs:
                            # Two signs in front: the third glyph is
                            # emitted first, then both signs.
                            third = chars[pos + 2]
                            pieces.append(label_list[int(third.label)])
                            pos += 1
                        pieces.append(follower_label)
                        pieces.append(label)
                        pos += 1
                    else:
                        pieces.append(label)
                except IndexError:
                    # The look-ahead ran past the end of the word: emit the
                    # current glyph unchanged.
                    pieces.append(label)
                pos += 1

    return ''.join(pieces)
Beispiel #6
0
def recognize_block(im):
    """Return the recognised text of image block ``im``.

    Lines come from ``find_lines(im)``; the label table comes from
    ``train.label_unicode()`` and is indexed with ``int(char.label)``.
    The result contains a newline ahead of each line and a space ahead of
    each word.  Labels are appended in reading order with two adjustments:
    a pair of adjacent quote/comma glyphs becomes a single double quote, and
    a vowel-sign / ra-sign glyph found in front of other glyphs is moved
    behind them.

    Side effect: each line image is saved as ``temp/zline_<n>.png``.
    """
    segmented = find_lines(im)
    label_list = train.label_unicode()

    def label_of(glyph):
        # Class index -> output text for one segmented character.
        return label_list[int(glyph.label)]

    quote_like = ['\'', ',']
    front_signs = ['െ', 'േ', '്ര']
    stackable_signs = ['െ', '്ര']

    out = []
    line_index = 0
    for row in segmented:
        cv2.imwrite('temp/zline_' + str(line_index) + '.png', row.data)
        out.append('\n')
        for word in row.word_list:
            out.append(' ')
            glyphs = word.char_list
            idx = 0
            while idx < len(glyphs):
                glyph = glyphs[idx]
                try:
                    current = label_of(glyph)
                    if current in quote_like:
                        # Look one glyph ahead for the second half of a '"'.
                        if label_of(glyphs[idx + 1]) in quote_like:
                            out.append('\"')
                            idx += 1
                        else:
                            out.append(current)
                    elif current in front_signs:
                        after = label_of(glyphs[idx + 1])
                        if after in stackable_signs:
                            # A second sign follows: emit the glyph behind
                            # both signs first.
                            out.append(label_of(glyphs[idx + 2]))
                            idx += 1
                        out.append(after)
                        out.append(current)
                        idx += 1
                    else:
                        out.append(current)
                except IndexError:
                    # Nothing left to look ahead to; keep the glyph as is.
                    out.append(label_of(glyph))
                idx += 1
        line_index += 1
    return ''.join(out)
Beispiel #7
0
def recognize_block(im):
    """Recognise the text in ``im`` and return it prefixed with ``'word:'``.

    Lines come from ``pp.find_lines(im)``; each character's ``label`` is an
    index into ``train.label_unicode()``.  After the ``'word:'`` prefix the
    result holds a newline before every line and a space before every word.

    Within a word the labels are emitted in order, except:

    * two consecutive quote/comma glyphs collapse into one double quote;
    * a leading vowel-sign / ra-sign glyph is emitted after the glyph(s)
      that follow it;
    * the variant class names ``',1'``, ``',2'`` and the second chillu
      variant are emitted as the glyph they stand for.

    The comma variants used to be matched by the quote test first, which
    made their own branch unreachable: a lone comma came out as the raw
    class name.  Variant names are now translated wherever a label is
    emitted.
    """
    text_lines = pp.find_lines(im)
    label_list = train.label_unicode()

    quote_like = ('\'', ',1', ',2')
    leading_signs = ('െ', 'േ', '്ര')
    second_signs = ('െ', '്ര')
    # Class-name variants mapped to the glyph they stand for.
    printable = {',1': ',', ',2': ',', 'ൾ2': 'ൾ'}

    def shown(label):
        # Text to emit for a label; most labels are emitted unchanged.
        return printable.get(label, label)

    parts = ['word:']
    for text_line in text_lines:
        parts.append('\n')
        for word in text_line.word_list:
            parts.append(' ')
            chars = word.char_list
            pos = 0
            while pos < len(chars):
                label = label_list[int(chars[pos].label)]
                try:
                    if label in quote_like:
                        # A pair of quote/comma glyphs forms one '"'.
                        follower = chars[pos + 1]
                        if label_list[int(follower.label)] in quote_like:
                            parts.append('\"')
                            pos += 1
                        else:
                            parts.append(shown(label))
                    elif label in leading_signs:
                        follower = chars[pos + 1]
                        follower_label = label_list[int(follower.label)]
                        if follower_label in second_signs:
                            # Two signs in front: the third glyph is
                            # emitted first, then both signs.
                            third = chars[pos + 2]
                            parts.append(shown(label_list[int(third.label)]))
                            pos += 1
                        parts.append(shown(follower_label))
                        parts.append(label)
                        pos += 1
                    else:
                        parts.append(shown(label))
                except IndexError:
                    # The look-ahead ran past the end of the word: emit the
                    # current glyph on its own.
                    parts.append(shown(label))
                pos += 1
    return ''.join(parts)
Beispiel #8
0
def recognize_block(im):
    """Recognise the text in ``im`` and return it as one string.

    Lines come from ``pp.find_lines(im)``; each character's ``label`` is an
    index into ``train.label_unicode()``.  The result has a space before
    every word and no line separators.  Labels are emitted in order, except
    that two adjacent quote/comma glyphs become one double quote and a
    leading vowel-sign / ra-sign glyph is moved behind the glyph(s) that
    follow it.

    Side effect: after each emitted group one character image is written to
    ``output/zcline_<line>_word_<word>_c_<count><label>.png``.
    """
    text_lines = pp.find_lines(im)
    label_list = train.label_unicode()

    quote_marks = ('\'', ',')
    leading_signs = ('െ', 'േ', '്ര')
    second_signs = ('െ', '്ര')

    pieces = []
    for line_no, text_line in enumerate(text_lines):
        # Word numbers in the dump filenames start at 1.
        for word_no, word in enumerate(text_line.word_list, 1):
            pieces.append(' ')
            chars = word.char_list
            pos = 0
            group_no = 0
            while pos < len(chars):
                label = label_list[int(chars[pos].label)]
                try:
                    if label in quote_marks:
                        follower = chars[pos + 1]
                        if label_list[int(follower.label)] in quote_marks:
                            pieces.append('\"')
                            pos += 1
                        else:
                            pieces.append(label)
                    elif label in leading_signs:
                        follower = chars[pos + 1]
                        follower_label = label_list[int(follower.label)]
                        if follower_label in second_signs:
                            third = chars[pos + 2]
                            pieces.append(label_list[int(third.label)])
                            pos += 1
                        pieces.append(follower_label)
                        pieces.append(label)
                        pos += 1
                    else:
                        pieces.append(label)
                except IndexError:
                    # The look-ahead ran past the end of the word.
                    pieces.append(label)

                # NOTE(review): pos may already have advanced inside the
                # branches above, so this dumps the LAST glyph of the group;
                # the counter and the label are joined without a separator.
                # Both are kept as they were -- confirm that is intended.
                dumped = chars[pos]
                target = 'output/zcline_{0}_word_{1}_c_{2}{3}.png'.format(
                    line_no, word_no, group_no, int(dumped.label))
                cv2.imwrite(target, dumped.data)

                group_no += 1
                pos += 1
    return ''.join(pieces)