Beispiel #1
0
def gen_train_sample(im):
	"""Crop every sizeable blob out of ``im`` and save it as a labelled sample.

	``im`` is run through ``pp.preprocess`` and the external contours of the
	result are extracted.  The area of every contour is printed; contours
	whose area is 20 or less are discarded.  The remaining contours are
	visited in order of increasing area.  For each one, the bounding box
	(padded by one pixel on every side) is cropped from ``im``, classified
	with ``train.classifier`` and written to
	``samp/zsamp47_8_<label>_<index>.png``.

	Args:
		im: image array to cut samples from.  It is indexed as
			``im[rows, cols]``; presumably a single-channel grayscale
			image -- confirm against the caller.

	Returns:
		None.  The crops are written to disk as a side effect.
	"""
	img = pp.preprocess(im.copy())
	# findContours returns (contours, hierarchy) or (image, contours, hierarchy)
	# depending on the OpenCV release; index -2 is the contour list in both.
	found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

	sized = []
	for cnt in found:
		area = cv2.contourArea(cnt)
		print(area)
		if area > 20:
			sized.append((area, cnt))
	# Stable sort on the area alone: ties keep detection order, and the
	# contour arrays themselves are never compared.  An empty list is fine.
	sized.sort(key=lambda pair: pair[0])

	for i, (_, cnt) in enumerate(sized):
		x, y, w, h = cv2.boundingRect(cnt)
		# Clamp the padded origin at 0: a negative slice start would wrap
		# around to the far edge and yield an empty or wrong crop.
		top = max(y - 1, 0)
		left = max(x - 1, 0)
		box = im[top:y + h + 1, left:x + w + 1]
		char = pp.preprocess(box.copy())
		try:
			f = train.find_feature(char)
			fu = train.np.array(f, train.np.float32)
			label = train.classifier.predict(fu)
			print(label)
		except IndexError:
			# Feature extraction failed on this crop; file it under class 0.
			label = 0
		cv2.imwrite('samp/zsamp47_8_' + str(int(label)) + '_' + str(i) + '.png', box)
Beispiel #2
0
def gen_train_sample(im):
	"""Crop every sizeable blob out of ``im`` and save it as a labelled sample.

	``im`` is run through ``pp.preprocess`` and the external contours of the
	result are extracted.  The area of every contour is printed; contours
	whose area is 20 or less are discarded.  The remaining contours are
	visited in order of increasing area.  For each one, the bounding box
	(padded by one pixel on every side) is cropped from ``im``, classified
	with ``train.classifier`` and written to
	``samp/zsamp47_8_<label>_<index>.png``.

	Args:
		im: image array to cut samples from.  It is indexed as
			``im[rows, cols]``; presumably a single-channel grayscale
			image -- confirm against the caller.

	Returns:
		None.  The crops are written to disk as a side effect.
	"""
	img = pp.preprocess(im.copy())
	# findContours returns (contours, hierarchy) or (image, contours, hierarchy)
	# depending on the OpenCV release; index -2 is the contour list in both.
	found = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]

	sized = []
	for cnt in found:
		area = cv2.contourArea(cnt)
		print(area)
		if area > 20:
			sized.append((area, cnt))
	# Stable sort on the area alone: ties keep detection order, and the
	# contour arrays themselves are never compared.  An empty list is fine.
	sized.sort(key=lambda pair: pair[0])

	for i, (_, cnt) in enumerate(sized):
		x, y, w, h = cv2.boundingRect(cnt)
		# Clamp the padded origin at 0: a negative slice start would wrap
		# around to the far edge and yield an empty or wrong crop.
		top = max(y - 1, 0)
		left = max(x - 1, 0)
		box = im[top:y + h + 1, left:x + w + 1]
		char = pp.preprocess(box.copy())
		try:
			f = train.find_feature(char)
			fu = train.np.array(f, train.np.float32)
			label = train.classifier.predict(fu)
			print(label)
		except IndexError:
			# Feature extraction failed on this crop; file it under class 0.
			label = 0
		cv2.imwrite('samp/zsamp47_8_' + str(int(label)) + '_' + str(i) + '.png', box)
	def __init__(self,char):
		"""Wrap one character image and classify it immediately.

		The new instance is published as the module-level ``cur_char``
		before anything else happens.  ``char.shape`` must unpack into
		exactly two values; the first is stored as ``hight``.  The image
		itself is kept in ``data``, its feature vector (float32) in
		``feature`` and the result of ``train.recognize`` in ``label``.
		"""
		global cur_char
		cur_char = self
		rows, _cols = char.shape
		self.hight = rows
		self.data = char
		# Work on a copy so feature extraction cannot alter the stored image.
		raw_feature = train.find_feature(self.data.copy())
		self.feature = np.array(raw_feature, np.float32)
		self.label = train.recognize(self.feature)
Beispiel #4
0
def train_svm(max_per_class=9):
	"""Train the character SVM from ``train_images/`` and save it.

	Every sub-directory of ``train_images/`` (visited in sorted order) that
	holds more than 500 ``*.png`` files becomes one class.  Classes are
	numbered from 1 in visiting order; the class text is read from the
	directory's ``utf8`` file.  The label texts are written one per line to
	the file ``label`` (line 0 is the placeholder ``'a'``), and the trained
	model is saved to ``svm_class.xml``.

	Args:
		max_per_class: upper bound on the number of images used per class,
			or ``None`` to use every image.  The default of 9 matches the
			C-style counter this replaces (start at 10, decrement before
			each image, stop at zero), which was not valid Python.

	Returns:
		None.
	"""
	# CV2 SVM
	svm_params = dict(kernel_type=cv2.SVM_RBF,
	                  svm_type=cv2.SVM_C_SVC,
	                  C=9.34, gamma=15.68)
	svm = cv2.SVM()
	# Index 0 is a placeholder so that class numbers can start at 1.
	label_list = ['a']
	url = 'train_images/'
	train_set = []
	label = 0
	for class_dir in sorted(os.listdir(url)):
		images = glob.glob(url + class_dir + '/*.png')
		if len(images) <= 500:
			continue
		with open(url + class_dir + '/utf8', "r") as utf8_file:
			i_uni = utf8_file.read()
		# Drop the trailing newline only when there is one, so a file without
		# it does not lose its last character.
		if i_uni.endswith('\n'):
			i_uni = i_uni[:-1]
		label_list.append(i_uni)
		label += 1
		print('%s %s %s %s' % (label, class_dir, label_list[label], len(images)))
		if max_per_class is not None:
			images = images[:max_per_class]
		for path in images:
			img = cv2.imread(path, 0)
			img = pp.preprocess(img)
			f = train.find_feature(img.copy())
			train_set.append([label, f])

	with open('label', 'w') as label_file:
		for text in label_list:
			label_file.write(text + '\n')

	# Shuffle label/feature pairs together so they stay aligned.
	shuffle(train_set)
	class_ids = [pair[0] for pair in train_set]
	f_list = [pair[1] for pair in train_set]
	samples = np.array(f_list, np.float32)
	responses = np.array(class_ids, np.float32)
	print('auto training initiated')
	print('please wait.....')
	svm.train(samples, responses, params=svm_params)
	# svm.train_auto(samples,responses,None,None,params=svm_params)
	svm.save("svm_class.xml")
Beispiel #5
0
 def __init__(self, char):
     """Wrap one character image and classify it immediately.

     The new instance is published as the module-level ``cur_char``
     before anything else happens.  ``char.shape`` must unpack into
     exactly two values; the first is stored as ``height``.  The image
     itself is kept in ``data``, its feature vector (float32) in
     ``feature`` and the result of ``train.recognize`` in ``label``.
     """
     global cur_char
     cur_char = self
     rows, _cols = char.shape
     self.height = rows
     self.data = char
     # Work on a copy so feature extraction cannot alter the stored image.
     raw_feature = train.find_feature(self.data.copy())
     self.feature = np.array(raw_feature, np.float32)
     self.label = train.recognize(self.feature)
Beispiel #6
0
def purify_train():
	"""Re-classify the training images and report every disagreement.

	Loads the SVM from ``svm_class.xml`` and walks ``train_images/`` in
	sorted order.  Each sub-directory with more than 25 ``*.png`` files is
	counted as the next class (numbering starts at 1).  Every image in such
	a directory is pre-processed, converted to a feature vector and
	classified.  When the prediction differs from the directory's class
	number, a line is appended to ``purify.txt``:

		<file name> TAB <predicted text> <predicted id> TAB <class id> <dir>

	All ``(feature, class id)`` pairs are finally dumped as JSON to
	``data.txt``.

	Returns:
		None.
	"""
	classifier = cv2.SVM()
	classifier.load('svm_class.xml')
	g = []
	label_list = get_labellist()
	url = 'train_images/'
	label = 0
	# The context manager guarantees the report is flushed and closed even
	# if classification raises part-way through.
	with open('purify.txt', 'w') as v:
		for class_dir in sorted(os.listdir(url)):
			images = glob.glob(url + class_dir + '/*.png')
			# NOTE(review): class ids only line up with the model if this
			# filter matches the one used at training time -- confirm.
			if len(images) <= 25:
				continue
			label += 1
			print('%s %s %s %s' % (label, class_dir, label_list[label], len(images)))
			for path in images:
				img = cv2.imread(path, 0)
				img = pp.preprocess(img)
				f = train.find_feature(img.copy())
				feature = np.array(f, np.float32)
				a = classifier.predict(feature)

				if a != label:
					print('%s %s %s' % (a, label, int(a)))
					# basename instead of split('/')[2]: works whichever
					# separator glob puts before the file name.
					v.write(os.path.basename(path) + '\t' + label_list[int(a)] + ' ' + str(a) + '\t' + str(label) + ' ' + class_dir + '\n')

				# tolist() yields plain Python floats; list(feature) would
				# keep numpy float32 scalars, which json cannot serialise.
				g.append((feature.tolist(), label))

	with open('data.txt', 'w') as outfile:
		json.dump(g, outfile)
Beispiel #7
0
def purify_train():
	"""Re-classify the training images and report every disagreement.

	Loads the SVM from ``svm_class.xml`` and walks ``train_images/`` in
	sorted order.  Each sub-directory with more than 25 ``*.png`` files is
	counted as the next class (numbering starts at 1).  Every image in such
	a directory is pre-processed, converted to a feature vector and
	classified.  When the prediction differs from the directory's class
	number, a line is appended to ``purify.txt``:

		<file name> TAB <predicted text> <predicted id> TAB <class id> <dir>

	All ``(feature, class id)`` pairs are finally dumped as JSON to
	``data.txt``.

	Returns:
		None.
	"""
	classifier = cv2.SVM()
	classifier.load('svm_class.xml')
	g = []
	label_list = get_labellist()
	url = 'train_images/'
	label = 0
	# The context manager guarantees the report is flushed and closed even
	# if classification raises part-way through.
	with open('purify.txt', 'w') as v:
		for class_dir in sorted(os.listdir(url)):
			images = glob.glob(url + class_dir + '/*.png')
			# NOTE(review): class ids only line up with the model if this
			# filter matches the one used at training time -- confirm.
			if len(images) <= 25:
				continue
			label += 1
			print('%s %s %s %s' % (label, class_dir, label_list[label], len(images)))
			for path in images:
				img = cv2.imread(path, 0)
				img = pp.preprocess(img)
				f = train.find_feature(img.copy())
				feature = np.array(f, np.float32)
				a = classifier.predict(feature)

				if a != label:
					print('%s %s %s' % (a, label, int(a)))
					# basename instead of split('/')[2]: works whichever
					# separator glob puts before the file name.
					v.write(os.path.basename(path) + '\t' + label_list[int(a)] + ' ' + str(a) + '\t' + str(label) + ' ' + class_dir + '\n')

				# tolist() yields plain Python floats; list(feature) would
				# keep numpy float32 scalars, which json cannot serialise.
				g.append((feature.tolist(), label))

	with open('data.txt', 'w') as outfile:
		json.dump(g, outfile)
Beispiel #8
0
			file=open(url+i+'/utf8',"r")
			i_uni=file.read()
			i_uni=i_uni[:-1]
			label_list.append(i_uni)
			label+=1
		else:
			continue
		print str(label),i,label_list[label],len(s_list)
		int test=10;
		for j in s_list:
			
			if(!test-=1)
				break;
			img=cv2.imread(j,0)
			img=pp.preprocess(img)
			f =train.find_feature(img.copy())
			# print len(f)
			s = [label,f]
			train_set.append(s)
	f=open('label','w')
	for l in label_list:
		f.write(l+'\n')
	f.close()

	shuffle(train_set)
	f_list = []
	label = []
	for t in train_set:
		label.append(t[0])
		f_list.append(t[1])
	samples = np.array(f_list,np.float32)