def gen_train_sample(im):
    """Segment characters out of a grayscale page image and save each
    crop as a labelled training sample under samp/.

    im : single-channel image; every external contour with area > 20 is
         treated as one character candidate.

    Side effects: writes one PNG per character, named
    samp/zsamp47_8_<predicted-label>_<index>.png.  The label comes from
    the already-loaded ``train.classifier`` (0 if feature extraction
    raises IndexError on a degenerate crop).
    """
    img = pp.preprocess(im.copy())
    height, width = im.shape
    contours2, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_NONE)
    # keep only contours big enough to plausibly be characters
    contours = [cnt for cnt in contours2 if cv2.contourArea(cnt) > 20]
    # visit contours in ascending order of area
    areas = [cv2.contourArea(c) for c in contours]
    order = [idx for _, idx in sorted(zip(areas, range(len(contours))))]
    for i, j in enumerate(order):
        x, y, w, h = cv2.boundingRect(contours[j])
        # BUG FIX: y-1 / x-1 became a *negative* index (wrapping to the
        # far side of the image) whenever a contour touched the border;
        # clamp the top-left corner to 0 instead.
        y0 = max(y - 1, 0)
        x0 = max(x - 1, 0)
        box = im[y0:y + h + 1, x0:x + w + 1]
        char = pp.preprocess(box.copy())
        try:
            f = train.find_feature(char)
            fu = train.np.array(f, train.np.float32)
            # BUG FIX: the original stored the prediction in ``t``,
            # shadowing the sequence being iterated; use a fresh name.
            label = train.classifier.predict(fu)
        except IndexError:
            label = 0
        cv2.imwrite('samp/zsamp47_8_' + str(int(label)) + '_' + str(i)
                    + '.png', box)
def gen_train_sample(im):
    """Cut a grayscale page image into per-character crops and write
    each one to samp/ with a predicted-label filename.

    Characters are the external contours of the preprocessed image
    whose area exceeds 20; they are handled smallest-area first.
    Filenames are samp/zsamp47_8_<label>_<index>.png, where <label> is
    the prediction of the preloaded ``train.classifier`` (0 when the
    feature extractor raises IndexError).
    """
    img = pp.preprocess(im.copy())
    height, width = im.shape
    contours2, hierarchy = cv2.findContours(img, cv2.RETR_EXTERNAL,
                                            cv2.CHAIN_APPROX_NONE)
    candidates = [c for c in contours2 if cv2.contourArea(c) > 20]
    # sort contour indices by area (ascending)
    ranked = sorted(range(len(candidates)),
                    key=lambda k: cv2.contourArea(candidates[k]))
    for sample_no, k in enumerate(ranked):
        x, y, w, h = cv2.boundingRect(candidates[k])
        # BUG FIX: the 1px margin used y-1/x-1 directly, which wraps to
        # a negative index for border-touching contours — clamp at 0.
        top = max(y - 1, 0)
        left = max(x - 1, 0)
        box = im[top:y + h + 1, left:x + w + 1]
        char = pp.preprocess(box.copy())
        try:
            feat = train.np.array(train.find_feature(char),
                                  train.np.float32)
            # BUG FIX: prediction no longer clobbers the loop sequence
            # name ``t`` as in the original.
            pred = train.classifier.predict(feat)
        except IndexError:
            pred = 0
        cv2.imwrite('samp/zsamp47_8_' + str(int(pred)) + '_'
                    + str(sample_no) + '.png', box)
def __init__(self, char):
    """Wrap one character image: keep the raw glyph, compute its
    feature vector, and classify it with the trained recognizer.

    char : single-channel character image (``char.shape`` is (rows,
           cols)).

    NOTE(review): the instance is also published through the
    module-global ``cur_char`` — legacy hook, confirm callers before
    removing.
    """
    global cur_char
    cur_char = self
    # CONSISTENCY FIX: the sibling variant of this constructor uses the
    # correctly spelt ``height``; keep the old misspelt ``hight`` as a
    # backward-compatible alias for any existing callers.
    self.height, _ = char.shape
    self.hight = self.height
    self.data = char
    self.feature = np.array(train.find_feature(self.data.copy()),
                            np.float32)
    self.label = train.recognize(self.feature)
def train_svm(): # CV2 SVM svm_params = dict( kernel_type = cv2.SVM_RBF, svm_type = cv2.SVM_C_SVC, C=9.34, gamma=15.68 ) svm=cv2.SVM() label_list=[] label_list.append('a') url='train_images/' train_set = [] s_list=sorted(os.listdir(url)) label = 0 for i in s_list: s_list=glob.glob(url+i+'/*.png') # if(len(s_list)>25): if(len(s_list)>500): file=open(url+i+'/utf8',"r") i_uni=file.read() i_uni=i_uni[:-1] label_list.append(i_uni) label+=1 else: continue print str(label),i,label_list[label],len(s_list) int test=10; for j in s_list: if(!test-=1) break; img=cv2.imread(j,0) img=pp.preprocess(img) f =train.find_feature(img.copy()) # print len(f) s = [label,f] train_set.append(s) f=open('label','w') for l in label_list: f.write(l+'\n') f.close() shuffle(train_set) f_list = [] label = [] for t in train_set: label.append(t[0]) f_list.append(t[1]) # np.savetxt('feature.txt',f_list) # np.savetxt('label.txt',label) # samples = np.loadtxt('feature.txt',np.float32) # responses = np.loadtxt('label.txt',np.float32) # responses = responses.reshape((responses.size,1)) samples = np.array(f_list,np.float32) responses = np.array(label,np.float32) print 'auto training initiated' print 'please wait.....' svm.train(samples,responses,params=svm_params) # svm.train_auto(samples,responses,None,None,params=svm_params) svm.save("svm_class.xml")
def __init__(self, char):
    """Build a character record from a glyph image: store the image,
    derive its feature vector, and classify it.

    char : single-channel character image; its row count becomes
           ``self.height``.

    The new instance is also exposed via the module-global
    ``cur_char`` (legacy hook used elsewhere).
    """
    global cur_char
    cur_char = self
    rows, _cols = char.shape
    self.height = rows
    self.data = char
    raw_feature = train.find_feature(self.data.copy())
    self.feature = np.array(raw_feature, np.float32)
    self.label = train.recognize(self.feature)
def purify_train(): classifier = cv2.SVM() classifier.load('svm_class.xml') g=[] label_list=get_labellist() # label_list.append('a') url='train_images/' v=open('purify.txt','w') train_set = [] s_list=sorted(os.listdir(url)) label = 0 for i in s_list: s_list=glob.glob(url+i+'/*.png') if(len(s_list)>25): # file=open(url+i+'/utf8',"r") # i_uni=file.read() # i_uni=i_uni[:-1] # label_list.append(i_uni) label+=1 else: # for i in s_list1: continue print str(label),i,label_list[label],len(s_list) for j in s_list: img=cv2.imread(j,0) # w=find_vlines(img.copy()) img=pp.preprocess(img) f =train.find_feature(img.copy()) feature = np.array(f,np.float32) a = classifier.predict(feature) if a !=label: q=j.split('/') print a,label,int(a) # print label_list[int(a)],i v.write(q[2]+'\t'+label_list[int(a)]+' '+str(a)+'\t'+str(label)+' '+i+'\n') # cv2.imwrite('train_im/'+i+'/'+q[2],im) # os.rename(j,'train_im/'+i+'/'+q[2]) feature=list(feature) g.append((feature,label)) # print g with open('data.txt','w') as outfile: json.dump(g,outfile)
def purify_train(): classifier = cv2.SVM() classifier.load('svm_class.xml') g=[] label_list=get_labellist() # label_list.append('a') url='train_images/' v=open('purify.txt','w') train_set = [] s_list=sorted(os.listdir(url)) label = 0 for i in s_list: s_list=glob.glob(url+i+'/*.png') if(len(s_list)>25): # file=open(url+i+'/utf8',"r") # i_uni=file.read() # i_uni=i_uni[:-1] # label_list.append(i_uni) label+=1 else: # for i in s_list1: continue print str(label),i,label_list[label],len(s_list) for j in s_list: img=cv2.imread(j,0) # w=find_vlines(img.copy()) img=pp.preprocess(img) f =train.find_feature(img.copy()) feature = np.array(f,np.float32) a = classifier.predict(feature) if a !=label: q=j.split('/') print a,label,int(a) # print label_list[int(a)],i v.write(q[2]+'\t'+label_list[int(a)]+' '+str(a)+'\t'+str(label)+' '+i+'\n') # cv2.imwrite('train_im/'+i+'/'+q[2],im) # os.rename(j,'train_im/'+i+'/'+q[2]) feature=list(feature) g.append((feature,label)) # print g with open('data.txt','w') as outfile: json.dump(g,outfile)
file=open(url+i+'/utf8',"r") i_uni=file.read() i_uni=i_uni[:-1] label_list.append(i_uni) label+=1 else: continue print str(label),i,label_list[label],len(s_list) int test=10; for j in s_list: if(!test-=1) break; img=cv2.imread(j,0) img=pp.preprocess(img) f =train.find_feature(img.copy()) # print len(f) s = [label,f] train_set.append(s) f=open('label','w') for l in label_list: f.write(l+'\n') f.close() shuffle(train_set) f_list = [] label = [] for t in train_set: label.append(t[0]) f_list.append(t[1]) samples = np.array(f_list,np.float32)