def return_pp(img_dir, bins=34):
    """Return the percentage of pixels per histogram bucket for a directory.

    All images under img_dir are converted to vectors, summed element-wise,
    and the sum image is histogrammed into `bins` buckets (default 34, the
    original hard-coded value).  Each bucket count is then expressed as a
    percentage of the pixel count of a single image.

    img_dir : directory containing the images to process.
    bins    : number of histogram buckets.
    Returns a numpy array of per-bucket percentages (sums to ~100).
    """
    names = get_abs_names.get_files(img_dir)
    names.sort()
    image_vector = [initial_processing.imageToVector(name) for name in names]

    # Element-wise sum of all image vectors.  (The original seeded the sum
    # with the last image and added the rest -- same result, stated plainly.)
    sum_img = image_vector[0]
    for vect in image_vector[1:]:
        sum_img = sum_img + vect

    hist_img = numpy.histogram(sum_img, bins=bins)

    # Use the first image's dimensions as the per-image pixel count;
    # assumes all images in the directory share one size -- TODO confirm.
    img_for_dimension = Image.open(names[0])
    total_pixels = float(img_for_dimension.size[0] * img_for_dimension.size[1])
    percent_of_pixels = (hist_img[0] / total_pixels) * 100
    return percent_of_pixels
def testdb(signature_images,test_set,train_set,mean_img_vect_for_test,eigen_selected_for_test,count_of_dots_original_path,flag_for_rename):
    """Test the trained model against test_set and report the success rate.

    Each test image is mean-subtracted, projected onto the selected eigen
    basis, and matched to the nearest training signature (2-norm).  The
    recognized class is compared with the class encoded in the test file's
    path; class = second-to-last path component after optional renaming.

    flag_for_rename : when 1, '_' (or '.') separators in the file names are
        mapped back to os.sep before the class component is extracted.
    Returns the success rate as a percentage (0-100).
    """
    success = 0
    for test_path in test_set:
        print("testing undergoing for the image: " + test_path)
        # Project the mean-subtracted test image onto the eigen basis.
        test_vect = ip.imageToVector(test_path)
        mean_subtracted = test_vect - mean_img_vect_for_test
        mapped = numpy.matrix(numpy.transpose(mean_subtracted)) * numpy.matrix(eigen_selected_for_test)

        # Nearest training signature by Euclidean (2-) norm.
        norms = [scipy.linalg.norm(sig - mapped, 2) for sig in signature_images]
        recognized = train_set[norms.index(min(norms))]

        # Undo the file-name flattening so the class directory reappears.
        if flag_for_rename == 1:
            if test_path.find('_') >= 0:
                new_i = test_path.replace('_', os.sep)
                new_recognized = recognized.replace('_', os.sep)
            else:
                new_i = test_path.replace('.', os.sep, count_of_dots_original_path + 1)
                new_recognized = recognized.replace('.', os.sep, count_of_dots_original_path + 1)
        else:
            new_i = test_path
            new_recognized = recognized

        taken_parts = new_i.split(os.sep)
        recognized_parts = new_recognized.split(os.sep)
        n = len(recognized_parts)
        # The class is the parent-directory component of the path.
        taken_class = taken_parts[n - 2]
        recognized_class = recognized_parts[n - 2]
        print("taken class : " + taken_class)
        print("recognized class : " + recognized_class)
        if taken_class == recognized_class:
            print("sucess")
            success += 1
        else:
            print("failure")

    print("printing sucess")
    print(" identified %d images out of %d images " % (success, len(test_set)))
    return (float(success) / len(test_set)) * 100
def create_xs_ys(self):
    """Populate self.xs (image vectors) and self.ys (class numbers).

    Scans self.IMAGE_DIRECTORY (honouring self.include / self.exclude /
    self.ftypes) and, for every file whose name contains one of the
    self.keywords patterns, appends the vectorised image to self.xs and
    the matching class number to self.ys.
    """
    self.files_list = lslR.get_files(directory=self.IMAGE_DIRECTORY,
                                     include=self.include,
                                     exclude=self.exclude,
                                     ftype=self.ftypes)
    for fname in self.files_list:
        # keywords maps class number (0, 1, 2, ...) -> pattern string.
        # NOTE(review): the pattern is matched as a plain substring via
        # rfind, not as a regular expression -- confirm that is intended.
        for key, value in self.keywords.items():
            if fname.rfind(value) != -1:
                self.xs.append(initial_processing.imageToVector(fname))
                self.ys.append(key)
mean_img_vect sum_img_vect """ for i in range(total_dimensions_per_image): mean_img_vect.append(0) sum_img_vect.append(0) """ we might have to convert images to vectors before processing, so imageToVector method in initial_processing is called and all individual images are appended to total_img_vect """ for i in images : image_vect=ip.imageToVector(i) total_img_vect.append(image_vect) """ we might have to number of images to calculate mean/sum of the images """ total_no_of_images=len(total_img_vect) """finding the total of all the images""" for i in range(total_no_of_images): sum_img_vect=sum_img_vect+total_img_vect[i] """ finding the mean of all the images """ mean_img_vect=sum_img_vect/total_no_of_images
def traindb(image_list, no_of_eigen_vectors_to_be_taken=20):
    """Train an eigen-image (PCA / eigenface-style) model.

    image_list : list of lists of image file paths, one inner list per
        class; only the flattened ordering matters here.
    no_of_eigen_vectors_to_be_taken : number of eigen dimensions to keep
        (default 20, the original hard-coded value).

    Returns (mean_img_vect, mapped_eig_large_select, cv_signature_images):
      * mean_img_vect          -- mean of all training image vectors
      * mapped_eig_large_select -- selected eigen-image basis (pixels x k)
      * cv_signature_images     -- one k-dim signature row per image
    """
    # Size the accumulators from the first image; assumes every image in
    # image_list shares that (height, width) -- TODO confirm.
    shape = numpy.asarray(Image.open(image_list[0][0])).shape
    total_dimensions_per_image = shape[0] * shape[1]

    # Flatten all classes into one list of image vectors.
    total_img_vect = []
    for class_files in image_list:
        for image_path in class_files:
            total_img_vect.append(ip.imageToVector(image_path))
    total_no_of_images = len(total_img_vect)

    # Element-wise sum, then mean, of all image vectors.  NOTE(review):
    # with integer-dtype vectors '/' floor-divides under Python 2, exactly
    # as the original did -- confirm the dtype returned by imageToVector.
    sum_img_vect = [0] * total_dimensions_per_image
    for vect in total_img_vect:
        sum_img_vect = sum_img_vect + vect
    mean_img_vect = sum_img_vect / total_no_of_images

    # Arrange images column-per-image and subtract the mean from each.
    total_img_array = numpy.transpose(numpy.asarray(total_img_vect))
    mean_for_subtraction_array = numpy.transpose(
        numpy.asarray([mean_img_vect] * total_no_of_images))
    mean_sub_img_array = total_img_array - mean_for_subtraction_array

    # Co-relation matrix via A^T * A (images-by-images, so it stays small).
    co_relation = (numpy.matrix(numpy.transpose(mean_sub_img_array)) *
                   numpy.matrix(mean_sub_img_array))

    # Eigen decomposition; eig_value is unused downstream, as before.
    eig_value, eig_vect = scipy.linalg.eig(co_relation)

    # Map the eigenvectors back into image space and keep the first k
    # columns.  NOTE(review): scipy.linalg.eig does not order eigenvalues,
    # so "first k" is not guaranteed to be "largest k" -- confirm intent.
    mapped_eig = numpy.matrix(mean_sub_img_array) * numpy.matrix(eig_vect)
    mapped_eig_array = numpy.asarray(mapped_eig)
    mapped_eig_large_select = mapped_eig_array[:, 0:no_of_eigen_vectors_to_be_taken]

    # One signature (projection onto the kept basis) per training image.
    # numpy.zeros replaces the bare zeros() of the original, which only
    # resolved through a star import; the rest of this function already
    # uses the numpy. prefix.
    cv_signature_images = numpy.zeros(
        (total_no_of_images, no_of_eigen_vectors_to_be_taken))
    mean_sub_img_matrix_transpose = numpy.transpose(numpy.matrix(mean_sub_img_array))
    for i in range(total_no_of_images):
        cv_signature_images[i] = (mean_sub_img_matrix_transpose[i] *
                                  mapped_eig_large_select)
    return mean_img_vect, mapped_eig_large_select, cv_signature_images
def test(self,image_directory,classKeys,include=None,exclude=None):
    """
    The method implements the testing part of the algorithm and
    returns the success rate as a fraction between 0 and 1.

    image_directory : directory holding the test images
    classKeys       : dict mapping a file-name substring -> class number
    include/exclude : forwarded to lslR.get_files for file selection
    Raises ZeroDivisionError if no test files are found.
    """
    test_path = image_directory
    test_classes = classKeys
    print("Starting to Test")
    test_files = lslR.get_files(directory=test_path, ftype=self.ftypes,
                                include=include, exclude=exclude)
    success = 0
    failure = 0
    for fil in test_files:
        xTest = numpy.matrix(initial_processing.imageToVector(fil))
        # -1 marks "no class pattern matched the file name".
        xTest_class = -1

        # Project the test image onto the trained eigen basis A.
        yTest = xTest * self.A.transpose()

        # Nearest training projection by Euclidean distance.  Using
        # min() with an explicit key fixes the original sort() of
        # [dist, y] pairs, which fell through to comparing the y
        # vectors themselves whenever two distances tied.
        best = min(([spatial.distance.euclidean(yTest, y), y] for y in self.y),
                   key=lambda pair: pair[0])
        recognized_class = self.yZipYs[best[1]]

        # Recover the expected class from the file name.
        for key in test_classes.keys():
            if fil.find(key) != -1:
                xTest_class = test_classes[key]
                break

        if recognized_class == xTest_class:
            success += 1
        else:
            failure += 1

    rate = (float(success) / float(success + failure))
    return rate
mean_img_vect sum_img_vect """ for i in range(total_dimensions_per_image): mean_img_vect.append(0) sum_img_vect.append(0) """ we might have to convert images to vectors before processing, so imageToVector method in initial_processing is called and all individual images are appended to total_img_vect """ for i in images: image_vect = ip.imageToVector(i) total_img_vect.append(image_vect) """ we might have to number of images to calculate mean/sum of the images """ total_no_of_images = len(total_img_vect) """finding the total of all the images""" for i in range(total_no_of_images): sum_img_vect = sum_img_vect + total_img_vect[i] """ finding the mean of all the images """ mean_img_vect = sum_img_vect / total_no_of_images
def testdb(signature_images,test_set,train_set,mean_img_vect_for_test,eigen_selected_for_test,trained_images_per_class,flag_for_rename):
    """Quiet variant of testdb: score the test set against the model.

    Each test image is mean-subtracted, projected onto the selected eigen
    basis, and matched to the nearest training signature (2-norm); the
    recognized class (second-to-last path component, after optional
    renaming) is compared with the test image's own class component.

    Returns the success rate as a FRACTION between 0 and 1.  The original
    `sucess/len(test_set)` was integer division under Python 2 and
    silently truncated every non-perfect score to 0 -- fixed with an
    explicit float() as the sibling testdb already does.
    """
    success = 0
    for test_path in test_set:
        # Project the mean-subtracted test image onto the eigen basis.
        test_vect = ip.imageToVector(test_path)
        mean_subtracted = test_vect - mean_img_vect_for_test
        mapped = numpy.matrix(numpy.transpose(mean_subtracted)) * numpy.matrix(eigen_selected_for_test)

        # Nearest training signature by Euclidean (2-) norm.
        norms = [scipy.linalg.norm(sig - mapped, 2) for sig in signature_images]
        recognized = train_set[norms.index(min(norms))]

        # Undo the file-name flattening so the class directory reappears.
        if flag_for_rename == 1:
            if test_path.find('_') >= 0:
                new_i = test_path.replace('_', os.sep, 1)
                # NOTE(review): unlike new_i this replaces ALL underscores
                # (no count argument), as in the original -- confirm intent.
                new_recognized = recognized.replace('_', os.sep)
            else:
                new_i = test_path.replace('.', os.sep, 1)
                new_recognized = recognized.replace('.', os.sep, 1)
        else:
            new_i = test_path
            new_recognized = recognized

        taken_parts = new_i.split(os.sep)
        recognized_parts = new_recognized.split(os.sep)
        n = len(recognized_parts)
        # The class is the parent-directory component of the path.
        if taken_parts[n - 2] == recognized_parts[n - 2]:
            success += 1

    return float(success) / len(test_set)