Esempio n. 1
0
def dct_main(database):
    """Compute and print the DCT result of every image in a database.

    Args:
        database: directory handed to ``get_abs_names.get_files`` to obtain
            the image paths.

    Returns:
        A list holding the value returned by ``dct1.imagetoDct`` for each
        path, in the order ``get_files`` produced them.  (The original
        printed the list and returned nothing; returning it lets callers
        reuse the result without changing what is printed.)
    """
    # The name "eigen_vect" is kept from the original; the entries are
    # whatever dct1.imagetoDct returns for a single image path.
    eigen_vect = [dct1.imagetoDct(path)
                  for path in get_abs_names.get_files(database)]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Printing eigen vectors of all the images")
    print(eigen_vect)
    return eigen_vect
Esempio n. 2
0
def return_pp(img_dir):
    """Return the percentage of pixels per histogram bucket of the summed image.

    Every file listed by ``get_abs_names.get_files(img_dir)`` is converted
    with ``initial_processing.imageToVector``; the results are added together
    and a 34-bin ``numpy.histogram`` of that sum is taken.  Each bin count is
    then expressed as a percentage of the pixel count (width * height) of the
    first image in sorted order.

    Args:
        img_dir: directory passed to ``get_abs_names.get_files``.

    Returns:
        The histogram counts divided by the pixel count, times 100.
    """
    paths = get_abs_names.get_files(img_dir)
    paths.sort()  # in-place, as before, so the first path is deterministic

    vectors = [initial_processing.imageToVector(path) for path in paths]

    # Accumulate starting from the last vector and then add the others in
    # order -- the same sequence of additions the original performed.
    total = vectors[-1]
    for vector in vectors[:-1]:
        total = total + vector

    bucket_counts = numpy.histogram(total, bins=34)[0]

    # Only the dimensions of the first image are used for normalisation.
    size = Image.open(paths[0]).size
    pixel_count = float(size[0] * size[1])
    percent_of_pixels = (bucket_counts / pixel_count) * 100
    return percent_of_pixels
Esempio n. 3
0
def _extract_class_names(split_image_names, no_of_images):
    """Return the class name (second-to-last path component) of each image."""
    # The position is derived from the first image and reused for all of
    # them, exactly as the original did.
    class_position = len(split_image_names[0]) - 2
    return [split_image_names[i][class_position] for i in range(no_of_images)]


def _move_class_into_directory(image_path, separator_is_underscore):
    """Turn the class prefix of a flat file name into its own path component."""
    # Only the file name is rewritten: a '.' or '_' in a parent directory
    # must not be mistaken for the class separator.
    name_start = image_path.rfind(os.sep) + 1  # 0 when there is no directory
    directory, file_name = image_path[:name_start], image_path[name_start:]
    if separator_is_underscore:
        file_name = file_name.replace('_', os.sep)
    else:
        file_name = file_name.replace('.', os.sep, 1)
    return directory + file_name


def pre_process(pathtoimages):
    """Split an image database into train/test sets, train, then test.

    The image paths returned by ``lslR.get_files`` are sorted and the class of
    each image is read from the second-to-last component of its split path
    (as produced by ``return_split_file_names``).  If that yields at most one
    class the directory is treated as flat: the class is then taken from the
    file name, separated from the rest by ``'_'`` (if the first file name
    contains one) or by the first ``'.'``.

    One randomly chosen image per class is held out for testing; the rest is
    passed to ``train_database.traindb`` and the outcome is evaluated with
    ``test_database.testdb``.

    The images are grouped by position in the sorted list, assuming every
    class has the same number of images (``no_of_images // no_of_classes``).
    Images left over after the last full group are ignored.

    Args:
        pathtoimages: directory passed to ``lslR.get_files``.

    Returns:
        Whatever ``test_database.testdb`` returns.

    Raises:
        IndexError: if no image files are found (the original failed with the
            same exception type when indexing the empty list).
    """
    images = lslR.get_files(pathtoimages)
    if not images:
        raise IndexError("no image files found under %r" % (pathtoimages,))
    images.sort()  # groups the images of each class together

    split_image_names, no_of_images = return_split_file_names(images)
    class_names = _extract_class_names(split_image_names, no_of_images)
    no_of_classes = len(set(class_names))

    # 1 signals a flat directory layout to test_database.testdb.
    flag_for_testing = 0
    if no_of_classes <= 1:
        flag_for_testing = 1

        first_file_name = images[0][images[0].rfind(os.sep) + 1:]
        separator_is_underscore = '_' in first_file_name

        images_name_modified = [
            _move_class_into_directory(images[i], separator_is_underscore)
            for i in range(no_of_images)
        ]
        split_image_names, no_of_images = return_split_file_names(
            images_name_modified)
        class_names = _extract_class_names(split_image_names, no_of_images)
        no_of_classes = len(set(class_names))

    # Floor division keeps this an int on Python 3 as well as Python 2.
    no_of_images_per_class = no_of_images // no_of_classes

    # Consecutive runs of the sorted list form the classes.
    entire_class = [
        images[start:start + no_of_images_per_class]
        for start in range(0, no_of_classes * no_of_images_per_class,
                           no_of_images_per_class)
    ] if no_of_images_per_class else [[] for _ in range(no_of_classes)]

    test_data_set = []
    train_data_set = []
    for class_images in entire_class:
        # int(random.random() * n) is kept (rather than randrange) so that a
        # seeded run picks the same images as before.
        test_index = int(random.random() * no_of_images_per_class)
        test_data_set.append(class_images.pop(test_index))
        train_data_set.append(class_images)

    # testdb wants the training images as one flat list as well.
    entire_train_data_as_list = [
        path for class_images in train_data_set for path in class_images
    ]

    mean_img, eigen_selected, signature_images_for_train_set = \
        train_database.traindb(train_data_set)

    # One image per class was held out for testing.
    no_images_trained_per_class = no_of_images_per_class - 1

    return test_database.testdb(
        signature_images_for_train_set,
        test_data_set,
        entire_train_data_as_list,
        mean_img,
        eigen_selected,
        no_images_trained_per_class,
        flag_for_testing,
    )
Esempio n. 4
0
def _run_chosen_algorithm(algo_name, database_path):
    """Run (or announce) the algorithm registered under ``algo_name``."""
    if algo_name == "PCA":
        PCA_main.main_pca(database_path)
    elif algo_name == "DCT":
        print("DCT is to be called")
    elif algo_name == "LPP":
        print("LPP is to be called")


def decide_algo(input_str):
    """Pick the recognition algorithm for a database and run it.

    One randomly chosen image of the database is reduced to its metrics with
    ``getmetrics.return_metrics`` and compared with the image at the same
    sorted position in every database listed in the ``mapping_dataset_algo``
    pickle file.  On an exact match the algorithm stored for that database is
    run.  Otherwise the database is copied next to the first known database,
    registered with ``add_database.add_db``, the best algorithm is chosen via
    ``compare_with_all.choose_best_algo``, the mapping file is rewritten and
    the chosen algorithm is run.

    Args:
        input_str: directory of the database to classify.

    Returns:
        None.
    """
    wrapper_test_image_names = lslR.get_files(input_str)
    wrapper_test_image_names.sort()
    no_of_images = len(wrapper_test_image_names)

    # Truncate instead of rounding: random.random() is in [0.0, 1.0), so
    # int(...) is always a valid index, whereas round() could produce
    # no_of_images itself and raise IndexError.
    randomly_selected_image = int(random.random() * no_of_images)

    metric = getmetrics.return_metrics(
        wrapper_test_image_names[randomly_selected_image])

    # NOTE: pickle.load executes arbitrary code from the file; the mapping
    # file must only ever be written by this program.
    # NOTE(review): the file is opened in text mode as in the original so
    # existing mapping files keep loading under Python 2; Python 3 would need
    # "rb"/"wb" here -- confirm before porting.
    with open("mapping_dataset_algo", "r") as fp_for_trained_db:
        trained_datasets = pickle.load(fp_for_trained_db)

    for entry in trained_datasets:
        database_path = entry[0]
        face_names = lslR.get_files(database_path)
        face_names.sort()
        # "<=": an index equal to the length is already out of range.
        if len(face_names) <= randomly_selected_image:
            continue
        trained_metric = getmetrics.return_metrics(
            face_names[randomly_selected_image])

        # "==" instead of metric.__eq__(...): the dunder call returns the
        # truthy NotImplemented for mismatched types.
        if metric == trained_metric:
            print("Data base identified")
            print("Identified database is")
            print(database_path)
            _run_chosen_algorithm(entry[1], database_path)
            return

    # Database not identified: store a copy beside the known databases.
    trained_data_path = trained_datasets[0][0]
    trained_data_path = trained_data_path[0:trained_data_path.rindex(os.sep)]
    dest_path = trained_data_path + os.sep + get_db_name(input_str)

    print("Data base not identifed")
    print("Adding database to our trained database sets")
    shutil.copytree(input_str, dest_path)
    add_database.add_db(dest_path)
    print("Database added")
    print(dest_path)

    index_best_algo_chosen = compare_with_all.choose_best_algo(
        dest_path, trained_datasets)
    best_algo_chosen = algorithms[index_best_algo_chosen]
    print("Algorithm chosen is " + best_algo_chosen + " bacause it has the more efficiency then other algorithms on this database")

    trained_datasets.append([dest_path, best_algo_chosen])

    # Rewrite the whole mapping file; "with" guarantees it is flushed/closed.
    with open("mapping_dataset_algo", "w+") as fp_to_update_trained_db:
        pickle.dump(trained_datasets, fp_to_update_trained_db)

    _run_chosen_algorithm(best_algo_chosen, dest_path)
Esempio n. 5
0
def _class_name_of_each_image(split_image_names, no_of_images):
    """Return the class name (second-to-last path component) of each image."""
    # The position is derived from the first image and reused for all of
    # them, exactly as the original did.
    class_position = len(split_image_names[0]) - 2
    return [split_image_names[i][class_position] for i in range(no_of_images)]


def pre_process(pathtoimages):
    """Split an image database into per-class train sets and a test set.

    The image paths returned by ``lslR.get_files`` are sorted and the class of
    each image is read from the second-to-last component of its split path
    (as produced by ``return_split_file_names``).  If that yields at most one
    class the directory is treated as flat and the class is taken from the
    file name instead: every ``'_'`` is turned into ``os.sep`` when the first
    path contains one, otherwise the first ``count('.') + 1`` dots are, where
    the count is the number of dots in ``pathtoimages``.

    Classes may have different sizes.  The sorted image list is cut into
    consecutive runs whose lengths are the per-class counts, taken in sorted
    class-name order; one randomly chosen image of each run becomes the test
    image of that class.

    Args:
        pathtoimages: directory passed to ``lslR.get_files``.

    Returns:
        A tuple ``(train_data_set, entire_train_data_as_list, no_of_classes,
        test_data_set, count_of_dots_original_path, flag_for_testing)``:
        the per-class lists of training paths, the same paths as one flat
        list, the number of classes, one test path per class, the number of
        dots in ``pathtoimages`` (0 unless the flat layout was detected) and
        1 if the flat layout was detected, else 0.

    Raises:
        IndexError: if no image files are found (the original failed with the
            same exception type when indexing the empty list).
    """
    images = lslR.get_files(pathtoimages)
    if not images:
        raise IndexError("no image files found under %r" % (pathtoimages,))
    images.sort()  # groups the images of each class together

    split_image_names, no_of_images = return_split_file_names(images)
    class_names = _class_name_of_each_image(split_image_names, no_of_images)
    unique_class_names = sorted(set(class_names))
    no_of_classes = len(unique_class_names)

    flag_for_testing = 0
    count_of_dots_original_path = 0
    if no_of_classes <= 1:
        flag_for_testing = 1

        # NOTE(review): the '_' lookup covers the whole path, so an
        # underscore in a parent directory also selects this mode. Left
        # unchanged because the dot count is part of the return value and
        # callers presumably mirror this logic -- confirm.
        separator_is_underscore = images[0].find('_') >= 0

        # Dots inside the directory part are replaced too, hence the +1 to
        # reach the first dot of the file name.
        count_of_dots_original_path = pathtoimages.count('.')
        images_name_modified = []
        for i in range(no_of_images):
            if separator_is_underscore:
                modified = images[i].replace('_', os.sep)
            else:
                modified = images[i].replace(
                    '.', os.sep, count_of_dots_original_path + 1)
            images_name_modified.append(modified)

        split_image_names, no_of_images = return_split_file_names(
            images_name_modified)
        class_names = _class_name_of_each_image(
            split_image_names, no_of_images)
        unique_class_names = sorted(set(class_names))
        no_of_classes = len(unique_class_names)

    # Cut the sorted image list into one run per class.
    entire_class = []
    num_of_images_each_class = []
    img_counter = 0
    for class_name in unique_class_names:
        class_size = class_names.count(class_name)  # counted once per class
        num_of_images_each_class.append(class_size)
        entire_class.append(images[img_counter:img_counter + class_size])
        img_counter += class_size

    test_data_set = []
    train_data_set = []
    for class_images, class_size in zip(entire_class,
                                        num_of_images_each_class):
        # int(random.random() * n) is kept (rather than randrange) so that a
        # seeded run picks the same images as before.
        test_index = int(random.random() * class_size)
        test_data_set.append(class_images.pop(test_index))
        train_data_set.append(class_images)

    entire_train_data_as_list = []
    for class_images in train_data_set:
        entire_train_data_as_list.extend(class_images)

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print("Total number of trained images = %d " % (len(entire_train_data_as_list)))

    return (train_data_set, entire_train_data_as_list, no_of_classes,
            test_data_set, count_of_dots_original_path, flag_for_testing)
Esempio n. 6
0
import sys
import Image
import scipy.linalg
import numpy.matlib
from numpy.matlib import zeros

# Module-level working lists for this script. They are only created (empty)
# here; nothing in the visible part of the script fills them.
images_abs_names=[] # paths of all the image files
total_img_vect=[]   # every image in vector form
mean_img_vect=[]   # mean of all the images
sum_img_vect=[]   # sum of all the images
mean_for_subtraction=[] # copies of the mean, used when subtracting it

# NOTE(review): the string below mentions get_abs_names, but the call that
# follows goes through lslR.get_files -- confirm which module is intended.
""" get_files method in the module get_abs_names is called """

# The image directory is the first command-line argument.
src_img_dir=sys.argv[1]
images=lslR.get_files(src_img_dir)    # image list for the directory given on the command line

# The bare string below is an expression statement used as a block comment:
# the first image is opened only to learn the image dimensions.
""" 
we might have to initialise mean_image_vect and sum_image_vect,
so we might required to know the dimension of each image,
so one test image is read and then all the required values are found out

"""
shape_image=Image.open(images[0])
shape_image_array=numpy.asarray(shape_image)
shape=shape_image_array.shape
# Product of the first two axes of the array shape; presumably the pixel
# count of a 2-D (single channel) image -- TODO confirm the image mode.
total_dimensions_per_image=shape[0]*shape[1]

""" 
initialising all the required values such as 
mean_img_vect
Esempio n. 7
0
def train(pathtoimages):
	"""Split an image database into train and test sets, then train and test on it.

	The images listed by lslR.get_files(pathtoimages) are cut into consecutive,
	equally sized groups, one group per class. One randomly chosen image of
	every group goes into the test set; the remaining images of the group form
	the training set of that class. The training set is handed to
	train_database.traindb and its results, together with the test set, to
	test_database.testdb.

	NOTE(review): the grouping is purely positional. It assumes get_files
	returns the images ordered by class with the same number of images in
	every class; when the image count is not a multiple of the class count
	the trailing images are ignored.

	Arguments:
		pathtoimages : directory passed to lslR.get_files to list the images.

	Returns:
		None. Whatever test_database.testdb returns is discarded.

	Raises:
		IndexError : when no images are found, or when an image path has
		             fewer components than the first image's path.
	"""
	images=lslR.get_files(pathtoimages)
	no_of_images=len(images)
	if no_of_images==0:
		# An empty listing already surfaced as IndexError (from images[0]);
		# the type is kept so existing callers behave the same.
		raise IndexError("no images found in %s" %(pathtoimages))

	# The class name is the path component just before the file name. Its
	# position is taken from the first image's path and reused for all images.
	class_name_position=len(images[0].split(os.sep))-2
	set_of_class_names=set(image.split(os.sep)[class_name_position] for image in images)

	no_of_classes=len(set_of_class_names)
	# Floor division keeps this an integer under both classic and true division.
	no_of_images_per_class=no_of_images//no_of_classes

	# One random image per class goes to the test set, the rest to the train set.
	test_data_set=[]   # one image per class
	train_data_set=[]  # one list of images per class
	for class_no in range(no_of_classes):
		first_image=class_no*no_of_images_per_class
		# A fresh list per class, so removing the test image touches nothing else.
		each_class=[images[first_image+offset] for offset in range(no_of_images_per_class)]
		image_no_for_test=int(random.random()*no_of_images_per_class)
		# pop() removes exactly the chosen position rather than the first equal value.
		test_data_set.append(each_class.pop(image_no_for_test))
		train_data_set.append(each_class)

	# testdb also wants the training images as one flat list, in class order.
	entire_train_data_as_list=[]
	for class_images in train_data_set:
		entire_train_data_as_list.extend(class_images)

	# traindb does the actual training and returns three values that the
	# testing phase needs (descriptions as given by the original notes):
	# (1) mean_img : mean of all the images, a 1-d array/list
	# (2) eigen_selected : the major eigen vectors/values that were kept
	# (3) signature_images_for_train_set : signatures (mapped / eigen images)
	#     of the entire training data set
	mean_img,eigen_selected,signature_images_for_train_set=train_database.traindb(train_data_set)

	# Every class gave up exactly one image to the test set.
	no_images_trained_per_class=no_of_images_per_class-1

	# Arguments of testdb, in order: the training signatures, the test images
	# (one per class), the flat training list, the mean image, the selected
	# eigen data and the number of images trained per class.
	test_database.testdb(signature_images_for_train_set,test_data_set,entire_train_data_as_list,mean_img,eigen_selected,no_images_trained_per_class)
import Image
import scipy.linalg
import numpy.matlib
from numpy.matlib import zeros

# Module-level working lists for this script. Apart from images_abs_names,
# which is rebound to the image list further down, nothing in the visible
# part of the script fills them.
images_abs_names = []  # placeholder; rebound to the image list below
total_img_vect = []  # every image in vector form
mean_img_vect = []  # mean of all the images
sum_img_vect = []  # sum of all the images
mean_for_subtraction = []  # copies of the mean, used when subtracting it
norm_list = []  # norm values collected during the testing phase

# NOTE(review): the string below mentions get_abs_names, but the call that
# follows goes through lslR.get_files -- confirm which module is intended.
""" get_files method in the module get_abs_names is called """

# The image directory is the first command-line argument.
src_img_dir = sys.argv[1]
images = lslR.get_files(src_img_dir)

# Same list object under a second name (an alias, not a copy).
images_abs_names = images

# The bare string below is an expression statement used as a block comment:
# the first image is opened only to learn the image dimensions.
""" 
we might have to initialise mean_image_vect and sum_image_vect,
so we might required to know the dimension of each image,
so one test image is read and then all the required values are found out

"""
shape_image = Image.open(images[0])
shape_image_array = numpy.asarray(shape_image)
shape = shape_image_array.shape
# Product of the first two axes of the array shape; presumably the pixel
# count of a 2-D (single channel) image -- TODO confirm the image mode.
total_dimensions_per_image = shape[0] * shape[1]

print "printing shape of the image"