コード例 #1
ファイル: dct_main(test).py プロジェクト: d-rama/Code_PyRec
def dct_main(database):
	# Prints a heading line followed by the value of `eigen_vect`.
	#
	# NOTE(review): this excerpt is incomplete. The body of the loop over
	# `images_path` is missing, neither `images_path` nor `eigen_vect` is
	# assigned in the visible lines, and the `database` parameter is not used
	# by any visible statement.
	for i in images_path:
	# NOTE(review): loop body not present in this excerpt.
	print "Printing eigen vectors of all the images"
	print eigen_vect
コード例 #2
def return_pp(img_dir):
	# Returns `percent_of_pixels`; per the original notes this holds the
	# percentage of pixels present per bucket range.
	#
	# NOTE(review): this excerpt is incomplete. Both loop bodies are missing and
	# `names`, `image_vector` and `percent_of_pixels` are not assigned in the
	# visible lines; `img_dir` is not used by any visible statement.
	for i in range(len(names)):
	# NOTE(review): loop body not present in this excerpt.
	# Debug aid: print image_vector
	# Debug aid: print len(image_vector)

	# Iterates over every index of image_vector except the last one.
	for i in range(len(image_vector)-1):
	# NOTE(review): loop body not present in this excerpt.

	# Debug aid (sum image):
	#print "printing sum image"
	#print sum_img
	# Debug aid (histogram):
	#print "Printing histogram"
	#print hist_img

	# Debug aid (percentage of pixels present per bucket range):

	#print "Printing percentage of pixels"
	#print percent_of_pixels
	#print len(percent_of_pixels)
	#print sum(percent_of_pixels)

	return percent_of_pixels
コード例 #3
def pre_process(pathtoimages):
	# Lists the image files under `pathtoimages`, derives a class name for each
	# image from its split path, counts the classes and prepares per-class
	# train/test containers.
	#
	# NOTE(review): this excerpt is incomplete and cannot run as shown. The
	# statements that assign `split_image_names`, `no_of_images`,
	# `length_split_image_name` and
	# `temp_str_for_checking_if_underscore_is_present` are missing, as are the
	# bodies of several loops and the train/test calls described further down.
	# The comments below describe only what the visible lines do.

	# ---------------- Working lists ----------------
	images_abs_names=[] # absolute paths of all image files
	total_img_vect=[]   # all images in vector form
	mean_img_vect=[]   # mean of all images
	sum_img_vect=[]   # sum of all images
	mean_for_subtraction=[] # copies of the mean used for subtraction
	norm_list = [] # norm values produced during the testing phase
	split_image_names=[]  # split image file names, used to group images into classes
	class_names=[] # class name of every image
	each_class=[] # names belonging to a single class
	entire_class=[] # all classes together
	test_data_set=[] # test images
	train_data_set=[] # training images
	entire_train_data_as_list=[] # training images as a single list
	images_name_modified=[] # modified image names

	# ---------------- Collect the image paths ----------------
	# lslR.get_files is defined outside this excerpt; per the original notes it
	# returns the absolute path names of all images in the input directory as a
	# list.

	src_img_dir=pathtoimages  # second name for the directory path
	images=lslR.get_files(src_img_dir)

	# Debug aid (paths in the order returned by get_files):
	#print "printing absolute path file names as given by get_abs_names"
	#print images

	# NOTE(review): this binds a second name to the same object rather than
	# copying it; if `images` is a list, the in-place sort on the next line is
	# visible through `images_abs_names` too.
	images_abs_names=images
	images.sort() # sorted so that images of each class end up next to each other

	# Debug aid (paths after sorting):

	#print "printing absolute path file names after sorting"
	#print images

	# mean_img_vect and sum_img_vect are sized from the image dimensions, so
	# one image is opened here only to read its shape.
	# NOTE(review): images[0] raises IndexError when no images were found.

	shape_image=Image.open(images[0]) # any image would do; the first one is used
	shape_image_array=numpy.asarray(shape_image) # converted to an array to read its shape
	shape=shape_image_array.shape

	# Debug aid (image shape):

	#print "printing shape or the dimension of the image"
	#print shape 
	# Product of the first two shape entries (rows * columns per the original note).
	total_dimensions_per_image=shape[0]*shape[1]
	# Debug aid (total dimensions of the image):

	#print "printing total dimensions of the image"
	#print total_dimensions_per_image 

	# Fill both vectors with one zero per image dimension.

	for i in range(total_dimensions_per_image):
		mean_img_vect.append(0)
		sum_img_vect.append(0)

	# NOTE(review): the original notes describe a call at this point to a
	# function that takes the sorted `images` list and returns two values:
	#   1. split_image_names : the list of split image names
	#   2. no_of_images      : the total number of input images
	# The call itself is not present in this excerpt.


	# Debug aid (split file names):

	#print "printing split file names"
	#print split_image_names

	# NOTE(review): the step that finds the length of each split path
	# (`length_split_image_name`) is not present in this excerpt either.

	# Extract the class name of every image: the element at index
	# length_split_image_name-2 of its split path.

	for i in range(no_of_images):
		temp_class_name=split_image_names[i][length_split_image_name-2]
		class_names.append(temp_class_name)

	# Debug aid (all class names, with repetitions):
	#print "printing all class names"
	#print class_names

	set_of_class_names=set(class_names)  # set() drops the repetitions

	# Debug aid (unique class names):

	#print "printing unique set of class names"
	#print set_of_class_names

	no_of_classes=len(set_of_class_names)

	# A flat directory structure needs its file names rewritten before classes
	# can be extracted. The original reasoning: a face-recognition database is
	# expected to contain more than one class, so finding at most one class is
	# taken to mean the images were not partitioned into class directories.

	flag_for_testing=0 # becomes 1 when the directory structure is flat
	if(no_of_classes<=1): # at most one class found: treated as "not partitioned"
		flag_for_testing=1
		# str.find returns the position of '_' or -1 when it is absent.
		# NOTE(review): the string being searched is not assigned in this excerpt.
		temp_index_if_present=temp_str_for_checking_if_underscore_is_present.find('_')

		if(temp_index_if_present>=0):
			flag_for_changing_file_name=1 # separator is '_'
		else:
			flag_for_changing_file_name=0 # separator is '.' or some other symbol

		# Rewrite the image names so that os.sep is the separator throughout.
		# NOTE(review): the body of this loop is not present in this excerpt.

		for i in range(no_of_images):

		# Debug aid (modified file names):
		#print "printing modified images names"
		#print images_name_modified

		# NOTE(review): the original notes describe a second call here to the
		# split-file-names function (same inputs and return values as above);
		# the call is not present in this excerpt.


		# Debug aid (split file names):

		#print " printing list of split file names : "	
		#print split_image_names

		# NOTE(review): the step recomputing the split path length is not
		# present in this excerpt.

		class_names=[]  # start again from an empty list; the earlier entries are discarded

		# Same extraction as above; per the original note the class name sits
		# in the second-to-last position of the split path, hence "-2".
		for i in range(no_of_images):
			temp_class_name=split_image_names[i][length_split_image_name-2]
			class_names.append(temp_class_name)

		# Debug aid (all class names, with repetitions):
		#print "printing all class names"
		#print class_names

		set_of_class_names=set(class_names)  # set() drops the repetitions

		# Debug aid (unique class names):

		#print "printing unique set of class names"
		#print set_of_class_names

		no_of_classes=len(set_of_class_names)

	# NOTE(review): with int operands this is integer division under Python 2,
	# so any remainder is dropped; it raises ZeroDivisionError when
	# no_of_classes is 0. Presumably every class is expected to hold the same
	# number of images -- confirm.
	no_of_images_per_class=no_of_images/no_of_classes

	# Debug aid (number of images per class):

	#print "number of images per class = %d " %(no_of_images_per_class)
	# Arrange the images of the input directory in class order.
	# NOTE(review): the body of the inner loop is not present in this excerpt.

	for  i in range(no_of_classes):
		for j in range(no_of_images_per_class):
		entire_class.append(each_class)  # collects the images arranged by class

	# Build the train set and the test set: per the original notes, one randomly
	# selected image of each class goes to the test set and the remaining images
	# of that class go to the train set.
	# NOTE(review): the body of this loop is not present in this excerpt.

	for i in range(no_of_classes):

	# Debug aid (train data set):
	#print "Printing type of train dataset 	
	#print type(train_data_set)
	#print "printing training data set"
	#print train_data_set

	# Debug aid (test data set):
	#print "Printing type of test dataset 	
	#print type(test_data_set)
	#print "printing test data set"
	#print test_data_set
	# Matrix form, needed for multiplication per the original note.
	test_data_set_matrix=numpy.matrix(test_data_set)
	train_data_set_matrix=numpy.matrix(train_data_set)
	# Debug aid (shapes of both matrices):

	#print "Printing test data shape 	
	#print test_data_set_matrix.shape
	#print "Printing train data shape 
	#print train_data_set_matrix.shape
	# The entire training data set is needed as a single list.
	# NOTE(review): the body of the inner loop is not present in this excerpt.

	for r in  range(no_of_classes):
		for c in range(no_of_images_per_class-1):

	# NOTE(review): the original notes describe a call here to traindb (in
	# train_database), which does the training and takes train_data_set as
	# input. The call is not present in this excerpt. Documented return values:
	#   (1) mean_img : mean of all the images, a 1-d array/list
	#   (2) eigen_selected : only the major values of the eigen vector
	#   (3) signature_images_for_train_set : signatures (mapped images / eigen
	#       images) for the entire training data set
	#
	# Number of images trained per class: no_of_images_per_class - 1, because
	# one image per class is kept back for testing.


	# Debug aid (signature of the trained images):

	#print "Printing the signature/co-relation matrix of the trained image 
	#print signature_images_for_train_set

	# Debug aid (type and length of the signature variable):
	#print "signature type"
	#print type(signature_images_for_train_set)
	#print "signature length"
	#print len(signature_images_for_train_set)

	# NOTE(review): the original notes also describe a call to testdb (in
	# test_database.py), not present in this excerpt. Documented arguments:
	#   arg_1 : signature_images_for_train_set : signatures for the entire
	#           training data set (returned by train_database)
	#   arg_2 : test_data_set : test images, one randomly selected per class
	#   arg_3 : entire_train_data_as_list : entire train data set (the original
	#           input with the test images removed)
	#   arg_4 : mean_img : mean of all the images, a 1-d array/list (returned by
	#           train_database)
	#   arg_5 : eigen_selected : the major eigen values only (returned by
	#           train_database)
	#   arg_6 : no_images_trained_per_class : number of images actually trained
	#           per class
	#   arg_7 : flag_for_testing : set when the directory structure is flat
	#           rather than hierarchical

	# NOTE(review): `r` is the index variable of the last outer loop above, so
	# the value returned here is the final loop index; whether that is intended
	# cannot be told from this excerpt.
	return r
コード例 #4
ファイル: wrapper.py プロジェクト: d-rama/Code_PyRec
def decide_algo(input_str):
	# Picks one image from the directory `input_str`, computes its metrics and
	# compares them with the metrics of an image taken from each database
	# listed in the pickled file "mapping_dataset_algo". When no database
	# matches, the directory is copied beside the first listed database, passed
	# to add_database.add_db, and the mapping file is rewritten.
	#
	# NOTE(review): this excerpt is incomplete and cannot run as shown: one
	# `if` has no body, some `print` lines are indented deeper than the
	# statements around them, and `no_of_images_in_train` and `best_algo_chosen`
	# are not assigned in the visible lines.

	#print input_str
	# lslR.get_files is defined outside this excerpt.
	wrapper_test_image_names = lslR.get_files(input_str)
	# Debug aid (all image names):

	#print "image names"
	#print wrapper_test_image_names

	wrapper_test_image_names.sort()
	no_of_images=len(wrapper_test_image_names)
	# NOTE(review): random.random() lies in [0.0, 1.0), so after rounding the
	# result can equal no_of_images, which is one past the last valid index;
	# the lookups below would then raise IndexError. random.randrange(no_of_images)
	# would stay in range. An empty directory fails on any index.
	randomly_selected_image=random.random()*no_of_images # random value between 0 and no_of_images
	randomly_selected_image=int(round(randomly_selected_image)) # an index has to be an integer
	# Debug aid (index of the randomly selected image):

	#print "Index of randomly selected image = %d" %(randomly_selected_image)

	#print "number of images"
	#print len(wrapper_test_image_names)

	# test_image_stat=ImageStat.Stat(wrapper_test_image) # ImageStat object exposing the stat properties of the image

	# Decision rule, as stated in the original notes:
	#  - The decision is based on 16 metrics.
	#  - Only when the test image satisfies all 16 metrics is the given test
	#    database considered to be one of the trained data sets.
	#  - Otherwise the wrapper chooses an algorithm depending on how close the
	#    new data set is.
	#  - Over time the values are appended, keeping the metrics of the trained
	#    data sets up to date.
	#  - Only one image of the whole test data set is checked (the original
	#    analogy: one grain is often enough to tell whether the rice is boiled).
	#
	# The metrics are described in the original notes as Image.Stat properties.

	flag=0 # 0 = database not identified (the starting assumption); set to 1 on a match
	# getmetrics.return_metrics is defined outside this excerpt; per the
	# original note it returns the 16 metrics as a list.
	metric = getmetrics.return_metrics(wrapper_test_image_names[randomly_selected_image])

	# (leftover of a removed block:)
#			break   # once if database is identified then we can come out of loop  """

	# Load the list of previously trained databases.
	# NOTE(review): the file handle is not closed in the visible lines, and
	# pickle.load must only be used on a file that is trusted.
	fp_for_trained_db=open("mapping_dataset_algo","r") # opened for reading
	fp_for_trained_db.seek(0) # not required: a freshly opened file already starts at offset 0
	trained_datasets=pickle.load(fp_for_trained_db)
	for i in range(len(trained_datasets)): # check every previously trained data set
		# Element [0] of each entry is used as the database path.
		face_names=lslR.get_files(trained_datasets[i][0])
		face_names.sort()
		# NOTE(review): the body of this `if` is not present in this excerpt.
		# Presumably it guards the index used on the next line -- confirm.
		if (no_of_images_in_train<randomly_selected_image):
		trained_metric=getmetrics.return_metrics(face_names[randomly_selected_image])
		# Explicit __eq__ call; presumably the same as `metric == trained_metric` -- confirm.
		if(metric.__eq__(trained_metric)): # do all metrics of the two images match?

			print "Data base identified"
			print "Identified database is"
			print trained_datasets[i][0] # name of the identified database
			flag=1 # database identified
				# NOTE(review): the conditions selecting between these two
				# messages are not present in this excerpt.
				print "DCT is to be called"
				print "LPP is to be called"

			break   # a match was found, no need to check further databases

	if(flag==0): # database not identified
		# Derive the directory holding the trained databases from the path of
		# the first listed database: everything before its last os.sep.
		trained_data_path=trained_datasets[0][0]
		rindex_ossep=trained_data_path.rindex(os.sep)
		trained_data_path=trained_data_path[0:rindex_ossep]
		src_path=input_str  # source directory of the new database
		# Destination: <trained databases directory>/<name from get_db_name>.
		# get_db_name is defined outside this excerpt.
		dest_path=trained_data_path+os.sep
		dest_path=dest_path+get_db_name(input_str)
		print "Data base not identifed"
		print "Adding database to our trained database sets"
		shutil.copytree(src_path,dest_path) # copy the whole new database to dest_path
		add_database.add_db(dest_path) # register the copied database (defined outside this excerpt)
		print "Database added"
		print dest_path
		# NOTE(review): best_algo_chosen is not assigned in the visible lines.
		print "Algorithm chosen is " +best_algo_chosen +" bacause it has the more efficiency then other algorithms on this database"

		# Rewrite the mapping file. Mode "w+" truncates the existing file
		# before writing (the original comment said "r" mode, which was wrong).
		# NOTE(review): the handle is not closed in the visible lines, and
		# `trained_datasets` is not modified between load and dump in this excerpt.
		fp_to_update_trained_db=open("mapping_dataset_algo","w+")
		fp_to_update_trained_db.seek(0) # not required: a freshly opened file already starts at offset 0
		pickle.dump(trained_datasets,fp_to_update_trained_db)
			# NOTE(review): the conditions selecting between these two
			# messages are not present in this excerpt.
			print "DCT is to be called"
			print "LPP is to be called"
コード例 #5
ファイル: frame_work_v2.py プロジェクト: d-rama/Code_PyRec
def pre_process(pathtoimages):
	# Lists the image files under `pathtoimages`, derives a class name for each
	# image from its split path, groups the images per class and returns the
	# train/test containers together with the class count and the
	# flat-directory flag.
	#
	# NOTE(review): this excerpt is incomplete and cannot run as shown. The
	# statements that assign `split_image_names`, `no_of_images`,
	# `length_split_image_name`, `unique_class_names`,
	# `count_of_dots_original_path` and
	# `temp_str_for_checking_if_underscore_is_present` are missing, as are the
	# bodies of several loops. The comments below describe only what the
	# visible lines do.

	# ---------------- Working lists ----------------
	images_abs_names=[] # absolute paths of all image files
	total_img_vect=[]   # all images in vector form
	mean_img_vect=[]   # mean of all images
	sum_img_vect=[]   # sum of all images
	mean_for_subtraction=[] # copies of the mean used for subtraction
	norm_list = [] # norm values produced during the testing phase
	split_image_names=[]  # split image file names, used to group images into classes
	class_names=[] # class name of every image
	each_class=[] # names belonging to a single class
	entire_class=[] # all classes together
	test_data_set=[] # test images
	train_data_set=[] # training images
	entire_train_data_as_list=[] # training images as a single list
	images_name_modified=[] # modified image names

	# ---------------- Collect the image paths ----------------
	# lslR.get_files is defined outside this excerpt; per the original notes it
	# returns the absolute path names of all images in the input directory as a
	# list.

	src_img_dir=pathtoimages  # second name for the directory path
	images=lslR.get_files(src_img_dir)

	# Debug aid (paths in the order returned by get_files):
	#print "printing absolute path file names as given by get_abs_names"
	#print images

	# NOTE(review): this binds a second name to the same object rather than
	# copying it; if `images` is a list, the in-place sort on the next line is
	# visible through `images_abs_names` too.
	images_abs_names=images
	images.sort() # sorted so that images of each class end up next to each other

	# Debug aid (paths after sorting):

	#print "printing absolute path file names after sorting"
	#print images

	# mean_img_vect and sum_img_vect are sized from the image dimensions, so
	# one image is opened here only to read its shape.
	# NOTE(review): images[0] raises IndexError when no images were found.

	shape_image=Image.open(images[0]) # any image would do; the first one is used
	shape_image_array=numpy.asarray(shape_image) # converted to an array to read its shape
	shape=shape_image_array.shape

	# Debug aid (image shape):

	#print "printing shape or the dimension of the image"
	#print shape 
	# Product of the first two shape entries (rows * columns per the original note).
	total_dimensions_per_image=shape[0]*shape[1]
	# Debug aid (total dimensions of the image):

	#print "printing total dimensions of the image"
	#print total_dimensions_per_image 

	# Fill both vectors with one zero per image dimension.

	for i in range(total_dimensions_per_image):
		mean_img_vect.append(0)
		sum_img_vect.append(0)

	# NOTE(review): the original notes describe a call at this point to a
	# function that takes the sorted `images` list and returns two values:
	#   1. split_image_names : the list of split image names
	#   2. no_of_images      : the total number of input images
	# The call itself is not present in this excerpt.


	# Debug aid (split file names):

	#print "printing split file names"
	#print split_image_names

	# NOTE(review): the step that finds the length of each split path
	# (`length_split_image_name`) is not present in this excerpt either.

	# Extract the class name of every image: the element at index
	# length_split_image_name-2 of its split path.

	for i in range(no_of_images):
		temp_class_name=split_image_names[i][length_split_image_name-2]
		class_names.append(temp_class_name)

	# Debug aid (all class names, with repetitions):
	#print "printing all class names"
	#print class_names

	set_of_class_names=set(class_names)  # set() drops the repetitions
	# NOTE(review): the original note says the set is converted back to a list
	# here because a set does not support indexing; that statement (presumably
	# the assignment of `unique_class_names`) is not present in this excerpt.


	# Debug aid (unique class names):

	#print "printing unique set of class names"
	#print set_of_class_names

	no_of_classes=len(set_of_class_names)

	# A flat directory structure needs its file names rewritten before classes
	# can be extracted. The original reasoning: a face-recognition database is
	# expected to contain more than one class, so finding at most one class is
	# taken to mean the images were not partitioned into class directories.

	flag_for_testing=0 # becomes 1 when the directory structure is flat
	if(no_of_classes<=1): # at most one class found: treated as "not partitioned"
		flag_for_testing=1
		# str.find returns the position of '_' or -1 when it is absent.
		# NOTE(review): the string being searched is not assigned in this excerpt.
		temp_index_if_present=temp_str_for_checking_if_underscore_is_present.find('_')

		if(temp_index_if_present>=0):
			flag_for_changing_file_name=1 # separator is '_'
		else:
			flag_for_changing_file_name=0 # separator is '.' or some other symbol

		# Rewrite the image names so that os.sep is the separator throughout.
		# NOTE(review): the body of this loop is not present in this excerpt.
		for i in range(no_of_images):

		# Debug aid (modified file names):
		#print "printing modified images names"
		#print images_name_modified

		# NOTE(review): the original notes describe a second call here to the
		# split-file-names function (same inputs and return values as above);
		# the call is not present in this excerpt.


		# Debug aid (split file names):

		#print " printing list of split file names : "	
		#print split_image_names

		# NOTE(review): the step recomputing the split path length is not
		# present in this excerpt.

		class_names=[]  # start again from an empty list; the earlier entries are discarded

		# Same extraction as above; per the original note the class name sits
		# in the second-to-last position of the split path, hence "-2".
		for i in range(no_of_images):
			temp_class_name=split_image_names[i][length_split_image_name-2]
			class_names.append(temp_class_name)

		# Debug aid (all class names, with repetitions):
		#print "printing all class names"
		#print class_names

		set_of_class_names=set(class_names)  # set() drops the repetitions

		# Debug aid (unique class names):

		#print "printing unique set of class names"
		#print set_of_class_names

		no_of_classes=len(set_of_class_names)
		# NOTE(review): as above, the conversion of the set back to a list is
		# mentioned in the original note but not present in this excerpt.


	# Arrange the images of the input directory in class order. The inner loop
	# runs once per occurrence of the i-th unique class name in class_names, so
	# classes may hold different numbers of images.
	# NOTE(review): the body of the inner loop is not present in this excerpt.

	for  i in range(no_of_classes):
		for j in range(class_names.count(unique_class_names[i])):
			#print "img=%d" %(img_counter)
			#print images[img_counter]
		entire_class.append(each_class)  # collects the images arranged by class

	# Debug aids:
	#print "imagecounter=%d" %(img_counter)
	#print "number of images =%d" %(len(images))
	#print entire_class
	#print "printing number of images per class"
	#print num_of_images_each_class
	#print "total number of images = %d" %(sum(num_of_images_each_class))

	# Build the train set and the test set: per the original notes, one randomly
	# selected image of each class goes to the test set and the remaining images
	# of that class go to the train set.
	# NOTE(review): the body of this loop is not present in this excerpt.

	for i in range(no_of_classes):

	# Debug aid (train data set):
	#print "Printing type of train dataset 	
	#print type(train_data_set)
	#print "printing training data set"
	#print train_data_set

	# Debug aid (test data set):
	#print "Printing type of test dataset 	
	#print type(test_data_set)
	#print "printing test data set"
	#print test_data_set
	# The entire training data set is needed as a single list.
	# NOTE(review): the body of this loop is not present in this excerpt.

	for r in  range(no_of_classes):
	print "Total number of trained images = %d " %(len(entire_train_data_as_list))
	# Returns a 6-tuple: (train_data_set, entire_train_data_as_list,
	# no_of_classes, test_data_set, count_of_dots_original_path,
	# flag_for_testing).
	# NOTE(review): count_of_dots_original_path is not assigned in the visible lines.
	return (train_data_set,entire_train_data_as_list,no_of_classes,test_data_set,count_of_dots_original_path,flag_for_testing)
コード例 #6
ファイル: train.py プロジェクト: d-rama/Code_PyRec
import sys
import Image
import scipy.linalg
import numpy.matlib
from numpy.matlib import zeros

# Module-level working lists of the training script.
images_abs_names=[] # absolute paths of all image files
total_img_vect=[]   # all images in vector form
mean_img_vect=[]   # mean of all images
sum_img_vect=[]   # sum of all images
mean_for_subtraction=[] # copies of the mean used for subtraction

""" get_files method in the module get_abs_names is called """

# NOTE(review): neither `lslR` nor `src_img_dir` is defined in the visible part
# of this script. Per the original note the directory is meant to be supplied
# at run time through sys.argv; that change had not been made yet.
images=lslR.get_files(src_img_dir)

# mean_img_vect and sum_img_vect have to be initialised,
# which requires knowing the dimensions of each image,
# so one test image is read and the required values are derived from it.


initialising all the required values such as 
コード例 #7
ファイル: PCA_train.py プロジェクト: d-rama/Code_PyRec
def train(pathtoimages):
	# Derives the class name of every image from its split path, counts the
	# classes and the images per class, and prepares per-class train/test
	# containers.
	#
	# NOTE(review): this excerpt is incomplete and cannot run as shown. The
	# statements that assign `total_dimensions_per_image`, `no_of_images`,
	# `split_image_names` and `length_split_image_name` are missing, as are the
	# bodies of several loops, the train/test calls described at the end and
	# any return statement. `pathtoimages` is not used by any visible statement.

	images_abs_names=[] # absolute paths of all image files
	total_img_vect=[]   # all images in vector form
	mean_img_vect=[]   # mean of all images
	sum_img_vect=[]   # sum of all images
	mean_for_subtraction=[] # copies of the mean used for subtraction
	norm_list = [] # norm values produced during the testing phase
	split_image_names=[]  # split image file names, used to group images into classes
	class_names=[] # class name of every image
	each_class=[] # names belonging to a single class
	entire_class=[] # all classes together
	test_data_set=[] # test images
	train_data_set=[] # training images
	entire_train_data_as_list=[] # training images as a single list

	# NOTE(review): the original notes say the get_files method of the module
	# get_abs_names is called here; the call is not present in this excerpt.

	# mean_img_vect and sum_img_vect have to be initialised, which requires
	# knowing the dimensions of each image, so one test image is read and the
	# required values are derived from it.
	# NOTE(review): those statements are not present in this excerpt.


	# Initialise the required values such as mean_img_vect and sum_img_vect.
	# NOTE(review): the body of this loop is not present in this excerpt.

	for i in range(total_dimensions_per_image):

	# Per the original note, the code that partitions the whole image database
	# into train set and test set goes here.
	# NOTE(review): the body of this loop is not present in this excerpt.
	for i in range(no_of_images):

	# NOTE(review): the step that finds the length of each split path
	# (`length_split_image_name`) is not present in this excerpt.

	# Extract the class name of every image: the element at index
	# length_split_image_name-2 of its split path.

	for i in range(no_of_images):
		temp_class_name=split_image_names[i][length_split_image_name-2]
		class_names.append(temp_class_name)

	# Debug aid (all class names, with repetitions):
	#print "printing all class names"
	#print class_names

	set_of_class_names=set(class_names)  # set() drops the repetitions

	# Debug aid (unique class names):

	#print "printing unique set of class names"
	#print set_of_class_names

	no_of_classes=len(set_of_class_names)
	# NOTE(review): with int operands this is integer division under Python 2,
	# so any remainder is dropped; it raises ZeroDivisionError when
	# no_of_classes is 0. Presumably every class is expected to hold the same
	# number of images -- confirm.
	no_of_images_per_class=no_of_images/no_of_classes

	# Debug aid (number of images per class):

	#print "number of images per class = %d " %(no_of_images_per_class)
	# Arrange the images of the input directory in class order.
	# NOTE(review): the body of the inner loop is not present in this excerpt.

	for  i in range(no_of_classes):
		for j in range(no_of_images_per_class):
		entire_class.append(each_class)  # collects the images arranged by class

	# Build the train set and the test set: per the original notes, one randomly
	# selected image of each class goes to the test set and the remaining images
	# of that class go to the train set.
	# NOTE(review): the body of this loop is not present in this excerpt.

	for i in range(no_of_classes):

	# Debug aid (details of the train data set):
	#print type(train_data_set)
	#print "printing training data set"
	#print train_data_set
	#print test_data_set_matrix.shape
	#print train_data_set_matrix.shape
	#print "printing one individual image in training data set"
	#print train_data_set

	# The entire training data set is needed as a single list.
	# NOTE(review): the body of the inner loop is not present in this excerpt.
	for r in  range(no_of_classes):
		for c in range(no_of_images_per_class-1):

	# NOTE(review): the original notes describe a call here to traindb (in
	# train_database), which does the training; the call is not present in this
	# excerpt. Documented return values:
	#   (1) mean_img : mean of all the images, a 1-d array/list
	#   (2) eigen_selected : only the major values of the eigen vector
	#   (3) signature_images_for_train_set : signatures (mapped images / eigen
	#       images) for the entire training data set
	#
	# The number of images trained per class is also determined at this point.


	#print signature_images_for_train_set

	# Debug aid (type and length of the signature variable):
	#	print "signature type"
	#	print type(signature_images_for_train_set)
	#	print "signature length"
	#	print len(signature_images_for_train_set)

	# NOTE(review): the original notes also describe a call to testdb (in
	# test_database.py), not present in this excerpt. Documented arguments:
	#   arg_1 : signature_images_for_train_set : signatures for the entire
	#           training data set (returned by train_database)
	#   arg_2 : test_data_set : test images, one randomly selected per class
	#   arg_3 : entire_train_data_as_list : entire train data set (the original
	#           input with the test images removed)
	#   arg_4 : mean_img : mean of all the images, a 1-d array/list (returned by
	#           train_database)
	#   arg_5 : eigen_selected : the major eigen values only (returned by
	#           train_database)
	#   arg_6 : no_images_trained_per_class : number of images actually trained
	#           per class

コード例 #8
import Image
import scipy.linalg
import numpy.matlib
from numpy.matlib import zeros

# Module-level working lists of the script.
images_abs_names = []  # absolute paths of all image files
total_img_vect = []  # all images in vector form
mean_img_vect = []  # mean of all images
sum_img_vect = []  # sum of all images
mean_for_subtraction = []  # copies of the mean used for subtraction
norm_list = []  # norm values produced during the testing phase

""" get_files method in the module get_abs_names is called """

# The image directory is taken from the first command-line argument.
# NOTE(review): neither `sys` nor `lslR` is imported in the visible part of
# this script, and sys.argv[1] raises IndexError when no argument is given.
src_img_dir = sys.argv[1]
images = lslR.get_files(src_img_dir)

# Rebinds images_abs_names to the object returned by get_files (a second name
# for the same object, not a copy); the empty list assigned above is dropped.
images_abs_names = images

# mean_img_vect and sum_img_vect have to be initialised,
# which requires knowing the dimensions of each image,
# so one test image is read and the required values are derived from it.

# Open the first listed image only to read the image dimensions.
# NOTE(review): images[0] raises IndexError when `images` is empty.
shape_image = Image.open(images[0])
shape_image_array = numpy.asarray(shape_image)
shape = shape_image_array.shape
# Product of the first two entries of the array shape.
total_dimensions_per_image = shape[0] * shape[1]

# NOTE(review): only the heading is printed in the visible lines; the statement
# printing the shape itself is not present in this excerpt.
print "printing shape of the image"