# modules I wrote
from Pipeline_CommonFunctions import clean_file_lst
import os

# Root folder; every entry kept by clean_file_lst(..., jpg=False) is treated
# as a sub-folder of images.
full_path = '/Users/heymanhn/Virginia/Zipfian/Capstone_Project/Web_App/data/Image_20Percent'

item_dict = {}  # image file name -> running position in traversal order
new_lst = []    # image file names in traversal order
count = 0

clean_stand_img_directory_lst = clean_file_lst(os.listdir(full_path),
                                               jpg=False)
for subdir in clean_stand_img_directory_lst:
    subdir_path = os.path.join(full_path, subdir)
    clean_img_lst = clean_file_lst(os.listdir(subdir_path), jpg=True)
    for img_file in clean_img_lst:
        # A file name seen again in a later sub-folder overwrites the
        # earlier position in item_dict but is still appended to new_lst.
        item_dict[img_file] = count
        new_lst.append(img_file)
        count += 1

# Spot checks: positions of two known images, then the sizes of both
# containers (they differ only if a file name occurred more than once).
print(item_dict['barneys_158585078.jpg'])
print(item_dict['saks_0471952635914.jpg'])
print(len(item_dict))
print(len(new_lst))
def feature_preprocessing(self):
    """
    main feature preprocessing done

    Walks every sub-directory of self.stand_img_directory (the
    sub-directory name is used as the label of the images inside it),
    extracts pre- and post-transform features for each image and stacks
    the resulting feature vectors.

    Side effects (all paths are built from the module-level
    FeatVecs_path):
      * size_new_twenty_50_50_10e_labels.csv - label of every image that
        was processed successfully, comma separated
      * size_new_twenty_50_50_10e_items.csv  - file name of every such
        image, comma separated
      * size_new_twenty_50_50_10e_fails.csv  - file name of every image
        whose feature extraction raised IndexError, comma separated
      * rescaled_new_feat_matrix_50_50_10e.npy - the rescaled matrix

    output: (rescaled_feat_matrix, label_vec, fails) where label_vec is
    the list of labels in processing order and fails is the list of full
    paths of the images that were skipped.
    """
    total_images = 0
    num_failed = 0
    fails = []
    full_matrix = []
    label_vec = []

    labels_csv = '%s/size_new_twenty_50_50_10e_labels.csv' % FeatVecs_path
    items_csv = '%s/size_new_twenty_50_50_10e_items.csv' % FeatVecs_path
    fails_csv = '%s/size_new_twenty_50_50_10e_fails.csv' % FeatVecs_path

    # Context managers guarantee that all three files are flushed and
    # closed, even when an image raises something other than IndexError.
    with open(labels_csv, 'w') as f, \
            open(items_csv, 'w') as t, \
            open(fails_csv, 'w') as non:
        clean_stand_img_directory_lst = clean_file_lst(
            os.listdir(self.stand_img_directory), jpg=False)
        for subdir in clean_stand_img_directory_lst:
            subdir_path = os.path.join(self.stand_img_directory, subdir)
            clean_img_lst = clean_file_lst(os.listdir(subdir_path),
                                           jpg=True)
            for img_file in clean_img_lst:
                label = subdir
                print(label)

                # Create path for each image file and read it in
                img_file_path = os.path.join(subdir_path, img_file)
                img_arr = io.imread(img_file_path)

                # Assert image size
                self._check_img_size(img_arr, img_file_path)

                # If self.img_size != self.target_size,
                # reshape to self.target_size
                if self.img_size != self.target_size:
                    img_arr = resize(img_arr, self.target_size)
                print(img_file_path)

                # Extract features from raw image array
                try:
                    pre_trans_feat, feat_det_size = self.pre_trans(
                        img_arr, img_file_path)
                    print('pre_feat_det %s' % (feat_det_size,))
                    print('pre_trans_feat %s' % (pre_trans_feat.shape,))
                except IndexError:
                    num_failed += 1
                    fails.append(img_file_path)
                    # Delimited like the labels/items files so the log
                    # can be split back into individual names.
                    non.write(img_file + ',')
                    continue

                # Apply filters to transform image array
                unflattened_trans_img_arr = self.filter_transform(
                    img_arr).astype(float)
                print('trans_img_arr %s'
                      % (unflattened_trans_img_arr.shape,))

                # Extract features from post-transformed image array
                try:
                    post_trans_feat, post_feat_det_size = self.post_trans(
                        unflattened_trans_img_arr)
                    print('post_feat_det %s' % (post_feat_det_size,))
                    print('post_trans_feat %s' % (post_trans_feat.shape,))
                except IndexError:
                    num_failed += 1
                    fails.append(img_file_path)
                    # Record this failure too, so the fails file agrees
                    # with the returned `fails` list.
                    non.write(img_file + ',')
                    continue

                # flattened AND concatenated feature vector
                feat_vector = self.create_feature_vector(
                    pre_trans_feat, post_trans_feat,
                    unflattened_trans_img_arr)
                f.write(label + ',')
                label_vec.append(label)
                t.write(img_file + ',')
                print('feature vector shape %s' % (feat_vector.shape,))

                # Append feature vector to full image matrix
                full_matrix.append(feat_vector)
                total_images += 1
                print(total_images)

    print('total_images %s' % (total_images,))
    print('num_fails %s' % (num_failed,))
    # Ratio of failed to successfully processed images; guarded so an
    # empty or fully failing run does not die on a diagnostic print.
    if total_images:
        print('ratio %s' % (num_failed / float(total_images),))
    else:
        print('ratio n/a (no image was processed successfully)')

    # Apply StandardScaler to feature matrix (done inside self.rescaling)
    rescaled_feat_matrix = self.rescaling(full_matrix)
    np.save('%s/rescaled_new_feat_matrix_50_50_10e.npy' % FeatVecs_path,
            rescaled_feat_matrix)
    return rescaled_feat_matrix, label_vec, fails
def feature_preprocessing(self):
    """
    main feature preprocessing done

    Walks every sub-directory of self.stand_img_directory (the
    sub-directory name is used as the label of the images inside it),
    extracts pre- and post-transform features for each image, caches
    each feature vector as its own CSV file and stacks them into one
    matrix.

    Side effects:
      * '<FeatVecs_path>/size_100_100_labels.csv' - label of every image
        that was processed successfully, comma separated
      * '<self.cached_feature_vector_file><img_file>.csv' - the feature
        vector of each such image, written with np.savetxt

    Images whose feature extraction raises IndexError are counted and
    skipped.

    output: (rescaled_feat_matrix, y) where y is a numpy array holding
    the labels in the same order as the rows handed to self.rescaling.
    """
    total_images = 0
    num_failed = 0
    X = []
    y = []

    # The context manager closes the label file even if an image raises
    # something other than IndexError.
    with open('%s/size_100_100_labels.csv' % FeatVecs_path, 'w') as f:
        clean_stand_img_directory_lst = clean_file_lst(
            os.listdir(self.stand_img_directory), jpg=False)
        for subdir in clean_stand_img_directory_lst:
            subdir_path = os.path.join(self.stand_img_directory, subdir)
            clean_img_lst = clean_file_lst(os.listdir(subdir_path),
                                           jpg=True)
            for img_file in clean_img_lst:
                label = subdir
                print(label)

                # Create path for each image file and read it in
                img_file_path = os.path.join(subdir_path, img_file)
                img_arr = io.imread(img_file_path)

                # Assert image size
                self._check_img_size(img_arr, img_file_path)

                # If self.img_size != self.target_size, reshape to
                # self.target_size
                if self.img_size != self.target_size:
                    img_arr = resize(img_arr, self.target_size)
                print(img_file_path)

                # Only the feature-extraction steps are guarded, so an
                # IndexError from the bookkeeping below is not silently
                # counted as an extraction failure.
                try:
                    # Extract features from raw image array
                    pre_trans_feat, feat_det_size = self.pre_trans(
                        img_arr, img_file_path)
                    print('pre_feat_det %s' % (feat_det_size,))
                    print('pre_trans_feat %s' % (pre_trans_feat.shape,))

                    # Apply filters to transform image array
                    unflattened_trans_img_arr = self.filter_transform(
                        img_arr).astype(float)
                    print('trans_img_arr %s'
                          % (unflattened_trans_img_arr.shape,))

                    # Extract features from post-transformed image array
                    post_trans_feat, post_feat_det_size = self.post_trans(
                        unflattened_trans_img_arr)
                    print('post_feat_det %s' % (post_feat_det_size,))
                    print('post_trans_feat %s' % (post_trans_feat.shape,))

                    # flattened AND concatenated feature vector
                    feat_vector = self.create_feature_vector(
                        pre_trans_feat, post_trans_feat,
                        unflattened_trans_img_arr)
                except IndexError:
                    num_failed += 1
                    print('number of fails so far:  %s' % (num_failed,))
                    continue

                f.write(label + ',')
                np.savetxt(
                    '%s%s.csv' % (self.cached_feature_vector_file,
                                  img_file),
                    feat_vector, fmt='%.18e', delimiter=',')
                print('feature vector shape %s' % (feat_vector.shape,))

                # Keep feature vector and label aligned by position
                X.append(feat_vector)
                y.append(label)
                total_images += 1
                print(total_images)

    print('total_images %s' % (total_images,))
    print('num_fails %s' % (num_failed,))
    # Ratio of failed to successfully processed images; guarded so an
    # empty or fully failing run does not die on a diagnostic print.
    if total_images:
        print('ratio %s' % (num_failed / float(total_images),))
    else:
        print('ratio n/a (no image was processed successfully)')

    X = np.array(X)
    y = np.array(y)

    # Apply StandardScaler to feature matrix (done inside self.rescaling)
    rescaled_feat_matrix = self.rescaling(X)
    print(rescaled_feat_matrix)
    return rescaled_feat_matrix, y
def feature_preprocessing(self):
    """
    main feature preprocessing done

    Walks every sub-directory of self.stand_img_directory (the
    sub-directory name is used as the label of the images inside it)
    and uses the output of self.pre_trans directly as the feature
    vector of each image.

    Side effects (all paths are built from the module-level
    FeatVecs_path):
      * size_new_twenty_100_100_10e_test15k_labels.csv - label of every
        image that was processed successfully, comma separated
      * size_new_twenty_100_100_10e_test15k_items.csv - file name of
        every such image, comma separated
      * rescaled_new_feat_matrix_100_100_10e_test15k.npy - the rescaled
        matrix saved with np.save
      * feature_matrix_test15k.pkl - the same matrix, pickled

    Images for which self.pre_trans raises IndexError are skipped.

    output: (rescaled_feat_matrix, label_vec, item_name) where label_vec
    holds the labels and item_name the full image paths, both in
    processing order.
    """
    item_name = []
    total_images = 0
    num_failed = 0
    fails = []
    full_matrix = []
    label_vec = []

    labels_csv = ('%s/size_new_twenty_100_100_10e_test15k_labels.csv'
                  % FeatVecs_path)
    items_csv = ('%s/size_new_twenty_100_100_10e_test15k_items.csv'
                 % FeatVecs_path)

    # Context managers close both files even if an image raises
    # something other than IndexError.
    with open(labels_csv, 'w') as f, open(items_csv, 'w') as t:
        clean_stand_img_directory_lst = clean_file_lst(
            os.listdir(self.stand_img_directory), jpg=False)
        for subdir in clean_stand_img_directory_lst:
            subdir_path = os.path.join(self.stand_img_directory, subdir)
            clean_img_lst = clean_file_lst(os.listdir(subdir_path),
                                           jpg=True)
            for img_file in clean_img_lst:
                label = subdir
                print(label)

                # Create path for each image file and read it in
                img_file_path = os.path.join(subdir_path, img_file)
                img_arr = io.imread(img_file_path)

                # Assert image size
                self._check_img_size(img_arr, img_file_path)

                # If self.img_size != self.target_size, reshape to
                # self.target_size
                if self.img_size != self.target_size:
                    img_arr = resize(img_arr, self.target_size)
                print(img_file_path)

                # Extract features from raw image array
                try:
                    pre_trans_feat = self.pre_trans(img_arr, img_file_path)
                    print('pre_trans_feat %s' % (pre_trans_feat.shape,))
                except IndexError:
                    num_failed += 1
                    fails.append(img_file_path)
                    continue

                feat_vector = pre_trans_feat
                f.write(label + ',')
                t.write(img_file + ',')
                label_vec.append(label)
                item_name.append(img_file_path)
                print('feature vector shape %s' % (feat_vector.shape,))

                # Append feature vector to full image matrix
                full_matrix.append(feat_vector)
                total_images += 1
                print(total_images)

    print('total_images %s' % (total_images,))
    print('num_fails %s' % (num_failed,))
    # Ratio of failed to successfully processed images; guarded so an
    # empty or fully failing run does not die on a diagnostic print.
    if total_images:
        print('ratio %s' % (num_failed / float(total_images),))
    else:
        print('ratio n/a (no image was processed successfully)')

    # Apply StandardScaler to feature matrix (done inside self.rescaling)
    rescaled_feat_matrix = self.rescaling(full_matrix)
    np.save(
        '%s/rescaled_new_feat_matrix_100_100_10e_test15k.npy'
        % FeatVecs_path,
        rescaled_feat_matrix)

    # Pickle streams are binary data: open in 'wb' and close the handle
    # so the dump is fully flushed to disk before returning.
    with open('%s/feature_matrix_test15k.pkl' % FeatVecs_path, 'wb') as m:
        pickle.dump(rescaled_feat_matrix, m)
    return rescaled_feat_matrix, label_vec, item_name