def pipeline(self):
    '''Read, resize, grayscale, and vectorize the images under self.image_path.'''
    pipe = ImagePipeline(self.image_path)
    pipe.read(sub_dirs=('casual', 'dress'))
    pipe.resize((300, 300, 3))
    pipe.transform(rgb2gray, {})  # requires: from skimage.color import rgb2gray
    pipe.vectorize()
    return pipe
def pipeline(directory):
    '''Standalone version of the pipeline above; the grayscale transform is disabled.'''
    pipe = ImagePipeline(directory)
    pipe.read(sub_dirs=('casual', 'dress'))
    pipe.resize((300, 300, 3))
    # pipe.transform(rgb2gray, {})
    pipe.vectorize()
    return pipe
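# Minimal usage sketch for the standalone pipeline() above. The directory
# 'wayfair/images/clothing/' is hypothetical and must contain 'casual' and
# 'dress' sub-directories. With the grayscale step disabled, each vectorized
# image has 300 * 300 * 3 values. ImagePipeline is assumed to expose the
# vectorized images on a `features` attribute, as image_featurizer below does.
#
#   pipe = pipeline('wayfair/images/clothing/')
#   features = pipe.features   # shape: (n_images, 300 * 300 * 3)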
import os

import skimage.color
import skimage.feature
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# ImagePipeline is assumed to be imported from the project's own pipeline module.


def image_featurizer(category, sub_dir='white', edge=False, pca=True):
    '''
    Vectorize images (with white backgrounds) within the category.

    INPUT:
        category: string
        sub_dir: string
        edge: boolean
        pca: boolean
    OUTPUT:
        feature_dict: dictionary
            key: path (filename)
            value: vectorized image
        scaler: trained StandardScaler (only returned when pca=True)
        pca_model: trained PCA model (only returned when pca=True)
    '''
    # Use ImagePipeline to read and transform the images:
    base_path = 'wayfair/images/' + category + '/'
    image_pipe = ImagePipeline(base_path)
    image_pipe.read(sub_dirs=(sub_dir,))
    image_pipe.resize((150, 150, 3))
    image_pipe.transform(skimage.color.rgb2gray, {})
    if edge:
        image_pipe.transform(skimage.feature.canny, {})
    image_pipe.vectorize()
    features = image_pipe.features

    # Scale the features and reduce them to 100 dimensions if pca is True:
    if pca:
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)
        pca_model = PCA(n_components=100)
        features = pca_model.fit_transform(features_scaled)

    # Organize vectorized images into the dictionary (feature_dict), keyed by filename:
    paths = os.listdir(base_path + sub_dir)
    paths = [x for x in paths if x[0] != '.']  # skip hidden files such as .DS_Store
    feature_dict = {path: feature for path, feature in zip(paths, features)}

    if pca:
        return feature_dict, scaler, pca_model
    else:
        return feature_dict
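# A minimal usage sketch for image_featurizer(). It assumes images live under
# 'wayfair/images/<category>/<sub_dir>/' as the function expects, that the
# category name 'dresses' (hypothetical) exists, and that the sub-directory
# holds at least 100 images so PCA(n_components=100) is valid.
if __name__ == '__main__':
    feature_dict, scaler, pca_model = image_featurizer('dresses', sub_dir='white')
    # Each value is a 100-dimensional PCA projection of a flattened
    # 150x150 grayscale image.
    some_file = next(iter(feature_dict))
    print(some_file, feature_dict[some_file].shape)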