def get_SIFT_descriptor(image2D, **kwargs):
    """
    Perform SIFT on a single image and return its sampled descriptors.

    Notes carried over from the original documentation of the extractors:
        frames[i] -> (x,y)
        descrs[i] -> (f1,f2,f3...f128)

    Configure SIFT algorithm with followings:
        dense: Apply d-SIFT (Default: True)
        float: Descriptor are returned in floating point (Default: False)
        fast:  Fast approximation for d-SIFT; only read when ``dense`` is
               true (Default: False)
        step:  Sampling step forwarded to d-SIFT; only read when ``dense``
               is true (Default: 1)

    Returns
        The result of ``sample_descriptors`` applied to the extracted
        descriptors (documented by the original author as an np.array).
    """
    dense = kwargs.get('dense', True)
    # The 'float' option was documented but never forwarded to the extractor.
    # NOTE(review): assumes dsift/sift are the cyvlfeat functions, which both
    # accept ``float_descriptors`` (default False) -- confirm against imports.
    float_descriptors = kwargs.get('float', False)

    if dense:
        # Might be useful: verbose=False
        _, descrs = dsift(
            image2D,
            step=kwargs.get('step', 1),
            fast=kwargs.get('fast', False),
            float_descriptors=float_descriptors)
    else:
        # Might be useful: verbose=False
        _, descrs = sift(
            image2D,
            compute_descriptor=True,
            float_descriptors=float_descriptors)

    # Only the descriptors are used; the frames are discarded.
    return sample_descriptors(descrs)
def extract_sift(img_path, data_path):
    """
    Compute dense SIFT for every image below ``img_path`` and pickle it.

    Every directory found under ``img_path`` (the root itself excluded) is
    processed: each file it directly contains is read, resized so that its
    longer side is about 300 pixels, described with ``dsift`` (step 4,
    8x8 bins, float descriptors) and the ``(frames, descriptors)`` result is
    pickled to ``data_path + 'sift/<directory name>/<image stem>.pkl'``.

    Parameters
    ----------
    img_path : str
        Root directory whose sub-directories contain the images.
    data_path : str
        Output prefix. It is concatenated as-is with ``'sift/'``, so it is
        expected to end with a path separator.
    """
    # os.walk yields img_path itself first; slicing (instead of pop(0)) also
    # copes with a missing/empty root, for which os.walk yields nothing.
    subdirs = [entry[0] for entry in os.walk(img_path, True)][1:]

    for subdir in subdirs:
        # basename is separator-agnostic, unlike splitting on '/'.
        category = os.path.basename(subdir)
        print(' ')
        sys.stdout.write("extracting sift " + category)

        sift_data_subdir = data_path + 'sift/' + category + '/'
        if not os.path.exists(sift_data_subdir):
            os.makedirs(sift_data_subdir)

        # Only the files directly inside subdir; no need to walk its subtree.
        imgs = next(os.walk(subdir, True), (subdir, [], []))[2]
        num_files = len(imgs)
        dot_every = round(num_files / 10)

        count = 0
        for img in imgs:
            # Progress indicator: roughly one dot per tenth of the directory.
            if count >= dot_every:
                sys.stdout.write('.')
                count = 0

            # Second positional argument of misc.imread is passed as True
            # (presumably ``flatten``, i.e. load as grayscale -- confirm).
            gray_f = misc.imread(subdir + '/' + img, True)
            height, width = gray_f.shape
            ratio = 1.0 * max(height, width) / 300
            gray_f = misc.imresize(
                gray_f,
                (round(height / ratio), round(width / ratio))).astype(float)

            # NOTE: the sparse variant below blows up badly -- do not use it.
            # sift_frame_and_descriptor = sift(gray_f,compute_descriptor = True,float_descriptors=True, norm_thresh=1.0)
            # sift_frame_and_descriptor = sift(gray_f,n_octaves = 5,n_levels = 10,compute_descriptor = True,float_descriptors=True,edge_thresh = 30)
            # dense sift
            sift_frame_and_descriptor = dsift(
                gray_f, step=4, size=(8, 8), float_descriptors=True)

            # splitext keeps the full stem ("a.b.jpg" -> "a.b") and does not
            # fail on names without an extension; the context manager makes
            # sure the pickle file is flushed and closed.
            stem = os.path.splitext(img)[0]
            with open(sift_data_subdir + stem + '.pkl', 'wb') as out_file:
                pickle.dump(sift_frame_and_descriptor, out_file)

            count += 1
def extract_and_describe(data, size=5, step=STEP):
    """
    Run dense SIFT over every image referenced by ``data`` and stack the
    resulting descriptors.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        The dataset. Each row's ``'path'`` entry is appended to
        ``DATA_SET_FOLDER`` to locate the image.
    size : int
        The size of the spatial bin of the SIFT descriptor in pixels.
    step : int
        A SIFT descriptor is extracted every ``step`` pixels.

    Returns
    -------
    np.ndarray
        Descriptors of all images, stacked vertically in row order.
    """
    def describe(record):
        # Load one image in grayscale and keep only its descriptors.
        location = os.path.sep.join([DATA_SET_FOLDER, record['path']])
        gray = io.imread(location, as_gray=True)
        _, patch_descriptors = dsift(gray, size=size, step=step, fast=True)
        return patch_descriptors

    rows = tqdm(data.iterrows(), "Extracting/Describing Patches",
                total=len(data))
    return np.vstack([describe(record) for _, record in rows])
def load_and_describe(filename, size=5, step=STEP):
    """
    Turn a single image into its bag of visual words.

    The image is read in grayscale, described with fast dense SIFT and each
    descriptor is assigned to a cluster by the module-level ``k_means``
    model.

    Parameters
    ----------
    filename : str
        Path to an image.
    size : int
        The size of the spatial bin of the SIFT descriptor in pixels.
    step : int
        A SIFT descriptor is extracted every ``step`` pixels.

    Returns
    -------
    list
        The closest cluster for each descriptor, exactly as returned by
        ``k_means.predict``.
    """
    gray_image = io.imread(filename, as_gray=True)
    _, features = dsift(gray_image, size=size, step=step, fast=True)
    return k_means.predict(features)
def main(image_list="images.dat"):
    """
    Compute and save SIFT and dense-SIFT descriptors for a list of images.

    Parameters
    ----------
    image_list : str
        Path of a text file holding one image path per line, relative to
        ``<cwd>/dataset/``. Blank lines are ignored.

    For every listed image the SIFT descriptors are saved with ``np.save``
    under ``<cwd>/sift_descriptor/<image path>`` and the dense-SIFT
    descriptors (step 10) under ``<cwd>/dsift_descriptor/<image path>``;
    missing directories are created on the fly.
    """
    cwd = os.getcwd()
    data_path = cwd + "/dataset/"
    sift_path = cwd + "/sift_descriptor/"
    dsift_path = cwd + "/dsift_descriptor/"

    # Honour the image_list argument (it used to be ignored in favour of a
    # hard-coded "images.dat").
    with open(image_list, "r") as listing:
        stripped = (line.strip() for line in listing)
        # One clean name is used for both reading and writing, so stray
        # whitespace/"\r" can no longer leak into the output file names.
        names = [name for name in stripped if name]

    total = len(names)
    for index, name in enumerate(names, start=1):
        print(index, "/", total)
        image_matrice = imread(data_path + name, mode='F')

        _, sift_desc = sift(image_matrice, compute_descriptor=True)
        _, dsift_desc = dsift(image_matrice, step=10)

        sift_image_path = sift_path + name
        dsift_image_path = dsift_path + name
        os.makedirs(os.path.dirname(sift_image_path), exist_ok=True)
        os.makedirs(os.path.dirname(dsift_image_path), exist_ok=True)

        np.save(sift_image_path, sift_desc)
        np.save(dsift_image_path, dsift_desc)
def vl_phow(im, **kwargs):
    """
    Extract PHOW features (dense SIFT at several bin sizes) from ``im``.

    Python port of VLFeat's ``vl_phow``. The image is converted to the
    requested colour space, smoothed once per bin size with the module's
    ``imsmooth`` and described channel by channel with ``cyvlfeat``'s
    ``dsift``. Descriptors whose contrast is below the threshold are zeroed.

    Parameters
    ----------
    im : ndarray
        ``(H, W)`` or ``(H, W, C)`` image.
        NOTE(review): the default contrast threshold presumably assumes
        intensities in [0, 1] -- confirm against callers.
    **kwargs
        Overrides for the options below (applied via ``NameSpace.update``):
        ``verbose`` (True), ``fast`` (True), ``sizes`` ([4, 6, 8, 10]),
        ``step`` (2), ``color`` ('gray'; also 'rgb', 'opponent', 'hsv'),
        ``floatdescriptors`` (False), ``magnif`` (6), ``windowsize`` (1),
        ``contrastthreshold`` (0.005).

    Returns
    -------
    frames : ndarray
        One column per descriptor: the frame rows returned by ``dsift``
        without the trailing norm row, followed by the bin size.
    descrs : ndarray
        One column per descriptor; the per-channel descriptors are stacked
        along the first axis.
    """
    # -------------------------------------------------------------------
    # Parse the arguments
    # -------------------------------------------------------------------
    from utils import NameSpace
    import math
    import os
    import warnings
    from skimage import color
    from cyvlfeat.sift import dsift
    import numpy as np

    opts = NameSpace()
    opts.verbose = True
    opts.fast = True
    opts.sizes = [4, 6, 8, 10]
    opts.step = 2
    opts.color = 'gray'
    opts.floatdescriptors = False
    opts.magnif = 6
    opts.windowsize = 1  # 1.5
    opts.contrastthreshold = 0.005

    opts.update(**kwargs)

    dsiftOpts = {
        'step': opts.step,
        'norm': True,
        'window_size': opts.windowsize,
        'verbose': opts.verbose,
        'fast': opts.fast,
        'float_descriptors': opts.floatdescriptors,
    }

    # -------------------------------------------------------------------
    # Extract the features
    # -------------------------------------------------------------------

    # standardize the image
    imageSize = [im.shape[1], im.shape[0]]
    color_space = opts.color.lower()
    if color_space == 'gray':
        numChannels = 1
        if im.ndim == 3 and im.shape[2] > 1:
            im = color.rgb2gray(im)
        # Always carry an explicit channel axis so that ims[:, :, k] below
        # also works for images that were already single channel.
        if im.ndim == 2:
            im = np.expand_dims(im, axis=-1)
    else:
        numChannels = 3
        if im.ndim == 2:
            im = np.expand_dims(im, axis=-1)
        if im.shape[2] == 1:
            im = np.tile(im, (1, 1, numChannels))
        if color_space == 'rgb':
            pass
        elif color_space == 'opponent':
            # Note that the mean differs from the standard definition of
            # opponent space and is the regular intensity (for
            # compatibility with the contrast thresholding).
            #
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.
            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            # np.stack creates the channel axis; np.concatenate has no
            # ``dim`` keyword and would not build an (H, W, 3) image from
            # 2-D planes anyway.
            im = np.stack(
                (mu,
                 (im[:, :, 0] - im[:, :, 1]) / math.sqrt(2) + alpha * mu,
                 (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) /
                 math.sqrt(6) + alpha * mu),
                axis=-1)
        elif color_space == 'hsv':
            im = color.rgb2hsv(im)
        else:
            opts.color = 'hsv'
            color_space = 'hsv'
            warnings.warn(
                'Color space not recongized, defaulting to HSV color space.')
            # Actually perform the conversion the warning announces; the
            # thresholding below reads channel 2 as the V component.
            im = color.rgb2hsv(im)

    if opts.verbose:
        print('%s: color space: %s' % (os.path.basename(__file__),
                                       opts.color))
        print('%s: image size: %d x %d' % (os.path.basename(__file__),
                                           imageSize[0], imageSize[1]))
        print('%s: sizes: %s' % (os.path.basename(__file__), str(opts.sizes)))

    frames = []
    descrs = []
    max_size = max(opts.sizes)
    for size in opts.sizes:
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick the first pixel as XMIN; for any other scale
        # we pick XMIN so that the centers coincide.
        #
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.
        # 1.5 is written out so the result does not depend on Python 2
        # integer division.
        off = int(math.floor(1.5 * (max_size - size)))

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = float(size) / opts.magnif
        # Let smoothing errors propagate instead of dropping into a
        # debugger and then failing on the undefined result.
        ims = imsmooth(im, sigma)

        dsiftOpts['size'] = size
        dsiftOpts['bounds'] = [off, off, im.shape[0] - 1, im.shape[1] - 1]

        # extract dense SIFT features from all channels; transposed so that
        # each descriptor is a column
        f = []
        d = []
        for k in range(numChannels):
            _f, _d = dsift(ims[:, :, k], **dsiftOpts)
            f.append(_f.T)
            d.append(_d.T)

        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded; the contrast is the last frame row (norm=True)
        if color_space in ('gray', 'opponent'):
            contrast = f[0][-1]
        elif color_space == 'rgb':
            contrast = np.mean((f[0][-1], f[1][-1], f[2][-1]), axis=0)
        else:  # hsv
            contrast = f[2][-1]

        low_contrast = contrast < opts.contrastthreshold
        for k in range(numChannels):
            d[k][:, low_contrast] = 0

        # save only x,y, and the scale
        frames.append(
            np.concatenate(
                [f[0][0:-1], size * np.ones((1, f[0].shape[1]))],
                axis=0))
        descrs.append(np.concatenate(d, axis=0))

    frames = np.concatenate(frames, axis=1)
    descrs = np.concatenate(descrs, axis=1)
    return frames, descrs
def phow(image, verbose=False, fast=True, sizes=(4, 6, 8, 10), step=2,
         color='gray', float_descriptors=False, magnification=6,
         window_size=1.5, contrast_threshold=0.005):
    """
    Extracts PHOW features from the ``image``. PHOW is simply dense
    SIFT applied at several resolutions.

    DESCRS has the same format of `sift()` and `dsift()`. `frames[:, 0:2]`
    are the coordinates of the center of each descriptor as returned by
    `dsift()`, `frames[:, 2]` is the contrast of the descriptor, as returned
    by `dsift()` (for colour variant, contrast is computed on the intensity
    channel). `frames[:, 3]` is the size of the bin of the descriptor.

    By default, `phow()` computes the gray-scale variant of the descriptor.
    The `color` option can be used to compute the color variant instead.

    Unlike the Matlab wrapper of vlfeat, the image is pre-smoothed at the
    desired scale level by the gaussian filter provided by Scipy
    (``scipy.ndimage.gaussian_filter``). Each channel is smoothed on its
    own; the channel axis is never blurred.

    Parameters
    ----------
    image : [H, W] or [H, W, C] `ndarray`
        The image to calculate descriptors for. If its maximum is greater
        than 1 it is converted to `float32` and divided by 255.
    verbose : `bool`, optional
        If ``True``, be verbose.
    fast : `bool`, optional
        If ``True``, use a piecewise-flat, rather than Gaussian,
        windowing function. While this breaks exact SIFT equivalence, in
        practice is much faster to compute.
    sizes : sequence of `int`, optional
        Scales at which the dense SIFT features are extracted. Each value is
        used as bin size for the dsift() function.
    step : `int`, optional
        A SIFT descriptor is extracted every ``step`` pixels. This allows for
        sub-sampling of the image.
    color : `str`, optional
        Choose between 'gray', 'rgb', 'hsv', and 'opponent'. Any other value
        falls back to 'hsv' after printing a notice.
    float_descriptors : `bool`, optional
        If ``True``, the descriptor are returned in floating point
        rather than integer format.
    magnification : `int`, optional
        Set the descriptor magnification factor. The smoothing sigma used
        for a bin size is ``size / magnification``.
    window_size : `int`, optional
        Set the variance of the Gaussian window that determines the
        descriptor support. It is expressed in units of spatial bins.
        NOTE(review): this value is currently not forwarded to dsift(), so
        it has no effect -- confirm whether dsift() accepts it before
        wiring it through.
    contrast_threshold : `float`, optional
        Contrast threshold below which SIFT features are mapped to zero. The
        input image is scaled to have intensity range in [0,1] (rather than
        [0,255]) and this value is compared to the descriptor norm as
        returned by dsift().

    Returns
    -------
    frames : `(F, 4)` `ndarray`
        ``F`` is the number of keypoints (frames) used. This is the center
        of every dense SIFT descriptor that is extracted, its contrast and
        its bin size.
    descriptors : `(F, D)` `ndarray`
        ``F`` is the number of keypoints (frames) used. The descriptors of
        all channels are placed side by side, so ``D`` is the dsift()
        descriptor length times the number of channels (1 for 'gray',
        3 otherwise).
        If ``sizes`` is empty, two empty lists are returned instead.
    """
    # Standardize the image: the value of ``color`` has priority over the
    # actual colour space of the input, so the conversions below depend only
    # on ``color``.
    if image.max() > 1:
        image = np.array(image, np.float32) / 255.0

    color_lower = color.lower()
    img = image

    if color_lower == 'gray':
        num_channels = 1
        # color='gray' requested but the input is multi-channel.
        if img.ndim == 3 and img.shape[2] > 1:
            img = rgb2gray(img)
    else:
        num_channels = 3
        # A colour variant was requested but the input is greyscale.
        if img.ndim == 2 or img.shape[2] == 1:
            img = gray2rgb(img)

        # The conversion is a separate ``if`` chain so that it also runs for
        # images that were just promoted from greyscale.
        if color_lower == 'rgb':
            pass
        elif color_lower == 'opponent':
            # Note that the mean differs from the standard definition of
            # opponent space and is the regular intensity (for compatibility
            # with the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.
            alpha = 0.01
            # reshape tolerates rgb2gray returning (H, W) or (H, W, 1).
            intensity = np.asarray(rgb2gray(img)).reshape(img.shape[:2])
            red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
            # The whole (R + G - 2B) term is divided by sqrt(6); np.stack
            # builds the channel axis from the three 2-D planes.
            img = np.stack(
                (intensity,
                 (red - green) / math.sqrt(2) + alpha * intensity,
                 (red + green - 2 * blue) / math.sqrt(6) + alpha * intensity),
                axis=2)
        elif color_lower == 'hsv':
            img = rgb_to_hsv(img)
        else:
            # Unknown colour space: fall back to HSV.
            color_lower = 'hsv'
            img = rgb_to_hsv(img)
            print('Color space not recognized, defaulting to HSV color space.')

    if verbose:
        print('Color space: {}'.format(color))
        print('I size: {}x{}'.format(img.shape[0], img.shape[1]))
        # Works for any number of sizes (identical output for four sizes).
        print('Sizes: [{}]'.format(' '.join(str(size) for size in sizes)))

    temp_frames = []
    temp_descrs = []
    for size in sizes:
        # Align the descriptor centers of all scales with those of the
        # largest bin size.
        # NOTE(review): the leading 1.0 comes from Matlab's 1-based pixel
        # coordinates; left unchanged to keep frame positions stable.
        off = int(math.floor(1.0 + 3.0 / 2.0 * (max(sizes) - size)))

        # smooth the image to the appropriate scale based on the size of
        # the SIFT bins; sigma 0 on the channel axis keeps channels apart
        sigma = size * 1.0 / magnification
        if img.ndim == 3:
            ims = scipy.ndimage.gaussian_filter(img, (sigma, sigma, 0))
        else:
            ims = scipy.ndimage.gaussian_filter(img, sigma)

        # extract dense SIFT features from all channels
        f = []
        d = []
        for k in range(num_channels):
            # dsift() accepts only 2-D arrays: a greyscale image is passed
            # whole, a multi-channel one is processed one channel at a time.
            if ims.ndim == 2:
                smoothed_image = ims
            elif ims.ndim == 3:
                smoothed_image = ims[:, :, k]
            else:
                raise ValueError('Image array not defined')

            channel_frames, channel_descrs = dsift(
                smoothed_image, step=step, size=size,
                bounds=np.array([off, off,
                                 image.shape[0] - 1, image.shape[1] - 1]),
                norm=True, fast=fast,
                float_descriptors=float_descriptors, verbose=verbose)
            f.append(channel_frames)
            d.append(channel_descrs)

        # With norm=True the third frame column is the descriptor contrast.
        if color_lower in ('gray', 'opponent'):
            contrast = f[0][:, 2]
        elif color_lower == 'rgb':
            contrast = np.mean((f[0][:, 2], f[1][:, 2], f[2][:, 2]), axis=0)
        else:  # hsv
            contrast = f[2][:, 2]

        # remove low contrast descriptors; note that for color descriptors
        # the V component is thresholded
        low_contrast = contrast < contrast_threshold
        for k in range(num_channels):
            d[k][low_contrast] = 0

        size_column = size * np.ones((contrast.shape[0], 1))
        temp_frames.append(np.append(f[0], size_column, axis=1))
        temp_descrs.append(np.hstack(d))

    if not temp_frames:
        # No scale requested: keep returning empty containers as before.
        return [], []

    # Concatenate once, after all scales have been processed.
    frames = np.concatenate(temp_frames, axis=0)
    descriptors = np.concatenate(temp_descrs, axis=0)
    return frames, descriptors