Esempio n. 1
0
def get_SIFT_descriptor(image2D, **kwargs):
    """
    Compute SIFT descriptors for a single image and sub-sample them.

    The descriptors are computed with ``dsift`` (dense SIFT) or ``sift`` and
    then handed to ``sample_descriptors``. The frames returned by the
    extractor are discarded.

    Configure the SIFT algorithm with the following keyword arguments:
    dense: Apply d-SIFT instead of regular SIFT (Default: True)
    float: Descriptors are returned in floating point (Default: False)
    fast: Fast approximation for d-SIFT, ignored otherwise (Default: False)
    step: Sampling step passed to d-SIFT, ignored otherwise (Default: 1)

    Returns whatever ``sample_descriptors(descrs)`` returns
    (presumably an np.array -- confirm against that helper).
    """
    # The 'float' option was documented but never forwarded to the extractor;
    # the default (False) keeps the previous integer-descriptor behaviour.
    float_descriptors = kwargs.get('float', False)

    if kwargs.get('dense', True):
        _, descrs = dsift(image2D,
                          step=kwargs.get('step', 1),
                          fast=kwargs.get('fast', False),
                          float_descriptors=float_descriptors)
    else:
        _, descrs = sift(image2D,
                         compute_descriptor=True,
                         float_descriptors=float_descriptors)

    return sample_descriptors(descrs)
Esempio n. 2
0
def extract_sift(img_path, data_path):
    """
    Compute dense SIFT features for every image below ``img_path``.

    Every directory that ``os.walk`` finds under ``img_path`` (the root itself
    excluded) is treated as one group of images. Each image is loaded with
    ``misc.imread(path, True)`` (presumably a grayscale read -- confirm),
    rescaled so that its longer side is 300 pixels, described with ``dsift``
    and the object returned by ``dsift`` is pickled to
    ``<data_path>sift/<subdir name>/<image name without extension>.pkl``.

    Parameters
    ----------
    img_path : str
        Root directory whose sub-directories contain the images.
    data_path : str
        Output prefix. It is concatenated as-is, so it should end with a
        path separator.
    """
    subdirs = [entry[0] for entry in os.walk(img_path, True)]
    # os.walk yields img_path itself first; only its sub-directories are used.
    subdirs.pop(0)

    for subdir in subdirs:
        subdir_name = subdir.split('/')[-1]
        print(' ')
        sys.stdout.write("extracting sift " + subdir_name)
        sift_data_subdir = data_path + 'sift/' + subdir_name + '/'
        if not os.path.exists(sift_data_subdir):
            os.makedirs(sift_data_subdir)

        # Only the files directly inside subdir are needed, so take the first
        # entry of the walk instead of materialising the whole tree.
        imgs = next(os.walk(subdir, True))[2]
        num_files = len(imgs)
        count = 0
        for img in imgs:
            # Emit a progress dot roughly every tenth of the directory.
            if count >= round(num_files / 10):
                sys.stdout.write('.')
                count = 0

            gray_f = misc.imread(subdir + '/' + img, True)
            height, width = gray_f.shape
            ratio = 1.0 * max(height, width) / 300
            # round() may return a float; the target size must be integral.
            new_shape = (int(round(height / ratio)), int(round(width / ratio)))
            gray_f = misc.imresize(gray_f, new_shape).astype(float)

            # Alternatives that were tried with regular SIFT:
            #   sift(gray_f, compute_descriptor=True, float_descriptors=True,
            #        norm_thresh=1.0)
            #     -> this one blows up ("avalanche"), do not use it
            #   sift(gray_f, n_octaves=5, n_levels=10, compute_descriptor=True,
            #        float_descriptors=True, edge_thresh=30)
            # Dense SIFT is used instead.
            sift_frame_and_descriptor = dsift(
                gray_f, step=4, size=(8, 8), float_descriptors=True)

            # splitext keeps the full stem ("a.b.jpg" -> "a.b") and also copes
            # with names that have no extension at all.
            stem = os.path.splitext(img)[0]
            with open(sift_data_subdir + stem + '.pkl', 'wb') as out_file:
                pickle.dump(sift_frame_and_descriptor, out_file)
            count += 1
Esempio n. 3
0
def extract_and_describe(data, size=5, step=STEP):
    """
    Compute dense SIFT descriptors for every image referenced by ``data``.

    Each row's ``'path'`` entry is appended to ``DATA_SET_FOLDER``, the image
    is read as grayscale and described with ``dsift`` (fast mode). The
    per-image descriptor arrays are stacked vertically.

    Parameters
    ----------
    data : pandas.core.frame.DataFrame
        The dataset; must provide a ``'path'`` column.
    size : int
         The size of the spatial bin of the SIFT descriptor in pixels.
    step : int
         A SIFT descriptor is extracted every ``step`` pixels.

    Returns
    -------
    np.ndarray
        Descriptors of all images, one image's rows after another.
    """
    rows = tqdm(data.iterrows(),
                "Extracting/Describing Patches",
                total=len(data))

    per_image = []
    for _, record in rows:
        image_file = os.path.sep.join((DATA_SET_FOLDER, record['path']))
        pixels = io.imread(image_file, as_gray=True)
        # Only the descriptors are kept; the frame positions are not needed.
        _frames, patch_descr = dsift(pixels, size=size, step=step, fast=True)
        per_image.append(patch_descr)

    return np.vstack(per_image)
Esempio n. 4
0
def load_and_describe(filename, size=5, step=STEP):
    """
    Read an image and map its dense SIFT descriptors to visual words.

    The image is read as grayscale, described with ``dsift`` (fast mode) and
    the descriptors are passed to the module-level ``k_means`` model.

    Parameters
    ----------
    filename : str
        Path to an image.
    size : int
         The size of the spatial bin of the SIFT descriptor in pixels.
    step : int
         A SIFT descriptor is extracted every ``step`` pixels.

    Returns
    -------
    The output of ``k_means.predict``: the closest cluster for each
    descriptor.
    """
    gray = io.imread(filename, as_gray=True)
    _frames, descr = dsift(gray, size=size, step=step, fast=True)
    return k_means.predict(descr)
def main(image_list="images.dat"):
    """
    Compute and store SIFT and dense-SIFT descriptors for a list of images.

    Parameters
    ----------
    image_list : str
        Text file with one image path per line, relative to
        ``<cwd>/dataset/``. Blank lines are ignored.

    For every listed image the ``sift`` descriptors are saved with ``np.save``
    under ``<cwd>/sift_descriptor/<image path>`` and the ``dsift`` descriptors
    (step 10) under ``<cwd>/dsift_descriptor/<image path>``; missing
    directories are created. Progress is printed as ``i / total``.
    """
    cwd = os.getcwd()
    data_path = cwd + "/dataset/"
    sift_path = cwd + "/sift_descriptor/"
    dsift_path = cwd + "/dsift_descriptor/"

    # Honour the image_list argument (it used to be ignored in favour of a
    # hard-coded file name) and release the file before the heavy work starts.
    with open(image_list, "r") as listing:
        stripped = [line.strip() for line in listing]
    names = [name for name in stripped if name]

    total = len(names)
    for i, name in enumerate(names, start=1):
        print(i, "/", total)
        image_matrice = imread(data_path + name, mode='F')
        _, sift_desc = sift(image_matrice, compute_descriptor=True)
        _, dsift_desc = dsift(image_matrice, step=10)

        # The same cleaned name is used for reading and writing so that stray
        # whitespace cannot make the two paths disagree.
        sift_image_path = sift_path + name
        dsift_image_path = dsift_path + name

        os.makedirs(os.path.dirname(sift_image_path), exist_ok=True)
        os.makedirs(os.path.dirname(dsift_image_path), exist_ok=True)
        np.save(sift_image_path, sift_desc)
        np.save(dsift_image_path, dsift_desc)
Esempio n. 6
0
def vl_phow(im, **kwargs):
    """
    Extract PHOW features (dense SIFT at several bin sizes) from an image.

    Options are given as keyword arguments (defaults in parentheses):
    ``verbose`` (True), ``fast`` (True), ``sizes`` ([4, 6, 8, 10]),
    ``step`` (2), ``color`` ('gray'; also 'rgb', 'opponent', 'hsv'),
    ``floatdescriptors`` (False), ``magnif`` (6), ``windowsize`` (1) and
    ``contrastthreshold`` (0.005). An unrecognised ``color`` falls back to
    HSV with a warning.

    The image is smoothed with the module-level ``imsmooth`` helper before
    each scale is processed; errors raised by it propagate to the caller.

    Returns
    -------
    frames : ndarray
        One column per descriptor: the rows of the transposed ``dsift``
        frame except the last one (presumably the centre coordinates),
        followed by a row holding the bin size.
    descrs : ndarray
        One column per descriptor; the transposed ``dsift`` descriptors of
        all channels are stacked along axis 0. Descriptors whose contrast is
        below ``contrastthreshold`` are zeroed.
    """
    # -------------------------------------------------------------------
    # Parse the arguments
    # -------------------------------------------------------------------
    import math
    import os
    import warnings

    import numpy as np
    from cyvlfeat.sift import dsift
    from skimage import color
    from utils import NameSpace

    opts = NameSpace()
    opts.verbose = True
    opts.fast = True
    opts.sizes = [4, 6, 8, 10]
    opts.step = 2
    opts.color = 'gray'
    opts.floatdescriptors = False
    opts.magnif = 6
    opts.windowsize = 1  # 1.5
    opts.contrastthreshold = 0.005
    opts.update(**kwargs)

    dsiftOpts = {
        'step': opts.step,
        'norm': True,  # makes dsift append the descriptor norm to each frame
        'window_size': opts.windowsize,
        'verbose': opts.verbose,
        'fast': opts.fast,
        'float_descriptors': opts.floatdescriptors,
    }

    # -------------------------------------------------------------------
    # Extract the features
    # -------------------------------------------------------------------

    # Standardize the image so that it always carries an explicit channel
    # axis: (H, W, 1) for gray, (H, W, 3) otherwise.
    imageSize = [im.shape[1], im.shape[0]]
    color_space = opts.color.lower()
    if color_space == 'gray':
        numChannels = 1
        if im.ndim == 3 and im.shape[2] > 1:
            im = color.rgb2gray(im)
        if im.ndim == 2:
            # A plain 2-D gray image also needs the channel axis, because the
            # channels are indexed as ims[:, :, k] below.
            im = np.expand_dims(im, axis=-1)
    else:
        numChannels = 3
        if im.ndim == 2:
            im = np.expand_dims(im, -1)
        if im.shape[2] == 1:
            im = np.tile(im, (1, 1, numChannels))

        if color_space == 'rgb':
            pass

        elif color_space == 'opponent':
            #    Note that the mean differs from the standard definition of
            #    opponent space and is the regular intensity (for
            #    compatibility with the contrast thresholding).
            #
            #    Note also that the mean is added back to the other two
            #    components with a small multiplier for monochromatic
            #    regions.
            red, green, blue = im[:, :, 0], im[:, :, 1], im[:, :, 2]
            mu = 0.3 * red + 0.59 * green + 0.11 * blue
            alpha = 0.01
            # The three planes are 2-D, so they have to be stacked along a
            # new last axis (concatenate would glue them side by side).
            im = np.stack(
                (mu,
                 (red - green) / math.sqrt(2) + alpha * mu,
                 (red + green - 2 * blue) / math.sqrt(6) + alpha * mu),
                axis=-1)

        else:
            if color_space != 'hsv':
                opts.color = 'hsv'
                color_space = 'hsv'
                warnings.warn(
                    'Color space not recongized, defaulting to HSV color space.')
            # Also reached by the fallback: the image must really be in HSV,
            # because the contrast is later read from the third channel.
            im = color.rgb2hsv(im)

    if opts.verbose:
        print('%s: color space: %s' % (os.path.basename(__file__), opts.color))
        print('%s: image size: %d x %d' %
              (os.path.basename(__file__), imageSize[0], imageSize[1]))
        print('%s: sizes: %s' % (os.path.basename(__file__), str(opts.sizes)))

    max_size = max(opts.sizes)
    frames = []
    descrs = []
    for size in opts.sizes:

        #    Recall from VL_DSIFT() that the first descriptor for scale SIZE
        #    has center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        #    similar). It is convenient to align the descriptors at different
        #    scales so that they have the same geometric centers. For the
        #    maximum size we pick the smallest XMIN; for any other scale we
        #    pick XMIN so that XMIN + 3/2 SIZE matches the maximum scale.
        #
        #    In practice, the offset must be integer ('bounds'), so the
        #    alignment works properly only if all OPTS.SIZES are even or odd.
        #    1.5 is spelled as a float so the factor is not truncated to 1
        #    under integer division.
        off = int(math.floor(1.5 * (max_size - size)))

        #    smooth the image to the appropriate scale based on the size
        #    of the SIFT bins
        sigma = float(size) / opts.magnif
        ims = imsmooth(im, sigma)

        dsiftOpts['size'] = size
        dsiftOpts['bounds'] = [off, off, im.shape[0] - 1, im.shape[1] - 1]

        #    extract dense SIFT features from all channels
        f = []
        d = []
        for k in range(numChannels):
            _f, _d = dsift(ims[:, :, k], **dsiftOpts)
            f.append(_f.T)
            d.append(_d.T)

        #    remove low contrast descriptors
        #    note that for color descriptors the V component is
        #    thresholded
        if color_space in ('gray', 'opponent'):
            contrast = f[0][-1]
        elif color_space == 'rgb':
            contrast = np.mean((f[0][-1], f[1][-1], f[2][-1]), axis=0)
        else:  #    hsv
            contrast = f[2][-1]

        low_contrast = contrast < opts.contrastthreshold
        for k in range(numChannels):
            d[k][:, low_contrast] = 0

        #    save only x,y, and the scale
        frames.append(
            np.concatenate(
                [f[0][0:-1], size * np.ones((1, f[0].shape[1]))],
                axis=0))
        descrs.append(np.concatenate(d, axis=0))

    frames = np.concatenate(frames, axis=1)
    descrs = np.concatenate(descrs, axis=1)

    return frames, descrs
Esempio n. 7
0
def phow(image, verbose=False, fast=True, sizes=(4, 6, 8, 10), step=2, color='gray',
         float_descriptors=False, magnification=6, window_size=1.5, contrast_threshold=0.005):

    """
    Extracts PHOW features from the ``image``. PHOW is simply dense
    SIFT applied at several resolutions.

    By default, `phow()` computes the gray-scale variant of the descriptor. The
    `color` option can be used to compute the color variant instead.

    Unlike the Matlab wrapper of vlfeat, the image is pre-smoothed at the
    desired scale level by the Gaussian filter provided by SciPy
    (``scipy.ndimage.gaussian_filter``), applied to each channel separately.

    If ``image.max() > 1`` the image is converted to ``float32`` and divided
    by 255 so that intensities lie in [0, 1].

    Parameters
    ----------
    image : [H, W], [H, W, 1] or [H, W, 3] `ndarray`
        The image to calculate descriptors for.
    verbose : `bool`, optional
        If ``True``, be verbose.
    fast : `bool`, optional
        If ``True``, use a piecewise-flat, rather than Gaussian,
        windowing function. While this breaks exact SIFT equivalence,
        in practice is much faster to compute.
    sizes : sequence of `int`, optional
        Scales at which the dense SIFT features are extracted. Each
        value is used as bin size for the dsift() function.
    step : `int`, optional
        A SIFT descriptor is extracted every ``step`` pixels. This allows for
        sub-sampling of the image.
    color : `str`, optional
        Choose between 'gray', 'rgb', 'hsv', and 'opponent'. The image is
        converted to this space regardless of the space it arrives in; an
        unrecognized value falls back to 'hsv'.
    float_descriptors : `bool`, optional
        If ``True``, the descriptor are returned in floating point rather than
        integer format.
    magnification : `int`, optional
        The smoothing sigma used for a bin size ``s`` is ``s / magnification``.
    window_size : `float`, optional
        Accepted for interface compatibility.
        NOTE(review): this value is currently not forwarded to dsift().
    contrast_threshold : `float`, optional
        Contrast threshold below which SIFT features are mapped to
        zero. This value is compared to the descriptor norm as returned
        by dsift().

    Returns
    -------
    frames : `(F, 4)` `ndarray`
        ``F`` is the number of keypoints (frames) used. Columns 0-1 are the
        center coordinates of each descriptor as returned by `dsift()`,
        column 2 is its contrast (for the colour variants only the first
        channel's frame is stored) and column 3 is the bin size.
    descriptors : `(F, D)` `ndarray`
        One row per keypoint; the descriptors of all channels are stacked
        horizontally (presumably 128 values per channel -- confirm against
        dsift()). Low-contrast descriptors are zeroed.

    Raises
    ------
    ValueError
        If ``image`` is neither 2- nor 3-dimensional.
    """
    if image.ndim not in (2, 3):
        raise ValueError('Image array not defined')

    if image.max() > 1:
        image = np.array(image, np.float32) / 255.0

    # Standardize the image: the requested `color` has priority over the
    # color space the image actually arrives in, so the conversions below are
    # driven by `color` alone.
    color_lower = color.lower()
    I = image

    if color_lower == 'gray':
        num_channels = 1
        # color='gray' requested but the image has several channels.
        if I.ndim == 3 and I.shape[2] > 1:
            I = rgb2gray(I)
    else:
        num_channels = 3

        # A color variant was requested but the image is greyscale: expand it
        # to RGB first, then convert like any other RGB image (this step used
        # to short-circuit the conversion below).
        if I.ndim == 3 and I.shape[2] == 1:
            I = I[:, :, 0]
        if I.ndim == 2:
            I = gray2rgb(I)

        if color_lower == 'rgb':
            pass

        elif color_lower == 'opponent':
            # Note that the mean differs from the standard definition of
            # opponent space and is the regular intensity (for compatibility
            # with the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic regions.
            alpha = 0.01
            intensity = rgb2gray(I)
            red, green, blue = I[:, :, 0], I[:, :, 1], I[:, :, 2]
            # The planes are 2-D, so they are stacked along a new third axis.
            I = np.stack(
                (intensity,
                 (red - green) / math.sqrt(2) + alpha * intensity,
                 (red + green - 2 * blue) / math.sqrt(6) + alpha * intensity),
                axis=2)

        else:
            if color_lower != 'hsv':
                color_lower = 'hsv'
                print('Color space not recognized, defaulting to HSV color space.')
            I = rgb_to_hsv(I)

    if verbose:
        print('Color space: {}'.format(color_lower))
        print('I size: {}x{}'.format(I.shape[0], I.shape[1]))
        print('Sizes: [{}]'.format(' '.join(str(size) for size in sizes)))

    # dsift() accepts only 2-D arrays, so split the image into its channels.
    if I.ndim == 2:
        channels = [I]
    else:
        channels = [I[:, :, k] for k in range(num_channels)]

    max_size = max(sizes)
    all_frames = []
    all_descrs = []

    for size in sizes:
        off = int(math.floor(1.0 + 3.0 / 2.0 * (max_size - size)))
        bounds = np.array([off, off, image.shape[0] - 1, image.shape[1] - 1])

        # Smooth to the appropriate scale based on the size of the SIFT bins.
        sigma = size * 1.0 / magnification

        # Extract dense SIFT features from all channels. Each channel is
        # smoothed on its own so that the filter never mixes channels.
        f = []
        d = []
        for channel in channels:
            smoothed_image = scipy.ndimage.gaussian_filter(channel, sigma)
            channel_frames, channel_descrs = dsift(
                smoothed_image, step=step, size=size, bounds=bounds,
                norm=True, fast=fast, float_descriptors=float_descriptors,
                verbose=verbose)
            f.append(channel_frames)
            d.append(channel_descrs)

        # With norm=True the third frame column is the descriptor contrast.
        # For color descriptors the V component is thresholded.
        if color_lower in ('gray', 'opponent'):
            contrast = f[0][:, 2]
        elif color_lower == 'rgb':
            contrast = np.mean((f[0][:, 2], f[1][:, 2], f[2][:, 2]), axis=0)
        else:  # hsv
            contrast = f[2][:, 2]

        # Remove low contrast descriptors.
        low_contrast = contrast < contrast_threshold
        for channel_descrs in d:
            channel_descrs[low_contrast] = 0

        size_column = size * np.ones((contrast.shape[0], 1))
        all_frames.append(np.append(f[0], size_column, axis=1))
        all_descrs.append(np.hstack(d))

    frames = np.concatenate(all_frames, axis=0)
    descriptors = np.concatenate(all_descrs, axis=0)

    return frames, descriptors