Example #1
def Dense_SIFT_Extractor(images, data_name_code):
    # Collect dense SIFT descriptors for every image in the stack, then
    # concatenate once at the end (avoids repeated O(n^2) np.concatenate calls).
    all_desc = []
    for count, img in enumerate(images):
        kp, desc = cy.vl_dsift(img,
                               step=1,
                               size=1,
                               bounds=None,
                               window_size=1,
                               norm=True,
                               fast=True,
                               float_descriptors=True,
                               geometry=(4, 4, 8),
                               verbose=1)
        all_desc.append(desc)
        print("Image number " + str(count) + " in Stack")
    concate = np.concatenate(all_desc)
    print("All Dense SIFT Descriptors Generated")

    filename = "Fer2013_DenseSIFT_Descriptors.npy"
    concate = np.reshape(concate, (images.shape[0], 2025, 128))
    np.save(filename, concate)
    print("Saved all Dense SIFT Descriptors as numpy array to Disk")
Example #2
def calculate_sift_descriptors(image, step_size, cell_size, norm_threshold=0.0):
    """
    Calculate SIFT descriptors on a single image.

    @param image: Image to extract SIFT descriptors from.
    @param step_size: Horizontal and vertical distance between keypoints in pixels.
    @param cell_size: Area size of one descriptor cell in pixels.
    @param norm_threshold: Keypoints whose descriptor norm falls below this value are discarded.
    @return: Coordinates and descriptors for each keypoint.
    """
    # we don't want to start directly in the corner of the page
    if cell_size == 8:
        off = 2.5
    elif cell_size == 5:
        off = 7.5
    else:
        off = 0.0

    frames, desc = vlfeat.vl_dsift(image,
                                   bounds=np.array((off, off, image.shape[0]-off, image.shape[1]-off), 'f'),
                                   step=step_size,
                                   size=cell_size,
                                   norm=(norm_threshold > 0.0))

    frames = frames.T
    desc = desc.T

    # throw away all descriptors with a magnitude < norm_threshold
    if norm_threshold > 0.0:
        norms = frames[:, 2]
        frames = np.array([p[:2] for i, p in enumerate(frames) if norms[i] > norm_threshold])
        desc = np.array([d for i, d in enumerate(desc) if norms[i] > norm_threshold])
    return frames, desc
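A usage sketch for the helper above, assuming `vlfeat` is the pyvlfeat binding and the input is a grayscale float image such as a scanned document page.

# Hypothetical call; `page` is a stand-in grayscale image.
import numpy as np

page = np.random.rand(200, 300).astype(np.float32)
frames, desc = calculate_sift_descriptors(page, step_size=5, cell_size=5,
                                          norm_threshold=0.1)
print(frames.shape, desc.shape)  # with norm filtering: (n, 2) and (n, 128)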
Example #3
def process_image(imagename, resultname='temp.sift', dense=False):
    """ process an image and save the results in a .key ascii file"""
    print("working on", imagename)
    if dense == False:
        if imagename[-3:] != 'pgm':
            #create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert('L')
            im.thumbnail(size, Image.ANTIALIAS)
            im.save('tmp.pgm')
            imagename = 'tmp.pgm'
        
        #check if linux or windows 
        if os.name == "posix":
            cmmd = "./sift <" + imagename + ">" + resultname
            print(cmmd)
        else:
            cmmd = "siftWin32 <" + imagename + ">" + resultname
        
        os.system(cmmd)
        if os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)

    else:
        import vlfeat

        # defines how dense the grid is
        size = (150, 150)
        step = 10
        
        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)
        

        locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step, verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        header = ' '.join([str(nfeatures), str(128)])
        temp = int_descriptors.astype('float')  # convert descriptors to float
        descriptors = temp[:]
        with open(resultname, 'wb') as f:
            cPickle.dump([locs.T, descriptors.T], f, protocol=cPickle.HIGHEST_PROTOCOL)
        print "features saved in", resultname
        if WRITE_VERBOSE:
            with open(resultname, 'w') as f:
                f.write(header)
                f.write("\n")
                for i in range(nfeatures):
                    f.write(' '.join(map(str, locs[:, i])))
                    f.write("\n")
                    f.write(' '.join(map(str, descriptors[:, i])))
                    f.write("\n")
Example #4
    def __init__(self, word_img, word, codebook):
        self._word_img_ = word_img
        self._word_ = word
        self._bof_ = None

        # compute descriptors
        step_size = 5
        cell_size = 5
        frames, desc = vlfeat.vl_dsift(word_img / 255,
                                       step=step_size,
                                       size=cell_size)
        desc = np.array(desc, dtype=float)

        # quantize the descriptors using the codebook
        dist_mat = distance.cdist(desc, codebook, 'euclidean')
        dist_mat_sort_ind = np.argsort(dist_mat, axis=1)

        global_ = dist_mat_sort_ind[:, 0]
        left = global_[:len(global_) // 2]
        #mid = global_[len(global_)/3:2*len(global_)/3]
        right = global_[len(global_) // 2:]

        bof_g = np.bincount(global_, minlength=4095)
        bof_l = np.bincount(left, minlength=4095)
        #bof_m = np.bincount(mid,minlength = 4095)
        bof_r = np.bincount(right, minlength=4095)

        self._bof_ = np.concatenate((bof_g, bof_l, bof_r), axis=0)
Example #5
def calculate_histograms(images, clusters_mean):
    for image in images:
        f, des = vl_dsift(image.img, size=8, step=8)
        histogram = get_histogram(des, clusters_mean)
        his_sum = sum(histogram.values()) * 1.0
        for item in range(len(clusters_mean)):
            image.histogram.append(
                histogram[item] / his_sum if his_sum != 0 else histogram[item])
    return images
Example #6
def BOW(n_clusters=40):
    all_descriptors = []
    for image in TRAIN_IMAGES:
        f, des = vl_dsift(image.img, size=8, step=8)
        all_descriptors.extend(des.tolist())

    kmeans = KMeans(n_clusters=n_clusters).fit(np.array(all_descriptors))
    clusters_mean = kmeans.cluster_centers_.tolist()
    return clusters_mean
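A sketch of how the two functions above fit together, assuming TRAIN_IMAGES holds objects with an `img` array and an empty `histogram` list, and that `get_histogram` counts descriptors per cluster.

# Hypothetical bag-of-words pipeline built from Examples #5 and #6.
clusters_mean = BOW(n_clusters=40)                    # learn the visual vocabulary
train = calculate_histograms(TRAIN_IMAGES, clusters_mean)
features = np.array([im.histogram for im in train])   # one BoW vector per image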
Example #7
    def calculate_visual_words_for_query(self, query_image, visualize = False):

        # Later processing steps expect the image as float32 values.
        im_arr = query_image
        # The colormap determines how the intensity values are interpreted.
        #if visualize:
        #    plt.imshow(im_arr, cmap=cm.get_cmap('Greys_r'))
        #    plt.show()

        # compute SIFT descriptors
        frames, desc = vlfeat.vl_dsift(im_arr, step=self.step_size, size=self.cell_size)
    #     pickle_densesift_fn = '2700270-small_dense-%d_sift-%d_descriptors.p' % (step_size, cell_size)
    #     frames, desc = pickle.load(open(pickle_densesift_fn, 'rb'))
        frames = frames.T
        desc = desc.T
        distance_matrix = scipy.spatial.distance.cdist(desc, self.centroids, "euclidean")
        labels = np.argmin(distance_matrix, axis = 1)
        
        
        if visualize:
            draw_descriptor_cells = False
            fig = plt.figure()
            ax = fig.add_subplot(111)
            ax.imshow(im_arr, cmap=cm.get_cmap('Greys_r'))
            ax.hold(True)
            ax.autoscale(enable=False)
            colormap = cm.get_cmap('jet')
            desc_len = self.cell_size * 4
            for (x, y), label in zip(frames, labels):
                color = colormap(label / float(self.n_centroids))
                circle = Circle((x, y), radius=1, fc=color, ec=color, alpha=1)
                #rect = Rectangle((x - desc_len / 2, y - desc_len / 2), desc_len, desc_len, alpha=0.08, lw=1)
                ax.add_patch(circle)
                if draw_descriptor_cells:
                    for p_factor in [0.25, 0.5, 0.75]:
                        offset_dyn = desc_len * (0.5 - p_factor)
                        offset_stat = desc_len * 0.5
                        line_h = Line2D((x - offset_stat, x + offset_stat), (y - offset_dyn, y - offset_dyn), alpha=0.08, lw=1)
                        line_v = Line2D((x - offset_dyn , x - offset_dyn), (y - offset_stat, y + offset_stat), alpha=0.08, lw=1)
                        ax.add_line(line_h)
                        ax.add_line(line_v)
                #ax.add_patch(rect)
            
            plt.show()
        
        # centroids: a list of centroids (these are SIFT descriptors as well)
        # labels: for each SIFT descriptor, an index indicating the centroid it is assigned to
        # ORDER: column-wise, top to bottom and left to right. Example:
        # 1 4 7
        # 2 5 8
        # 3 6 9
        labels = np.reshape(labels, (len(np.unique(frames[:, 1])), -1), order='F')
        return labels
Example #8
def rgsift(image):
    from skimage import img_as_float
    shaped_image = img_as_float(image)
    gray = rgb2gray(image)
    s = shaped_image.sum(axis=2)
    red = shaped_image[:, :, 0] / (s + 1e-5)
    green = shaped_image[:, :, 1] / (s + 1e-5)
    descs = []
    for channel in [gray, red, green]:
        loc, desc = vl_dsift(channel, step=4, size=6)
        descs.append(desc.T.copy())
    return loc, np.hstack(descs)
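A usage sketch for rgsift, assuming an RGB uint8 image; the gray, red and green descriptors are stacked side by side, so each keypoint gets a 3 * 128 = 384-dimensional descriptor.

# Hypothetical call; the random image is a stand-in.
import numpy as np
from skimage.color import rgb2gray  # rgb2gray as used above

image = (np.random.rand(100, 100, 3) * 255).astype(np.uint8)
loc, desc = rgsift(image)
print(desc.shape)  # (n_keypoints, 384)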
Example #10
	def extract_feature_vector(self):
		img = imread(self.image.get_path()) if self.memory == False else self.image
		imgResized = resize(img, (300,250))
		grayScaleImg = vl_rgb2gray(imgResized).astype('uint8')
		histEqualizedImage = equalizeHist(grayScaleImg)
		sizeOfSpatialBins = 8
		step = 10
		fast = False #if set to True it uses a flat window rather than a Gaussian window
		verbose = False
		norm = False
		bounds = -1
		frames, descriptors = vl_dsift(histEqualizedImage, step, bounds, sizeOfSpatialBins, fast, verbose, norm)
		return descriptors.transpose().astype('float32')
Example #11
def sift_descriptors(images, dataset):
    descs = []
    coordinates = []
    print("computing sift descriptors")
    for f in images:
        print("processing image %s" % f)
        image = dataset.get_image(f)
        #coords, sift = rgsift(image)
        #tracer()
        gray_image = rgb2gray(image)
        coords, sift = vl_dsift(gray_image, step=3, size=4)
        #coords2, sift2 = vl_dsift(gray_image, step=3, size=8)
        #coords3, sift3 = vl_dsift(gray_image, step=3, size=16)
        #tracer()
        #sift = np.hstack([sift, sift2, sift3])
        #coords = np.hstack([coords, coords2, coords3])
        descs.append(sift.T)
        coordinates.append(coords)
    return descs, coordinates
Example #14
def extract_RGB_SIFT_features(image, labels):
    n_sp = np.max(labels) + 1
    feat_descs = np.zeros((n_sp, 128 * 3))
    img_superpixel = np.zeros_like(labels, dtype='int')

    # extract SIFT features for each colour channel
    f = np.empty((3, ), dtype='object')
    d = np.empty((3, ), dtype='object')
    for n in range(3):
        f[n], d[n] = vl_dsift(image[..., n], size=1, float_descriptors=True)

    r = np.arange(f[0].shape[0])  # indices of all features

    # find feature desc nearest to centroid and fill in for each channel
    for i in range(n_sp):
        # get centroid of i'th superpixel
        img_superpixel[:] = labels == i
        c = regionprops(img_superpixel)[0].centroid

        # find nearest sift feature location to the centroid
        D = np.sum((f[0] - c)**2, axis=1)
        d_amin = D.argmin()

        # see how many are equally close
        equal_mask = D == D[d_amin]
        n_equal = np.count_nonzero(equal_mask)

        # if no draws, pick closest, else randomly pick from the equally closest
        idx = d_amin if n_equal == 1 else np.random.choice(r[equal_mask])

        # fill in the feature vector for each image channel
        for n in range(3):
            # pick out which bit of the feature vector we're in and fill it in
            j, k = n * 128, (1 + n) * 128
            feat_descs[i, j:k] = d[n][idx, :]

    return feat_descs
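A driver sketch for the superpixel feature extractor above, using SLIC to produce the `labels` input (an integer label image with labels 0..n_sp-1, as the function expects); the SLIC parameters here are assumptions.

# Hypothetical usage.
import numpy as np
from skimage.segmentation import slic

image = np.random.rand(120, 120, 3).astype(np.float32)
labels = slic(image, n_segments=50, start_label=0)
feats = extract_RGB_SIFT_features(image, labels)
print(feats.shape)  # (n_superpixels, 384)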
Example #15
def vl_phow(im,
            verbose=True,
            fast=True,
            sizes=[4, 6, 8, 10],
            step=2,
            color='rgb',
            floatdescriptors=False,
            magnif=6,
            windowsize=1.5,
            contrastthreshold=0.005):

    opts = Options(verbose, fast, sizes, step, color, floatdescriptors, magnif,
                   windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Extract the features
    imageSize = shape(im)
    if im.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    if opts.color == 'gray':
        numChannels = 1
        if (im.ndim == 2):
            im = vl_rgb2gray(im)
    else:
        numChannels = 3
        if (im.ndim == 2):
            im = dstack([im, im, im])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intensity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.

            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([
                mu, (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu,
                (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) +
                alpha * mu
            ])
        else:
            raise ValueError('Color option ' + str(opts.color) +
                             ' not recognized')
    if opts.verbose:
        print('{0}: color space: {1}'.format('vl_phow', opts.color))
        print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0],
                                                  imageSize[1]))
        print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.

        off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins)
            # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(data=ims[:, :, k],
                                      step=dsiftOpts.step,
                                      size=size_of_spatial_bins,
                                      fast=dsiftOpts.fast,
                                      verbose=dsiftOpts.verbose,
                                      norm=dsiftOpts.norm,
                                      bounds=[off, off, maxint, maxint])
            frames.append(f_temp)
            descrs.append(d_temp)
        frames = array(frames)
        descrs = array(descrs)
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)
        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if (opts.color == 'gray') | (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = mean(
                [frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            raise ValueError('Color option ' + str(opts.color) +
                             ' not recognized')
        descrs[:, contrast < opts.contrastthreshold] = 0

        # save only x,y, and the scale
        frames_temp = array(frames[0][0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0][0].shape))
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))

    frames_all = hstack(frames_all)
    descrs_all = hstack(descrs_all)
    return frames_all, descrs_all
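A PHOW call sketch; for color='rgb' the descriptors of the three channels are stacked, so descrs_all has 3 * 128 = 384 rows, and frames_all carries x, y, contrast plus the appended bin-size row. The image size below is an assumption.

# Hypothetical usage.
import numpy as np

im = np.random.rand(64, 64, 3).astype('float32')
frames_all, descrs_all = vl_phow(im, sizes=[4, 6, 8, 10], step=2, color='rgb')
print(frames_all.shape, descrs_all.shape)  # (4, n) and (384, n)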
Example #16
def process_image(imagename, resultname='temp.sift', dense=False):    
    """ process an image and save the results in a .key ascii file"""
    #print "working on ", imagename
    if dense == False:
        if imagename[-3:] != 'pgm':
            #create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert('L')
            im.thumbnail(size, Image.ANTIALIAS)
            im.save('tmp.pgm')
            imagename = 'tmp.pgm'
        
        #check if linux or windows 
        if os.name == "posix":
            cmmd = "./sift < " + imagename + " > " + resultname
        else:
            cmmd = "siftWin32 < " + imagename + " > " + resultname
        
        # run extraction command
        returnvalue = subprocess.call(cmmd, shell=True)
        if returnvalue == 127:
            os.remove(resultname) # removing empty resultfile created by output redirection
            raise IOError("SIFT executable not found")
        if returnvalue == 2:
            os.remove(resultname) # removing empty resultfile created by output redirection
            raise IOError("image " + imagename + " not found")            
        if os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)

    else:
        import vlfeat

        # defines how dense the grid is
        size = (150, 150)
        step = 10
        
        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)
        

        locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step, verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        header = ' '.join([str(nfeatures), str(128)])
        temp = int_descriptors.astype('float')  # convert descriptors to float
        descriptors = temp[:]
        with open(resultname, 'wb') as f:
            cPickle.dump([locs.T, descriptors.T], f, protocol=cPickle.HIGHEST_PROTOCOL)
        print "features saved in", resultname
        if WRITE_VERBOSE:
            with open(resultname, 'w') as f:
                f.write(header)
                f.write("\n")
                for i in range(nfeatures):
                    f.write(' '.join(map(str, locs[:, i])))
                    f.write("\n")
                    f.write(' '.join(map(str, descriptors[:, i])))
                    f.write("\n")
Example #17
    def test_all(self):
        img = numpy.array(Image.open('roofs1.jpg'))

        # Test rgb2gray
        img_gray = rgb2gray(img)
        self.assertEqual(tuple(img_gray.shape), (478, 640))
        self.assertTrue(
            numpy.allclose(img_gray[:4, :4],
                           numpy.array([[0.8973, 0.8973, 0.8973, 0.9052],
                                        [0.8973, 0.8973, 0.8973, 0.9052],
                                        [0.8973, 0.8973, 0.9021, 0.9061],
                                        [0.9013, 0.9013, 0.9061, 0.9100]]),
                           atol=1e-4))
        if os.path.exists("img_gray.txt"):
            expected = numpy.loadtxt("img_gray.txt", delimiter='\t')
            self.assertTrue(numpy.allclose(img_gray, expected, atol=1e-4))

        # Test vl_imsmooth
        binsize = 8
        magnif = 3
        img_smooth = vlfeat.vl_imsmooth(img_gray,
                                        math.sqrt((binsize / magnif)**2 -
                                                  0.25),
                                        verbose=True)
        self.assertEqual(tuple(img_smooth.shape), (478, 640))
        self.assertTrue(
            numpy.allclose(img_smooth[:4, :4],
                           numpy.array([[0.8998, 0.9013, 0.9034, 0.9057],
                                        [0.9000, 0.9015, 0.9035, 0.9057],
                                        [0.9002, 0.9017, 0.9036, 0.9057],
                                        [0.9005, 0.9020, 0.9038, 0.9058]]),
                           atol=1e-4))
        if os.path.exists("img_smooth.txt"):
            expected = numpy.loadtxt("img_smooth.txt", delimiter='\t')
            self.assertTrue(numpy.allclose(img_smooth, expected, atol=1e-4))

        # Test vl_dsift
        frames, descrs = vlfeat.vl_dsift(img_smooth,
                                         size=binsize,
                                         verbose=True)
        frames = numpy.add(frames.transpose(), 1)
        descrs = descrs.transpose()
        self.assertEqual(tuple(frames.shape), (2, 279664))
        self.assertEqual(tuple(descrs.shape), (128, 279664))
        self.assertTrue(
            numpy.allclose(frames[:, :4],
                           numpy.array([[13, 13, 13, 13], [13, 14, 15, 16]])))
        self.assertTrue(
            numpy.allclose(
                descrs[:, 0],
                numpy.array([
                    134, 35, 0, 0, 0, 0, 0, 5, 109, 9, 1, 0, 0, 0, 0, 61, 7, 2,
                    32, 21, 0, 0, 1, 28, 2, 13, 111, 9, 0, 0, 0, 2, 33, 134,
                    131, 0, 0, 0, 0, 0, 30, 92, 134, 0, 0, 0, 0, 19, 11, 42,
                    134, 0, 0, 0, 1, 31, 6, 20, 124, 3, 0, 0, 0, 7, 5, 134,
                    134, 0, 0, 0, 0, 0, 1, 94, 134, 0, 0, 0, 0, 0, 0, 34, 134,
                    1, 0, 0, 0, 0, 0, 4, 134, 13, 0, 2, 2, 0, 27, 53, 15, 0, 0,
                    0, 0, 1, 11, 48, 27, 2, 0, 0, 0, 0, 0, 5, 28, 16, 1, 0, 0,
                    0, 0, 2, 13, 16, 4, 5, 4, 0
                ])))
        if os.path.exists("dsift_frames.txt"):
            expected = numpy.loadtxt("dsift_frames.txt", delimiter='\t')
            self.assertTrue(numpy.allclose(frames, expected))
        if os.path.exists("dsift_descrs.txt"):
            expected = numpy.loadtxt("dsift_descrs.txt", delimiter='\t')
            self.assertTrue(
                numpy.linalg.norm(expected - descrs) < 28)  # rounding errors?

        # Test vl_kmeans
        centers, assigns = vlfeat.vl_kmeans(numpy.array(
            [[1], [2], [3], [10], [11], [12]], dtype='f'),
                                            2,
                                            ret_quantize=True,
                                            verbose=True)
        self.assertTrue(numpy.allclose(centers, numpy.array([[2], [11]])))
        self.assertTrue(
            numpy.allclose(assigns, numpy.array([0, 0, 0, 1, 1, 1])))

        centers, assigns = vlfeat.vl_kmeans(numpy.array(
            [[1, 0], [2, 0], [3, 0], [10, 1], [11, 1], [12, 1]], dtype='f'),
                                            2,
                                            ret_quantize=True)
        self.assertTrue(numpy.allclose(centers,
                                       numpy.array([[11, 1],
                                                    [2,
                                                     0]])))  # order swapped?
        self.assertTrue(
            numpy.allclose(assigns, numpy.array([1, 1, 1, 0, 0, 0])))

        # Test vl_gmm
        if os.path.exists("gmm_data.txt"):
            data = numpy.loadtxt("gmm_data.txt", delimiter='\t').transpose()
        else:
            data = numpy.random.rand(5000, 2)

        means, covariances, priors, ll, posteriors = vlfeat.vl_gmm(
            data, 30, verbose=True, ret_loglikelihood=True, ret_posterior=True)
        self.assertEqual(tuple(means.shape), (30, 2))
        self.assertEqual(tuple(covariances.shape), (30, 2))
        self.assertEqual(tuple(priors.shape), (30, ))
        self.assertEqual(tuple(posteriors.shape), (5000, 30))

        if os.path.exists("gmm_means.txt"):
            expected = numpy.loadtxt("gmm_means.txt",
                                     delimiter='\t').transpose()
            self.assertTrue(numpy.allclose(means, expected, atol=1e-4))

        if os.path.exists("gmm_covariances.txt"):
            expected = numpy.loadtxt("gmm_covariances.txt",
                                     delimiter='\t').transpose()
            self.assertTrue(numpy.allclose(covariances, expected, atol=1e-4))

        if os.path.exists("gmm_priors.txt"):
            expected = numpy.loadtxt("gmm_priors.txt",
                                     delimiter='\t').transpose()
            self.assertTrue(numpy.allclose(priors, expected, atol=1e-4))

        if os.path.exists("gmm_posteriors.txt"):
            expected = numpy.loadtxt("gmm_posteriors.txt",
                                     delimiter='\t').transpose()
            self.assertTrue(numpy.allclose(posteriors, expected, atol=1e-3))

        # Test vl_fisher
        if os.path.exists("fisher_data.txt"):
            data = numpy.loadtxt("fisher_data.txt", delimiter='\t').transpose()
        else:
            data = numpy.random.rand(1000, 2)

        encoding = vlfeat.vl_fisher(data,
                                    means,
                                    covariances,
                                    priors,
                                    verbose=True)
        self.assertEqual(tuple(encoding.shape), (120, ))

        if os.path.exists("fisher_encoding.txt"):
            expected = numpy.loadtxt("fisher_encoding.txt",
                                     delimiter='\t').transpose()
            self.assertTrue(numpy.allclose(encoding, expected, atol=1e-4))
Example #18
def block_feat(bk='', *varargin):
    if bk == '':
        bk = bkinit('feat', 'db')
        bk['fetch'] = fetch__
        bk['rand_send'] = []
        bk['detector'] = 'sift'
        bk['descriptor'] = 'siftnosmooth'
        bk['ref_size'] = []
        bk['min_sigma'] = 0
        bk['max_num'] = np.inf
        bk['rescale'] = 6
        return bk
    
    ############################
    # check/load inputs
    bk, dirty = bkbegin(bk)
    if not dirty:
        print('block_feat not dirty')
        return bk

    db = bkfetch(bk['db']['tag'], 'db')
    
    reduce_ = True
    ##############################
    #
    for seg in db['segs']:
        print('process %s'%(seg['path']))
        ##############################
        # preprocess
        Iorig = imread(os.path.join(db['images_path'], seg['path']))
        I = img_as_float(Iorig)
        M,N,K = I.shape
        if len(bk['ref_size']):
            rho = bk['ref_size'] / max(M, N)
        else:
            rho = 1
        
        Icolor = I
        
        ##############################
        # Detector

        #############################
        # frame selector


        ###############################
        # Descriptor
        if bk['descriptor'] == 'dsift-color':
            
            RGB = Icolor.sum(2) + np.finfo(float).eps
            Irgb = Icolor / np.stack((RGB, RGB, RGB), 2)

            fr, dr = vlfeat.vl_dsift(Irgb[:, :, 0], size=bk['dsift_size'], step=bk['dsift_step'], fast=True, norm=True)
            fr = fr.transpose()
            dr = dr.transpose()

            fg, dg = vlfeat.vl_dsift(Irgb[:, :, 1], size=bk['dsift_size'], step=bk['dsift_step'], fast=True, norm=True)
            fg = fg.transpose()
            dg = dg.transpose()

            d = np.concatenate((dr, dg), axis=0)
            keep1 = (fr[2, :] > bk['dsift_minnorm']) | (fg[2, :] > bk['dsift_minnorm'])

            f = fr[0:2, :]
            f = f[:, keep1]
            d = d[:, keep1]

            sigma = bk['dsift_size'] * 4 / 6
            f = np.concatenate((f,
                                sigma * np.ones((1, f.shape[1])),
                                np.pi * np.ones((1, f.shape[1]))), axis=0)
            #rescale=6
            R = f[3, :] * bk['rescale']
            keep2 = (f[0, :] - R >= 0) & (f[0, :] + R <= N - 1) & (f[1, :] - R >= 0) & (f[1, :] + R <= M - 1)
            f = f[:, keep2]
            d = d[:, keep2]
    

        else:
            print('unknown descriptor')

        ##############################
        # post-process
        f[0:2, :] = (f[0:2, :] - 1) / rho + 1
        f[2, :] = f[2, :] / rho
        #############################
        # save
        path_d = os.path.join(glb.wrd['prefix'],'data','%05d.d.pkl'%(seg['seg']))
        path_f = os.path.join(glb.wrd['prefix'],'data','%05d.f.pkl'%(seg['seg']))
        pickle.dump(f, open(path_f,'wb'))
        pickle.dump(d, open(path_d,'wb'))

    ###################################
    if reduce_:
        bk = bkend(bk)    


    return bk
Example #19
def process_image(imagename, resultname='temp.sift', dense=False):
    """ process an image and save the results in a .key ascii file"""
    print("working on", imagename)
    if dense == False:
        if imagename[-3:] != 'pgm':
            #create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert('L')
            im.thumbnail(size, Image.ANTIALIAS)
            im.save('tmp.pgm')
            imagename = 'tmp.pgm'

        #check if linux or windows
        if os.name == "posix":
            cmmd = "./sift <" + imagename + ">" + resultname
            print(cmmd)
        else:
            cmmd = "siftWin32 <" + imagename + ">" + resultname

        os.system(cmmd)
        if os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)
        #print 'processed', imagename
    else:
        import vlfeat

        # like in pinto2008 why is vision hard
        size = (150, 150)
        window_size = 8
        step = 10

        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)

        #locs,int_descriptors = vlfeat.vl_dsift(im_gray,size=window_size,verbose=VERBOSE)
        locs, int_descriptors = vlfeat.vl_dsift(im_gray,
                                                step=step,
                                                verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        header = ' '.join([str(nfeatures), str(128)])
        temp = int_descriptors.astype('float')  # convert descriptors to float
        descriptors = temp[:]
        with open(resultname, 'wb') as f:
            cPickle.dump([locs.T, descriptors.T],
                         f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
        print "features saved in", resultname
        if WRITE_VERBOSE:
            with open(resultname, 'w') as f:
                f.write(header)
                f.write("\n")
                for i in range(nfeatures):
                    f.write(' '.join(map(str, locs[:, i])))
                    f.write("\n")
                    f.write(' '.join(map(str, descriptors[:, i])))
                    f.write("\n")
Example #20
from vlfeat import vl_dsift
import cv2
import numpy as np
img = cv2.imread('../data/face.jpg')
img = cv2.resize(img, (640, 480))

gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

gray = gray.astype('float32')

print(type(gray))
print(gray.shape)
import time
start = time.time()
frame, desc = vl_dsift(gray, 8, 16, fast=True, verbose=True)
print('computing the dsift of the full image took {}'.format(time.time() - start))

a = np.transpose(desc)

print(a.shape[0] * a.shape[1])
Example #21
def dsift(im,
            verbose=True,
            fast=True,
            sizes=[4, 6, 8, 10],
            step=2,
            color='rgb',
            floatdescriptors=False,
            magnif=6,
            windowsize=1.5,
            contrastthreshold=0.005):

    opts = Options(verbose, fast, sizes, step, color, floatdescriptors,
                   magnif, windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Extract the features
    imageSize = shape(im)
    if im.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    if opts.color == 'gray':
        numChannels = 1
        if (im.ndim == 2):
            im = vl_rgb2gray(im)
    else:
        numChannels = 3
        if (im.ndim == 2):
            im = dstack([im, im, im])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intensity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.

            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([mu,
                         (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu,
                         (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu])
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
    if opts.verbose:
        # verbose output disabled in this variant
        #print('{0}: color space: {1}'.format('vl_phow', opts.color))
        #print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1]))
        #print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))
        pass

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.

        off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins)
            # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(data=ims[:, :, k],
                                      step=dsiftOpts.step,
                                      size=size_of_spatial_bins,
                                      fast=dsiftOpts.fast,
                                      verbose=dsiftOpts.verbose,
                                      norm=dsiftOpts.norm,
                                      bounds=[off, off, maxint, maxint])
            frames.append(f_temp)
            descrs.append(d_temp)
        frames = array(frames)
        descrs = array(descrs)
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)
        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if (opts.color == 'gray') | (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
        descrs[:, contrast < opts.contrastthreshold] = 0

        # save only x,y, and the scale
        frames_temp = array(frames[0][0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0][0].shape))
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))


    frames_all = hstack(frames_all)
    descrs_all = hstack(descrs_all)
    return frames_all, descrs_all
Example #22
def denseSift(imgIn,verbose=False,border=[10,10],visu=False,dtype=numpy.float32,scale=None):
    def extendBorder(imgIn, borderSize, visu=False):
        img=imgIn
        oldShape = img.shape
        flipUd   = numpy.fliplr(img)
        imgUd_U  = flipUd[:,oldShape[1]-borderSize[1]:oldShape[1]]
        imgUd_D  = flipUd[:, 0: borderSize[1]]
        assert imgUd_U.shape[1]==borderSize[1]
        assert imgUd_D.shape[1]==borderSize[1]

        img = numpy.concatenate([imgUd_U,img,imgUd_D],axis=1)

        oldShape = img.shape
        flipLr   = numpy.flipud(img)
        imgLr_R  = flipLr[oldShape[0]-borderSize[0]:oldShape[0],:]
        imgLr_L  = flipLr[0: borderSize[0],:]
        assert imgLr_R.shape[0]==borderSize[0]
        assert imgLr_L.shape[0]==borderSize[0]

        img = numpy.concatenate([imgLr_R,img,imgLr_L],axis=0)

        if visu:
            plt.imshow(img.T, cmap = cm.Greys_r)
            plt.show()
        
        return img
    def frameToCoordinate(F):
        cX = F[1,:]
        cY = F[0,:]
        cX = numpy.floor(cX)
        cY = numpy.floor(cY)
        cX = cX.astype(numpy.int32)
        cY = cY.astype(numpy.int32)
        coords = (cX,cY)
        return coords

    def checkCovering(shape,border,coord):
        covering = numpy.zeros(shape)
        covering[coord] = 1

        coveringSub  = covering[border[0]:shape[0]-border[0] ,border[1]:shape[1]-border[1] ]
        whereZero    = numpy.where(coveringSub==0)

        total  = coveringSub.shape[0]*coveringSub.shape[1]
        nZeros = len(whereZero[0])
        nOnes  = total-nZeros
        return nZeros==0

    #imgIn = numpy.squeeze(vigra.filters.laplacianOfGaussian(imgIn,1.0))

    if scale is not None :
        shapeOrg = [dx,dy] =imgIn.shape[0:2]
        newShape  = [  int(float(dx)*scale) ,int(float(dy)*scale)  ]
        imgIn = vigra.sampling.resize(imgIn,newShape)

    if imgIn.ndim == 3 :
        imgIn = numpy.sum(imgIn,axis=2)

    srcShape = imgIn.shape
    # extend image with mirror boundary conditions
    img      = extendBorder(imgIn,border)
    img      = numpy.require(img,dtype=numpy.float32)
    # shape is bigger than src shape since the image has
    # been extended by "extendBorder"
    shape    = img.shape
    
    # call *EXTERNAL* code for dense sift
    F, D = vlfeat.vl_dsift(
        img,
        step=-1,
        bounds=numpy.zeros(1, 'f'),
        size=-1,
        fast=True,
        verbose=verbose,
        norm=False
    )
    # number of desc (128 default)
    numDescriptors  = D.shape[0]
    featureImg      = numpy.ones([shape[0],shape[1],numDescriptors],dtype=dtype)
    coords          = frameToCoordinate(F)

    # check that each pixel is covered
    assert checkCovering(shape,border,coords)

    # MAKE ME FAST!!!!
    # write results in one dense array
    for fi in range(numDescriptors):
        if verbose: print(fi, ",", numDescriptors, "(make me faster!!)")
        #featureImg_fi = featureImg[:,:,fi]
        featureImg[coords[0],coords[1],fi] = D[fi,:]

    # UN-extend image
    featureImgSrcShape  = featureImg[border[0]:border[0]+srcShape[0],border[1]:border[1]+srcShape[1],:].copy()    
    assert featureImgSrcShape.shape[0] == srcShape[0]
    assert featureImgSrcShape.shape[1] == srcShape[1]
    assert featureImgSrcShape.shape[2] == numDescriptors

    # visualize results ?
    if visu:
        for fi in range(numDescriptors):
            print(fi)
            plt.imshow(featureImgSrcShape[:,:,fi].T, cmap = cm.Greys_r)
            plt.show()


    if scale is not None :
        featureImgSrcShape = vigra.sampling.resize(featureImgSrcShape,shapeOrg+(featureImgSrcShape.shape[2],))
    return featureImgSrcShape
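The "MAKE ME FAST" loop above writes one descriptor plane per iteration; a minimal vectorized alternative, assuming D has shape (numDescriptors, numFrames) as above, replaces the whole loop with a single fancy-indexed assignment.

# Sketch: write all descriptor planes at once instead of looping over fi.
featureImg[coords[0], coords[1], :] = D.T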
Example #23
 def calculate_visual_words_for_document(self, document_image_filename, visualize = False):
     image = Image.open(document_image_filename)
     # Later processing steps expect the image as float32 values.
     im_arr = np.asarray(image, dtype='float32')
     # The colormap determines how the intensity values are interpreted.
     if visualize:
         plt.imshow(im_arr, cmap=cm.get_cmap('Greys_r'))
         plt.show()
     
     
     print "Berechne Sift Deskriptoren"
     # SIFT Deskriptoren berechnen
     frames, desc = vlfeat.vl_dsift(im_arr, step=self.step_size, size=self.cell_size)
 #     pickle_densesift_fn = '2700270-small_dense-%d_sift-%d_descriptors.p' % (step_size, cell_size)
 #     frames, desc = pickle.load(open(pickle_densesift_fn, 'rb'))
     frames = frames.T
     desc = desc.T
 
     #
     # To build a bag-of-features representation of the image, a visual
     # vocabulary is needed. It is computed below by a cluster analysis, using
     # Lloyd's version of the k-means algorithm. Parameters are
     # - the number of centroids for the cluster analysis (n_centroids). This
     #   corresponds to the size of the visual vocabulary, i.e. the number of
     #   visual words.
     # - the number of iterations of the algorithm (iter)
     # - the initialization (minit). The value 'points' selects a random
     #   subset of the given data points as the initial centroids.
     # The method returns two NumPy arrays:
     #  - the so-called codebook: a matrix whose rows are the centroids (not used here).
     #  - a vector with one index per descriptor in desc; the index refers to
     #    the most similar centroid in the codebook (labels).
     #
     # Mapping descriptors onto centroids (visual words) is called quantization.

     print("Computing visual words")
     centroids, labels = kmeans2(desc.astype(float), self.n_centroids, minit='points')
     print("Finished computing visual words.")
     #
     # The descriptors and their quantization are now visualized. For each
     # descriptor its center point and its 4x4 cells are drawn. The color of
     # the center point encodes the index of the visual word in the visual
     # vocabulary (codebook). Note that with this encoding some colors can be
     # very similar. Since drawing the 4x4 cells for every descriptor can cost
     # a lot of performance, it can be switched off via the flag
     # draw_descriptor_cells.
     #
     if visualize:
         draw_descriptor_cells = True
         fig = plt.figure()
         ax = fig.add_subplot(111)
         ax.imshow(im_arr, cmap=cm.get_cmap('Greys_r'))
         ax.hold(True)
         ax.autoscale(enable=False)
         colormap = cm.get_cmap('jet')
         desc_len = self.cell_size * 4
         for (x, y), label in zip(frames, labels):
             color = colormap(label / float(self.n_centroids))
             circle = Circle((x, y), radius=1, fc=color, ec=color, alpha=1)
             rect = Rectangle((x - desc_len / 2, y - desc_len / 2), desc_len, desc_len, alpha=0.08, lw=1)
             ax.add_patch(circle)
             if draw_descriptor_cells:
                 for p_factor in [0.25, 0.5, 0.75]:
                     offset_dyn = desc_len * (0.5 - p_factor)
                     offset_stat = desc_len * 0.5
                     line_h = Line2D((x - offset_stat, x + offset_stat), (y - offset_dyn, y - offset_dyn), alpha=0.08, lw=1)
                     line_v = Line2D((x - offset_dyn , x - offset_dyn), (y - offset_stat, y + offset_stat), alpha=0.08, lw=1)
                     ax.add_line(line_h)
                     ax.add_line(line_v)
             ax.add_patch(rect)
         
         plt.show()
     
     # centroids: a list of centroids (these are SIFT descriptors as well)
     # labels: for each SIFT descriptor, an index indicating the centroid it is assigned to
     # ORDER: column-wise, top to bottom and left to right. Example:
     # 1 4 7
     # 2 5 8
     # 3 6 9
     self.centroids = centroids
     labels = np.reshape(labels,(len(np.unique(frames[:,1])),-1),order='F')
     return centroids, labels
Example #24
def parallelImageProcessing(patchSizeTraining, patchStepTraining, \
                             scaleList, testPredictFolder, svmWeights, feStandardized, enableMSER, \
                             imgPath): 
    start = time.time()
    dirName, fName = os.path.split(imgPath)  
    fName = fName.split('.')[0]
    dirName = dirName.split('/')[-1]
    print('Starting processing Image -', imgPath)
    imageBatchSize = 500
    minT  = 0
    # save file names
    predictFileName = os.path.join(testPredictFolder, 'MaxPredict_' + dirName + '_' + fName + '.npy')
    meanFeatureValues =  feStandardized[0]
    stdDevFeatureValues =  feStandardized[1]
    if(os.path.isfile(predictFileName)):
        print('Data already present')
        imageArray = np.round(imread(imgPath, flatten=True)).astype(np.uint8)
        maxPredictImage = np.load(predictFileName)            
    else:
        # open the image and load the image as array
        imageArray = np.round(imread(imgPath, flatten=True)).astype(np.uint8)
        orgImageSize = imageArray.shape
        if (enableMSER == True):
            delta = 5
            minArea = 30
            maxArea = 90000
            maxVariation = 0.2
            minDiversity = 0.1
            # 1 column - width 2nd column height
            bboxesDetected = mserHelper.detectMSERBboxes(imgPath, delta, minArea, 
                                                         maxArea, maxVariation, minDiversity)
            numOfScales = bboxesDetected.shape[0]
            print('Number of scales detected by MSER in image -', fName, '-', numOfScales)
            if(numOfScales == 0):
                bboxesDetected = [patchSizeTraining[0],patchSizeTraining[1]]
        else:
            numOfScales = len(scaleList)
        maxPredictImage = np.ones(imageArray.shape) * (-np.inf)
        for scaleRun in range(numOfScales):
            if(enableMSER == True):
                # 1 column - width 2nd column height
                curBBoxes = bboxesDetected[scaleRun,:]
                widthRatio = float(patchSizeTraining[1])/curBBoxes[0]
                heightRatio = float(patchSizeTraining[0])/curBBoxes[1]
                rescaledImgSize = (int(orgImageSize[0]*heightRatio), int(orgImageSize[1]*widthRatio))
                rescaledImage = imresize(imageArray, rescaledImgSize, interp = 'bicubic')
                print('Computing the response for image', fName, 'size', rescaledImage.shape)
            else:
                curScale = scaleList[scaleRun]
                print('Computing the response for image', fName, 'at scale', curScale)
                # rescale the image based on ratio given
                rescaledImage = imresize(imageArray, curScale, interp = 'bicubic')
                widthRatio = curScale
                heightRatio = curScale
            if((rescaledImage.shape[0] >= 32) and (rescaledImage.shape[1] >=32)):
                txtPredictions = np.zeros((rescaledImage.shape[0]-24,rescaledImage.shape[1]-24))
                for rowImageRun in np.arange(0, rescaledImage.shape[0]-24, imageBatchSize-24):
                    for colImageRun in np.arange(0, rescaledImage.shape[1]-24, imageBatchSize-24):
                        curImage = rescaledImage[rowImageRun:rowImageRun+imageBatchSize, colImageRun:colImageRun+imageBatchSize]
                        frames, siftArray = vlfeat.vl_dsift(curImage.astype(np.float32), size = 8, step = 1 ,verbose = False)
                        siftArray = siftArray.T.astype(np.float32)
                        siftArray -= meanFeatureValues
                        siftArray /= stdDevFeatureValues
                        frames = frames.T
                        minColumn = np.min(frames[:,0])
                        maxColumn = np.max(frames[:,0])
                        minRow = np.min(frames[:,1])
                        maxRow = np.max(frames[:,1])
                        # +1 is beacause of zero indexing
                        numColumns = maxColumn - minColumn + 1
                        numRows  = maxRow -minRow + 1
                        siftArray = np.hstack((siftArray, np.ones((siftArray.shape[0],1))))
                        predict = np.dot(siftArray, svmWeights)
                        predictPart = predict[:,0].reshape(numColumns,numRows).T
                        txtPredictions[rowImageRun:rowImageRun+numRows,\
                                        colImageRun:colImageRun+numColumns] = predictPart
                arrIndex = np.ndindex(txtPredictions.shape[0], txtPredictions.shape[1])
                for predictRun in arrIndex:
                    eachPredict = txtPredictions[predictRun[0], predictRun[1]]
                    minT = np.minimum(minT, eachPredict)
                    eachRowCord = int(np.floor((predictRun[0]*patchStepTraining[0])/heightRatio))
                    eachColCord = int(np.floor((predictRun[1]*patchStepTraining[1])/widthRatio))
                    eachRowCordEnd = eachRowCord + int(np.floor(patchSizeTraining[0]/heightRatio))
                    eachColCordEnd = eachColCord + int(np.floor(patchSizeTraining[1]/widthRatio))
                    predPart = maxPredictImage[eachRowCord:eachRowCordEnd, eachColCord:eachColCordEnd]
                    maxPredictImage[eachRowCord:eachRowCordEnd, eachColCord:eachColCordEnd] = np.maximum(predPart, eachPredict)
        # save the response
        maxPredictImage[maxPredictImage == -np.inf] = minT
        np.save(predictFileName, maxPredictImage)
    end = time.time()
    print('Finished processing -', imgPath, 'seconds -', end - start)
Example #25
    def compute(self, image, step, size):
        kp, desc = vl_dsift(image, step=step, size=size, fast=True)

        return kp, desc
def process_image(imagename, resultname="temp.sift", dense=False):
    """ process an image and save the results in a .key ascii file"""
    print "working on ", imagename
    if dense == False:
        if imagename[-3:] != "pgm":
            # create a pgm file, image is resized, if it is too big.
            # sift returns an error if more than 8000 features are found
            size = (MAXSIZE, MAXSIZE)
            im = Image.open(imagename).convert("L")
            im.thumbnail(size, Image.ANTIALIAS)
            im.save("tmp.pgm")
            imagename = "tmp.pgm"

        # check if linux or windows
        if os.name == "posix":
            cmmd = "./sift < " + imagename + " > " + resultname
        else:
            cmmd = "siftWin32 < " + imagename + " > " + resultname

        # run extraction command
        returnvalue = subprocess.call(cmmd, shell=True)
        if returnvalue == 127:
            os.remove(resultname)  # removing empty resultfile created by output redirection
            raise IOError("SIFT executable not found")
        if returnvalue == 2:
            os.remove(resultname)  # removing empty resultfile created by output redirection
            raise IOError("image " + imagename + " not found")
        if os.path.getsize(resultname) == 0:
            raise IOError("extracting SIFT features failed " + resultname)

    else:
        import vlfeat

        # defines how dense the grid is
        size = (150, 150)
        step = 10

        im = Image.open(imagename).resize(size, Image.ANTIALIAS)
        im_array = numpy.asarray(im)
        if im_array.ndim == 3:
            im_gray = vlfeat.vl_rgb2gray(im_array)
        elif im_array.ndim == 2:
            im_gray = im_array
        else:
            raise IOError("Not enough dims found in image " + resultname)

        locs, int_descriptors = vlfeat.vl_dsift(im_gray, step=step, verbose=VERBOSE)
        nfeatures = int_descriptors.shape[1]
        padding = numpy.zeros((2, nfeatures))
        locs = numpy.vstack((locs, padding))
        header = " ".join([str(nfeatures), str(128)])
        temp = int_descriptors.astype("float")  # convert descriptors to float
        descriptors = temp[:]
        with open(resultname, "wb") as f:
            cPickle.dump([locs.T, descriptors.T], f, protocol=cPickle.HIGHEST_PROTOCOL)
        print "features saved in", resultname
        if WRITE_VERBOSE:
            with open(resultname, "w") as f:
                f.write(header)
                f.write("\n")
                for i in range(nfeatures):
                    f.write(" ".join(map(str, locs[:, i])))
                    f.write("\n")
                    f.write(" ".join(map(str, descriptors[:, i])))
                    f.write("\n")
Example #27
def _get_vectorised_sift_samples(archetype_config, dataloader):
    # returns num unmasked pixels x SIFT_DLEN, in uint8 format
    # operates on greyscale uint8 images

    num_batches, batch_sz = len(
        dataloader), archetype_config.dataloader_batch_sz
    num_imgs_max = num_batches * batch_sz  # estimate
    img_sz = archetype_config.input_sz

    # cluster individual (box central) pixels
    desc_side = int(img_sz / SIFT_STEP)
    print("img sz %d, desc_side %d" % (img_sz, desc_side))
    sys.stdout.flush()

    descs_all = np.zeros((num_imgs_max, desc_side * desc_side, SIFT_DLEN),
                         dtype=np.uint8)
    masks_all = np.zeros((num_imgs_max, desc_side * desc_side), dtype=bool)
    labels_all = None
    actual_num_imgs = 0

    # when descriptor matrix flattened, goes along rows first (rows change slow)
    central_inds_h = (np.arange(desc_side) * SIFT_STEP +
                      (SIFT_STEP // 2)).reshape(
                          (desc_side, 1)).repeat(desc_side, axis=1)
    central_inds_w = (np.arange(desc_side) * SIFT_STEP +
                      (SIFT_STEP // 2)).reshape(
                          (1, desc_side)).repeat(desc_side, axis=0)
    central_inds_h, central_inds_w = central_inds_h.reshape(-1), \
                                     central_inds_w.reshape(-1)

    for b_i, batch in enumerate(dataloader):
        if len(batch) == 3:  # test dataloader
            store_labels = True

            if (labels_all is None):
                labels_all = np.zeros((num_imgs_max, desc_side * desc_side),
                                      dtype=np.int32)
            imgs, labels, masks = batch
            labels = labels.cpu().numpy().astype(np.int32)
        else:  # training dataloader
            store_labels = False
            imgs, _, _, masks = batch

        # imgs currently channel first, [0-1] range, floats
        imgs = (imgs * 255.).permute(0, 2, 3, 1).cpu().numpy().astype(np.uint8)
        masks = masks.cpu().numpy().astype(bool)

        curr_batch_sz, h, w, c = imgs.shape
        assert (h == archetype_config.input_sz
                and w == archetype_config.input_sz
                and c == archetype_config.in_channels)
        if b_i < num_batches - 1:
            assert (batch_sz == curr_batch_sz)

        start = b_i * batch_sz
        for i in range(curr_batch_sz):
            grey_img = cv2.cvtColor(imgs[i, :, :, :], cv2.COLOR_RGB2GRAY)
            locs, descs = vlfeat.vl_dsift(grey_img, step=SIFT_STEP)
            descs = descs.transpose((1, 0))  # 40*40, 128
            descs = descs.reshape(-1, SIFT_DLEN)  # rows change slowest

            # get the corresponding box central mask/label
            mask = masks[i][central_inds_h, central_inds_w]

            offset = start + i
            descs_all[offset, :, :] = descs
            masks_all[offset, :] = mask
            if store_labels:
                label = labels[i][central_inds_h, central_inds_w]
                labels_all[offset, :] = label

        actual_num_imgs += curr_batch_sz

    descs_all = descs_all[:actual_num_imgs, :, :]
    masks_all = masks_all[:actual_num_imgs, :]
    num_unmasked = masks_all.sum()
    if store_labels:
        labels_all = labels_all[:actual_num_imgs, :]
        samples_labels = labels_all[masks_all].reshape(-1)
        assert (samples_labels.shape[0] == num_unmasked)

    samples = descs_all[masks_all, :].reshape(-1, SIFT_DLEN)
    assert (samples.shape[0] == num_unmasked)

    if not store_labels:
        return samples
    else:
        return samples, samples_labels