def extract_features(image, xs, ys, scale=1.0):
    """Build an orientation-histogram descriptor for each interest point.

    For every point, a reference direction is obtained from ``dir_img`` and
    nine 3 x 3 pixel cells near the point are visited.  Each cell contributes
    an 8-bin histogram counting the pixels' ``theta`` values (from
    ``canny.canny_nmax``) relative to the reference direction, giving
    3 * 3 * 8 = 72 values per point.

    Arguments:
       image - a grayscale image in the form of a 2D numpy array
       xs    - sequence of first-axis coordinates, one per point
       ys    - sequence of second-axis coordinates (same length as xs)
       scale - accepted for interface compatibility; not used

    Returns:
       feats  - numpy array with one 72-value descriptor row per point
       orient - np.average of the per-point reference directions multiplied
                by 180 / pi (degrees, assuming dir_img returns radians)
    """
    # check that image is grayscale
    assert image.ndim == 2, 'image should be grayscale'
    ##########################################################################
    window = 3  # cell side in pixels; scale is optional and not used
    n_orient_bins = 8
    bin_width = (np.pi + np.pi) / n_orient_bins

    feats = []
    orients = []
    # Only the orientation map is needed; the magnitude map is discarded.
    _, theta = canny.canny_nmax(image)
    dx, dy = canny.sobel_gradients(image)
    n_rows = len(dx)
    n_cols = len(dx[0])

    for px, py in zip(xs, ys):
        # Sample the gradients at the four corners (low/high row crossed with
        # low/high column) of the neighbourhood; the high side is clamped to
        # the last valid index.
        # NOTE(review): the low side (px - 1 / py - 1) is not clamped, so a
        # point on the first row/column wraps to the opposite border.
        corners = np.ix_(
            [px - 1, min(px + 2, n_rows - 1)],
            [py - 1, min(py + 2, n_cols - 1)],
        )
        dir_img_i = dir_img(dx[corners], dy[corners])
        orients.append(dir_img_i)

        descriptor = []
        for x in range(3):
            for y in range(3):
                # NOTE(review): these offsets evaluate to 1.5, 2.0, 2.5, so
                # the nine cells overlap and all sit towards lower indices
                # than the point rather than tiling a grid around it.
                # Left unchanged pending confirmation of the intent.
                binx = (x + 3) / ((3 + 3) / 3)
                biny = (y + 3) / ((3 + 3) / 3)
                histogram = np.zeros(n_orient_bins)
                for a in range(window):
                    for b in range(window):
                        x_disp = int(px - binx - 1 + a)
                        y_disp = int(py - biny - 1 + b)
                        relative = theta[x_disp, y_disp] - dir_img_i + np.pi
                        val = int(relative / bin_width) - 1
                        # Orientation is cyclic: wrap into [0, 8).  For
                        # negative values this matches the negative-index
                        # wrap the unwrapped index already produced; for
                        # values >= 8 it avoids an IndexError.
                        histogram[val % n_orient_bins] += 1
                descriptor.extend(histogram)
        feats.append(descriptor)

    feats = np.asarray(feats)
    ##########################################################################
    return feats, (np.average(orients) * 180.0 / np.pi)
def find_interest_points(image, max_points=200, scale=1.0):
    """Detect interest points with a Harris-style corner response.

    For every pixel whose (2 * int(scale) + 1)-wide square window fits inside
    the gradient arrays, the response ``det - 0.05 * trace ** 2`` of the
    summed gradient-product matrix is computed.  Candidates are then taken
    greedily in decreasing response order, and each accepted point bans the
    square neighbourhood of the same radius around it.

    Arguments:
       image      - a grayscale image in the form of a 2D numpy array
       max_points - maximum number of interest points to return
       scale      - int(scale) is the window radius in pixels

    Returns:
       xs     - numpy array of first-axis coordinates of accepted points
       ys     - numpy array of second-axis coordinates of accepted points
       scores - list of the corresponding responses, strongest first
    """
    # check that image is grayscale
    assert image.ndim == 2, 'image should be grayscale'

    grad_x, grad_y = canny.sobel_gradients(image)
    radius = int(scale)
    n_rows = grad_x.shape[0]
    n_cols = grad_x.shape[1]
    alpha = 0.05

    # Response for every pixel whose full window lies inside the image.
    candidates = []
    for row in range(radius, n_rows - radius):
        row_lo, row_hi = row - radius, row + radius + 1
        for col in range(radius, n_cols - radius):
            col_lo, col_hi = col - radius, col + radius + 1
            patch_x = grad_x[row_lo:row_hi, col_lo:col_hi].flatten()
            patch_y = grad_y[row_lo:row_hi, col_lo:col_hi].flatten()
            sum_xx = np.dot(patch_x, patch_x)
            sum_yy = np.dot(patch_y, patch_y)
            sum_xy = np.dot(patch_x, patch_y)
            # Alternative considered: determinant / trace.
            determinant = (sum_xx * sum_yy) - (sum_xy ** 2)
            trace = sum_xx + sum_yy
            candidates.append((determinant - alpha * (trace ** 2), row, col))

    # Strongest response first (tuples compare on the score first).
    ranked = sorted(candidates, reverse=True)

    # Greedy non-maximum suppression: accept a point, ban its neighbourhood.
    xs, ys, scores = [], [], []
    banned = set()
    k = 1  # suppression radius as a multiple of the scoring-window radius
    offsets = list(range(-k * radius, k * radius + 1))
    for score, x, y in ranked:
        if len(scores) == max_points:
            break
        if (x, y) in banned:
            continue
        scores.append(score)
        xs.append(x)
        ys.append(y)
        banned.update((x + ox, y + oy) for ox in offsets for oy in offsets)

    return np.array(xs), np.array(ys), scores
def extract_features(image, xs, ys, scale = 1.0):
    """
    FEATURE DESCRIPTOR (12 Points Implementation + 3 Points Write-up)

    Implement a SIFT-like feature descriptor by binning orientation energy
    in spatial cells surrounding an interest point.

    Unlike SIFT, you do not need to build-in rotation or scale invariance.

    A reasonable default design is to consider a 3 x 3 spatial grid consisting
    of cells of a set width (see below) surrounding an interest point, marked
    by () in the diagram below.  Using 8 orientation bins, spaced evenly in
    [-pi,pi), yields a feature vector with 3 * 3 * 8 = 72 dimensions.

             ____ ____ ____
            |    |    |    |
            |    |    |    |
            |____|____|____|
            |    |    |    |
            |    | () |    |
            |____|____|____|
            |    |    |    |
            |    |    |    |
            |____|____|____|

                        |----|
                         width

    You will need to decide on a default spatial width.  Optionally, this can
    be a multiple of a scale factor, passed as an argument.  We will only test
    your code by calling it with scale = 1.0.

    In addition to your implementation, include a brief write-up (in hw2.pdf)
    of your design choices.

    Design used here: the cell width is int(2 * scale + 1) pixels (3 at
    scale 1.0).  Each pixel adds its gradient magnitude to the bin selected
    by floor(angle / (pi / 4)) mod 8, where angle = arctan2(dy, dx).  Cell
    (i, j) of the grid occupies feats[:, (3 * i + j) * 8 : (3 * i + j + 1) * 8],
    with j stepping along the xs axis and i along the ys axis.  Pixels
    outside the image contribute nothing.

    Arguments:
       image - a grayscale image in the form of a 2D numpy array
       xs    - numpy array of shape (N,) containing x-coordinates
       ys    - numpy array of shape (N,) containing y-coordinates
       scale - scale factor

    Returns:
       feats - a numpy array of shape (N,K), containing K-dimensional
               feature descriptors at each of the N input locations
               (using the default scheme suggested above, K = 72)
    """
    # check that image is grayscale
    assert image.ndim == 2, 'image should be grayscale'
    ##########################################################################
    grid = 3      # cells per side of the spatial grid
    n_bins = 8    # orientation bins per cell
    width = max(int(scale * 2 + 1), 1)
    half = (width - 1) // 2
    # Farthest pixel touched from an interest point: one cell spacing plus
    # half a cell.
    reach = width + half

    # Gradients are taken on the unpadded image so that padding does not
    # create artificial edges along the border.
    # Assumes sobel_gradients returns arrays shaped like image - confirm.
    dx, dy = sobel_gradients(image)
    magnitude = np.sqrt(dx ** 2 + dy ** 2)
    angle = np.arctan2(dy, dx)
    # floor + modulo keeps every index in [0, 8): negative angles map to
    # bins 4..7 instead of running past the end of the cell's slot.
    bin_index = np.floor(angle / (np.pi / 4.0)).astype(int) % n_bins

    # Zero-pad by the full reach so every window is in bounds; padded pixels
    # have zero magnitude and therefore add nothing to any bin.
    magnitude = np.pad(magnitude, reach, mode='constant')
    bin_index = np.pad(bin_index, reach, mode='constant')

    n = len(xs)
    feats = np.zeros((n, grid * grid * n_bins))
    for point in range(n):
        # Shift the coordinates into the padded frame.
        cx = int(xs[point]) + reach
        cy = int(ys[point]) + reach
        for i in range(grid):
            for j in range(grid):
                px = cx + (j - 1) * width
                py = cy + (i - 1) * width
                rows = slice(px - half, px + half + 1)
                cols = slice(py - half, py + half + 1)
                cell = i * grid + j
                feats[point, cell * n_bins:(cell + 1) * n_bins] = np.bincount(
                    bin_index[rows, cols].ravel(),
                    weights=magnitude[rows, cols].ravel(),
                    minlength=n_bins,
                )
    ##########################################################################
    return feats
def find_interest_points(image, max_points = 200, scale = 1.0):
    """
    INTEREST POINT OPERATOR (12 Points Implementation + 3 Points Write-up)

    Implement an interest point operator of your choice.

    Your operator could be:

    (A) The Harris corner detector (Szeliski 4.1.1)

                OR

    (B) The Difference-of-Gaussians (DoG) operator defined in:
        Lowe, "Distinctive Image Features from Scale-Invariant Keypoints", 2004.
        https://www.cs.ubc.ca/~lowe/papers/ijcv04.pdf

                OR

    (C) Any of the alternative interest point operators appearing in
        publications referenced in Szeliski or in lecture

                OR

    (D) A custom operator of your own design

    Your implementation should return locations of the interest points in the
    form of (x,y) pixel coordinates, as well as a real-valued score for each
    interest point.  Greater scores indicate a stronger detector response.

    In addition, be sure to apply some form of spatial non-maximum suppression
    prior to returning interest points.

    Whichever of these options you choose, there is flexibility in the exact
    implementation, notably in regard to:

    (1) Scale

        At what scale (e.g. over what size of local patch) do you operate?

        You may optionally vary this according to an input scale argument.

        We will test your implementation at the default scale = 1.0, so you
        should make a reasonable choice for how to translate scale value 1.0
        into a size measured in pixels.

    (2) Nonmaximum suppression

        What strategy do you use for nonmaximum suppression?

        A simple (and sufficient) choice is to apply nonmaximum suppression
        over a local region.  In this case, over how large of a local region
        do you suppress?  How does that tie into the scale of your operator?

    For making these, and any other design choices, keep in mind a target of
    obtaining a few hundred interest points on the examples included with
    this assignment, with enough repeatability to have a large number of
    reliable matches between different views.

    If you detect more interest points than the requested maximum (given by
    the max_points argument), return only the max_points highest scoring ones.

    In addition to your implementation, include a brief write-up (in hw2.pdf)
    of your design choices.

    Design used here: the response is determinant / trace of the smoothed
    gradient-product matrix (Brown et al. 2005), with undefined responses
    (zero trace) treated as 0.  Non-maximum suppression keeps one maximum per
    tile of side 2 * int(2 * scale + 3) + 1 pixels (11 at scale 1.0), and
    only responses above 0.0001 are reported.  When more than max_points
    remain, the strongest max_points are returned in decreasing score order.

    Arguments:
       image       - a grayscale image in the form of a 2D numpy array
       max_points  - maximum number of interest points to return
       scale       - (optional, for your use only) scale factor at which to
                     detect interest points

    Returns:
       xs          - numpy array of shape (N,) containing x-coordinates of the
                     N detected interest points (N <= max_points)
       ys          - numpy array of shape (N,) containing y-coordinates
       scores      - numpy array of shape (N,) containing a real-valued
                     measurement of the relative strength of each interest point
                     (e.g. corner detector criterion OR DoG operator magnitude)
    """
    # check that image is grayscale
    assert image.ndim == 2, 'image should be grayscale'

    dx, dy = sobel_gradients(image)
    Ix2 = conv_2d_gaussian(dx ** 2)
    Iy2 = conv_2d_gaussian(dy ** 2)
    IxIy = conv_2d_gaussian(dx * dy)

    # measured interest = determinant over trace, by Brown et al 2005
    determinant = Ix2 * Iy2 - IxIy ** 2
    trace = Ix2 + Iy2
    # Silence the divide-by-zero warnings only around the division itself,
    # leaving NumPy's global error settings untouched for the caller.
    with np.errstate(divide='ignore', invalid='ignore'):
        interest = determinant / trace
    # Flat regions give 0/0 = NaN; NaN would win np.argmax below and rank
    # highest in any sort, so treat undefined responses as "no corner".
    interest = np.where(np.isfinite(interest), interest, 0.0)

    # now apply nonmaximum suppression over a local 11*11 region for scale 1
    half = int(2 * scale + 3)
    width = half * 2 + 1
    interest = mirror_border(interest, half, half)
    nonmax_suppressed = np.zeros(interest.shape)
    for i in range(half, interest.shape[0] - half, width):
        for j in range(half, interest.shape[1] - half, width):
            window = interest[i - half:i + half + 1, j - half:j + half + 1]
            ind = np.unravel_index(np.argmax(window), window.shape)
            # Keep only the tile's maximum, at its own location.
            nonmax_suppressed[i - half + ind[0], j - half + ind[1]] = window[ind]
    nonmax_suppressed = trim_border(nonmax_suppressed, half, half)

    # keep responses above the threshold, then at most max_points of them
    xs, ys = np.where(nonmax_suppressed > 0.0001)
    scores = nonmax_suppressed[xs, ys]
    limit = max(int(max_points), 0)
    if len(scores) > limit:
        # Rank only the surviving candidates; slicing with [:limit] also
        # handles max_points == 0, which must return no points at all.
        strongest = np.argsort(scores)[::-1][:limit]
        xs, ys, scores = xs[strongest], ys[strongest], scores[strongest]
    return xs, ys, scores
#
# plt.figure(); plt.imshow(image, cmap='gray')
# plt.figure(); plt.imshow(imgA, cmap='gray')
# plt.figure(); plt.imshow(imgB, cmap='gray')
# plt.show()
#
# ## Problem 3 - Sobel gradient operator (5 Points)
# ##
# ## Implement sobel_gradients() as described in hw1.py.
# ##
# ## The example below tests your implementation.
#
# image = load_image('data/69015.jpg')
image = load_image('data/edge_img/easy/002.jpg')
dx, dy = sobel_gradients(image)
dx_c, dy_c = c.sobel_gradients(image)

# Show the input followed by each gradient image, pairing this module's
# result with the one from `c` so they can be compared, one figure apiece.
for _panel in (image, dx, dx_c, dy, dy_c):
    plt.figure()
    plt.imshow(_panel, cmap='gray')
plt.show()
#
#
# # Problem 4 - (a) Nonmax suppression (10 Points)
# #             (b) Edge linking and hysteresis thresholding (10 Points)