def estimateFeatureTranslation(startX, startY, Ix, Iy, img1, img2):
    I_gray_1, I_gray_2 = rgb2gray(img1), rgb2gray(img2)
    X_old, Y_old = generatePatch(startX, startY)
    X_old, Y_old = X_old.astype(np.int32), Y_old.astype(np.int32)
    Ix_temp, Iy_temp = Ix[Y_old, X_old], Iy[Y_old, X_old]
    x0, y0 = startX, startY
    # Initialize the outputs to the starting point so we always return something sane
    newX, newY = startX, startY
    min_error = 999999
    error_thresh = 1
    iteration = 5
    for i in range(iteration):
        X_new, Y_new = generatePatch(x0, y0)
        old_coor = np.array((x0, y0)).reshape(-1, 1)
        # Temporal gradient: difference between the (interpolated) patch in
        # frame 2 and the original patch in frame 1
        It_temp = interp2(I_gray_2, X_new, Y_new) - I_gray_1[Y_old, X_old]
        error = np.linalg.norm(It_temp)
        if error < min_error:
            min_error = error
            newX, newY = x0, y0
        if error < error_thresh:
            break
        # Solve the Lucas-Kanade normal equations (A^T A) d = A^T b for the flow d
        A = np.hstack((Ix_temp.reshape(-1, 1), Iy_temp.reshape(-1, 1)))
        b = -It_temp.reshape(-1, 1)
        flow_temp = np.linalg.solve(np.dot(A.T, A), np.dot(A.T, b))
        new_coor = old_coor + flow_temp
        x0, y0 = new_coor[0, 0], new_coor[1, 0]
    return newX, newY
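# `generatePatch` is called above but not defined in this file. A minimal
# sketch of what it plausibly does -- build the pixel-coordinate grids of a
# square window centered on the feature. The 11x11 default window size is an
# assumption, not confirmed by this file:
import numpy as np

def generatePatch(x, y, window=11):
    pad = window // 2
    # Coordinate grids spanning the window around (x, y)
    X, Y = np.meshgrid(np.arange(window) - pad + x,
                       np.arange(window) - pad + y)
    return X, Y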
def warp_box(startXs, startYs, newXs, newYs, img1, img2, box):
    import numpy as np
    from helpers import rgb2gray
    from helpers import inlier_cost_func
    from helpers import warp_image
    from scipy.optimize import least_squares

    # This will fail if there is overlap between the boxes
    max_dist = 4
    pad = 5
    source = rgb2gray(img1)
    target = rgb2gray(img2)
    source_warped = np.copy(source)
    h, w = source.shape

    # Get the boundaries of the bounding box
    xmin = max([np.amin(box[:, 0]) - pad, 0])
    xmax = min([np.amax(box[:, 0]) + pad + 1, w])
    ymin = max([np.amin(box[:, 1]) - pad, 0])
    ymax = min([np.amax(box[:, 1]) + pad + 1, h])

    # Remove points that traveled too far between frames
    distances = np.sqrt(np.square(newXs - startXs) + np.square(newYs - startYs))
    indexer = distances < max_dist
    indexer = np.ones(len(startXs), dtype=bool)  # Overwrite this for now
    ux = startXs[indexer]
    uy = startYs[indexer]
    vx = newXs[indexer]
    vy = newYs[indexer]

    # Form our initial and final feature points in homogeneous coordinates
    N = len(ux)
    u = np.stack([ux, uy, np.ones(N)])
    v = np.stack([vx, vy, np.ones(N)])

    # Fit an affine transformation to the point correspondences
    T = least_squares(inlier_cost_func, np.identity(3)[:2].reshape(6),
                      args=(u, v))["x"].reshape(2, 3)
    T = np.concatenate((T, np.array([[0, 0, 1]])))

    # Warp the source region and paste it back into the frame
    warped_area = warp_image(source, T, xmin, xmax, ymin, ymax)
    source_warped[ymin:ymax, xmin:xmax] = warped_area
    return source_warped
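# `warp_image` comes from helpers and is not shown here. A minimal sketch of
# an inverse-mapping implementation under the same signature (assumed, not
# confirmed): sample the source at T^-1 applied to each destination pixel.
import numpy as np
from scipy.ndimage import map_coordinates

def warp_image(source, T, xmin, xmax, ymin, ymax):
    # Destination pixel grid inside the bounding box, in homogeneous coords
    xs, ys = np.meshgrid(np.arange(xmin, xmax), np.arange(ymin, ymax))
    dst = np.stack([xs.ravel(), ys.ravel(), np.ones(xs.size)])
    # Inverse-map destination pixels back into the source image
    src = np.linalg.inv(T) @ dst
    # map_coordinates expects (row, col) = (y, x) ordering
    warped = map_coordinates(source, [src[1], src[0]], order=1, mode="nearest")
    return warped.reshape(ymax - ymin, xmax - xmin)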
def calc_sim(img, coin):
    # Convert both images to 8-bit grayscale
    if img.shape[-1] == 3:
        img = rgb2gray(img)
        img = img.astype('uint8')
    img = img_as_ubyte(img)
    if coin.shape[-1] == 3:
        coin = rgb2gray(coin)
        coin = coin.astype('uint8')
    coin = img_as_ubyte(coin)

    # Quantize to 16 gray levels so the histograms stay small
    quantized_img = img // 16
    coin = coin // 16

    # Compute the coin histogram and normalize it
    coin_hist, _ = np.histogram(coin.flatten(), bins=16, range=(0, 16))
    coin_hist = coin_hist.astype(float) / np.sum(coin_hist)

    # Compute a disk-shaped mask that will define the shape of our sliding window.
    # A disk with diameter equal to max(w, h) of the ROI should be a big enough reference.
    selem = disk(max(coin.shape) // 2)

    # Compute the similarity across the complete image
    similarity = windowed_histogram_similarity(quantized_img, selem, coin_hist,
                                               coin_hist.shape[0])

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(10, 10))
    axes[0].imshow(quantized_img, cmap='gray')
    axes[0].set_title('Quantized image')
    axes[0].axis('off')
    axes[1].imshow(coin, cmap='gray')
    axes[1].set_title('Quantized ROI image')
    axes[1].axis('off')
    axes[2].imshow(img, cmap='gray')
    axes[2].imshow(similarity, cmap='hot', alpha=0.5)
    axes[2].set_title('Original image with overlaid similarity')
    axes[2].axis('off')
    plt.tight_layout()
    plt.show(block=False)

    # Rescale the float similarity map to 0-255 before writing it to disk
    cv2.imwrite('img/sim_map.jpg',
                (255 * similarity / np.amax(similarity)).astype(np.uint8))
    return similarity
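# `windowed_histogram_similarity` is not defined in this file. A sketch of a
# typical implementation (modeled on the scikit-image windowed-histogram
# example, so treat the details as assumptions): compute a per-pixel local
# histogram, then score it against the reference histogram with a
# chi-squared distance.
import numpy as np
from skimage.filters import rank

def windowed_histogram_similarity(image, footprint, reference_hist, n_bins):
    # Per-pixel normalized histograms of the neighborhood defined by `footprint`
    px_histograms = rank.windowed_histogram(image, footprint, n_bins=n_bins)
    reference_hist = reference_hist.reshape((1, 1) + reference_hist.shape)
    # Chi-squared distance between each local histogram and the reference
    num = np.square(px_histograms - reference_hist)
    denom = px_histograms + reference_hist
    denom[denom == 0] = np.inf
    chi_sqr = 0.5 * np.sum(num / denom, axis=2)
    # Invert so that similar windows score high
    return 1.0 / (chi_sqr + 1.0e-4)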
def getBoxFeature(img):
    max_pts = 20
    row, col = img.shape[0], img.shape[1]
    if img.ndim == 3:
        img = rgb2gray(flipChannel(img))
    # Shi-Tomasi corners inside the box
    corners = cv.goodFeaturesToTrack(img.astype(np.float32), max_pts, 0.01, 10)
    corners = corners.astype(np.int32)
    x, y = corners[:, :, 0], corners[:, :, 1]
    # Keep only features at least 5 px away from the image border
    inlier_ind = (x > 5) & (x < col - 5) & (y > 5) & (y < row - 5)
    x, y = x[inlier_ind], y[inlier_ind]
    return x, y
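# Minimal usage sketch on a synthetic image (a 2-D grayscale array is passed
# so the rgb2gray/flipChannel helpers are never touched; the demo image is
# made up purely for illustration):
import numpy as np
import cv2 as cv

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    demo = (rng.random((100, 120)) * 255).astype(np.float32)
    xs, ys = getBoxFeature(demo)
    print("Found %d corners" % len(xs))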
def estimateAllTranslation(startXs, startYs, img1, img2):
    row, col = img1.shape[0], img1.shape[1]
    N, F = startXs.shape[0], startXs.shape[1]
    I_gray_1 = rgb2gray(img1)
    # Spatial gradients of the first frame
    Ix, Iy = np.gradient(I_gray_1, axis=(1, 0))
    newXs = np.zeros((N, F), dtype=np.int32)
    newYs = np.zeros((N, F), dtype=np.int32)
    for i in range(N):
        for j in range(F):
            # Skip features too close to the image border
            if startXs[i][j] < 2 or startYs[i][j] < 2 \
                    or startXs[i][j] > col - 2 or startYs[i][j] > row - 2:
                continue
            newX, newY = estimateFeatureTranslation(startXs[i][j], startYs[i][j],
                                                    Ix, Iy, img1, img2)
            # Keep the update only if the new location lies inside the frame
            if 0 <= newX < col and 0 <= newY < row:
                newXs[i][j] = newX
                newYs[i][j] = newY
    return newXs, newYs
def estimateAllTranslation(startXs, startYs, origXs, origYs, img1, img2, bbox, params):
    import numpy as np
    from helpers import rgb2gray
    from helpers import interp2
    from scipy import signal
    from calculateError import calculateError

    # ---------- Part 1: Setup ---------- #
    # Convert the images to grayscale.
    # For now I'm going to pad the images symmetrically to get a full window
    # for edges and corners. It will just be important to remember that the
    # padding is there when solving for pixel locations.
    window = 11
    pad = int((window - 1) / 2)

    # Blur the images to get better optical flow results
    Gx = signal.gaussian(window, 1.4).reshape(1, window)
    Gy = signal.gaussian(window, 1.4).reshape(window, 1)
    gray1 = signal.convolve2d(rgb2gray(img1), Gx, mode="full", boundary="symm")
    gray1 = signal.convolve2d(gray1, Gy, mode="full", boundary="symm")
    gray2 = signal.convolve2d(rgb2gray(img2), Gx, mode="full", boundary="symm")
    gray2 = signal.convolve2d(gray2, Gy, mode="full", boundary="symm")

    # Pull out parameters for looping
    F = len(startXs)

    # Initialize our outputs
    newXs = np.zeros(F, dtype=object)
    newYs = np.zeros(F, dtype=object)

    # Calculate the gradients
    kx = np.array([[1, -1]])
    ky = np.array([[1], [-1]])
    Ix = signal.convolve2d(gray1, kx, mode="same")
    Iy = signal.convolve2d(gray1, ky, mode="same")

    # ---------- Part 2: Calculate the feature translations ---------- #
    # Use these gradients to find the new locations of the feature points.
    # I'm not going to put this into a second function, to reduce runtime.
    A = np.zeros((window**2, 2))
    b = np.zeros((window**2, 1))

    # Iterate for each bounding box as necessary
    for i in range(F):
        error = np.nan_to_num(np.inf)
        min_error = error.copy()
        iters = 0
        tempOrigXs = np.copy(origXs[i])
        tempOrigYs = np.copy(origYs[i])
        # Run for a max of 5 iterations or until the average squared distance
        # between each feature point value is less than 5000
        while error > 5000 and iters < 5:
            N = len(startXs[i])
            potXs = np.zeros(N)
            potYs = np.zeros(N)
            It = gray2 - gray1
            iters += 1
            for j in range(N):
                # Get our feature location
                fx = startXs[i][j]
                fy = startYs[i][j]
                # Generate a meshgrid for interpolating; because of the symmetric
                # padding, adding (fx, fy) directly centers the window on the feature
                meshx, meshy = np.meshgrid(np.arange(window), np.arange(window))
                meshx = meshx + fx
                meshy = meshy + fy
                # Build A and b from A*[u; v] = b centered around the feature location
                A[:, 0] = interp2(Ix, meshx, meshy).reshape(window**2)
                A[:, 1] = interp2(Iy, meshx, meshy).reshape(window**2)
                b[:, 0] = interp2(It, meshx, meshy).reshape(window**2)
                # Solve for [u; v]
                try:
                    translation = np.matmul(
                        np.matmul(np.linalg.inv(np.matmul(A.T, A)), A.T), -b)
                except np.linalg.LinAlgError:
                    translation = np.array([0, 0])
                # Save our result into our output
                potXs[j] = startXs[i][j] + translation[0]
                potYs[j] = startYs[i][j] + translation[1]

            # Calculate the error
            error, gray1, indexer, Ix, Iy, potXs, potYs = calculateError(
                startXs[i], startYs[i], potXs, potYs, np.copy(gray1),
                np.copy(gray2), Ix, Iy, np.copy(bbox[i]), params)
            startXs[i] = np.copy(potXs)
            startYs[i] = np.copy(potYs)
            tempOrigXs = tempOrigXs[indexer]
            tempOrigYs = tempOrigYs[indexer]

            # If we did better this time, save the results
            if error < min_error:
                min_error = error.copy()
                newXs[i] = np.copy(potXs)
                newYs[i] = np.copy(potYs)
                origXs[i] = np.copy(tempOrigXs)
                origYs[i] = np.copy(tempOrigYs)

    return newXs, newYs, origXs, origYs
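# `interp2` comes from helpers and is used throughout as a MATLAB-style
# bilinear sampler. A minimal sketch of such a function, with query points
# clipped to the image bounds (the signature is inferred from the call
# sites above, not confirmed):
import numpy as np

def interp2(v, xq, yq):
    h, w = v.shape
    x = np.clip(xq, 0, w - 1).astype(np.float64)
    y = np.clip(yq, 0, h - 1).astype(np.float64)
    x0 = np.floor(x).astype(int)
    y0 = np.floor(y).astype(int)
    x1 = np.minimum(x0 + 1, w - 1)
    y1 = np.minimum(y0 + 1, h - 1)
    # Fractional offsets inside each pixel cell
    wx, wy = x - x0, y - y0
    # Blend the four surrounding pixels
    top = v[y0, x0] * (1 - wx) + v[y0, x1] * wx
    bottom = v[y1, x0] * (1 - wx) + v[y1, x1] * wx
    return top * (1 - wy) + bottom * wy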
def calculateError(startXs, startYs, newXs, newYs, img1, img2, Ix, Iy, box, params):
    import numpy as np
    from helpers import rgb2gray
    from helpers import interp2
    from helpers import inlier_cost_func
    from helpers import warp_image
    from scipy.optimize import least_squares

    # Extract parameters
    max_dist = params[0]
    k1 = params[1]
    k2 = params[2]
    k3 = params[3]
    k4 = params[4]
    pad = 5

    source = rgb2gray(img1)
    target = rgb2gray(img2)
    source_warped = np.copy(source)
    h, w = source.shape

    # Get the boundaries of the bounding box
    xmin = max([np.amin(box[:, 0]) - pad, 0])
    xmax = min([np.amax(box[:, 0]) + pad + 1, w])
    ymin = max([np.amin(box[:, 1]) - pad, 0])
    ymax = min([np.amax(box[:, 1]) + pad + 1, h])

    # Outlier handling: keep features that stayed inside the padded box
    indexer = np.all(np.stack([newXs > xmin + pad,
                               newXs < xmax - pad,
                               newYs > ymin + pad,
                               newYs < ymax - pad], axis=0), axis=0)
    distances = np.sqrt(np.square(newXs - startXs) + np.square(newYs - startYs))
    avg_dist = np.mean(distances)
    std_dist = np.std(distances)
    if avg_dist != 0:
        indexer = np.logical_and(
            indexer,
            np.logical_and(
                distances < min([k1 * avg_dist + k2 * std_dist, max_dist]),
                distances > k3 * avg_dist - k4 * std_dist))

    # Generate vectors of inliers for calculating the transformation
    ux = startXs[indexer]
    uy = startYs[indexer]
    vx = newXs[indexer]
    vy = newYs[indexer]

    # Form our initial and final feature points in homogeneous coordinates
    N = len(ux)
    u = np.stack([ux, uy, np.ones(N)])
    v = np.stack([vx, vy, np.ones(N)])

    # Calculate the transformation via least squares
    T = least_squares(inlier_cost_func, np.identity(3)[:2].reshape(6),
                      args=(u, v))["x"].reshape(2, 3)
    T = np.concatenate((T, np.array([[0, 0, 1]])))
    newXs = np.matmul(T, u)[0]
    newYs = np.matmul(T, u)[1]

    # Warp img1, Ix and Iy based on the calculated transformation
    warped_area = warp_image(source, T, xmin, xmax, ymin, ymax)
    source_warped[ymin:ymax, xmin:xmax] = warped_area
    Ix_area = warp_image(Ix, T, xmin, xmax, ymin, ymax)
    Ix[ymin:ymax, xmin:xmax] = Ix_area
    Iy_area = warp_image(Iy, T, xmin, xmax, ymin, ymax)
    Iy[ymin:ymax, xmin:xmax] = Iy_area

    # Calculate the error per feature point
    interpx = np.array([newXs])
    interpy = np.array([newYs])
    values_this = interp2(source_warped, interpx, interpy).reshape(len(newXs))
    values_next = interp2(target, interpx, interpy).reshape(len(newXs))
    error = np.sum(np.square(values_next - values_this)) / len(newXs)

    return error, source_warped, indexer, Ix, Iy, newXs, newYs
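# `inlier_cost_func` (from helpers) is the residual function handed to
# scipy.optimize.least_squares above. A sketch consistent with how it is
# called -- 6 flattened affine parameters, u and v as 3xN homogeneous point
# arrays -- though the real helper may differ:
import numpy as np

def inlier_cost_func(params, u, v):
    # Reshape the flat parameter vector into a 2x3 affine matrix
    T = params.reshape(2, 3)
    # Residuals: how far T maps each start point from its matched end point
    return (np.matmul(T, u) - v[:2]).ravel()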
def main(video_file, output_filename):
    imgs = np.array([])
    cap = cv2.VideoCapture(video_file)
    ret, img1 = cap.read()
    img1 = img1[..., ::-1]  # BGR -> RGB
    h, w, d = img1.shape

    display_img = img1.copy()
    display_img = cv2.cvtColor(display_img, cv2.COLOR_BGR2RGB)
    cv2.namedWindow("Start Frame")
    cv2.setMouseCallback("Start Frame", draw_box)

    # Loop until the user is done drawing boxes
    while True:
        cv2.imshow("Start Frame", display_img)
        key = cv2.waitKey(0)
        if key == ord('q'):
            break

    # Destroy the drawing window
    cv2.destroyAllWindows()

    # Show the result
    for i in range(int(len(refPt) / 2)):
        cv2.rectangle(display_img, refPt[2 * i], refPt[(2 * i) + 1], (0, 255, 0), 2)
    cv2.imshow("Result", display_img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    bbox = []
    for i in range(int(len(refPt) / 2)):
        # Top left and bottom right
        box_corners = np.array([refPt[2 * i], refPt[(2 * i) + 1]])
        start_x, start_y, width, height = cv2.boundingRect(box_corners)
        # Create the four coordinates for the box and reshape
        box = np.array([[start_x, start_y],
                        [start_x + width, start_y],
                        [start_x + width, start_y + height],
                        [start_x, start_y + height]])
        bbox.append(box)

    # Turn it into a numpy array
    bbox = np.array(bbox)
    orig_box = np.copy(bbox)
    centers = np.zeros((len(bbox), 2))
    trajectory_indexer = np.zeros((h, w), dtype=bool)

    # Get the features from inside the bounding box
    x, y = getFeatures(rgb2gray(img1), bbox)
    newXs = np.copy(x)
    newYs = np.copy(y)

    f = 0
    frame = generate_output_frame(np.copy(img1), bbox, np.copy(trajectory_indexer),
                                  np.copy(newXs), np.copy(newYs))
    frame = Image.fromarray(frame)

    # Store the processed frames so we can turn them into a video later
    all_frames = []
    all_frames.append(frame)

    a = 0
    while ret:
        f += 1
        a += 1
        if not f % 8:
            print("Frame: ", f)
            a = 1
            for i in range(len(bbox)):
                # xmin = np.sort(bbox[i, :, 0])[0]
                # xmax = np.sort(bbox[i, :, 0])[3]
                # ymin = np.sort(bbox[i, :, 1])[0]
                # ymax = np.sort(bbox[i, :, 1])[3]
                # bbox[i, ...] = np.array([xmin, ymin, xmax, ymin,
                #                          xmax, ymax, xmin, ymax]).reshape(4, 2)
                orig_box = np.copy(bbox)
                x, y = getFeatures(rgb2gray(img1), bbox)
                newXs = np.copy(x)
                newYs = np.copy(y)

        thresh = .1 + .02 * a
        ret, img2 = cap.read()
        if not ret:
            break
        img2 = img2[..., ::-1]
        iterations = 1

        # Get the new feature locations in the next frame
        updatex, updatey, x, y = estimateAllTranslation(
            newXs, newYs, np.copy(x), np.copy(y), np.copy(img1),
            np.copy(img2), np.copy(bbox))

        for k in range(len(bbox)):
            centers[k] = np.array([np.mean(bbox[k, :, 0]),
                                   np.mean(bbox[k, :, 1])]).astype(int)

        # Warp the image for the next iteration
        newXs, newYs, bbox, warped = applyGeometricTransformation(
            np.copy(x), np.copy(y), updatex, updatey, np.copy(orig_box),
            np.copy(img1), np.copy(img2), thresh)

        # Plot the trajectory of each box center
        for k in range(len(bbox)):
            xcen = int(np.mean(bbox[k, :, 0]))
            ycen = int(np.mean(bbox[k, :, 1]))
            num = int(max([abs(xcen - centers[k, 0]), abs(ycen - centers[k, 1])]))
            centerx = np.linspace(centers[k, 0], xcen + 1, num).astype(int)
            centery = np.linspace(centers[k, 1], ycen + 1, num).astype(int)
            if centerx.size > 0 and centery.size > 0:
                trajectory_indexer[centery, centerx] = True
                trajectory_indexer[centery + 1, centerx] = True
                trajectory_indexer[centery, centerx + 1] = True
                trajectory_indexer[centery + 1, centerx + 1] = True
            else:
                trajectory_indexer[ycen, xcen] = True
                trajectory_indexer[ycen + 1, xcen] = True
                trajectory_indexer[ycen, xcen + 1] = True
                trajectory_indexer[ycen + 1, xcen + 1] = True

        frame = generate_output_frame(np.copy(img2), bbox,
                                      np.copy(trajectory_indexer),
                                      np.copy(newXs), np.copy(newYs))
        frame = Image.fromarray(frame)
        # frame.save("medium_frame%d.jpg" % f)
        img1 = np.copy(img2)
        all_frames.append(frame)

    cap.release()
    np_frames = np.array(
        [cv2.cvtColor(np.array(f), cv2.COLOR_BGR2RGB) for f in all_frames])
    gen_video(np.array(np_frames), "{0}.avi".format(output_filename))
def mymosaic(img_input):
    import numpy as np
    from helpers import rgb2gray
    from helpers import warp_image
    from corner_detector import corner_detector
    from anms import anms
    from feat_desc import feat_desc
    from feat_match import feat_match
    from ransac_est_homography import ransac_est_homography
    import matplotlib.pyplot as plt
    import math

    # Set our constants
    max_pts = 1000
    thresh = 0.5
    h, w, d = img_input[0].shape

    # ---------- Part 1: Get descs for each image ---------- #
    # Initialize all the cell arrays that we will be using.
    # For now, I'm only saving the variables that matter for later steps.
    x = np.zeros(3, dtype=object)
    y = np.zeros(3, dtype=object)
    descs = np.zeros(3, dtype=object)

    # Get x, y, and descs for each image
    for i in range(3):
        print("---------- Processing Image %d ----------" % (i + 1))
        gray = rgb2gray(img_input[i])
        print("Detecting corners")
        cimg = corner_detector(gray)
        print("Suppressing non maxima")
        x[i], y[i], rmax = anms(cimg, max_pts)
        print("Finding descriptors")
        descs[i] = feat_desc(gray, x[i], y[i])

    # ---------- Part 2: Estimate homographies ---------- #
    H = np.zeros(3, dtype=object)
    inlier_ind = np.zeros(3, dtype=object)
    corners = np.zeros(3, dtype=object)
    corners[1] = np.stack([np.array([0, w, w, 0]),
                           np.array([0, 0, h, h]),
                           np.ones(4)])
    H[1] = np.identity(3)
    for i in [0, 2]:
        print("---------- Matching Images %d and %d ----------" % (i + 1, 2))
        print("Finding matching descriptors")
        match = feat_match(descs[1], descs[i])
        matches = (np.where([match >= 0])[1], match[match >= 0])
        print("Performing RANSAC")
        H[i], inlier_ind[i] = ransac_est_homography(x[i][matches[1]],
                                                    y[i][matches[1]],
                                                    x[1][matches[0]],
                                                    y[1][matches[0]], thresh)
        # Find the boundaries that the mosaic has to fit into
        warped_corners = np.matmul(H[i], corners[1])
        corners[i] = warped_corners / warped_corners[2]

    # ---------- Part 3: Assemble the mosaic ---------- #
    print("---------- Assembling the Mosaic ----------")

    # Initialize the mosaic using the warped corners
    xmin = int(math.floor(np.amin(corners[0][0])))
    xmax = int(math.ceil(np.amax(corners[2][0])))
    ymin = int(math.floor(np.amin([np.amin(corners[0][1]), np.amin(corners[2][1])])))
    ymax = int(math.ceil(np.amax([np.amax(corners[0][1]), np.amax(corners[2][1])])))
    img_mosaic = np.zeros((ymax - ymin, xmax - xmin, 3)).astype(int)

    # Need to find the mesh to interpolate with
    print("Warping images")
    left, yi0, w0, h0 = warp_image(img_input[0], H[0], corners[0])
    center = img_input[1].astype(int)
    right, yi2, w2, h2 = warp_image(img_input[2], H[2], corners[2])

    # Need the offsets to correctly align images
    xi1 = -xmin
    yi1 = -ymin
    w1 = w
    h1 = h

    # Assemble the mosaic
    print("Putting it all together")
    if yi0 < yi2:
        img_mosaic[:h0, :w0][left > 0] = left[left > 0]
        img_mosaic[yi2 - yi0:h2 + yi2 - yi0, -w2 - 1:-1][right > 0] = right[right > 0]
    else:
        img_mosaic[yi0 - yi2:h0 + yi0 - yi2, :w0][left > 0] = left[left > 0]
        img_mosaic[:h2, -w2 - 1:-1][right > 0] = right[right > 0]
    img_mosaic[yi1:yi1 + h1, xi1:xi1 + w1] = center

    return img_mosaic
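# Quick numeric check of the corner-projection step above: push the image
# corners through a homography and renormalize by the third homogeneous
# coordinate. The H_demo matrix and frame size below are made up purely for
# illustration:
import numpy as np

H_demo = np.array([[1.0, 0.02, 30.0],
                   [0.01, 1.0, -12.0],
                   [1e-5, 2e-5, 1.0]])
h_demo, w_demo = 480, 640
demo_corners = np.stack([np.array([0, w_demo, w_demo, 0]),
                         np.array([0, 0, h_demo, h_demo]),
                         np.ones(4)])
warped = H_demo @ demo_corners
warped /= warped[2]  # back to inhomogeneous pixel coordinates
print(np.round(warped[:2], 1))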
EPISODES = 5000

if __name__ == "__main__":
    env = gym.make('Seaquest-v0')
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    agent.load("./save/seaquest-dqn-save.h5")
    done = False
    batch_size = 32
    K_frames = 3
    action = 0
    i = 0
    while True:
        state = env.reset()
        state = rgb2gray(state)
        i += 1
        for t in range(4000):
            # Only pick a new action every K frames (frame skipping)
            if t % K_frames == 0:
                action = agent.decide(state)
            isOpened = env.render()
            if not isOpened:
                env.close()
                exit(0)
            next_state, reward, done, _ = env.step(action)
            state = rgb2gray(next_state)
            if done:
                print("episode: {}, score: {}".format(i, t))
                break
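# `rgb2gray` is imported from helpers throughout this project but never shown.
# A common implementation, assuming the standard luminance weights (the
# helper's actual weights are not confirmed here):
import numpy as np

def rgb2gray(rgb):
    # Weighted sum of the R, G, B channels; keeps the input's value range
    return np.dot(rgb[..., :3], np.array([0.299, 0.587, 0.114]))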
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from helpers import rgb2gray
from corner_detector import corner_detector
from anms import anms
from feat_desc import feat_desc
from feat_match import feat_match
from ransac_est_homography import ransac_est_homography

print("---------- Processing First Image ----------")
path1 = "street-2.jpg"
img1 = Image.open(path1)
img1 = np.array(img1)[..., :3]
gray1 = rgb2gray(img1)
max_pts = 1000

print("Detecting corners")
cimg1 = corner_detector(gray1)
print("Suppressing non maxima")
x1, y1, rmax1 = anms(cimg1, max_pts)
print("Finding descriptors")
descs1, boxes1, oris1, ori1 = feat_desc(gray1, x1, y1)

# plt.imshow(img1)
# plt.scatter(x1, y1)
# for i in range(boxes1.shape[2]):
frame1 = generate_output_frame(np.copy(img1), bbox)
frame1 = Image.fromarray(frame1)
frame1.save("easy_frame1.jpg")
# plt.imshow(frame1)
# plt.show()

# For debugging: show the bounding box we've chosen
# plt.imshow(img1)
# for box in bbox:
#     for i in range(3):
#         plt.plot(box[i: i+2, 0], box[i: i+2, 1], color="red")
#     plt.plot([box[0, 0], box[3, 0]], [box[0, 1], box[3, 1]], color="red")
# plt.show()

# Get the features from inside the bounding box
x, y = getFeatures(rgb2gray(img1), bbox)

# For debugging: show the bounding box and the features inside
# plt.imshow(img1)
# for box in bbox:
#     for i in range(3):
#         plt.plot(box[i: i+2, 0], box[i: i+2, 1], color="red")
#     plt.plot([box[0, 0], box[3, 0]], [box[0, 1], box[3, 1]], color="red")
# for i in range(x.shape[1]):
#     plt.scatter(x[i], y[i][:], color="blue")
# plt.show()

nextframe = np.copy(img2)
warped = np.copy(img1)
newXs = np.copy(x)
newYs = np.copy(y)
def mymosaic(img_input):
    # Your Code Here
    img_mosaic = np.zeros((img_input.shape[0], 1), dtype=object)
    for i in range(img_input.shape[0]):
        imgA = img_input[i, 0]
        imgB = img_input[i, 1]
        imgC = img_input[i, 2]
        imgA_gray = rgb2gray(imgA)
        imgB_gray = rgb2gray(imgB)
        imgC_gray = rgb2gray(imgC)

        cimgA = corner_detector(imgA_gray)
        cimgB = corner_detector(imgB_gray)
        cimgC = corner_detector(imgC_gray)

        max_pts = 500
        xA, yA, rmaxA = anms(cimgA, max_pts)
        xB, yB, rmaxB = anms(cimgB, max_pts)
        xC, yC, rmaxC = anms(cimgC, max_pts)

# =============================================================================
#         # Demonstrate ANMS result
#         IA = flipChannel(imgA)
#         drawPoints(IA, xA, yA, (0, 0, 255))
#         cv.imwrite('A' + str(i + 1) + '.jpg', IA)
#
#         IB = flipChannel(imgB)
#         drawPoints(IB, xB, yB, (0, 0, 255))
#         cv.imwrite('B' + str(i + 1) + '.jpg', IB)
#
#         IC = flipChannel(imgC)
#         drawPoints(IC, xC, yC, (0, 0, 255))
#         cv.imwrite('C' + str(i + 1) + '.jpg', IC)
# =============================================================================

        descsA = feat_desc(imgA_gray, xA, yA)
        descsB = feat_desc(imgB_gray, xB, yB)
        descsC = feat_desc(imgC_gray, xC, yC)

        match1 = feat_match(descsA, descsB)
        match2 = feat_match(descsC, descsB)

        ransac_thresh = 10
        # feat_match returns -1 where no match was found; >= 0 keeps all valid
        # matches (including index 0)
        xA1, yA1 = xA[match1 >= 0].reshape(-1, 1), yA[match1 >= 0].reshape(-1, 1)
        xB1, yB1 = (xB[match1[match1 >= 0]].reshape(-1, 1),
                    yB[match1[match1 >= 0]].reshape(-1, 1))
        H1, inlier_ind1 = ransac_est_homography(xA1, yA1, xB1, yB1, ransac_thresh)

        xC2, yC2 = xC[match2 >= 0].reshape(-1, 1), yC[match2 >= 0].reshape(-1, 1)
        xB2, yB2 = (xB[match2[match2 >= 0]].reshape(-1, 1),
                    yB[match2[match2 >= 0]].reshape(-1, 1))
        H2, inlier_ind2 = ransac_est_homography(xC2, yC2, xB2, yB2, ransac_thresh)

# =============================================================================
#         # Demonstrating RANSAC match result
#         row, col, _ = imgA.shape
#
#         outlier_ind1 = np.delete(np.arange(len(xA1)), inlier_ind1)
#         IA1 = flipChannel(imgA)
#         drawPoints(IA1, xA1[inlier_ind1], yA1[inlier_ind1], (0, 0, 255))
#         drawPoints(IA1, xA1[outlier_ind1], yA1[outlier_ind1], (255, 0, 0))
#         IB1 = flipChannel(imgB)
#         drawPoints(IB1, xB1[inlier_ind1], yB1[inlier_ind1], (0, 0, 255))
#         drawPoints(IB1, xB1[outlier_ind1], yB1[outlier_ind1], (255, 0, 0))
#         imgAB = np.zeros((row, 2 * col, 3))
#         imgAB[:, 0:col, :] = IA1
#         imgAB[:, col:2 * col, :] = IB1
#         drawLines(imgAB, xA1[inlier_ind1], yA1[inlier_ind1],
#                   xB1[inlier_ind1] + col, yB1[inlier_ind1], (0, 255, 0))
#         cv.imwrite('left_match' + str(i + 1) + '.jpg', imgAB)
#
#         outlier_ind2 = np.delete(np.arange(len(xC2)), inlier_ind2)
#         IC2 = flipChannel(imgC)
#         drawPoints(IC2, xC2[inlier_ind2], yC2[inlier_ind2], (0, 0, 255))
#         drawPoints(IC2, xC2[outlier_ind2], yC2[outlier_ind2], (255, 0, 0))
#         IB2 = flipChannel(imgB)
#         drawPoints(IB2, xB2[inlier_ind2], yB2[inlier_ind2], (0, 0, 255))
#         drawPoints(IB2, xB2[outlier_ind2], yB2[outlier_ind2], (255, 0, 0))
#         imgBC = np.zeros((row, 2 * col, 3))
#         imgBC[:, 0:col, :] = IB2
#         imgBC[:, col:2 * col, :] = IC2
#         drawLines(imgBC, xB2[inlier_ind2], yB2[inlier_ind2],
#                   xC2[inlier_ind2] + col, yC2[inlier_ind2], (0, 255, 0))
#         cv.imwrite('right_match' + str(i + 1) + '.jpg', imgBC)
# =============================================================================

        new_left, new_middle, new_right = getNewSize(H1, H2, imgA, imgB, imgC)

        # Blend images by seam carving or alpha blending
        img_mosaic[i, 0] = seamBlend(new_left, new_middle, new_right)
        # img_mosaic[i, 0] = alphaBlend(alphaBlend(new_left, new_middle), new_right)

    return img_mosaic
def objectTracking(rawVideo, output_filename, draw_boxes=False):
    imgs = np.array([])
    cap = cv2.VideoCapture(rawVideo)
    ret, img1 = cap.read()
    img1 = img1[..., ::-1]  # BGR -> RGB
    h, w, d = img1.shape

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter("{0}.avi".format(output_filename), fourcc, 20.0, (w, h))

    # Set parameters based on which video it is
    if rawVideo == "Easy.mp4":
        difficulty = "easy"
        print("Performing tracking on easy video")
        k_pad = 2
        params = [4, 1, 3, 1, 1.5]
    elif rawVideo == "Medium.mp4":
        difficulty = "medium"
        print("Performing tracking on medium video")
        k_pad = 2.5
        params = [3, 1, 3, 0, 0]
    else:
        print("Invalid path - valid videos are 'Easy.mp4' and 'Medium.mp4'")
        return None

    if draw_boxes:
        display_img = img1.copy()
        display_img = cv2.cvtColor(display_img, cv2.COLOR_BGR2RGB)
        cv2.namedWindow("Start Frame")
        cv2.setMouseCallback("Start Frame", draw_box)

        # Loop until the user is done drawing boxes
        while True:
            cv2.imshow("Start Frame", display_img)
            key = cv2.waitKey(0)
            if key == ord('q'):
                break

        # Destroy the drawing window
        cv2.destroyAllWindows()

        # Show the result
        for i in range(int(len(refPt) / 2)):
            cv2.rectangle(display_img, refPt[2 * i], refPt[(2 * i) + 1],
                          (0, 255, 0), 2)
        cv2.imshow("Result", display_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

        bbox = []
        for i in range(int(len(refPt) / 2)):
            # Top left and bottom right
            box_corners = np.array([refPt[2 * i], refPt[(2 * i) + 1]])
            start_x, start_y, width, height = cv2.boundingRect(box_corners)
            # Create the four coordinates for the box and reshape
            box = np.array([[start_x, start_y],
                            [start_x + width, start_y],
                            [start_x + width, start_y + height],
                            [start_x, start_y + height]])
            bbox.append(box)

        # Turn it into a numpy array
        bbox = np.array(bbox)
    else:
        # Load the bounding boxes that were drawn manually
        bbox = np.load("bbox_" + difficulty + ".npy")

    orig_box = np.copy(bbox)
    centers = np.zeros((len(bbox), 2))

    # For plotting the trajectory of the object
    trajectory_indexer = np.zeros((h, w), dtype=bool)

    # Get the features from inside the bounding box
    x, y = getFeatures(rgb2gray(img1), bbox)

    # Initialize these before the loop starts
    newXs = np.copy(x)
    newYs = np.copy(y)

    # Record the initial frame
    f = 0
    frame = generate_output_frame(np.copy(img1), bbox, np.copy(trajectory_indexer),
                                  np.copy(newXs), np.copy(newYs))
    out.write(frame[..., ::-1])

    # Loop through the remainder of the frames
    while True:
        f += 1
        print("Processing frame: %d..." % f, end="\r", flush=True)
        if bbox.size:
            # Get new features every 8 frames and update the key frame
            if not f % 8:
                for i in range(len(bbox)):
                    orig_box = np.copy(bbox)
                    x, y = getFeatures(rgb2gray(img1), bbox)
                    newXs = np.copy(x)
                    newYs = np.copy(y)

            # Read the next frame
            ret, img2 = cap.read()
            if not ret:
                break

            # Switch to RGB
            img2 = img2[..., ::-1]

            # Get the new feature locations in the next frame
            updatex, updatey, x, y = estimateAllTranslation(
                np.copy(newXs), np.copy(newYs), np.copy(x), np.copy(y),
                np.copy(img1), np.copy(img2), np.copy(bbox), params)

            # Find centers for trajectory plotting
            for k in range(len(bbox)):
                centers[k] = np.array([np.mean(bbox[k, :, 0]),
                                       np.mean(bbox[k, :, 1])]).astype(int)

            # Warp the image for the next iteration
            newXs, newYs, bbox = applyGeometricTransformation(
                np.copy(x), np.copy(y), updatex, updatey, np.copy(orig_box),
                np.copy(img1), k_pad)

            # Handle when we've gotten rid of a bounding box
            indexer = np.ones(len(bbox), dtype=bool)
            for k in range(len(bbox)):
                if not np.any(bbox[k]) or len(newXs[k]) < 2:
                    indexer[k] = False

            # Nix everything associated with the removed bounding box
            bbox = bbox[indexer]
            orig_box = orig_box[indexer]
            newXs = newXs[indexer]
            newYs = newYs[indexer]
            x = x[indexer]
            y = y[indexer]
            centers = centers[indexer]

            # Plot the trajectory on the image
            for k in range(len(bbox)):
                xcen = int(np.mean(bbox[k, :, 0]))
                ycen = int(np.mean(bbox[k, :, 1]))
                if 2 < xcen < w - 2 and 2 < ycen < h - 2:
                    num = int(max([abs(xcen - centers[k, 0]),
                                   abs(ycen - centers[k, 1])]))
                    centerx = np.linspace(centers[k, 0], xcen + 1, num).astype(int)
                    centery = np.linspace(centers[k, 1], ycen + 1, num).astype(int)
                    if centerx.size > 0 and centery.size > 0:
                        trajectory_indexer[centery, centerx] = True
                        trajectory_indexer[centery + 1, centerx] = True
                        trajectory_indexer[centery, centerx + 1] = True
                        trajectory_indexer[centery + 1, centerx + 1] = True
                    else:
                        trajectory_indexer[ycen, xcen] = True
                        trajectory_indexer[ycen + 1, xcen] = True
                        trajectory_indexer[ycen, xcen + 1] = True
                        trajectory_indexer[ycen + 1, xcen + 1] = True

            # Generate the next frame
            frame = generate_output_frame(np.copy(img2), bbox,
                                          np.copy(trajectory_indexer),
                                          np.copy(newXs), np.copy(newYs))
            frame = frame[..., ::-1]

        # We have no bounding boxes; move on to generating the video
        else:
            ret, img2 = cap.read()
            if not ret:
                break
            frame = img2

        # Update img1 for the next loop
        img1 = np.copy(img2)
        out.write(frame)

    cap.release()
    out.release()
    return None
Author: Shiv, Matt
Date created: 10/25/2018
'''
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from helpers import rgb2gray
from corner_detector import corner_detector
from anms import anms
from feat_desc import feat_desc

path1 = "1L.jpg"
img1 = Image.open(path1)
img1 = np.array(img1)[..., :3]
gray1 = rgb2gray(img1)
max_pts = 50

cimg1 = corner_detector(gray1)
x1, y1, rmax1 = anms(cimg1, max_pts)
descs1, boxes1, oris1, ori1 = feat_desc(gray1, x1, y1)

plt.imshow(img1)
plt.imshow(cimg1)
# plt.scatter(x1, y1)
# for i in range(boxes1.shape[2]):
#     plt.plot(boxes1[:, 0, i], boxes1[:, 1, i], color="red")
#     plt.plot(oris1[0, :, i], oris1[1, :, i], color="green")
def applyGeometricTransformation(startXs, startYs, newXs, newYs, bbox, img, k_pad):
    import numpy as np
    from helpers import inlier_cost_func
    from helpers import rgb2gray
    from scipy.optimize import least_squares

    F = len(bbox)
    pad = 5
    gray = rgb2gray(img)
    output = np.copy(gray)
    h, w = gray.shape

    for i in range(F):
        # If the item went off the screen, its bounding box could be empty
        if not np.any(bbox[i]):
            continue

        # --------- Part 1: Estimate the homography for a given bounding box ---------- #
        # Squeeze the box to the features within a margin determined by k_pad * pad
        xmin = max([np.amin(bbox[i, :, 0]), np.amin(newXs[i]) - k_pad * pad])
        xmax = min([np.amax(bbox[i, :, 0]), np.amax(newXs[i]) + k_pad * pad])
        ymin = max([np.amin(bbox[i, :, 1]), np.amin(newYs[i]) - k_pad * pad])
        ymax = min([np.amax(bbox[i, :, 1]), np.amax(newYs[i]) + k_pad * pad])
        bbox[i] = np.array([xmin, ymin, xmin, ymax,
                            xmax, ymax, xmax, ymin]).reshape(4, 2)

        ux = np.copy(startXs[i])
        uy = np.copy(startYs[i])
        vx = np.copy(newXs[i])
        vy = np.copy(newYs[i])

        # Form our initial and final feature points in homogeneous coordinates
        N = len(ux)
        u = np.stack([ux, uy, np.ones(N)])
        v = np.stack([vx, vy, np.ones(N)])

        # Calculate the transformation
        H = least_squares(inlier_cost_func, np.identity(3)[:2].reshape(6),
                          args=(u, v))["x"].reshape(2, 3)
        H = np.concatenate((H, np.array([[0, 0, 1]])))

        # --------- Part 2: Update the ith bounding box ---------- #
        # Apply the homography to the corners
        corners = np.stack([bbox[i].T[0], bbox[i].T[1], np.ones(4)])
        corners = np.matmul(H, corners)
        corners = corners / corners[2]  # unnecessary for affine transformations

        # If the object has passed out of the image frame, get rid of it
        if np.any(np.logical_or(corners[0] >= w, corners[0] < 0)) and \
                np.any(np.logical_or(corners[1] >= h, corners[1] < 0)):
            bbox[i] = np.zeros((4, 2))
            newXs[i] = vx
            newYs[i] = vy
            continue

        # Restrict the bounding box to the image frame
        corners[corners < 0] = 0
        corners[0][corners[0] >= w] = w - 1
        corners[1][corners[1] >= h] = h - 1

        # Update the corners of the box
        bbox[i, ...] = corners[:2].T

        # Update the feature entries to remove outliers
        newXs[i] = vx
        newYs[i] = vy

    return newXs, newYs, bbox
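# Tiny self-contained check of the least-squares affine fit used above, with
# the residual function written inline (the real helpers.inlier_cost_func may
# differ): recover a pure translation from noiseless correspondences.
import numpy as np
from scipy.optimize import least_squares

def _affine_residuals(params, u, v):
    # Residuals of the 2x3 affine map applied to homogeneous start points
    return (params.reshape(2, 3) @ u - v[:2]).ravel()

if __name__ == "__main__":
    ux = np.array([10.0, 40.0, 40.0, 10.0])
    uy = np.array([10.0, 10.0, 30.0, 30.0])
    u = np.stack([ux, uy, np.ones(4)])
    v = np.stack([ux + 3.0, uy - 2.0, np.ones(4)])  # shifted by (+3, -2)
    T = least_squares(_affine_residuals, np.identity(3)[:2].reshape(6),
                      args=(u, v))["x"].reshape(2, 3)
    print(np.round(T, 3))  # expect [[1, 0, 3], [0, 1, -2]]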