def main():
    img = open_image("mandril.bmp")
    M = rns_matrix(img, 45, 0.5)
    kps1, des1 = points(img)
    rns = rotate_n_scale(img, M)
    kps2, des2 = points(rns)
    show_image("Original - keypoints", draw_points(img, kps1))
    show_image('Rotated and scaled - keypoints', draw_points(rns, kps2))

    # FLANN parameters
    FLANN_INDEX_KDTREE = 0
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)  # or pass empty dictionary
    flann = cv2.FlannBasedMatcher(index_params, search_params)
    matches = flann.knnMatch(des1, des2, k=2)
    # ratio test: keep the closer match of each knn pair
    # (tuple-unpacking lambdas are Python 2 only, so use a list comprehension)
    matches = [m if m.distance < 0.7 * n.distance else n for m, n in matches]
    matches = sorted(matches, key=lambda val: val.distance)

    iM = cv2.invertAffineTransform(M)
    percentage = perfect_matches(iM, matches, kps1, kps2)
    print(str(percentage) + "%")
    drawMatches(img, kps1, rns, kps2, matches)
    wait_exit()
def faceclone(src_name, dst_name):
    src_img = cv2.imread(src_name)
    dst_img = cv2.imread(dst_name)

    src_rst = api.detection.detect(img=File(src_name), attribute='pose')
    src_img_width = src_rst['img_width']
    src_img_height = src_rst['img_height']
    src_face = src_rst['face'][0]

    dst_rst = api.detection.detect(img=File(dst_name), attribute='pose')
    dst_img_width = dst_rst['img_width']
    dst_img_height = dst_rst['img_height']
    dst_face = dst_rst['face'][0]

    ss = np.array(get_feature_points(src_face, src_img_width, src_img_height), dtype=np.float32)
    ps = np.array(get_feature_points(dst_face, dst_img_width, dst_img_height), dtype=np.float32)

    map_matrix = cv2.getAffineTransform(ps, ss)
    #dsize = (300,300)
    map_result = cv2.warpAffine(dst_img, map_matrix, dsize=(src_img_width, src_img_height))

    extract_mask, center = contour.extract_face_mask(src_face['face_id'], src_img_width, src_img_height, src_name)

    # merge
    ## first blending the border
    extract_alpha = contour.extract_face_alpha(src_face['face_id'], src_img_width, src_img_height, src_name)

    # seamlessClone expects an integer (x, y) centre point
    center = (map_result.shape[1] // 2, map_result.shape[0] // 2)
    map_result = cv2.seamlessClone(src_img, map_result, extract_mask, center, flags=cv2.NORMAL_CLONE)

    imap_matrix = cv2.invertAffineTransform(map_matrix)
    # warpAffine's dsize is (width, height)
    final = cv2.warpAffine(map_result, imap_matrix, dsize=(dst_img.shape[1], dst_img.shape[0]))
    return final
def affine_skew(tilt, phi, img, mask=None): ''' affine_skew(tilt, phi, img, mask=None) -> skew_img, skew_mask, Ai Ai - is an affine transform matrix from skew_img to img ''' h, w = img.shape[:2] if mask is None: mask = np.zeros((h, w), np.uint8) mask[:] = 255 A = np.float32([[1, 0, 0], [0, 1, 0]]) if phi != 0.0: phi = np.deg2rad(phi) s, c = np.sin(phi), np.cos(phi) A = np.float32([[c,-s], [ s, c]]) corners = [[0, 0], [w, 0], [w, h], [0, h]] tcorners = np.int32( np.dot(corners, A.T) ) x, y, w, h = cv2.boundingRect(tcorners.reshape(1,-1,2)) A = np.hstack([A, [[-x], [-y]]]) img = cv2.warpAffine(img, A, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE) if tilt != 1.0: s = 0.8*np.sqrt(tilt*tilt-1) img = cv2.GaussianBlur(img, (0, 0), sigmaX=s, sigmaY=0.01) img = cv2.resize(img, (0, 0), fx=1.0/tilt, fy=1.0, interpolation=cv2.INTER_NEAREST) A[0] /= tilt if phi != 0.0 or tilt != 1.0: h, w = img.shape[:2] mask = cv2.warpAffine(mask, A, (w, h), flags=cv2.INTER_NEAREST) Ai = cv2.invertAffineTransform(A) return img, mask, Ai
def three_e(affine, a_img, b_img):
    rand = np.zeros((a_img.shape[0], a_img.shape[1]))
    out_warp = cv2.invertAffineTransform(affine)
    warped_image = cv2.warpAffine(b_img.astype(np.uint8), out_warp,
                                  (b_img.shape[1], b_img.shape[0]),
                                  flags=cv2.INTER_LINEAR)
    write_image(warped_image, "ps5-3-e-1.png")
    merged = cv2.merge((rand.astype(np.uint8),
                        warped_image.astype(np.uint8),
                        a_img.astype(np.uint8)))
    write_image(merged, "ps5-3-e-2.png")
    return out_warp
def extract_box(img, box, padding_factor = 0.2): ''' we can search for whatever we want in the rotated bordered image, Any point found can be translated back to the original image by: 1. adding the origins of the bordered area, 2. rotating the point using the inverse rotation matrix ''' if box.angle != 0: b_w = max(img.shape)*2 b_h = b_w dx_center = b_w / 2 - box.center[0] dy_center = b_h / 2 - box.center[1] new_img = np.zeros((b_w, b_h, 3), dtype = img.dtype) new_img[dy_center:(dy_center + img.shape[0]), dx_center:(dx_center + img.shape[1]), :] = img box_in_big_image = box.points + np.c_[np.ones((4,1)) * dx_center, np.ones((4,1)) * dy_center] rot_mat = cv2.getRotationMatrix2D((b_w/2, b_h/2), box.angle, scale = 1.0) inv_rot_mat = cv2.invertAffineTransform(rot_mat) rot_image = cv2.warpAffine(new_img, rot_mat, (new_img.shape[1],new_img.shape[0]), flags=cv2.INTER_CUBIC) box_UL_in_rotated = (rot_mat * np.matrix([box_in_big_image[0,0], box_in_big_image[0,1], 1]).transpose()).transpose().tolist()[0] box_coords_in_rotated = np.matrix(np.c_[box_in_big_image, np.ones((4,1))]) * rot_mat.T box_coords_in_rotated = box_coords_in_rotated[0,:].tolist()[0] + [box.dx, box.dy] else: rot_mat = cv2.getRotationMatrix2D(box.center, box.angle, scale = 1.0) inv_rot_mat = cv2.invertAffineTransform(rot_mat) # for efficiency rot_image = img.copy() box_UL_in_rotated = (rot_mat * np.matrix([box.points[0,0], box.points[0,1], 1]).transpose()).transpose().tolist()[0] box_coords_in_rotated = box_UL_in_rotated + [box.dx, box.dy] img_with_border, Dx, Dy = extract_rect(rot_image, box_coords_in_rotated, padding_factor) box_coords_in_bordered = [Dx, Dy] + [box.dx, box.dy] border_UL_in_rotated = [box_UL_in_rotated[0]-Dx, box_UL_in_rotated[1]-Dy] return img_with_border, box_coords_in_bordered, border_UL_in_rotated, inv_rot_mat
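# A minimal sketch (not part of the original code) of the two translate-back steps the
# docstring above describes, for a point (px, py) found inside img_with_border:
# 1. add the origin of the bordered area, 2. rotate with the inverse rotation matrix.
# When box.angle != 0 the result is still in the enlarged canvas, so the embedding
# offsets used inside extract_box would additionally have to be subtracted.
def point_to_original(pt, border_UL_in_rotated, inv_rot_mat):
    x = pt[0] + border_UL_in_rotated[0]   # step 1: shift by the border origin
    y = pt[1] + border_UL_in_rotated[1]
    # step 2: apply the 2x3 inverse rotation matrix to the homogeneous point
    return np.dot(inv_rot_mat, np.array([x, y, 1.0]))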
def get_original_roi(self, mat, size, padding=0):
    """ Return the square aligned box location on the original image """
    logger.trace("matrix: %s, size: %s. padding: %s", mat, size, padding)
    matrix = self.transform_matrix(mat, size, padding)
    points = np.array([[0, 0], [0, size - 1], [size - 1, size - 1], [size - 1, 0]], np.int32)
    points = points.reshape((-1, 1, 2))
    matrix = cv2.invertAffineTransform(matrix)  # pylint: disable=no-member
    logger.trace("Returning: (points: %s, matrix: %s", points, matrix)
    return cv2.transform(points, matrix)  # pylint: disable=no-member
def detect(self, img, degree=DEFAULT_DEGREE, debug=False, min_size=150): """ Detect people in the image. :param debug: show each rotated image and press to continue :param img: source image :param degree: delta angle for rotations. :param min_size: minimum height in pixels for a person """ # Rotate image image_list = self.rotate_image(img, degree) detected_pols = [] # For each rotated image for image, rotation_matrix in image_list: # Run HOG detected_rectangles, w = self.hog.detectMultiScale(image, winStride=(8, 8), padding=(32, 32), scale=1.05) if debug: self.draw_detections(image, detected_rectangles) cv2.imshow("test", image) cv2.waitKey(0) # Inverted matrix inv_mat = cv2.invertAffineTransform(rotation_matrix) # For each detected person for x, y, w, h in detected_rectangles: # WARNING: size of the person is known a priori if w < min_size: continue # transform # transformed_point p1 = inv_mat.dot(np.array([x, y, 1])).tolist() p2 = inv_mat.dot(np.array([x + w, y, 1])).tolist() p3 = inv_mat.dot(np.array([x + w, y + h, 1])).tolist() p4 = inv_mat.dot(np.array([x, y + h, 1])).tolist() polygon = [p1, p2, p3, p4] # Add to the list detected_pols.append(polygon) return detected_pols
def affine_skew(tilt, phi, img, mask=None): """ Increase robustness to descriptors by calculating other invariant perspectives to image. :param tilt: tilting of image :param phi: rotation of image (in degrees) :param img: image to find Affine transforms :param mask: mask to detect keypoints (it uses default, mask[:] = 255) :return: skew_img, skew_mask, Ai (invert Affine Transform) Ai - is an affine transform matrix from skew_img to img """ h, w = img.shape[:2] # get 2D shape if mask is None: mask = np.zeros((h, w), np.uint8) mask[:] = 255 A = np.float32([[1, 0, 0], [0, 1, 0]]) # init Transformation matrix if phi != 0.0: # simulate rotation phi = np.deg2rad(phi) # convert degrees to radian s, c = np.sin(phi), np.cos(phi) # get sine, cosine components # build partial Transformation matrix A = np.float32([[c, -s], [s, c]]) corners = [[0, 0], [w, 0], [w, h], [0, h]] # use corners tcorners = np.int32(np.dot(corners, A.T)) # transform corners x, y, w, h = cv2.boundingRect( tcorners.reshape(1, -1, 2)) # get translations A = np.hstack([A, [[-x], [-y]]]) # finish Transformation matrix build img = cv2.warpAffine( img, A, (w, h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REPLICATE) if tilt != 1.0: s = 0.8 * np.sqrt(tilt * tilt - 1) # get sigma # blur image with gaussian blur img = cv2.GaussianBlur(img, (0, 0), sigmaX=s, sigmaY=0.01) img = cv2.resize(img, (0, 0), fx=1.0 / tilt, fy=1.0, interpolation=cv2.INTER_NEAREST) # resize A[0] /= tilt if phi != 0.0 or tilt != 1.0: h, w = img.shape[:2] # get new 2D shape # also get mask transformation mask = cv2.warpAffine(mask, A, (w, h), flags=cv2.INTER_NEAREST) Ai = cv2.invertAffineTransform(A) return img, mask, Ai
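# Usage sketch (an assumption, not from the original source): detect features on the
# skewed view and map the keypoint coordinates back to the original image with the
# returned inverse affine matrix Ai, as ASIFT-style pipelines do.
def detect_on_skewed(detector, img, tilt, phi):
    skew_img, skew_mask, Ai = affine_skew(tilt, phi, img)
    keypoints, descriptors = detector.detectAndCompute(skew_img, skew_mask)
    for kp in keypoints:
        x, y = kp.pt
        # Ai is 2x3, so apply it to the homogeneous point [x, y, 1]
        kp.pt = tuple(np.dot(Ai, np.array([x, y, 1.0])))
    return keypoints, descriptors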
        return ret, M

ctx_id = 4
img_path = '../deploy/Tom_Hanks_54745.png'
img = cv2.imread(img_path)
#img = np.zeros( (128,128,3), dtype=np.uint8 )
handler = Handler('./model/HG', 1, ctx_id)
for _ in range(10):
    ta = datetime.datetime.now()
    landmark, M = handler.get(img)
    tb = datetime.datetime.now()
    print('get time cost', (tb - ta).total_seconds())

# visualize landmark: map aligned-crop coordinates back to the original image
IM = cv2.invertAffineTransform(M)
for i in range(landmark.shape[0]):
    p = landmark[i]
    point = np.ones((3, ), dtype=np.float32)
    point[0:2] = p
    point = np.dot(IM, point)
    landmark[i] = point[0:2]

for i in range(landmark.shape[0]):
    p = landmark[i]
    point = (int(p[0]), int(p[1]))
    cv2.circle(img, point, 1, (0, 255, 0), 2)

filename = './landmark_test.png'
print('writing', filename)
cv2.imwrite(filename, img)
def transformPointsInverse(T, width, height):
    T = cv2.invertAffineTransform(T)
    return transformPointsForward(T, width, height)
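# transformPointsForward is not shown here; a hypothetical sketch of such a helper,
# assuming it pushes the four corners of a width x height rectangle through the
# 2x3 affine matrix T (names and behaviour are an assumption, not the original code).
def transformPointsForward(T, width, height):
    corners = np.array([[0, 0, 1],
                        [width, 0, 1],
                        [width, height, 1],
                        [0, height, 1]], dtype=np.float32)
    # (4x3) . (3x2) -> 4x2 array of transformed corner points
    return corners.dot(np.asarray(T, dtype=np.float32).T)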
def estimate(self, frame): """Estimate the warp parameters that best fit the given frame. Note that this implicitly assumes that frames are supplied in sequence. This method uses the previously solved for warp parameters of (presumably) the previous frame in the same sequence. """ # start with our previous parameter estimate p = self.p dP = np.zeros(6) dP_prev = dP + np.inf # precompute anything we can frame_gradient = [ cv2.Sobel(np.float32(frame), cv2.CV_32F, 1, 0, ksize=3), cv2.Sobel(np.float32(frame), cv2.CV_32F, 0, 1, ksize=3) ] # begin iteration until gradient descent converges count = 0 st = time.time() norm_hist = [] while np.linalg.norm(dP - dP_prev) > self.epsilon or count < self.min_count: # get representation of affine transform W = np.array([[1, 0, 0], [0, 1, 0]]) + p W = cv2.invertAffineTransform(W) # warp image with current parameter estimate I = cv2.warpAffine(cv2.warpAffine(frame, W, self.shape), self.H[:2], self.shape) # convert various entities to floating point I = np.float32(I) grad = np.float32(frame_gradient) # scale to match template frame brightness # note: this fails if our previous estimate is bad. # we're probably already in a dead-end, but might as well try. if I.mean() != 0: scale = self.template.mean() / I.mean() I *= scale # compute error image E = self.template - I I_grad = np.array([ cv2.warpAffine(cv2.warpAffine(grad[0], W, self.shape), self.H[:2], self.shape), cv2.warpAffine(cv2.warpAffine(grad[1], W, self.shape), self.H[:2], self.shape) ]) # calculate steepest descent matrix D1 = I_grad[0][..., np.newaxis] * self.J[:, :, 0, :] D2 = I_grad[1][..., np.newaxis] * self.J[:, :, 1, :] D = D1 + D2 # calculate huber loss matrix H_w = 0.5 * (E * E) H_w[abs(E) > self.sigma] = (self.sigma * abs(E) - 0.5 * self.sigma)[abs(E) > self.sigma] # calculate Hessian and remaining terms needed to solve for dP H = np.tensordot(D, H_w[..., np.newaxis] * D, axes=((0, 1), (0, 1))) O = (D * (H_w * E)[..., np.newaxis]).sum((0, 1)) # calculate parameter delta try: dP_prev = dP dP = np.linalg.inv(H) @ O except np.linalg.LinAlgError: return False, None # update parameter estimates p += dP.reshape(3, 2).T # update our counter and evaluate stop criterion norm_hist.append(np.linalg.norm(dP - dP_prev)) count += 1 if count % 100 == 0: # check if we need a bump if np.std(norm_hist[-50:]) < 0.05: p += np.random.random(p.shape) / 5 - 0.1 # stop after max_count iterations if count >= self.max_count: return False, p # we've converged: update current location estimate self.p = p return True, p
def process(args): # print "process\n",args img1 = args['img1'] idir = args['idir'] odir = args['odir'] dat = args['dat'] tim = args['tim'] filename = args['filename'] cmap = args['cmap'] iframe=int(filename[9:12]) #frame number [001,002,...] frame_name=os.path.join(idir,filename) #full path filename print 'process( %s )'% frame_name # create a mask that processes only the central region mask = zeros(img1.shape,dtype=uint8) mask[20:1040,140:1500] = 1 # mask for black border on saved image fmask = zeros(img1.shape,dtype=uint8) # black fmask[20:1050,90:1680] = 1 #y,x # create an OpenCV SURF (Speeded Up Robust Features) object hessianThreshold = 1200 img2 = read_frame(frame_name) # img2 = enhance_image(img2,cmap) detector = cv2.SURF(hessianThreshold) kp1, desc1 = detector.detectAndCompute(img1, mask) kp2, desc2 = detector.detectAndCompute(img2, mask) desc1.shape = (-1, detector.descriptorSize()) desc2.shape = (-1, detector.descriptorSize()) r_threshold = 0.75 m = match_bruteforce(desc1, desc2, r_threshold) u1 = np.array([kp1[i].pt for i, j in m]) u2 = np.array([kp2[j].pt for i, j in m]) H, status = cv2.findHomography(u1, u2, cv2.RANSAC, 1.0) M=H[0:2,:] # this affine matrix from findHomography is not as constrained as doing # least-squares to allow affine only, so only use the 'status' # from findHomography to indicate # "good pairs of points" to use in least-squares fit below ind=where(status)[0] M,p = affine_lsq(u1[ind],u2[ind]) # invert the affine matrix so we can register image 2 to image 1 Minv = cv2.invertAffineTransform(M) img2b = cv2.warpAffine(img2,Minv,np.shape(img2.T)) # transform successive frames rather than just the 1st frame # comment this out to just difference on the 1st frame #img1=img2b # fill in left side with grey from single pixel img2b[0:624,90:152] = img2b[625,152] img2b[624:,90:109] = img2b[625,152] # create PIL image # overlay registered image with black border to eliminate # annoying jumping of ragged border regions when animating im = Image.fromarray(img2b*fmask) im = enhance_im(im,cmap) # annotate frame with date and time print filename,dat,tim annotate_frame(im,iframe,dat,tim) plotpoints=False if plotpoints: plot_points(img1,img2,u1,u2,ind,p,iframe) im.save(('%s/%3.3d.png' % (odir,iframe)),'png')
    # imgRotation2 = cv2.warpAffine(img, matRotation2, (widthNew, heightNew), borderValue=(255, 255, 255))
    imgRotation2 = getWrapImage(img, matRotation2, widthNew, heightNew)
    return imgRotation, imgRotation2, matRotation

def draw_box(img, box):
    cv2.line(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 3)
    cv2.line(img, (box[0], box[1]), (box[4], box[5]), (0, 255, 0), 3)
    cv2.line(img, (box[2], box[3]), (box[6], box[7]), (0, 255, 0), 3)
    cv2.line(img, (box[4], box[5]), (box[6], box[7]), (0, 255, 0), 3)
    return img

image = cv2.imread('/Users/austinjing/Documents/Aye/RotateImage/test.jpg')
imgRotation, imgRotation2, matRotation = dumpRotateImage(image, 15)
box = [200, 250, 250, 200, 230, 280, 280, 230]

# Map the box corners from the rotated image back onto the original image
reverseMatRotation = cv2.invertAffineTransform(matRotation)
pt1 = np.dot(reverseMatRotation, np.array([[box[0]], [box[1]], [1]]))
pt2 = np.dot(reverseMatRotation, np.array([[box[2]], [box[3]], [1]]))
pt3 = np.dot(reverseMatRotation, np.array([[box[4]], [box[5]], [1]]))
pt4 = np.dot(reverseMatRotation, np.array([[box[6]], [box[7]], [1]]))
#print(pt1, pt2, pt3, pt4)

box2 = [pt1[0], pt1[1], pt2[0], pt2[1], pt3[0], pt3[1], pt4[0], pt4[1]]
cv2.imwrite('/Users/austinjing/Documents/Aye/RotateImage/drawBox.jpg', draw_box(imgRotation, box))
cv2.imwrite('/Users/austinjing/Documents/Aye/RotateImage/raw.png', draw_box(image, box2))
def get(self, img): out = [] out_lands = [] limit = 512 det_scale = 1.0 if min(img.shape[0:2]) > limit: det_scale = float(limit) / min(img.shape[0:2]) bboxes, landmarks = self.detector.detect(img, scale=det_scale) if bboxes.shape[0] == 0: return out for fi in range(bboxes.shape[0]): bbox = bboxes[fi] landmark = landmarks[fi] input_blob = np.zeros((self.aug, 3) + self.image_size, dtype=np.uint8) M_list = [] # ta = datetime.datetime.now() for retry in range(self.aug): #found = False #for _ in range(10): # diff = np.random.rand(5,2)*2.0-1.0 # #diff *= self.aug_value # av = min(self.aug_value, (retry//2)) # diff *= av # pts5 = landmark+diff # if pts5[0][0]<pts5[1][0] and pts5[3][0]<pts5[4][0]: # found = True # break #if not found: # pts5 = landmark #diff = np.clip(diff, max_diff*-1, max_diff) #M = estimate_trans(pts5, self.image_size[0], s = 0.7) #rimg = cv2.warpAffine(img, M, self.image_size, borderValue = 0.0) w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1]) center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2 rotate = 0 _scale = 128.0 / max(w, h) rimg, M = img_helper.transform(img, center, self.image_size[0], _scale, rotate) #cv2.imwrite('./vis/rimg.jpg', rimg) #if retry%2==1: # rimg = rimg[:,::-1,:] rimg = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB) rimg = np.transpose(rimg, (2, 0, 1)) #3*112*112, RGB input_blob[retry] = rimg M_list.append(M) data = mx.nd.array(input_blob) db = mx.io.DataBatch(data=(data, )) for model in self.models: model.forward(db, is_train=False) X = None for model in self.models: #model.forward(db, is_train=False) x = model.get_outputs()[-1].asnumpy() if X is None: X = x else: X += x X /= len(self.models) #print(X.shape) if X.shape[1] >= 3000: X = X.reshape((X.shape[0], -1, 3)) else: X = X.reshape((X.shape[0], -1, 2)) #print(X.shape) X[:, :, 0:2] += 1 X[:, :, 0:2] *= (self.image_size[0] // 2) if X.shape[2] == 3: X[:, :, 2] *= (self.image_size[0] // 2) #X *= self.image_size[0] for i in range(X.shape[0]): M = M_list[i] IM = cv2.invertAffineTransform(M) x = X[i] x = img_helper.trans_points(x, IM) X[i] = x ret = np.mean(X, axis=0) # tb = datetime.datetime.now() #print('module time cost', (tb-ta).total_seconds()) out.append(ret) out_lands.append(landmark) return out, out_lands
def stabilise_image(thermogram, frames_to_process=-1, start_frame=-1, global_motion=False): ''' Stabilises an input video as a 3D numpy array. Can use global motion to maintain a single dimension of movement; useful for implementing DPPT. Expects a thermogram as a 3D numpy multdimensional array where each dimension is: [frame, row, column]. Frames_to_process sets the numbers of frames to use in FFT analysis, uses all frames by default. Frame_start is the first frame to process. Return_phase controls what phase will be returned, 0 is always a blank map; should almost always be 1. Global_motion indicates whether global motion should be used Returns an array in the format: [frame_index, row, column] ''' #Store original frames_to_process, as it is changed later in the code _frames_to_process = frames_to_process #CV requires frames to be 8 bit map any numpy array to 8bit framesU8 = file_io_thermal._convert_to_u_int8(thermogram) #read the shape of the images height, width = thermogram[0].shape #set a transform matrix transformed_frames = [np.identity(3)] #create a global motion matrix if specified if global_motion: #create a translation matrix, this will remove everything except the translation of the t'form translation_matrix = np.array([[0, 0, 1], [0, 0, 1]]) #initialise global transform as nonetype global_transform = None while (global_transform is None): #translation and frames_to_process arrays which will be applied to get the global translation per frame frames_to_process_matrix = np.array( [[1, frames_to_process, frames_to_process], [frames_to_process, 1, frames_to_process]]) #used to find global motion, will instead take frame sizes #find global transformation #find points (good Features) on the final and current frames and map them global_previous_points = cv2.goodFeaturesToTrack( framesU8[frames_to_process - 1], 100, 0.1, 1) global_current_points, global_status, _ = cv2.calcOpticalFlowPyrLK( framesU8[0], framesU8[frames_to_process - 1], global_previous_points, np.array([])) global_previous_points, global_current_points = map( lambda corners: corners[global_status.ravel().astype(bool)], [global_previous_points, global_current_points]) global_transform = cv2.estimateRigidTransform( global_previous_points, global_current_points, False) #if global transform does not occur, take the 10 off the frames_to_process frames_to_process = int( round(frames_to_process - (frames_to_process * 0.1))) #if global transform can be found, create global transform matrix and reset frames_to_process frames_to_process_transform_matrix = np.divide( global_transform, frames_to_process_matrix) global_transform_matrix = np.multiply( frames_to_process_transform_matrix, translation_matrix) frames_to_process = _frames_to_process #initialise frame 1 i = start_frame + 1 #+1 because tracking must start from the second frame while i < (start_frame + frames_to_process): #get current frame (uint8) current_frame = framesU8[i] #find points (good Features) on the previous and current frames and map them previous_points = cv2.goodFeaturesToTrack(current_frame, 100, 0.1, 1) current_points, status, _ = cv2.calcOpticalFlowPyrLK( framesU8[i - 1], current_frame, previous_points, np.array([])) previous_points, current_points = map( lambda corners: corners[status.ravel().astype(bool)], [previous_points, current_points]) transform = cv2.estimateRigidTransform(previous_points, current_points, False) if global_motion: #take out the global transformation from the current transform transform = np.subtract(transform, 
global_transform_matrix) #append the transform onto the transforms. if transform is not None: transform = np.append(transform, [[0, 0, 1]], axis=0) if transform is None: transform = transformed_frames[-1] transformed_frames.append(transform) i = i + 1 #create a stabilised frames array and apply the transform to each frame in the image. stabilised_frames = [] final_transform = np.identity(3) thermogram = thermogram[start_frame:start_frame + frames_to_process, :, :] for frame, transform, index in zip(thermogram, transformed_frames, range(len(thermogram))): transform = transform.dot(final_transform) if index % frames_to_process == 0: transform = np.identity(3) final_transform = transform inverse_transform = cv2.invertAffineTransform(transform[:2]) stabilised_frames.append( cv2.warpAffine(frame, inverse_transform, (width, height))) stabilisedFrames = np.stack(stabilised_frames, axis=0) #return the new stabilised image return stabilisedFrames
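# Usage sketch for stabilise_image (illustrative only): stabilise a thermogram loaded
# as a [frame, row, column] array; the file name and frame counts are assumptions.
import numpy as np

thermogram = np.load('thermogram.npy')                # shape: (frames, rows, cols)
stabilised = stabilise_image(thermogram,
                             frames_to_process=200,   # frames used in the analysis
                             start_frame=0,
                             global_motion=False)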
def align(self, T, I, rect, p, dp0=np.zeros(6), threshold=0.001, iterations=50): cap = get_frames(self.vid) T_rows, T_cols = T.shape I_rows, I_cols = I.shape dp = dp0 for i in range(iterations): # Forward warp matrix from frame_t to frame_t+1 W = np.float32([[1 + p[0], p[2], p[4]], [p[1], 1 + p[3], p[5]]]) # Warp image from frame_t+1 to frame_t and crop it I_warped = cv2.warpAffine(I, cv2.invertAffineTransform(W), (I_cols, I_rows)) I_warped = cap.crop_im(I_warped, rect) # Image gradients temp = cv2.Sobel(I_warped, cv2.CV_32F, 1, 0, ksize=3) print(temp) print(temp.shape) dI_x = temp.flatten() dI_y = cv2.Sobel(I_warped, cv2.CV_32F, 0, 1, ksize=3).flatten() A = np.zeros(6).reshape(1, 6) for y in range(T_rows): for x in range(T_cols): dW = np.array([[x, 0, y, 0, 1, 0], [0, x, 0, y, 0, 1]]) dI = np.array([dI_x[x * y], dI_y[x * y]]).reshape(1, 2) A = np.vstack((A, np.matmul(dI, dW).reshape(1, 6))) # print(np.shape(A)) # Steepest descent A = np.sum(A, axis=0).reshape(1, 6) # Hessian # print(np.shape(A)) H = np.matmul(A.T, A) w, v = np.linalg.eig(H) # print(w) # Error image err_im = (T - I_warped).flatten() err_im = np.reshape(err_im, (1, len(err_im))) del_p = np.sum(np.matmul(np.linalg.pinv(H), np.matmul(A.T, err_im)), axis=1) # Test for convergence and exit if np.linalg.norm(del_p) <= threshold: break # Update the parameters p = p + del_p return p
def detect(self,frame): rects=[] acount=0 dx=30 angle=self.prev_angle maxtimes=360/dx+1 times=0 angle=self.prev_angle img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) rimg = cv2.resize(img,None,fx=self.scale, fy=self.scale, interpolation = cv2.INTER_LINEAR) while len(rects)==0 and acount<maxtimes: rows,cols = rimg.shape times=times+1 M = cv2.getRotationMatrix2D((cols/2,rows/2),angle,1) imgw = cv2.warpAffine(rimg,M,(cols,rows)) rects = self.cascade.detectMultiScale(imgw, scaleFactor=self.scaleFactor, minNeighbors=4, minSize=self.minSize, flags = 2) acount=acount+1 sign=pow(-1,acount) self.prev_angle=angle angle=angle+(sign*acount*dx) angle=angle%360 if len(rects) == 0: return None #print('rect=2',rects) re_rect=rects rects[:,2:] += rects[:,:2] points=[] try: x1, y1, x2, y2 =rects[0] height=x2-x1 width=y2-y1 points.append((x1,y1)) points.append((x1,y1+width)) points.append((x2,y2)) points.append((x2,y2-width)) except: pass self.prev_points=points npoints=None if len(points)==4: c=np.array(points) iM=cv2.invertAffineTransform(M) extra=np.array([0.0,0.0,1.0]) iM=np.vstack((iM,extra)) cc=np.array([c],dtype='float') conv=cv2.perspectiveTransform(cc,iM) npoints=[] for vv in conv[0]: npoints.append((int(vv[0]/self.scale),int(vv[1]/self.scale))) print('whole card angel:',angle) if(angle !=''): return npoints,re_rect,angle else: angle=0 return npoints,re_rect,angle
def rotate_landmarks(face, rotation_matrix): """ Rotate the landmarks and bounding box for faces found in rotated images. Pass in a DetectedFace object, Alignments dict or DLib rectangle""" logger.trace("Rotating landmarks: (rotation_matrix: %s, type(face): %s", rotation_matrix, type(face)) if isinstance(face, DetectedFace): bounding_box = [[face.x, face.y], [face.x + face.w, face.y], [face.x + face.w, face.y + face.h], [face.x, face.y + face.h]] landmarks = face.landmarksXY elif isinstance(face, dict): bounding_box = [[face.get("x", 0), face.get("y", 0)], [face.get("x", 0) + face.get("w", 0), face.get("y", 0)], [face.get("x", 0) + face.get("w", 0), face.get("y", 0) + face.get("h", 0)], [face.get("x", 0), face.get("y", 0) + face.get("h", 0)]] landmarks = face.get("landmarksXY", list()) elif isinstance(face, dlib.rectangle): # pylint: disable=c-extension-no-member bounding_box = [[face.left(), face.top()], [face.right(), face.top()], [face.right(), face.bottom()], [face.left(), face.bottom()]] landmarks = list() else: raise ValueError("Unsupported face type") logger.trace("Original landmarks: %s", landmarks) rotation_matrix = cv2.invertAffineTransform( # pylint: disable=no-member rotation_matrix) rotated = list() for item in (bounding_box, landmarks): if not item: continue points = np.array(item, np.int32) points = np.expand_dims(points, axis=0) transformed = cv2.transform(points, # pylint: disable=no-member rotation_matrix).astype(np.int32) rotated.append(transformed.squeeze()) # Bounding box should follow x, y planes, so get min/max # for non-90 degree rotations pt_x = min([pnt[0] for pnt in rotated[0]]) pt_y = min([pnt[1] for pnt in rotated[0]]) pt_x1 = max([pnt[0] for pnt in rotated[0]]) pt_y1 = max([pnt[1] for pnt in rotated[0]]) if isinstance(face, DetectedFace): face.x = int(pt_x) face.y = int(pt_y) face.w = int(pt_x1 - pt_x) face.h = int(pt_y1 - pt_y) face.r = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face.landmarksXY = rotated_landmarks elif isinstance(face, dict): face["x"] = int(pt_x) face["y"] = int(pt_y) face["w"] = int(pt_x1 - pt_x) face["h"] = int(pt_y1 - pt_y) face["r"] = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face["landmarksXY"] = rotated_landmarks else: rotated_landmarks = dlib.rectangle( # pylint: disable=c-extension-no-member int(pt_x), int(pt_y), int(pt_x1), int(pt_y1)) face = rotated_landmarks logger.trace("Rotated landmarks: %s", rotated_landmarks) return face
def rotate_landmarks(face, rotation_matrix): # pylint:disable=c-extension-no-member """ Rotate the landmarks and bounding box for faces found in rotated images. Pass in a DetectedFace object, Alignments dict or bounding box dict (as defined in lib/plugins/extract/detect/_base.py) """ logger = logging.getLogger(__name__) # pylint:disable=invalid-name logger.trace("Rotating landmarks: (rotation_matrix: %s, type(face): %s", rotation_matrix, type(face)) # Detected Face Object if isinstance(face, DetectedFace): bounding_box = [[face.x, face.y], [face.x + face.w, face.y], [face.x + face.w, face.y + face.h], [face.x, face.y + face.h]] landmarks = face.landmarksXY # Alignments Dict elif isinstance(face, dict) and "x" in face: bounding_box = [ [face.get("x", 0), face.get("y", 0)], [face.get("x", 0) + face.get("w", 0), face.get("y", 0)], [ face.get("x", 0) + face.get("w", 0), face.get("y", 0) + face.get("h", 0) ], [face.get("x", 0), face.get("y", 0) + face.get("h", 0)] ] landmarks = face.get("landmarksXY", list()) # Bounding Box Dict elif isinstance(face, dict) and "left" in face: bounding_box = [[face["left"], face["top"]], [face["right"], face["top"]], [face["right"], face["bottom"]], [face["left"], face["bottom"]]] landmarks = list() else: raise ValueError("Unsupported face type") logger.trace("Original landmarks: %s", landmarks) rotation_matrix = cv2.invertAffineTransform( # pylint:disable=no-member rotation_matrix) rotated = list() for item in (bounding_box, landmarks): if not item: continue points = np.array(item, np.int32) points = np.expand_dims(points, axis=0) transformed = cv2.transform( points, # pylint:disable=no-member rotation_matrix).astype(np.int32) rotated.append(transformed.squeeze()) # Bounding box should follow x, y planes, so get min/max # for non-90 degree rotations pt_x = min([pnt[0] for pnt in rotated[0]]) pt_y = min([pnt[1] for pnt in rotated[0]]) pt_x1 = max([pnt[0] for pnt in rotated[0]]) pt_y1 = max([pnt[1] for pnt in rotated[0]]) width = pt_x1 - pt_x height = pt_y1 - pt_y if isinstance(face, DetectedFace): face.x = int(pt_x) face.y = int(pt_y) face.w = int(width) face.h = int(height) face.r = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face.landmarksXY = rotated_landmarks elif isinstance(face, dict) and "x" in face: face["x"] = int(pt_x) face["y"] = int(pt_y) face["w"] = int(width) face["h"] = int(height) face["r"] = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face["landmarksXY"] = rotated_landmarks else: face["left"] = int(pt_x) face["top"] = int(pt_y) face["right"] = int(pt_x1) face["bottom"] = int(pt_y1) logger.trace("Rotated landmarks: %s", rotated_landmarks) return face
def face_swap(orig_image, down_scale, index): # extract face from original facelist = extract_faces(orig_image, 256) if len(facelist) > 0: print("saving this frame: " + str(index)) path = './images' # only write frame into file if found faces. cv2.imwrite(os.path.join(path, 'ori' + str(index) + '.jpg'), orig_image) result_image = orig_image # iterate through all detected faces for (face, resized_image) in enumerate(facelist): range_ = numpy.linspace(128 - 80, 128 + 80, 5) mapx = numpy.broadcast_to(range_, (5, 5)) mapy = mapx.T # warp image like in the training mapx = mapx + numpy.random.normal(size=(5, 5), scale=5) mapy = mapy + numpy.random.normal(size=(5, 5), scale=5) src_points = numpy.stack([mapx.ravel(), mapy.ravel()], axis=-1) dst_points = numpy.mgrid[0:65:16, 0:65:16].T.reshape(-1, 2) mat = umeyama(src_points, dst_points, True)[0:2] # resized_image = np.float32(resized_image) # print(resized_image[0].shape) warped_resized_image = cv2.warpAffine(resized_image[0], mat, (64, 64)) / 255.0 test_images = numpy.empty((1, ) + warped_resized_image.shape) test_images[0] = warped_resized_image # predict faceswap using encoder A # figure = autoencoder_A.predict(test_images) cv2.imwrite('./images/ori-image' + str(index), warped_resized_image) # print(warped_resized_image.shape) out = model.forward(warped_resized_image, select='B') new_face = numpy.clip(out.detach().cpu().numpy()[0] * 255.0, 0, 255).astype('uint8') cv2.imwrite('./images/new-image' + str(index), new_face) mat_inv = umeyama(dst_points, src_points, True)[0:2] # insert face into extracted face dest_face = blend_warp(new_face, resized_image, mat_inv) # create an inverse affine transform matrix to insert extracted face again mat = get_align_mat(face) mat = mat * (256 - 2 * 48) mat[:, 2] += 48 mat_inv = cv2.invertAffineTransform(mat) # insert new face into original image result_image = blend_warp(dest_face, result_image, mat_inv) # return resulting image after downscale return cv2.resize(result_image, (result_image.shape[1] // down_scale, result_image.shape[0] // down_scale))
def align(self, image, gray, rect, z_addition): # convert the landmark (x, y)-coordinates to a NumPy h1, w1 = image.shape[:2] shape = self.predictor(gray, rect) shape = shape_to_np(shape) #simple hack ;) if (len(shape)==68): # extract the left and right eye (x, y)-coordinates (lStart, lEnd) = FACIAL_LANDMARKS_68_IDXS["left_eye"] (rStart, rEnd) = FACIAL_LANDMARKS_68_IDXS["right_eye"] else: (lStart, lEnd) = FACIAL_LANDMARKS_5_IDXS["left_eye"] (rStart, rEnd) = FACIAL_LANDMARKS_5_IDXS["right_eye"] leftEyePts = shape[lStart:lEnd] rightEyePts = shape[rStart:rEnd] # compute the center of mass for each eye leftEyeCenter = leftEyePts.mean(axis=0).astype("int") rightEyeCenter = rightEyePts.mean(axis=0).astype("int") # compute the angle between the eye centroids dY = rightEyeCenter[1] - leftEyeCenter[1] dX = rightEyeCenter[0] - leftEyeCenter[0] angle = np.degrees(np.arctan2(dY, dX)) - 180 # compute the desired right eye x-coordinate based on the # desired x-coordinate of the left eye desiredRightEyeX = 1.0 - self.desiredLeftEye[0] # determine the scale of the new resulting image by taking # the ratio of the distance between eyes in the *current* # image to the ratio of distance between eyes in the # *desired* image dist = np.sqrt((dX ** 2) + (dY ** 2)) desiredDist = (desiredRightEyeX - self.desiredLeftEye[0]) desiredDist *= self.desiredFaceWidth scale = desiredDist / dist # compute center (x, y)-coordinates (i.e., the median point) # between the two eyes in the input image eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) // 2, (leftEyeCenter[1] + rightEyeCenter[1]) // 2) # grab the rotation matrix for rotating and scaling the face M = cv2.getRotationMatrix2D(eyesCenter, angle, scale) # update the translation component of the matrix tX = self.desiredFaceWidth * 0.5 tY = self.desiredFaceHeight * self.desiredLeftEye[1] M[0, 2] += (tX - eyesCenter[0]) M[1, 2] += (tY - eyesCenter[1]) # apply the affine transformation (w, h) = (self.desiredFaceWidth, self.desiredFaceHeight) output = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC) # invert the previous affine transformation for later Mi = cv2.invertAffineTransform(M) # BGR -> RGB output = output[:,:,::-1] # encode with GLOW, do operations on z z = encode(output) z[0] += z_addition # decode back to image and back to BGR output = decode(z)[0] output = output[:,:,::-1] # invert the affine transformation on output output = cv2.warpAffine(output, Mi, (w1, h1), flags=cv2.INTER_CUBIC) # overwrite original image with masked output mask = np.sum(output, axis=2) == 0.0 image = np.multiply(mask.reshape((h1, w1, 1)), image) image += output return image
#for x in xrange(src_img_width):
#    for y in xrange(src_img_height):
#        alpha = extract_alpha[y][x]
#        map_result[y][x][0] = (1-alpha) * map_result[y][x][0] + (alpha) * src_img[y][x][0]
#        map_result[y][x][1] = (1-alpha) * map_result[y][x][1] + (alpha) * src_img[y][x][1]
#        map_result[y][x][2] = (1-alpha) * map_result[y][x][2] + (alpha) * src_img[y][x][2]
#cv2.imshow('map result', map_result)

#center = src_face['position']['nose']
#x = center['x'] * src_img_width / 100
#y = center['y'] * src_img_height / 100
#center = (int(x), int(y))
# seamlessClone expects an integer (x, y) centre point
center = (map_result.shape[1] // 2, map_result.shape[0] // 2)
map_result = cv2.seamlessClone(src_img, map_result, extract_mask, center, flags=cv2.NORMAL_CLONE)
cv2.imshow('merge', map_result)

# warp the merged result back into the destination image's frame
imap_matrix = cv2.invertAffineTransform(map_matrix)
print(map_result.shape)
print(imap_matrix)
# warpAffine's dsize is (width, height)
final = cv2.warpAffine(map_result, imap_matrix, dsize=(dst_img.shape[1], dst_img.shape[0]))
cv2.imshow('final.png', final)
cv2.imwrite(src_name + dst_name + 'final.png', final)
cv2.waitKey(0)
ss = np.array(get_feature_points(src_face, src_img_width, src_img_height), dtype=np.float32)
ps = np.array(get_feature_points(dst_face, dst_img_width, dst_img_height), dtype=np.float32)
print(ps)
print(ss)

map_matrix = cv2.getAffineTransform(ps, ss)
print(map_matrix)
#dsize = (300,300)
map_result = cv2.warpAffine(dst_img, map_matrix, dsize=(300, 300))

extract_face = contour.extract_face(src_face['face_id'], src_img_width, src_img_height, src_name)
cv2.imshow('extract source image', extract_face)

# merge: copy every non-black pixel of the extracted face onto the warped destination
for x in range(src_img_width):
    for y in range(src_img_height):
        if sum(extract_face[y][x]) == 0:
            continue
        else:
            # here we need to change the light of extract face
            map_result[y][x] = extract_face[y][x]
cv2.imshow('merge', map_result)

# warp the merged result back into the destination image's frame
imap_matrix = cv2.invertAffineTransform(map_matrix)
print(map_result.shape)
print(imap_matrix)
# warpAffine's dsize is (width, height)
final = cv2.warpAffine(map_result, imap_matrix, dsize=(dst_img.shape[1], dst_img.shape[0]))
cv2.imwrite('final.png', final)
cv2.waitKey(0)
def findRotMaxRect(data_in, flag_opt=False, flag_parallel=False, nbre_angle=10, flag_out=None, flag_enlarge_img=False, limit_image_size=300): ''' flag_opt : True only nbre_angle are tested between 90 and 180 and a opt descent algo is run on the best fit False 100 angle are tested from 90 to 180. flag_parallel: only valid when flag_opt=False. the 100 angle are run on multithreading flag_out : angle and rectangle of the rotated image are output together with the rectangle of the original image flag_enlarge_img : the image used in the function is double of the size of the original to ensure all feature stay in when rotated limit_image_size : control the size numbre of pixel of the image use in the function. this speeds up the code but can give approximated results if the shape is not simple ''' #time_s = datetime.datetime.now() #make the image square #---------------- nx_in, ny_in = data_in.shape if nx_in != ny_in: n = max([nx_in, ny_in]) data_square = np.ones([n, n]) xshift = (n - nx_in) / 2 yshift = (n - ny_in) / 2 if yshift == 0: data_square[xshift:(xshift + nx_in), :] = data_in[:, :] else: data_square[:, yshift:(yshift + ny_in)] = data_in[:, :] else: xshift = 0 yshift = 0 data_square = data_in #apply scale factor if image bigger than limit_image_size #---------------- if data_square.shape[0] > limit_image_size: data_small = cv2.resize(data_square, (limit_image_size, limit_image_size), interpolation=0) scale_factor = 1. * data_square.shape[0] / data_small.shape[0] else: data_small = data_square scale_factor = 1 # set the input data with an odd number of point in each dimension to make rotation easier #---------------- nx, ny = data_small.shape nx_extra = -nx ny_extra = -ny if nx % 2 == 0: nx += 1 nx_extra = 1 if ny % 2 == 0: ny += 1 ny_extra = 1 data_odd = np.ones([ data_small.shape[0] + max([0, nx_extra]), data_small.shape[1] + max([0, ny_extra]) ]) data_odd[:-nx_extra, :-ny_extra] = data_small nx, ny = data_odd.shape nx_odd, ny_odd = data_odd.shape if flag_enlarge_img: data = np.zeros([2 * data_odd.shape[0] + 1, 2 * data_odd.shape[1] + 1 ]) + 1 nx, ny = data.shape data[nx / 2 - nx_odd / 2:nx / 2 + nx_odd / 2, ny / 2 - ny_odd / 2:ny / 2 + ny_odd / 2] = data_odd else: data = np.copy(data_odd) nx, ny = data.shape #print (datetime.datetime.now()-time_s).total_seconds() if flag_opt: myranges_brute = ([ (90., 180.), ]) coeff0 = np.array([ 0., ]) coeff1 = optimize.brute(residual, myranges_brute, args=(data, ), Ns=nbre_angle, finish=None) popt = optimize.fmin(residual, coeff1, args=(data, ), xtol=5, ftol=1.e-5, disp=False) angle_selected = popt[0] #rotation_angle = np.linspace(0,360,100+1)[:-1] #mm = [residual(aa,data) for aa in rotation_angle] #plt.plot(rotation_angle,mm) #plt.show() #pdb.set_trace() else: rotation_angle = np.linspace(90, 180, 100 + 1)[:-1] args_here = [] for angle in rotation_angle: args_here.append([angle, data]) if flag_parallel: # set up a pool to run the parallel processing cpus = multiprocessing.cpu_count() pool = multiprocessing.Pool(processes=cpus) # then the map method of pool actually does the parallelisation results = pool.map(residual_star, args_here) pool.close() pool.join() else: results = [] for arg in args_here: results.append(residual_star(arg)) argmin = np.array(results).argmin() angle_selected = args_here[argmin][0] rectangle, M_rect_max, RotData = get_rectangle_coord(angle_selected, data, flag_out=True) #rectangle, M_rect_max = get_rectangle_coord(angle_selected,data) #print (datetime.datetime.now()-time_s).total_seconds() #invert rectangle M_invert = 
cv2.invertAffineTransform(M_rect_max) rect_coord = [ rectangle[:2], [rectangle[0], rectangle[3]], rectangle[2:], [rectangle[2], rectangle[1]] ] #ax = plt.subplot(111) #ax.imshow(RotData.T,origin='lower',interpolation='nearest') #patch = patches.Polygon(rect_coord, edgecolor='k', facecolor='None', linewidth=2) #ax.add_patch(patch) #plt.show() rect_coord_ori = [] for coord in rect_coord: rect_coord_ori.append( np.dot(M_invert, [coord[0], (ny - 1) - coord[1], 1])) #transform to numpy coord of input image coord_out = [] for coord in rect_coord_ori: coord_out.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0)-xshift,\ scale_factor*round((ny-1)-coord[1]-(ny/2-ny_odd/2),0)-yshift]) coord_out_rot = [] coord_out_rot_h = [] for coord in rect_coord: coord_out_rot.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0)-xshift, \ scale_factor*round( coord[1]-(ny/2-ny_odd/2),0)-yshift ]) coord_out_rot_h.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0), \ scale_factor*round( coord[1]-(ny/2-ny_odd/2),0) ]) #M = cv2.getRotationMatrix2D( ( (data_square.shape[0]-1)/2, (data_square.shape[1]-1)/2 ), angle_selected,1) #RotData = cv2.warpAffine(data_square,M,data_square.shape,flags=cv2.INTER_NEAREST,borderValue=1) #ax = plt.subplot(121) #ax.imshow(data_square.T,origin='lower',interpolation='nearest') #ax = plt.subplot(122) #ax.imshow(RotData.T,origin='lower',interpolation='nearest') #patch = patches.Polygon(coord_out_rot_h, edgecolor='k', facecolor='None', linewidth=2) #ax.add_patch(patch) #plt.show() #coord for data_in #---------------- #print scale_factor, xshift, yshift #coord_out2 = [] #for coord in coord_out: # coord_out2.append([int(np.round(scale_factor*coord[0]-xshift,0)),int(np.round(scale_factor*coord[1]-yshift,0))]) #print (datetime.datetime.now()-time_s).total_seconds() if flag_out is None: return coord_out elif flag_out == 'rotation': return coord_out, angle_selected, coord_out_rot else: print 'bad def in findRotMaxRect input. stop' pdb.set_trace()
def apply_new_face(self, image, new_face, image_mask, mat, image_size, size):
    base_image = numpy.copy(image)
    new_image = numpy.copy(image)
    cv2.warpAffine(new_face, mat, image_size, new_image,
                   cv2.WARP_INVERSE_MAP, cv2.BORDER_TRANSPARENT)
    outimage = None
    if self.seamless_clone:
        # locate the face centre in the original frame by pushing the crop
        # centre through the inverted alignment matrix
        masky, maskx = cv2.transform(numpy.array([size / 2, size / 2]).reshape(1, 1, 2),
                                     cv2.invertAffineTransform(mat)).reshape(2).astype(int)
        outimage = cv2.seamlessClone(new_image.astype(numpy.uint8),
                                     base_image.astype(numpy.uint8),
                                     (image_mask * 255).astype(numpy.uint8),
                                     (masky, maskx),
                                     cv2.NORMAL_CLONE)
    else:
        foreground = cv2.multiply(image_mask, new_image.astype(float))
        background = cv2.multiply(1.0 - image_mask, base_image.astype(float))
        outimage = cv2.add(foreground, background)
    return outimage
def align_ecc(img, img_ref, method='ecc', mode='affine', coords=None, rescale=False, use_gradient=True): try: import cv2 except ModuleNotFoundError: print('It seems OpenCV is not install. Please do so by running:' 'pip install opencv-python-headless') if rescale: img0 = rescale_intensity(img_ref, in_range='image', out_range='float32').astype('float32') img1 = rescale_intensity(img, in_range='image', out_range='float32').astype('float32') else: img0 = img_ref.astype('float32') img1 = img.astype('float32') if use_gradient: def get_gradient(im): # Calculate the x and y gradients using Sobel operator grad_x = cv2.Sobel(im, cv2.CV_32F, 1, 0, ksize=1) grad_y = cv2.Sobel(im, cv2.CV_32F, 0, 1, ksize=1) # Combine the two gradients grad = cv2.addWeighted(np.absolute(grad_x), 0.5, np.absolute(grad_y), 0.5, 0) return grad img0 = get_gradient(img0) img1 = get_gradient(img1) shift = register_translation(img0, img1, 10) print('Found init shift: {}'.format(shift[0])) warp_matrix = np.eye(2, 3, dtype=np.float32) warp_matrix[:, 2] = -shift[0][::-1] # warp_matrix[:,2] = -shift[0] number_of_iterations = 1000000 termination_eps = 1e-6 ecc_mode = { 'affine': cv2.MOTION_AFFINE, 'translation': cv2.MOTION_TRANSLATION } criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps) # print(warp_matrix) (cc, warp_matrix) = cv2.findTransformECC(img0, img1, warp_matrix, ecc_mode[mode], criteria) # print(warp_matrix) img_x = cv2.warpAffine(img1, cv2.invertAffineTransform(warp_matrix), img1.shape) imgref_x = cv2.warpAffine(img0, warp_matrix, img0.shape) # make a scikit-image/ndimage-compatible output transform matrix (y and x flipped!) trans_matrix = np.vstack((np.hstack( (np.rot90(warp_matrix[:2, :2], 2), np.flipud(warp_matrix[:, 2:]))), [0, 0, 1])) if coords is not None: coords_x = matrix_transform(coords, trans_matrix) else: coords_x = [] return trans_matrix, coords_x, img_x, imgref_x
def compare(self, pattern, images, settings): frame_height, frame_width = images[COLOR_IMAGE].shape[:2] frame_mask = np.zeros((frame_height, frame_width), np.uint8) pattern_height, pattern_width = pattern.image.shape[0:2] selected_split = settings.selected_split() points = selected_split.split(images) if selected_split else [ (0, frame_height, 0, frame_width) ] for y1, y2, x1, x2 in points: pcb_gray = np.array(images[GRAY_IMAGE][y1:y2, x1:x2]) pcb_bin = np.array(images[BIN_IMAGE][y1:y2, x1:x2]) pcb_height, pcb_width = pcb_gray.shape[0:2] pcb_keypoints, pcb_descriptors = self.extract_key_points_and_descriptors( pcb_gray) matches = self.extract_matches(pattern.descriptors, pcb_descriptors) #img3 = cv2.drawMatches(pattern.image,pattern.keypoints,pcb_bin,pcb_keypoints,matches, flags=2, outImg = None) #plt.imshow(img3), plt.show() pattern_points = np.float32([ pattern.keypoints[m.queryIdx].pt for m in matches ]).reshape(-1, 2) pcb_points = np.float32([ pcb_keypoints[m.trainIdx].pt for m in matches ]).reshape(-1, 2) M_pcb_translate = np.float32([[1, 0, x1], [0, 1, y1]]) M_pcb_to_pattern = self.ransac(pcb_points, pattern_points, iters=1000, maxerror=2) M_pattern_to_pcb = cv2.invertAffineTransform(M_pcb_to_pattern) M_xor_to_frame = self.add_affine_transform(M_pattern_to_pcb, M_pcb_translate) transformed_pcb = cv2.warpAffine(pcb_bin, M_pcb_to_pattern, (pattern_width, pattern_height)) _, transformed_pcb = cv2.threshold(transformed_pcb, 127, 255, cv2.THRESH_BINARY) #plt.subplot(1,3,1) #plt.title("Orig"), plt.imshow(pcb_bin, 'gray', interpolation='none') #plt.subplot(1,3,2) #plt.title("transformed_pcb"), plt.imshow(transformed_pcb, 'gray', interpolation='none') #plt.subplot(1,3,3) #plt.title("pattern"), plt.imshow(pattern.image, 'gray', interpolation='none'), plt.show() xor_img = cv2.bitwise_xor(transformed_pcb, pattern.image) frame_mask = cv2.bitwise_or( cv2.warpAffine(xor_img, M_xor_to_frame, (frame_width, frame_height)), frame_mask) _, frame_mask = cv2.threshold(frame_mask, 127, 255, cv2.THRESH_BINARY) #plt.title("After xor"), plt.imshow(frame_mask, 'gray', interpolation='none'), plt.show() frame_mask = self.morphology_opening(frame_mask, (20, 20)) #plt.title("After morphology"), plt.imshow(frame_mask, 'gray', interpolation='none'), plt.show() images[OUT_IMAGE] = BIN_IMAGE images[BIN_IMAGE] = frame_mask return images
    return np.array(selected_pts, dtype=np.float32)

show_img = np.copy(img)
src_pts = select_points(show_img, 3)
dst_pts = np.array([[0, 240], [0, 0], [240, 0]], dtype=np.float32)
affine_m = cv2.getAffineTransform(src_pts, dst_pts)
unwarped_img = cv2.warpAffine(img, affine_m, (240, 240))
cv2.imshow('result', np.hstack((show_img, unwarped_img)))
k = cv2.waitKey()
cv2.destroyAllWindows()

inv_affine = cv2.invertAffineTransform(affine_m)
warped_img = cv2.warpAffine(unwarped_img, inv_affine, (320, 240))
cv2.imshow('result', np.hstack((show_img, unwarped_img, warped_img)))
k = cv2.waitKey()
cv2.destroyAllWindows()

rotation_mat = cv2.getRotationMatrix2D(tuple(src_pts[0]), 6, 1)
rotated_img = cv2.warpAffine(img, rotation_mat, (240, 240))
cv2.imshow('result', np.hstack((show_img, rotated_img)))
k = cv2.waitKey()
cv2.destroyAllWindows()

show_img = np.copy(img)
src_pts = select_points(show_img, 4)
dst_pts = np.array([[0, 240], [0, 0], [240, 0], [240, 240]], dtype=np.float32)
perspective_m = cv2.getPerspectiveTransform(src_pts, dst_pts)
#normalized_palm_detections = palm_detector.predict_on_image(img1)
#palm_detections = denormalize_detections(normalized_palm_detections, scale, pad)
#xc, yc, scale, theta = palm_detector.detection2roi(palm_detections.cpu())

res = 256
points1 = np.array([[0, 0, res - 1], [0, res - 1, 0]], dtype=np.float32).T
affines = []
imgs = []
for i in range(points.shape[0]):
    pts = points[i, :, :3].cpu().numpy().T
    M = cv2.getAffineTransform(pts, points1)
    img = cv2.warpAffine(img1, M, (res, res))  #, borderValue=127.5)
    img = torch.tensor(img).to(gpu)
    imgs.append(img)
    # keep the inverse transform so landmarks can be mapped back to the source image
    affine = cv2.invertAffineTransform(M).astype('float32')
    affine = torch.tensor(affine).to(gpu)
    affines.append(affine)

if imgs:
    imgs = torch.stack(imgs)  #.permute(0,3,1,2).float() / 255. #/ 127.5 - 1.0
    affines = torch.stack(affines)
else:
    imgs = torch.zeros((0, 3, res, res)).to(gpu)
    affines = torch.zeros((0, 2, 3)).to(gpu)

#img, affine2, box2 = hand_regressor.extract_roi(img1, xc, yc, theta, scale)
flags2, handed2, normalized_landmarks2 = hand_regressor(imgs)
landmarks2 = hand_regressor.denormalize_landmarks(normalized_landmarks2, affines)
def rulerEndpoints(image_mask): """ Find the ruler end points given an image mask image_mask: 8-bit single channel image_mask """ image_height = image_mask.shape[0] image_mask = image_mask.astype(np.float64) image_mask /= 255.0 # Find center of rotation of the mask moments = cv2.moments(image_mask) centroid_x = moments['m10'] / moments['m00'] centroid_y = moments['m01'] / moments['m00'] centroid = (centroid_x, centroid_y) # Find the transofrm to translate the image to the # center of the of the ruler center_y = image_mask.shape[0] / 2.0 center_x = image_mask.shape[1] / 2.0 center = (center_x, center_y) diff_x = center_x - centroid_x diff_y = center_y - centroid_y translation=np.array([[1,0,diff_x], [0,1,diff_y], [0,0,1]]) min_moment = float('+inf') best = None best_angle = None for angle in np.linspace(-90,90,181): rotation = cv2.getRotationMatrix2D(centroid, float(angle), 1.0) # Matrix needs bottom row added # Warning: cv2 dimensions are width, height not height, width! rotation = np.vstack([rotation, [0,0,1]]) rt_matrix = np.matmul(translation,rotation) rotated = cv2.warpAffine(image_mask, rt_matrix[0:2], (image_mask.shape[1], image_mask.shape[0])) rotated_moments = cv2.moments(rotated) if rotated_moments['mu02'] < min_moment: best_angle = angle min_moment = rotated_moments['mu02'] best = np.copy(rt_matrix) #Now that we have the best rotation, find the endpoints warped = cv2.warpAffine(image_mask, best[0:2], (image_mask.shape[1], image_mask.shape[0])) # Reduce the image down to a 1d line and up convert to 64-bit # float between 0 and 1 col_sum = cv2.reduce(warped,0, cv2.REDUCE_SUM).astype(np.float64) # Find the left/right of masked region in the line vector # Then, knowing its the center of the transformed image # back out the y coordinates in the actual image inversing # the transform above cumulative_sum = np.cumsum(col_sum[0]) # Normalize the cumulative sum from 0 to 1 max_sum=np.max(cumulative_sum) cumulative_sum /= max_sum # Find the left,right indices based on thresholds left_idx = np.searchsorted(cumulative_sum, 0.06, side='left') right_idx = np.searchsorted(cumulative_sum, 0.94, side='right') width = right_idx - left_idx # Add 10% of the ruler width left_idx = left_idx-(width*0.10) right_idx = right_idx+(width*0.10) endpoints=np.array([[[left_idx, image_height / 2], [right_idx, image_height / 2]]]) # Finally inverse the transform to get the actual y coordinates inverse = cv2.invertAffineTransform(best[0:2]) inverse = np.vstack([inverse, [0,0,1]]) return cv2.perspectiveTransform(endpoints, inverse)[0]
def crop_segmentation(mask, *others, width=512, height=512, extra_space=0.1): ''' Crop using `mask` as input. `others` are optional arguments that will be croped using `mask` as reference. ''' # Declare structure used in morphotology opening morph_structure = np.ones((11, 11)) # Binarize mask mask_bin = np.squeeze(mask) > 0.5 # Use morphology opening to reduce small structures detected. mask_bin = ndimage.morphology.binary_opening(mask_bin, morph_structure) mask_bin_labeled = measure.label(mask_bin, background=0) mask_bin = np.zeros_like(mask_bin) unique_values, counts = np.unique(mask_bin_labeled, return_counts=True) for i in np.argsort(counts)[-3:]: val = unique_values[i] if val == 0: continue mask_bin[mask_bin_labeled == val] = 1 morph_structure = np.ones((22, 22)) mask_bin = ndimage.morphology.binary_closing(mask_bin, morph_structure) # Squeeze horizontal and vertical dimention to find where mask begins and ends mask_bin_hor = mask_bin.any(axis=0) mask_bin_ver = mask_bin.any(axis=1) # Find index of first and last positive pixel xmin, xmax = np.argmax(mask_bin_hor), len(mask_bin_hor) - np.argmax(mask_bin_hor[::-1]) ymin, ymax = np.argmax(mask_bin_ver), len(mask_bin_ver) - np.argmax(mask_bin_ver[::-1]) # Add extra space xextra = int((xmax - xmin) * extra_space) yextra = int((ymax - ymin) * extra_space) xmin -= xextra xmax += xextra ymin -= yextra ymax += yextra # We will use affine transform to crop image. It will deal with padding image if necessary # Note: `pts` will follow a L shape: top left, bottom left and bottom right # For details see: https://docs.opencv.org/3.0-beta/doc/py_tutorials/py_imgproc/py_geometric_transformations/py_geometric_transformations.html#affine-transformation pts1 = np.float32([[xmin, ymin], [xmin, ymax], [xmax, ymax]]) pts2 = np.float32([[0, 0], [0, height], [width, height]]) M = cv2.getAffineTransform(pts1, pts2) # Crop mask mask_crop = cv2.warpAffine(mask_bin.astype(np.float), M, (height, width), flags=cv2.INTER_AREA, borderValue=0) M_inv = cv2.invertAffineTransform(M) inverse = lambda x: cv2.warpAffine(x, M_inv, (height, width)) if len(others) > 0: # Crop others others_crop = tuple( cv2.warpAffine(np.squeeze(other), M, (height, width), flags=cv2.INTER_AREA, borderValue=0) for other in others) return (mask_crop,) + others_crop, inverse else: return mask_crop, inverse
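# Usage sketch (an assumption, not from the original code): crop an image together with
# its segmentation mask, then warp a processed crop back toward the original coordinate
# frame with the returned inverse function. `image` and `some_model` are illustrative.
crop_results, inverse = crop_segmentation(mask, image, width=512, height=512)
mask_crop, image_crop = crop_results
processed_crop = some_model(image_crop)   # hypothetical downstream processing
restored = inverse(processed_crop)        # applies the inverted affine transform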
    if (count == 19 or count == 95 or count == 125 or count == 175
            or count == 205 or count == 240):
        frame = cv2.imread(img)
        x, y, w, h = ROIs[count]
        rect = (x, y, w, h)
        color_template = frame[y:y + h, x:x + w]
        template = cv2.cvtColor(color_template, cv2.COLOR_BGR2GRAY)
        T = np.float32(template) / 255
        refPt = [[x, y], [x + w, y + h]]
        p = np.zeros(6)

    color_frame = cv2.imread(img)
    gray_frame = cv2.cvtColor(color_frame, cv2.COLOR_BGR2GRAY)
    I = np.float32(gray_frame) / 255
    p = InverseLK(I, T, parameters, refPt, p)

    # build the warp from the estimated parameters and invert it to map the
    # template rectangle into the current frame
    warp_mat = np.array([[1 + p[0], p[2], p[4]], [p[1], 1 + p[3], p[5]]])
    warp_mat = cv2.invertAffineTransform(warp_mat)
    rectangle = [[rect[0], rect[1]], [rect[0] + w, rect[1]],
                 [rect[0] + w, rect[1] + h], [rect[0], rect[1] + h]]
    box = np.array(rectangle)
    box = box.T
    box = np.vstack((box, np.ones((1, 4))))
    pts = np.dot(warp_mat, box)
    pts = pts.T
    pts = pts.astype(np.int32)
    print(count)
    count += 1
    cv2.polylines(color_frame, [pts], True, (0, 255, 255), 2)
    cv2.imshow('Tracked Image', color_frame)
    cv2.waitKey(1)
def traks_obj_util(img, T, p, bb): W = get_W(p) # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # T = cv2.cvtColor(T, cv2.COLOR_BGR2GRAY) W = cv2.invertAffineTransform(W) I = cv2.warpAffine(img, W, (img.shape[1], img.shape[0])) # cv2.imshow("warped", im) # cv2.waitKey(0) # if cv2.waitKey(0) & 0xff == 27: # cv2.destroyAllWindows() # cv2.imshow("template", T) # if cv2.waitKey(0) & 0xff == 27: # cv2.destroyAllWindows() grad_x = cv2.Sobel(img, cv2.CV_32F, 1, 0, ksize=3) grad_y = cv2.Sobel(img, cv2.CV_32F, 0, 1, ksize=3) grad_x = cv2.warpAffine(grad_x, W, (grad_x.shape[1], grad_x.shape[0])) grad_y = cv2.warpAffine(grad_y, W, (grad_y.shape[1], grad_y.shape[0])) H = np.zeros((6, 6)) for y in range(bb[0], bb[2], 1): for x in range(bb[1], bb[3], 1): del_W = np.asarray([[x, 0, y, 0, 1, 0], [0, x, 0, y, 0, 1]]) grad = np.asarray([grad_x[y, x], grad_y[y, x]]) # grad = [grad_x[y,x], grad_y[y,x]] # print(grad.shape) # print(del_W.shape) sd = np.matmul(grad, del_W) sd = np.reshape(sd, (1, 6)) H_ = np.matmul(sd.transpose(), sd) H = H + H_ H_inv = np.linalg.inv(H) temp = np.zeros((6, 1)) for y in range(bb[0], bb[2], 1): for x in range(bb[1], bb[3], 1): del_W = np.asarray([[x, 0, y, 0, 1, 0], [0, x, 0, y, 0, 1]]) # grad = [grad_x[y,x], grad_y[y,x]] grad = np.asarray([grad_x[y, x], grad_y[y, x]]) sd = np.matmul(grad, del_W) sd = np.reshape(sd, (1, 6)).T e = T[y, x] - I[y, x] # e = I[y, x] - T[y, x] temp = temp + e * sd delta_p = np.matmul(H_inv, temp) e = 0 for y in range(bb[0], bb[2], 1): for x in range(bb[1], bb[3], 1): # e = e + T[y, x] - I[y, x] e = e + (I[y, x] - T[y, x])**2 # print("e: ", math.sqrt(e)) # print("delta_p: ", delta_p) return delta_p, I, math.sqrt(e)
def generate_driver_card(): ''' 批量生产虚拟驾驶证 ''' # baseline = 2000000 # ratio_width = 755 / 1133 # ratio_height = 529 / 794 # create font object with the font file and specify desired size font_song1 = ImageFont.truetype('./material/font/song1.ttf', size=48) font_song2 = ImageFont.truetype('./material/font/song2.ttf', size=45) font_song3 = ImageFont.truetype('./material/font/song3.ttf', size=44) font_id = ImageFont.truetype('./material/font/jishi4.ttf', size=36) font_birth = ImageFont.truetype('./material/font/jishi4.ttf', size=42) font_msyh = ImageFont.truetype('./material/font/msyh.ttf', size=45) font_xinshi = ImageFont.truetype('./material/font/xinshi.ttf', size=38) # font list print('load fonts...') font_song1_ls = [ ImageFont.truetype('./material/font/song1.ttf', size=x) for x in range(46, 65) ] font_song3_ls = [ ImageFont.truetype('./material/font/song3.ttf', size=x) for x in range(40, 49) ] font_id_ls = [ ImageFont.truetype('./material/font/jishi4.ttf', size=x) for x in range(32, 41) ] font_birth_ls = [ ImageFont.truetype('./material/font/jishi4.ttf', size=x) for x in range(34, 43) ] # other_fonts = [] for f_ in os.listdir('./material/font/random_font'): fn_ = os.path.join('./material/font/random_font', f_) for x_ in range(32, 65): imfont = ImageFont.truetype(fn_, size=x_) if imfont is None: print('error font: ', 'fn') continue if 46 <= x_ <= 65: font_song1_ls.append(imfont) if 40 <= x_ <= 49: font_song3_ls.append(imfont) if 32 <= x_ <= 41: font_id_ls.append(imfont) if 34 <= x_ <= 43: font_birth_ls.append(imfont) X, Y = 0, 1 BASE_POS, POS, FONT, FILL, TEXT, LABEL = 0, 1, 2, 3, 4, 5 # 驾驶本基本数据,分别为:基础坐标、实际坐标、字体、颜色、文本、标注信息 items = { 'name': [(240, 240), [280, 240], font_song1_ls, (75, 75, 75), "王宏", None], 'id': [(477, 172), [477, 172], font_id_ls, (80, 80, 80), "374562876538274561", None], # 'class': [(608, 608), [608, 608], font_song3, (80, 80, 80), "C1", None], 'birth': [(497, 452), [497, 452], font_birth_ls, (80, 80, 80), "1990-08-08", None], 'first': [(544, 524), [544, 524], font_birth_ls, (80, 80, 80), "2016-10-23", None], 'start': [(291, 680), [291, 680], font_birth_ls, (80, 80, 80), "2016-10-23", None], 'end': [(598, 680), [598, 680], font_birth_ls, (80, 80, 80), "2022-10-23", None], # address2 增加一列,用于处理过长地址 'address2': [(220, 374), [220, 374], font_song3_ls, (80, 80, 80), "", None], 'address': [(220, 304), [220, 304], font_song3_ls, (80, 80, 80), "重庆市渝北区龙景路158号", None], } # load template print('tempaltes load...') templatesName = [] templateStrings = {} # template_width = 1133 # template_height = 794 for tem in os.listdir('./material/templates'): temFn = os.path.join('./material/templates', tem) if not os.path.isfile(temFn): continue templatesName.append(temFn) # load background bg_images = [] if opt.background: print('load background images...') for f in os.listdir(opt.background_dir): fn = os.path.join(opt.background_dir, f) im_ = cv2.imread(fn) if im_ is not None: bg_images.append(im_) # load faker generator print('load fake generator...') fakeGen = FakeGanerator() imagesBatch = {'original': [], 'cut': []} labels = [] # label = [filename, [()]] for ep in range(opt.amount): if ((ep + 1) % 100 == 0): print("images {}/{}".format(ep + 1, opt.amount)) # 其他固定文本的信息 others = [ ['title', [(235, 78), (900, 78), (900, 164), (235, 164)]], ['zhenghao', [(345, 170), (436, 170), (436, 210), (345, 210)]], ['name', [(85, 241), (160, 241), (160, 295), (85, 295)]], ['sex', [(536, 241), (602, 241), (602, 294), (536, 294)]], ['nationality', [(715, 241), (847, 241), (847, 298), (715, 298)]], 
['china', [(849, 227), (940, 227), (940, 272), (849, 272)]], ['address', [(85, 310), (186, 310), (186, 368), (85, 368)]], ['birth', [(323, 451), (474, 451), (474, 508), (323, 508)]], ['first', [(322, 527), (531, 527), (531, 584), (322, 584)]], ['class', [(321, 605), (455, 605), (455, 658), (321, 658)]], ['period', [(95, 687), (239, 687), (239, 739), (95, 739)]], ['logo', [(68, 425), (324, 425), (324, 675), (68, 675)]], # 下面的文本会变化,根据模板不同作修改 ['男', [(640, 228), (686, 228), (686, 273), (640, 273)]], ['C1', [(592, 602), (652, 602), (652, 643), (592, 643)]] # 出现A1B1这种4个字符的,y轴向两边分别扩充30个像素点 ] # 是否加入随机扰动 if opt.pbias: # v_bias 控制上下偏移 h_bias = random.randint(-10, 10) v_bias = random.randint(-12, 12) for key, val in items.items(): val[POS][X] = val[BASE_POS][X] + h_bias val[POS][Y] = val[BASE_POS][Y] + v_bias # 随机生成一个地址 address = fakeGen.generate_address_c5(length=random.randint(15, 25)) # 若地址长度大于18,则换行 if len(address) > 18: items['address'][TEXT] = address[:18] items['address2'][TEXT] = address[18:] else: items['address'][TEXT] = address items['address2'][TEXT] = '' items['name'][TEXT] = fakeGen.generate_name() if ep % 2 else makeName() items['id'][TEXT] = fakeGen.generate_id() # 生成有效身份证 # items['class'][TEXT] = fakeGen.generate_class() # items['birth'][TEXT] = fakeGen.generate_date() birth = items['id'][TEXT][6:14] items['birth'][TEXT] = birth[:4] + '-' + birth[4:6] + '-' + birth[ 6:] # 出生日期根据身份证id生成 items['first'][TEXT] = fakeGen.generate_date() # 第一次领证日期 items['start'][TEXT] = items['first'][TEXT] # 有效起始日期与第一次领证日期一致 seed_end = random.randint(0, 9) if 0 <= seed_end < 7: items['end'][TEXT] = fakeGen.add_date_year(items['start'][TEXT], random.choice([6, 10])) elif seed_end == 7: items['end'][TEXT] = '6年' elif seed_end == 8: items['end'][TEXT] = '10年' else: items['end'][TEXT] = '长期' # initialise the drawing context with the image object as background rd_filename = random.choice(templatesName) # 选择文件名称后,解析红章文本/性别/驾驶证类别 tmp_fn = os.path.split(rd_filename)[-1] tmp_fn = os.path.splitext(tmp_fn)[0] org, cl, sex = tmp_fn.split('-') len_org = len(org) # 据统计,红章分行规则如下 if len_org == 13: org_0, org_1, org_2 = org[:4], org[4:8], org[8:] # 4,4,5 elif len_org == 14: org_0, org_1, org_2 = org[:5], org[5:9], org[9:] # 5,4,5 elif len_org == 15: org_0, org_1, org_2 = org[:5], org[5:10], org[10:] # 5,5,5 elif len_org == 16: org_0, org_1, org_2 = org[:6], org[6:11], org[11:] # 6,5,5 elif len_org == 17: org_0, org_1, org_2 = org[:5], org[5:11], org[11:] # 5,6,6 elif len_org == 18: org_0, org_1, org_2 = org[:6], org[6:12], org[12:] # 6,6,6 elif len_org == 19: org_0, org_1, org_2 = org[:7], org[7:13], org[13:] # 7,6,6 elif len_org == 20: org_0, org_1, org_2 = org[:6], org[6:13], org[13:] # 6,7,7 elif len_org == 21: org_0, org_1, org_2 = org[:8], org[8:15], org[15:] # 8,7,6 else: print('红章换行规则解析错误: ', rd_filename) continue if rd_filename not in templateStrings: srcimage = Image.open(rd_filename) srcimage = srcimage.convert('RGB') srcimage = srcimage.resize((1133, 794), Image.BOX) buffered = BytesIO() srcimage.save(buffered, format="JPEG") img_encode = base64.b64encode(buffered.getvalue()) image_string = BytesIO(base64.b64decode(img_encode)) templateStrings[rd_filename] = image_string image = Image.open(templateStrings[rd_filename]) draw = ImageDraw.Draw(image) # get random fill rd_fill = random.randint(-70, 50) new_fill = (80 + rd_fill, ) * 3 # draw the text on the background for key, val in items.items(): new_font = random.choice(val[FONT]) # random select a font if key == 'name': val[TEXT] = ' '.join(list(val[TEXT])) xy, 
mask = draw.text(val[POS], val[TEXT], fill=new_fill, font=new_font) x0, y0 = xy x1 = x0 + mask.size[0] y1 = y0 + mask.size[1] # x0, y0, x1, y1 = x0 - 5, y0 - 5, x1 + 5, y1 + 5 val[LABEL] = (x0, y0, x1, y0, x1, y1, x0, y1) image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR) if opt.background: bgImg = random.choice(bg_images) angle = random.randint(-4, 4) height, width = bgImg.shape[:2] M = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1) bgRotate = cv2.warpAffine(bgImg, M, (width, height)) src_h, src_w = image.shape[:2] # 设计随机比例,将驾驶证图片嵌入背景图中 ratio = random.randint(80, 95) / 100 if width / height < src_w / src_h: # target_w = int(width * ratio) target_h = int(src_h * target_w / src_w) else: target_h = int(height * ratio) target_w = int(src_w * target_h / src_h) x0 = (width - target_w) // 2 y0 = (height - target_h) // 2 x1 = x0 + target_w y1 = y0 + target_h if y0 <= 0 or y1 >= height: print('height out of range..') continue if x0 <= 0 or x1 >= width: print('width out of range..') continue inner = cv2.resize(image, (target_w, target_h)) # resize后记录的标注信息也要跟着变 for key, val in items.items(): val[LABEL] = tuple( [int(x * target_w / src_w) for x in val[LABEL]]) for other in others: new_xy = [] for x, y in other[1]: new_xy.append( (int(x * target_w / src_w), int(y * target_w / src_w))) other[1].clear() other[1].extend(new_xy) alpha_bg = 0.01 * random.randint(5, 35) alpha_dv = 1 - alpha_bg bgRotate[y0:y1, x0:x1, :] = (alpha_bg * bgRotate[y0:y1, x0:x1, :] + alpha_dv * inner[:, :, :]) image = bgRotate # 计算原来标注位置在新图片中的绝对位置 for key, val in items.items(): new_pos = [] for i, v in enumerate(val[LABEL]): if i % 2: new_pos.append(y0 + v) else: new_pos.append(x0 + v) val[LABEL] = tuple(new_pos) for other in others: new_xy = [] for x, y in other[1]: new_xy.append((x + x0, y + y0)) other[1].clear() other[1].extend(new_xy) if opt.salt: # 椒盐噪声 if 1 == random.randint(0, 3): image = img_salt_pepper_noise(image, 0.005) if opt.blur and image.shape[0] > 500 and image.shape[1] > 500: seed = random.choice((0, 3, 5)) if seed: # 随机选择滤波方式 blur_method = random.choice( [cv2.GaussianBlur, cv2.blur, cv2.medianBlur]) if blur_method is cv2.bilateralFilter: image = blur_method(image, -1, random.randint(20, 200), random.randint(20, 200)) elif blur_method is cv2.GaussianBlur: image = blur_method(image, (seed, seed), 0) elif blur_method is cv2.blur: image = blur_method(image, (seed, seed)) else: image = blur_method(image, seed) if opt.perspective: ps = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)] pM = get_perspective_matrix(image, ps) image = cv2.warpPerspective(image, pM, (width, height)) # 透视变换后,标注坐标也要变化 for key, val in items.items(): ps = val[LABEL] old_points = [(ps[i], ps[i + 1]) for i in range(0, 8, 2)] old_points = np.array(old_points, dtype='float32') old_points = np.array([old_points]) new_points = cv2.perspectiveTransform(old_points, pM) points_by_warp = [] for i in range(4): points_by_warp.append(int(new_points[0][i][0])) points_by_warp.append(int(new_points[0][i][1])) val[LABEL] = tuple(points_by_warp) for other in others: old_points = np.array(other[1], dtype='float32') old_points = np.array([old_points]) new_points = cv2.perspectiveTransform(old_points, pM) new_xy = [] for i in range(4): new_xy.append((int(new_points[0][i][0]), int(new_points[0][i][1]))) other[1].clear() other[1].extend(new_xy) invM = cv2.invertAffineTransform(M) image = cv2.warpAffine(image, invM, (width, height)) # 仿射变换后标注坐标点也要跟着变换 for key, val in items.items(): ps = val[LABEL] old_points = [(ps[i], ps[i + 1]) for i in range(0, 8, 2)] 
old_points = np.array(old_points, dtype=np.int32) old_points = np.reshape(old_points, (4, 1, 2)) new_points = cv2.transform(old_points, invM) points_by_warp = [] for i in range(4): points_by_warp.append(new_points[i][0][0]) points_by_warp.append(new_points[i][0][1]) val[LABEL] = tuple(points_by_warp) for other in others: # old_points = list(other[1]) old_points = np.array(other[1], dtype=np.int32) old_points = np.reshape(old_points, (4, 1, 2)) new_points = cv2.transform(old_points, invM) new_xy = [] for i in range(4): new_xy.append((new_points[i][0][0], new_points[i][0][1])) other[1].clear() other[1].extend(new_xy) if opt.redbox: for key, val in items.items(): points = val[LABEL] x0, y0, x1, y1, x2, y2, x3, y3 = points # x, y, w, h = cv2.boundingRect([(x0,y0), (x1,y1), (x2,y2), (x3,y3)]) x_min = min(x0, x1, x2, x3) x_max = max(x0, x1, x2, x3) y_min = min(y0, y1, y2, y3) y_max = max(y0, y1, y2, y3) cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2) for i, other in enumerate(others): # ["中华人民共和国机动车驾驶证", [(234,75), (900,75), (900,134), (234,134)]], (x0, y0), (x1, y1), (x2, y2), (x3, y3) = other[1] if len(others) - 1 == i: temp_fn = os.path.split(rd_filename)[-1] temp_fn = os.path.splitext(temp_fn)[0] try: origanization, cl, sex = temp_fn.split('-') except Exception as e: print(str(e)) print(temp_fn) if len(cl) > 2: x0 = x3 = x0 - 30 x1 = x2 = x1 + 30 x_min = min(x0, x1, x2, x3) x_max = max(x0, x1, x2, x3) y_min = min(y0, y1, y2, y3) y_max = max(y0, y1, y2, y3) cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 2) if opt.image: filename = '%s.jpg' % (BASELINE + ep) originalFilename = os.path.join(opt.output, 'original', filename) imagesBatch['original'].append((image, originalFilename)) # 在保存原图的情况下,如果选择了保存标注信息,则在此处控制保存 if opt.label: text_label = [] fn = '%s.txt' % (BASELINE + ep) for key, val in items.items(): if key == 'address2': if not val[TEXT]: # 地址栏第二栏如果没有数据,则跳过 continue x0, y0, x1, y1, x2, y2, x3, y3 = val[LABEL] x_min = str(int(min(x0, x1, x2, x3))) x_max = str(int(max(x0, x1, x2, x3))) y_min = str(int(min(y0, y1, y2, y3))) y_max = str(int(max(y0, y1, y2, y3))) # here if key == 'id': text_label.append(','.join( (x_min, y_min, x_max, y_max, 'id', fn))) elif key in ['birth', 'first', 'start', 'end']: text_label.append(','.join( (x_min, y_min, x_max, y_max, 'date', fn))) # text_label.append(','.join(list(map(str,val[LABEL]))) + ',' + val[TEXT]) for idx, other in enumerate(others[:-2]): (x0, y0), (x1, y1), (x2, y2), (x3, y3) = other[1] x_min = str(int(min(x0, x1, x2, x3))) x_max = str(int(max(x0, x1, x2, x3))) y_min = str(int(min(y0, y1, y2, y3))) y_max = str(int(max(y0, y1, y2, y3))) # here text_label.append(','.join( (x_min, y_min, x_max, y_max, other[0], fn))) label = '\n'.join(text_label) txtFilename = os.path.join(opt.output, 'labels', fn) labels.append((label, txtFilename)) # 图片量达到saveInterval设定值后,一次性保存 for key in ['original', 'cut']: if len(imagesBatch[key]) >= opt.saveInterval: for img, fn in imagesBatch[key]: encode_param = [ int(cv2.IMWRITE_JPEG_QUALITY), random.randint(50, 90) ] try: # img = adjust_contast_brightness(img) # img = img_salt_pepper_noise(img, 0.01) cv2.imencode('.jpg', img, encode_param)[1].tofile(fn) # cv2.imwrite(fn, img) except Exception as e: print('error: ', fn) imagesBatch[key].clear() # 标注图片数达到saveInterval设定值后,一次性保存txt标注文件 if len(labels) >= opt.saveInterval: for label, fn in labels: try: with open(fn, 'w', encoding='utf-8') as f: f.write(label) except Exception as e: print('write txt error : ', fn) labels.clear() # 保存剩余图片 for 
key in ['original', 'cut']: if len(imagesBatch[key]): for img, fn in imagesBatch[key]: encode_param = [ int(cv2.IMWRITE_JPEG_QUALITY), random.randint(50, 90) ] try: # img = adjust_contast_brightness(img) # img = img_salt_pepper_noise(img, 0.01) cv2.imencode('.jpg', img, encode_param)[1].tofile(fn) # cv2.imwrite(fn, img) except Exception as e: print('error: ', fn) if key == 'original' and opt.label: txt_name = os.path.splitext(fn)[0] + '.txt' imagesBatch[key].clear() if len(labels): for label, fn in labels: try: with open(fn, 'w', encoding='utf-8') as f: f.write(label) except Exception as e: print('write txt error : ', fn) labels.clear() print('Finished!')
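# The label bookkeeping above repeatedly pushes annotation corners through the rotation matrix
# applied to the background and through its inverse. The core pattern in isolation, with
# illustrative names:
import cv2
import numpy as np

def rotate_image_and_points(image, points, angle):
    h, w = image.shape[:2]
    M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h))
    pts = np.asarray(points, np.float32).reshape(-1, 1, 2)
    rotated_pts = cv2.transform(pts, M).reshape(-1, 2)
    # the inverse matrix takes points in the rotated image back to the original frame
    M_inv = cv2.invertAffineTransform(M)
    restored_pts = cv2.transform(rotated_pts.reshape(-1, 1, 2), M_inv).reshape(-1, 2)
    return rotated, rotated_pts, restored_pts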
def rotate_landmarks(face, rotation_matrix): """ Rotates the 68 point landmarks and detection bounding box around the given rotation matrix. Paramaters ---------- face: DetectedFace or dict A :class:`DetectedFace` or an `alignments file` ``dict`` containing the 68 point landmarks and the `x`, `w`, `y`, `h` detection bounding box points. rotation_matrix: numpy.ndarray The rotation matrix to rotate the given object by. Returns ------- DetectedFace or dict The rotated :class:`DetectedFace` or `alignments file` ``dict`` with the landmarks and detection bounding box points rotated by the given matrix. The return type is the same as the input type for ``face`` """ logger.trace("Rotating landmarks: (rotation_matrix: %s, type(face): %s", rotation_matrix, type(face)) rotated_landmarks = None # Detected Face Object if isinstance(face, DetectedFace): bounding_box = [[face.x, face.y], [face.x + face.w, face.y], [face.x + face.w, face.y + face.h], [face.x, face.y + face.h]] landmarks = face.landmarks_xy # Alignments Dict elif isinstance(face, dict) and "x" in face: bounding_box = [ [face.get("x", 0), face.get("y", 0)], [face.get("x", 0) + face.get("w", 0), face.get("y", 0)], [ face.get("x", 0) + face.get("w", 0), face.get("y", 0) + face.get("h", 0) ], [face.get("x", 0), face.get("y", 0) + face.get("h", 0)] ] landmarks = face.get("landmarks_xy", list()) else: raise ValueError("Unsupported face type") logger.trace("Original landmarks: %s", landmarks) rotation_matrix = cv2.invertAffineTransform(rotation_matrix) rotated = list() for item in (bounding_box, landmarks): if not item: continue points = np.array(item, np.int32) points = np.expand_dims(points, axis=0) transformed = cv2.transform(points, rotation_matrix).astype(np.int32) rotated.append(transformed.squeeze()) # Bounding box should follow x, y planes, so get min/max # for non-90 degree rotations pt_x = min([pnt[0] for pnt in rotated[0]]) pt_y = min([pnt[1] for pnt in rotated[0]]) pt_x1 = max([pnt[0] for pnt in rotated[0]]) pt_y1 = max([pnt[1] for pnt in rotated[0]]) width = pt_x1 - pt_x height = pt_y1 - pt_y if isinstance(face, DetectedFace): face.x = int(pt_x) face.y = int(pt_y) face.w = int(width) face.h = int(height) face.r = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face.landmarks_xy = rotated_landmarks else: face["left"] = int(pt_x) face["top"] = int(pt_y) face["right"] = int(pt_x1) face["bottom"] = int(pt_y1) rotated_landmarks = face logger.trace("Rotated landmarks: %s", rotated_landmarks) return face
# load the input image, resize it, and convert it to grayscale
gray = cv2.cvtColor(img_original_resize, cv2.COLOR_BGR2GRAY)

# show the original input image and detect faces in the grayscale image
rects = detector(gray, 2)
faceAligned, m_affine, (w_affine_orig, h_affine_orig) = fa.align(img_original_resize, gray, rects[0])
cv2.namedWindow("orig_aligned", cv2.WINDOW_NORMAL)  # Create window with freedom of dimensions
cv2.imshow("orig_aligned", faceAligned)
m_affine_inverse = cv2.invertAffineTransform(m_affine).copy()

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# show the second input image and detect faces in the grayscale image
rects = detector(gray, 2)
faceAligned, m_affine, (w_affine, h_affine) = fa.align(img, gray, rects[0])
cv2.namedWindow("out_aligned", cv2.WINDOW_NORMAL)  # Create window with freedom of dimensions
cv2.imshow("out_aligned", faceAligned)

output_realigned = cv2.warpAffine(img, m_affine, (w_affine, h_affine), flags=cv2.INTER_CUBIC)
output_realigned = cv2.warpAffine(output_realigned,
def main(): # Note: Comment out parts of this code as necessary kernel1D = cv2.getGaussianKernel(3, 7) kernel = kernel1D * np.transpose(kernel1D) #kernel = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]]) # 1a transA, transA_Ix, transA_Iy,transA_pair = get_image_gradients_paired("transA.jpg") norm_and_write_image(transA_pair, "ps5-1-a-1.png") # TODO: Similarly for simA.jpg simA, simA_Ix, simA_Iy,simA_pair = get_image_gradients_paired("simA.jpg") norm_and_write_image(simA_pair, "ps5-1-a-2.png") # # 1b transA_R = harris_response(transA_Ix, transA_Iy, kernel, 0.02) norm_and_write_image(transA_R, "ps5-1-b-1.png") transB, transB_Ix, transB_Iy,transB_pair = get_image_gradients_paired("transB.jpg") transB_R = harris_response(transB_Ix, transB_Iy, kernel, 0.02) norm_and_write_image(transB_R, "ps5-1-b-2.png") simA_R = harris_response(simA_Ix, simA_Iy, kernel, 0.02) norm_and_write_image(simA_R, "ps5-1-b-3.png") simB, simB_Ix, simB_Iy,simB_pair = get_image_gradients_paired("simB.jpg") simB_R = harris_response(simB_Ix, simB_Iy, kernel, 0.02) norm_and_write_image(simB_R, "ps5-1-b-4.png") # 1c transA_corners = find_corners(transA_R, 50, 10) print "transA corners " + str(len(transA_corners)) transA_out = draw_corners(transA, transA_corners) norm_and_write_image(transA_out, "ps5-1-c-1.png") transB_corners = find_corners(transB_R, 50, 10) print "transB corners " + str(len(transB_corners)) transB_out = draw_corners(transB, transB_corners) norm_and_write_image(transB_out, "ps5-1-c-2.png") simA_corners = find_corners(simA_R, 40, 5) print "simA corners " + str(len(simA_corners)) simA_out = draw_corners(simA, simA_corners) norm_and_write_image(simA_out, "ps5-1-c-3.png") simB_corners = find_corners(simB_R, 40, 5) print "simB corners " + str(len(simB_corners)) simB_out = draw_corners(simB, simB_corners) norm_and_write_image(simB_out, "ps5-1-c-4.png") # # 2a transA_angle = gradient_angle(transA_Ix, transA_Iy) transA_kp = get_keypoints(transA_corners, transA_R, transA_angle, _size=10.0, _octave=0) transA = cv2.imread(os.path.join(input_dir, "transA.jpg"), cv2.IMREAD_GRAYSCALE).astype(np.uint8) transA_out = cv2.drawKeypoints(transA, transA_kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS) transB_angle = gradient_angle(transB_Ix, transB_Iy) transB_kp = get_keypoints(transB_corners, transB_R, transB_angle, _size=10.0, _octave=0) transB = cv2.imread(os.path.join(input_dir, "transB.jpg"), cv2.IMREAD_GRAYSCALE).astype(np.uint8) transB_out = cv2.drawKeypoints(transB, transB_kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS) trans_paired = make_image_pair(transA_out, transB_out) write_image(trans_paired, "ps5-2-a-1.png") simA_angle = gradient_angle(simA_Ix, simA_Iy) simA_kp = get_keypoints(simA_corners, simA_R, simA_angle, _size=10.0, _octave=0) simA = cv2.imread(os.path.join(input_dir, "simA.jpg"), cv2.IMREAD_GRAYSCALE).astype(np.uint8) simA_out = cv2.drawKeypoints(simA, simA_kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS) simB_angle = gradient_angle(simB_Ix, simB_Iy) simB_kp = get_keypoints(simB_corners, simB_R, simB_angle, _size=10.0, _octave=0) simB = cv2.imread(os.path.join(input_dir, "simB.jpg"), cv2.IMREAD_GRAYSCALE).astype(np.uint8) simB_out = cv2.drawKeypoints(simB, simB_kp,flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS) sim_paired = make_image_pair(simA_out, simB_out) write_image(sim_paired, "ps5-2-a-2.png") # TODO: Ditto for (simA, simB) pair # 2b transA_desc = get_descriptors(transA, transA_kp) transB_desc = get_descriptors(transB, transB_kp) trans_matches = match_descriptors(transA_desc, transB_desc) 
# print trans_matches trans_matched = draw_matches(transA, transB, transA_kp, transB_kp, trans_matches) write_image(trans_matched, "ps5-2-b-1.png") simA_desc = get_descriptors(simA, simA_kp) simB_desc = get_descriptors(simB, simB_kp) sim_matches = match_descriptors(simA_desc, simB_desc) sim_matched = draw_matches(simA, simB, simA_kp, simB_kp, sim_matches) write_image(sim_matched, "ps5-2-b-2.png") # 3a # TODO: Compute translation vector using RANSAC for (transA, transB) pair, draw biggest consensus set translation, matchSets = compute_translation_RANSAC(transA_kp, transB_kp, trans_matches, 20) print "translation" print translation print len(trans_matches) print len(matchSets) ransac_trans_match = draw_matches(transA, transB, transA_kp, transB_kp, matchSets) write_image(ransac_trans_match, "ps5-3-a-1.png") # 3b # TODO: Compute similarity transform for (simA, simB) pair, draw biggest consensus set sim_matrix, matchSets = compute_similarity_RANSAC(simA_kp, simB_kp, sim_matches, 10, 20) print "sim matrix" print sim_matrix print len(sim_matches) print len(matchSets) ransac_sim_match = draw_matches(simA, simB, simA_kp, simB_kp, matchSets) write_image(ransac_sim_match, "ps5-3-b-1.png") # # Extra credit: 3c, 3d, 3e #3c affine_matrix, matchSets = compute_affine_RANSAC(simA_kp, simB_kp, sim_matches, 10, 14) print "affine matrix" print affine_matrix print len(sim_matches) print len(matchSets) ransac_sim_match = draw_matches(simA, simB, simA_kp, simB_kp, matchSets) write_image(ransac_sim_match, "ps5-3-c-2.png") #3d # sim_matrix = np.array([[0.94355048, -0.33337659, 56.75110304], # [0.33337659, 0.94355048, -67.81053724]]) # simB = cv2.imread(os.path.join(input_dir, "simB.jpg"), cv2.IMREAD_GRAYSCALE).astype(np.uint8) # simA = cv2.imread(os.path.join(input_dir, "simA.jpg"), cv2.IMREAD_GRAYSCALE).astype(np.uint8) rand = np.zeros((simB.shape[0], simB.shape[1])) out_warp = cv2.invertAffineTransform(sim_matrix) warped_image = cv2.warpAffine(simB.astype(np.uint8),out_warp, (simB.shape[1], simB.shape[0]), flags=cv2.INTER_LINEAR) write_image(warped_image, "ps5-3-d-1.png") merged = cv2.merge((rand.astype(np.uint8),warped_image.astype(np.uint8),simA.astype(np.uint8))) write_image(merged, "ps5-3-d-2.png") out_warp = cv2.invertAffineTransform(affine_matrix) warped_image = cv2.warpAffine(simB.astype(np.uint8),out_warp, (simB.shape[1], simB.shape[0]), flags=cv2.INTER_LINEAR) write_image(warped_image, "ps5-3-e-1.png") merged = cv2.merge((rand.astype(np.uint8),warped_image.astype(np.uint8),simA.astype(np.uint8))) write_image(merged, "ps5-3-e-2.png")
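# The visualisation trick above (invert the estimated transform, warp B back onto A, and stack
# the two images in separate colour channels) is useful on its own. A minimal sketch, assuming
# single-channel uint8 images of the same size; names are illustrative:
import cv2
import numpy as np

def overlay_warped(img_a, img_b, affine_a_to_b):
    inv = cv2.invertAffineTransform(np.asarray(affine_a_to_b, np.float32))
    warped_b = cv2.warpAffine(img_b, inv, (img_a.shape[1], img_a.shape[0]),
                              flags=cv2.INTER_LINEAR)
    zeros = np.zeros_like(img_a)
    return cv2.merge((zeros, warped_b, img_a))  # warped B in green, A in red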
def SimulateAffineMap(zoom_step, psi, t1_step, phi, img0, mask=None, CenteredAt=None, t2_step=1.0, inter_flag=cv2.INTER_CUBIC, border_flag=cv2.BORDER_CONSTANT, SimuBlur=True): ''' Computing affine deformations of images as in [https://rdguez-mariano.github.io/pages/imas] Let A = R_psi0 * diag(t1,t2) * R_phi0 with t1>t2 = lambda * R_psi0 * diag(t1/t2,1) * R_phi0 Parameters given should be as: zoom_step = 1/lambda t1_step = 1/t1 t2_step = 1/t2 psi = -psi0 (in degrees) phi = -phi0 (in degrees) ASIFT proposed params: inter_flag = cv2.INTER_LINEAR SimuBlur = True Also, another kind of exterior could be: border_flag = cv2.BORDER_REPLICATE ''' tx = zoom_step * t1_step ty = zoom_step * t2_step assert tx >= 1 and ty >= 1, 'Either scale or t are defining a zoom-in operation. If you want to zoom-in do it manually. tx = ' + str( tx) + ', ty = ' + str(ty) img = img0.copy() arr = [] DoCenter = False if type(CenteredAt) is list: DoCenter = True arr = np.array(CenteredAt).reshape(-1, 2) h, w = img.shape[:2] tcorners = SquareOrderedPts(h, w, CV=False) if mask is None: mask = np.zeros((h, w), np.uint8) mask[:] = 255 A1 = np.float32([[1, 0, 0], [0, 1, 0]]) if phi != 0.0: phi = np.deg2rad(phi) s, c = np.sin(phi), np.cos(phi) A1 = np.float32([[c, -s], [s, c]]) tcorners = np.dot(tcorners, A1.T) x, y, w, h = cv2.boundingRect(np.int32(tcorners).reshape(1, -1, 2)) A1 = np.hstack([A1, [[-x], [-y]]]) if DoCenter and tx == 1.0 and ty == 1.0 and psi == 0.0: arr = AffineArrayCoor(arr, A1)[0].ravel() h0, w0 = img0.shape[:2] A1[0][2] += h0 / 2.0 - arr[0] A1[1][2] += w0 / 2.0 - arr[1] w, h = w0, h0 img = cv2.warpAffine(img, A1, (w, h), flags=inter_flag, borderMode=border_flag) else: img = cv2.warpAffine(img, A1, (w, h), flags=inter_flag, borderMode=border_flag) h, w = img.shape[:2] A2 = np.float32([[1, 0, 0], [0, 1, 0]]) tcorners = SquareOrderedPts(h, w, CV=False) if tx != 1.0 or ty != 1.0: sx = 0.8 * np.sqrt(tx * tx - 1) sy = 0.8 * np.sqrt(ty * ty - 1) if SimuBlur: img = cv2.GaussianBlur(img, (0, 0), sigmaX=sx, sigmaY=sy) A2[0] /= tx A2[1] /= ty if psi != 0.0: psi = np.deg2rad(psi) s, c = np.sin(psi), np.cos(psi) Apsi = np.float32([[c, -s], [s, c]]) Apsi = np.matmul(Apsi, A2[0:2, 0:2]) tcorners = np.dot(tcorners, Apsi.T) x, y, w, h = cv2.boundingRect(np.int32(tcorners).reshape(1, -1, 2)) A2[0:2, 0:2] = Apsi A2[0][2] -= x A2[1][2] -= y if tx != 1.0 or ty != 1.0 or psi != 0.0: if DoCenter: A = ComposeAffineMaps(A2, A1) arr = AffineArrayCoor(arr, A)[0].ravel() h0, w0 = img0.shape[:2] A2[0][2] += h0 / 2.0 - arr[0] A2[1][2] += w0 / 2.0 - arr[1] w, h = w0, h0 img = cv2.warpAffine(img, A2, (w, h), flags=inter_flag, borderMode=border_flag) A = ComposeAffineMaps(A2, A1) if psi != 0 or phi != 0.0 or tx != 1.0 or ty != 1.0: if DoCenter: h, w = img0.shape[:2] else: h, w = img.shape[:2] mask = cv2.warpAffine(mask, A, (w, h), flags=inter_flag) Ai = cv2.invertAffineTransform(A) return img, mask, A, Ai
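# The docstring above describes the simulated deformation as A = R_psi0 * diag(t1, t2) * R_phi0
# (the function itself takes the reciprocal parameters 1/lambda, 1/t1, 1/t2 and negated angles).
# Writing that decomposition down directly is a handy way to inspect what is being simulated;
# a minimal sketch, translation left at zero:
import cv2
import numpy as np

def affine_from_decomposition(psi_deg, t1, t2, phi_deg):
    psi, phi = np.deg2rad(psi_deg), np.deg2rad(phi_deg)
    r_psi = np.array([[np.cos(psi), -np.sin(psi)], [np.sin(psi), np.cos(psi)]])
    r_phi = np.array([[np.cos(phi), -np.sin(phi)], [np.sin(phi), np.cos(phi)]])
    A = r_psi @ np.diag([t1, t2]) @ r_phi
    return np.hstack([A, np.zeros((2, 1))]).astype(np.float32)  # 2x3, no translation

A = affine_from_decomposition(30, 2.0, 1.0, 15)
Ai = cv2.invertAffineTransform(A)  # inverse map, as returned by the function above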
def detectWithAngles(self, img, angels = None, resolve = True, thr = None ): ''' angles - a list of angles to test. If None, default to the value created at the constructor (which defaults to [0]) resolve - a boolean flag, whether or not to cluster the boxes, and resolve cluster by highest score. thr - the maximum area covered with objects, before we break from the angles loop returns - a list of CascadeResult() objects ''' if thr == None: thr = self.thr original_size = img.shape[0] * img.shape[0] if angels == None: angels = self.angles results = [] total_area = 0 for angle in angels: # the diagonal of the image is the diameter of the rotated image, so the big_image needs to bound this circle # by being that big big_image, x_shift, y_shift, diag, rot_center = pad_image_for_rotation(img) # find the rotation and the inverse rotation matrix, to allow translations between old and new coordinates and vice versa rot_mat = cv2.getRotationMatrix2D(rot_center, angle, scale = 1.0) inv_rot_mat = cv2.invertAffineTransform(rot_mat) # rotate the image by the desired angle rot_image = cv2.warpAffine(big_image, rot_mat, (big_image.shape[1],big_image.shape[0]), flags=cv2.INTER_CUBIC) faces = self.detectMultiScaleWithScores(rot_image, scaleFactor = 1.03, minNeighbors = 20, minSize = (15,15), flags = 4) for face in faces: xp = face[0] dx = face[2] yp = face[1] dy = face[3] score = 1 dots = np.matrix([[xp,xp+dx,xp+dx,xp], [yp,yp,yp+dy,yp+dy], [1, 1, 1, 1]]) # these are the original coordinates in the "big_image" # print dots originals_in_big = inv_rot_mat * dots # print originals_in_big shifter = np.matrix([[x_shift]*4, [y_shift]*4]) # print shifter # these are the original coordinate in the original image originals = originals_in_big - shifter # print originals points = np.array(originals.transpose()) x = points[0,0] y = points[0,1] box_with_score = ([x,y,dx,dy], score) cascade_result = CascadeResult.from_polygon_points(points, score, self.cascade_type) # print cascade_result results.append(cascade_result) ################# # test and see, if we found enough objects, break out and don't waste our time total_area += cascade_result.area if resolve: return resolve_angles(results, width = img.shape[1], height = img.shape[0]) else: return results
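# pad_image_for_rotation is not shown in this snippet; one common way to get the same effect
# is to pad the frame so its diagonal fits, rotate about the padded centre, and keep the
# shifts and the inverted matrix for mapping detections back. A sketch under that assumption,
# with illustrative names:
import cv2
import numpy as np

def pad_and_rotate(img, angle):
    h, w = img.shape[:2]
    diag = int(np.ceil(np.sqrt(h * h + w * w)))
    y_shift, x_shift = (diag - h) // 2, (diag - w) // 2
    big = np.zeros((diag, diag) + img.shape[2:], dtype=img.dtype)
    big[y_shift:y_shift + h, x_shift:x_shift + w] = img
    rot_mat = cv2.getRotationMatrix2D((diag / 2.0, diag / 2.0), angle, 1.0)
    inv_rot_mat = cv2.invertAffineTransform(rot_mat)
    rotated = cv2.warpAffine(big, rot_mat, (diag, diag), flags=cv2.INTER_CUBIC)
    return rotated, inv_rot_mat, x_shift, y_shift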
def stitch_chunks(src_mf, trg_mf, \ patch_size, \ coarse_scale, fine_scale, \ initial_estimate, \ search_space, \ mls_alpha=3.): (shift_space, d_shift), \ (angle_space, d_angle), \ (scale_space, d_scale) = search_space def norm(data): with np.errstate(invalid='ignore'): return (data - np.mean(data)) / np.std(data) def get_patch_at(img, pos, patch_size): x, y = pos t = np.array([[1., 0., patch_size - x], \ [0., 1., patch_size - y]], 'float32') patch = cv2.warpAffine(img, t, \ (int(2 * patch_size + .5), int(2 * patch_size + .5))) return norm(patch) # Estimate initial affine transform src_mask = cuttlefish_mask(src_mf) trg_mask = cuttlefish_mask(trg_mf) try: t0, t1 = estimate_affine(src_mask, trg_mask, mode=initial_estimate) except RuntimeError: return None t0_inv = cv2.invertAffineTransform(t0) t1_inv = cv2.invertAffineTransform(t1) # Coarse grid coarse_grid = np.mgrid[tuple(np.s_[: s : coarse_scale] \ for s in trg_mf.shape)] coarse_point_in_mask = trg_mask[coarse_grid[0], coarse_grid[1]] coarse_trg_coords = np.float32(coarse_grid[:, coarse_point_in_mask] \ .T[:, ::-1]) def coarse_alignment(t_inv): t_ide = np.identity(3, 'float32')[: 2] coarse_src_coords = cv2.transform( \ coarse_trg_coords[:, None], t_inv)[:, 0] # Transform target for coarse grid search shape = tuple(int(s / d_shift) for s in src_mf.shape) trg_mf_t = cv2.warpAffine(trg_mf, t_inv / d_shift, shape[::-1]) src_mf_t = cv2.warpAffine(src_mf, t_ide / d_shift, shape[::-1]) # Coarse grid search t_corr_list = [libreg.affine_registration.match_template_brute( \ get_patch_at(trg_mf_t, \ src_coord / d_shift, patch_size / d_shift), \ scipy.fftpack.fft2(get_patch_at(src_mf_t, \ src_coord / d_shift, shift_space / d_shift)), \ rotation=slice(0, 1, 1) if angle_space is None \ else slice(-angle_space, +angle_space, d_angle), \ logscale_x=slice(0, 1, 1) if scale_space is None \ else slice(-scale_space, +scale_space, d_scale), \ logscale_y=slice(0, 1, 1) if scale_space is None \ else slice(-scale_space, +scale_space, d_scale), \ find_translation=libreg.affine_registration \ .cross_correlation_fft) \ for src_coord in coarse_src_coords] dx = np.array([np.dot(t[:, :2], \ (patch_size / d_shift, patch_size / d_shift)) \ + t[:, 2] - (shift_space / d_shift, shift_space / d_shift) \ for t, _ in t_corr_list]) coarse_src_coords += dx * d_shift corr = np.array([corr for _, corr in t_corr_list], 'float32') return coarse_src_coords, corr coarse_src_coords_0, coarse_corr_0 = coarse_alignment(t0_inv) coarse_src_coords_1, coarse_corr_1 = coarse_alignment(t1_inv) coarse_src_coords, coarse_corr = (coarse_src_coords_0, coarse_corr_0) \ if np.nanmean(coarse_corr_0) > np.nanmean(coarse_corr_1) else \ (coarse_src_coords_1, coarse_corr_1) # Filter out points for _ in range(8): affine_model = sklearn.linear_model.RANSACRegressor( \ sklearn.linear_model.LinearRegression(), max_trials=2048, \ loss='squared_loss') try: affine_model.fit(coarse_trg_coords, coarse_src_coords) except ValueError: continue else: break else: raise RuntimeError('RANSAC did not converge.') coarse_trg_coords_flt = np.ascontiguousarray( \ coarse_trg_coords[affine_model.inlier_mask_], 'float32') coarse_src_coords_flt = np.ascontiguousarray( \ coarse_src_coords[affine_model.inlier_mask_], 'float32') # Warp src_mf with the coarse grid transformation trg_mf_warped = warp_image(trg_mf, \ coarse_src_coords_flt, coarse_trg_coords_flt, mls_alpha, 32) # Fine grid fine_grid = np.mgrid[tuple(np.s_[: s + fine_scale : fine_scale] \ for s in trg_mf.shape)] fine_point_in_mask = np.zeros(fine_grid.shape[1:], 
'bool') end_y = fine_grid.shape[1] - 1 end_x = fine_grid.shape[2] - 1 fine_point_in_mask[: end_y, : end_x] = \ trg_mask[fine_grid[0, : end_y, : end_x], \ fine_grid[1, : end_y, : end_x]] fine_coord_in_grid = fine_point_in_mask[ \ np.ones(fine_grid.shape[1:], 'bool')] fine_trg_coords = np.ascontiguousarray( \ fine_grid.reshape((2, -1)).T[:, ::-1], 'float32') fine_src_coords = moving_least_squares.similarity( \ coarse_trg_coords_flt, coarse_src_coords_flt, \ fine_trg_coords, mls_alpha) # Estimate new shifts dx_corr_list = [libreg.affine_registration.cross_correlation_fft( \ scipy.fftpack.fft2(get_patch_at(trg_mf_warped, \ src_coord, patch_size)), \ scipy.fftpack.fft2(get_patch_at(src_mf, \ src_coord, patch_size))) \ for src_coord \ in fine_src_coords[fine_coord_in_grid]] dx = np.array([dx for dx, _ in dx_corr_list], 'float32') corr = np.array([corr for _ , corr in dx_corr_list], 'float32') # Apply fine shifts small_map = np.empty(fine_grid.shape[1 : ] + (2, ), 'float32') small_map[np.ones(fine_grid.shape[1:], 'bool')] = fine_src_coords small_map[fine_point_in_mask] += dx return small_map
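# get_patch_at above relies on a pure-translation 2x3 matrix: warpAffine then both crops a
# (2*patch_size)-sided window centred on the requested position and zero-pads wherever the
# window leaves the image. The same idea in isolation, with illustrative names:
import cv2
import numpy as np

def patch_at(img, x, y, patch_size):
    t = np.float32([[1., 0., patch_size - x],
                    [0., 1., patch_size - y]])
    side = int(2 * patch_size + .5)
    return cv2.warpAffine(img, t, (side, side))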
def rotate_landmarks(face, rotation_matrix): # pylint: disable=c-extension-no-member """ Rotate the landmarks and bounding box for faces found in rotated images. Pass in a DetectedFace object, Alignments dict or BoundingBox""" logger.trace("Rotating landmarks: (rotation_matrix: %s, type(face): %s", rotation_matrix, type(face)) if isinstance(face, DetectedFace): bounding_box = [[face.x, face.y], [face.x + face.w, face.y], [face.x + face.w, face.y + face.h], [face.x, face.y + face.h]] landmarks = face.landmarksXY elif isinstance(face, dict): bounding_box = [[face.get("x", 0), face.get("y", 0)], [face.get("x", 0) + face.get("w", 0), face.get("y", 0)], [face.get("x", 0) + face.get("w", 0), face.get("y", 0) + face.get("h", 0)], [face.get("x", 0), face.get("y", 0) + face.get("h", 0)]] landmarks = face.get("landmarksXY", list()) elif isinstance(face, BoundingBox): bounding_box = [[face.left, face.top], [face.right, face.top], [face.right, face.bottom], [face.left, face.bottom]] landmarks = list() else: raise ValueError("Unsupported face type") logger.trace("Original landmarks: %s", landmarks) rotation_matrix = cv2.invertAffineTransform( # pylint: disable=no-member rotation_matrix) rotated = list() for item in (bounding_box, landmarks): if not item: continue points = np.array(item, np.int32) points = np.expand_dims(points, axis=0) transformed = cv2.transform(points, # pylint: disable=no-member rotation_matrix).astype(np.int32) rotated.append(transformed.squeeze()) # Bounding box should follow x, y planes, so get min/max # for non-90 degree rotations pt_x = min([pnt[0] for pnt in rotated[0]]) pt_y = min([pnt[1] for pnt in rotated[0]]) pt_x1 = max([pnt[0] for pnt in rotated[0]]) pt_y1 = max([pnt[1] for pnt in rotated[0]]) if isinstance(face, DetectedFace): face.x = int(pt_x) face.y = int(pt_y) face.w = int(pt_x1 - pt_x) face.h = int(pt_y1 - pt_y) face.r = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face.landmarksXY = rotated_landmarks elif isinstance(face, dict): face["x"] = int(pt_x) face["y"] = int(pt_y) face["w"] = int(pt_x1 - pt_x) face["h"] = int(pt_y1 - pt_y) face["r"] = 0 if len(rotated) > 1: rotated_landmarks = [tuple(point) for point in rotated[1].tolist()] face["landmarksXY"] = rotated_landmarks else: rotated_landmarks = BoundingBox(pt_x, pt_y, pt_x1, pt_y1) face = rotated_landmarks logger.trace("Rotated landmarks: %s", rotated_landmarks) return face
import numpy as np
from math import *
import cv2


def dist((x1, y1), (x2, y2)):
    return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)


src = cv2.imread("mandril.bmp", 0)
(r, c) = src.shape

M = cv2.getRotationMatrix2D((c/2, r/2), 45, 0.5)
Mi = cv2.invertAffineTransform(M)

i1 = src.copy()
(kp1, d1) = cv2.SIFT().detectAndCompute(i1, None)

i2 = src.copy()
i2 = cv2.warpAffine(i2, M, (r, c))
(kp2, d2) = cv2.SIFT().detectAndCompute(i2, None)

FLANN_INDEX_KDTREE = 0
index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)
matches = flann.match(d1, d2)

k = 0
for m in matches:
    p1 = kp1[m.queryIdx].pt
    p2 = kp2[m.trainIdx].pt
    p2 = np.dot(Mi, (p2[0], p2[1], 1.0))
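# The truncated loop above projects each matched keypoint from the rotated image back through
# Mi. One illustrative way to finish the evaluation (not part of the original snippet) is to
# count the matches that land within a small tolerance of their partner; the 2-pixel threshold
# is an assumption.
good = 0
for m in matches:
    p1 = kp1[m.queryIdx].pt
    p2 = kp2[m.trainIdx].pt
    p2_back = np.dot(Mi, (p2[0], p2[1], 1.0))
    if dist(p1, (p2_back[0], p2_back[1])) < 2.0:
        good += 1
print("%d / %d matches consistent with the known transform" % (good, len(matches)))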
def _mask_post_processing(self, mask): target_mask = (mask > cfg.TRACK.MASK_THERSHOLD) target_mask = target_mask.astype(np.uint8) if cv2.__version__[-5] == '4': contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) else: _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) cnt_area = [cv2.contourArea(cnt) for cnt in contours] if len(contours) != 0 and np.max(cnt_area) > 100: contour = contours[np.argmax(cnt_area)] polygon = contour.reshape(-1, 2) ## the following code estimate the shape angle with ellipse ## then fit a axis-aligned bounding box on the rotated image ellipseBox = cv2.fitEllipse(polygon) # get the center of the ellipse and the angle angle = ellipseBox[-1] #print(angle) center = np.array(ellipseBox[0]) axes = np.array(ellipseBox[1]) # get the ellipse box ellipseBox = cv2.boxPoints(ellipseBox) #compute the rotation matrix rot_mat = cv2.getRotationMatrix2D((center[0],center[1]), angle, 1.0) # rotate the ellipse box one = np.ones([ellipseBox.shape[0],3,1]) one[:,:2,:] = ellipseBox.reshape(-1,2,1) ellipseBox = np.matmul(rot_mat, one).reshape(-1,2) # to xmin ymin xmax ymax xs = ellipseBox[:,0] xmin, xmax = np.min(xs), np.max(xs) ys = ellipseBox[:,1] ymin, ymax = np.min(ys), np.max(ys) ellipseBox = [xmin, ymin, xmax, ymax] # rotate the contour one = np.ones([polygon.shape[0],3,1]) one[:,:2,:] = polygon.reshape(-1,2,1) polygon = np.matmul(rot_mat, one).astype(int).reshape(-1,2) # remove points outside of the ellipseBox logi = polygon[:,0]<=xmax logi = np.logical_and(polygon[:,0]>=xmin, logi) logi = np.logical_and(polygon[:,1]>=ymin, logi) logi = np.logical_and(polygon[:,1]<=ymax, logi) polygon = polygon[logi,:] x,y,w,h = cv2.boundingRect(polygon) bRect = [x, y, x+w, y+h] # get the intersection of ellipse box and the rotated box x1, y1, x2, y2 = ellipseBox[0], ellipseBox[1], ellipseBox[2], ellipseBox[3] tx1, ty1, tx2, ty2 = bRect[0], bRect[1], bRect[2], bRect[3] xx1 = min(max(tx1, x1, 0), target_mask.shape[1]-1) yy1 = min(max(ty1, y1, 0), target_mask.shape[0]-1) xx2 = max(min(tx2, x2, target_mask.shape[1]-1), 0) yy2 = max(min(ty2, y2, target_mask.shape[0]-1), 0) rotated_mask = cv2.warpAffine(target_mask, rot_mat,(target_mask.shape[1],target_mask.shape[0])) #refinement alpha_factor = cfg.TRACK.FACTOR while True: if np.sum(rotated_mask[int(yy1):int(yy2),int(xx1)]) < (yy2-yy1)*alpha_factor: temp = xx1+(xx2-xx1)*0.02 if not (temp >= target_mask.shape[1]-1 or xx2-xx1 < 1): xx1 = temp else: break else: break while True: if np.sum(rotated_mask[int(yy1):int(yy2),int(xx2)]) < (yy2-yy1)*alpha_factor: temp = xx2-(xx2-xx1)*0.02 if not (temp <= 0 or xx2-xx1 < 1): xx2 = temp else: break else: break while True: if np.sum(rotated_mask[int(yy1),int(xx1):int(xx2)]) < (xx2-xx1)*alpha_factor: temp = yy1+(yy2-yy1)*0.02 if not (temp >= target_mask.shape[0]-1 or yy2-yy1 < 1): yy1 = temp else: break else: break while True: if np.sum(rotated_mask[int(yy2),int(xx1):int(xx2)]) < (xx2-xx1)*alpha_factor: temp = yy2-(yy2-yy1)*0.02 if not (temp <= 0 or yy2-yy1 < 1): yy2 = temp else: break else: break prbox = np.array([[xx1,yy1],[xx2,yy1],[xx2,yy2],[xx1,yy2]]) # inverse of the rotation matrix M_inv = cv2.invertAffineTransform(rot_mat) # project the points back to image coordinate one = np.ones([prbox.shape[0],3,1]) one[:,:2,:] = prbox.reshape(-1,2,1) prbox = np.matmul(M_inv, one).reshape(-1,2) rbox_in_img = prbox else: # empty mask location = cxy_wh_2_rect(self.center_pos, self.size) rbox_in_img = np.array([[location[0], location[1]], [location[0] + 
location[2], location[1]], [location[0] + location[2], location[1] + location[3]], [location[0], location[1] + location[3]]]) return rbox_in_img
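# A compact sketch of the rotation trick used above, skipping the refinement loops: fit an
# ellipse to the largest contour, rotate so it is axis-aligned, take the bounding box there,
# and carry its corners back with the inverted rotation matrix. Assumes the OpenCV 4
# findContours signature and a mask containing at least one blob of five or more points.
import cv2
import numpy as np

def rotated_box_from_mask(mask):
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_NONE)
    cnt = max(contours, key=cv2.contourArea)
    (cx, cy), _, angle = cv2.fitEllipse(cnt)
    rot_mat = cv2.getRotationMatrix2D((cx, cy), angle, 1.0)
    pts = cv2.transform(cnt.reshape(-1, 1, 2).astype(np.float32), rot_mat).reshape(-1, 2)
    x, y, w, h = cv2.boundingRect(pts.astype(np.int32))
    box = np.float32([[x, y], [x + w, y], [x + w, y + h], [x, y + h]]).reshape(-1, 1, 2)
    return cv2.transform(box, cv2.invertAffineTransform(rot_mat)).reshape(-1, 2)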
def findRotMaxRect(data_in,flag_opt=False,flag_parallel = False, nbre_angle=10,flag_out=None,flag_enlarge_img=False,limit_image_size=300): ''' flag_opt : True only nbre_angle are tested between 90 and 180 and a opt descent algo is run on the best fit False 100 angle are tested from 90 to 180. flag_parallel: only valid when flag_opt=False. the 100 angle are run on multithreading flag_out : angle and rectangle of the rotated image are output together with the rectangle of the original image flag_enlarge_img : the image used in the function is double of the size of the original to ensure all feature stay in when rotated limit_image_size : control the size numbre of pixel of the image use in the function. this speeds up the code but can give approximated results if the shape is not simple ''' #time_s = datetime.datetime.now() #make the image square #---------------- nx_in, ny_in = data_in.shape if nx_in != ny_in: n = max([nx_in,ny_in]) data_square = np.ones([n,n]) xshift = (n-nx_in)/2 yshift = (n-ny_in)/2 if yshift == 0: data_square[xshift:(xshift+nx_in),: ] = data_in[:,:] else: data_square[: ,yshift:(yshift+ny_in)] = data_in[:,:] else: xshift = 0 yshift = 0 data_square = data_in #apply scale factor if image bigger than limit_image_size #---------------- if data_square.shape[0] > limit_image_size: data_small = cv2.resize(data_square,(limit_image_size, limit_image_size),interpolation=0) scale_factor = 1.*data_square.shape[0]/data_small.shape[0] else: data_small = data_square scale_factor = 1 # set the input data with an odd number of point in each dimension to make rotation easier #---------------- nx,ny = data_small.shape nx_extra = -nx; ny_extra = -ny if nx%2==0: nx+=1 nx_extra = 1 if ny%2==0: ny+=1 ny_extra = 1 data_odd = np.ones([data_small.shape[0]+max([0,nx_extra]),data_small.shape[1]+max([0,ny_extra])]) data_odd[:-nx_extra, :-ny_extra] = data_small nx,ny = data_odd.shape nx_odd,ny_odd = data_odd.shape if flag_enlarge_img: data = np.zeros([2*data_odd.shape[0]+1,2*data_odd.shape[1]+1]) + 1 nx,ny = data.shape data[nx/2-nx_odd/2:nx/2+nx_odd/2,ny/2-ny_odd/2:ny/2+ny_odd/2] = data_odd else: data = np.copy(data_odd) nx,ny = data.shape #print (datetime.datetime.now()-time_s).total_seconds() if flag_opt: myranges_brute = ([(90.,180.),]) coeff0 = np.array([0.,]) coeff1 = optimize.brute(residual, myranges_brute, args=(data,), Ns=nbre_angle, finish=None) popt = optimize.fmin(residual, coeff1, args=(data,), xtol=5, ftol=1.e-5, disp=False) angle_selected = popt[0] #rotation_angle = np.linspace(0,360,100+1)[:-1] #mm = [residual(aa,data) for aa in rotation_angle] #plt.plot(rotation_angle,mm) #plt.show() #pdb.set_trace() else: rotation_angle = np.linspace(90,180,100+1)[:-1] args_here=[] for angle in rotation_angle: args_here.append([angle,data]) if flag_parallel: # set up a pool to run the parallel processing cpus = multiprocessing.cpu_count() pool = multiprocessing.Pool(processes=cpus) # then the map method of pool actually does the parallelisation results = pool.map(residual_star, args_here) pool.close() pool.join() else: results = [] for arg in args_here: results.append(residual_star(arg)) argmin = np.array(results).argmin() angle_selected = args_here[argmin][0] rectangle, M_rect_max, RotData = get_rectangle_coord(angle_selected,data,flag_out=True) #rectangle, M_rect_max = get_rectangle_coord(angle_selected,data) #print (datetime.datetime.now()-time_s).total_seconds() #invert rectangle M_invert = cv2.invertAffineTransform(M_rect_max) rect_coord = [rectangle[:2], [rectangle[0],rectangle[3]] , 
rectangle[2:], [rectangle[2],rectangle[1]] ] #ax = plt.subplot(111) #ax.imshow(RotData.T,origin='lower',interpolation='nearest') #patch = patches.Polygon(rect_coord, edgecolor='k', facecolor='None', linewidth=2) #ax.add_patch(patch) #plt.show() rect_coord_ori = [] for coord in rect_coord: rect_coord_ori.append(np.dot(M_invert,[coord[0],(ny-1)-coord[1],1])) #transform to numpy coord of input image coord_out = [] for coord in rect_coord_ori: coord_out.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0)-xshift,\ scale_factor*round((ny-1)-coord[1]-(ny/2-ny_odd/2),0)-yshift]) coord_out_rot = [] coord_out_rot_h = [] for coord in rect_coord: coord_out_rot.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0)-xshift, \ scale_factor*round( coord[1]-(ny/2-ny_odd/2),0)-yshift ]) coord_out_rot_h.append( [ scale_factor*round( coord[0]-(nx/2-nx_odd/2),0), \ scale_factor*round( coord[1]-(ny/2-ny_odd/2),0) ]) #M = cv2.getRotationMatrix2D( ( (data_square.shape[0]-1)/2, (data_square.shape[1]-1)/2 ), angle_selected,1) #RotData = cv2.warpAffine(data_square,M,data_square.shape,flags=cv2.INTER_NEAREST,borderValue=1) #ax = plt.subplot(121) #ax.imshow(data_square.T,origin='lower',interpolation='nearest') #ax = plt.subplot(122) #ax.imshow(RotData.T,origin='lower',interpolation='nearest') #patch = patches.Polygon(coord_out_rot_h, edgecolor='k', facecolor='None', linewidth=2) #ax.add_patch(patch) #plt.show() #coord for data_in #---------------- #print scale_factor, xshift, yshift #coord_out2 = [] #for coord in coord_out: # coord_out2.append([int(np.round(scale_factor*coord[0]-xshift,0)),int(np.round(scale_factor*coord[1]-yshift,0))]) #print (datetime.datetime.now()-time_s).total_seconds() if flag_out is None: return coord_out elif flag_out == 'rotation': return coord_out, angle_selected, coord_out_rot else: print 'bad def in findRotMaxRect input. stop' pdb.set_trace()
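# The angle search above runs scipy.optimize.brute over a coarse grid of angles and then
# refines the best candidate with optimize.fmin. The same coarse-then-fine pattern on a toy
# residual (the real residual, which rotates the data and measures the inscribed rectangle,
# is not reproduced here):
import numpy as np
from scipy import optimize

def toy_residual(x, target):
    angle = float(np.atleast_1d(x)[0])
    return float((np.cos(np.deg2rad(angle)) - target) ** 2)

coarse = optimize.brute(toy_residual, ((90., 180.),), args=(-0.5,), Ns=10, finish=None)
best = optimize.fmin(toy_residual, coarse, args=(-0.5,), xtol=5, ftol=1.e-5, disp=False)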
def transformAndCalcBest(groundTruthFolder, testPath, saveFolder, poorMapsFolder,hit,miss,unknown,feature='sift'): groundTruthList = deque() ROCs = deque() FLANN_INDEX_KDTREE = 0 out = 1 if feature == 'orb': feature_obj = cv2.ORB_create() if feature == 'sift': feature_obj = cv2.xfeatures2d.SIFT_create() for fileN in os.listdir(groundTruthFolder): if not fileN.endswith(".png"): continue imFile = os.path.join(groundTruthFolder,fileN) print("Loading: %s"%(imFile)) im = cv2.imread(imFile,0) print("Loaded!") kp, des = feature_obj.detectAndCompute(im,None) des = np.asarray(des,np.float32) groundTruthList.append((fileN,os.path.join(saveFolder,os.path.splitext(fileN)[0]),im,des,kp)) if not os.path.isdir(saveFolder): os.makedirs(saveFolder) # for gt_file,gt_folder,gt_im,gt_des,gt_kp in groundTruthList: # if not os.path.isdir(gt_folder): # os.makedirs(gt_folder) for fileN in os.listdir(testPath): if not fileN.endswith(".pgm"): continue imFile = os.path.join(testPath,fileN) imFileP = os.path.join(poorMapsFolder,fileN) if out>0: print("Loading: %s"%(imFile)) im = cv2.imread(imFile,0) if out>0: print("Loaded!") kp, des = feature_obj.detectAndCompute(im,None) des = np.asarray(des,np.float32) best_fpr=best_tpr=best_mtr=best_val=0 best_file=best_folder=None best_im=None for gt_file,gt_folder,gt_im,gt_des,gt_kp in groundTruthList: if out>1: print("With: %s"%(gt_file)) height, width = gt_im.shape index_params = dict(algorithm = FLANN_INDEX_KDTREE, trees = 5) search_params = dict(checks = 50) flann = cv2.FlannBasedMatcher(index_params, search_params) matches = flann.knnMatch(des,gt_des,k=2) good = deque() for m,n in matches: if m.distance < 0.7*n.distance: good.append(m) if out>1: print("Found %f points"%(len(good))) if len(good)<1: if out>1: print("Not enough points!") continue dst_pts = np.float32([kp[m.queryIdx].pt for m in good]).reshape(-1,1,2) src_pts = np.float32([gt_kp[m.trainIdx].pt for m in good]).reshape(-1,1,2) M = cv2.estimateRigidTransform(src_pts,dst_pts,False) if M!=None: M = np.asarray(M,np.float32) M = cv2.invertAffineTransform(M) imW = cv2.warpAffine(im,M,(width,height),cv2.INTER_NEAREST) imW = imW-hit imW = cv2.threshold(imW,unknown-2-hit,255,3) imW = miss-hit-imW[1] imW = cv2.threshold(imW,miss-unknown,255,3) imW = miss-imW[1] fpr, tpr, mtr = ROCCalc.calcROC(gt_im,imW,miss,hit) new_best_val = ((tpr-fpr)/2.0)*math.sqrt(2) if (new_best_val>best_val): best_val = new_best_val best_fpr = fpr best_tpr = tpr best_mtr = mtr best_file = gt_file best_folder = gt_folder best_im = imW else: if out>1: print("No homography found!") if (best_im!=None): ROCs.append([fileN,[best_fpr,best_tpr],best_mtr,best_file]) if not os.path.isdir(best_folder): os.makedirs(best_folder) im_file = os.path.join(best_folder,fileN) # print("\n") # print(im_file) # print("\n") cv2.imwrite(im_file,best_im) else: if not os.path.isdir(poorMapsFolder): os.makedirs(poorMapsFolder) im_file = os.path.join(poorMapsFolder,fileN) cv2.imwrite(im_file,im) # break return ROCs
def process_splits(trans, conf, splits, norm2, ctc_f, rot_mat, boxt, draw, iou, debug = False, alow_non_dict = False): ''' Summary : Split the transciption and corresponding bounding-box based on spaces predicted by recognizer FCN. Description : Parameters ---------- trans : string String containing the predicted transcription for the corresponding predicted bounding-box. conf : list List containing sum of confidence for all the character by recognizer FCN, start and end position in bounding-box for generated transciption. splits : list List containing index of position of predicted spaces by the recognizer FCN. norm2 : matrix Matrix containing the cropped bounding-box predicted by localization FCN in the originial image. ctc_f : matrix Matrix containing output of recognizer FCN for the given input bounding-box. rot_mat : matrix Rotation matrix returned by get_normalized_image function. boxt : tuple of tuples Tuple of tuples containing parametes of predicted bounding-box by localization FCN. draw : matrix Matrix containing input image. debug : boolean Boolean parameter representing debug mode, if it is True visualization boxes are generated. Returns ------- boxes_out : list of tuples List of tuples containing predicted bounding-box parameters, predicted transcription and mean confidence score from the recognizer. ''' spl = trans.split(" ") boxout = cv2.boxPoints(boxt) start_f = 0 mean_conf = conf[0, 0] / len(trans) # Overall confidence of recognizer FCN boxes_out = [] for s in range(len(spl)): text = spl[s] end_f = conf[0, 2] if s < len(spl) - 1: try: if splits[0, s] > start_f: end_f = splits[0, s] # New ending point of bounding-box transcription except IndexError: pass scalex = norm2.shape[1] / float(ctc_f.shape[0]) poss = start_f * scalex pose = (end_f + 2) * scalex rect = [[poss, 0], [pose, 0], \ [pose, norm2.shape[0] - 1], [poss, norm2.shape[0] - 1]] rect = np.array(rect) #rect[:, 0] += boxt[0][0] #rect[:, 1] += boxt[0][1] int_t = cv2.invertAffineTransform(rot_mat) dst_rect = np.copy(rect) dst_rect[:,0] = int_t[0,0]*rect[:,0] + int_t[0,1]*rect[:, 1] + int_t[0,2] dst_rect[:,1] = int_t[1,0]*rect[:,0] + int_t[1,1]*rect[:, 1] + int_t[1,2] tx = np.sum(dst_rect[:,0]) / 4.0 ty = np.sum(dst_rect[:,1]) / 4.0 br = cv2.boundingRect(boxout) tx += br[0] ty += br[1] twidth = (pose - poss) #twidth = (pose - poss) / ext_factor theight = norm2.shape[0] box_back = ( (tx, ty), (twidth, theight * 0.9), boxt[2] ) if debug: boxout_u = cv2.boxPoints(box_back) vis.draw_box_points(draw, boxout_u, color = (0, 255, 0)) cv2.imshow('draw', draw) if len(text.strip()) == 0: print("zero length text!") continue textc = text.replace(".", "").replace(":", "").replace("!", "").replace("?", "").replace(",", "").replace("/", "").replace("-", "").replace("$", "").replace("'", "").replace("(", "").replace(")", "").replace("+", "") if textc.endswith("'s"): textc = textc[:-2] is_dict = cmp_trie.is_dict(textc.encode('utf-8')) or textc.isdigit() or alow_non_dict if len(text) > 2 and ( text.isdigit() or is_dict): boxes_out.append( (box_back, (text, mean_conf, is_dict, iou) ) ) start_f = end_f + 1 return boxes_out
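# The manual back-projection above (int_t = cv2.invertAffineTransform(rot_mat), then applying
# the two rows of int_t to each rectangle coordinate by hand) is equivalent to a single
# cv2.transform call. A small standalone sketch of just that step, with illustrative names:
import cv2
import numpy as np

def unrotate_rect(rect_pts, rot_mat):
    # rect_pts: 4x2 corner coordinates in the rotated/normalised crop
    inv = cv2.invertAffineTransform(rot_mat)
    pts = np.asarray(rect_pts, np.float32).reshape(-1, 1, 2)
    return cv2.transform(pts, inv).reshape(-1, 2)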