def visualize_bee_points(image, mask, points):
    # Need to transpose from (3, 720, 1280) tensor to (720, 1280, 3) image
    image = np.asarray(image).transpose(1, 2, 0)
    mask = np.asarray(mask).squeeze().transpose(0, 1)
    #print(image.dtype)
    #print(image.shape)
    #print(mask.dtype)
    #print(mask.shape)

    # Convert back from 0-1.0 to 0-255
    image = cv2.normalize(src=image, dst=None, alpha=0, beta=255,
                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    #print("Shapes: %s, %s" % (image.shape, mask.shape))
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    mask = cv2.normalize(src=mask, dst=None, alpha=0, beta=255,
                         norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    if 0:
        # Dilate for visibility
        # You may or may not want to do this depending on your value of sigma
        mask = cv2.dilate(mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

    #helper.show_image("img", image)
    cv2.imshow("img", image)
    helper.show_image("mask", mask)
def __getitem__(self, idx):
    if torch.is_tensor(idx):
        idx = idx.tolist()

    # TODO - if jpg doesn't exist, check for png
    img_name = os.path.splitext(self.labels_file_list[idx])[0] + ".jpg"
    #print("Reading ", img_name)
    image = io.imread(img_name)
    points = self.__read_points(self.labels_file_list[idx])

    # Resize image and points
    image, points = self.__resize(image, points)

    # Create a 2D binary mask with background 0 and our points 1
    mask = np.zeros(image.shape[:2], dtype=np.float32)
    pts_x, pts_y = points.T
    mask[pts_x, pts_y] = 1.0

    if 1:
        # This assert will get triggered if you update input resolution and force
        # you to re-evaluate the Gaussian that represents GT instances
        assert self.input_size == (544, 960), \
            "You changed input resolution, is your Gaussian window and sigma still valid?"
        # With (736, 1280) a bee width is ~17 pix
        mask = cv2.GaussianBlur(mask, ksize=(13, 13), sigmaX=self.sigma)

    if 0:
        show_mask = deepcopy(mask)
        show_mask = cv2.normalize(src=show_mask, dst=None, alpha=0, beta=255,
                                  norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
        helper.show_image("Gaussian mask", show_mask)

    image, mask = self.__transform(image, mask)
    sample = [image, mask, points]
    return sample
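# Rough sketch (an assumption, not this repo's actual code) of the collate_fn
# referenced in inference() below: images and masks stack into batch tensors,
# while the variable-length point arrays are simply kept in a Python list.
def bee_collate_fn(batch):
    images = torch.stack([sample[0] for sample in batch])
    masks = torch.stack([sample[1] for sample in batch])
    points = [sample[2] for sample in batch]  # variable length per sample
    return images, masks, points

# Hypothetical usage:
#   DataLoader(bee_ds, batch_size=4, shuffle=True, collate_fn=bee_collate_fn)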
def _test_bilibili():
    captcha_provider = BilibiliCaptchaProvider()
    print(captcha_provider.is_valid_seq("".join(random.sample(captcha_provider.chars, 5))))
    print(captcha_provider.is_valid_seq("".join(random.sample(captcha_provider.chars, 4)) + " "))
    show_image(captcha_provider.fetch())
    seq = captcha_provider.canonicalize_seq(input("Input the answer sequence to verify: "))
    print(captcha_provider.verify(seq))
def inference(args):
    model, dataloader, device = setup_model_dataloader(args, batch_size=1)

    # Create the iterator once; calling iter() inside the loop would restart
    # the DataLoader and keep returning the same first batch.
    data_iter = iter(dataloader)
    for _ in range(len(dataloader)):
        # Because we have variable length points coming from Dataloader, there's some extra
        # overhead in managing the input and GT data. See collate_fn().
        inputs, mask, points = next(data_iter)
        inputs = torch.unsqueeze(inputs[0], 0)
        points = points[0]

        pred = helper.model_forward(model, inputs, device, ENABLE_TIMING)

        # For visualization, convert to viewable image
        input_img = inputs.cpu().numpy().squeeze().transpose(1, 2, 0)
        input_img = cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)
        input_img = helper.normalize_uint8(input_img)
        #helper.show_image("input_img", input_img)

        # Normalize for viewing
        pred_norm = helper.normalize_uint8(pred)
        #pred_norm = pred * 255
        #helper.show_image("pred", pred_norm)
        cv2.imshow("pred", pred_norm)
        #print(np.max(pred_norm))

        start_time = time.time()
        centroids = helper.get_centroids(pred)
        if ENABLE_TIMING:
            print("get_centroids time: %s s" % (time.time() - start_time))
            # get_centroids time: 0.009763717651367188 s

        # Convert pred to color
        pred_norm_color = cv2.cvtColor(pred_norm, cv2.COLOR_GRAY2BGR)
        color_mask = deepcopy(pred_norm_color)
        # Color it by zeroing specific channels
        color_mask[:, :, [0, 2]] = 0  # green
        #color_mask[:, :, [0, 1]] = 0  # red

        # Create a colored overlay
        overlay = cv2.addWeighted(input_img, 0.5, color_mask, 0.5, 0.0, dtype=cv2.CV_8UC3)
        #helper.show_image("Heatmap Overlay", overlay)
        cv2.imshow("Heatmap Overlay", overlay)
        #stacked = np.hstack((pred_norm_color, overlay))
        #helper.show_image("pred", stacked)

        draw = True
        if draw and len(centroids) > 0:
            if 1:
                # Draw GT
                for point in points:
                    cv2.circle(input_img, tuple((point[1], point[0])), 5, (0, 255, 0), cv2.FILLED)
            for centroid in centroids:
                cv2.circle(input_img, tuple((centroid[1], centroid[0])), 5, (0, 0, 255), cv2.FILLED)
            helper.show_image("Predictions", input_img)
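# Hedged sketch of what helper.get_centroids() might be doing internally (an
# assumption about the helper module, not its actual implementation): threshold
# the predicted heatmap and take the centroid of each connected blob.
def get_centroids_sketch(heatmap, thresh=0.5):
    binary = (heatmap > thresh).astype(np.uint8)
    # Label 0 is the background; centroids come back in (x, y) order
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary)
    # Return (row, col) points, matching how GT points are indexed elsewhere
    return [(int(round(cy)), int(round(cx))) for cx, cy in centroids[1:]]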
def test(args):
    model, dataloader, device = setup_model_dataloader(args, batch_size=1)
    num_tp = 0
    num_fp = 0
    num_fn = 0

    # Create the iterator once so each loop iteration gets a new sample
    data_iter = iter(dataloader)
    for i in range(len(dataloader)):
        inputs, mask, gt_points = next(data_iter)
        inputs = torch.unsqueeze(inputs[0], 0)
        gt_points = gt_points[0]

        # Forward pass
        pred = helper.model_forward(model, inputs, device, ENABLE_TIMING)
        # Get centroids from resulting heatmap
        pred_pts = helper.get_centroids(pred)

        # Compare pred pts to GT
        pairs = helper.calculate_pairs(pred_pts, gt_points, args.threshold)
        if len(pairs) > 0:
            # Calculate stats on the predictions
            sample_tp, sample_fp, sample_fn = helper.calculate_stats(pred_pts, gt_points, pairs)
            num_tp += sample_tp
            num_fp += sample_fp
            num_fn += sample_fn
            if args.debug:
                ic("TP: ", sample_tp)
                ic("FP: ", sample_fp)
                ic("FN: ", sample_fn)
        elif args.debug:
            print("No matches found")

        if args.debug:
            # For visualization, convert to viewable image
            input_img = inputs.cpu().numpy().squeeze().transpose(1, 2, 0)
            input_img = cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)
            input_img = helper.normalize_uint8(input_img)
            #helper.show_image("input_img", input_img)

            # Draw GT in green
            for gt_pt in gt_points:
                cv2.circle(input_img, tuple((gt_pt[1], gt_pt[0])), 5, (0, 255, 0), cv2.FILLED)
            # Draw all preds in red
            for pred_pt in pred_pts:
                cv2.circle(input_img, tuple((pred_pt[1], pred_pt[0])), 5, (0, 0, 255), cv2.FILLED)
            # Draw matched preds in yellow, and matched GTs in blue.
            # This will overwrite the red spots for good matches.
            # Note that pairs looks like: [(0, 2), (2, 1), (3, 3), (4, 4), (5, 0)]
            # Where each entry is (gt_idx, pred_idx)
            for pair in pairs:
                gt_pt = gt_points[pair[0]]
                pred_pt = pred_pts[pair[1]]
                cv2.circle(input_img, tuple((gt_pt[1], gt_pt[0])), 5, (255, 0, 0), cv2.FILLED)
                cv2.circle(input_img, tuple((pred_pt[1], pred_pt[0])), 5, (0, 255, 255), cv2.FILLED)
            cv2.namedWindow("input_img")
            helper.show_image("input_img", input_img)

    print()
    ic("Confusion matrix:")
    conf_mat_id = np.array([["TP", "FP"], ["FN", "TN"]])
    ic(conf_mat_id)
    conf_mat = np.array([[num_tp, num_fp], [num_fn, 0]])
    ic(conf_mat)

    precision = num_tp / (num_tp + num_fp)
    recall = num_tp / (num_tp + num_fn)
    f1 = 2 * precision * recall / (precision + recall)
    ic("Precision: ", precision)
    ic("Recall: ", recall)
    ic("F1 Score: ", f1)

    model_dir = os.path.abspath(args.model_dir)
    result_file = os.path.join(model_dir, "results.txt")
    with open(result_file, "w") as f:
        f.write("Confusion matrix:\n")
        f.write(str(conf_mat_id) + '\n')
        f.write(str(conf_mat) + '\n')
        f.write("Precision: %f\n" % precision)
        f.write("Recall: %f\n" % recall)
        f.write("F1 Score: %f\n" % f1)
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0

    for i in range(num):
        image = time_func("fetch" if num == 1 else None, lambda: provider.fetch())
        if show_img and num == 1:
            show_image(image)
        if num == 1:
            success, seq, weak_confidence = recognizer.recognize(
                image, save_intermediate=True, verbose=True,
                reconstruct=reconstruct, force_partition=force_partition)
        else:
            if i == 0:
                success, seq, weak_confidence = recognizer.recognize(
                    image, save_intermediate=False, verbose=False,
                    reconstruct=reconstruct, force_partition=force_partition)
            else:
                success, seq, weak_confidence = recognizer.recognize(
                    image, save_intermediate=False, verbose=False,
                    reconstruct=False, force_partition=force_partition)
        if success:
            print(seq)
            result = time_func("verify" if num == 1 else None, lambda: provider.verify(seq))
            if num == 1:
                print("Recognized seq is {}".format(result))
            if result:
                if weak_confidence:
                    right_weak += 1
                else:
                    right_strong += 1
            else:
                if weak_confidence:
                    wrong_weak += 1
                else:
                    wrong_strong += 1
        else:
            fail += 1

    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    print("Total success rate: ", (right_weak + right_strong) / num)
    print("Success rate when confident: ",
          (right_strong + right_weak) / (num - fail) if num - fail > 0 else 0)
    print("Success rate when strongly confident: ",
          right_strong / (right_strong + wrong_strong) if right_strong + wrong_strong > 0 else 0)
    print("Success rate when weakly confident: ",
          right_weak / (right_weak + wrong_weak) if right_weak + wrong_weak > 0 else 0)
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
def track_bees(args):
    # Handle file not found
    if not os.path.exists(args.video_file_in):
        sys.exit("Error. File not found. (-2)")

    model, device = setup_model_dataloader(args, batch_size=1)

    vid = cv2.VideoCapture(args.video_file_in)
    ic("Processing ", args.video_file_in)

    # Get some metadata
    num_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = vid.get(cv2.CAP_PROP_FPS)
    ic("Num frames: ", num_frames)
    ic("Width x Height: ", (width, height))
    # I don't think fps is accurate!
    ic("Source FPS: ", fps)

    if args.fps != 0:
        frame_duration = 1000 // int(args.fps)  # ms
    else:
        frame_duration = 1000 // int(fps)  # ms

    # Don't actually need the dataset. Just need the input dimensions
    bee_ds = BeePointDataset(root_dir="/dev/null")
    input_size = bee_ds.input_size
    ic(bee_ds.input_size)

    # Vars for tracking
    prev_pts = np.array([])
    running_pairs = []
    # Vars for profiling
    total_exec_time_accumulator = 0
    exec_time_accumulator = 0
    num_exec_frames = 0

    for frame_num in range(num_frames):
        # Read frame from video
        ret, frame = vid.read()
        if frame is None:
            break

        if args.enable_timing:
            start_time = time.time()

        # Rotate if rotation is set
        if args.rotate == -90:
            frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif args.rotate == 90:
            frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

        # Resize to network input size
        frame = cv2.resize(frame, (input_size[1], input_size[0]))
        if 0:
            cv2.imshow("Video", frame)
            key = cv2.waitKey(30)
            if key == ord('q') or key == 27:
                break

        # Convert to tensor and forward pass
        tensor_frame = TF.to_tensor(frame)
        tensor_frame = tensor_frame.to(device)
        tensor_frame = torch.unsqueeze(tensor_frame, 0)
        pred = helper.model_forward(model, tensor_frame, device, ENABLE_TIMING)

        # Get prediction centroids (predicted points)
        pred_pts = helper.get_centroids(pred)
        num_bees = len(pred_pts)

        # Since what we really care about is # bees, stop time profiling here.
        # The rest is for visualization
        if args.enable_timing and frame_num > 0:
            iter_time = time.time() - start_time
            total_exec_time_accumulator += iter_time
            exec_time_accumulator += iter_time
            num_exec_frames += 1
            if frame_num % int(fps) == 0:
                avg_exec_time = exec_time_accumulator / num_exec_frames
                ic("Avg exec time: ", avg_exec_time)
                exec_time_accumulator = 0
                num_exec_frames = 0

        # Use bipartite graph minimum weight matching to associate detections
        if len(pred_pts) > 0 and len(prev_pts) > 0 and not args.disable_tracking:
            pairs = helper.calculate_pairs(prev_pts, pred_pts, args.threshold)
            # Extract actual points based on indices in original arrays
            point_pairs = []
            for pair in pairs:
                point_pairs.append((prev_pts[pair[1]], pred_pts[pair[0]]))
            running_pairs.append(point_pairs)
            if len(running_pairs) > args.tracker_frame_len:
                running_pairs.pop(0)
            # Draw the tracking lines
            frame = draw_tracking_lines(frame, running_pairs)
        elif len(running_pairs) > 0:
            running_pairs.pop(0)

        if len(pred_pts) > 0:
            for pred_pt in pred_pts:
                cv2.circle(frame, tuple((pred_pt[1], pred_pt[0])), 5, VIS_COLOR, cv2.FILLED)

        # Draw # of bees in text on bottom left of image
        num_bees_text = "# Bees: %i" % num_bees
        bottom_left = (0, frame.shape[0] - 20)
        frame = cv2.putText(frame, num_bees_text, org=bottom_left,
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1,
                            color=(255, 0, 0), thickness=2, lineType=cv2.LINE_AA)

        # Show results
        helper.show_image("Predictions", frame, delay=frame_duration)
        prev_pts = deepcopy(pred_pts)

    # Calculate the average processing fps.
    # num_frames-1 b/c we didn't count the first frame initialization delay
    avg_exec_time = total_exec_time_accumulator / (num_frames - 1)
    measured_fps = 1 / avg_exec_time
    ic("Overall avg exec time: ", avg_exec_time)
    ic("Overall FPS: ", measured_fps)
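# Hedged sketch of the bipartite minimum-weight matching used above via
# helper.calculate_pairs() (an assumption; the real helper may order the
# returned indices differently): pair up two point sets with the Hungarian
# algorithm and discard pairs farther apart than the distance threshold.
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist

def calculate_pairs_sketch(pts_a, pts_b, threshold):
    if len(pts_a) == 0 or len(pts_b) == 0:
        return []
    cost = cdist(np.asarray(pts_a, dtype=float), np.asarray(pts_b, dtype=float))
    rows, cols = linear_sum_assignment(cost)
    # Keep only matches within the distance threshold
    return [(r, c) for r, c in zip(rows, cols) if cost[r, c] <= threshold]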