Example #1
def visualize_bee_points(image, mask, points):
    # Need to transpose from (3, 720, 1280) tensor to (720, 1280, 3) image
    image = np.asarray(image).transpose(1, 2, 0)
    mask = np.asarray(mask).squeeze()
    #print(image.dtype)
    #print(image.shape)
    #print(mask.dtype)
    #print(mask.shape)
    # Convert back from 0-1.0 to 0-255
    image = cv2.normalize(src=image, dst=None, \
     alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)

    #print("Shapes: %s, %s" % (image.shape, mask.shape))
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    mask = cv2.normalize(src=mask, dst=None, \
     alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
    if 0:
        # Dilate for visibility
        # You may or may not want to do this depending on your value of sigma
        mask = cv2.dilate(mask,
                          cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)
    #helper.show_image("img", image)
    cv2.imshow("img", image)
    helper.show_image("mask", mask)
Example #2
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # TODO - if jpg doesn't exist, check for png
        img_name = os.path.splitext(self.labels_file_list[idx])[0] + ".jpg"
        #print("Reading ", img_name)
        image = io.imread(img_name)
        points = self.__read_points(self.labels_file_list[idx])

        # Resize image and points
        image, points = self.__resize(image, points)

        # Create a 2D binary mask with background 0 and our points 1
        mask = np.zeros(image.shape[:2], dtype=np.float32)
        # Points are stored as (row, col), so they index the mask directly
        pts_row, pts_col = points.T
        mask[pts_row, pts_col] = 1.0
        if 1:
            # This assert triggers if you update the input resolution, forcing you to
            # re-evaluate the Gaussian that represents GT instances
            assert self.input_size == (544, 960), \
             "You changed input resolution; are your Gaussian window and sigma still valid?"
            # With (736, 1280) a bee width is ~17 pix
            mask = cv2.GaussianBlur(mask, ksize=(13, 13), sigmaX=self.sigma)
            if 0:
                show_mask = deepcopy(mask)
                show_mask = cv2.normalize(src=show_mask, dst=None, \
                 alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8U)
                helper.show_image("Gaussian mask", show_mask)

        image, mask = self.__transform(image, mask)

        sample = [image, mask, points]
        return sample
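The ground-truth construction above (a sparse point mask smoothed into a soft heatmap) can be reproduced in isolation. A minimal sketch, assuming points are given as (row, col) pairs and picking an illustrative sigma (the dataset's self.sigma is not shown):

import cv2
import numpy as np

def points_to_heatmap(shape, points, ksize=13, sigma=3.0):
    # Place each (row, col) point in a zero mask, then blur it into a soft target
    mask = np.zeros(shape, dtype=np.float32)
    for row, col in points:
        mask[row, col] = 1.0
    return cv2.GaussianBlur(mask, ksize=(ksize, ksize), sigmaX=sigma)

# Usage: two points on a 544x960 canvas
heatmap = points_to_heatmap((544, 960), [(100, 200), (300, 450)], sigma=3.0)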
Example #3
def _test_bilibili():
    captcha_provider = BilibiliCaptchaProvider()
    print(captcha_provider.is_valid_seq("".join(random.sample(captcha_provider.chars, 5))))
    print(captcha_provider.is_valid_seq("".join(random.sample(captcha_provider.chars, 4)) + " "))
    show_image(captcha_provider.fetch())
    seq = captcha_provider.canonicalize_seq(input("Input the answer sequence to verify: "))
    print(captcha_provider.verify(seq))
Example #4
def _test_bilibili():
    captcha_provider = BilibiliCaptchaProvider()
    print(
        captcha_provider.is_valid_seq(''.join(
            random.sample(captcha_provider.chars, 5))))
    print(
        captcha_provider.is_valid_seq(
            ''.join(random.sample(captcha_provider.chars, 4)) + ' '))
    show_image(captcha_provider.fetch())
    seq = captcha_provider.canonicalize_seq(
        input('Input the answer sequence to verify: '))
    print(captcha_provider.verify(seq))
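The two _test_bilibili variants above exercise the same provider API. For reference, this is the interface they assume, sketched from the calls alone rather than from the actual BilibiliCaptchaProvider implementation:

class CaptchaProviderInterface:
    # Hypothetical interface implied by the tests above; details are assumptions.
    chars = "abcdefghijklmnopqrstuvwxyz"  # placeholder alphabet

    def is_valid_seq(self, seq):
        # True if seq has the expected length and contains only known characters
        return len(seq) == 5 and all(c in self.chars for c in seq)

    def canonicalize_seq(self, seq):
        # Normalize user input, e.g. strip whitespace and lowercase it
        return seq.strip().lower()

    def fetch(self):
        # Download a captcha image from the service
        raise NotImplementedError

    def verify(self, seq):
        # Submit seq to the service and return whether it was accepted
        raise NotImplementedError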
Example #5
def inference(args):

    model, dataloader, device = setup_model_dataloader(args, batch_size=1)

    # Because we have variable-length points coming from the DataLoader, there's some
    # extra overhead in managing the input and GT data. See collate_fn().
    # Iterate the DataLoader directly; calling next(iter(...)) inside the loop would
    # restart the iterator on every pass.
    for inputs, mask, points in dataloader:
        inputs = torch.unsqueeze(inputs[0], 0)
        points = points[0]
        pred = helper.model_forward(model, inputs, device, ENABLE_TIMING)

        # For visualization, convert to viewable image
        input_img = inputs.cpu().numpy().squeeze().transpose(1, 2, 0)
        input_img = cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)
        input_img = helper.normalize_uint8(input_img)
        #helper.show_image("input_img", input_img)

        # Normalize for viewing
        pred_norm = helper.normalize_uint8(pred)
        #pred_norm = pred * 255
        #helper.show_image("pred", pred_norm)
        cv2.imshow("pred", pred_norm)
        #print(np.max(pred_norm))

        start_time = time.time()
        centroids = helper.get_centroids(pred)
        if ENABLE_TIMING:
            print("get_centroids time: %s s" % (time.time() - start_time))
            # get_centroids time: 0.009763717651367188 s

        # Convert pred to color
        pred_norm_color = cv2.cvtColor(pred_norm, cv2.COLOR_GRAY2BGR)
        color_mask = deepcopy(pred_norm_color)
        # Color it by zeroing specific channels
        color_mask[:, :, [0, 2]] = 0  # green
        #color_mask[:,:,[0,1]] = 0 # red

        # Create a colored overlay
        overlay = cv2.addWeighted(input_img,
                                  0.5,
                                  color_mask,
                                  0.5,
                                  0.0,
                                  dtype=cv2.CV_8UC3)
        #helper.show_image("Heatmap Overlay", overlay)
        cv2.imshow("Heatmap Overlay", overlay)

        #stacked = np.hstack((pred_norm_color, overlay))
        #helper.show_image("pred", stacked)

        draw = True
        if draw and len(centroids) > 0:
            if 1:  # Draw GT
                for point in points:
                    cv2.circle(input_img, tuple((point[1], point[0])), 5,
                               (0, 255, 0), cv2.FILLED)
            for centroid in centroids:
                cv2.circle(input_img, tuple((centroid[1], centroid[0])), 5,
                           (0, 0, 255), cv2.FILLED)
            helper.show_image("Predictions", input_img)
Example #6
def test(args):
    model, dataloader, device = setup_model_dataloader(args, batch_size=1)

    num_tp = 0
    num_fp = 0
    num_fn = 0

    # Iterate the DataLoader directly rather than re-creating an iterator each pass
    for inputs, mask, gt_points in dataloader:
        inputs = torch.unsqueeze(inputs[0], 0)
        gt_points = gt_points[0]
        # Forward pass
        pred = helper.model_forward(model, inputs, device, ENABLE_TIMING)
        # Get centroids from resulting heatmap
        pred_pts = helper.get_centroids(pred)

        # Compare pred pts to GT
        pairs = helper.calculate_pairs(pred_pts, gt_points, args.threshold)

        if len(pairs) > 0:
            # Calculate stats on the predictions
            sample_tp, sample_fp, sample_fn = helper.calculate_stats(
                pred_pts, gt_points, pairs)
            num_tp += sample_tp
            num_fp += sample_fp
            num_fn += sample_fn
            if args.debug:
                ic("TP: ", sample_tp)
                ic("FP: ", sample_fp)
                ic("FN: ", sample_fn)
        elif args.debug:
            print("No matches found")

        if args.debug:
            # For visualization, convert to viewable image
            input_img = inputs.cpu().numpy().squeeze().transpose(1, 2, 0)
            input_img = cv2.cvtColor(input_img, cv2.COLOR_RGB2BGR)
            input_img = helper.normalize_uint8(input_img)
            #helper.show_image("input_img", input_img)

            # Draw GT in green
            for gt_pt in gt_points:
                cv2.circle(input_img, tuple((gt_pt[1], gt_pt[0])), 5,
                           (0, 255, 0), cv2.FILLED)
            # Draw all preds in red
            for pred_pt in pred_pts:
                cv2.circle(input_img, tuple((pred_pt[1], pred_pt[0])), 5,
                           (0, 0, 255), cv2.FILLED)
            # Draw matched preds in yellow, and matched GTs in blue.
            # This will overwrite the red spots for good matches.
            # Note that pairs looks like: [(0, 2), (2, 1), (3, 3), (4, 4), (5, 0)]
            # Where each entry is (gt_idx, pred_idx)
            for pair in pairs:
                gt_pt = gt_points[pair[0]]
                pred_pt = pred_pts[pair[1]]
                cv2.circle(input_img, tuple((gt_pt[1], gt_pt[0])), 5,
                           (255, 0, 0), cv2.FILLED)
                cv2.circle(input_img, tuple((pred_pt[1], pred_pt[0])), 5,
                           (0, 255, 255), cv2.FILLED)

            cv2.namedWindow("input_img")
            helper.show_image("input_img", input_img)
            print()

    ic("Confusion matrix:")
    conf_mat_id = np.array([["TP", "FP"], ["FN", "TN"]])
    ic(conf_mat_id)
    conf_mat = np.array([[num_tp, num_fp], [num_fn, 0]])
    ic(conf_mat)
    precision = num_tp / (num_tp + num_fp)
    recall = num_tp / (num_tp + num_fn)
    f1 = 2 * precision * recall / (precision + recall)
    ic("Precision: ", precision)
    ic("Recall: ", recall)
    ic("F1 Score: ", f1)

    model_dir = os.path.abspath(args.model_dir)
    result_file = os.path.join(model_dir, "results.txt")
    with open(result_file, "w") as f:
        f.write("Confusion matrix:\n")
        f.write(str(conf_mat_id) + '\n')
        f.write(str(conf_mat) + '\n')
        f.write("Precision: %f\n" % precision)
        f.write("Recall: %f\n" % recall)
        f.write("F1 Score: %f\n" % f1)
Example #7
def test_recognize_http(show_img=False, num=1, reconstruct=False, force_partition=True):
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func("fetch" if num == 1 else None, lambda: provider.fetch())
        if show_img and num == 1:
            show_image(image)
        if num == 1:
            success, seq, weak_confidence = recognizer.recognize(
                image, save_intermediate=True, verbose=True, reconstruct=reconstruct, force_partition=force_partition
            )
        else:
            if i == 0:
                success, seq, weak_confidence = recognizer.recognize(
                    image,
                    save_intermediate=False,
                    verbose=False,
                    reconstruct=reconstruct,
                    force_partition=force_partition,
                )
            else:
                success, seq, weak_confidence = recognizer.recognize(
                    image, save_intermediate=False, verbose=False, reconstruct=False, force_partition=force_partition
                )
        if success:
            print(seq)
            result = time_func("verify" if num == 1 else None, lambda: provider.verify(seq))
            if num == 1:
                print("Recognized seq is {}".format(result))
            if result:
                if weak_confidence:
                    right_weak += 1
                else:
                    right_strong += 1
            else:
                if weak_confidence:
                    wrong_weak += 1
                else:
                    wrong_strong += 1
        else:
            fail += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print("Fail: ", fail)
    print("Right weak: ", right_weak)
    print("Right strong: ", right_strong)
    print("Right total: ", right_total)
    print("Wrong weak: ", wrong_weak)
    print("Wrong strong: ", wrong_strong)
    print("Wrong total: ", wrong_total)
    print("Total success rate: ", (right_weak + right_strong) / num)
    print("Success rate when confident: ", (right_strong + right_weak) / (num - fail) if num - fail > 0 else 0)
    print(
        "Success rate when strongly confident: ",
        right_strong / (right_strong + wrong_strong) if right_strong + wrong_strong > 0 else 0,
    )
    print(
        "Success rate when weakly confident: ",
        right_weak / (right_weak + wrong_weak) if right_weak + wrong_weak > 0 else 0,
    )
    time_end = time.time()
    print("Time used to test recognize http is: ", time_end - time_start)
Example #8
def test_recognize_http(show_img=False,
                        num=1,
                        reconstruct=False,
                        force_partition=True):
    time_start = time.time()
    provider = BilibiliCaptchaProvider()
    recognizer = CaptchaRecognizer()
    fail = 0
    right_strong = 0
    right_weak = 0
    wrong_strong = 0
    wrong_weak = 0
    for i in range(num):
        image = time_func('fetch' if num == 1 else None,
                          lambda: provider.fetch())
        if show_img and num == 1:
            show_image(image)
        if num == 1:
            success, seq, weak_confidence = recognizer.recognize(
                image,
                save_intermediate=True,
                verbose=True,
                reconstruct=reconstruct,
                force_partition=force_partition)
        else:
            if i == 0:
                success, seq, weak_confidence = recognizer.recognize(
                    image,
                    save_intermediate=False,
                    verbose=False,
                    reconstruct=reconstruct,
                    force_partition=force_partition)
            else:
                success, seq, weak_confidence = recognizer.recognize(
                    image,
                    save_intermediate=False,
                    verbose=False,
                    reconstruct=False,
                    force_partition=force_partition)
        if success:
            print(seq)
            result = time_func('verify' if num == 1 else None,
                               lambda: provider.verify(seq))
            if num == 1:
                print('Recognized seq is {}'.format(result))
            if result:
                if weak_confidence:
                    right_weak += 1
                else:
                    right_strong += 1
            else:
                if weak_confidence:
                    wrong_weak += 1
                else:
                    wrong_strong += 1
        else:
            fail += 1
    right_total = right_strong + right_weak
    wrong_total = wrong_strong + wrong_weak
    print('Fail: ', fail)
    print('Right weak: ', right_weak)
    print('Right strong: ', right_strong)
    print('Right total: ', right_total)
    print('Wrong weak: ', wrong_weak)
    print('Wrong strong: ', wrong_strong)
    print('Wrong total: ', wrong_total)
    print('Total success rate: ', (right_weak + right_strong) / num)
    print('Success rate when confident: ',
          (right_strong + right_weak) / (num - fail) if num - fail > 0 else 0)
    print(
        'Success rate when strongly confident: ',
        right_strong / (right_strong + wrong_strong)
        if right_strong + wrong_strong > 0 else 0)
    print(
        'Success rate when weakly confident: ', right_weak /
        (right_weak + wrong_weak) if right_weak + wrong_weak > 0 else 0)
    time_end = time.time()
    print('Time used to test recognize http is: ', time_end - time_start)
Example #9
def track_bees(args):

    # Handle file not found
    if not os.path.exists(args.video_file_in):
        sys.exit("Error. File not found. (-2)")

    model, device = setup_model_dataloader(args, batch_size=1)

    vid = cv2.VideoCapture(args.video_file_in)
    ic("Processing ", args.video_file_in)

    # Get some metadata
    num_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = vid.get(cv2.CAP_PROP_FPS)
    ic("Num frames: ", num_frames)
    ic("Width x Height: ", (width, height))
    # I don't think fps is accurate!
    ic("Source FPS: ", fps)
    if args.fps != 0:
        frame_duration = 1000 // int(args.fps)  # ms
    else:
        frame_duration = 1000 // int(fps)  # ms

    # Don't actually need the dataset. Just need the input dimensions
    bee_ds = BeePointDataset(root_dir="/dev/null")
    input_size = bee_ds.input_size
    ic(bee_ds.input_size)

    # Vars for tracking
    prev_pts = np.array([])
    running_pairs = []

    # Vars for profiling
    total_exec_time_accumulator = 0
    exec_time_accumulator = 0
    num_exec_frames = 0

    for frame_num in range(num_frames):

        # Read frame from video
        ret, frame = vid.read()
        if frame is None:
            break

        if args.enable_timing:
            start_time = time.time()

        # Rotate if rotation is set
        if args.rotate == -90:
            frame = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE)
        elif args.rotate == 90:
            frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)
        # Resize to network input size
        frame = cv2.resize(frame, (input_size[1], input_size[0]))

        if 0:
            cv2.imshow("Video", frame)
            key = cv2.waitKey(30)
            if key == ord('q') or key == 27:
                break

        # Convert to tensor and forward pass
        tensor_frame = TF.to_tensor(frame)
        tensor_frame = tensor_frame.to(device)
        tensor_frame = torch.unsqueeze(tensor_frame, 0)
        pred = helper.model_forward(model, tensor_frame, device, ENABLE_TIMING)

        # Get prediction centroids (predicted points)
        pred_pts = helper.get_centroids(pred)
        num_bees = len(pred_pts)

        # Since what we really care about is # bees, stop time profiling here
        # The rest is for visualization
        if args.enable_timing and frame_num > 0:
            iter_time = time.time() - start_time
            total_exec_time_accumulator += iter_time
            exec_time_accumulator += iter_time
            num_exec_frames += 1
            if frame_num % int(fps) == 0:
                avg_exec_time = exec_time_accumulator / num_exec_frames
                ic("Avg exec time: ", avg_exec_time)
                exec_time_accumulator = 0
                num_exec_frames = 0

        # Use bipartite graph minimum weight matching to associate detections
        if len(pred_pts) > 0 and len(
                prev_pts) > 0 and not args.disable_tracking:
            pairs = helper.calculate_pairs(prev_pts, pred_pts, args.threshold)
            # Extract actual points based on indices in original arrays
            point_pairs = []
            for pair in pairs:
                point_pairs.append((prev_pts[pair[1]], pred_pts[pair[0]]))

            running_pairs.append(point_pairs)
            if len(running_pairs) > args.tracker_frame_len:
                running_pairs.pop(0)

            # Draw the tracking lines
            frame = draw_tracking_lines(frame, running_pairs)
        elif len(running_pairs) > 0:
            running_pairs.pop(0)

        if len(pred_pts) > 0:
            for pred_pt in pred_pts:
                cv2.circle(frame, tuple((pred_pt[1], pred_pt[0])), 5,
                           VIS_COLOR, cv2.FILLED)

        # Draw # of bees in text on bottom left of image
        num_bees_text = "# Bees: %i" % num_bees
        bottom_left = (0, frame.shape[0] - 20)
        frame = cv2.putText(frame,
                            num_bees_text,
                            org=bottom_left,
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=1,
                            color=(255, 0, 0),
                            thickness=2,
                            lineType=cv2.LINE_AA)

        # Show results
        helper.show_image("Predictions", frame, delay=frame_duration)

        prev_pts = deepcopy(pred_pts)

    # Calculate the average processing FPS. Use num_frames-1 because we didn't count the first frame's initialization delay
    avg_exec_time = total_exec_time_accumulator / (num_frames - 1)
    measured_fps = 1 / avg_exec_time
    ic("Overall avg exec time: ", avg_exec_time)
    ic("Overall FPS: ", measured_fps)