def cones_detection(target_path, output_path, model, device, conf_thres,
                    nms_thres):

    img = Image.open(target_path).convert('RGB')
    w, h = img.size
    new_width, new_height = model.img_size()
    pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
    img = torchvision.transforms.functional.pad(img,
                                                padding=(pad_w, pad_h, pad_w,
                                                         pad_h),
                                                fill=(127, 127, 127),
                                                padding_mode="constant")
    img = torchvision.transforms.functional.resize(img,
                                                   (new_height, new_width))

    bw = model.get_bw()
    if bw:
        img = torchvision.transforms.functional.to_grayscale(
            img, num_output_channels=1)

    img = torchvision.transforms.functional.to_tensor(img)
    img = img.unsqueeze(0)

    with torch.no_grad():
        model.eval()
        img = img.to(device, non_blocking=True)
        # output,first_layer,second_layer,third_layer = model(img)
        output = model(img)

        # Assumes a batch of one image: only the detections from the last
        # element of `output` end up in main_box_corner.
        for detections in output:
            # Keep detections above the confidence threshold
            detections = detections[detections[:, 4] > conf_thres]
            # Convert (center x, center y, width, height) to (x1, y1, x2, y2)
            box_corner = torch.zeros((detections.shape[0], 4),
                                     device=detections.device)
            xy = detections[:, 0:2]
            wh = detections[:, 2:4] / 2
            box_corner[:, 0:2] = xy - wh
            box_corner[:, 2:4] = xy + wh
            probabilities = detections[:, 4]
            nms_indices = nms(box_corner, probabilities, nms_thres)
            main_box_corner = box_corner[nms_indices]
            if nms_indices.shape[0] == 0:
                # Nothing survived NMS; main_box_corner is empty, so the
                # loop below produces no boxes.
                continue

        pred_boxes = []
        # Map each box back to the original image: undo the resize (divide
        # by ratio), then remove the padding offset
        for i in range(len(main_box_corner)):
            x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
            y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
            x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
            y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
            box = [x0, y0, x1, y1]
            pred_boxes.append(box)

        return pred_boxes
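
Neither snippet on this page includes `calculate_padding`, so here is a minimal sketch of the letterbox-style helper they appear to assume: pad the short side so the aspect ratio matches the network input, and return the resize factor from the padded size to the network size. That convention is what makes the inverse mapping above (`x / ratio - pad_w`) recover original-image coordinates. The `nms` call matches the signature of `torchvision.ops.nms`, which returns the indices of the kept boxes sorted by decreasing score.

# Hypothetical sketch of calculate_padding; the real helper lives elsewhere
# in the repo and may round or branch differently.
def calculate_padding(h, w, new_height, new_width):
    # Pad symmetrically so the padded image has the network's aspect ratio
    if w / h < new_width / new_height:
        # Image is too narrow for the target shape: pad left and right
        pad_h = 0
        pad_w = int(round((h * new_width / new_height - w) / 2))
    else:
        # Image is too short for the target shape: pad top and bottom
        pad_w = 0
        pad_h = int(round((w * new_height / new_width - h) / 2))
    # Resize factor from the padded image to the network input
    ratio = new_width / (w + 2 * pad_w)
    return pad_h, pad_w, ratio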
Example #2
    def detect(self, cv_img):
        # Convert OpenCV's BGR frame to an RGB PIL image
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
        img = img_pil.fromarray(cv_img)  # img_pil: PIL.Image, aliased at import
        w, h = img.size
        new_width, new_height = self.model.img_size()
        pad_h, pad_w, ratio = calculate_padding(h, w, new_height, new_width)
        img = torchvision.transforms.functional.pad(img,
                                                    padding=(pad_w, pad_h,
                                                             pad_w, pad_h),
                                                    fill=(127, 127, 127),
                                                    padding_mode="constant")
        img = torchvision.transforms.functional.resize(img,
                                                       (new_height, new_width))

        bw = self.model.get_bw()
        if bw:
            img = torchvision.transforms.functional.to_grayscale(
                img, num_output_channels=1)

        img = torchvision.transforms.functional.to_tensor(img)
        img = img.unsqueeze(0)

        with torch.no_grad():
            self.model.eval()
            img = img.to(self.device, non_blocking=True)
            # output,first_layer,second_layer,third_layer = model(img)
            output = self.model(img)

            # As in the function above, this assumes a batch of one image:
            # only the last element of `output` populates main_box_corner.
            for detections in output:
                # Keep detections above the confidence threshold
                detections = detections[detections[:, 4] > self.conf_thres]
                # Convert (center x, center y, width, height) to (x1, y1, x2, y2)
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, self.nms_thres)
                main_box_corner = box_corner[nms_indices]
                if nms_indices.shape[0] == 0:
                    # Nothing survived NMS; main_box_corner is empty, so
                    # bboxes below stays empty.
                    continue
            bboxes = []
            # Map each box back to the original image: undo the resize, then
            # remove the padding offset
            for i in range(len(main_box_corner)):
                x0 = main_box_corner[i, 0].to('cpu').item() / ratio - pad_w
                y0 = main_box_corner[i, 1].to('cpu').item() / ratio - pad_h
                x1 = main_box_corner[i, 2].to('cpu').item() / ratio - pad_w
                y1 = main_box_corner[i, 3].to('cpu').item() / ratio - pad_h
                bboxes.append([x0, y0, x1, y1])

        return bboxes
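
A usage sketch for the `detect` method above. The `ConeDetector` class name and its constructor arguments are assumptions; only the method body is shown on this page.

import cv2

# Hypothetical wrapper: only detect() appears above, so the class name and
# constructor arguments here are assumed, not taken from the repo.
detector = ConeDetector(model=model, device=device,
                        conf_thres=0.8, nms_thres=0.25)

frame = cv2.imread("frame.jpg")                # BGR, as OpenCV loads it
boxes = detector.detect(frame)                 # [[x0, y0, x1, y1], ...]
for x0, y0, x1, y1 in boxes:
    cv2.rectangle(frame, (int(x0), int(y0)), (int(x1), int(y1)),
                  (0, 255, 0), 2)
cv2.imwrite("frame_with_cones.jpg", frame)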
Example #3
def validate(*,
             dataloader,
             model,
             device,
             step=-1,
             bbox_all=False,
             debug_mode):
    # result = open("logs/result.txt", "w" )

    with torch.no_grad():
        t_start = time.time()
        conf_thres, nms_thres, iou_thres = model.get_threshs()
        width, height = model.img_size()
        model.eval()
        print("Calculating mAP - Model in evaluation mode")
        n_images = len(dataloader.dataset)
        mAPs = []
        mR = []
        mP = []
        for batch_i, (img_uris, imgs, targets) in enumerate(
                tqdm(dataloader, desc='Computing mAP')):
            imgs = imgs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            # output,_,_,_ = model(imgs)
            output = model(imgs)

            for sample_i, (labels,
                           detections) in enumerate(zip(targets, output)):
                detections = detections[detections[:, 4] > conf_thres]
                if detections.size()[0] == 0:
                    predictions = torch.tensor([])
                else:
                    predictions = torch.argmax(detections[:, 5:], dim=1)
                # From (center x, center y, width, height) to (x1, y1, x2, y2)
                box_corner = torch.zeros((detections.shape[0], 4),
                                         device=detections.device)
                xy = detections[:, 0:2]
                wh = detections[:, 2:4] / 2
                box_corner[:, 0:2] = xy - wh
                box_corner[:, 2:4] = xy + wh
                probabilities = detections[:, 4]
                nms_indices = nms(box_corner, probabilities, nms_thres)
                box_corner = box_corner[nms_indices]
                probabilities = probabilities[nms_indices]
                predictions = predictions[nms_indices]

                # There should always be at least one label per image
                if nms_indices.shape[0] == 0:
                    continue
                # Get detections sorted by decreasing confidence scores
                _, inds = torch.sort(-probabilities)
                box_corner = box_corner[inds]

                probabilities = probabilities[inds]
                predictions = predictions[inds]
                # Remove the 0-padding added by the dataloader
                labels = labels[(labels[:, 1:5] <= 0).sum(dim=1) == 0]
                # Extract target boxes as (x1, y1, x2, y2)
                target_boxes = xywh2xyxy(labels[:, 1:5])
                target_boxes[:, (0, 2)] *= width
                target_boxes[:, (1, 3)] *= height
                detected = torch.zeros(target_boxes.shape[0],
                                       device=target_boxes.device,
                                       dtype=torch.uint8)
                correct = torch.zeros(nms_indices.shape[0],
                                      device=box_corner.device,
                                      dtype=torch.uint8)
                # 0th dim is the detection
                # (repeat in the 1st dim)
                # 2nd dim is the coord
                ious = bbox_iou(
                    box_corner.unsqueeze(1).expand(-1, target_boxes.shape[0],
                                                   -1),
                    target_boxes.unsqueeze(0).expand(box_corner.shape[0], -1,
                                                     -1))
                # ious is 2d -- 0th dim is the detected box, 1st dim is the target box, value is iou

                # Skip images without target labels (empty IoU matrix);
                # argmax below would fail on an empty dimension
                if ious.numel() == 0:
                    continue

                best_is = torch.argmax(ious, dim=1)

                # TODO fix for multi-class. Need to use predictions somehow?
                for i, iou in enumerate(ious):
                    best_i = best_is[i]
                    if ious[i, best_i] > iou_thres and detected[best_i] == 0:
                        correct[i] = 1
                        detected[best_i] = 1

                # Compute Average Precision (AP) per class
                ap, r, p = average_precision(tp=correct,
                                             conf=probabilities,
                                             n_gt=labels.shape[0])

                # Compute mean AP across all classes in this image, and append to image list
                mAPs.append(ap)
                mR.append(r)
                mP.append(p)
                if bbox_all or sample_i < 2:  # log the first two images in every batch
                    img_filepath = img_uris[sample_i]
                    if img_filepath is None:
                        print("NULL image filepath for image uri: {uri}".format(
                            uri=img_uris[sample_i]))
                        continue  # cannot visualize without the source image
                    orig_img = Image.open(img_filepath)
                    # draw = ImageDraw.Draw(img_with_boxes)
                    w, h = orig_img.size
                    pad_h, pad_w, scale_factor = calculate_padding(
                        h, w, height, width)

                    # Keep a copy in network coordinates for the debug
                    # visualization below
                    detect_box = copy.deepcopy(box_corner)

                    # Map boxes back to original image coordinates
                    box_corner /= scale_factor
                    box_corner[:, (0, 2)] -= pad_w
                    box_corner[:, (1, 3)] -= pad_h

                    if debug_mode:
                        # Assumes batch size 1 when squeezing away the batch dim
                        pil_img = transforms.ToPILImage()(imgs.squeeze())
                        # Recover the image's name from its URI
                        img_path = img_uris[0]
                        img_name = "_".join(img_path.split("_")[-5:])
                        tmp_path = os.path.join(
                            visualization_tmp_path,
                            img_name[:-4] + "_predicted_vis.jpg")
                        vis_label = add_class_dimension_to_labels(detect_box)
                        visualize_and_save_to_local(pil_img,
                                                    vis_label,
                                                    tmp_path,
                                                    box_color="red")
                        print("Prediction visualization uploaded")

        # Means across all images, computed once after the final batch
        mean_mAP = torch.tensor(mAPs, dtype=torch.float).mean().item()
        mean_R = torch.tensor(mR, dtype=torch.float).mean().item()
        mean_P = torch.tensor(mP, dtype=torch.float).mean().item()
        dt = time.time() - t_start
        print('mAP: {0:5.2%}, Recall: {1:5.2%}, Precision: {2:5.2%}'.format(
            mean_mAP, mean_R, mean_P))
        # result.write(str(1-mean_mAP))
        # result.close()
        return mean_mAP, mean_R, mean_P, dt / (n_images + 1e-12)
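
`average_precision` is also not shown here. A minimal sketch of the conventional per-image computation it presumably performs, given the true-positive flags, their confidences, and the ground-truth count; the repo's exact interpolation may differ, and this assumes at least one detection survived NMS (guaranteed by the `continue` above).

import torch

# Hypothetical sketch of the AP helper used by validate(); the repo's
# exact interpolation scheme may differ.
def average_precision(tp, conf, n_gt):
    # Order the true-positive flags by decreasing confidence
    order = torch.argsort(conf, descending=True)
    tp = tp[order].float()

    cum_tp = torch.cumsum(tp, dim=0)
    cum_fp = torch.cumsum(1.0 - tp, dim=0)
    recall = cum_tp / max(n_gt, 1)
    precision = cum_tp / (cum_tp + cum_fp).clamp(min=1e-12)

    # Area under the precision-recall curve (trapezoidal rule), with the
    # (recall=0, precision=1) endpoint prepended
    r = torch.cat([recall.new_zeros(1), recall])
    p = torch.cat([precision.new_ones(1), precision])
    ap = torch.trapz(p, r).item()
    return ap, recall[-1].item(), precision[-1].item()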
Example #4
    def __getitem__(self, index):
        img_uri = self.img_files[index]
        img_labels = self.labels[index]
        # don't download, since it was already downloaded in the init
        img_path = img_uri
        img_name = "_".join(img_path.split("_")[-5:])
        orig_img = PIL.Image.open(img_path).convert('RGB')
        if orig_img is None:
            raise Exception(
                "Empty image: {img_path}".format(img_path=img_path))

        if self.vis_batch and len(img_labels) > 0:
            vis_orig_img = copy.deepcopy(orig_img)
            labels = add_class_dimension_to_labels(img_labels)
            labels = xyhw2xyxy_corner(labels, skip_class_dimension=True)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + ".jpg")
            visualize_and_save_to_local(vis_orig_img,
                                        labels,
                                        tmp_path,
                                        box_color="green")
            print(f'new image uploaded to {tmp_path}')

        # First, handle image re-shaping
        if self.ts:
            scale = self.scales[index]
            scaled_img = scale_image(orig_img, scale)
            scaled_img_width, scaled_img_height = scaled_img.size
            patch_width, patch_height = self.width, self.height

            vert_pad, horiz_pad = pre_tile_padding(scaled_img_width,
                                                   scaled_img_height,
                                                   patch_width, patch_height)
            padded_img = torchvision.transforms.functional.pad(
                scaled_img,
                padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
                fill=(127, 127, 127),
                padding_mode="constant")
            padded_img_width, padded_img_height = padded_img.size

            _, _, n_patches, _, _ = get_patch_spacings(padded_img_width,
                                                       padded_img_height,
                                                       patch_width,
                                                       patch_height)

            patch_index = random.randint(0, n_patches - 1)
            if self.debug_mode:
                patch_index = 0
            img, boundary = get_patch(padded_img, patch_width, patch_height,
                                      patch_index)
        else:
            orig_img_width, orig_img_height = orig_img.size
            vert_pad, horiz_pad, ratio = calculate_padding(
                orig_img_height, orig_img_width, self.height, self.width)
            img = torchvision.transforms.functional.pad(
                orig_img,
                padding=(horiz_pad, vert_pad, horiz_pad, vert_pad),
                fill=(127, 127, 127),
                padding_mode="constant")
            img = torchvision.transforms.functional.resize(
                img, (self.height, self.width))

        # If no labels, no need to do augmentation (this should change in the future)
        #   so immediately return with the padded image and empty labels
        if len(img_labels) == 0:
            labels = torch.zeros((len(img_labels), 5))
            img = torchvision.transforms.functional.to_tensor(img)
            labels = F.pad(
                labels,
                pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                mode="constant")
            return img_uri, img, labels

        # Next, handle label re-shaping
        labels = add_class_dimension_to_labels(img_labels)
        labels = xyhw2xyxy_corner(labels)
        if self.ts:
            labels = scale_labels(labels, self.scales[index])
            labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
            if self.vis_batch:
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] + "_scaled.jpg")
                visualize_and_save_to_local(padded_img,
                                            labels,
                                            tmp_path,
                                            box_color="red")

            labels_temp = filter_and_offset_labels(labels, boundary)

            if self.vis_batch:
                pre_vis_labels = copy.deepcopy(labels)
                for i in range(n_patches):
                    vis_patch_img, boundary = get_patch(
                        padded_img, patch_width, patch_height, i)

                    labels = filter_and_offset_labels(pre_vis_labels, boundary)

                    tmp_path = os.path.join(
                        visualization_tmp_path,
                        img_name[:-4] + "_patch_{}.jpg".format(i))
                    visualize_and_save_to_local(vis_patch_img,
                                                labels,
                                                tmp_path,
                                                box_color="blue")
            if self.upload_dataset:
                pre_vis_labels = copy.deepcopy(labels)
                for i in range(n_patches):
                    vis_patch_img, boundary = get_patch(
                        padded_img, patch_width, patch_height, i)

                    labels = filter_and_offset_labels(pre_vis_labels, boundary)

                    tmp_path = os.path.join(
                        visualization_tmp_path,
                        img_name[:-4] + "_patch_{}.jpg".format(i))
                    upload_label_and_image_to_gcloud(vis_patch_img, labels,
                                                     tmp_path)

            else:
                labels = filter_and_offset_labels(labels, boundary)
        else:
            labels = add_padding_on_each_side(labels, horiz_pad, vert_pad)
            labels = scale_labels(labels, ratio)
            labels_temp = labels

            if self.vis_batch:
                tmp_path = os.path.join(visualization_tmp_path,
                                        img_name[:-4] + "_pad_resized.jpg")
                visualize_and_save_to_local(img,
                                            labels,
                                            tmp_path,
                                            box_color="blue")

        labels = labels_temp
        if self.vis_batch and self.data_aug:
            vis_aug_img = copy.deepcopy(img)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_before_aug.jpg")
            visualize_and_save_to_local(vis_aug_img,
                                        labels,
                                        tmp_path,
                                        box_color="red")
        if self.augment_hsv or self.data_aug:
            if random.random() > 0.5:
                img = self.jitter(img)
                # no transformation on labels

        # Augment image and labels
        img_width, img_height = img.size
        if self.augment_affine or self.data_aug:
            if random.random() > 0:  # always true: the affine step always runs
                angle = random.uniform(-10, 10)
                translate = (random.uniform(-40, 40), random.uniform(-40, 40))
                scale = random.uniform(0.9, 1.1)
                shear = random.uniform(-3, 3)
                # resample=2 is PIL.Image.BILINEAR; resample/fillcolor are the
                # older torchvision keywords this code was written against
                img = torchvision.transforms.functional.affine(
                    img, angle, translate, scale, shear,
                    resample=2, fillcolor=(127, 127, 127))
                labels = affine_labels(img_height, img_width, labels, -angle,
                                       translate, scale, (-shear, 0))

        if self.bw:
            img = torchvision.transforms.functional.to_grayscale(
                img, num_output_channels=1)

        # random left-right flip
        if self.lr_flip:
            if random.random() > 0.5:
                img = torchvision.transforms.functional.hflip(img)
                # Is this correct?
                # Not immediately obvious, when composed with the angle shift above
                labels[:, 1] = img_width - labels[:, 1]
                labels[:, 3] = img_width - labels[:, 3]

        # GaussianBlur, needs further development
        if self.blur:
            if random.random() > 0.2:
                arr = np.asarray(img)
                sigma = random.uniform(0, 3.00)
                seq = iaa.Sequential([iaa.GaussianBlur(sigma=sigma)])
                # augment_image (singular) is imgaug's single-image API; the
                # plural form would misread an (H, W, C) array as a batch
                images_aug = seq.augment_image(arr)
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        # AdditiveGaussianNoise
        if self.noise:
            if random.random() > 0.3:
                arr = np.asarray(img)
                scale = random.uniform(0, 0.03 * 255)
                seq = iaa.Sequential([
                    iaa.AdditiveGaussianNoise(loc=0,
                                              scale=scale,
                                              per_channel=0.5)
                ])
                images_aug = seq.augment_image(arr)  # single-image API
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        # SigmoidContrast, needs further development
        if self.contrast:
            if random.random() > 0.5:
                arr = np.asarray(img)
                cutoff = random.uniform(0.45, 0.75)
                gain = random.randint(5, 10)
                seq = iaa.Sequential(
                    [iaa.SigmoidContrast(gain=gain, cutoff=cutoff)])
                images_aug = seq.augment_image(arr)  # single-image API
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        # Sharpen, needs further development
        if self.sharpen:
            if random.random() > 0.3:
                arr = np.asarray(img)
                alpha = random.uniform(0, 0.5)
                seq = iaa.Sharpen(alpha=alpha)
                images_aug = seq.augment_image(arr)  # single-image API
                img = PIL.Image.fromarray(np.uint8(images_aug), 'RGB')

        if self.vis_batch and self.data_aug:
            vis_post_aug_img = copy.deepcopy(img)
            tmp_path = os.path.join(visualization_tmp_path,
                                    img_name[:-4] + "_post_augmentation.jpg")
            visualize_and_save_to_local(vis_post_aug_img,
                                        labels,
                                        tmp_path,
                                        box_color="green")

        if self.vis_batch:
            self.vis_counter += 1
            if self.vis_counter > (self.vis_batch - 1):
                sys.exit('Finished visualizing enough images. Exiting!')

        labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
        labels[:, (1, 3)] /= self.width
        labels[:, (2, 4)] /= self.height

        img = torchvision.transforms.functional.to_tensor(img)
        labels = F.pad(labels,
                       pad=[0, 0, 0, self.num_targets_per_image - len(labels)],
                       mode="constant")
        if (labels < 0).sum() > 0:
            raise Exception(f"labels for image {img_uri} have negative values")
        return img_uri, img, labels
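
Because `__getitem__` pads every label tensor to `num_targets_per_image` rows, each sample has a fixed shape and PyTorch's default collate can stack a batch directly. A usage sketch follows; the dataset class name and constructor arguments are assumptions, since only `__getitem__` is shown on this page.

from torch.utils.data import DataLoader

# Hypothetical constructor; only __getitem__ appears above, so the class
# name and arguments here are assumed.
dataset = ImageLabelDataset(img_files, labels, width=416, height=416,
                            num_targets_per_image=60, data_aug=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True,
                    num_workers=4, pin_memory=True)

for img_uris, imgs, targets in loader:
    # imgs: (B, C, H, W) float tensor in [0, 1]
    # targets: (B, num_targets_per_image, 5); rows past the real labels are
    # zero-padded, which is what the (labels[:, 1:5] <= 0) filter in
    # validate() strips back out
    pass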