Example #1
def filter_small_box(boxes, min_size):
    # Keep only boxes whose area exceeds min_size; returns the kept indices.
    # Note: despite the name, min_size is compared against the box area (w * h).
    boxes_xywh = xyxy2xywh(boxes)
    boxes_ws = boxes_xywh[:, 2]
    boxes_hs = boxes_xywh[:, 3]
    boxes_areas = boxes_ws * boxes_hs
    keep_index = np.where(boxes_areas > min_size)[0]
    return keep_index
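None of these examples define `xyxy2xywh` itself, and its convention differs between repositories: Example 4's docstring documents a center-based output, while Examples 2 and 3 write the result straight into COCO annotations, whose `bbox` field uses a top-left origin. A minimal NumPy sketch of both variants (hypothetical names, assuming float input of shape [N, 4]); Example 1 works with either, since it only reads the w and h columns:

import numpy as np

def xyxy2xywh_center(boxes):
    # [x1, y1, x2, y2] -> [center_x, center_y, w, h] (YOLO-style, Example 4)
    out = boxes.astype(np.float32)
    out[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2   # center x
    out[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2   # center y
    out[:, 2] = boxes[:, 2] - boxes[:, 0]         # width
    out[:, 3] = boxes[:, 3] - boxes[:, 1]         # height
    return out

def xyxy2xywh_topleft(boxes):
    # [x1, y1, x2, y2] -> [x_min, y_min, w, h] (COCO bbox, Examples 2-3)
    out = boxes.astype(np.float32)
    out[:, 2] = boxes[:, 2] - boxes[:, 0]         # width
    out[:, 3] = boxes[:, 3] - boxes[:, 1]         # height
    return out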
Example #2
def test(epochs_tested):
    is_train = False
    transforms = transform.build_transforms(is_train=is_train)
    coco_dataset = dataset.COCODataset(is_train=is_train, transforms=transforms)
    dataloader = build_dataloader(coco_dataset, sampler=None, is_train=is_train)

    assert isinstance(epochs_tested, (list, set)), "during test, epochs_tested must be a list or set!"
    model = FCOS(is_train=is_train)

    for epoch in epochs_tested:
        utils.load_model(model, epoch)
        model.cuda()
        model.eval()

        final_results = []

        with torch.no_grad():
            for data in tqdm(dataloader):
                img = data["images"]
                ori_img_shape = data["ori_img_shape"]
                fin_img_shape = data["fin_img_shape"]
                index = data["indexs"]

                img = img.cuda()
                ori_img_shape = ori_img_shape.cuda()
                fin_img_shape = fin_img_shape.cuda()

                cls_pred, reg_pred, label_pred = model([img, ori_img_shape, fin_img_shape])

                cls_pred = cls_pred[0].cpu()
                reg_pred = reg_pred[0].cpu()
                label_pred = label_pred[0].cpu()
                index = index[0]

                img_info = dataloader.dataset.img_infos[index]
                imgid = img_info["id"]

                reg_pred = utils.xyxy2xywh(reg_pred)

                label_pred = label_pred.tolist()
                cls_pred = cls_pred.tolist()

                final_results.extend(
                    [
                        {
                            "image_id": imgid,
                            "category_id": dataloader.dataset.label2catid[label_pred[k]],
                            "bbox": reg_pred[k].tolist(),
                            "score": cls_pred[k],
                        }
                        for k in range(len(reg_pred))
                    ]
                )

        output_path = os.path.join(cfg.output_path, "fcos_"+str(epoch)+".json")
        utils.evaluate_coco(dataloader.dataset.coco, final_results, output_path, "bbox")
Example #3
def convert_labelme_to_coco(input_dir, output_path):
    json_file_paths = get_files(input_dir, ('.json',))  # note: ('.json') without the comma is just a string, not a tuple
    # json_file_names = [basename(f) for f in json_file_paths]
    print('Load {} files from {}'.format(len(json_file_paths), input_dir))
    json_dict = {
        'images': [],
        'annotations': [],
        'categories': [{
            'supercategory': 'face',
            'id': 1,
            'name': 'face',
        }],
    }
    image_id = 1
    bbox_id = 1
    ignore = 0
    category_id = 1

    for json_file_path in json_file_paths:
        with open(json_file_path, 'r') as fp:
            anno = json.load(fp)

        img_height = anno['imageHeight']
        img_width = anno['imageWidth']
        file_name = anno['imagePath']

        image_info = {
            'file_name': file_name,
            'height': img_height,
            'width': img_width,
            'id': image_id,
        }
        json_dict['images'].append(image_info)

        for shape in anno['shapes']:
            p0, p1 = shape['points']
            bbox = xyxy2xywh([p0[0], p0[1], p1[0], p1[1]])
            _, _, w, h = bbox
            annotation = {
                'area': w * h,
                'iscrowd': ignore,
                'image_id': image_id,
                'bbox': bbox,
                'category_id': category_id,
                'id': bbox_id,
                'ignore': ignore,
                'segmentation': [],
            }
            json_dict['annotations'].append(annotation)
            bbox_id += 1
        image_id += 1
    with open(output_path, 'w') as json_fp:
        json_str = json.dumps(json_dict)
        json_fp.write(json_str)
    print('Done, save to {}'.format(output_path))
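`get_files` is not part of this snippet; a plausible minimal sketch, assuming it walks `input_dir` recursively and filters by file extension (the name and call signature come from the code above, the body is a guess; `str.endswith` accepts a tuple of suffixes):

import os

def get_files(input_dir, extensions):
    # Collect paths under input_dir whose suffix matches one of `extensions`.
    matches = []
    for root, _, files in os.walk(input_dir):
        for name in files:
            if name.lower().endswith(extensions):
                matches.append(os.path.join(root, name))
    return matches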
Example #4
    def make_target(self, gt_boxes, gt_labels, img_size):
        """
        :param gt_boxes: [M, 4] / [xmin, ymin, xmax, ymax] / ndarray
        :param gt_labels: [M,] / ndarray
        :param img_size: 416
        :return:
            [[x, y, w, h, label], ...] / shape: [13, 13, 5, 25]
            x, y, w, h are all at the 416-pixel image scale
        """
        assert len(gt_boxes) == len(gt_labels)
        assert isinstance(gt_boxes, np.ndarray)
        assert isinstance(gt_labels, np.ndarray)

        grid_size = img_size / self.opt.S
        target = np.zeros((self.opt.S, self.opt.S, self.opt.B,
                           4 + 1 + self.opt.voc_class_num),
                          dtype=np.float32)
        # [xmin, ymin, xmax, ymax] -> [center_x, center_y, w, h]
        gt_boxes_xywh = xyxy2xywh(gt_boxes)
        # [M, 2]
        box_coors = np.floor(gt_boxes_xywh[:, :2] / grid_size).astype(np.int32)
        keep_index = self._fliter_duplicate(box_coors)
        keep_box_coors = box_coors[keep_index]
        keep_labels = gt_labels[keep_index]
        keep_boxes = gt_boxes_xywh[keep_index]
        # Of the 5 predicted boxes in a cell that contains an object, only the one
        # whose anchor has the highest IoU with the gt_box gets a ground-truth value
        gt_anchor_ious = self._iou(keep_boxes, self.anchor_base)
        best_match = np.argmax(gt_anchor_ious, axis=-1)
        max_iou = np.max(gt_anchor_ious, axis=-1)

        for grid, k, iou, xywh, label in zip(keep_box_coors, best_match,
                                             max_iou, keep_boxes, keep_labels):
            target[grid[1], grid[0], k, :2] = xywh[:2]  # x,y
            target[grid[1], grid[0], k, 2:4] = xywh[2:]  # w,h
            target[grid[1], grid[0], k, 4] = 1.  # confidence
            # target[grid[1], grid[0], k, 4] = iou  # confidence
            target[grid[1], grid[0], k, 5 + label] = 1.  # label

        return target
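Neither `_fliter_duplicate` nor `_iou` appears in this excerpt. A sketch of both under common YOLOv2-style assumptions: "duplicates" are ground-truth boxes falling into an already-occupied grid cell, and the anchor IoU is computed from widths and heights alone, as if every box were centered at the origin (`anchor_base` is assumed to be a [B, 2] array of anchor sizes):

import numpy as np

def _fliter_duplicate(self, box_coors):
    # Keep only the first gt box that lands in each grid cell.
    _, keep_index = np.unique(box_coors, axis=0, return_index=True)
    return np.sort(keep_index)

def _iou(self, boxes_xywh, anchors_wh):
    # Shape-only IoU between [M, 4] gt boxes (using their w, h) and
    # [B, 2] anchor sizes, as if all boxes shared the same center.
    gt_wh = boxes_xywh[:, 2:4]
    inter = (np.minimum(gt_wh[:, None, 0], anchors_wh[None, :, 0]) *
             np.minimum(gt_wh[:, None, 1], anchors_wh[None, :, 1]))
    union = (gt_wh[:, 0] * gt_wh[:, 1])[:, None] + \
            (anchors_wh[:, 0] * anchors_wh[:, 1])[None, :] - inter
    return inter / union  # [M, B]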
Example #5
    def __call__(self, **kwargs):
        reg_targets_xyxy = kwargs["reg_targets"]
        image_shapes = [np.array(image.shape[-2::-1])
            for image in kwargs["images"]]

        # calculate the size of each cell in the grid
        cell_sizes = [shape / self.grid_size for shape in image_shapes]

        # convert the regression targets to xywh format
        reg_targets_xywh = [xyxy2xywh(xyxy) for xyxy in reg_targets_xyxy]

        # calculate regression targets' centers
        reg_targets_centers = [np.array(xywh[:, :2])
            for xywh in reg_targets_xywh]

        # normalize centers' coordinates
        centers_norm = [center / cell_size for center, cell_size
            in zip(reg_targets_centers, cell_sizes)]

        # calculate centers indices in the grid
        centers_indices = [np.floor(center).astype(int)
            for center in centers_norm]

        # calculate normalized centers and sizes
        bboxes_centers_norm = [norm - idx - 0.5 for norm, idx
            in zip(centers_norm, centers_indices)]
        bboxes_sizes_norm = [xywh[:, 2:] / cell_size for xywh, cell_size
            in zip(reg_targets_xywh, cell_sizes)]

        # concatenate normalized centers and sizes
        bboxes = [np.concatenate((center, size), axis=1) for center, size
            in zip(bboxes_centers_norm, bboxes_sizes_norm)]

        kwargs["reg_targets"] = bboxes
        kwargs["obj_indices"] = centers_indices

        return kwargs
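To make the normalization above concrete, a small worked example, assuming a 416x416 image and grid_size = 13 (so each cell is 32 pixels):

import numpy as np

# One gt box with center (100, 200) and size (50, 80), already in xywh.
xywh = np.array([[100., 200., 50., 80.]])
cell = 416 / 13                            # 32 pixels per cell

center_norm = xywh[:, :2] / cell           # [[3.125, 6.25]]
idx = np.floor(center_norm).astype(int)    # cell (3, 6)
center_off = center_norm - idx - 0.5       # [[-0.375, -0.25]], offset within (-0.5, 0.5)
size_norm = xywh[:, 2:] / cell             # [[1.5625, 2.5]], size in cell units

print(idx, center_off, size_norm)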
Example #6
    def __getitem__(self, index):

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        label_path = self.label_files[index % len(self.img_files)].rstrip()

        # Getting image
        img = Image.open(img_path).convert('RGB')
        width, height = img.size

        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))
        else:
            # No label file: use an empty [0, 5] tensor so `boxes`
            # can be used unconditionally below
            boxes = torch.zeros((0, 5))

        # RESIZING
        if width > height:
            ratio = height / width
            t_width = self.img_size
            t_height = int(ratio * self.img_size)
        else:
            ratio = width / height
            t_width = int(ratio * self.img_size)
            t_height = self.img_size

        img = transforms.functional.resize(img, (t_height, t_width))

        # IF TRAIN, APPLY BRIGHTNESS / CONTRAST / HUE / SATURATION JITTER
        if self.train:
            brightness_rnd = random.uniform(1 - self.brightness_range,
                                            1 + self.brightness_range)
            contrast_rnd = random.uniform(1 - self.contrast_range,
                                          1 + self.contrast_range)
            hue_rnd = random.uniform(-self.hue_range, self.hue_range)
            saturation_rnd = random.uniform(1 - self.saturation_range,
                                            1 + self.saturation_range)

            img = transforms.functional.adjust_brightness(img, brightness_rnd)
            img = transforms.functional.adjust_contrast(img, contrast_rnd)
            img = transforms.functional.adjust_hue(img, hue_rnd)
            img = transforms.functional.adjust_saturation(img, saturation_rnd)

        # CONVERTING TO TENSOR
        tensor_img = transforms.functional.to_tensor(img)

        # Handle grayscale images: add a channel dim and repeat it across 3 channels
        if len(tensor_img.shape) != 3:
            tensor_img = tensor_img.unsqueeze(0)
            tensor_img = tensor_img.expand(3, *tensor_img.shape[1:])

        # NOTE: PIL sizes are (width, height), while PyTorch tensors are (channels, height, width)

        # Apply augmentations for train it would be mosaic
        if self.train:
            mosaic_img = torch.zeros(3, self.img_size, self.img_size)

            # FINDING CROSS POINT
            cross_x = int(
                random.uniform(self.img_size * self.cross_offset,
                               self.img_size * (1 - self.cross_offset)))
            cross_y = int(
                random.uniform(self.img_size * self.cross_offset,
                               self.img_size * (1 - self.cross_offset)))

            fragment_img, fragment_bbox = self.get_mosaic(
                0, cross_x, cross_y, tensor_img, boxes)
            mosaic_img[:, 0:cross_y, 0:cross_x] = fragment_img
            boxes = fragment_bbox

            for n in range(1, 4):
                raw_fragment_img, raw_fragment_bbox = self.get_img_for_mosaic(
                    brightness_rnd, contrast_rnd, hue_rnd, saturation_rnd)
                fragment_img, fragment_bbox = self.get_mosaic(
                    n, cross_x, cross_y, raw_fragment_img, raw_fragment_bbox)
                boxes = torch.cat([boxes, fragment_bbox])

                if n == 1:
                    mosaic_img[:, 0:cross_y,
                               cross_x:self.img_size] = fragment_img
                elif n == 2:
                    mosaic_img[:, cross_y:self.img_size,
                               0:cross_x] = fragment_img
                elif n == 3:
                    mosaic_img[:, cross_y:self.img_size,
                               cross_x:self.img_size] = fragment_img

            # Use the mosaic as the returned tensor
            tensor_img = mosaic_img

        # For validation it would be letterbox
        else:
            xyxy_bboxes = utils.xywh2xyxy(boxes[:, 1:])

            #IMG
            padding = abs(t_width - t_height) // 2
            padded_img = torch.zeros(3, self.img_size, self.img_size)
            if t_width > t_height:
                padded_img[:, padding:padding + t_height] = tensor_img
            else:
                padded_img[:, :, padding:padding + t_width] = tensor_img

            tensor_img = padded_img

            relative_padding = padding / self.img_size
            #BOXES
            if t_width > t_height:
                #Change y's relative position
                xyxy_bboxes[:, 1] *= ratio
                xyxy_bboxes[:, 3] *= ratio
                xyxy_bboxes[:, 1] += relative_padding
                xyxy_bboxes[:, 3] += relative_padding
            else:  #x's
                xyxy_bboxes[:, 0] *= ratio
                xyxy_bboxes[:, 2] *= ratio
                xyxy_bboxes[:, 0] += relative_padding
                xyxy_bboxes[:, 2] += relative_padding

            boxes[:, 1:] = utils.xyxy2xywh(xyxy_bboxes)

        targets = torch.zeros((len(boxes), 6))
        targets[:, 1:] = boxes

        return img_path, tensor_img, targets
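`utils.xywh2xyxy`, used in the validation branch above and again in the next example, is the inverse conversion. A minimal sketch, assuming center-based boxes in a torch tensor of shape [N, 4]:

import torch

def xywh2xyxy(x):
    # [center_x, center_y, w, h] -> [x1, y1, x2, y2]
    y = x.clone()
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2
    return y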
Example #7
    def get_mosaic(self, n, cross_x, cross_y, tensor_img, boxes):
        t_height = tensor_img.shape[1]
        t_width = tensor_img.shape[2]

        xyxy_bboxes = utils.xywh2xyxy(boxes[:, 1:])

        relative_cross_x = cross_x / self.img_size
        relative_cross_y = cross_y / self.img_size

        #CALCULATING TARGET WIDTH AND HEIGHT OF PICTURE
        if n == 0:
            width_of_nth_pic = cross_x
            height_of_nth_pic = cross_y
        elif n == 1:
            width_of_nth_pic = self.img_size - cross_x
            height_of_nth_pic = cross_y
        elif n == 2:
            width_of_nth_pic = cross_x
            height_of_nth_pic = self.img_size - cross_y
        elif n == 3:
            width_of_nth_pic = self.img_size - cross_x
            height_of_nth_pic = self.img_size - cross_y

        # CHOOSING TOP LEFT CORNER (offset so bboxes keep more than a few pixels :-) )
        cut_x1 = random.randint(0, int(t_width * 0.33))
        cut_y1 = random.randint(0, int(t_height * 0.33))

        # Decide which axis to enlarge randomly by comparing the two ratios;
        # cross_x is basically the width of the top-left picture
        if (t_width - cut_x1) / width_of_nth_pic < (
                t_height - cut_y1) / height_of_nth_pic:
            cut_x2 = random.randint(cut_x1 + int(t_width * 0.67), t_width)
            cut_y2 = int(cut_y1 + (cut_x2 - cut_x1) / width_of_nth_pic *
                         height_of_nth_pic)

        else:
            cut_y2 = random.randint(cut_y1 + int(t_height * 0.67), t_height)
            cut_x2 = int(cut_x1 + (cut_y2 - cut_y1) / height_of_nth_pic *
                         width_of_nth_pic)

        # RESIZING AND INSERTING (F.interpolate wants a 4-D input, so add a batch dim with None and drop it again with squeeze)
        tensor_img = F.interpolate(
            tensor_img[:, cut_y1:cut_y2, cut_x1:cut_x2][None],
            (height_of_nth_pic, width_of_nth_pic)).squeeze()

        # BBOX
        relative_cut_x1 = cut_x1 / t_width
        relative_cut_y1 = cut_y1 / t_height
        relative_cropped_width = (cut_x2 - cut_x1) / t_width
        relative_cropped_height = (cut_y2 - cut_y1) / t_height

        # SHIFT COORDINATES INTO THE CROPPED IMAGE SO ITS X1, Y1 BECOME 0
        xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0] - relative_cut_x1
        xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1] - relative_cut_y1
        xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2] - relative_cut_x1
        xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3] - relative_cut_y1

        # RESCALE TO THE CROPPED IMAGE SO ITS RIGHT EDGE BECOMES 1
        xyxy_bboxes[:, 0] /= relative_cropped_width
        xyxy_bboxes[:, 1] /= relative_cropped_height
        xyxy_bboxes[:, 2] /= relative_cropped_width
        xyxy_bboxes[:, 3] /= relative_cropped_height

        # CLAMP BOUNDING BOXES SO THEY DO NOT EXTEND OUTSIDE THE IMAGE
        xyxy_bboxes[:, 0].clamp_(0, 1)
        xyxy_bboxes[:, 1].clamp_(0, 1)
        xyxy_bboxes[:, 2].clamp_(0, 1)
        xyxy_bboxes[:, 3].clamp_(0, 1)

        # FILTER TO THROW OUT ALL SMALL BBOXES
        filter_minbbox = (
            xyxy_bboxes[:, 2] - xyxy_bboxes[:, 0] > self.bbox_minsize) & (
                xyxy_bboxes[:, 3] - xyxy_bboxes[:, 1] > self.bbox_minsize)

        # SCALING TO THE FRAGMENT'S SIZE IN THE MOSAIC
        if n == 0:
            xyxy_bboxes[:, 0] *= relative_cross_x
            xyxy_bboxes[:, 1] *= relative_cross_y
            xyxy_bboxes[:, 2] *= relative_cross_x
            xyxy_bboxes[:, 3] *= relative_cross_y
        elif n == 1:
            xyxy_bboxes[:, 0] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 1] *= relative_cross_y
            xyxy_bboxes[:, 2] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 3] *= relative_cross_y
        elif n == 2:
            xyxy_bboxes[:, 0] *= relative_cross_x
            xyxy_bboxes[:, 1] *= (1 - relative_cross_y)
            xyxy_bboxes[:, 2] *= relative_cross_x
            xyxy_bboxes[:, 3] *= (1 - relative_cross_y)
        elif n == 3:
            xyxy_bboxes[:, 0] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 1] *= (1 - relative_cross_y)
            xyxy_bboxes[:, 2] *= (1 - relative_cross_x)
            xyxy_bboxes[:, 3] *= (1 - relative_cross_y)

        # SHIFTING TO THE FRAGMENT'S POSITION IN THE MOSAIC
        if n == 0:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0]  # + relative_cross_x
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1]  # + relative_cross_y
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2]  # + relative_cross_x
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3]  # + relative_cross_y
        elif n == 1:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0] + relative_cross_x
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1]
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2] + relative_cross_x
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3]
        elif n == 2:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0]
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1] + relative_cross_y
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2]
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3] + relative_cross_y
        elif n == 3:
            xyxy_bboxes[:, 0] = xyxy_bboxes[:, 0] + relative_cross_x
            xyxy_bboxes[:, 1] = xyxy_bboxes[:, 1] + relative_cross_y
            xyxy_bboxes[:, 2] = xyxy_bboxes[:, 2] + relative_cross_x
            xyxy_bboxes[:, 3] = xyxy_bboxes[:, 3] + relative_cross_y

        boxes = boxes[filter_minbbox]
        boxes[:, 1:] = utils.xyxy2xywh(xyxy_bboxes)[filter_minbbox]

        return tensor_img, boxes
Example #8
                              torch.FloatTensor(0, 6).to(device)))
                out, loss = net(inputs, labels)
                # if _trainCount%5==0:
                print(_trainCount, loss.item())

                loss.backward()
                optimizer.step()
                _trainCount += 1
                temp += 1
                if _trainCount % 10 == 0 and _trainCount > 100:
                    torch.save(net.state_dict(),
                               "yoloParam%d.dict" % _trainCount)
            print("temp", temp)
    elif MODE == "predict":  # "is" checks identity, not equality; use == for strings
        fileName = './data/images/BloodImage_00000.jpg'
        net.eval()
        img = trainDataSet.imgRead(fileName).unsqueeze(0).to(device)  # Variable is a no-op since PyTorch 0.4; a plain tensor suffices
        with torch.no_grad():
            out, _ = net(img)
        pred = torch.cat(out, dim=1).cpu()
        print(pred.shape)
        detections = utils.non_max_suppression(pred, 0.4, 0.2)[0]
        if detections is None:
            print("can not find the red cell")
            exit()
        a, label = torch.split(detections, [6, 1], dim=1)
        label = torch.cat([torch.zeros(label.shape[0], 1), label, a], dim=1)
        label[:, 2:6] = utils.xyxy2xywh(label[:, 2:6]) / options.imgSquareSize

        imgUtils.showImgNLab(img[0], label)
Example #9
    def __getitem__(self, index):
        if self.image_weights:
            index = self.indices[index]

        hyp = self.hyp
        if self.mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not self.mosaic:
                img, labels = random_affine(img,
                                            labels,
                                            degrees=hyp['degrees'],
                                            translate=hyp['translate'],
                                            scale=hyp['scale'],
                                            shear=hyp['shear'])

            # Augment colorspace
            augment_hsv(img,
                        hgain=hyp['hsv_h'],
                        sgain=hyp['hsv_s'],
                        vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() < 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() < 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
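`letterbox`, which Examples 9 through 12 and 14 all rely on, is not included in these excerpts. The idea: resize the image while preserving aspect ratio, then pad the remainder with neutral gray. A minimal OpenCV sketch of the variant that returns (img, ratio, pad); the exact signature varies between the examples (some return separate width/height ratios and paddings), so treat this as an assumption:

import cv2

def letterbox(img, new_shape=416, color=(114, 114, 114), auto=False, scaleup=True):
    # Resize keeping aspect ratio, then pad to new_shape with gray borders.
    h, w = img.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)               # (height, width)
    r = min(new_shape[0] / h, new_shape[1] / w)          # resize ratio
    if not scaleup:                                      # only shrink, never enlarge
        r = min(r, 1.0)
    new_unpad = (int(round(w * r)), int(round(h * r)))   # (width, height)
    dw = new_shape[1] - new_unpad[0]                     # total width padding
    dh = new_shape[0] - new_unpad[1]                     # total height padding
    if auto:                                             # pad only to a stride multiple
        dw, dh = dw % 32, dh % 32
    if (w, h) != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = dh // 2, dh - dh // 2
    left, right = dw // 2, dw - dw // 2
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)
    return img, (r, r), (left, top)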
Example #10
    def __getitem__(self, index):

        info = self.img_files[index].rstrip().split(' ')
        img_path = info[0]


        img = cv2.imread(img_path)  # BGR
        assert img is not None, 'File Not Found ' + img_path

        augment_hsv = True
        if self.augment and augment_hsv:
            # SV augmentation by 50%
            fraction = 0.50  # must be < 1.0
            img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            S = img_hsv[:, :, 1].astype(np.float32)
            V = img_hsv[:, :, 2].astype(np.float32)

            a = (random.random() * 2 - 1) * fraction + 1
            S *= a
            if a > 1:
                np.clip(S, None, 255, out=S)

            a = (random.random() * 2 - 1) * fraction + 1
            V *= a
            if a > 1:
                np.clip(V, None, 255, out=V)

            img_hsv[:, :, 1] = S  # .astype(np.uint8)
            img_hsv[:, :, 2] = V  # .astype(np.uint8)
            cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

        h, w, _ = img.shape
        img, ratio, padw, padh = letterbox(img, height=self.img_size)

        # Load labels
        labels = []
        labels0 = np.array(info[1:]).astype(np.float32).reshape(-1, 5)
        if len(labels0) > 0:
            labels = labels0.copy()
            labels[:, 1] = ratio * labels[:, 1] + padw
            labels[:, 2] = ratio * labels[:, 2] + padh
            labels[:, 3] = ratio * labels[:, 3] + padw
            labels[:, 4] = ratio * labels[:, 4] + padh
            # labels[:, 5] = labels[:, 5]
        else:
            labels = np.array([])

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) / self.img_size

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Example #11
    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic']
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic(self, random.randint(0, self.n - 1))
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            # Load labels
            labels = []
            x = self.labels[index]
            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
Example #12
    def __getitem__(self, index):
        img_path = self.img_files[index]  #image path
        label_path = self.label_files[index]  #label path
        hyp = self.hyp

        # Load image
        img = self.imgs[index]
        if img is None:
            # Read image with OpenCV (possibly single-channel, high bit depth)
            img = cv2.imread(img_path,
                             cv2.IMREAD_ANYDEPTH | cv2.IMREAD_UNCHANGED)
            assert img is not None, 'File Not Found ' + img_path
            # Stack the single channel three times to get a BGR-like image
            img = np.stack((img,) * 3, axis=2)

            #augmentation
            r = self.img_size / max(img.shape)
            if self.augment and r < 1:  # if training (NOT testing), downsize to inference shape
                h, w, _ = img.shape
                img = cv2.resize(img, (int(w * r), int(h * r)),
                                 interpolation=cv2.INTER_AREA)

            if self.n < 3000:  # cache into memory if image count < 3000
                self.imgs[index] = img

        # Letterbox
        h, w, _ = img.shape  #image shape
        shape = self.img_size
        img, ratiow, ratioh, padw, padh = letterbox(
            img, new_shape=shape, mode='square'
        )  #image and its padding after applying letterbox function

        # Load labels
        labels = []
        if os.path.isfile(label_path):
            x = self.labels[index]
            if x is None:  # labels not preloaded
                with open(label_path, 'r') as f:
                    x = np.array([x.split() for x in f.read().splitlines()],
                                 dtype=np.float32)
                    self.labels[index] = x  # save for next time

            if x.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = x.copy()
                labels[:, 1] = ratiow * w * (x[:, 1] - x[:, 3] / 2) + padw
                labels[:, 2] = ratioh * h * (x[:, 2] - x[:, 4] / 2) + padh
                labels[:, 3] = ratiow * w * (x[:, 1] + x[:, 3] / 2) + padw
                labels[:, 4] = ratioh * h * (x[:, 2] + x[:, 4] / 2) + padh

        # Augment image and labels
        if self.augment:
            img, labels = random_affine(img,
                                        labels,
                                        degrees=hyp['degrees'],
                                        translate=hyp['translate'],
                                        scale=hyp['scale'],
                                        shear=hyp['shear'])

        nL = len(labels)  # number of labels
        if nL:
            # convert xyxy to xywh
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])

            # Normalize coordinates 0 - 1
            labels[:, [2, 4]] /= img.shape[0]  # height
            labels[:, [1, 3]] /= img.shape[1]  # width

        if self.augment:
            # random left-right flip
            lr_flip = True
            if lr_flip and random.random() > 0.5:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

            # random up-down flip
            ud_flip = False
            if ud_flip and random.random() > 0.5:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Normalize

        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img, dtype=np.float32)  # uint8 to float32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0

        return torch.from_numpy(img), labels_out, img_path, (h, w)
Example #13
        outputs = detr((images, masks), post_process=True)

        for img_id, scores, labels, boxes in zip(img_ids, outputs['scores'],
                                                 outputs['labels'],
                                                 outputs['boxes']):
            img_id = img_id.numpy()

            img_info = coco_data.coco.loadImgs([img_id])[0]
            img_height = img_info['height']
            img_width = img_info['width']

            for score, label, box in zip(scores, labels, boxes):
                score = score.numpy()
                label = label.numpy()
                box = absolute2relative(box, (img_width, img_height))
                box = xyxy2xywh(box).numpy()

                results.append({
                    "image_id": int(img_id),
                    "category_id": int(label),
                    "bbox": box.tolist(),
                    "score": float(score)
                })

        pbar.update(int(len(images)))

json_object = json.dumps(results, indent=2)
with open(args.results_file, 'w') as f:
    f.write(json_object)

evaluate(args.results_file)
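`absolute2relative` is not shown in this excerpt; presumably it divides absolute pixel coordinates by the image size to get boxes in the [0, 1] range. A sketch under that assumption, written for torch tensors (given the `.numpy()` calls, the original may use another framework):

import torch

def absolute2relative(box, img_size):
    # Scale absolute [x1, y1, x2, y2] pixel coords into [0, 1]
    # by the image (width, height).
    width, height = img_size
    scale = torch.tensor([width, height, width, height], dtype=torch.float32)
    return box / scale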
Example #14
    def __getitem__(self, index):
        index = self.indices[index]
        hyp = self.hyp
        mosaic = self.mosaic
        if mosaic:
            # Load mosaic
            img, labels = load_mosaic(self, index)
            shapes = None
            # MixUp  https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                img2, labels2 = load_mosaic(self, random.randint(0, self.num_img - 1))
                mixup_ratio = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * mixup_ratio + img2 * (1 - mixup_ratio)).astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)
        else:
            # Load image
            img, (orig_h, orig_w), (height, width) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size
            img, ratio, pad = _letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (orig_h, orig_w), ((height / orig_h, width / orig_w), pad)   # for COCO mAP rescaling

            # Load labels
            labels = []
            label = self.labels[index]
            if label.size > 0:
                # Normalized xywh to pixel xyxy format
                labels = label.copy()
                labels[:, 1] = ratio[0] * width * (label[:, 1] - label[:, 3] / 2) + pad[0]  # pad width
                labels[:, 2] = ratio[1] * height * (label[:, 2] - label[:, 4] / 2) + pad[1]  # pad height
                labels[:, 3] = ratio[0] * width * (label[:, 1] + label[:, 3] / 2) + pad[0]
                labels[:, 4] = ratio[1] * height * (label[:, 2] + label[:, 4] / 2) + pad[1]

        if self.augment:
            # Augment imagespace
            if not mosaic:
                img, labels = random_perspective(img, labels,
                                                 degrees=hyp['degrees'],
                                                 translate=hyp['translate'],
                                                 scale=hyp['scale'],
                                                 shear=hyp['shear'],
                                                 perspective=hyp['perspective'])

            # Augment colorspace
            augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

        num_labels = len(labels)
        if num_labels:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])
            labels[:, [2, 4]] /= img.shape[0]           # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]           # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if num_labels:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if num_labels:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((num_labels, 6))
        if num_labels:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return torch.from_numpy(img), labels_out, self.img_files[index], shapes
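A detail shared by every `__getitem__` above: column 0 of `labels_out` is reserved for the image's index within a batch and is left at zero here. The dataloader's `collate_fn` fills it in, so the loss can associate concatenated targets with the right image. A minimal sketch of that conventional collate function (hypothetical, but it matches how YOLO-style loaders consume these targets):

import torch

def collate_fn(batch):
    # batch is a list of (img, labels_out, path, shapes) tuples.
    imgs, labels, paths, shapes = zip(*batch)
    for i, l in enumerate(labels):
        l[:, 0] = i  # write the image index into the reserved first column
    return torch.stack(imgs, 0), torch.cat(labels, 0), paths, shapes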