Ejemplo n.º 1
0
    def __getitem__(self, index):
        """Build one training sample: the image plus its center-point targets.

        Args:
            index: Sample position. It is wrapped modulo
                ``self.train_batches``; a request for index 0 first
                reshuffles ``self.train_lines`` in place.

        Returns:
            A tuple ``(img, batch_hm, batch_wh, batch_reg, batch_reg_mask)``:

            * ``img`` -- float32 image divided by 255, channel-first, with
              the channel axis reversed (RGB -> BGR).
            * ``batch_hm`` -- ``(out_h, out_w, num_classes)`` heatmap filled
              by ``draw_gaussian`` at every object centre.
            * ``batch_wh`` -- ``(out_h, out_w, 2)`` box width/height stored at
              each object's integer centre cell.
            * ``batch_reg`` -- ``(out_h, out_w, 2)`` fractional centre offset
              stored at the same cell.
            * ``batch_reg_mask`` -- ``(out_h, out_w)`` mask set to 1 at cells
              that hold an object centre.
        """
        # A new pass over the data starts at index 0: reshuffle the samples.
        if index == 0:
            shuffle(self.train_lines)
        lines = self.train_lines
        index = index % self.train_batches
        img, y = self.get_random_data(lines[index], self.input_size[0:2])

        in_h, in_w = self.input_size[0], self.input_size[1]
        out_h, out_w = self.output_size[0], self.output_size[1]

        batch_hm = np.zeros((out_h, out_w, self.num_classes),
                            dtype=np.float32)
        batch_wh = np.zeros((out_h, out_w, 2), dtype=np.float32)
        batch_reg = np.zeros((out_h, out_w, 2), dtype=np.float32)
        batch_reg_mask = np.zeros((out_h, out_w), dtype=np.float32)

        if len(y) != 0:
            # Columns 0-3 of ``y`` hold the box corners in input-image pixels;
            # rescale them to output (heatmap) resolution and keep them
            # inside the map.
            boxes = np.array(y[:, :4], dtype=np.float32)
            boxes[:, [0, 2]] = boxes[:, [0, 2]] / in_w * out_w
            boxes[:, [1, 3]] = boxes[:, [1, 3]] / in_h * out_h
            boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, out_w - 1)
            boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, out_h - 1)

            # The last column of ``y`` is the class id of each box.
            for bbox, raw_cls in zip(boxes, y[:, -1]):
                cls_id = int(raw_cls)
                h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
                # Boxes that collapsed after clipping contribute no target.
                if h > 0 and w > 0:
                    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                    radius = max(0, int(radius))
                    ct = np.array([(bbox[0] + bbox[2]) / 2,
                                   (bbox[1] + bbox[3]) / 2],
                                  dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    batch_hm[:, :, cls_id] = draw_gaussian(
                        batch_hm[:, :, cls_id], ct_int, radius)

                    # Targets are indexed [row, col] == [y, x].
                    batch_wh[ct_int[1], ct_int[0]] = 1. * w, 1. * h
                    batch_reg[ct_int[1], ct_int[0]] = ct - ct_int
                    batch_reg_mask[ct_int[1], ct_int[0]] = 1

        img = np.array(img, dtype=np.float32)
        # The pretrained model found online expects BGR rather than RGB.
        # Reverse the channel axis while the image is still height x width x
        # channel; slicing the last axis after the transpose would mirror
        # the image horizontally instead of swapping channels.
        img = np.transpose(img[:, :, ::-1] / 255., (2, 0, 1))

        return img, batch_hm, batch_wh, batch_reg, batch_reg_mask
Ejemplo n.º 2
0
 def __getitem__(self, index):
     """Load one image and render a single-channel keypoint heatmap for it.

     ``self.datasets[index]`` is read as ``[file_name, v0, v1, ...]`` where
     the values after the file name are integer coordinates consumed as
     consecutive ``(x, y)`` pairs.

     Args:
         index: Position of the record in ``self.datasets``.

     Returns:
         ``(image, heatmaps)`` as torch tensors: the normalised channel-first
         image and a ``(1, out_h, out_w)`` float32 heatmap.

     Raises:
         FileNotFoundError: If the image cannot be read from disk.
         ValueError: If the record holds an odd number of coordinates.
     """
     data = self.datasets[index]
     # NOTE(review): the image root is a hard-coded absolute Windows path.
     image_path = os.path.join(
         r'C:\Users\Administrator\Desktop\pig\JPEGImages', data[0])
     # One (x, y) row per point; a record without points yields a (0, 2)
     # array instead of failing inside np.split.
     detections = np.array([int(x) for x in data[1:]],
                           dtype=np.float64).reshape(-1, 2)
     heatmaps = np.zeros((self.output_size[0], self.output_size[1]),
                         dtype=np.float32)
     image = cv2.imread(image_path)
     if image is None:
         # cv2.imread signals failure by returning None rather than raising.
         raise FileNotFoundError(
             "could not read image: {}".format(image_path))
     # Presumably pads the image to a square -- confirm in _full_image_crop.
     image, detections = _full_image_crop(image, detections)
     # Resize image and points to the network input size.
     image, detections = _resize_image(image, detections, self.input_size)
     width_ratio = self.output_size[1] / self.input_size[1]
     height_ratio = self.output_size[0] / self.input_size[0]
     image = image.astype(np.float32) / 255.
     normalize_(image, self.mean, self.std)
     image = image.transpose((2, 0, 1))

     if self.gaussian_bump:
         # The radius does not depend on the individual point (a fixed
         # 10x10 box is assumed), so compute it once for all points.
         if self.gaussian_rad == -1:
             radius = gaussian_radius((10, 10), self.gaussian_iou)
             radius = max(0, int(radius))
         else:
             radius = self.gaussian_rad
         for detection in detections:
             # Point coordinates after down-sampling to heatmap
             # resolution, truncated to the containing cell.
             x = int(detection[0] * width_ratio)
             y = int(detection[1] * height_ratio)
             draw_gaussian(heatmaps, [x, y], radius)

     heatmaps = np.expand_dims(heatmaps, axis=0)
     image = torch.from_numpy(image)
     heatmaps = torch.from_numpy(heatmaps)
     return image, heatmaps
Ejemplo n.º 3
0
    def __getitem__(self, index):
        """Return one image together with its center-point training targets.

        ``self.images[index]`` provides ``(img_path, list_bbox_cls)`` where
        each list entry is ``(bbox, cls)`` and ``bbox`` holds corner
        coordinates ``(x1, y1, x2, y2)`` in original-image pixels.

        Args:
            index: Position of the sample in ``self.images``.

        Returns:
            ``(img, hm, wh, reg, ind, reg_mask)``:

            * ``img`` -- the opened image, passed through ``self.transform``
              when one is set.
            * ``hm`` -- ``(num_classes, S, S)`` float32 heatmap, with
              ``S = input_size // down_ratio``.
            * ``wh`` -- ``(max_objs, 2)`` box width/height in heatmap cells.
            * ``reg`` -- ``(max_objs, 2)`` fractional part of each centre.
            * ``ind`` -- ``(max_objs,)`` flattened heatmap index of each
              centre (``y * S + x``).
            * ``reg_mask`` -- ``(max_objs,)`` uint8, 1 for slots that hold an
              object.
        """
        img_path, list_bbox_cls = self.images[index]
        img = Image.open(img_path)
        real_w, real_h = img.size
        if self.transform:
            img = self.transform(img)
        heatmap_size = self.opt.input_size // self.opt.down_ratio
        # Per-class heatmap.
        hm = np.zeros((self.opt.num_classes, heatmap_size, heatmap_size),
                      dtype=np.float32)
        # Width and height.
        wh = np.zeros((self.opt.max_objs, 2), dtype=np.float32)
        # Sub-cell centre offset (regression target).
        reg = np.zeros((self.opt.max_objs, 2), dtype=np.float32)
        # Index into the flattened heatmap. ``np.int`` was only an alias of
        # the builtin ``int`` and no longer exists in NumPy >= 1.24.
        ind = np.zeros((self.opt.max_objs), dtype=int)
        # 1 = the slot holds a target, 0 = it does not.
        reg_mask = np.zeros((self.opt.max_objs), dtype=np.uint8)

        # Scale factors from original-image pixels to heatmap cells.
        w_ratio = self.opt.input_size / real_w / self.opt.down_ratio
        h_ratio = self.opt.input_size / real_h / self.opt.down_ratio

        # NOTE(review): more than ``max_objs`` entries would index past the
        # target arrays -- confirm the annotation lists are bounded.
        for i, (bbox, cls) in enumerate(list_bbox_cls):
            # Original bbox size -> heatmap bbox size.
            bbox = (bbox[0] * w_ratio, bbox[1] * h_ratio,
                    bbox[2] * w_ratio, bbox[3] * h_ratio)
            width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]
            # Centre point (x, y).
            center = np.array([(bbox[0] + bbox[2]) / 2,
                               (bbox[1] + bbox[3]) / 2],
                              dtype=np.float32)
            center_int = center.astype(int)
            reg[i] = center - center_int
            wh[i] = 1. * width, 1. * height
            reg_mask[i] = 1
            ind[i] = center_int[1] * heatmap_size + center_int[0]
            radius = utils.gaussian_radius((height, width))
            # Keep the radius a non-negative integer.
            radius = max(0, int(radius))
            utils.draw_umich_gaussian(hm[cls], center_int, radius)
        return (img, hm, wh, reg, ind, reg_mask)
Ejemplo n.º 4
0
    def main(self):
        """Generate face-centre heatmaps and annotations from the ground truth.

        Reads ``./wider_face_split/wider_face_train_bbx_gt.txt`` as a sequence
        of records: an image name line, a face-count line, then one box line
        per face whose first four values are ``x1, y1, width, height``. For
        every kept face a Gaussian is drawn at the box centre; the
        down-sampled heatmap is saved under ``self.heatmap_root`` and the
        boxes are passed to ``self.generate_annotations``.

        Side effects: writes ``.npy`` files, may delete a training image
        whose down-sampled heatmap sums to zero, and prints ``deleted`` when
        it does.
        """
        img_annots = {}
        # The ground-truth file is only read, so read-only mode is enough and
        # also works when the file is not writable.
        with open('./wider_face_split/wider_face_train_bbx_gt.txt', 'r') as gt_file:
            name = gt_file.readline().strip()
            # NOTE(review): the first name is always reduced to its part after
            # the first '/', while later names are only reduced when
            # ``self.img_dir_struct == "flat"`` -- confirm which is intended.
            # The guard lets an empty file end cleanly instead of raising
            # IndexError.
            if name:
                name = name.split('/')[1]
            while name:
                img, scale = self.read_img(name)
                hmap = np.zeros((img.shape[:2]), dtype=np.float32)

                draw_gaussian = draw_umich_gaussian

                img_annots[name] = []
                num_faces = int(gt_file.readline().strip())  # number of faces
                for _ in range(num_faces):
                    annots = gt_file.readline().strip().split()
                    # Convert every field to int, rescale it with the factor
                    # returned by read_img, then keep x1, y1, width, height.
                    annots = [int(value) * scale for value in annots][:4]
                    # Skip faces whose scaled box covers fewer than 16 pixels.
                    if annots[2] * annots[3] < 16:
                        continue
                    w = annots[2]
                    h = annots[3]
                    img_annots[name].append(annots)

                    radius = gaussian_radius((math.ceil(h), math.ceil(w)))
                    radius = max(0, int(radius))

                    center = [int(annots[0] + annots[2] / 2),  # x1 + w/2
                              int(annots[1] + annots[3] / 2)]  # y1 + h/2

                    # From here on annots is
                    # [x1, y1, box_size_x, box_size_y, center_x, center_y].
                    annots.extend(center)
                    # NOTE(review): called without arguments -- confirm the
                    # signature of draw_dense_reg.
                    draw_dense_reg()
                    draw_gaussian(hmap, (center[0], center[1]), radius)

                    # The heatmap file is rewritten after every kept face, so
                    # the file on disk ends up reflecting all of them.
                    downsampled = self.downsample(hmap)
                    np.save(os.path.join(self.heatmap_root, name[:-4]),
                            downsampled)
                # NOTE(review): ``downsampled`` is only assigned inside the
                # face loop; when no face of this image is kept it is unbound
                # (first image) or left over from the previous image.
                if np.sum(downsampled) <= 0:
                    os.remove("/home/dlbox/Desktop/obj_detection/varun/centerface_reimp/widerface/train/"+name)
                    print("deleted")
                self.generate_annotations(
                    name[:-4], downsampled.shape, img_annots[name])
                name = gt_file.readline().strip()
                # At end of file ``name`` is empty; splitting it would raise
                # IndexError instead of letting the loop terminate.
                if name and self.img_dir_struct == "flat":
                    name = name.split("/")[1]
    def __getitem__(self, index):
        """Assemble one template/search pair and its center-point targets.

        Args:
            index: Position in ``self.pick``; the picked value is resolved to
                a sub-dataset and a local index by ``self._find_dataset``.

        Returns:
            ``(template, search, template_mask, search_mask, target_dict)``.
            ``target_dict`` carries:

            * ``'hm'`` -- ``(output_size, output_size)`` heatmap; a Gaussian
              is drawn at the box centre only for positive pairs.
            * ``'reg'`` -- fractional part of the centre at heatmap scale.
            * ``'wh'`` -- box width/height divided by ``self.search_size``.
            * ``'ind'`` -- flattened heatmap index of the integer centre.
            * ``'valid'`` -- False for negative pairs, True otherwise.
            * ``'bbox_debug'`` -- the augmented search box as a tensor.
        """
        index = self.pick[index]
        dataset, index = self._find_dataset(index)

        # With probability ``negative_rate`` the pair is a negative sample:
        # the search frame is then drawn from a randomly chosen dataset.
        neg = self.negative_rate and self.negative_rate > np.random.random()
        if not neg:
            template, search = dataset.get_positive_pair(index)
        else:
            template = dataset.get_random_target(index)
            search = np.random.choice(self.all_dataset).get_random_target()

        # Load both frames and derive their boxes and masks.
        template_image = cv2.imread(template[0])
        search_image = cv2.imread(search[0])
        template_box, template_mask = self._get_bbox(
            template_image, template[1], template[2])
        search_box, search_mask = self._get_bbox(
            search_image, search[1], search[2])

        # Augment; only the search branch's transformed box is kept.
        template, _, template_mask = self.template_aug(
            template_image, template_box, template_mask, self.exempler_size)
        search, input_bbox, search_mask = self.search_aug(
            search_image, search_box, search_mask, self.search_size)

        # Project the search box from input pixels onto the output heatmap.
        scale = float(self.output_size) / float(self.search_size)
        left = input_bbox.x1 * scale
        top = input_bbox.y1 * scale
        right = input_bbox.x2 * scale
        bottom = input_bbox.y2 * scale

        box_size = (math.ceil(bottom - top), math.ceil(right - left))
        radius = max(0, int(gaussian_radius(box_size)))

        ct = np.array([(left + right) / 2, (top + bottom) / 2],
                      dtype=np.float32)
        ct_int = ct.astype(np.int32)
        # Sub-cell offset of the centre, in [0, 1).
        reg = torch.tensor(ct - ct_int, dtype=torch.float32)
        # Box size normalised by the search-image side length.
        wh = torch.tensor(
            [input_bbox[2] - input_bbox[0], input_bbox[3] - input_bbox[1]],
            dtype=torch.float32) / float(self.search_size)
        ind = ct_int[1] * self.output_size + ct_int[0]

        # Negative pairs keep an all-zero heatmap.
        valid = not neg
        hm = np.zeros((self.output_size, self.output_size), dtype=np.float32)
        if valid:
            draw_umich_gaussian(hm, ct_int, radius)
        hm = torch.tensor(hm, dtype=torch.float32)

        # The images are rounded and cast to uint8 before normalisation
        # because torchvision.transforms.ToTensor expects that type:
        # https://pytorch.org/docs/stable/torchvision/transforms.html#torchvision.transforms.ToTensor
        template = self.normalize(np.round(template).astype(np.uint8))
        search = self.normalize(np.round(search).astype(np.uint8))
        template_mask = torch.as_tensor(template_mask)
        search_mask = torch.as_tensor(search_mask)

        target_dict = {
            'hm': hm,
            'reg': reg,
            'wh': wh,
            'ind': torch.as_tensor(ind),
            'valid': torch.as_tensor(valid),
            'bbox_debug': torch.as_tensor(input_bbox),
        }

        return template, search, template_mask, search_mask, target_dict