Example #1
    def __getitem__(self, idx):
        '''
        Build one training sample from a ground-truth box perturbed by Gaussian noise.

        :param idx: index of the image / ground-truth box pair
        :return: patch, target_action, target_label
        '''

        img_path = os.path.join(self.root_dir, self.imgpaths[idx])
        img = Image.open(img_path)
        gt_bbox = self.gt_bboxes[idx]
        noisy_bbox = gen_gaussian_noise(gt_bbox)

        target_label = 0
        if calculate_iou(noisy_bbox, gt_bbox) > 0.7:
            target_label = 1

        # search for the action that most improves IoU with the ground truth
        action_iou = np.zeros(10)
        for i in range(10):
            warped_bbox = noisy_bbox + warp[i]
            action_iou[i] = calculate_iou(warped_bbox, gt_bbox)

        target_action = np.argmax(action_iou)

        # crop the patch around the noisy box
        patch = cropping(img, noisy_bbox)
        if self.transform:
            patch = self.transform(patch)

        return patch, target_action, target_label
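
All of these snippets call a shared helper, calculate_iou, which is not shown on this page. A minimal sketch for two axis-aligned boxes in (x1, y1, x2, y2) form might look like the following; the real helper in each project may accept other box layouts or batches of boxes, so treat this only as an illustration.

def calculate_iou(box_a, box_b):
    # intersection rectangle
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    # union = sum of the two areas minus the intersection
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0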
Example #2
    def appropriate_box(self, bbox, i):
        # return the ground-truth box of frame i+1 that best overlaps `bbox`
        # (IoU > 0.5); an empty list means no sufficiently overlapping box exists
        gt_bbox = []
        max_iou = 0
        for j in range(len(self.frames_gt[str(i + 1)])):
            iou = calculate_iou(bbox, self.frames_gt[str(i + 1)][j])
            if iou > 0.5 and iou > max_iou:
                gt_bbox = [self.frames_gt[str(i + 1)][j]]
                max_iou = iou

        return gt_bbox
Example #3
    def step(self, action):

        '''
        :param action: int in [0, 10]; action 10 terminates the episode
        :return:
            new_state (cropped image patch),
            is_terminate,
            reward
        '''

        # calculate step size (w/h are available for a box-relative step; fixed to 1 here)
        w = self.state[2] - self.state[0]
        h = self.state[3] - self.state[1]
        step_size = 1
        # compute new bbox
        new_bbox = self.state + warp[action] * step_size

        # check if the new bbox is valid
        if not self.is_valid(new_bbox):
            # return current bbox and Termination
            return cropping(self.img, self.state), True, -1

        # if valid
        self.state = new_bbox
        self.step_count += 1
        ns = cropping(self.img, self.state)
        if action == 10 or self.step_count == 100:
            # the current action is termination, or the episode is long enough
            is_t = True
        else:
            is_t = False

        # computing reward
        reward = 0
        if is_t:
            iou = calculate_iou(self.state, self.gt_bbox)
            if iou > 0.7:
                reward = 100
            else:
                reward = -1

        return ns, is_t, reward
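
As a sketch of how this step() method might be driven, the hypothetical loop below assumes an environment object env (with a reset() method) and an agent with an act() method; neither name appears in the snippet above, so both are placeholders.

state = env.reset()
done = False
total_reward = 0
while not done:
    action = agent.act(state)              # one of the 11 actions; 10 requests termination
    state, done, reward = env.step(action)
    total_reward += reward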
Example #4
import os

import matplotlib.pyplot as plt
import numpy as np


def main():
    root_dir = '../data/OTB100'
    seq_names = os.listdir(root_dir)
    iou_list = []

    for name in seq_names:
        pred_path = os.path.join(root_dir, name, 'pred_rect_sl2.txt')
        gt_path = os.path.join(root_dir, name, 'groundtruth_rect.txt')
        if os.path.isfile(pred_path):
            print(name)
            pred_bbox = np.loadtxt(pred_path)
            gt_bbox = np.genfromtxt(gt_path, delimiter=',')
            # some sequences store the groundtruth with whitespace instead of commas
            if len(gt_bbox.shape) == 1:
                gt_bbox = np.genfromtxt(gt_path)

            for i in range(len(pred_bbox)):
                pb = pred_bbox[i, :]
                gb = gt_bbox[i, :]
                # groundtruth is (x, y, w, h); convert to (x1, y1, x2, y2)
                gb[2] = gb[0] + gb[2]
                gb[3] = gb[1] + gb[3]
                iou = calculate_iou(pb, gb)
                iou_list.append(iou)
    iou_list.sort()
    iou_list = np.array(iou_list)
    total = len(iou_list)
    precision = np.zeros(6)
    for i in range(6):
        thresh = i * 0.2
        precision[i] = (iou_list >= thresh).sum() / total

    print(precision)
    x = np.arange(0, 1.2, 0.2)
    plt.plot(x, precision, 'r--')
    plt.xlabel('IoU [AUC: %.3f]' % precision.mean(),
               fontsize=14,
               color='black')
    plt.ylabel('success_rate', fontsize=14, color='black')
    plt.show()
Example #5
    def encode(self, boxes, labels, input_size):
        """
        Encode target bounding boxes and class labels.

        We obey the Faster RCNN box coder:
          tx = (x - anchor_x) / anchor_w
          ty = (y - anchor_y) / anchor_h
          tw = log(w / anchor_w)
          th = log(h / anchor_h)

        Args:
          boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
          labels: (tensor) object class labels, sized [#obj,].
          input_size: (int/tuple) model input size of (w,h).

        Returns:
          loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
          cls_targets: (tensor) encoded class labels, sized [#anchors,].
        """
        input_size = torch.FloatTensor([input_size, input_size]) if isinstance(input_size, int) \
            else torch.FloatTensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)
        boxes = change_box_order(boxes, 'xyxy2xywh')

        ious = calculate_iou(anchor_boxes, boxes, order='xywh')
        max_ious, max_ids = ious.max(1)
        boxes = boxes[max_ids]

        loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
        loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
        loc_targets = torch.cat([loc_xy, loc_wh], 1)
        cls_targets = 1 + labels[max_ids]

        cls_targets[max_ious < 0.5] = 0
        ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious between [0.4, 0.5]
        cls_targets[ignore] = -1  # for now just mark ignored to -1
        return loc_targets, cls_targets
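
The docstring above gives the Faster RCNN box encoding; the inverse transform (from predicted offsets back to boxes) follows directly from those formulas. A rough decode sketch under the same conventions, assuming the same _get_anchor_boxes helper, could look like this; the project's actual decoder would normally also apply score thresholding and NMS.

    def decode(self, loc_preds, input_size):
        input_size = torch.FloatTensor([input_size, input_size]) if isinstance(input_size, int) \
            else torch.FloatTensor(input_size)
        anchor_boxes = self._get_anchor_boxes(input_size)      # (x, y, w, h) per anchor
        xy = loc_preds[:, :2] * anchor_boxes[:, 2:] + anchor_boxes[:, :2]
        wh = torch.exp(loc_preds[:, 2:]) * anchor_boxes[:, 2:]
        # back to (xmin, ymin, xmax, ymax)
        boxes = torch.cat([xy - wh / 2, xy + wh / 2], 1)
        return boxes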
Example #6
    def forward(self, feats, gt_boxes, im_info):
        batch_size = feats.size(0)

        # feature map after conv layer
        rpn_conv1 = F.relu(self.conv3x3(feats), inplace=True)

        # rpn classification score
        rpn_cls_score = self.conv1x1_cls(rpn_conv1)  # b*18*50*37
        rpn_cls_score_reshape = reshape(rpn_cls_score, 2)  # b*2*450*37
        rpn_cls_score_softmax = F.softmax(rpn_cls_score_reshape, 1)  # b*2*450*37
        rpn_cls_score_reshape_back = reshape(rpn_cls_score_softmax, 18)  # b*18*50*37

        # rpn offsets to the anchor boxes
        rpn_loc_pred = self.conv1x1_loc(rpn_conv1)  # b*36*50*37

        # ----------------------------------------generate proposals----------------------------------------------------
        rpn_proposals = roi_pooling(rpn_cls_score_softmax, rpn_loc_pred, im_info[1])

        # rpn loss
        rpn_loss_cls = 0
        rpn_loss_loc = 0

        if self.training:
            assert gt_boxes is not None

            anchors = self.anchors

            # keep only inside anchors
            keep = ((anchors[:, 0] >= 0) &
                    (anchors[:, 1] >= 0) &
                    (anchors[:, 2] <= int(im_info[0][1]) + 0) &
                    (anchors[:, 3] <= int(im_info[0][0]) + 0))
            idxs_inside = torch.nonzero(keep).view(-1)
            anchors = anchors[idxs_inside, :]  # 5076*4
            print('anchors after clip:', anchors.shape, anchors)

            # ----------------------------------compute classification loss---------------------------------------------

            # b*450*37*2 -> b*16650*2
            rpn_cls_score_reshape = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_cls_score_reshape = rpn_cls_score_reshape[:, idxs_inside, :]  # b*5076*2

            cls_labels = torch.zeros((batch_size, anchors.shape[0])).long()
            positives = []
            positive_idxs = []
            for b in range(batch_size):
                positive_idx_gt = {}
                iou_matrix = torch.zeros((anchors.shape[0], len(gt_boxes[b])))  # 5076*num_gts_per_img

                # compute iou matrix
                for i in range(anchors.shape[0]):
                    for j in range(len(gt_boxes[b])):
                        iou = calculate_iou(anchors[i], gt_boxes[b][j], box_form='xyxy')
                        iou_matrix[i][j] = iou

                # 1.for each anchor if iou < 0.3, negative
                for i in range(anchors.shape[0]):
                    max_iou = torch.max(iou_matrix[i][:])
                    if max_iou < 0.3:
                        cls_labels[b][i] = 0

                # 2. for each gt, the anchor with the highest iou is positive
                idxs = torch.max(iou_matrix, 0)[1]
                cls_labels[b][idxs] = 1
                for i, idx in enumerate(idxs):
                    idx = int(idx)
                    if idx not in positive_idx_gt:
                        positive_idx_gt[idx] = i
                        positive_idxs.append(idx)

                # 3. for each anchor whose best iou > 0.7, positive
                for i in range(anchors.shape[0]):
                    max_iou, max_idx = torch.max(iou_matrix[i], dim=0)
                    if max_iou > 0.7:
                        positive_idx_gt[i] = int(max_idx)
                        positive_idxs.append(i)
                        cls_labels[b][i] = 1

                positives.append(positive_idx_gt)

            rpn_cls_score_reshape = rpn_cls_score_reshape.view(-1, 2)
            cls_labels = cls_labels.view(-1)
            print(rpn_cls_score_reshape.shape, cls_labels.shape)
            rpn_loss_cls += F.cross_entropy(rpn_cls_score_reshape, cls_labels)

            # -------------------------------------compute regression loss----------------------------------------------
            print(rpn_loc_pred.shape)
            # b*36*50*37 -> b*16650*4
            rpn_loc_pred = rpn_loc_pred.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)
            rpn_loc_pred = rpn_loc_pred[:, idxs_inside, :]  # b*5076*4
            loc_labels = torch.zeros((batch_size, anchors.shape[0], 4))  # float targets for smooth_l1_loss

            for b in range(batch_size):
                for i in range(anchors.shape[0]):
                    if i in positives[b]:
                        loc_labels[b][i][:] = gt_boxes[b][positives[b][i]]

            rpn_loc_pred = rpn_loc_pred[:, positive_idxs, :]
            loc_labels = loc_labels[:, positive_idxs, :]

            rpn_loc_pred = rpn_loc_pred.view(-1, 4)
            loc_labels = loc_labels.view(-1, 4)

            rpn_loss_loc = F.smooth_l1_loss(rpn_loc_pred, loc_labels, reduction='sum') / 256

        print(rpn_loss_cls, rpn_loss_loc)
        return rpn_proposals, rpn_loss_cls, rpn_loss_loc
Example #7
import os

import cv2
import numpy as np
import numpy.random as npr


def gen_pnet_data(im_dir, data_dir, anno_file, label):
    # For this small training run, only faces larger than 40 are used.
    # Note: when generating masked faces this also produces many non-masked faces
    # as negatives, so when training MTCNN the negatives from mask/negative are
    # ignored and only the positives are used.
    neg_save_dir = os.path.join(data_dir, "12/%s/negative" % (label))
    pos_save_dir = os.path.join(data_dir, "12/%s/positive" % (label))
    part_save_dir = os.path.join(data_dir, "12/%s/part" % (label))

    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

    save_dir = os.path.join(data_dir, "pnet")

    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    pos_save_file = os.path.join('../annos', '%s_pos_12.txt' % label)
    neg_save_file = os.path.join('../annos', '%s_neg_12.txt' % label)
    part_save_file = os.path.join('../annos', '%s_part_12.txt' % label)

    f1 = open(pos_save_file, 'w')
    f2 = open(neg_save_file, 'w')
    f3 = open(part_save_file, 'w')

    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    num = len(annotations)
    print("%d pics in total" % num)
    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # don't care
    idx = 0

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        # image path
        im_name = annotation[0]
        # box coordinates, converted to float
        bbox = list(map(float, annotation[1:]))
        # ground-truth boxes, one (x1, y1, x2, y2) row per face
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)

        #load image
        im_path = os.path.join(im_dir, im_name + '.jpg')
        if not os.path.exists(im_path):
            im_path = os.path.join(im_dir, im_name + '.png')

        img = cv2.imread(im_path)

        idx += 1

        height, width, channel = img.shape

        neg_num = 0
        # keep cropping random patches until 50 negatives have been collected
        # from this image
        while neg_num < 50:
            size = npr.randint(12, min(width, height) / 2)
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])

            Iou = calculate_iou(crop_box, boxes)

            cropped_im = img[int(ny):int(ny + size), int(nx):int(nx + size), :]
            resized_im = cv2.resize(cropped_im, (12, 12),
                                    interpolation=cv2.INTER_LINEAR)

            if np.max(Iou) < 0.3:
                # IoU with every gt must be below 0.3
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)

                n_idx += 1
                neg_num += 1

        # collect positive and part samples for every gt box; with ~39915 boxes
        # in the dataset this yields roughly 800k pos/part faces
        for box in boxes:
            # box (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            # generate negatives that overlap the gt (these act as harder negatives)
            for i in range(5):
                size = npr.randint(12, min(width, height) / 2)
                # delta_x and delta_y are offsets of (x1, y1)
                delta_x = npr.randint(max(-size, -x1), w)
                delta_y = npr.randint(max(-size, -y1), h)
                nx1 = max(0, x1 + delta_x)
                ny1 = max(0, y1 + delta_y)

                if nx1 + size > width or ny1 + size > height:
                    continue

                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = calculate_iou(crop_box, boxes)

                cropped_im = img[int(ny1):int(ny1 + size),
                                 int(nx1):int(nx1 + size), :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)

                if np.max(Iou) < 0.3:
                    # IoU with every gt must be below 0.3
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # generate positive examples and part faces
            for i in range(20):
                size = npr.randint(int(min(w, h) * 0.8),
                                   np.ceil(1.25 * max(w, h)))

                # delta here is the offset of box center
                if w < 5:
                    print(w)
                    continue

                delta_x = npr.randint(-w * 0.2, w * 0.2)
                delta_y = npr.randint(-h * 0.2, h * 0.2)

                # x1 + w/2 is the gt center_x, delta_x shifts the box center,
                # and size/2 is half the crop size
                nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size

                if nx2 > width or ny2 > height:
                    continue

                crop_box = np.array([nx1, ny1, nx2, ny2])

                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                cropped_im = img[int(ny1):int(ny2), int(nx1):int(nx2), :]
                resized_im = cv2.resize(cropped_im, (12, 12),
                                        interpolation=cv2.INTER_LINEAR)

                box_ = box.reshape(1, -1)

                if calculate_iou(crop_box, box_) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

                elif calculate_iou(crop_box, box_) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1

        if idx % 100 == 0:
            print("%s images done, pos: %s part: %s neg: %s" %
                  (idx, p_idx, d_idx, n_idx))

    f1.close()
    f2.close()
    f3.close()
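
A hypothetical invocation of this function is sketched below; every path and the label value are placeholders rather than values taken from the original project.

if __name__ == '__main__':
    gen_pnet_data(im_dir='../data/images',
                  data_dir='../data/train',
                  anno_file='../annos/train_annos.txt',
                  label='face')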