def __getitem__(self, idx):
    '''
    :param idx: index of the sample
    :return: patch, target_action, target_label
    '''
    img_path = os.path.join(self.root_dir, self.imgpaths[idx])
    img = Image.open(img_path)
    gt_bbox = self.gt_bboxes[idx]

    # perturb the ground-truth box to simulate an imperfect prediction
    noisy_bbox = gen_gaussian_noise(gt_bbox)
    target_label = 1 if calculate_iou(noisy_bbox, gt_bbox) > 0.7 else 0

    # search for the move action that yields the highest IoU with the ground truth
    action_iou = np.zeros(10)
    for i in range(10):
        warped_bbox = noisy_bbox + warp[i]
        action_iou[i] = calculate_iou(warped_bbox, gt_bbox)
    target_action = np.argmax(action_iou)

    # crop the image patch under the noisy box
    patch = cropping(img, noisy_bbox)
    if self.transform:
        patch = self.transform(patch)
    return patch, target_action, target_label
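# All of the snippets here lean on a shared `calculate_iou` helper defined
# elsewhere in the repo. Below is a minimal single-box sketch of what the caller
# above assumes (axis-aligned xyxy boxes). The real helper also handles batched
# anchors and an `order`/`box_form` argument, which this sketch omits.
def calculate_iou_sketch(box_a, box_b):
    """Hypothetical stand-in for the repo's calculate_iou; assumes xyxy boxes."""
    # intersection rectangle
    ix1, iy1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    ix2, iy2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0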
def appropriate_box(self, bbox, i):
    # return the ground-truth box of frame i+1 that best matches bbox;
    # the IoU must exceed 0.5, and an empty list means no acceptable match
    gt_bbox = []
    max_iou = 0
    for j in range(len(self.frames_gt[str(i + 1)])):
        iou = calculate_iou(bbox, self.frames_gt[str(i + 1)][j])
        if iou > 0.5 and iou > max_iou:
            gt_bbox = [self.frames_gt[str(i + 1)][j]]  # keep only the best match
            max_iou = iou
    return gt_bbox
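# Usage sketch (the data is hypothetical; the frames_gt layout — 1-based frame
# numbers as string keys mapping to lists of xyxy boxes — is inferred from the
# lookup above, not confirmed by the source):
# tracker.frames_gt = {'1': [[10, 10, 50, 50], [60, 60, 90, 90]], '2': [...]}
# best = tracker.appropriate_box([12, 11, 52, 49], i=0)   # looks up frame '1'
# best -> [[10, 10, 50, 50]] if the IoU exceeds 0.5, else []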
def step(self, action):
    '''
    :param action: int in [0, 10]; 10 is the termination action
    :return: new_state (cropped patch), is_terminate, reward
    '''
    # box width/height are available for a size-dependent step,
    # but the step size is currently fixed at 1
    w = self.state[2] - self.state[0]
    h = self.state[3] - self.state[1]
    step_size = 1

    # apply the chosen warp to get the new bbox
    new_bbox = self.state + warp[action] * step_size

    # an invalid bbox terminates the episode with a penalty
    if not self.is_valid(new_bbox):
        return cropping(self.img, self.state), True, -1

    self.state = new_bbox
    self.step_count += 1
    ns = cropping(self.img, self.state)

    # terminate if the agent chose the stop action or the episode is long enough
    is_t = (action == 10 or self.step_count == 100)

    # reward is only given at termination
    reward = 0
    if is_t:
        iou = calculate_iou(self.state, self.gt_bbox)
        reward = 100 if iou > 0.7 else -1
    return ns, is_t, reward
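# The `warp` table is defined elsewhere; the code above assumes 11 rows of
# (dx1, dy1, dx2, dy2) deltas, with row 10 being the no-op termination action.
# A minimal sketch — the exact deltas are an assumption, not from the source:
warp_sketch = np.array([
    [-1, 0, -1, 0], [1, 0, 1, 0],    # shift left / right
    [0, -1, 0, -1], [0, 1, 0, 1],    # shift up / down
    [-1, -1, 1, 1], [1, 1, -1, -1],  # scale up / scale down
    [-1, 0, 1, 0], [1, 0, -1, 0],    # widen / narrow
    [0, -1, 0, 1], [0, 1, 0, -1],    # taller / shorter
    [0, 0, 0, 0],                    # terminate (no move)
])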
def main():
    root_dir = '../data/OTB100'
    seq_names = os.listdir(root_dir)
    iou_list = []
    for name in seq_names:
        pred_path = os.path.join(root_dir, name, 'pred_rect_sl2.txt')
        gt_path = os.path.join(root_dir, name, 'groundtruth_rect.txt')
        if not os.path.isfile(pred_path):
            continue
        print(name)
        pred_bbox = np.loadtxt(pred_path)
        # ground-truth files are comma- or whitespace-delimited depending on the sequence
        gt_bbox = np.genfromtxt(gt_path, delimiter=',')
        if len(gt_bbox.shape) == 1:
            gt_bbox = np.genfromtxt(gt_path)
        for i in range(len(pred_bbox)):
            pb = pred_bbox[i, :]
            gb = gt_bbox[i, :]
            # convert ground truth from (x, y, w, h) to (x1, y1, x2, y2)
            gb[2] = gb[0] + gb[2]
            gb[3] = gb[1] + gb[3]
            iou_list.append(calculate_iou(pb, gb))

    iou_list = np.array(iou_list)
    total = len(iou_list)
    # success rate at IoU thresholds 0.0, 0.2, ..., 1.0
    precision = np.zeros(6)
    for i in range(6):
        thresh = i * 0.2
        precision[i] = (iou_list >= thresh).sum() / total
    print(precision)

    x = np.arange(0, 1.2, 0.2)
    plt.plot(x, precision, 'r--')
    plt.xlabel('IoU [AUC: %.3f]' % precision.mean(), fontsize=14, color='black')
    plt.ylabel('success_rate', fontsize=14, color='black')
    plt.show()
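# Expected file layout (inferred from the conversion above, not verified):
#   groundtruth_rect.txt : one "x,y,w,h" (or whitespace-separated) row per frame
#   pred_rect_sl2.txt    : one "x1 y1 x2 y2" row per frame, already corner-form
# Note that precision.mean() is a coarse AUC estimate over 6 thresholds; a
# denser grid (e.g. np.arange(0, 1.05, 0.05)) would approximate it better.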
def encode(self, boxes, labels, input_size):
    """
    Encode target bounding boxes and class labels.

    We obey the Faster RCNN box coder:
        tx = (x - anchor_x) / anchor_w
        ty = (y - anchor_y) / anchor_h
        tw = log(w / anchor_w)
        th = log(h / anchor_h)

    Args:
        boxes: (tensor) bounding boxes of (xmin,ymin,xmax,ymax), sized [#obj, 4].
        labels: (tensor) object class labels, sized [#obj,].
        input_size: (int/tuple) model input size of (w,h).

    Returns:
        loc_targets: (tensor) encoded bounding boxes, sized [#anchors,4].
        cls_targets: (tensor) encoded class labels, sized [#anchors,].
    """
    input_size = torch.FloatTensor([input_size, input_size]) if isinstance(input_size, int) \
        else torch.FloatTensor(input_size)
    anchor_boxes = self._get_anchor_boxes(input_size)
    boxes = change_box_order(boxes, 'xyxy2xywh')

    # match each anchor to the ground-truth box it overlaps most
    ious = calculate_iou(anchor_boxes, boxes, order='xywh')
    max_ious, max_ids = ious.max(1)
    boxes = boxes[max_ids]

    loc_xy = (boxes[:, :2] - anchor_boxes[:, :2]) / anchor_boxes[:, 2:]
    loc_wh = torch.log(boxes[:, 2:] / anchor_boxes[:, 2:])
    loc_targets = torch.cat([loc_xy, loc_wh], 1)

    cls_targets = 1 + labels[max_ids]
    cls_targets[max_ious < 0.5] = 0
    ignore = (max_ious > 0.4) & (max_ious < 0.5)  # ignore ious in (0.4, 0.5)
    cls_targets[ignore] = -1  # for now just mark ignored anchors with -1
    return loc_targets, cls_targets
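# `change_box_order` lives elsewhere in the repo; here is a minimal sketch of
# the conversion the encoder above assumes ('xyxy2xywh': corners to
# center + size), offered as an assumption rather than the repo's exact code:
def change_box_order_sketch(boxes, order):
    """Hypothetical stand-in; boxes is a [N,4] tensor."""
    a, b = boxes[:, :2], boxes[:, 2:]
    if order == 'xyxy2xywh':
        return torch.cat([(a + b) / 2, b - a], 1)  # center, width/height
    return torch.cat([a - b / 2, a + b / 2], 1)    # 'xywh2xyxy'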
def forward(self, feats, gt_boxes, im_info):
    batch_size = feats.size(0)

    # shared 3x3 conv on the backbone feature map
    rpn_conv1 = F.relu(self.conv3x3(feats), inplace=True)

    # rpn classification score
    rpn_cls_score = self.conv1x1_cls(rpn_conv1)                      # b*18*50*37
    rpn_cls_score_reshape = reshape(rpn_cls_score, 2)                # b*2*450*37
    rpn_cls_score_softmax = F.softmax(rpn_cls_score_reshape, 1)      # b*2*450*37
    rpn_cls_score_reshape_back = reshape(rpn_cls_score_softmax, 18)  # b*18*50*37

    # rpn offsets to the anchor boxes
    rpn_loc_pred = self.conv1x1_loc(rpn_conv1)                       # b*36*50*37

    # ------------------------------ generate proposals ------------------------------
    rpn_proposals = roi_pooling(rpn_cls_score_softmax, rpn_loc_pred, im_info[1])

    # rpn loss
    rpn_loss_cls = 0
    rpn_loss_loc = 0
    if self.training:
        assert gt_boxes is not None
        anchors = self.anchors

        # keep only anchors fully inside the image
        keep = ((anchors[:, 0] >= 0) &
                (anchors[:, 1] >= 0) &
                (anchors[:, 2] <= int(im_info[0][1])) &
                (anchors[:, 3] <= int(im_info[0][0])))
        idxs_inside = torch.nonzero(keep).view(-1)
        anchors = anchors[idxs_inside, :]  # 5076*4

        # ------------------------- compute classification loss -------------------------
        # b*2*450*37 -> b*16650*2, then keep inside anchors: b*5076*2
        rpn_cls_score_reshape = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_score_reshape = rpn_cls_score_reshape[:, idxs_inside, :]

        cls_labels = torch.zeros((batch_size, anchors.shape[0])).long()
        positives = []
        positive_idxs = []
        for b in range(batch_size):
            positive_idx_gt = {}  # anchor index -> matched gt index
            iou_matrix = torch.zeros((anchors.shape[0], len(gt_boxes[b])))  # 5076*num_gts_per_img

            # compute the anchor/gt IoU matrix (slow double loop;
            # vectorizing it would speed this up considerably)
            for i in range(anchors.shape[0]):
                for j in range(len(gt_boxes[b])):
                    iou_matrix[i][j] = calculate_iou(anchors[i], gt_boxes[b][j], box_form='xyxy')

            # 1. an anchor whose max IoU over all gts is below 0.3 is negative
            for i in range(anchors.shape[0]):
                if torch.max(iou_matrix[i]) < 0.3:
                    cls_labels[b][i] = 0

            # 2. for each gt, the anchor with the highest IoU is positive
            idxs = torch.max(iou_matrix, 0)[1]
            cls_labels[b][idxs] = 1
            for j, idx in enumerate(idxs):
                if idx.item() not in positive_idx_gt:
                    positive_idx_gt[idx.item()] = j
                positive_idxs.append(idx.item())

            # 3. any anchor with IoU above 0.7 against some gt is positive
            for i in range(anchors.shape[0]):
                max_iou, max_idx = torch.max(iou_matrix[i], dim=0)
                if max_iou > 0.7:
                    positive_idx_gt[i] = max_idx.item()
                    positive_idxs.append(i)
                    cls_labels[b][i] = 1
            positives.append(positive_idx_gt)

        rpn_cls_score_reshape = rpn_cls_score_reshape.view(-1, 2)
        cls_labels = cls_labels.view(-1)
        rpn_loss_cls += F.cross_entropy(rpn_cls_score_reshape, cls_labels)

        # --------------------------- compute regression loss ---------------------------
        # b*36*50*37 -> b*16650*4, then keep inside anchors: b*5076*4
        rpn_loc_pred = rpn_loc_pred.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)
        rpn_loc_pred = rpn_loc_pred[:, idxs_inside, :]

        # regression targets: the matched gt box for each positive anchor
        # (raw boxes here; the offset encoding is assumed to happen elsewhere)
        loc_labels = torch.zeros((batch_size, anchors.shape[0], 4))
        for b in range(batch_size):
            for i in range(anchors.shape[0]):
                if i in positives[b]:
                    loc_labels[b][i][:] = gt_boxes[b][positives[b][i]]

        rpn_loc_pred = rpn_loc_pred[:, positive_idxs, :].view(-1, 4)
        loc_labels = loc_labels[:, positive_idxs, :].view(-1, 4)
        rpn_loss_loc = F.smooth_l1_loss(rpn_loc_pred, loc_labels, reduction='sum') / 256

    return rpn_proposals, rpn_loss_cls, rpn_loss_loc
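# `reshape` above is the channel-regrouping helper found in many Faster R-CNN
# ports; a minimal sketch of the behavior the forward pass assumes (regroup the
# channel dim to `d`, folding the remainder into the height dim), written here
# as an assumption rather than the repo's exact implementation:
def reshape_sketch(x, d):
    # e.g. b*18*50*37 with d=2 -> b*2*450*37, matching the shape comments above
    b, c, h, w = x.shape
    return x.view(b, d, c * h // d, w)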
def gen_pnet_data(im_dir, data_dir, anno_file, label):
    # Note: for this small-scale training run we only keep faces larger than 40px.
    # When generating masked-face data, many of the random negatives are simply
    # unmasked faces, so when training MTCNN ignore the negatives produced under
    # mask/negative and use only the positives from that split.
    neg_save_dir = os.path.join(data_dir, "12/%s/negative" % label)
    pos_save_dir = os.path.join(data_dir, "12/%s/positive" % label)
    part_save_dir = os.path.join(data_dir, "12/%s/part" % label)
    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)
    save_dir = os.path.join(data_dir, "pnet")
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    pos_save_file = os.path.join('../annos', '%s_pos_12.txt' % label)
    neg_save_file = os.path.join('../annos', '%s_neg_12.txt' % label)
    part_save_file = os.path.join('../annos', '%s_part_12.txt' % label)
    f1 = open(pos_save_file, 'w')
    f2 = open(neg_save_file, 'w')
    f3 = open(part_save_file, 'w')

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    num = len(annotations)
    print("%d pics in total" % num)

    p_idx = 0  # positive
    n_idx = 0  # negative
    d_idx = 0  # part ("don't care")
    idx = 0
    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_name = annotation[0]  # image path (without extension)
        bbox = list(map(float, annotation[1:]))
        boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)  # gt boxes

        # load the image; annotations may point at .jpg or .png files
        im_path = os.path.join(im_dir, im_name + '.jpg')
        if not os.path.exists(im_path):
            im_path = os.path.join(im_dir, im_name + '.png')
        img = cv2.imread(im_path)
        if img is None:
            continue
        idx += 1
        height, width, channel = img.shape

        # sample 50 random negative crops per image (IoU < 0.3 with every gt)
        neg_num = 0
        while neg_num < 50:
            size = npr.randint(12, int(min(width, height) / 2))
            nx = npr.randint(0, width - size)
            ny = npr.randint(0, height - size)
            crop_box = np.array([nx, ny, nx + size, ny + size])
            Iou = calculate_iou(crop_box, boxes)
            cropped_im = img[int(ny):int(ny + size), int(nx):int(nx + size), :]
            resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
            if np.max(Iou) < 0.3:  # IoU with all gts must be below 0.3
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1

        # for each gt box, collect positives and part faces; with ~39915 gt boxes
        # in the dataset this yields roughly 800k pos + part crops
        for box in boxes:
            # box is (x_left, y_top, x_right, y_bottom)
            x1, y1, x2, y2 = box
            w = x2 - x1 + 1
            h = y2 - y1 + 1

            # sample negatives that still overlap the gt (hard negatives near the face)
            for i in range(5):
                size = npr.randint(12, int(min(width, height) / 2))
                # delta_x and delta_y are offsets of (x1, y1)
                delta_x = npr.randint(max(-size, -x1), w)
                delta_y = npr.randint(max(-size, -y1), h)
                nx1 = max(0, x1 + delta_x)
                ny1 = max(0, y1 + delta_y)
                if nx1 + size > width or ny1 + size > height:
                    continue
                crop_box = np.array([nx1, ny1, nx1 + size, ny1 + size])
                Iou = calculate_iou(crop_box, boxes)
                cropped_im = img[int(ny1):int(ny1 + size), int(nx1):int(nx1 + size), :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)
                if np.max(Iou) < 0.3:  # IoU with all gts must be below 0.3
                    save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                    f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1

            # sample positives (IoU >= 0.65) and part faces (IoU >= 0.4)
            for i in range(20):
                if w < 5:
                    continue
                size = npr.randint(int(min(w, h) * 0.8), int(np.ceil(1.25 * max(w, h))))
                # delta here is the offset of the box center
                delta_x = npr.randint(int(-w * 0.2), int(w * 0.2))
                delta_y = npr.randint(int(-h * 0.2), int(h * 0.2))
                # gt center plus jitter, minus half the crop size, gives the top-left corner
                nx1 = max(x1 + w / 2 + delta_x - size / 2, 0)
                ny1 = max(y1 + h / 2 + delta_y - size / 2, 0)
                nx2 = nx1 + size
                ny2 = ny1 + size
                if nx2 > width or ny2 > height:
                    continue
                crop_box = np.array([nx1, ny1, nx2, ny2])

                # regression offsets, normalized by the crop size
                offset_x1 = (x1 - nx1) / float(size)
                offset_y1 = (y1 - ny1) / float(size)
                offset_x2 = (x2 - nx2) / float(size)
                offset_y2 = (y2 - ny2) / float(size)

                cropped_im = img[int(ny1):int(ny2), int(nx1):int(nx2), :]
                resized_im = cv2.resize(cropped_im, (12, 12), interpolation=cv2.INTER_LINEAR)

                box_ = box.reshape(1, -1)
                iou = calculate_iou(crop_box, box_)
                if iou >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif iou >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1

        if idx % 100 == 0:
            print("%s images done, pos: %s part: %s neg: %s" % (idx, p_idx, d_idx, n_idx))
    f1.close()
    f2.close()
    f3.close()
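# Usage sketch (the paths are hypothetical; the anno file format —
# "<image_name> x1 y1 x2 y2 [x1 y1 x2 y2 ...]" per line — follows the parsing above):
# gen_pnet_data(im_dir='../data/images',
#               data_dir='../data/train',
#               anno_file='../annos/face_anno.txt',
#               label='face')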