def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor,
                       image_width: int, image_height: int) -> Tensor:
    # sort anchors and transformers by objectness before decoding the proposals
    proposal_score = objectnesses[:, 1]
    _, sorted_indices = torch.sort(proposal_score, dim=0, descending=True)
    sorted_transformers = transformers[sorted_indices]
    sorted_anchor_bboxes = anchor_bboxes[sorted_indices]
    # keep the scores in the same (descending) order as the proposals they belong to
    proposal_score = proposal_score[sorted_indices]

    proposal_bboxes = BBox.apply_transformer(sorted_anchor_bboxes, sorted_transformers.detach())
    proposal_bboxes = BBox.clip(proposal_bboxes, 0, 0, image_width, image_height)

    # keep the top-N proposals (and their scores) before NMS
    proposal_bboxes = proposal_bboxes[:self._pre_nms_top_n]
    proposal_score = proposal_score[:self._pre_nms_top_n]

    threshold = 0.7
    kept_indices = nms(proposal_bboxes, proposal_score, threshold)
    proposal_bboxes = proposal_bboxes[kept_indices]
    proposal_bboxes = proposal_bboxes[:self._post_nms_top_n]

    return proposal_bboxes
def read_file_to_dic(self, filename, dic):
    with open(filename, 'r') as f:
        data = f.readlines()
    for line in data:
        content = line.split(',')
        key = content[0] + "/" + str(int(content[1]))
        img_h = int(self.data_size[content[0]][0])
        img_w = int(self.data_size[content[0]][1])
        # convert to 0-based pixel index
        info = AVA_video.info(
            content[6],
            BBox(
                left=float(content[2]) * img_w,
                top=float(content[3]) * img_h,
                right=float(content[4]) * img_w,
                bottom=float(content[5]) * img_h),
            img_h, img_w, key)
        if key not in dic:
            dic[key] = [info]
        else:
            dic[key].append(info)
def sample(self, proposal_bboxes: Tensor, gt_classes: Tensor, gt_bboxes: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    sample_fg_indices = torch.arange(end=len(proposal_bboxes), dtype=torch.long)
    sample_selected_indices = torch.arange(end=len(proposal_bboxes), dtype=torch.long)

    # find labels for each `proposal_bboxes`
    labels = torch.ones(len(proposal_bboxes), dtype=torch.long).cuda() * -1
    ious = BBox.iou(proposal_bboxes, gt_bboxes)
    proposal_max_ious, proposal_assignments = ious.max(dim=1)
    labels[proposal_max_ious < 0.5] = 0
    labels[proposal_max_ious >= 0.5] = gt_classes[proposal_assignments[proposal_max_ious >= 0.5]]

    # select 128 samples
    fg_indices = (labels > 0).nonzero().view(-1)
    bg_indices = (labels == 0).nonzero().view(-1)
    fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 32)]]
    bg_indices = bg_indices[torch.randperm(len(bg_indices))[:128 - len(fg_indices)]]
    selected_indices = torch.cat([fg_indices, bg_indices])
    selected_indices = selected_indices[torch.randperm(len(selected_indices))]

    proposal_bboxes = proposal_bboxes[selected_indices]
    gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes[proposal_assignments[selected_indices]])
    gt_proposal_classes = labels[selected_indices]

    gt_proposal_transformers = (gt_proposal_transformers - self._transformer_normalize_mean) / self._transformer_normalize_std
    gt_proposal_transformers = gt_proposal_transformers.cuda()
    gt_proposal_classes = gt_proposal_classes.cuda()

    sample_fg_indices = sample_fg_indices[fg_indices]
    sample_selected_indices = sample_selected_indices[selected_indices]

    return sample_fg_indices, sample_selected_indices, gt_proposal_classes, gt_proposal_transformers
def exclude_boxes_by_statistics(bboxes, sigma_num=2):
    from bbox import BBox

    rotation_mapping = {}
    h_poses = []
    for __bbox in bboxes:
        h_pos = points_tool.rotate_rect_to_horizontal(__bbox.pos)
        # numpy arrays and lists cannot be used as dict keys, so wrap the position in a BBox object as a workaround
        rotation_mapping[BBox(h_pos, "")] = __bbox
        h_poses.append(h_pos)
    logger.debug("Rotated all %d bboxes to horizontal and mapped them one-to-one, [%d] mappings",
                 len(bboxes), len(rotation_mapping))

    h_good_poses, mean, h_bad_poses = exclude_1sigma(h_poses, sigma_num=sigma_num)
    logger.debug("Statistical filtering of horizontal positions: from %d original boxes, %d good, %d bad, mean: %f",
                 len(bboxes), len(h_good_poses), len(h_bad_poses), mean)

    height = h_good_poses[:, :, 1].max()  # maximum y over the bboxes
    width = h_bad_poses[:, :, 0].max()    # maximum x over the bboxes
    logger.debug("Image width [%d], height [%d]", width, height)

    good_bboxes = []
    for h_pos in h_good_poses:
        good_bboxes.append(rotation_mapping[BBox(h_pos, "")])
    logger.debug("%d good horizontal positions restored to %d original bboxes", len(h_good_poses), len(good_bboxes))

    bad_bboxes = []
    for h_pos in h_bad_poses:
        bad_bboxes.append(rotation_mapping[BBox(h_pos, "")])
    logger.debug("%d bad horizontal positions restored to %d original bboxes", len(h_bad_poses), len(bad_bboxes))

    return good_bboxes, bad_bboxes, mean, width, height
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor,
                       image_width: int, image_height: int) -> Tensor:
    batch_size = anchor_bboxes.shape[0]

    proposal_bboxes = BBox.apply_transformer(anchor_bboxes, transformers)
    proposal_bboxes = BBox.clip(proposal_bboxes, left=0, top=0, right=image_width, bottom=image_height)
    proposal_probs = F.softmax(objectnesses[:, :, 1], dim=-1)

    _, sorted_indices = torch.sort(proposal_probs, dim=-1, descending=True)

    nms_proposal_bboxes_batch = []
    for batch_index in range(batch_size):
        sorted_bboxes = proposal_bboxes[batch_index][sorted_indices[batch_index]][:self._pre_nms_top_n]
        sorted_probs = proposal_probs[batch_index][sorted_indices[batch_index]][:self._pre_nms_top_n]
        threshold = 0.7
        kept_indices = nms(sorted_bboxes, sorted_probs, threshold)
        nms_bboxes = sorted_bboxes[kept_indices][:self._post_nms_top_n]
        nms_proposal_bboxes_batch.append(nms_bboxes)

    # pad every image's proposals to the same length so they can be stacked into one tensor
    max_nms_proposal_bboxes_length = max([len(it) for it in nms_proposal_bboxes_batch])
    padded_proposal_bboxes = []
    for nms_proposal_bboxes in nms_proposal_bboxes_batch:
        padded_proposal_bboxes.append(
            torch.cat([
                nms_proposal_bboxes,
                torch.zeros(max_nms_proposal_bboxes_length - len(nms_proposal_bboxes), 4).to(nms_proposal_bboxes)
            ])
        )
    padded_proposal_bboxes = torch.stack(padded_proposal_bboxes, dim=0)

    return padded_proposal_bboxes
def forward(self, features, proposal_bboxes, gt_classes_batch: Tensor = None, gt_bboxes_batch: Tensor = None):
    """Fast R-CNN detection head.

    Training-time labeling:
    - assign -1 to all labels (-1 labels are not used for training)
    - compute the IoU of every proposal with every ground-truth bbox;
      `proposal_assignments` holds the best-matching ground-truth bbox for each proposal
    - proposals with max IoU < 0.5 are background (label 0);
      proposals with max IoU >= 0.5 are foreground and take the class of their assigned ground truth
    - sample 128 proposals per image, keeping roughly a 0.25 : 0.75 foreground : background ratio,
      then reshuffle the selection (see the small quota sketch after this function)
    """
    batch_size = features.shape[0]

    if not self.training:
        proposal_batch_indices = torch.arange(end=batch_size, dtype=torch.long, device=proposal_bboxes.device).view(-1, 1).repeat(1, proposal_bboxes.shape[1])
        pool = Pooler.apply(features, torch.squeeze(proposal_bboxes), torch.squeeze(proposal_batch_indices), self._pooler_mode)
        pool = pool.view(pool.shape[0], -1)
        hidden = self.hidden(pool)
        proposal_classes = self._proposal_class(hidden)
        proposal_transformers = self._proposal_transformer(hidden)
        proposal_classes = proposal_classes.view(batch_size, -1, proposal_classes.shape[-1])
        proposal_transformers = proposal_transformers.view(batch_size, -1, proposal_transformers.shape[-1])
        return proposal_classes, proposal_transformers
    else:
        labels = torch.full((batch_size, proposal_bboxes.shape[1]), -1, dtype=torch.long, device=proposal_bboxes.device)
        ious = BBox.iou(proposal_bboxes, gt_bboxes_batch)
        proposal_max_ious, proposal_assignments = ious.max(dim=2)
        labels[proposal_max_ious < 0.5] = 0
        fg_masks = proposal_max_ious >= 0.5
        if len(fg_masks.nonzero()) > 0:
            labels[fg_masks] = gt_classes_batch[fg_masks.nonzero()[:, 0], proposal_assignments[fg_masks]]

        fg_indices = (labels > 0).nonzero()
        bg_indices = (labels == 0).nonzero()
        fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 32 * batch_size)]]
        bg_indices = bg_indices[torch.randperm(len(bg_indices))[:128 * batch_size - len(fg_indices)]]
        selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
        # `selected_indices` has shape (128 * batch_size, 2): (batch index, proposal index) pairs
        selected_indices = selected_indices[torch.randperm(len(selected_indices))].unbind(dim=1)

        # gather ground-truth targets for the selected proposals, apply RoI pooling on the features
        # with the selected proposal bboxes, then flatten and run the hidden layer
        proposal_bboxes = proposal_bboxes[selected_indices]
        gt_bboxes = gt_bboxes_batch[selected_indices[0], proposal_assignments[selected_indices]]
        gt_proposal_classes = labels[selected_indices]
        diff_x = torch.min(gt_bboxes[:, 2] - gt_bboxes[:, 0])
        diff_y = torch.min(gt_bboxes[:, 3] - gt_bboxes[:, 1])
        gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes)
        batch_indices = selected_indices[0]

        pool = Pooler.apply(features, proposal_bboxes, proposal_batch_indices=batch_indices, mode=self._pooler_mode)
        pool = pool.view(pool.shape[0], -1)
        hidden = self.hidden(pool)
        proposal_classes = self._proposal_class(hidden)
        proposal_transformers = self._proposal_transformer(hidden)
        proposal_class_losses, proposal_transformer_losses = self.loss(proposal_classes, proposal_transformers,
                                                                       gt_proposal_classes, gt_proposal_transformers,
                                                                       batch_size, batch_indices)
        return proposal_classes, proposal_transformers, proposal_class_losses, proposal_transformer_losses
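# The 0.25 : 0.75 quota mentioned in the docstring above is implicit in the two slicing
# expressions: at most 32 * batch_size foreground samples are kept, and the background fills
# whatever remains of the 128 * batch_size budget. A minimal stand-alone sketch of just that
# arithmetic (the candidate counts below are made up for illustration):

def sample_quota(num_fg_candidates: int, num_bg_candidates: int, batch_size: int = 1):
    """Mirror the slicing logic: fg capped at 32 per image, bg tops up to 128 per image."""
    n_fg = min(num_fg_candidates, 32 * batch_size)
    n_bg = min(num_bg_candidates, 128 * batch_size - n_fg)
    return n_fg, n_bg

print(sample_quota(100, 2000))   # (32, 96)  -> the nominal 0.25 : 0.75 split
print(sample_quota(5, 2000))     # (5, 123)  -> background tops up when foreground is scarce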
def bbox(self):
    """ returns the bounding box of the line """
    bbox = BBox()
    for pt in self.pts:
        bbox.update(pt)
    return bbox
def sample(self, anchor_bboxes: Tensor, gt_bboxes: Tensor, image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    sample_fg_indices = torch.arange(end=len(anchor_bboxes), dtype=torch.long)
    sample_selected_indices = torch.arange(end=len(anchor_bboxes), dtype=torch.long)

    anchor_bboxes = anchor_bboxes.cpu()
    gt_bboxes = gt_bboxes.cpu()

    # remove cross-boundary anchors
    boundary = torch.tensor(BBox(0, 0, image_width, image_height).tolist(), dtype=torch.float)
    inside_indices = BBox.inside(anchor_bboxes, boundary.unsqueeze(dim=0)).squeeze().nonzero().view(-1)

    anchor_bboxes = anchor_bboxes[inside_indices]
    sample_fg_indices = sample_fg_indices[inside_indices]
    sample_selected_indices = sample_selected_indices[inside_indices]

    # find labels for each `anchor_bboxes`
    labels = torch.ones(len(anchor_bboxes), dtype=torch.long) * -1
    ious = BBox.iou(anchor_bboxes, gt_bboxes)
    anchor_max_ious, anchor_assignments = ious.max(dim=1)
    gt_max_ious, gt_assignments = ious.max(dim=0)
    anchor_additions = (ious == gt_max_ious).nonzero()[:, 0]
    labels[anchor_max_ious < 0.3] = 0
    labels[anchor_additions] = 1
    labels[anchor_max_ious >= 0.7] = 1

    # select 256 samples
    fg_indices = (labels == 1).nonzero().view(-1)
    bg_indices = (labels == 0).nonzero().view(-1)
    fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 128)]]
    bg_indices = bg_indices[torch.randperm(len(bg_indices))[:256 - len(fg_indices)]]
    selected_indices = torch.cat([fg_indices, bg_indices])
    selected_indices = selected_indices[torch.randperm(len(selected_indices))]

    gt_anchor_objectnesses = labels[selected_indices]
    gt_bboxes = gt_bboxes[anchor_assignments[fg_indices]]
    anchor_bboxes = anchor_bboxes[fg_indices]
    gt_anchor_transformers = BBox.calc_transformer(anchor_bboxes, gt_bboxes)

    gt_anchor_objectnesses = gt_anchor_objectnesses.cuda()
    gt_anchor_transformers = gt_anchor_transformers.cuda()

    sample_fg_indices = sample_fg_indices[fg_indices]
    sample_selected_indices = sample_selected_indices[selected_indices]

    return sample_fg_indices, sample_selected_indices, gt_anchor_objectnesses, gt_anchor_transformers
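# The anchor labeling rule used above can be checked in isolation: anchors with IoU >= 0.7
# against any ground truth (or the single best anchor for a ground truth) become positives,
# anchors below 0.3 become negatives, and everything in between stays ignored (-1). Below is a
# minimal sketch of that rule in plain PyTorch; the `iou` helper here is a stand-in written for
# this example only, not the project's `BBox.iou`.
import torch

def iou(boxes_a: torch.Tensor, boxes_b: torch.Tensor) -> torch.Tensor:
    """Pairwise IoU for (N, 4) and (M, 4) boxes in (left, top, right, bottom) form."""
    tl = torch.max(boxes_a[:, None, :2], boxes_b[None, :, :2])
    br = torch.min(boxes_a[:, None, 2:], boxes_b[None, :, 2:])
    inter = (br - tl).clamp(min=0).prod(dim=2)
    area_a = (boxes_a[:, 2:] - boxes_a[:, :2]).prod(dim=1)
    area_b = (boxes_b[:, 2:] - boxes_b[:, :2]).prod(dim=1)
    return inter / (area_a[:, None] + area_b[None, :] - inter)

anchors = torch.tensor([[0., 0., 10., 10.], [0., 0., 100., 100.], [0., 0., 70., 70.]])
gts = torch.tensor([[0., 0., 95., 95.]])

ious = iou(anchors, gts)
labels = torch.full((len(anchors),), -1, dtype=torch.long)   # -1: ignored
anchor_max_ious, _ = ious.max(dim=1)
gt_max_ious, _ = ious.max(dim=0)
labels[anchor_max_ious < 0.3] = 0                            # background
labels[(ious == gt_max_ious).nonzero()[:, 0]] = 1            # best anchor per ground truth
labels[anchor_max_ious >= 0.7] = 1                           # confident foreground
print(labels)  # tensor([ 0,  1, -1])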
def __init__(self, parent=None):
    super(Canvas, self).__init__(parent)
    self.setMouseTracking(True)
    self.origin = QPoint(0, 0)
    self.pixmap = QPixmap(640, 480)
    self.scaled = None
    self.fs = 10
    self.color = QColor(0, 170, 0)
    self.mat = self.QPixmapToCvMat(self.pixmap)
    self.paint_ = QPainter()
    self.shapes = []
    self.width_ = None
    self.height_ = None
    self.width_0 = None
    self.height_0 = None
    self.shapeSelected = None
    self.edit = False
    self.drawing = False
    self.verified = False
    self.editShape = False
    self.shapeEdit = None
    self.moveShape = None
    self.posMove = None
    self.nearest = False
    self.tl = QPoint()
    self.br = QPoint()
    self.scale = (1., 1.)
    self.bbox = BBox()
    self.curPos = None
    self.contextMenu = QMenu()
    self.items = []
    self.current = None
    self.boxTeaching = parent.boxTeaching

    action = partial(newAction, self)
    crop = action("Crop", self.cropImage, "a", "crop", None, False)
    test = action("Test", self.test, "a", "test", None, False)
    decision = action("Decision", self.decision, "shift+a", "decision", "shift+a", False)
    delete = action("Delete", self.delete, "delete", "delete", None, False)
    self.actions = struct(crop=crop, test=test, delete=delete, decision=decision)
    addActions(self.contextMenu, [crop, test, decision, delete])

    self.setContextMenuPolicy(Qt.CustomContextMenu)
    self.customContextMenuRequested.connect(self.popUpMenu)
    # self.selectedShapeSignal.connect(self._selectedShape)
    # self.newShape.connect(self._newShape)
    self.drawShape.connect(self._drawShape)
    self.deleteShape.connect(self._delShape)
def generate_detections(self, proposal_bboxes: Tensor, proposal_classes: Tensor, proposal_transformers: Tensor,
                        image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
    batch_size = proposal_bboxes.shape[0]

    proposal_transformers = proposal_transformers.view(batch_size, -1, self.num_classes, 4)
    transformer_normalize_std = self._transformer_normalize_std.to(device=proposal_transformers.device)
    transformer_normalize_mean = self._transformer_normalize_mean.to(device=proposal_transformers.device)
    proposal_transformers = proposal_transformers * transformer_normalize_std + transformer_normalize_mean

    proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, self.num_classes, 1)
    detection_bboxes = BBox.apply_transformer(proposal_bboxes, proposal_transformers)
    detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0, right=image_width, bottom=image_height)
    detection_probs = F.softmax(proposal_classes, dim=-1)

    all_detection_bboxes = []
    all_detection_classes = []
    all_detection_probs = []
    all_detection_batch_indices = []

    for batch_index in range(batch_size):
        for c in range(1, self.num_classes):
            class_bboxes = detection_bboxes[batch_index, :, c, :]
            class_probs = detection_probs[batch_index, :, c]
            threshold = 0.3
            kept_indices = nms(class_bboxes, class_probs, threshold)
            class_bboxes = class_bboxes[kept_indices]
            class_probs = class_probs[kept_indices]

            all_detection_bboxes.append(class_bboxes)
            all_detection_classes.append(torch.full((len(kept_indices),), c, dtype=torch.int))
            all_detection_probs.append(class_probs)
            all_detection_batch_indices.append(torch.full((len(kept_indices),), batch_index, dtype=torch.long))

    all_detection_bboxes = torch.cat(all_detection_bboxes, dim=0)
    all_detection_classes = torch.cat(all_detection_classes, dim=0)
    all_detection_probs = torch.cat(all_detection_probs, dim=0)
    all_detection_batch_indices = torch.cat(all_detection_batch_indices, dim=0)

    return all_detection_bboxes, all_detection_classes, all_detection_probs, all_detection_batch_indices
def edit_target_class(self, target_idx=-1):
    # find out which target is selected first
    if target_idx < 0:
        target_idx = self.targetList.currentRow()

    # if a yaml class map is provided (e.g. in a task)
    if len(self.cls_map) > 0:
        class_list = []
        for cls_idx, cls_name in self.cls_map.items():
            class_list.append(f"{cls_idx}-{cls_name}")
        # show a dialog
        dialog = QInputDialog()
        label_text = "Input the correct class number.\n" \
                     "Please note your input will not be checked for legality"
        item, okPressed = QInputDialog.getItem(dialog, "Edit class", label_text, class_list, False)
        if okPressed and item:
            cur_bbox = label_table[self.data_name][target_idx]
            old_bbox = BBox(cur_bbox.xywh, cur_bbox.imgSizeWH, cur_bbox.cls)
            class_idx = item.split('-')[0]
            label_table[self.data_name][target_idx].cls = int(class_idx)
            self.last_cls = int(class_idx)
            # log the change
            new_data = label_table[self.data_name][target_idx].to_label_str()
            mod = [self.data_name, target_idx, new_data, old_bbox]
            modification_list.append(mod)
            self.ui_form.check_undoable()
            self.show()
    else:
        dialog = QInputDialog()
        label_text = "Input the correct class number.\n" \
                     "Please note your input will not be checked for legality"
        text, okPressed = QInputDialog.getText(dialog, "Edit class", label_text, QLineEdit.Normal)
        if okPressed and text != '':
            cur_bbox = label_table[self.data_name][target_idx]
            old_bbox = BBox(cur_bbox.xywh, cur_bbox.imgSizeWH, cur_bbox.cls)
            label_table[self.data_name][target_idx].cls = int(text)
            self.last_cls = int(text)
            # log the change
            new_data = label_table[self.data_name][target_idx].to_label_str()
            mod = [self.data_name, target_idx, new_data, old_bbox]
            modification_list.append(mod)
            self.ui_form.check_undoable()
            self.show()
def forward(self, features: Tensor,
            anchor_bboxes: Optional[Tensor] = None, gt_bboxes_batch: Optional[Tensor] = None,
            image_width: Optional[int] = None, image_height: Optional[int] = None) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor]]:
    batch_size = features.shape[0]

    features = self._features(features)
    anchor_objectnesses = self._anchor_objectness(features)
    anchor_transformers = self._anchor_transformer(features)

    anchor_objectnesses = anchor_objectnesses.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
    anchor_transformers = anchor_transformers.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)

    if not self.training:
        return anchor_objectnesses, anchor_transformers
    else:
        # remove cross-boundary anchors
        # NOTE: The length of `inside_indices` is guaranteed to be a multiple of `anchor_bboxes.shape[0]`
        #       as each batch in `anchor_bboxes` is the same
        inside_indices = BBox.inside(anchor_bboxes, left=0, top=0, right=image_width, bottom=image_height).nonzero().unbind(dim=1)

        inside_anchor_bboxes = anchor_bboxes[inside_indices].view(batch_size, -1, anchor_bboxes.shape[2])
        inside_anchor_objectnesses = anchor_objectnesses[inside_indices].view(batch_size, -1, anchor_objectnesses.shape[2])
        inside_anchor_transformers = anchor_transformers[inside_indices].view(batch_size, -1, anchor_transformers.shape[2])

        # find labels for each `anchor_bboxes`
        labels = torch.full((batch_size, inside_anchor_bboxes.shape[1]), -1, dtype=torch.long, device=inside_anchor_bboxes.device)
        ious = BBox.iou(inside_anchor_bboxes, gt_bboxes_batch)
        anchor_max_ious, anchor_assignments = ious.max(dim=2)
        gt_max_ious, gt_assignments = ious.max(dim=1)
        anchor_additions = ((ious > 0) & (ious == gt_max_ious.unsqueeze(dim=1))).nonzero()[:, :2].unbind(dim=1)
        labels[anchor_max_ious < 0.3] = 0
        labels[anchor_additions] = 1
        labels[anchor_max_ious >= 0.7] = 1

        # select 256 x `batch_size` samples
        fg_indices = (labels == 1).nonzero()
        bg_indices = (labels == 0).nonzero()
        fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 128 * batch_size)]]
        bg_indices = bg_indices[torch.randperm(len(bg_indices))[:256 * batch_size - len(fg_indices)]]
        selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
        selected_indices = selected_indices[torch.randperm(len(selected_indices))].unbind(dim=1)

        inside_anchor_bboxes = inside_anchor_bboxes[selected_indices]
        gt_bboxes = gt_bboxes_batch[selected_indices[0], anchor_assignments[selected_indices]]
        gt_anchor_objectnesses = labels[selected_indices]
        gt_anchor_transformers = BBox.calc_transformer(inside_anchor_bboxes, gt_bboxes)
        batch_indices = selected_indices[0]

        anchor_objectness_losses, anchor_transformer_losses = self.loss(inside_anchor_objectnesses[selected_indices],
                                                                        inside_anchor_transformers[selected_indices],
                                                                        gt_anchor_objectnesses, gt_anchor_transformers,
                                                                        batch_size, batch_indices)

        return anchor_objectnesses, anchor_transformers, anchor_objectness_losses, anchor_transformer_losses
def forward(self, features: Tensor, proposal_bboxes: Tensor,
            gt_classes_batch: Optional[Tensor] = None, gt_bboxes_batch: Optional[Tensor] = None) -> Union[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor, Tensor, Tensor]]:
    batch_size = features.shape[0]

    if not self.training:
        proposal_batch_indices = torch.arange(end=batch_size, dtype=torch.long, device=proposal_bboxes.device).view(-1, 1).repeat(1, proposal_bboxes.shape[1])
        pool = Pooler.apply(features, proposal_bboxes.view(-1, 4), proposal_batch_indices.view(-1), mode=self._pooler_mode)
        hidden = self.hidden(pool)
        hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)
        hidden = hidden.view(hidden.shape[0], -1)

        proposal_classes = self._proposal_class(hidden)
        proposal_transformers = self._proposal_transformer(hidden)

        proposal_classes = proposal_classes.view(batch_size, -1, proposal_classes.shape[-1])
        proposal_transformers = proposal_transformers.view(batch_size, -1, proposal_transformers.shape[-1])
        return proposal_classes, proposal_transformers
    else:
        # find labels for each `proposal_bboxes`
        labels = torch.full((batch_size, proposal_bboxes.shape[1]), -1, dtype=torch.long, device=proposal_bboxes.device)
        ious = BBox.iou(proposal_bboxes, gt_bboxes_batch)
        proposal_max_ious, proposal_assignments = ious.max(dim=2)
        labels[proposal_max_ious < 0.5] = 0
        fg_masks = proposal_max_ious >= 0.5
        if len(fg_masks.nonzero()) > 0:
            labels[fg_masks] = gt_classes_batch[fg_masks.nonzero()[:, 0], proposal_assignments[fg_masks]]

        # select 128 x `batch_size` samples
        fg_indices = (labels > 0).nonzero()
        bg_indices = (labels == 0).nonzero()
        fg_indices = fg_indices[torch.randperm(len(fg_indices))[:min(len(fg_indices), 32 * batch_size)]]
        bg_indices = bg_indices[torch.randperm(len(bg_indices))[:128 * batch_size - len(fg_indices)]]
        selected_indices = torch.cat([fg_indices, bg_indices], dim=0)
        selected_indices = selected_indices[torch.randperm(len(selected_indices))].unbind(dim=1)

        proposal_bboxes = proposal_bboxes[selected_indices]
        gt_bboxes = gt_bboxes_batch[selected_indices[0], proposal_assignments[selected_indices]]
        gt_proposal_classes = labels[selected_indices]
        gt_proposal_transformers = BBox.calc_transformer(proposal_bboxes, gt_bboxes)
        batch_indices = selected_indices[0]

        pool = Pooler.apply(features, proposal_bboxes, proposal_batch_indices=batch_indices, mode=self._pooler_mode)
        hidden = self.hidden(pool)
        hidden = F.adaptive_max_pool2d(input=hidden, output_size=1)
        hidden = hidden.view(hidden.shape[0], -1)

        proposal_classes = self._proposal_class(hidden)
        proposal_transformers = self._proposal_transformer(hidden)
        proposal_class_losses, proposal_transformer_losses = self.loss(proposal_classes, proposal_transformers,
                                                                       gt_proposal_classes, gt_proposal_transformers,
                                                                       batch_size, batch_indices)

        return proposal_classes, proposal_transformers, proposal_class_losses, proposal_transformer_losses
def write_label(img_dir, img_id, label, bboxes):
    global _min_x_w, _min_y_h, _max_x_w, _max_y_h
    global _max_w, _max_h, _max_w_h, _max_h_w

    img = None
    for f in glob.glob(os.path.join(img_dir, img_id + '.*')):
        if f.endswith('.txt'):
            continue
        if img:
            raise FileExistsError('{} and\n{}'.format(img, f))
        img = f
    if img:
        img = cv2.imread(img)
    if img is None:
        raise FileNotFoundError(os.path.join(img_dir, img_id))

    h, w = img.shape[:2]
    if _max_w < w:
        _max_w = w
        print('New max width: {}'.format(w))
    if _max_h < h:
        _max_h = h
        print('New max height: {}'.format(h))
    if h < w:
        w_h = w / h
        if _max_w_h < w_h:
            _max_w_h = w_h
            print('New width / height: {}'.format(w_h))
    else:
        h_w = h / w
        if _max_h_w < h_w:
            _max_h_w = h_w
            print('New height / width: {}'.format(h_w))

    with open(find_replace(os.path.join(img_dir, img_id + '.txt.' + label), 'images', 'labels'), 'w') as flabel:
        for bbox_tuple in bboxes:
            bbox = BBox(hw=(h, w), type_=BBox.VOC, bbox=bbox_tuple)
            xmin, ymin, xmax, ymax = bbox.get(type_=BBox.OPEN_IMAGES)
            if _min_x_w > xmin:
                _min_x_w = xmin
                print('New min xmin / (width-1): {}'.format(_min_x_w))
            if _min_y_h > ymin:
                _min_y_h = ymin
                print('New min ymin / (height-1): {}'.format(_min_y_h))
            if _max_x_w < xmax:
                _max_x_w = xmax
                print('New max xmax / (width-1): {}'.format(_max_x_w))
            if _max_y_h < ymax:
                _max_y_h = ymax
                print('New max ymax / (height-1): {}'.format(_max_y_h))
            flabel.write('{:1.15f} {:1.15f} {:1.15f} {:1.15f}\n'.format(*bbox.get(type_=BBox.YOLO)))
def generate_detections(self, proposal_bboxes, proposal_classes, proposal_transformers, image_width, image_height):
    """
    Get the proposal deltas for every class,
    de-normalize the deltas,
    duplicate the proposal bboxes once per class,
    apply the delta transform to the proposal bboxes for each class,
    and clip the detection bboxes so they won't go out of bounds.
    """
    batch_size = proposal_bboxes.shape[0]

    proposal_transformers = proposal_transformers.view(batch_size, -1, self.num_classes, 4)
    transformer_normalize_std = self._transformer_normalize_std.to(device=proposal_transformers.device)
    transformer_normalize_mean = self._transformer_normalize_mean.to(device=proposal_transformers.device)
    proposal_transformers = proposal_transformers * transformer_normalize_std + transformer_normalize_mean

    proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, self.num_classes, 1)
    detection_bboxes = BBox.apply_transformer(proposal_bboxes, proposal_transformers)
    detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0, right=image_width, bottom=image_height)
    detection_probs = F.softmax(proposal_classes, dim=-1)

    detection_bboxes_list = []
    detection_classes_list = []
    detection_probs_list = []
    detection_batch_indices_list = []

    # class iteration starts from 1 to skip the background class (0)
    for batch_index in range(batch_size):
        for class_ in range(1, self.num_classes):
            class_bboxes = detection_bboxes[batch_index, :, class_, :]
            class_probs = detection_probs[batch_index, :, class_]
            threshold = 0.3
            kept_indices = nms(class_bboxes, class_probs, threshold)
            class_bboxes = class_bboxes[kept_indices]
            class_probs = class_probs[kept_indices]

            detection_bboxes_list.append(class_bboxes)
            detection_classes_list.append(torch.full((len(kept_indices),), class_, dtype=torch.int))
            detection_probs_list.append(class_probs)
            detection_batch_indices_list.append(torch.full((len(kept_indices),), batch_index, dtype=torch.long))

    detection_bboxes_list = torch.cat(detection_bboxes_list, dim=0)
    detection_classes_list = torch.cat(detection_classes_list, dim=0)
    detection_probs_list = torch.cat(detection_probs_list, dim=0)
    detection_batch_indices_list = torch.cat(detection_batch_indices_list, dim=0)

    return detection_bboxes_list, detection_classes_list, detection_probs_list, detection_batch_indices_list
def detect_face(image, net, crop_coordinates=None, threshold=0.4):
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detected = net.forward()[0, 0, ...]

    # keep detections above the confidence threshold and convert normalized coords to pixels
    conf = detected[:, 2]
    detected = detected[conf > threshold, :]
    detected[:, 3:] = np.clip(detected[:, 3:], a_min=0., a_max=1.)
    detected[:, (3, 5)] *= image.shape[1]
    detected[:, (4, 6)] *= image.shape[0]
    if crop_coordinates is not None:
        detected[:, (3, 5)] += crop_coordinates[0]
        detected[:, (4, 6)] += crop_coordinates[1]

    faces = []
    for f in detected:
        coor = f[3:].astype(int)
        if coor[0] >= coor[2] or coor[1] >= coor[3]:
            continue
        faces.append(
            AnnotationInstance(bbox=BBox(xmin=coor[0], ymin=coor[1], xmax=coor[2], ymax=coor[3],
                                         label='face', score=f[2], coordinate_mode='absolute')))
    return faces
def _generate_proposals(anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor,
                        image_width: int, image_height: int) -> Tensor:
    proposal_score = objectnesses[:, 1]
    _, sorted_indices = torch.sort(proposal_score, dim=0, descending=True)

    sorted_transformers = transformers[sorted_indices]
    sorted_anchor_bboxes = anchor_bboxes[sorted_indices]

    proposal_bboxes = BBox.apply_transformer(sorted_anchor_bboxes, sorted_transformers.detach())
    proposal_bboxes = BBox.clip(proposal_bboxes, 0, 0, image_width, image_height)

    # drop proposals whose width or height falls below the threshold (in pixels)
    area_threshold = 16
    non_small_area_indices = ((proposal_bboxes[:, 2] - proposal_bboxes[:, 0] >= area_threshold) &
                              (proposal_bboxes[:, 3] - proposal_bboxes[:, 1] >= area_threshold)).nonzero().view(-1)
    proposal_bboxes = proposal_bboxes[non_small_area_indices]

    return proposal_bboxes
def read_label_file(label_file, with_landmark=True):
    """
    read data from given label file
    :param label_file: str, absolute path of label file
    :param with_landmark: bool, whether to also read the five landmark points
    :return: list of 3-element tuples (img_path, bbox_tuple, landmark_array)
    """
    result = []
    with open(label_file, 'r') as lf:
        for line in lf:
            data_units = line.strip().split()
            # read absolute path of image
            img_path = data_units[0].replace('\\', '/')
            # bounding box is stored as (x1, x2, y1, y2); reorder it to (x1, y1, x2, y2)
            bbox = [data_units[1], data_units[3], data_units[2], data_units[4]]
            bbox = [int(float(x)) for x in bbox]
            # read landmarks (x1, y1), ..., (x5, y5)
            landmarks = None
            if with_landmark:
                landmarks = np.zeros((5, 2))
                for i in range(5):
                    landmarks[i] = (float(data_units[5 + 2 * i]), float(data_units[6 + 2 * i]))
            result.append((img_path, BBox(bbox), landmarks))
    return result
def generate_anchors(self, image_width: int, image_height: int, num_x_anchors: int, num_y_anchors: int) -> Tensor:
    center_ys = np.linspace(start=0, stop=image_height, num=num_y_anchors + 2)[1:-1]
    center_xs = np.linspace(start=0, stop=image_width, num=num_x_anchors + 2)[1:-1]
    ratios = np.array(self._anchor_ratios)
    ratios = ratios[:, 0] / ratios[:, 1]
    sizes = np.array(self._anchor_sizes)

    # NOTE: it's important to let `center_ys` be the major index (i.e., move horizontally and then vertically)
    #       for consistency with 2D convolution
    # giving the string 'ij' returns a meshgrid with matrix indexing,
    # i.e., with shape (#center_ys, #center_xs, #ratios, #sizes)
    center_ys, center_xs, ratios, sizes = np.meshgrid(center_ys, center_xs, ratios, sizes, indexing='ij')

    center_ys = center_ys.reshape(-1)
    center_xs = center_xs.reshape(-1)
    ratios = ratios.reshape(-1)
    sizes = sizes.reshape(-1)

    widths = sizes * np.sqrt(1 / ratios)
    heights = sizes * np.sqrt(ratios)

    center_based_anchor_bboxes = np.stack((center_xs, center_ys, widths, heights), axis=1)
    center_based_anchor_bboxes = torch.from_numpy(center_based_anchor_bboxes).float()
    anchor_bboxes = BBox.from_center_base(center_based_anchor_bboxes)

    return anchor_bboxes
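# A quick sanity check of the width/height formulas above: with widths = sizes * sqrt(1 / r) and
# heights = sizes * sqrt(r), the anchor area stays roughly size**2 while height / width equals r
# (each ratio pair (r0, r1) collapses to r = r0 / r1). The ratio and size values below are only
# illustrative, not taken from the project's configuration.
import numpy as np

ratios = np.array([(1, 2), (1, 1), (2, 1)])
r = ratios[:, 0] / ratios[:, 1]          # [0.5, 1.0, 2.0]
sizes = np.array([128.0, 128.0, 128.0])

widths = sizes * np.sqrt(1 / r)          # ~[181.0, 128.0,  90.5]
heights = sizes * np.sqrt(r)             # ~[ 90.5, 128.0, 181.0]

print(np.round(widths * heights))        # ~[16384, 16384, 16384] == 128**2, area is preserved
print(heights / widths)                  # [0.5, 1.0, 2.0], matches the requested ratios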
def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor,
                       image_width: int, image_height: int) -> Tensor:
    proposal_score = objectnesses[:, 1]
    _, sorted_indices = torch.sort(proposal_score, dim=0, descending=True)

    sorted_transformers = transformers[sorted_indices]
    sorted_anchor_bboxes = anchor_bboxes[sorted_indices]

    proposal_bboxes = BBox.apply_transformer(sorted_anchor_bboxes, sorted_transformers.detach())
    proposal_bboxes = BBox.clip(proposal_bboxes, 0, 0, image_width, image_height)

    proposal_bboxes = proposal_bboxes[:self._pre_nms_top_n]
    kept_indices = NMS.suppress(proposal_bboxes, threshold=0.7)
    proposal_bboxes = proposal_bboxes[kept_indices]
    proposal_bboxes = proposal_bboxes[:self._post_nms_top_n]

    return proposal_bboxes
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

    detection_bboxes = forward_output.detection_bboxes / scale
    detection_classes = forward_output.detection_classes
    detection_probs = forward_output.detection_probs

    kept_indices = detection_probs > prob_thresh
    detection_bboxes = detection_bboxes[kept_indices]
    detection_classes = detection_classes[kept_indices]
    detection_probs = detection_probs[kept_indices]

    draw = ImageDraw.Draw(image)
    for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
        draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
    print(f'Output image is saved to {path_to_output_image}')
def generate_detections(self, proposal_bboxes: Tensor, proposal_classes: Tensor, proposal_transformers: Tensor,
                        image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor]:
    proposal_transformers = proposal_transformers.view(-1, self.num_classes, 4)
    mean = self._transformer_normalize_mean.repeat(1, self.num_classes, 1)
    std = self._transformer_normalize_std.repeat(1, self.num_classes, 1)
    # undo the target normalization (targets were normalized as (t - mean) / std)
    proposal_transformers = proposal_transformers * std + mean

    proposal_bboxes = proposal_bboxes.view(-1, 1, 4).repeat(1, self.num_classes, 1)
    detection_bboxes = BBox.apply_transformer(proposal_bboxes.view(-1, 4), proposal_transformers.view(-1, 4))
    detection_bboxes = detection_bboxes.view(-1, self.num_classes, 4)
    detection_bboxes[:, :, [0, 2]] = detection_bboxes[:, :, [0, 2]].clamp(min=0, max=image_width)
    detection_bboxes[:, :, [1, 3]] = detection_bboxes[:, :, [1, 3]].clamp(min=0, max=image_height)
    detection_probs = F.softmax(proposal_classes, dim=1)

    generated_bboxes = []
    generated_classes = []
    generated_probs = []

    for c in range(1, self.num_classes):
        detection_class_bboxes = detection_bboxes[:, c, :]
        detection_class_probs = detection_probs[:, c]

        threshold = 0.3
        kept_indices = nms(detection_class_bboxes, detection_class_probs, threshold)
        detection_class_bboxes = detection_class_bboxes[kept_indices]
        detection_class_probs = detection_class_probs[kept_indices]

        generated_bboxes.append(detection_class_bboxes)
        generated_classes.append(torch.ones(len(kept_indices), dtype=torch.int) * c)
        generated_probs.append(detection_class_probs)

    generated_bboxes = torch.cat(generated_bboxes, dim=0)
    generated_classes = torch.cat(generated_classes, dim=0)
    generated_probs = torch.cat(generated_probs, dim=0)

    return generated_bboxes, generated_classes, generated_probs
def load_dataList(self, nameList, showThumbnail=True, progressBar=True):
    self.dataList.clear()
    if progressBar:
        progress = QProgressDialog("Loading data...", "Abort",
                                   0, len(nameList), self.window)
        progress.setWindowModality(Qt.WindowModal)

    for i, dataName in enumerate(nameList):
        newItem = QtWidgets.QListWidgetItem(dataName)
        # Mark finished data
        if self.in_task and (dataName in self.current_task.finished_data):
            newItem.setBackground(QBrush(QColor("#b3b3b3")))

        # the image size is needed below to denormalize the labels,
        # so read it even when no thumbnail is requested
        img = Image.open(self.current_data_dir + IMG_FOLDER
                         + '/' + dataName + '.' + IMG_EXT)
        w, h = img.size

        if showThumbnail:
            # boring img down sizing and img format converting
            img = img.resize((128, int(128 * h / w)))
            img = img.convert("RGBA")
            qimg = QImage(img.tobytes('raw', 'RGBA'), img.size[0],
                          img.size[1], QImage.Format_RGBA8888)
            thumbnail = QIcon()
            thumbnail.addPixmap(QtGui.QPixmap.fromImage(qimg))
            newItem.setIcon(thumbnail)

        # pre-load all the labels
        label_dir = self.current_data_dir + LEBEL_FOLDER + '/' + dataName + '.txt'
        if os.path.exists(label_dir):
            with open(label_dir, 'r') as label_file:
                bboxs = []
                for line in label_file:
                    bbox_l = line.split()
                    class_num = int(bbox_l[0])
                    centerX = int(float(bbox_l[1]) * w)
                    centerY = int(float(bbox_l[2]) * h)
                    width = int(float(bbox_l[3]) * w)
                    height = int(float(bbox_l[4]) * h)
                    new_bbox = BBox([centerX, centerY, width, height],
                                    [w, h], class_num)
                    bboxs.append(new_bbox)
                label_table[dataName] = bboxs
        else:
            # if the label file does not exist, create an empty bbox list
            bboxs = []
            label_table[dataName] = bboxs

        self.dataList.addItem(newItem)
        if progressBar:
            progress.setValue(i)
            if progress.wasCanceled():
                break

    if progressBar:
        progress.setValue(len(nameList))
def to_box(self) -> BBox:
    x0 = self.x - self.w * 0.5
    x1 = self.x + self.w * 0.5
    y0 = self.y - self.h * 0.5
    y1 = self.y + self.h * 0.5
    box = BBox(x0, y0, x1, y1)
    return box
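# For reference, a center/size box (x, y, w, h) = (50, 40, 20, 10) converts to corners
# (x0, y0, x1, y1) = (40, 35, 60, 45). The tiny check below mirrors the arithmetic in `to_box`
# with a stand-in dataclass; the field names are only assumed to match the class above.
from dataclasses import dataclass

@dataclass
class CenterBox:
    x: float  # center x
    y: float  # center y
    w: float  # width
    h: float  # height

    def to_corners(self):
        # same arithmetic as `to_box`, returning a plain tuple instead of a BBox
        return (self.x - self.w * 0.5, self.y - self.h * 0.5,
                self.x + self.w * 0.5, self.y + self.h * 0.5)

assert CenterBox(50, 40, 20, 10).to_corners() == (40.0, 35.0, 60.0, 45.0)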
def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int, path_to_checkpoint: str,
                  dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    with torch.no_grad():
        for sn in itertools.count(start=1):
            _, frame = video_capture.read()

            if sn % period_of_inference != 0:
                continue

            timestamp = time.time()

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)
            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image = np.array(image)
            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            elapse = time.time() - timestamp
            fps = 1 / elapse
            cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

            cv2.imshow('easy-faster-rcnn.pytorch', frame)
            if cv2.waitKey(10) == 27:
                break

    video_capture.release()
    cv2.destroyAllWindows()
def getMeshSceneBBox(self):
    """Get the view-aligned bbox of the mesh at its current orientation.

    Return a BBox.
    """
    mappedVerts = []
    for meshVert in self._mesh.sharedVertices():
        mappedVerts.append(self.mxv(self.modelviewMatrix, meshVert + [1.0])[:3])
    bbox = BBox.fromVertices(mappedVerts)
    return bbox
def _generate_detections(self, proposal_bboxes: Tensor, proposal_classes: Tensor, proposal_transformers: Tensor,
                         image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor]:
    proposal_transformers = proposal_transformers.view(-1, Model.NUM_CLASSES, 4)
    mean = self._transformer_normalize_mean.repeat(1, Model.NUM_CLASSES, 1).cuda()
    std = self._transformer_normalize_std.repeat(1, Model.NUM_CLASSES, 1).cuda()
    # undo the target normalization (targets were normalized as (t - mean) / std)
    proposal_transformers = proposal_transformers * std + mean

    proposal_bboxes = proposal_bboxes.view(-1, 1, 4).repeat(1, Model.NUM_CLASSES, 1)
    detection_bboxes = BBox.apply_transformer(proposal_bboxes.view(-1, 4), proposal_transformers.view(-1, 4))
    detection_bboxes = detection_bboxes.view(-1, Model.NUM_CLASSES, 4)
    detection_bboxes[:, :, [0, 2]] = detection_bboxes[:, :, [0, 2]].clamp(min=0, max=image_width)
    detection_bboxes[:, :, [1, 3]] = detection_bboxes[:, :, [1, 3]].clamp(min=0, max=image_height)
    proposal_probs = F.softmax(proposal_classes, dim=1)

    detection_bboxes = detection_bboxes.cpu()
    proposal_probs = proposal_probs.cpu()

    generated_bboxes = []
    generated_labels = []
    generated_probs = []

    for c in range(1, Model.NUM_CLASSES):
        detection_class_bboxes = detection_bboxes[:, c, :]
        proposal_class_probs = proposal_probs[:, c]

        _, sorted_indices = proposal_class_probs.sort(descending=True)
        detection_class_bboxes = detection_class_bboxes[sorted_indices]
        proposal_class_probs = proposal_class_probs[sorted_indices]
        keep_indices = NMS.suppress(detection_class_bboxes.cuda(), threshold=0.3)
        detection_class_bboxes = detection_class_bboxes[keep_indices]
        proposal_class_probs = proposal_class_probs[keep_indices]

        generated_bboxes.append(detection_class_bboxes)
        generated_labels.append(torch.ones(len(keep_indices)) * c)
        generated_probs.append(proposal_class_probs)

    generated_bboxes = torch.cat(generated_bboxes, dim=0)
    generated_labels = torch.cat(generated_labels, dim=0)
    generated_probs = torch.cat(generated_probs, dim=0)

    return generated_bboxes, generated_labels, generated_probs
def _infer(path_to_input_dir: str, path_to_output_dir: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    images = glob.glob(path_to_input_dir + '/*.jpg')

    with torch.no_grad():
        for image in tqdm(images):
            name = image.split("/")[-1]
            image = transforms.Image.open(image).convert("RGB")
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)
            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image.save(path_to_output_dir + name)

    print(f'Output images are saved to {path_to_output_dir}')
def create_bboxes(self, proposal_bboxes, proposal_transformers, image_width, image_height, batch_size):
    proposal_transformers = proposal_transformers.view(batch_size, -1, 4)
    proposal_bboxes = proposal_bboxes.unsqueeze(dim=2).repeat(1, 1, 1, 1)

    transformer_normalize_std = self._transformer_normalize_std.to(device=proposal_transformers.device)
    transformer_normalize_mean = self._transformer_normalize_mean.to(device=proposal_transformers.device)
    proposal_transformers = proposal_transformers * transformer_normalize_std + transformer_normalize_mean

    detection_bboxes = BBox.apply_transformer(proposal_bboxes, proposal_transformers.unsqueeze(2))
    detection_bboxes = BBox.clip(detection_bboxes, left=0, top=0, right=image_width, bottom=image_height)

    return detection_bboxes
def __init__(self, path_to_data_dir: str, mode: Mode):
    super().__init__()
    self._mode = mode

    path_to_voc2007_dir = os.path.join(path_to_data_dir, 'VOCdevkit', 'VOC2007')
    path_to_imagesets_main_dir = os.path.join(path_to_voc2007_dir, 'ImageSets', 'Main')
    path_to_annotations_dir = os.path.join(path_to_voc2007_dir, 'Annotations')
    self._path_to_jpeg_images_dir = os.path.join(path_to_voc2007_dir, 'JPEGImages')

    if self._mode == Dataset.Mode.TRAIN:
        path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'trainval.txt')
    elif self._mode == Dataset.Mode.TEST:
        path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'test.txt')
    else:
        raise ValueError('invalid mode')

    with open(path_to_image_ids_txt, 'r') as f:
        lines = f.readlines()
        self._image_ids = [line.rstrip() for line in lines]

    self._image_id_to_annotation_dict = {}
    for image_id in self._image_ids:
        path_to_annotation_xml = os.path.join(path_to_annotations_dir, f'{image_id}.xml')
        tree = ET.ElementTree(file=path_to_annotation_xml)
        root = tree.getroot()

        self._image_id_to_annotation_dict[image_id] = Dataset.Annotation(
            filename=next(root.iterfind('filename')).text,
            objects=[
                Dataset.Annotation.Object(
                    name=next(tag_object.iterfind('name')).text,
                    difficult=next(tag_object.iterfind('difficult')).text == '1',
                    bbox=BBox(
                        left=float(next(tag_object.iterfind('bndbox/xmin')).text),
                        top=float(next(tag_object.iterfind('bndbox/ymin')).text),
                        right=float(next(tag_object.iterfind('bndbox/xmax')).text),
                        bottom=float(next(tag_object.iterfind('bndbox/ymax')).text)))
                for tag_object in root.iterfind('object')
            ])
def bbox(self, min_area=0):
    """ smart bounding box """
    bb = []
    bbox = BBox()
    if min_area == 0:
        bb.append(self.poly.boundingBox())
    else:
        areas = self.areas()
        max_a = max(areas)
        for i in range(len(self.poly)):
            if self.poly.isHole(i):
                continue
            a = areas[i]
            if a < max_a * min_area:
                continue
            bb.append(self.poly.boundingBox(i))
    for b in bb:
        bbox.update((b[0], b[2]))
        bbox.update((b[1], b[2]))
        bbox.update((b[0], b[3]))
        bbox.update((b[1], b[3]))
    return bbox
def addProfile(self, profile, color=None, close=False):
    """Create each Patch defined by the profile.

    profile -- a list of tuples as defined in tooldef.py
    color -- [r, g, b, a]
    close -- if True and the profile start or end points are not on the
             axis of revolution, insert one with X=0.0 and Y equal to the
             start or end point Y.
    """
    if close:
        e1 = profile[0]  # should always be a point
        if e1[0] != 0.0:
            profile = [(0.0, e1[1])] + profile
        e2 = profile[-1]
        if e2[0] != 0.0:
            if len(e2) == 2:
                profile.append((0.0, e2[1]))
            else:
                # profile ends in an arc
                profile.append((0.0, e2[0][1]))

    # previous line start x/y, for line -> arc
    px1 = py1 = None
    for e1, e2 in windowItr(profile, 2, 1):
        if e2 is None:
            break
        le1 = len(e1)
        le2 = len(e2)
        # line or start -> line
        if le1 == 2 and le2 == 2:
            x1, y1 = e1
            x2, y2 = e2
            self.blendTangent(False)
            patch = Patch.fromRevLineSeg(x1, y1, x2, y2, self)
            if color:
                patch.setColor(color)
            self._patches.append(patch)
            px1 = x1
            py1 = y1
        # line or start -> arc
        elif le1 == 2 and le2 == 3:
            x1, y1 = e1
            (x2, y2), (cx, cy), d = e2
            if px1 is not None:
                self.blendTangent(self._isLineTanToArc(px1, py1, x1, y1, cx, cy, d))
            patch = Patch.fromRevArcSeg(x1, y1, x2, y2, cx, cy, d, self)
            if color:
                patch.setColor(color)
            self._patches.append(patch)
        # arc -> line
        elif le1 == 3 and le2 == 2:
            (aex, aey), (cx, cy), d = e1
            lex, ley = e2
            self.blendTangent(self._isLineTanToArc(lex, ley, aex, aey, cx, cy, d))
            patch = Patch.fromRevLineSeg(aex, aey, lex, ley, self)
            if color:
                patch.setColor(color)
            self._patches.append(patch)
            px1 = aex
            py1 = aey
        # arc -> arc
        else:
            (x1, y1), (cx1, cy1), d1 = e1
            (x2, y2), (cx2, cy2), d2 = e2
            self.blendTangent(self._isArcTangentToArc(x1, y1, cx1, cy1, cx2, cy2))
            patch = Patch.fromRevArcSeg(x1, y1, x2, y2, cx2, cy2, d2, self)
            if color:
                patch.setColor(color)
            self._patches.append(patch)

    self._bbox = BBox.fromVertices(self._sharedVertices)