def nms(class_pred, box_pred, coef_pred, proto_out, anchors, cfg):
    """Filter raw predictions by class score, then run fast NMS.

    Returns (class_ids, class_scores, boxes, coefs, prototypes), or five
    Nones when no prediction clears the score threshold.
    """
    scores = class_pred.squeeze()      # [num_anchors, num_classes]
    boxes_raw = box_pred.squeeze()     # [num_anchors, 4]
    coefs = coef_pred.squeeze()        # [num_anchors, mask_dim]
    protos = proto_out.squeeze()       # prototype masks

    # Class-major layout with the background class (index 0) dropped.
    scores = scores.transpose(1, 0).contiguous()[1:, :]

    # Keep only anchors whose best foreground score clears the threshold.
    best_scores, _ = torch.max(scores, dim=0)
    keep = best_scores > cfg.nms_score_thre

    kept_scores = scores[:, keep]
    kept_boxes = decode(boxes_raw[keep, :], anchors[keep, :])
    kept_coefs = coefs[keep, :]

    if kept_scores.shape[1] == 0:
        return None, None, None, None, None

    kept_boxes, kept_coefs, class_ids, kept_scores = fast_nms(
        kept_boxes, kept_coefs, kept_scores, cfg)
    return class_ids, kept_scores, kept_boxes, kept_coefs, protos
def forward(self, loc_data, conf_data, priors):
    """Decode predictions and run per-class NMS (NumPy implementation).

    Returns an array of shape (batch, num_classes, top_k, 5) whose last
    axis is [score, box(4)]; unused slots remain zero.
    """
    batch_size = loc_data.shape[0]
    detections = np.zeros(
        shape=(batch_size, self.num_classes, self.top_k, 5), dtype=np.float32)
    # Class-major scores: (batch, num_classes, num_priors).
    class_scores = conf_data.swapaxes(2, 1)

    for b in range(batch_size):
        boxes_all = decode(loc=loc_data[b], priors=priors,
                           variances=self.variances)
        scores_all = class_scores[b].copy()

        for cls in range(1, self.num_classes):  # skip background class 0
            keep = np.greater(scores_all[cls], self.conf_thresh)
            cls_scores = np.float32(scores_all[cls][keep])
            if cls_scores.shape[0] == 0:
                continue

            box_mask = keep.reshape(-1, 1).repeat(4, axis=-1)
            cls_boxes = boxes_all[box_mask].reshape(-1, 4).astype(np.float32)

            ids, count = non_maximum_supression(
                boxes=cls_boxes, scores=cls_scores,
                overlap=self.nms_thresh, top_k=self.top_k)
            ids = np.int32(ids)
            count = np.int32(count)

            picked_scores = np.expand_dims(cls_scores[ids[:count]], axis=1)
            detections[b, cls, :count] = np.concatenate(
                (picked_scores, cls_boxes[ids[:count]]), axis=-1)
    return detections
def test(ssd, data, default_boxes):
    """Run the SSD on one image, apply per-class NMS, and rescale boxes.

    Args:
        ssd: trained SSD model
        data: (image, _, _) triple; image is a torch tensor of (1, 3, H, W)
        default_boxes: torch tensor of shape (num_default, 4)
    Returns:
        img: numpy array of (3, H, W)
        all_boxes: final boxes, numpy array of (num_boxes, 4)
        all_scores: final scores, numpy array of (num_boxes,)
        all_names: final class ids, numpy array of (num_boxes,)
    """
    img, _, _ = data
    img = img.to(device)
    default_boxes = default_boxes.to(device)

    with torch.no_grad():
        out_confs, out_locs = ssd(img)

    out_confs = out_confs.squeeze(0)
    out_locs = out_locs.squeeze(0)
    out_boxes = decode(default_boxes, out_locs)
    out_labels = F.softmax(out_confs, dim=1)

    kept_boxes, kept_scores, kept_names = [], [], []
    for cls in range(1, NUM_CLASSES):  # skip background
        scores_c = out_labels[:, cls]
        passing = scores_c > args.score_thresh
        boxes_c = out_boxes[passing]
        scores_c = scores_c[passing]

        picked = compute_nms(boxes_c, scores_c, args.nms_thresh,
                             args.max_num_boxes_per_class)
        boxes_c = boxes_c[picked]
        scores_c = scores_c[picked]

        kept_boxes.append(boxes_c)
        kept_scores.append(scores_c)
        kept_names.extend([cls] * boxes_c.size(0))

    all_boxes = torch.cat(kept_boxes, dim=0)
    all_scores = torch.cat(kept_scores, dim=0)

    img = img.squeeze(0).cpu().numpy()
    all_boxes *= img.shape[-1]  # scale normalized coords to pixels
    return (img, all_boxes.cpu().numpy(), all_scores.cpu().numpy(),
            np.array(kept_names))
def conf_objectness_loss(self, conf_data, conf_t, batch_size, loc_p, loc_t, priors):
    """
    Instead of using softmax, use class[0] to be p(obj) * p(IoU) as in YOLO.
    Then for the rest of the classes, softmax them and apply CE for only the
    positive examples.
    """
    conf_t = conf_t.view(-1)  # [batch_size*num_priors]
    conf_data = conf_data.view(
        -1, conf_data.size(-1))  # [batch_size*num_priors, num_classes]

    # conf_t > 0 marks positive (matched) priors; 0 marks negatives.
    pos_mask = conf_t > 0
    neg_mask = conf_t == 0

    # Channel 0 is the objectness logit.
    obj_data = conf_data[:, 0]
    obj_data_pos = obj_data[pos_mask]
    obj_data_neg = obj_data[neg_mask]

    # Negatives: push objectness toward 0.
    # -log(1 - sigmoid(x)) == -logsigmoid(-x)
    obj_neg_loss = -F.logsigmoid(-obj_data_neg).sum()

    # IoU between decoded predictions and decoded targets serves as the soft
    # objectness target; computed without gradient.
    with torch.no_grad():
        pos_priors = (priors.unsqueeze(0).expand(
            batch_size, -1, -1).reshape(-1, 4)[pos_mask, :])
        boxes_pred = decode(loc_p, pos_priors, cfg.use_yolo_regressors)
        boxes_targ = decode(loc_t, pos_priors, cfg.use_yolo_regressors)
        iou_targets = elemwise_box_iou(boxes_pred, boxes_targ)

    # Positives: binary cross-entropy of sigmoid(objectness) against the IoU.
    obj_pos_loss = -iou_targets * F.logsigmoid(obj_data_pos) - (
        1 - iou_targets) * F.logsigmoid(-obj_data_pos)
    obj_pos_loss = obj_pos_loss.sum()

    # Class loss on positives only, over the foreground classes.
    conf_data_pos = (
        conf_data[:, 1:])[pos_mask]  # Now this has just 80 classes
    conf_t_pos = conf_t[pos_mask] - 1  # So subtract 1 here
    class_loss = F.cross_entropy(conf_data_pos, conf_t_pos, reduction="sum")

    return cfg.conf_alpha * (class_loss + obj_pos_loss + obj_neg_loss)
def __call__(self, predictions):
    """Run detection post-processing on one batch of network outputs.

    Args:
        predictions: dict with keys
            "loc":    [batch, num_priors, 4] box regressions
            "conf":   [batch, num_priors, num_classes] class logits
            "mask":   [batch, num_priors, mask_dim] mask coefficients
            "priors": [num_priors, 4] prior boxes
            "proto" (optional): [batch, mask_h, mask_w, mask_dim] prototypes
            "inst" (optional):  instance data
    Returns:
        One result dict per image (class idx, confidence, bbox, mask).
        Outputs are sorted only if cross_class_nms is False.
    """
    loc_data = predictions["loc"]
    conf_data = predictions["conf"]
    mask_data = predictions["mask"]
    prior_data = predictions["priors"]
    proto_data = predictions.get("proto")
    inst_data = predictions.get("inst")

    results = []
    with timer.env("Detect"):
        batch_size = loc_data.size(0)
        num_priors = prior_data.size(0)

        # Class-major confidences: [batch, num_classes, num_priors].
        conf_preds = (conf_data.view(batch_size, num_priors,
                                     self.num_classes)
                      .transpose(2, 1).contiguous())

        for batch_idx in range(batch_size):
            decoded_boxes = decode(loc_data[batch_idx], prior_data)
            result = self.detect(batch_idx, conf_preds, decoded_boxes,
                                 mask_data, inst_data)
            if result is not None and proto_data is not None:
                result["proto"] = proto_data[batch_idx]
            results.append(result)
    return results
def forward(self, num_classes, bkg_label, top_k, conf_thresh, nms_thresh,
            loc_data, conf_data, prior_data):
    """Decode predictions and keep top_k NMS'd detections per image.

    Args:
        num_classes: total classes including background (index 0).
        bkg_label: background class label (stored on self, unused here).
        top_k: max detections kept per class and per image overall.
        conf_thresh: minimum class confidence to consider a prior.
        nms_thresh: IoU threshold for NMS; must be > 0.
        loc_data: (tensor) Loc preds, shape [batch, num_priors, 4].
        conf_data: (tensor) Conf preds, shape [batch*num_priors, num_classes].
        prior_data: (tensor) Prior boxes and variances, shape [num_priors, 4].

    Returns:
        Tensor [batch, num_classes, top_k, 5]; last dim is [score, x1, y1, x2, y2].

    Raises:
        ValueError: if nms_thresh is not positive.
    """
    self.num_classes = num_classes
    self.background_label = bkg_label
    self.top_k = top_k
    # Parameters used in nms.
    self.nms_thresh = nms_thresh
    if nms_thresh <= 0:
        # Message fixed: the check rejects zero as well as negatives.
        raise ValueError('nms_threshold must be positive.')
    self.conf_thresh = conf_thresh
    self.variance = cfg['variance']

    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    # Class-major scores: [batch, num_classes, num_priors].
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each foreground class, threshold then NMS.
        conf_scores = conf_preds[i].clone()
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1),
                           boxes[ids[:count]]), 1)

    # Keep only the top_k detections per image across all classes.
    # Bug fix: the original `flt[(rank < top_k)...].fill_(0)` zeroed a *copy*
    # (boolean advanced indexing returns new storage, so fill_ had no effect)
    # and the mask was inverted (it selected the detections to KEEP).
    # In-place masked assignment on the entries ranked beyond top_k is the
    # intended behavior; `view` shares storage with `output`.
    flt = output.view(num, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
    return output
def post_process(confs, locs, scores, default_boxes, mode=1):
    """Decode SSD outputs and run per-class score filtering plus NMS.

    mode == 2: `confs` are raw logits -> softmax'ed into per-class scores.
    mode == 1: `confs` is compared elementwise to each class id c, which only
               makes sense if `confs` carries class labels rather than scores
               — NOTE(review): confirm with the caller.
    Returns (boxes, classes, scores) as numpy arrays; boxes clipped to [0, 1].
    """
    # confs = tf.squeeze(confs, 0)
    # locs = tf.squeeze(locs, 0)
    # i have o return the locs back
    # print(scores)
    # print(confs)
    # print(locs)
    # Debug output left in place.
    print(tf.math.reduce_max(locs), tf.math.reduce_min(locs))
    # Decode box regressions against the anchors.
    newres = decode(default_boxes, locs).numpy()
    if (mode == 2):
        confs = tf.math.softmax(confs, axis=-1)
        classes = tf.math.argmax(confs, axis=-1)
        scores = tf.math.reduce_max(confs, axis=-1)
    out_boxes = []
    out_labels = []
    out_scores = []
    # print(confs.shape,classes.shape, scores.shape, boxes.shape)
    for c in range(1, NUM_CLASSES):  # skip background class 0
        if (mode == 1):
            # NOTE(review): keeps only entries whose `confs` value equals the
            # class id c; all other positions stay zero and fail the 0.5 test.
            cls_scores = np.zeros(np.shape(confs))
            cls_scores[confs == c] = confs[confs == c]
        else:
            cls_scores = confs[:, c]
        score_idx = cls_scores > 0.5
        # cls_boxes = tf.boolean_mask(boxes, score_idx)
        # cls_scores = tf.boolean_mask(cls_scores, score_idx)
        cls_boxes = newres[score_idx]
        cls_scores = cls_scores[score_idx]
        # NMS with IoU 0.35, at most 200 detections for this class.
        nms_idx = compute_nms(cls_boxes, cls_scores, 0.35, 200)
        cls_boxes = tf.gather(cls_boxes, nms_idx)
        cls_scores = tf.gather(cls_scores, nms_idx)
        cls_labels = [c] * cls_boxes.shape[0]
        out_boxes.append(cls_boxes)
        out_labels.extend(cls_labels)
        out_scores.append(cls_scores)
    out_boxes = tf.concat(out_boxes, axis=0)
    out_scores = tf.concat(out_scores, axis=0)
    boxes = tf.clip_by_value(out_boxes, 0.0, 1.0).numpy()
    classes = np.array(out_labels)
    scores = out_scores.numpy()
    return boxes, classes, scores
def direct_mask_loss(self, pos_idx, idx_t, loc_data, mask_data, priors,
                     masks):
    """ Crops the gt masks using the predicted bboxes, scales them down, and
    outputs the BCE loss. """
    loss_m = 0
    for idx in range(mask_data.size(0)):  # per image in the batch
        # Ground-truth preparation carries no gradient.
        with torch.no_grad():
            cur_pos_idx = pos_idx[idx, :, :]
            # assumes all 4 columns of pos_idx agree per prior, so column 1
            # stands in for "is this prior positive" — TODO confirm
            cur_pos_idx_squeezed = cur_pos_idx[:, 1]

            # Shape: [num_priors, 4], decoded predicted bboxes
            pos_bboxes = decode(loc_data[idx, :, :], priors.data,
                                cfg.use_yolo_regressors)
            pos_bboxes = pos_bboxes[cur_pos_idx].view(-1, 4).clamp(0, 1)
            # Map each positive prior to its matched ground-truth index.
            pos_lookup = idx_t[idx, cur_pos_idx_squeezed]

            cur_masks = masks[idx]
            pos_masks = cur_masks[pos_lookup, :, :]

            # Convert bboxes to absolute coordinates
            num_pos, img_height, img_width = pos_masks.size()

            x1, x2 = sanitize_coordinates(pos_bboxes[:, 0], pos_bboxes[:, 2],
                                          img_width)
            y1, y2 = sanitize_coordinates(pos_bboxes[:, 1], pos_bboxes[:, 3],
                                          img_height)

            # Crop each gt mask to its predicted box, then average-pool it
            # down to cfg.mask_size x cfg.mask_size.
            scaled_masks = []
            for jdx in range(num_pos):
                tmp_mask = pos_masks[jdx, y1[jdx]:y2[jdx], x1[jdx]:x2[jdx]]

                # Restore any dimensions lost by a zero-area crop so the
                # pooling below always sees a 2-D tensor.
                while tmp_mask.dim() < 2:
                    tmp_mask = tmp_mask.unsqueeze(0)

                new_mask = F.adaptive_avg_pool2d(tmp_mask.unsqueeze(0),
                                                 cfg.mask_size)
                scaled_masks.append(new_mask.view(1, -1))

            mask_t = (torch.cat(scaled_masks, 0).gt(0.5).float()
                      )  # Threshold downsampled mask

        pos_mask_data = mask_data[idx, cur_pos_idx_squeezed, :]
        loss_m += (F.binary_cross_entropy(
            torch.clamp(pos_mask_data, 0, 1), mask_t, reduction="sum") *
                   cfg.mask_alpha)

    return loss_m
def forward(self, loc_data, conf_data, prior_data):
    """Decode predictions and keep top_k NMS'd detections per image.

    Args:
        loc_data: (tensor) Loc preds from loc layers
            Shape: [batch, num_priors, 4]
        conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors, num_classes]
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [num_priors, 4]

    Returns:
        Tensor [batch, num_classes, top_k, 5]; last dim is [score, box(4)].
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    # Output buffer: [batch N, num_classes, top 200 hits, confidence+box].
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    # Rearrange confidences to [batch N, num_classes, num_priors].
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()
        for cl in range(1, self.num_classes):
            # Drop boxes below the confidence threshold.
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            # Boxes surviving the threshold for this class.
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class;
            # NMS removes duplicates among `boxes`.
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1),
                           boxes[ids[:count]]), 1)

    # Keep only the top_k detections per image across all classes.
    # Bug fix: the original `flt[(rank < top_k)...].fill_(0)` zeroed a *copy*
    # (boolean advanced indexing returns new storage, so fill_ was a no-op)
    # and the mask was inverted. Use in-place masked assignment on entries
    # ranked beyond top_k; `view` shares storage with `output`.
    flt = output.view(num, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
    return output
def __call__(self, loc_data, conf_data, prior_data):
    """Decode boxes and return per-image foreground detections.

    Args:
        loc_data: (tensor) loc preds, shape [batch, num_priors, 4]
        conf_data: (tensor) conf preds, shape [batch*num_priors, num_classes]
        prior_data: (tensor) prior boxes/variances, shape [num_priors, 4]
    Returns:
        list with one `[boxes, scores]` numpy pair per image that has at
        least one class-1 score above conf_thresh; NMS is left to the caller.
    """
    num = loc_data.size(0)
    num_priors = prior_data.size(0)

    conf_data = self.softmax(conf_data)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode the whole batch in one call against tiled priors.
    batch_priors = (prior_data.view(-1, num_priors, 4)
                    .expand(num, num_priors, 4)
                    .contiguous().view(-1, 4))
    decoded = decode(loc_data.view(-1, 4), batch_priors, self.variance)
    decoded = decoded.view(num, num_priors, 4)

    results = []
    for i in range(num):
        boxes = decoded[i].clone()
        conf_scores = conf_preds[i].clone()

        # Class index 1 is the (single) foreground class.
        keep = conf_scores[1].gt(self.conf_thresh)
        scores = conf_scores[1][keep]
        if scores.numel() == 0:
            continue

        box_mask = keep.unsqueeze(1).expand_as(boxes)
        kept_boxes = boxes[box_mask].view(-1, 4)
        results.append([kept_boxes.detach().numpy(),
                        scores.detach().numpy()])
    return results
def forward(self, loc, conf, prior):
    """Decode raw network outputs into boxes and scores for the whole batch.

    Args:
        loc: (tensor) loc preds, shape [batch, num_priors, 4]
        conf: (tensor) conf preds, shape [batch*num_priors, num_classes]
        prior: (tensor) prior boxes and variances, shape [num_priors, 4]

    Returns:
        (boxes, scores): decoded boxes [batch, num_priors, 4] and class
        scores [batch, num_priors, num_classes], also cached on self.
    """
    loc_data = loc.data
    conf_data = conf.data
    prior_data = prior.data

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)

    # Bug fix: allocate the full batch up front. The original allocated a
    # batch of 1 and then called the nonexistent in-place `Tensor.expand_`
    # (AttributeError for num > 1) and referenced an undefined local
    # `num_priors` (NameError) in the multi-batch view.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)
    if loc_data.is_cuda:
        self.boxes = self.boxes.cuda()
        self.scores = self.scores.cuda()

    if num == 1:
        # size batch x num_priors x num_classes
        conf_preds = conf_data.unsqueeze(0)
    else:
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        self.boxes[i] = decoded_boxes
        self.scores[i] = conf_preds[i].clone()

    return self.boxes, self.scores
def forward(self, loc_data, conf_data, prior_data, conf_thresh):
    """Decode predictions and keep top_k NMS'd detections per image.

    Args:
        loc_data: (tensor) loc preds, shape [batch, num_priors, 4]
        conf_data: (tensor) conf preds, shape [batch, num_classes, num_priors]
            after the transpose below; passed in class-minor layout.
        prior_data: (tensor) prior boxes/variances, shape [num_priors, 4]
        conf_thresh: minimum class confidence to consider a prior.

    Returns:
        Tensor [batch, num_classes, top_k, 5]; last dim is [score, box(4)].
    """
    batch_size = loc_data.size(0)
    num_priors = prior_data.size(0)
    output = torch.zeros(batch_size, self.num_classes, self.top_k, 5)
    if loc_data.is_cuda:
        output = output.cuda()
    conf_preds = conf_data.transpose(2, 1)  # group by classes

    # Decode predictions into bboxes.
    for i in range(batch_size):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()
        for cl in range(self.num_classes):
            c_mask = conf_scores[cl].gt(conf_thresh)
            scores = conf_scores[cl][c_mask]
            # Bug fix: after boolean masking `scores` is always 1-D, so the
            # original `scores.dim() == 0` test never fired; test emptiness.
            if scores.numel() == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            thresholded_boxes = decoded_boxes[l_mask]
            if len(thresholded_boxes) > 0:
                boxes = thresholded_boxes.view(-1, 4)
                # idx of highest scoring and non-overlapping boxes per class
                ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
                output[i, cl, :count] = \
                    torch.cat((scores[ids[:count]].unsqueeze(1),
                               boxes[ids[:count]]), 1)

    # Keep only the top_k detections per image across all classes.
    # Bug fix: the original `flt[(rank < top_k)...].fill_(0)` zeroed a *copy*
    # (advanced indexing returns new storage) and kept the wrong side of the
    # mask; zero detections ranked beyond top_k with in-place assignment.
    flt = output.view(batch_size, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
    return output
def predict(confs, locs, thresh, default_boxes):
    """Post-process one image: softmax scores, decode boxes, per-class NMS.

    Returns (boxes, classes, scores) as numpy arrays with boxes clipped
    to [0, 1].
    """
    confs = tf.squeeze(confs, 0)
    locs = tf.squeeze(locs, 0)
    confs = tf.math.softmax(confs, axis=-1)
    boxes = decode(default_boxes, locs)

    picked_boxes, picked_labels, picked_scores = [], [], []

    # Debug output left in place on purpose.
    print('confs shape =', np.shape(confs))
    print('confs max =', np.amax(confs))
    print('confs min =', np.amin(confs))
    print('confs max index =',
          np.where(np.array(confs) == np.array(confs).max()))

    for c in range(1, num_classes):  # skip background class 0
        scores_c = confs[:, c]
        above = scores_c > thresh
        boxes_c = boxes[above]
        scores_c = scores_c[above]

        keep = compute_nms(boxes_c, scores_c, 0.45, 200)
        boxes_c = tf.gather(boxes_c, keep)
        scores_c = tf.gather(scores_c, keep)

        picked_boxes.append(boxes_c)
        picked_labels.extend([c] * boxes_c.shape[0])
        picked_scores.append(scores_c)

    merged_boxes = tf.concat(picked_boxes, axis=0)
    merged_scores = tf.concat(picked_scores, axis=0)
    return (tf.clip_by_value(merged_boxes, 0.0, 1.0).numpy(),
            np.array(picked_labels),
            merged_scores.numpy())
def predict(imgs, default_boxes):
    """Run the SSD on a single-image batch and post-process its outputs.

    Returns (boxes, classes, scores) as numpy arrays with boxes clipped
    to [0, 1].
    """
    confs, locs = ssd(imgs)
    confs = tf.squeeze(confs, 0)
    locs = tf.squeeze(locs, 0)
    confs = tf.math.softmax(confs, axis=-1)
    # Computed for parity with the original code path; superseded below.
    classes = tf.math.argmax(confs, axis=-1)
    scores = tf.math.reduce_max(confs, axis=-1)
    boxes = decode(default_boxes, locs)

    picked_boxes, picked_labels, picked_scores = [], [], []
    for c in range(1, NUM_CLASSES):  # skip background class 0
        scores_c = confs[:, c]
        above = scores_c > 0.6
        boxes_c = boxes[above]
        scores_c = scores_c[above]

        keep = compute_nms(boxes_c, scores_c, 0.45, 200)
        boxes_c = tf.gather(boxes_c, keep)
        scores_c = tf.gather(scores_c, keep)

        picked_boxes.append(boxes_c)
        picked_labels.extend([c] * boxes_c.shape[0])
        picked_scores.append(scores_c)

    merged_boxes = tf.concat(picked_boxes, axis=0)
    merged_scores = tf.concat(picked_scores, axis=0)
    return (tf.clip_by_value(merged_boxes, 0.0, 1.0).numpy(),
            np.array(picked_labels),
            merged_scores.numpy())
def forward(self, arm_loc_data, arm_conf_data, odm_loc_data, odm_conf_data,
            prior_data):
    """Two-stage (ARM -> ODM) detection with per-class NMS.

    Args:
        arm_loc_data: ARM loc preds, shape [batch, num_priors, 4]
        arm_conf_data: ARM conf preds (binary objectness), shape
            [batch, num_priors, 2]
        odm_loc_data: ODM loc preds, shape [batch, num_priors, 4]
        odm_conf_data: ODM conf preds, shape [batch, num_priors, num_classes]
        prior_data: prior boxes and variances, shape [num_priors, 4]

    Returns:
        Tensor [batch, num_classes, top_k, 5]; last dim is [score, box(4)].
    """
    loc_data = odm_loc_data
    conf_data = F.softmax(odm_conf_data, dim=2)
    arm_conf_data = F.softmax(arm_conf_data, dim=2)

    # Suppress ODM class scores wherever the ARM objectness says "no object".
    arm_object_conf = arm_conf_data.data[:, :, 1:]
    no_object_index = arm_object_conf <= self.objectness_thre
    conf_data[no_object_index.expand_as(conf_data)] = 0

    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Bug fix: the original `prior_data.cuda()` discarded its result
    # (Tensor.cuda() is not in-place), so priors never moved. Follow the
    # device of the inputs instead, which is also correct for CPU runs.
    prior_data = prior_data.to(loc_data.device)

    # Decode predictions into bboxes.
    for i in range(num):
        # Cascade decoding: ARM refines the priors, ODM refines ARM boxes.
        default = decode(arm_loc_data[i], prior_data, self.variance)
        default = center_size(default)
        decoded_boxes = decode(loc_data[i], default, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            ids = torch.tensor(ids, dtype=torch.long)
            if count == 0:
                continue
            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].view(-1, 1),
                           boxes[ids[:count]].view(-1, 4)), 1)
    return output
# Dequantize the TFLite integer outputs back to float using each output
# tensor's quantization parameters.
# NOTE(review): standard TFLite dequantization is scale * (q - zero_point);
# here index [1] is used as the offset and [0] as the scale — confirm against
# interpreter.get_output_details()['quantization'] == (scale, zero_point).
p_conf = (p_conf - np.float32(
    interpreter.get_output_details()[0]['quantization'][1])) * np.float32(
        interpreter.get_output_details()[0]['quantization'][0])
p_boxes = (p_boxes - np.float32(
    interpreter.get_output_details()[1]['quantization'][1])) * np.float32(
        interpreter.get_output_details()[1]['quantization'][0])

# Load the SSD300 anchor configuration from the local YAML file.
# NOTE(review): yaml.load without an explicit Loader is deprecated and unsafe
# on untrusted input — consider yaml.safe_load.
with open('./config.yml') as f:
    cfg = yaml.load(f)
try:
    config = cfg['SSD300']  # [args.arch.upper()]
except AttributeError:
    # NOTE(review): dict indexing raises KeyError/TypeError, not
    # AttributeError — verify which failure this is meant to catch.
    raise ValueError('Unknown architecture:')
default_boxes = generate_default_boxes(config)

# Decode box regressions against the anchors; scores via softmax.
newres = decode(default_boxes, p_boxes[0]).numpy()
conf = softmax(p_conf, -1)[0]
classes = np.argmax(conf, -1)


# sort and filter to threshold > 0.5, top 400 dets
# NOTE(review): this function's body continues beyond this chunk.
def det_sort_filt(boxes, conf, classes, topn=100, threshold=0.5):
    # one class
    conf = conf[:, 1:]
    scores = np.squeeze(conf)
    # filter: keep class-1 detections whose score clears the threshold
    mask1 = classes == 1
    mask2 = scores >= threshold
    mask = np.logical_and(mask1, mask2)
    boxes = boxes[mask]
def lincomb_mask_loss(
    self,
    pos,
    idx_t,
    loc_data,
    mask_data,
    priors,
    proto_data,
    masks,
    gt_box_t,
    score_data,
    inst_data,
    interpolation_mode="bilinear",
):
    """Linear-combination mask loss: predicted masks are prototypes times
    per-instance coefficients, compared against downsampled gt masks.

    Returns a dict with key "M" (mask loss) and, when the coefficient
    diversity loss is enabled, key "D".
    """
    mask_h = proto_data.size(1)
    mask_w = proto_data.size(2)

    # Ground-truth boxes are needed when cropping or ROI-style normalizing.
    process_gt_bboxes = (cfg.mask_proto_normalize_emulate_roi_pooling
                         or cfg.mask_proto_crop)

    if cfg.mask_proto_remove_empty_masks:
        # `pos` gets modified below, so clone to avoid mutating the caller's.
        pos = pos.clone()

    loss_m = 0
    loss_d = 0  # Coefficient diversity loss

    for idx in range(mask_data.size(0)):  # per image in the batch
        # Ground-truth preparation carries no gradient.
        with torch.no_grad():
            # Downsample gt masks to the prototype resolution.
            downsampled_masks = F.interpolate(
                masks[idx].unsqueeze(0),
                (mask_h, mask_w),
                mode=interpolation_mode,
                align_corners=False,
            ).squeeze(0)
            downsampled_masks = downsampled_masks.permute(1, 2,
                                                          0).contiguous()

            if cfg.mask_proto_binarize_downsampled_gt:
                downsampled_masks = downsampled_masks.gt(0.5).float()

            if cfg.mask_proto_remove_empty_masks:
                # Unmark priors matched to gt masks that vanished when
                # downsampling.
                very_small_masks = (downsampled_masks.sum(dim=(0, 1)) <=
                                    0.0001)
                for i in range(very_small_masks.size(0)):
                    if very_small_masks[i]:
                        pos[idx, idx_t[idx] == i] = 0

            if cfg.mask_proto_reweight_mask_loss:
                # Ensure that the gt is binary
                if not cfg.mask_proto_binarize_downsampled_gt:
                    bin_gt = downsampled_masks.gt(0.5).float()
                else:
                    bin_gt = downsampled_masks

                # Per-pixel weights balancing foreground vs background area.
                gt_foreground_norm = bin_gt / (
                    torch.sum(bin_gt, dim=(0, 1), keepdim=True) + 0.0001)
                gt_background_norm = (1 - bin_gt) / (torch.sum(
                    1 - bin_gt, dim=(0, 1), keepdim=True) + 0.0001)

                mask_reweighting = (
                    gt_foreground_norm * cfg.mask_proto_reweight_coeff +
                    gt_background_norm)
                mask_reweighting *= mask_h * mask_w

        cur_pos = pos[idx]
        pos_idx_t = idx_t[idx, cur_pos]

        if process_gt_bboxes:
            # Note: this is in point-form
            if cfg.mask_proto_crop_with_pred_box:
                pos_gt_box_t = decode(
                    loc_data[idx, :, :],
                    priors.data,
                    cfg.use_yolo_regressors,
                )[cur_pos]
            else:
                pos_gt_box_t = gt_box_t[idx, cur_pos]

        if pos_idx_t.size(0) == 0:
            continue

        proto_masks = proto_data[idx]
        proto_coef = mask_data[idx, cur_pos, :]
        if cfg.use_mask_scoring:
            mask_scores = score_data[idx, cur_pos, :]

        if cfg.mask_proto_coeff_diversity_loss:
            if inst_data is not None:
                div_coeffs = inst_data[idx, cur_pos, :]
            else:
                div_coeffs = proto_coef

            loss_d += self.coeff_diversity_loss(div_coeffs, pos_idx_t)

        # Cap the number of masks trained per image by random subsampling.
        old_num_pos = proto_coef.size(0)
        if old_num_pos > cfg.masks_to_train:
            perm = torch.randperm(proto_coef.size(0))
            select = perm[:cfg.masks_to_train]

            proto_coef = proto_coef[select, :]
            pos_idx_t = pos_idx_t[select]

            if process_gt_bboxes:
                pos_gt_box_t = pos_gt_box_t[select, :]
            if cfg.use_mask_scoring:
                mask_scores = mask_scores[select, :]

        num_pos = proto_coef.size(0)
        mask_t = downsampled_masks[:, :,
                                   pos_idx_t]  # Size: [mask_h, mask_w, num_pos]

        # Predicted masks = prototypes @ coefficients, then activation.
        pred_masks = proto_masks @ proto_coef.t()
        pred_masks = cfg.mask_proto_mask_activation(pred_masks)

        if cfg.mask_proto_double_loss:
            # Extra loss term computed before cropping.
            if cfg.mask_proto_mask_activation == activation_func.sigmoid:
                pre_loss = F.binary_cross_entropy(torch.clamp(
                    pred_masks, 0, 1), mask_t, reduction="sum")
            else:
                pre_loss = F.smooth_l1_loss(pred_masks, mask_t,
                                            reduction="sum")

            loss_m += cfg.mask_proto_double_loss_alpha * pre_loss

        if cfg.mask_proto_crop:
            pred_masks = crop(pred_masks, pos_gt_box_t)

        if cfg.mask_proto_mask_activation == activation_func.sigmoid:
            pre_loss = F.binary_cross_entropy(torch.clamp(pred_masks, 0, 1),
                                              mask_t, reduction="none")
        else:
            pre_loss = F.smooth_l1_loss(pred_masks, mask_t, reduction="none")

        if cfg.mask_proto_normalize_mask_loss_by_sqrt_area:
            gt_area = torch.sum(mask_t, dim=(0, 1), keepdim=True)
            pre_loss = pre_loss / (torch.sqrt(gt_area) + 0.0001)

        if cfg.mask_proto_reweight_mask_loss:
            pre_loss = pre_loss * mask_reweighting[:, :, pos_idx_t]

        if cfg.mask_proto_normalize_emulate_roi_pooling:
            # Normalize by the gt box area so each instance contributes
            # comparably regardless of its size.
            weight = mask_h * mask_w if cfg.mask_proto_crop else 1
            pos_get_csize = center_size(pos_gt_box_t)
            gt_box_width = pos_get_csize[:, 2] * mask_w
            gt_box_height = pos_get_csize[:, 3] * mask_h
            pre_loss = (pre_loss.sum(dim=(0, 1)) / gt_box_width /
                        gt_box_height * weight)

        # If the number of masks were limited scale the loss accordingly
        if old_num_pos > num_pos:
            pre_loss *= old_num_pos / num_pos

        loss_m += torch.sum(pre_loss)

    losses = {"M": loss_m * cfg.mask_alpha / mask_h / mask_w}

    if cfg.mask_proto_coeff_diversity_loss:
        losses["D"] = loss_d

    return losses
# NOTE(review): this chunk starts mid-statement — the line below closes a
# call (presumably `scale = torch.Tensor(...)`) begun before this view.
[img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
img -= (104, 117, 123)  # presumably BGR channel means — confirm with training config
img = img.transpose(2, 0, 1)  # HWC -> CHW
img = torch.from_numpy(img).unsqueeze(0)  # add batch dimension
img = img.to(device)
scale = scale.to(device)

tic = time.time()
loc, conf, landms = net(img)
print('net forward time: {:.4f}'.format(time.time() - tic))

# Build priors for this image size and decode boxes and landmarks.
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize  # back to original-image pixel coordinates
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]  # class-1 probability
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
# 5 landmark points -> 10 coordinates, each scaled by (width, height).
scale1 = torch.Tensor([
    img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
    img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
])
scale1 = scale1.to(device)
landms = landms * scale1 / resize
landms = landms.cpu().numpy()

# ignore low scores