def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """Run the base network, the extra layers and every prediction header.

    The base network is executed in segments delimited by the entries of
    ``self.source_layer_indexes``. Each entry is one of:

    * a ``GraphPath``: the base net runs up to index ``s0``; the attribute
      named ``name`` of ``base_net[s0]`` is then run sub-layer by sub-layer
      and the activation after its first ``s1`` sub-layers feeds the header;
    * a tuple ``(index, added_layer)``: the base net runs up to ``index``
      and ``added_layer`` is applied to the activation before the header;
    * anything else: used directly as the end index, and the activation
      itself feeds the header.

    After the base network, every layer in ``self.extras`` contributes one
    more header output.

    Args:
        x: Input tensor fed to ``self.base_net``.

    Returns:
        ``(confidences, locations)`` concatenated along dim 1 when
        ``self.is_test`` is falsy. Otherwise ``(confidences, boxes)`` where
        the confidences went through a softmax over dim 2 and the locations
        were decoded with ``box_utils.convert_locations_to_boxes`` followed
        by ``box_utils.center_form_to_corner_form``.
    """
    confidences = []
    locations = []
    start_layer_index = 0
    header_index = 0

    for source in self.source_layer_indexes:
        # GraphPath must be tested before ``tuple`` (order kept from the
        # original code; a namedtuple-based GraphPath would match both).
        if isinstance(source, GraphPath):
            path = source
            end_layer_index = source.s0
            added_layer = None
        elif isinstance(source, tuple):
            path = None
            end_layer_index = source[0]
            added_layer = source[1]
        else:
            path = None
            end_layer_index = source
            added_layer = None

        for layer in self.base_net[start_layer_index:end_layer_index]:
            x = layer(x)

        # Compare against None explicitly: container modules define
        # ``__len__``, so plain truthiness is not a reliable presence test.
        y = added_layer(x) if added_layer is not None else x

        if path is not None:
            sub = getattr(self.base_net[end_layer_index], path.name)
            for layer in sub[:path.s1]:
                x = layer(x)
            y = x  # the header taps the activation inside the sub-module
            for layer in sub[path.s1:]:
                x = layer(x)
            # base_net[end_layer_index] has been consumed entirely.
            end_layer_index += 1

        start_layer_index = end_layer_index

        confidence, location = self.compute_header(header_index, y)
        header_index += 1
        confidences.append(confidence)
        locations.append(location)

    # Resume from ``start_layer_index`` rather than the loop variable: it
    # holds the same value after the loop, and is still defined (0) when
    # ``source_layer_indexes`` is empty.
    for layer in self.base_net[start_layer_index:]:
        x = layer(x)

    for layer in self.extras:
        x = layer(x)
        confidence, location = self.compute_header(header_index, x)
        header_index += 1
        confidences.append(confidence)
        locations.append(location)

    confidences = torch.cat(confidences, 1)
    locations = torch.cat(locations, 1)

    if not self.is_test:
        return confidences, locations

    confidences = F.softmax(confidences, dim=2)
    boxes = box_utils.convert_locations_to_boxes(
        locations,
        self.priors,
        self.config.center_variance,
        self.config.size_variance,
    )
    boxes = box_utils.center_form_to_corner_form(boxes)
    return confidences, boxes
def _forward_test(self, cls_logits, bbox_pred):
    """Decode raw head outputs into post-processed detections.

    Args:
        cls_logits: Class logits; a softmax is taken over dim 2.
        bbox_pred: Box regression output, decoded against ``self.priors``.

    Returns:
        A tuple ``(detections, {})`` where ``detections`` is whatever
        ``self.post_processor`` returns for ``(scores, boxes)`` and the
        second element is always an empty dict.
    """
    model_cfg = self.cfg.MODEL

    # Priors are created on first use, moved to the predictions' device
    # and cached on the instance.
    if self.priors is None:
        self.priors = PriorBox(self.cfg)().to(bbox_pred.device)

    scores = F.softmax(cls_logits, dim=2)
    decoded = box_utils.convert_locations_to_boxes(
        bbox_pred,
        self.priors,
        model_cfg.CENTER_VARIANCE,
        model_cfg.SIZE_VARIANCE,
    )
    corner_boxes = box_utils.center_form_to_corner_form(decoded)

    return self.post_processor((scores, corner_boxes)), {}
def forward(self, x, targets=None):
    """Predict from two backbone feature maps; return losses or decoded boxes.

    Only ``self.vgg[0]`` runs before the L2-normalised source map is taken;
    the remaining ``self.vgg`` layers produce the second source map. No
    extra layers contribute sources in this variant.

    Args:
        x: Input tensor fed to ``self.vgg``.
        targets: ``(gt_boxes, gt_labels)`` pair; only read when
            ``self.training`` is true.

    Returns:
        In training mode, a dict with ``regression_loss`` and
        ``classification_loss`` as returned by ``self.criterion``.
        Otherwise ``(confidences, boxes)`` with softmaxed confidences and
        boxes decoded through ``box_utils``.
    """
    # First source: output of the first backbone layer, L2-normalised.
    x = self.vgg[0](x)
    feature_maps = [self.l2_norm(x)]

    # Second source: output of the rest of the backbone.
    for layer_idx in range(1, len(self.vgg)):
        x = self.vgg[layer_idx](x)
    feature_maps.append(x)

    loc_chunks = []
    conf_chunks = []
    heads = zip(feature_maps, self.regression_headers, self.classification_headers)
    for fmap, reg_head, cls_head in heads:
        # Move the channel axis last before flattening per sample.
        loc_chunks.append(reg_head(fmap).permute(0, 2, 3, 1).contiguous())
        conf_chunks.append(cls_head(fmap).permute(0, 2, 3, 1).contiguous())

    confidences = torch.cat(
        [chunk.view(chunk.size(0), -1) for chunk in conf_chunks], 1)
    locations = torch.cat(
        [chunk.view(chunk.size(0), -1) for chunk in loc_chunks], 1)
    confidences = confidences.view(confidences.size(0), -1, self.num_classes)
    locations = locations.view(locations.size(0), -1, 4)

    if self.training:
        # Training: hand predictions and targets to the criterion.
        gt_boxes, gt_labels = targets
        regression_loss, classification_loss = self.criterion(
            confidences, locations, gt_labels, gt_boxes)
        return dict(
            regression_loss=regression_loss,
            classification_loss=classification_loss,
        )

    # Evaluation: decode predictions against the (lazily built) priors.
    if self.priors is None:
        self.priors = PriorBox(self.cfg)().to(locations.device)
    confidences = F.softmax(confidences, dim=2)
    boxes = box_utils.convert_locations_to_boxes(
        locations,
        self.priors,
        self.cfg.MODEL.CENTER_VARIANCE,
        self.cfg.MODEL.SIZE_VARIANCE,
    )
    boxes = box_utils.center_form_to_corner_form(boxes)
    return confidences, boxes
def forward(self, x):
    """Run the VGG backbone, extras and headers of the detector.

    Source feature maps are taken after the first 23 ``self.vgg`` layers
    (passed through ``self.l2_norm``), after the full ``self.vgg`` stack,
    and after every second layer of ``self.extras``.

    Args:
        x: Input tensor fed to ``self.vgg``.

    Returns:
        ``(confidences, locations)`` reshaped to ``(batch, -1, num_classes)``
        and ``(batch, -1, 4)`` when ``self.is_test`` is falsy; otherwise
        ``(confidences, boxes)`` with softmaxed confidences and boxes decoded
        through ``box_utils``.
    """
    # Backbone up to the first tap (the original comment calls it Conv4_3).
    for layer_idx in range(23):
        x = self.vgg[layer_idx](x)
    feature_maps = [self.l2_norm(x)]

    # Rest of the backbone (original comment: "apply vgg up to fc7").
    for layer_idx in range(23, len(self.vgg)):
        x = self.vgg[layer_idx](x)
    feature_maps.append(x)

    # Extra layers: every odd-indexed layer output is a source map.
    for position, extra in enumerate(self.extras):
        x = F.relu(extra(x), inplace=True)
        if position % 2 == 1:
            feature_maps.append(x)

    loc_chunks = []
    conf_chunks = []
    heads = zip(feature_maps, self.regression_headers, self.classification_headers)
    for fmap, reg_head, cls_head in heads:
        # Channel axis goes last so each location's values stay adjacent.
        loc_chunks.append(reg_head(fmap).permute(0, 2, 3, 1).contiguous())
        conf_chunks.append(cls_head(fmap).permute(0, 2, 3, 1).contiguous())

    confidences = torch.cat(
        [chunk.view(chunk.size(0), -1) for chunk in conf_chunks], 1)
    locations = torch.cat(
        [chunk.view(chunk.size(0), -1) for chunk in loc_chunks], 1)
    confidences = confidences.view(confidences.size(0), -1, self.num_classes)
    locations = locations.view(locations.size(0), -1, 4)

    if not self.is_test:
        return confidences, locations

    # Test mode: build priors on first use, then decode.
    if self.priors is None:
        self.priors = PriorBox(self.cfg)().to(locations.device)
    confidences = F.softmax(confidences, dim=2)
    boxes = box_utils.convert_locations_to_boxes(
        locations,
        self.priors,
        self.cfg.MODEL.CENTER_VARIANCE,
        self.cfg.MODEL.SIZE_VARIANCE,
    )
    boxes = box_utils.center_form_to_corner_form(boxes)
    return confidences, boxes
# Script excerpt: load training batches and turn the encoded targets back
# into pixel-space boxes for inspection. The excerpt ends before the code
# that undoes the image normalisation.
cfg.merge_from_list(args.opts)
cfg.freeze()
data_loader = make_data_loader(cfg, is_train=True)
# Normalisation statistics from the config; they are not used within this
# excerpt (presumably consumed by the "remove normalization" step below).
mean = np.array([cfg.INPUT.PIXEL_MEAN]).reshape(1, 1, -1)
std = np.array([cfg.INPUT.PIXEL_STD])
priors = PriorBox(cfg)()
# make_data_loader may hand back a list of loaders; use the first one.
if isinstance(data_loader, list):
    data_loader = data_loader[0]
for img, batch, *_ in data_loader:
    boxes = batch["boxes"]
    # The SSD target transform encodes target boxes as offsets relative to
    # the priors, so the transformation has to be reverted here.
    boxes = box_utils.convert_locations_to_boxes(boxes, priors, cfg.MODEL.CENTER_VARIANCE, cfg.MODEL.SIZE_VARIANCE)
    boxes = box_utils.center_form_to_corner_form(boxes)
    # Keep only the first sample of the batch and drop every prior whose
    # label is 0 (background).
    boxes = boxes[0]
    labels = batch["labels"][0].squeeze().cpu().numpy()
    boxes = boxes[labels != 0]
    labels = labels[labels != 0]
    # Scale to image width and height: columns 0/2 are multiplied by
    # img.shape[3], columns 1/3 by img.shape[2].
    boxes[:, [0, 2]] *= img.shape[3]
    boxes[:, [1, 3]] *= img.shape[2]
    img = img.numpy()
    # NCHW to HWC (only select first element of batch)
    img = np.moveaxis(img, 1, -1)[0]
    # Remove normalization
def forward(self, x, targets=None, auxiliary_task=False):
    """Run detection, or the auxiliary self-supervised head, on a batch.

    The backbone (``self.vgg`` plus ``self.extras``) always runs. With
    ``auxiliary_task`` set, the flattened final activation goes through
    ``self.ss_dropout`` and ``self.ss_classifier`` and a cross-entropy loss
    against ``targets`` is returned; the detection headers are skipped.
    Otherwise the detection headers run on the collected source maps.

    Args:
        x: Input tensor fed to ``self.vgg``.
        targets: ``(gt_boxes, gt_labels)`` for detection training, or the
            class indices for the auxiliary task. Unused in evaluation.
        auxiliary_task: Select the self-supervised branch. It is only
            usable in training mode and only when
            ``self.cfg.MODEL.SELF_SUPERVISOR.TYPE == "rotation"``.

    Returns:
        * training, detection: dict with ``regression_loss`` and
          ``classification_loss``;
        * training, auxiliary: dict with ``aux_loss``;
        * evaluation: ``(confidences, boxes)`` with softmaxed confidences
          and boxes decoded through ``box_utils``.

    Raises:
        ValueError: If ``auxiliary_task`` is requested in evaluation mode or
            with a self-supervisor type other than ``"rotation"``. These
            combinations previously failed later with an unbound local or
            an attribute error on a Python list.
    """
    if auxiliary_task:
        # Fail early and explicitly: neither combination below ever
        # produced a result.
        if not self.training:
            raise ValueError(
                "auxiliary_task=True is only supported in training mode")
        if self.cfg.MODEL.SELF_SUPERVISOR.TYPE != "rotation":
            raise ValueError(
                "auxiliary_task=True requires "
                "cfg.MODEL.SELF_SUPERVISOR.TYPE == 'rotation'")

    sources = []

    # Backbone up to the first tap (the original comment calls it Conv4_3).
    for i in range(23):
        x = self.vgg[i](x)
    sources.append(self.l2_norm(x))

    # Rest of the backbone (original comment: "apply vgg up to fc7").
    for i in range(23, len(self.vgg)):
        x = self.vgg[i](x)
    sources.append(x)

    # Extra layers: every odd-indexed layer output is a source map.
    for k, extra in enumerate(self.extras):
        x = F.relu(extra(x), inplace=True)
        if k % 2 == 1:
            sources.append(x)

    if auxiliary_task:
        # The rotation task is applied after the last layer: classify the
        # flattened final activation.
        flat = x.view(x.size(0), -1)
        aux_logits = self.ss_classifier(self.ss_dropout(flat))
        # The criterion is only built on the branch that needs it.
        aux_loss = nn.CrossEntropyLoss()(aux_logits, targets)
        return dict(aux_loss=aux_loss)

    locations = []
    confidences = []
    heads = zip(sources, self.regression_headers, self.classification_headers)
    for fmap, reg_head, cls_head in heads:
        # Channel axis goes last before flattening per sample.
        locations.append(reg_head(fmap).permute(0, 2, 3, 1).contiguous())
        confidences.append(cls_head(fmap).permute(0, 2, 3, 1).contiguous())

    confidences = torch.cat(
        [o.view(o.size(0), -1) for o in confidences], 1)
    locations = torch.cat(
        [o.view(o.size(0), -1) for o in locations], 1)
    confidences = confidences.view(confidences.size(0), -1, self.num_classes)
    locations = locations.view(locations.size(0), -1, 4)

    if self.training:
        # Training: compute the detection losses.
        gt_boxes, gt_labels = targets
        regression_loss, classification_loss = self.criterion(
            confidences, locations, gt_labels, gt_boxes)
        return dict(
            regression_loss=regression_loss,
            classification_loss=classification_loss,
        )

    # Evaluation: decode predictions against the (lazily built) priors.
    if self.priors is None:
        self.priors = PriorBox(self.cfg)().to(locations.device)
    confidences = F.softmax(confidences, dim=2)
    boxes = box_utils.convert_locations_to_boxes(
        locations,
        self.priors,
        self.cfg.MODEL.CENTER_VARIANCE,
        self.cfg.MODEL.SIZE_VARIANCE,
    )
    boxes = box_utils.center_form_to_corner_form(boxes)
    return confidences, boxes
def forward(self, x, targets, score_map=None):
    """Run the detector together with the FCN score-map branch.

    Besides the usual source feature maps, activations of the backbone
    layers listed in ``self.downsample_layers_index`` are collected and
    merged by an upsampling branch into a sigmoid score map.

    Args:
        x: Input tensor fed to ``self.vgg``.
        targets: ``(gt_boxes, gt_labels)`` pair; only read in training mode.
        score_map: Ground-truth score map passed to
            ``self.dice_coefficient``; only read in training mode.

    Returns:
        In training mode, a dict with ``regression_loss``,
        ``classification_loss`` and ``fcn_loss``. Otherwise
        ``(confidences, quad, score_map)`` where the confidences are
        softmaxed over dim 2, ``quad`` is the output of
        ``box_utils.convert_locations_to_boxes`` and ``score_map`` is the
        predicted score map moved to the CPU.
    """
    downsample_feature_map = []
    sources = []

    # Backbone up to the first tap (the original comment calls it Conv4_3),
    # collecting the activations needed by the FCN branch on the way.
    for i in range(23):
        x = self.vgg[i](x)
        if i in self.downsample_layers_index:
            downsample_feature_map.append(x)
    sources.append(self.l2_norm(x))

    # Rest of the backbone (original comment: "apply vgg up to fc7").
    for i in range(23, len(self.vgg)):
        x = self.vgg[i](x)
        if i in self.downsample_layers_index:
            downsample_feature_map.append(x)
    sources.append(x)  # labelled Conv_7 in the original comments

    # ---- FCN part -------------------------------------------------------
    # Starts from the third collected map and merges the two earlier ones
    # after each upsampling stage. At least three collected maps are needed.
    h = downsample_feature_map[2]
    g = self.unpool1(h)
    g = self.unpool1_conv2d(g)
    # add_ works in place on g, the output of unpool1_conv2d.
    c = self.conv1(g.add_(downsample_feature_map[1]))
    c = self.bn1(c)
    c = self.relu1(c)

    g = self.unpool2(c)
    g = self.unpool2_conv2d(g)
    c = self.conv2(g.add_(downsample_feature_map[0]))
    c = self.bn2(c)
    c = self.relu2(c)

    f_score = self.sigmoid(self.conv3(c))
    # NOTE(review): squeeze() without a dim drops every singleton axis, so
    # a batch of size 1 loses its batch axis too. Confirm that
    # dice_coefficient expects that.
    f_score = torch.squeeze(f_score)

    # ---- Extra layers ---------------------------------------------------
    # Every odd-indexed extra layer output is a source map (the original
    # comments label them Conv_8_2 .. Conv_11_2).
    for k, extra in enumerate(self.extras):
        x = F.relu(extra(x), inplace=True)
        if k % 2 == 1:
            sources.append(x)

    # ---- Prediction headers ---------------------------------------------
    locations = []
    confidences = []
    heads = zip(sources, self.regression_headers, self.classification_headers)
    for fmap, reg_head, cls_head in heads:
        # The feature maps are NCHW; after permute they are NHWC.
        locations.append(reg_head(fmap).permute(0, 2, 3, 1).contiguous())
        confidences.append(cls_head(fmap).permute(0, 2, 3, 1).contiguous())

    confidences = torch.cat(
        [o.view(o.size(0), -1) for o in confidences], 1)
    locations = torch.cat(
        [o.view(o.size(0), -1) for o in locations], 1)
    confidences = confidences.view(confidences.size(0), -1, self.num_classes)
    # Eight regression values per prior (the result is named ``quad``
    # below, presumably four corner points - confirm against box_utils).
    locations = locations.view(locations.size(0), -1, 8)

    if self.training:
        # Pre-matched default-box locations and classes serve as ground
        # truth for the predicted confidences and locations.
        gt_boxes, gt_labels = targets
        regression_loss, classification_loss = self.criterion(
            confidences, locations, gt_labels, gt_boxes)
        seg_loss = self.dice_coefficient(score_map, f_score)
        return dict(
            regression_loss=regression_loss,
            classification_loss=classification_loss,
            fcn_loss=seg_loss,
        )

    # Evaluation: decode predictions. Priors are built on first use and
    # moved to the predictions' device so decoding also works when the
    # model runs on an accelerator.
    if self.priors is None:
        self.priors = PriorBox(self.cfg)().to(locations.device)
    confidences = F.softmax(confidences, dim=2)
    quad = box_utils.convert_locations_to_boxes(
        locations,
        self.priors,
        self.cfg.MODEL.CENTER_VARIANCE,
        self.cfg.MODEL.SIZE_VARIANCE,
    )
    score_map = f_score.cpu()
    return confidences, quad, score_map
def visualize_prior_boxes(cfg, layer):
    """Plot the prior boxes of one feature map on an image-sized canvas.

    Every prior of the selected feature map is drawn as a faint centre
    dot; only the priors around the middle of the selection are drawn as
    full shapes, to keep the figure readable. The figure is shown with
    ``plt.show()``.

    Args:
        cfg: Config exposing ``INPUT.IMAGE_SIZE``, ``MODEL.CENTER_VARIANCE``,
            ``MODEL.SIZE_VARIANCE``, ``MODEL.PRIORS.BOXES_PER_LOCATION`` and
            ``MODEL.PRIORS.FEATURE_MAPS``.
        layer: Index of the feature map whose priors are visualised.
    """
    image_w = cfg.INPUT.IMAGE_SIZE[0]
    image_h = cfg.INPUT.IMAGE_SIZE[1]
    per_location = cfg.MODEL.PRIORS.BOXES_PER_LOCATION
    use_ellipses = False  # draw rectangles rather than ellipses

    def count_priors(fmap_idx):
        """Number of priors generated for feature map ``fmap_idx``."""
        cells = np.prod(cfg.MODEL.PRIORS.FEATURE_MAPS[fmap_idx])
        return int(per_location[fmap_idx] * cells)

    def draw_prior(axes, box, color, highlighted=True, as_ellipse=True):
        """Draw one centre-form prior, scaled from relative to pixel units."""
        cx, cy, w, h = box
        cx *= image_w
        cy *= image_h
        w *= image_w
        h *= image_h
        if not highlighted:
            # Non-highlighted priors only get a faint centre marker.
            plt.plot(cx, cy, f"o{color}", alpha=0.1)
            return
        if as_ellipse:
            shape = matplotlib.patches.Ellipse(
                [cx, cy], w, h, alpha=.1, color=color)
        else:
            left, top = cx - w / 2, cy - h / 2
            right, bottom = cx + w / 2, cy + h / 2
            shape = plt.Rectangle(
                [left, top], right - left, bottom - top,
                alpha=0.3, color=color)
        axes.add_artist(shape)
        plt.plot(cx, cy, f"o{color}")

    boxes_here = per_location[layer]
    ratio_indices = list(range(boxes_here))

    fig, ax = plt.subplots()

    # Create the priors and decode them with all-zero offsets.
    priors = PriorBox(cfg)()
    print("Prior box shape:", priors.shape)
    # Priors are stored feature map by feature map: all priors of the first
    # feature map come first, then those of the next (lower) one.
    print("First prior example:", priors[5])
    zero_offsets = torch.zeros_like(priors)[None]
    decoded_priors = convert_locations_to_boxes(
        zero_offsets, priors,
        cfg.MODEL.CENTER_VARIANCE, cfg.MODEL.SIZE_VARIANCE)[0]

    # Canvas with a 100 px margin around the image.
    plt.ylim([-100, image_h + 100])
    plt.xlim([-100, image_w + 100])

    # Priors of the selected feature map start after those of all earlier
    # feature maps.
    first = sum(count_priors(earlier) for earlier in range(layer))
    palette = ["r", "g", "b", "y", "m", "b", "w"]

    # Every prior index in the feature map is selected; its colour follows
    # its position within the location (index modulo boxes per location).
    selected = []
    selected_colors = []
    for prior_idx in range(first, first + count_priors(layer)):
        ratio_idx = prior_idx % boxes_here
        selected.append(prior_idx)
        selected_colors.append(palette[ratio_idx])

    # Outline of the image itself.
    ax.add_artist(
        plt.Rectangle([0, 0], image_w, image_h,
                      fill=False, edgecolor="black"))

    # Only the priors around the middle of the selection are drawn fully.
    middle = len(selected) / 2
    half_window = len(ratio_indices) // 2
    for position, prior_idx in enumerate(selected):
        in_window = middle - half_window <= position < middle + half_window
        draw_prior(ax, decoded_priors[prior_idx],
                   selected_colors[position], in_window, use_ellipses)
    plt.show()