def forward(self,
            image: torch.Tensor,  # (batch_size, c, h, w)
            image_sizes: torch.Tensor,  # (batch_size, 2)
            boxes: torch.Tensor = None,  # (batch_size, max_boxes_in_batch, 4)
            box_classes: torch.Tensor = None) -> Dict[str, torch.Tensor]:  # pylint: disable=arguments-differ
    """Run the backbone and RPN head; add RPN losses when ``boxes`` is given.

    Args:
        image: batched image tensor.
        image_sizes: one size pair per image; the two entries are swapped
            before being handed to ``ImageList``.
        boxes: optional padded ground-truth boxes. When ``None`` only the
            predictions are returned.
        box_classes: accepted for interface compatibility; not read here.

    Returns:
        A dict with the features, concatenated objectness / box regression,
        anchors, the raw ``image_sizes`` and the per-level anchor counts.
        When ``boxes`` is given it also holds ``loss_objectness``,
        ``loss_rpn_box_reg`` and their weighted sum ``loss``.
    """
    size_pairs = [(size[1].item(), size[0].item()) for size in image_sizes]
    image_list = ImageList(image, size_pairs)

    features = self.backbone.forward(image)
    per_level_objectness, per_level_regression = self._rpn_head(features)
    anchors: List[torch.Tensor] = self.anchor_generator(image_list, features)
    num_anchors_per_level = [level[0].numel() for level in per_level_objectness]
    objectness, rpn_box_regression = concat_box_prediction_layers(
        per_level_objectness, per_level_regression)

    out = {
        'features': features,
        'objectness': objectness,
        'rpn_box_regression': rpn_box_regression,
        'anchors': anchors,
        'sizes': image_sizes,
        'num_anchors_per_level': num_anchors_per_level,
    }
    if boxes is None:
        # Inference path: nothing to supervise against.
        return out

    labels, matched_gt_boxes = self.assign_targets_to_anchors(
        anchors, object_utils.unpad(boxes))
    regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)

    # The sampler output is concatenated over images and turned into flat
    # index tensors for the positive and negative samples.
    pos_selection, neg_selection = self.sampler(labels)
    pos_idx = torch.nonzero(torch.cat(pos_selection, dim=0)).squeeze(1)
    neg_idx = torch.nonzero(torch.cat(neg_selection, dim=0)).squeeze(1)
    sampled_idx = torch.cat([pos_idx, neg_idx], dim=0)

    flat_objectness = objectness.flatten()
    flat_labels = torch.cat(labels, dim=0)
    flat_targets = torch.cat(regression_targets, dim=0)

    # Box loss is summed over positives but normalised by all sampled anchors.
    box_l1_sum = F.l1_loss(
        rpn_box_regression[pos_idx],
        flat_targets[pos_idx],
        reduction="sum",
    )
    loss_rpn_box_reg = box_l1_sum / (sampled_idx.numel())
    loss_objectness = F.binary_cross_entropy_with_logits(
        flat_objectness[sampled_idx], flat_labels[sampled_idx])

    self._loss_meters['rpn_cls_loss'](loss_objectness.item())
    self._loss_meters['rpn_reg_loss'](loss_rpn_box_reg.item())

    out["loss_objectness"] = loss_objectness
    out["loss_rpn_box_reg"] = loss_rpn_box_reg
    out["loss"] = loss_objectness + 10 * loss_rpn_box_reg
    return out
def forward(
    self,
    images,       # type: List[Tensor]
    targets=None  # type: Optional[List[Dict[str, Tensor]]]
):
    # type: (...) -> Tuple[ImageList, Optional[List[Dict[str, Tensor]]]]
    """Normalize, resize and batch ``images`` into an ``ImageList``.

    ``targets`` (when given) is shallow-copied per entry so the caller's
    dicts are not modified in place.

    Returns:
        ``(image_list, targets)`` where ``targets`` is the copied list, or
        ``None`` when no targets were passed.

    Raises:
        ValueError: if any image is not a 3-d tensor.
    """
    images = [img for img in images]

    if targets is not None:
        # Explicit loops instead of a dict comprehension: kept for
        # TorchScript compatibility, as in the original code.
        copied_targets: List[Dict[str, Tensor]] = []
        for entry in targets:
            entry_copy: Dict[str, Tensor] = {}
            for key, value in entry.items():
                entry_copy[key] = value
            copied_targets.append(entry_copy)
        targets = copied_targets

    for i, image in enumerate(images):
        # NOTE(review): the original guard was
        # `targets is not None and {}`; an empty dict is falsy, so the
        # per-image target was always None and targets are never resized.
        # Confirm whether that is intended.
        target_index = None
        if image.dim() != 3:
            raise ValueError(
                "images is expected to be a list of 3d tensors "
                "of shape [C, H, W], got {}".format(image.shape))
        image = self.normalize(image)
        image, target_index = self.resize(image, target_index)
        images[i] = image
        if targets is not None and target_index is not None:
            targets[i] = target_index

    # Sizes must be captured before batching pads the images together.
    image_sizes = [img.shape[-2:] for img in images]
    images = self.batch_images(images)

    image_sizes_list = torch.jit.annotate(List[Tuple[int, int]], [])
    for size in image_sizes:
        assert len(size) == 2
        image_sizes_list.append((size[0], size[1]))

    return ImageList(images, image_sizes_list), targets
def forward(self, images, targets=None):
    """Validate and batch ``images`` without normalising or resizing them.

    Each image must be a 3-d tensor. Images and targets are written back
    unchanged; the normalize/resize steps are disabled in this variant.

    Returns:
        ``(ImageList, targets)``; ``targets`` is returned as passed.

    Raises:
        ValueError: if an image is not 3-dimensional.
    """
    has_targets = targets is not None
    for idx, image in enumerate(images):
        target = targets[idx] if has_targets else None
        if image.dim() != 3:
            raise ValueError("images is expected to be a list of 3d tensors "
                             "of shape [C, H, W], got {}".format(image.shape))
        # Normalisation / resizing intentionally skipped in this variant.
        images[idx] = image
        if has_targets:
            targets[idx] = target

    image_sizes = [img.shape[-2:] for img in images]
    batched = self.batch_images(images)
    return ImageList(batched, image_sizes), targets
def inference(model, input, transform, device="cuda"):
    """Segment one image file and save a colour visualisation of the result.

    The output path is the input path with ``"/data/"`` replaced by
    ``"/output/"``; its parent directory is created if needed.

    Args:
        model: network whose ``forward`` takes an ``ImageList`` and returns a
            pair whose first item holds ``"semantic_segmentation_result"``.
        input: path of the image file to read.
        transform: callable applied to ``{'image': <PIL image>}``.
        device: device the transformed image is moved to.
    """
    source_image = Image.open(input)
    sample = transform({'image': source_image})

    with torch.no_grad():
        # PIL reports (width, height); ImageList is given the reversed pair.
        batch = ImageList([sample['image'].to(device)],
                          image_sizes=[source_image.size[::-1]])
        panoptic_result, _ = model.forward(batch)
        semseg_logics = [
            logits.to('cpu')
            for logits in panoptic_result["semantic_segmentation_result"]
        ]

    # Export the result
    output = input.replace("/data/", "/output/")
    output_dir = parent(output)
    os.makedirs(output_dir, exist_ok=True)
    assert os.path.exists(output_dir)

    class_maps = [torch.argmax(logits, dim=0) for logits in semseg_logics]
    seg_vis = visualize_segmentation_image(
        class_maps[0], source_image, cityscapes_colormap_sky)
    Image.fromarray(seg_vis.astype('uint8')).save(output)
def forward(self, images, targets=None):
    """Rescale, normalize and batch ``images`` (and rescale ``targets``).

    ``images`` and ``targets`` are updated in place, entry by entry.

    Args:
        images: indexable collection of image tensors.
        targets: optional per-image targets, parallel to ``images``.

    Returns:
        ``(ImageList, targets)`` when targets were supplied, otherwise just
        the ``ImageList``.
    """
    # Identity check instead of `== None` / truthiness: one consistent test
    # that cannot be affected by a container's own __eq__ or __bool__.
    has_targets = targets is not None

    for i in range(len(images)):
        if has_targets:
            img, target = self.rescale(images[i], targets[i])
        else:
            img, target = self.rescale(images[i])
        images[i] = self.normalize(img)
        if has_targets:
            targets[i] = target

    images_batched = self.batchimages(images)
    # Per-image sizes after rescaling (the unbatched list is still intact).
    imgs_size = [img.shape[-2:] for img in images]
    imgs_list = ImageList(images_batched, imgs_size)

    if has_targets:
        return imgs_list, targets
    return imgs_list
def forward(self, images, targets=None):
    """Normalize, resize and batch ``images``; resize ``targets`` alongside.

    ``images`` is copied into a new list first. ``targets`` (when given) is
    updated in place with the resized target of each image, after checking
    that the resized image and the target's ``'masks'`` entry agree on their
    last two dimensions.

    Returns:
        ``(ImageList, targets)``.
    """
    batch = list(images)
    has_targets = targets is not None

    for position in range(len(batch)):
        target = targets[position] if has_targets else None
        resized, target = self.resize(self.normalize(batch[position]), target)
        batch[position] = resized
        if has_targets and target is not None:
            assert resized.shape[-2:] == target['masks'].shape[-2:]
            targets[position] = target

    # Record sizes before the images are padded into one batch.
    image_sizes = [img.shape[-2:] for img in batch]
    batched = self.batch_images(batch)
    return ImageList(batched, image_sizes), targets
def box_validation(box_head, test_loader, optimizer, epoch, backbone, rpn,
                   keep_topK, l=5, effective_batch=32):
    """Evaluate the box head on ``test_loader`` and return mean losses.

    Args:
        box_head: module providing ``create_ground_truth``,
            ``MultiScaleRoiAlign``, ``forward`` and ``compute_loss``.
        test_loader: iterable of dict batches with keys ``'images'``,
            ``'bbox'`` and ``'labels'``.
        optimizer: unused; kept for signature parity with ``box_train``.
        epoch: unused; kept for signature parity with ``box_train``.
        backbone: feature extractor called on the image batch.
        rpn: region proposal network called as ``rpn(image_list, features)``.
        keep_topK: number of leading proposals kept per image.
        l: weighting argument forwarded to ``compute_loss`` (was a
            hard-coded placeholder of 5).
        effective_batch: sampling size forwarded to ``compute_loss`` (was a
            hard-coded placeholder of 32).

    Returns:
        ``(mean_total_loss, mean_class_loss, mean_regression_loss)``
        averaged over the number of batches.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    epoch_loss = 0.0
    epoch_clas_loss = 0.0
    epoch_regr_loss = 0.0
    num_batches = 0

    # Validation never back-propagates, so the whole loop runs without
    # autograd bookkeeping.
    with torch.no_grad():
        for data in test_loader:
            images = data['images'].to(device)
            boxes = data['bbox']
            labels = data['labels']

            backout = backbone(images)
            im_lis = ImageList(images, [(800, 1088)] * images.shape[0])
            rpnout = rpn(im_lis, backout)
            proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
            fpn_feat_list = list(backout.values())

            gt_labels, gt_regressor_target = box_head.create_ground_truth(
                proposals, labels, boxes)
            # This is the input to the box head.
            roi_align_result = box_head.MultiScaleRoiAlign(
                fpn_feat_list, proposals)
            clas_out, regr_out = box_head.forward(roi_align_result.to(device))
            loss, loss_c, loss_r = box_head.compute_loss(
                clas_out, regr_out, gt_labels, gt_regressor_target,
                l, effective_batch)

            epoch_loss += loss.item()
            epoch_clas_loss += loss_c.item()
            epoch_regr_loss += loss_r.item()
            num_batches += 1

            # Drop references before the next batch to keep peak GPU
            # memory down.
            del loss, loss_c, loss_r
            del images, labels, boxes
            del clas_out, regr_out
            del gt_labels, gt_regressor_target
            torch.cuda.empty_cache()

    if num_batches == 0:
        raise ValueError("test_loader produced no batches")

    # Average over the batch COUNT; the previous code divided by the last
    # zero-based index, which is off by one and zero for a single batch.
    return (epoch_loss / num_batches,
            epoch_clas_loss / num_batches,
            epoch_regr_loss / num_batches)
def test_defaultbox_generator(self):
    """Default boxes for two blank 15x15 images match the reference values."""
    raw_images = torch.zeros(2, 3, 15, 15)
    feature_maps = [torch.zeros(2, 8, 1, 1)]
    image_list = ImageList(raw_images,
                           [img.shape[-2:] for img in raw_images])

    generator = self._init_test_defaultbox_generator()
    generator.eval()
    dboxes = generator(image_list, feature_maps)

    expected = torch.tensor([
        [6.3750, 6.3750, 8.6250, 8.6250],
        [4.7443, 4.7443, 10.2557, 10.2557],
        [5.9090, 6.7045, 9.0910, 8.2955],
        [6.7045, 5.9090, 8.2955, 9.0910],
    ])

    self.assertEqual(len(dboxes), 2)
    # Both images are identical, so both must yield the same four boxes.
    for image_idx in (0, 1):
        self.assertEqual(tuple(dboxes[image_idx].shape), (4, 4))
    for image_idx in (0, 1):
        self.assertTrue(dboxes[image_idx].allclose(expected))
def test_defaultbox_generator(self):
    """Default boxes for two blank 15x15 images match the reference values."""
    raw_images = torch.zeros(2, 3, 15, 15)
    feature_maps = [torch.zeros(2, 8, 1, 1)]
    image_list = ImageList(raw_images,
                           [img.shape[-2:] for img in raw_images])

    generator = self._init_test_defaultbox_generator()
    generator.eval()
    dboxes = generator(image_list, feature_maps)

    expected = torch.tensor([
        [6.9750, 6.9750, 8.0250, 8.0250],
        [6.7315, 6.7315, 8.2685, 8.2685],
        [6.7575, 7.1288, 8.2425, 7.8712],
        [7.1288, 6.7575, 7.8712, 8.2425],
    ])

    self.assertEqual(len(dboxes), 2)
    # Both images are identical, so both must yield the same four boxes.
    for image_idx in (0, 1):
        self.assertEqual(tuple(dboxes[image_idx].shape), (4, 4))
    for image_idx in (0, 1):
        self.assertTrue(dboxes[image_idx].allclose(expected))
def test_anchor_generator(self):
    """The anchor generator emits one (N, 4) tensor per image, N == 126."""
    raw_images = torch.randn(2, 3, 16, 32)
    feature_maps = list(self.get_features(raw_images).values())
    image_list = ImageList(raw_images,
                           [img.shape[-2:] for img in raw_images])

    generator = self._init_test_anchor_generator()
    generator.eval()
    anchors = generator(image_list, feature_maps)

    # Expected count: grid cells per level times anchors per location.
    grid_sizes = [fmap.shape[-2:] for fmap in feature_maps]
    num_anchors_estimated = 0
    for (grid_h, grid_w), per_location in zip(
            grid_sizes, generator.num_anchors_per_location()):
        num_anchors_estimated += grid_h * grid_w * per_location

    self.assertEqual(num_anchors_estimated, 126)
    self.assertEqual(len(anchors), 2)
    for image_idx in (0, 1):
        self.assertEqual(tuple(anchors[image_idx].shape),
                         (num_anchors_estimated, 4))
def test_defaultbox_generator(self):
    """Default boxes for two blank 15x15 images match the reference values."""
    raw_images = torch.zeros(2, 3, 15, 15)
    feature_maps = [torch.zeros(2, 8, 1, 1)]
    image_list = ImageList(raw_images,
                           [img.shape[-2:] for img in raw_images])

    generator = self._init_test_defaultbox_generator()
    generator.eval()
    dboxes = generator(image_list, feature_maps)

    expected = torch.tensor([
        [6.3750, 6.3750, 8.6250, 8.6250],
        [4.7443, 4.7443, 10.2557, 10.2557],
        [5.9090, 6.7045, 9.0910, 8.2955],
        [6.7045, 5.9090, 8.2955, 9.0910],
    ])

    assert len(dboxes) == 2
    # Both images are identical, so both must yield the same four boxes.
    for image_idx in (0, 1):
        assert tuple(dboxes[image_idx].shape) == (4, 4)
    for image_idx in (0, 1):
        torch.testing.assert_close(dboxes[image_idx], expected,
                                   rtol=1e-5, atol=1e-8)
# Extract per-batch RoI-aligned feature vectors from the frozen backbone and
# RPN. Everything runs without autograd.
with torch.no_grad():
    # Disabled: reload a cached ground-truth dict after the first epoch.
    # if epoch > 0:
    #     box_head.gt_dict.clear()
    #     gt_path = os.path.join('.', 'drive', 'My Drive', 'CIS 680', 'Faster_RCNN', 'saved_dicts3',
    #                            'gt_val_dict.h5')
    #     box_head.gt_dict = torch.load(gt_path)
    for j, data in enumerate(test_loader, 0):
        # Each batch is a 4-tuple; `label` and `bbox` are not used below.
        images, label, bbox, index = data
        # Take the features from the backbone
        backout = backbone(images.float())
        # The RPN implementation takes as first argument the following image list
        # (every image is declared to be 800 x 1088).
        im_lis = ImageList(images.float(), [(800, 1088)] * images.shape[0])
        # Then we pass the image list and the backbone output through the rpn
        rpnout = rpn(im_lis, backout)
        # The final output is
        # A list of proposal tensors: list:len(bz){(keep_topK,4)}
        proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
        # A list of features produces by the backbone's FPN levels: list:len(FPN){(bz,256,H_feat,W_feat)}
        fpn_feat_list = list(backout.values())
        # Release the large intermediates before RoI align; the feature maps
        # stay alive through fpn_feat_list.
        del images, backout, rpnout, im_lis
        torch.cuda.empty_cache()
        # NOTE(review): this call passes `index` as a third argument, unlike
        # other MultiScaleRoiAlign call sites — confirm the signature.
        feature_vectors = box_head.MultiScaleRoiAlign(fpn_feat_list, proposals, index)
def run_inference(images, index, backbone, rpn, boxHead):
    """
    Run the detection pipeline (backbone, RPN, box head, NMS) on one batch.

    :param images: batched image tensor fed to the backbone
    :param index: not used by this function
    :param backbone: feature extractor; its output exposes ``.values()``
    :param rpn: region proposal network, called as ``rpn(image_list, features)``
    :param boxHead: box head providing ``MultiScaleRoiAlign``, ``__call__``
        and ``postprocess_detections``
    :return: ``(post_nms_prob, post_nms_class, post_nms_box)``
    """
    with torch.no_grad():
        # Backbone features, then proposals from the RPN. The RPN takes an
        # ImageList as its first argument; all images are declared 800x1088.
        backout = backbone(images)
        image_list = ImageList(images, [(800, 1088)] * images.shape[0])
        rpnout = rpn(image_list, backout)

        # Per image: the first keep_topK proposals (keep_topK is read from
        # the enclosing module scope).
        proposals = [per_image[0:keep_topK, :] for per_image in rpnout[0]]
        # One feature map per FPN level.
        fpn_feat_list = list(backout.values())

        feature_vectors = boxHead.MultiScaleRoiAlign(fpn_feat_list, proposals)
        class_logits, box_pred = boxHead(feature_vectors)
        class_probs = torch.softmax(class_logits, dim=1)  # todo: check softmax is applied everywhere

        # Convert proposals from corner form (x1 y1 x2 y2) to centre/size.
        corners = torch.cat(proposals, dim=0)
        proposal_xywh = torch.zeros_like(corners, device=corners.device)
        proposal_xywh[:, 0:2] = (corners[:, 0:2] + corners[:, 2:4]) / 2
        proposal_xywh[:, 2:4] = torch.abs(corners[:, 2:4] - corners[:, 0:2])

        # Decode the network output; box_decoded is x1, y1, x2, y2.
        prob_simp, class_simp, box_simp = utils.simplifyOutputs(
            class_probs, box_pred)
        box_decoded = utils.decode_output(proposal_xywh, box_simp)

        post_nms_prob, post_nms_class, post_nms_box = boxHead.postprocess_detections(
            prob_simp,
            class_simp,
            box_decoded,
            conf_thresh=0.8,
            keep_num_preNMS=200,
            keep_num_postNMS=3,
            IOU_thresh=0.5)

        assert post_nms_class.dim() == 1
        assert post_nms_box.dim() == 2

    return post_nms_prob, post_nms_class, post_nms_box
def forward(self, _images, targets=None, return_result=False):
    """Detect objects from six camera views of a single sample.

    Args:
        _images: batch of camera images; only batch size 1 is accepted and
            six views are read (``_images[0, 0..5]``).
        targets: list of per-sample dicts; entry 0 must hold ``'boxes'``.
            NOTE(review): despite the ``None`` default, ``targets`` is
            iterated before the ``None`` check below, so ``None`` fails
            earlier with a different error — confirm intended contract.
        return_result: selects the return value (see Returns).

    Returns:
        When ``return_result`` is true: the tuple ``(mask_ts,
        mask_ts_numerator, mask_ts_denominator, detection_ts,
        detection_ts_numerator, detection_ts_denominator, detections,
        masks)`` where the mask entries are fixed placeholders.
        Otherwise: the dict of detector, proposal and (zero) mask losses.

    Raises:
        ValueError: in training mode when the transformed targets are None.
    """
    bs = _images.size(0)
    assert bs == 1
    # Process images: transform each of the six views on CPU into a fixed
    # (1, 6, 3, 400, 400) buffer, then move it back to the input's device.
    device = _images.device
    images = torch.zeros(1, 6, 3, 400, 400)
    for i in range(6):
        images[0, i] = self.img_transform(_images[0, i].cpu())
    del _images
    images = images.to(device)
    # Process targets
    # label_index = targets[0]['labels'] == 2
    # targets[0]['boxes'] = targets[0]['boxes'][label_index]
    # targets[0]['labels'] = targets[0]['labels'][label_index]
    # Shallow-copy each target dict so the caller's dicts are not mutated.
    targets = [{k: v for k, v in t.items()} for t in targets]
    targets[0]['old_boxes'] = targets[0]['boxes'] / 2.
    # Reduce each box over dim 2 to its min / max coordinates and
    # concatenate them along dim 1.
    min_coordinates, _ = torch.min(targets[0]['boxes'], 2)
    max_coordinates, _ = torch.max(targets[0]['boxes'], 2)
    targets[0]['boxes'] = torch.cat([min_coordinates, max_coordinates], 1)
    # The dummy 800x800 tensor only drives the target transform; its image
    # output is discarded.
    temp_tensor = torch.zeros(1, 3, 800, 800)
    _, targets = self.target_transform(temp_tensor, targets)
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2
        original_image_sizes.append((val[0], val[1]))
    # images, targets = self.transform(images, targets)
    # HACK: build the ImageList directly with a fixed 400x400 size instead
    # of going through self.transform.
    images = ImageList(images, ((400, 400), ) * images.size(0))
    # Move targets to the image device and drop any 'masks' entry.
    targets = [{
        k: v.to(images.tensors.device)
        for k, v in t.items() if k != 'masks'
    } for t in targets]
    # Pass images from 6 camera angle to different backbone
    # (each view goes through self.backbone; the '0' feature map is kept
    # and the views are stacked along a new dim 1).
    features_list = torch.stack([
        self.backbone(images.tensors[:, i])['0']
        for i in range(self.input_img_num)
    ], dim=1)
    feature_h, feature_w = features_list.size()[-2:]
    # Fold the view axis into the channel axis.
    features_list = features_list.view(
        bs, self.backbone_out_channels * self.input_img_num, feature_h,
        feature_w)
    features = OrderedDict([('0', features_list)])
    # if isinstance(features, torch.Tensor):
    #     features = OrderedDict([('0', features)])
    proposals, proposal_losses = self.rpn(images, features, targets)
    detections, detector_losses = self.roi_heads(features, proposals,
                                                 images.image_sizes, targets)
    detections = self.transform.postprocess(detections, images.image_sizes,
                                            original_image_sizes)
    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    # No mask branch in this model: report a zero mask loss.
    losses.update(
        {'loss_mask': torch.zeros(1, device=images.tensors.device)})
    # Placeholder mask threat-score values.
    mask_ts = 0.
    mask_ts_numerator = 0
    mask_ts_denominator = 1
    with torch.no_grad():
        # Get object detection threat score
        cpu_detections = [{k: v.cpu()
                           for k, v in t.items()} for t in detections]
        # TODO: add threshold more than 0.5
        detection_ts, detection_ts_numerator, detection_ts_denominator =\
            get_detection_threat_score(cpu_detections, targets, 0.5)
    if return_result:
        # DEBUG
        masks = 0
        # return losses, mask_ts, mask_ts_numerator,\
        #     mask_ts_denominator, detection_ts, detection_ts_numerator,\
        #     detection_ts_denominator, detections, masks
        return mask_ts, mask_ts_numerator,\
            mask_ts_denominator, detection_ts, detection_ts_numerator,\
            detection_ts_denominator, detections, masks
    else:
        # return losses, mask_ts, mask_ts_numerator, mask_ts_denominator,\
        #     detection_ts, detection_ts_numerator, detection_ts_denominator
        return losses
def box_train(box_head, train_loader, optimizer, epoch, backbone, rpn,
              keep_topK, l):
    """Train the box head for one epoch and return the mean losses.

    The backbone and RPN are run without gradients; only the box head is
    optimised.

    Args:
        box_head: module providing ``create_ground_truth``,
            ``MultiScaleRoiAlign``, ``forward`` and ``compute_loss``.
        train_loader: iterable of dict batches with keys ``'images'``,
            ``'bbox'`` and ``'labels'``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number; not used inside this function.
        backbone: feature extractor called on the image batch.
        rpn: region proposal network called as ``rpn(image_list, features)``.
        keep_topK: number of leading proposals kept per image.
        l: weighting argument forwarded to ``compute_loss``.

    Returns:
        ``(mean_total_loss, mean_class_loss, mean_regression_loss)``
        averaged over the number of batches.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # TODO double check this value, just a placeholder for now
    effective_batch = 32  # used in TA's test case

    epoch_loss = 0.0
    epoch_clas_loss = 0.0
    epoch_regr_loss = 0.0
    num_batches = 0

    for data in train_loader:
        optimizer.zero_grad()

        images = data['images'].to(device)
        boxes = data['bbox']
        labels = data['labels']

        # Frozen feature extraction: no graph is needed for backbone / RPN.
        with torch.no_grad():
            backout = backbone(images)
            # The RPN takes an ImageList as its first argument; every image
            # is declared to be 800 x 1088.
            im_lis = ImageList(images, [(800, 1088)] * images.shape[0])
            rpnout = rpn(im_lis, backout)

        # A list of proposal tensors: list:len(bz){(keep_topK,4)}
        proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
        # One feature map per FPN level: list:len(FPN){(bz,256,H_feat,W_feat)}
        fpn_feat_list = list(backout.values())

        gt_labels, gt_regressor_target = box_head.create_ground_truth(
            proposals, labels, boxes)
        # This is the input to the box head.
        roi_align_result = box_head.MultiScaleRoiAlign(fpn_feat_list, proposals)
        clas_out, regr_out = box_head.forward(roi_align_result.to(device))
        loss, loss_c, loss_r = box_head.compute_loss(
            clas_out, regr_out, gt_labels, gt_regressor_target,
            l, effective_batch)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_clas_loss += loss_c.item()
        epoch_regr_loss += loss_r.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_loader produced no batches")

    # Average over the batch COUNT; the previous code divided by the last
    # zero-based index, which is off by one and zero for a single batch.
    return (epoch_loss / num_batches,
            epoch_clas_loss / num_batches,
            epoch_regr_loss / num_batches)
def forward(self,
            _images,
            _targets=None,
            return_result=False,
            return_losses=False):
    """Predict a road-map mask and object detections from six camera views.

    Args:
        _images: batch of camera images; only batch size 1 is accepted and
            six views are read (``_images[0, 0..5]``).
        _targets: list of per-sample dicts; entry 0 must hold ``'boxes'``
            and ``'labels'``, and the transformed targets must hold
            ``'masks'``. NOTE(review): despite the ``None`` default,
            ``_targets[0]`` is indexed unconditionally — confirm contract.
        return_result: when true, return predictions instead of losses.
        return_losses: not read anywhere in this method.

    Returns:
        ``(masks, detections)`` when ``return_result`` is true, otherwise
        the merged dict of detector, proposal and mask losses.

    Raises:
        ValueError: in training mode when the transformed targets are None.
    """
    bs = _images.size(0)
    assert bs == 1
    device = _images.device
    # Process images: per view, build an RGB input and a depth-network
    # input on CPU, then move both buffers to the input's device.
    images = torch.zeros(1, 6, 3, 400, 400)
    depths = torch.zeros(1, 6, 3, 128, 416)
    for i in range(6):
        images[0, i] = self.img_transform(_images[0, i].cpu())
        depths[0, i] = self.depth_transform(_images[0, i].cpu())
    del _images
    images = images.to(device)
    depths = depths.to(device)
    # Get depth map: estimate, resize, and append it to every view as an
    # extra channel (dim 2).
    depths = self.depth_estimator(depths.squeeze(0))[0]
    depths = self.depth_resize(depths.unsqueeze(1))
    depths = depths.view(1, 6, 1, 400, 400)
    images = torch.cat((images, depths), dim=2)
    del depths
    # Process targets: keep boxes whose mean point lies within 300 of
    # (400, 400) AND whose label is one of 0, 2, 4, 5.
    # NOTE(review): the offset tensor is created without a device argument;
    # this assumes the target boxes live on the default device — confirm.
    dis = torch.mean(_targets[0]['boxes'], dim=2) - torch.tensor(
        [400., 400.])
    index_1 = torch.sqrt(torch.sum(torch.pow(dis, 2), dim=1)) < 300.
    index_2 = (_targets[0]['labels'] == 0) | (_targets[0]['labels'] == 2) |\
        (_targets[0]['labels'] == 4) | (_targets[0]['labels'] == 5)
    # Element-wise product of the two boolean masks acts as logical AND.
    label_index = index_1 * index_2
    # Deep copy so filtering does not touch the caller's target dict.
    targets = [copy.deepcopy(_targets[0])]
    targets[0]['boxes'] = targets[0]['boxes'][label_index]
    targets[0]['labels'] = targets[0]['labels'][label_index]
    targets = [{k: v for k, v in t.items()} for t in targets]
    # targets[0]['old_boxes'] = targets[0]['boxes'] / 2.
    # Reduce each box over dim 2 to its min / max coordinates and
    # concatenate them along dim 1.
    min_coordinates, _ = torch.min(targets[0]['boxes'], 2)
    max_coordinates, _ = torch.max(targets[0]['boxes'], 2)
    targets[0]['boxes'] = torch.cat([min_coordinates, max_coordinates], 1)
    # The dummy 800x800 tensor only drives the target transform; its image
    # output is discarded.
    temp_tensor = torch.zeros(1, 3, 800, 800)
    _, targets = self.target_transform(temp_tensor, targets)
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    original_image_sizes = torch.jit.annotate(List[Tuple[int, int]], [])
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2
        original_image_sizes.append((val[0], val[1]))
    # images, targets = self.transform(images, targets)
    device = images.device
    images = ImageList(images, ((400, 400), ) * images.size(0))
    # Masks are split off into their own tensor; the remaining target
    # entries are moved to the image device.
    target_masks = torch.stack(
        [t['masks'].float().to(device) for t in targets])
    targets = [{k: v.to(device)
                for k, v in t.items() if k != 'masks'} for t in targets]
    # Mask backbone: one pass per view, stacked along a new dim 1.
    features_list = torch.stack([
        self.backbone(images.tensors[:, i])
        for i in range(self.input_img_num)
    ], dim=1)
    feature_h, feature_w = features_list.size()[-2:]
    combined_feature_map = features_list.view(bs, self.input_img_num,
                                              feature_h, feature_w)
    masks, mask_losses = self.mask_net(combined_feature_map, target_masks)
    del features_list
    torch.cuda.empty_cache()
    # Detection backbone (self.backbone_): same per-view stacking.
    features_list = torch.stack([
        self.backbone_(images.tensors[:, i])
        for i in range(self.input_img_num)
    ], dim=1)
    # NOTE(review): feature_h / feature_w are recomputed here but the view
    # below hard-codes 64 channels and 400 x 400 — confirm they agree.
    feature_h, feature_w = features_list.size()[-2:]
    detection_combined_feature_map = features_list.view(
        bs, 64 * self.input_img_num, 400, 400)
    del features_list
    torch.cuda.empty_cache()
    # The RPN consumes the mask-branch features; the RoI heads consume the
    # detection-branch features.
    road_map_features = OrderedDict([('0', combined_feature_map)])
    detection_features = OrderedDict([('0', detection_combined_feature_map)
                                      ])
    proposals, proposal_losses = self.rpn(images, road_map_features,
                                          targets)
    # A disabled earlier variant wrapped the two calls below in
    # try/except RuntimeError and fell back to zero detector losses.
    detections, detector_losses = self.roi_heads(detection_features,
                                                 proposals,
                                                 images.image_sizes, targets)
    detections = self.transform.postprocess(detections, images.image_sizes,
                                            original_image_sizes)
    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    losses.update(mask_losses)
    if return_result:
        return masks, detections
    else:
        return losses
num_bbox_class.append(num_class2) num_class3 = torch.count_nonzero(bbox_list[0] == 3) num_bbox_class.append(num_class3) image = transforms.functional.normalize(img[0].cpu().detach(), [-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225], [1 / 0.229, 1 / 0.224, 1 / 0.225], inplace=False) image_vis = image.permute(1, 2, 0).cpu().detach().numpy() num_grnd_box = len(bbox_list) # Take the features from the backbone backout = backbone(img) # The RPN implementation takes as first argument the following image list im_lis = ImageList(img, [(800, 1088)] * img.shape[0]) rpnout = rpn(im_lis, backout) # The final output is a list of proposal tensors: list:len(bz){(keep_topK,4)} proposals = [proposal[0:keep_topK_check, :] for proposal in rpnout[0]] # generate gt labels labels, regressor_target = box_head.create_ground_truth(proposals, label_list, bbox_list) #tx,ty,tw,twh labels = labels.flatten() # A list of features produces by the backbone's FPN levels: list:len(FPN){(bz,256,H_feat,W_feat)} fpn_feat_list = list(backout.values()) proposal_torch = torch.cat(proposals, dim=0) # x1 y1 x2 y2 proposal_xywh = torch.zeros_like(proposal_torch, device=proposal_torch.device) proposal_xywh[:, 0] = ((proposal_torch[:, 0] + proposal_torch[:, 2]) / 2) proposal_xywh[:, 1] = ((proposal_torch[:, 1] + proposal_torch[:, 3]) / 2) proposal_xywh[:, 2] = torch.abs(proposal_torch[:, 2] - proposal_torch[:, 0]) proposal_xywh[:, 3] = torch.abs(proposal_torch[:, 3] - proposal_torch[:, 1])
def proposal_confusion_matrix(loader):
    """Return accuracy, precision and recall of the proposal classifier.

    For every batch the RPN proposals are split into positive and negative
    samples against the (unpadded) ground truth, RoI-aligned, classified,
    and compared with the sampled labels. Label ``0`` is treated as
    background.

    Args:
        loader: iterable yielding ``(batch, pad_lengths)`` where ``batch``
            is ``(images, masks, bboxes, labels)`` and ``pad_lengths`` maps
            ``"bboxes"`` / ``"labels"`` to the per-image unpadded lengths.

    Returns:
        ``(accuracy, precision, recall)`` as floats. A metric whose
        denominator is zero is reported as ``0.0`` instead of raising.
    """
    TP, FP, TN, FN = 0, 0, 0, 0
    with torch.no_grad():
        for batch, pad_lengths in loader:
            images, _masks, bboxes, labels = batch
            images = images.to(device=DEVICE, dtype=torch.float)
            bboxes = bboxes.to(device=DEVICE, dtype=torch.float)
            labels = labels.to(device=DEVICE, dtype=torch.float)

            backbone_out = BACKBONE(images)
            img_list = ImageList(
                images,
                list(itertools.repeat((TARGET_HEIGHT, TARGET_WIDTH),
                                      len(images))))
            rpn_proposals = RPN(img_list, backbone_out)[0]

            # Sample positive and negative proposals per image.
            sel_pos_proposals, sel_pos_labels = [], []
            sel_neg_proposals, sel_neg_labels = [], []
            for ix, proposals in enumerate(rpn_proposals):
                (pos_proposals, _pos_bboxes, pos_labels,
                 neg_proposals, _neg_bboxes, neg_labels) = sample_ground_truth(
                    proposals,
                    bboxes[ix][:pad_lengths["bboxes"][ix]],
                    labels[ix][:pad_lengths["labels"][ix]],
                    iou_thresh=0.5)
                sel_pos_proposals.append(pos_proposals)
                sel_pos_labels.append(pos_labels)
                sel_neg_proposals.append(neg_proposals)
                sel_neg_labels.append(neg_labels)

            # Positives first, then whatever negatives were sampled.
            sel_proposals = sel_pos_proposals + filter_none(sel_neg_proposals)
            sel_labels = torch.cat(
                sel_pos_labels + filter_none(sel_neg_labels), dim=0)

            # ROI Align, then intermediate network and classifier.
            roi_aligned_proposals = torchvision.ops.roi_align(
                backbone_out[0], sel_proposals, (7, 7),
                spatial_scale=1. / 4., sampling_ratio=4)
            class_out = CLASS_NET(ROI_NET(roi_aligned_proposals))
            _, pred_classes = torch.softmax(class_out, dim=1).max(dim=1)

            # Boolean masks stay 1-d for any number of matches, unlike
            # `.nonzero().squeeze()`, which collapses to 0-d for one match.
            is_foreground = sel_labels != 0
            is_background = sel_labels == 0
            tp = (pred_classes[is_foreground]
                  == sel_labels[is_foreground]).sum().item()
            tn = (pred_classes[is_background]
                  == sel_labels[is_background]).sum().item()
            fp = (pred_classes != 0).sum().item() - tp
            fn = (pred_classes == 0).sum().item() - tn

            TP += tp
            FP += fp
            TN += tn
            FN += fn

    total = TP + FP + TN + FN
    predicted_positive = TP + FP
    actual_positive = TP + FN
    # Guard each ratio: an empty loader or a classifier that never predicts
    # foreground would otherwise raise ZeroDivisionError.
    accuracy = (TP + TN) / total if total else 0.0
    precision = TP / predicted_positive if predicted_positive else 0.0
    recall = TP / actual_positive if actual_positive else 0.0
    return accuracy, precision, recall
for idx, (batch, pad_lengths) in enumerate(tiny_loader): # zero the parameter gradients optimizer.zero_grad() images, masks, bboxes, labels = batch images = images.to(device=DEVICE, dtype=torch.float) bboxes = bboxes.to(device=DEVICE, dtype=torch.float) labels = labels.to(device=DEVICE, dtype=torch.float) masks = masks.to(device=DEVICE, dtype=torch.float) backbone_out = BACKBONE(images) img_list = ImageList( images, list(itertools.repeat((TARGET_HEIGHT, TARGET_WIDTH), len(images)))) rpn_proposals = RPN(img_list, backbone_out)[0] sel_pos_proposals = [1]*len(images) sel_pos_bboxes = [1]*len(images) sel_pos_labels = [1]*len(images) POS_CT = [1]*len(images) sel_neg_proposals = [1]*len(images) sel_neg_bboxes = [1]*len(images) sel_neg_labels = [1]*len(images) # Sample rpn proposals for positive and negative proposals for ix, proposals in enumerate(rpn_proposals):
test_loader = test_build_loader.loader() # Here we keep the top 20, but during training you # should keep around 200 boxes from the 1000 proposals keep_topK = 20 with torch.no_grad(): for iter, batch in enumerate(test_loader, 0): images, *other = batch images = images.to(device) # Take the features from the backbone backout = backbone(images) # The RPN implementation takes as first argument the following image list im_lis = ImageList(images, [(800, 1088)] * images.shape[0]) # Then we pass the image list and the backbone output through the rpn rpnout = rpn(im_lis, backout) # The final output is # A list of proposal tensors: # list:len(bz){(keep_topK,4)} proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]] # A list of features produces by the backbone's FPN levels: # list:len(FPN){(bz,256,H_feat,W_feat)} fpn_feat_list = list(backout.values()) print("For the proposals We have a list containing " + str(len(proposals)) + " tensors") print("Each one with shape " + str(proposals[0].shape)) print("")
def __init__(self, model, loss, metrics, optimizer, config, data_loader,
             valid_data_loader=None, lr_scheduler=None, len_epoch=None):
    """Set up the trainer, then run an anchor-visualisation debug routine.

    Args:
        model, loss, metrics, optimizer, config: forwarded to the base
            class initialiser.
        data_loader: training data loader; must expose ``batch_size``.
        valid_data_loader: optional loader; validation is enabled when it
            is not ``None``.
        lr_scheduler: optional learning-rate scheduler, stored as-is.
        len_epoch: when ``None`` an epoch is one pass over ``data_loader``;
            otherwise the loader is wrapped with ``inf_loop`` and an epoch
            is ``len_epoch`` iterations.

    NOTE(review): everything after ``self.log_step`` is debugging code that
    draws anchors and opens an OpenCV window (blocking on ``waitKey``)
    every time a trainer is constructed — confirm it should live here.
    """
    super().__init__(model, loss, metrics, optimizer, config)
    self.config = config
    self.data_loader = data_loader
    if len_epoch is None:
        # epoch-based training
        self.len_epoch = len(self.data_loader)
    else:
        # iteration-based training
        self.data_loader = inf_loop(data_loader)
        self.len_epoch = len_epoch
    self.valid_data_loader = valid_data_loader
    self.do_validation = self.valid_data_loader is not None
    self.lr_scheduler = lr_scheduler
    self.log_step = int(np.sqrt(data_loader.batch_size))

    # ---- anchor visualisation (debug) ----
    # Feature-map sizes from finest to coarsest; the stride doubles per level.
    # conv_size = [5, 10, 20, 40, 80, 160]
    conv_size = [160, 80, 40, 20, 10, 5]
    anchors = []
    # stride = 128
    stride = 4
    # One drawing colour per feature-map level.
    colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0),
              (0, 255, 255), (255, 0, 255)]
    col_idx = 0
    image = np.zeros((640, 640, 3), np.uint8)
    # NOTE(review): img_list is only referenced by commented-out code below.
    img_list = ImageList(image, [(640, 640)])
    for fm_size in conv_size:
        # anchor_gene = AnchorGenerator(sizes=((stride * 4),), aspect_ratios=((1.0),))
        anchor = Generate_Anchors(stride, fm_size)
        # anchor = anchor_gene(img_list, torch.randn(1, fm_size, fm_size))[0]
        # anchors.append(anchor_gene(img_list, torch.randn(1, fm_size, fm_size))[0])
        stride *= 2
        for box in anchor:
            # Box entries are read as (y1, x1, y2, x2) and swapped into
            # (x, y) order for drawing.
            x1 = box[1]
            y1 = box[0]
            x2 = box[3]
            y2 = box[2]
            print(x1, y1, x2, y2)
            cv2.rectangle(image, (x1, y1), (x2, y2), colors[col_idx], 2)
        col_idx += 1
    # NOTE(review): `anchors` is still the empty list here (the only appends
    # are commented out), so torch.cat receives an empty list — verify
    # whether `anchor` should be collected in the loop above.
    anchors = torch.cat(anchors, 0)
    # Expression statement with no effect.
    anchors[0]
    gt_box = torch.tensor([71., 42., 90., 62.], dtype=torch.float32)
    # The result of jaccard() is discarded.
    jaccard(gt_box, anchors)
    img = cv2.resize(image, (1000, 1000))
    cv2.imshow("anchors", img)
    cv2.waitKey()
    cv2.destroyAllWindows()
def do_eval(dataloader, checkpoint_file, device, result_dir=None,
            keep_topK=200, keep_num_preNMS=50, keep_num_postNMS=5):
    """Run the pretrained backbone/RPN and a trained box head over a dataset.

    Args:
        dataloader: iterable of dict batches with keys ``'images'``,
            ``'labels'``, ``'masks'`` and ``'bbox'``.
        checkpoint_file: path to a checkpoint holding ``'model_state_dict'``
            for the box head.
        device: device the models and data are moved to.
        result_dir: optional output directory, created when given.
        keep_topK: number of leading RPN proposals kept per image.
        keep_num_preNMS: forwarded to ``postprocess_detections``.
        keep_num_postNMS: forwarded to ``postprocess_detections``.

    NOTE(review): the post-NMS results and several per-batch locals
    (``batch_size``, ``label_list``, ``mask_list``, ``bbox_list``,
    ``img_shape``) are computed but never used or returned in the code
    shown — the function may be incomplete.
    """
    if result_dir is not None:
        os.makedirs(result_dir, exist_ok=True)
    os.makedirs("PreNMS", exist_ok=True)
    os.makedirs("PostNMS", exist_ok=True)
    # =========================== Pretrained ===============================
    # Put the path were you save the given pretrained model
    pretrained_path = '../pretrained/checkpoint680.pth'
    backbone, rpn = pretrained_models_680(pretrained_path)
    backbone = backbone.to(device)
    rpn = rpn.to(device)
    # ========================= Loading Model ==============================
    boxHead = BoxHead(Classes=3, P=7, device=device).to(device)
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    if torch.cuda.is_available():
        checkpoint = torch.load(checkpoint_file)
    else:
        # Remap tensors to CPU when no GPU is available.
        checkpoint = torch.load(checkpoint_file,
                                map_location=torch.device('cpu'))
    print("[INFO] Weight loaded from checkpoint file: {}".format(
        checkpoint_file))
    boxHead.load_state_dict(checkpoint['model_state_dict'])
    boxHead.eval()  # set to eval mode
    # ============================ Eval ================================
    for iter, data in enumerate(tqdm(dataloader), 0):
        img = data['images'].to(device)
        batch_size = img.shape[0]
        label_list = [x.to(device) for x in data['labels']]
        mask_list = [x.to(device) for x in data['masks']]
        bbox_list = [x.to(device) for x in data['bbox']]
        # index_list = data['index']
        img_shape = (img.shape[2], img.shape[3])
        with torch.no_grad():
            backout = backbone(img)
            # Every image is declared to be 800 x 1088 for the RPN.
            im_lis = ImageList(img, [(800, 1088)] * img.shape[0])
            rpnout = rpn(im_lis, backout)
            # Keep the first keep_topK proposals of each image.
            proposals = [proposal[0:keep_topK, :] for proposal in rpnout[0]]
            fpn_feat_list = list(backout.values())
            feature_vectors = boxHead.MultiScaleRoiAlign(
                fpn_feat_list, proposals)
            class_logits, box_pred = boxHead(feature_vectors)
            # From here on `class_logits` holds softmax probabilities.
            class_logits = torch.softmax(class_logits, dim=1)
            # Convert proposals from corners (x1 y1 x2 y2) to centre/size.
            proposal_torch = torch.cat(proposals, dim=0)  # x1 y1 x2 y2
            proposal_xywh = torch.zeros_like(proposal_torch,
                                             device=proposal_torch.device)
            proposal_xywh[:, 0] = ((proposal_torch[:, 0] +
                                    proposal_torch[:, 2]) / 2)
            proposal_xywh[:, 1] = ((proposal_torch[:, 1] +
                                    proposal_torch[:, 3]) / 2)
            proposal_xywh[:, 2] = torch.abs(proposal_torch[:, 2] -
                                            proposal_torch[:, 0])
            proposal_xywh[:, 3] = torch.abs(proposal_torch[:, 3] -
                                            proposal_torch[:, 1])
            result_prob, result_class, result_box = simplifyOutputs(
                class_logits, box_pred)
            box_decoded = decode_output(proposal_xywh, result_box)
            post_nms_prob, post_nms_class, post_nms_box = boxHead.postprocess_detections(
                result_prob,
                result_class,
                box_decoded,
                IOU_thresh=0.5,
                conf_thresh=0.5,
                keep_num_preNMS=keep_num_preNMS,
                keep_num_postNMS=keep_num_postNMS)
def forward(self_module, images, features):
    """Wrap ``images`` in an ``ImageList`` and run the wrapped RPN on it.

    Each image's size is taken from the last two dimensions of its tensor.
    """
    per_image_sizes = [img.shape[-2:] for img in images]
    image_list = ImageList(images, per_image_sizes)
    return self_module.rpn(image_list, features)
def first_filter(self, x, images):
    """Return the scores from ``self.rpn.first_filter`` for one feature map.

    ``x`` is passed as the single feature level (key ``0``) and ``images``
    is wrapped in an ``ImageList`` sized from each tensor's last two dims.
    """
    sizes = [image.shape[-2:] for image in images]
    image_list = ImageList(images, sizes)
    feature_levels = OrderedDict([(0, x)])
    return self.rpn.first_filter(image_list, feature_levels)
def __init__(self_module, images):
    """Build the RPN under test and cache ``images`` as an ``ImageList``.

    The RPN comes from the enclosing test case's ``_init_test_rpn``; image
    sizes are the last two dimensions of each image tensor.
    """
    super(RPNModule, self_module).__init__()
    self_module.rpn = self._init_test_rpn()
    per_image_sizes = [img.shape[-2:] for img in images]
    self_module.images = ImageList(images, per_image_sizes)
def forward(self, x, images, first=None):
    """Run the RPN on a single feature map and return its two outputs.

    ``x`` is passed as the only feature level (key ``0``); ``images`` is
    wrapped in an ``ImageList`` sized from each tensor's last two dims;
    ``first`` is forwarded to the RPN unchanged.

    Returns:
        ``(proposals, scores)`` as produced by ``self.rpn``.
    """
    sizes = [image.shape[-2:] for image in images]
    image_list = ImageList(images, sizes)
    feature_levels = OrderedDict([(0, x)])
    proposals, scores = self.rpn(image_list, feature_levels, first=first)
    return proposals, scores