def forward(self, x):
    if self.training:
        with EventStorage() as storage:
            out = self.model(x)
    else:
        self.model.train()
        with torch.no_grad(), EventStorage() as storage:
            out = self.model(x)
        self.model.eval()
    return out

def train(self, start_iter: int, max_iter: int):
    """
    Args:
        start_iter, max_iter (int): See docs above
    """
    logger = logging.getLogger(__name__)
    logger.info("Starting training from iteration {}".format(start_iter))

    self.iter = self.start_iter = start_iter
    self.max_iter = max_iter

    with EventStorage(start_iter) as self.storage:
        try:
            self.before_train()
            for self.iter in range(start_iter, max_iter):
                self.before_step()
                self.run_step()
                self.after_step()
            # self.iter == max_iter can be used by `after_train` to
            # tell whether the training successfully finished or failed
            # due to exceptions.
            self.iter += 1
        except Exception:
            logger.exception("Exception during training:")
            raise
        finally:
            self.after_train()

def train(self, start_iter: int, max_iter: int):
    """
    Args:
        start_iter, max_iter (int): See docs above
    """
    logger = logging.getLogger(__name__)
    logger.info("Starting training from iteration {}".format(start_iter))

    self.iter = self.start_iter = start_iter
    self.max_iter = max_iter

    with EventStorage(start_iter) as self.storage:
        try:
            # logger.info('into before train')
            self.before_train()
            logger.info(f'into training {start_iter}->{max_iter}')
            for self.iter in range(start_iter, max_iter):
                # logger.info(f'{self.iter} stepping')
                self.before_step()
                self.run_step()
                # logger.info('into after step')
                self.after_step()
                # logger.info('stepping finish')
        finally:
            # logger.info('finally into after train')
            self.after_train()

def train(self, start_iter: int, max_iter: int):
    """
    Args:
        start_iter, max_iter (int): See docs above
    """
    losses = []
    logger = logging.getLogger(__name__)
    logger.info("Starting training from iteration {}".format(start_iter))

    self.iter = self.start_iter = start_iter
    self.max_iter = max_iter
    loss_cnt = 0

    with EventStorage(start_iter) as self.storage:
        try:
            self.before_train()
            for self.iter in range(start_iter, max_iter):
                self.before_step()
                loss = self.run_step()
                losses.append(loss)
                loss_cnt += 1
                if loss_cnt % 10 == 0:
                    print("has got {} losses, still need {}".format(
                        loss_cnt, self.max_iter - loss_cnt))
                self.after_step()
        finally:
            self.after_train()
    return losses

def test_load_ema_weights(self, tmp_dir):
    cfg = self._get_cfg(tmp_dir)
    cfg.MODEL_EMA.ENABLED = True
    task = GeneralizedRCNNTask(cfg)

    checkpoint_callback = ModelCheckpoint(dirpath=task.cfg.OUTPUT_DIR, save_last=True)
    trainer = pl.Trainer(
        max_steps=1,
        limit_train_batches=1,
        num_sanity_val_steps=0,
        callbacks=[checkpoint_callback],
    )
    with EventStorage() as storage:
        task.storage = storage
        trainer.fit(task)

    # load EMA weights from checkpoint
    task2 = GeneralizedRCNNTask.load_from_checkpoint(
        os.path.join(tmp_dir, "last.ckpt"))
    self.assertTrue(
        self._compare_state_dict(task.ema_state.state_dict(),
                                 task2.ema_state.state_dict()))

    # apply EMA weights to model
    task2.ema_state.apply_to(task2.model)
    self.assertTrue(
        self._compare_state_dict(task.ema_state.state_dict(),
                                 task2.model.state_dict()))

def test_fast_rcnn(self):
    torch.manual_seed(132)

    box_head_output_size = 8
    box_predictor = FastRCNNOutputLayers(
        ShapeSpec(channels=box_head_output_size),
        box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
        num_classes=5,
    )
    feature_pooled = torch.rand(2, box_head_output_size)
    predictions = box_predictor(feature_pooled)

    proposal_boxes = torch.tensor(
        [[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]], dtype=torch.float32)
    gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    proposal = Instances((10, 10))
    proposal.proposal_boxes = Boxes(proposal_boxes)
    proposal.gt_boxes = Boxes(gt_boxes)
    proposal.gt_classes = torch.tensor([1, 2])

    with EventStorage():  # capture events in a new storage to discard them
        losses = box_predictor.losses(predictions, [proposal])

    expected_losses = {
        "loss_cls": torch.tensor(1.7951188087),
        "loss_box_reg": torch.tensor(4.0357131958),
    }
    for name in expected_losses.keys():
        assert torch.allclose(losses[name], expected_losses[name])

def test_fast_rcnn_rotated(self):
    torch.manual_seed(132)

    box_head_output_size = 8
    box_predictor = RotatedFastRCNNOutputLayers(
        ShapeSpec(channels=box_head_output_size),
        box2box_transform=Box2BoxTransformRotated(weights=(10, 10, 5, 5, 1)),
        num_classes=5,
    )
    feature_pooled = torch.rand(2, box_head_output_size)
    predictions = box_predictor(feature_pooled)

    proposal_boxes = torch.tensor(
        [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32)
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
    proposal = Instances((10, 10))
    proposal.proposal_boxes = RotatedBoxes(proposal_boxes)
    proposal.gt_boxes = RotatedBoxes(gt_boxes)
    proposal.gt_classes = torch.tensor([1, 2])

    with EventStorage():  # capture events in a new storage to discard them
        losses = box_predictor.losses(predictions, [proposal])

    # Note: the expected losses are slightly different even if
    # the boxes are essentially the same as in the FastRCNNOutput test, because
    # bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization
    # between the two cases.
    expected_losses = {
        "loss_cls": torch.tensor(1.7920907736),
        "loss_box_reg": torch.tensor(4.0410838127),
    }
    for name in expected_losses.keys():
        assert torch.allclose(losses[name], expected_losses[name])

def initialize_from_support(trainer_self):
    class_means = defaultdict(list)
    class_activations = defaultdict(list)
    print('Computing support set centroids')
    # Make sure this doesn't break on multi-GPU.
    # Disable the default collate function.
    support_loader = torch.utils.data.DataLoader(
        trainer_self.data_loader.dataset.dataset,
        batch_size=trainer_self.data_loader.batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=lambda x: x)

    with EventStorage() as storage:
        for i, batched_inputs in enumerate(support_loader):
            # for i, batched_inputs in enumerate(trainer_self.data_loader):
            print('Processed {} batches'.format(i))
            self = trainer_self.model
            images = self.preprocess_image(batched_inputs)
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
            features = self.backbone(images.tensor)
            proposals, proposal_losses = self.proposal_generator(
                images, features, gt_instances)
            proposals = self.roi_heads.label_and_sample_proposals(
                proposals, gt_instances)

            # Average box features here
            gt_as_proposals = append_gt_as_proposal(gt_instances)
            losses, box_features = self.roi_heads._forward_box(
                features, gt_as_proposals, gt_instances, return_box_features=True)

            box_features_idx = 0
            for instances in gt_as_proposals:
                for gt_class in instances.gt_classes:
                    category_id = gt_class.item()
                    activation = box_features[box_features_idx]
                    class_activations[category_id].append(activation.detach().cpu())
                    box_features_idx += 1

    for category_id in class_activations:
        class_activations[category_id] = torch.stack(class_activations[category_id])
        class_means[category_id] = class_activations[category_id].mean(dim=0)
        print('Category: #{}, shape: {}'.format(
            category_id, class_activations[category_id].size()))

def train(self, patience=3):
    """
    Args:
        patience (int): see docs above
    """
    logger = logging.getLogger(__name__)
    logger.info("Starting training from iteration {}".format(self.start_iter))

    self.iter = start_iter = self.start_iter
    max_iter = self.max_iter = self.cfg.SOLVER.MAX_ITER

    from detectron2.utils.events import EventStorage
    with EventStorage(start_iter) as self.storage:
        try:
            self.before_train()
            print('start_iter, max_iter', start_iter, max_iter)
            for self.iter in range(start_iter, max_iter):
                self.before_step()
                self.run_step()
                self.after_step()
                if (self.iter + 1) % self.cfg.TEST.EVAL_PERIOD == 0 and \
                        self.early_stop(patience):
                    break
        finally:
            self.after_train()

def training_step(self, batch, batch_idx):
    data_time = time.perf_counter() - self.data_start
    # Need to manually enter/exit since trainer may launch processes
    # This ideally belongs in setup, but setup seems to run before processes are spawned
    if self.storage is None:
        self.storage = EventStorage(0)
        self.storage.__enter__()
        self.iteration_timer.trainer = weakref.proxy(self)
        self.iteration_timer.before_step()
        self.writers = (
            default_writers(self.cfg.OUTPUT_DIR, self.max_iter)
            if comm.is_main_process()
            else {}
        )

    loss_dict = self.model(batch)
    SimpleTrainer.write_metrics(loss_dict, data_time)

    opt = self.optimizers()
    self.storage.put_scalar(
        "lr", opt.param_groups[self._best_param_group_id]["lr"], smoothing_hint=False
    )
    self.iteration_timer.after_step()
    self.storage.step()
    # A little odd to put before step here, but it's the best way to get a proper timing
    self.iteration_timer.before_step()

    if self.storage.iter % 20 == 0:
        for writer in self.writers:
            writer.write()
    return sum(loss_dict.values())

def test_load_from_checkpoint(self) -> None:
    with tempfile.TemporaryDirectory() as tmp_dir:
        task = GeneralizedRCNNTask(self._get_cfg(tmp_dir))

        from stl.lightning.callbacks.model_checkpoint import ModelCheckpoint
        checkpoint_callback = ModelCheckpoint(
            directory=task.cfg.OUTPUT_DIR, has_user_data=False)
        params = {
            "max_steps": 1,
            "limit_train_batches": 1,
            "num_sanity_val_steps": 0,
            "checkpoint_callback": checkpoint_callback,
        }
        trainer = pl.Trainer(**params)
        with EventStorage() as storage:
            task.storage = storage
            trainer.fit(task)

        ckpt_path = os.path.join(tmp_dir, "test.ckpt")
        trainer.save_checkpoint(ckpt_path)
        self.assertTrue(os.path.exists(ckpt_path))

        # load model weights from checkpoint
        task2 = GeneralizedRCNNTask.load_from_checkpoint(ckpt_path)
        self.assertTrue(
            self._compare_state_dict(task.model.state_dict(),
                                     task2.model.state_dict()))

def update_stats(self):
    """
    Update the model with precise statistics. Users can manually call this method.
    """
    if self._disabled:
        return

    if self._data_iter is None:
        self._data_iter = iter(self._data_loader)

    def data_loader():
        for num_iter in itertools.count(1):
            if num_iter % 100 == 0:
                self._logger.info(
                    "Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter)
                )
            # This way we can reuse the same iterator
            yield next(self._data_iter)

    with EventStorage():  # capture events in a new storage to discard them
        self._logger.info(
            "Running precise-BN for {} iterations... ".format(self._num_iter)
            + "Note that this could produce different statistics every time."
        )
        update_bn_stats(self._model, data_loader(), self._num_iter)

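# Hedged usage sketch (not part of the original snippet): assuming the
# update_stats() above belongs to detectron2's hooks.PreciseBN hook, the hook
# is typically registered on a trainer so the precise-BN pass runs periodically.
# `cfg`, `model`, and `trainer` are assumed to already exist in the caller's scope.
from detectron2.data import build_detection_train_loader
from detectron2.engine import hooks

precise_bn_hook = hooks.PreciseBN(
    cfg.TEST.EVAL_PERIOD,               # how often (in iterations) to recompute the stats
    model,
    build_detection_train_loader(cfg),  # loader whose batches feed update_bn_stats
    cfg.TEST.PRECISE_BN.NUM_ITER,       # number of iterations used for the estimate
)
trainer.register_hooks([precise_bn_hook])
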
def test_build_model(self, tmp_dir):
    cfg = self._get_cfg(tmp_dir)
    cfg.MODEL_EMA.ENABLED = True
    task = GeneralizedRCNNTask(cfg)
    trainer = self._get_trainer(tmp_dir)
    with EventStorage() as storage:
        task.storage = storage
        trainer.fit(task)

    # test building untrained model
    model = GeneralizedRCNNTask.build_model(cfg)
    self.assertTrue(model.training)

    # test loading regular weights
    with temp_defrost(cfg):
        cfg.MODEL.WEIGHTS = os.path.join(tmp_dir, "last.ckpt")
        model = GeneralizedRCNNTask.build_model(cfg, eval_only=True)
        self.assertFalse(model.training)
        self.assertTrue(
            self._compare_state_dict(model.state_dict(), task.model.state_dict()))

    # test loading EMA weights
    with temp_defrost(cfg):
        cfg.MODEL.WEIGHTS = os.path.join(tmp_dir, "last.ckpt")
        cfg.MODEL_EMA.USE_EMA_WEIGHTS_FOR_EVAL_ONLY = True
        model = GeneralizedRCNNTask.build_model(cfg, eval_only=True)
        self.assertFalse(model.training)
        self.assertTrue(
            self._compare_state_dict(model.state_dict(), task.ema_state.state_dict()))

def test_load_ema_weights(self, tmp_dir):
    cfg = self._get_cfg(tmp_dir)
    cfg.MODEL_EMA.ENABLED = True
    task = GeneralizedRCNNTask(cfg)
    trainer = self._get_trainer(tmp_dir)
    with EventStorage() as storage:
        task.storage = storage
        trainer.fit(task)

    # load EMA weights from checkpoint
    task2 = GeneralizedRCNNTask.load_from_checkpoint(
        os.path.join(tmp_dir, "last.ckpt")
    )
    self.assertTrue(
        self._compare_state_dict(
            task.ema_state.state_dict(), task2.ema_state.state_dict()
        )
    )

    # apply EMA weights to model
    task2.ema_state.apply_to(task2.model)
    self.assertTrue(
        self._compare_state_dict(
            task.ema_state.state_dict(), task2.model.state_dict()
        )
    )

def test_rpn(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "DefaultAnchorGenerator"
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1)
    backbone = build_backbone(cfg)
    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    image_shape = (15, 15)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    gt_instances = Instances(image_shape)
    gt_instances.gt_boxes = Boxes(gt_boxes)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)

    expected_losses = {
        "loss_rpn_cls": torch.tensor(0.0804563984),
        "loss_rpn_loc": torch.tensor(0.0990132466),
    }
    for name in expected_losses.keys():
        assert torch.allclose(proposal_losses[name], expected_losses[name])

    expected_proposal_boxes = [
        Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
        Boxes(
            torch.tensor([
                [0, 0, 30, 20],
                [0, 0, 16.7862777710, 13.1362524033],
                [0, 0, 30, 13.3173446655],
                [0, 0, 10.8602609634, 20],
                [7.7165775299, 0, 27.3875980377, 20],
            ])),
    ]

    expected_objectness_logits = [
        torch.tensor([0.1225359365, -0.0133192837]),
        torch.tensor([
            0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837
        ]),
    ]

    for i in range(len(image_sizes)):
        assert len(proposals[i]) == len(expected_proposal_boxes[i])
        assert proposals[i].image_size == (image_sizes[i][0], image_sizes[i][1])
        assert torch.allclose(proposals[i].proposal_boxes.tensor,
                              expected_proposal_boxes[i].tensor)
        assert torch.allclose(proposals[i].objectness_logits,
                              expected_objectness_logits[i])

def test_rpn(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    backbone = build_backbone(cfg)
    proposal_generator = RPN(cfg, backbone.output_shape())
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    image_shape = (15, 15)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    gt_instances = Instances(image_shape)
    gt_instances.gt_boxes = Boxes(gt_boxes)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, [gt_instances[0], gt_instances[1]]
        )

    expected_losses = {
        "loss_rpn_cls": torch.tensor(0.0804563984),
        "loss_rpn_loc": torch.tensor(0.0990132466),
    }
    for name in expected_losses.keys():
        err_msg = "proposal_losses[{}] = {}, expected losses = {}".format(
            name, proposal_losses[name], expected_losses[name]
        )
        self.assertTrue(torch.allclose(proposal_losses[name], expected_losses[name]), err_msg)

    expected_proposal_boxes = [
        Boxes(torch.tensor([[0, 0, 10, 10], [7.3365392685, 0, 10, 10]])),
        Boxes(
            torch.tensor(
                [
                    [0, 0, 30, 20],
                    [0, 0, 16.7862777710, 13.1362524033],
                    [0, 0, 30, 13.3173446655],
                    [0, 0, 10.8602609634, 20],
                    [7.7165775299, 0, 27.3875980377, 20],
                ]
            )
        ),
    ]

    expected_objectness_logits = [
        torch.tensor([0.1225359365, -0.0133192837]),
        torch.tensor([0.1415634006, 0.0989848152, 0.0565387346, -0.0072308783, -0.0428492837]),
    ]

    for proposal, expected_proposal_box, im_size, expected_objectness_logit in zip(
        proposals, expected_proposal_boxes, image_sizes, expected_objectness_logits
    ):
        self.assertEqual(len(proposal), len(expected_proposal_box))
        self.assertEqual(proposal.image_size, im_size)
        self.assertTrue(
            torch.allclose(proposal.proposal_boxes.tensor, expected_proposal_box.tensor)
        )
        self.assertTrue(torch.allclose(proposal.objectness_logits, expected_objectness_logit))

def test_rroi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, feature_shape)
    roi_heads = build_roi_heads(cfg, feature_shape)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    detector_losses.update(proposal_losses)
    expected_losses = {
        "loss_cls": 4.365657806396484,
        "loss_box_reg": 0.0015851043863222003,
        "loss_rpn_cls": 0.2427729219198227,
        "loss_rpn_loc": 0.3646621108055115,
    }
    succ = all(
        torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0)))
        for name in detector_losses.keys())
    self.assertTrue(
        succ,
        "Losses have changed! New losses: {}".format(
            {k: v.item() for k, v in detector_losses.items()}),
    )

def __init__(self, uncertainty=True):
    super(MaskRCNNWithPokeHead, self).__init__(make_rpn50_fpn_config())
    self.poking_head = nn.Sequential(
        nn.Conv2d(256, 64, kernel_size=1, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(64, 2, kernel_size=1))
    self.poking_loss = MaskPokingLoss(uncertainty)
    self.event_storage = EventStorage()

def test_roi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignV2"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5)
    cfg.MODEL.MASK_ON = True
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}
    feature_shape = {"res4": ShapeSpec(channels=num_channels, stride=16)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = Boxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_instance0.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5)
    gt_boxes1 = torch.tensor([[1, 5, 2, 8], [7, 3, 10, 5]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = Boxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instance1.gt_masks = BitMasks(torch.rand((2,) + image_shape) > 0.5)
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, feature_shape)
    roi_heads = StandardROIHeads(cfg, feature_shape)

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    detector_losses.update(proposal_losses)
    expected_losses = {
        "loss_cls": 4.5253729820251465,
        "loss_box_reg": 0.009785720147192478,
        "loss_mask": 0.693184494972229,
        "loss_rpn_cls": 0.08186662942171097,
        "loss_rpn_loc": 0.1104838103055954,
    }
    succ = all(
        torch.allclose(detector_losses[name], torch.tensor(expected_losses.get(name, 0.0)))
        for name in detector_losses.keys())
    self.assertTrue(
        succ,
        "Losses have changed! New losses: {}".format(
            {k: v.item() for k, v in detector_losses.items()}),
    )

def test_qat(self, tmp_dir):
    @META_ARCH_REGISTRY.register()
    class QuantizableDetMetaArchForTest(mah.DetMetaArchForTest):
        custom_config_dict = {"preserved_attributes": ["preserved_attr"]}

        def __init__(self, cfg):
            super().__init__(cfg)
            self.avgpool.preserved_attr = "foo"
            self.avgpool.not_preserved_attr = "bar"

        def prepare_for_quant(self, cfg):
            example_inputs = (torch.rand(1, 3, 3, 3),)
            self.avgpool = prepare_qat_fx(
                self.avgpool,
                {"": set_backend_and_create_qconfig(cfg, is_train=self.training)},
                example_inputs,
                self.custom_config_dict,
            )
            return self

        def prepare_for_quant_convert(self, cfg):
            self.avgpool = convert_fx(
                self.avgpool,
                convert_custom_config_dict=self.custom_config_dict)
            return self

    cfg = self._get_cfg(tmp_dir)
    cfg.MODEL.META_ARCHITECTURE = "QuantizableDetMetaArchForTest"
    cfg.QUANTIZATION.QAT.ENABLED = True
    task = GeneralizedRCNNTask(cfg)

    callbacks = [
        QuantizationAwareTraining.from_config(cfg),
        ModelCheckpoint(dirpath=task.cfg.OUTPUT_DIR, save_last=True),
    ]
    trainer = pl.Trainer(
        max_steps=1,
        limit_train_batches=1,
        num_sanity_val_steps=0,
        callbacks=callbacks,
        logger=False,
    )
    with EventStorage() as storage:
        task.storage = storage
        trainer.fit(task)
        prepared_avgpool = task._prepared.model.avgpool
        self.assertEqual(prepared_avgpool.preserved_attr, "foo")
        self.assertFalse(hasattr(prepared_avgpool, "not_preserved_attr"))

    with temp_defrost(cfg):
        cfg.MODEL.WEIGHTS = os.path.join(tmp_dir, "last.ckpt")
        model = GeneralizedRCNNTask.build_model(cfg, eval_only=True)
        self.assertTrue(isinstance(model.avgpool, torch.fx.GraphModule))

def do_train(cfg, model, resume=False):
    model.train()
    optimizer = build_optimizer(cfg, model)
    scheduler = build_lr_scheduler(cfg, optimizer)

    checkpointer = DetectionCheckpointer(
        model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler
    )
    start_iter = (
        checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1
    )
    max_iter = cfg.SOLVER.MAX_ITER

    periodic_checkpointer = PeriodicCheckpointer(
        checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter
    )

    writers = default_writers(cfg.OUTPUT_DIR, max_iter) if comm.is_main_process() else []

    # compared to "train_net.py", we do not support accurate timing and
    # precise BN here, because they are not trivial to implement in a small training loop
    data_loader = build_detection_train_loader(cfg)
    logger.info("Starting training from iteration {}".format(start_iter))
    with EventStorage(start_iter) as storage:
        for data, iteration in zip(data_loader, range(start_iter, max_iter)):
            storage.iter = iteration

            loss_dict = model(data)
            losses = sum(loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict

            loss_dict_reduced = {k: v.item() for k, v in comm.reduce_dict(loss_dict).items()}
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            if comm.is_main_process():
                storage.put_scalars(total_loss=losses_reduced, **loss_dict_reduced)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False)
            scheduler.step()

            if (
                cfg.TEST.EVAL_PERIOD > 0
                and (iteration + 1) % cfg.TEST.EVAL_PERIOD == 0
                and iteration != max_iter - 1
            ):
                do_test(cfg, model)
                # Compared to "train_net.py", the test results are not dumped to EventStorage
                comm.synchronize()

            if iteration - start_iter > 5 and (
                (iteration + 1) % 20 == 0 or iteration == max_iter - 1
            ):
                for writer in writers:
                    writer.write()
            periodic_checkpointer.step(iteration)

def do_train(cfg, model, resume=False):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=cfg.SOLVER.BASE_LR)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20], gamma=0.1)

    checkpointer = DetectionCheckpointer(
        model, cfg.OUTPUT_DIR, optimizer=optimizer, scheduler=scheduler
    )
    start_iter = (
        checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=resume).get("iteration", -1) + 1
    )
    max_iter = cfg.SOLVER.MAX_ITER

    periodic_checkpointer = PeriodicCheckpointer(
        checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD, max_iter=max_iter
    )

    writers = [
        CommonMetricPrinter(max_iter),
        JSONWriter(os.path.join(cfg.OUTPUT_DIR, "metrics.json")),
        TensorboardXWriter(cfg.OUTPUT_DIR),
    ]

    data_loader = build_detection_train_loader(cfg)
    logger.info("Starting training from iteration {}".format(start_iter))
    with EventStorage(start_iter) as storage:
        for data, iteration in zip(data_loader, range(start_iter, max_iter)):
            iteration = iteration + 1
            storage.step()

            loss_dict = model(data)
            losses = sum(loss for loss in loss_dict.values())
            assert torch.isfinite(losses).all(), loss_dict

            storage.put_scalars(total_loss=losses, **loss_dict)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            storage.put_scalar("lr", optimizer.param_groups[0]["lr"], smoothing_hint=False)

            if (
                cfg.TEST.EVAL_PERIOD > 0
                and iteration % cfg.TEST.EVAL_PERIOD == 0
                and iteration != max_iter
            ):
                do_test(cfg, model)

            scheduler.step()

            if iteration - start_iter > 5 and (iteration % 20 == 0 or iteration == max_iter):
                for writer in writers:
                    writer.write()
            periodic_checkpointer.step(iteration)

def get_proposals(self, images, features, gt_instances=None):
    with EventStorage():
        if self.detectron.training:
            proposals, _ = self.detectron.proposal_generator(
                images, features, gt_instances)
            proposals = self.roi_heads_module.label_and_sample_proposals(
                proposals, gt_instances)
        else:
            proposals, _ = self.detectron.proposal_generator(
                images, features, None)
    return proposals

def _test_train(self, input_sizes, instances):
    assert len(input_sizes) == len(instances)
    inputs = [
        create_model_input(torch.rand(3, s[0], s[1]), inst)
        for s, inst in zip(input_sizes, instances)
    ]
    self.model.train()
    with EventStorage():
        losses = self.model(inputs)
        sum(losses.values()).backward()
        del losses

def test_rroi_heads(self):
    torch.manual_seed(121)
    cfg = get_cfg()
    cfg.MODEL.PROPOSAL_GENERATOR.NAME = "RRPN"
    cfg.MODEL.ANCHOR_GENERATOR.NAME = "RotatedAnchorGenerator"
    cfg.MODEL.ROI_HEADS.NAME = "RROIHeads"
    cfg.MODEL.ROI_BOX_HEAD.NAME = "FastRCNNConvFCHead"
    cfg.MODEL.ROI_BOX_HEAD.NUM_FC = 2
    cfg.MODEL.RPN.BBOX_REG_WEIGHTS = (1, 1, 1, 1, 1)
    cfg.MODEL.RPN.HEAD_NAME = "StandardRPNHead"
    cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE = "ROIAlignRotated"
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    backbone = build_backbone(cfg)
    num_images = 2
    images_tensor = torch.rand(num_images, 20, 30)
    image_sizes = [(10, 10), (20, 30)]
    images = ImageList(images_tensor, image_sizes)
    num_channels = 1024
    features = {"res4": torch.rand(num_images, num_channels, 1, 2)}

    image_shape = (15, 15)
    gt_boxes0 = torch.tensor([[2, 2, 2, 2, 30], [4, 4, 4, 4, 0]], dtype=torch.float32)
    gt_instance0 = Instances(image_shape)
    gt_instance0.gt_boxes = RotatedBoxes(gt_boxes0)
    gt_instance0.gt_classes = torch.tensor([2, 1])
    gt_boxes1 = torch.tensor([[1.5, 5.5, 1, 3, 0], [8.5, 4, 3, 2, -50]], dtype=torch.float32)
    gt_instance1 = Instances(image_shape)
    gt_instance1.gt_boxes = RotatedBoxes(gt_boxes1)
    gt_instance1.gt_classes = torch.tensor([1, 2])
    gt_instances = [gt_instance0, gt_instance1]

    proposal_generator = build_proposal_generator(cfg, backbone.output_shape())
    roi_heads = build_roi_heads(cfg, backbone.output_shape())

    with EventStorage():  # capture events in a new storage to discard them
        proposals, proposal_losses = proposal_generator(
            images, features, gt_instances)
        _, detector_losses = roi_heads(images, features, proposals, gt_instances)

    expected_losses = {
        "loss_cls": torch.tensor(4.381618499755859),
        "loss_box_reg": torch.tensor(0.0011829272843897343),
    }
    for name in expected_losses.keys():
        err_msg = "detector_losses[{}] = {}, expected losses = {}".format(
            name, detector_losses[name], expected_losses[name])
        self.assertTrue(
            torch.allclose(detector_losses[name], expected_losses[name]), err_msg)

def do_test(trainer: pl.Trainer, task: GeneralizedRCNNTask):
    """Runs the evaluation with a pre-trained model.

    Args:
        trainer: PyTorch Lightning trainer.
        task: Lightning module instance.
    """
    with EventStorage() as storage:
        task.storage = storage
        trainer.test(task)

def memory_partition():
    with EventStorage(10) as storage:
        optimizer.zero_grad()
        loss_dict = model(partition_inputs)
        losses = sum(loss_dict.values())
        assert torch.isfinite(losses).all(), loss_dict

        torch.cuda.synchronize()
        start_time = time_()
        losses.backward()
        optimizer.step()
        torch.cuda.synchronize()
        return time_() - start_time

def testScalar(self):
    with tempfile.TemporaryDirectory(
            prefix="detectron2_tests") as dir, EventStorage() as storage:
        json_file = os.path.join(dir, "test.json")
        writer = JSONWriter(json_file)
        for k in range(60):
            storage.put_scalar("key", k, smoothing_hint=False)
            if (k + 1) % 20 == 0:
                writer.write()
            storage.step()
        writer.close()
        with open(json_file) as f:
            data = [json.loads(l) for l in f]
            self.assertTrue([int(k["key"]) for k in data] == [19, 39, 59])

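# Hedged companion sketch (assumption, not part of the original test): scalars
# stored in an EventStorage can also be read back in-process; history("key")
# returns a HistoryBuffer whose latest() gives the most recently stored value.
from detectron2.utils.events import EventStorage

with EventStorage(0) as storage:
    for k in range(5):
        storage.put_scalar("key", k, smoothing_hint=False)
        storage.step()
    assert storage.history("key").latest() == 4
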
def test_load_from_checkpoint(self, tmp_dir) -> None:
    task = GeneralizedRCNNTask(self._get_cfg(tmp_dir))
    trainer = self._get_trainer(tmp_dir)
    with EventStorage() as storage:
        task.storage = storage
        trainer.fit(task)

    ckpt_path = os.path.join(tmp_dir, "test.ckpt")
    trainer.save_checkpoint(ckpt_path)
    self.assertTrue(os.path.exists(ckpt_path))

    # load model weights from checkpoint
    task2 = GeneralizedRCNNTask.load_from_checkpoint(ckpt_path)
    self.assertTrue(
        self._compare_state_dict(task.model.state_dict(),
                                 task2.model.state_dict()))

def test_fast_rcnn_rotated(self):
    torch.manual_seed(132)
    cfg = get_cfg()
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS = (10, 10, 5, 5, 1)
    box2box_transform = Box2BoxTransformRotated(
        weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

    box_head_output_size = 8
    num_classes = 5
    cls_agnostic_bbox_reg = False

    box_predictor = FastRCNNOutputLayers(
        box_head_output_size, num_classes, cls_agnostic_bbox_reg, box_dim=5)
    feature_pooled = torch.rand(2, box_head_output_size)
    pred_class_logits, pred_proposal_deltas = box_predictor(feature_pooled)
    image_shape = (10, 10)

    proposal_boxes = torch.tensor(
        [[2, 1.95, 2.4, 1.7, 0], [4.65, 5.25, 4.7, 5.5, 0]], dtype=torch.float32)
    gt_boxes = torch.tensor([[2, 2, 2, 2, 0], [4, 4, 4, 4, 0]], dtype=torch.float32)
    result = Instances(image_shape)
    result.proposal_boxes = RotatedBoxes(proposal_boxes)
    result.gt_boxes = RotatedBoxes(gt_boxes)
    result.gt_classes = torch.tensor([1, 2])
    proposals = []
    proposals.append(result)
    smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA

    outputs = FastRCNNOutputs(box2box_transform, pred_class_logits,
                              pred_proposal_deltas, proposals, smooth_l1_beta)
    with EventStorage():  # capture events in a new storage to discard them
        losses = outputs.losses()

    # Note: the expected losses are slightly different even if
    # the boxes are essentially the same as in the FastRCNNOutput test, because
    # bbox_pred in FastRCNNOutputLayers have different Linear layers/initialization
    # between the two cases.
    expected_losses = {
        "loss_cls": torch.tensor(1.7920907736),
        "loss_box_reg": torch.tensor(4.0410838127),
    }
    for name in expected_losses.keys():
        assert torch.allclose(losses[name], expected_losses[name])