def test_matcher(control_bboxes):
    """Check that the matcher recovers ground-truth boxes planted in the predictions.

    Random predictions are generated, then the ground-truth boxes and one-hot
    class rows are written into randomly chosen query slots. The matcher is
    expected to pair every target with the slot it was written to.

    Args:
        control_bboxes: box data convertible by ``torch.tensor``; it is paired
            with five hard-coded class ids below.
    """
    matcher = HungarianMatcher(1, 1, 1)

    predictions = {
        'bboxes': torch.rand((1, 100, 4), dtype=torch.float32),
        'logits': torch.rand((1, 100, 91), dtype=torch.float32),
    }
    target = {
        'bboxes': torch.tensor(control_bboxes, dtype=torch.float32),
        'classes': torch.tensor([18, 17, 18, 17, 18], dtype=torch.long),
    }
    labels = [target]

    # Choose one distinct query slot per ground-truth object.
    query_count = predictions['logits'].size(1)
    target_count = target['classes'].size(0)
    planted_slots = random.sample(list(range(query_count)), target_count)

    # One-hot class rows: row i has a 1 in the column of target i's class.
    one_hot = torch.zeros((len(planted_slots), 91), dtype=torch.float32)
    class_columns = target['classes'].numpy().astype('int')
    one_hot[range(len(planted_slots)), class_columns] = 1

    # Overwrite the chosen slots with the ground truth.
    predictions['bboxes'][0, planted_slots] = target['bboxes']
    predictions['logits'][0, planted_slots] = one_hot

    first_image_match = matcher(predictions, labels)[0]
    matched_queries = first_image_match[0]
    matched_targets = first_image_match[1]

    # Target j was planted in slot planted_slots[j], so indexing the planted
    # slots by the matched target ids must reproduce the matched query ids.
    assert torch.equal(
        torch.tensor(planted_slots)[matched_targets], matched_queries)
def test_hungarian(self):
    """Exercise the matcher on single, batched and empty-target inputs.

    Checks that a single image yields one index per target, that duplicating
    the image in a batch yields the same assignment for each copy, and that
    images without targets yield empty assignments.
    """
    n_queries, n_targets, n_classes = 100, 15, 91

    logits = torch.rand(1, n_queries, n_classes + 1)
    boxes = torch.rand(1, n_queries, 4)
    tgt_labels = torch.randint(high=n_classes, size=(n_targets,))
    tgt_boxes = torch.rand(n_targets, 4)

    matcher = HungarianMatcher()
    targets = [{'labels': tgt_labels, 'boxes': tgt_boxes}]

    def doubled_outputs():
        # Fresh two-image batch made of the same prediction twice.
        return {
            'pred_logits': logits.repeat(2, 1, 1),
            'pred_boxes': boxes.repeat(2, 1, 1),
        }

    indices_single = matcher(
        {'pred_logits': logits, 'pred_boxes': boxes}, targets)
    indices_batched = matcher(doubled_outputs(), targets * 2)

    # One (query, target) index pair per ground-truth object.
    single_queries, single_targets = indices_single[0][0], indices_single[0][1]
    self.assertEqual(len(single_queries), n_targets)
    self.assertEqual(len(single_targets), n_targets)

    # Both copies in the batch must match exactly like the single image.
    for batch_pos in (0, 1):
        self.assertEqual(
            self.indices_torch2python(indices_single),
            self.indices_torch2python([indices_batched[batch_pos]]))

    # test with empty targets
    tgt_labels_empty = torch.randint(high=n_classes, size=(0,))
    tgt_boxes_empty = torch.rand(0, 4)
    targets_empty = [{'labels': tgt_labels_empty, 'boxes': tgt_boxes_empty}]

    mixed = matcher(doubled_outputs(), targets + targets_empty)
    self.assertEqual(len(mixed[1][0]), 0)

    all_empty = matcher(doubled_outputs(), targets_empty * 2)
    self.assertEqual(len(all_empty[0][0]), 0)
def __init__(self, cfg):
    """Assemble the detector, its set-prediction loss and the pixel normalizer.

    Args:
        cfg: configuration object. This constructor reads
            ``cfg.MODEL.DEVICE``, ``cfg.MODEL.MASK_ON``,
            ``cfg.MODEL.BACKBONE.NAME``, ``cfg.MODEL.PIXEL_MEAN``,
            ``cfg.MODEL.PIXEL_STD`` and the ``cfg.MODEL.DETR.*`` entries
            referenced below.

    Raises:
        NotImplementedError: if the lower-cased backbone name contains
            neither ``'resnet'`` nor ``'fbnet'``.
    """
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
    self.mask_on = cfg.MODEL.MASK_ON

    embed_dim = cfg.MODEL.DETR.HIDDEN_DIM
    query_count = cfg.MODEL.DETR.NUM_OBJECT_QUERIES

    # Transformer parameters:
    head_count = cfg.MODEL.DETR.NHEADS
    drop_prob = cfg.MODEL.DETR.DROPOUT
    ffn_dim = cfg.MODEL.DETR.DIM_FEEDFORWARD
    encoder_depth = cfg.MODEL.DETR.ENC_LAYERS
    decoder_depth = cfg.MODEL.DETR.DEC_LAYERS
    use_pre_norm = cfg.MODEL.DETR.PRE_NORM

    # Loss parameters:
    w_giou = cfg.MODEL.DETR.GIOU_WEIGHT
    w_l1 = cfg.MODEL.DETR.L1_WEIGHT
    w_cls = cfg.MODEL.DETR.CLS_WEIGHT
    aux_supervision = cfg.MODEL.DETR.DEEP_SUPERVISION
    w_no_object = cfg.MODEL.DETR.NO_OBJECT_WEIGHT

    # The config key is read with this exact spelling.
    centered_pos = cfg.MODEL.DETR.CENTERED_POSITION_ENCODIND
    level_count = cfg.MODEL.DETR.NUM_FEATURE_LEVELS

    # The backbone family is chosen by substring match on the configured name.
    backbone_name = cfg.MODEL.BACKBONE.NAME.lower()
    if 'resnet' in backbone_name:
        masked_backbone = ResNetMaskedBackbone(cfg)
    elif 'fbnet' in backbone_name:
        masked_backbone = FBNetMaskedBackbone(cfg)
    else:
        raise NotImplementedError

    position_embedding = PositionEmbeddingSine(
        embed_dim // 2, normalize=True, centered=centered_pos)
    backbone = Joiner(masked_backbone, position_embedding)
    backbone.num_channels = masked_backbone.num_channels

    self.use_focal_loss = cfg.MODEL.DETR.USE_FOCAL_LOSS

    if cfg.MODEL.DETR.DEFORMABLE:
        transformer = DeformableTransformer(
            d_model=embed_dim,
            nhead=head_count,
            num_encoder_layers=encoder_depth,
            num_decoder_layers=decoder_depth,
            dim_feedforward=ffn_dim,
            dropout=drop_prob,
            activation="relu",
            return_intermediate_dec=True,
            num_feature_levels=level_count,
            dec_n_points=4,
            enc_n_points=4,
            two_stage=False,
            two_stage_num_proposals=query_count,
        )
        self.detr = DeformableDETR(
            backbone,
            transformer,
            num_classes=self.num_classes,
            num_queries=query_count,
            num_feature_levels=level_count,
            aux_loss=aux_supervision,
        )
    else:
        transformer = Transformer(
            d_model=embed_dim,
            dropout=drop_prob,
            nhead=head_count,
            dim_feedforward=ffn_dim,
            num_encoder_layers=encoder_depth,
            num_decoder_layers=decoder_depth,
            normalize_before=use_pre_norm,
            return_intermediate_dec=aux_supervision,
        )
        self.detr = DETR(
            backbone,
            transformer,
            num_classes=self.num_classes,
            num_queries=query_count,
            aux_loss=aux_supervision,
            use_focal_loss=self.use_focal_loss,
        )

    if self.mask_on:
        frozen_weights = cfg.MODEL.DETR.FROZEN_WEIGHTS
        has_frozen_weights = frozen_weights != ''
        if has_frozen_weights:
            print("LOAD pre-trained weights")
            checkpoint = torch.load(
                frozen_weights,
                map_location=lambda storage, loc: storage)['model']
            # Keep only entries whose key mentions 'detr.' and strip that
            # marker so the keys line up with the bare detector.
            remapped = {}
            for key, tensor in checkpoint.items():
                if 'detr.' not in key:
                    print(f"Skipping loading weight {key} from frozen model")
                    continue
                remapped[key.replace('detr.', '')] = tensor
            del checkpoint
            self.detr.load_state_dict(remapped)
            del remapped

        # Wrap the detector with the segmentation head; the detector is
        # frozen only when pre-trained weights were supplied.
        self.detr = DETRsegm(self.detr, freeze_detr=has_frozen_weights)
        self.seg_postprocess = PostProcessSegm

    self.detr.to(self.device)

    # building criterion
    matcher = HungarianMatcher(
        cost_class=w_cls,
        cost_bbox=w_l1,
        cost_giou=w_giou,
        use_focal_loss=self.use_focal_loss,
    )

    loss_weights = {
        "loss_ce": w_cls,
        "loss_bbox": w_l1,
        "loss_giou": w_giou,
    }
    if aux_supervision:
        # Replicate every base weight under a "_<layer>" suffix for each of
        # the first (decoder_depth - 1) layers. The comprehension is fully
        # evaluated before update() mutates the dict.
        loss_weights.update({
            name + f"_{layer}": weight
            for layer in range(decoder_depth - 1)
            for name, weight in loss_weights.items()
        })

    loss_names = ["labels", "boxes", "cardinality"]
    if self.mask_on:
        loss_names += ["masks"]

    if self.use_focal_loss:
        criterion = FocalLossSetCriterion(
            self.num_classes,
            matcher=matcher,
            weight_dict=loss_weights,
            losses=loss_names,
        )
    else:
        criterion = SetCriterion(
            self.num_classes,
            matcher=matcher,
            weight_dict=loss_weights,
            eos_coef=w_no_object,
            losses=loss_names,
        )
    self.criterion = criterion
    self.criterion.to(self.device)

    # Per-channel statistics reshaped to (3, 1, 1) for the normalizer below.
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std

    self.to(self.device)
def main(config):
    """Train the DETR model on one validation fold and keep the best checkpoint.

    The fold split file is created on first use. Each epoch runs one training
    pass and one validation pass, and the model's state dict is saved whenever
    the average validation loss improves.

    Args:
        config: mapping providing ``"val_fold"``, ``"batch_size"``,
            ``"num_classes"``, ``"num_queries"``, ``"null_class_coef"``,
            ``"lr"`` and ``"num_epochs"``. It is also forwarded to
            ``train_fn``.
    """
    project = Project()

    # Create the fold assignment only when it does not exist yet.
    if not (project.inputs_dir / "df_folds.csv").is_file():
        print("splitting dataset..")
        data.split_folds(project.inputs_dir)

    fold = config["val_fold"]
    logging.info(f"val fold = {fold}")

    df_folds = pd.read_csv(project.inputs_dir / "df_folds.csv")
    marking = pd.read_csv(project.inputs_dir / "marking.csv")

    # The selected fold is held out for validation; the rest is for training.
    df_train = df_folds[df_folds["fold"] != fold]
    df_valid = df_folds[df_folds["fold"] == fold]

    train_dataset = WheatDataset(
        image_ids=df_train["image_id"].values,
        dataframe=marking,
        path=project.inputs_dir / "train",
        transforms=get_train_transforms(),
    )
    valid_dataset = WheatDataset(
        image_ids=df_valid["image_id"].values,
        dataframe=marking,
        path=project.inputs_dir / "train",
        transforms=get_valid_transforms(),
    )

    # NOTE(review): the training loader does not shuffle. This is kept as-is
    # to preserve existing behavior -- confirm whether it is intentional.
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=config["batch_size"],
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn,
    )
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=config["batch_size"],
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn,
    )

    # `device` is not defined in this function; it is expected to be
    # available at module level.
    model = model_detr.DETRModel(
        num_classes=config["num_classes"], num_queries=config["num_queries"]
    )
    model = model.to(device)

    matcher = HungarianMatcher()
    weight_dict = {"loss_ce": 1, "loss_bbox": 1, "loss_giou": 1}
    losses = ["labels", "boxes", "cardinality"]

    # The criterion is given one class fewer than the model -- presumably
    # because the model's count includes the "no object" class; verify.
    criterion = SetCriterion(
        config["num_classes"] - 1,
        matcher,
        weight_dict,
        eos_coef=config["null_class_coef"],
        losses=losses,
    )
    criterion = criterion.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=config["lr"])

    best_loss = float("inf")
    for epoch in range(config["num_epochs"]):
        train_loss = train_fn(
            train_data_loader, model, criterion, optimizer,
            config=config, epoch=epoch
        )
        valid_loss = eval_fn(valid_data_loader, model, criterion)

        # Build the summary once so console and log output cannot drift apart.
        summary = (
            f"|EPOCH {epoch+1}| TRAIN_LOSS {train_loss.avg}| "
            f"VALID_LOSS {valid_loss.avg}|"
        )
        print(summary)
        logging.info(summary)

        if valid_loss.avg < best_loss:
            best_loss = valid_loss.avg
            print(f"New best model in epoch {epoch+1}")
            torch.save(
                model.state_dict(),
                project.checkpoint_dir / f"detr_best_{fold}.pth",
            )
def __init__(self, cfg):
    """Build the DETR detector, its set criterion and the input normalizer.

    Args:
        cfg: configuration object. This constructor reads
            ``cfg.MODEL.DEVICE``, ``cfg.MODEL.MASK_ON``,
            ``cfg.MODEL.PIXEL_MEAN``, ``cfg.MODEL.PIXEL_STD`` and the
            ``cfg.MODEL.DETR.*`` entries referenced below. The whole object
            is also handed to ``MaskedBackbone``.
    """
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
    self.mask_on = cfg.MODEL.MASK_ON

    model_width = cfg.MODEL.DETR.HIDDEN_DIM
    n_object_queries = cfg.MODEL.DETR.NUM_OBJECT_QUERIES

    # Transformer parameters:
    n_heads = cfg.MODEL.DETR.NHEADS
    p_dropout = cfg.MODEL.DETR.DROPOUT
    ff_width = cfg.MODEL.DETR.DIM_FEEDFORWARD
    n_encoder_layers = cfg.MODEL.DETR.ENC_LAYERS
    n_decoder_layers = cfg.MODEL.DETR.DEC_LAYERS
    norm_first = cfg.MODEL.DETR.PRE_NORM

    # Loss parameters:
    giou_coef = cfg.MODEL.DETR.GIOU_WEIGHT
    l1_coef = cfg.MODEL.DETR.L1_WEIGHT
    with_aux_losses = cfg.MODEL.DETR.DEEP_SUPERVISION
    no_object_coef = cfg.MODEL.DETR.NO_OBJECT_WEIGHT

    # Backbone joined with a sine position embedding of half the model width.
    masked_backbone = MaskedBackbone(cfg)
    position_embedding = PositionEmbeddingSine(
        model_width // 2, normalize=True)
    backbone = Joiner(masked_backbone, position_embedding)
    backbone.num_channels = masked_backbone.num_channels

    transformer = Transformer(
        d_model=model_width,
        dropout=p_dropout,
        nhead=n_heads,
        dim_feedforward=ff_width,
        num_encoder_layers=n_encoder_layers,
        num_decoder_layers=n_decoder_layers,
        normalize_before=norm_first,
        return_intermediate_dec=with_aux_losses,
    )

    self.detr = DETR(
        backbone,
        transformer,
        num_classes=self.num_classes,
        num_queries=n_object_queries,
        aux_loss=with_aux_losses,
    )

    if self.mask_on:
        frozen_weights = cfg.MODEL.DETR.FROZEN_WEIGHTS
        load_frozen = frozen_weights != ''
        if load_frozen:
            print("LOAD pre-trained weights")
            stored = torch.load(
                frozen_weights,
                map_location=lambda storage, loc: storage)['model']
            # Entries without 'detr.' in their key are reported and dropped;
            # the rest are stored with that marker removed from the key.
            detector_state = {}
            for name, value in stored.items():
                if 'detr.' not in name:
                    print(f"Skipping loading weight {name} from frozen model")
                    continue
                detector_state[name.replace('detr.', '')] = value
            del stored
            self.detr.load_state_dict(detector_state)
            del detector_state

        # Add the segmentation wrapper; freeze the detector only when
        # pre-trained weights were provided.
        self.detr = DETRsegm(self.detr, freeze_detr=load_frozen)
        self.seg_postprocess = PostProcessSegm

    self.detr.to(self.device)

    # building criterion
    matcher = HungarianMatcher(
        cost_class=1, cost_bbox=l1_coef, cost_giou=giou_coef)

    weight_dict = {
        "loss_ce": 1,
        "loss_bbox": l1_coef,
        "loss_giou": giou_coef,
    }
    if with_aux_losses:
        # One suffixed copy of each base weight per intermediate decoder
        # layer. The comprehension finishes before the dict is updated.
        weight_dict.update({
            base + f"_{layer_idx}": coef
            for layer_idx in range(n_decoder_layers - 1)
            for base, coef in weight_dict.items()
        })

    losses = ["labels", "boxes", "cardinality"]
    if self.mask_on:
        losses += ["masks"]

    self.criterion = SetCriterion(
        self.num_classes,
        matcher=matcher,
        weight_dict=weight_dict,
        eos_coef=no_object_coef,
        losses=losses,
    )
    self.criterion.to(self.device)

    # Channel statistics shaped (3, 1, 1) and captured by the normalizer.
    mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(3, 1, 1)
    std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(3, 1, 1)
    self.normalizer = lambda x: (x - mean) / std

    self.to(self.device)
def __init__(self, cfg):
    """Create the detector via ``build_detr_model`` plus its loss and normalizer.

    Args:
        cfg: configuration object. This constructor reads
            ``cfg.MODEL.DEVICE``, ``cfg.MODEL.MASK_ON``,
            ``cfg.MODEL.PIXEL_MEAN``, ``cfg.MODEL.PIXEL_STD`` and the
            ``cfg.MODEL.DETR.*`` entries referenced below. The whole object
            is also passed to ``build_detr_model``.
    """
    super().__init__()

    self.device = torch.device(cfg.MODEL.DEVICE)
    self.use_focal_loss = cfg.MODEL.DETR.USE_FOCAL_LOSS
    self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
    self.mask_on = cfg.MODEL.MASK_ON

    decoder_layer_count = cfg.MODEL.DETR.DEC_LAYERS

    # Loss parameters:
    giou_w = cfg.MODEL.DETR.GIOU_WEIGHT
    l1_w = cfg.MODEL.DETR.L1_WEIGHT
    cls_w = cfg.MODEL.DETR.CLS_WEIGHT
    supervise_all_layers = cfg.MODEL.DETR.DEEP_SUPERVISION
    background_w = cfg.MODEL.DETR.NO_OBJECT_WEIGHT

    self.detr = build_detr_model(cfg)

    if self.mask_on:
        frozen_weights = cfg.MODEL.DETR.FROZEN_WEIGHTS
        use_frozen = frozen_weights != ""
        if use_frozen:
            print("LOAD pre-trained weights")
            raw_state = torch.load(
                frozen_weights,
                map_location=lambda storage, loc: storage)["model"]
            # Keep entries whose key contains "detr." (with that marker
            # removed); report and skip everything else.
            filtered_state = {}
            for param_name, param in raw_state.items():
                if "detr." not in param_name:
                    print(
                        f"Skipping loading weight {param_name} from frozen model")
                    continue
                filtered_state[param_name.replace("detr.", "")] = param
            del raw_state
            self.detr.load_state_dict(filtered_state)
            del filtered_state

        # Wrap with the segmentation head; the detector is frozen only when
        # a frozen-weights path was configured.
        self.detr = DETRsegm(self.detr, freeze_detr=use_frozen)
        self.seg_postprocess = PostProcessSegm

    self.detr.to(self.device)

    # building criterion
    matcher = HungarianMatcher(
        cost_class=cls_w,
        cost_bbox=l1_w,
        cost_giou=giou_w,
        use_focal_loss=self.use_focal_loss,
    )

    weight_dict = {
        "loss_ce": cls_w,
        "loss_bbox": l1_w,
        "loss_giou": giou_w,
    }
    if supervise_all_layers:
        # Suffix every base weight with "_<i>" for i in
        # [0, decoder_layer_count - 1). The comprehension is evaluated in
        # full before the dict is updated.
        weight_dict.update({
            key + f"_{i}": value
            for i in range(decoder_layer_count - 1)
            for key, value in weight_dict.items()
        })

    losses = ["labels", "boxes", "cardinality"]
    if self.mask_on:
        losses += ["masks"]

    if self.use_focal_loss:
        criterion = FocalLossSetCriterion(
            self.num_classes,
            matcher=matcher,
            weight_dict=weight_dict,
            losses=losses,
        )
    else:
        criterion = SetCriterion(
            self.num_classes,
            matcher=matcher,
            weight_dict=weight_dict,
            eos_coef=background_w,
            losses=losses,
        )
    self.criterion = criterion
    self.criterion.to(self.device)

    # Channel statistics shaped (3, 1, 1) and captured by the normalizer.
    channel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        3, 1, 1)
    channel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        3, 1, 1)
    self.normalizer = lambda x: (x - channel_mean) / channel_std

    self.to(self.device)
pretrained=True) self.in_features = self.model.class_embed.in_features self.model.class_embed = nn.Linear(in_features=self.in_features, out_features=self.num_classes) self.model.num_queries = self.num_queries def forward(self, images): return self.model(images) ''' code taken from github repo detr , 'code present in engine.py' ''' matcher = HungarianMatcher() weight_dict = weight_dict = {'loss_ce': 1, 'loss_bbox': 1, 'loss_giou': 1} losses = ['labels', 'boxes', 'cardinality'] device = torch.device('cuda') model = DETRModel(num_classes=num_classes, num_queries=num_queries) model.load_state_dict(torch.load('detr_best_0.pth')) model = model.to(device) def iou(bboxes_preds, bboxes_targets): area1 = (bboxes_preds[:, 2] - bboxes_preds[:, 0]) * (bboxes_preds[:, 3] - bboxes_preds[:, 1]) area2 = (bboxes_targets[:, 2] - bboxes_targets[:, 0]) * ( bboxes_targets[:, 3] - bboxes_targets[:, 1])