class Classifier(fluid.dygraph.Layer):
    """Binary classifier: ResNet backbone + global average pool + FC head.

    Training loss is a weighted cross-entropy on the pooled final feature
    plus a weighted triplet loss on every intermediate feature map.
    Inference returns the probability of class 0 for each sample.
    """

    def __init__(self, backbone, neck, head, train_cfg=None, test_cfg=None,
                 pretrained=None):
        """Build the model.

        Args:
            backbone (dict): ResNet config; must contain a 'dropout' key,
                which is popped out and used as the head dropout prob.
            neck, head: accepted for interface parity with SCAN; unused here.
            train_cfg (dict): loss weights ('w_cls', 'w_tri').
            test_cfg (dict): inference config (unused in this block).
            pretrained (str): optional pretrained weights for the backbone.
        """
        super(Classifier, self).__init__()
        # Dropout prob travels inside the backbone config dict.
        self.dropout = backbone.pop('dropout')
        self.backbone = ResNet(**backbone)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.triple_loss = TripletLoss()
        self.avgpool = nn.Pool2D(pool_type='avg', global_pooling=True)
        # NOTE: act='softmax' means self.fc already outputs probabilities;
        # downstream code must NOT apply softmax again.
        self.fc = nn.Linear(512, 2, act='softmax')
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialize backbone weights (optionally from `pretrained`)."""
        self.backbone.init_weights(pretrained=pretrained)
        model_size(self)

    def get_losses(self, out, cls_out, mask, gt_labels):
        """Compute the weighted classification + triplet losses.

        Args:
            out (list): backbone feature maps; all but the last feed the
                triplet loss.
            cls_out: class probabilities from the FC head.
            mask: unused here; kept for interface parity with SCAN.
            gt_labels: ground-truth class labels.

        Returns:
            dict with 'loss_cls', 'loss_tir' and their sum 'loss'.
        """
        loss_cls = L.mean(
            L.cross_entropy(cls_out, gt_labels)) * self.train_cfg['w_cls']
        loss_tir = 0
        for feat in out[:-1]:
            feat = L.squeeze(self.avgpool(feat), axes=[2, 3])
            loss_tir += self.triple_loss(feat, gt_labels) * self.train_cfg['w_tri']
        loss = loss_cls + loss_tir
        return dict(loss_cls=loss_cls, loss_tir=loss_tir, loss=loss)

    def forward(self, img, label, mask=None, return_loss=True):
        """Run the classifier.

        Returns the loss dict when `return_loss` is True, otherwise a numpy
        array with the class-0 probability per sample.
        """
        outs = self.backbone(img)
        cls_out = self.avgpool(outs[-1])
        if return_loss:
            # Guard like SCAN does: skip dropout when prob is 0/None.
            if self.dropout:
                cls_out = L.dropout(
                    cls_out, dropout_prob=self.dropout, is_test=False)
            cls_out = self.fc(L.squeeze(cls_out, axes=[2, 3]))
            losses = self.get_losses(outs, cls_out, mask, label)
            return losses
        else:
            cls_out = self.fc(L.squeeze(cls_out, axes=[2, 3]))
            # BUG FIX: self.fc already applies softmax (act='softmax');
            # the original applied L.softmax a second time, which distorts
            # the returned probabilities. Use the fc output directly.
            return cls_out.numpy()[:, 0]
class SCAN(fluid.dygraph.Layer):
    """Encoder–decoder model with a classification head.

    The backbone + neck produce feature maps whose last entry is a spatial
    cue; training combines classification, cue-regression, and triplet
    losses, while inference returns the absolute cue map as numpy.
    """

    def __init__(self, backbone, neck, head, train_cfg=None, test_cfg=None,
                 pretrained=None):
        """Assemble backbone (ResNet), neck (DeCoder) and head (ResNet).

        `head['dropout']` is popped out and stored as the dropout prob
        applied before the final FC layer during training.
        """
        super(SCAN, self).__init__()
        self.dropout = head.pop('dropout')
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.backbone = ResNet(**backbone)
        self.neck = DeCoder(**neck)
        self.head = ResNet(**head)
        self.triple_loss = TripletLoss()
        self.avgpool = nn.Pool2D(pool_type='avg', global_pooling=True)
        self.fc = nn.Linear(512, 2, act='softmax')
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialize all three sub-networks and report the model size."""
        self.backbone.init_weights(pretrained=pretrained)
        self.neck.init_weights()
        self.head.init_weights()
        model_size(self)

    def get_losses(self, out, cls_out, mask, gt_labels):
        """Compute classification, cue-regression, and triplet losses.

        Args:
            out (list): neck outputs; out[-1] is the spatial cue, the rest
                feed the triplet loss.
            cls_out: class probabilities from the FC head.
            mask: ground-truth cue map for the regression loss.
            gt_labels: ground-truth class labels.

        Returns:
            dict with 'loss_cls', 'loss_reg', 'loss_tir' and total 'loss'.
        """
        cfg = self.train_cfg
        loss_cls = cfg['w_cls'] * L.mean(L.cross_entropy(cls_out, gt_labels))

        # Regression target: the raw cue, or (when masks are unavailable)
        # the cue zeroed out on negative samples via the label broadcast.
        last = out[-1]
        if cfg['with_mask']:
            cue = last
        else:
            cue = L.elementwise_mul(
                last, L.cast(gt_labels, 'float32'), axis=0)
        pixels = cue.shape[1] * cue.shape[2] * cue.shape[3]
        # Normalize by the number of cue elements on positive samples only.
        num_reg = L.cast(L.reduce_sum(gt_labels) * pixels, 'float32')
        loss_reg = cfg['w_reg'] * (
            L.reduce_sum(L.abs(mask - cue)) / (num_reg + 1e-8))

        loss_tir = 0
        for fmap in out[:-1]:
            pooled = L.squeeze(self.avgpool(fmap), axes=[2, 3])
            loss_tir = loss_tir + cfg['w_tri'] * self.triple_loss(pooled, gt_labels)

        total = loss_cls + loss_reg + loss_tir
        return dict(loss_cls=loss_cls, loss_reg=loss_reg,
                    loss_tir=loss_tir, loss=total)

    def forward(self, img, label, mask=None, return_loss=True):
        """Run SCAN.

        Training (`return_loss=True`): feed the cue-augmented image through
        the head, classify, and return the loss dict. Inference: return the
        absolute cue map as a numpy array.
        """
        feats = self.neck(self.backbone(img))
        if not return_loss:
            return L.abs(feats[-1]).numpy()

        augmented = img + feats[-1]
        logits = self.avgpool(self.head(augmented)[-1])
        logits = L.squeeze(logits, axes=[2, 3])
        if self.dropout:
            logits = L.dropout(logits, dropout_prob=self.dropout)
        logits = self.fc(logits)
        return self.get_losses(feats, logits, mask, label)