Example #1
    def _get_surpressed_boxes(self, img_batch, regression, classification, anchors):
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        transformed_anchors = self.regressBoxes.forward(anchors, regression)
        transformed_anchors = self.clipBoxes.forward(transformed_anchors, img_batch)

        scores = torch.max(classification, dim=2, keepdim=True)[0]

        scores_over_thresh = (scores > 0.05)[0, :, 0]

        if scores_over_thresh.sum() == 0:
            # no boxes survive the score threshold; return empty predictions
            # in the same dict format as the normal path below
            return {'bboxes': torch.zeros(0, 4),
                    'pred_class': torch.zeros(0),
                    'prob': torch.zeros(0)}

        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]

        anchors_nms_idx = nms(transformed_anchors, scores, overlap=0.5)

        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        
        outputs = { 'bboxes': transformed_anchors[0, anchors_nms_idx, :],
                    'pred_class': nms_class,
                    'prob': nms_scores }

        return outputs
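
The nms call above comes from the surrounding repo, so its exact signature is not shown here. As a self-contained illustration of the same post-processing flow (score threshold, then IoU-based suppression), here is a minimal sketch using torchvision.ops.nms instead; all tensors are random stand-ins:

import torch
from torchvision.ops import nms

# fake per-anchor scores and boxes (x1, y1, x2, y2) for one image
torch.manual_seed(0)
boxes = torch.rand(100, 4) * 200
boxes[:, 2:] += boxes[:, :2]   # ensure x2 > x1 and y2 > y1
scores = torch.rand(100)

keep = scores > 0.05           # same 0.05 threshold as the example
boxes, scores = boxes[keep], scores[keep]

keep_idx = nms(boxes, scores, iou_threshold=0.5)
print(boxes[keep_idx].shape, scores[keep_idx].shape)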
Example #2
File: model.py Project: Tung-I/FRCNN
    def __init__(self, num_classes, block, layers):
        self.inplanes = 64
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError(f"Block type {block} not understood")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        # focal-loss prior (RetinaNet): initialise the classification head so
        # that every anchor starts with ~1% foreground probability
        prior = 0.01

        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()
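
The prior = 0.01 block above is the RetinaNet focal-loss initialisation: with the weights zeroed, the classification head's bias is set so that every anchor starts with roughly a 1% foreground probability, which keeps the focal loss from being swamped by background anchors early in training. A standalone check of that identity:

import math
import torch

prior = 0.01
bias = -math.log((1.0 - prior) / prior)

# a zero-weight head outputs only this bias, so after the sigmoid the
# initial foreground probability is exactly the chosen prior
print(torch.sigmoid(torch.tensor(bias)))  # tensor(0.0100)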
Example #3
    def __init__(self,
                 num_classes,
                 block,
                 layers,
                 n_head=1,
                 attention_type='concat',
                 shot_mode='mean',
                 num_way=2,
                 num_shot=5,
                 pos_encoding=True,
                 pretrained=False):
        super(ceaa_retinanet, self).__init__()
        self.model_path = 'data/pretrained_model/resnet50_caffe.pth'
        self.pretrained = pretrained
        self.inplanes = 64
        self.n_head = n_head
        self.attention_type = attention_type
        self.shot_mode = shot_mode
        self.num_shot = num_shot
        self.pos_encoding = pos_encoding
        self.support_im_size = 320

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if self.pretrained:
            print("Loading pretrained weights from %s" % (self.model_path))
            state_dict = torch.load(self.model_path)
            self.load_state_dict({
                k: v
                for k, v in state_dict.items() if k in self.state_dict()
            })

            def set_bn_fix(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    for p in m.parameters():
                        p.requires_grad = False

            self.apply(set_bn_fix)

        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]
        else:
            raise ValueError(f"Block type {block} not understood")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1],
                                   fpn_sizes[2])  # [512, 1024, 2048]
        self.fpn_dim = 256
        attention_output_dim = 256 if attention_type == 'product' else 512
        self.regressionModel = RegressionModel(attention_output_dim)
        self.classificationModel = ClassificationModel(attention_output_dim,
                                                       num_classes=num_classes)
        self.anchors = Anchors([4, 5, 6, 7])
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        # weights initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()

        self.resnet_base = nn.Sequential(self.conv1, self.bn1, self.relu,
                                         self.maxpool)

        # querys, keys
        Q_list = []
        K_list = []
        self.d_k = 64
        for i in range(self.n_head):
            Q_weight = nn.Linear(self.fpn_dim, self.d_k)
            K_weight = nn.Linear(self.fpn_dim, self.d_k)
            init.normal_(Q_weight.weight, std=0.01)
            init.constant_(Q_weight.bias, 0)
            init.normal_(K_weight.weight, std=0.01)
            init.constant_(K_weight.bias, 0)
            Q_list.append(Q_weight)
            K_list.append(K_weight)
        self.pyramid_Q_layers = nn.ModuleList(Q_list)
        self.pyramid_K_layers = nn.ModuleList(K_list)
        if self.pos_encoding:
            pel_4 = PositionalEncoding(d_model=256, max_len=20 * 20)
            pel_5 = PositionalEncoding(d_model=256, max_len=10 * 10)
            pel_6 = PositionalEncoding(d_model=256, max_len=5 * 5)
            pel_7 = PositionalEncoding(d_model=256, max_len=3 * 3)
            # build the ModuleList inside the branch: the pel_* layers only
            # exist when pos_encoding is enabled
            self.pos_encoding_layers = nn.ModuleList([pel_4, pel_5, pel_6, pel_7])
        if n_head != 1:
            # `feature_size` was undefined in the original; self.fpn_dim (256)
            # is the most plausible width for recombining the head outputs
            self.multihead_layer = nn.Linear(n_head * self.fpn_dim,
                                             self.fpn_dim)
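
The attention forward pass of this model is not shown, but the Q_weight/K_weight pairs above project 256-dim FPN features down to d_k = 64, the shape used by standard scaled dot-product attention. A minimal single-head sketch with the same dimensions (the tensor names and sizes here are illustrative, not the repo's API):

import torch
import torch.nn as nn

fpn_dim, d_k = 256, 64
q_proj = nn.Linear(fpn_dim, d_k)
k_proj = nn.Linear(fpn_dim, d_k)

query_feat = torch.rand(1, 20 * 20, fpn_dim)    # flattened P4 query features
support_feat = torch.rand(1, 10 * 10, fpn_dim)  # flattened support features

q = q_proj(query_feat)                          # (1, 400, 64)
k = k_proj(support_feat)                        # (1, 100, 64)
attn = torch.softmax(q @ k.transpose(1, 2) / d_k ** 0.5, dim=-1)
attended = attn @ support_feat                  # (1, 400, 256)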
Example #4
class ObjectDetection(ImageAnalysis):
    def __init__(self, model, classes, means, sdevs, train_loader=None, val_loader=None, line_colors=None):
        super().__init__(model, classes, train_loader, val_loader, means, sdevs)
        self.line_col = line_colors
        # self.visualiser is referenced in run_singletask_model; make sure the
        # attribute exists even if the parent class did not set it
        self.visualiser = getattr(self, 'visualiser', None)

        self.epoch_now = 1

    def run_singletask_model(self, settings, split, loader, optimizer=False):
        loss = 0
        accuracies = []
        for i, batch in enumerate(loader):
            if optimizer:
                optimizer.zero_grad()
            # Variable is a no-op wrapper in modern PyTorch, kept for compatibility
            images = Variable(batch[0])
            labels = Variable(batch[1])
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()

            losses, preds = self._get_batch_loss_and_preds(images, labels, settings['criterion'])
            batch_loss = losses[0] + losses[1]
            if optimizer:
                batch_loss.backward()
                optimizer.step()

            loss += batch_loss.item()  # accumulate as a float so each batch's graph is freed
            # accuracies.append(analysis_utils.get_mean_acc(preds, labels))
            if (i+1) % settings['report_interval'][split] == 0:
                print(f"{split}: [{i+1} out of {len(loader)}]\nClassification: {losses[0]:.4f}\nRegression: {losses[1]:.4f}")

            if self.visualiser and i+1==len(loader):
                visualise.draw_rectangles(images, preds['bboxes'], preds['pred_class'], preds['prob'], gt=labels)
                self._imgs_to_tensorboard(images, split)

            # Memory management: clear these references, otherwise outputs from
            # both the train and val phases stay alive, doubling memory use
            images = labels = batch_loss = None

        loss = loss/(i+1)  # mean loss over the epoch
        return loss, accuracies

    def _get_batch_loss_and_preds(self, images, labels, criterion):
        regression, classification, anchors = self.model([images, labels])
        # use the criterion passed in from settings; the original constructed a
        # fresh FocalLoss here, silently ignoring the argument
        loss = criterion(regression, classification, anchors, labels)
        preds = self._get_surpressed_boxes(images, regression, classification, anchors)
        return loss, preds

    def _get_surpressed_boxes(self, img_batch, regression, classification, anchors):
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

        transformed_anchors = self.regressBoxes.forward(anchors, regression)
        transformed_anchors = self.clipBoxes.forward(transformed_anchors, img_batch)

        scores = torch.max(classification, dim=2, keepdim=True)[0]

        scores_over_thresh = (scores > 0.05)[0, :, 0]

        if scores_over_thresh.sum() == 0:
            # no boxes survive the score threshold; return empty predictions
            # in the same dict format as the normal path below
            return {'bboxes': torch.zeros(0, 4),
                    'pred_class': torch.zeros(0),
                    'prob': torch.zeros(0)}

        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]

        anchors_nms_idx = nms(transformed_anchors, scores, overlap=0.5)

        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        
        outputs = { 'bboxes': transformed_anchors[0, anchors_nms_idx, :],
                    'pred_class': nms_class,
                    'prob': nms_scores }

        return outputs

    def _imgs_to_tensorboard(self, imgs, split):
        img = visualise.decode_image(imgs, self.means, self.sdevs)
        img_tensor = torch.Tensor(img.permute(0, 3, 1, 2))
        row_views = img_tensor[:, :3, :, :]  # keep only the colour bands of the image

        side_view = vutils.make_grid(row_views, nrow=len(img), normalize=True, scale_each=True)
        self.writer.add_image(f'{split}_Predicted-from-Image', side_view, self.epoch_now, dataformats='CHW')
        
    def train(self, settings):
        """Performs model training"""
        if self.loss_tracker.store_loss is True:
            self.loss_tracker.set_loss_file('train')
        if 'lr_decay_patience' in settings:
            lr_scheduler = ReduceLROnPlateau(settings['optimizer'],
                                             'min',
                                             factor=settings['lr_decay'],
                                             patience=settings['lr_decay_patience'],
                                             verbose=True)

        for epoch in range(settings['n_epochs']):
            print(f"\n======= Epoch {self.epoch_now} =======\n")
            self.model = self.model.train()
            epoch_train_loss, _ = self.run_singletask_model(settings,
                                                            'train',
                                                            self.train_loader,
                                                            optimizer=settings['optimizer'])

            # self.loss_tracker.store_epoch_loss('train', self.epoch_now, epoch_train_loss, epoch_train_accuracy)
        
            if self.val_loader is not None:
                self.validate(settings)

            if self.epoch_now % settings['save_interval'] == 0 and self.loss_tracker.store_models is True:
                print("Checkpoint-saving model")
                self.loss_tracker.save_model(self.model, epoch)

            # self._visualise_loss(settings, self.epoch_now, epoch_train_accuracy, 'train')
            # self._print_results(self.epoch_now, epoch_train_loss, epoch_train_accuracy, 'train')

            if 'lr_decay_epoch' in settings:
                if epoch in settings['lr_decay_epoch']:
                    analysis_utils.decay_optimizer_lr(settings['optimizer'], settings['lr_decay'])
                    print(f"\nlr decayed by {settings['lr_decay']}\n")
            elif 'lr_decay_patience' in settings:
                lr_scheduler.step(epoch_train_loss)

            self.epoch_now = len(self.loss_tracker.all_loss['train'])+1                 
                
        if settings['shutdown'] is True:
            os.system("shutdown")

    def validate(self, settings):
        """For a given model, evaluation criterion,
        and validation loader, performs a single evaluation
        pass."""
        self.model = self.model.eval()

        with torch.no_grad():
            if self.loss_tracker.store_loss is True:
                self.loss_tracker.set_loss_file('val')

            epoch_val_loss, epoch_val_accuracy = self.run_singletask_model(settings,
                                                                        'val',
                                                                        self.val_loader)

            self.loss_tracker.store_epoch_loss('val', self.epoch_now, epoch_val_loss, epoch_val_accuracy)

            self._visualise_loss(settings, self.epoch_now, epoch_val_accuracy, 'val')
            self._save_if_best(epoch_val_loss, self.model, settings['run_name']+'_best')
            self._print_results(self.epoch_now, epoch_val_loss, epoch_val_accuracy, 'val')
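
The lr_decay_patience branch in train() uses PyTorch's built-in ReduceLROnPlateau, stepping it once per epoch with the epoch loss. A self-contained sketch of that scheduler pattern, with a toy model and a deliberately flat loss:

import torch
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=2)

for epoch in range(8):
    flat_loss = 1.0            # a loss that never improves
    scheduler.step(flat_loss)  # lr is halved once `patience` epochs pass without improvement
    print(epoch, optimizer.param_groups[0]['lr'])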
Example #5
    def __init__(self, num_classes, block, layers, pretrained=False):
        super(retina, self).__init__()
        self.model_path = 'data/pretrained_model/resnet50_caffe.pth'
        self.pretrained = pretrained
        self.inplanes = 64
    
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if self.pretrained:
            print("Loading pretrained weights from %s" %(self.model_path))
            state_dict = torch.load(self.model_path)
            self.load_state_dict({k:v for k,v in state_dict.items() if k in self.state_dict()})

            def set_bn_fix(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    for p in m.parameters():
                        p.requires_grad = False
            self.apply(set_bn_fix)

        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError(f"Block type {block} not understood")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)

        self.anchors = Anchors()
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        # weights initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()

        self.resnet_base = nn.Sequential(
            self.conv1, 
            self.bn1, 
            self.relu, 
            self.maxpool
        )
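
set_bn_fix, defined in the pretrained branch above, walks the module tree and turns off gradients for the affine parameters of every BatchNorm layer. A self-contained sketch of the pattern:

import torch.nn as nn

def set_bn_fix(m):
    # freeze the affine weight/bias of any BatchNorm variant
    if 'BatchNorm' in m.__class__.__name__:
        for p in m.parameters():
            p.requires_grad = False

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
net.apply(set_bn_fix)
print([p.requires_grad for p in net[1].parameters()])  # [False, False]

Note that freezing the parameters alone does not stop BatchNorm's running-statistics updates; that is what the separate freeze_bn() call (switching the layers to eval mode) handles.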
Example #6
    def __init__(self,
                 num_classes,
                 block,
                 layers,
                 attention_type,
                 reduce_dim,
                 beta,
                 num_way=2,
                 num_shot=5,
                 pos_encoding=True,
                 pretrained=False):
        super(SEPAA_retinanet, self).__init__()
        self.model_path = 'data/pretrained_model/resnet50_caffe.pth'
        self.pretrained = pretrained
        self.inplanes = 64
        self.attention_type = attention_type
        self.num_shot = num_shot
        self.pos_encoding = pos_encoding
        self.support_im_size = 320
        self.reduce_dim = reduce_dim
        self.beta = beta
        self.unary_gamma = 0.1

        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        if self.pretrained:
            print("Loading pretrained weights from %s" % (self.model_path))
            state_dict = torch.load(self.model_path)
            self.load_state_dict({
                k: v
                for k, v in state_dict.items() if k in self.state_dict()
            })

            def set_bn_fix(m):
                classname = m.__class__.__name__
                if classname.find('BatchNorm') != -1:
                    for p in m.parameters():
                        p.requires_grad = False

            self.apply(set_bn_fix)

        if block == BasicBlock:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv2.out_channels,
                self.layer3[layers[2] - 1].conv2.out_channels,
                self.layer4[layers[3] - 1].conv2.out_channels
            ]
        elif block == Bottleneck:
            fpn_sizes = [
                self.layer2[layers[1] - 1].conv3.out_channels,
                self.layer3[layers[2] - 1].conv3.out_channels,
                self.layer4[layers[3] - 1].conv3.out_channels
            ]
        else:
            raise ValueError(f"Block type {block} not understood")

        attention_output_dim = 256 if self.attention_type == 'product' else 512
        if self.attention_type == 'product':
            self.fpn = PyramidFeatures(
                fpn_sizes[0],
                fpn_sizes[1],
                fpn_sizes[2],
                feature_size=attention_output_dim)  # [512, 1024, 2048]
        else:
            self.fpn = PyramidFeatures(fpn_sizes[0] * 2,
                                       fpn_sizes[1] * 2,
                                       fpn_sizes[2] * 2,
                                       feature_size=attention_output_dim)

        self.regressionModel = RegressionModel(attention_output_dim)
        self.classificationModel = ClassificationModel(attention_output_dim,
                                                       num_classes=num_classes)
        self.anchors = Anchors([4, 5, 6, 7])
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()
        self.focalLoss = losses.FocalLoss()

        # weights initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
        prior = 0.01
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log(
            (1.0 - prior) / prior))
        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)
        self.freeze_bn()

        self.resnet_base = nn.Sequential(self.conv1, self.bn1, self.relu,
                                         self.maxpool)

        # querys, keys
        unary_list = []
        adapt_q_list = []
        adapt_k_list = []
        channel_k_list = []
        self.fpn_dims = [512, 1024, 2048]
        for fpn_dim in self.fpn_dims:
            unary_layer = nn.Linear(fpn_dim, 1)
            init.normal_(unary_layer.weight, std=0.01)
            init.constant_(unary_layer.bias, 0)
            adapt_q_layer = nn.Linear(fpn_dim, reduce_dim)
            init.normal_(adapt_q_layer.weight, std=0.01)
            init.constant_(adapt_q_layer.bias, 0)
            adapt_k_layer = nn.Linear(fpn_dim, reduce_dim)
            init.normal_(adapt_k_layer.weight, std=0.01)
            init.constant_(adapt_k_layer.bias, 0)
            channel_k_layer = nn.Linear(fpn_dim, 1)
            init.normal_(channel_k_layer.weight, std=0.01)
            init.constant_(channel_k_layer.bias, 0)

            unary_list.append(unary_layer)
            adapt_q_list.append(adapt_q_layer)
            adapt_k_list.append(adapt_k_layer)
            channel_k_list.append(channel_k_layer)
        self.unary_layers = nn.ModuleList(unary_list)
        self.adapt_Q_layers = nn.ModuleList(adapt_q_list)
        self.adapt_K_layers = nn.ModuleList(adapt_k_list)
        self.channel_K_layers = nn.ModuleList(channel_k_list)
        if self.pos_encoding:
            pel_3 = PositionalEncoding(d_model=512, max_len=40 * 40)
            pel_4 = PositionalEncoding(d_model=1024, max_len=20 * 20)
            pel_5 = PositionalEncoding(d_model=2048, max_len=10 * 10)
            # build the ModuleList inside the branch: the pel_* layers only
            # exist when pos_encoding is enabled
            self.pos_encoding_layers = nn.ModuleList([pel_3, pel_4, pel_5])
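
PositionalEncoding is the repo's own module and its implementation is not shown; the (d_model, max_len) signature matches the standard sinusoidal encoding from the Transformer paper, so under that assumption a sketch would look like:

import math
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    """Standard sinusoidal positional encoding (assumed form)."""

    def __init__(self, d_model, max_len):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float()
                             * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))  # (1, max_len, d_model)

    def forward(self, x):  # x: (batch, seq_len, d_model)
        return x + self.pe[:, :x.size(1)]

pel_3 = PositionalEncoding(d_model=512, max_len=40 * 40)
print(pel_3(torch.rand(2, 1600, 512)).shape)  # torch.Size([2, 1600, 512])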