Code example #1
File: eval.py Project: ai4prod/ai4prod_python
def evalimage(net: Yolact, path: str, save_path: str = None):
    frame = torch.from_numpy(cv2.imread(path)).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))
    pred_outs = net(batch)
    #priors = np.array(pred_outs[3])
    #np.savetxt('priors.txt', priors, fmt="%f", delimiter=",")
    detect = Detect(cfg.num_classes,
                    bkg_label=0,
                    top_k=200,
                    conf_thresh=0.05,
                    nms_thresh=0.5)
    preds = detect({
        'loc': pred_outs[0],
        'conf': pred_outs[1],
        'mask': pred_outs[2],
        'priors': pred_outs[3],
        'proto': pred_outs[4]
    })

    # ONNX export uses a fixed 1x3x550x550 dummy input (YOLACT's input size),
    # independent of the image loaded above.
    dummy_input = Variable(torch.randn(1, 3, 550, 550))
    torch.onnx.export(net,
                      dummy_input,
                      "yolact.onnx",
                      verbose=False,
                      opset_version=11)

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)

    if save_path is None:
        # Without a save path, convert BGR->RGB and display instead of
        # writing (cv2.imwrite(None, ...) would fail), mirroring example #2.
        img_numpy = img_numpy[:, :, (2, 1, 0)]
        plt.imshow(img_numpy)
        plt.title(path)
        plt.show()
    else:
        cv2.imwrite(save_path, img_numpy)
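A minimal driver for the function above might look like the following sketch; the checkpoint path and the load_weights call are assumptions based on the upstream YOLACT repository and are not shown in this snippet.

# Hedged usage sketch; weight path and load_weights are assumptions.
net = Yolact()
net.load_weights('weights/yolact_base_54_800000.pth')  # assumed checkpoint
net.eval()
with torch.no_grad():
    evalimage(net, 'input.jpg', save_path='output.png')  # also writes yolact.onnx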
Code example #2
File: onnxeval.py Project: tyctor/yolact
def evalimage(net:Yolact, path:str, save_path:str=None):
    frame = torch.from_numpy(cv2.imread(path)).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))

    sess = rt.InferenceSession("yolact.onnx")
    input_name = sess.get_inputs()[0].name
    loc_name = sess.get_outputs()[0].name
    conf_name = sess.get_outputs()[1].name
    mask_name = sess.get_outputs()[2].name
    priors_name = sess.get_outputs()[3].name
    proto_name = sess.get_outputs()[4].name

    pred_onx = sess.run(
        [loc_name, conf_name, mask_name, priors_name, proto_name],
        {input_name: batch.cpu().detach().numpy()})

    #priors = np.loadtxt('priors.txt', delimiter=',', dtype='float32')

    detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5)
    preds = detect({'loc': torch.from_numpy(pred_onx[0]),
                    'conf': torch.from_numpy(pred_onx[1]),
                    'mask': torch.from_numpy(pred_onx[2]),
                    'priors': torch.from_numpy(pred_onx[3]),
                    'proto': torch.from_numpy(pred_onx[4])})

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)
    
    if save_path is None:
        # Convert BGR->RGB for matplotlib display.
        img_numpy = img_numpy[:, :, (2, 1, 0)]
        plt.imshow(img_numpy)
        plt.title(path)
        plt.show()
    else:
        cv2.imwrite(save_path, img_numpy)
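Because this variant re-runs the exported graph, a quick parity check against the PyTorch model is a natural addition. This is a sketch assuming net returns the same five head tensors (loc, conf, mask, priors, proto) as in code example #1.

# Hedged sketch: compare ONNX Runtime outputs with the PyTorch heads.
with torch.no_grad():
    torch_outs = net(batch)  # assumed to yield (loc, conf, mask, priors, proto)
for onx, ref in zip(pred_onx, torch_outs):
    np.testing.assert_allclose(onx, ref.cpu().numpy(), rtol=1e-3, atol=1e-4)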
Code example #3
File: ssd.py Project: ternaus/nexar2_ssd
    def __init__(self,
                 config,
                 phase,
                 base,
                 extras,
                 head,
                 num_classes,
                 top_k=200):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(config)
        self.priors = Variable(self.priorbox.forward(), volatile=True)

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax()
            self.detect = Detect(num_classes,
                                 0,
                                 top_k,
                                 0.01,
                                 0.45,
                                 variance=config['variance'])
Code example #4
File: yolact_onnx.py Project: luiszeni/yolact_onnx
    def __init__(self, model_path, device='cuda'):

        self.sess = onnxruntime.InferenceSession(model_path)
        self.device = device

        loc_name = self.sess.get_outputs()[0].name
        conf_name = self.sess.get_outputs()[1].name
        mask_name = self.sess.get_outputs()[2].name
        priors_name = self.sess.get_outputs()[3].name
        proto_name = self.sess.get_outputs()[4].name

        self.names = [loc_name, conf_name, mask_name, priors_name, proto_name]
        self.input_name = self.sess.get_inputs()[0].name

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=200,
                             conf_thresh=0.05,
                             nms_thresh=0.5)
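The constructor alone is enough to drive inference through the stored session handles. In the sketch below, the wrapper class name YolactONNX and the preprocessing are assumptions, not part of the snippet.

# Hedged usage sketch (class name and preprocessing are assumptions).
model = YolactONNX('yolact.onnx', device='cpu')
frame = torch.from_numpy(cv2.imread('input.jpg')).float()
batch_np = FastBaseTransform()(frame.unsqueeze(0)).cpu().detach().numpy()
loc, conf, mask, priors, proto = model.sess.run(
    model.names, {model.input_name: batch_np})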
Code example #5
    def __init__(self, cfg, phase='train'):
        super(FaceBox, self).__init__()
        self.phase = phase
        # model
        self.conv1 = nn.Conv2d(3,
                               24,
                               kernel_size=7,
                               stride=4,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(24)
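        # The 24 -> 48 channel jump between conv1 and conv2 presumably comes
        # from a CReLU-style concat of (x, -x) in forward(), doubling channels.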
        self.conv2 = nn.Conv2d(48,
                               64,
                               kernel_size=5,
                               stride=2,
                               padding=2,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(64)

        self.inception1 = Inception()
        self.inception2 = Inception()
        self.inception3 = Inception()

        self.conv3_1 = conv_bn_relu(128, 128, kernel_size=1)
        self.conv3_2 = conv_bn_relu(128,
                                    256,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1)
        self.conv4_1 = conv_bn_relu(256, 128, kernel_size=1)
        self.conv4_2 = conv_bn_relu(128,
                                    256,
                                    kernel_size=3,
                                    stride=2,
                                    padding=1)

        self.multilbox = MultiBoxLayer()

        if self.phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.test_det = Detect(cfg)
Code example #6
File: ssd.py Project: Lornatang/SSD_PyTorch
    def __init__(self, phase, size, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = (coco, voc)[num_classes == 21]
        self.priorbox = PriorBox(self.cfg)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
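            # presumably Detect(num_classes, bkg_label, top_k, conf_thresh,
            # nms_thresh), matching the keyword form in the YOLACT snippets above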
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
Code example #7
File: onnxeval.py Project: simutisernestas/yolact
def evalimage(net:Yolact, path:str, save_path:str=None):
    frame = torch.from_numpy(cv2.imread(path)).float()
    batch = FastBaseTransform()(frame.unsqueeze(0))

    # sess = rt.InferenceSession("yolact.onnx")
    # input_name = sess.get_inputs()[0].name
    # loc_name = sess.get_outputs()[0].name
    # conf_name = sess.get_outputs()[1].name
    # mask_name = sess.get_outputs()[2].name
    # priors_name = sess.get_outputs()[3].name
    # proto_name = sess.get_outputs()[4].name

    # pred_onx = sess.run([loc_name, conf_name, mask_name, priors_name, proto_name], {input_name: batch.cpu().detach().numpy()})

    #priors = np.loadtxt('priors.txt', delimiter=',', dtype='float32')

    # print(pred_onx)
    # exit()

    pred_onx = [np.load(f'{i}.npy') for i in range(5)]

    for pred in pred_onx:
        print(pred.shape)

    detect = Detect(81, bkg_label=0, top_k=200, conf_thresh=0.05, nms_thresh=0.5)
    preds = detect({'loc': torch.from_numpy(pred_onx[0]), 
                    'conf': torch.from_numpy(pred_onx[1]), 
                    'mask': torch.from_numpy(pred_onx[2]), 
                    'priors': torch.from_numpy(pred_onx[3]), 
                    'proto': torch.from_numpy(pred_onx[4])}, net)

    img_numpy = prep_display(preds, frame, None, None, undo_transform=False)
    plt.imshow(img_numpy)
    plt.title(path)
    plt.show()
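The five .npy files consumed above could plausibly be produced by dumping the session outputs from code example #2; a sketch under that assumption:

# Hedged sketch of producing 0.npy .. 4.npy from an onnxruntime run,
# reusing sess / input_name / output names as in code example #2.
outs = sess.run([loc_name, conf_name, mask_name, priors_name, proto_name],
                {input_name: batch.cpu().detach().numpy()})
for i, arr in enumerate(outs):
    np.save(f'{i}.npy', arr)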
Code example #8
    def __init__(self, classes, size):
        super(Retina, self).__init__()

        self.size = size

        self.priors = torch.autograd.Variable(prior_box(size),
                                              requires_grad=False)

        mask = ((self.priors[:, 2] > self.priors[:, 0]) &
                (self.priors[:, 3] > self.priors[:, 1]))

        self.classes = classes
        self.num_classes = len(classes) + 1

        self._backbone = resnet101(pretrained=True)

        names, layers = zip(*list(
            self._backbone.named_children())[:-2])  # leave off avgpool and fc

        self.backbone = []
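        # Split the ResNet children into stages: the stem (conv1..maxpool)
        # becomes the first chunk, then layer1..layer4 each get their own.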

        i = 0
        while i < len(names):
            j = i + 1
            while j < len(names) and not (names[j].startswith('layer')):
                j += 1
            self.backbone.append(torch.nn.Sequential(*layers[i:j]))
            i = j

        self.conv6 = torch.nn.Conv2d(2048, 256, 3, stride=2, padding=1)
        self.conv7 = torch.nn.Conv2d(256, 256, 3, stride=2, padding=1)
        self.conv5 = torch.nn.Conv2d(2048, 256, 3, padding=1)
        self.conv4 = torch.nn.Conv2d(1024, 256, 1)
        self.conv3 = torch.nn.Conv2d(512, 256, 1)
        self.conv2 = torch.nn.Conv2d(256, 256, 1)

        self.loc = self.mk_subnet(4, include_sigmoid=False)
        self.conf = self.mk_subnet(self.num_classes, include_sigmoid=False)

        self.detect = Detect(self.num_classes, 0, 200, 0.01, 0.45)
Code example #9
    def __init__(self, phase, size, base, extras, head, num_classes):
        super(TBPP, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = {
            'num_classes': 2,
            'lr_steps': (80000, 100000, 120000),
            'max_iter': 120000,
            'feature_maps': [64, 32, 16, 8, 4, 2, 1],
            'min_dim': 512,
            'steps': [8, 16, 32, 64, 128, 256, 512],
            'min_sizes': [20, 51, 133, 215, 296, 378, 460],
            'max_sizes': [51, 133, 215, 296, 378, 460, 542],
            'aspect_ratios': [[2, 3], [2, 3, 5], [2, 3, 5], [2, 3, 5],
                              [2, 3, 5], [2, 3], [2, 3]],  # TODO
            'variance': [0.1, 0.2],
            'clip': True,
            'name': 'MINE'
        }
        self.priorbox = PriorBox(
            self.cfg)  # calculate the size of prior boxes, i.e. defaults boxes
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # TBPP network
        self.vgg = nn.ModuleList(base)
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)
Code example #10
File: ssd.py Project: DuinoDu/codelib
    def __init__(self, num_classes, phase, pretrain=False, finetune=None):
        super(SSD300, self).__init__()
        self.num_classes = num_classes
        self.phase = phase
        
        self.base_net = self._base_net()
        self.extra_net = self._extra_net()
        self.loc_pred, self.cls_pred = self._predict_net()

        self.L2Norm = L2Norm(512, 20)
        self.priorbox = PriorBox(v2)
        self.priors = Variable(self.priorbox.forward(), volatile=True)

        if phase == 'test':
            self.softmax = nn.Softmax()
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

        self._init_weight()
        if pretrain:
            self._load_weight()
        if finetune is not None:
            self._finetune(finetune)
Code example #11
    def __init__(self, phase, base, extras, lfpn_cpm, head, num_classes):
        super(PyramidBox, self).__init__()
        #self.use_transposed_conv2d = use_transposed_conv2d

        self.vgg = nn.ModuleList(base)
        self.extras = nn.ModuleList(extras)

        self.L2Norm3_3 = L2Norm(256, 10)
        self.L2Norm4_3 = L2Norm(512, 8)
        self.L2Norm5_3 = L2Norm(512, 5)
        """
        self.lfpn_topdown = nn.ModuleList([
            nn.Conv2d(1024, 512, 1, 1),
            nn.Conv2d(512, 512, 1, 1),
            nn.Conv2d(512, 256, 1, 1)
        ])
        self.lfpn_later = nn.ModuleList([
            nn.Conv2d(512, 512, 1, 1),
            nn.Conv2d(512, 512, 1, 1),
            nn.Conv2d(256, 256, 1, 1)
        ])
        self.cpm = nn.ModuleList([
            CPM(256), CPM(512), CPM(512),
            CPM(1024), CPM(512), CPM(256)
        ])
        """
        self.lfpn_topdown = nn.ModuleList(lfpn_cpm[0])
        self.lfpn_later = nn.ModuleList(lfpn_cpm[1])
        self.cpm = nn.ModuleList(lfpn_cpm[2])

        self.loc_layers = nn.ModuleList(head[0])
        self.conf_layers = nn.ModuleList(head[1])

        self.is_infer = False
        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(cfg)
            self.is_infer = True
Code example #12
File: STMask.py Project: MinghanLi/STMask
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src
            self.interpolation_mode = cfg.fpn.interpolation_mode

            if self.proto_src is None:
                in_channels = 3
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        self.pred_scales = cfg.backbone.pred_scales
        self.pred_aspect_ratios = cfg.backbone.pred_aspect_ratios
        self.num_priors = len(self.pred_scales[0])
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])

            if cfg.backbone_C2_as_features:
                self.selected_layers = list(
                    range(1,
                          len(self.selected_layers) + cfg.fpn.num_downsample))
                src_channels = [cfg.fpn.num_features
                                ] * (len(self.selected_layers) + 1)
            else:
                self.selected_layers = list(
                    range(len(self.selected_layers) + cfg.fpn.num_downsample))
                src_channels = [cfg.fpn.num_features] * len(
                    self.selected_layers)

        # prediction layers for loc, conf, mask
        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # yolact++
        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent, parent_t = None, None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule_FC(
                src_channels[layer_idx],
                src_channels[layer_idx],
                deform_groups=1,
                pred_aspect_ratios=self.pred_aspect_ratios[idx],
                pred_scales=self.pred_scales[idx],
                parent=parent)

            self.prediction_layers.append(pred)

        # parameters in temporal correlation net
        if cfg.temporal_fusion_module:
            corr_channels = 2 * in_channels + cfg.correlation_patch_size**2
            self.TemporalNet = TemporalNet(corr_channels, cfg.mask_proto_n)
            self.correlation_selected_layer = cfg.correlation_selected_layer

            # evaluation for frame-level tracking
            self.Detect_TF = Detect_TF(cfg.num_classes,
                                       bkg_label=0,
                                       top_k=cfg.nms_top_k,
                                       conf_thresh=cfg.nms_conf_thresh,
                                       nms_thresh=cfg.nms_thresh)
            self.Track_TF = Track_TF()

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
        self.Track = Track()
Code example #13
    set_type = 'test'

    # load data
    dataset = VOCDetection(args.voc_root, [('2007', set_type)],
                           dataset_name='VOC0712')
    # load net
    torch.set_grad_enabled(False)
    load_to_cpu = not args.cuda
    cudnn.benchmark = True
    device = torch.device('cuda' if args.cuda else 'cpu')
    if args.wo_refined_anchor:
        detect = Detect(num_classes,
                        int(args.input_size),
                        0,
                        confidence_threshold=args.confidence_threshold,
                        nms_threshold=args.nms_threshold,
                        top_k=args.top_k,
                        keep_top_k=args.keep_top_k)
    else:
        detect = Detect_RefineDet(
            num_classes,
            int(args.input_size),
            0,
            objectness_threshold,
            confidence_threshold=args.confidence_threshold,
            nms_threshold=args.nms_threshold,
            top_k=args.top_k,
            keep_top_k=args.keep_top_k)
    net = build_refinedet('test', int(args.input_size), num_classes,
                          backbone_dict)
Code example #14
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(
                                    cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(
                                   cfg['min_dim'], MEANS))

    elif args.dataset == 'Watermark':
        cfg = voc
        dataset = watermark.WatermarkDetection(
            root=args.dataset_root,
            target_transform=watermark.target_transform,
            transform=SSDAugmentation(cfg['min_dim'], MEANS))

    if args.visdom:
        import visdom
        viz = visdom.Visdom()

    softmax = nn.Softmax(dim=-1)
    detect = Detect(cfg['num_classes'], 0, 200, 0.01, 0.45)

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True
    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    '''
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)
    '''
    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset,
                                  args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True,
                                  collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            print('Start data iteration over again.')
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [
                torch.tensor(ann, requires_grad=False).cuda()
                for ann in targets
            ]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            #print('timer: %.4f sec.' % (t1 - t0))
            #print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')
            writer.add_scalars('loss', {
                'class': loss_c.item(),
                'loc': loss_l.item()
            },
                               global_step=iteration)

        # plot the training image with bounding box
        if iteration % 100 == 0:
            img = images[0]
            img = watermark.tv_inv_trans(img)
            bnd_box_gt = torch.Tensor(
                [watermark.to_coord(targets[0][0, :-1],
                                    img.size()[-2:])])
            with torch.no_grad():
                detections = detect(out[0].detach(), softmax(out[1].detach()),
                                    out[2].detach())
                positive_idx = detections[0, :, :, 0] > 0.6
                bnd_box_preds = detections[0][positive_idx][:, 1:]

            bnd_box_pred = [
                torch.Tensor(watermark.to_coord(bnd_box_pred,
                                                img.size()[-2:])).unsqueeze(0)
                for bnd_box_pred in bnd_box_preds
            ]

            if len(bnd_box_pred):
                bnd_box_pred = torch.cat(bnd_box_pred, 0)
                writer.add_image_with_boxes('pred',
                                            img,
                                            bnd_box_pred,
                                            global_step=iteration)
            else:
                writer.add_image('pred', img, global_step=iteration)
            writer.add_image_with_boxes('gt',
                                        img,
                                        bnd_box_gt,
                                        global_step=iteration)

        # plot gradient of every layer
        if iteration % 500 == 0:
            writer.add_figure('grad_flow',
                              vis_grad.plot_grad_flow_v2(
                                  net.named_parameters()),
                              global_step=iteration)

        if args.visdom:
            update_vis_plot(iteration, loss_l.item(), loss_c.item(), iter_plot,
                            epoch_plot, 'append')

        if iteration != 0 and (iteration + 1) % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                ssd_net.state_dict(),
                os.path.join(args.save_folder, args.dataset) +
                repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
Code example #15
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        #Fusion FPN
        self.fusion_layers = cfg.fusion_layers
        self.fusion_dim = cfg.fusion_dim

        # Compute mask_dim here and add it back to the config.
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None: in_channels = 3
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function

            if cfg.proto_coordconv:
                in_channels += 2
            elif cfg.fpn_fusion:
                in_channels = self.fusion_dim

            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels
        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        if cfg.fpn_fusion is True:
            self.fusion_module = FusionModule(src_channels[0],
                                              self.fusion_layers,
                                              out_dim=self.fusion_dim)

        if cfg.ins_coordconv or cfg.sem_coordconv or cfg.proto_coordconv:
            self.addcoords = AddCoords()

        self.prediction_layers = nn.ModuleList()

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred_in_ch = src_channels[
                layer_idx] + 2 if cfg.ins_coordconv else src_channels[layer_idx]
            pred = PredictionModule(
                pred_in_ch,
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.cross_attention_fusion:
            self.CALayer = CAModule(src_channels[0], share_conv=False)

        if cfg.use_semantic_segmentation_loss:
            sem_in_ch = None
            if cfg.sem_src_fusion is True:
                sem_in_ch = self.fusion_dim
            elif cfg.sem_lincomb is True:
                sem_in_ch = src_channels[0]
            else:  # normal semantic segmentation head
                sem_in_ch = src_channels[-1]

            if cfg.sem_coordconv:
                sem_in_ch += 2

            # Panoptic FPN Fusion Version
            if cfg.sem_src_fusion is True:
                self.semantic_seg_conv = nn.Sequential(
                    nn.Conv2d(sem_in_ch,
                              cfg.stuff_num_classes,
                              kernel_size=(1, 1)))

            elif cfg.sem_lincomb is True:
                self.semantic_seg_conv = nn.Sequential(
                    nn.Conv2d(sem_in_ch, 256, kernel_size=3),
                    # nn.BatchNorm2d(256),
                    nn.GroupNorm(32, 256),
                    nn.ReLU(True),
                    nn.Conv2d(256, (cfg.stuff_num_classes) * cfg.mask_dim,
                              kernel_size=1),
                    nn.Tanh())
            else:
                self.semantic_seg_conv = nn.Sequential(
                    nn.Conv2d(sem_in_ch,
                              cfg.stuff_num_classes,
                              kernel_size=(1, 1)))

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=200,
                             conf_thresh=0.05,
                             nms_thresh=0.5)
Code example #16
File: eval.py Project: unizard/detection.pytorch
def test_net(save_folder,
             net,
             cuda,
             dataset,
             transform,
             top_k,
             im_size=300,
             thresh=0.05):
    num_images = len(dataset)
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(len(labelmap) + 1)]

    from layers import Detect

    parser = Detect(num_classes, 0, 200, 0.1, 0.45)
    softmax = nn.Softmax(dim=-1)

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir('ssd300_120000', set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        break  # NOTE: detection loop disabled in this snippet; cached detections are loaded from det_file below
        im, gt, h, w = dataset.pull_item(i)

        x = im.unsqueeze(0)
        if args.cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        #         detections = net(x).data
        with torch.no_grad():
            loc_pred, cls_pred, priors = net(x)
        detections = parser(loc_pred, softmax(cls_pred), priors.type(type(x)))

        detect_time = _t['im_detect'].toc(average=False)

        # skip j = 0, because it's the background class
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            mask = dets[:, 0].gt(0.).expand(5, dets.size(0)).t()
            dets = torch.masked_select(dets, mask).view(-1, 5)
            if dets.shape[0] == 0:
                continue
            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            cls_dets = np.hstack(
                (boxes.cpu().numpy(), scores[:,
                                             np.newaxis])).astype(np.float32,
                                                                  copy=False)
            all_boxes[j][i] = cls_dets

            print('im_detect: {:d}/{:d} {:.3f}s'.format(
                i + 1, num_images, detect_time))


#         # import pdb
#         # pdb.set_trace()

#         from data import VOC_CLASSES as labels
#         top_k=10

#         im = cv2.imread(dataset._imgpath % dataset.ids[i])

#         plt.figure(figsize=(10,10))
#         colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
#         plt.imshow(im)  # plot the image for matplotlib
#         currentAxis = plt.gca()

#         detections = cls_dets.copy()
#         # scale each detection back up to the image
#         scale = torch.Tensor(im.shape[1::-1]).repeat(2)
#         for i in range(detections.size(1)):
#             j = 0
#             while detections[0,i,j,0] >= 0.6:
#                 score = detections[0,i,j,0]
#                 label_name = labels[i-1]
#                 display_txt = '%s: %.2f'%(label_name, score)
#                 pt = (detections[0,i,j,1:]*scale).cpu().numpy()
#                 coords = (pt[0], pt[1]), pt[2]-pt[0]+1, pt[3]-pt[1]+1
#                 color = colors[i]
#                 currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
#                 currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor':color, 'alpha':0.5})
#                 j+=1

#         print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1,
#                                                     num_images, detect_time))

#with open(det_file, 'wb') as f:
#    pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    with open(det_file, 'rb') as f:
        all_boxes = pickle.load(f)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
Code example #17
    def __init__(self, phase, nms_thresh=0.3, nms_conf_thresh=0.01):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = 2
        self.cfg = cfg

        resnet = torchvision.models.resnet152(pretrained=True)

        self.layer1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.maxpool, resnet.layer1)
        self.layer2 = nn.Sequential(resnet.layer2)
        self.layer3 = nn.Sequential(resnet.layer3)
        self.layer4 = nn.Sequential(resnet.layer4)
        self.layer5 = nn.Sequential(*[
            nn.Conv2d(2048, 512, kernel_size=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1, stride=2),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True)
        ])
        self.layer6 = nn.Sequential(*[
            nn.Conv2d(512, 128, kernel_size=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 256, kernel_size=3, padding=1, stride=2),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True)
        ])

        output_channels = [256, 512, 1024, 2048, 512, 256]

        # FPN
        fpn_in = output_channels

        self.latlayer3 = nn.Conv2d(fpn_in[3],
                                   fpn_in[2],
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)
        self.latlayer2 = nn.Conv2d(fpn_in[2],
                                   fpn_in[1],
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)
        self.latlayer1 = nn.Conv2d(fpn_in[1],
                                   fpn_in[0],
                                   kernel_size=1,
                                   stride=1,
                                   padding=0)

        self.smooth3 = nn.Conv2d(fpn_in[2],
                                 fpn_in[2],
                                 kernel_size=1,
                                 stride=1,
                                 padding=0)
        self.smooth2 = nn.Conv2d(fpn_in[1],
                                 fpn_in[1],
                                 kernel_size=1,
                                 stride=1,
                                 padding=0)
        self.smooth1 = nn.Conv2d(fpn_in[0],
                                 fpn_in[0],
                                 kernel_size=1,
                                 stride=1,
                                 padding=0)

        # FEM
        cpm_in = output_channels

        self.cpm3_3 = FEM(cpm_in[0])
        self.cpm4_3 = FEM(cpm_in[1])
        self.cpm5_3 = FEM(cpm_in[2])
        self.cpm7 = FEM(cpm_in[3])
        self.cpm6_2 = FEM(cpm_in[4])
        self.cpm7_2 = FEM(cpm_in[5])

        # head
        head = pa_multibox(output_channels)
        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        self.softmax = nn.Softmax(dim=-1)

        if self.phase != 'onnx_export':
            self.detect = Detect(self.num_classes, 0, cfg['num_thresh'],
                                 nms_conf_thresh, nms_thresh, cfg['variance'])
            self.last_image_size = None
            self.last_feature_maps = None

        if self.phase == 'test':
            self.test_transform = TestBaseTransform((104, 117, 123))
Code example #18
File: yolact.py Project: sutefun/zaitaku-benkyou
    def __init__(self):
        super().__init__()

        # yolact++ cfg.backbone =
        # 'backbone': resnet101_dcn_inter3_backbone.copy({
        #     'selected_layers': list(range(1, 4)),
        #
        #     'pred_aspect_ratios': [[[1, 1 / 2, 2]]] * 5,
        #     'pred_scales': [[i * 2 ** (j / 3.0) for j in range(3)] for i in [24, 48, 96, 192, 384]],
        #     'use_pixel_scales': True,
        #     'preapply_sqrt': False,
        #     'use_square_anchors': False,
        # })
        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            # e.g. mask_size = 16 -> 16**2 = 256-dim direct masks
            cfg.mask_dim = cfg.mask_size**2

        elif cfg.mask_type == mask_type.lincomb:

            # mask_proto_use_grid is False in the default configs
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            # yolact uses 0
            self.proto_src = cfg.mask_proto_src
            
            if self.proto_src is None: in_channels = 3
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]

            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            # yolact ++ proto net
            #     'mask_proto_net': [(256, 3, {'padding': 1})] * 3
            #     + [(None, -2, {}), (256, 3, {'padding': 1})]
            #     + [(32, 1, {})],
            self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        ## end of mask type if else ______________________________________________]

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN(
                # yolact++ 101 selected layers = 1,2,3
                # 2nd  128x4
                # 3rd  256x4
                # 4th  512x4
                [src_channels[i] for i in self.selected_layers]
            )

            self.selected_layers = list( # selected_layers = 0,1,2,3,4
                range(
                    # yolact++
                    # 1 , 2 , 3                               2
                    len(self.selected_layers) + cfg.fpn.num_downsample)
            )

                            # num features = 256  x  5
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)


        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # 5 heads with the settings above

        # Build one PredictionModule per selected layer (5 here).
        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None

            # yolact++ share_prediction_module always True
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                                    # in_channels=
                                    src_channels[layer_idx],
                                    # out_channels=
                                    src_channels[layer_idx],
                                    # 'pred_scales': [[1]] * 6
                                    #  'pred_aspect_ratios': [[[0.66685089, 1.7073535, 0.87508774, 1.16524493,
                                    #                            0.49059086]]] * 6
                                    aspect_ratios = cfg.backbone.pred_aspect_ratios[idx],
                                    scales        = cfg.backbone.pred_scales[idx],
                                    parent        = parent,
                                    index         = idx)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        # always False ??
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)

        # yolact always True ??
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,     #'nms_top_k': 200,
                             conf_thresh=cfg.nms_conf_thresh,     #'nms_conf_thresh': 0.05
                             nms_thresh=cfg.nms_thresh      #'nms_thresh': 0.5
                             )
Code example #19
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(
            cfg.backbone)  #resnet101_dcn_inter3_backbone

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2

        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:  #False
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            #cw yolact_plus default:0
            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None: in_channels = 3  #cw  0 != None
            elif cfg.fpn is not None:
                in_channels = cfg.fpn.num_features  #cw fpn.num_features -- default:'num_features': 256,
            else:
                in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids  #cw (256 + 0)

            #TODO#Fig. 3 PART
            # The include_last_relu=false here is because we might want to change it to another function
            # 'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})],
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)
            # 256 channels in; six convs plus a bilinear upsample.
            #cw make_net returns, as its second value, the channel count of the
            #   final output once in_channels has passed through cfg.mask_proto_net,
            #   so that final channel count becomes cfg.mask_dim.

            if cfg.mask_proto_bias:  #False
                cfg.mask_dim += 1
            #   cfg.mask_dim = 32

        self.selected_layers = cfg.backbone.selected_layers  #cw yp -- [1, 2, 3]
        src_channels = self.backbone.channels  #src_channels = [256, 512, 1024, 2048]

        #True #TODO#
        if cfg.use_maskiou:
            self.maskiou_net = FastMaskIoUNet()

        # 'fpn': fpn_base.copy({
        #     'use_conv_downsample': True,
        #     'num_downsample': 2,
        # }),

        #TODO#
        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])  # passes [512, 1024, 2048]
            self.selected_layers = list(
                range(len(self.selected_layers) +
                      cfg.fpn.num_downsample))  #cw range(3 + 2)
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        # src_channels = [256, 256, 256, 256, 256]
        # selected_layers : [0, 1, 2, 3, 4]

        self.prediction_layers = nn.ModuleList()
        cfg.num_heads = len(self.selected_layers)  # 5; used inside PredictionModule

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            #True
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]
            #cw src_channels originally holds the channel count of each resnet layer_idx;
            #   i.e., bboxes are predicted on each selected layer.
            #   Each pred obtained from the call is appended to prediction_layers
            #   (one module per entry in selected_layers).
            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent,
                index=idx)
            self.prediction_layers.append(pred)

        #False
        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)
        #True
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=cfg.nms_top_k,
                             conf_thresh=cfg.nms_conf_thresh,
                             nms_thresh=cfg.nms_thresh)
Code example #20
    def __init__(self):
        super(Yolact, self).__init__()
        ####################################################
        #                   for mainly net                 #
        ####################################################
        self.backbone = resnet18(pretrained=True)

        self.fpn1 = nn.Sequential(
            nn.Conv2d(512, 256, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 1, 1, 0),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )
        self.fpn2 = nn.Sequential(
            nn.Conv2d(1024, 256, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 1, 1, 0),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )
        self.fpn3 = nn.Sequential(
            nn.Conv2d(2048, 256, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 1, 1, 0),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )
        self.conv_b = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )
        self.conv_c = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )
        self.conv_m = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )

        self.downsample_layers1 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )
        self.downsample_layers2 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )

        self.bbox_layer = nn.Conv2d(256, 12, 3, 1, 1)
        self.conf_layer = nn.Conv2d(256, 243, 3, 1, 1)
        self.mask_layer = nn.Conv2d(256, 96, 3, 1, 1)
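        # Presumably 3 anchors per location for the three heads above:
        # 12 = 3 * 4 box offsets, 243 = 3 * 81 class scores
        # (80 COCO classes + background), 96 = 3 * 32 mask coefficients.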

        self.semantic_set_conv = nn.Conv2d(256, 80, 1, 1)
        ##################################################
        #                for proto net                   #
        ##################################################

        self.proto_net1 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
        )

        self.proto_net2 = nn.Sequential(nn.Conv2d(256, 256, 3, 1, 1),
                                        nn.BatchNorm2d(256),
                                        nn.ReLU(inplace=True),
                                        nn.Conv2d(256, 32, 1, 1))
        #########################################################
        # forward process                                       #
        #########################################################
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=200,
                             conf_thresh=0.05,
                             nms_thresh=0.5)
Code example #21
File: yolact.py Project: michfelip/yolact_yx
    def __init__(self):
        super().__init__()

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()

        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None: in_channels = 3
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels,
                                                    cfg.mask_proto_net,
                                                    include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(
                range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        self.prediction_layers = nn.ModuleList()

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(
                src_channels[layer_idx],
                src_channels[layer_idx],
                aspect_ratios=cfg.backbone.pred_aspect_ratios[idx],
                scales=cfg.backbone.pred_scales[idx],
                parent=parent)
            self.prediction_layers.append(pred)

        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1],
                                                cfg.num_classes - 1)

        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0],
                                               cfg.num_classes - 1,
                                               kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes,
                             bkg_label=0,
                             top_k=200,
                             conf_thresh=0.05,
                             nms_thresh=0.5)
Code example #22
    def __init__(self):
        # super(): call the base class's __init__
        super().__init__()
        print('net initial...\n')

        self.backbone = construct_backbone(cfg.backbone)

        if cfg.freeze_bn:
            self.freeze_bn()


        ##get:: self.proto_net, cfg.mask_dim
        # Compute mask_dim here and add it back to the config. Make sure Yolact's constructor is called early!
        if cfg.mask_type == mask_type.direct:
            cfg.mask_dim = cfg.mask_size**2
        elif cfg.mask_type == mask_type.lincomb:
            if cfg.mask_proto_use_grid:
            #cfg.mask_proto_grid_file : data/grid.npy , npy is a numpy data file
                self.grid = torch.Tensor(np.load(cfg.mask_proto_grid_file))
                self.num_grids = self.grid.size(0)
            else:
                self.num_grids = 0

            #0
            self.proto_src = cfg.mask_proto_src

            if self.proto_src is None: in_channels = 3
            #cfg.fpn is obj
            elif cfg.fpn is not None: in_channels = cfg.fpn.num_features
            else: in_channels = self.backbone.channels[self.proto_src]
            in_channels += self.num_grids

            # The include_last_relu=false here is because we might want to change it to another function
            self.proto_net, cfg.mask_dim = make_net(in_channels, cfg.mask_proto_net, include_last_relu=False)

            if cfg.mask_proto_bias:
                cfg.mask_dim += 1

        #        self.proto_net
        #        Sequential(
        #          (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (1): ReLU(inplace)
        #          (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (3): ReLU(inplace)
        #          (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (5): ReLU(inplace)
        #          (6): InterpolateModule()
        #          (7): ReLU(inplace)
        #          (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        #          (9): ReLU(inplace)
        #          (10): Conv2d(256, 32, kernel_size=(1, 1), stride=(1, 1))
        #)
        #
        #        self.fpn
        #        FPN(
        #          (lat_layers): _ConstModuleList(
        #            (0): WeakScriptModuleProxy()
        #            (1): WeakScriptModuleProxy()
        #            (2): WeakScriptModuleProxy()
        #          )
        #          (pred_layers): _ConstModuleList(
        #            (0): WeakScriptModuleProxy()
        #            (1): WeakScriptModuleProxy()
        #            (2): WeakScriptModuleProxy()
        #          )
        #          (downsample_layers): _ConstModuleList(
        #            (0): WeakScriptModuleProxy()
        #            (1): WeakScriptModuleProxy()
        #          )
        #        )

        self.selected_layers = cfg.backbone.selected_layers
        src_channels = self.backbone.channels

        if cfg.fpn is not None:
            # Some hacky rewiring to accommodate the FPN
            self.fpn = FPN([src_channels[i] for i in self.selected_layers])
            self.selected_layers = list(range(len(self.selected_layers) + cfg.fpn.num_downsample))
            src_channels = [cfg.fpn.num_features] * len(self.selected_layers)

        self.prediction_layers = nn.ModuleList()

        for idx, layer_idx in enumerate(self.selected_layers):
            # If we're sharing prediction module weights, have every module's parent be the first one
            parent = None
            if cfg.share_prediction_module and idx > 0:
                parent = self.prediction_layers[0]

            pred = PredictionModule(src_channels[layer_idx], 
                                src_channels[layer_idx],
                                aspect_ratios = cfg.backbone.pred_aspect_ratios[idx],
                                scales        = cfg.backbone.pred_scales[idx],
                                parent        = parent)
                                
            self.prediction_layers.append(pred)

        #False
        # Extra parameters for the extra losses
        if cfg.use_class_existence_loss:
            # This comes from the smallest layer selected
            # Also note that cfg.num_classes includes background
            self.class_existence_fc = nn.Linear(src_channels[-1], cfg.num_classes - 1)
        
        if cfg.use_semantic_segmentation_loss:
            self.semantic_seg_conv = nn.Conv2d(src_channels[0], cfg.num_classes-1, kernel_size=1)

        # For use in evaluation
        self.detect = Detect(cfg.num_classes, bkg_label=0, top_k=200, conf_thresh=0.2, nms_thresh=0.5)
        self.tmp = 1