Ejemplo n.º 1
0
 def __init__(
     self,
     in_channels=512,
     mid_channels=512,
     ratios=None,
     anchor_scales=None,
     feat_stride=16,
     mode="training",
 ):
     """Build the anchor base, the proposal layer and the three conv layers.

     Args:
         in_channels: Input channel count of the 3x3 convolution.
         mid_channels: Output channel count of the 3x3 convolution and
             input channel count of the two 1x1 heads.
         ratios: Forwarded to ``generate_anchor_base``. ``None`` (the
             default) means ``[0.5, 1, 2]``.
         anchor_scales: Forwarded to ``generate_anchor_base``. ``None``
             (the default) means ``[8, 16, 32]``.
         feat_stride: Stored as ``self.feat_stride``.
         mode: Forwarded to ``ProposalCreator``.
     """
     super(RegionProposalNetwork, self).__init__()
     # Build the default lists per call: a list literal in the signature
     # is created once and shared by every instance, so any in-place
     # change downstream would leak into later constructions.
     if ratios is None:
         ratios = [0.5, 1, 2]
     if anchor_scales is None:
         anchor_scales = [8, 16, 32]
     self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales,
                                             ratios=ratios)
     # Stride, i.e. the downsampling factor.
     self.feat_stride = feat_stride
     self.proposal_layer = ProposalCreator(mode)
     # Number of default anchors (prior boxes) at each grid cell.
     n_anchor = self.anchor_base.shape[0]
     # First apply a 3x3 convolution (stride 1, padding 1).
     self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
     # Classification head: predicts whether each anchor contains an
     # object (2 output channels per anchor).
     self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
     # Regression head: predicts the adjustment applied to each anchor
     # (4 output channels per anchor).
     self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
     # NOTE(review): normal_init is defined elsewhere; the arguments look
     # like (module, mean, stddev) -- confirm.
     normal_init(self.conv1, 0, 0.01)
     normal_init(self.score, 0, 0.01)
     normal_init(self.loc, 0, 0.01)
    def __init__(
            self, in_channels=512, mid_channels=512, ratios=None,
            anchor_scales=None, feat_stride=16,
            mode = "training",
    ):
        """Build the proposal layer, the anchor base and the three conv layers.

        Args:
            in_channels: Input channel count of the 3x3 convolution.
            mid_channels: Output channel count of the 3x3 convolution and
                input channel count of the two 1x1 heads.
            ratios: Forwarded to ``generate_anchor_base``. ``None`` (the
                default) means ``[0.5, 1, 2]``.
            anchor_scales: Forwarded to ``generate_anchor_base``. ``None``
                (the default) means ``[8, 16, 32]``.
            feat_stride: Stored as ``self.feat_stride``.
            mode: Forwarded to ``ProposalCreator``.
        """
        super(RegionProposalNetwork, self).__init__()
        # Build the default lists per call: a list literal in the signature
        # is created once and shared by every instance, so any in-place
        # change downstream would leak into later constructions.
        if ratios is None:
            ratios = [0.5, 1, 2]
        if anchor_scales is None:
            anchor_scales = [8, 16, 32]

        self.feat_stride = feat_stride
        self.proposal_layer = ProposalCreator(mode)
        #-----------------------------------------#
        #   Generate the base anchors; per the original
        #   comment the shape is [9, 4] (with the default
        #   3 scales x 3 ratios -- TODO confirm)
        #-----------------------------------------#
        self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
        n_anchor = self.anchor_base.shape[0]

        #-----------------------------------------#
        #   First apply a 3x3 convolution, which can be
        #   thought of as feature integration
        #-----------------------------------------#
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        #-----------------------------------------#
        #   Classification head: predicts whether each
        #   anchor contains an object
        #-----------------------------------------#
        self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
        #-----------------------------------------#
        #   Regression head: predicts the adjustment
        #   applied to each anchor
        #-----------------------------------------#
        self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)

        #--------------------------------------#
        #   Initialize the weights of the three conv
        #   layers above (the original comment said
        #   "FPN"; presumably the RPN head is meant)
        #--------------------------------------#
        normal_init(self.conv1, 0, 0.01)
        normal_init(self.score, 0, 0.01)
        normal_init(self.loc, 0, 0.01)
Ejemplo n.º 3
0
 def __init__(self,
              in_channels=512,
              mid_channels=512,
              ratios=None,
              scales=None,
              feat_stride=16):
     """Build the anchor base and the three conv layers.

     Args:
         in_channels: Input channel count of the 3x3 convolution.
         mid_channels: Output channel count of the 3x3 convolution and
             input channel count of the two 1x1 heads.
         ratios: Forwarded to ``generate_anchor_base``. ``None`` (the
             default) means ``[0.5, 1, 2]``.
         scales: Forwarded to ``generate_anchor_base``. ``None`` (the
             default) means ``[0.5, 1, 2]``.
         feat_stride: Stored as ``self.feat_stride`` and forwarded to
             ``generate_anchor_base`` as ``strides``.
     """
     super(RegionProposalNetwork, self).__init__()
     # Build the default lists per call: a list literal in the signature
     # is created once and shared by every instance, so any in-place
     # change downstream would leak into later constructions.
     if ratios is None:
         ratios = [0.5, 1, 2]
     if scales is None:
         scales = [0.5, 1, 2]
     # prepare anchor base
     self.anchor_base = generate_anchor_base(side_length=16,
                                             ratios=ratios,
                                             scales=scales,
                                             strides=feat_stride)
     self.feat_stride = feat_stride
     # network params: the first dimension of the anchor base is used as
     # the per-location anchor count that sizes the two 1x1 heads.
     n_anchor = self.anchor_base.shape[0]
     self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
     # 2 output channels per anchor.
     self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
     # 4 output channels per anchor.
     self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)
     # NOTE(review): normal_init is defined elsewhere; the arguments look
     # like (module, mean, stddev) -- confirm.
     normal_init(self.conv1, 0, 0.01)
     normal_init(self.score, 0, 0.01)
     normal_init(self.loc, 0, 0.01)
Ejemplo n.º 4
0
    def __init__(self,
                 in_channels=256,
                 mid_channels=256,
                 ratios=None,
                 anchor_scales=None,
                 feat_stride=16,
                 mode="training"):
        """Build the anchor base, the proposal layer and the three conv layers.

        Args:
            in_channels: Input channel count of the 3x3 convolution.
            mid_channels: Output channel count of the 3x3 convolution and
                input channel count of the two 1x1 heads.
            ratios: Forwarded to ``generate_anchor_base``. ``None`` (the
                default) means ``[0.5, 1., 2.]``.
            anchor_scales: Forwarded to ``generate_anchor_base``. ``None``
                (the default) means ``[8, 16, 32]``.
            feat_stride: Stored as ``self.feat_stride``.
            mode: Forwarded to ``region_proposal``.
        """
        super(RPN, self).__init__()
        # Build the default lists per call: a list literal in the signature
        # is created once and shared by every instance, so any in-place
        # change downstream would leak into later constructions.
        if ratios is None:
            ratios = [0.5, 1., 2.]
        if anchor_scales is None:
            anchor_scales = [8, 16, 32]
        self.base_anchor = generate_anchor_base(ratios=ratios,
                                                anchor_scales=anchor_scales)
        # number of anchors in an anchor base
        self.K = self.base_anchor.shape[0]
        self.feat_stride = feat_stride

        self.proposal_layer = region_proposal(mode)

        # layers after conv5
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=mid_channels,
                               kernel_size=3,
                               stride=1,
                               padding=1)
        # 2 output channels per anchor.
        self.cls = nn.Conv2d(in_channels=mid_channels,
                             out_channels=self.K * 2,
                             kernel_size=1,
                             stride=1,
                             padding=0)
        # 4 output channels per anchor.
        self.reg = nn.Conv2d(in_channels=mid_channels,
                             out_channels=self.K * 4,
                             kernel_size=1,
                             stride=1,
                             padding=0)

        # weight initialization
        # NOTE(review): normal_init is defined elsewhere; the arguments look
        # like (module, mean, stddev) -- confirm.
        normal_init(self.conv1, 0, 0.01)
        normal_init(self.cls, 0, 0.01)
        normal_init(self.reg, 0, 0.01)
Ejemplo n.º 5
0
    batch_size = 1
    start_epoch, max_epoch = 0, 30
    data_root = cfg_net['data_path']
    train_file = os.path.join(cfg_net['train_data_path'], "VOC2007_train.txt")
    train_transform = Compose([ToTensor(), RandomHorizontalFlip(0.5)])

    # step 1: data
    train_set = datasets.VOC2007Dataset(data_root, vocfile=train_file, transforms=train_transform)

    train_loader = DataLoader(train_set, batch_size=batch_size, collate_fn=collate_fn)

    base_size = 16
    ratios = [0.5, 1, 2]
    anchor_scales = [8, 16, 32]
    anchors_gen = Anchors.AnchorTargetCreator()
    anchor_base = Anchors.generate_anchor_base(base_size=base_size, anchor_scales=anchor_scales, ratios=ratios)


    net = mobilenet_v2(num_classes=2, width_mult=0.35, inverted_residual_setting=None, round_nearest=8).to(device)
    rpn_net = RPN(1280, 512, 9).to(device)

    # inputs = torch.zeros((2, 3, 128, 128))
    # cls = net(inputs)
    # feature_map = net.features(inputs)
    # rpn_class, rpn_prob, rpn_bbox = rpn_net(feature_map)

    classify_loss = rpn_loss.ClassifyLoss()

    for data in train_loader:
        images, targets = data
        images_size = list(images.shape[2:])