Example no. 1
    def __init__(self, n_class, roi_size, spatial_scale, M, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.M = M

        # branch_1
        self.roi_1 = RoIPooling2D(
            self.roi_size, self.roi_size,
            self.spatial_scale)  # roi shape of (N, C, outh, outw)
        self.classifier = classifier
        self.score = nn.Linear(4096, n_class)

        # branch_2
        self.roi_2 = RoIPooling2D(
            self.roi_size * 2, self.roi_size * 2,
            self.spatial_scale)  # roi shape of (N, C, outh*2, outw*2)
        self.conv_21 = nn.Conv2d(512, 512, (3, 3), padding=1)
        self.conv_22 = nn.Conv2d(512, 512, (3, 3),
                                 padding=1)  # output shape (1, 512, 14, 14)
        self.max_x = nn.MaxPool2d((14, 1))  # output shape (1, 512, 1, 14)
        self.max_y = nn.MaxPool2d((1, 14))  # output shape (1, 512, 14, 1)
        self.fc_x = nn.Linear(7168, M)
        self.fc_y = nn.Linear(7168, M)

        normal_init(self.score, 0, 0.01)
        normal_init(self.conv_21, 0, 0.01)
        normal_init(self.conv_22, 0, 0.01)
        normal_init(self.fc_x, 0, 0.01)
        normal_init(self.fc_y, 0, 0.01)
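The snippet above only defines the layers. A minimal sketch of how the two branches might be wired together in a forward pass (my assumption, not code from the source; `torch` and `torch.nn.functional as F` are assumed imported, and `rois` follow the (ymin, xmin, ymax, xmax) convention used in the other examples):

    def forward(self, x, rois, roi_indices):
        indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
        indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]].contiguous()  # yx -> xy

        # branch_1: 7x7 RoI pooling -> fc classifier -> per-class scores
        pool_1 = self.roi_1(x, indices_and_rois)
        fc = self.classifier(pool_1.view(pool_1.size(0), -1))
        scores = self.score(fc)

        # branch_2: 14x14 RoI pooling -> two 3x3 convs -> directional max pooling -> M-way fc
        pool_2 = self.roi_2(x, indices_and_rois)
        h = F.relu(self.conv_22(F.relu(self.conv_21(pool_2))))  # (N, 512, 14, 14)
        pred_x = self.fc_x(self.max_x(h).view(h.size(0), -1))   # (N, 512*14) -> (N, M)
        pred_y = self.fc_y(self.max_y(h).view(h.size(0), -1))
        return scores, pred_x, pred_y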
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(roi_size, roi_size, self.spatial_scale)
        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)
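All of these examples call a `normal_init` helper that is not shown in the snippets. In the Faster R-CNN ports they come from, it is commonly defined along these lines (a sketch, not the exact implementation used in every repository):

def normal_init(m, mean, stddev):
    # initialize a layer's weights from N(mean, stddev) and zero its bias
    m.weight.data.normal_(mean, stddev)
    m.bias.data.zero_()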
Example no. 3
    def __init__(self, n_class, roi_size, spatial_scale,
                 classifier):
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)  # class-wise localization
        self.score = nn.Linear(4096, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier
        # TODO: why isn't the background class excluded here?
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        # build the RoI pooling layer
        self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
Example no. 5
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(Head, self).__init__()

        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)

        normalizer(self.cls_loc, 0, 0.001)
        normalizer(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
Example no. 6
    def __init__(self, n_class, roi_size, spatial_scale,
                 classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier     # final non-output fully connected layers
        self.cls_loc = nn.Linear(4096, n_class * 4)     # layer predicting per-class bbox offsets for each RoI
        self.score = nn.Linear(4096, n_class)           # layer predicting per-class scores for each RoI

        normal_init(self.cls_loc, 0, 0.001)             # initialize the weights of the two output layers
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size                        # spatial size of each RoI after pooling
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier  # the last two fully connected layers of VGG16
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)  # initialize the fully connected layer weights

        self.n_class = n_class  # 21
        self.roi_size = roi_size  # 7
        self.spatial_scale = spatial_scale  # 1/16
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
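RoIPooling2D here is the CuPy-based op used in these Faster R-CNN ports. Where it is not available, `torchvision.ops.RoIPool` is a close substitute (a sketch; it also expects boxes as (batch_index, x1, y1, x2, y2) rows, matching the xy order used in these heads):

from torchvision.ops import RoIPool

# roughly equivalent to RoIPooling2D(7, 7, 1. / 16)
roi = RoIPool(output_size=(7, 7), spatial_scale=1. / 16)
# pool = roi(feature_map, indices_and_rois)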
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier  # VGG16's fully connected layers
        self.cls_loc = nn.Linear(4096, n_class * 4)  # final regression (bbox) layer
        self.score = nn.Linear(4096, n_class)  # final classification layer

        normal_init(self.cls_loc, 0, 0.001)  # initialize the regression layer parameters
        normal_init(self.score, 0, 0.01)  # initialize the classification layer parameters

        self.n_class = n_class  # number of classes
        self.roi_size = roi_size  # RoI pooling grid size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)  # RoI pooling layer
    def forward(self, features_maps, rois, roi_indices):
        roi_indices = array_tool.totensor(roi_indices).float()
        rois = array_tool.totensor(rois).float()
        roi_level = self._PyramidRoI_Feat(rois)
        indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
        xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]  # yx->xy
        indices_and_rois = xy_indices_and_rois.contiguous()  # make the tensor contiguous in memory

        roi_pool_feats = []
        roi_to_levels = []

        for i, l in enumerate(range(2, 6)):
            if (roi_level == l).sum() == 0:
                continue
            idx_l = (roi_level == l).nonzero()
            roi_to_levels.append(idx_l)
            #if idx_l.shape[0] == 0:
            #   keep_indices_and_rois = indices_and_rois[idx_l.data]
            #else:
            keep_indices_and_rois = indices_and_rois[idx_l]
            keep_indices_and_rois = keep_indices_and_rois.view(-1, 5)
            roi_pooling = RoIPooling2D(self.roi_size, self.roi_size,
                                       self.spatial_scale[i])
            pool = roi_pooling(features_maps[i],
                               keep_indices_and_rois)  # apply RoI pooling
            roi_pool_feats.append(pool)
        roi_pool_feats = torch.cat(roi_pool_feats, 0)
        roi_to_levels = torch.cat(roi_to_levels, 0)
        roi_to_levels = roi_to_levels.squeeze()
        idx_sorted, order = torch.sort(roi_to_levels)
        roi_pool_feats = roi_pool_feats[order]

        pool = roi_pool_feats.view(roi_pool_feats.size(0),
                                   -1)  # flatten to (batch_size, C*H*W)

        fc6_out = functional.relu(self.fc6(pool))
        fc7_out = functional.relu(self.fc7(fc6_out))
        roi_cls_locs = self.cls_loc(fc7_out)  # (1000->84) per-class bbox regression
        roi_scores = self.score(fc7_out)  # (1000->21) per-class score prediction
        #all_roi_cls_locs.append(roi_cls_locs)
        #all_roi_scores.append(roi_scores)

        #all_roi_cls_locs = torch.cat(all_roi_cls_locs, 0)
        #all_roi_scores = torch.cat(all_roi_scores, 0)

        return roi_cls_locs, roi_scores
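This forward indexes `self.spatial_scale` per FPN level and relies on `self._PyramidRoI_Feat`, which is not shown. A possible implementation, assuming the level-assignment heuristic from the FPN paper (k = floor(k0 + log2(sqrt(w*h)/224)) with k0 = 4, clamped to [2, 5]) and RoIs in (ymin, xmin, ymax, xmax) order:

    def _PyramidRoI_Feat(self, rois):
        # assumed sketch: assign each RoI to an FPN level according to its scale
        h = rois[:, 2] - rois[:, 0]
        w = rois[:, 3] - rois[:, 1]
        roi_level = torch.floor(4 + torch.log2(torch.sqrt(h * w) / 224.0))
        return roi_level.clamp(min=2, max=5)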
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(RESNET101RoIHead, self).__init__()

        self.classifier = classifier
        self.cls_loc = nn.Linear(2048, n_class * 4)
        self.score = nn.Linear(2048, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
        self.avgpool = nn.AvgPool2d(7, stride=1)
    def __init__(self, n_class, roi_size, spatial_scale,
                 classifier):
        # n_class includes the background
        super(fixed_VGG16RoIHead, self).__init__()

        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)

        self.act_quant = qt.activation_quantization(8, qt.Quant.linear)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier
        # Why regress a separate loc for every class? (class-specific box regression;
        # a class-agnostic head with just 4 outputs is a common alternative)
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        # The special piece here is the ROIPooling layer: similar in principle to SPPNet,
        # it pools regions of different sizes into fixed-size feature maps
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
Example no. 13
    def __init__(self, head, hidden_size, n_class):
        super(VGG16RoIExtractorHead, self).__init__()
        classifier = list(head.classifier)
        extractor = nn.Linear(4096, hidden_size)
        normal_init(extractor, 0, 0.01)
        classifier.append(extractor)
        classifier.append(nn.ReLU(inplace=True))
        self.classifier = nn.Sequential(*classifier)
        self.cls_loc = nn.Linear(hidden_size, n_class * 4)
        self.score = nn.Linear(hidden_size, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = head.roi_size
        self.spatial_scale = head.spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
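A hypothetical way to use this wrapper (names and values below are assumptions, following the constructor signatures shown in the other examples): it reuses an existing head's classifier and RoI pooling settings, projecting the 4096-d fc features down to `hidden_size` before the output layers.

# sketch only
base_head = VGG16RoIHead(n_class=21, roi_size=7, spatial_scale=1. / 16,
                         classifier=classifier)
head = VGG16RoIExtractorHead(base_head, hidden_size=256, n_class=21)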
Example no. 14
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()
        # the last two fully connected layers of VGG16
        self.classifier = classifier
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)
        # initialize the fully connected layer weights
        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)
        # 21 classes, background included
        self.n_class = n_class
        # 7x7
        self.roi_size = roi_size
        # 1/16
        self.spatial_scale = spatial_scale
        # pool RoIs of different sizes to a fixed size; the pooled features have
        # shape [300, 512, 7, 7]. Implemented with CuPy (compiled on the fly).
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
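The comments above describe the pooled features, but this snippet stops at the constructor. In these VGG16RoIHead variants the forward pass typically proceeds as sketched below (an assumption consistent with the forward shown earlier; `array_tool.totensor` and `torch` are as used in that example, and rois arrive in (ymin, xmin, ymax, xmax) order):

    def forward(self, x, rois, roi_indices):
        # pool each RoI, flatten, run the VGG fc layers, then predict
        # per-class locations and scores
        roi_indices = array_tool.totensor(roi_indices).float()
        rois = array_tool.totensor(rois).float()
        indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
        indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]].contiguous()  # yx -> xy

        pool = self.roi(x, indices_and_rois)   # (300, 512, 7, 7)
        pool = pool.view(pool.size(0), -1)     # (300, 512*7*7)
        fc7 = self.classifier(pool)            # (300, 4096)
        roi_cls_locs = self.cls_loc(fc7)       # (300, n_class*4)
        roi_scores = self.score(fc7)           # (300, n_class)
        return roi_cls_locs, roi_scores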
Example no. 15
    def __init__(self, n_class, roi_size, spatial_scale,
                 classifier,hidden_size):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        classifier = list(classifier)
        extractor = nn.Linear(4096, hidden_size)
        normal_init(extractor, 0, 0.01)
        classifier.append(extractor)
        classifier.append(nn.ReLU(inplace=True))
        self.classifier = nn.Sequential(*classifier)
        self.cls_loc = nn.Linear(hidden_size, n_class * 4)
        self.score = nn.Linear(hidden_size, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
Example no. 16
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier.cuda()
        #         self.cls_loc = nn.Linear(4096, n_class * 4).to(t.device("cuda:0"))
        #         self.score = nn.Linear(4096, n_class).to(t.device("cuda:0"))
        self.cls_loc = nn.Linear(4096, n_class * 4).cuda()
        self.score = nn.Linear(4096, n_class).cuda()
        # TODO:
        # Modified the depth estimation subnet into a more suitable structure
        self.depth = nn.Linear(4096, 1).cuda()
        self.y_rot = nn.Linear(4096, 1).cuda()

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class
        self.roi_size = roi_size
        self.spatial_scale = spatial_scale
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
Example no. 17
    def __init__(self, n_class, roi_size, spatial_scale, classifier):
        # n_class includes the background
        super(VGG16RoIHead, self).__init__()

        self.classifier = classifier
        """
        Sequential(
          (0): Linear(in_features=25088, out_features=4096, bias=True)
          (1): ReLU(inplace)
          (2): Linear(in_features=4096, out_features=4096, bias=True)
          (3): ReLU(inplace)
        )
        """
        self.cls_loc = nn.Linear(4096, n_class * 4)
        self.score = nn.Linear(4096, n_class)

        normal_init(self.cls_loc, 0, 0.001)
        normal_init(self.score, 0, 0.01)

        self.n_class = n_class  # 21 for voc
        self.roi_size = roi_size  # 7
        self.spatial_scale = spatial_scale  # 1/16
        self.roi = RoIPooling2D(self.roi_size, self.roi_size,
                                self.spatial_scale)
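The classifier documented in the docstring above (Linear 25088→4096, ReLU, Linear 4096→4096, ReLU) is what remains of torchvision's VGG16 classifier once the final 1000-way layer and the dropout layers are removed. A sketch of how it is commonly obtained (assuming torchvision; details such as the pretrained flag vary between repositories):

import torch.nn as nn
from torchvision.models import vgg16

def decom_vgg16():
    model = vgg16(pretrained=True)
    features = list(model.features)[:30]   # conv layers up to conv5_3 (stride 16)
    classifier = list(model.classifier)
    del classifier[6]                       # drop the final 1000-way Linear
    del classifier[5]                       # drop Dropout
    del classifier[2]                       # drop Dropout
    return nn.Sequential(*features), nn.Sequential(*classifier)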
    def __init__(self,
                 extractor,
                 rpn,
                 head,
                 loc_normalize_mean=(0., 0., 0., 0.),
                 loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
        super(FasterRCNN_GAN, self).__init__()
        self.extractor = extractor
        self.rpn = rpn
        self.head = head

        # mean and std
        self.loc_normalize_mean = loc_normalize_mean
        self.loc_normalize_std = loc_normalize_std
        self.use_preset('evaluate')

        self.roi_size = 7
        self.spatial_scale = 16

        # # residual branch
        #
        self.residual_conv1_3 = torch.nn.Conv2d(64,
                                                128,
                                                3,
                                                stride=1,
                                                padding=1,
                                                bias=False)
        self.residual_conv1_1 = torch.nn.Conv2d(128,
                                                128,
                                                1,
                                                stride=1,
                                                padding=0,
                                                bias=False)
        self.residual_pool1 = torch.nn.MaxPool2d(kernel_size=2,
                                                 stride=2,
                                                 padding=0,
                                                 dilation=1,
                                                 ceil_mode=False)

        weight_init(self.residual_conv1_3)
        weight_init(self.residual_conv1_1)
        weight_init(self.residual_pool1)

        self.residual_conv2_3 = torch.nn.Conv2d(128,
                                                256,
                                                3,
                                                stride=1,
                                                padding=1,
                                                bias=False)
        self.residual_conv2_1 = torch.nn.Conv2d(256,
                                                256,
                                                1,
                                                stride=1,
                                                padding=0,
                                                bias=False)
        self.residual_pool2 = torch.nn.MaxPool2d(kernel_size=2,
                                                 stride=2,
                                                 padding=0,
                                                 dilation=1,
                                                 ceil_mode=False)

        weight_init(self.residual_conv2_3)
        weight_init(self.residual_conv2_1)
        weight_init(self.residual_pool2)

        self.residual_conv3_3 = torch.nn.Conv2d(256,
                                                256,
                                                3,
                                                stride=1,
                                                padding=1,
                                                bias=False)
        self.residual_conv3_1 = torch.nn.Conv2d(256,
                                                256,
                                                1,
                                                stride=1,
                                                padding=0,
                                                bias=False)
        self.residual_pool3 = torch.nn.MaxPool2d(kernel_size=2,
                                                 stride=2,
                                                 padding=0,
                                                 dilation=1,
                                                 ceil_mode=False)

        weight_init(self.residual_conv3_3)
        weight_init(self.residual_conv3_1)
        weight_init(self.residual_pool3)

        self.residual_conv4_3 = torch.nn.Conv2d(256,
                                                512,
                                                3,
                                                stride=1,
                                                padding=1,
                                                bias=False)
        self.residual_conv4_1 = torch.nn.Conv2d(512,
                                                512,
                                                1,
                                                stride=1,
                                                padding=0,
                                                bias=False)
        self.residual_pool4 = torch.nn.MaxPool2d(kernel_size=2,
                                                 stride=2,
                                                 padding=0,
                                                 dilation=1,
                                                 ceil_mode=False)

        weight_init(self.residual_conv4_3)
        weight_init(self.residual_conv4_1)
        weight_init(self.residual_pool4)

        self.residual_roi = RoIPooling2D(self.roi_size, self.roi_size,
                                         self.spatial_scale)

        self.block1_conv1 = torch.nn.Conv2d(512, 512, 3, 1, 1, bias=False)
        weight_init(self.block1_conv1)
        self.block1_b1 = torch.nn.BatchNorm2d(512)
        weight_init(self.block1_b1)
        self.block1_relu1 = torch.nn.ReLU(inplace=True)
        self.block1_conv2 = torch.nn.Conv2d(512, 512, 3, 1, 1, bias=False)
        weight_init(self.block1_conv2)
        self.block1_b2 = torch.nn.BatchNorm2d(512)
        weight_init(self.block1_b2)
Example no. 19
def get_combined_feature(feature,
                         bboxes,
                         use_spatial_feature=False,
                         roi_size=7,
                         spatial_scale=1 / 16,
                         flip=True):
    """
    :param feature: feature has passed extractor, shape[1,512,37,50]
    :param bboxes: shape[N, 4], N is num of object
    :return:
        combined_features: list of all combined feature(as same order as gt relation), is a list contains list of
        features in following type: [obj_a, obj_b, rel] or [obj_a, obj_b, rel, spatial_feature]
        rel_flip: list of rel that has been flipped
    """
    roi_pooling = RoIPooling2D(roi_size, roi_size,
                               spatial_scale)  # spatial scale is not important
    num_of_bbox = bboxes.shape[0]
    #set_trace()
    ## scale the bboxes into feature-map coordinates
    ## TODO
    scale_x_imgtofeature, scale_y_imgtofeature = float(
        feature.size(3) / 1000), float(feature.size(2) / 600)
    bboxes_f = np.zeros(
        bboxes.shape, dtype=int
    )  ## bboxes_f holds the bboxes rescaled to the feature-map scale (37, 50)
    for (bbox, bbox_f) in zip(bboxes, bboxes_f):
        bbox_f[0] = int(bbox[0] * scale_y_imgtofeature)
        bbox_f[2] = int(bbox[2] * scale_y_imgtofeature)
        bbox_f[1] = int(bbox[1] * scale_x_imgtofeature)
        bbox_f[3] = int(bbox[3] * scale_x_imgtofeature)
        assert bbox_f[0] in range(feature.size(2)+1) and bbox_f[2] in range(feature.size(2)+1) \
               and bbox_f[1] in range(feature.size(3)+1) and bbox_f[3] in range(feature.size(3)+1), "bbox:{0}  {1}".format(bbox, feature.shape)

    ##start forward
    rel_flip = []
    combined_features = []
    for i_obj_a in range(num_of_bbox):
        for i_obj_b in range(i_obj_a + 1, num_of_bbox):
            bbox_f_a = bboxes_f[i_obj_a]  # in (ymin, xmin, ymax, xmax)
            bbox_f_b = bboxes_f[i_obj_b]
            rel_flip.append(False)
            if rd.random() > 0.5 and flip:  # randomly swap obj1 and obj2
                bbox_f_a, bbox_f_b = bbox_f_b, bbox_f_a
                rel_flip[-1] = True
            ## get the union box coordinates
            union_cord = np.zeros(4)
            union_cord[0] = min(bbox_f_a[0], bbox_f_b[0])
            union_cord[1] = min(bbox_f_a[1], bbox_f_b[1])
            union_cord[2] = max(bbox_f_a[2], bbox_f_b[2])
            union_cord[3] = max(bbox_f_a[3], bbox_f_b[3])

            ##spatial feature
            if use_spatial_feature:
                ##dual spatial mask
                dual_channel_feature = torch.zeros(2, 37, 50)  # hard-coded to the (37, 50) feature size; may need modification
                for ii, bbox in enumerate([bbox_f_a, bbox_f_b]):  # obj_a_first
                    dual_channel_feature[ii, bbox[0]:bbox[2],
                                         bbox[1]:bbox[3]] = 1
            ##combine features
            rois = np.stack([bbox_f_a, bbox_f_b, union_cord], axis=0)
            roi_indices = np.zeros(3)
            roi_indices = at.to_tensor(roi_indices).float()
            rois = at.to_tensor(rois).float()
            indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
            xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
            indices_and_rois = xy_indices_and_rois.contiguous()
            combined_feature_arr = roi_pooling(
                feature, indices_and_rois)  # in obj_a, obj_b, union order
            combined_feature = []
            for idx in range(combined_feature_arr.size(0)):
                combined_feature.append(
                    torch.unsqueeze(combined_feature_arr[idx],
                                    0))  # add a leading batch dimension
                assert combined_feature[-1].shape == (1, 512, 7, 7)
            combined_features.append(combined_feature)
    return combined_features, rel_flip
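A hypothetical call (variable names are assumptions): `feature` is the extractor output for one image and `bboxes` are ground-truth boxes in (ymin, xmin, ymax, xmax) order on the roughly 1000x600 input scale, as assumed by the rescaling code above.

# sketch: each element of combined pairs two objects with their union box
combined, rel_flip = get_combined_feature(feature, bboxes, use_spatial_feature=False)
for (obj_a_feat, obj_b_feat, union_feat), flipped in zip(combined, rel_flip):
    pass  # each tensor has shape (1, 512, 7, 7); flipped marks swapped pairs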