def __init__(self, n_class, roi_size, spatial_scale, M, classifier):
    """Build the two-branch RoI head.

    Branch 1 pools each RoI to ``roi_size x roi_size`` and scores it through
    ``classifier`` and a linear ``score`` layer. Branch 2 pools at twice that
    resolution, applies two 3x3 convolutions, max-pools along each spatial
    axis and maps each result to ``M`` outputs.

    Args:
        n_class: number of classes, background included.
        roi_size: side length of the branch-1 pooled RoI.
        spatial_scale: scale handed to both RoI pooling layers.
        M: output width of ``fc_x`` and ``fc_y``.
        classifier: module stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale, self.M = spatial_scale, M

    # Branch 1: pooled RoI of shape (N, C, outh, outw).
    self.roi_1 = RoIPooling2D(roi_size, roi_size, spatial_scale)
    self.classifier = classifier
    self.score = nn.Linear(4096, n_class)

    # Branch 2: pooled RoI of shape (N, C, outh*2, outw*2).
    doubled = roi_size * 2
    self.roi_2 = RoIPooling2D(doubled, doubled, spatial_scale)
    self.conv_21 = nn.Conv2d(512, 512, kernel_size=(3, 3), padding=1)
    # conv_22 output shape: (1, 512, 14, 14)
    self.conv_22 = nn.Conv2d(512, 512, kernel_size=(3, 3), padding=1)
    # max_x output shape: (1, 512, 1, 14)
    self.max_x = nn.MaxPool2d((14, 1))
    # max_y output shape: (1, 512, 14, 1)
    self.max_y = nn.MaxPool2d((1, 14))
    self.fc_x = nn.Linear(7168, M)
    self.fc_y = nn.Linear(7168, M)

    # Same N(0, 0.01) initialisation for every trainable layer created here.
    for layer in (self.score, self.conv_21, self.conv_22,
                  self.fc_x, self.fc_y):
        normal_init(layer, 0, 0.01)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up RoI pooling plus the localisation and score layers.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale

    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
    self.classifier = classifier
    self.cls_loc = nn.Linear(4096, n_class * 4)
    self.score = nn.Linear(4096, n_class)

    # The localisation layer gets the tighter initial spread.
    for layer, std in ((self.cls_loc, 0.001), (self.score, 0.01)):
        normal_init(layer, 0, std)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes; sizes the two output layers and is
            kept on the instance as ``self.n_class``.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier
    self.cls_loc = nn.Linear(4096, n_class * 4)  # class-wise localization
    self.score = nn.Linear(4096, n_class)
    normal_init(self.cls_loc, 0, 0.001)
    normal_init(self.score, 0, 0.01)

    # Keep n_class on the instance like the sibling heads do, so code that
    # reads ``head.n_class`` works with this head as well.
    self.n_class = n_class
    self.roi_size = roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes used to size the output layers.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier
    # TODO: why is the background class not removed here?
    self.cls_loc = nn.Linear(4096, n_class * 4)
    self.score = nn.Linear(4096, n_class)
    normal_init(self.cls_loc, 0, 0.001)
    normal_init(self.score, 0, 0.01)

    self.n_class = n_class
    self.roi_size = roi_size
    self.spatial_scale = spatial_scale
    # RoI pooling layer. Both output dimensions are roi_size; the previous
    # code passed n_class as the pooled height, giving an n_class x roi_size
    # output instead of a square one.
    self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(Head, self).__init__()
    self.classifier = classifier

    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normalizer(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included.
        roi_size: size of an RoI after pooling.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: the fully connected layers that precede the two
            output layers.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier

    # Output layers: per-class box offsets and per-class scores for each RoI.
    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer

    # Weight initialisation for the two output layers.
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included (author's note: 21).
        roi_size: height and width of the pooled RoI (author's note: 7).
        spatial_scale: scale handed to the RoI pooling layer
            (author's note: 1/16).
        classifier: per the author's note, the last two fully connected
            layers of VGG16.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier

    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer

    # Weight initialisation of the fully connected output layers.
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included.
        roi_size: number of grid cells per side used by RoI pooling.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: per the author's note, the VGG16 fully connected layers.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier

    # Final regression layer and final classification layer.
    regressor = nn.Linear(4096, n_class * 4)
    scorer = nn.Linear(4096, n_class)
    self.cls_loc = regressor
    self.score = scorer

    # Initialise the regression layer, then the classification layer.
    for layer, std in ((regressor, 0.001), (scorer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    # RoI pooling layer.
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def forward(self, features_maps, rois, roi_indices):
    """Pool each RoI from its pyramid level, then predict boxes and scores.

    Args:
        features_maps: indexable collection of feature maps; entry ``i`` is
            pooled for the RoIs assigned to pyramid level ``i + 2``.
        rois: RoI boxes; converted to a float tensor. Coordinate columns
            are swapped from (y, x) to (x, y) order before pooling.
        roi_indices: one index per RoI, prepended as column 0 of each box
            (presumably the image index within the batch -- confirm).

    Returns:
        tuple: ``(roi_cls_locs, roi_scores)`` with one row per RoI, in the
        same order as ``rois``.
    """
    roi_indices = array_tool.totensor(roi_indices).float()
    rois = array_tool.totensor(rois).float()
    roi_level = self._PyramidRoI_Feat(rois)

    indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
    # yx -> xy, then make the tensor contiguous in memory.
    indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]].contiguous()

    roi_pool_feats = []
    roi_to_levels = []
    for i, level in enumerate(range(2, 6)):
        level_mask = roi_level == level
        if level_mask.sum() == 0:
            continue
        idx_l = level_mask.nonzero()
        roi_to_levels.append(idx_l)
        keep_indices_and_rois = indices_and_rois[idx_l].view(-1, 5)
        roi_pooling = RoIPooling2D(self.roi_size, self.roi_size,
                                   self.spatial_scale[i])
        roi_pool_feats.append(
            roi_pooling(features_maps[i], keep_indices_and_rois))

    roi_pool_feats = torch.cat(roi_pool_feats, 0)
    # Flatten explicitly. An argument-less squeeze() would collapse a
    # single-RoI index tensor to 0-dim, and indexing with it below would
    # silently drop the batch axis.
    roi_to_levels = torch.cat(roi_to_levels, 0).view(-1)
    # Sorting the original RoI positions yields the permutation that puts
    # the level-grouped features back into input order.
    _, order = torch.sort(roi_to_levels)
    roi_pool_feats = roi_pool_feats[order]

    # Flatten C, H, W per RoI.
    pool = roi_pool_feats.view(roi_pool_feats.size(0), -1)
    fc6_out = functional.relu(self.fc6(pool))
    fc7_out = functional.relu(self.fc7(fc6_out))
    roi_cls_locs = self.cls_loc(fc7_out)  # per-class box regression
    roi_scores = self.score(fc7_out)  # per-class score prediction
    return roi_cls_locs, roi_scores
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers, RoI pooling and average pool.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(RESNET101RoIHead, self).__init__()
    self.classifier = classifier

    # Both output layers take 2048 input features.
    loc_layer = nn.Linear(2048, n_class * 4)
    score_layer = nn.Linear(2048, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
    self.avgpool = nn.AvgPool2d(7, stride=1)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers, quantiser and RoI pooling.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(fixed_VGG16RoIHead, self).__init__()
    self.classifier = classifier

    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    # Activation quantiser built with arguments (8, qt.Quant.linear).
    self.act_quant = qt.activation_quantization(8, qt.Quant.linear)

    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier

    # Open question from the author: why regress a box for every class?
    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    # Author's note: the RoI pooling layer is the unusual part. It works on
    # a principle similar to SPPNet and pools inputs of different sizes into
    # matrices of the same size.
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, head, hidden_size, n_class):
    """Extend an existing head's classifier with an extra extractor layer.

    The layers of ``head.classifier`` are reused (not copied) and followed
    by a new ``4096 -> hidden_size`` linear layer and an in-place ReLU. The
    output layers then read ``hidden_size`` features.

    Args:
        head: existing head providing ``classifier``, ``roi_size`` and
            ``spatial_scale``.
        hidden_size: output width of the added extractor layer.
        n_class: number of classes used to size the output layers.
    """
    super(VGG16RoIExtractorHead, self).__init__()

    extractor = nn.Linear(4096, hidden_size)
    normal_init(extractor, 0, 0.01)
    layers = list(head.classifier) + [extractor, nn.ReLU(inplace=True)]
    self.classifier = nn.Sequential(*layers)

    loc_layer = nn.Linear(hidden_size, n_class * 4)
    score_layer = nn.Linear(hidden_size, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class = n_class
    self.roi_size = head.roi_size
    self.spatial_scale = head.spatial_scale
    self.roi = RoIPooling2D(self.roi_size, self.roi_size, self.spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included (author's note: 21
            with background).
        roi_size: height and width of the pooled RoI (author's note: 7x7).
        spatial_scale: scale handed to the RoI pooling layer
            (author's note: 1/16).
        classifier: per the author's note, the last two fully connected
            layers of VGG16.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier

    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer

    # Weight initialisation of the fully connected output layers.
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    # Author's note: turns RoIs of different sizes into a uniform size,
    # giving pooled features of size [300, 512, 7, 7]; implemented with
    # CuPy and compiled on the fly.
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier, hidden_size):
    """Set up a head whose classifier ends in an extra extractor layer.

    The layers of ``classifier`` are reused and followed by a new
    ``4096 -> hidden_size`` linear layer and an in-place ReLU. The output
    layers then read ``hidden_size`` features.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: iterable of layers to extend.
        hidden_size: output width of the added extractor layer.
    """
    super(VGG16RoIHead, self).__init__()

    extractor = nn.Linear(4096, hidden_size)
    normal_init(extractor, 0, 0.01)
    layers = list(classifier) + [extractor, nn.ReLU(inplace=True)]
    self.classifier = nn.Sequential(*layers)

    loc_layer = nn.Linear(hidden_size, n_class * 4)
    score_layer = nn.Linear(hidden_size, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up a head with extra depth and y-rotation outputs, all on CUDA.

    Every layer created here, and the given classifier, is moved to the
    GPU with ``.cuda()``.

    Args:
        n_class: number of classes, background included.
        roi_size: height and width of the pooled RoI.
        spatial_scale: scale handed to the RoI pooling layer.
        classifier: module moved to CUDA and stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    self.classifier = classifier.cuda()

    loc_layer = nn.Linear(4096, n_class * 4).cuda()
    score_layer = nn.Linear(4096, n_class).cuda()
    self.cls_loc = loc_layer
    self.score = score_layer

    # TODO:
    # Modified the depth estimation subnet into a more suitable structure
    self.depth = nn.Linear(4096, 1).cuda()
    self.y_rot = nn.Linear(4096, 1).cuda()

    # Only cls_loc and score are re-initialised; depth and y_rot keep the
    # default nn.Linear initialisation.
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, n_class, roi_size, spatial_scale, classifier):
    """Set up the classifier, output layers and RoI pooling.

    Args:
        n_class: number of classes, background included (author's note: 21
            for VOC).
        roi_size: height and width of the pooled RoI (author's note: 7).
        spatial_scale: scale handed to the RoI pooling layer
            (author's note: 1/16).
        classifier: module stored as ``self.classifier``.
    """
    super(VGG16RoIHead, self).__init__()
    # Classifier structure as recorded by the author:
    #   Sequential(
    #     (0): Linear(in_features=25088, out_features=4096, bias=True)
    #     (1): ReLU(inplace)
    #     (2): Linear(in_features=4096, out_features=4096, bias=True)
    #     (3): ReLU(inplace)
    #   )
    self.classifier = classifier

    loc_layer = nn.Linear(4096, n_class * 4)
    score_layer = nn.Linear(4096, n_class)
    self.cls_loc = loc_layer
    self.score = score_layer
    for layer, std in ((loc_layer, 0.001), (score_layer, 0.01)):
        normal_init(layer, 0, std)

    self.n_class, self.roi_size = n_class, roi_size
    self.spatial_scale = spatial_scale
    self.roi = RoIPooling2D(roi_size, roi_size, spatial_scale)
def __init__(self, extractor, rpn, head,
             loc_normalize_mean=(0., 0., 0., 0.),
             loc_normalize_std=(0.1, 0.1, 0.2, 0.2)):
    """Assemble the detector and its residual branch.

    Stores the three sub-networks and the localisation normalisation
    constants, applies the ``'evaluate'`` preset, then builds four residual
    stages (3x3 conv, 1x1 conv, 2x2 max-pool), an RoI pooling layer and the
    first conv/batch-norm block.

    Args:
        extractor: module stored as ``self.extractor``.
        rpn: module stored as ``self.rpn``.
        head: module stored as ``self.head``.
        loc_normalize_mean: stored as ``self.loc_normalize_mean``.
        loc_normalize_std: stored as ``self.loc_normalize_std``.
    """
    super(FasterRCNN_GAN, self).__init__()
    self.extractor = extractor
    self.rpn = rpn
    self.head = head

    # mean and std
    self.loc_normalize_mean = loc_normalize_mean
    self.loc_normalize_std = loc_normalize_std
    self.use_preset('evaluate')

    self.roi_size = 7
    # NOTE(review): other RoI heads in this code base mention 1/16 as the
    # spatial scale; 16 here may be intentional or inverted -- confirm.
    self.spatial_scale = 16

    # Residual branch: four stages of (3x3 conv, 1x1 conv, 2x2 max-pool).
    # Each pair is (input channels, output channels) of the stage.
    stage_channels = ((64, 128), (128, 256), (256, 256), (256, 512))
    for stage, (in_ch, out_ch) in enumerate(stage_channels, start=1):
        conv3 = torch.nn.Conv2d(in_ch, out_ch, 3,
                                stride=1, padding=1, bias=False)
        conv1 = torch.nn.Conv2d(out_ch, out_ch, 1,
                                stride=1, padding=0, bias=False)
        pool = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=0,
                                  dilation=1, ceil_mode=False)
        setattr(self, 'residual_conv%d_3' % stage, conv3)
        setattr(self, 'residual_conv%d_1' % stage, conv1)
        setattr(self, 'residual_pool%d' % stage, pool)
        # weight_init is called on the pooling layer too, as before; what
        # it does there depends on weight_init, which is defined elsewhere.
        for layer in (conv3, conv1, pool):
            weight_init(layer)

    self.residual_roi = RoIPooling2D(self.roi_size, self.roi_size,
                                     self.spatial_scale)

    # First block: conv -> batch norm -> ReLU -> conv -> batch norm.
    self.block1_conv1 = torch.nn.Conv2d(512, 512, 3, 1, 1, bias=False)
    weight_init(self.block1_conv1)
    self.block1_b1 = torch.nn.BatchNorm2d(512)
    weight_init(self.block1_b1)
    self.block1_relu1 = torch.nn.ReLU(inplace=True)
    self.block1_conv2 = torch.nn.Conv2d(512, 512, 3, 1, 1, bias=False)
    weight_init(self.block1_conv2)
    self.block1_b2 = torch.nn.BatchNorm2d(512)
    weight_init(self.block1_b2)
def get_combined_feature(feature,
                         bboxes,
                         use_spatial_feature=False,
                         roi_size=7,
                         spatial_scale=1 / 16,
                         flip=True):
    """Pool features for every unordered pair of boxes and their union.

    :param feature: feature map that has passed the extractor, e.g. shape
        [1, 512, 37, 50]
    :param bboxes: shape [N, 4] in (ymin, xmin, ymax, xmax) image
        coordinates, N is the number of objects
    :param use_spatial_feature: when true, a two-channel box mask is built
        for each pair. NOTE(review): the mask is currently not added to the
        returned features -- confirm whether it should be.
    :param roi_size: height and width of each pooled feature
    :param spatial_scale: scale handed to the RoI pooling layer.
        NOTE(review): boxes are already rescaled to feature-map coordinates
        below; verify the pooling layer does not rescale them again.
    :param flip: when true, each pair is swapped with probability 0.5
    :return:
        combined_features: one entry per pair, in pair-enumeration order;
            each entry is the list [obj_a, obj_b, union] of pooled features
            of shape (1, C, roi_size, roi_size)
        rel_flip: one bool per pair, True where the pair was swapped
    """
    # spatial scale is not important (author's note)
    roi_pooling = RoIPooling2D(roi_size, roi_size, spatial_scale)
    num_of_bbox = bboxes.shape[0]
    feat_h, feat_w = feature.size(2), feature.size(3)

    # Scale boxes from image to feature-map coordinates.
    # TODO: the image size is assumed to be 1000 x 600 (width x height).
    scale_x_imgtofeature = float(feat_w / 1000)
    scale_y_imgtofeature = float(feat_h / 600)
    bboxes_f = np.zeros(bboxes.shape, dtype=int)
    for (bbox, bbox_f) in zip(bboxes, bboxes_f):
        bbox_f[0] = int(bbox[0] * scale_y_imgtofeature)
        bbox_f[2] = int(bbox[2] * scale_y_imgtofeature)
        bbox_f[1] = int(bbox[1] * scale_x_imgtofeature)
        bbox_f[3] = int(bbox[3] * scale_x_imgtofeature)
        assert bbox_f[0] in range(feat_h + 1) and bbox_f[2] in range(feat_h + 1) \
            and bbox_f[1] in range(feat_w + 1) and bbox_f[3] in range(feat_w + 1), \
            "bbox:{0} {1}".format(bbox, feature.shape)

    # Shape every pooled RoI must have. Derived from the inputs so that a
    # roi_size other than 7 or a channel count other than 512 also passes.
    expected_shape = (1, feature.size(1), roi_size, roi_size)

    rel_flip = []
    combined_features = []
    for i_obj_a in range(num_of_bbox):
        for i_obj_b in range(i_obj_a + 1, num_of_bbox):
            bbox_f_a = bboxes_f[i_obj_a]  # in (ymin, xmin, ymax, xmax)
            bbox_f_b = bboxes_f[i_obj_b]
            rel_flip.append(False)
            # The random draw happens even when flip is False, which keeps
            # the random stream identical to earlier behaviour.
            if rd.random() > 0.5 and flip:  # randomly swap obj1 and obj2
                bbox_f_a, bbox_f_b = bbox_f_b, bbox_f_a
                rel_flip[-1] = True

            # Union box of the pair.
            union_cord = np.zeros(4)
            union_cord[0] = min(bbox_f_a[0], bbox_f_b[0])
            union_cord[1] = min(bbox_f_a[1], bbox_f_b[1])
            union_cord[2] = max(bbox_f_a[2], bbox_f_b[2])
            union_cord[3] = max(bbox_f_a[3], bbox_f_b[3])

            if use_spatial_feature:
                # Dual spatial mask, one channel per object (obj_a first),
                # sized to the actual feature map instead of a fixed 37 x 50.
                dual_channel_feature = torch.zeros(2, feat_h, feat_w)
                for ii, bbox in enumerate([bbox_f_a, bbox_f_b]):
                    dual_channel_feature[ii, bbox[0]:bbox[2],
                                         bbox[1]:bbox[3]] = 1

            # Pool obj_a, obj_b and the union in a single call.
            rois = np.stack([bbox_f_a, bbox_f_b, union_cord], axis=0)
            roi_indices = at.to_tensor(np.zeros(3)).float()
            rois = at.to_tensor(rois).float()
            indices_and_rois = torch.cat([roi_indices[:, None], rois], dim=1)
            # yx -> xy
            indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]].contiguous()
            # Pooled features in obj_a, obj_b, union order.
            combined_feature_arr = roi_pooling(feature, indices_and_rois)

            combined_feature = []
            for pooled in combined_feature_arr:
                # Add a leading dimension of size 1.
                combined_feature.append(torch.unsqueeze(pooled, 0))
                assert combined_feature[-1].shape == expected_shape
            combined_features.append(combined_feature)
    return combined_features, rel_flip