def __init__(self, classes, class_agnostic):
    """Build the sub-modules of the hierarchical R-CNN model.

    Args:
        classes: Sized collection of classes. It is stored unchanged and
            its length becomes ``n_classes``.
        class_agnostic: Stored unchanged on the instance; not otherwise
            used in this constructor.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_HierRCNN, self).__init__()

    # The label vectors are read eagerly from a fixed relative path, so the
    # file must exist relative to the current working directory.
    with h5py.File('data/pretrained_model/label_vec_vrd.h5', 'r') as label_file:
        self.label_vecs = np.array(label_file['label_vec'])

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Both RoI layers receive the same (height, width, scale) arguments.
    pool_size = cfg.POOLING_SIZE
    spatial_scale = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, spatial_scale)
    self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, spatial_scale)

    # The grid is twice the pooling size when crop-resize is combined with
    # max pooling, otherwise it equals the pooling size.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()

    # Two linear layers, each preceded by ReLU + Dropout, mapping
    # 4096 -> 4096 -> cfg.HIER.EMBEDDING_LENGTH.
    embedding_layers = [
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, cfg.HIER.EMBEDDING_LENGTH),
    ]
    self.order_embedding = nn.Sequential(*embedding_layers)

    self.order_score = _OrderSimilarity(cfg.HIER.ORDER_DISTANCE_NORM)
def __init__(self, classes, class_agnostic, compact_mode=False):
    """Build the RPN, attention and position-sensitive RoI components.

    Args:
        classes: Sized collection of classes. It is stored unchanged and
            its length becomes ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
        compact_mode: Accepted for interface compatibility; this
            constructor does not read it.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to zero.
    # NOTE: the original carried a commented-out FocalLossV4 handle here;
    # no focal loss object is created by this constructor.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Described in the original as the "Large Separable Convolution Layer"
    # section: an RPN block (245 -> 256 channels) followed by SAM(256, 245).
    self.rpn = RPN(in_channels=245, f_channels=256)
    self.sam = SAM(256, 245)

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Timing slots, all empty until something else fills them in.
    for timer_name in ("rpn_time", "pre_roi_time",
                       "roi_pooling_time", "subnet_time"):
        setattr(self, timer_name, None)

    # Both position-sensitive RoI handles use the inverse feature stride.
    inv_stride = 1. / cfg.FEAT_STRIDE
    self.psroiAlign = PSROIAlignhandle(inv_stride, 7, 2, 5)
    self.psroiPool = PSROIPoolhandle(7, 7, inv_stride, 7, 5)
def __init__(self, classes, class_agnostic):
    """Build the RPN, proposal-target and RoI pooling/align/crop layers.

    Args:
        classes: Sized collection of classes. It is stored unchanged and
            its length becomes ``n_classes``.
        class_agnostic: Stored unchanged on the instance; not otherwise
            used in this constructor.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align receive the same size and 1/16 scale.
    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

    # The grid is twice the pooling size when crop-resize is followed by
    # max pooling, otherwise it equals the pooling size.
    # Original author's notes (config values not visible here, TODO confirm):
    #   - __C.POOLING_SIZE = 7: size of the pooled region, i.e. the size
    #     of the feature obtained after RoI pooling.
    #   - CROP_RESIZE_WITH_MAX_POOL = True, so grid_size = 7 * 2 = 14.
    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, num_classes, num_predicates, class_agnostic):
    """Build the detection layers and the relation-prediction head.

    Args:
        num_classes: Stored as ``n_classes`` and passed to the
            proposal-target, relation-proposal and relation-feature layers.
        num_predicates: Stored as ``n_preds``; output size of the final
            relation classifier.
        class_agnostic: Stored unchanged on the instance.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_fasterRCNN, self).__init__()

    self.n_classes = num_classes
    self.class_agnostic = class_agnostic
    self.n_preds = num_predicates

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pool / align share the same output shape and 1/16 scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    spatial_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, spatial_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, spatial_scale, 0)

    # Relation proposal and relation feature extraction.
    # NOTE: "featrue" is misspelled in the attribute name; it is kept
    # as-is because other code may look the attribute up by this name.
    self.Proposal_rela_layer = _Proposal_rela_layer(self.n_classes)
    self.Extract_rela_featrue = _Extract_rela_feature(self.n_classes, 4096, 512)

    # Relation branch layers (input size -> output size).
    self.linear_u_rela = FC(4096, 512)
    self.linear_v_rela = FC(1536, 512)
    self.linear_s_rela = FC(512, 512)
    self.linear_l_rela = FC(1536, 512)
    self.linear_f_rela = FC(1536, 100)
    self.linear_d_rela = nn.Linear(100, 1)
    self.linear_r_rela = FC(101, 512)

    # Relation score prediction: one output per predicate.
    self.rela_cls_pred = nn.Linear(512, self.n_preds)
def __init__(self, classes, class_agnostic):
    """Build the RPN and the position-sensitive RoI pooling layers.

    Args:
        classes: Sized collection of classes. It is stored unchanged and
            its length becomes ``n_classes``.
        class_agnostic: When truthy, box regression uses a single class
            (``box_num_classes == 1``); otherwise one per class.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(RFCN_head, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Number of classes for which box offsets are predicted.
    if class_agnostic:
        self.box_num_classes = 1
    else:
        self.box_num_classes = self.n_classes

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Classification and localisation pools differ only in output_dim:
    # n_classes for scores, 4 values per box class for offsets.
    pool_size = cfg.POOLING_SIZE
    self.RCNN_psroi_pool_cls = PSRoIPool(
        pool_size, pool_size,
        spatial_scale=1 / 16.0,
        group_size=pool_size,
        output_dim=self.n_classes,
    )
    self.RCNN_psroi_pool_loc = PSRoIPool(
        pool_size, pool_size,
        spatial_scale=1 / 16.0,
        group_size=pool_size,
        output_dim=self.box_num_classes * 4,
    )

    # Average pooling whose kernel and stride both equal the pooling size.
    self.pooling = nn.AvgPool2d(kernel_size=cfg.POOLING_SIZE, stride=cfg.POOLING_SIZE)

    # The grid is twice the pooling size when crop-resize is combined with
    # max pooling, otherwise it equals the pooling size.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE
def __init__(self, classes, class_agnostic):
    """Build the RPN, proposal-target, RoI-align and RoI-crop layers.

    Args:
        classes: Sized collection of classes. It is stored unchanged and
            its length becomes ``n_classes``.
        class_agnostic: Stored unchanged on the instance; not otherwise
            used in this constructor.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    # Original author's note: dout_base_model is 512 (TODO confirm).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: this variant creates no RCNN_roi_pool (the _RoIPooling line was
    # commented out in the original); only RoI align and crop are set up.
    pool_size = cfg.POOLING_SIZE
    self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, 1.0/16.0)

    # The grid is twice the pooling size when crop-resize is combined with
    # max pooling, otherwise it equals the pooling size.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, dim):
    """Create one shared RPN plus per-level anchor-target/proposal layers.

    Args:
        dim: depth of input feature map, e.g., 512
    """
    super(RPN, self).__init__()
    self.rpn = _RPN(dim)

    # Aspect ratios come from the config; scales and strides are fixed
    # here, one entry per level (five levels in total).
    self.anchor_ratios = cfg.ANCHOR_RATIOS
    self.anchor_scales = [8, 16, 32, 64, 128]
    self.feat_strides = [4, 8, 16, 32, 32]

    # NOTE(review): these are plain Python lists. If the layers are
    # nn.Module instances they are not registered as sub-modules -
    # confirm this is intended.
    level_specs = list(zip(self.feat_strides, self.anchor_scales))

    # Each level gets its own stride and a single scale value.
    self.RPN_anchor_targets = [
        _AnchorTargetLayer(feat_stride=stride, scales=scale, ratios=self.anchor_ratios)
        for stride, scale in level_specs
    ]
    self.RPN_proposals = [
        _ProposalLayer(feat_stride=stride, scales=scale, ratios=self.anchor_ratios)
        for stride, scale in level_specs
    ]
def __init__(self, classes, class_agnostic):
    """Build the RPN, proposal-target and ROIPool/ROIAlign layers.

    Args:
        classes: Sized collection of classes. It is stored unchanged and
            its length becomes ``n_classes``.
        class_agnostic: Stored unchanged on the instance; not otherwise
            used in this constructor.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original kept commented-out _RoIPooling / RoIAlignAvg
    # constructions here; ROIPool / ROIAlign are the layers actually built.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    spatial_scale = 1.0/16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, spatial_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, spatial_scale, 0)
def __init__(self):
    """Build the RPN, proposal-target and temporal RoI pooling layers.

    All sizes come from the global ``cfg``; the constructor takes no
    arguments. A non-local attention block is added only when
    ``cfg.USE_ATTENTION`` is truthy.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_TDCNN, self).__init__()

    # Original author's note: 21 classes (TODO confirm against config).
    self.n_classes = cfg.NUM_CLASSES

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0   # multi-class classification loss
    self.RCNN_loss_twin = 0  # window ("twin") regression loss

    # Region proposal network.
    # Original author's notes (not verifiable from this block):
    #   - dout_base_model is 512
    #   - the RPN returns rois, rpn_cls_prob, rpn_twin_pred, rpn_loss_cls,
    #     rpn_loss_twin, rpn_label, rpn_loss_mask, with shapes
    #     (1,2000,3), (1,20,96,1,1), (1,20,96,1,1), (1), (1), (256), (1)
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Temporal RoI pooling takes length, height, width and the dedup flag.
    self.RCNN_roi_temporal_pool = _RoITemporalPooling(
        cfg.POOLING_LENGTH,
        cfg.POOLING_HEIGHT,
        cfg.POOLING_WIDTH,
        cfg.DEDUP_TWINS,
    )

    # Original author's note: this branch is not taken in their setup.
    if cfg.USE_ATTENTION:
        self.RCNN_attention = NONLocalBlock3D(
            self.dout_base_model,
            inter_channels=self.dout_base_model,
        )
def __init__(self, classes, class_agnostic, loss_type, pathway):
    """Build the optional fuse layer, the RPN and the RoI layers.

    Args:
        classes: Stored unchanged as both ``classes`` and ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
        loss_type: Stored unchanged on the instance; not otherwise used
            in this constructor.
        pathway: Stored unchanged; the fuse layer is created only when
            this equals ``"two_pathway"``.

    Note:
        ``self.dout_base_model`` is read but never assigned here, so it
        has to be resolvable on the instance before this constructor runs.
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.pathway = pathway
    # NOTE(review): n_classes takes `classes` directly rather than its
    # length, so callers presumably pass a count here - confirm.
    self.n_classes = classes
    self.class_agnostic = class_agnostic
    self.loss_type = loss_type

    # Loss placeholders, initialised to zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # The fuse layer exists only in the two-pathway configuration.
    if pathway == "two_pathway":
        self.fuselayer = Fuse_twopath(self.dout_base_model)

    # Region proposal network (also given n_classes) and target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model, self.n_classes)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pool / align share the same output shape and 1/16 scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    spatial_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, spatial_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, spatial_scale, 0)
from lib.model.rpn.rpn import _RPN
import torch
import numpy as np

# Smoke run: push one random feature map through a freshly built _RPN.
if __name__ == "__main__":
    # Inputs are created in the same order as before so that the random
    # number generator is consumed identically.
    feat_shape = (1, 512, 16, 16)
    base_feat = torch.randn(feat_shape)

    # One row of image info: [1000, 1000, 3] as float32.
    im_info_np = np.array([[1000, 1000, 3]], dtype=np.float32)
    im_info = torch.from_numpy(im_info_np)

    # Random ground-truth boxes of shape (1, 3, 5).
    gt_boxes = torch.randn((1, 3, 5))
    num_boxes = torch.Tensor([[1, 1, 1]])

    rpn = _RPN(512)
    # The three outputs are unpacked but not used any further.
    outputs = rpn(base_feat, im_info, gt_boxes, num_boxes)
    rois, rpn_loss_cls, rpn_loss_box = outputs