def __init__(self, classes, class_agnostic): super(_HierRCNN, self).__init__() with h5py.File('data/pretrained_model/label_vec_vrd.h5', 'r') as f: self.label_vecs = np.array(f['label_vec']) self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 # define rpn self.RCNN_rpn = _RPN(self.dout_base_model) self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0) self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0) self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE self.RCNN_roi_crop = _RoICrop() self.order_embedding = nn.Sequential( nn.ReLU(inplace=True), nn.Dropout(), nn.Linear(4096, 4096), nn.ReLU(inplace=True), nn.Dropout(), nn.Linear(4096, cfg.HIER.EMBEDDING_LENGTH)) self.order_score = _OrderSimilarity(cfg.HIER.ORDER_DISTANCE_NORM)
def __init__(self, classes, class_agnostic): super(_fasterRCNN, self).__init__() self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 # define rpn self.RCNN_rpn = _RPN(self.dout_base_model) self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE# = 7*2 =14 ''' # Size of the pooled region after RoI pooling __C.POOLING_SIZE = 7 roi pooling 之后得到的特征的尺寸 CROP_RESIZE_WITH_MAX_POOL = True ''' self.RCNN_roi_crop = _RoICrop()
def __init__(self, phase, cfg, size, base, extras, head, num_classes): super(test_association, self).__init__() self.phase = phase self.num_classes = num_classes self.cfg = vid self.priorbox = PriorBox(self.cfg) self.priors = Variable(self.priorbox.forward(), volatile=True) self.size = size # SSD network self.vgg = nn.ModuleList(base) # Layer learns to scale the l2 normalized features from conv4_3 self.L2Norm = L2Norm(512, 20) self.extras = nn.ModuleList(extras) self.loc = nn.ModuleList(head[0]) self.conf = nn.ModuleList(head[1]) self.roi_pool = _RoIPooling(self.cfg['POOLING_SIZE'], self.cfg['POOLING_SIZE'], 1.0 / 16.0) self.roi_align = RoIAlignAvg(self.cfg['POOLING_SIZE'], self.cfg['POOLING_SIZE'], 1.0 / 16.0) self.grid_size = self.cfg['POOLING_SIZE'] * 2 if self.cfg['CROP_RESIZE_WITH_MAX_POOL'] else self.cfg['POOLING_SIZE'] self.roi_crop = _RoICrop() self.img_shape = (self.cfg['min_dim'],self.cfg['min_dim']) if phase == 'vid_test': self.softmax = nn.Softmax(dim=-1) self.detect = test_target(num_classes, 200, 0.5, 0.01, 0.45)
def __init__(self, classes, class_agnostic): super(_fasterRCNN, self).__init__() self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 #define rpn self.RCNN_rpn = _RPN(self.dout_base_model) # self.dout_base_model = 512 self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) # self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0) self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE self.RCNN_roi_crop = _RoICrop()
def __init__(self, phase, cfg, size, base, extras, head, num_classes): super(association_lstm, self).__init__() self.phase = phase self.num_classes = num_classes self.cfg = vid self.priorbox = PriorBox(self.cfg) self.priors = Variable(self.priorbox.forward(), volatile=True) self.size = size # SSD network self.vgg = nn.ModuleList(base) # Layer learns to scale the l2 normalized features from conv4_3 self.L2Norm = L2Norm(512, 20) self.extras = nn.ModuleList(extras) self.loc = nn.ModuleList(head[0]) self.conf = nn.ModuleList(head[1]) self.roi_pool = _RoIPooling(self.cfg['POOLING_SIZE'], self.cfg['POOLING_SIZE'], 1.0 / 16.0) self.roi_align = RoIAlignAvg(self.cfg['POOLING_SIZE'], self.cfg['POOLING_SIZE'], 1.0 / 16.0) self.grid_size = self.cfg['POOLING_SIZE'] * 2 if self.cfg[ 'CROP_RESIZE_WITH_MAX_POOL'] else self.cfg['POOLING_SIZE'] self.roi_crop = _RoICrop() self.img_shape = (self.cfg['min_dim'], self.cfg['min_dim']) self.tensor_len = 4 + self.num_classes + 49 self.bnlstm1 = BNLSTM(input_size=84, hidden_size=150, batch_first=False, bidirectional=False) self.bnlstm2 = BNLSTM(input_size=150, hidden_size=300, batch_first=False, bidirectional=False) self.cls_pred = nn.Linear(300, self.num_classes) self.bbox_pred = nn.Linear(300, 4) self.association_pred = nn.Linear(300, 49) self.MultiProjectLoss = MultiProjectLoss(self.num_classes, 0, True, 3, 0.5) if phase == 'vid_train': self.softmax = nn.Softmax(dim=-1) #self.detect = Trnsform_target(num_classes, 200, 0.5, 0.01, 0.45) self.detect = train_target(num_classes, 200, 0.5, 0.01, 0.45)
def __init__(self, classes, class_agnostic): super(_FPN, self).__init__() self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 # define rpn self.RCNN_rpn = _RPN_FPN(self.dout_base_model) self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) # NOTE: the original paper used pool_size = 7 for cls branch, and 14 for mask branch, to save the # computation time, we first use 14 as the pool_size, and then do stride=2 pooling for cls branch. self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0) self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0) self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE self.RCNN_roi_crop = _RoICrop()