def _make_linear_layers(self, num_cls, roipool=5, fc=512, emb=1024, norm=True):
    # One RoI-pooling head per backbone stage: the spatial scale halves at
    # each stage (1, 1/2, 1/4, 1/8, 1/16) while the channel count follows a
    # VGG-style progression (64, 128, 256, 512, 512). Note that `norm` is
    # accepted but unused in this body.
    lin_in = roipool * roipool
    self.roipool1 = _RoIPooling(roipool, roipool, 1)
    self.fc_pool1 = nn.Linear(lin_in * 64, fc)
    self.roipool2 = _RoIPooling(roipool, roipool, 0.5)
    self.fc_pool2 = nn.Linear(lin_in * 128, fc)
    self.roipool3 = _RoIPooling(roipool, roipool, 0.25)
    self.fc_pool3 = nn.Linear(lin_in * 256, fc)
    self.roipool4 = _RoIPooling(roipool, roipool, 0.125)
    self.fc_pool4 = nn.Linear(lin_in * 512, fc)
    self.roipool5 = _RoIPooling(roipool, roipool, 0.0625)
    self.fc_pool5 = nn.Linear(lin_in * 512, fc)
    # fuse the five per-stage vectors into one embedding and class scores
    self.fc_emb = nn.Linear(fc * 5, emb)
    self.class_scores1 = nn.Linear(fc * 5, num_cls)
    self.class_scores2 = nn.Linear(num_cls, num_cls)
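# A minimal usage sketch (not from the original file) of how the five heads
# built above would typically be combined: each RoI is pooled from one
# backbone stage, flattened through its own fc layer, and the five vectors
# are concatenated before the embedding and classification layers. `feats`
# (a list of the five stage feature maps) and `rois` are assumed inputs;
# the attribute names match the constructor above.
def _multi_scale_head_sketch(self, feats, rois):
    pooled = []
    for i in range(5):
        pool = getattr(self, 'roipool{}'.format(i + 1))
        fc = getattr(self, 'fc_pool{}'.format(i + 1))
        x = pool(feats[i], rois)                  # N x C_i x roipool x roipool
        pooled.append(fc(x.view(x.size(0), -1)))  # N x fc
    cat = torch.cat(pooled, dim=1)                # N x (fc * 5)
    emb = self.fc_emb(cat)
    scores = self.class_scores2(self.class_scores1(cat))
    return emb, scores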
def __init__(self, classes, class_agnostic):
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE[0])
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.RCNN_roi_pool = _RoIPooling(cfg.RCNN_COMMON.POOLING_SIZE,
                                     cfg.RCNN_COMMON.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.RCNN_COMMON.POOLING_SIZE,
                                      cfg.RCNN_COMMON.POOLING_SIZE, 1.0 / 16.0)

    self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE * 2 \
        if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL \
        else cfg.RCNN_COMMON.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
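# A hedged sketch (assumed helper, not from this file) of what the grid_size
# doubling above enables: when CROP_RESIZE_WITH_MAX_POOL is set, RoI features
# are cropped on a grid twice the target size and then 2x2 max-pooled back
# down, which approximates max pooling within each output bin.
import torch.nn.functional as F

def crop_pool_sketch(roi_crop, base_feat, grid, use_max_pool=True):
    pooled = roi_crop(base_feat, grid)       # N x C x grid_size x grid_size
    if use_max_pool:
        pooled = F.max_pool2d(pooled, 2, 2)  # back to POOLING_SIZE
    return pooled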
def __init__(self, classes, class_agnostic, feat_name, feat_list=('conv4',), pretrained=True):
    super(fasterRCNN, self).__init__(classes, class_agnostic, feat_name, feat_list, pretrained)

    # Important: put the feature extractor in eval mode before the dummy
    # forward pass so that BatchNorm statistics are not polluted, then
    # restore train mode. The dummy pass infers the base-feature channels.
    self.FeatExt.eval()
    rand_img = torch.Tensor(1, 3, 224, 224)
    rand_feat = self.FeatExt(rand_img)
    self.FeatExt.train()
    self.dout_base_model = rand_feat.size(1)

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE[0])
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.RCNN_roi_pool = _RoIPooling(cfg.RCNN_COMMON.POOLING_SIZE,
                                     cfg.RCNN_COMMON.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.RCNN_COMMON.POOLING_SIZE,
                                      cfg.RCNN_COMMON.POOLING_SIZE, 1.0 / 16.0)

    self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE * 2 \
        if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL \
        else cfg.RCNN_COMMON.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()

    self.iter_counter = 0
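# Standalone illustration (stand-in backbone, assumed names) of the
# shape-probing idiom used above; torch.no_grad() additionally avoids
# building an autograd graph for the dummy pass.
import torch
import torch.nn as nn

extractor = nn.Sequential(nn.Conv2d(3, 64, 3, padding=1), nn.ReLU())
extractor.eval()
with torch.no_grad():
    channels = extractor(torch.zeros(1, 3, 224, 224)).size(1)
extractor.train()
print(channels)  # -> 64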
def __init__(self, classes, class_agnostic):
    super(_All_in_One, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self._fs = cfg.FCGN.FEAT_STRIDE[0]

    # for resnet: derive the base-feature channel count from the stride
    if self.dout_base_model is None:
        if self._fs == 16:
            self.dout_base_model = 256 * self.expansions
        elif self._fs == 32:
            self.dout_base_model = 512 * self.expansions

    # loss
    self.VMRN_obj_loss_cls = 0
    self.VMRN_obj_loss_bbox = 0

    # define rpn
    self.VMRN_obj_rpn = _RPN(self.dout_base_model,
                             anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                             anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                             feat_stride=self._fs)
    self.VMRN_obj_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.VMRN_obj_roi_pool = _RoIPooling(cfg.RCNN_COMMON.POOLING_SIZE,
                                         cfg.RCNN_COMMON.POOLING_SIZE, 1.0 / 16.0)
    self.VMRN_obj_roi_align = RoIAlignAvg(cfg.RCNN_COMMON.POOLING_SIZE,
                                          cfg.RCNN_COMMON.POOLING_SIZE, 1.0 / 16.0)
    self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE * 2 \
        if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL \
        else cfg.RCNN_COMMON.POOLING_SIZE
    self.VMRN_obj_roi_crop = _RoICrop()

    # object-pair feature extraction for the relationship branch
    self._isex = cfg.TRAIN.VMRN.ISEX
    self.VMRN_rel_op2l = _OP2L(cfg.VMRN.OP2L_POOLING_SIZE,
                               cfg.VMRN.OP2L_POOLING_SIZE, 1.0 / 16.0, self._isex)

    self._train_iter_conter = 0

    # grasp detection components
    self._MGN_as = cfg.FCGN.ANCHOR_SCALES
    self._MGN_ar = cfg.FCGN.ANCHOR_RATIOS
    self._MGN_aa = cfg.FCGN.ANCHOR_ANGLES
    self.MGN_classifier = _Classifier(self.dout_base_model, 5,
                                      self._MGN_as, self._MGN_ar, self._MGN_aa)
    self.MGN_proposal_target = _GraspTargetLayer(self._fs, self._MGN_ar,
                                                 self._MGN_as, self._MGN_aa)
    self._MGN_anchors = torch.from_numpy(
        generate_oriented_anchors(base_size=self._fs,
                                  scales=np.array(self._MGN_as),
                                  ratios=np.array(self._MGN_ar),
                                  angles=np.array(self._MGN_aa))).float()
    self._MGN_num_anchors = self._MGN_anchors.size(0)
    # [x1, y1, x2, y2, theta] -> [0, 0, w, h, theta]: only size and angle
    # are kept, since anchors are re-centered at each feature-map location
    self._MGN_anchors = torch.cat([
        0 * self._MGN_anchors[:, 0:1],
        0 * self._MGN_anchors[:, 1:2],
        self._MGN_anchors[:, 2:3] - self._MGN_anchors[:, 0:1] + 1,
        self._MGN_anchors[:, 3:4] - self._MGN_anchors[:, 1:2] + 1,
        self._MGN_anchors[:, 4:5]
    ], dim=1)
    self._MGN_USE_POOLED_FEATS = cfg.MGN.USE_POOLED_FEATS
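# Worked example (hypothetical values) of the anchor conversion above: a
# corner-form oriented anchor [x1, y1, x2, y2, theta] keeps only its width,
# height, and angle after the transform.
import torch

a = torch.tensor([[-24., -24., 39., 39., 30.]])  # 64x64 box at 30 degrees
converted = torch.cat([0 * a[:, 0:1], 0 * a[:, 1:2],
                       a[:, 2:3] - a[:, 0:1] + 1,
                       a[:, 3:4] - a[:, 1:2] + 1,
                       a[:, 4:5]], dim=1)
print(converted)  # tensor([[ 0.,  0., 64., 64., 30.]])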
def __init__(self, classes, class_agnostic):
    super(_FPN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpns: either a single RPN shared across all pyramid levels,
    # or one RPN per level
    self._share_rpn = cfg.FPN.SHARE_RPN
    self._share_header = cfg.FPN.SHARE_HEADER
    self._num_pyramid_layers = len(cfg.RCNN_COMMON.FEAT_STRIDE)
    if self._share_rpn:
        self.RCNN_rpn = _RPN(self.dout_base_model,
                             anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                             anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                             feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE)
    else:
        self.RCNN_rpns = nn.ModuleList()
        for i in range(len(cfg.RCNN_COMMON.FEAT_STRIDE)):
            self.RCNN_rpns.append(
                _RPN(self.dout_base_model,
                     anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                     anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                     feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE[i])
            )

    # one RoI align/pool op per pyramid level; each spatial scale is the
    # reciprocal of that level's feature stride
    self.RCNN_roi_aligns = nn.ModuleList()
    self.RCNN_roi_pools = nn.ModuleList()
    for i in range(len(cfg.RCNN_COMMON.FEAT_STRIDE)):
        self.RCNN_roi_aligns.append(
            RoIAlignAvg(cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE,
                        1.0 / float(cfg.RCNN_COMMON.FEAT_STRIDE[i])))
        self.RCNN_roi_pools.append(
            _RoIPooling(cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE,
                        1.0 / float(cfg.RCNN_COMMON.FEAT_STRIDE[i])))

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
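# Hedged sketch of the standard FPN routing heuristic (assumed here, not
# shown in this snippet) that decides which of the per-level RoI ops above
# serves a given RoI: k = floor(k0 + log2(sqrt(w * h) / 224)), so larger
# RoIs are pooled from coarser pyramid levels.
import torch

def assign_pyramid_level(rois, k0=4, k_min=2, k_max=5):
    # rois: N x 5 tensor of (batch_idx, x1, y1, x2, y2)
    w = rois[:, 3] - rois[:, 1] + 1
    h = rois[:, 4] - rois[:, 2] + 1
    k = torch.floor(k0 + torch.log2(torch.sqrt(w * h) / 224.0))
    return torch.clamp(k, k_min, k_max).long()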
def __init__(self, classes, class_agnostic):
    super(_fastRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic, feat_name,
             feat_list=('conv2', 'conv3', 'conv4', 'conv5'), pretrained=True):
    super(FPN, self).__init__(classes, class_agnostic, feat_name, feat_list, pretrained)

    # Important: put the feature extractor in eval mode before the dummy
    # forward pass, then restore train mode; the pass records the channel
    # count of every pyramid input.
    self.FeatExt.eval()
    rand_img = torch.Tensor(1, 3, 224, 224)
    rand_feat = self.FeatExt(rand_img)
    self.FeatExt.train()
    self.n_channels = [f.size(1) for f in rand_feat]
    self.dout_base_model = 256  # channel width of the FPN outputs

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self._num_pyramid_layers = len(cfg.RCNN_COMMON.FEAT_STRIDE)
    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=cfg.RCNN_COMMON.FEAT_STRIDE)

    self.RCNN_roi_aligns = nn.ModuleList()
    self.RCNN_roi_pools = nn.ModuleList()
    for i in range(len(cfg.RCNN_COMMON.FEAT_STRIDE)):
        self.RCNN_roi_aligns.append(
            RoIAlignAvg(cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE,
                        1.0 / float(cfg.RCNN_COMMON.FEAT_STRIDE[i])))
        self.RCNN_roi_pools.append(
            _RoIPooling(cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE,
                        1.0 / float(cfg.RCNN_COMMON.FEAT_STRIDE[i])))

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.iter_counter = 0
def __init__(self, anchors, all_anchors, inds_inside):
    super(TPN, self).__init__()
    # init some parameters
    self.image_shape = [[240, 320]]  # for one batch; TODO: maybe need to change here
    self.anchors = anchors  # (630, x, y, xw, yw) anchor coordinates
    self.inds_inside = inds_inside
    self.all_anchors = all_anchors

    # C3D backbone initialized from pretrained weights
    c3d = C3D()
    c3d.load_state_dict(torch.load(c3d_checkpoint))
    # be careful about these two index ranges: modules() is recursive, so
    # they depend on how C3D nests its layers
    self.c3d_part1 = nn.Sequential(*list(c3d.modules())[1:4])
    self.c3d_part2 = nn.Sequential(*list(c3d.modules())[4:13])  # up to conv2
    # self.BN1 = torch.nn.BatchNorm2d(512)

    # RPN
    self._CPN = CPN(self.anchors, all_anchors, inds_inside)
    self.n_classes = 22
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

    # detection head; hidden width reduced from 4096 for memory reasons
    self.head_to_tail_ = torch.nn.Sequential(
        nn.Linear(512 * 7 * 7, 1024),
        nn.ReLU(True),
        nn.Dropout(),
        nn.Linear(1024, 4096),
        nn.ReLU(True))
    self.RCNN_bbox_pred = torch.nn.Linear(4096, 4 * self.n_classes)
    self.RCNN_cls_score = torch.nn.Linear(4096, self.n_classes)
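# The modules() slicing above is fragile: nn.Module.modules() yields the
# module itself and then every descendant recursively, so the indices change
# whenever nesting changes. A sketch of the more robust idiom with
# children(), which yields only the immediate submodules in definition order
# (function name assumed):
import torch.nn as nn

def split_backbone(model, cut):
    layers = list(model.children())  # top-level layers only
    return nn.Sequential(*layers[:cut]), nn.Sequential(*layers[cut:])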