def __init__(self, classes, class_agnostic):
    """Build the RPN (with frozen parameters), proposal-target layer and RoI ops.

    Args:
        classes: Sized collection of classes; stored, and its length is
            recorded as ``n_classes``.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE
    num_classes = len(classes)

    self.classes = classes
    self.n_classes = num_classes
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network; every one of its parameters is frozen.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    for rpn_param in self.RCNN_rpn.parameters():
        rpn_param.requires_grad = False

    self.RCNN_proposal_target = _ProposalTargetLayer(num_classes)

    # RoI pooling / align at scale 1/16 (original note: VGG16, stride 16).
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    # RoI pooling at scale 1/2 (original note: Generator Network, stride 2).
    self.RCNN_roi_pool_conv1 = _RoIPooling(pool, pool, 1.0 / 2.0)

    # The crop grid is doubled when a max-pool follows the crop/resize.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, baseModels, obj_classes, att_classes, rel_classes,
             dout_base_model, pooled_feat_dim):
    """Assemble the object/attribute/relation heads around a base model.

    Args:
        baseModels: Stored as ``RCNN_base_model``.
        obj_classes: Sized collection of object classes.
        att_classes: Sized collection of attribute classes, or ``None``
            (counted as zero classes).
        rel_classes: Sized collection of relation classes, or ``None``
            (counted as zero classes).
        dout_base_model: Passed to ``_RPN``.
        pooled_feat_dim: Input width of the three reducing linear layers
            and argument to ``_RelPN``.
    """
    super(_graphRCNN, self).__init__()

    self.obj_classes = obj_classes
    self.n_obj_classes = len(obj_classes)

    # Identity comparison with None: `== None` would go through a custom
    # __eq__ if the class container defines one.
    self.att_classes = att_classes
    self.n_att_classes = 0 if att_classes is None else len(att_classes)

    self.rel_classes = rel_classes
    self.n_rel_classes = 0 if rel_classes is None else len(rel_classes)

    # Base model.
    self.RCNN_base_model = baseModels

    # Region proposal network and object-level RoI pooling.
    self.RCNN_rpn = _RPN(dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(
        self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

    # Relation proposal branch is only built when relations are enabled.
    if cfg.HAS_RELATIONS:
        self.RELPN_rpn = _RelPN(pooled_feat_dim)
        self.RELPN_proposal_target = _RelProposalTargetLayer(self.n_rel_classes)
        self.RELPN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)

    reduced_pooled_feat_dim = 512

    # Per-branch feature reduction followed by per-branch classifiers.
    self.fc4obj = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
    self.fc4att = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
    self.fc4rel = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)

    self.RCNN_gcn_obj_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_obj_classes)
    self.RCNN_gcn_att_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_att_classes)
    self.RCNN_gcn_rel_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_rel_classes)

    # A GCN is built only when attributes, relations and at least one GCN
    # layer are all configured; the variant depends on cfg.GCN_ON_SCORES.
    if cfg.HAS_ATTRIBUTES and cfg.HAS_RELATIONS and cfg.GCN_LAYERS > 0:
        if not cfg.GCN_ON_SCORES:
            self.GRCNN_gcn = _GCN_1(reduced_pooled_feat_dim)
        else:
            self.GRCNN_gcn = _GCN_2(self.n_obj_classes, self.n_att_classes,
                                    self.n_rel_classes)

    # Loss placeholders.
    self.RCNN_loss_obj_cls = 0
    self.RCNN_loss_att_cls = 0
    self.RCNN_loss_rel_cls = 0
    self.RCNN_loss_bbox = 0
def _init_modules(self, load_model=True):
    """Build the ResNet-101 trunk/head, optionally load weights, and attach
    the detection module and RoI ops.

    Args:
        load_model: When true, weights are read from
            ``self.oneshot_model_path`` (the ``'model'`` entry of the
            checkpoint) and only keys present in this module's own state
            dict are loaded. The flag is also forwarded to the detection
            module as ``load_det_model``.

    Reads ``self.embedding_dim``, ``self.oneshot_model_path`` and
    ``self.det_model_path``, none of which are assigned here.
    """
    resnet = resnet101()

    self.RCNN_base = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                   resnet.maxpool, resnet.layer1,
                                   resnet.layer2, resnet.layer3)
    self.RCNN_top = nn.Sequential(resnet.layer4)
    self.Linear_top = nn.Linear(2048, self.embedding_dim)

    if load_model:
        state_dict = torch.load(self.oneshot_model_path)['model']
        # Build our own state dict once; previously it was rebuilt for
        # every key examined by the filter below.
        own_state = self.state_dict()
        self.load_state_dict({
            k: v for k, v in state_dict.items() if k in own_state
        })

    self.det_module = pseudo_siamese_det(self.det_model_path)
    self.det_module.create_architecture(load_det_model=load_model)
    # NOTE(review): this only sets the flag on det_module itself; if it is
    # an nn.Module, its children keep their own `training` flags. Confirm
    # whether `.eval()` was intended.
    self.det_module.training = False

    self.RCNN_roi_crop = _RoICrop()
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
def __init__(self, classes, class_agnostic):
    """Create the RPN, the standard RoI ops and the two position-sensitive
    RoI pools (classification and localisation).

    Args:
        classes: Sized collection of classes.
        class_agnostic: When truthy, box regression uses a single class
            (``box_num_classes == 1``); otherwise one per class.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(CoupleNet, self).__init__()
    pool = cfg.POOLING_SIZE
    scale = 1 / 16.0

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    if class_agnostic:
        self.box_num_classes = 1
    else:
        self.box_num_classes = self.n_classes

    # Region proposal network and proposal targets.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_crop = _RoICrop()

    # Position-sensitive pooling: one output channel per class for scores,
    # four per box class for regression.
    self.RCNN_psroi_pool_cls = PSRoIPool(
        pool, pool,
        spatial_scale=scale,
        group_size=pool,
        output_dim=self.n_classes,
    )
    self.RCNN_psroi_pool_loc = PSRoIPool(
        pool, pool,
        spatial_scale=scale,
        group_size=pool,
        output_dim=self.box_num_classes * 4,
    )
    self.avg_pooling = nn.AvgPool2d(kernel_size=pool, stride=pool)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
def __init__(self, classes, class_agnostic):
    """Set up CoupleNet's proposal network, RoI operators and PS-RoI pools.

    Args:
        classes: Sized collection of classes; its length becomes
            ``n_classes``.
        class_agnostic: Truthy selects a single box class for regression.

    ``self.dout_base_model`` is read here but assigned elsewhere.
    """
    super(CoupleNet, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self.box_num_classes = 1 if class_agnostic else self.n_classes

    size = cfg.POOLING_SIZE
    psroi_common = dict(spatial_scale=1 / 16.0, group_size=size)

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.RCNN_roi_pool = _RoIPooling(size, size, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(size, size, 1.0 / 16.0)
    self.RCNN_roi_crop = _RoICrop()

    # Classification and localisation position-sensitive pools differ only
    # in their output dimension.
    self.RCNN_psroi_pool_cls = PSRoIPool(
        size, size, output_dim=self.n_classes, **psroi_common)
    self.RCNN_psroi_pool_loc = PSRoIPool(
        size, size, output_dim=self.box_num_classes * 4, **psroi_common)

    self.avg_pooling = nn.AvgPool2d(kernel_size=size, stride=size)
    self.grid_size = (size * 2) if cfg.CROP_RESIZE_WITH_MAX_POOL else size
def __init__(self, classes, class_agnostic):
    """Build the detector plus image-level and instance-level domain
    adaptation heads and a summed MSE consistency loss.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    # Run the parent class's __init__ first.
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Instantiate the RPN. Original author's note: dout_base_model is the
    # RPN input dimension, 512, defined in the vgg16 subclass (not
    # verifiable from this method).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    # Original author's note: grid_size = 7 * 2 = 14 (i.e. assumes
    # POOLING_SIZE is 7 and the max-pool option is on).
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Domain-adaptation heads; the image-level one takes dout_base_model.
    self.RCNN_imageDA = _ImageDA(self.dout_base_model)
    self.RCNN_instanceDA = _InstanceDA()
    self.consistency_loss = torch.nn.MSELoss(size_average=False)
def __init__(self, classes, class_agnostic, rpn_batchsize):
    """Build a detector whose RoI ops use separate pooled height and width.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        rpn_batchsize: Stored and forwarded to ``_RPN``.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.rpn_batchsize = rpn_batchsize

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model, self.rpn_batchsize)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Single-argument print call: emits the same text as the former
    # Python 2 print statement and is also valid on Python 3.
    print('INFO: pooling size is:  {} {}'.format(
        cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W))

    self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W, 1.0 / 16.0)

    # Per-axis crop grid sizes (section credited to Xudong Wang in the
    # original source).
    self.grid_size_H = cfg.POOLING_SIZE_H * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE_H
    self.grid_size_W = cfg.POOLING_SIZE_W * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE_W

    # NOTE(review): this still relies on the square cfg.POOLING_SIZE while
    # everything above uses POOLING_SIZE_H / POOLING_SIZE_W — confirm both
    # config keys exist.
    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Build a detector with two RPNs and three image-level DA heads.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE
    base_dim = self.dout_base_model

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Two independent RPN instances over the same input width.
    self.RCNN_rpn = _RPN(base_dim)
    self.RCNN_rpn_t = _RPN(base_dim)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Image-level domain-adaptation heads at widths 256, 512 and
    # dout_base_model, then the instance-level head.
    self.RCNN_imageDA_3 = _ImageDA(256)
    self.RCNN_imageDA_4 = _ImageDA(512)
    self.RCNN_imageDA = _ImageDA(base_dim)
    self.RCNN_instanceDA = _InstanceDA()
def __init__(self, classes, class_agnostic, lighthead=False, compact_mode=False):
    """Build the detector, optionally with a large-separable-conv light head.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        lighthead: When truthy, a ``LargeSeparableConv2d`` and a ReLU are
            created from ``self.dout_lh_base_model``.
        compact_mode: Selects light-head setting ``'S'`` when truthy,
            ``'L'`` otherwise; only consulted when ``lighthead`` is set.

    Reads ``self.dout_base_model`` (and ``self.dout_lh_base_model`` for the
    light head), neither of which is assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.lighthead = lighthead

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Large separable convolution layer (light head only).
    if self.lighthead:
        if compact_mode:
            self.lh_mode = 'S'
        else:
            self.lh_mode = 'L'
        self.lsconv = LargeSeparableConv2d(
            self.dout_lh_base_model,
            bias=False,
            bn=False,
            setting=self.lh_mode,
        )
        self.lh_relu = nn.ReLU(inplace=True)

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Timing slots, all unset initially.
    self.rpn_time = None
    self.pre_roi_time = None
    self.roi_pooling_time = None
    self.subnet_time = None
def __init__(self, classes, class_agnostic, alpha_con=None):
    """Build a detector whose RPN also receives the class list and count,
    plus an aggregation layer.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        alpha_con: Stored unchanged as ``self.alpha_con`` (the original
            comment labels it "crowds alpha_con").

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE
    num_classes = len(classes)

    self.classes = classes
    self.n_classes = num_classes
    self.class_agnostic = class_agnostic

    # Crowd alpha_con and label source (the latter from config).
    self.alpha_con = alpha_con
    self.label_source = cfg.LABEL_SOURCE

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model, classes, num_classes)
    self.RCNN_proposal_target = _ProposalTargetLayer(num_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)
    self.RCNN_aggregation_layer = _RCNNAggregationLayer()

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Build the detector with two fully connected layers for the
    attention branch.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN2Fc, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)
    self.RFCN_psroi_pool = None

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Attention branch: two linear layers, 2048 -> 1024 -> 1024.
    self.fc1 = nn.Linear(2048, 1024)
    self.fc2 = nn.Linear(1024, 1024)

    # NOTE(review): chosen once, from the mode flag as it is at
    # construction time; later train()/eval() calls do not update it.
    if self.training:
        self.nongt_dim = 256
    else:
        self.nongt_dim = cfg.TEST.RPN_POST_NMS_TOP_N
def __init__(self, classes, class_agnostic):
    """Build the detector with a family of RoIAlign layers at several
    spatial scales.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    # Scale 1/32 keeps the configured pooling size; the finer scales use
    # fixed output sizes that double as the stride halves.
    self.RCNN_roi_align_32 = RoIAlignAvg(pool, pool, 1.0 / 32.0)
    for stride, out_size in ((16, 14), (8, 28), (4, 56), (2, 112), (1, 224)):
        setattr(self, 'RCNN_roi_align_%d' % stride,
                RoIAlignAvg(out_size, out_size, 1.0 / float(stride)))

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic, meta_train, meta_test=None, meta_loss=None):
    """Build the detector and record the meta-learning switches.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        meta_train: Stored unchanged on the instance.
        meta_test: Stored unchanged on the instance.
        meta_loss: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Meta-learning settings.
    self.meta_train = meta_train
    self.meta_test = meta_test
    self.meta_loss = meta_loss

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, num_ways, class_agnostic, meta_train, meta_test=None,
             meta_loss=None, transductive=None, visualization=None):
    """Build the detector and record meta-learning and graph settings.

    Args:
        classes: Sized collection of classes.
        num_ways: Stored unchanged as ``self.num_ways``.
        class_agnostic: Stored unchanged on the instance.
        meta_train: Stored unchanged on the instance.
        meta_test: Stored unchanged on the instance.
        meta_loss: Stored unchanged on the instance.
        transductive: Stored unchanged on the instance.
        visualization: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Meta-learning settings passed in by the caller.
    self.meta_train = meta_train
    self.meta_test = meta_test
    self.meta_loss = meta_loss

    # Both similarity-loss switches are hard-wired on.
    self.simloss = True
    self.dis_simloss = True

    self.transductive = transductive
    self.visualization = visualization

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Fixed hyper-parameters plus the caller-supplied way count.
    self.num_layers_g = 3
    self.num_ways = num_ways
    self.alpha = 0.5
def __init__(self, classes, class_agnostic, num_class=20):
    """Build the FPN detector plus a two-layer FC head and two parallel
    4096 -> ``num_class`` linear outputs (``fc8c`` and ``fc8d``).

    Args:
        classes: Sized collection of classes; its length becomes
            ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
        num_class: Output width of ``fc8c`` / ``fc8d``; kept separately
            from ``n_classes`` as ``num_classes``.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_FPN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.num_classes = num_class
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Region proposal network (FPN variant).
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original paper used pool_size = 7 for the cls branch and 14
    # for the mask branch; to save computation time, 14 is used first as
    # the pool_size, followed by stride=2 pooling for the cls branch.
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Fully connected head: 12544 -> 4096 -> 4096, each followed by ReLU
    # and dropout. Layers are created in the same order as they are stacked.
    head_layers = [
        nn.Linear(in_features=12544, out_features=4096, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
        nn.Linear(in_features=4096, out_features=4096, bias=True),
        nn.ReLU(inplace=True),
        nn.Dropout(0.5),
    ]
    self.top = nn.Sequential(*head_layers)

    self.fc8c = nn.Linear(4096, self.num_classes)
    self.fc8d = nn.Linear(4096, self.num_classes)
def __init__(self, main_classes, sub_classes, class_agnostic):
    """Build a detector over a two-level (main / sub) class hierarchy.

    Args:
        main_classes: Sized collection of main classes.
        sub_classes: Sized collection of sub classes; its length sizes the
            proposal-target layer.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_hierarchyFasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.main_classes = main_classes
    self.sub_classes = sub_classes
    self.n_sub_classes = len(sub_classes)
    self.n_main_classes = len(main_classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network; proposal targets are over sub classes.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_sub_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)
    self.RFCN_psroi_pool = None

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic, shrink=1, mimic=False, rois=None):
    """Build the detector and record the shrink / mimic settings.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        shrink: Stored; ``self.student`` is true exactly when
            ``shrink >= 2``.
        mimic: Stored unchanged on the instance.
        rois: Accepted but not used in this method.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.shrink = shrink
    self.student = bool(shrink >= 2)
    self.mimic = mimic

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, args, num_classes=20):
    """Build a WSDDN model on a VGG16 backbone, loading pretrained weights
    when the checkpoint file exists.

    Args:
        args: Object providing ``dataroot``, ``pretrained_path`` and
            ``pretrained_model``; these are joined into the checkpoint path.
        num_classes: Output width of the ``fc8c`` and ``fc8d`` layers.
    """
    super(WSDDN_VGG16, self).__init__()
    self.args = args
    self.num_classes = num_classes
    self.pretrained_dir = os.path.join(args.dataroot, args.pretrained_path,
                                       args.pretrained_model)

    # VGG16 checkpoint (.pth); original note: it may not be the
    # weakly-supervised one.
    vgg_model = torchvision.models.vgg16()

    # os.path.join never returns None, so the former `is None` test could
    # never select the "no pretrained model" branch. Check the file instead.
    if not os.path.isfile(self.pretrained_dir):
        logger.debug('There is no VGG16 pretrained model')
    else:
        logger.info('Loading pretrained VGG16')
        state_dict = torch.load(self.pretrained_dir)
        # Build the model's key set once rather than once per checkpoint key.
        model_keys = vgg_model.state_dict()
        vgg_model.load_state_dict(
            {k: v for k, v in state_dict.items() if k in model_keys})

    # Network debug: list the backbone's state-dict keys.
    for k in vgg_model.state_dict():
        print('k, v', k)

    # Original note: "where did fc6 go?"
    # Both stacks drop the last module of the corresponding VGG16 section.
    self.base_network = nn.Sequential(*list(vgg_model.features._modules.values())[:-1])
    self.top_network = nn.Sequential(*list(vgg_model.classifier._modules.values())[:-1])

    self.fc8c = nn.Linear(4096, self.num_classes)
    self.fc8d = nn.Linear(4096, self.num_classes)

    # Original note: refer to OICR for this part.
    self.roi_pooling = _RoIPooling(7, 7, 1.0 / 16.0)
    self.roi_align = RoIAlignAvg(7, 7, 1.0 / 16.0)

    self._init_weights()
def __init__(self, classes, class_agnostic):
    """Build the detector with two deformable RoI pooling operators.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    # The two deformable pools share every setting except `no_trans`:
    # the first is built with no_trans=True, the second with no_trans=False.
    deform_common = dict(
        pool_height=7,
        pool_width=7,
        spatial_scale=1.0 / 16.0,
        trans_std=0.1,
        sample_per_part=4,
        output_dim=256,
        group_size=1,
        part_size=7,
    )
    self.RCNN_deform_roi_pool_1 = DeformRoIFunction(no_trans=True, **deform_common)
    self.RCNN_deform_roi_pool_2 = DeformRoIFunction(no_trans=False, **deform_common)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic, transfer):
    """Build the detector and, when enabled, the transfer-learning state.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        transfer: Stored; when truthy the transfer weight, GRL flag,
            weight buffer and selection/gamma settings are created from
            config, with tensors placed on CUDA.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    # Transfer setting.
    self.transfer = transfer
    if self.transfer:
        # Scalar weight that takes gradients.
        initial_weight = torch.Tensor([cfg.TRANSFER_WEIGHT]).cuda()
        self.transfer_weight = Variable(initial_weight, requires_grad=True)
        self.grl = cfg.TRANSFER_GRL
        # Empty buffer without gradients.
        empty_weight = torch.zeros(0).cuda()
        self.weight = Variable(empty_weight, requires_grad=False)
        self.transfer_select = cfg.TRANSFER_SELECT
        self.transfer_gamma = cfg.TRANSFER_GAMMA
def __init__(self, classes, class_agnostic):
    """Build the FPN detector with RoI pool, RoI align and precise RoI
    pooling operators.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_FPN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Region proposal network (FPN variant).
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original paper used pool_size = 7 for the cls branch and 14
    # for the mask branch; to save computation time, 14 is used first as
    # the pool_size, followed by stride=2 pooling for the cls branch.
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)
    # Precise RoI pooling, added alongside the two operators above.
    self.RCNN_roi_prroi = PrRoIPool2D(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Build the deconvolution-based detector: RPN, proposal targets and
    RoI operators.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_Deconv, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Region proposal network. The original source carried a disabled
    # USE_ONE_FEATURE switch that would have selected the plain _RPN; the
    # deconvolution RPN is always used.
    self.RCNN_rpn = _RPN_Deconv(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Build RoI operators, a spatial CNN and three small MLP heads.

    No RPN or proposal-target layer is created in this variant.

    Args:
        classes: Sized collection of classes; its length sizes the two
            classification heads.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE
    num_classes = len(classes)

    self.classes = classes
    self.n_classes = num_classes
    self.class_agnostic = class_agnostic

    # RoI operators.
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()

    self.spaCNN = SpaConv()

    # Spatial classifier: 5408 -> 1024 -> n_classes, with dropout.
    self.spa_cls_score = nn.Sequential(
        nn.Linear(5408, 1024),
        nn.LeakyReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(1024, num_classes),
    )
    # Object classifier: 300 -> 512 -> n_classes.
    self.obj_cls_score = nn.Sequential(
        nn.Linear(300, 512),
        nn.LeakyReLU(),
        nn.Linear(512, num_classes),
    )
    # Object attention: 300 -> 512 -> 6.
    self.obj_attention = nn.Sequential(
        nn.Linear(300, 512),
        nn.LeakyReLU(),
        nn.Linear(512, 6),
    )
def __init__(self, classes, class_agnostic, context, S_agent, T_agent, ts, tt,
             select_num, candidate_num):
    """Build the detector plus two epsilon schedules and iteration counters.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.
        context: Stored unchanged on the instance.
        S_agent: Accepted but not used in this method.
        T_agent: Accepted but not used in this method.
        ts: Accepted but not used in this method.
        tt: Accepted but not used in this method.
        select_num: Stored and printed (formatted with ``%d``).
        candidate_num: Stored and printed (formatted with ``%d``).

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.select_num = select_num
    self.candidate_num = candidate_num
    print("self.select_num: %d self.candidate_num: %d " % (self.select_num, self.candidate_num))

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0
    self.context = context

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    # Exponential interpolation from cfg.epsilon_start to cfg.epsilon_final
    # with time constant cfg.epsilon_decay; cfg is read at call time.
    self.epsilon_by_epoch = lambda epoch_idx: (
        cfg.epsilon_final
        + (cfg.epsilon_start - cfg.epsilon_final)
        * math.exp(-1. * epoch_idx / cfg.epsilon_decay)
    )
    self.iter_dqn = 0

    # Second schedule with the same formula and its own counter.
    self.epsilon_by_epoch_T = lambda epoch_idx: (
        cfg.epsilon_final
        + (cfg.epsilon_start - cfg.epsilon_final)
        * math.exp(-1. * epoch_idx / cfg.epsilon_decay)
    )
    self.iter_dqn_T = 0

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, inplanes, planes, blocks, stride=1, downsample=None):
    """Build the generator: two 3x3 convolutions, an RoI pool, a stack of
    basic blocks and a ReLU.

    Args:
        inplanes: Input channel count of the first convolution.
        planes: Output channel count of both convolutions; also passed to
            ``_make_layer``.
        blocks: Forwarded to ``_make_layer`` as its block count argument.
        stride: Stride of the first convolution; stored on the instance.
        downsample: Accepted but not used in this method.
    """
    super(Generator, self).__init__()
    pool = cfg.POOLING_SIZE

    # Only the first convolution takes the stride argument.
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.conv2 = conv3x3(planes, planes)

    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.k_blocks = self._make_layer(BasicBlock, planes, blocks)

    self.relu = nn.ReLU(inplace=True)
    self.stride = stride
def __init__(self, pool_height, pool_width, pool_scaler, isex):
    """Build the pairing layers and RoI operators of the OP2L module.

    Args:
        pool_height: Pooled output height for both RoI operators.
        pool_width: Pooled output width for both RoI operators.
        pool_scaler: Spatial scale passed to both RoI operators.
        isex: Stored as ``self._isex`` and forwarded to ``_ObjPairLayer``.
    """
    super(_OP2L, self).__init__()
    self._isex = isex

    # Pair expansion and object-pair layers.
    self.OP2L_rois_pairing = _RoisPairExpandingLayer()
    self.OP2L_object_pair = _ObjPairLayer(isex)

    # Both RoI operators share the same geometry.
    roi_geometry = (pool_height, pool_width, pool_scaler)
    self.OP2L_roi_pool = _RoIPooling(*roi_geometry)
    self.OP2L_roi_align = RoIAlignAvg(*roi_geometry)
def __init__(self, features=None, classifier=None, **kwargs):
    """Initialise feature and classifier stages and the RoI operators.

    Args:
        features: Passed to ``_init_features``; when falsy, the file
            ``conv.prototxt.statedict.pth`` next to this source file is used.
        classifier: Passed to ``_init_classifier``; when falsy, the file
            ``linear.prototxt.statedict.pth`` next to this source file is
            used.
        **kwargs: Optional ``verbose`` (default ``False``), ``mode``
            (default ``MODE_ALIGN``) and ``scale`` (default ``1/32``).
    """
    super(Model, self).__init__()
    self.verbose = kwargs.get('verbose', False)
    self.roi_mode = kwargs.get('mode', MODE_ALIGN)

    # Directory containing this source file; default weights live there.
    curdir = os.path.realpath(os.path.join(__file__, '..'))
    default_features = os.path.join(curdir, 'conv.prototxt.statedict.pth')
    self._init_features(features or default_features)
    default_classifier = os.path.join(curdir, 'linear.prototxt.statedict.pth')
    self._init_classifier(classifier or default_classifier)

    # The spatial scale needs to be the ratio of imdata.shape to the shape
    # of the feature map at the end of the convolutional layers. It is
    # architecture-dependent, not image-dependent (though a pixel here or
    # there can cause a small shift in the true ratio).
    spatial_scale = kwargs.get('scale', 1.0 / 32.0)
    pool = cfg.POOLING_SIZE
    self.RoIPooling = _RoIPooling(pool, pool, spatial_scale)
    self.RoIAlign = RoIAlignAvg(pool, pool, spatial_scale)
def __init__(self, main_classes, sub_classes, class_agnostic,
             casecade_type='add_score', alpha=0.5):
    """Build a hierarchical detector with a main-to-sub class index map and
    two multi-head attention modules.

    Args:
        main_classes: Collection of main classes supporting ``len`` and
            ``.index``.
        sub_classes: Collection of sub classes supporting ``len`` and
            ``.index``.
        class_agnostic: Stored unchanged on the instance.
        casecade_type: Stored unchanged; the original comment lists
            ``add_score``, ``add_prob``, ``mul_score`` and ``mul_prob``.
        alpha: Stored unchanged on the instance.

    Reads ``self.dout_base_model`` and the module-level ``sub2main_dict``,
    neither of which is assigned in this method.
    """
    super(_hierarchyAttentionFasterRCNN, self).__init__()

    # Cascade type: add_score, add_prob, mul_score, mul_prob.
    self.casecade_type = casecade_type
    self.alpha = alpha

    self.main_classes = main_classes
    self.sub_classes = sub_classes
    self.n_sub_classes = len(sub_classes)
    self.n_main_classes = len(main_classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network; proposal targets are over sub classes.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_sub_classes)
    self.RCNN_roi_pool = _RoIPooling(
        cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(
        cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
    self.RFCN_psroi_pool = None

    self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    self.RCNN_roi_crop = _RoICrop()

    # Map each main-class index to the indices of its sub classes.
    self.main2sub_idx_dict = defaultdict(list)
    for key, val in sub2main_dict.items():
        try:
            # Not every class in the dict is present in this imdb.
            self.main2sub_idx_dict[self.main_classes.index(
                val)].append(self.sub_classes.index(key))
        except ValueError:
            # `.index` raises ValueError on a miss; catching only that
            # keeps KeyboardInterrupt and real bugs from being swallowed.
            print("key:{}, val:{} may not in this imdb".format(key, val))

    # Attention branch.
    self.fc1 = nn.Linear(2048, 1024)
    self.fc2 = nn.Linear(1024, 1024)

    # NOTE(review): chosen once, from the mode flag as it is at
    # construction time; later train()/eval() calls do not update it.
    self.nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N

    self.attention_1 = attention_module_multi_head(
        nongt_dim=self.nongt_dim, fc_dim=16, feat_dim=1024, index=1,
        group=16, dim=(1024, 1024, 1024))
    self.attention_2 = attention_module_multi_head(
        nongt_dim=self.nongt_dim, fc_dim=16, feat_dim=1024, index=2,
        group=16, dim=(1024, 1024, 1024))
def __init__(self, pool_height, pool_width, pool_scaler, isex):
    """Build the pairing layers and RoI operators of the OP2L module.

    Args:
        pool_height: Accepted but not used; the pooled size comes from
            ``cfg.RCNN_COMMON.POOLING_SIZE``.
        pool_width: Accepted but not used (see ``pool_height``).
        pool_scaler: Accepted but not used; the scale is fixed at 1/16.
        isex: Stored as ``self._isex`` and forwarded to ``_ObjPairLayer``.
    """
    super(_OP2L, self).__init__()
    self._isex = isex

    # Pair expansion and object-pair layers.
    self.OP2L_rois_pairing = _RoisPairExpandingLayer()
    self.OP2L_object_pair = _ObjPairLayer(isex)

    # RoI operators sized from config rather than from the arguments.
    size = cfg.RCNN_COMMON.POOLING_SIZE
    self.OP2L_roi_pool = _RoIPooling(size, size, 1.0 / 16.0)
    size = cfg.RCNN_COMMON.POOLING_SIZE
    self.OP2L_roi_align = RoIAlignAvg(size, size, 1.0 / 16.0)
def __init__(self, baseModels, classes, dout_base_model):
    """Wrap a base model with an RPN, proposal targets and RoI pooling.

    Args:
        baseModels: Stored as ``RCNN_base_model``.
        classes: Sized collection of classes, or ``None`` to skip setting
            ``classes`` / ``n_classes`` here.
        dout_base_model: Passed to ``_RPN``.
    """
    super(_RCNN_base, self).__init__()

    have_classes = classes is not None
    if have_classes:
        self.classes = classes
        self.n_classes = len(classes)

    self.RCNN_base_model = baseModels

    # Region proposal network.
    self.RCNN_rpn = _RPN(dout_base_model)
    # NOTE(review): reads self.n_classes even when `classes` is None, so
    # it must then be available from somewhere else — confirm.
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pool = cfg.POOLING_SIZE
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
def __init__(self):
    """Build a convolutional discriminator preceded by a 32x32 RoI pool.

    The discriminator stacks three stride-2 4x4 convolutions, each followed
    by batch norm and LeakyReLU(0.2), going 512 -> 1024 -> 2048 -> 4096
    channels, then a 4x4 convolution to one channel and a sigmoid.
    """
    super(DNet_pooling, self).__init__()
    use_bias = False
    POOLING_SIZE = 32

    # Resize the input to 32x32 using RoI pooling: a single RoI row
    # [0, 0, 0, 32, 32] and a pool at scale 1.0.
    roi_row = np.array([0.0, 0.0, 0.0, 32.0, 32.0], np.float32)
    self.rois = torch.from_numpy(roi_row)
    self.RCNN_roi_pool = _RoIPooling(POOLING_SIZE, POOLING_SIZE, 1.0)

    # The input is assumed to be resized to 32x32; per the original notes
    # the three stages bring it to 16x16, 8x8 and 4x4.
    # Layers are created in the same order as they are stacked.
    stages = []
    for in_ch, out_ch in ((512, 1024), (1024, 2048), (2048, 4096)):
        stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=4, stride=2,
                                padding=1, bias=use_bias))
        stages.append(nn.BatchNorm2d(out_ch))
        stages.append(nn.LeakyReLU(0.2, True))

    # Final 4x4 convolution to a single channel, then a sigmoid.
    stages.append(nn.Conv2d(4096, 1, kernel_size=4, stride=1, padding=0,
                            bias=use_bias))
    stages.append(nn.Sigmoid())

    self.discriminator = nn.Sequential(*stages)
def __init__(self, classes, class_agnostic):
    """Build the RPN, proposal-target layer and RoI operators.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_fasterRCNN, self).__init__()
    pool = cfg.POOLING_SIZE
    num_classes = len(classes)

    self.classes = classes
    self.n_classes = num_classes
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(num_classes)

    roi_args = (pool, pool, 1.0 / 16.0)
    self.RCNN_roi_pool = _RoIPooling(*roi_args)
    self.RCNN_roi_align = RoIAlignAvg(*roi_args)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Build the FPN detector: FPN-style RPN, proposal targets and RoI ops.

    Args:
        classes: Sized collection of classes.
        class_agnostic: Stored unchanged on the instance.

    Reads ``self.dout_base_model``, which is not assigned in this method.
    """
    super(_FPN, self).__init__()
    pool = cfg.POOLING_SIZE
    num_classes = len(classes)

    self.classes = classes
    self.n_classes = num_classes
    self.class_agnostic = class_agnostic

    # Loss placeholders.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network (FPN variant).
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(num_classes)

    # NOTE: the original paper used pool_size = 7 for the cls branch and 14
    # for the mask branch; to save computation time, 14 is used first as
    # the pool_size, followed by stride=2 pooling for the cls branch.
    self.RCNN_roi_pool = _RoIPooling(pool, pool, 1.0 / 16.0)
    self.RCNN_roi_align = RoIAlignAvg(pool, pool, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool * 2
    else:
        self.grid_size = pool
    self.RCNN_roi_crop = _RoICrop()
if __name__ == '__main__':
    # Manual smoke test: run the same feature map and RoIs through
    # PSRoIPool and through plain _RoIPooling and print both outputs.
    # Requires CUDA, since both tensors are moved to the GPU.
    import torch
    import numpy as np
    from torch.autograd import Variable
    from model.roi_pooling.modules.roi_pool import _RoIPooling

    # Named `features` rather than `input` so the builtin is not shadowed.
    features = torch.randn(2, 21 * 7 * 7, 50, 72)

    # Each row has five values; the first is 0 or 1 and the same two boxes
    # are repeated for both values.
    rois = torch.from_numpy(
        np.array([
            [0.0000, 350.6689, 211.0240, 779.0886, 777.7496],
            [0.0000, 744.0627, 277.4919, 988.4307, 602.7589],
            [1.0000, 350.6689, 211.0240, 779.0886, 777.7496],
            [1.0000, 744.0627, 277.4919, 988.4307, 602.7589],
        ])
    ).float()

    pool = PSRoIPool(7, 7, 1 / 16.0, 7, 21)
    features = Variable(features.cuda())
    rois = Variable(rois.cuda())
    print(rois.size(), features.size())
    print(features)

    out = pool(features, rois)
    print(out)
    print(out.size())

    print('============================')

    roi_pool = _RoIPooling(7, 7, 1 / 16.0)
    out = roi_pool(features, rois.view(-1, 5))
    print(out)
    print(out.size())