def __init__(self, classes, class_agnostic):
    """Create the RPN, RoI pooling/align and DCR sub-modules.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance and forwarded to the
            DCR proposal/target layers.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # DCR branch: its own proposal/target layers and a 56x56 RoI align at
    # scale 1/4.
    self.DCR_proposal = _DCRProposalLayer(self.class_agnostic)
    self.DCR_target_proposal = _DCRTargetLayer(self.n_classes, self.class_agnostic)
    self.DCR_roi_pool = ROIAlign((56, 56), 1.0 / 4.0, 0)

    # The attention loss only exists when the config enables it.
    if cfg.TRAIN.ATTENTION_MODEL:
        self.levelattentionLoss = Attention_loss()
def __init__(self, classes, n_way=2, n_shot=5):
    """Create the RPN, RoI pooling/align and the few-shot head layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        n_way: Stored as ``self.n_way``.
        n_shot: Stored as ``self.n_shot``.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fgnRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    # Few-shot settings.
    self.n_way = n_way
    self.n_shot = n_shot

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # Proposal-target matching.
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Few-shot RCNN head: two average pools and two bias-free 3x3
    # convolutions (2048 -> 512 -> 128), each followed by batch norm.
    self.avgpool_rpn = nn.AvgPool2d(20)
    self.avgpool_rcnn = nn.AvgPool2d(14, stride=1)
    self.cls_conv1 = nn.Conv2d(2048, 512, 3, padding=0, bias=False)
    self.bn1 = nn.BatchNorm2d(512)
    self.cls_conv2 = nn.Conv2d(512, 128, 3, padding=0, bias=False)
    self.bn2 = nn.BatchNorm2d(128)
def __init__(self, classes, class_agnostic, feat_name, feat_list=('conv4',), pretrained=True):
    """Build the detector on top of the parent's feature extractor.

    All arguments are forwarded unchanged to the parent constructor. After
    that, the output channel count of ``self.FeatExt`` is discovered with a
    single probe forward pass and the RPN, proposal-target and RoI layers
    are created.

    Args:
        classes: Forwarded to the parent constructor.
        class_agnostic: Forwarded to the parent constructor.
        feat_name: Forwarded to the parent constructor.
        feat_list: Forwarded to the parent constructor.
        pretrained: Forwarded to the parent constructor.

    Note:
        ``self.FeatExt`` and ``self.n_classes`` are read here but not
        assigned in this method; they are expected to be set by the parent
        constructor (not visible here -- confirm).
    """
    super(fasterRCNN, self).__init__(classes, class_agnostic, feat_name, feat_list, pretrained)

    # Probe the extractor in eval mode to read its output channel count.
    # Only the shape of the result is used, so feed a well-defined zero
    # image (instead of uninitialised memory) and skip autograd bookkeeping.
    self.FeatExt.eval()
    with torch.no_grad():
        probe_feat = self.FeatExt(torch.zeros(1, 3, 224, 224))
    self.FeatExt.train()
    self.dout_base_model = probe_feat.size(1)

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # The RPN and the RoI layers must agree on the feature stride, so the
    # RoI spatial scale is derived from the same config entry the RPN uses
    # (equal to the previous hard-coded 1/16 when the stride is 16).
    feat_stride = cfg.RCNN_COMMON.FEAT_STRIDE[0]
    roi_scale = 1.0 / float(feat_stride)

    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=feat_stride)

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pooled_shape = (cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Grid size is doubled when crop-resize is followed by max pooling.
    if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.RCNN_COMMON.POOLING_SIZE

    self.iter_counter = 0
def __init__(self, texture_channels=3, dropout=0.5):
    """Create the RoI align, the RoI encoder and the U-Net layers.

    Args:
        texture_channels: Number of texture channels; also the number of
            output channels of the final convolution.
        dropout: Dropout value passed to the deeper down/up blocks.

    Note:
        ``NUM_ROI`` is a module-level name defined outside this method.
    """
    super(TextureModule, self).__init__()

    self.roi_align = ROIAlign(output_size=(128, 128), spatial_scale=1, sampling_ratio=1)

    # One group of texture channels per RoI.
    roi_channels = texture_channels * NUM_ROI
    self.encode = UNetDown(roi_channels, roi_channels)

    # U-Net encoder.
    self.down_1 = UNetDown(roi_channels + texture_channels, 64, normalize=False)
    self.down_2 = UNetDown(64, 128)
    self.down_3 = UNetDown(128, 256)
    self.down_4 = UNetDown(256, 512, dropout=dropout)
    self.down_5 = UNetDown(512, 1024, dropout=dropout)
    self.down_6 = UNetDown(1024, 1024, normalize=False, dropout=dropout)

    # U-Net decoder; from up_2 on, the input width is twice the previous
    # block's output width.
    self.up_1 = UNetUp(1024, 1024, dropout=dropout)
    self.up_2 = UNetUp(2 * 1024, 512, dropout=dropout)
    self.up_3 = UNetUp(2 * 512, 256)
    self.up_4 = UNetUp(2 * 256, 128)
    self.up_5 = UNetUp(2 * 128, 64)

    # Output head: upsample x2, zero-pad left/top by one, 4x4 conv, tanh.
    output_head = [
        nn.Upsample(scale_factor=2),
        nn.ZeroPad2d((1, 0, 1, 0)),
        nn.Conv2d(128, texture_channels, 4, padding=1),
        nn.Tanh(),
    ]
    self.upsample_and_pad = nn.Sequential(*output_head)
def __init__(self, classes: Tuple[str, ...], class_agnostic: bool) -> None:
    """Create the shared sub-modules of the one-shot detector base class.

    Args:
        classes: Class names; ``len(classes)`` is stored as ``n_classes``.
            Annotated as a variable-length tuple (``Tuple[str]`` would
            declare a tuple of exactly one element).
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(OneShotBase, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.match_net = match_block(self.dout_base_model)

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    self.RCNN_roi_pool = ROIPool((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign((cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0, 0)

    self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)

    # Empty placeholder modules; to be implemented in the inheriting class.
    self.RCNN_base = nn.Module()
    self.RCNN_top = nn.Module()
    self.RCNN_cls_score = nn.Module()
    self.RCNN_bbox_pred = nn.Module()
def __init__(self, classes, class_agnostic, in_channel=4096):
    """Create the detector sub-modules plus the domain-adaptation heads.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.
        in_channel: Input width passed to the instance-level DA head.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pooled_shape, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pooled_shape, 1.0 / 16.0, 0)

    # Grid size is doubled when crop-resize is followed by max pooling.
    self.grid_size = (
        cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    )

    # Image-level and instance-level domain-adaptation heads.
    self.RCNN_imageDA = _ImageDA(self.dout_base_model)
    self.RCNN_instanceDA = _InstanceDA(in_channel)

    # `size_average=False` is deprecated; `reduction='sum'` is its documented
    # equivalent (summed, un-averaged squared error).
    self.consistency_loss = torch.nn.MSELoss(reduction='sum')

    # 1x1 convolution to (n_classes - 1) channels and a global average pool.
    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
def __init__(self, classes, attention_type, rpn_reduce_dim, rcnn_reduce_dim, n_way=2, n_shot=5, pos_encoding=True):
    """Create the attention layers, RPN and scoring head.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        attention_type: ``'concat'`` (RPN and transform layer take 2048
            channels) or ``'product'`` (1024 channels).
        rpn_reduce_dim: Output width of the RPN query/key projections.
        rcnn_reduce_dim: Output width of the RCNN query/key projections.
        n_way: Stored as ``self.n_way``.
        n_shot: Stored as ``self.n_shot``.
        pos_encoding: When truthy, positional-encoding layers are created.

    Raises:
        ValueError: If ``attention_type`` is neither ``'concat'`` nor
            ``'product'``. Previously such a value silently produced a
            module without ``RCNN_rpn`` / ``rcnn_transform_layer``.
    """
    super(_MAML_CISA, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.n_way = n_way
    self.n_shot = n_shot
    self.attention_type = attention_type
    self.unary_gamma = 0.1
    self.rpn_reduce_dim = rpn_reduce_dim
    self.rcnn_reduce_dim = rcnn_reduce_dim

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pooled_shape, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pooled_shape, 1.0 / 16.0, 0)

    # Few-shot RCNN head.
    self.pool_feat_dim = 1024
    self.rcnn_dim = 64
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    def _small_init_linear(in_features, out_features):
        """Return a Linear layer with N(0, 0.01) weights and zero bias."""
        layer = nn.Linear(in_features, out_features)
        init.normal_(layer.weight, std=0.01)
        init.constant_(layer.bias, 0)
        return layer

    # Layers are created in the same order as before so parameter order and
    # random-number consumption are unchanged.
    self.rpn_unary_layer = _small_init_linear(dim_in, 1)
    self.rcnn_unary_layer = _small_init_linear(dim_in, 1)
    self.rpn_adapt_q_layer = _small_init_linear(dim_in, rpn_reduce_dim)
    self.rpn_adapt_k_layer = _small_init_linear(dim_in, rpn_reduce_dim)
    self.rcnn_adapt_q_layer = _small_init_linear(dim_in, rcnn_reduce_dim)
    self.rcnn_adapt_k_layer = _small_init_linear(dim_in, rcnn_reduce_dim)

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # The input width of the RPN and the transform layer depends on how the
    # attention features are combined.
    if self.attention_type == 'concat':
        attended_channels = 2048
    elif self.attention_type == 'product':
        attended_channels = 1024
    else:
        raise ValueError(
            "Unsupported attention_type {!r}; expected 'concat' or "
            "'product'".format(self.attention_type))
    self.RCNN_rpn = _RPN(attended_channels)
    self.rcnn_transform_layer = nn.Linear(attended_channels, self.rcnn_dim)

    # rcnn_dim (64) features at each of 49 positions feed the scoring FFN.
    self.output_score_layer = FFN(self.rcnn_dim * 49, dim_in)

    # Positional encoding.
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
        self.rpn_pos_encoding_layer = PositionalEncoding(max_len=400)
def __init__(self, classes, n_head, d_feat=64, n_way=2, n_shot=5, pos_encoding=True):
    """Create the multi-head attention layers, RPN and RCNN head.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        n_head: Number of attention heads; must be 1, 2 or 4.
        d_feat: Feature width passed to the multi-head attention module.
        n_way: Stored as ``self.n_way``.
        n_shot: Stored as ``self.n_shot``.
        pos_encoding: When truthy, positional-encoding layers are created.

    Raises:
        ValueError: If ``n_head`` is not 1, 2 or 4. (``ValueError`` is a
            subclass of the ``Exception`` raised previously, now with an
            explanatory message.)
    """
    super(_multiRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Proposal-target matching.
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pooled_shape, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pooled_shape, 1.0 / 16.0, 0)

    # Few-shot RCNN head.
    self.global_relation = True
    self.local_correlation = True
    self.pool_feat_dim = 1024
    self.soft_gamma = 10
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    # Layer widths per supported head count:
    # n_head -> (d_rpn_in, d_rpn_hidden, d_rcnn_hidden, d_ffn_hidden)
    widths_by_head = {
        4: (1024, 512, 64, 1024),
        2: (512, 256, 64, 1024),
        1: (256, 128, 32, 512),
    }
    self.h = n_head
    if self.h not in widths_by_head:
        raise ValueError(
            f'Unsupported n_head={self.h!r}; expected one of 1, 2 or 4')
    d_rpn_in, d_rpn_hidden, d_rcnn_hidden, d_ffn_hidden = widths_by_head[self.h]

    self.d_feat = d_feat
    self.multihead_attention_layer = MultiheadAttentionModule(dim_in, d_feat=self.d_feat, h=self.h)
    self.rpn_attention_linear = nn.Linear(400 * self.h, d_rpn_in)
    self.RCNN_rpn = _MultiheadRPN(d_rpn_in, d_rpn_hidden)
    self.rcnn_attention_linear = nn.Linear(49 * self.h, d_rcnn_hidden)
    self.rcnn_ffn_layer = FFN(49 * d_rcnn_hidden, d_ffn_hidden)

    # Positional encoding.
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
        self.rpn_pos_encoding_layer = PositionalEncoding(max_len=400)

    # Few-shot settings.
    self.n_way = n_way
    self.n_shot = n_shot
def __init__(self, classes, n_head, n_way=2, n_shot=5, pos_encoding=True):
    """Create per-head Q/K/V projections, the RPN and the scoring head.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        n_head: Number of attention heads; one Q/K/V projection triple is
            created per head.
        n_way: Stored as ``self.n_way``.
        n_shot: Stored as ``self.n_shot``.
        pos_encoding: When truthy, positional-encoding layers are created.
    """
    super(_multiheadAttentionRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.n_way = n_way
    self.n_shot = n_shot
    self.n_head = n_head

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Few-shot RCNN head.
    self.pool_feat_dim = 1024
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    # Per-head projections. The plain lists are kept as attributes next to
    # the registered ModuleLists, exactly as before.
    self.d_k = 64
    self.Q_weight_list = []
    self.K_weight_list = []
    self.V_weight_list = []
    for _ in range(n_head):
        # Build all three layers first, then initialise them in Q, K, V order.
        q_proj, k_proj, v_proj = (nn.Linear(dim_in, self.d_k) for _ in range(3))
        for proj in (q_proj, k_proj, v_proj):
            init.normal_(proj.weight, std=0.01)
            init.constant_(proj.bias, 0)
        self.Q_weight_list.append(q_proj)
        self.K_weight_list.append(k_proj)
        self.V_weight_list.append(v_proj)
    self.Q_layers = nn.ModuleList(self.Q_weight_list)
    self.K_layers = nn.ModuleList(self.K_weight_list)
    self.V_layers = nn.ModuleList(self.V_weight_list)

    # Head-merging layers only exist when there is more than one head.
    if n_head != 1:
        self.rpn_multihead_layer = nn.Linear(n_head * 400, 400)
        self.rcnn_multihead_layer = nn.Linear(n_head * self.d_k * 49, self.d_k * 49)

    self.output_score_layer = FFN(self.d_k * 49, dim_in)

    # Proposal-target matching and RPN.
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_rpn = _BipathRPN(400, 256)

    # Positional encoding.
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
        self.rpn_pos_encoding_layer = PositionalEncoding(max_len=400)
def __init__(
    self,
    texture_channels=3,
    cloth_channels=19,
    num_roi=12,
    norm_type="batch",
    dropout=0.5,
    unet_type="pix2pix",
    img_size=128,
):
    """Create the RoI align, the RoI encoder and the U-Net generator.

    Args:
        texture_channels: Number of texture channels; also the number of
            output channels of the generator.
        cloth_channels: Number of cloth channels added to the generator input.
        num_roi: Number of RoIs; stored as ``self.num_roi``.
        norm_type: Passed to ``get_norm_layer`` (pix2pix variant only).
        dropout: Dropout value for the hand-built variant; for the pix2pix
            variant only whether it is ``None`` matters.
        unet_type: ``"pix2pix"`` selects ``pix2pix_modules.UnetGenerator``;
            any other value selects the hand-built ``nn.Sequential`` stack.
        img_size: Image size used to derive the number of down-samplings
            (pix2pix variant only).
    """
    super(TextureModule, self).__init__()

    self.roi_align = ROIAlign(
        output_size=(128, 128), spatial_scale=1, sampling_ratio=1
    )

    self.num_roi = num_roi
    roi_channels = texture_channels * num_roi
    self.encode = UNetDown(roi_channels, roi_channels)

    unet_in_channels = roi_channels + cloth_channels

    if unet_type != "pix2pix":
        # Hand-built encoder/decoder stack followed by the output head.
        stack = [
            UNetDown(unet_in_channels, 64, normalize=False),
            UNetDown(64, 128),
            UNetDown(128, 256),
            UNetDown(256, 512, dropout=dropout),
            UNetDown(512, 1024, dropout=dropout),
            UNetDown(1024, 1024, normalize=False, dropout=dropout),
            UNetUp(1024, 1024, dropout=dropout),
            UNetUp(2 * 1024, 512, dropout=dropout),
            UNetUp(2 * 512, 256),
            UNetUp(2 * 256, 128),
            UNetUp(2 * 128, 64),
            # upsample and pad
            nn.Upsample(scale_factor=2),
            nn.ZeroPad2d((1, 0, 1, 0)),
            nn.Conv2d(128, texture_channels, 4, padding=1),
            nn.Tanh(),
        ]
        self.unet = nn.Sequential(*stack)
        return

    # Integer log2 of img_size via the binary exponent,
    # e.g. img_size=128 -> num_downs=7.
    _, binary_exponent = math.frexp(img_size)
    num_downs = binary_exponent - 1
    use_dropout = dropout is not None
    norm_layer = get_norm_layer(norm_type=norm_type)
    self.unet = pix2pix_modules.UnetGenerator(
        unet_in_channels,
        texture_channels,
        num_downs,
        norm_layer=norm_layer,
        use_dropout=use_dropout,
    )
def __init__(self, classes):
    """Create the stereo RPN, proposal-target layer and RoI align layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_StereoRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    # Loss placeholders, all initialised to 0.
    for loss_attr in (
        'RCNN_loss_cls',
        'RCNN_loss_bbox_left_right',
        'RCNN_loss_dis',
        'RCNN_loss_dim',
        'RCNN_loss_dim_orien',
        'RCNN_loss_kpts',
    ):
        setattr(self, loss_attr, 0)

    # Kernel-size-1, stride-2 max pool.
    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _Stereo_RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Box RoI align, and a keypoint RoI align at twice the resolution.
    roi_scale = 1.0 / 16.0
    box_size = cfg.POOLING_SIZE
    self.RCNN_roi_align = ROIAlign((box_size, box_size), roi_scale, 0)
    self.RCNN_roi_kpts_align = ROIAlign(
        (cfg.POOLING_SIZE * 2, cfg.POOLING_SIZE * 2), roi_scale, 0)
def __init__(self, classes, class_agnostic, lc, gc):
    """Create the RPN, proposal-target layer and RoI pooling/align layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.
        lc: Stored as ``self.lc``.
        gc: Stored as ``self.gc``.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.lc = lc
    self.gc = gc

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)
def __init__(self, classes, class_agnostic=False):
    """Create the RPN, proposal-target layer and RoI pooling/align layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.feature_out_dim`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(Faster_RCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # RCNN loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = RPN(self.feature_out_dim)
    self.RCNN_proposal_target = ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)
def forward(self, x, rpn_ret, part_rois, cascade=False, rois=None):
    """Pool RoI features, run them through res5 and average-pool the result.

    Args:
        x: Input feature blob(s) handed to the RoI transform.
        rpn_ret: RPN output passed through to ``self.roi_xform``
            (non-cascade path only).
        part_rois: Part RoIs passed through to ``self.roi_xform``. When not
            ``None``, the last 5 rows of the pooled output are split off as
            part features.
        cascade: When true, ``rois`` is pooled directly with ``ROIAlign``
            instead of calling ``self.roi_xform``.
        rois: NumPy array of RoIs; must be provided when ``cascade`` is true.

    Returns:
        * ``cascade`` true: ``box_feat``.
        * ``cascade`` false, ``cfg.MODEL.SHARE_RES5`` set and module in
          training mode: ``(box_feat, res5_feat, part_feat)``.
        * otherwise: ``(box_feat, part_feat)``.

        ``part_feat`` is ``None`` when ``part_rois`` is ``None``.
    """
    # Number of trailing rows reserved for part RoIs.
    num_part_rois = 5

    if not cascade:
        # RoI pooling through the configured transform.
        cat_x = self.roi_xform(
            x, rpn_ret, part_rois,
            blob_rois='rois',
            method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
            resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION,
            spatial_scale=self.spatial_scale,
            sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO)
        if cfg.USE_CONTEXT_RELA:
            # Original note: "-3:1, rebuttal: add the heart shadow".
            cat_x[-3:-1] *= self.matrix
    else:
        # Cascade operation added for C4 mode: pool the given RoIs directly.
        resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
        # Move the RoIs to wherever the features live. This is equivalent to
        # the former `.cuda(x.get_device())` for CUDA inputs and additionally
        # works for CPU tensors; the deprecated Variable wrapper is dropped.
        rois = torch.from_numpy(rois).to(x.device)
        cat_x = ROIAlign((resolution, resolution), self.spatial_scale, sampling_ratio)(x, rois)

    res5_feat = self.res5(cat_x)
    cat_x = self.avgpool(res5_feat)

    # RoI pooling for parts (added by LJ). Bug fix from the original author:
    # the number of boxes can be smaller than 512, so the split is taken
    # relative to the end rather than at a fixed index.
    if part_rois is not None:
        box_feat = cat_x[:-num_part_rois]
        res5_feat = res5_feat[:-num_part_rois]
        part_feat = cat_x[-num_part_rois:]
    else:
        box_feat = cat_x
        part_feat = None

    if cascade:
        return box_feat
    if cfg.MODEL.SHARE_RES5 and self.training:
        return box_feat, res5_feat, part_feat
    return box_feat, part_feat
def __init__(self, classes, n_way=2, n_shot=5, pos_encoding=True):
    """Create the RPN, RoI layers and the Q/K/V attention head.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        n_way: Stored as ``self.n_way``.
        n_shot: Stored as ``self.n_shot``.
        pos_encoding: When truthy, a positional-encoding layer is created.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_qkvRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    # Few-shot settings.
    self.n_way = n_way
    self.n_shot = n_shot

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # Proposal-target matching.
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Few-shot RCNN head.
    self.global_relation = True
    self.local_correlation = True
    self.pool_feat_dim = 1024
    self.soft_gamma = 10
    self.avgpool = nn.AvgPool2d(14, stride=1)
    dim_in = self.pool_feat_dim

    # Q/K/V projections: create all three, then initialise each with
    # N(0, 0.01) weights and zero bias, in Q, K, V order.
    self.d_k = 64
    self.Q_weight = nn.Linear(dim_in, self.d_k)
    self.K_weight = nn.Linear(dim_in, self.d_k)
    self.V_weight = nn.Linear(dim_in, self.d_k)
    for proj in (self.Q_weight, self.K_weight, self.V_weight):
        init.normal_(proj.weight, std=0.01)
        init.constant_(proj.bias, 0)

    self.ffn_layer = FFN(self.d_k * 49, dim_in)

    # Positional encoding.
    self.pos_encoding = pos_encoding
    if pos_encoding:
        self.pos_encoding_layer = PositionalEncoding()
def __init__(self, classes, n_way=2, n_shot=5):
    """Create the RPN, proposal-target layer and RoI pooling/align layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        n_way: Stored as ``self.n_way``.
        n_shot: Stored as ``self.n_shot``.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_metaRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    # Few-shot settings.
    self.n_way = n_way
    self.n_shot = n_shot

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network.
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # Proposal-target matching.
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)
def __init__(self, classes, class_agnostic, model_type="attention", fusion='query'):
    """Create the attention network, projection, RPN and RoI layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.
        model_type: Accepted but not used by this constructor.
        fusion: ``'query'`` builds ``attention``; ``'attention'`` builds
            ``attention_early_fusion_multi_query``. For any other value no
            ``attention_net`` attribute is created.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.fusion = fusion

    # Attention network variant selected by the fusion mode.
    if fusion == 'query':
        self.attention_net = attention(self.dout_base_model)
    elif fusion == 'attention':
        self.attention_net = attention_early_fusion_multi_query(
            self.dout_base_model)

    # Bias-free 1x1 convolution from 2048 to 1024 channels, Xavier-initialised.
    self.projection = nn.Conv2d(
        in_channels=1024 * 2,
        out_channels=1024,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
    )
    nn.init.xavier_uniform_(self.projection.weight)

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)
def __init__(self, classes, class_agnostic):
    """Create the FPN RPN, proposal-target layer and RoI align layer.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_FPN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Kernel-size-1, stride-2 max pool.
    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Only RoI align is created in this variant.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_align = ROIAlign(pooled_shape, 1.0 / 16.0, 0)
def __init__(self, classes, class_agnostic):
    """Create the FPN RPN, proposal-target layer and RoI pooling/align layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_FPN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Kernel-size-1, stride-2 max pool.
    self.maxpool2d = nn.MaxPool2d(1, stride=2)

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original paper used pool_size = 7 for the cls branch and 14
    # for the mask branch; to save computation time, 14 is used as the
    # pool_size first, followed by stride=2 pooling for the cls branch.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    # ROIAlign takes (output_size, spatial_scale, sampling_ratio). The former
    # call omitted the scale, so the `0` meant for sampling_ratio landed in
    # the spatial_scale slot. Use the same scale as the RoI pool above.
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)
def __init__(self, classes, class_agnostic, feat_name, feat_list=('conv2', 'conv3', 'conv4', 'conv5'), pretrained=True):
    """Build the FPN detector on top of the parent's feature extractor.

    All arguments are forwarded unchanged to the parent constructor. After
    that, the channel count of every feature map returned by
    ``self.FeatExt`` is discovered with a single probe forward pass, and
    the RPN plus one RoI align / RoI pool per pyramid level are created.

    Args:
        classes: Forwarded to the parent constructor.
        class_agnostic: Forwarded to the parent constructor.
        feat_name: Forwarded to the parent constructor.
        feat_list: Forwarded to the parent constructor.
        pretrained: Forwarded to the parent constructor.

    Note:
        ``self.FeatExt`` and ``self.n_classes`` are read here but not
        assigned in this method; they are expected to be set by the parent
        constructor (not visible here -- confirm).
    """
    super(FPN, self).__init__(classes, class_agnostic, feat_name, feat_list, pretrained)

    # Probe the extractor in eval mode to read the channel count of each
    # returned feature map. Only shapes are used, so feed a well-defined
    # zero image (instead of uninitialised memory) and skip autograd.
    self.FeatExt.eval()
    with torch.no_grad():
        probe_feats = self.FeatExt(torch.zeros(1, 3, 224, 224))
    self.FeatExt.train()
    self.n_channels = [feat.size(1) for feat in probe_feats]

    self.dout_base_model = 256

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    feat_strides = cfg.RCNN_COMMON.FEAT_STRIDE
    self._num_pyramid_layers = len(feat_strides)

    self.RCNN_rpn = _RPN(self.dout_base_model,
                         anchor_scales=cfg.RCNN_COMMON.ANCHOR_SCALES,
                         anchor_ratios=cfg.RCNN_COMMON.ANCHOR_RATIOS,
                         feat_stride=feat_strides)

    # One RoI align and one RoI pool per pyramid level, each scaled by the
    # inverse of that level's stride.
    pooled_shape = (cfg.RCNN_COMMON.POOLING_SIZE, cfg.RCNN_COMMON.POOLING_SIZE)
    self.RCNN_roi_aligns = nn.ModuleList()
    self.RCNN_roi_pools = nn.ModuleList()
    for stride in feat_strides:
        level_scale = 1.0 / float(stride)
        self.RCNN_roi_aligns.append(ROIAlign(pooled_shape, level_scale, 0))
        self.RCNN_roi_pools.append(ROIPool(pooled_shape, level_scale))

    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    self.iter_counter = 0
def __init__(self, classes, class_agnostic):
    """Create the RPN, proposal-target layer and RoI pooling/align layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Define the RPN (it takes the feature map of the base_model
    # feature-extraction network).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # Two ways of cropping the feature map.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)
def __init__(self, classes, class_agnostic, loss_type):
    """Create the RPN, proposal-target layer and RoI pooling/align layers.

    Args:
        classes: Stored as both ``self.classes`` and ``self.n_classes``.
        class_agnostic: Flag stored on the instance.
        loss_type: Stored as ``self.loss_type``.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    # NOTE(review): n_classes is set to `classes` itself, not `len(classes)`;
    # presumably callers pass a class count here -- confirm against callers.
    self.n_classes = classes
    self.class_agnostic = class_agnostic
    self.loss_type = loss_type

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network (also receives n_classes) and proposal/target
    # layer.
    self.RCNN_rpn = _RPN(self.dout_base_model, self.n_classes)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)
def __init__(self, classes, class_agnostic, lc, gc, da_use_contex, in_channel=4096):
    """Create the detector sub-modules and the instance-level DA head.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.
        lc: Stored as ``self.lc``; when truthy (and ``da_use_contex`` is
            truthy) it adds 128 to the DA head's input width.
        gc: Stored as ``self.gc``; same effect as ``lc``.
        da_use_contex: Stored as ``self.da_use_contex``; enables the width
            adjustment above.
        in_channel: Base input width of the instance-level DA head.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.lc = lc
    self.gc = gc
    self.da_use_contex = da_use_contex

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Grid size is doubled when crop-resize is followed by max pooling.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE

    # 1x1 convolution to (n_classes - 1) channels and a global average pool.
    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    # Each enabled context flag widens the instance DA input by 128.
    if self.da_use_contex:
        enabled_flags = sum(1 for flag in (self.lc, self.gc) if flag)
        in_channel += 128 * enabled_flags
    self.RCNN_instanceDA = _InstanceDA(in_channel)
def __init__(self, classes, class_agnostic, rpn_batchsize): super(_fasterRCNN, self).__init__() self.classes = classes self.n_classes = len(classes) self.class_agnostic = class_agnostic self.rpn_batchsize = rpn_batchsize # loss self.RCNN_loss_cls = 0 self.RCNN_loss_bbox = 0 # define rpn self.RCNN_rpn = _RPN(self.dout_base_model, self.rpn_batchsize) self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes) #print 'INFO: pooling size is: ', cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W #self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W, 1.0/16.0) #self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W, 1.0/16.0) self.RCNN_roi_pool = ROIPool((cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W), 1.0/16.0) self.RCNN_roi_align = ROIAlign((cfg.POOLING_SIZE_H, cfg.POOLING_SIZE_W), 1.0/16.0, 0) ''' RoICrop removed from pytorch-1.0 branch
def __init__(self, classes, class_agnostic):
    """Create the RPN, proposal-target, RoI layers and the extension layer.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # RPN layer (also computes the cls loss and bbox loss for the RPN layer).
    self.RCNN_rpn = _RPN(self.dout_base_model)

    # RCNN gt-labels layer (produces gt labels for the final cls and bbox
    # regression).
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Additional extension layer.
    self.extension_layer = extension_layers.extension_layer()
def __init__(self, classes, class_agnostic, in_channel=4096):
    """Create the detector, domain-adaptation and contrastive sub-modules.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.
        in_channel: Input width passed to the instance-level DA head.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    self.RCNN_roi_pool = ROIPool(pooled_shape, 1.0 / 16.0)
    self.RCNN_roi_align = ROIAlign(pooled_shape, 1.0 / 16.0, 0)

    # Grid size is doubled when crop-resize is followed by max pooling.
    self.grid_size = (
        cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
    )

    # Image-level and instance-level domain-adaptation heads.
    self.RCNN_imageDA = _ImageDA(self.dout_base_model, self.n_classes)
    self.RCNN_instanceDA = _InstanceDA(in_channel)

    # `size_average=False` is deprecated; `reduction='sum'` is its documented
    # equivalent (summed, un-averaged squared error).
    self.consistency_loss = torch.nn.MSELoss(reduction='sum')

    # 1x1 convolution to (n_classes - 1) channels and a global average pool.
    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

    # Projection MLP: dout_base_model -> dout_base_model -> 128.
    self.s_l1 = nn.Linear(self.dout_base_model, self.dout_base_model)
    self.s_l2 = nn.Linear(self.dout_base_model, 128)

    # NT-Xent criterion with batch size (n_classes - 1), and a softmax over
    # dim 1.
    self.s_nt_xent_criterion = NTXentLoss(batch_size=self.n_classes - 1,
                                          temperature=0.5,
                                          use_cosine_similarity=True)
    self.contra_softmax = nn.Softmax(dim=1)
def __init__(self, classes, class_agnostic, lc, gc):
    """Create the RPN, RoI layers and the class-wise pooling head.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.
        lc: Stored as ``self.lc``.
        gc: Stored as ``self.gc``.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.lc = lc
    self.gc = gc

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Grid size is doubled when crop-resize is followed by max pooling.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = cfg.POOLING_SIZE * 2
    else:
        self.grid_size = cfg.POOLING_SIZE

    # 1x1 convolution to (n_classes - 1) channels and a global average pool.
    self.conv_lst = nn.Conv2d(self.dout_base_model, self.n_classes - 1, 1, 1, 0)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
def __init__(self, classes, class_agnostic):
    """Create the match block, RPN, RoI layers and triplet loss.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``.
        class_agnostic: Flag stored on the instance.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    self.match_net = match_block(self.dout_base_model)

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Margin ranking loss with the margin taken from the training config.
    self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)
def __init__(self, classes, class_agnostic, model_type):
    """Create the model-type specific block, projection, RPN and RoI layers.

    Args:
        classes: Sized collection of classes; ``len(classes)`` is stored as
            ``n_classes``. The collection is also printed.
        class_agnostic: Flag stored on the instance.
        model_type: ``"match_net"`` builds ``match_net``; ``"attention"``
            builds ``attention_net``. For any other value neither attribute
            is created.

    Note:
        ``self.dout_base_model`` is read but not assigned in this method, so
        it must already be available on the instance (presumably provided by
        a subclass -- confirm).
    """
    super(_fasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)
    self.model_type = model_type
    print(self.classes)
    self.class_agnostic = class_agnostic

    # Model-type specific sub-module.
    if self.model_type == "match_net":
        self.match_net = match_block(self.dout_base_model)
    elif self.model_type == "attention":
        self.attention_net = attention(self.dout_base_model)

    # Bias-free 1x1 convolution from 2048 to 1024 channels, Xavier-initialised.
    self.projection = nn.Conv2d(
        in_channels=1024 * 2,
        out_channels=1024,
        kernel_size=1,
        stride=1,
        padding=0,
        bias=False,
    )
    nn.init.xavier_uniform_(self.projection.weight)

    # Loss placeholders, initialised to 0.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal/target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling and RoI align share the same output size and scale.
    pooled_shape = (cfg.POOLING_SIZE, cfg.POOLING_SIZE)
    roi_scale = 1.0 / 16.0
    self.RCNN_roi_pool = ROIPool(pooled_shape, roi_scale)
    self.RCNN_roi_align = ROIAlign(pooled_shape, roi_scale, 0)

    # Margin ranking loss with the margin taken from the training config.
    self.triplet_loss = torch.nn.MarginRankingLoss(margin=cfg.TRAIN.MARGIN)
def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                          resolution=7, spatial_scale=1. / 16., sampling_ratio=0,
                          query_blobs_in=None):
    """Add the specified RoI pooling method.

    The sampling_ratio argument is supported for some, but not all, RoI
    transform methods.

    RoIFeatureTransform abstracts away:
      - Use of FPN or not
      - Specifics of the transform method

    Args:
        blobs_in: A single feature tensor, or a list of feature tensors
            (FPN case, coarsest level first).
        rpn_ret: Mapping holding the RoI arrays (NumPy) under ``blob_rois``
            (single level) or ``blob_rois + '_fpn<lvl>'`` plus
            ``blob_rois + '_idx_restore_int32'`` (FPN).
        blob_rois: Key prefix of the RoIs to pool.
        method: ``'RoIPoolF'`` or ``'RoIAlign'``. ``'RoICrop'`` is accepted
            by the name check but has no implementation.
        resolution: Output height and width of the pooled features.
        spatial_scale: Scale for the single-level case, or a per-level
            sequence (same order as ``blobs_in``) for FPN.
        sampling_ratio: Passed to ``ROIAlign``.
        query_blobs_in: Optional query features (tensor, or list for FPN).
            Used only when ``blob_rois`` is ``'rois'`` or ``'mask_rois'``.

    Returns:
        The pooled features, or ``(pooled, query_pooled)`` when query
        features are used.

    Raises:
        AssertionError: If ``method`` is not a known method name.
        NotImplementedError: If ``method`` is ``'RoICrop'``. Previously this
            surfaced as an ``UnboundLocalError`` on the first pooled level.
    """
    assert method in {'RoIPoolF', 'RoICrop', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)
    if method == 'RoICrop':
        raise NotImplementedError(
            'RoICrop pooling is not implemented; use RoIPoolF or RoIAlign')

    def _pool(features, rois, scale):
        """Pool `rois` from `features` with the selected method."""
        # Warning from the original author: RoIPoolF was not checked against
        # the Detectron implementation.
        if method == 'RoIPoolF':
            return ROIPool((resolution, resolution), scale)(features, rois)
        return ROIAlign((resolution, resolution), scale, sampling_ratio)(features, rois)

    def _gather_query(query_features, batch_idxs):
        """Pick each RoI's query feature map and resize it to the output size."""
        picked = query_features[batch_idxs]
        return F.interpolate(picked, size=[resolution, resolution], mode="bilinear")

    use_query = blob_rois in ('rois', 'mask_rois') and query_blobs_in is not None

    if not isinstance(blobs_in, list):
        # Single feature level.
        # rois: holds R regions of interest, each is a 5-tuple
        # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
        # rectangle (x1, y1, x2, y2)
        # `.to(device)` matches the former `.cuda(get_device())` for CUDA
        # inputs and also works for CPU tensors.
        rois = torch.from_numpy(rpn_ret[blob_rois]).to(blobs_in.device)
        xform_out = _pool(blobs_in, rois, spatial_scale)
        if use_query:
            batch_idxs = rois[:, 0].long()
            return xform_out, _gather_query(query_blobs_in, batch_idxs)
        return xform_out

    # FPN case: add RoIFeatureTransform to each FPN level.
    device = blobs_in[0].device
    k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1

    bl_out_list = []
    query_bl_out_list = []
    for lvl in range(k_min, k_max + 1):
        level_idx = k_max - lvl  # blobs_in and spatial_scale are in reversed order
        level_rois = rpn_ret[blob_rois + '_fpn' + str(lvl)]
        if not len(level_rois):
            # No RoIs were assigned to this level.
            continue
        rois = torch.from_numpy(level_rois).to(device)
        xform_out = _pool(blobs_in[level_idx], rois, spatial_scale[level_idx])
        if use_query:
            batch_idxs = rois[:, 0].long()
            query_bl_out_list.append(
                _gather_query(query_blobs_in[level_idx], batch_idxs))
        bl_out_list.append(xform_out)

    # The pooled features from all levels are concatenated along the
    # batch dimension into a single 4D tensor.
    xform_shuffled = torch.cat(bl_out_list, dim=0)

    # Unshuffle to match rois from dataloader.
    restore_bl = rpn_ret[blob_rois + '_idx_restore_int32']
    restore_bl = torch.from_numpy(
        restore_bl.astype('int64', copy=False)).to(xform_shuffled.device)
    xform_out = xform_shuffled[restore_bl]

    if use_query:
        query_xform_shuffled = torch.cat(query_bl_out_list, dim=0)
        return xform_out, query_xform_shuffled[restore_bl]
    return xform_out