def __init__(self, out_size, phase, in_im_sz, fm_use):
    """Initializes RoI_layer module.

    Args:
        out_size: Passed as the first two arguments of both RoI operators.
        phase: 'train' or 'eval'; selects which pickled data index is loaded.
        in_im_sz: Input image size, used to build the whole-frame RoI tables.
        fm_use: ``int(fm_use / 4)`` rows are generated for the dense RoI
            tables.

    Raises:
        AssertionError: If ``phase`` is neither 'train' nor 'eval'.
    """
    super(RoI_layer, self).__init__()
    self.phase = phase

    # in order to get the RoI region
    self.out_size = out_size
    self.in_img_sz = in_im_sz
    self.tm_scale = 8
    self.fm_ROI = int(fm_use / 4)
    self.Dense_scale = int(self.tm_scale / 2)

    if phase == 'train':
        data_index_file = './data/Charades_train.pkl'
    elif phase == 'eval':
        data_index_file = './data/Charades_Val_Video.pkl'
    else:
        # Raised explicitly (instead of ``assert 0``) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError('The data can not find')

    self.bx_dir = '/VIDEO_DATA/BBOX/'
    # Use a context manager so the index file handle is closed right after
    # loading instead of being left to the garbage collector.
    with open(data_index_file, 'rb') as index_file:
        self.data_index = pickle.load(index_file)

    # in order to get the bbox (RPN)
    # define rpn
    self.ROI_Align = RoIAlignAvg(out_size, out_size, 1 / 16.0)  # scale need to change
    self.ROI_Pool = _RoIPooling(out_size, out_size, 1 / 16.0)  # scale need to change
    self.Ptorch_ROI = Torch_ROI(feature_scal=(self.in_img_sz / 16))

    # Whole-frame RoI tables, one row per index: [i, 0, 0, extent, extent].
    # NOTE(review): rows look like [frame_idx, x1, y1, x2, y2] -- confirm
    # against the RoI operators' expected layout.
    # 32 = scale * 2 = 16*2 for ROI Align
    self.Scene_Roi = np.array(
        [[i, 0, 0, self.in_img_sz - 32, self.in_img_sz - 32]
         for i in range(self.fm_ROI)])
    self.Scens_Full = np.array(
        [[i, 0, 0, self.in_img_sz - 16, self.in_img_sz - 16]
         for i in range(self.fm_ROI)])
    self.Scens_Pytorch = np.array(
        [[i, 0, 0, self.in_img_sz, self.in_img_sz]
         for i in range(self.fm_ROI)])
    # Sparse variant: only the odd indices 1, 3, 5, ...
    self.Scens_Sparse = np.array(
        [[i, 0, 0, self.in_img_sz, self.in_img_sz]
         for i in range(1, self.fm_ROI, 2)])
def __init__(self, classes, class_agnostic):
    """Store the class list and build the RPN and RoI feature operators.

    Args:
        classes: Sized collection of classes; its length becomes
            ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    # ``dout_base_model`` is read here but never assigned in this method, so
    # it must already be available on the instance.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pooled = cfg.POOLING_SIZE
    scale = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(pooled, pooled, scale)
    self.RCNN_roi_align = RoIAlignAvg(pooled, pooled, scale)

    # Author's config notes: __C.POOLING_SIZE = 7 is the size of the pooled
    # region (the feature size obtained after RoI pooling) and
    # CROP_RESIZE_WITH_MAX_POOL = True, which gives a grid of 7 * 2 = 14.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pooled * 2
    else:
        self.grid_size = pooled
    self.RCNN_roi_crop = _RoICrop()
def __init__(self, classes, class_agnostic):
    """Load label vectors and build the RPN, RoI operators and embedding head.

    Args:
        classes: Sized collection of classes; its length becomes
            ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_HierRCNN, self).__init__()

    # Copy the 'label_vec' dataset into a NumPy array while the file is open.
    with h5py.File('data/pretrained_model/label_vec_vrd.h5', 'r') as f:
        self.label_vecs = np.array(f['label_vec'])

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    # ``dout_base_model`` is read here but never assigned in this method.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    pooled = cfg.POOLING_SIZE
    scale = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(pooled, pooled, scale)
    self.RCNN_roi_align = RoIAlignAvg(pooled, pooled, scale)
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pooled * 2
    else:
        self.grid_size = pooled
    self.RCNN_roi_crop = _RoICrop()

    # Two-layer head mapping 4096 features to cfg.HIER.EMBEDDING_LENGTH.
    embedding_layers = [
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(4096, cfg.HIER.EMBEDDING_LENGTH),
    ]
    self.order_embedding = nn.Sequential(*embedding_layers)
    self.order_score = _OrderSimilarity(cfg.HIER.ORDER_DISTANCE_NORM)
def __init__(self, out_size, phase, in_im_sz, fm_use):
    """Initializes RoI_layer module.

    Args:
        out_size: Passed as the first two arguments of the RoI align operator.
        phase: 'train' or 'eval'; selects which pickled data index is loaded.
        in_im_sz: Input image size, used to build the whole-frame RoI table.
        fm_use: ``int(fm_use / 4)`` rows are generated for the RoI table.

    Raises:
        AssertionError: If ``phase`` is neither 'train' nor 'eval'.
    """
    super(RoI_layer, self).__init__()
    self.phase = phase

    # in order to get the RoI region
    self.out_size = out_size
    self.in_img_sz = in_im_sz
    self.tm_scale = 4
    self.fm_ROI = int(fm_use / 4)

    if phase == 'train':
        data_index_file = './data/Charades_train.pkl'
    elif phase == 'eval':
        data_index_file = './data/Charades_Val_Video.pkl'
    else:
        # Raised explicitly (instead of ``assert 0``) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError('The data can not find')

    self.bx_dir = '/VIDEO_DATA/BBOX/'
    # Use a context manager so the index file handle is closed right after
    # loading instead of being left to the garbage collector.
    with open(data_index_file, 'rb') as index_file:
        self.data_index = pickle.load(index_file)

    # in order to get the bbox (RPN)
    # define rpn
    self.RCNN_roi_align = RoIAlignAvg(out_size, out_size, 1 / 16.0)  # scale need to change

    # One whole-frame row per index: [i, 0, 0, extent, extent], with the
    # extent shrunk by 2 * 16 pixels.
    # NOTE(review): rows look like [frame_idx, x1, y1, x2, y2] -- confirm.
    self.Scens_Roi = np.array(
        [[i, 0, 0, self.in_img_sz - 2 * 16, self.in_img_sz - 2 * 16]
         for i in range(self.fm_ROI)])
def __init__(self, phase, cfg, size, base, extras, head, num_classes):
    """Assemble the SSD backbone, prediction heads and RoI operators.

    Args:
        phase: Stored on the instance; 'vid_test' additionally creates the
            softmax and detection modules.
        cfg: Not read by this method; the configuration is taken from the
            ``vid`` name instead.
        size: Stored unchanged on the instance.
        base: Iterable of modules wrapped as ``self.vgg``.
        extras: Iterable of modules wrapped as ``self.extras``.
        head: Pair whose first item becomes ``self.loc`` and second item
            becomes ``self.conf``.
        num_classes: Number of classes, stored and forwarded to the detector.
    """
    super(test_association, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = vid
    self.priorbox = PriorBox(self.cfg)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = size

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    pooled = self.cfg['POOLING_SIZE']
    scale = 1.0 / 16.0
    self.roi_pool = _RoIPooling(pooled, pooled, scale)
    self.roi_align = RoIAlignAvg(pooled, pooled, scale)
    if self.cfg['CROP_RESIZE_WITH_MAX_POOL']:
        self.grid_size = pooled * 2
    else:
        self.grid_size = pooled
    self.roi_crop = _RoICrop()

    min_dim = self.cfg['min_dim']
    self.img_shape = (min_dim, min_dim)

    if phase == 'vid_test':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = test_target(num_classes, 200, 0.5, 0.01, 0.45)
def __init__(self, out_size, in_im_sz):
    """Initializes RoI_layer module.

    Args:
        out_size: Stored and passed as the first two arguments of the RoI
            align operator.
        in_im_sz: Input image size, stored as ``in_img_sz``.
    """
    super(RoI_layer_mulity, self).__init__()
    self.out_size = out_size
    self.in_img_sz = in_im_sz

    # define rpn
    # 224->14 : 16
    pooled = self.out_size
    self.ROI_Align = RoIAlignAvg(pooled, pooled, 1 / 16.0)
def _init_modules(self):
    """Build the ResNet-101 backbone, the FC head and the RoI align operator.

    Reads ``self.model_path``, ``self.no_dropout`` and ``self.fixed_blocks``;
    sets ``self.base``, ``self.top``, ``self.bbox_pred_layer`` and
    ``self.roi_align``.

    Raises:
        AssertionError: If ``self.fixed_blocks`` is outside ``[0, 4)``.
    """
    res = resnet101()

    if self.model_path is None:
        print("Create model without pretrained weights")
    else:
        print("Loading pretrained weights from %s" % (self.model_path))
        state_dict = torch.load(self.model_path)
        # Build the model's own state dict once; calling state_dict() inside
        # the comprehension rebuilt it for every checkpoint key.
        known_keys = res.state_dict()
        res.load_state_dict(
            {k: v for k, v in state_dict.items() if k in known_keys})

    # not using the last maxpool layer
    self.base = nn.Sequential(res.conv1, res.bn1, res.relu, res.maxpool,
                              res.layer1, res.layer2, res.layer3)

    if self.no_dropout:
        self.top = nn.Sequential(
            nn.Linear(1024 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Linear(4096, 4096),
            nn.ReLU(True)
        )
    else:
        self.top = nn.Sequential(
            nn.Linear(1024 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout()
        )

    self.bbox_pred_layer = nn.Linear(4096, 4)
    self.roi_align = RoIAlignAvg(7, 7, 1.0 / 16.0)

    def freeze(module):
        # Exclude every parameter of ``module`` from gradient computation.
        for p in module.parameters():
            p.requires_grad = False

    # The stem (conv1 at index 0, bn1 at index 1) is always frozen.
    freeze(self.base[0])
    freeze(self.base[1])

    assert (0 <= self.fixed_blocks < 4)
    # base[4], base[5], base[6] are layer1, layer2, layer3; freeze the first
    # ``fixed_blocks`` of them.
    for threshold, index in ((3, 6), (2, 5), (1, 4)):
        if self.fixed_blocks >= threshold:
            freeze(self.base[index])

    def set_bn_fix(m):
        # Freeze parameters of any module whose class name contains 'BatchNorm'.
        classname = m.__class__.__name__
        if classname.find('BatchNorm') != -1:
            for p in m.parameters():
                p.requires_grad = False

    self.base.apply(set_bn_fix)
def __init__(self, classes, class_agnostic):
    """Store the class list and build the RPN and RoI align/crop operators.

    Args:
        classes: Sized collection of classes; its length becomes
            ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_fasterRCNN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    # ``dout_base_model`` is read here but never assigned in this method
    # (original author's note: self.dout_base_model = 512).
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # The RoI pooling operator is disabled in this variant; only RoI align
    # and RoI crop are created.
    pooled = cfg.POOLING_SIZE
    self.RCNN_roi_align = RoIAlignAvg(pooled, pooled, 1.0 / 16.0)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pooled * 2
    else:
        self.grid_size = pooled
    self.RCNN_roi_crop = _RoICrop()
def _init_modules(self):
    """Build the VGG-16 backbone, the FC head and the RoI align operator.

    Reads ``self.model_path``, ``self.freeze_before_conv3``,
    ``self.use_pretrained_fc`` and ``self.no_dropout``; sets ``self.base``,
    ``self.top``, ``self.bbox_pred_layer`` and ``self.roi_align``.
    """
    vgg = models.vgg16()

    if self.model_path is None:
        print("Create model without pretrained weights")
    else:
        print("Loading pretrained weights from %s" % (self.model_path))
        state_dict = torch.load(self.model_path)
        # Build the model's own state dict once; calling state_dict() inside
        # the comprehension rebuilt it for every checkpoint key.
        known_keys = vgg.state_dict()
        vgg.load_state_dict(
            {k: v for k, v in state_dict.items() if k in known_keys})

    # Drop the final classifier layer.
    vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])

    # not using the last maxpool layer
    self.base = nn.Sequential(*list(vgg.features._modules.values())[:-1])

    # Fix the layers before conv3:
    if self.freeze_before_conv3:
        for layer in range(10):
            for p in self.base[layer].parameters():
                p.requires_grad = False

    if self.use_pretrained_fc:
        if self.no_dropout:
            # Reuse classifier entries 0, 1, 3, 4 and skip entry 2
            # (presumably the dropout layer -- confirm against torchvision).
            self.top = nn.Sequential(
                vgg.classifier[0],
                vgg.classifier[1],
                vgg.classifier[3],
                vgg.classifier[4]
            )
        else:
            self.top = vgg.classifier
    else:
        if self.no_dropout:
            self.top = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Linear(4096, 4096),
                nn.ReLU(True)
            )
        else:
            self.top = nn.Sequential(
                nn.Linear(512 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout()
            )

    self.bbox_pred_layer = nn.Linear(4096, 4)
    self.roi_align = RoIAlignAvg(7, 7, 1.0/16.0)
def _init_modules(self):
    """Build the pretrained VGG-16 backbone, FC head and RoI align operator.

    Always loads weights from ``self.model_path``; sets ``self.base``,
    ``self.top``, ``self.bbox_pred_layer`` and ``self.roi_align``.
    """
    vgg = models.vgg16()

    print("Loading pretrained weights from %s" % (self.model_path))
    state_dict = torch.load(self.model_path)
    # Build the model's own state dict once; calling state_dict() inside the
    # comprehension rebuilt it for every checkpoint key.
    known_keys = vgg.state_dict()
    vgg.load_state_dict(
        {k: v for k, v in state_dict.items() if k in known_keys})

    # Drop the final classifier layer.
    vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])

    # not using the last maxpool layer
    self.base = nn.Sequential(*list(vgg.features._modules.values())[:-1])

    # Fix the layers before conv3:
    for layer in range(10):
        for p in self.base[layer].parameters():
            p.requires_grad = False

    self.top = vgg.classifier
    self.bbox_pred_layer = nn.Linear(4096, 4)
    self.roi_align = RoIAlignAvg(7, 7, 1.0/16.0)
def __init__(self, phase, cfg, size, base, extras, head, num_classes):
    """Assemble the SSD backbone, RoI operators, LSTM stack and output heads.

    Args:
        phase: Stored on the instance; 'vid_train' additionally creates the
            softmax and target modules.
        cfg: Not read by this method; the configuration is taken from the
            ``vid`` name instead.
        size: Stored unchanged on the instance.
        base: Iterable of modules wrapped as ``self.vgg``.
        extras: Iterable of modules wrapped as ``self.extras``.
        head: Pair whose first item becomes ``self.loc`` and second item
            becomes ``self.conf``.
        num_classes: Number of classes; sizes the classification head.
    """
    super(association_lstm, self).__init__()
    self.phase = phase
    self.num_classes = num_classes
    self.cfg = vid
    self.priorbox = PriorBox(self.cfg)
    self.priors = Variable(self.priorbox.forward(), volatile=True)
    self.size = size

    # SSD network
    self.vgg = nn.ModuleList(base)
    # Layer learns to scale the l2 normalized features from conv4_3
    self.L2Norm = L2Norm(512, 20)
    self.extras = nn.ModuleList(extras)

    loc_layers, conf_layers = head[0], head[1]
    self.loc = nn.ModuleList(loc_layers)
    self.conf = nn.ModuleList(conf_layers)

    pooled = self.cfg['POOLING_SIZE']
    scale = 1.0 / 16.0
    self.roi_pool = _RoIPooling(pooled, pooled, scale)
    self.roi_align = RoIAlignAvg(pooled, pooled, scale)
    if self.cfg['CROP_RESIZE_WITH_MAX_POOL']:
        self.grid_size = pooled * 2
    else:
        self.grid_size = pooled
    self.roi_crop = _RoICrop()

    min_dim = self.cfg['min_dim']
    self.img_shape = (min_dim, min_dim)

    # 4 + num_classes + 49 matches the widths of the three linear heads
    # below (bbox, class, association).
    self.tensor_len = 4 + self.num_classes + 49

    # Two stacked BNLSTMs: 84 -> 150 -> 300.
    self.bnlstm1 = BNLSTM(input_size=84, hidden_size=150,
                          batch_first=False, bidirectional=False)
    self.bnlstm2 = BNLSTM(input_size=150, hidden_size=300,
                          batch_first=False, bidirectional=False)

    # Linear heads on the 300-d output of the second LSTM.
    self.cls_pred = nn.Linear(300, self.num_classes)
    self.bbox_pred = nn.Linear(300, 4)
    self.association_pred = nn.Linear(300, 49)
    self.MultiProjectLoss = MultiProjectLoss(self.num_classes, 0, True, 3, 0.5)

    if phase == 'vid_train':
        self.softmax = nn.Softmax(dim=-1)
        self.detect = train_target(num_classes, 200, 0.5, 0.01, 0.45)
def __init__(self, classes, class_agnostic):
    """Store the class list and build the FPN RPN and RoI feature operators.

    Args:
        classes: Sized collection of classes; its length becomes
            ``n_classes``.
        class_agnostic: Stored unchanged on the instance.
    """
    super(_FPN, self).__init__()
    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # loss
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # define rpn
    # ``dout_base_model`` is read here but never assigned in this method.
    self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # NOTE: the original paper used pool_size = 7 for cls branch, and 14 for
    # mask branch, to save the computation time, we first use 14 as the
    # pool_size, and then do stride=2 pooling for cls branch.
    pooled = cfg.POOLING_SIZE
    scale = 1.0 / 16.0
    self.RCNN_roi_pool = _RoIPooling(pooled, pooled, scale)
    self.RCNN_roi_align = RoIAlignAvg(pooled, pooled, scale)

    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pooled * 2
    else:
        self.grid_size = pooled
    self.RCNN_roi_crop = _RoICrop()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
from torchvision import models
import os
import torch.nn as nn
from torch.autograd import Variable
from lib.model.roi_align.modules.roi_align import RoIAlignAvg
import _init_paths

# Manual smoke test for RoIAlignAvg: run it on a 100x100 ramp image so the
# printed output can be checked by eye. Requires a CUDA device.

# Ramp feature map of shape (1, 1, 100, 100) with base[0][0][i][j] = i * 100 + j.
# Built in one vectorized call instead of 10,000 per-element assignments;
# .float() keeps the dtype the same as the original torch.ones() tensor.
base = torch.arange(0, 100 * 100).float().view(1, 1, 100, 100)
print(base)
base = Variable(base.cuda())

# Two identical five-value RoI rows.
# NOTE(review): presumably [batch_idx, x1, y1, x2, y2] -- confirm against
# RoIAlignAvg.
rois = Variable(torch.FloatTensor([[0, 0, 0, 6, 6], [0, 0, 0, 6, 6]]).cuda())
print(rois)

roi_align = RoIAlignAvg(7, 7, 1.0 / 2)
roi = roi_align(base, rois)
print(roi)