Example #1
    def __init__(self, out_size, phase, in_im_sz, fm_use):
        """Initializes RoI_layer module."""
        super(RoI_layer, self).__init__()

        self.phase = phase  # in order to get the RoI region
        self.out_size = out_size
        self.in_img_sz = in_im_sz
        self.tm_scale = 8
        self.fm_ROI = int(fm_use / 4)
        self.Dense_scale = int(self.tm_scale / 2)

        if phase == 'train':
            data_index_file = './data/Charades_train.pkl'
        elif phase == 'eval':
            data_index_file = './data/Charades_Val_Video.pkl'
        else:
            assert 0, 'Cannot find the data index file'
        self.bx_dir = '/VIDEO_DATA/BBOX/'
        self.data_index = pickle.load(open(data_index_file, 'rb'))  # in order to get the bbox (RPN)

        # define rpn
        self.ROI_Align = RoIAlignAvg(out_size, out_size, 1 / 16.0)  # scale needs to change

        self.ROI_Pool = _RoIPooling(out_size, out_size, 1 / 16.0)  # scale needs to change

        self.Ptorch_ROI = Torch_ROI(feature_scal=(self.in_img_sz / 16))

        self.Scene_Roi = np.array([[i, 0, 0, self.in_img_sz - 32, self.in_img_sz - 32] for i in range(self.fm_ROI)])
        # 32 = scale * 2 = 16 * 2, for RoI Align
        self.Scens_Full = np.array([[i, 0, 0, self.in_img_sz - 16, self.in_img_sz - 16] for i in range(self.fm_ROI)])
        self.Scens_Pytorch = np.array([[i, 0, 0, self.in_img_sz, self.in_img_sz] for i in range(self.fm_ROI)])
        self.Scens_Sparse = np.array([[i, 0, 0, self.in_img_sz, self.in_img_sz] for i in range(1, self.fm_ROI, 2)])
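A quick numeric check of the whole-image RoIs built above (illustrative only: in_im_sz and fm_use are constructor arguments, so 224 and 16 are assumed values). Each row is [batch_index, x1, y1, x2, y2] in input-image pixels; the 32 = 2 * 16 margin matches the comment above.

import numpy as np

in_img_sz = 224                  # assumed input size
fm_ROI = int(16 / 4)             # assumed fm_use = 16, as in the constructor above
scene_roi = np.array([[i, 0, 0, in_img_sz - 32, in_img_sz - 32] for i in range(fm_ROI)])
print(scene_roi[0])              # [  0   0   0 192 192]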
Example #2
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE  # = 7 * 2 = 14
        '''
        # Size of the pooled region after RoI pooling
        __C.POOLING_SIZE = 7  # size of the feature map obtained after RoI pooling
        CROP_RESIZE_WITH_MAX_POOL = True
        '''


        self.RCNN_roi_crop = _RoICrop()
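For context, a minimal sketch of what the doubled grid_size is typically used for in this style of Faster R-CNN code (an illustrative reconstruction, not part of the snippet above): with CROP_RESIZE_WITH_MAX_POOL enabled, RoIs are cropped/resized to a 14 x 14 grid and then 2 x 2 max-pooled down to the 7 x 7 POOLING_SIZE.

import torch
import torch.nn.functional as F

pooling_size = 7
grid_size = pooling_size * 2                          # 14, as computed above
cropped = torch.randn(8, 512, grid_size, grid_size)   # hypothetical cropped features for 8 RoIs
pooled = F.max_pool2d(cropped, kernel_size=2, stride=2)
print(pooled.shape)                                   # torch.Size([8, 512, 7, 7])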
Example #3
    def __init__(self, classes, class_agnostic):
        super(_HierRCNN, self).__init__()
        with h5py.File('data/pretrained_model/label_vec_vrd.h5', 'r') as f:
            self.label_vecs = np.array(f['label_vec'])
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        self.order_embedding = nn.Sequential(
            nn.ReLU(inplace=True), nn.Dropout(), nn.Linear(4096, 4096),
            nn.ReLU(inplace=True), nn.Dropout(),
            nn.Linear(4096, cfg.HIER.EMBEDDING_LENGTH))

        self.order_score = _OrderSimilarity(cfg.HIER.ORDER_DISTANCE_NORM)
Example #4
    def __init__(self, out_size, phase, in_im_sz, fm_use):
        """Initializes RoI_layer module."""
        super(RoI_layer, self).__init__()

        self.phase = phase  # in order to get the RoI region
        self.out_size = out_size
        self.in_img_sz = in_im_sz
        self.tm_scale = 4
        self.fm_ROI = int(fm_use / 4)

        if phase == 'train':
            data_index_file = './data/Charades_train.pkl'
        elif phase == 'eval':
            data_index_file = './data/Charades_Val_Video.pkl'
        else:
            assert 0, 'Cannot find the data index file'
        self.bx_dir = '/VIDEO_DATA/BBOX/'
        self.data_index = pickle.load(open(
            data_index_file, 'rb'))  # in order to get the bbox (RPN)

        # define rpn
        self.RCNN_roi_align = RoIAlignAvg(out_size, out_size,
                                          1 / 16.0)  # scale needs to change
        self.Scens_Roi = np.array(
            [[i, 0, 0, self.in_img_sz - 2 * 16, self.in_img_sz - 2 * 16]
             for i in range(self.fm_ROI)])
Example #5
    def __init__(self, phase, cfg, size, base, extras, head, num_classes):
        super(test_association, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = vid
        self.priorbox = PriorBox(self.cfg)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        self.roi_pool = _RoIPooling(self.cfg['POOLING_SIZE'], self.cfg['POOLING_SIZE'], 1.0 / 16.0)
        self.roi_align = RoIAlignAvg(self.cfg['POOLING_SIZE'], self.cfg['POOLING_SIZE'], 1.0 / 16.0)

        self.grid_size = self.cfg['POOLING_SIZE'] * 2 if self.cfg['CROP_RESIZE_WITH_MAX_POOL'] else self.cfg['POOLING_SIZE']
        self.roi_crop = _RoICrop()
        self.img_shape = (self.cfg['min_dim'],self.cfg['min_dim'])

        if phase == 'vid_test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = test_target(num_classes, 200, 0.5, 0.01, 0.45)
Example #6
    def __init__(self, out_size, in_im_sz):
        """Initializes RoI_layer module."""
        super(RoI_layer_mulity, self).__init__()
        self.out_size = out_size
        self.in_img_sz = in_im_sz

        # define rpn
        self.ROI_Align = RoIAlignAvg(self.out_size, self.out_size,
                                     1 / 16.0)  # 224 -> 14 with stride 16
Example #7
    def _init_modules(self):
        res = resnet101()
        if self.model_path is None:
            print("Create model without pretrained weights")
        else:
            print("Loading pretrained weights from %s" % (self.model_path))
            state_dict = torch.load(self.model_path)
            res.load_state_dict({k: v for k, v in state_dict.items() if k in res.state_dict()})

        # backbone: conv1 through layer3 (layer4 and the classifier head are not used)
        self.base = nn.Sequential(res.conv1, res.bn1, res.relu, res.maxpool, res.layer1, res.layer2, res.layer3)
        if self.no_dropout:
            self.top = nn.Sequential(
                nn.Linear(1024 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Linear(4096, 4096),
                nn.ReLU(True)
            )
        else:
            self.top = nn.Sequential(
                nn.Linear(1024 * 7 * 7, 4096),
                nn.ReLU(True),
                nn.Dropout(),
                nn.Linear(4096, 4096),
                nn.ReLU(True),
                nn.Dropout()
            )

        self.bbox_pred_layer = nn.Linear(4096, 4)
        self.roi_align = RoIAlignAvg(7, 7, 1.0 / 16.0)
        for p in self.base[0].parameters():
            p.requires_grad = False
        for p in self.base[1].parameters():
            p.requires_grad = False

        assert (0 <= self.fixed_blocks < 4)
        if self.fixed_blocks >= 3:
            for p in self.base[6].parameters():
                p.requires_grad = False
        if self.fixed_blocks >= 2:
            for p in self.base[5].parameters():
                p.requires_grad = False
        if self.fixed_blocks >= 1:
            for p in self.base[4].parameters():
                p.requires_grad = False

        def set_bn_fix(m):
            classname = m.__class__.__name__
            if classname.find('BatchNorm') != -1:
                for p in m.parameters():
                    p.requires_grad = False

        self.base.apply(set_bn_fix)
Example #8
    def __init__(self, classes, class_agnostic):
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0
        # define rpn
        self.RCNN_rpn = _RPN(self.dout_base_model)  # self.dout_base_model = 512
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        # self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0/16.0)
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
Example #9
    def _init_modules(self):
        vgg = models.vgg16()
        if self.model_path is None:
            print("Create model without pretrained weights")
        else:
            print("Loading pretrained weights from %s" % (self.model_path))
            state_dict = torch.load(self.model_path)
            vgg.load_state_dict({k: v for k, v in state_dict.items() if k in vgg.state_dict()})

        vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])

        # not using the last maxpool layer
        self.base = nn.Sequential(*list(vgg.features._modules.values())[:-1])

        # Fix the layers before conv3:
        if self.freeze_before_conv3:
            for layer in range(10):
                for p in self.base[layer].parameters(): p.requires_grad = False

        if self.use_pretrained_fc:
            if self.no_dropout:
                self.top = nn.Sequential(
                    vgg.classifier[0],
                    vgg.classifier[1],
                    vgg.classifier[3],
                    vgg.classifier[4]
                )
            else:
                self.top = vgg.classifier
        else:
            if self.no_dropout:
                self.top = nn.Sequential(
                    nn.Linear(512 * 7 * 7, 4096),
                    nn.ReLU(True),
                    nn.Linear(4096, 4096),
                    nn.ReLU(True)
                )
            else:
                self.top = nn.Sequential(
                    nn.Linear(512 * 7 * 7, 4096),
                    nn.ReLU(True),
                    nn.Dropout(),
                    nn.Linear(4096, 4096),
                    nn.ReLU(True),
                    nn.Dropout()
                )
        self.bbox_pred_layer = nn.Linear(4096, 4)
        self.roi_align = RoIAlignAvg(7, 7, 1.0/16.0)
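A minimal sketch (assumed, since the forward pass is not included in the snippet above) of how the 7 x 7 RoIAlignAvg output would feed the self.top and self.bbox_pred_layer defined here; the 512 * 7 * 7 input size of the first Linear layer is why the pooled feature is flattened first. The method name below is hypothetical.

    def _box_head_forward(self, pooled):
        # pooled: output of self.roi_align(base_feat, rois), shape (num_rois, 512, 7, 7)
        fc7 = self.top(pooled.view(pooled.size(0), -1))  # (num_rois, 4096)
        return self.bbox_pred_layer(fc7)                 # (num_rois, 4) box regression deltas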
Example #10
    def _init_modules(self):
        vgg = models.vgg16()
        print("Loading pretrained weights from %s" % (self.model_path))
        state_dict = torch.load(self.model_path)
        vgg.load_state_dict({k: v for k, v in state_dict.items() if k in vgg.state_dict()})

        vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1])

        # not using the last maxpool layer
        self.base = nn.Sequential(*list(vgg.features._modules.values())[:-1])

        # Fix the layers before conv3:
        for layer in range(10):
            for p in self.base[layer].parameters(): p.requires_grad = False

        self.top = vgg.classifier
        self.bbox_pred_layer = nn.Linear(4096, 4)
        self.roi_align = RoIAlignAvg(7, 7, 1.0/16.0)
Example #11
    def __init__(self, phase, cfg, size, base, extras, head, num_classes):
        super(association_lstm, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        self.cfg = vid
        self.priorbox = PriorBox(self.cfg)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])
        self.roi_pool = _RoIPooling(self.cfg['POOLING_SIZE'],
                                    self.cfg['POOLING_SIZE'], 1.0 / 16.0)
        self.roi_align = RoIAlignAvg(self.cfg['POOLING_SIZE'],
                                     self.cfg['POOLING_SIZE'], 1.0 / 16.0)

        self.grid_size = self.cfg['POOLING_SIZE'] * 2 if self.cfg[
            'CROP_RESIZE_WITH_MAX_POOL'] else self.cfg['POOLING_SIZE']
        self.roi_crop = _RoICrop()
        self.img_shape = (self.cfg['min_dim'], self.cfg['min_dim'])
        self.tensor_len = 4 + self.num_classes + 49
        self.bnlstm1 = BNLSTM(input_size=84,
                              hidden_size=150,
                              batch_first=False,
                              bidirectional=False)
        self.bnlstm2 = BNLSTM(input_size=150,
                              hidden_size=300,
                              batch_first=False,
                              bidirectional=False)
        self.cls_pred = nn.Linear(300, self.num_classes)
        self.bbox_pred = nn.Linear(300, 4)
        self.association_pred = nn.Linear(300, 49)
        self.MultiProjectLoss = MultiProjectLoss(self.num_classes, 0, True, 3,
                                                 0.5)
        if phase == 'vid_train':
            self.softmax = nn.Softmax(dim=-1)
            #self.detect = Trnsform_target(num_classes, 200, 0.5, 0.01, 0.45)
            self.detect = train_target(num_classes, 200, 0.5, 0.01, 0.45)
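For orientation, the 49 in tensor_len and association_pred above corresponds to a 7 x 7 (POOLING_SIZE x POOLING_SIZE) association map. A small sanity check under the assumption that num_classes = 31 (ImageNet VID's 30 classes plus background), which would make the per-box feature length match the BNLSTM input_size of 84:

num_classes = 31                   # assumption: 30 VID classes + background
tensor_len = 4 + num_classes + 49  # bbox (4) + class scores + 7*7 association map
assert tensor_len == 84            # matches BNLSTM(input_size=84) above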
Example #12
    def __init__(self, classes, class_agnostic):
        super(_FPN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # NOTE: the original paper used pool_size = 7 for the cls branch and 14 for the mask branch; to save
        # computation time, we first use 14 as the pool_size and then do stride-2 pooling for the cls branch.
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
Example #13
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import torch
from torchvision import models
import os
import torch.nn as nn
from torch.autograd import Variable
from lib.model.roi_align.modules.roi_align import RoIAlignAvg
import _init_paths

base = torch.ones(1, 1, 100, 100)
for i in range(100):
    for j in range(100):
        base[0][0][i][j] = i * 100 + j
print(base)
base = Variable(base.cuda())
rois = Variable(torch.FloatTensor([[0, 0, 0, 6, 6], [0, 0, 0, 6, 6]]).cuda())
print(rois)
roi_align = RoIAlignAvg(7, 7, 1.0 / 2)
roi = roi_align(base, rois)
print(roi)
# print(base)
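For comparison, a minimal sketch of the same check using torchvision's built-in operator (assuming torchvision.ops.roi_align is available in your install; it is not part of the repository code above). The boxes tensor uses the same (batch_index, x1, y1, x2, y2) layout, and output_size / spatial_scale mirror the RoIAlignAvg(7, 7, 1.0 / 2) arguments; exact values may differ slightly from the custom op because of sampling details.

import torch
from torchvision.ops import roi_align

base = torch.arange(100 * 100, dtype=torch.float32).view(1, 1, 100, 100)
rois = torch.tensor([[0, 0, 0, 6, 6], [0, 0, 0, 6, 6]], dtype=torch.float32)
out = roi_align(base, rois, output_size=(7, 7), spatial_scale=0.5)
print(out.shape)  # torch.Size([2, 1, 7, 7])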