Esempio n. 1
0
    def __init__(self, classes, class_agnostic):
        """Build the RPN, RoI operators and order-embedding head.

        ``self.dout_base_model`` is read here but not assigned, so it must
        already be available on the instance/class when this runs.

        Args:
            classes: Sized collection; its length is stored as ``n_classes``.
            class_agnostic: Stored unchanged on the instance.
        """
        super(_HierRCNN, self).__init__()

        # Label vectors are read once from the HDF5 file and kept as an array.
        with h5py.File('data/pretrained_model/label_vec_vrd.h5', 'r') as h5_file:
            self.label_vecs = np.array(h5_file['label_vec'])

        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss values start at zero.
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # Region proposal network and proposal-target layer.
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # RoI pooling and RoI align share the same size and scale arguments.
        pool_size = cfg.POOLING_SIZE
        roi_scale = 1.0 / 16.0
        self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, roi_scale)
        self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, roi_scale)

        # The grid is twice the pooling size when crops are max-pooled after.
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
        self.RCNN_roi_crop = _RoICrop()

        # 4096 -> 4096 -> cfg.HIER.EMBEDDING_LENGTH, with ReLU + Dropout
        # applied in front of each linear layer.
        embedding_layers = [
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, cfg.HIER.EMBEDDING_LENGTH),
        ]
        self.order_embedding = nn.Sequential(*embedding_layers)

        self.order_score = _OrderSimilarity(cfg.HIER.ORDER_DISTANCE_NORM)
Esempio n. 2
0
    def __init__(self, classes, class_agnostic):
        """Build the RPN, proposal-target layer and RoI operators.

        ``self.dout_base_model`` is read here but not assigned, so it must
        already be available on the instance/class when this runs.

        Args:
            classes: Sized collection; its length is stored as ``n_classes``.
            class_agnostic: Stored unchanged on the instance.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss values start at zero.
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # Region proposal network and proposal-target layer.
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # RoI pooling and RoI align share the same size and scale arguments.
        pool_size = cfg.POOLING_SIZE
        roi_scale = 1.0 / 16.0
        self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, roi_scale)
        self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, roi_scale)

        # Original author's note: POOLING_SIZE is the size of the features
        # obtained after RoI pooling (7 in their config) and
        # CROP_RESIZE_WITH_MAX_POOL is True there, giving 7 * 2 = 14.
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size

        self.RCNN_roi_crop = _RoICrop()
    def __init__(self, phase, cfg, size, base, extras, head, num_classes):
        """Assemble the SSD layers, prior boxes and RoI operators.

        Args:
            phase: Stored on the instance; when equal to ``'vid_test'`` the
                softmax and ``test_target`` detection layers are also created.
            cfg: Not used by this constructor (see note below).
            size: Stored on the instance.
            base: Iterable of modules wrapped into ``self.vgg``.
            extras: Iterable of modules wrapped into ``self.extras``.
            head: Indexable pair; ``head[0]`` becomes ``self.loc`` and
                ``head[1]`` becomes ``self.conf``.
            num_classes: Stored on the instance and passed to ``test_target``.
        """
        super(test_association, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # NOTE(review): the ``cfg`` argument is ignored and the outer-scope
        # name ``vid`` is stored instead -- confirm this is intentional.
        settings = vid
        self.cfg = settings
        self.priorbox = PriorBox(settings)
        # NOTE(review): ``volatile`` is a legacy autograd flag -- confirm it
        # still has the intended effect on the PyTorch version in use.
        prior_boxes = self.priorbox.forward()
        self.priors = Variable(prior_boxes, volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        loc_layers, conf_layers = head[0], head[1]
        self.loc = nn.ModuleList(loc_layers)
        self.conf = nn.ModuleList(conf_layers)

        # RoI pooling and RoI align share the same size and scale arguments.
        pool_size = settings['POOLING_SIZE']
        roi_scale = 1.0 / 16.0
        self.roi_pool = _RoIPooling(pool_size, pool_size, roi_scale)
        self.roi_align = RoIAlignAvg(pool_size, pool_size, roi_scale)

        # The grid is twice the pooling size when crops are max-pooled after.
        if settings['CROP_RESIZE_WITH_MAX_POOL']:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
        self.roi_crop = _RoICrop()

        min_dim = settings['min_dim']
        self.img_shape = (min_dim, min_dim)

        # Test-only layers.
        if phase == 'vid_test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = test_target(num_classes, 200, 0.5, 0.01, 0.45)
Esempio n. 4
0
    def __init__(self, classes, class_agnostic):
        """Build the RPN, proposal-target layer and RoI pooling operator.

        ``self.dout_base_model`` is read here but not assigned, so it must
        already be available on the instance/class when this runs.

        Args:
            classes: Sized collection; its length is stored as ``n_classes``.
            class_agnostic: Stored unchanged on the instance.
        """
        super(_fasterRCNN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss values start at zero.
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # Region proposal network and proposal-target layer.
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        pool_size = cfg.POOLING_SIZE
        self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, 1.0 / 16.0)

        # The grid is twice the pooling size when crops are max-pooled after.
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
    def __init__(self, phase, cfg, size, base, extras, head, num_classes):
        """Assemble the SSD layers, RoI operators, LSTMs and prediction heads.

        Args:
            phase: Stored on the instance; when equal to ``'vid_train'`` the
                softmax and ``train_target`` layers are also created.
            cfg: Not used by this constructor (see note below).
            size: Stored on the instance.
            base: Iterable of modules wrapped into ``self.vgg``.
            extras: Iterable of modules wrapped into ``self.extras``.
            head: Indexable pair; ``head[0]`` becomes ``self.loc`` and
                ``head[1]`` becomes ``self.conf``.
            num_classes: Stored on the instance; sizes the class head and is
                passed to the loss and ``train_target``.
        """
        super(association_lstm, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # NOTE(review): the ``cfg`` argument is ignored and the outer-scope
        # name ``vid`` is stored instead -- confirm this is intentional.
        settings = vid
        self.cfg = settings
        self.priorbox = PriorBox(settings)
        # NOTE(review): ``volatile`` is a legacy autograd flag -- confirm it
        # still has the intended effect on the PyTorch version in use.
        prior_boxes = self.priorbox.forward()
        self.priors = Variable(prior_boxes, volatile=True)
        self.size = size

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        loc_layers, conf_layers = head[0], head[1]
        self.loc = nn.ModuleList(loc_layers)
        self.conf = nn.ModuleList(conf_layers)

        # RoI pooling and RoI align share the same size and scale arguments.
        pool_size = settings['POOLING_SIZE']
        roi_scale = 1.0 / 16.0
        self.roi_pool = _RoIPooling(pool_size, pool_size, roi_scale)
        self.roi_align = RoIAlignAvg(pool_size, pool_size, roi_scale)

        # The grid is twice the pooling size when crops are max-pooled after.
        if settings['CROP_RESIZE_WITH_MAX_POOL']:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
        self.roi_crop = _RoICrop()

        min_dim = settings['min_dim']
        self.img_shape = (min_dim, min_dim)

        # 4 (bbox head width) + num_classes (class head width)
        # + 49 (association head width).
        self.tensor_len = 4 + self.num_classes + 49

        # Two stacked LSTMs: 84 -> 150 -> 300.
        # NOTE(review): the literal 84 equals ``tensor_len`` only when
        # num_classes == 31 -- confirm before using another class count.
        lstm_options = dict(batch_first=False, bidirectional=False)
        self.bnlstm1 = BNLSTM(input_size=84, hidden_size=150, **lstm_options)
        self.bnlstm2 = BNLSTM(input_size=150, hidden_size=300, **lstm_options)

        # Linear heads on top of the 300-wide LSTM output.
        lstm_width = 300
        self.cls_pred = nn.Linear(lstm_width, self.num_classes)
        self.bbox_pred = nn.Linear(lstm_width, 4)
        self.association_pred = nn.Linear(lstm_width, 49)

        self.MultiProjectLoss = MultiProjectLoss(
            self.num_classes, 0, True, 3, 0.5)

        # Training-only layers.
        if phase == 'vid_train':
            self.softmax = nn.Softmax(dim=-1)
            #self.detect = Trnsform_target(num_classes, 200, 0.5, 0.01, 0.45)
            self.detect = train_target(num_classes, 200, 0.5, 0.01, 0.45)
Esempio n. 6
0
    def __init__(self, out_size, phase, in_im_sz, fm_use):
        """Initializes RoI_layer module.

        Loads the pickled data index for the requested phase, creates the
        RoI operators and precomputes several fixed RoI arrays whose rows
        have the form ``[i, 0, 0, extent, extent]``.

        Args:
            out_size: Output size passed (twice) to the RoI align/pool layers.
            phase: ``'train'`` or ``'eval'``; selects which index file to load.
            in_im_sz: Input image size; stored as ``in_img_sz`` and used as
                the extent of the precomputed RoIs.
            fm_use: ``int(fm_use / 4)`` gives the number of RoI rows.

        Raises:
            AssertionError: If ``phase`` is neither ``'train'`` nor ``'eval'``.
        """
        super(RoI_layer, self).__init__()

        self.phase = phase  # in order to get the RoI region
        self.out_size = out_size
        self.in_img_sz = in_im_sz
        self.tm_scale = 8
        self.fm_ROI = int(fm_use / 4)
        self.Dense_scale = int(self.tm_scale / 2)

        if phase == 'train':
            data_index_file = './data/Charades_train.pkl'
        elif phase == 'eval':
            data_index_file = './data/Charades_Val_Video.pkl'
        else:
            # Raised explicitly (same exception type and message as the former
            # ``assert 0``) so the check still fires under ``python -O``
            # instead of falling through to a NameError.
            raise AssertionError('The data can not find')
        self.bx_dir = '/VIDEO_DATA/BBOX/'

        # in order to get the bbox (RPN)
        # The context manager closes the file handle deterministically.
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(data_index_file, 'rb') as index_file:
            self.data_index = pickle.load(index_file)

        # define rpn
        self.ROI_Align = RoIAlignAvg(out_size, out_size,
                                     1 / 16.0)  # scale need to change

        self.ROI_Pool = _RoIPooling(out_size, out_size,
                                    1 / 16.0)  # scale need to change

        self.Ptorch_ROI = Torch_ROI(feature_scal=(self.in_img_sz / 16))

        def _square_rois(extent, indices):
            # One row ``[i, 0, 0, extent, extent]`` per index.
            return np.array([[i, 0, 0, extent, extent] for i in indices])

        every_index = range(self.fm_ROI)
        # 32 = scale * 2 = 16*2  for  ROI Align
        self.Scene_Roi = _square_rois(self.in_img_sz - 32, every_index)
        self.Scens_Full = _square_rois(self.in_img_sz - 16, every_index)
        self.Scens_Pytorch = _square_rois(self.in_img_sz, every_index)
        # Only the odd indices 1, 3, 5, ...
        self.Scens_Sparse = _square_rois(self.in_img_sz,
                                         range(1, self.fm_ROI, 2))
Esempio n. 7
0
    def __init__(self, classes, class_agnostic):
        """Build the FPN-style RPN, proposal-target layer and RoI operators.

        ``self.dout_base_model`` is read here but not assigned, so it must
        already be available on the instance/class when this runs.

        Args:
            classes: Sized collection; its length is stored as ``n_classes``.
            class_agnostic: Stored unchanged on the instance.
        """
        super(_FPN, self).__init__()
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss values start at zero.
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # Region proposal network and proposal-target layer.
        self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # NOTE: the original paper used pool_size = 7 for cls branch, and 14
        # for mask branch, to save the computation time, we first use 14 as
        # the pool_size, and then do stride=2 pooling for cls branch.
        # NOTE(review): the size actually used below is cfg.POOLING_SIZE --
        # confirm it matches the note above.
        pool_size = cfg.POOLING_SIZE
        roi_scale = 1.0 / 16.0
        self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, roi_scale)
        self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, roi_scale)

        # The grid is twice the pooling size when crops are max-pooled after.
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
        self.RCNN_roi_crop = _RoICrop()
Esempio n. 8
0
    def __init__(self, phase, base, extras, head, extras_lstd, num_classes):
        """Assemble the SSD layers, classifier stack and post-processing.

        Args:
            phase: Stored on the instance.
            base: Iterable of modules wrapped into ``self.vgg``.
            extras: Iterable of modules wrapped into ``self.extras``.
            head: Indexable pair; ``head[0]`` becomes ``self.loc`` and
                ``head[1]`` becomes ``self.conf``.
            extras_lstd: Not used by this constructor (only referenced in the
                disabled variant noted below).
            num_classes: Stored on the instance and passed to ``Post_rois``
                and ``Detect``.
        """
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(v2)
        # NOTE(review): ``volatile`` is a legacy autograd flag -- confirm it
        # still has the intended effect on the PyTorch version in use.
        prior_boxes = self.priorbox.forward()
        self.priors = Variable(prior_boxes, volatile=True)
        self.size = 300

        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        loc_layers, conf_layers = head[0], head[1]
        self.loc = nn.ModuleList(loc_layers)
        self.conf = nn.ModuleList(conf_layers)

        # Disabled variant kept from the original source:
        #   self.extras_lstd = nn.ModuleList(extras_lstd)
        #   self.classifier = nn.ModuleList([nn.Linear(256*3*3, 21)])

        # Classifier layers: 1024*5*5 -> 4096 -> 4096 -> 21.
        # NOTE(review): the output width is the literal 21 rather than
        # ``num_classes`` -- confirm this is intended.
        classifier_layers = [
            nn.Linear(1024 * 5 * 5, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 21),
        ]
        self.classifier = nn.ModuleList(classifier_layers)

        self.softmax = nn.Softmax(dim=-1)

        # Both post-processing layers receive the same arguments.
        post_args = (num_classes, 0, 100, 0, 0.65)
        self.post_rois = Post_rois(*post_args)
        self.detect = Detect(*post_args)

        #self.ROI_POOL = RoIPoolFunction(5, 5, 1.0/19.0)
        #self.roi_pooling = _roi_pooling
        self.roi_pool = _RoIPooling(5, 5, 1.0 / 16.0)
Esempio n. 9
0

if __name__ == '__main__':
    # Manual smoke test: run PSRoIPool and _RoIPooling on the same random
    # feature map and RoIs, printing the outputs and their sizes.
    # Requires a CUDA device because the tensors are moved with ``.cuda()``.
    import torch
    import numpy as np
    from torch.autograd import Variable
    from lib.model.roi_pooling.modules.roi_pool import _RoIPooling

    roi_scale = 1 / 16.0

    # Named ``features`` rather than ``input`` to avoid shadowing the builtin.
    features = torch.randn(2, 21 * 7 * 7, 50, 72)

    # Five values per row; the first column is 0 or 1
    # (presumably the batch index -- confirm against the pooling layers).
    roi_rows = [
        [0.0000, 350.6689, 211.0240, 779.0886, 777.7496],
        [0.0000, 744.0627, 277.4919, 988.4307, 602.7589],
        [1.0000, 350.6689, 211.0240, 779.0886, 777.7496],
        [1.0000, 744.0627, 277.4919, 988.4307, 602.7589],
    ]
    rois = torch.from_numpy(np.array(roi_rows)).float()

    pool = PSRoIPool(7, 7, roi_scale, 7, 21)
    features = Variable(features.cuda())
    rois = Variable(rois.cuda())
    print(rois.size(), features.size())
    print(features)
    ps_out = pool(features, rois)
    print(ps_out)
    print(ps_out.size())

    print('============================')
    roi_pool = _RoIPooling(7, 7, roi_scale)
    plain_out = roi_pool(features, rois.view(-1, 5))
    print(plain_out)
    print(plain_out.size())