Esempio n. 1
0
    def __init__(self, classes, class_agnostic):
        """Record the class list and build the RPN and RoI feature extractors.

        ``self.dout_base_model`` is read here but not assigned in this method,
        so it has to be available on the instance/class beforehand
        (presumably provided by a subclass -- verify).

        :param classes: sized collection of class labels; its length is stored
            as ``self.n_classes``.
        :param class_agnostic: stored unchanged as ``self.class_agnostic``.
        """
        super(_FPN, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss slots, zero until something else fills them in.
        self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

        # Region proposal network and the proposal-target layer.
        self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # NOTE (kept from the original): the original paper used pool_size = 7
        # for the cls branch and 14 for the mask branch; to save computation
        # time, 14 is used first as the pool_size, followed by stride=2 pooling
        # for the cls branch.
        pool_size = cfg.POOLING_SIZE
        spatial_scale = 1.0 / 16.0
        self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, spatial_scale)
        self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, spatial_scale)

        # The grid is twice the pooling size when crop-resize is followed by
        # max pooling, otherwise it equals the pooling size.
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
        self.RCNN_roi_crop = _RoICrop()
Esempio n. 2
0
    def __init__(self, classes, class_agnostic):
        """Store class metadata and create the RPN plus the RoI layers.

        ``self.dout_base_model`` is consumed but never assigned in this
        method; it must already exist when this runs (presumably set by a
        subclass -- verify).

        :param classes: sized collection of class labels; ``len(classes)``
            becomes ``self.n_classes``.
        :param class_agnostic: stored unchanged as ``self.class_agnostic``.
        """
        super(_FPN, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss slots start out at zero.
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # Region proposal network and its proposal-target layer.
        self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # NOTE (kept from the original): the original paper used pool_size = 7
        # for the cls branch and 14 for the mask branch; to save computation
        # time, 14 is used first as the pool_size, followed by stride=2 pooling
        # for the cls branch.
        # Both RoI layers take the same three positional arguments.
        roi_args = (cfg.POOLING_SIZE, cfg.POOLING_SIZE, 1.0 / 16.0)
        self.RCNN_roi_pool = _RoIPooling(*roi_args)
        self.RCNN_roi_align = RoIAlignAvg(*roi_args)

        # Doubled grid only when crop-resize is combined with max pooling.
        grid_factor = 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else 1
        self.grid_size = cfg.POOLING_SIZE * grid_factor
        self.RCNN_roi_crop = _RoICrop()
    def __init__(self, classes):
        """Store the class list and build the stereo RPN and RoI-align layers.

        ``self.dout_base_model`` is read but not assigned here, so it must be
        available before this constructor runs (presumably provided by a
        subclass -- verify).

        :param classes: sized collection of class labels; its length is stored
            as ``self.n_classes``.
        """
        super(_StereoRCNN, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)

        # Every loss slot starts at zero.
        for loss_attr in ('RCNN_loss_cls',
                          'RCNN_loss_bbox_left_right',
                          'RCNN_loss_dis',
                          'RCNN_loss_dim',
                          'RCNN_loss_dim_orien',
                          'RCNN_loss_kpts'):
            setattr(self, loss_attr, 0)

        # Kernel size 1 with stride 2.
        self.maxpool2d = nn.MaxPool2d(1, stride=2)

        # Region proposal network and the proposal-target layer.
        self.RCNN_rpn = _Stereo_RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # Two RoI-align layers with the same scale; the keypoint one uses
        # twice the pooling size per side.
        # NOTE(review): the trailing 0 is presumably the sampling ratio --
        # confirm against the ROIAlign signature.
        box_size = cfg.POOLING_SIZE
        kpts_size = cfg.POOLING_SIZE * 2
        spatial_scale = 1.0 / 16.0
        self.RCNN_roi_align = ROIAlign((box_size, box_size), spatial_scale, 0)
        self.RCNN_roi_kpts_align = ROIAlign((kpts_size, kpts_size),
                                            spatial_scale, 0)
Esempio n. 4
0
    def __init__(self, classes, class_agnostic):
        """Build the RPN, multi-level RoI layers and the classification/box heads.

        ``self._init_modules()`` and ``self.build_roi_layers(...)`` are called
        on the instance and must be provided elsewhere in the class hierarchy.
        ``self.dout_base_model`` is read after ``_init_modules()`` has run
        (presumably that call is what makes it available -- verify).

        :param classes: sized collection of class labels; its length is stored
            as ``self.n_classes`` and sizes the classification head.
        :param class_agnostic: when truthy the box head has 4 outputs,
            otherwise ``4 * (n_classes - 1)``.
        """
        super(_FPN, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss slots, zero until filled in elsewhere.
        self.RCNN_loss_cls = self.RCNN_loss_bbox = 0

        # Kernel size 1 with stride 2.
        self.maxpool2d = nn.MaxPool2d(1, stride=2)

        self._init_modules()

        # Region proposal network and the proposal-target layer.
        self.RCNN_rpn = _RPN_FPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        # One RoI layer description shared by all four feature-map strides.
        # NOTE(review): output_size is hard-coded to 7 while RCNN_top below is
        # sized from cfg.POOLING_SIZE -- confirm the two are meant to agree.
        roi_layer_spec = dict(type='RoIAlign', output_size=7, sampling_ratio=0)
        self.roi_layers = self.build_roi_layers(roi_layer_spec, [4, 8, 16, 32])

        pool_size = cfg.POOLING_SIZE
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool_size * 2
        else:
            self.grid_size = pool_size
        # self.RCNN_roi_crop = _RoICrop()

        # Heads operate on the 1024-wide output of RCNN_top.
        hidden_dim = 1024
        self.RCNN_cls_score = nn.Linear(hidden_dim, self.n_classes)
        bbox_outputs = 4 if self.class_agnostic else 4 * (self.n_classes - 1)
        self.RCNN_bbox_pred = nn.Linear(hidden_dim, bbox_outputs)

        # Two fully connected layers with in-place ReLU on flattened
        # 256 x pool x pool input.
        top_layers = [
            nn.Linear(256 * pool_size * pool_size, hidden_dim),
            nn.ReLU(True),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(True),
        ]
        self.RCNN_top = nn.Sequential(*top_layers)

        self.upsample = Upsample()
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.gradients = None
Esempio n. 5
0
    def __init__(self, baseModels, obj_classes, att_classes, rel_classes,
                 dout_base_model, pooled_feat_dim):
        """Build the detector, relation proposal, message passing and GCN parts.

        :param baseModels: stored unchanged as ``self.RCNN_base_model``.
        :param obj_classes: sized collection of object classes.
        :param att_classes: sized collection of attribute classes, or ``None``
            (counted as zero classes).
        :param rel_classes: sized collection of relation classes, or ``None``
            (counted as zero classes).
        :param dout_base_model: forwarded to ``_RPN``.
        :param pooled_feat_dim: forwarded to ``_RelPN`` and to the
            feature-level GCN modules.
        """
        super(_ISGG, self).__init__()
        self.obj_classes = obj_classes
        self.n_obj_classes = len(obj_classes)

        # Absence is tested by identity: ``== None`` would dispatch to a
        # custom ``__eq__`` (e.g. element-wise comparison on arrays) instead
        # of answering "was nothing passed?".
        self.att_classes = att_classes
        self.n_att_classes = 0 if att_classes is None else len(att_classes)

        self.rel_classes = rel_classes
        self.n_rel_classes = 0 if rel_classes is None else len(rel_classes)

        # define base model
        self.RCNN_base_model = baseModels

        # define rpn
        self.RCNN_rpn = _RPN(dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(
            self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
        self.RCNN_proposal_target_msdn = _ProposalTargetLayer_MSDN(
            self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        # Grid is doubled when crop-resize is followed by max pooling.
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # Relation proposal network and its own RoI layers, only when
        # relations are enabled in the config.
        if cfg.HAS_RELATIONS:
            self.RELPN_rpn = _RelPN(pooled_feat_dim, self.n_obj_classes)
            self.RELPN_proposal_target = _RelProposalTargetLayer(
                self.n_rel_classes)

            self.RELPN_roi_pool = _RoIPooling(cfg.POOLING_SIZE,
                                              cfg.POOLING_SIZE, 1.0 / 16.0)
            self.RELPN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE,
                                               cfg.POOLING_SIZE, 1.0 / 16.0)
            self.RELPN_grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
            self.RELPN_roi_crop = _RoICrop()

        # No dimensionality reduction is applied: the "reduced" dimension is
        # the pooled feature dimension itself.
        reduced_pooled_feat_dim = pooled_feat_dim

        # define mps
        nhidden = 512
        dropout = False
        gate_width = 1
        use_kernel_function = False

        self.imp = _IMP(nhidden,
                        dropout,
                        gate_width=gate_width,
                        use_kernel_function=use_kernel_function
                        )  # the hierarchical message passing structure
        # NOTE(review): 0.01 is presumably the standard deviation of the
        # normal initialisation -- confirm against network.weights_normal_init.
        network.weights_normal_init(self.imp, 0.01)

        # self.fc4obj = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
        # self.fc4att = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
        # self.fc4rel = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)

        # self.RCNN_gcn_obj_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_obj_classes)
        # self.RCNN_gcn_att_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_att_classes)
        # self.RCNN_gcn_rel_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_rel_classes)

        if cfg.GCN_LAYERS > 0:
            if cfg.GCN_ON_SCORES:
                self.GRCNN_gcn_score = _GCN_1(self.n_obj_classes,
                                              self.n_att_classes,
                                              self.n_rel_classes)

            # The feature-GCN checks below are independent ``if`` statements,
            # not an ``elif`` chain: when several flags are set, a later
            # assignment replaces the ``GRCNN_gcn_feat`` built by an earlier one.
            if cfg.GCN_ON_FEATS and not cfg.GCN_SHARE_FEAT_PARAMS:
                self.GRCNN_gcn_feat = _GCN_2(reduced_pooled_feat_dim)

            if cfg.GCN_ON_FEATS and cfg.GCN_SHARE_FEAT_PARAMS:
                self.GRCNN_gcn_feat = _GCN_3(reduced_pooled_feat_dim)

            if cfg.GCN_ON_FEATS and cfg.GCN_LOW_RANK_PARAMS:
                self.GRCNN_gcn_feat = _GCN_4(reduced_pooled_feat_dim)

        # Attention modules are created regardless of cfg.GCN_LAYERS.
        if cfg.GCN_HAS_ATTENTION:
            self.GRCNN_gcn_att1 = _GCN_ATT(self.n_obj_classes)
            self.GRCNN_gcn_att2 = _GCN_ATT(self.n_obj_classes)

        # Loss slots start at zero.
        self.RCNN_loss_obj_cls = 0
        self.RCNN_loss_att_cls = 0
        self.RCNN_loss_rel_cls = 0
        self.RCNN_loss_bbox = 0
Esempio n. 6
0
    def __init__(self, baseModels, obj_classes, att_classes, rel_classes,
                 dout_base_model, pooled_feat_dim):
        """Build the detector, relation proposal network and GCN modules.

        :param baseModels: stored unchanged as ``self.RCNN_base_model``.
        :param obj_classes: sized collection of object classes.
        :param att_classes: sized collection of attribute classes, or ``None``
            (counted as zero classes).
        :param rel_classes: sized collection of relation classes, or ``None``
            (counted as zero classes).
        :param dout_base_model: forwarded to ``_RPN``.
        :param pooled_feat_dim: forwarded to ``_RelPN`` and to the
            feature-level GCN modules ``_GCN_2`` / ``_GCN_3`` / ``_GCN_4``.
        """
        super(_graphRCNN, self).__init__()
        self.obj_classes = obj_classes
        self.n_obj_classes = len(obj_classes)

        # Absence is tested by identity: ``== None`` would dispatch to a
        # custom ``__eq__`` (e.g. element-wise comparison on arrays) instead
        # of answering "was nothing passed?".
        self.att_classes = att_classes
        self.n_att_classes = 0 if att_classes is None else len(att_classes)

        self.rel_classes = rel_classes
        self.n_rel_classes = 0 if rel_classes is None else len(rel_classes)

        # define base model
        self.RCNN_base_model = baseModels

        # define rpn
        self.RCNN_rpn = _RPN(dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(
            self.n_obj_classes, self.n_att_classes, self.n_rel_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)
        # Grid is doubled when crop-resize is followed by max pooling.
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()

        # Relation proposal network and its own RoI layers, only when
        # relations are enabled in the config.
        if cfg.HAS_RELATIONS:
            self.RELPN_rpn = _RelPN(pooled_feat_dim, self.n_obj_classes)
            self.RELPN_proposal_target = _RelProposalTargetLayer(
                self.n_rel_classes)

            self.RELPN_roi_pool = _RoIPooling(cfg.POOLING_SIZE,
                                              cfg.POOLING_SIZE, 1.0 / 16.0)
            self.RELPN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE,
                                               cfg.POOLING_SIZE, 1.0 / 16.0)
            self.RELPN_grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
            self.RELPN_roi_crop = _RoICrop()

        # No dimensionality reduction is applied: the "reduced" dimension is
        # the pooled feature dimension itself.
        reduced_pooled_feat_dim = pooled_feat_dim

        # self.fc4obj = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
        # self.fc4att = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)
        # self.fc4rel = nn.Linear(pooled_feat_dim, reduced_pooled_feat_dim)

        # self.RCNN_gcn_obj_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_obj_classes)
        # self.RCNN_gcn_att_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_att_classes)
        # self.RCNN_gcn_rel_cls_score = nn.Linear(reduced_pooled_feat_dim, self.n_rel_classes)

        if cfg.GCN_LAYERS > 0:
            if cfg.GCN_ON_SCORES:
                self.GRCNN_gcn_score = _GCN_1(self.n_obj_classes,
                                              self.n_att_classes,
                                              self.n_rel_classes)

            # The feature-GCN checks below are independent ``if`` statements,
            # not an ``elif`` chain: when several flags are set, a later
            # assignment replaces the ``GRCNN_gcn_feat`` built by an earlier one.
            if cfg.GCN_ON_FEATS and not cfg.GCN_SHARE_FEAT_PARAMS:
                self.GRCNN_gcn_feat = _GCN_2(reduced_pooled_feat_dim)

            if cfg.GCN_ON_FEATS and cfg.GCN_SHARE_FEAT_PARAMS:
                self.GRCNN_gcn_feat = _GCN_3(reduced_pooled_feat_dim)

            if cfg.GCN_ON_FEATS and cfg.GCN_LOW_RANK_PARAMS:
                self.GRCNN_gcn_feat = _GCN_4(reduced_pooled_feat_dim)

            # This variant takes its three dimensions from the config rather
            # than from pooled_feat_dim.
            if cfg.GCN_ON_FEATS and cfg.GCN_DIFF_FEAT_DIM:
                self.GRCNN_gcn_feat = _GCN_5(cfg.OBJECT_DIM, cfg.ATTRIBUTE_DIM,
                                             cfg.RELATION_DIM)

        # Attention modules are created regardless of cfg.GCN_LAYERS.
        if cfg.GCN_HAS_ATTENTION:
            self.GRCNN_gcn_att1 = _GCN_ATT(self.n_obj_classes)
            self.GRCNN_gcn_att2 = _GCN_ATT(self.n_obj_classes)

        # Loss slots start at zero.
        self.RCNN_loss_obj_cls = 0
        self.RCNN_loss_att_cls = 0
        self.RCNN_loss_rel_cls = 0
        self.RCNN_loss_bbox = 0