Ejemplo n.º 1
0
    def __init__(self):
        """Create the placeholders and layers held by the FCN8 base module.

        The backbone feature / fconv slots are only initialised to ``None``
        here; nothing in this constructor fills them in. After that the
        SSD-labelled extra convolutions, the multibox layer and three RoI
        pooling layers are built, in that order.
        """
        super(_FCN8Base, self).__init__()

        # Slots left empty by this constructor.
        self.features3_4 = None
        self.features4_4 = None
        self.features5_4 = None
        self.fconv3 = None
        self.fconv4 = None
        self.fconv5 = None

        # --- SSD layers -----------------------------------------------------
        # The trailing "C,H,W"-style figures are carried over from the
        # original annotations; they presumably describe the feature-map
        # size at that point -- TODO confirm against the forward pass.
        self.norm4 = L2Norm(512, 20)  # 512,64,64

        self.ssd_conv6 = nn.Conv2d(
            512, 1024, kernel_size=3, padding=6, dilation=6)
        self.ssd_conv7 = nn.Conv2d(1024, 1024, kernel_size=1)  # conv7 1024,32,32

        # Each following stage is a 1x1 convolution followed by a stride-2
        # 3x3 convolution.
        self.ssd_conv8_1 = nn.Conv2d(1024, 256, kernel_size=1)
        self.ssd_conv8_2 = nn.Conv2d(
            256, 512, kernel_size=3, padding=1, stride=2)  # conv8_2 512,16,16

        self.ssd_conv9_1 = nn.Conv2d(512, 128, kernel_size=1)
        self.ssd_conv9_2 = nn.Conv2d(
            128, 256, kernel_size=3, padding=1, stride=2)  # conv9_2 256,8,8

        self.ssd_conv10_1 = nn.Conv2d(256, 128, kernel_size=1)
        self.ssd_conv10_2 = nn.Conv2d(
            128, 256, kernel_size=3, padding=1, stride=2)  # conv10_2 256,4,4

        self.ssd_conv11_1 = nn.Conv2d(256, 128, kernel_size=1)
        self.ssd_conv11_2 = nn.Conv2d(
            128, 256, kernel_size=3, padding=1, stride=2)  # conv11_2 256,2,2

        # --- multibox layer -------------------------------------------------
        self.multibox = MultiBoxLayer()

        # --- RoI pooling ----------------------------------------------------
        # All three poolers share the first two arguments (entries 1 and 2 of
        # the configured pooled size) and differ only in the last argument,
        # presumably the spatial scale of the pooled feature map -- verify
        # against RoIPool.
        pooled_size_cfg = cfg.TRAIN.ROI_POOLED_SIZE
        roi_dim_a = pooled_size_cfg[1]
        roi_dim_b = pooled_size_cfg[2]
        self.roi_pool5_4 = RoIPool(roi_dim_a, roi_dim_b, 1.0 / 16)
        self.roi_pool4_4 = RoIPool(roi_dim_a, roi_dim_b, 1.0 / 8)
        self.roi_pool3_4 = RoIPool(roi_dim_a, roi_dim_b, 1.0 / 4)
Ejemplo n.º 2
0
    def __init__(self,
                 nhidden,
                 n_object_cats,
                 n_predicate_cats,
                 n_vocab,
                 voc_sign,
                 max_word_length,
                 MPS_iter,
                 use_language_loss,
                 object_loss_weight,
                 predicate_loss_weight,
                 dropout=False,
                 use_kmeans_anchors=True,
                 gate_width=128,
                 nhidden_caption=256,
                 nembedding=256,
                 rnn_type='LSTM_normal',
                 rnn_droptout=0.0,
                 rnn_bias=False,
                 use_region_reg=False,
                 use_kernel=False):
        """Build the region-only variant of the network.

        Most arguments are forwarded unchanged to the parent constructor.
        ``gate_width``, ``rnn_droptout``, ``rnn_bias`` and ``use_kernel`` are
        accepted but not used by this body: the only code that referenced
        them (message passing and caption model construction) is disabled.

        Layers created here: an RPN, one RoI pooling layer for regions, two
        fully connected region layers, a 4-output region box head and a
        2-output objectiveness head.
        """
        super(faster_rcnn, self).__init__(
            nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
            max_word_length, MPS_iter, use_language_loss, object_loss_weight,
            predicate_loss_weight, dropout, use_kmeans_anchors,
            nhidden_caption, nembedding, rnn_type, use_region_reg)

        # NOTE: the object / phrase pooling and FC branches, the hierarchical
        # message passing structure, the object / predicate score and box
        # heads and the caption language model were all switched off in this
        # variant; only the region branch below is constructed.
        pooled_feature_dim = 512 * 7 * 7
        init_std = 0.01

        self.rpn = RPN(use_kmeans_anchors)
        self.roi_pool_region = RoIPool(7, 7, 1.0/16)
        self.fc6_region = FC(pooled_feature_dim, nhidden, relu=True)
        self.fc7_region = FC(nhidden, nhidden, relu=False)

        # The region box head is built unconditionally (the use_region_reg
        # guard around it was disabled). Each head is initialised right after
        # it is created; keep that interleaving so the sequence of random
        # draws stays the same.
        self.bbox_region = FC(nhidden, 4, relu=False)
        network.weights_normal_init(self.bbox_region, init_std)

        self.objectiveness = FC(nhidden, 2, relu=False)
        network.weights_normal_init(self.objectiveness, init_std)

        self.objectiveness_loss = None
Ejemplo n.º 3
0
 def __init__(self, n_action_nonagent_roles, in_filters=512, pool_size=7,
              **kwargs):
     """Set up the RoI pooling layer and the linear scoring layer.

     Args:
         n_action_nonagent_roles: number of outputs of the scoring layer.
         in_filters: factor of the flattened input size of the scoring
             layer (presumably the channel count of the pooled feature
             map -- confirm with the caller).
         pool_size: used for both size arguments of the RoI pooling layer.
         **kwargs: accepted and ignored.
     """
     super(InteractionBranch, self).__init__()
     # Input width of the linear layer: in_filters * pool_size^2.
     flat_dim = in_filters * (pool_size ** 2)
     self.roi_pool = RoIPool(pool_size, pool_size, 1.0 / 16)
     self.feat_2_scores = nn.Linear(flat_dim, n_action_nonagent_roles)
Ejemplo n.º 4
0
 def __init__(self, n_action_classes, n_action_nonagent_roles, pool_size=7,
              in_filters=512, **kwargs):
     """Set up the RoI pooling, action scoring and target location layers.

     Args:
         n_action_classes: number of outputs of the scoring layer.
         n_action_nonagent_roles: stored on the instance; the location head
             emits ``4 * n_action_nonagent_roles`` values.
         pool_size: used for both size arguments of the RoI pooling layer.
         in_filters: factor of the flattened input size of both heads
             (presumably the channel count of the pooled feature map --
             confirm with the caller).
         **kwargs: accepted and ignored.
     """
     super(HumanCentricBranch, self).__init__()
     flat_dim = in_filters * (pool_size ** 2)

     # TODO should it have its own pool layer, or reuse that of the detector?
     self.roi_pool = RoIPool(pool_size, pool_size, 1.0 / 16)
     self.feat_2_scores = nn.Linear(flat_dim, n_action_classes)
     self.n_action_nonagent_roles = n_action_nonagent_roles

     # Note: dimensionality of the output target localizations is 4
     # TODO should be 4 * number of actions.
     # TODO this model is kinda weird and probably naive.
     hidden_layer = nn.Linear(flat_dim, flat_dim)
     location_layer = nn.Linear(flat_dim, 4 * n_action_nonagent_roles)
     self.feat_2_locations = nn.Sequential(
         hidden_layer, nn.ReLU(), location_layer)
    def __init__(self, classes=None, debug=False):
        """Build the RPN, RoI pooling and fully connected heads.

        Args:
            classes: optional sized collection of class labels. When given,
                it is stored as ``self.classes`` and its length as
                ``self.n_classes``.
            debug: stored as ``self.debug``.
        """
        super(FasterRCNN, self).__init__()

        # NOTE(review): when `classes` is None nothing here sets
        # self.n_classes, yet it is read below -- presumably a class-level
        # default exists outside this method; confirm.
        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)

        pooled_feature_dim = 512 * 7 * 7
        hidden_dim = 4096

        self.rpn = RPN()
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)
        self.fc6 = FC(pooled_feature_dim, hidden_dim)
        self.fc7 = FC(hidden_dim, hidden_dim)
        # One score per class, and four box values per class.
        self.score_fc = FC(hidden_dim, self.n_classes, relu=False)
        self.bbox_fc = FC(hidden_dim, self.n_classes * 4, relu=False)

        # Loss slots, empty until assigned elsewhere.
        self.cross_entropy = None
        self.loss_box = None

        # Logging flag.
        self.debug = debug
    def __init__(self, classes=None, debug=False):
        """Build the detector heads and, if configured, the triplet branch.

        Args:
            classes: sized collection of class labels, stored as a NumPy
                array in ``self.classes``; its length becomes
                ``self.n_classes``.
            debug: forwarded to the RPN and stored as ``self.debug``.

        NOTE(review): despite the ``None`` default, ``len(classes)`` below
        needs a sized value, so callers have to pass ``classes``.
        """
        super(FasterRCNN, self).__init__()

        num_classes = len(classes)
        self.classes = np.asarray(classes)
        self.n_classes = num_classes

        self.rpn = RPN(debug=debug)
        self.proposal_target_layer = proposal_target_layer_py(num_classes)

        # NOTE(review): any other POOLING_MODE value leaves self.roi_pool
        # unset by this constructor.
        pooling_mode = cfg.POOLING_MODE
        if pooling_mode == 'align':
            self.roi_pool = RoIAlign(7, 7, 1.0 / 16)
        elif pooling_mode == 'pool':
            self.roi_pool = RoIPool(7, 7, 1.0 / 16)

        # One score per class, and four box values per class.
        self.score_fc = FC(4096, num_classes, relu=False)
        self.bbox_fc = FC(4096, num_classes * 4, relu=False)

        # Loss accumulators.
        self.cross_entropy = 0
        self.loss_box = 0
        self.triplet_loss = 0

        # Logging flag.
        self.debug = debug

        if cfg.TRIPLET.IS_TRUE:
            self.fc_sim = FC(512 * 7 * 7, 4096, relu=False)

            # Three-element weight vector with the first entry doubled.
            pos_weight = torch.ones(3)
            pos_weight[0] = 2.0

            if self.debug:
                self.set = 0
                self.match = 0

            triplet_kind = cfg.TRIPLET.LOSS
            if triplet_kind == 'euc':
                self.loss_triplet = tpl.euclidean_distance_loss
            elif triplet_kind in ('log', 'cls'):
                # Both variants share the ReLU / BCE set-up and differ only
                # in the distance function they use.
                if triplet_kind == 'log':
                    self.loss_triplet = tpl.cross_entropy_l2_dist
                else:
                    self.loss_triplet = tpl.cross_entropy_cosine_sim
                self.relu = nn.ReLU(inplace=True)
                # NOTE(review): size_average is a legacy argument; newer
                # PyTorch releases spell this reduction='sum' -- confirm the
                # targeted version before changing it.
                self.BCELoss = nn.BCELoss(weight=pos_weight,
                                          size_average=False)

        self.init_module = self._init_faster_rcnn_vgg16