Esempio n. 1
0
    def __init__(self, nhidden, n_object_cats, n_predicate_cats, n_vocab,
                 voc_sign, max_word_length, MPS_iter, use_language_loss,
                 object_loss_weight, predicate_loss_weight,
                 dropout=False,
                 use_kmeans_anchors=False,
                 gate_width=128,
                 nhidden_caption=256,
                 nembedding=256,
                 rnn_type='LSTM_normal',
                 rnn_droptout=0.0,
                 rnn_bias=False,
                 use_region_reg=False,
                 use_kernel=False):
        """Build the sub-modules of the hierarchical descriptive model.

        Most arguments are forwarded unchanged to the parent constructor.
        Several settings are then read back from ``self`` (``n_classes_obj``,
        ``n_classes_pred``, ``use_region_reg``, ``rnn_type``, ``n_vocab``,
        ``nhidden``, ``nhidden_caption``, ``nembedding``, ``max_word_length``
        and ``voc_sign``); presumably the parent stores them -- confirm
        against the base class.

        Arguments consumed directly in this method:
            nhidden: width used for every FC layer created here.
            MPS_iter: when equal to 0, ``self.mps`` is set to ``None``.
            use_language_loss: selects the full-size or the placeholder
                ``Language_Model``.
            dropout, gate_width, use_kernel: forwarded to
                ``Hierarchical_Message_Passing_Structure``.
            use_kmeans_anchors: forwarded to ``RPN``.
            rnn_droptout, rnn_bias: forwarded to ``Language_Model`` (the
                spelling ``rnn_droptout`` is part of the public signature).
        """
        super(Hierarchical_Descriptive_Model, self).__init__(
            nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
            max_word_length, MPS_iter, use_language_loss, object_loss_weight,
            predicate_loss_weight, dropout, use_kmeans_anchors,
            nhidden_caption, nembedding, rnn_type, use_region_reg)

        roi_feature_dim = 512 * 7 * 7

        self.rpn = RPN(use_kmeans_anchors)

        # One RoI pooling layer per branch (object, phrase, region).
        for branch in ('object', 'phrase', 'region'):
            setattr(self, 'roi_pool_' + branch, RoIPool(7, 7, 1.0/16))

        # Per branch: fc6 built with relu=True followed by fc7 with relu=False.
        for branch in ('obj', 'phrase', 'region'):
            setattr(self, 'fc6_' + branch,
                    FC(roi_feature_dim, nhidden, relu=True))
            setattr(self, 'fc7_' + branch, FC(nhidden, nhidden, relu=False))

        # The hierarchical message passing structure is optional.
        if MPS_iter != 0:
            self.mps = Hierarchical_Message_Passing_Structure(
                nhidden, dropout,
                gate_width=gate_width, use_kernel_function=use_kernel)
            network.weights_normal_init(self.mps, 0.01)
        else:
            self.mps = None

        self.score_obj = FC(nhidden, self.n_classes_obj, relu=False)
        self.bbox_obj = FC(nhidden, self.n_classes_obj * 4, relu=False)
        self.score_pred = FC(nhidden, self.n_classes_pred, relu=False)

        if not self.use_region_reg:
            self.bbox_region = None
        else:
            self.bbox_region = FC(nhidden, 4, relu=False)
            network.weights_normal_init(self.bbox_region, 0.01)

        self.objectiveness = FC(nhidden, 2, relu=False)

        if use_language_loss:
            caption_kwargs = dict(
                rnn_type=self.rnn_type, ntoken=self.n_vocab,
                nimg=self.nhidden, nhidden=self.nhidden_caption,
                nembed=self.nembedding, nlayers=2,
                nseq=self.max_word_length, voc_sign=self.voc_sign,
                bias=rnn_bias, dropout=rnn_droptout)
        else:
            # Minimal placeholder model, just to make the program run.
            caption_kwargs = dict(
                rnn_type=self.rnn_type, ntoken=self.n_vocab, nimg=1,
                nhidden=1, nembed=1, nlayers=1, nseq=1,
                voc_sign=self.voc_sign)
        self.caption_prediction = Language_Model(**caption_kwargs)

        # Initialise the prediction heads after all modules exist; the order
        # of these calls is kept as in the original code.
        head_inits = (
            (self.score_obj, 0.01),
            (self.bbox_obj, 0.005),
            (self.score_pred, 0.01),
            (self.objectiveness, 0.01),
        )
        for head, std in head_inits:
            network.weights_normal_init(head, std)

        self.objectiveness_loss = None
Esempio n. 2
0
    def __init__(self, classes=None, debug=False, backbone='VGG'):
        """Create the RPN, the RoI pooling layer and the detection heads.

        Args:
            classes: optional sequence of class names. When given it sets
                ``self.classes`` and ``self.n_classes``. When ``None`` these
                attributes are not assigned here, so ``self.n_classes`` must
                already be available (for example as a class attribute, which
                is not visible in this block).
            debug: stored on ``self.debug``.
            backbone: ``'VGG'`` or ``'RESNET'``; also forwarded to ``RPN``.

        Raises:
            ValueError: if ``backbone`` is neither ``'VGG'`` nor ``'RESNET'``.
        """
        super(FasterRCNN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)

        self.rpn = RPN(backbone=backbone)
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)
        self._backbone = backbone
        if backbone == 'VGG':
            self.fc6 = FC(512 * 7 * 7, 4096)
            self.fc7 = FC(4096, 4096)
            self.score_fc = FC(4096, self.n_classes, relu=False)
            self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)
        elif backbone == "RESNET":
            # NOTE(review): _make_layer is called on the class rather than on
            # an instance -- confirm the project's ResNet supports this.
            self.layer4 = ResNet._make_layer(Bottleneck, 512, 3, stride=2)
            self.score_fc = FC(2048 * 7 * 7, self.n_classes, relu=False)
            self.bbox_fc = FC(2048 * 7 * 7, self.n_classes * 4, relu=False)
        else:
            # Previously an unknown backbone fell through both branches and
            # produced a model without score_fc / bbox_fc, which only failed
            # later on attribute access. Fail early with a clear message.
            raise ValueError(
                "Unsupported backbone: {!r} (expected 'VGG' or 'RESNET')"
                .format(backbone))

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
    def __init__(self, classes=None, debug=False, arch='vgg16'):
        """Create the RPN, the fully connected trunk and the detection heads.

        Args:
            classes: optional sequence of class names. When given it sets
                ``self.classes`` and ``self.n_classes``. When ``None`` both
                attributes must already be available (for example as class
                attributes, not visible here) because they are printed and
                used below.
            debug: stored on ``self.debug``.
            arch: backbone name; only ``'vgg16'`` is handled.

        Raises:
            ValueError: if ``arch`` is not ``'vgg16'``.
        """
        super(FasterRCNN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
        print('n_classes: {}\n{}'.format(self.n_classes, self.classes))

        if arch == 'vgg16':
            cnn_arch = models.vgg16(pretrained=False)  # w/o bn
            self.rpn = RPN(features=cnn_arch.features)
            self.fcs = nn.Sequential(nn.Linear(512 * 7 * 7, 4096),
                                     nn.ReLU(True), nn.Dropout(),
                                     nn.Linear(4096, 4096), nn.ReLU(True),
                                     nn.Dropout())
        else:
            # Previously any other value left self.rpn and self.fcs undefined
            # and the model only failed later on attribute access.
            raise ValueError(
                "Unsupported arch: {!r} (expected 'vgg16')".format(arch))

        self.roi_pool = RoIPool(7, 7, 1.0 / 16)
        # The heads take the 4096-wide output of self.fcs.
        self.score_fc = FC(4096, self.n_classes, relu=False)
        self.bbox_fc = FC(4096, self.n_classes * 4, relu=False)

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
    def __init__(self, classes=None, debug=False):
        """Create the RPN, RoI pooling, two FC layers and the output heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes``. When ``None`` the
                attribute ``self.n_classes`` must already be available (for
                example as a class attribute, not visible here).
            debug: stored on ``self.debug``.
        """
        super(FasterRCNN, self).__init__()

        if classes is not None:
            self.classes, self.n_classes = classes, len(classes)

        pooled_dim = 512 * 7 * 7
        hidden_dim = 4096

        self.rpn = RPN()
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)
        self.fc6 = FC(pooled_dim, hidden_dim)
        self.fc7 = FC(hidden_dim, hidden_dim)
        self.score_fc = FC(hidden_dim, self.n_classes, relu=False)
        self.bbox_fc = FC(hidden_dim, self.n_classes * 4, relu=False)

        # Loss slots, filled in elsewhere.
        self.cross_entropy = None
        self.loss_box = None
        self.cost = None
        # A CRNN sub-model and its checkpoint loading were sketched here in
        # commented-out code; no such module is constructed.

        self.debug = debug
Esempio n. 5
0
    def __init__(self, classes=None, debug=False, training=True):
        """Build the convolutional features, RoI pooling and scoring heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed.
            debug: stored on ``self.debug``.
            training: accepted but not used inside this constructor.
        """
        super(WSDDN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions; the last one is not followed by a ReLU.
        conv_stack = [
            nn.Conv2d(3, 64,
                      kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(64, 192,
                      kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(192, 384,
                      kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(384, 256,
                      kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(256, 256,
                      kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ]
        self.features = nn.Sequential(*conv_stack)

        self.roi_pool = RoIPool(pooled_height=6,
                                pooled_width=6,
                                spatial_scale=1.0 / 16)

        # 9216 == 256 * 6 * 6, presumably the flattened 6x6 pooled map of
        # the 256-channel feature output -- confirm against forward().
        fc_stack = [
            nn.Dropout(p=0.0),
            nn.Linear(in_features=9216, out_features=4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
        ]
        self.classifier = nn.Sequential(*fc_stack)

        # NOTE(review): the output width is hard-coded to 20 rather than
        # taken from self.n_classes -- confirm this is intended.
        self.score_cls = FC(in_features=4096, out_features=20)
        self.score_det = FC(in_features=4096, out_features=20)

        # loss
        self.cross_entropy = None

        # for log
        self.debug = debug
    def __init__(self, classes=None, debug=False, training=True):
        """Build the convolutional features, RoI pooling and scoring heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed.
            debug: stored on ``self.debug``.
            training: accepted but not used inside this constructor.
        """
        super(WSDDN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions; the last one is not followed by a ReLU.
        feature_layers = [
            nn.Conv2d(3, 64, 11, stride=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(64, 192, 5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.Conv2d(192, 384, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, stride=1, padding=1),
        ]
        self.features = nn.Sequential(*feature_layers)

        self.roi_pool = RoIPool(6, 6, 1.0 / 16)

        hidden_dim = 4096
        classifier_layers = [
            nn.Linear(9216, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

        # NOTE(review): the output width is hard-coded to 20 rather than
        # taken from self.n_classes -- confirm this is intended.
        self.score_cls = FC(hidden_dim, 20, False)
        self.score_det = FC(hidden_dim, 20, False)

        # loss
        self.cross_entropy = None

        # for log
        self.debug = debug
Esempio n. 7
0
    def __init__(self, classes=None, debug=False):
        """Create the RPN, RoI pooling and the 2048-input output heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes``. When ``None`` the
                attribute ``self.n_classes`` must already be available.
            debug: stored on ``self.debug``.

        ``self.fixed_blocks`` is read here but not assigned in this method,
        so it has to be provided elsewhere (for example as a class attribute).
        """
        super(FasterRCNN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)

        self.rpn = RPN()
        # The RPN receives the same fixed_blocks value as this model.
        self.rpn._init_modules(fixed_blocks=self.fixed_blocks)
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)

        # The earlier fc6/fc7 layers and 4096-wide heads were left commented
        # out in the original; the heads now take 2048 features directly.
        head_in_features = 2048
        self.score_fc = FC(head_in_features, self.n_classes, relu=False)
        self.bbox_fc = FC(head_in_features, self.n_classes * 4, relu=False)

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
Esempio n. 8
0
 def __init__(self):
     """Create the RPN, RoI pooling, two FC layers and the output heads.

     ``self.n_classes`` is read but not assigned in this method, so it has
     to be provided elsewhere (for example as a class attribute).
     """
     super(FasterRCNN, self).__init__()

     pooled_dim, hidden_dim = 512 * 7 * 7, 4096

     self.rpn = RPN()
     self.roi_pool = RoIPool(7, 7, 1.0/16)
     self.fc6 = FC(pooled_dim, hidden_dim)
     self.fc7 = FC(hidden_dim, hidden_dim)
     self.score_fc = FC(hidden_dim, self.n_classes, relu=False)
     self.bbox_fc = FC(hidden_dim, self.n_classes * 4, relu=False)
Esempio n. 9
0
    def __init__(self, classes=None, debug=False, training=True):
        """Build the convolutional features, RoI pooling and scoring heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed.
            debug: stored on ``self.debug``.
            training: accepted but not used inside this constructor.
        """
        super(WSDDN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions, each followed by an in-place ReLU.
        backbone_layers = [
            nn.Conv2d(3, 64,
                      kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(64, 192,
                      kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(192, 384,
                      kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256,
                      kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256,
                      kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*backbone_layers)

        self.roi_pool = RoIPool(6, 6, 1.0/16)

        hidden_dim = 4096
        head_layers = [
            nn.Linear(9216, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
        ]
        self.classifier = nn.Sequential(*head_layers)

        # Plain linear score layers with 20 outputs each. The original kept
        # commented-out alternatives: FC-based heads, and a smaller setup
        # using RoIPool(2, 2, 0.06) with 1024-wide layers.
        self.score_cls = nn.Linear(hidden_dim, 20)
        self.score_det = nn.Linear(hidden_dim, 20)

        # loss
        self.cross_entropy = None

        # for log
        self.debug = debug
Esempio n. 10
0
    def __init__(self, classes=None, debug=False, training=True):
        """Build the convolutional features, RoI pooling, heads and loss.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed.
            debug: stored on ``self.debug``.
            training: accepted but not used inside this constructor.
        """
        super(WSDDN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions, each followed by an in-place ReLU.
        self.features = nn.Sequential(
            nn.Conv2d(3,
                      64,
                      kernel_size=(11, 11),
                      stride=(4, 4),
                      padding=(2, 2)), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(64,
                      192,
                      kernel_size=(5, 5),
                      stride=(1, 1),
                      padding=(2, 2)), nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(192,
                      384,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=(1, 1)), nn.ReLU(inplace=True),
            nn.Conv2d(384,
                      256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=(1, 1)), nn.ReLU(inplace=True),
            nn.Conv2d(256,
                      256,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=(1, 1)), nn.ReLU(inplace=True))

        self.roi_pool = RoIPool(6, 6, 1.0 / 16)

        self.classifier = nn.Sequential(
            nn.Linear(in_features=9216, out_features=4096),
            nn.ReLU(inplace=True), nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True))
        # NOTE(review): the output width is hard-coded to 20 rather than
        # taken from self.n_classes -- confirm this is intended.
        self.score_cls = nn.Sequential(
            nn.Linear(in_features=4096, out_features=20))
        self.score_det = nn.Sequential(
            nn.Linear(in_features=4096, out_features=20))

        # loss
        self.cross_entropy = None
        # `size_average=True` is deprecated; `reduction='mean'` is the
        # documented equivalent and avoids the deprecation warning.
        # The .cuda() call is kept from the original.
        self.criterion = nn.BCELoss(reduction='mean').cuda()

        # for log
        self.debug = debug
Esempio n. 11
0
    def __init__(self, classes, debug=False):
        """Create the RPN, RoI pooling, ResNet152 stage and output heads.

        Args:
            classes: sequence of class names; stored on ``self.classes`` and
                its length on ``self.n_classes``.
            debug: stored on ``self.debug``.
        """
        super(FasterRCNN, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)

        self.rpn = RPN()
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)
        self.res5_features = ResNet152('layer4')

        # Both heads take 2048 input features.
        head_in_features = 2048
        self.score_fc = FC(head_in_features, self.n_classes, relu=False)
        self.bbox_fc = FC(head_in_features, self.n_classes * 4, relu=False)

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
Esempio n. 12
0
    def __init__(self, classes, debug=False):
        """Create the RPN, RoI pooling, InceptionResnetV2 stage and heads.

        Args:
            classes: sequence of class names; stored on ``self.classes`` and
                its length on ``self.n_classes``.
            debug: stored on ``self.debug``.
        """
        super(FasterRCNN, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)

        self.rpn = RPN()
        # The spatial scale is 17/299 here rather than the 1/16 used by the
        # other variants.
        self.roi_pool = RoIPool(7, 7, 17.0 / 299)
        self.res5_features = InceptionResnetV2('avgpool')

        # Both heads take 1536 input features.
        head_in_features = 1536
        self.score_fc = FC(head_in_features, self.n_classes, relu=False)
        self.bbox_fc = FC(head_in_features, self.n_classes * 4, relu=False)

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
Esempio n. 13
0
    def __init__(self, classes=None, debug=False, training=True):
        """Build the features, RoI pooling, shared trunk and softmax heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed. When
                ``None`` the attribute ``self.n_classes`` must already be
                available because the heads below are sized from it.
            debug: stored on ``self.debug``.
            training: accepted but not used inside this constructor.
        """
        super(WSDDN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions, each followed by an in-place ReLU.
        feature_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*feature_layers)

        # NOTE(review): the spatial scale is 1/17 here; sibling variants use
        # 1/16 -- confirm this value is intended.
        self.roi_pool = RoIPool(7, 7, 1.0 / 17)

        hidden_dim = 4096
        shared_layers = [
            nn.Linear(256 * 7 * 7, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
        ]
        self.classifier_share = nn.Sequential(*shared_layers)

        # Two heads of identical shape; they differ only in the softmax
        # axis (dim=1 for classifier_c, dim=0 for classifier_d).
        self.classifier_c = nn.Sequential(
            nn.Linear(hidden_dim, self.n_classes), nn.Softmax(dim=1))
        self.classifier_d = nn.Sequential(
            nn.Linear(hidden_dim, self.n_classes), nn.Softmax(dim=0))

        # loss
        self.cross_entropy = None

        # for log
        self.debug = debug
Esempio n. 14
0
    def __init__(self, classes, debug=False):
        """Create the RPN, proposal target layer, RoI pooling and heads.

        Every FC layer is wrapped in ``nn.DataParallel``.

        Args:
            classes: sequence of class names (asserted not ``None``); stored
                on ``self.classes`` and its length on ``self.n_classes``.
            debug: stored on ``self.debug``.
        """
        super(FastRCNN, self).__init__()
        assert classes is not None
        self.classes = classes
        self.n_classes = len(classes)

        # No feature extractor is built here (a vgg16() assignment was left
        # commented out in the original).
        self.rpn = RPN()
        self.proposal_target_layer = ProposalTargetLayer(self.n_classes)
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)

        hidden_dim = 4096
        fc6 = FC(512 * 7 * 7, hidden_dim)
        self.fc6 = nn.DataParallel(fc6)
        fc7 = FC(hidden_dim, hidden_dim)
        self.fc7 = nn.DataParallel(fc7)
        score_fc = FC(hidden_dim, self.n_classes, relu=False)
        self.score_fc = nn.DataParallel(score_fc)
        bbox_fc = FC(hidden_dim, self.n_classes * 4, relu=False)
        self.bbox_fc = nn.DataParallel(bbox_fc)

        self.debug = debug
Esempio n. 15
0
    def __init__(self, classes, debug=False):
        """Create the vgg16 features, RoI pooling, FC layers and heads.

        Args:
            classes: sequence of class names (asserted not ``None``); stored
                on ``self.classes`` and its length on ``self.n_classes``.
            debug: stored on ``self.debug``.
        """
        super(FasterRCNN, self).__init__()
        assert classes is not None
        self.classes = classes
        self.n_classes = len(classes)

        # No RPN is created in this variant (its construction was left
        # commented out in the original).
        self.features = vgg16()
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)

        pooled_dim, hidden_dim = 512 * 7 * 7, 4096
        self.fc6 = FC(pooled_dim, hidden_dim)
        self.fc7 = FC(hidden_dim, hidden_dim)
        self.score_fc = FC(hidden_dim, self.n_classes, relu=False)
        self.bbox_fc = FC(hidden_dim, self.n_classes * 4, relu=False)

        # Loss slots, filled in elsewhere.
        self.cross_entropy = None
        self.loss_box = None

        self.debug = debug
Esempio n. 16
0
    def __init__(self, classes=None, debug=False, training=True):
        """Build the features, RoI pooling, classifier, heads and loss layer.

        The original body mixed tab-indented and space-indented lines, which
        Python 3 rejects with ``TabError``; indentation is now spaces only.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed.
            debug: stored on ``self.debug``.
            training: accepted but not used inside this constructor.
        """
        super(WSDDN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions; the last one is not followed by a ReLU.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
            nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        )
        self.roi_pool = RoIPool(pooled_height=6, pooled_width=6, spatial_scale=1/16.0)
        self.classifier = nn.Sequential(
            # LeakyReLU with slope 1 is kept in place so the positions of the
            # following layers inside the Sequential stay unchanged.
            nn.LeakyReLU(1, inplace=True),
            nn.Linear(in_features=9216, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
        )
        # NOTE(review): the output width is hard-coded to 20 rather than
        # taken from self.n_classes -- confirm this is intended.
        self.score_cls = nn.Linear(in_features=4096, out_features=20)
        self.score_det = nn.Linear(in_features=4096, out_features=20)
        self.cls_softmax = nn.Softmax(dim=1)
        self.det_softmax = nn.Softmax(dim=0)

        # `size_average=False` is deprecated; `reduction='sum'` is the
        # documented equivalent and avoids the deprecation warning.
        self.loss_layer = nn.BCELoss(reduction='sum')

        # loss
        self.cross_entropy = None

        # for log
        self.debug = debug
Esempio n. 17
0
    def __init__(self, classes=None, debug=False):
        """Create the RPN, RoI pooling, two FC layers and the output heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes``. When ``None`` the
                attribute ``self.n_classes`` must already be available (for
                example as a class attribute, not visible here).
            debug: stored on ``self.debug``.
        """
        super(FasterRCNN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)

        # fc6 takes 1024 * 7 * 7 input features in this variant.
        pooled_dim = 1024 * 7 * 7
        hidden_dim = 4096

        self.rpn = RPN()
        self.roi_pool = RoIPool(7, 7, 1.0/16)
        self.fc6 = FC(pooled_dim, hidden_dim)
        self.fc7 = FC(hidden_dim, hidden_dim)
        self.score_fc = FC(hidden_dim, self.n_classes, relu=False)
        self.bbox_fc = FC(hidden_dim, self.n_classes * 4, relu=False)

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
Esempio n. 18
0
    def __init__(self, classes=None, debug=False, training=True):
        """Build the convolutional features, RoI pooling and scoring heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes`` and is printed.
            debug: stored on ``self.debug``.
            training: stored on ``self.training`` right after the parent
                constructor has run.
        """
        super(WSDDN, self).__init__()
        self.training = training
        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)
            print(classes)

        # Five convolutions, each followed by an in-place ReLU.
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_layers)

        self.roi_pool = RoIPool(6, 6, 1.0/16)

        hidden_dim = 4096
        fc_layers = [
            nn.Linear(9216, hidden_dim),
            nn.ReLU(inplace=True),
            # NOTE(review): Dropout2d sits between two Linear layers here;
            # confirm it is intended rather than plain Dropout.
            nn.Dropout2d(inplace=True),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(inplace=True),
        ]
        self.classifier = nn.Sequential(*fc_layers)

        self.score_cls = nn.Linear(hidden_dim, 20)
        self.score_det = nn.Linear(hidden_dim, 20)

        # loss
        self.cross_entropy = None

        # for log
        self.debug = debug
Esempio n. 19
0
    def __init__(self, classes=None, debug=False, training = False):
        """Create the resnet101/FPN trunk, RPN, RoI pooling and linear heads.

        Args:
            classes: optional sequence of class names; when given it sets
                ``self.classes`` and ``self.n_classes``. When ``None`` the
                attribute ``self.n_classes`` must already be available.
            debug: stored on ``self.debug``.
            training: stored on ``self.training`` and forwarded to ``RPN``.
        """
        super(FasterRCNN, self).__init__()

        if classes is not None:
            self.classes = classes
            self.n_classes = len(classes)

        self.resnet = resnet101()
        self.fpn = FPN()
        self.training = training
        self.rpn_net = RPN(training=self.training)

        # One 7x7 RoI pooling layer per scale 1/4, 1/8, 1/16 and 1/32.
        pool_layers = [RoIPool(7, 7, 1.0/stride) for stride in (4, 8, 16, 32)]
        self.roi_pool_vec = nn.ModuleList(pool_layers)

        hidden_dim = 1024
        self.fc6 = nn.Linear(256*7*7, hidden_dim)
        self.fc7 = nn.Linear(hidden_dim, hidden_dim)
        self.relu = nn.ReLU(inplace=True)
        self.score_fc = nn.Linear(hidden_dim, self.n_classes)
        self.bbox_fc = nn.Linear(hidden_dim, self.n_classes * 4)

        # loss
        self.cross_entropy = None
        self.loss_box = None

        # for log
        self.debug = debug
Esempio n. 20
0
    def __init__(self,
                 cnn_weights=None,
                 fc1_weights=None,
                 fc2_weights=None,
                 roi_size=(7, 7, 512),
                 dropout_p=0.5,
                 num_categories=21):
        """Build the CNN, RoI pooling, two FC layers and the output layers.

        Args:
            cnn_weights: optional state dict loaded into ``self.cnn`` when
                truthy.
            fc1_weights: optional state dict loaded into ``self.fc1`` when
                truthy.
            fc2_weights: optional state dict loaded into ``self.fc2`` when
                truthy.
            roi_size: the first two entries are passed to the RoI pooling
                layer; the product of all entries is the input width of
                ``fc1``.
            dropout_p: stored on ``self.dropout_p``.
            num_categories: output width of the classifier; the regressor
                has four outputs per category.
        """
        super(FastRCNNModel, self).__init__()
        self.dropout_p = dropout_p

        self.cnn = get_vgg_conv(skip_last_layers=1, pretrained=False)
        if cnn_weights:
            self.cnn.load_state_dict(cnn_weights)

        # Freeze the first eight parameter tensors (indices 0..7) of the CNN.
        for position, weight in enumerate(self.cnn.parameters()):
            if position > 7:
                break
            weight.requires_grad = False

        pooled_h, pooled_w = roi_size[0], roi_size[1]
        self.roi_pooling = RoIPool_GPU(pooled_h,
                                       pooled_w,
                                       spatial_scale=1. / 16)

        # Input width of fc1 is the product of all RoI dimensions.
        flat_features = 1
        for extent in roi_size:
            flat_features = flat_features * extent

        self.fc1 = torch.nn.Linear(flat_features, 4096)
        self.fc2 = torch.nn.Linear(4096, 4096)
        for layer, state in ((self.fc1, fc1_weights),
                             (self.fc2, fc2_weights)):
            if state:
                layer.load_state_dict(state)

        self.classifier = torch.nn.Linear(4096, num_categories)
        self.regressor = torch.nn.Linear(4096, num_categories * 4)

        self.weights_init()
Esempio n. 21
0
 def __init__(self):
     """Create the VGG16 feature extractor (bn=True) and a 7x7 RoI pool."""
     super(RGBDNetwork, self).__init__()
     self.features = VGG16(bn=True)
     spatial_scale = 1.0/16
     self.roi_pool = RoIPool(7, 7, spatial_scale)
Esempio n. 22
0
class FastRCNNModel(torch.nn.Module):
    """CNN + RoI pooling + two FC layers with class and box outputs."""

    def __init__(self,
                 cnn_weights=None,
                 fc1_weights=None,
                 fc2_weights=None,
                 roi_size=(7, 7, 512),
                 dropout_p=0.5,
                 num_categories=21):
        """Build all layers and initialise the two output layers.

        Args:
            cnn_weights: optional state dict loaded into ``self.cnn`` when
                truthy.
            fc1_weights: optional state dict loaded into ``self.fc1`` when
                truthy.
            fc2_weights: optional state dict loaded into ``self.fc2`` when
                truthy.
            roi_size: the first two entries are passed to the RoI pooling
                layer; the product of all entries is the input width of
                ``fc1``.
            dropout_p: dropout probability used in ``forward``.
            num_categories: output width of the classifier; the regressor
                has four outputs per category.
        """
        super(FastRCNNModel, self).__init__()
        self.dropout_p = dropout_p

        self.cnn = get_vgg_conv(skip_last_layers=1, pretrained=False)
        if cnn_weights:
            self.cnn.load_state_dict(cnn_weights)
        # Freeze the first eight parameter tensors (indices 0..7) of the CNN.
        for index, param in enumerate(self.cnn.parameters()):
            if index <= 7:
                param.requires_grad = False

        self.roi_pooling = RoIPool_GPU(roi_size[0],
                                       roi_size[1],
                                       spatial_scale=1. / 16)

        # Input width of fc1 is the product of all RoI dimensions.
        input_size = 1
        for dim in roi_size:
            input_size *= dim
        self.fc1 = torch.nn.Linear(input_size, 4096)
        self.fc2 = torch.nn.Linear(4096, 4096)
        if fc1_weights:
            self.fc1.load_state_dict(fc1_weights)
        if fc2_weights:
            self.fc2.load_state_dict(fc2_weights)

        self.classifier = torch.nn.Linear(4096, num_categories)
        self.regressor = torch.nn.Linear(4096, num_categories * 4)

        self.weights_init()

    def forward(self, image, regions):
        """Run the CNN, pool the regions and compute both outputs.

        Args:
            image: input passed to ``self.cnn``.
            regions: regions passed to the RoI pooling layer together with
                the CNN features.

        Returns:
            A pair ``(class_probabilities, class_box_regression)``. The first
            element holds log-softmax values (log-probabilities) over the
            classifier outputs; the second is the raw regressor output.
        """
        cnn_features = self.cnn(image)
        rois = self.roi_pooling.forward(cnn_features, regions)
        x = flatten(rois)

        # FC1
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout_p, training=self.training)

        # FC2
        x = self.fc2(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout_p, training=self.training)

        # Outputs
        # An explicit dim replaces the deprecated implicit-dimension choice.
        # The class scores lie on the last axis of the Linear output, and for
        # 1-D and 2-D inputs this equals the axis the implicit rule picked.
        class_probabilities = F.log_softmax(self.classifier(x), dim=-1)
        class_box_regression = self.regressor(x)

        return class_probabilities, class_box_regression

    def weights_init(self):
        """Initialise the classifier and regressor layers.

        Weights are drawn from a zero-mean normal distribution (std 0.01 for
        ``classifier``, 0.001 for ``regressor``) and biases are set to zero.
        Modules with any other name are left untouched.
        """
        init_std = {'classifier': 0.01, 'regressor': 0.001}
        for name, module in self.named_modules():
            std = init_std.get(name)
            if std is None:
                continue
            module.weight.data.normal_(0.0, std)
            module.bias.data.fill_(0.)
Esempio n. 23
0
 def __init__(self):
     """Create the RoI pooling layer, built as RoIPool(4, 7, 1/16)."""
     super(RoINet, self).__init__()
     spatial_scale = 1.0 / 16
     self.roi_pool = RoIPool(4, 7, spatial_scale)
Esempio n. 24
0
    def __init__(self,
                 nhidden,
                 n_object_cats,
                 n_predicate_cats,
                 n_vocab,
                 voc_sign,
                 object_loss_weight,
                 predicate_loss_weight,
                 dropout=False,
                 use_kmeans_anchors=False,
                 use_kernel=False,
                 disable_spatial_model=False,
                 spatial_type='dual_mask',
                 pool_type='roi_pooling',
                 disable_iteration_model=False,
                 iteration_type='cat_embed',
                 idx2obj=None,
                 idx2rel=None):
        """Build the sub-modules of the full network.

        Most arguments are forwarded to the parent constructor. Several
        settings are then read back from ``self`` (``pool_type``,
        ``spatial_type``, ``disable_spatial_model``,
        ``disable_iteration_model``, ``iteration_type``, ``n_classes_obj``,
        ``n_classes_pred``); presumably the parent stores them -- confirm
        against the base class.

        Arguments consumed directly in this method:
            nhidden: width used for the FC layers created here.
            dropout: forwarded to several optional sub-modules.
            use_kmeans_anchors: forwarded to ``RPN``.
            spatial_type: compared with ``'dual_mask'`` (note that the
                ``'gaussian_model'`` check uses ``self.spatial_type``).
            idx2obj, idx2rel: stored for plotting during training.
            use_kernel: not used in this method.
        """
        super(Full_Net, self).__init__(
            nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
            object_loss_weight, predicate_loss_weight, dropout,
            use_kmeans_anchors, disable_spatial_model, spatial_type,
            pool_type, disable_iteration_model, iteration_type)

        roi_feature_dim = 512 * 7 * 7

        self.rpn = RPN(use_kmeans_anchors)
        self.roi_pool = RoIPool(7, 7, 1.0 / 16)

        # Relation pooling layer, chosen by pool_type. Class names are only
        # looked up inside the branch that is actually taken.
        if self.pool_type == 'roi_pooling':
            self.roi_pool_rel = RoIPool(7, 7, 1.0 / 16)
        elif self.pool_type == 'spatial_attention':
            self.mask_roi_pool = MaskRoIPool(7, 7, 1.0 / 16)
        elif self.pool_type == 'dual_roipooling':
            self.dualmask_roi_pool = DualMaskRoIPool(7, 7, 1.0 / 16)

        self.fc6 = FC(roi_feature_dim, nhidden, relu=True)
        self.fc7 = FC(nhidden, nhidden, relu=True)
        self.fc6_r = FC(roi_feature_dim, nhidden, relu=True)
        self.fc7_r = FC(nhidden, nhidden, relu=True)

        if self.disable_spatial_model:
            self.gsf = GeometricSpatialFeature(nhidden, dropout)
        else:
            # The two checks read different variables (argument vs.
            # attribute), so they are kept as independent tests.
            if spatial_type == 'dual_mask':
                self.dm = DualMask(nhidden)
            if self.spatial_type == 'gaussian_model':
                self.gmm = GaussianMixtureModel(25488, nhidden)
            self.fc10_r = FC(2 * nhidden, nhidden, relu=True)
            network.weights_normal_init(self.fc10_r, 0.01)

        # Hard-wired switch: the translation embedding is disabled.
        use_trans_embedding = False
        if use_trans_embedding:
            self.TransE = TranslationEmbedding(nhidden, dropout)

        if self.disable_iteration_model:
            self.fc8 = FC(2 * nhidden, nhidden, relu=True)
            self.fc9 = FC(nhidden, nhidden, relu=True)
            for layer in (self.fc8, self.fc9):
                network.weights_normal_init(layer, 0.01)
        elif self.iteration_type == 'use_brnn':
            self.lstm = BrnnStructure(nhidden, dropout)
        elif self.iteration_type == 'cat_embed':
            self.embed = Concat(nhidden, dropout)
        elif self.iteration_type == 'iteration':
            self.iter = GraphicalModel(nhidden, dropout)

        self.score = FC(nhidden, self.n_classes_obj, relu=False)
        self.score_r = FC(nhidden, self.n_classes_pred, relu=False)

        self.boundingbox = FC(nhidden, self.n_classes_obj * 4, relu=False)

        output_inits = (
            (self.score, 0.01),
            (self.score_r, 0.01),
            (self.boundingbox, 0.005),
        )
        for head, std in output_inits:
            network.weights_normal_init(head, std)

        self.bad_img_flag = False

        # for plotting of training
        self.idx2obj = idx2obj
        self.idx2rel = idx2rel
        self.trainImgCount = 0