def __init__(self, nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
             max_word_length, MPS_iter, use_language_loss, object_loss_weight,
             predicate_loss_weight, dropout=False, use_kmeans_anchors=False,
             gate_width=128, nhidden_caption=256, nembedding=256,
             rnn_type='LSTM_normal', rnn_droptout=0.0, rnn_bias=False,
             use_region_reg=False, use_kernel=False):
    """Assemble the RPN, three RoI branches, message passing and output heads.

    Most arguments are forwarded positionally to the parent constructor.
    The ``self.*`` settings read below (``n_classes_obj``, ``n_vocab``,
    ``use_region_reg`` ...) are not assigned in this method; presumably the
    parent constructor stores them -- TODO confirm.

    NOTE(review): statement nesting was reconstructed from a
    whitespace-collapsed source; the message-passing weight init is placed
    in the non-zero ``MPS_iter`` branch because ``self.mps`` is ``None``
    otherwise.
    """
    super(Hierarchical_Descriptive_Model, self).__init__(
        nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
        max_word_length, MPS_iter, use_language_loss, object_loss_weight,
        predicate_loss_weight, dropout, use_kmeans_anchors, nhidden_caption,
        nembedding, rnn_type, use_region_reg)

    self.rpn = RPN(use_kmeans_anchors)

    # One RoI pooling layer per branch: object, phrase and region.
    pool_size = 7
    spatial_scale = 1.0/16
    self.roi_pool_object = RoIPool(pool_size, pool_size, spatial_scale)
    self.roi_pool_phrase = RoIPool(pool_size, pool_size, spatial_scale)
    self.roi_pool_region = RoIPool(pool_size, pool_size, spatial_scale)

    # Two FC layers per branch on top of the flattened pooled features.
    pooled_dim = 512 * pool_size * pool_size
    self.fc6_obj = FC(pooled_dim, nhidden, relu=True)
    self.fc7_obj = FC(nhidden, nhidden, relu=False)
    self.fc6_phrase = FC(pooled_dim, nhidden, relu=True)
    self.fc7_phrase = FC(nhidden, nhidden, relu=False)
    self.fc6_region = FC(pooled_dim, nhidden, relu=True)
    self.fc7_region = FC(nhidden, nhidden, relu=False)

    # The hierarchical message passing structure is skipped when MPS_iter is 0.
    if MPS_iter != 0:
        self.mps = Hierarchical_Message_Passing_Structure(
            nhidden, dropout, gate_width=gate_width,
            use_kernel_function=use_kernel)
        network.weights_normal_init(self.mps, 0.01)
    else:
        self.mps = None

    # Output heads.
    self.score_obj = FC(nhidden, self.n_classes_obj, relu=False)
    self.bbox_obj = FC(nhidden, self.n_classes_obj * 4, relu=False)
    self.score_pred = FC(nhidden, self.n_classes_pred, relu=False)

    if self.use_region_reg:
        self.bbox_region = FC(nhidden, 4, relu=False)
        network.weights_normal_init(self.bbox_region, 0.01)
    else:
        self.bbox_region = None

    self.objectiveness = FC(nhidden, 2, relu=False)

    if use_language_loss:
        lm_kwargs = dict(ntoken=self.n_vocab, nimg=self.nhidden,
                         nhidden=self.nhidden_caption, nembed=self.nembedding,
                         nlayers=2, nseq=self.max_word_length,
                         voc_sign=self.voc_sign,
                         bias=rnn_bias, dropout=rnn_droptout)
    else:
        # Minimal placeholder model, just to make the program run.
        lm_kwargs = dict(ntoken=self.n_vocab, nimg=1, nhidden=1, nembed=1,
                         nlayers=1, nseq=1, voc_sign=self.voc_sign)
    self.caption_prediction = Language_Model(rnn_type=self.rnn_type, **lm_kwargs)

    network.weights_normal_init(self.score_obj, 0.01)
    network.weights_normal_init(self.bbox_obj, 0.005)
    network.weights_normal_init(self.score_pred, 0.01)
    network.weights_normal_init(self.objectiveness, 0.01)

    self.objectiveness_loss = None
def __init__(self, classes=None, debug=False, backbone='VGG'):
    """Build the detector for the requested backbone ('VGG' or "RESNET").

    When ``classes`` is None, ``self.classes`` / ``self.n_classes`` are left
    untouched; ``self.n_classes`` is still read below, so it must already
    be available in that case.
    Any other ``backbone`` value creates no head layers at all.
    """
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.rpn = RPN(backbone=backbone)
    self.roi_pool = RoIPool(7, 7, 1.0 / 16)
    self._backbone = backbone

    # Pick the backbone-specific layers and the input width of the heads.
    head_in = None
    if backbone == 'VGG':
        self.fc6 = FC(512 * 7 * 7, 4096)
        self.fc7 = FC(4096, 4096)
        head_in = 4096
    elif backbone == "RESNET":
        self.layer4 = ResNet._make_layer(Bottleneck, 512, 3, stride=2)
        head_in = 2048 * 7 * 7

    if head_in is not None:
        self.score_fc = FC(head_in, self.n_classes, relu=False)
        self.bbox_fc = FC(head_in, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, arch='vgg16'):
    """Build a Faster R-CNN whose RPN uses torchvision VGG16 features.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source. The RPN and ``fcs`` are placed under the ``arch == 'vgg16'``
    branch because ``cnn_arch`` only exists there; the ``print`` is assumed
    to belong to the ``classes`` branch -- confirm against the original.
    """
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print('n_classes: {}\n{}'.format(self.n_classes, self.classes))

    if arch == 'vgg16':
        cnn_arch = models.vgg16(pretrained=False)  # w/o bn
        self.rpn = RPN(features=cnn_arch.features)
        fc_layers = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
        ]
        self.fcs = nn.Sequential(*fc_layers)

    self.roi_pool = RoIPool(7, 7, 1.0 / 16)
    # The separate fc6 / fc7 FC layers are superseded by ``self.fcs``.
    hidden = 4096
    self.score_fc = FC(hidden, self.n_classes, relu=False)
    self.bbox_fc = FC(hidden, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False):
    """Build the RPN, RoI pooling and the FC detection head.

    ``classes`` replaces ``self.classes`` / ``self.n_classes`` when given.
    ``self.n_classes`` is read below either way, so it must already exist
    when ``classes`` is None.
    """
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.rpn = RPN()
    self.roi_pool = RoIPool(7, 7, 1.0 / 16)

    pooled_dim = 512 * 7 * 7
    hidden = 4096
    self.fc6 = FC(pooled_dim, hidden)
    self.fc7 = FC(hidden, hidden)
    self.score_fc = FC(hidden, self.n_classes, relu=False)
    self.bbox_fc = FC(hidden, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None
    self.cost = None

    # A CRNN sub-model (crnn_py.CRNN(32, 1, 63, 256, 1)) and the loading of
    # its checkpoint via network.load_net were wired in here at some point
    # and are currently disabled.

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, training=True):
    """Create the conv trunk, RoI pooling, shared classifier and score heads.

    ``training`` is accepted but not used in this constructor.
    """
    super(WSDDN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Five-convolution trunk; the last convolution has no activation.
    conv_layers = [
        nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
        nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
        nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    ]
    self.features = nn.Sequential(*conv_layers)

    self.roi_pool = RoIPool(pooled_height=6, pooled_width=6,
                            spatial_scale=1.0 / 16)

    # 9216 == 256 channels * 6 * 6 pooled cells.
    shared_layers = [
        nn.Dropout(p=0.0),
        nn.Linear(in_features=9216, out_features=4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=4096, out_features=4096),
        nn.ReLU(),
    ]
    self.classifier = nn.Sequential(*shared_layers)

    # Classification and detection streams, 20 outputs each.
    self.score_cls = FC(in_features=4096, out_features=20)
    self.score_det = FC(in_features=4096, out_features=20)

    # loss
    self.cross_entropy = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, training=True):
    """Set up the WSDDN trunk, RoI pooling, shared FC stack and two heads.

    ``training`` is accepted but not used in this constructor.
    """
    super(WSDDN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Convolutional trunk; no activation follows the fifth convolution.
    trunk = [
        nn.Conv2d(3, 64, 11, stride=4, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(3, stride=2),
        nn.Conv2d(64, 192, 5, stride=1, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(3, stride=2),
        nn.Conv2d(192, 384, 3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 256, 3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(256, 256, 3, stride=1, padding=1),
    ]
    self.features = nn.Sequential(*trunk)

    self.roi_pool = RoIPool(6, 6, 1.0 / 16)

    hidden = 4096
    self.classifier = nn.Sequential(
        nn.Linear(9216, hidden),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(hidden, hidden),
        nn.ReLU(),
    )

    # Third positional argument of FC is passed as False for both heads.
    self.score_cls = FC(hidden, 20, False)
    self.score_det = FC(hidden, 20, False)

    # loss
    self.cross_entropy = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False):
    """Build the RPN, RoI pooling and 2048-d score / box heads.

    ``self.fixed_blocks`` and (when ``classes`` is None) ``self.n_classes``
    are read but not assigned here, so they must already be available.
    """
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    rpn = RPN()
    self.rpn = rpn
    # should be same as self.fixed blocks
    rpn._init_modules(fixed_blocks=self.fixed_blocks)

    self.roi_pool = RoIPool(7, 7, 1.0 / 16)

    # The earlier 4096-d head (fc6 / fc7 plus 4096-input score / bbox
    # layers) is disabled; the original comments mark it as replaced by
    # "resnet block conv5".
    head_in = 2048
    self.score_fc = FC(head_in, self.n_classes, relu=False)
    self.bbox_fc = FC(head_in, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self):
    """Build the RPN, RoI pooling and FC head.

    ``self.n_classes`` is read but never assigned here, so it must already
    be available (e.g. as a class attribute).
    """
    super(FasterRCNN, self).__init__()

    self.rpn = RPN()
    self.roi_pool = RoIPool(7, 7, 1.0/16)

    hidden = 4096
    self.fc6 = FC(512 * 7 * 7, hidden)
    self.fc7 = FC(hidden, hidden)

    n_classes = self.n_classes
    self.score_fc = FC(hidden, n_classes, relu=False)
    self.bbox_fc = FC(hidden, n_classes * 4, relu=False)
def __init__(self, classes=None, debug=False, training=True):
    """Create the WSDDN trunk, RoI pooling, shared FC stack and linear heads.

    ``training`` is accepted but not used in this constructor.
    """
    super(WSDDN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Convolutional trunk; every convolution is followed by an in-place ReLU.
    trunk = [
        nn.Conv2d(3, 64, (11, 11), (4, 4), (2, 2)),
        nn.ReLU(inplace=True),
        nn.MaxPool2d((3, 3), (2, 2), dilation=(1, 1)),
        nn.Conv2d(64, 192, (5, 5), (1, 1), (2, 2)),
        nn.ReLU(inplace=True),
        nn.MaxPool2d((3, 3), (2, 2), dilation=(1, 1)),
        nn.Conv2d(192, 384, (3, 3), (1, 1), (1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, (3, 3), (1, 1), (1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1)),
        nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*trunk)

    self.roi_pool = RoIPool(6, 6, 1.0/16)

    shared = [
        nn.Linear(in_features=9216, out_features=4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=4096, out_features=4096),
        nn.ReLU(inplace=True),
    ]
    self.classifier = nn.Sequential(*shared)

    self.score_cls = nn.Linear(in_features=4096, out_features=20)
    self.score_det = nn.Linear(in_features=4096, out_features=20)

    # Disabled alternatives kept for reference: FC-based heads in place of
    # nn.Linear, and a smaller variant using RoIPool(2, 2, 0.06) with a
    # 1024-wide classifier and 1024-input heads.

    # loss
    self.cross_entropy = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, training=True):
    """Create the WSDDN trunk, RoI pooling, shared FC stack, heads and loss.

    ``training`` is accepted but not used in this constructor.
    """
    super(WSDDN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Convolutional trunk; every convolution is followed by an in-place ReLU.
    trunk = [
        nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
        nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
        nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*trunk)

    self.roi_pool = RoIPool(6, 6, 1.0 / 16)

    shared = [
        nn.Linear(in_features=9216, out_features=4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=4096, out_features=4096),
        nn.ReLU(inplace=True),
    ]
    self.classifier = nn.Sequential(*shared)

    # Each head is a one-layer Sequential, so its parameters are stored
    # under the "<head>.0." prefix.
    cls_head = nn.Linear(in_features=4096, out_features=20)
    self.score_cls = nn.Sequential(cls_head)
    det_head = nn.Linear(in_features=4096, out_features=20)
    self.score_det = nn.Sequential(det_head)

    # loss
    self.cross_entropy = None
    # NOTE(review): ``size_average`` is deprecated in current PyTorch in
    # favour of ``reduction='mean'``; left unchanged here.
    self.criterion = nn.BCELoss(size_average=True).cuda()

    # for log
    self.debug = debug
def __init__(self, classes, debug=False):
    """Build the RPN, RoI pooling, ResNet152 'layer4' features and heads.

    ``classes`` is required; its length fixes the width of both heads.
    """
    super(FasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    self.rpn = RPN()
    self.roi_pool = RoIPool(7, 7, 1.0 / 16)
    self.res5_features = ResNet152('layer4')

    # Both heads take a 2048-wide input.
    head_in = 2048
    self.score_fc = FC(head_in, self.n_classes, relu=False)
    self.bbox_fc = FC(head_in, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, classes, debug=False):
    """Build the RPN, RoI pooling, InceptionResnetV2 'avgpool' features and heads.

    ``classes`` is required; its length fixes the width of both heads.
    """
    super(FasterRCNN, self).__init__()

    self.classes = classes
    self.n_classes = len(classes)

    self.rpn = RPN()
    # The spatial scale here is 17/299 rather than 1/16.
    spatial_scale = 17.0 / 299
    self.roi_pool = RoIPool(7, 7, spatial_scale)
    self.res5_features = InceptionResnetV2('avgpool')

    # Both heads take a 1536-wide input.
    head_in = 1536
    self.score_fc = FC(head_in, self.n_classes, relu=False)
    self.bbox_fc = FC(head_in, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, training=True):
    """Create the WSDDN trunk, 7x7 RoI pooling, shared FC stack and softmax heads.

    ``training`` is accepted but not used in this constructor.
    ``self.n_classes`` sizes both heads, so it must already be available
    when ``classes`` is None.
    """
    super(WSDDN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Convolutional trunk; every convolution is followed by an in-place ReLU.
    trunk = [
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(64, 192, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*trunk)

    # Note the 1/17 spatial scale used by this variant.
    self.roi_pool = RoIPool(7, 7, 1.0 / 17)

    hidden = 4096
    self.classifier_share = nn.Sequential(
        nn.Linear(256 * 7 * 7, hidden),
        nn.ReLU(inplace=True),
        nn.Dropout(),
        nn.Linear(hidden, hidden),
        nn.ReLU(inplace=True),
    )

    # Classification stream: softmax along dim 1.
    cls_layers = [nn.Linear(hidden, self.n_classes), nn.Softmax(dim = 1)]
    self.classifier_c = nn.Sequential(*cls_layers)

    # Detection stream: softmax along dim 0.
    det_layers = [nn.Linear(hidden, self.n_classes), nn.Softmax(dim =0)]
    self.classifier_d = nn.Sequential(*det_layers)

    # loss
    self.cross_entropy = None

    # for log
    self.debug = debug
def __init__(self, classes, debug=False):
    """Build the RPN, proposal-target layer, RoI pooling and DataParallel FC head.

    ``classes`` must not be None; its length fixes the width of both heads.
    """
    super(FastRCNN, self).__init__()
    assert classes is not None

    self.classes = classes
    self.n_classes = len(classes)

    # A ``self.features = vgg16()`` trunk is disabled in this variant.
    self.rpn = RPN()
    self.proposal_target_layer = ProposalTargetLayer(self.n_classes)
    self.roi_pool = RoIPool(7, 7, 1.0 / 16)

    # Every FC layer is wrapped in nn.DataParallel individually.
    hidden = 4096
    self.fc6 = nn.DataParallel(FC(512 * 7 * 7, hidden))
    self.fc7 = nn.DataParallel(FC(hidden, hidden))

    score_layer = FC(hidden, self.n_classes, relu=False)
    self.score_fc = nn.DataParallel(score_layer)
    bbox_layer = FC(hidden, self.n_classes * 4, relu=False)
    self.bbox_fc = nn.DataParallel(bbox_layer)

    self.debug = debug
def __init__(self, classes, debug=False):
    """Build the vgg16 feature trunk, RoI pooling and FC detection head.

    ``classes`` must not be None; its length fixes the width of both heads.
    """
    super(FasterRCNN, self).__init__()
    assert classes is not None

    self.classes = classes
    self.n_classes = len(classes)

    self.features = vgg16()
    # No RPN is created in this variant (``self.rpn = RPN()`` is disabled).
    self.roi_pool = RoIPool(7, 7, 1.0 / 16)

    pooled_dim = 512 * 7 * 7
    hidden = 4096
    self.fc6 = FC(pooled_dim, hidden)
    self.fc7 = FC(hidden, hidden)
    self.score_fc = FC(hidden, self.n_classes, relu=False)
    self.bbox_fc = FC(hidden, self.n_classes * 4, relu=False)

    # Loss slots start empty.
    self.cross_entropy = None
    self.loss_box = None

    self.debug = debug
def __init__(self, classes=None, debug=False, training=True):
    """Create the WSDDN trunk, RoI pooling, classifier, heads, softmaxes and loss.

    ``training`` is accepted but not used in this constructor.
    """
    super(WSDDN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Convolutional trunk; the fifth convolution has no activation after it.
    trunk = [
        nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2)),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
        nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2)),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)),
        nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
    ]
    self.features = nn.Sequential(*trunk)

    self.roi_pool = RoIPool(pooled_height = 6, pooled_width=6,
                            spatial_scale=1/16.0)

    # The classifier opens with LeakyReLU(negative_slope=1), which passes
    # its input through unchanged.
    shared = [
        nn.LeakyReLU(1, inplace=True),
        nn.Linear(in_features=9216, out_features=4096),
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=4096, out_features=4096),
        nn.ReLU(inplace=True),
    ]
    self.classifier = nn.Sequential(*shared)

    self.score_cls = nn.Linear(in_features=4096, out_features=20)
    self.score_det = nn.Linear(in_features=4096, out_features=20)

    # Softmax along dim 1 for the class stream, dim 0 for the detection stream.
    self.cls_softmax = nn.Softmax(dim=1)
    self.det_softmax = nn.Softmax(dim=0)

    # NOTE(review): ``size_average`` is deprecated in current PyTorch in
    # favour of ``reduction='sum'``; left unchanged here.
    self.loss_layer = nn.BCELoss(size_average=False)

    # loss
    self.cross_entropy = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False):
    """Build the RPN, RoI pooling and FC head over 1024-channel pooled features.

    ``classes`` replaces ``self.classes`` / ``self.n_classes`` when given.
    ``self.n_classes`` is read below either way, so it must already exist
    when ``classes`` is None.
    """
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.rpn = RPN()
    self.roi_pool = RoIPool(7, 7, 1.0/16)

    pooled_dim = 1024 * 7 * 7
    hidden = 4096
    self.fc6 = FC(pooled_dim, hidden)
    self.fc7 = FC(hidden, hidden)
    self.score_fc = FC(hidden, self.n_classes, relu=False)
    self.bbox_fc = FC(hidden, self.n_classes * 4, relu=False)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, training=True):
    """Create the WSDDN trunk, RoI pooling, shared FC stack and linear heads.

    Args:
        classes: optional sequence of class names; when given it replaces
            ``self.classes`` and ``self.n_classes`` and is printed.
        debug: stored as ``self.debug``.
        training: stored as ``self.training``. Only this module's own flag
            is set; sub-modules are created afterwards with their default
            mode.
    """
    super(WSDDN, self).__init__()
    self.training = training

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)
        print(classes)

    # Convolutional trunk; every convolution is followed by an in-place ReLU.
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(64, 192, kernel_size=5, padding=2),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    )

    self.roi_pool = RoIPool(6, 6, 1.0/16)

    # 9216 == 256 channels * 6 * 6 pooled cells.
    self.classifier = nn.Sequential(
        nn.Linear(9216, 4096),
        nn.ReLU(inplace=True),
        # Plain element-wise dropout. The previous nn.Dropout2d is meant for
        # 3-D/4-D feature maps; PyTorch deprecates feeding it other ranks and
        # recommends nn.Dropout to keep the same behaviour.
        # Assumes the classifier receives (num_rois, 9216) -- TODO confirm.
        nn.Dropout(inplace=True),
        nn.Linear(4096, 4096),
        nn.ReLU(inplace=True),
    )

    # Classification and detection streams, 20 outputs each.
    self.score_cls = nn.Linear(4096, 20)
    self.score_det = nn.Linear(4096, 20)

    # loss
    self.cross_entropy = None

    # for log
    self.debug = debug
def __init__(self, classes=None, debug=False, training = False):
    """Build the resnet101 + FPN trunk, RPN, per-scale RoI pooling and heads.

    ``training`` is stored as ``self.training`` and forwarded to the RPN.
    ``self.n_classes`` is read below, so it must already exist when
    ``classes`` is None.
    """
    super(FasterRCNN, self).__init__()

    if classes is not None:
        self.classes = classes
        self.n_classes = len(classes)

    self.resnet = resnet101()
    self.fpn = FPN()

    self.training = training
    self.rpn_net = RPN(training=training)

    # One 7x7 RoI pooling layer per scale, with spatial scale 1/4 .. 1/32.
    pools = []
    for stride in (4, 8, 16, 32):
        pools.append(RoIPool(7, 7, 1.0/stride))
    self.roi_pool_vec = nn.ModuleList(pools)

    hidden = 1024
    self.fc6 = nn.Linear(in_features=256*7*7, out_features=hidden)
    self.fc7 = nn.Linear(in_features=hidden, out_features=hidden)
    self.relu = nn.ReLU(inplace=True)
    self.score_fc = nn.Linear(in_features=hidden,
                              out_features=self.n_classes)
    self.bbox_fc = nn.Linear(in_features=hidden,
                             out_features=self.n_classes * 4)

    # loss
    self.cross_entropy = None
    self.loss_box = None

    # for log
    self.debug = debug
def __init__(self, cnn_weights=None, fc1_weights=None, fc2_weights=None,
             roi_size=(7, 7, 512), dropout_p=0.5, num_categories=21):
    """Build the conv trunk, RoI pooling, two FC layers and both output heads.

    Optional state dicts are loaded into the trunk and the FC layers when
    they are truthy. ``roi_size`` gives the pooled height and width in its
    first two entries; the product of all entries is the FC input width.
    """
    super(FastRCNNModel, self).__init__()
    self.dropout_p = dropout_p

    self.cnn = get_vgg_conv(skip_last_layers=1, pretrained=False)
    if cnn_weights:
        self.cnn.load_state_dict(cnn_weights)

    # Freeze the bottommost cnn layers: the first eight parameter tensors.
    for frozen in list(self.cnn.parameters())[:8]:
        frozen.requires_grad = False

    pooled_h, pooled_w = roi_size[0], roi_size[1]
    self.roi_pooling = RoIPool_GPU(pooled_h, pooled_w, spatial_scale=1. / 16)

    # Flattened size of one pooled region.
    input_size = 1
    for extent in roi_size:
        input_size *= extent

    hidden = 4096
    self.fc1 = torch.nn.Linear(input_size, hidden)
    self.fc2 = torch.nn.Linear(hidden, hidden)
    if fc1_weights:
        self.fc1.load_state_dict(fc1_weights)
    if fc2_weights:
        self.fc2.load_state_dict(fc2_weights)

    # Four box-regression outputs per category.
    self.classifier = torch.nn.Linear(hidden, num_categories)
    self.regressor = torch.nn.Linear(hidden, num_categories * 4)

    self.weights_init()
def __init__(self):
    """Create the batch-norm VGG16 feature extractor and 7x7 RoI pooling."""
    super(RGBDNetwork, self).__init__()

    self.features = VGG16(bn=True)

    pool_size = 7
    spatial_scale = 1.0/16
    self.roi_pool = RoIPool(pool_size, pool_size, spatial_scale)
class FastRCNNModel(torch.nn.Module):
    """Fast R-CNN style head: conv trunk -> RoI pooling -> two FC layers -> heads.

    ``forward`` returns per-region class log-probabilities and per-category
    box regression outputs.
    """

    def __init__(self, cnn_weights=None, fc1_weights=None, fc2_weights=None,
                 roi_size=(7, 7, 512), dropout_p=0.5, num_categories=21):
        """Build all layers and optionally load pre-trained weights.

        Args:
            cnn_weights: optional state dict for the conv trunk.
            fc1_weights: optional state dict for ``fc1``.
            fc2_weights: optional state dict for ``fc2``.
            roi_size: pooled region size; the first two entries are passed
                to the RoI pooling layer and the product of all entries is
                the input width of ``fc1``.
            dropout_p: dropout probability applied after each FC layer.
            num_categories: number of classifier outputs; the regressor
                emits four values per category.
        """
        super(FastRCNNModel, self).__init__()
        self.dropout_p = dropout_p

        self.cnn = get_vgg_conv(skip_last_layers=1, pretrained=False)
        if cnn_weights:
            self.cnn.load_state_dict(cnn_weights)

        # freeze the bottommost cnn layers (first eight parameter tensors)
        for index, param in enumerate(self.cnn.parameters()):
            if index <= 7:
                param.requires_grad = False

        self.roi_pooling = RoIPool_GPU(roi_size[0], roi_size[1],
                                       spatial_scale=1. / 16)

        # Flattened size of one pooled region.
        input_size = 1
        for dim in roi_size:
            input_size *= dim

        self.fc1 = torch.nn.Linear(input_size, 4096)
        self.fc2 = torch.nn.Linear(4096, 4096)
        if fc1_weights:
            self.fc1.load_state_dict(fc1_weights)
        if fc2_weights:
            self.fc2.load_state_dict(fc2_weights)

        self.classifier = torch.nn.Linear(4096, num_categories)
        self.regressor = torch.nn.Linear(4096, num_categories * 4)

        self.weights_init()

    def forward(self, image, regions):
        """Run the trunk, pool ``regions`` and score every pooled region.

        Returns:
            Tuple ``(class_log_probabilities, class_box_regression)``.
        """
        cnn_features = self.cnn(image)
        rois = self.roi_pooling.forward(cnn_features, regions)
        x = flatten(rois)

        # FC1
        x = self.fc1(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout_p, training=self.training)

        # FC2
        x = self.fc2(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout_p, training=self.training)

        # Outputs
        # ``dim`` is explicit: the implicit choice is deprecated. dim=1 is
        # what PyTorch picked implicitly for 2-D input, so results are
        # unchanged. Assumes ``flatten`` yields (num_rois, features) --
        # TODO confirm.
        class_probabilities = F.log_softmax(self.classifier(x), dim=1)
        class_box_regression = self.regressor(x)

        return class_probabilities, class_box_regression

    def weights_init(self):
        """Re-initialise the two output heads; other modules are left alone.

        Weights are drawn from N(0, std) with std 0.01 for ``classifier``
        and 0.001 for ``regressor``; both biases are zeroed.
        """
        head_std = {'classifier': 0.01, 'regressor': 0.001}
        for name, module in self.named_modules():
            std = head_std.get(name)
            if std is None:
                continue
            module.weight.data.normal_(0.0, std)
            module.bias.data.fill_(0.)
def __init__(self):
    """Create the single RoI pooling layer used by this network."""
    # TODO: Finished this part.
    super(RoINet, self).__init__()

    # Pooling arguments are (4, 7) with a spatial scale of 1/16.
    spatial_scale = 1.0 / 16
    self.roi_pool = RoIPool(4, 7, spatial_scale)
# Constructor of Full_Net.
#
# Forwards the configuration arguments to the parent constructor, then
# creates the RPN, the object RoI pooling layer, one relation pooling layer
# selected by self.pool_type, the fc6/fc7 (object) and fc6_r/fc7_r (relation)
# FC layers, the spatial-model and iteration-model sub-modules, and the
# score / score_r / boundingbox heads, which are initialised with
# network.weights_normal_init. idx2obj and idx2rel are stored for plotting
# during training; bad_img_flag and trainImgCount start at False and 0.
#
# `TransEmbedding` is hard-coded to False, so the TranslationEmbedding branch
# is never taken. `use_kernel` is accepted but not referenced in the body.
#
# NOTE(review): this definition was collapsed onto two physical lines and its
# indentation is lost. Which `if` each of the two `else:` clauses belongs to,
# and whether `fc10_r` is created only for the 'gaussian_model' spatial type,
# cannot be determined from this text -- restore the layout from the original
# file before editing the logic.
def __init__(self, nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign, object_loss_weight, predicate_loss_weight, dropout=False, use_kmeans_anchors=False, use_kernel=False, disable_spatial_model=False, spatial_type='dual_mask', pool_type='roi_pooling', disable_iteration_model=False, iteration_type='cat_embed', idx2obj=None, idx2rel=None): super(Full_Net, self).__init__(nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign, object_loss_weight, predicate_loss_weight, dropout, use_kmeans_anchors, disable_spatial_model, spatial_type, pool_type, disable_iteration_model, iteration_type) self.rpn = RPN(use_kmeans_anchors) self.roi_pool = RoIPool(7, 7, 1.0 / 16) if self.pool_type == 'roi_pooling': self.roi_pool_rel = RoIPool(7, 7, 1.0 / 16) if self.pool_type == 'spatial_attention': self.mask_roi_pool = MaskRoIPool(7, 7, 1.0 / 16) if self.pool_type == 'dual_roipooling': self.dualmask_roi_pool = DualMaskRoIPool(7, 7, 1.0 / 16) self.fc6 = FC(512 * 7 * 7, nhidden, relu=True) self.fc7 = FC(nhidden, nhidden, relu=True) self.fc6_r = FC(512 * 7 * 7, nhidden, relu=True) self.fc7_r = FC(nhidden, nhidden, relu=True) if not self.disable_spatial_model: if spatial_type == 'dual_mask': self.dm = DualMask(nhidden) if self.spatial_type == 'gaussian_model': self.gmm = GaussianMixtureModel(25488, nhidden) self.fc10_r = FC(2 * nhidden, nhidden, relu=True) network.weights_normal_init(self.fc10_r, 0.01) else: self.gsf = GeometricSpatialFeature(nhidden, dropout) TransEmbedding = False if TransEmbedding: self.TransE = TranslationEmbedding(nhidden, dropout) if not self.disable_iteration_model: if self.iteration_type == 'use_brnn': self.lstm = BrnnStructure(nhidden, dropout) if self.iteration_type == 'cat_embed': self.embed = Concat(nhidden, dropout) if self.iteration_type == 'iteration': self.iter = GraphicalModel(nhidden, dropout) else: self.fc8 = FC(2 * nhidden, nhidden, relu=True) self.fc9 = FC(nhidden, nhidden, relu=True) network.weights_normal_init(self.fc8, 0.01)
network.weights_normal_init(self.fc9, 0.01) self.score = FC(nhidden, self.n_classes_obj, relu=False) self.score_r = FC(nhidden, self.n_classes_pred, relu=False) self.boundingbox = FC(nhidden, self.n_classes_obj * 4, relu=False) network.weights_normal_init(self.score, 0.01) network.weights_normal_init(self.score_r, 0.01) network.weights_normal_init(self.boundingbox, 0.005) self.bad_img_flag = False # for plotting of training self.idx2obj = idx2obj self.idx2rel = idx2rel self.trainImgCount = 0