def init_model(self, model_path=None):
    """Initialise ``self.model`` and the loss module, then place them on device.

    Args:
        model_path: Optional checkpoint path. When given it is handed to
            ``network.load_net`` together with ``self.model``; when ``None``
            the model is initialised with
            ``network.weights_normal_init(self.model, dev=1e-6)`` instead.

    Side effects:
        * prints the total parameter count of ``self.model``;
        * re-initialises ``self.loss_fn_`` with ``dev=0.01``;
        * when ``self.opt.gpus`` is non-empty, moves the model to
          ``self.device`` and wraps it in ``torch.nn.DataParallel``;
        * always sets ``self.loss_fn`` (the original left it undefined when
          no GPUs were configured).

    Raises:
        AssertionError: if GPUs are requested but CUDA is unavailable.
    """
    if model_path is not None:
        network.load_net(model_path, self.model)
    else:
        network.weights_normal_init(self.model, dev=1e-6)
        # network.load_net('../../pruned_VGG.h5', self.model.front_end, skip=True)
        # network.load_net("../../vgg16.h5", self.model.front_end, skip=True)

    def calpara(model):
        """Print the number of parameters of ``model`` in millions."""
        print('---------- Networks initialized -------------')
        num_params = sum(param.numel() for param in model.parameters())
        print('[Network] Total number of parameters : %.3f M' % (num_params / 1e6))
        print('-----------------------------------------------')

    calpara(self.model)

    network.weights_normal_init(self.loss_fn_, dev=0.01)

    use_gpu = len(self.opt.gpus) > 0
    if use_gpu:
        assert torch.cuda.is_available(), \
            'opt.gpus is non-empty but CUDA is not available'
        self.model.to(self.device)
        self.model = torch.nn.DataParallel(self.model, self.opt.gpus)  # multi-GPUs

    # Only losses whose name contains 'SSIM' are replicated across GPUs; every
    # other configuration (including CPU-only runs) uses the bare loss module,
    # so self.loss_fn is defined on every path.
    parallel_loss = (use_gpu
                     and self.opt.loss is not None
                     and 'SSIM' in self.opt.loss)
    if parallel_loss:
        self.loss_fn_.to(self.device)
        self.loss_fn = torch.nn.DataParallel(self.loss_fn_, self.opt.gpus)  # multi-GPUs
    else:
        self.loss_fn = self.loss_fn_
def init_model(self, model_path=None):
    """Load a checkpoint into ``self.model`` or initialise it from scratch.

    Args:
        model_path: Optional checkpoint path. When given, it is passed to
            ``network.load_net`` with ``prefix='model.module.'``. When
            ``None``, the model is initialised with ``dev=1e-6``, the front
            end is loaded from ``pretrained_models/pruned_VGG.h5`` and
            selected biases are filled with a constant.
    """
    # NOTE(review): recovered from a whitespace-collapsed source; the bias
    # resets are assumed to belong to the from-scratch branch - confirm.
    if model_path is None:
        net = self.model
        bias_fill = -2.19

        network.weights_normal_init(net, dev=1e-6)
        network.load_net('pretrained_models/pruned_VGG.h5', net.front_end, skip=True)

        # Every direct child of passing_weight4 gets its bias overwritten.
        for _, layer in net.passing_weight4.named_children():
            layer.bias.data.fill_(bias_fill)

        # In decoder5 only the child registered under the name '1' is touched.
        for child_name, layer in net.decoder5.named_children():
            if child_name == '1':
                layer.conv.bias.data.fill_(bias_fill)
    else:
        network.load_net(model_path, self.model, prefix='model.module.')
def __init__(self, nhidden, n_object_cats, n_predicate_cats, MPS_iter,
             object_loss_weight, predicate_loss_weight, dropout=False,
             use_kmeans_anchors=True, base_model='vgg'):
    """Build the phrase/predicate sub-modules around a ``FasterRCNN`` module.

    Args:
        nhidden: Width of the hidden FC layers.
        n_object_cats: Forwarded to the parent constructor, ``FasterRCNN``
            and the message-passing structure.
        n_predicate_cats: Forwarded to the parent constructor and the
            message-passing structure.
        MPS_iter: Forwarded to the parent constructor; ``0`` disables the
            message-passing structure (``self.mps`` is ``None``).
        object_loss_weight: Forwarded to the parent constructor.
        predicate_loss_weight: Forwarded to the parent constructor.
        dropout: Forwarded to the parent constructor and stored on ``self``.
        use_kmeans_anchors: Forwarded to ``FasterRCNN``.
        base_model: ``'vgg'``, ``'resnet50'`` or ``'resnet101'``; selects the
            input size of ``fc6_phrase``.

    Raises:
        ValueError: if ``base_model`` is not one of the supported names.
            Previously this only printed a message and left
            ``self.fc6_phrase`` undefined.
    """
    # Leading factor of the fc6_phrase input size (N * 7 * 7); presumably the
    # channel count of the backbone feature map - TODO confirm.
    if base_model == 'vgg':
        feature_planes = 512
    elif base_model == 'resnet50' or base_model == 'resnet101':
        feature_planes = 1024
    else:
        # Fail fast, before any sub-module is built.
        raise ValueError(
            "unsupported base_model %r: please choose a model from "
            "'vgg', 'resnet50' or 'resnet101'" % (base_model,))

    super(Hierarchical_Descriptive_Model, self).__init__(
        nhidden, n_object_cats, n_predicate_cats, MPS_iter,
        object_loss_weight, predicate_loss_weight, dropout)

    self.dropout = dropout
    # self.rpn = RPN(use_kmeans_anchors)
    self.rcnn = FasterRCNN(nhidden, use_kmeans_anchors, n_object_cats, model=base_model)
    # self.roi_pool_object = RoIPool(7, 7, 1.0/16)
    self.roi_pool_phrase = RoIAlign(7, 7, 1.0/16)

    # self.fc6 = FC(feature_planes*7*7, nhidden)
    self.fc6_phrase = FC(feature_planes*7*7, nhidden, relu=True)
    # self.fc7 = FC(nhidden, nhidden, relu=True)
    self.fc7_phrase = FC(nhidden, nhidden, relu=True)
    self.spacial_conv = SpacialConv(pooling_size=32)

    if MPS_iter == 0:
        self.mps = None
    else:
        # the hierarchical message passing structure
        self.mps = Hierarchical_Message_Passing_Structure(
            nhidden, n_object_cats, n_predicate_cats)
        network.weights_normal_init(self.mps, 0.01)

    # self.score_fc = FC(nhidden, self.n_classes_obj, relu=False)
    # self.bbox_fc = FC(nhidden, self.n_classes_obj * 4, relu=False)
    # The predicate classifier takes nhidden + 64 inputs.
    self.score_fc_pred = FC(nhidden+64, self.n_classes_pred, relu=False)
    # self.bbox_pred_fc = FC(nhidden, self.n_classes_pred * 4, relu=False)

    # network.weights_normal_init(self.score_fc, 0.01)
    # network.weights_normal_init(self.bbox_fc, 0.005)
    network.weights_normal_init(self.score_fc_pred, 0.01)
def __init__(self, optimizer, opt):
    """Build the counting model, its loss and (optionally) its optimizer.

    Args:
        optimizer: ``None`` or a callable that receives this instance and
            returns an optimizer; the result is stored as ``self.optimizer``
            and its gradients are zeroed.
        opt: Options object. The attributes read here are ``gpus``,
            ``model_name``, ``loss`` and ``pretrain``.

    Notes:
        * ``self.device`` is ``opt.gpus[0]`` when GPUs are configured and the
          CPU device otherwise (the original raised ``IndexError`` on an
          empty list even though later code guards for that case).
        * ``self.loss_fn`` is always assigned; it is a ``DataParallel``
          wrapper only when GPUs are used and ``opt.loss`` contains
          ``'SSIM'``.

    Raises:
        AssertionError: if GPUs are requested but CUDA is unavailable.
    """
    super(CrowdCounter, self).__init__()
    self.opt = opt

    use_gpu = len(opt.gpus) > 0
    self.device = opt.gpus[0] if use_gpu else torch.device('cpu')

    # find_model_using_name returns a factory that is called without arguments.
    self.model = self.find_model_using_name(opt.model_name)()

    if opt.loss == 'MSE':
        self.loss_fn_ = nn.MSELoss(reduction='sum')
    else:
        self.loss_fn_ = self.find_loss_using_name(opt.loss or 'MSE')()

    # Models whose name contains 'DLA' skip init_model unless a pretrained
    # checkpoint was explicitly requested.
    if 'DLA' not in opt.model_name or opt.pretrain is not None:
        self.init_model(opt.pretrain)

    def calpara(model):
        """Print the number of parameters of ``model`` in millions."""
        print('---------- Networks initialized -------------')
        num_params = sum(param.numel() for param in model.parameters())
        print('[Network] Total number of parameters : %.3f M' % (num_params / 1e6))
        print('-----------------------------------------------')

    calpara(self.model)

    network.weights_normal_init(self.loss_fn_, dev=0.01)

    if use_gpu:
        assert torch.cuda.is_available(), \
            'opt.gpus is non-empty but CUDA is not available'
        self.model.to(self.device)
        self.model = torch.nn.DataParallel(self.model, self.opt.gpus)  # multi-GPUs

    parallel_loss = (use_gpu
                     and self.opt.loss is not None
                     and 'SSIM' in self.opt.loss)
    if parallel_loss:
        self.loss_fn_.to(self.device)
        self.loss_fn = torch.nn.DataParallel(
            self.loss_fn_, self.opt.gpus)  # multi-GPUs
    else:
        self.loss_fn = self.loss_fn_

    if optimizer is not None:
        self.optimizer = optimizer(self)
        self.optimizer.zero_grad()
# Training-script excerpt: read hyper-parameters from cfg.TRAIN, build the
# dataset and data layer, create the WSDDN network and start copying matching
# pretrained AlexNet tensors into it.
# NOTE(review): this excerpt begins mid-script and stops inside the `try:`
# below; its handler is outside the visible source.
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load imdb and create data layer
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)
#pdb.set_trace()

# Create network and initialize
net = WSDDN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.001)
# Use the locally cached pretrained weights if present; otherwise download
# them once and cache them as a pickle next to the script.
if os.path.exists('pretrained_alexnet.pkl'):
    # NOTE(review): the cache is written with 'wb' below but read here in text
    # mode 'r'; pickle needs a binary file object on Python 3 - confirm the
    # target interpreter. Neither file handle is explicitly closed.
    pret_net = pkl.load(open('pretrained_alexnet.pkl', 'r'))
else:
    pret_net = model_zoo.load_url(
        'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth')
    pkl.dump(pret_net, open('pretrained_alexnet.pkl', 'wb'), pkl.HIGHEST_PROTOCOL)
own_state = net.state_dict()
# Copy every pretrained entry whose name also exists in the new network.
for name, param in pret_net.items():
    if name not in own_state:
        continue
    if isinstance(param, Parameter):
        # Unwrap Parameter objects so the raw tensor is copied.
        param = param.data
    try:
        own_state[name].copy_(param)
def __init__(self, nhidden, n_object_cats, n_predicate_cats, n_vocab,
             voc_sign, max_word_length, MPS_iter, use_language_loss,
             object_loss_weight, predicate_loss_weight, dropout=False,
             use_kmeans_anchors=False, gate_width=128, nhidden_caption=256,
             nembedding=256, rnn_type="LSTM_normal", rnn_droptout=0.0,
             rnn_bias=False, use_region_reg=False, use_kernel=False):
    """Assemble the object / phrase / region branches and the caption model.

    Most arguments are forwarded unchanged to the parent constructor. Those
    used directly here are:

    Args:
        nhidden: Width of every FC layer built in this constructor.
        MPS_iter: ``0`` disables message passing (``self.mps`` is ``None``).
        use_language_loss: Chooses between the fully configured caption model
            and a minimal placeholder one.
        dropout: Also passed to the message-passing structure.
        use_kmeans_anchors: Also passed to ``RPN``.
        gate_width: Passed to the message-passing structure.
        rnn_droptout: Passed as ``dropout`` to the caption model.
        rnn_bias: Passed as ``bias`` to the caption model.
        use_kernel: Passed as ``use_kernel_function`` to the message-passing
            structure.
    """
    super(Hierarchical_Descriptive_Model, self).__init__(
        nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
        max_word_length, MPS_iter, use_language_loss, object_loss_weight,
        predicate_loss_weight, dropout, use_kmeans_anchors, nhidden_caption,
        nembedding, rnn_type, use_region_reg)

    # Arguments shared by the three RoI pooling layers, and the input size
    # shared by the three fc6 layers.
    roi_pool_args = (7, 7, 1.0 / 16)
    fc6_in_features = 512 * 7 * 7

    self.rpn = RPN(use_kmeans_anchors)
    self.roi_pool_object = RoIPool(*roi_pool_args)
    self.roi_pool_phrase = RoIPool(*roi_pool_args)
    self.roi_pool_region = RoIPool(*roi_pool_args)

    # Each branch is an fc6 (with ReLU) followed by an fc7 (without ReLU).
    self.fc6_obj = FC(fc6_in_features, nhidden, relu=True)
    self.fc7_obj = FC(nhidden, nhidden, relu=False)
    self.fc6_phrase = FC(fc6_in_features, nhidden, relu=True)
    self.fc7_phrase = FC(nhidden, nhidden, relu=False)
    self.fc6_region = FC(fc6_in_features, nhidden, relu=True)
    self.fc7_region = FC(nhidden, nhidden, relu=False)

    if MPS_iter != 0:
        # the hierarchical message passing structure
        self.mps = Hierarchical_Message_Passing_Structure(
            nhidden, dropout,
            gate_width=gate_width, use_kernel_function=use_kernel)
        network.weights_normal_init(self.mps, 0.01)
    else:
        self.mps = None

    self.score_obj = FC(nhidden, self.n_classes_obj, relu=False)
    self.bbox_obj = FC(nhidden, self.n_classes_obj * 4, relu=False)
    self.score_pred = FC(nhidden, self.n_classes_pred, relu=False)

    if not self.use_region_reg:
        self.bbox_region = None
    else:
        self.bbox_region = FC(nhidden, 4, relu=False)
        network.weights_normal_init(self.bbox_region, 0.01)

    self.objectiveness = FC(nhidden, 2, relu=False)

    if use_language_loss:
        caption_kwargs = dict(
            rnn_type=self.rnn_type,
            ntoken=self.n_vocab,
            nimg=self.nhidden,
            nhidden=self.nhidden_caption,
            nembed=self.nembedding,
            nlayers=2,
            nseq=self.max_word_length,
            voc_sign=self.voc_sign,
            bias=rnn_bias,
            dropout=rnn_droptout,
        )
    else:
        # just to make the program run
        caption_kwargs = dict(
            rnn_type=self.rnn_type,
            ntoken=self.n_vocab,
            nimg=1,
            nhidden=1,
            nembed=1,
            nlayers=1,
            nseq=1,
            voc_sign=self.voc_sign,
        )
    self.caption_prediction = Language_Model(**caption_kwargs)

    # Output heads are initialised last, in the original order.
    for head, dev in (
            (self.score_obj, 0.01),
            (self.bbox_obj, 0.005),
            (self.score_pred, 0.01),
            (self.objectiveness, 0.01)):
        network.weights_normal_init(head, dev)

    self.objectiveness_loss = None
def __init__(self, nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign,
             object_loss_weight, predicate_loss_weight, dropout=False,
             use_kmeans_anchors=False, use_kernel=False,
             disable_spatial_model=False, spatial_type='dual_mask',
             pool_type='roi_pooling', disable_iteration_model=False,
             iteration_type='cat_embed', idx2obj=None, idx2rel=None):
    """Build the RPN, pooling layers, FC stacks and output heads of Full_Net.

    All arguments except ``use_kernel``, ``idx2obj`` and ``idx2rel`` are
    forwarded to the parent constructor. ``use_kernel`` is not referenced in
    this constructor; ``idx2obj`` / ``idx2rel`` are stored on ``self``.

    NOTE(review): this block was recovered from a source whose line breaks
    and indentation were collapsed. The pairing of each ``else`` below is the
    most plausible reading, not a verified one - confirm against the
    original file before relying on it.
    """
    super(Full_Net, self).__init__(nhidden, n_object_cats, n_predicate_cats, n_vocab, voc_sign, object_loss_weight, predicate_loss_weight, dropout, use_kmeans_anchors, disable_spatial_model, spatial_type, pool_type, disable_iteration_model, iteration_type)

    self.rpn = RPN(use_kmeans_anchors)
    self.roi_pool = RoIPool(7, 7, 1.0 / 16)
    # Exactly one relation-pooling layer is created, depending on
    # self.pool_type (presumably set by the parent constructor - verify).
    if self.pool_type == 'roi_pooling':
        self.roi_pool_rel = RoIPool(7, 7, 1.0 / 16)
    if self.pool_type == 'spatial_attention':
        self.mask_roi_pool = MaskRoIPool(7, 7, 1.0 / 16)
    if self.pool_type == 'dual_roipooling':
        self.dualmask_roi_pool = DualMaskRoIPool(7, 7, 1.0 / 16)

    # Two parallel fc6/fc7 stacks; the "_r" pair shares the same sizes.
    self.fc6 = FC(512 * 7 * 7, nhidden, relu=True)
    self.fc7 = FC(nhidden, nhidden, relu=True)
    self.fc6_r = FC(512 * 7 * 7, nhidden, relu=True)
    self.fc7_r = FC(nhidden, nhidden, relu=True)

    if not self.disable_spatial_model:
        # NOTE(review): this test reads the constructor argument while the
        # next one reads self.spatial_type - confirm both hold the same value.
        if spatial_type == 'dual_mask':
            self.dm = DualMask(nhidden)
        # NOTE(review): 25488 differs from the 512 * 7 * 7 (= 25088) used for
        # fc6 above - confirm the intended input size.
        if self.spatial_type == 'gaussian_model':
            self.gmm = GaussianMixtureModel(25488, nhidden)
        self.fc10_r = FC(2 * nhidden, nhidden, relu=True)
        network.weights_normal_init(self.fc10_r, 0.01)
    else:
        # NOTE(review): assumed to pair with `if not self.disable_spatial_model`.
        self.gsf = GeometricSpatialFeature(nhidden, dropout)

    # Hard-coded switch: the TransE module is never built as written.
    TransEmbedding = False
    if TransEmbedding:
        self.TransE = TranslationEmbedding(nhidden, dropout)

    if not self.disable_iteration_model:
        # One iteration module is created according to self.iteration_type.
        if self.iteration_type == 'use_brnn':
            self.lstm = BrnnStructure(nhidden, dropout)
        if self.iteration_type == 'cat_embed':
            self.embed = Concat(nhidden, dropout)
        if self.iteration_type == 'iteration':
            self.iter = GraphicalModel(nhidden, dropout)
    else:
        # NOTE(review): assumed to pair with `if not self.disable_iteration_model`.
        self.fc8 = FC(2 * nhidden, nhidden, relu=True)
        self.fc9 = FC(nhidden, nhidden, relu=True)
        network.weights_normal_init(self.fc8, 0.01)
        network.weights_normal_init(self.fc9, 0.01)

    # Output heads: object scores, predicate scores, and 4 box values per
    # object class.
    self.score = FC(nhidden, self.n_classes_obj, relu=False)
    self.score_r = FC(nhidden, self.n_classes_pred, relu=False)
    self.boundingbox = FC(nhidden, self.n_classes_obj * 4, relu=False)
    network.weights_normal_init(self.score, 0.01)
    network.weights_normal_init(self.score_r, 0.01)
    network.weights_normal_init(self.boundingbox, 0.005)

    self.bad_img_flag = False
    # for plotting of training
    self.idx2obj = idx2obj
    self.idx2rel = idx2rel
    self.trainImgCount = 0