def build_model(opt, start_from):
    # Select the attention model; 'topdown' is the default choice.
    if opt.att_model == 'topdown':
        model = AttModel.TopDownModel(opt)
    else:
        model = AttModel.Att2in2Model(opt)

    if start_from is not None:
        if opt.load_best_score == 1:
            model_path = os.path.join(start_from, 'model-best.pth')
            info_path = os.path.join(start_from, 'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(start_from, 'model.pth')
            info_path = os.path.join(start_from, 'infos_' + opt.id + '.pkl')

        print('Loading the model weights from %s...' % model_path)
        model.load_state_dict(torch.load(model_path))

    if opt.mGPUs:
        model = nn.DataParallel(model)

    if opt.cuda:
        model.cuda()

    return model
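####################################################################################
# Usage sketch (illustrative, not part of the original file)
####################################################################################
# A minimal example of calling build_model. The Namespace fields below
# (att_model, load_best_score, id, mGPUs, cuda) are the ones the function
# reads; a real opt must also carry whatever hyperparameters
# AttModel.TopDownModel expects, which this sketch does not enumerate.
from argparse import Namespace

def _example_build_model():
    opt = Namespace(att_model='topdown', load_best_score=1, id='coco',
                    mGPUs=False, cuda=torch.cuda.is_available())
    # start_from=None builds a freshly initialized model; pass a checkpoint
    # directory instead to load 'model.pth' / 'model-best.pth' from it.
    return build_model(opt, start_from=None)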
opt.fg_mask = torch.from_numpy(dataset.fg_mask).byte()
opt.glove_fg = torch.from_numpy(dataset.glove_fg).float()
opt.glove_clss = torch.from_numpy(dataset.glove_clss).float()
opt.glove_w = torch.from_numpy(dataset.glove_w).float()
opt.st2towidx = torch.from_numpy(dataset.st2towidx).long()

opt.itow = dataset.itow
opt.itod = dataset.itod
opt.ltow = dataset.ltow
opt.itoc = dataset.itoc

if not opt.finetune_cnn:
    opt.fixed_block = 4  # if not finetuning, freeze all CNN blocks

if opt.att_model == 'topdown':
    model = AttModel.TopDownModel(opt)
elif opt.att_model == 'att2in2':
    model = AttModel.Att2in2Model(opt)

# tf is None when tensorflow is not installed, so the writer is optional.
tf_summary_writer = tf and tf.summary.FileWriter(opt.checkpoint_path)

infos = {}
histories = {}
if opt.start_from is not None:
    if opt.load_best_score == 1:
        model_path = os.path.join(opt.start_from, 'model-best.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '-best.pkl')
    else:
        model_path = os.path.join(opt.start_from, 'model.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')
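####################################################################################
# Note on the tf_summary_writer guard (sketch, an assumption about the file header)
####################################################################################
# The 'tf and tf.summary.FileWriter(...)' line above only works if tf was
# imported optionally and bound to None when tensorflow is absent; a common
# form of that optional import looks like this:
try:
    import tensorflow as tf
except ImportError:
    print("Tensorflow not installed; skipping tensorboard logging.")
    tf = None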
def init_model(opt):
    cudnn.benchmark = True

    ####################################################################################
    # Build the Model
    ####################################################################################
    from misc.dataloader_coco import DataLoader
    dataset = DataLoader(opt, split='train')

    opt.vocab_size = dataset.vocab_size
    opt.detect_size = dataset.detect_size
    opt.seq_length = opt.seq_length  # seq_length already arrives on opt; kept for symmetry
    opt.fg_size = dataset.fg_size
    opt.fg_mask = torch.from_numpy(dataset.fg_mask).byte()
    opt.glove_fg = torch.from_numpy(dataset.glove_fg).float()
    opt.glove_clss = torch.from_numpy(dataset.glove_clss).float()
    opt.glove_w = torch.from_numpy(dataset.glove_w).float()
    opt.st2towidx = torch.from_numpy(dataset.st2towidx).long()

    opt.itow = dataset.itow
    opt.itod = dataset.itod
    opt.ltow = dataset.ltow
    opt.itoc = dataset.itoc

    if not opt.finetune_cnn:
        opt.fixed_block = 4  # if not finetuning, freeze all CNN blocks

    if opt.att_model == 'topdown':
        model = AttModel.TopDownModel(opt)
    elif opt.att_model == 'att2in2':
        model = AttModel.Att2in2Model(opt)

    infos = {}
    histories = {}
    if opt.start_from is not None:
        if opt.load_best_score == 1:
            model_path = os.path.join(opt.start_from, 'model-best.pth')
            info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(opt.start_from, 'model.pth')
            info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')

        # Open the old infos and check whether the models are compatible.
        with open(info_path, 'rb') as f:
            infos = pickle.load(f, encoding='latin1')
            saved_model_opt = infos['opt']

        # opt.learning_rate = saved_model_opt.learning_rate
        print('Loading the model %s...' % (model_path))
        model.load_state_dict(torch.load(model_path))

        if os.path.isfile(os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl')):
            with open(os.path.join(opt.start_from, 'histories_' + opt.id + '.pkl'), 'rb') as f:
                histories = pickle.load(f, encoding='latin1')

    if opt.decode_noc:
        model._reinit_word_weight(opt, dataset.ctoi, dataset.wtoi)

    best_val_score = infos.get('best_val_score', None)
    iteration = infos.get('iter', 0)
    start_epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    if opt.cuda:
        model.cuda()

    # Build per-parameter optimizer groups: CNN weights use their own
    # learning rate / weight decay; everything else uses the main ones.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'cnn' in key:
                params += [{'params': [value], 'lr': opt.cnn_learning_rate,
                            'weight_decay': opt.cnn_weight_decay,
                            'betas': (opt.cnn_optim_alpha, opt.cnn_optim_beta)}]
            else:
                params += [{'params': [value], 'lr': opt.learning_rate,
                            'weight_decay': opt.weight_decay,
                            'betas': (opt.optim_alpha, opt.optim_beta)}]

    print("Using %s as the optimization method" % (opt.optim))
    if opt.optim == 'sgd':
        optimizer = optim.SGD(params, momentum=0.9)
    elif opt.optim == 'adam':
        optimizer = optim.Adam(params)
    elif opt.optim == 'adamax':
        optimizer = optim.Adamax(params)

    obj_det_model = prep_model('../faster-rcnn.pytorch/data_fr/pretrained_model/faster_rcnn_coco.pth')

    return obj_det_model, model
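####################################################################################
# Sketch: the two-rate optimizer grouping used in init_model
####################################################################################
# A self-contained illustration of the param-group loop above, shown on a tiny
# stand-in module. TinyCaptioner and make_param_groups are illustrative names,
# not part of the repo.
import torch.nn as nn
import torch.optim as optim

class TinyCaptioner(nn.Module):
    def __init__(self):
        super(TinyCaptioner, self).__init__()
        self.cnn_backbone = nn.Linear(8, 8)  # stands in for the CNN
        self.decoder = nn.Linear(8, 8)       # stands in for the language model

def make_param_groups(model, cnn_lr=1e-5, lr=5e-4):
    # Parameters whose names contain 'cnn' get the (usually smaller) CNN
    # learning rate; all others get the main learning rate.
    groups = []
    for name, p in model.named_parameters():
        if p.requires_grad:
            groups.append({'params': [p], 'lr': cnn_lr if 'cnn' in name else lr})
    return groups

example_optimizer = optim.Adam(make_param_groups(TinyCaptioner()))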
opt.seq_length = opt.seq_length
opt.glove_w = torch.from_numpy(dataset.glove_w).float()
opt.glove_vg_cls = torch.from_numpy(dataset.glove_vg_cls).float()
opt.glove_clss = torch.from_numpy(dataset.glove_clss).float()

opt.wtoi = dataset.wtoi
opt.itow = dataset.itow
opt.itod = dataset.itod
opt.ltow = dataset.ltow
opt.itoc = dataset.itoc
opt.wtol = dataset.wtol
opt.wtod = dataset.wtod
opt.vg_cls = dataset.vg_cls

if opt.att_model == 'topdown':
    model = AttModel.TopDownModel(opt)
elif opt.att_model == 'transformer':
    model = AttModel.TransformerModel(opt)

infos = {}
histories = {}
if opt.start_from is not None:
    if opt.load_best_score == 1:
        model_path = os.path.join(opt.start_from, 'model-best.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '-best.pkl')
    else:
        model_path = os.path.join(opt.start_from, 'model.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')
opt.glove_clss = torch.from_numpy(dataset.glove_clss).float()

opt.wtoi = dataset.wtoi
opt.itow = dataset.itow
opt.itod = dataset.itod
opt.ltow = dataset.ltow
opt.itoc = dataset.itoc
opt.vg_cls = dataset.vg_cls
opt.wtol = dataset.wtol
opt.wtod = dataset.wtod

if not opt.finetune_cnn:
    opt.fixed_block = 4  # if not finetuning, freeze all CNN blocks

if opt.att_model == 'topdown':
    model = AttModel.TopDownModel(opt)
else:
    raise ValueError('only the topdown att_model is supported!')

infos = {}
histories = {}
if opt.start_from is not None:
    if opt.load_best_score == 1:
        model_path = os.path.join(opt.start_from, 'model-best.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '-best.pkl')
    else:
        model_path = os.path.join(opt.start_from, 'model.pth')
        info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')
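####################################################################################
# Sketch: the resume step these fragments lead into
####################################################################################
# The fragments above stop right after choosing model_path/info_path. Based on
# init_model earlier in this section, the step that typically follows is:
#     with open(info_path, 'rb') as f:
#         infos = pickle.load(f, encoding='latin1')
#     model.load_state_dict(torch.load(model_path))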
def eval_nbt(opt):
    cudnn.benchmark = True

    if opt.dataset == 'flickr30k':
        from misc.dataloader_flickr30k import DataLoader
    else:
        from misc.dataloader_coco import DataLoader

    ####################################################################################
    # Data Loader
    ####################################################################################
    dataset_val = DataLoader(opt, split=opt.val_split)
    dataloader_val = torch.utils.data.DataLoader(dataset_val, batch_size=1,
                                                 shuffle=False, num_workers=1)

    # Pre-allocate the input tensors; they are resized per batch.
    input_imgs = torch.FloatTensor(1)
    input_seqs = torch.LongTensor(1)
    input_ppls = torch.FloatTensor(1)
    gt_bboxs = torch.FloatTensor(1)
    mask_bboxs = torch.ByteTensor(1)
    gt_seqs = torch.LongTensor(1)
    input_num = torch.LongTensor(1)

    if opt.cuda:
        input_imgs = input_imgs.cuda()
        input_seqs = input_seqs.cuda()
        gt_seqs = gt_seqs.cuda()
        input_num = input_num.cuda()
        input_ppls = input_ppls.cuda()
        gt_bboxs = gt_bboxs.cuda()
        mask_bboxs = mask_bboxs.cuda()

    input_imgs = Variable(input_imgs)
    input_seqs = Variable(input_seqs)
    gt_seqs = Variable(gt_seqs)
    input_num = Variable(input_num)
    input_ppls = Variable(input_ppls)
    gt_bboxs = Variable(gt_bboxs)
    mask_bboxs = Variable(mask_bboxs)

    ####################################################################################
    # Build the Model
    ####################################################################################
    opt.vocab_size = dataset_val.vocab_size
    opt.detect_size = dataset_val.detect_size
    opt.seq_length = opt.seq_length
    opt.fg_size = dataset_val.fg_size
    opt.fg_mask = torch.from_numpy(dataset_val.fg_mask).byte()
    opt.glove_fg = torch.from_numpy(dataset_val.glove_fg).float()
    opt.glove_clss = torch.from_numpy(dataset_val.glove_clss).float()
    opt.glove_w = torch.from_numpy(dataset_val.glove_w).float()
    opt.st2towidx = torch.from_numpy(dataset_val.st2towidx).long()

    opt.itow = dataset_val.itow
    opt.itod = dataset_val.itod
    opt.ltow = dataset_val.ltow
    opt.itoc = dataset_val.itoc

    # Choose the attention model.
    if opt.att_model == 'topdown':
        model = AttModel.TopDownModel(opt)
    else:
        model = AttModel.Att2in2Model(opt)

    if opt.start_from is not None:
        if opt.load_best_score == 1:
            model_path = os.path.join(opt.start_from, 'model-best.pth')
            info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(opt.start_from, 'model.pth')
            info_path = os.path.join(opt.start_from, 'infos_' + opt.id + '.pkl')

        print('Loading the model weights from %s...' % model_path)
        model.load_state_dict(torch.load(model_path))

    if opt.mGPUs:
        model = nn.DataParallel(model)

    if opt.cuda:
        model.cuda()

    ####################################################################################
    # Evaluate the Model
    ####################################################################################
    lang_stats, predictions = eval_NBT(opt, model, dataset_val, processing='eval')

    print('Evaluation results:')
    for k, v in lang_stats.items():
        print('{}: {}'.format(k, v))
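####################################################################################
# Sketch: invoking eval_nbt (illustrative, not from the repo)
####################################################################################
# This entry point assumes an option-parsing module in the style these scripts
# usually use; the module name 'opts' and the fields set here are assumptions.
if __name__ == '__main__':
    import opts  # assumed option-parsing module
    opt = opts.parse_opt()
    opt.val_split = 'test'  # e.g., evaluate on the test split
    eval_nbt(opt)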