def main(**kwargs):
    """Configure and run one Flickr phrase-localization evaluation.

    Keys read from ``kwargs``: ``dataset``, ``exp_name``, ``no_context``,
    ``random_lang``, ``subset``, ``model_num`` and ``cap_info_nce_layers``.

    A ``model_num`` of ``-100`` is a sentinel: the checkpoint number is then
    read from the ``model_num`` entry of ``results_val_best.json`` in the
    experiment directory.
    """
    exp_base_dir = coco_paths['exp_dir']
    if kwargs['dataset'] == 'flickr':
        exp_base_dir = flickr_paths['exp_dir']

    exp_const = ExpConstants(kwargs['exp_name'], exp_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.seed = 0
    exp_const.contextualize = not kwargs['no_context']
    exp_const.random_lang = kwargs['random_lang']

    data_const = FlickrDatasetConstants(kwargs['subset'])

    model_const = Constants()
    model_const.model_num = kwargs['model_num']
    model_const.object_encoder = ObjectEncoderConstants()
    model_const.object_encoder.context_layer.output_attentions = True
    model_const.object_encoder.object_feature_dim = 2048
    model_const.cap_encoder = CapEncoderConstants()
    model_const.cap_encoder.output_attentions = True
    model_const.cap_info_nce_layers = kwargs['cap_info_nce_layers']

    if model_const.model_num == -100:
        # Resolve the sentinel to the checkpoint recorded in the
        # previously written "best" results file.
        best_results_path = os.path.join(
            exp_const.exp_dir, 'results_val_best.json')
        best_results = io.load_json_object(best_results_path)
        model_const.model_num = best_results['model_num']
        print('Selected model num:', model_const.model_num)

    def checkpoint_path(prefix):
        """Return ``<model_dir>/<prefix>_<model_num>``."""
        return os.path.join(
            exp_const.model_dir, f'{prefix}_{model_const.model_num}')

    model_const.object_encoder_path = checkpoint_path('object_encoder')
    model_const.lang_sup_criterion_path = checkpoint_path('lang_sup_criterion')
    if exp_const.random_lang is True:
        # A caption-encoder checkpoint path is only configured in this mode.
        model_const.cap_encoder_path = checkpoint_path('cap_encoder')

    eval_flickr_phrase_loc.main(exp_const, data_const, model_const)
def main(**kwargs):
    """Assemble experiment, dataset and model constants and start training.

    Keys read from ``kwargs``: ``dataset``, ``exp_name``, ``lr``,
    ``train_batch_size``, ``val_frequently``, ``neg_noun_loss_wt``,
    ``self_sup_loss_wt``, ``lang_sup_loss_wt``, ``no_context``,
    ``random_lang``, ``model_num`` and ``cap_info_nce_layers``.

    The constants are handed to ``train(exp_const, data_const, model_const)``,
    where ``data_const`` maps ``'train'`` and ``'val'`` to dataset constants.
    """
    exp_base_dir = coco_paths['exp_dir']
    use_flickr = kwargs['dataset'] == 'flickr'
    if use_flickr:
        exp_base_dir = flickr_paths['exp_dir']

    exp_const = ExpConstants(kwargs['exp_name'], exp_base_dir)
    exp_dir = exp_const.exp_dir
    exp_const.log_dir = os.path.join(exp_dir, 'logs')
    exp_const.model_dir = os.path.join(exp_dir, 'models')
    exp_const.vis_dir = os.path.join(exp_dir, 'vis')
    exp_const.dataset = kwargs['dataset']
    exp_const.optimizer = 'Adam'
    exp_const.lr = kwargs['lr']
    exp_const.momentum = None
    exp_const.num_epochs = 10
    exp_const.log_step = 20

    # Save models approx. twice every epoch, e.g. 4000 = 400000 // (2 * 50).
    train_batch_size = kwargs['train_batch_size']
    save_numerator = 150000 if use_flickr else 400000
    exp_const.model_save_step = save_numerator // (2 * train_batch_size)

    # Validate on every second save by default, on every save when
    # `val_frequently` is set (set to 1*model_save_step for plotting
    # mi vs perf).
    val_freq_factor = 1 if kwargs['val_frequently'] is True else 2
    exp_const.val_step = val_freq_factor * exp_const.model_save_step

    exp_const.num_val_samples = None
    exp_const.train_batch_size = train_batch_size
    exp_const.val_batch_size = 20
    exp_const.num_workers = 10
    exp_const.seed = 0
    exp_const.neg_noun_loss_wt = kwargs['neg_noun_loss_wt']
    exp_const.self_sup_loss_wt = kwargs['self_sup_loss_wt']
    exp_const.lang_sup_loss_wt = kwargs['lang_sup_loss_wt']
    exp_const.contextualize = not kwargs['no_context']
    exp_const.random_lang = kwargs['random_lang']

    DatasetConstants = (
        FlickrDatasetConstants if use_flickr else CocoDatasetConstants)
    data_const = {
        subset: DatasetConstants(subset) for subset in ('train', 'val')
    }

    model_const = Constants()
    model_const.model_num = kwargs['model_num']
    model_const.object_encoder = ObjectEncoderConstants()
    model_const.object_encoder.context_layer.output_attentions = True
    model_const.object_encoder.object_feature_dim = 2048
    model_const.cap_encoder = CapEncoderConstants()
    model_const.cap_encoder.output_attentions = True
    model_const.cap_info_nce_layers = kwargs['cap_info_nce_layers']

    def checkpoint_path(prefix):
        """Return ``<model_dir>/<prefix>_<model_num>``."""
        return os.path.join(
            exp_const.model_dir, f'{prefix}_{model_const.model_num}')

    model_const.object_encoder_path = checkpoint_path('object_encoder')
    model_const.self_sup_criterion_path = checkpoint_path('self_sup_criterion')
    model_const.lang_sup_criterion_path = checkpoint_path('lang_sup_criterion')

    train(exp_const, data_const, model_const)
def main(exp_const, data_const, model_const):
    """Evaluate one checkpoint on a Flickr subset and save the results as JSON.

    Seeds NumPy and PyTorch, builds the object encoder, caption encoder and
    language-supervision criterion, optionally restores them from the paths
    in ``model_const`` (skipped when ``model_const.model_num`` is ``-1``),
    runs ``eval_model`` under ``torch.no_grad()`` and writes the returned
    results to ``results_<subset>_<model_num>.json`` in ``exp_const.exp_dir``.
    """
    seed = exp_const.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    print('Creating network ...')
    model = Constants()
    model.const = model_const
    model.object_encoder = ObjectEncoder(model.const.object_encoder)
    model.cap_encoder = CapEncoder(model.const.cap_encoder)

    encoder_const = model.object_encoder.const
    object_dim = encoder_const.object_feature_dim
    # The equality test (rather than truthiness) is kept on purpose so the
    # rewrite behaves exactly like the original for non-bool flags.
    if exp_const.contextualize == True:
        o_dim = encoder_const.context_layer.hidden_size
    else:
        o_dim = object_dim

    lang_dim = model.cap_encoder.model.config.hidden_size
    model.lang_sup_criterion = create_cap_info_nce_criterion(
        o_dim,
        object_dim,
        lang_dim,
        lang_dim // 2,
        model.const.cap_info_nce_layers)

    if model.const.model_num != -1:
        object_encoder_ckpt = torch.load(model.const.object_encoder_path)
        print('Loaded model number:', object_encoder_ckpt['step'])
        model.object_encoder.load_state_dict(object_encoder_ckpt['state_dict'])

        criterion_ckpt = torch.load(model.const.lang_sup_criterion_path)
        model.lang_sup_criterion.load_state_dict(criterion_ckpt['state_dict'])

        if exp_const.random_lang is True:
            cap_encoder_ckpt = torch.load(model.const.cap_encoder_path)
            model.cap_encoder.load_state_dict(cap_encoder_ckpt['state_dict'])

    for module in (
            model.object_encoder,
            model.cap_encoder,
            model.lang_sup_criterion):
        module.cuda()

    print('Creating dataloader ...')
    dataset = FlickrDataset(data_const)

    with torch.no_grad():
        results = eval_model(model, dataset, exp_const)

    results_path = os.path.join(
        exp_const.exp_dir,
        f'results_{data_const.subset}_{model_const.model_num}.json')
    io.dump_json_object(results, results_path)
def main(**kwargs):
    """Configure and launch attention visualization on COCO or Flickr.

    Keys read from ``kwargs``: ``train_dataset``, ``exp_name``,
    ``vis_dataset``, ``no_context`` and ``model_num``.

    ``train_dataset`` selects the experiment base directory, while
    ``vis_dataset`` selects the dataset constants and the visualization
    routine (``vis_att`` for ``'coco'``, ``vis_att_flickr`` otherwise).
    A ``model_num`` of ``-100`` selects the ``best_*`` checkpoint files.
    """
    exp_base_dir = coco_paths['exp_dir']
    if kwargs['train_dataset'] == 'flickr':
        exp_base_dir = flickr_paths['exp_dir']

    exp_const = ExpConstants(kwargs['exp_name'], exp_base_dir)
    exp_const.log_dir = os.path.join(exp_const.exp_dir, 'logs')
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.train_dataset = kwargs['train_dataset']
    exp_const.vis_dataset = kwargs['vis_dataset']
    exp_const.vis_dir = os.path.join(
        exp_const.exp_dir, f'vis/attention_{exp_const.vis_dataset}')
    exp_const.num_vis_samples = 50
    exp_const.seed = 0
    exp_const.contextualize = not kwargs['no_context']

    vis_on_coco = exp_const.vis_dataset == 'coco'
    vis_on_flickr = exp_const.vis_dataset == 'flickr'

    DatasetConstants = (
        FlickrDatasetConstants if vis_on_flickr else CocoDatasetConstants)
    data_const = DatasetConstants('val')
    if vis_on_coco:
        data_const.image_dir = os.path.join(
            coco_paths['image_dir'], data_const.subset_image_dirname)
    data_const.read_neg_samples = False
    data_const.read_noun_adj_tokens = False

    model_const = Constants()
    model_const.model_num = kwargs['model_num']
    model_const.object_encoder = ObjectEncoderConstants()
    model_const.object_encoder.object_feature_dim = 2048
    model_const.cap_encoder = CapEncoderConstants()

    if model_const.model_num == -100:
        # Sentinel: use the checkpoints saved under the "best_" names.
        object_encoder_name = 'best_object_encoder'
        criterion_name = 'best_lang_sup_criterion'
    else:
        object_encoder_name = f'object_encoder_{model_const.model_num}'
        criterion_name = f'lang_sup_criterion_{model_const.model_num}'
    model_const.object_encoder_path = os.path.join(
        exp_const.model_dir, object_encoder_name)
    model_const.lang_sup_criterion_path = os.path.join(
        exp_const.model_dir, criterion_name)

    visualize = vis_att if vis_on_coco else vis_att_flickr
    visualize(exp_const, data_const, model_const)
def main(exp_const, data_const, model_const):
    """Build the model, optionally restore a checkpoint, and run ``eval_model``.

    Seeds NumPy and PyTorch, creates the experiment, log, model and
    visualization directories, constructs the object encoder, caption
    encoder and language-supervision criterion, restores the object encoder
    and criterion unless ``model_const.model_num`` is ``-1``, and calls
    ``eval_model`` with a shuffled, batch-size-1 ``DataLoader`` over
    ``CocoDataset(data_const)``.
    """
    seed = exp_const.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    for directory in (
            exp_const.log_dir, exp_const.model_dir, exp_const.vis_dir):
        io.mkdir_if_not_exists(directory)

    print('Creating network ...')
    model = Constants()
    model.const = model_const
    model.object_encoder = ObjectEncoder(model.const.object_encoder)
    model.cap_encoder = CapEncoder(model.const.cap_encoder)

    encoder_const = model.object_encoder.const
    object_dim = encoder_const.object_feature_dim
    # The equality test (rather than truthiness) is kept on purpose so the
    # rewrite behaves exactly like the original for non-bool flags.
    if exp_const.contextualize == True:
        o_dim = encoder_const.context_layer.hidden_size
    else:
        o_dim = object_dim

    # NOTE(review): no `cap_info_nce_layers` argument is passed here, so the
    # criterion is built with whatever default the factory uses -- confirm
    # that matches the checkpoint being loaded.
    lang_dim = model.cap_encoder.model.config.hidden_size
    model.lang_sup_criterion = create_cap_info_nce_criterion(
        o_dim,
        object_dim,
        lang_dim,
        lang_dim // 2)

    if model.const.model_num != -1:
        print('Loading model num', model.const.model_num, '...')
        object_encoder_ckpt = torch.load(model.const.object_encoder_path)
        print(object_encoder_ckpt['step'])
        model.object_encoder.load_state_dict(object_encoder_ckpt['state_dict'])

        criterion_ckpt = torch.load(model.const.lang_sup_criterion_path)
        model.lang_sup_criterion.load_state_dict(criterion_ckpt['state_dict'])

    for module in (
            model.object_encoder,
            model.cap_encoder,
            model.lang_sup_criterion):
        module.cuda()

    print('Creating dataloader ...')
    loader = DataLoader(
        CocoDataset(data_const),
        batch_size=1,
        shuffle=True,
        num_workers=1)

    eval_model(model, loader, exp_const)
def main(**kwargs):
    """Select the checkpoint with the highest ``pt_recall`` and save its results.

    Keys read from ``kwargs``: ``dataset``, ``exp_name``, ``no_context``,
    ``random_lang``, ``subset`` and ``cap_info_nce_layers``.

    For every model number found in the experiment's ``models`` directory,
    the matching ``results_<subset>_<num>.json`` file (if present) is
    loaded; the entry with the highest ``pt_recall`` (ties go to the later
    model number in iteration order) is written, together with its
    ``model_num``, to ``results_<subset>_best.json``.

    Raises:
        RuntimeError: if no results file yields a usable ``pt_recall``.
            Previously this case crashed with a ``TypeError`` from
            subscripting ``None``.
    """
    exp_base_dir = coco_paths['exp_dir']
    if kwargs['dataset'] == 'flickr':
        exp_base_dir = flickr_paths['exp_dir']

    exp_const = ExpConstants(kwargs['exp_name'], exp_base_dir)
    exp_const.model_dir = os.path.join(exp_const.exp_dir, 'models')
    exp_const.seed = 0
    exp_const.contextualize = not kwargs['no_context']
    exp_const.random_lang = kwargs['random_lang']

    data_const = FlickrDatasetConstants(kwargs['subset'])

    model_const = Constants()
    model_const.object_encoder = ObjectEncoderConstants()
    model_const.object_encoder.context_layer.output_attentions = True
    model_const.object_encoder.object_feature_dim = 2048
    model_const.cap_encoder = CapEncoderConstants()
    model_const.cap_encoder.output_attentions = True
    model_const.cap_info_nce_layers = kwargs['cap_info_nce_layers']

    def results_path(tag):
        """Return the results JSON path for a model number or ``'best'``."""
        return os.path.join(
            exp_const.exp_dir, f'results_{data_const.subset}_{tag}.json')

    model_nums = find_all_model_numbers(exp_const.model_dir)
    for num in model_nums:
        # NOTE(review): this unconditional `continue` disables the whole
        # evaluation pass below, so only results files that already exist
        # on disk take part in the selection. It is kept to preserve the
        # current behaviour; remove it to (re-)evaluate checkpoints.
        continue

        if num <= 3000:
            continue

        model_const.model_num = num
        model_const.object_encoder_path = os.path.join(
            exp_const.model_dir,
            f'object_encoder_{model_const.model_num}')
        model_const.lang_sup_criterion_path = os.path.join(
            exp_const.model_dir,
            f'lang_sup_criterion_{model_const.model_num}')
        if exp_const.random_lang is True:
            model_const.cap_encoder_path = os.path.join(
                exp_const.model_dir,
                f'cap_encoder_{model_const.model_num}')

        filename = results_path(num)
        if os.path.exists(filename):
            # Already evaluated: show the cached results and move on.
            print(io.load_json_object(filename))
            continue

        eval_flickr_phrase_loc.main(exp_const, data_const, model_const)

    best_results = None
    best_pt_recall = 0
    for num in model_nums:
        filename = results_path(num)
        if not os.path.exists(filename):
            continue

        results = io.load_json_object(filename)
        # Stored on the dict itself, so the selected entry already carries
        # its model number when it is written out below.
        results['model_num'] = num
        print(results)

        if results['pt_recall'] >= best_pt_recall:
            best_results = results
            best_pt_recall = results['pt_recall']

    if best_results is None:
        raise RuntimeError(
            f'No usable results_{data_const.subset}_<model_num>.json file '
            f'found in {exp_const.exp_dir}; cannot select a best model.')

    print('-' * 80)
    print(best_results)
    io.dump_json_object(best_results, results_path('best'))
def main(exp_const, data_const, model_const):
    """Build the model, criteria and dataloaders, then call ``train_model``.

    Seeds NumPy and PyTorch, creates the output directories, saves the
    constants to the experiment directory, constructs the object encoder,
    caption encoder and both criteria, restores the object encoder and the
    criteria unless ``model_const.model_num`` is ``-1``, and creates one
    shuffled ``DataLoader`` per entry of ``data_const``.

    Raises:
        NotImplementedError: if ``exp_const.dataset`` is neither ``'coco'``
            nor ``'flickr'``.
    """
    seed = exp_const.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    io.mkdir_if_not_exists(exp_const.exp_dir, recursive=True)
    for directory in (
            exp_const.log_dir, exp_const.model_dir, exp_const.vis_dir):
        io.mkdir_if_not_exists(directory)

    tb_writer = SummaryWriter(log_dir=exp_const.log_dir)

    model_num = model_const.model_num
    constants_to_save = {
        f'exp_{model_num}': exp_const,
        f'data_train_{model_num}': data_const['train'],
        f'data_val_{model_num}': data_const['val'],
        f'model_{model_num}': model_const,
    }
    save_constants(constants_to_save, exp_const.exp_dir)

    print('Creating network ...')
    model = Constants()
    model.const = model_const
    model.object_encoder = ObjectEncoder(model.const.object_encoder)
    model.cap_encoder = CapEncoder(model.const.cap_encoder)
    if exp_const.random_lang is True:
        # NOTE(review): no caption-encoder checkpoint is loaded below, so a
        # resumed run also starts from this fresh initialisation -- confirm
        # that this is intended.
        model.cap_encoder.random_init()

    encoder_const = model.object_encoder.const
    object_dim = encoder_const.object_feature_dim
    context_dim = encoder_const.context_layer.hidden_size
    # Both criteria use the contextualized width when contextualization is
    # on and the raw object-feature width otherwise. The equality test
    # (rather than truthiness) is kept on purpose to preserve behaviour.
    if exp_const.contextualize == True:
        feature_dim = context_dim
    else:
        feature_dim = object_dim

    model.self_sup_criterion = create_info_nce_criterion(
        object_dim,
        feature_dim,
        context_dim)

    lang_dim = model.cap_encoder.model.config.hidden_size
    model.lang_sup_criterion = create_cap_info_nce_criterion(
        feature_dim,
        object_dim,
        lang_dim,
        lang_dim // 2,
        model.const.cap_info_nce_layers)

    if model.const.model_num != -1:
        restore_plan = (
            (model.object_encoder, model.const.object_encoder_path),
            (model.self_sup_criterion, model.const.self_sup_criterion_path),
            (model.lang_sup_criterion, model.const.lang_sup_criterion_path),
        )
        for module, checkpoint_path in restore_plan:
            module.load_state_dict(torch.load(checkpoint_path)['state_dict'])

    for module in (
            model.object_encoder,
            model.cap_encoder,
            model.self_sup_criterion,
            model.lang_sup_criterion):
        module.cuda()

    description_files = (
        (model.object_encoder, 'object_encoder.txt'),
        (model.self_sup_criterion, 'self_supervised_criterion.txt'),
        (model.lang_sup_criterion, 'lang_supervised_criterion.txt'),
    )
    for module, basename in description_files:
        module.to_file(os.path.join(exp_const.exp_dir, basename))

    print('Creating dataloader ...')
    if exp_const.dataset == 'coco':
        Dataset = CocoDataset
    elif exp_const.dataset == 'flickr':
        Dataset = FlickrDataset
    else:
        raise NotImplementedError(f'{exp_const.dataset} not implemented')

    dataloaders = {}
    for mode, const in data_const.items():
        dataset = Dataset(const)
        if mode == 'train':
            batch_size = exp_const.train_batch_size
        else:
            batch_size = exp_const.val_batch_size

        # Every split is shuffled, including validation.
        dataloaders[mode] = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=exp_const.num_workers)

    train_model(model, dataloaders, exp_const, tb_writer)