def build_model(args, vocab):
    """Create an ``Sg2ImModel`` from scratch or restore one from a checkpoint.

    If ``args.checkpoint_start_from`` is ``None`` the constructor keyword
    arguments are assembled from ``args`` and ``vocab``. Otherwise they are
    read from the checkpoint's ``'model_kwargs'`` entry and the weights stored
    under ``'model_state'`` are loaded into the freshly built model.

    Args:
        args: Namespace-like object carrying the model hyper-parameters and
            ``checkpoint_start_from``.
        vocab: Vocabulary forwarded to the model as the ``vocab`` keyword
            (only used when no checkpoint is given).

    Returns:
        A ``(model, kwargs)`` tuple, where ``kwargs`` is the dict that was
        passed to the ``Sg2ImModel`` constructor.
    """
    if args.checkpoint_start_from is None:
        kwargs = {
            'vocab': vocab,
            'image_size': args.image_size,
            'embedding_dim': args.embedding_dim,
            'gconv_dim': args.gconv_dim,
            'gconv_hidden_dim': args.gconv_hidden_dim,
            'gconv_num_layers': args.gconv_num_layers,
            'mlp_normalization': args.mlp_normalization,
            'refinement_dims': args.refinement_network_dims,
            'normalization': args.normalization,
            'activation': args.activation,
            'mask_size': args.mask_size,
            'layout_noise_dim': args.layout_noise_dim,
        }
        return Sg2ImModel(**kwargs), kwargs

    checkpoint = torch.load(args.checkpoint_start_from)
    kwargs = checkpoint['model_kwargs']
    model = Sg2ImModel(**kwargs)

    # Parameter names saved with a leading 'module.' are renamed without it
    # (presumably the prefix comes from a wrapper module used during
    # training -- TODO confirm).
    prefix = 'module.'
    state_dict = {
        (name[len(prefix):] if name.startswith(prefix) else name): value
        for name, value in checkpoint['model_state'].items()
    }
    model.load_state_dict(state_dict)
    return model, kwargs
def main(args):
    """Generate an image for already-loaded scene graphs and save it to disk.

    Restores an ``Sg2ImModel`` from ``args.checkpoint``, runs
    ``model.forward_json`` on ``args.scene_graphs_json`` (used directly as the
    scene-graph object, not opened as a file) and writes the result to
    ``img<args.id>.png`` inside ``args.output_dir``. When
    ``args.draw_scene_graphs == 1`` a drawing of the scene graph is written to
    ``sg<args.id>.png``.

    The original wrapped the body in ``try ... except (): return False``; an
    empty tuple matches no exception, so that handler could never run.
    Exceptions therefore still propagate to the caller, exactly as before.

    Args:
        args: Namespace-like object providing ``checkpoint``, ``output_dir``,
            ``device`` (``'cpu'`` or ``'gpu'``), ``scene_graphs_json``,
            ``id`` (a string) and ``draw_scene_graphs``.

    Returns:
        ``True`` once the outputs are written, or ``None`` when the
        checkpoint file does not exist.

    Raises:
        ValueError: If ``args.device`` is neither ``'cpu'`` nor ``'gpu'``.
    """
    if not os.path.isfile(args.checkpoint):
        print('ERROR: Checkpoint file "%s" not found' % args.checkpoint)
        print('Maybe you forgot to download pretraind models? Try running:')
        print('bash scripts/download_models.sh')
        return

    if not os.path.isdir(args.output_dir):
        print('Output directory "%s" does not exist; creating it'
              % args.output_dir)
        os.makedirs(args.output_dir)

    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        device = torch.device('cuda:0')
        if not torch.cuda.is_available():
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `device`.
        raise ValueError(
            'Unsupported device "%s"; expected "cpu" or "gpu"' % args.device)

    # Load the model, with a bit of care in case there are no GPUs
    map_location = 'cpu' if device == torch.device('cpu') else None
    checkpoint = torch.load(args.checkpoint, map_location=map_location)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'])
    model.eval()
    model.to(device)

    # The scene graphs arrive already parsed; nothing is read from disk here.
    scene_graphs = args.scene_graphs_json
    print(type(scene_graphs))
    print('Loaded graph!')

    # Run the model forward
    with torch.no_grad():
        imgs, boxes_pred, masks_pred, _ = model.forward_json(scene_graphs)
    imgs = imagenet_deprocess_batch(imgs)

    # Save the generated images. The file name depends only on args.id, so
    # if the batch holds several images only the last one remains on disk.
    for i in range(imgs.shape[0]):
        img_np = imgs[i].numpy().transpose(1, 2, 0)
        img_path = os.path.join(args.output_dir, 'img' + args.id + '.png')
        imwrite(img_path, img_np)
    print('Drawing now!')

    # Draw the scene graphs
    if args.draw_scene_graphs == 1:
        for sg in scene_graphs:
            sg_img = vis.draw_scene_graph(sg['objects'], sg['relationships'])
            # Fix: the original appended "% i" to '.png', which has no format
            # placeholder and raised TypeError before anything was written.
            sg_img_path = os.path.join(args.output_dir,
                                       'sg' + args.id + '.png')
            imwrite(sg_img_path, sg_img)
    return True
def main():
    """Generate images with two scene-graph models and pickle the results.

    Restores one model from ``sg2im-models/vg64.pt`` and one from
    ``vg_only.pt``, runs ``generate_img`` with each of them on every item
    yielded by ``get_info(1000)``, and stores the outputs in
    ``their_gen_imgs.pickle`` and ``my_gen_imgs.pickle``. Each pickle holds a
    dict mapping a running counter to ``(image_array, i)``, where ``i`` is the
    third element yielded by ``get_info``.
    """
    # Load the models, with a bit of care in case there are no GPUs
    print('loading scene gen model...')
    device = torch.device('cuda:0')
    map_location = 'cpu' if device == torch.device('cpu') else None

    def _restore(path):
        # Rebuild a model from its checkpoint, switch it to eval mode and
        # move it to the target device.
        ckpt = torch.load(path, map_location=map_location)
        net = Sg2ImModel(**ckpt['model_kwargs'])
        net.load_state_dict(ckpt['model_state'])
        net.eval()
        net.to(device)
        return net

    their_model = _restore('sg2im-models/vg64.pt')
    my_model = _restore('vg_only.pt')

    num_eval = 1000
    print('getting', str(num_eval), ' q, a, images...')

    images_theirs = {}
    images_mine = {}
    # Axis order applied to every generated batch; the original author's note
    # says the result "should be of size 1,64,64,3 I think".
    axes = (0, 2, 3, 1)
    for ct, (objs, triples, i, img) in enumerate(get_info(num_eval)):
        print(ct)
        raw_theirs = generate_img(objs, triples, their_model)
        raw_mine = generate_img(objs, triples, my_model)
        images_theirs[ct] = (np.transpose(np.array(raw_theirs), axes), i)
        images_mine[ct] = (np.transpose(np.array(raw_mine), axes), i)

    outputs = (
        ('their_gen_imgs.pickle', images_theirs),
        ('my_gen_imgs.pickle', images_mine),
    )
    for path, payload in outputs:
        with open(path, 'wb') as handle:
            pickle.dump(payload, handle)
def main(args):
    """Generate images from a scene-graph JSON file and save them as PNGs.

    Restores an ``Sg2ImModel`` from ``args.checkpoint`` (non-strict state-dict
    loading), runs ``model.forward_json`` on the scene graphs read from
    ``args.scene_graphs_json`` and writes one ``img%06d.png`` per generated
    image into ``args.output_dir``. When ``args.draw_scene_graphs == 1`` a
    drawing of every scene graph is written as ``sg%06d.png``.

    Args:
        args: Namespace-like object providing ``checkpoint``, ``output_dir``,
            ``device`` (``'cpu'`` or ``'gpu'``), ``scene_graphs_json`` (path
            to a JSON file) and ``draw_scene_graphs``.

    Returns:
        ``None``. Returns early, after printing a hint, when the checkpoint
        file does not exist.

    Raises:
        ValueError: If ``args.device`` is neither ``'cpu'`` nor ``'gpu'``.
    """
    import numpy as np

    if not os.path.isfile(args.checkpoint):
        print('ERROR: Checkpoint file "%s" not found' % args.checkpoint)
        print('Maybe you forgot to download pretraind models? Try running:')
        print('bash scripts/download_models.sh')
        return

    if not os.path.isdir(args.output_dir):
        print('Output directory "%s" does not exist; creating it'
              % args.output_dir)
        os.makedirs(args.output_dir)

    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        device = torch.device('cuda:0')
        if not torch.cuda.is_available():
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `device`.
        raise ValueError(
            'Unsupported device "%s"; expected "cpu" or "gpu"' % args.device)

    # Load the model, with a bit of care in case there are no GPUs
    map_location = 'cpu' if device == torch.device('cpu') else None
    checkpoint = torch.load(args.checkpoint, map_location=map_location)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    # strict=False: keys that do not match between checkpoint and model are
    # tolerated instead of raising.
    model.load_state_dict(checkpoint['model_state'], strict=False)
    model.eval()
    model.to(device)

    # Load the scene graphs
    with open(args.scene_graphs_json, 'r') as f:
        scene_graphs = json.load(f)

    # Run the model forward
    with torch.no_grad():
        (imgs, boxes_pred, masks_pred, objs, layout, layout_boxes_t,
         layout_masks, obj_to_img, sg_context_pred, _, _) = \
            model.forward_json(scene_graphs)
    imgs = imagenet_deprocess_batch(imgs)

    # Fix: the model may live on cuda:0, and Tensor.numpy() refuses CUDA
    # tensors, so copy to host memory first (a no-op for CPU tensors).
    layout_boxes = layout_boxes_t.cpu().numpy()

    # Save the generated images
    np_imgs = []
    for i in range(imgs.shape[0]):
        # The deprocessed image is scaled by 255 before the uint8 cast
        # (assumes imagenet_deprocess_batch yields values in [0, 1] here --
        # TODO confirm).
        img_np = (imgs[i].numpy().transpose(1, 2, 0) * 255.0).astype(np.uint8)
        img_path = os.path.join(args.output_dir, 'img%06d.png' % i)
        imwrite(img_path, img_np)
        np_imgs.append(img_np)

    # Draw the scene graphs
    if args.draw_scene_graphs == 1:
        for i, sg in enumerate(scene_graphs):
            sg_img = vis.draw_scene_graph(sg['objects'], sg['relationships'])
            sg_img_path = os.path.join(args.output_dir, 'sg%06d.png' % i)
            imwrite(sg_img_path, sg_img)
def build_model(args, checkpoint):
    """Restore an ``Sg2ImModel`` from an in-memory checkpoint and move it to CUDA.

    Args:
        args: Namespace-like object providing ``model_mode`` and
            ``image_size``.
        checkpoint: Mapping with ``'model_kwargs'`` (constructor keyword
            arguments) and ``'model_state'`` (state dict).

    Returns:
        The restored model. It is switched to eval or train mode when
        ``args.model_mode`` is ``'eval'`` or ``'train'``; any other value
        leaves the mode untouched. ``model.image_size`` is overwritten with
        ``args.image_size``.
    """
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'])

    # Pick the mode switch, if any, that matches the requested mode.
    switch_mode = {
        'eval': model.eval,
        'train': model.train,
    }.get(args.model_mode)
    if switch_mode is not None:
        switch_mode()

    model.image_size = args.image_size
    model.cuda()
    return model
def main(args):
    """Run ``check_model`` on the validation split with a restored model.

    Restores an ``Sg2ImModel`` from ``args.checkpoint`` (non-strict state-dict
    loading), builds the data loaders, makes sure ``args.output_dir`` exists
    and calls ``check_model`` on the validation loader. The call is bracketed
    by writing ``[`` and ``]`` to ``vg_captions_500.json`` (presumably
    ``check_model`` appends the entries in between -- TODO confirm).

    Args:
        args: Namespace-like object providing ``device`` (``'cpu'`` or
            ``'gpu'``), ``checkpoint``, ``output_dir``, ``timing`` and
            whatever ``build_loaders`` / ``check_model`` read.
    """
    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        if torch.cuda.is_available():
            device = torch.device('cuda:0')
        else:
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')

    # Load the model, with a bit of care in case there are no GPUs
    on_cpu = device == torch.device('cpu')
    checkpoint = torch.load(args.checkpoint,
                            map_location='cpu' if on_cpu else None)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'], strict=False)
    model.eval()
    model.to(device)

    vocab, train_loader, val_loader = build_loaders(args)

    out_dir = args.output_dir
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
        print('Created %s' % out_dir)

    captions_path = 'vg_captions_500.json'
    with timeit('forward', args.timing):
        print('checking on val')
        import json  # kept from the original; not referenced in this function
        # Open the JSON list before validation ...
        with open(captions_path, 'w') as f:
            f.write('[')
        val_results = check_model(args, 1, val_loader, model, device,
                                  logger=None, log_tag='Validation',
                                  write_images=True)
        # ... and close it afterwards.
        with open(captions_path, 'a') as f:
            f.write(']')
def build_model(args, vocab):
    """Build a fresh ``Sg2ImModel`` from command-line style arguments.

    Args:
        args: Namespace-like object carrying the model hyper-parameters.
        vocab: Vocabulary forwarded to the model as the ``vocab`` keyword.

    Returns:
        A ``(model, kwargs)`` tuple, where ``kwargs`` is the dict that was
        passed to the ``Sg2ImModel`` constructor.
    """
    # (constructor keyword, attribute on args); only 'refinement_dims' is
    # read from a differently named attribute.
    arg_sources = (
        ('image_size', 'image_size'),
        ('embedding_dim', 'embedding_dim'),
        ('gconv_dim', 'gconv_dim'),
        ('gconv_hidden_dim', 'gconv_hidden_dim'),
        ('gconv_num_layers', 'gconv_num_layers'),
        ('mlp_normalization', 'mlp_normalization'),
        ('refinement_dims', 'refinement_network_dims'),
        ('normalization', 'normalization'),
        ('activation', 'activation'),
        ('mask_size', 'mask_size'),
        ('layout_noise_dim', 'layout_noise_dim'),
    )
    kwargs = {'vocab': vocab}
    for keyword, attr in arg_sources:
        kwargs[keyword] = getattr(args, attr)
    return Sg2ImModel(**kwargs), kwargs
def main(args):
    """Run ``check_model_predicate_debug`` on the validation split.

    Restores an ``Sg2ImModel`` from ``args.checkpoint`` (non-strict state-dict
    loading), builds the data loaders, makes sure ``args.output_dir`` exists
    and calls ``check_model_predicate_debug`` on the validation loader inside
    a ``timeit('forward', ...)`` block.

    Args:
        args: Namespace-like object providing ``device`` (``'cpu'`` or
            ``'gpu'``), ``checkpoint``, ``output_dir``, ``timing`` and
            whatever ``build_loaders`` / ``check_model_predicate_debug`` read.
    """
    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        if torch.cuda.is_available():
            device = torch.device('cuda:0')
        else:
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')

    # Load the model, with a bit of care in case there are no GPUs
    on_cpu = device == torch.device('cpu')
    checkpoint = torch.load(args.checkpoint,
                            map_location='cpu' if on_cpu else None)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'], strict=False)
    model.eval()
    model.to(device)

    vocab, train_loader, val_loader = build_loaders(args)

    out_dir = args.output_dir
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
        print('Created %s' % out_dir)

    # No logger is used for this run; the step counter is fixed at 1.
    with timeit('forward', args.timing):
        print('checking on val')
        check_model_predicate_debug(args, 1, val_loader, model,
                                    logger=None, log_tag='Validation',
                                    write_images=True)
def main():
    """Answer questions with the VQA model on three feature sets and evaluate.

    Restores the two scene-graph models, pulls 500 items from ``get_info``,
    restores the ``MutanAtt`` VQA model and runs ``inference`` for every item
    on the ground-truth features, "their" features and "my" features. The
    three answer lists, together with the answer tensors, the answers and the
    item indices, are handed to ``vg_eval``.
    """
    # Load the models, with a bit of care in case there are no GPUs
    print('loading scene gen model...')
    device = torch.device('cuda:0')
    map_location = 'cpu' if device == torch.device('cpu') else None

    def _restore(path):
        # Rebuild a model from its checkpoint, switch it to eval mode and
        # move it to the target device.
        ckpt = torch.load(path, map_location=map_location)
        net = Sg2ImModel(**ckpt['model_kwargs'])
        net.load_state_dict(ckpt['model_state'])
        net.eval()
        net.to(device)
        return net

    # Neither model is referenced again below; they are still restored so
    # that loading happens exactly as before.
    their_model = _restore('sg2im-models/vg64.pt')
    my_model = _restore('vg_only.pt')

    answers = []
    answer_tensors = []
    question_tensors = []
    feature_tensors = []
    their_feat_tensors = []
    my_feat_tensors = []
    i_s = []

    print('getting q, a, images...')
    # Each item carries eleven fields; the ground-truth image, question text,
    # objects and triples are not needed for the evaluation below.
    for (_gt_img, _question_set, answer_set, _obj, _triple,
         question_tensor_set, answer_tensor_set, feature_tensor,
         their_feats, my_feats, idx) in get_info(num_eval=500):  # 1000
        answers.append(answer_set)
        question_tensors.append(question_tensor_set)
        answer_tensors.append(answer_tensor_set)
        feature_tensors.append(feature_tensor)
        their_feat_tensors.append(their_feats)
        my_feat_tensors.append(my_feats)
        i_s.append(idx)

    # Disabled alternative kept from the original (vocab built from the
    # collected questions/answers instead of the training set):
    #   vocab_answers = []
    #   vocab_words = []
    #   for i in range(len(questions)):
    #       for j in range(len(questions[i])):
    #           vocab_answers.append(answers[i][j])
    #           words = questions[i][j].split()
    #           for word in words:
    #               vocab_words.append(word)
    #   print(len(vocab_answers), len(vocab_words))
    #   vqa_model = MutanAtt(options['model'], vocab_words, vocab_answers)

    print('loading vqa model...')
    vqa_model = MutanAtt(options['model'],
                         trainset.vocab_words(),
                         trainset.vocab_answers())
    path_ckpt_model = 'vqa_pytorch/vqa/mutan_att_trainval/ckpt_model.pth.tar'
    vqa_model.load_state_dict(torch.load(path_ckpt_model))
    vqa_model.eval()

    vqa_gt = []
    vqa_gen_theirs = []
    vqa_gen_mine = []
    per_item = zip(feature_tensors, their_feat_tensors, my_feat_tensors,
                   question_tensors)
    for gt_feats, theirs_feats, mine_feats, q_tensors in per_item:
        # Same questions, three different feature sources.
        vqa_gt.append(inference(vqa_model, gt_feats, q_tensors))
        vqa_gen_theirs.append(inference(vqa_model, theirs_feats, q_tensors))
        vqa_gen_mine.append(inference(vqa_model, mine_feats, q_tensors))

    vg_eval(answer_tensors, vqa_gt, vqa_gen_theirs, vqa_gen_mine, answers,
            i_s)
def main(args):
    """Generate images for every scene-graph file in a directory.

    Restores an ``Sg2ImModel`` from ``args.checkpoint``, loads every file in
    ``args.scene_graph_dir`` as JSON and runs ``model.forward_json`` on each.
    Files are processed in sorted name order, so the index in an output name
    always refers to the same input file. Scene graphs for which
    ``forward_json`` raises ``ValueError`` are reported and skipped.

    Output names in ``args.output_dir``:
        * one image per file:      ``img%06d.png`` / ``sg%06d.png``
        * several images per file: ``img%06d_%02d.png`` / ``sg%06d_%02d.png``
          (the original wrote them all to the same path, so only the last
          one survived).

    Args:
        args: Namespace-like object providing ``checkpoint``, ``output_dir``,
            ``device`` (``'cpu'`` or ``'gpu'``), ``scene_graph_dir`` and
            ``draw_scene_graphs``.

    Returns:
        ``None``. Returns early, after printing a hint, when the checkpoint
        file does not exist.

    Raises:
        ValueError: If ``args.device`` is neither ``'cpu'`` nor ``'gpu'``.
    """
    def _png_name(prefix, file_idx, item_idx, item_count):
        # Keep the historical name when a file yields a single item; add the
        # item index only when it is needed to avoid overwriting.
        if item_count == 1:
            return '%s%06d.png' % (prefix, file_idx)
        return '%s%06d_%02d.png' % (prefix, file_idx, item_idx)

    if not os.path.isfile(args.checkpoint):
        print('ERROR: Checkpoint file "%s" not found' % args.checkpoint)
        print('Maybe you forgot to download pretraind models? Try running:')
        print('bash scripts/download_models.sh')
        return

    if not os.path.isdir(args.output_dir):
        print('Output directory "%s" does not exist; creating it'
              % args.output_dir)
        os.makedirs(args.output_dir)

    if args.device == 'cpu':
        device = torch.device('cpu')
    elif args.device == 'gpu':
        device = torch.device('cuda:0')
        if not torch.cuda.is_available():
            print('WARNING: CUDA not available; falling back to CPU')
            device = torch.device('cpu')
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `device`.
        raise ValueError(
            'Unsupported device "%s"; expected "cpu" or "gpu"' % args.device)

    # Load the model, with a bit of care in case there are no GPUs
    map_location = 'cpu' if device == torch.device('cpu') else None
    checkpoint = torch.load(args.checkpoint, map_location=map_location)
    model = Sg2ImModel(**checkpoint['model_kwargs'])
    model.load_state_dict(checkpoint['model_state'])
    model.eval()
    model.to(device)

    # Load the scene graphs. os.listdir() returns entries in arbitrary order;
    # sorting makes the file -> output-index mapping reproducible.
    scene_graph_dir = args.scene_graph_dir
    scene_graphs = []
    for filename in sorted(os.listdir(scene_graph_dir)):
        print("opening file: {}".format(filename))
        with open(os.path.join(scene_graph_dir, filename), 'r') as f:
            scene_graphs.append(json.load(f))

    for sg_idx, sg in enumerate(scene_graphs):
        # Run the model forward
        with torch.no_grad():
            try:
                imgs, boxes_pred, masks_pred, _ = model.forward_json(sg)
            except ValueError as err:
                print("ValueError: {}".format(err))
                continue
        imgs = imagenet_deprocess_batch(imgs)

        # Save the generated images
        num_imgs = imgs.shape[0]
        for i in range(num_imgs):
            img_np = imgs[i].numpy().transpose(1, 2, 0)
            img_path = os.path.join(
                args.output_dir, _png_name('img', sg_idx, i, num_imgs))
            imwrite(img_path, img_np)

        # Draw the scene graphs
        if args.draw_scene_graphs == 1:
            # A file may hold one scene-graph dict rather than a list;
            # iterating a dict would yield its keys, so wrap it first.
            graphs = [sg] if isinstance(sg, dict) else sg
            for i, sg_ in enumerate(graphs):
                sg_img = vis.draw_scene_graph(sg_['objects'],
                                              sg_['relationships'])
                sg_img_path = os.path.join(
                    args.output_dir, _png_name('sg', sg_idx, i, len(graphs)))
                imwrite(sg_img_path, sg_img)
def main():
    """Compare VQA answers and generated images for a hand-picked set of items.

    Restores the two scene-graph models, pulls the items listed in ``i_s``
    from ``get_info``, restores the ``MutanAtt`` VQA model, runs ``inference``
    on the ground-truth, "their" and "my" features of every item and
    generates an image with each scene-graph model. Everything collected is
    handed to ``vg_eval``.
    """
    # Load the models, with a bit of care in case there are no GPUs
    print('loading scene gen model...')
    device = torch.device('cuda:0')
    map_location = 'cpu' if device == torch.device('cpu') else None

    def _restore(path):
        # Rebuild a model from its checkpoint, switch it to eval mode and
        # move it to the target device.
        ckpt = torch.load(path, map_location=map_location)
        net = Sg2ImModel(**ckpt['model_kwargs'])
        net.load_state_dict(ckpt['model_state'])
        net.eval()
        net.to(device)
        return net

    their_model = _restore('sg2im-models/vg64.pt')
    my_model = _restore('vg_only.pt')

    print('getting q, a, images...')
    # Earlier selections kept from the original for reference:
    # i_s = [225, 227, 209, 1556]  # good vs theirs
    # i_s = [204, 207, 1715, 675, 225]  # good vs gt
    # i_s = [198, 221, 547, 207, 859]  # bad vs them
    # i_s = [209, 215, 214, 1144, 217]  # bad vs gt
    # i_s = [200, 218, 203, 226, 651, 526, 216, 483, 543, 837, 635, 1003]
    i_s = [651, 226, 218, 209, 1003, 837]

    gt_imgs = []
    questions = []
    answers = []
    objs = []
    triples = []
    question_tensors = []
    answer_tensors = []
    feature_tensors = []
    their_feat_tensors = []
    my_feat_tensors = []
    for (gt_img, question_set, answer_set, obj, triple, question_tensor_set,
         answer_tensor_set, feature_tensor, their_feats,
         my_feats) in get_info(i_s, their_model):
        gt_imgs.append(gt_img)
        questions.append(question_set)
        answers.append(answer_set)
        objs.append(obj)
        triples.append(triple)
        question_tensors.append(question_tensor_set)
        answer_tensors.append(answer_tensor_set)
        feature_tensors.append(feature_tensor)
        their_feat_tensors.append(their_feats)
        my_feat_tensors.append(my_feats)

    print('loading vqa model...')
    vqa_model = MutanAtt(options['model'],
                         trainset.vocab_words(),
                         trainset.vocab_answers())
    path_ckpt_model = 'vqa_pytorch/vqa/mutan_att_trainval/ckpt_model.pth.tar'
    vqa_model.load_state_dict(torch.load(path_ckpt_model))
    vqa_model.eval()

    vqa_gt = []
    vqa_gen_theirs = []
    vqa_gen_mine = []
    # NOTE(review): the original named these lists the other way round
    # (`my_imgs` received the images of `their_model` and vice versa). The
    # contents and the positional order passed to vg_eval are unchanged here;
    # only the local names now match what the lists hold. Confirm against
    # vg_eval's parameter order whether the arguments should be swapped.
    imgs_from_their_model = []
    imgs_from_my_model = []

    per_item = zip(feature_tensors, their_feat_tensors, my_feat_tensors,
                   question_tensors, objs, triples)
    for gt_feats, theirs_feats, mine_feats, q_tensors, obj, triple in per_item:
        # Same questions, three different feature sources.
        vqa_gt.append(inference(vqa_model, gt_feats, q_tensors))
        vqa_gen_theirs.append(inference(vqa_model, theirs_feats, q_tensors))
        vqa_gen_mine.append(inference(vqa_model, mine_feats, q_tensors))

        # generating imgs
        raw_theirs = generate_img(obj, triple, their_model)
        raw_mine = generate_img(obj, triple, my_model)
        imgs_from_their_model.append(np.array(raw_theirs))
        imgs_from_my_model.append(np.array(raw_mine))

    vg_eval(answer_tensors, vqa_gt, vqa_gen_theirs, vqa_gen_mine, answers,
            i_s, questions, gt_imgs, imgs_from_their_model,
            imgs_from_my_model, objs, triples)