def run_model(args, checkpoint, output_dir, loader=None):
    """Run a trained bm_FH* model over one data split and dump results.

    For every image this saves the predicted image (and optionally the GT
    image and the drawn scene graph) under ``output_dir`` and collects
    per-image tensors into ``output_dir/data.pt``.

    Args:
        args: parsed CLI namespace (model variant, FH file paths, which_data,
            use_gt_boxes/use_gt_masks, save_* flags, ...).
        checkpoint: loaded training checkpoint; supplies the vocab.
        output_dir: destination directory for images/, graphs/, images_gt/
            and data.pt.
        loader: optional pre-built data loader; built from the checkpoint
            when None.
    """
    vocab = checkpoint['model_kwargs']['vocab']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    img_dir = makedir(output_dir, 'images')
    graph_dir = makedir(output_dir, 'graphs', args.save_graphs)
    gt_img_dir = makedir(output_dir, 'images_gt', args.save_gt_imgs)
    data_path = os.path.join(output_dir, 'data.pt')

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }

    # Select the precomputed FH tensor files for the chosen model variant.
    # NOTE(review): any other args.model value leaves FH_dir_* unbound and
    # raises below — confirm the CLI restricts args.model to these four.
    if args.model == 'bm_FH':
        FH_dir_train, FH_dir_val = args.FH_dir_train, args.FH_dir_val
    if args.model == 'bm_FHrec':
        FH_dir_train, FH_dir_val = args.FHrec_dir_train, args.FHrec_dir_val
    if args.model == 'bm_FH64':
        FH_dir_train, FH_dir_val = args.FH64_dir_train, args.FH64_dir_val
    if args.model == 'bm_FHrec64':
        FH_dir_train, FH_dir_val = args.FHrec64_dir_train, args.FHrec64_dir_val

    FH_objs_train, FH_edges_train, IDs_train = torch.load(FH_dir_train)
    FH_objs_val, FH_edges_val, IDs_val = torch.load(FH_dir_val)
    IDs_train = torch.tensor(IDs_train)
    IDs_val = torch.tensor(IDs_val)
    if args.which_data == 'train':
        IDs = IDs_train
        FH_objs = FH_objs_train
        FH_edges = FH_edges_train
    else:
        IDs = IDs_val
        FH_objs = FH_objs_val
        FH_edges = FH_edges_val

    img_idx = 0
    for batch in loader:
        # Loaders for the FH models yield 8 tensors; the trailing imgs_ids is
        # the per-image dataset id used to look up FH features.  The original
        # code also carried dead len(batch)==6/7 unpack branches whose result
        # was immediately clobbered by this 8-way unpack (and which would
        # crash for genuinely 6/7-element batches); they were removed.
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]

        # Gather the FH features for each image in the batch by dataset id.
        fh_obj, fh_edge = [], []
        for i in range(imgs_ids.shape[0]):
            idd = ((IDs == imgs_ids[i].item()).nonzero())
            fh_obj.append(FH_objs[idd])
            fh_edge.append(FH_edges[idd])
        fh_obj = torch.cat(fh_obj)
        fh_edge = torch.cat(fh_edge)

        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks

        # Run the model (predicted masks unless masks_gt is supplied).
        model_out = model(objs, triples, fh_obj, fh_edge, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
        imgs_pred, boxes_pred, masks_pred, _ = model_out
        imgs_pred = imagenet_deprocess_batch(imgs_pred)

        # Split flat batch tensors into per-image lists.
        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        for i in range(imgs_pred.size(0)):
            img_filename = '%04d.png' % img_idx
            if args.save_gt_imgs:
                img_gt = imgs_gt[i].numpy().transpose(1, 2, 0)
                img_gt_path = os.path.join(gt_img_dir, img_filename)
                imsave(img_gt_path, img_gt)

            img_pred_np = imgs_pred[i].numpy().transpose(1, 2, 0)
            img_path = os.path.join(img_dir, img_filename)
            imsave(img_path, img_pred_np)

            data['objs'].append(objs[i].cpu().clone())
            data['masks_pred'].append(masks_pred[i].cpu().clone())
            data['boxes_pred'].append(boxes_pred[i].cpu().clone())
            data['boxes_gt'].append(boxes_gt[i].cpu().clone())
            data['filenames'].append(img_filename)

            cur_masks_gt = None
            if masks_gt is not None:
                cur_masks_gt = masks_gt[i].cpu().clone()
            data['masks_gt'].append(cur_masks_gt)

            if args.save_graphs:
                graph_img = draw_scene_graph(vocab, objs[i], triples[i])
                graph_path = os.path.join(graph_dir, img_filename)
                imsave(graph_path, graph_img)

            img_idx += 1

    torch.save(data, data_path)
    print('Saved %d images' % img_idx)
def check_model(args, t, loader, model):
    """Evaluate `model` on up to args.num_val_samples validation examples.

    Accumulates per-loss means and mean box IoU over the evaluated batches,
    then re-runs the model on the final batch with decreasing ground-truth
    supervision to produce qualitative sample images.

    Args:
        args: parsed CLI namespace (num_val_samples, loss weights, ...).
        t: current training iteration (kept for interface compatibility;
           unused here).
        loader: data loader yielding 6- or 7-tuples of CUDA-movable tensors.
        model: the generator model, called as during training.

    Returns:
        (mean_losses, samples, batch_data, avg_iou):
          mean_losses: dict loss-name -> mean value over evaluated batches.
          samples: dict name -> deprocessed image batch for visualization.
          batch_data: detached CPU clones of the last batch's tensors.
          avg_iou: mean IoU of predicted vs GT boxes.
    """
    num_samples = 0
    all_losses = defaultdict(list)
    total_iou = 0
    total_boxes = 0
    with torch.no_grad():
        for batch in loader:
            batch = [tensor.cuda() for tensor in batch]
            masks = None
            if len(batch) == 6:
                imgs, objs, boxes, triples, obj_to_img, triple_to_img = batch
            elif len(batch) == 7:
                imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = batch
            else:
                # Fail loudly instead of silently reusing stale variables
                # (or hitting a NameError) on an unexpected batch shape.
                raise ValueError('Unexpected batch of length %d' % len(batch))
            predicates = triples[:, 1]

            # Run the model exactly as it was run during training.
            model_masks = masks
            model_out = model(objs, triples, obj_to_img,
                              boxes_gt=boxes, masks_gt=model_masks)
            imgs_pred, boxes_pred, masks_pred, predicate_scores = model_out

            skip_pixel_loss = False
            total_loss, losses = calculate_model_losses(
                args, skip_pixel_loss, model, imgs, imgs_pred,
                boxes, boxes_pred, masks, masks_pred,
                predicates, predicate_scores)

            total_iou += jaccard(boxes_pred, boxes)
            total_boxes += boxes_pred.size(0)

            for loss_name, loss_val in losses.items():
                all_losses[loss_name].append(loss_val)
            num_samples += imgs.size(0)
            if num_samples >= args.num_val_samples:
                break

        # Qualitative samples from the last batch, with progressively less
        # ground-truth supervision (GT boxes+masks -> GT boxes -> nothing).
        samples = {}
        samples['gt_img'] = imgs
        model_out = model(objs, triples, obj_to_img,
                          boxes_gt=boxes, masks_gt=masks)
        samples['gt_box_gt_mask'] = model_out[0]
        model_out = model(objs, triples, obj_to_img, boxes_gt=boxes)
        samples['gt_box_pred_mask'] = model_out[0]
        model_out = model(objs, triples, obj_to_img)
        samples['pred_box_pred_mask'] = model_out[0]
        for k, v in samples.items():
            samples[k] = imagenet_deprocess_batch(v)

        mean_losses = {k: np.mean(v) for k, v in all_losses.items()}
        avg_iou = total_iou / total_boxes

        masks_to_store = masks
        if masks_to_store is not None:
            masks_to_store = masks_to_store.data.cpu().clone()
        masks_pred_to_store = masks_pred
        if masks_pred_to_store is not None:
            masks_pred_to_store = masks_pred_to_store.data.cpu().clone()

    batch_data = {
        'objs': objs.detach().cpu().clone(),
        'boxes_gt': boxes.detach().cpu().clone(),
        'masks_gt': masks_to_store,
        'triples': triples.detach().cpu().clone(),
        'obj_to_img': obj_to_img.detach().cpu().clone(),
        'triple_to_img': triple_to_img.detach().cpu().clone(),
        'boxes_pred': boxes_pred.detach().cpu().clone(),
        'masks_pred': masks_pred_to_store,
    }
    out = [mean_losses, samples, batch_data, avg_iou]
    return tuple(out)
def run_model(args, checkpoint, output_dir, fn, loader=None):
    """Evaluate predicted spatial relationships against ground truth.

    For every image, compares relationships (left of / right of / above /
    below / inside / surrounding) derived from predicted boxes+masks against
    those derived from the GT boxes+masks, writing per-image false-negative
    counts to <model>_FN_<fn>.csv and appending overall per-relationship
    accuracy to <model>_acc.csv.

    Args:
        args: parsed CLI namespace (model, which_data, use_gt_* flags, ...).
        checkpoint: loaded training checkpoint; supplies the vocab.
        output_dir: root directory for the CSV outputs.
        fn: run label, one of 'a'..'e' (mapped to 5%..100% below).
        loader: optional pre-built data loader.
    """
    vocab = checkpoint['model_kwargs']['vocab']
    print(vocab.keys())
    print(vocab['pred_name_to_idx'])
    # e.g. {'inside': 5, 'left of': 1, '__in_image__': 0, 'right of': 2,
    #       'below': 4, 'above': 3, 'surrounding': 6}
    dic_pred = vocab['pred_name_to_idx']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)
    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    makedir(output_dir, which_data)
    # Per-image false-negative CSV; header written once here, rows appended
    # inside the loop below.
    FN_path = os.path.join(output_dir, args.which_data,
                           args.model + '_FN_%s.csv' % fn)
    f = open(FN_path, 'w')
    with f:
        fieldnames = [
            'imageID', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
    # Relationship-id mapping used by get_relationship (which returns ids
    # offset by +1, see the `x - 1` below).
    class2idx = {
        "left of": 0,
        "right of": 1,
        "above": 2,
        "below": 3,
        "inside": 4,
        "surrounding": 5
    }
    idx2class = {v: k for k, v in class2idx.items()}
    count_edge_gt = []   # all GT relationship labels over the whole split
    count_edge_pre = []  # all predicted relationship labels
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        masks = None
        # Loader yields 8 tensors; imgs_ids are per-image dataset ids.
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]
        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks
        # Run the model; this variant returns only boxes and masks.
        model_out = model(objs, triples, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
        boxes_pred, masks_pred = model_out
        # Split flat batch tensors into per-image lists.
        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data
        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]
        for i in range(imgs_gt.size(0)):
            # Compare GT vs predicted relationship for every real edge
            # (predicate 0 is presumably __in_image__ and is skipped).
            triples_i = triples[i]
            img_id = imgs_ids[i].item()
            i_edge_gt, i_edge_pre = [], []
            for k in range(triples_i.shape[0]):
                if (triples_i[k][1] != 0):
                    idx_s, idx_o = triples_i[k][0], triples_i[k][2]
                    # Relationship from GT layout.
                    bbxs_of_img = boxes_gt[i]
                    masks_of_img = masks_gt[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    edge_gt = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_gt.append(edge_gt)
                    i_edge_gt.append(edge_gt)
                    # Relationship from predicted layout; predicted masks are
                    # soft, so binarize before comparing.
                    bbxs_of_img = boxes_pred[i]
                    masks_of_img = masks_pred[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    mask_s, mask_o = torch.round(mask_s).type(
                        torch.long), torch.round(mask_o).type(torch.long)
                    edge_pre = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_pre.append(edge_pre)
                    i_edge_pre.append(edge_pre)
            # NOTE(review): set() ordering is arbitrary, while
            # confusion_matrix (presumably sklearn) orders labels sorted —
            # dictOfclass assumes the two orders coincide; verify.
            edges_items = list(set(i_edge_gt + i_edge_pre))
            edges_items = [idx2class[x - 1] for x in edges_items]
            dictOfclass = {
                i: edges_items[i]
                for i in range(0, len(edges_items))
            }
            # Rows are predictions, columns are ground truth.
            cmi = confusion_matrix(i_edge_pre, i_edge_gt)
            total = cmi.sum(axis=1)
            numacc = [cmi[i][i] for i in range(cmi.shape[0])]
            numFP = total - numacc
            # Column-normalize to per-class accuracy.
            # NOTE(review): divides by zero if a class never appears in GT.
            cmi = cmi / cmi.sum(axis=0)
            acci = [cmi[i][i] for i in range(cmi.shape[0])]
            rowFP, rowAcc = {'imageID': img_id}, {'imageID': img_id}
            for q in range(len(dictOfclass)):
                rowFP.update({dictOfclass[q]: numFP[q]})
                rowAcc.update({dictOfclass[q]: acci[q]})
            img_idx += 1
            # Append this image's false-negative row to the CSV.
            FN_path = os.path.join(output_dir, args.which_data,
                                   args.model + '_FN_%s.csv' % fn)
            f = open(FN_path, 'a')
            with f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writerow(rowFP)
        print('%d images' % img_idx)
    print('gt', len(count_edge_gt))
    print('pre', len(count_edge_pre))
    # Overall confusion matrix over the whole split.
    cm = confusion_matrix(count_edge_pre, count_edge_gt)
    cm = cm / cm.sum(axis=0)
    confusion_matrix_df = pd.DataFrame(cm)
    print(confusion_matrix_df)
    # Run label -> fraction of FH data used (assumed from calling scripts —
    # TODO confirm).
    label = {'a': '5%', 'b': '10%', 'c': '20%', 'd': '50%', 'e': '100%'}
    acc = [
        confusion_matrix_df[i][i] for i in range(confusion_matrix_df.shape[0])
    ]
    print('acc', acc)
    edgenames = [
        'left of', 'right of', 'above', 'below', 'inside', 'surrounding'
    ]
    accTotal = {'model': label[fn]}
    for q in range(0, len(acc)):
        accTotal.update({edgenames[q]: acc[q]})
    # Append the overall per-relationship accuracies for this run.
    err_path = os.path.join(output_dir, args.which_data,
                            args.model + '_acc.csv')
    fil = open(err_path, 'a')
    with fil:
        fieldnames = [
            'model', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]
        writer = csv.DictWriter(fil, fieldnames=fieldnames)
        writer.writerow(accTotal)
    print('over')
def main(args):
    """Sample images from a pretrained layout generator + image model.

    Restores the model and LayoutGenerator from a checkpoint, then for each
    validation batch draws two sets of random geometry latents, generates
    layouts and images, and writes the images as JPEGs into
    args.output_folder.
    """
    check_args(args)
    float_dtype = torch.cuda.FloatTensor
    long_dtype = torch.cuda.LongTensor
    vocab, train_loader, val_loader = build_loaders(args)
    model, model_kwargs = build_model(args, vocab)
    model.type(float_dtype)
    model = model.cuda()
    # 184 is the per-object feature width expected by LayoutGenerator —
    # presumably embedding + geometry; TODO confirm against its definition.
    layoutgen = LayoutGenerator(args.batch_size,
                                args.max_objects_per_image + 1, 184).cuda()
    if (not os.path.exists(args.output_folder)):
        os.makedirs(args.output_folder)
    if (args.checkpoint_start_from is not None):
        model_path = args.checkpoint_start_from
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['model_state'])
        layoutgen.load_state_dict(checkpoint['layout_gen'])
    num_samples = 0
    for batchnum, batch in enumerate(tqdm(val_loader)):
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, combined, all_num_objs = batch
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, combined, all_num_objs = imgs.cuda(
        ), objs.cuda(), boxes.cuda(
        ), masks.cuda(), triples.cuda(), obj_to_img.cuda(), triple_to_img.cuda(
        ), combined.cuda(), all_num_objs.cuda()
        # Two independent samples per batch.
        for k in range(2):
            # Random 4-d geometry latent per object slot.
            zlist = []
            for i in range(args.batch_size):
                geo_z = torch.normal(0, 1,
                                     size=(args.max_objects_per_image + 1, 4))
                z = torch.FloatTensor(geo_z)
                zlist.append(z)
            zlist = torch.stack(zlist).cuda()
            # Concatenate the non-geometry part of `combined` (columns 4+)
            # onto the random geometry latents.
            zlist = torch.cat((zlist, combined[:, :, 4:]), dim=2)
            feature_vectors, logit_boxes = layoutgen(zlist.cuda())
            # Sigmoid to map box logits into [0, 1].
            generated_boxes = 1 / (1 + torch.exp(-1 * logit_boxes))
            # Flatten the padded per-image slots into one (num_objs, ...)
            # tensor, keeping only each image's real objects.
            new_gen_boxes = torch.empty((0, 4)).cuda()
            new_feature_vecs = torch.empty((0, args.embedding_dim)).cuda()
            for kb in range(args.batch_size):
                new_gen_boxes = torch.cat([
                    new_gen_boxes,
                    torch.squeeze(generated_boxes[kb, :all_num_objs[kb], :4])
                ],
                                          dim=0)
                new_feature_vecs = torch.cat([
                    new_feature_vecs,
                    torch.squeeze(feature_vectors[kb, :all_num_objs[kb], :])
                ],
                                             dim=0)
            boxes_pred = new_gen_boxes
            triples = None
            imgs_pred = model(new_feature_vecs, new_gen_boxes, triples,
                              obj_to_img)
            imgs_pred = imagenet_deprocess_batch(imgs_pred)
            for idx in range(imgs_pred.shape[0]):
                current_img = imgs_pred[idx, :, :, :].numpy().transpose(
                    1, 2, 0)
                cv2.imwrite(
                    os.path.join(
                        args.output_folder,
                        str(batchnum) + '_' + str(k) + '_' + str(idx) +
                        '.jpg'), current_img)
                num_samples += 1
                # NOTE(review): this break only exits the innermost image
                # loop, not the k/batch loops, so more than
                # args.num_sample_imgs images can be written — confirm intent.
                if (num_samples + 1 >= args.num_sample_imgs):
                    break
def run_model(args, checkpoint, output_dir, fn, loader=None):
    """Evaluate spatial relationships for a bm_FH/bm_FHrec model.

    Same evaluation as the non-FH variant — per-image relationship
    false-negatives to <model>_FN_<fn>.csv, overall per-relationship
    accuracy appended to <model>_acc.csv — but feeds precomputed FH
    object/edge features into the model, looked up per image by dataset id.
    """
    vocab = checkpoint['model_kwargs']['vocab']
    print(vocab.keys())
    print(vocab['pred_name_to_idx'])
    # e.g. {'inside': 5, 'left of': 1, '__in_image__': 0, 'right of': 2,
    #       'below': 4, 'above': 3, 'surrounding': 6}
    dic_pred = vocab['pred_name_to_idx']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)
    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    # Per-image false-negative CSV; header written once, rows appended below.
    FN_path = os.path.join(output_dir, args.which_data,
                           args.model + '_FN_%s.csv' % fn)
    f = open(FN_path, 'w')
    with f:
        fieldnames = [
            'imageID', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
    # Select the precomputed FH tensor files for the model variant.
    # NOTE(review): other args.model values leave FH_dir_* unbound — verify
    # the CLI restricts args.model here.
    if args.model == 'bm_FH':
        FH_dir_train = args.FH_dir_train
        FH_dir_val = args.FH_dir_val
    if args.model == 'bm_FHrec':
        FH_dir_train = args.FHrec_dir_train
        FH_dir_val = args.FHrec_dir_val
    FH_objs_train, FH_edges_train, IDs_train = torch.load(FH_dir_train)
    FH_objs_val, FH_edges_val, IDs_val = torch.load(FH_dir_val)
    IDs_train = torch.tensor(IDs_train)
    IDs_val = torch.tensor(IDs_val)
    if args.which_data == 'train':
        IDs = IDs_train
        FH_objs = FH_objs_train
        FH_edges = FH_edges_train
    else:
        IDs = IDs_val
        FH_objs = FH_objs_val
        FH_edges = FH_edges_val
    # Relationship-id mapping used by get_relationship (ids offset by +1,
    # see the `x - 1` below).
    class2idx = {
        "left of": 0,
        "right of": 1,
        "above": 2,
        "below": 3,
        "inside": 4,
        "surrounding": 5
    }
    idx2class = {v: k for k, v in class2idx.items()}
    count_edge_gt = []   # all GT relationship labels over the split
    count_edge_pre = []  # all predicted relationship labels
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        masks = None
        # Loader yields 8 tensors; imgs_ids are per-image dataset ids.
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]
        # Gather the FH features for each image in the batch by dataset id.
        fh_obj, fh_edge = [], []
        for i in range(imgs_ids.shape[0]):
            idd = ((IDs == imgs_ids[i].item()).nonzero())
            fh_obj_i = FH_objs[idd]
            fh_obj.append(fh_obj_i)
            fh_edge_i = FH_edges[idd]
            fh_edge.append(fh_edge_i)
        fh_obj = torch.cat(fh_obj)
        fh_edge = torch.cat(fh_edge)
        imgs_gt = imagenet_deprocess_batch(imgs)
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks
        # Run the model; this variant returns only boxes and masks.
        model_out = model(objs, triples, fh_obj, fh_edge, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
        boxes_pred, masks_pred = model_out
        # Split flat batch tensors into per-image lists.
        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data
        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]
        for i in range(imgs_gt.size(0)):
            # Compare GT vs predicted relationship for every real edge
            # (predicate 0 is presumably __in_image__ and is skipped).
            triples_i = triples[i]
            img_id = imgs_ids[i].item()
            i_edge_gt, i_edge_pre = [], []
            for k in range(triples_i.shape[0]):
                if (triples_i[k][1] != 0):
                    idx_s, idx_o = triples_i[k][0], triples_i[k][2]
                    # Relationship from GT layout.
                    bbxs_of_img = boxes_gt[i]
                    masks_of_img = masks_gt[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    edge_gt = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_gt.append(edge_gt)
                    i_edge_gt.append(edge_gt)
                    # Relationship from predicted layout; predicted masks
                    # are soft, so binarize before comparing.
                    bbxs_of_img = boxes_pred[i]
                    masks_of_img = masks_pred[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    mask_s, mask_o = torch.round(mask_s).type(
                        torch.long), torch.round(mask_o).type(torch.long)
                    edge_pre = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_pre.append(edge_pre)
                    i_edge_pre.append(edge_pre)
            # NOTE(review): set() ordering is arbitrary, while
            # confusion_matrix (presumably sklearn) orders labels sorted —
            # dictOfclass assumes the two orders coincide; verify.
            edges_items = list(set(i_edge_gt + i_edge_pre))
            edges_items = [idx2class[x - 1] for x in edges_items]
            dictOfclass = {
                i: edges_items[i]
                for i in range(0, len(edges_items))
            }
            # Rows are predictions, columns are ground truth.
            cmi = confusion_matrix(i_edge_pre, i_edge_gt)
            # False negatives per class = row total minus diagonal.
            total = cmi.sum(axis=1)
            numacc = [cmi[i][i] for i in range(cmi.shape[0])]
            numFP = total - numacc
            # Column-normalize to per-class accuracy.
            # NOTE(review): divides by zero if a class never appears in GT.
            cmi = cmi / cmi.sum(axis=0)
            acci = [cmi[i][i] for i in range(cmi.shape[0])]
            rowFP, rowAcc = {'imageID': img_id}, {'imageID': img_id}
            for q in range(len(dictOfclass)):
                rowFP.update({dictOfclass[q]: numFP[q]})
                rowAcc.update({dictOfclass[q]: acci[q]})
            img_idx += 1
            # Append this image's false-negative row to the CSV.
            FN_path = os.path.join(output_dir, args.which_data,
                                   args.model + '_FN_%s.csv' % fn)
            f = open(FN_path, 'a')
            with f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writerow(rowFP)
        print('%d images' % img_idx)
    print('gt', len(count_edge_gt))
    print('pre', len(count_edge_pre))
    # Overall confusion matrix over the whole split.
    cm = confusion_matrix(count_edge_pre, count_edge_gt)
    cm = cm / cm.sum(axis=0)
    confusion_matrix_df = pd.DataFrame(cm)
    print(confusion_matrix_df)
    # Run label -> fraction of FH data used (assumed from calling scripts —
    # TODO confirm).
    label = {'a': '5%', 'b': '10%', 'c': '20%', 'd': '50%', 'e': '100%'}
    acc = [
        confusion_matrix_df[i][i] for i in range(confusion_matrix_df.shape[0])
    ]
    print('acc', acc)
    # Append the overall per-relationship accuracies for this run.
    edgenames = [
        'left of', 'right of', 'above', 'below', 'inside', 'surrounding'
    ]
    accTotal = {'model': label[fn]}
    for q in range(0, len(acc)):
        accTotal.update({edgenames[q]: acc[q]})
    err_path = os.path.join(output_dir, args.which_data,
                            args.model + '_acc.csv')
    fil = open(err_path, 'a')
    with fil:
        fieldnames = [
            'model', 'left of', 'right of', 'above', 'below', 'inside',
            'surrounding'
        ]
        writer = csv.DictWriter(fil, fieldnames=fieldnames)
        writer.writerow(accTotal)
    print('over')
def main(args):
    """Restore a pretrained sg2im model and generate validation images.

    Builds the model and (unused here) discriminators, restores weights from
    a hard-coded checkpoint path, then runs the model on validation batches
    with GT boxes/masks and writes the generated images to ./output_batch.
    (The original training loop was left commented out here and has been
    removed for readability.)
    """
    print(args)
    check_args(args)
    float_dtype = torch.cuda.FloatTensor
    long_dtype = torch.cuda.LongTensor
    vocab, train_loader, val_loader = build_loaders(args)
    print("vocab")
    print(vocab)
    obj_dict = vocab['object_name_to_idx']
    print("vocab['object_name_to_idx']")
    print(vocab['object_name_to_idx'])
    model, model_kwargs = build_model(args, vocab)
    model.type(float_dtype)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    # Discriminators are built (to mirror the training setup) but only the
    # generator is used below.
    obj_discriminator, d_obj_kwargs = build_obj_discriminator(args, vocab)
    img_discriminator, d_img_kwargs = build_img_discriminator(args, vocab)
    gan_g_loss, gan_d_loss = get_gan_losses(args.gan_loss_type)
    if obj_discriminator is not None:
        obj_discriminator.type(float_dtype)
        obj_discriminator.train()
        print(obj_discriminator)
        optimizer_d_obj = torch.optim.Adam(obj_discriminator.parameters(),
                                           lr=args.learning_rate)
    if img_discriminator is not None:
        img_discriminator.type(float_dtype)
        img_discriminator.train()
        print(img_discriminator)
        optimizer_d_img = torch.optim.Adam(img_discriminator.parameters(),
                                           lr=args.learning_rate)
    restore_path = './'
    # Hard-coded pretrained checkpoint (overrides the value above and any
    # args.restore_from_checkpoint handling that was commented out).
    restore_path = './sg2im-models/vg128.pt'
    if restore_path is not None and os.path.isfile(restore_path):
        print('Restoring from checkpoint:')
        print(restore_path)
        checkpoint = torch.load(restore_path)
        model.load_state_dict(checkpoint['model_state'])
    batch_index = 0
    print('switching to eval mode')
    model.eval()
    for val_batch in val_loader:
        batch_index += 1
        print("val_batch_index: ", batch_index)
        val_batch = [tensor.cuda() for tensor in val_batch]
        val_masks = None
        if len(val_batch) == 6:
            val_imgs, val_objs, val_boxes, val_triples, val_obj_to_img, val_triple_to_img = val_batch
        elif len(val_batch) == 7:
            val_imgs, val_objs, val_boxes, val_masks, val_triples, val_obj_to_img, val_triple_to_img = val_batch
        else:
            assert False
        predicates = val_triples[:, 1]
        # Generate with GT boxes (and GT masks when available).
        with timeit('forward', args.timing):
            val_model_boxes = val_boxes
            val_model_out = model(val_objs,
                                  val_triples,
                                  val_obj_to_img,
                                  boxes_gt=val_model_boxes,
                                  masks_gt=val_masks)
        val_imgs_pred, val_boxes_pred, val_masks_pred, val_predicate_scores = val_model_out
        # NOTE: val_imgs is rebound here — from GT images to deprocessed
        # predicted images.
        val_imgs = imagenet_deprocess_batch(val_imgs_pred)
        output_img_dir = "./output_batch"
        if not os.path.exists(output_img_dir):
            os.makedirs(output_img_dir)
        print("label: ")
        print(val_objs.shape[0])
        print(val_objs)
        # Reverse-lookup object names from their vocab indices (debug dump).
        object_name_list = []
        for label_index in range(val_objs.shape[0]):
            object_index = val_objs[label_index].cpu().data.numpy()
            object_name = list(obj_dict.keys())[list(
                obj_dict.values()).index(object_index)]
            object_name_list.append(object_name)
        print(object_name_list)
        print("val_obj_to_img")
        print(val_obj_to_img)
        print("gt_boxes: ", val_model_boxes.shape)
        print(val_model_boxes)
        # Save the generated images for this batch.
        for img_index in range(val_imgs.shape[0]):
            img_np = val_imgs[img_index].numpy().transpose(1, 2, 0)
            img_path = os.path.join(
                output_img_dir,
                'img_{}_{}.png'.format('%04d' % batch_index,
                                       '%03d' % img_index))
            cv2.imwrite(img_path, img_np)
        # NOTE(review): deliberate debug abort after the first batch —
        # remove to process the whole validation set.
        raise Exception("hahha, gonna save val_imgs_pred")
def run_model(args, checkpoint, output_dir, fn, loader=None):
    """Evaluate predicted spatial relations against ground truth.

    Runs the FH-conditioned layout model over every batch in `loader`,
    recomputes the spatial relation (via `get_relationship`) between each
    related subject/object pair from both the GT boxes/masks and the
    predicted boxes/masks, then renders and saves a column-normalized
    confusion matrix heatmap under `output_dir/args.which_data`.

    Args:
        args: parsed CLI namespace; uses FH_dir_train, FH_dir_val,
            which_data, use_gt_boxes, use_gt_masks.
        checkpoint: loaded training checkpoint; must contain
            checkpoint['model_kwargs']['vocab'].
        output_dir: directory the confusion-matrix PNG is written under.
        fn: single-letter run tag; mapped to a percentage via `label`.
            NOTE(review): `label[fn]` raises KeyError for fn outside
            'a'..'e' — confirm callers only pass those.
        loader: optional data loader; built from the checkpoint when None.
    """
    vocab = checkpoint['model_kwargs']['vocab']
    print(vocab.keys())
    print(vocab['pred_name_to_idx'])
    # Predicate-name -> index mapping, e.g.
    # {'inside': 5, 'left of': 1, '__in_image__': 0, 'right of': 2, 'below': 4, 'above': 3, 'surrounding': 6}
    dic_pred = vocab['pred_name_to_idx']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)
    # `data` is kept for interface parity with the other run_model variants;
    # this variant never fills it.
    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    save_dir = makedir(output_dir, which_data)
    # Precomputed per-image "FH" object/edge features, keyed by image id.
    FH_objs_train, FH_edges_train, IDs_train = torch.load(
        args.FH_dir_train)  #torch.load('dataFH/train_FH.npy')
    FH_objs_val, FH_edges_val, IDs_val = torch.load(
        args.FH_dir_val)  #torch.load('dataFH/val_FH.npy')
    IDs_train = torch.tensor(IDs_train)
    IDs_val = torch.tensor(IDs_val)
    # Select the FH tables matching the split being evaluated.
    if args.which_data == 'train':
        IDs = IDs_train
        FH_objs = FH_objs_train
        FH_edges = FH_edges_train
    else:
        IDs = IDs_val
        FH_objs = FH_objs_val
        FH_edges = FH_edges_val
    # Accumulated relation labels over the whole split (GT vs. predicted).
    count_edge_gt = []
    count_edge_pre = []
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        masks = None
        # This loader variant always yields 8 tensors, including image ids.
        imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [
            x.cuda() for x in batch
        ]
        # get FH by images within a batch: look up each image id in IDs and
        # gather its precomputed object/edge features.
        fh_obj, fh_edge = [], []
        for i in range(imgs_ids.shape[0]):
            idd = ((IDs == imgs_ids[i].item()).nonzero())
            fh_obj_i = FH_objs[idd]
            fh_obj.append(fh_obj_i)
            fh_edge_i = FH_edges[idd]
            fh_edge.append(fh_edge_i)
        fh_obj = torch.cat(fh_obj)
        fh_edge = torch.cat(fh_edge)
        imgs_gt = imagenet_deprocess_batch(imgs)
        # Optionally condition the model on ground-truth layout.
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks
        # Run the model with predicted masks
        model_out = model(objs, triples, fh_obj, fh_edge, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
        # This model variant returns layout only (no generated images).
        boxes_pred, masks_pred = model_out
        # Split flat per-batch tensors into per-image lists.
        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data
        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        # NOTE: `triples` is rebound here to the per-image list form.
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]
        for i in range(imgs_gt.size(0)):
            # for edges: score every non-dummy triple (predicate index 0 is
            # the __in_image__ dummy relation and is skipped).
            triples_i = triples[i]
            for k in range(triples_i.shape[0]):
                if (triples_i[k][1] != 0):
                    idx_s, idx_o = triples_i[k][0], triples_i[k][2]
                    # Relation recomputed from GT layout.
                    bbxs_of_img = boxes_gt[i]
                    masks_of_img = masks_gt[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    edge_gt = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_gt.append(edge_gt)
                    # print('gt:', triples_i[k][1].item(), edge_gt)
                    # Relation recomputed from predicted layout; predicted
                    # masks are soft, so binarize before comparison.
                    bbxs_of_img = boxes_pred[i]
                    masks_of_img = masks_pred[i]
                    box_s, box_o = bbxs_of_img[idx_s], bbxs_of_img[idx_o]
                    mask_s, mask_o = masks_of_img[idx_s], masks_of_img[idx_o]
                    mask_s, mask_o = torch.round(mask_s).type(
                        torch.long), torch.round(mask_o).type(torch.long)
                    edge_pre = get_relationship(box_s, box_o, mask_s, mask_o)
                    count_edge_pre.append(edge_pre)
            img_idx += 1
        # Progress report (one line per batch).
        print('%d images' % img_idx)
    # Fixed relation-label ordering used for the confusion-matrix axes.
    class2idx = {
        "left of": 0,
        "right of": 1,
        "above": 2,
        "below": 3,
        "inside": 4,
        "surrounding": 5
    }
    idx2class = {v: k for k, v in class2idx.items()}
    # break
    print('gt', len(count_edge_gt))
    print('pre', len(count_edge_pre))
    cm = confusion_matrix(count_edge_pre, count_edge_gt)  # y, x
    # Normalize each column (true label) to sum to 1.
    # NOTE(review): a column with zero support divides by zero here and
    # yields NaNs in the heatmap — confirm every relation occurs in the split.
    cm = cm / cm.sum(axis=0)
    confusion_matrix_df = pd.DataFrame(cm).rename(columns=idx2class,
                                                  index=idx2class)
    # Run tag -> data-fraction label used in the plot title.
    label = {'a': '5%', 'b': '10%', 'c': '20%', 'd': '50%', 'e': '100%'}
    ax = sns.heatmap(confusion_matrix_df,
                     annot=True,
                     cmap='Blues_r',
                     vmin=0,
                     vmax=1)
    title = 'M1_bm_FH_' + args.which_data + '_' + label[fn]
    ax.set(title=title, ylabel='Predicted label', xlabel='True label')
    fig = ax.get_figure()
    filename = 'CM1_bm_FH_' + fn + '_' + args.which_data + '.png'
    CM_path = os.path.join(output_dir, args.which_data, filename)
    fig.savefig(CM_path)
    # Clear the figure so repeated calls don't draw over the same axes.
    fig.clf()
    print('over')
def run_model(args, checkpoint, output_dir, fn, loader=None):
    """Visualize ground-truth vs. predicted layouts as stacked image grids.

    For each batch: runs the model, then for every image draws the GT
    boxes/masks and the predicted boxes/masks (colored mask overlay plus
    rectangle outlines, both over the real image and over a black canvas),
    composes everything with `torchvision.utils.make_grid`, vertically
    concatenates the grids, and saves one PNG per batch.

    NOTE(review): `save_dir` and `fn` are overwritten with hard-coded
    values ('output', 'M1re') just before saving, so the `output_dir`/`fn`
    arguments do not affect where grids land — looks like debug leftovers;
    confirm intent before relying on the parameters.

    Args:
        args: parsed CLI namespace; uses which_data, use_gt_boxes,
            use_gt_masks, image_size.
        checkpoint: loaded training checkpoint containing model kwargs.
        output_dir: base output directory (see NOTE above).
        fn: run tag (see NOTE above).
        loader: optional data loader; built from the checkpoint when None.
    """
    vocab = checkpoint['model_kwargs']['vocab']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)
    # Kept for interface parity with the other run_model variants; unused here.
    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }
    which_data = args.which_data
    save_dir = makedir(output_dir, which_data)
    # save_dir = makedir(save_dir,fn)
    img_idx = 0
    ibatch = 0
    for batch in loader:
        ibatch += 1
        masks = None
        # Loader yields 6 tensors (no masks) or 7 (with masks).
        if len(batch) == 6:
            imgs, objs, boxes, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        elif len(batch) == 7:
            imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        # imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img, imgs_ids = [x.cuda() for x in batch]
        # imgs_print = imagenet_deprocess_batch(imgs)
        # grid = torchvision.utils.make_grid(imgs_print)
        # writer.add_image('img/real', grid, ibatch-1)
        imgs_gt = imagenet_deprocess_batch(imgs)
        # Optionally condition the model on ground-truth layout.
        boxes_gt = None
        masks_gt = None
        if args.use_gt_boxes:
            boxes_gt = boxes
        if args.use_gt_masks:
            masks_gt = masks
        # Run the model with predicted masks
        model_out = model(objs, triples, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
        # boxes_pred, masks_pred = model_out
        imgs_pred, boxes_pred, masks_pred, _ = model_out
        # Split flat per-batch tensors into per-image lists.
        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data
        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]
        # Per-batch canvases: annotated real images, and annotations drawn
        # on black, for GT ("gt") and predictions ("pre"); the *b variants
        # hold rectangle outlines only (no mask overlay).
        imgs_bbx = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        imgs_bbx_pre = torch.zeros(imgs_gt.size(), dtype=torch.uint8)
        white_bbx_gt, white_bbx_gtb = torch.zeros(
            imgs_gt.size(), dtype=torch.uint8), torch.zeros(imgs_gt.size(),
                                                            dtype=torch.uint8)
        white_bbx_pre, white_bbx_preb = torch.zeros(
            imgs_gt.size(), dtype=torch.uint8), torch.zeros(imgs_gt.size(),
                                                            dtype=torch.uint8)
        for i in range(imgs_gt.size(0)):
            # ---- Ground-truth layout pass ----
            black_gt = np.zeros([args.image_size[0], args.image_size[1], 3])
            black_gtb = np.zeros([args.image_size[0], args.image_size[1], 3])
            img = np.copy(imgs_gt[i].numpy().transpose(1, 2, 0))
            layer = np.zeros(list(args.image_size))
            masks_of_img = masks_gt[i]
            bbxs_of_img = boxes_gt[i]
            num_of_objs = bbxs_of_img.size(0)
            # Last object is the __image__ dummy; skip it.
            for j in range(num_of_objs - 1):
                # color = tuple(np.random.randint(256, size=3))
                color = colors[j % len(colors)]
                mask = masks_of_img[j].cpu().clone().numpy()
                mask = np.round(mask)
                # Boxes are normalized [0,1]; scale to pixels and clamp.
                # NOTE(review): scales both axes by image_size[0] — assumes
                # square images; confirm.
                bbx = (bbxs_of_img[j].cpu().numpy() *
                       args.image_size[0]).astype(int)
                bbx = np.clip(bbx, 0, args.image_size[0] - 1)
                wbbx = bbx[2] - bbx[0]
                hbbx = bbx[3] - bbx[1]
                # Degenerate boxes are clamped to 1px so PIL resize works.
                if not wbbx > 0:
                    wbbx = 1
                    print('gt', wbbx, hbbx)
                if not hbbx > 0:
                    hbbx = 1
                    print('gt', wbbx, hbbx)
                # Paste the object's mask into its box region of `layer`.
                maskPIL = Image.fromarray(mask.astype(np.uint8))
                maskPIL = maskPIL.resize((wbbx, hbbx),
                                         resample=Image.BILINEAR)
                layer[bbx[1]:bbx[3], bbx[0]:bbx[2]] = np.array(maskPIL)
                # Overlay mask + rectangle on the real image.
                img = apply_mask(img, layer, color)
                masked_imgPIL = Image.fromarray(img.astype(np.uint8))
                draw = ImageDraw.Draw(masked_imgPIL)
                draw.rectangle(bbx.tolist(), width=1, outline=color)
                img = np.array(masked_imgPIL)
                # Same overlay on a black canvas (mask + rectangle).
                black_gt = apply_mask(black_gt, layer, color)
                masked_blackPIL = Image.fromarray(black_gt.astype(np.uint8))
                draw2 = ImageDraw.Draw(masked_blackPIL)
                draw2.rectangle(bbx.tolist(), width=1, outline=color)
                black_gt = np.array(masked_blackPIL)
                # Rectangle-only version on black.
                blackPIL = Image.fromarray(black_gtb.astype(np.uint8))
                draw2b = ImageDraw.Draw(blackPIL)
                draw2b.rectangle(bbx.tolist(), width=1, outline=color)
                black_gtb = np.array(blackPIL)
            imgs_bbx[i] = torchvision.transforms.ToTensor()(
                masked_imgPIL) * 255
            white_bbx_gt[i] = torchvision.transforms.ToTensor()(
                masked_blackPIL) * 255
            white_bbx_gtb[i] = torchvision.transforms.ToTensor()(
                blackPIL) * 255
            # ---- Predicted layout pass (same drawing, predicted boxes/masks) ----
            black_gt = np.zeros([args.image_size[0], args.image_size[1], 3])
            black_gtb = np.zeros([args.image_size[0], args.image_size[1], 3])
            img = np.copy(imgs_gt[i].numpy().transpose(1, 2, 0))
            layer = np.zeros(list(args.image_size))
            bbxs_of_img = boxes_pred[i]
            masks_of_img = masks_pred[i]
            num_of_objs = bbxs_of_img.size(0)
            for j in range(num_of_objs - 1):
                color = colors[j % len(colors)]
                mask = masks_of_img[j].cpu().clone().numpy()
                mask = np.round(mask)
                bbx = (bbxs_of_img[j].cpu().numpy() *
                       args.image_size[0]).astype(int)
                bbx = np.clip(bbx, 0, args.image_size[0] - 1)
                wbbx = bbx[2] - bbx[0]
                hbbx = bbx[3] - bbx[1]
                if not wbbx > 0:
                    wbbx = 1
                    print('pred', wbbx, hbbx)
                if not hbbx > 0:
                    hbbx = 1
                    print('pred', wbbx, hbbx)
                maskPIL = Image.fromarray(mask.astype(np.uint8))
                maskPIL = maskPIL.resize((wbbx, hbbx),
                                         resample=Image.BILINEAR)
                # print('wwbx,hbbx:',wbbx, hbbx, maskPIL2.size, bbx)
                layer[bbx[1]:bbx[3], bbx[0]:bbx[2]] = np.array(maskPIL)
                img = apply_mask(img, layer, color)
                masked_imgPIL = Image.fromarray(img.astype(np.uint8))
                draw = ImageDraw.Draw(masked_imgPIL)
                draw.rectangle(bbx.tolist(), width=1, outline=color)
                img = np.array(masked_imgPIL)
                black_gt = apply_mask(black_gt, layer, color)
                masked_blackPIL = Image.fromarray(black_gt.astype(np.uint8))
                draw2 = ImageDraw.Draw(masked_blackPIL)
                draw2.rectangle(bbx.tolist(), width=1, outline=color)
                black_gt = np.array(masked_blackPIL)
                blackPIL = Image.fromarray(black_gtb.astype(np.uint8))
                draw2b = ImageDraw.Draw(blackPIL)
                draw2b.rectangle(bbx.tolist(), width=1, outline=color)
                black_gtb = np.array(blackPIL)
            imgs_bbx_pre[i] = torchvision.transforms.ToTensor()(
                masked_imgPIL) * 255
            white_bbx_pre[i] = torchvision.transforms.ToTensor()(
                masked_blackPIL) * 255
            white_bbx_preb[i] = torchvision.transforms.ToTensor()(
                blackPIL) * 255
            img_idx += 1
        # ---- Compose one tall grid per batch: real | GT overlays | GT boxes |
        # generated | pred overlays | pred boxes, concatenated vertically. ----
        imgs_orig = imagenet_deprocess_batch(imgs)
        grid1 = torchvision.utils.make_grid(imgs_orig)
        toSave = grid1
        # GT
        # imgs_grid_GT = imgs_bbx.byte()
        # grid2 = torchvision.utils.make_grid(imgs_grid_GT)
        # toSave = torch.cat((grid1,grid2),1)
        white_grid_GT = white_bbx_gt.byte()
        grid3 = torchvision.utils.make_grid(white_grid_GT)
        toSave = torch.cat((toSave, grid3), 1)
        white_grid_GTb = white_bbx_gtb.byte()
        grid3b = torchvision.utils.make_grid(white_grid_GTb)
        toSave = torch.cat((toSave, grid3b), 1)
        # PRE
        imgs_pred = imagenet_deprocess_batch(imgs_pred)
        gridx = torchvision.utils.make_grid(imgs_pred)
        toSave = torch.cat((toSave, gridx), 1)
        # imgs_grid_pre = imgs_bbx_pre.byte()
        # grid4 = torchvision.utils.make_grid(imgs_grid_pre)
        # toSave = torch.cat((toSave, grid4),1)
        white_grid_pre = white_bbx_pre.byte()
        grid5 = torchvision.utils.make_grid(white_grid_pre)
        toSave = torch.cat((toSave, grid5), 1)
        white_grid_preb = white_bbx_preb.byte()
        grid5b = torchvision.utils.make_grid(white_grid_preb)
        toSave = torch.cat((toSave, grid5b), 1)
        toSavePIL = torchvision.transforms.ToPILImage()(toSave)
        # NOTE(review): hard-coded overrides shadow the function arguments —
        # grids always land in 'output/' with tag 'M1re'; confirm intended.
        save_dir = 'output'
        fn = 'M1re'
        grids_path = os.path.join(save_dir, '%d' % img_idx + fn + '.png')
        # grids_path = os.path.join(save_dir, '%d'%img_id + fn + '.png')
        toSavePIL.save(grids_path)
    print('Saved %d images' % img_idx)
def run_model(args, checkpoint, output_dir, loader=None):
    """Generate images from scene graphs and dump all predictions to disk.

    For every batch from `loader`, runs the model (optionally conditioned on
    GT boxes/masks per `args.use_gt_boxes` / `args.use_gt_masks`), saves each
    generated image — and optionally the GT image and the scene-graph
    drawing — as a numbered PNG, and accumulates per-image objects, boxes,
    and masks into a single `data.pt` in `output_dir`.

    Args:
        args: parsed CLI namespace; uses save_graphs, save_gt_imgs,
            use_gt_boxes, use_gt_masks.
        checkpoint: loaded training checkpoint; must contain
            checkpoint['model_kwargs']['vocab'].
        output_dir: destination for images/, graphs/, images_gt/, data.pt.
        loader: optional data loader; built from the checkpoint when None.

    Raises:
        ValueError: if a batch has an unexpected number of tensors.
    """
    vocab = checkpoint['model_kwargs']['vocab']
    model = build_model(args, checkpoint)
    if loader is None:
        loader = build_loader(args, checkpoint)

    # Output layout; the optional directories are only created when requested.
    img_dir = makedir(output_dir, 'images')
    graph_dir = makedir(output_dir, 'graphs', args.save_graphs)
    gt_img_dir = makedir(output_dir, 'images_gt', args.save_gt_imgs)
    data_path = os.path.join(output_dir, 'data.pt')

    data = {
        'vocab': vocab,
        'objs': [],
        'masks_pred': [],
        'boxes_pred': [],
        'masks_gt': [],
        'boxes_gt': [],
        'filenames': [],
    }

    img_idx = 0
    for batch in loader:
        masks = None
        # Loader yields 6 tensors (no masks) or 7 (with masks).
        if len(batch) == 6:
            imgs, objs, boxes, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        elif len(batch) == 7:
            imgs, objs, boxes, masks, triples, obj_to_img, triple_to_img = [
                x.cuda() for x in batch
            ]
        else:
            # Previously an unexpected batch silently fell through and
            # crashed later with a NameError; fail fast instead.
            raise ValueError('Unexpected batch size: %d' % len(batch))

        imgs_gt = imagenet_deprocess_batch(imgs)

        # Optionally condition the model on ground-truth layout.
        boxes_gt = boxes if args.use_gt_boxes else None
        masks_gt = masks if args.use_gt_masks else None

        # Run the model with predicted masks.
        model_out = model(objs, triples, obj_to_img,
                          boxes_gt=boxes_gt, masks_gt=masks_gt)
        imgs_pred, boxes_pred, masks_pred, _ = model_out
        imgs_pred = imagenet_deprocess_batch(imgs_pred)

        # Split flat per-batch tensors into per-image lists.
        obj_data = [objs, boxes_pred, masks_pred]
        _, obj_data = split_graph_batch(triples, obj_data, obj_to_img,
                                        triple_to_img)
        objs, boxes_pred, masks_pred = obj_data

        obj_data_gt = [boxes.data]
        if masks is not None:
            obj_data_gt.append(masks.data)
        triples, obj_data_gt = split_graph_batch(triples, obj_data_gt,
                                                 obj_to_img, triple_to_img)
        boxes_gt, masks_gt = obj_data_gt[0], None
        if masks is not None:
            masks_gt = obj_data_gt[1]

        for i in range(imgs_pred.size(0)):
            img_filename = '%04d.png' % img_idx
            if args.save_gt_imgs:
                img_gt = imgs_gt[i].numpy().transpose(1, 2, 0)
                img_gt_path = os.path.join(gt_img_dir, img_filename)
                imsave(img_gt_path, img_gt)

            # CHW tensor -> HWC array for saving.
            img_pred_np = imgs_pred[i].numpy().transpose(1, 2, 0)
            img_path = os.path.join(img_dir, img_filename)
            imsave(img_path, img_pred_np)

            data['objs'].append(objs[i].cpu().clone())
            data['masks_pred'].append(masks_pred[i].cpu().clone())
            data['boxes_pred'].append(boxes_pred[i].cpu().clone())
            data['boxes_gt'].append(boxes_gt[i].cpu().clone())
            data['filenames'].append(img_filename)

            # GT masks may be absent; keep list aligned with a None entry.
            cur_masks_gt = None
            if masks_gt is not None:
                cur_masks_gt = masks_gt[i].cpu().clone()
            data['masks_gt'].append(cur_masks_gt)

            if args.save_graphs:
                graph_img = draw_scene_graph(vocab, objs[i], triples[i])
                graph_path = os.path.join(graph_dir, img_filename)
                imsave(graph_path, graph_img)

            img_idx += 1

    torch.save(data, data_path)
    print('Saved %d images' % img_idx)