def _get_data_path(self, config):
    """Build (and cache on self) paths to preprocessed grounding/query files.

    Args:
        config: dict-like; optional keys "split" (default "train"),
            "max_length" (10), "frequency_threshold" (1), "num_segment" (128),
            "feature_type" ("I3D"), "data_dir" ("").

    Returns:
        dict with keys "grounding_info", "query_labels", "query_info"
        mapping to file paths (also stored as ``self.paths``).
    """
    split = config.get("split", "train")
    L = config.get("max_length", 10)
    F = config.get("frequency_threshold", 1)
    S = config.get("num_segment", 128)
    FT = config.get("feature_type", "I3D")
    root_dir = os.path.join(config.get("data_dir", ""), "preprocess")

    grounding_info_path = os.path.join(
        root_dir, "grounding_info",
        "{}_labels_S{}_{}.hdf5".format(split, S, FT))
    query_info_path = os.path.join(
        root_dir, "query_info",
        "{}_info_F{}_L{}_{}.json".format(split, F, L, FT))
    query_label_path = os.path.join(
        root_dir, "query_info",
        "{}_label_F{}_L{}_{}.hdf5".format(split, F, L, FT))
    # NOTE: a caption-label path ("{split}_caption_label_F{F}_L{L}_{FT}.hdf5")
    # was computed here but never used nor returned; the dead computation
    # has been removed.

    # make sure the target sub-directories exist before use
    io_utils.check_and_create_dir(os.path.join(root_dir, "grounding_info"))
    io_utils.check_and_create_dir(os.path.join(root_dir, "query_info"))

    self.paths = {
        "grounding_info": grounding_info_path,
        "query_labels": query_label_path,
        "query_info": query_info_path,
    }
    return self.paths
def main(params):
    """Evaluate a trained grounding model over a range of checkpoint epochs.

    Expects `params` keys: dataset, model_type, experiment, evaluate_on_gt,
    evaluate_on_top1000, proposal, loader_config_path, test_on_server,
    num_workers, start_epoch, end_epoch, epoch_stride.
    """
    # Obtain configuration path (results/<dataset>/<model_type>/<experiment>)
    exp_path = os.path.join("results", params["dataset"],
                            params["model_type"], params["experiment"])
    config_path = os.path.join(exp_path, "config.yml")
    params["config_path"] = config_path

    # prepare model and dataset
    M, dataset, config = cmf.prepare_experiment(params)

    # evaluate on GT
    config["evaluation"]["use_gt"] = params["evaluate_on_gt"]
    # evaluate on Top 1000 proposals (overrides GT mode and disables NMS)
    if params["evaluate_on_top1000"]:
        config["evaluation"]["use_gt"] = False
        config["evaluation"]["apply_nms"] = False
    # optionally use a precomputed proposal sequence
    if len(params["proposal"]) > 0:
        config["evaluation"]["precomputed_proposal_sequence"] = params[
            "proposal"]

    # create logger
    epoch_logger = cmf.create_logger(config, "EPOCH", "test.log")

    """ Build data loader """
    loader_config = io_utils.load_yaml(params["loader_config_path"])
    if params["test_on_server"]:
        loader_config = loader_config["test_loader"]
        test_on = "Test_Server"
    else:
        loader_config = loader_config["val_loader"]
        test_on = "Test"
    dsets, L = cmf.get_loader(dataset, split=["test"],
                              loader_configs=[loader_config],
                              num_workers=params["num_workers"])
    config = M.override_config_from_dataset(config, dsets["test"], mode="Test")
    config["model"]["resume"] = True
    # remember the tensorboard root, then blank it so per-epoch model
    # construction does not write its own summaries
    tensorboard_path = config["misc"]["tensorboard_dir"]
    config["misc"]["tensorboard_dir"] = ""
    # config["misc"]["debug"] = params["debug_mode"]

    """ Evaluating networks """
    e0 = params["start_epoch"]
    e1 = params["end_epoch"]
    es = params["epoch_stride"]
    # one shared summary writer covering the whole epoch sweep
    io_utils.check_and_create_dir(tensorboard_path +
                                  "_test_s{}_e{}".format(e0, e1))
    summary = PytorchSummary(tensorboard_path +
                             "_test_s{}_e{}".format(e0, e1))

    for epoch in range(e0, e1 + 1, es):
        """ Build network """
        # point the factory at this epoch's checkpoint
        config["model"]["checkpoint_path"] = \
            os.path.join(exp_path, "checkpoints",
                         "epoch_{:03d}.pkl".format(epoch))
        net, _ = cmf.factory_model(config, M, dsets["test"], None)
        net.set_tensorboard_summary(summary)
        cmf.test(config, L["test"], net, epoch, None, epoch_logger, on=test_on)
def extract_output(self, vis_inps, vis_gt, save_dir):
    """Run inference and save per-query output dictionaries as pickle files.

    Args:
        vis_inps: network inputs forwarded to ``self._infer``.
        vis_gt: ground-truth data forwarded to ``self._infer``.
        save_dir: directory where one ``<qid>.pkl`` file per query is written.
    """
    vis_data = self._infer(vis_inps, "save_output", vis_gt)
    qids = vis_data["qids"]
    # BUGFIX: the original computed
    #     preds = net_utils.loc2mask(loc, seg_masks)
    # where `loc` and `seg_masks` were undefined (NameError at runtime) and
    # `preds` was never used; the broken dead statement has been removed.
    io_utils.check_and_create_dir(save_dir)  # create once, not per query
    for i, qid in enumerate(qids):
        # gather the i-th entry of every field for this query
        out = {k: vis_data[k][i] for k in vis_data.keys()}
        save_path = os.path.join(save_dir, "{}.pkl".format(qid))
        io_utils.write_pkl(save_path, out)
def save_results(self, prefix, mode="Train"):
    """Dump accumulated predictions to JSON and update metric counters.

    Args:
        prefix: file-name prefix for the JSON dump.
        mode: split name used as the prediction sub-directory (default "Train").
    """
    # write predictions to <result_dir>/predictions/<mode>/<prefix>.json
    out_dir = os.path.join(self.config["misc"]["result_dir"],
                           "predictions", mode)
    out_path = os.path.join(out_dir, prefix + ".json")
    io_utils.check_and_create_dir(out_dir)
    io_utils.write_json(out_path, self.results)

    # evaluate and feed the per-metric counters
    num_gts = float(len(self.results["gts"]))
    self.evaluator.set_duration(self.results["durations"])
    rank1, rank5, miou = self.evaluator.eval(self.results["predictions"],
                                             self.results["gts"])
    for metric_name, metric_val in rank1.items():
        self.counters[metric_name].add(metric_val / num_gts, 1)
    self.counters["mIoU"].add(miou / num_gts, 1)
def save_logits(config, L, net, prefix="", mode="train"):
    """Run the network over loader `L` and save per-question logits as .npy.

    Args:
        config: experiment config; uses config["misc"]["result_dir"].
        L: data loader yielding batches; batch[1] holds question ids.
        net: model exposing ``evaluate(batch)``; outputs[1] is the logits.
        prefix: sub-directory name under <result_dir>/logits/.
        mode: unused here; kept for interface compatibility.
    """
    # save assignments
    save_dir = os.path.join(config["misc"]["result_dir"], "logits",
                            str(prefix))
    io_utils.check_and_create_dir(save_dir)
    for batch in tqdm(L):
        # forward the network
        outputs = net.evaluate(batch)
        # IDIOM FIX: isinstance() instead of `type(x) == type(list())`
        if isinstance(outputs[1], list):
            # list of per-step logits -> stack to [m,B,num_answers]
            logits = net_utils.get_data(torch.stack(outputs[1], 0))
        else:
            logits = net_utils.get_data(outputs[1])  # [B,num_answers]
        # save logits as filename of qid
        for qi, qst_id in enumerate(batch[1]):
            save_path = os.path.join(save_dir, "{}.npy".format(qst_id))
            np.save(save_path, logits[qi].numpy())
def visualize_topk_proposals(config, sample_proposals, gts, topk, prefix, mode):
    """Render GT segments and top-k proposal segments on one timeline figure.

    Saves the plot to <result_dir>/qualitative/<mode>/<prefix>_proposals.png.
    """
    video_len = gts["duration"]
    gt_segments = gts["gt_times"]
    proposal_ts = utils.get_timestamps_for_topk_proposals(
        sample_proposals, video_len, topk, apply_nms=True)

    # one row per GT segment, then one row per proposal; each proposal row
    # is labeled with its score, best tIoU, and the matching GT index
    row_labels = ["GT_{}".format(k + 1) for k, _ in enumerate(gt_segments)]
    segments = list(gt_segments)
    for k, span in enumerate(proposal_ts):
        overlap, best_gt = utils.iou(span, gt_segments, return_index=True)
        row_labels.append("prop{:02d}_e{:.5f}_t{:.5f}_{:02d}".format(
            k, span[2], overlap, best_gt + 1))
        segments.append([span[0], span[1]])

    # y positions, top-to-bottom, normalized into (0, 1)
    row_labels = np.asarray(row_labels)
    ys = np.arange(len(row_labels) + 1)[::-1] / float(len(row_labels) + 1)

    # draw one horizontal timeline per row
    fig = plt.figure(figsize=(5, 5))
    palette = sns.color_palette("Set1", n_colors=len(row_labels), desat=.4)
    for k, (span, ypos) in enumerate(zip(segments, ys)):
        add_timelines(ypos, span[0], span[1], color=palette[k])

    # axis cosmetics
    ax = plt.gca()
    plt.yticks(ys, row_labels, fontsize=5)
    plt.ylim(0, 1)
    plt.xlim(0 - video_len / 50.0, video_len + video_len / 50.0)
    plt.xlabel("Time")

    # write the figure to disk
    out_dir = os.path.join(
        config["misc"]["result_dir"], "qualitative", mode)
    out_path = os.path.join(out_dir, prefix + "_proposals.png")
    io_utils.check_and_create_dir(out_dir)
    plt.savefig(out_path, bbox_inches="tight", dpi=450)
    print("Qualtitative result of Topk proposals saved in {}".format(out_path))
    plt.close()
def save_assignments(self, prefix, mode="train"):
    """Save model-assignment matrices plus question-id bookkeeping.

    Writes three files under <result_dir>/assignments/<mode>/:
      - <prefix>_assignment.h5   (int32 matrix of assignments)
      - <prefix>_assignment.json (answer/question pairs per assignment)
      - <prefix>_qst_ids.json    (reordered question ids)
    """
    assignments = np.vstack(self.assignments_list)
    qst_ids = []
    for qid in self.qst_ids_list:
        qst_ids.extend(qid)
    print("shape of assignments: ", assignments.shape)

    # reorder rows to match the canonical question-id order for this split
    if mode == "train":
        origin_qst_ids = self.origin_train_qst_ids
    else:
        origin_qst_ids = self.origin_test_qst_ids
    assignments, qst_ids = cmf.reorder_assignments_using_qst_ids(
        origin_qst_ids, qst_ids, assignments, is_subset=True)

    # setting directory for saving assignments
    save_dir = os.path.join(self.config["misc"]["result_dir"],
                            "assignments", mode)
    io_utils.check_and_create_dir(save_dir)

    # save assignments
    save_hdf5_path = os.path.join(save_dir, prefix + "_assignment.h5")
    hdf5_file = io_utils.open_hdf5(save_hdf5_path, "w")
    try:
        hdf5_file.create_dataset("assignments", dtype="int32",
                                 data=assignments)
    finally:
        # BUGFIX: close the hdf5 handle (the original leaked it)
        hdf5_file.close()
    print("Assignments are saved in {}".format(save_hdf5_path))

    # save (answer -> question ids) pairs; sets converted to lists for JSON
    save_json_path = os.path.join(save_dir, prefix + "_assignment.json")
    for qsp in self.assignment_qst_ans_pairs:
        for k, v in qsp.items():
            qsp[k] = list(qsp[k])
    io_utils.write_json(save_json_path, self.assignment_qst_ans_pairs)
    print("Assignments (ans-qst) are saved in {}".format(save_json_path))

    # save assignments of qst_ids
    save_json_path = os.path.join(save_dir, prefix + "_qst_ids.json")
    out = {}
    out["question_ids"] = qst_ids
    io_utils.write_json(save_json_path, out)
    print("Saving is done: {}".format(save_json_path))
def create_save_dirs(config):
    """Create the directories needed for training and evaluating models."""
    # checkpoints, status, and qualitative results all live under result_dir
    result_dir = config["result_dir"]
    for sub_dir in ("checkpoints", "status", "qualitative"):
        io_utils.check_and_create_dir(os.path.join(result_dir, sub_dir))
def _save_feature_batch(args, M, h5py_file, batch, feat_path_list, bi, i, n):
    """Forward one image batch through the CNN and save each feature.

    Args:
        args: parsed CLI arguments (feat_type, data_type, debug_mode, ...).
        M: the feature-extraction network.
        h5py_file: open h5py file or None (numpy mode).
        batch: list of [1,C,H,W] uint8 image arrays.
        feat_path_list: destination path per image in `batch`.
        bi: number of valid entries in `batch`.
        i: index of the last image consumed (debug printing only).
        n: running count of saved features (debug printing only).

    Returns:
        Updated running count `n`.
    """
    batch_var = np.concatenate(batch, 0).astype(np.float32)
    # NOTE: `mean` and `std` are module-level normalization constants
    batch_var = (batch_var / 255.0 - mean) / std
    batch_var = torch.FloatTensor(batch_var).cuda()
    batch_var = Variable(batch_var, volatile=True)
    if args.feat_type == "conv5_3":
        feats = M.get_conv5_feat(batch_var)
    elif args.feat_type == "conv4":
        feats = M.get_conv4_feat(batch_var)
    else:
        raise NotImplementedError(
            "Not supported feature type ({})".format(args.feat_type))

    # save features
    feats = feats.data.cpu().clone().numpy()
    for b in range(bi):
        if args.data_type == "h5py":
            h5py_file.create_dataset(feat_path_list[b],
                                     dtype='float',
                                     data=feats[b])
        elif args.data_type == "numpy":
            np.save(feat_path_list[b], feats[b])
        else:
            raise NotImplementedError(
                "Not supported data type ({})".format(args.data_type))
        n += 1
        if args.debug_mode and ((n + 1) % 5000 == 0):
            print("{}th feature is saved in {}".format(
                i + 1, feat_path_list[-1]))
        if args.debug_mode and ((n + 1) % 20000 == 0):
            print(feats[0].shape)
            print("max value: ", np.max(feats[0]))
            print("min value: ", np.min(feats[0]))
    return n


def main():
    """Extract CNN features for all images under --image_dir and save them
    either into one HDF5 file or as individual .npy files."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--save_dir", default="data/CLEVR_v1.0/feats",
                        help="Directory for saving extracted features.")
    parser.add_argument("--image_dir", default="data/CLEVR_v1.0/images",
                        help="Directory for images.")
    parser.add_argument("--num_batch", default=128, type=int,
                        help="batch size")
    parser.add_argument(
        "--image_size", default=224, type=int,
        help="Image size. VGG 16-layer network uses 224x224 images as an input."
    )
    parser.add_argument("--feat_type", default="conv5_3",
                        help="Layer to extract feature. [conv5_3 | conv4]")
    parser.add_argument("--data_type", default="h5py",
                        help="Data type to save features. [h5py | numpy]")
    parser.add_argument("--shuffle", action="store_true", default=False,
                        help="Shuffle the image list to get features")
    parser.add_argument("--debug_mode", action="store_true", default=False,
                        help="debug mode")
    args = parser.parse_args()
    print("Arguments are as follows:\n", args)

    # create save_dir if not exists
    io_utils.check_and_create_dir(args.save_dir)
    """
    if args.data_type == "numpy":
        io_utils.check_and_create_dir(os.path.join(args.save_dir, "train2014"))
        io_utils.check_and_create_dir(os.path.join(args.save_dir, "test2014"))
        io_utils.check_and_create_dir(os.path.join(args.save_dir, "val2014"))
    """

    # build (or load) network
    M = resnet.resnet101(pretrained=True)  # TODO: remove some layers
    M.cuda()
    cudnn.benchmark = True
    M.eval()

    # get image paths from image directory
    img_paths, dir_paths = io_utils.get_filenames_from_dir(args.image_dir)
    for dr in dir_paths:
        io_utils.check_and_create_dir(dr.replace("/images/", "/feats/"))
    if args.shuffle:
        img_paths = random.sample(img_paths, len(img_paths))
    print("Total number of images: {}".format(len(img_paths)))

    # create h5py file if use h5py as data_type
    if args.data_type == "h5py":
        h5py_file = h5py.File(os.path.join(args.save_dir, "img_feats.h5"), "a")
    else:
        h5py_file = None

    # extract features and save them
    n = 0
    bi = 0
    batch = []
    feat_path_list = []
    for i, img_path in enumerate(tqdm(img_paths)):
        feat_path = _get_feat_path(args, img_path)
        if _feat_exist(args, feat_path, h5py_file):
            if args.debug_mode and ((i + 1) % 100 == 0):
                print("[{}] exists".format(feat_path))
            continue

        # load and preprocess img
        try:
            img = imread(args.image_dir + img_path, mode="RGB")
            img = imresize(img, (args.image_size, args.image_size),
                           interp="bicubic")
            img = img.transpose(2, 0, 1)[None]
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallowed
            # KeyboardInterrupt / SystemExit
            print("[Error] fail to load image from {}".format(args.image_dir +
                                                              img_path))
            continue

        # save img in batch
        bi = bi + 1
        batch.append(img)
        feat_path_list.append(feat_path)
        if bi == args.num_batch:
            n = _save_feature_batch(args, M, h5py_file, batch,
                                    feat_path_list, bi, i, n)
            # initialize batch index
            bi = 0
            batch = []
            feat_path_list = []

    # BUGFIX: flush any remaining partial batch. The original flushed on
    # `(i + 1) == len(img_paths)` inside the loop, so if the final image(s)
    # were skipped (feature exists / load error via `continue`) the partial
    # batch was silently dropped.
    if bi > 0:
        n = _save_feature_batch(args, M, h5py_file, batch, feat_path_list,
                                bi, len(img_paths) - 1, n)

    # close h5py file if use h5py as data_type
    if args.data_type == "h5py":
        h5py_file.close()
def ensemble(config):
    """Evaluate an ensemble of trained VQA models at several softmax
    temperatures (tau), reporting per-model, top1-avg, top1-max, and oracle
    accuracies, and saving ensemble predictions to JSON."""
    """ Build data loader """
    dset = dataset.DataSet(config["test_loader"])
    L = data.DataLoader( \
            dset, batch_size=config["test_loader"]["batch_size"], \
            num_workers=config["num_workers"], \
            shuffle=False, collate_fn=dataset.collate_fn)

    """ Load assignments if exists """
    # NOTE(review): `with_assignment`, `assignments`, and `cnt_mapping` are
    # set up here but never used later in this function — presumably leftover
    # from an assignment-analysis path; confirm before removing.
    with_assignment = False
    if config["assignment_path"] != "None":
        with_assignment = True
        assignment_file = io_utils.load_hdf5(config["assignment_path"],
                                             verbose=False)
        assignments = assignment_file["assignments"][:]
        cnt_mapping = np.zeros((3,3))

    """ Build network """
    # one network per (config, checkpoint) pair
    nets = []
    net_configs = []
    for i in range(len(config["checkpoint_paths"])):
        net_configs.append(io_utils.load_yaml(config["config_paths"][i]))
        net_configs[i] = M.override_config_from_loader(net_configs[i], dset)
        nets.append(M(net_configs[i]))
        nets[i].bring_loader_info(dset)
        apply_cc_after = utils.get_value_from_dict(
            net_configs[i]["model"], "apply_curriculum_learning_after", -1)
        # load checkpoint if exists
        nets[i].load_checkpoint(config["checkpoint_paths"][i])
        # epoch number is parsed from the checkpoint filename suffix
        start_epoch = int(utils.get_filename_from_path(
            config["checkpoint_paths"][i]).split("_")[-1])
        # If checkpoint use curriculum learning
        if (apply_cc_after > 0) and (start_epoch >= apply_cc_after):
            nets[i].apply_curriculum_learning()

    # ship network to use gpu
    if config["use_gpu"]:
        for i in range(len(nets)):
            nets[i].gpu_mode()
    for i in range(len(nets)):
        nets[i].eval_mode()

    # initialize counters for different tau
    # metrics: ensemble scores plus one accuracy counter per base model (Mi)
    metrics = ["top1-avg", "top1-max", "oracle"]
    for i in range(len(nets)):
        modelname = "M{}".format(i)
        metrics.append(modelname)
    tau = [1.0, 1.2, 1.5, 2.0, 5.0, 10.0, 50.0, 100.0]
    counters = OrderedDict()
    for T in tau:
        tau_name = "tau-"+str(T)
        counters[tau_name] = OrderedDict()
        for mt in metrics:
            counters[tau_name][mt] = accumulator.Accumulator(mt)

    """ Run training network """
    # NOTE(review): `ii` is never used after initialization.
    ii = 0
    itoa = dset.get_itoa()
    predictions = []
    for batch in tqdm(L):
        # Forward networks
        probs = 0
        B = batch[0][0].size(0)
        # ground truth may come wrapped in a list; take the first element
        if type(batch[0][-1]) == type(list()):
            gt = batch[0][-1][0]
        else:
            gt = batch[0][-1]

        correct = 0
        # NOTE(review): this tau-keyed dict is immediately clobbered inside
        # the `for T in tau` loop below, where `probs` is rebound to a plain
        # list of per-model softmax outputs — the dict form is dead code.
        probs = {}
        for T in tau:
            tau_name = "tau-"+str(T)
            probs[tau_name] = 0

        # collect raw logits from every base model (m * [B,A])
        prob_list = []
        for i in range(len(nets)):
            outputs = nets[i].evaluate(batch)
            prob_list.append(outputs[1])  # m*[B,A]
            # NOTE(review): placeholder — saving logits is not implemented
            if config["save_logits"]:
                TODO = True

        for T in tau:
            tau_name = "tau-"+str(T)
            # temperature-scaled softmax per model
            probs = [net_utils.get_data(F.softmax(logits/T, dim=1)) \
                    for logits in prob_list]  # m*[B,A]

            # count correct numbers for each model
            for i in range(len(nets)):
                val, idx = probs[i].max(dim=1)
                correct = torch.eq(idx, gt)
                num_correct = torch.sum(correct)
                modelname = "M{}".format(i)
                counters[tau_name][modelname].add(num_correct, B)

                # add prob of each model
                # oracle: an example counts if ANY base model got it right
                if i == 0:
                    oracle_correct = correct
                else:
                    oracle_correct = oracle_correct + correct

            # top1-max accuracy for ensemble
            ens_probs, ens_idx = torch.stack(probs,0).max(0)  # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-max"].add(num_correct, B)

            # top1-avg accuracy for ensemble
            ens_probs = sum(probs)  # [B,A]
            max_val, max_idx = ens_probs.max(dim=1)
            num_correct = torch.sum(torch.eq(max_idx, gt))
            counters[tau_name]["top1-avg"].add(num_correct, B)

            # oracle accuracy for ensemble
            num_oracle_correct = torch.sum(torch.ge(oracle_correct, 1))
            counters[tau_name]["oracle"].add(num_oracle_correct, B)

        # attach predictions
        # NOTE(review): uses `max_idx` from the LAST tau iteration
        # (tau=100.0, top1-avg) — confirm this is the intended answer source.
        for i in range(len(batch[1])):
            qid = batch[1][i]
            predictions.append({
                "question_id": qid,
                "answer": utils.label2string(itoa, max_idx[i])
            })
    # epoch done

    # print accuracy
    for cnt_k,cnt_v in counters.items():
        txt = cnt_k + " "
        for k,v in cnt_v.items():
            txt += ", {} = {:.5f}".format(v.get_name(), v.get_average())
        print(txt)

    save_dir = os.path.join("results", "ensemble_predictions")
    io_utils.check_and_create_dir(save_dir)
    io_utils.write_json(os.path.join(save_dir, config["out"]+".json"),
                        predictions)
def visualize_LGI_SQAN(config, vis_data, itow, prefix):
    """Visualize attention maps and grounding predictions of an LGI-SQAN
    model; saves one figure per call to
    <result_dir>/qualitative/Train/<prefix>.png."""
    # fetching data
    qids = vis_data["qids"]
    qr_label = vis_data["query_labels"]  # [B, L_q] == [5,25]
    gt = vis_data["grounding_gt"]  # [B, L_v] == [5,128]
    pred = vis_data["grounding_pred"]  # [B, L_v]
    vid_nfeats = vis_data["nfeats"]
    latt_w = vis_data["t_attw"]  # [B,nseg]
    # NOTE(review): `watt_w` is assigned from "watt_w" here and then
    # immediately overwritten from "se_attw" two lines below — the first
    # assignment is dead; confirm which key is intended.
    watt_w = vis_data["watt_w"]  # [B,nstep,A,Lq]
    nl_matt_w = vis_data["nl_matt_w"]  # [B,nstep,nblock,nheads,128,128]
    watt_w = vis_data["se_attw"]  # [B,A,Lq]
    matt_w = vis_data["t_attw"]  # [B,A,Lv]
    gatt_w = vis_data["s_attw"]  # [B,A,Lv]

    if nl_matt_w is None:
        B, nseg = latt_w.shape
    else:
        # we visualize only first head in last block
        nl_matt_w = nl_matt_w[:, :, -1, 0, :, :]  # [B,nstep,128,128]
        # constants
        B, nstep, nseg, _ = nl_matt_w.shape

    # prepare xaxis labels for visualization
    query = [
        utils.label2string(itow, qr_label[idx], end_idx=0).split(" ")
        for idx in range(B)
    ]
    vid_idx = []
    for idx in range(B):
        # pad short queries up to the label width
        if len(query[idx]) < qr_label.shape[1]:
            for i in range(qr_label.shape[1] - len(query[idx])):
                # add null token
                query[idx].append("-")
        # video segment tick labels: "$" marks padding beyond the real
        # number of features; every 10th segment gets its index
        vid_idx.append([])
        for i in range(nseg):
            if i >= vid_nfeats[idx]:
                vlabel = "$"
            elif i % 10 == 0:
                vlabel = str(i)
            else:
                vlabel = ""
            vid_idx[idx].append(vlabel)

    # create figure (extra columns for per-step NL attention maps)
    if nl_matt_w is None:
        figsize = [4, B]  # (col, row)
    else:
        figsize = [4 + nstep, B]  # (col, row)
    fig = plt.figure(figsize=figsize)
    gc = gridspec.GridSpec(figsize[1], figsize[0])

    # create figure
    for idx in range(B):
        # word attention weight
        add_attention_to_figure(fig, gc, idx, 0, 1, 1, watt_w[idx],
                                query[idx], ["watt"], show_colorbar=True)
        # `n` offsets the remaining columns past the NL-attention panels
        # (n = -1 means no NL panels, so latt/Pred/GT land in columns 1-3)
        if nl_matt_w is None:
            n = -1
        else:
            for n in range(nstep):
                # NL attention in MMLG
                add_attention_to_figure(fig, gc, idx, n + 1, 1, 1,
                                        nl_matt_w[idx, n], vid_idx[idx],
                                        vid_idx[idx], show_colorbar=True)
        # local attention
        add_attention_to_figure(fig, gc, idx, n + 2, 1, 1,
                                latt_w[idx][np.newaxis, :], vid_idx[idx],
                                ["latt"], show_colorbar=True)
        # localization
        add_attention_to_figure(fig, gc, idx, n + 3, 1, 1,
                                pred[idx][np.newaxis, :], vid_idx[idx],
                                ["Pred"])
        add_attention_to_figure(fig, gc, idx, n + 4, 1, 1,
                                gt[idx][np.newaxis, :], vid_idx[idx], ["GT"])

    # save figure
    save_dir = os.path.join(config["misc"]["result_dir"], "qualitative",
                            "Train")
    save_path = os.path.join(save_dir, prefix + ".png")
    io_utils.check_and_create_dir(save_dir)
    plt.tight_layout(pad=0.1, h_pad=0.1)
    plt.savefig(save_path, bbox_inches="tight", dpi=450)
    print("Visualization of LGI-SQAN is saved in {}".format(save_path))
    plt.close()
def save_assignment_visualization(config, assigns, classes, prefix, mode,
                                  fontsize=2, cmap=plt.cm.Blues,
                                  figsize=(7, 5)):
    """Save a two-panel heatmap of the model-assignment matrix.

    Left panel is row-normalized (per model), right panel is
    column-normalized (per label). Saved to
    <result_dir>/qualitative/model_assign/<mode>/<prefix>.png.

    Args:
        config: experiment config; uses config["misc"]["result_dir"].
        assigns: tensor of shape [num_models, num_labels] with counts.
        classes: label names for the x-axis (drawn only when <= 100).
        prefix: output file name (without extension).
        mode: split sub-directory name.
        fontsize: tick/cell-text font size.
        cmap: matplotlib colormap for the heatmaps.
        figsize: figure size in inches.
    """
    # create save directory
    # NOTE: the original also read config["train_loader"]["img_dir"] into an
    # unused local; removed.
    save_dir = os.path.join(config["misc"]["result_dir"], "qualitative",
                            "model_assign", mode)
    io_utils.check_and_create_dir(save_dir)

    np.set_printoptions(precision=2)
    assigns = assigns.numpy()

    # get model names
    num_models = assigns.shape[0]
    modelnames = ["M{}".format(i) for i in range(num_models)]

    # create figure
    fig = plt.figure(figsize=figsize)
    gc = gridspec.GridSpec(1, 2)

    # draw assignments
    for ii in range(2):
        if ii == 0:
            norm_assigns = assigns.astype('float') / assigns.sum(
                axis=1)[:, np.newaxis]
            title = "normalize along model (row)"
        else:
            norm_assigns = assigns.astype('float') / assigns.sum(
                axis=0)[np.newaxis, :]
            title = "normalize along label (column)"

        sub = fig.add_subplot(gc[0, ii])
        # BUGFIX: honor the `cmap` parameter; the original hard-coded
        # plt.cm.Blues here, silently ignoring the argument.
        ax = sub.imshow(norm_assigns, interpolation='nearest', cmap=cmap)

        # show title, axis (labels)
        sub.set_title(title, fontsize=3)
        plt.setp(sub, yticks=np.arange(num_models), yticklabels=modelnames)
        plt.setp(sub.get_yticklabels(), fontsize=fontsize)
        if len(classes) <= 100:
            plt.setp(sub, xticks=np.arange(len(classes)),
                     xticklabels=classes)
            plt.setp(sub.get_xticklabels(), fontsize=fontsize, rotation=45)

        # annotate each cell with its normalized value
        fmt = '.2f'
        for i, j in itertools.product(range(norm_assigns.shape[0]),
                                      range(norm_assigns.shape[1])):
            sub.text(j, i, format(norm_assigns[i, j], fmt),
                     horizontalalignment="center",
                     fontsize=fontsize, rotation=45,
                     color="white" if norm_assigns[i, j] >= (0.70)
                     else "black")
    fig.tight_layout()

    # save figure and close it
    plt.savefig(os.path.join(save_dir, prefix + ".png"),
                bbox_inches="tight", dpi=450)
    plt.close()
def save_confusion_matrix_visualization(config, cm_list, classes, epoch,
                                        prefix, fontsize=2, normalize=True,
                                        cmap=plt.cm.Blues, figsize=(5, 5)):
    """
    This function saves the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    One subplot is drawn per matrix in `cm_list` (one per model); output goes
    to <result_dir>/qualitative/confusion_matrix/<prefix>/epoch_XXX.png.
    """
    np.set_printoptions(precision=2)
    # NOTE(review): normalization mutates the caller's `cm_list` in place —
    # confirm callers do not reuse the raw matrices afterwards.
    if normalize:
        for ii, cm in enumerate(cm_list):
            cm_list[ii] = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    # create save directory
    # NOTE(review): `img_dir` is never used in this function.
    img_dir = config["train_loader"]["img_dir"]
    save_dir = os.path.join(config["misc"]["result_dir"], "qualitative",
                            "confusion_matrix", prefix)
    io_utils.check_and_create_dir(save_dir)

    # create figure: one column per confusion matrix
    fig = plt.figure(figsize=figsize)
    gc = gridspec.GridSpec(1, len(cm_list))
    tick_marks = np.arange(len(classes))
    for ii, cm in enumerate(cm_list):
        sub = fig.add_subplot(gc[0, ii])
        # show confusion matrix with colorbar
        ax = sub.imshow(cm, interpolation='nearest', cmap=cmap)
        # show title, axis (labels)
        sub.set_title("model_{}".format(ii), fontsize=3)
        plt.setp(sub, xticks=tick_marks, xticklabels=classes)
        plt.setp(sub, yticks=tick_marks, yticklabels=classes)
        plt.setp(sub.get_xticklabels(), fontsize=fontsize, rotation=-45)
        plt.setp(sub.get_yticklabels(), fontsize=fontsize)

        # annotate sufficiently-large cells only (skip tiny class sets check)
        if len(classes) <= 100:
            fmt = '.2f' if normalize else 'd'
            max_val = cm.max()
            for i, j in itertools.product(range(cm.shape[0]),
                                          range(cm.shape[1])):
                # only annotate cells above 40% of the max; use white text
                # on dark (>80% of max) cells for contrast
                if cm[i, j] > (max_val * 0.4):
                    sub.text(j, i, format(cm[i, j], fmt),
                             horizontalalignment="center",
                             fontsize=fontsize, rotation=45,
                             color="white" if cm[i, j] > (max_val * 0.8)
                             else "black")

        if ii == 0:
            sub.set_ylabel('True label', fontsize=3)
        sub.set_xlabel('Predicted label', fontsize=3)

        """
        if (ii+1) == len(cm_list):
            divider = make_axes_locatable(sub)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            cbar = fig.colorbar(ax, cax=cax)
            cbar.ax.tick_params(labelsize=2)
        """
    fig.tight_layout()

    # save figure and close it
    plt.savefig(os.path.join(save_dir,
                "epoch_{:03d}.png".format(epoch)),
                bbox_inches="tight", dpi=450)
    plt.close()
def save_ensemble_visualization(config, data, logits, class_name, itow, itoa, \
        save_to, prefix, use_base_model=False, figsize=(5,5)):
    """ Save visualization of CMCL-based model
    Args:
        config: configuration file including information of save directory
        data: list of four components;
            [[inputs for network], img_info, selections, base_predictions]
            - inputs for network: list of items [imgs, qst_labels,
              qst_lengths, (precomputed_selections), answers]
        logits: list of logit for m models; m * (B, C)
        class_name: class names forwarded to add_answer_row_subplot
        itow: dictionary for mapping index to word in questions
        itoa: dictionary for mapping index to word in answers
        save_to: directory to save visualizations
        prefix: name for directory to save visualization
        use_base_model: also plot base-model predictions from data[3]
        figsize: figure size
    """
    # create save directory
    img_dir = config["train_loader"]["img_dir"]
    save_dir = os.path.join("visualization", save_to)
    io_utils.check_and_create_dir(save_dir)

    num_data = len(data[0][2])
    for idx in range(num_data):
        # load image
        img_path = data[1][idx]
        img = Image.open(os.path.join(img_dir, img_path)).convert("RGB")

        # convert indices of question into words and get gt and logit
        question = utils.label2string(itow, data[0][1][idx])
        gt = utils.label2string(itoa, data[0][-1][idx])

        # create figure
        fig = plt.figure(figsize=figsize)
        if use_base_model:
            gc = gridspec.GridSpec(len(logits) + 2 + len(data[3]), 14)
        else:
            gc = gridspec.GridSpec(len(logits) + 2, 14)

        # plot question
        add_question_row_subplot(fig, gc, [question], 0)

        # plot answers and predictions
        # NOTE(review): `is_vqa` feeds only the commented-out call below.
        is_vqa = config["misc"]["dataset"] == "vqa"
        # BUGFIX: the original rebound the `logits` parameter here
        # (`logits = [logit[idx] for logit in logits]`), so every iteration
        # after the first indexed the previous sample's per-sample logits
        # instead of the full m*(B,C) list. Use a fresh local instead.
        cur_logits = [logit[idx] for logit in logits]
        if use_base_model:
            for i in range(len(data[3])):
                cur_logits.append(data[3][i][idx])
        selections = data[2][idx]
        add_answer_row_subplot(fig, gc, [cur_logits, selections],\
                gt, itoa, 1, class_name)
        #add_answer_row_subplot(fig, gc, [cur_logits, selections],
        #        gt, itoa, 1, is_vqa=is_vqa)

        # save figure and close it
        img_filename = utils.get_filename_from_path(img_path)
        img_filename = "{}_{}_{}.png".format(idx, prefix, img_filename)
        #plt.savefig(os.path.join(save_dir, img_filename), bbox_inches="tight", dpi=500)
        plt.savefig(os.path.join(save_dir, img_filename),
                    bbox_inches="tight", dpi=1000)
        plt.close()
def save_san_visualization(config, data, result, itow, itoa, prefix,
                           figsize=(5, 5)):
    """ Save visualization of Stacked Attention Network
    Args:
        config: configuration file including information of save directory
        data: list of [[imgs, question_labels, question_lengths, answers],
            img_paths]
        result: list of [attention weights, logits]; (B, h, w), (B, C)
        itow: dictionary for mapping index to word in questions
        itoa: dictionary for mapping index to word in answers
        prefix: name for directory to save visualization
        figsize: figure size
    """
    img_dir = config["train_loader"]["img_dir"]
    save_dir = os.path.join(config["misc"]["result_dir"], "qualitative",
                            "attention")
    io_utils.check_and_create_dir(save_dir)

    attention_weights = result[0]
    logits = result[1]
    img_paths = data[1]
    data = data[0]
    num_data = len(data[2])
    num_stacks = config["model"]["num_stacks"]
    # answers may come wrapped in a list; use the first entry
    # IDIOM FIX: isinstance() instead of `type(x) == type(list())`
    if isinstance(data[-1], list):
        data[-1] = data[-1][0]

    for idx in range(num_data):
        # load image
        img_path = img_paths[idx]
        img = Image.open(os.path.join(img_dir, img_path)).convert("RGB")

        # convert indices of question into words and get gt and logit
        question = utils.label2string(itow, data[1][idx])
        gt = utils.label2string(itoa, data[-1][idx])
        logit = logits[idx]

        # create figure: question row, one attention row per stack, answers
        fig = plt.figure(figsize=figsize)
        col_width = 2
        row = 2 + num_stacks * col_width
        col = col_width * 2 + 6
        gc = gridspec.GridSpec(row, col)

        # plot question
        add_question_row_subplot(fig, gc, [question], 0, col_width)

        # plot attention weights
        cur_att = [attention_weights[ns][idx] for ns in range(num_stacks)]
        add_attention_row_subplot(fig, gc, img, cur_att, num_stacks, 1,
                                  col_width)

        # plot answers
        add_answer_row_subplot(fig, gc, logit, gt, itoa, row - 1)

        # save figure and close it
        img_filename = utils.get_filename_from_path(img_path)
        img_filename = "{}_{}_{}.png".format(idx, prefix, img_filename)
        plt.savefig(os.path.join(save_dir, img_filename),
                    bbox_inches="tight", dpi=500)
        #plt.savefig(save_dir + "_" + img_filename, bbox_inches="tight", dpi=500)
        plt.close()
def compute_sample_mean_per_class(config, L, net, prefix=""):
    """Accumulate per-answer-class mean feature vectors from the internal
    activations of each base model in an ensemble inference network, then
    save the means and per-class counts with torch.save."""
    assert net.classname == "INFERENCE", \
        "Currently only suppert ensemble inference network"
    assert config["model"]["save_sample_mean"]
    assert config["model"]["output_with_internal_values"]

    # prepare directory for saving sample mean
    save_dir = os.path.join("results", "sample_mean", str(prefix))
    io_utils.check_and_create_dir(save_dir)

    # construct sample mean variable where the structure is
    # {
    #   "M0": [sample_mean_for_first_layer, sample_mean_for_second_layer, ...,
    #          sample_mean_for_last_layer]
    #   "M1": [sample_mean_for_first_layer, sample_mean_for_second_layer, ...,
    #          sample_mean_for_last_layer]
    #   ...
    #   "Mm": [sample_mean_for_first_layer, sample_mean_for_second_layer, ...,
    #          sample_mean_for_last_layer]
    # }
    # note: size of each sample_mean is [num_answers, feat_dims]
    num_models = net.num_base_models
    num_answers = config["model"]["num_labels"]
    sample_means = {"M{}".format(m): [] for m in range(num_models)}
    sample_cnt = np.zeros((num_answers))

    # `i` counts processed samples; the `i == 1` check below lazily
    # allocates the per-layer accumulators on the very first sample
    i = 1
    for batch in tqdm(L):
        # forward the network
        B = batch[0][0].size(0)
        outputs = net.evaluate(batch)
        gt_answers = batch[0][-1]
        if type(gt_answers) == type(list()):
            gt_answers = gt_answers[1]  # all answers
        assert len(outputs) > 0
        internal_values = outputs[
            2]  # m * [internal_1, internal_2, ..., internal_n]
        for b in range(B):
            # each sample may have several GT answers; count each class once
            gt_idx = gt_answers[b]
            gt_idx = np.unique(gt_idx.numpy())
            for idx in gt_idx:
                sample_cnt[idx] += 1
            for m in range(num_models):
                modelname = "M{}".format(m)
                num_internal = len(internal_values[m])
                for iv in range(num_internal):
                    if i == 1:
                        # first sample: allocate [num_answers, feat_dim]
                        sample_means[modelname].append(
                            np.zeros(
                                (num_answers,
                                 internal_values[m][iv].size(1))))
                    # accumulate this sample's activation into every GT class
                    for idx in gt_idx:
                        sample_means[modelname][iv][idx] += \
                            net_utils.get_data(
                                internal_values[m][iv][b]).numpy()
            # NOTE(review): reconstructed placement — `i` appears to be a
            # per-sample counter incremented inside the batch loop; confirm.
            i += 1

    # compute mean (divide by counts)
    for modelname in sample_means.keys():
        for i, s in enumerate(sample_means[modelname]):
            # TODO: deal with nan
            sample_means[modelname][i] = s / sample_cnt[:, np.newaxis]

    # save sample mean & count
    save_path = os.path.join(save_dir, "sample_mean.pkl")
    torch.save(sample_means, save_path)
    print("save done in {}".format(save_path))
    save_path = os.path.join(save_dir, "sample_cnt.pkl")
    torch.save(sample_cnt, save_path)
    print("save done in {}".format(save_path))
def visualize_LGI(config, vis_data, itow, prefix):
    """Visualize word/temporal/gate attention and grounding predictions of an
    LGI model; saves one figure to
    <result_dir>/qualitative/Train/<prefix>_qrn.png."""
    # fetching data
    qids = vis_data["qids"]
    qr_label = vis_data["query_labels"]  # [B, L_q] == [5,25]
    gt = vis_data["grounding_gt"]  # [B, L_v] == [5,128]
    pred = vis_data["grounding_pred"]  # [B, L_v]
    vid_nfeats = vis_data["nfeats"]
    watt_w = vis_data["se_attw"]  # [B,A,Lq]
    matt_w = vis_data["t_attw"]  # [B,A,Lv]
    gatt_w = vis_data["s_attw"]  # [B,A,Lv]

    # constants
    B, num_seg = qr_label.shape[0], pred.shape[1]

    # prepare xaxis labels for visualization
    query = [
        utils.label2string(itow, qr_label[idx], end_idx=0).split(" ")
        for idx in range(B)
    ]
    vid_idx = []
    for idx in range(B):
        # pad short queries up to the label width
        if len(query[idx]) < qr_label.shape[1]:
            for i in range(qr_label.shape[1] - len(query[idx])):
                # add null token
                query[idx].append("-")
        # video segment tick labels: "$" marks padding beyond the real
        # number of features; every 10th segment gets its index
        vid_idx.append([])
        for i in range(num_seg):
            if i >= vid_nfeats[idx]:
                vlabel = "$"
            elif i % 10 == 0:
                vlabel = str(i)
            else:
                vlabel = ""
            vid_idx[idx].append(vlabel)

    # create figure (5 panels per sample row)
    figsize = [5, B]  # (col, row)
    fig = plt.figure(figsize=figsize)
    gc = gridspec.GridSpec(figsize[1], figsize[0])

    # create figure
    ngates = [str(i + 1) for i in range(gatt_w.shape[1])]
    for idx in range(B):
        # word attention weight
        add_attention_to_figure(fig, gc, idx, 0, 1, 1, watt_w[idx],
                                query[idx], ["watt"], show_colorbar=True)
        # MM attentive pooling
        add_attention_to_figure(fig, gc, idx, 1, 1, 1, matt_w[idx],
                                vid_idx[idx], ["matt"], show_colorbar=True)
        # MM attentive pooling (gate attention over ngates positions)
        add_attention_to_figure(fig, gc, idx, 2, 1, 1,
                                gatt_w[idx][np.newaxis, :], ngates,
                                ["gatt"], show_colorbar=True)
        # localization (GT mask and predicted mask)
        add_attention_to_figure(fig, gc, idx, 3, 1, 1,
                                gt[idx][np.newaxis, :], vid_idx[idx],
                                ["GT"])
        add_attention_to_figure(fig, gc, idx, 4, 1, 1,
                                pred[idx][np.newaxis, :], vid_idx[idx],
                                ["Pred"])

    # save figure
    save_dir = os.path.join(config["misc"]["result_dir"], "qualitative",
                            "Train")
    save_path = os.path.join(save_dir, prefix + "_qrn.png")
    io_utils.check_and_create_dir(save_dir)
    plt.tight_layout(pad=0.1, h_pad=0.1)
    plt.savefig(save_path, bbox_inches="tight",
                dpi=450)
    print("Visualization of LGI is saved in {}".format(save_path))
    plt.close()