def val_epoch():
    """Run one full validation pass over ``val_loader``.

    Builds one aggregate evaluator pair (graph-constrained and
    unconstrained) plus one evaluator pair per predicate class, then
    returns ``(recall, recall_mp, mean_recall, mean_recall_mp)``.
    """
    detector.eval()

    # Per-predicate evaluators, skipping index 0 (the "no relationship" class);
    # these feed the mean-recall computation below.
    evaluator_list = [
        (pred_idx, pred_name, BasicSceneGraphEvaluator.all_modes())
        for pred_idx, pred_name in enumerate(ind_to_predicates)
        if pred_idx != 0
    ]
    evaluator_multiple_preds_list = [
        (pred_idx, pred_name,
         BasicSceneGraphEvaluator.all_modes(multiple_preds=True))
        for pred_idx, pred_name in enumerate(ind_to_predicates)
        if pred_idx != 0
    ]

    # Aggregate evaluators for overall recall.
    evaluator = BasicSceneGraphEvaluator.all_modes()
    evaluator_multiple_preds = BasicSceneGraphEvaluator.all_modes(
        multiple_preds=True)

    for val_b, batch in enumerate(val_loader):
        val_batch(conf.num_gpus * val_b, batch, evaluator,
                  evaluator_multiple_preds, evaluator_list,
                  evaluator_multiple_preds_list)

    recall = evaluator[conf.mode].print_stats()
    recall_mp = evaluator_multiple_preds[conf.mode].print_stats()
    mean_recall = calculate_mR_from_evaluator_list(evaluator_list, conf.mode)
    mean_recall_mp = calculate_mR_from_evaluator_list(
        evaluator_multiple_preds_list, conf.mode, multiple_preds=True)
    return recall, recall_mp, mean_recall, mean_recall_mp
def train_epoch(epoch_num):
    """Train the detector for one epoch.

    Periodically prints timing and the mean of the most recent per-batch
    statistics. Returns all per-batch stats concatenated column-wise as a
    pandas DataFrame.
    """
    detector.train()
    # Two evaluators used to report recall while training.
    base_evaluator = BasicSceneGraphEvaluator.all_modes()
    train_evaluator = BasicSceneGraphEvaluator.all_modes()

    per_batch_stats = []
    tick = time.time()
    for b, batch in enumerate(train_loader):
        # Skip overly large samples (batch[0][4] has more than 90 rows).
        if batch[0][4].shape[0] > 90:
            continue

        # RL-tree training uses its own batch routine.
        batch_train = train_batch_rl if conf.use_rl_tree else train_batch
        per_batch_stats.append(
            batch_train(b, batch,
                        verbose=b % (conf.print_interval * 10) == 0,
                        base_evaluator=base_evaluator,
                        train_evaluator=train_evaluator))

        if b % conf.print_interval == 0 and b >= conf.print_interval:
            recent_mean = pd.concat(
                per_batch_stats[-conf.print_interval:], axis=1).mean(1)
            time_per_batch = (time.time() - tick) / conf.print_interval
            print("\ne{:2d}b{:5d}/{:5d} {:.3f}s/batch, {:.1f}m/epoch".format(
                epoch_num, b, len(train_loader), time_per_batch,
                len(train_loader) * time_per_batch / 60))
            print(recent_mean)
            print('-----------', flush=True)
            tick = time.time()
    return pd.concat(per_batch_stats, axis=1)
def val_epoch():
    """Evaluate the detector over the validation set and print recall stats."""
    detector.eval()
    evaluator = BasicSceneGraphEvaluator.all_modes()
    for batch_idx, batch in enumerate(tqdm(val_loader)):
        val_batch(conf.num_gpus * batch_idx, batch, evaluator)
    evaluator[conf.mode].print_stats()
def val_epoch():
    """Validate the detector and return the mean R@100 for the current mode."""
    detector.eval()
    evaluator = BasicSceneGraphEvaluator.all_modes()
    for batch_idx, batch in enumerate(val_loader):
        val_batch(conf.num_gpus * batch_idx, batch, evaluator)
    evaluator[conf.mode].print_stats()
    recall_at_100 = evaluator[conf.mode].result_dict[conf.mode + '_recall'][100]
    return np.mean(recall_at_100)
def val_epoch(detector, val, val_loader, val_f):
    """Validate ``detector`` on ``val_loader``.

    Prints recall stats for both the unconstrained and graph-constrained
    evaluators, appends one CSV row of mean R@20/50/100 (unconstrained then
    constrained) to ``val_f``, and returns the mean constrained R@100.
    """
    print("=== Validating")
    detector.eval()
    # Unconstrained (multiple_preds) and graph-constrained evaluators.
    evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=True)
    evaluator_con = BasicSceneGraphEvaluator.all_modes(multiple_preds=False)
    n_val = len(val_loader)  # retained for ad-hoc progress debugging
    for val_b, batch in enumerate(val_loader):
        val_batch(conf.num_gpus * val_b, detector, batch, val,
                  evaluator, evaluator_con)
    evaluator[conf.mode].print_stats()
    evaluator_con[conf.mode].print_stats()

    recalls = evaluator[conf.mode].result_dict[conf.mode + '_recall']
    recalls_con = evaluator_con[conf.mode].result_dict[conf.mode + '_recall']
    row = (np.mean(recalls[20]), np.mean(recalls[50]), np.mean(recalls[100]),
           np.mean(recalls_con[20]), np.mean(recalls_con[50]),
           np.mean(recalls_con[100]))
    val_f.write('{},{},{},{},{},{}\n'.format(*row))
    val_f.flush()
    return np.mean(recalls_con[100])
def val_epoch():
    """Validate the detector over ``val_loader``.

    When ``conf.save_detection_results`` is set, also collects per-image
    ground-truth and top-20 predicted relationships and dumps them (together
    with per-image R@20) to ``visualization_detect_results.pkl``.

    Returns the mean R@50 for the current mode.

    Fix: the pickle file was previously opened with a bare ``open()`` whose
    handle was never closed; it is now written inside a ``with`` block so the
    file is flushed and closed deterministically.
    """
    detector.eval()
    if conf.save_detection_results:
        all_save_res = []
    evaluator = BasicSceneGraphEvaluator.all_modes(spice=False)
    for val_b, batch in enumerate(tqdm(val_loader)):
        if not conf.save_detection_results:
            val_batch(conf.num_gpus * val_b, batch, evaluator)
            continue

        gt_entry, pred_entry = val_batch(conf.num_gpus * val_b, batch,
                                         evaluator)
        # The relationships in pred_entry are already sorted by score.
        # Column 0 of rel_scores is background, hence the +1 shift to get
        # the predicate class index.
        all_pred_rel_type = pred_entry['rel_scores'][:, 1:].argmax(1) + 1
        # NOTE(review): indexing `val[val_b]` assumes one image per
        # loader batch — confirm against the loader configuration.
        all_save_res.append({
            'save_pred_rel_type': all_pred_rel_type[:20],
            'save_pred_rel_inds': pred_entry['pred_rel_inds'][:20],
            'save_pred_boxes': pred_entry['pred_boxes'],
            'save_pred_classes': pred_entry['pred_classes'],
            'save_gt_classes': gt_entry['gt_classes'],
            'save_gt_relations': gt_entry['gt_relations'],
            'save_gt_boxes': gt_entry['gt_boxes'],
            'img_size': val[val_b]['img_size'],
            'filename': val[val_b]['fn'],
        })

    if conf.save_detection_results:
        all_recall20 = evaluator[conf.mode].result_dict[conf.mode +
                                                        '_recall'][20]
        # Close the file deterministically (previously leaked the handle).
        with open('visualization_detect_results.pkl', 'wb') as f:
            pkl.dump({'all_save_res': all_save_res,
                      'all_recall20': all_recall20}, f)
        print('Finish Save Results!')

    evaluator[conf.mode].print_stats()
    return np.mean(evaluator[conf.mode].result_dict[conf.mode + '_recall'][50])
def train_batch(batch_num, b, detector, train, optimizer, verbose=False):
    """Run one training step: forward pass, relationship loss (one of four
    variants selected by config flags), backward pass with gradient clipping,
    optimizer update, and per-batch scene-graph evaluation.

    :param batch_num: index of this batch (not used inside the function).
    :param b: the collated batch; contains:
        :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
        :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
        :param all_anchor_inds: [num_anchors, 2] array of the indices into the
            concatenated RPN feature vector that give us all_anchors, each one (img_ind, fpn_idx)
        :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.
        :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img
        Training parameters:
        :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that
            will be used to compute the training loss (img_ind, fpn_idx)
        :param gt_boxes: [num_gt, 4] GT boxes over the batch.
        :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :param detector: model; ``detector[b]`` yields ``(result, result_preds)``.
    :param train: dataset providing ``ind_to_predicates`` and per-image GT
        classes / relationships / boxes.
    :param optimizer: optimizer stepping the detector's parameters.
    :param verbose: forwarded to ``clip_grad_norm`` for logging.
    :return: ``pd.Series`` of scalar losses plus batch recall@{20,50,100}
        for both unconstrained and graph-constrained evaluation.
    """
    result, result_preds = detector[b]

    losses = {}
    # Object classification loss.
    losses['class_loss'] = F.cross_entropy(result.rm_obj_dists,
                                           result.rm_obj_labels)
    n_rel = len(train.ind_to_predicates)

    if conf.lml_topk is not None and conf.lml_topk:
        # LML top-k relationship loss.
        # Note: This still uses a maximum of 1 relationship per edge
        # in the graph. Adding them all requires changing the data loading
        # process.
        gt = result.rel_labels[:, -1]
        I = gt > 0  # mask of edges with a foreground (non-background) label
        gt = gt[I]
        n_pos = len(gt)
        reps = torch.cat(result.rel_reps)
        # Expand the mask across the predicate dimension, then keep only
        # foreground rows.
        I_reps = I.unsqueeze(1).repeat(1, n_rel)
        reps = reps[I_reps].view(-1, n_rel)
        loss = []
        for i in range(n_pos):
            gt_i = gt[i]
            reps_i = reps[i]
            # Negative log of the ground-truth predicate's value.
            loss_i = -(reps_i[gt_i].log())
            loss.append(loss_i)
        loss = torch.cat(loss)
        loss = torch.sum(loss) / n_pos
        losses['rel_loss'] = loss
    elif conf.ml_loss:
        # Multi-label BCE loss over the flattened per-image predicate logits.
        loss = []
        start = 0  # running row offset into the batch-concatenated rel tensors
        for i, rel_reps_i in enumerate(result.rel_reps):
            n = rel_reps_i.shape[0]
            # Get rid of the background labels here:
            reps = result.rel_dists[start:start + n, 1:].contiguous().view(-1)
            gt = result.rel_labels[start:start + n, -1].data.cpu()
            I = gt > 0
            gt = gt[I]
            gt = gt - 1  # Hacky shift to get rid of background labels.
            # Flat index of each ground-truth predicate within the flattened
            # (n edges x (n_rel - 1) predicates) logit vector.
            r = (n_rel - 1) * torch.arange(len(I))[I].long()
            gt_flat = r + gt
            gt_flat_onehot = torch.zeros(len(reps))
            gt_flat_onehot.scatter_(0, gt_flat, 1)
            loss_i = torch.nn.BCEWithLogitsLoss(size_average=False)(
                reps, Variable(gt_flat_onehot.cuda()))
            loss.append(loss_i)
            start += n
        loss = torch.cat(loss)
        loss = torch.sum(loss) / len(loss)
        losses['rel_loss'] = loss
    elif conf.entr_topk is not None and conf.entr_topk:
        # Truncated top-k entropy relationship loss.
        # Note: This still uses a maximum of 1 relationship per edge
        # in the graph. Adding them all requires changing the data loading
        # process.
        loss = []
        start = 0
        for i, rel_reps_i in enumerate(result.rel_reps):
            n = rel_reps_i.shape[0]
            # Get rid of the background labels here:
            reps = result.rel_dists[start:start + n, 1:].contiguous().view(-1)
            if len(reps) <= conf.entr_topk:
                # Nothing to do for small graphs.
                continue
            gt = result.rel_labels[start:start + n, -1].data.cpu()
            I = gt > 0
            gt = gt[I]
            gt = gt - 1  # Hacky shift to get rid of background labels.
            r = (n_rel - 1) * torch.arange(len(I))[I].long()
            gt_flat = r + gt
            n_pos = len(gt_flat)
            if n_pos == 0:
                # Nothing to do if there is no ground-truth data.
                continue
            reps_sorted, J = reps.sort(descending=True)
            # Logits ranked below the top-k cutoff, and their original indices.
            reps_sorted_last = reps_sorted[conf.entr_topk:]
            J_last = J[conf.entr_topk:]
            # Hacky way of removing the ground-truth from J.
            J_last_bool = J_last != gt_flat[0]
            for j in range(n_pos - 1):
                J_last_bool *= (J_last != gt_flat[j + 1])
            J_last_bool = J_last_bool.type_as(reps)
            loss_i = []
            for j in range(n_pos):
                yj = gt_flat[j]
                fyj = reps[yj]
                # log(1 + sum over non-GT below-cutoff logits of exp(f - f_yj))
                loss_ij = torch.log(1. + torch.sum(
                    (reps_sorted_last - fyj).exp() * J_last_bool))
                loss_i.append(loss_ij)
            loss_i = torch.cat(loss_i)
            loss_i = torch.sum(loss_i) / len(loss_i)
            loss.append(loss_i)
            start += n
        loss = torch.cat(loss)
        loss = torch.sum(loss) / len(loss)
        losses['rel_loss'] = loss
    else:
        # Default: plain cross-entropy over predicate classes.
        losses['rel_loss'] = F.cross_entropy(result.rel_dists,
                                             result.rel_labels[:, -1])

    loss = sum(losses.values())

    optimizer.zero_grad()
    loss.backward()
    # Clip only parameters that actually received gradients.
    clip_grad_norm(
        [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
        max_norm=conf.clip, verbose=verbose, clip=True)
    losses['total'] = loss
    optimizer.step()

    # Evaluate this batch's predictions with unconstrained (multiple_preds)
    # and graph-constrained evaluators.
    evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=True)
    evaluator_con = BasicSceneGraphEvaluator.all_modes(multiple_preds=False)
    assert conf.num_gpus == 1
    # assert conf.mode == 'predcls'
    for i, (pred_i, gt_idx) in enumerate(zip(result_preds, b.indexes)):
        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = pred_i
        gt_entry = {
            'gt_classes': train.gt_classes[gt_idx].copy(),
            'gt_relations': train.relationships[gt_idx].copy(),
            'gt_boxes': train.gt_boxes[gt_idx].copy(),
        }
        # Sanity check: every predicted relation must connect foreground objects.
        assert np.all(objs_i[rels_i[:, 0]] > 0) and \
            np.all(objs_i[rels_i[:, 1]] > 0)
        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,  # hack for now.
        }
        evaluator[conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
        )
        evaluator_con[conf.mode].evaluate_scene_graph_entry(
            gt_entry,
            pred_entry,
        )

    # Collect scalar losses (legacy .data[0] access — pre-0.4 PyTorch style).
    res = {x: y.data[0] for x, y in losses.items()}
    recalls = evaluator[conf.mode].result_dict[conf.mode + '_recall']
    recalls_con = evaluator_con[conf.mode].result_dict[conf.mode + '_recall']
    res.update({
        'recall20': np.mean(recalls[20]),
        'recall50': np.mean(recalls[50]),
        'recall100': np.mean(recalls[100]),
        'recall20_con': np.mean(recalls_con[20]),
        'recall50_con': np.mean(recalls_con[50]),
        'recall100_con': np.mean(recalls_con[100]),
    })
    res = pd.Series(res)
    return res
# assert np.all(rels_i[:,2] > 0) pred_entry = { 'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE, 'pred_classes': objs_i, 'pred_rel_inds': rels_i, 'obj_scores': obj_scores_i, 'rel_scores': pred_scores_i, } all_pred_entries.append(pred_entry) eval_entry(conf.mode, gt_entry, pred_entry, evaluator, evaluator_multiple_preds, evaluator_list, evaluator_multiple_preds_list) evaluator = BasicSceneGraphEvaluator.all_modes() evaluator_multiple_preds = BasicSceneGraphEvaluator.all_modes(multiple_preds=True) evaluator_list = [] # for calculating recall of each relationship except no relationship evaluator_multiple_preds_list = [] for index, name in enumerate(ind_to_predicates): if index == 0: continue evaluator_list.append((index, name, BasicSceneGraphEvaluator.all_modes())) evaluator_multiple_preds_list.append((index, name, BasicSceneGraphEvaluator.all_modes(multiple_preds=True))) if conf.cache is not None and os.path.exists(conf.cache): print("Found {}! Loading from it".format(conf.cache)) with open(conf.cache,'rb') as f: all_pred_entries = pkl.load(f) for i, pred_entry in enumerate(tqdm(all_pred_entries)): gt_entry = {
pred_entry = { 'pred_boxes': boxes_i * BOX_SCALE / IM_SCALE, 'pred_classes': objs_i, 'pred_rel_inds': rels_i, 'obj_scores': obj_scores_i, 'rel_scores': pred_scores_i, } all_pred_entries.append(pred_entry) evaluator[conf.mode].evaluate_scene_graph_entry( gt_entry, pred_entry, ) evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=conf.multi_pred) if conf.cache is not None and os.path.exists(conf.cache): print("Found {}! Loading from it".format(conf.cache)) with open(conf.cache, 'rb') as f: all_pred_entries = pkl.load(f) for i, pred_entry in enumerate(tqdm(all_pred_entries)): gt_entry = { 'gt_classes': val.gt_classes[i].copy(), 'gt_relations': val.relationships[i].copy(), 'gt_boxes': val.gt_boxes[i].copy(), } evaluator[conf.mode].evaluate_scene_graph_entry( gt_entry, pred_entry, )
def main():
    """Load a pretrained MotifNet checkpoint, sample 100 random test images,
    and save their ground-truth and predicted scene-graph visualizations.
    """
    # Simulated command line: overrides whatever argv the script was
    # actually launched with before ModelConfig parses it.
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    # Pick the model implementation based on config.
    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(
        num_val_im=conf.val_size,
        filter_duplicate_rels=True,
        use_proposals=conf.use_proposals,
        filter_non_overlap=conf.mode == 'sgdet',
    )
    # In test mode, evaluate on the test split instead of validation.
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus
    )

    detector = RelModel(
        classes=train.ind_to_classes,
        rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus,
        mode=conf.mode,
        require_overlap_det=True,
        use_resnet=conf.use_resnet,
        order=conf.order,
        nl_edge=conf.nl_edge,
        nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim,
        use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim,
        rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias,
        use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision
    )

    detector.cuda()
    # Restore as many matching weights as possible from the checkpoint.
    ckpt = torch.load(conf.ckpt)
    optimistic_restore(detector, ckpt['state_dict'])

    # NOTE(review): `evaluator` is constructed but never used below — confirm
    # whether it can be removed.
    evaluator = BasicSceneGraphEvaluator.all_modes(
        multiple_preds=conf.multi_pred)

    # Pre-computed per-image recalls, presumably produced by an earlier
    # evaluation run — TODO confirm the pickle's producer.
    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))

    # Fixed seed so the same 100 images are sampled on every run.
    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    # Selected indices ordered by their recall (for logging only).
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'

    for idx in selected_idxs:
        # Ground truth for this image (note: val is the test split here
        # because conf.test was set above).
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
        # Forward pass on a single-image batch.
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]
        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res
        pred_entry = {
            'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }

        # Save the image plus GT and predicted scene graphs.
        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res, unique_cnames,
                        save_dir, multi_pred=conf.multi_pred, n_pred=20)
def val_epoch(loader, name, n_batches=-1, is_test=False):
    """Evaluate the detector on ``loader`` across every mode in EVAL_MODES.

    For each mode it runs a graph-constrained and an unconstrained evaluator,
    optionally collects per-predicate evaluators for mean recall, and — when
    ``conf.wandb_log`` is set — logs all metrics to W&B.

    :param loader: data loader to evaluate on.
    :param name: split label (e.g. 'val_...', 'val_zs', 'test_zs'); controls
        which evaluators and metrics are produced.
    :param n_batches: if > -1, stop after this many batches.
    :param is_test: affects the printed header and the W&B log repeat count.
    """
    print('\nEvaluate %s %s triplets' % (name.upper(),
                                         'test' if is_test else 'val'))
    detector.eval()
    evaluator, all_pred_entries, all_metrics = {}, {}, []
    with NO_GRAD():
        for eval_m in EVAL_MODES:
            if eval_m == 'sgdet' and name.find('val_') >= 0:
                continue  # skip for validation, because it takes a lot of time

            print('\nEvaluating %s...' % eval_m.upper())

            evaluator[eval_m] = BasicSceneGraphEvaluator(
                eval_m)  # graph constrained evaluator
            evaluator[eval_m + '_nogc'] = BasicSceneGraphEvaluator(
                eval_m,
                multiple_preds=True,  # graph unconstrained evaluator
                per_triplet=name not in ['val_zs', 'test_zs'],
                triplet_counts=train.triplet_counts,
                triplet2str=train_loader.dataset.triplet2str)

            # for calculating recall of each relationship except no relationship
            evaluator_list, evaluator_multiple_preds_list = [], []
            if name not in ['val_zs', 'test_zs'] and name.find('val_') < 0:
                for index, name_s in enumerate(train.ind_to_predicates):
                    if index == 0:
                        continue
                    evaluator_list.append(
                        (index, name_s, BasicSceneGraphEvaluator.all_modes()))
                    evaluator_multiple_preds_list.append(
                        (index, name_s, BasicSceneGraphEvaluator.all_modes(
                            multiple_preds=True)))

            # Put the detector into the right evaluation mode for eval_m.
            set_mode(detector, mode=eval_m, is_train=False, conf=conf,
                     verbose=True)

            # For all val/test batches
            all_pred_entries[eval_m] = []
            for val_b, batch in enumerate(tqdm(loader)):
                pred_entry = val_batch(conf.num_gpus * val_b, batch,
                                       evaluator, eval_m, loader.dataset,
                                       evaluator_list,
                                       evaluator_multiple_preds_list)
                if not conf.nosave:
                    all_pred_entries[eval_m].extend(pred_entry)

                # Early stop after n_batches when a limit was requested.
                if n_batches > -1 and val_b + 1 >= n_batches:
                    break

            evaluator[eval_m].print_stats()
            evaluator[eval_m + '_nogc'].print_stats()

            mean_recall = mean_recall_mp = None
            if len(evaluator_list) > 0:
                # Compute Mean Recall Results
                mean_recall = calculate_mR_from_evaluator_list(
                    evaluator_list, eval_m, save_file=None)
                mean_recall_mp = calculate_mR_from_evaluator_list(
                    evaluator_multiple_preds_list, eval_m,
                    multiple_preds=True, save_file=None)

            # conf.wandb_log is both truthy-checked here and called below.
            if not conf.wandb_log:
                continue

            # Log using WANDB
            eval_gc = evaluator[eval_m].result_dict
            eval_no_gc = evaluator[eval_m + '_nogc'].result_dict
            results_dict = {}
            # Constrained ('GC') and unconstrained ('NOGC') recalls,
            # plus mean recall when available.
            for eval_, mean_eval, sfx in zip([eval_gc, eval_no_gc],
                                             [mean_recall, mean_recall_mp],
                                             ['GC', 'NOGC']):
                for k, v in eval_[eval_m + '_recall'].items():
                    all_metrics.append(np.mean(v))
                    results_dict['%s/%s_R@%i_%s' %
                                 (eval_m, name, k, sfx)] = np.mean(v)
                if mean_eval:
                    for k, v in mean_eval.items():
                        results_dict['%s/%s_m%s_%s' %
                                     (eval_m, name, k, sfx)] = np.mean(v)

            # Per triplet metrics
            if name not in ['val_zs', 'test_zs']:
                for case in ['', '_norm']:
                    for k, v in eval_no_gc[eval_m + '_recall_triplet' +
                                           case].items():
                        results_dict['%s/%s_R@%i_triplet%s' %
                                     (eval_m, name, k, case)] = v
                    # 'medianrankclass' is only reported for the unnormalized case.
                    for metric in ['meanrank', 'medianrank'] + (
                            ['medianrankclass'] if case == '' else []):
                        results_dict['%s/%s_%s_triplet%s' %
                                     (eval_m, name, metric, case)] = \
                            eval_no_gc[eval_m + ('_%s_triplet' % metric) + case]

            conf.wandb_log(results_dict,
                           step=detector.global_batch_iter,
                           is_summary=True,
                           log_repeats=5 if is_test else 1)
'pred_rel_inds': rels_i, 'obj_scores': obj_scores_i, 'rel_scores': pred_scores_i, # hack for now. 'rel_rank_scores': rel_rank_scores_i, 'forest': forest } all_pred_entries.append(pred_entry) evaluator[conf.mode].evaluate_scene_graph_entry( gt_entry, pred_entry, ) evaluator = BasicSceneGraphEvaluator.all_modes( multiple_preds=conf.multi_pred, num_predicates=80 if conf.vg200 or conf.vg200_kr else 50) if conf.cache is not None and os.path.exists(conf.cache): print("Found {}! Loading from it".format(conf.cache)) with open(conf.cache, 'rb') as f: all_pred_entries = pkl.load(f) for i, pred_entry in enumerate(tqdm(all_pred_entries)): gt_entry = { 'gt_classes': val.gt_classes[i].copy(), 'gt_relations': val.relationships[i].copy(), 'gt_boxes': val.gt_boxes[i].copy(), 'gt_key_rels': val.key_rel_idxes[i].copy() if conf.vg200_kr else None } evaluator[conf.mode].evaluate_scene_graph_entry(