def evaluation(cfg):
    """Evaluate a trained jigsaw-puzzle model on the test split.

    Loads the checkpoint from ``cfg.out_dir/model.pth``, runs hard (noise-free)
    Gumbel matching on shuffled image pieces, and logs mean L1/L2 reconstruction
    error, the proportion of wrongly-placed pieces, the proportion of images
    with any wrong piece, and the mean Kendall tau between the estimated and
    true permutations.

    Args:
        cfg: config namespace; fields used here: dataset, pieces, image_size,
            hid_c, stride, kernel_size, out_dir, batch_size, num_workers.
    """
    logger = logging.getLogger("JigsawPuzzle")
    if torch.cuda.is_available():
        device = "cuda"
        torch.backends.cudnn.benchmark = True
    else:
        device = "cpu"

    # MNIST is grayscale; all other supported datasets are RGB.
    in_c = 1 if cfg.dataset == "MNIST" else 3

    model = ConvModel(in_c, cfg.pieces, cfg.image_size, cfg.hid_c,
                      cfg.stride, cfg.kernel_size)
    # fix: map_location keeps a GPU-saved checkpoint loadable on a CPU-only host.
    model.load_state_dict(
        torch.load(os.path.join(cfg.out_dir, "model.pth"), map_location=device))
    model = model.to(device)
    # fix: switch Dropout/BatchNorm layers to inference behavior.
    model.eval()

    eval_data = build_dataset(cfg, split="test")
    loader = DataLoader(eval_data, cfg.batch_size, shuffle=False,
                        num_workers=cfg.num_workers, drop_last=False)

    with torch.no_grad():
        l1_diffs = []
        l2_diffs = []
        prop_wrongs = []
        prop_any_wrongs = []
        kendall_taus = []
        logger.info("start evaluation")
        for data in loader:
            inputs, _ = data
            pieces, random_pieces, perm_index = batch_tch_divide_image(inputs, cfg.pieces)
            pieces, random_pieces = pieces.to(device), random_pieces.to(device)

            log_alpha = model(random_pieces)
            # noise=False -> deterministic hard assignment (no Gumbel sampling).
            gumbel_matching_mat = gumbel_sinkhorn_ops.gumbel_matching(log_alpha, noise=False)
            hard_sorted_pieces = gumbel_sinkhorn_ops.inverse_permutation_for_image(
                random_pieces, gumbel_matching_mat)
            est_perm_index = gumbel_matching_mat.max(1)[1].float()

            # Per-piece reconstruction errors, shape (batchsize, num_pieces).
            hard_l1_diff = (pieces - hard_sorted_pieces).abs().mean((2, 3, 4))
            hard_l2_diff = (pieces - hard_sorted_pieces).pow(2).mean((2, 3, 4))
            # A piece counts as wrong iff its L1 error is non-zero.
            sign_l1_diff = hard_l1_diff.sign()
            prop_wrong = sign_l1_diff.mean(1)
            prop_any_wrong = sign_l1_diff.sum(1).sign()

            np_perm_index = perm_index.detach().numpy()
            np_est_perm_index = est_perm_index.to("cpu").numpy()
            kendall_tau = metric.kendall_tau(np_est_perm_index, np_perm_index)

            l1_diffs.append(hard_l1_diff)
            l2_diffs.append(hard_l2_diff)
            prop_wrongs.append(prop_wrong)
            prop_any_wrongs.append(prop_any_wrong)
            kendall_taus.append(kendall_tau)

    mean_l1_diff = torch.cat(l1_diffs).mean()
    mean_l2_diff = torch.cat(l2_diffs).mean()
    mean_prop_wrong = torch.cat(prop_wrongs).mean()
    mean_prop_any_wrong = torch.cat(prop_any_wrongs).mean()
    mean_kendall_tau = np.concatenate(kendall_taus).mean()
    logger.info("\nmean l1 diff : %f\n mean l2 diff : %f\n mean prop wrong : %f\n mean prop any wrong : %f\n mean kendall tau : %f",
                mean_l1_diff, mean_l2_diff, mean_prop_wrong, mean_prop_any_wrong,
                mean_kendall_tau
                )
# Top-level driver: build dataset/model, optionally resume a one-class
# checkpoint, compute the one-class center c, then train with periodic saves.
# NOTE(review): this section was reconstructed from a collapsed single-line
# source; the `len(c) == 0` check is placed under `args.one_class` because
# get_c computes the one-class center — confirm against the original layout.

# Directory for cached one-class center vectors.
if not os.path.exists('./c/'):
    os.makedirs('./c/')
dataset = Dataset(args)
# presumably iteration milestones consumed elsewhere in the file — not read here
change_itr = range(8000, 100000, 4000)
logger = Logger('./logs/' + args.model_name)
# Conv net for 'bimgame' image observations; MLP over 46-dim states otherwise.
if args.env_name == 'bimgame':
    model = ConvModel(3, args.num_subgoals, use_rnn=False).to(device)
else:
    model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)
start_itr = 0
c = []
if args.one_class:
    if args.pretrained_ckpt is not None:
        # Resume: model weights, iteration counter, and cached center c.
        model.load_state_dict(
            torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
        # NOTE(review): np.load yields a numpy scalar here; range() below
        # accepts numpy integers, so resuming works — verify the .npy content.
        start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
        c = torch.from_numpy(
            np.load('./c/' + args.pretrained_ckpt + '.npy')).float().to(device)
    # computing initial c for one-class out-of-set estimation
    if len(c) == 0:
        c = get_c(dataset, model, args)
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.0005)
for itr in range(start_itr, args.max_iter):
    train(itr, dataset, args, model, optimizer, logger, device, c)
    # Checkpoint the weights every 500 iterations (overwrites the same file).
    if itr % 500 == 0:
        torch.save(model.state_dict(), './ckpt/' + args.model_name + '.pkl')