def test():
    model.load(os.path.join(save_dir, 'model.tar'))
    model.build_test_loader()
    accm = Accumulator(*model.test_metrics)
    logger = get_logger(exp_id, os.path.join(save_dir, 'test.log'))
    model.test(accm)
    logger.info(accm.info(header='test'))
def test(accm=None, verbose=True):
    net.eval()
    accm = Accumulator(*model.metrics) if accm is None else accm
    with torch.no_grad():
        for i, batch in enumerate(test_loader):
            accm.update(model.loss_fn(batch, train=False))
    line = accm.info(header='test')
    if verbose:
        logger = get_logger(exp_id, os.path.join(save_dir, 'test.log'))
        logger.info(line)
    return line
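# Note: the test()/train() loops in these files rely on utils.log.Accumulator,
# whose implementation is not shown in this excerpt. The class below is only an
# illustrative sketch of the interface those loops appear to use (per-metric
# running means plus update/get/info/reset); the project's actual class may
# differ, so treat the names and formatting here as assumptions.
import time


class AccumulatorSketch:
    def __init__(self, *metrics):
        self.metrics = metrics
        self.reset()

    def reset(self):
        self.sums = {m: 0.0 for m in self.metrics}
        self.count = 0
        self.tick = time.time()

    def update(self, values):
        # accept either a single scalar or one value per metric
        if not isinstance(values, (list, tuple)):
            values = [values]
        for m, v in zip(self.metrics, values):
            self.sums[m] += float(v)
        self.count += 1

    def get(self, metric):
        return self.sums[metric] / max(self.count, 1)

    def info(self, header='', show_et=True):
        line = header + ' ' if header else ''
        line += ', '.join('{} {:.4f}'.format(m, self.get(m)) for m in self.metrics)
        if show_et:
            line += ', {:.1f} secs'.format(time.time() - self.tick)
        return line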
def train():
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # save hyperparams
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, sort_keys=True, indent=4)

    model.build_train_loader()
    model.build_test_loader()
    model.build_optimizer()

    train_accm = Accumulator(*model.train_metrics)
    test_accm = Accumulator(*model.test_metrics)
    logger = get_logger(
        exp_id,
        os.path.join(save_dir, 'train_' + time.strftime('%Y%m%d-%H%M') + '.log'))

    for t, batch in enumerate(model.train_loader, 1):
        model.train_batch(batch, train_accm)

        if t % args.test_freq == 0:
            line = 'step {}, '.format(t)
            line += model.get_lr_string() + ', '
            line += train_accm.info(header='train', show_et=False)
            model.test(test_accm)
            line += test_accm.info(header='test')
            logger.info(line)
            train_accm.reset()
            test_accm.reset()

        if t % args.save_freq == 0:
            model.save(os.path.join(save_dir, 'model.tar'))

    model.save(os.path.join(save_dir, 'model.tar'))
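# get_logger(name, filepath) is imported from utils.log but not shown in this
# excerpt. Below is a minimal sketch of a compatible logger factory built on the
# standard logging module, assuming it simply attaches file and console
# handlers; the project's version may format or route records differently.
import logging


def get_logger_sketch(name, filepath):
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if not logger.handlers:  # avoid stacking handlers on repeated calls
        fmt = logging.Formatter('%(asctime)s %(message)s')
        file_handler = logging.FileHandler(filepath)
        file_handler.setFormatter(fmt)
        logger.addHandler(file_handler)
        stream_handler = logging.StreamHandler()
        stream_handler.setFormatter(fmt)
        logger.addHandler(stream_handler)
    return logger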
from utils.log import get_logger, Accumulator
from utils.paths import benchmarks_path, results_path
from utils.tensor import to_numpy
from utils.plots import scatter, scatter_mog
import matplotlib.pyplot as plt

parser = argparse.ArgumentParser()
parser.add_argument('--benchmarkfile', type=str, default='mog_10_1000_4.tar')
parser.add_argument('--filename', type=str, default='test.log')
args, _ = parser.parse_known_args()
print(str(args))

benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))
accm = Accumulator('ari', 'nmi', 'et')
for batch in tqdm(benchmark):
    B = batch['X'].shape[0]
    for b in range(B):
        X = to_numpy(batch['X'][b])
        true_labels = to_numpy(batch['labels'][b].argmax(-1))
        true_K = len(np.unique(true_labels))

        tick = time.time()
        spec = SpectralClustering(n_clusters=true_K,
                                  affinity='nearest_neighbors',
                                  n_neighbors=10).fit(X)
        labels = spec.labels_

        accm.update([
            ARI(true_labels, labels),
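# The ARI/NMI helpers and the clustering estimators used by these baseline
# scripts are not imported within the excerpt; from the call signatures
# (n_clusters, affinity='nearest_neighbors', .fit(X), .labels_) they are
# presumably scikit-learn's. Imports consistent with that assumption:
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.metrics import adjusted_rand_score as ARI
from sklearn.metrics import normalized_mutual_info_score as NMI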
from utils.plots import scatter, scatter_mog
import matplotlib.pyplot as plt
import time

parser = argparse.ArgumentParser()
parser.add_argument('--benchmarkfile', type=str, default='mog_10_1000_4.tar')
parser.add_argument('--k_max', type=int, default=6)
parser.add_argument('--filename', type=str, default='test.log')
args, _ = parser.parse_known_args()
print(str(args))

benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))
vbmog = VBMOG(args.k_max)

accm = Accumulator('model ll', 'oracle ll', 'ARI', 'NMI', 'k-MAE', 'et')
for dataset in tqdm(benchmark):
    true_labels = to_numpy(dataset['labels'].argmax(-1))
    X = to_numpy(dataset['X'])
    ll = 0
    ari = 0
    nmi = 0
    mae = 0
    et = 0
    for b in range(len(X)):
        tick = time.time()
        vbmog.run(X[b], verbose=False)
        et += time.time() - tick
        ll += vbmog.loglikel(X[b])
        labels = vbmog.labels()
        ari += ARI(true_labels[b], labels)
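# 'k-MAE' above presumably measures the mean absolute error between the
# predicted and the true number of clusters; the excerpt ends before that term
# is accumulated. A minimal sketch of the per-example quantity, assuming
# integer label arrays (the helper name k_mae_sketch is illustrative, not from
# the project):
import numpy as np


def k_mae_sketch(true_labels_b, pred_labels_b):
    true_K = len(np.unique(true_labels_b))
    pred_K = len(np.unique(pred_labels_b))
    return abs(pred_K - true_K)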
def train():
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    model.load_from_ckpt()

    # save hyperparams
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, sort_keys=True, indent=4)

    optimizer, scheduler = model.build_optimizer()
    logger = get_logger(
        exp_id,
        os.path.join(save_dir, 'train_' + time.strftime('%Y%m%d-%H%M') + '.log'))
    accm = Accumulator(*model.metrics)
    train_accm = Accumulator('loss')

    tick = time.time()
    for t, batch in enumerate(train_loader, 1):
        net.train()
        optimizer.zero_grad()
        loss = model.loss_fn(batch)
        loss.backward()
        nn.utils.clip_grad_norm_(net.parameters(), args.clip)
        optimizer.step()
        scheduler.step()
        train_accm.update(loss.item())

        if t % args.test_freq == 0:
            line = 'step {}, lr {:.3e}, train loss {:.4f}, '.format(
                t, optimizer.param_groups[0]['lr'], train_accm.get('loss'))
            line += test(accm=accm, verbose=False)
            logger.info(line)
            accm.reset()
            train_accm.reset()

        if t % args.save_freq == 0:
            if args.save_all:
                torch.save(net.state_dict(),
                           os.path.join(save_dir, 'model{}.tar'.format(t)))
            torch.save(net.state_dict(), os.path.join(save_dir, 'model.tar'))

    torch.save(net.state_dict(), os.path.join(save_dir, 'model.tar'))
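# model.build_optimizer() is expected to return an (optimizer, scheduler) pair
# that the loop above steps once per batch. Its implementation is not shown; the
# sketch below is one compatible choice (Adam plus cosine annealing), with
# args.lr and args.num_steps as assumed hyperparameter names rather than ones
# confirmed by this excerpt.
import torch
from torch.optim.lr_scheduler import CosineAnnealingLR


def build_optimizer_sketch(net, args):
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
    scheduler = CosineAnnealingLR(optimizer, T_max=args.num_steps)
    return optimizer, scheduler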
from utils.log import Accumulator
from utils.paths import benchmarks_path, datasets_path
from utils.tensor import to_numpy
from data.kkanji import KKanji
from data.clustered_dataset import get_saved_cluster_loader
import torchvision.transforms as tvt

transform = tvt.Normalize(mean=[0.2170], std=[0.3787])
dataset = KKanji(os.path.join(datasets_path, 'kkanji'), train=False,
                 transform=transform)
filename = os.path.join(benchmarks_path, 'kkanji_10_300_12.tar')
loader = get_saved_cluster_loader(dataset, filename, classes=range(700, 813))

accm = Accumulator('ari', 'k-mae')
for batch in tqdm(loader):
    B = batch['X'].shape[0]
    for b in range(B):
        X = to_numpy(batch['X'][b]).reshape(-1, 784)
        true_labels = to_numpy(batch['labels'][b].argmax(-1))
        true_K = len(np.unique(true_labels))

        # KMeans
        kmeans = KMeans(n_clusters=true_K).fit(X)
        labels = kmeans.labels_

        # Spectral
        #spec = SpectralClustering(n_clusters=true_K).fit(X)
        #labels = spec.labels_
    res = res.clamp_min_(1e-30).sqrt_()
    return res


if not hasattr(model, 'cluster'):
    raise ValueError('Model is not for clustering')

save_dir = os.path.join(results_path, module_name, args.run_name)
net = model.net.cuda()
# net.load_state_dict(torch.load(os.path.join(save_dir, 'model.tar')))
net.load_state_dict(
    torch.load(os.path.join(save_dir, 'originalDAC_fullytrained.tar')))
net.eval()

test_loader = model.get_test_loader(filename=model.clusterfile)
accm = Accumulator('model ll', 'oracle ll', 'ARI', 'NMI', 'k-MAE')
num_failure = 0
logger = get_logger('{}_{}'.format(module_name, args.run_name),
                    os.path.join(save_dir, args.filename))

all_correct_counts = []
all_distances = []

for batch in tqdm(test_loader):
    params, labels, ll, fail = model.cluster(batch['X'].cuda(),
                                             max_iter=args.max_iter,
                                             verbose=False, check=True)
    true_labels = to_numpy(batch['labels'].argmax(-1))
    ari = 0
    nmi = 0
    mae = 0
    for b in range(len(labels)):
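# The two statements that open the excerpt above (res.clamp_min_(1e-30).sqrt_()
# and return res) are the tail of a helper whose body is cut off; clamping a
# squared quantity before an in-place sqrt is the usual ending of a pairwise
# Euclidean distance computation, which would also fit the all_distances list
# collected later. A sketch of a full helper with that tail, assuming torch
# tensors of shape [N, D] and [M, D] (pairwise_dist_sketch is an illustrative
# name, not the project's):
import torch


def pairwise_dist_sketch(x, y):
    # squared distances via ||x||^2 + ||y||^2 - 2 * x @ y^T
    x2 = (x ** 2).sum(-1, keepdim=True)      # [N, 1]
    y2 = (y ** 2).sum(-1, keepdim=True).t()  # [1, M]
    res = x2 + y2 - 2.0 * x @ y.t()          # [N, M]
    res = res.clamp_min_(1e-30).sqrt_()
    return res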
        loss = -flow.log_prob(X).mean()
        loss.backward()
        nn.utils.clip_grad_norm_(flow.parameters(), args.clip)
        if i % 1000 == 0:
            print('iter {}, lr {:.3e}, ll {}'.format(
                i, optimizer.param_groups[0]['lr'], -loss.item()))
        optimizer.step()
        scheduler.step()

    return flow.log_prob(X).mean()


benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))
accm = Accumulator('ll')
save_dir = os.path.join(results_path, 'baselines', 'maf')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
logger = get_logger('baseline_maf', os.path.join(save_dir, args.filename))

for i, dataset in enumerate(benchmark[:10], 1):
    X = dataset['X'].cuda()
    for Xb in X:
        accm.update(train_maf(Xb))
    print()
    print('dataset {} done, avg ll {}'.format(i, accm.get('ll')))
    print()
    logger.info(accm.info())

logger.info(accm.info())
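# Based on how the scripts above index them, the benchmark .tar files appear to
# hold a list of batches, each a dict with an 'X' tensor of data points and a
# one-hot 'labels' tensor (hence the labels.argmax(-1) calls). A short sketch
# for inspecting one file; the shape comments are illustrative assumptions, not
# values confirmed by this excerpt.
import os
import torch
from utils.paths import benchmarks_path

benchmark = torch.load(os.path.join(benchmarks_path, 'mog_10_1000_4.tar'))
batch = benchmark[0]
print(len(benchmark))                      # number of saved batches
print(batch['X'].shape)                    # e.g. [batch, num_points, dim]
print(batch['labels'].argmax(-1).shape)    # integer cluster id per point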