Esempio n. 1
0
def test():
    """Restore the trained model, evaluate it, and log the test metrics."""
    # Load weights from the checkpoint stored in the experiment directory.
    ckpt_path = os.path.join(save_dir, 'model.tar')
    model.load(ckpt_path)
    model.build_test_loader()

    # Accumulate every metric the model reports during testing.
    metrics = Accumulator(*model.test_metrics)
    logger = get_logger(exp_id, os.path.join(save_dir, 'test.log'))
    model.test(metrics)
    logger.info(metrics.info(header='test'))
Esempio n. 2
0
File: run.py Progetto: mlzxy/dac
def test(accm=None, verbose=True):
    """Evaluate the network on the test loader and return the summary line.

    If `accm` is not supplied, a fresh Accumulator over the model's metrics
    is created. When `verbose` is true, the line is also written to the
    experiment's test log.
    """
    net.eval()
    if accm is None:
        accm = Accumulator(*model.metrics)

    # Evaluation only — no gradients needed.
    with torch.no_grad():
        for batch in test_loader:
            accm.update(model.loss_fn(batch, train=False))

    summary = accm.info(header='test')
    if verbose:
        get_logger(exp_id, os.path.join(save_dir, 'test.log')).info(summary)
    return summary
Esempio n. 3
0
def train():
    """Run the training loop, periodically evaluating and checkpointing."""
    # Ensure the output directory exists before any file is written.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    # Persist the hyperparameters alongside the run artifacts.
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, sort_keys=True, indent=4)

    model.build_train_loader()
    model.build_test_loader()
    model.build_optimizer()

    train_accm = Accumulator(*model.train_metrics)
    test_accm = Accumulator(*model.test_metrics)
    log_name = 'train_' + time.strftime('%Y%m%d-%H%M') + '.log'
    logger = get_logger(exp_id, os.path.join(save_dir, log_name))

    ckpt_path = os.path.join(save_dir, 'model.tar')
    for step, batch in enumerate(model.train_loader, 1):
        model.train_batch(batch, train_accm)

        # Periodic evaluation: one log line with train and test metrics.
        if step % args.test_freq == 0:
            line = 'step {}, '.format(step)
            line += model.get_lr_string() + ', '
            line += train_accm.info(header='train', show_et=False)
            model.test(test_accm)
            line += test_accm.info(header='test')
            logger.info(line)
            train_accm.reset()
            test_accm.reset()

        # Periodic checkpoint.
        if step % args.save_freq == 0:
            model.save(ckpt_path)

    # Final checkpoint once the loader is exhausted.
    model.save(ckpt_path)
Esempio n. 4
0
from utils.log import get_logger, Accumulator
from utils.paths import benchmarks_path, results_path
from utils.tensor import to_numpy
from utils.plots import scatter, scatter_mog
import matplotlib.pyplot as plt

# Benchmark script: run spectral clustering over a saved benchmark file and
# accumulate clustering-quality metrics (ARI, NMI, elapsed time).
# NOTE(review): this snippet is truncated — the accm.update([...]) call at the
# bottom is cut off mid-statement by the extraction.
parser = argparse.ArgumentParser()
parser.add_argument('--benchmarkfile', type=str, default='mog_10_1000_4.tar')
parser.add_argument('--filename', type=str, default='test.log')

# parse_known_args so unrelated flags from a shared launcher are ignored.
args, _ = parser.parse_known_args()
print(str(args))

benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))
accm = Accumulator('ari', 'nmi', 'et')
for batch in tqdm(benchmark):
    # Each batch holds B independent datasets; cluster each one separately.
    B = batch['X'].shape[0]
    for b in range(B):
        X = to_numpy(batch['X'][b])
        # Ground-truth labels are stored one-hot; argmax recovers class ids.
        true_labels = to_numpy(batch['labels'][b].argmax(-1))
        true_K = len(np.unique(true_labels))

        # Time the clustering itself (fed into the 'et' metric below).
        tick = time.time()
        spec = SpectralClustering(n_clusters=true_K,
                                  affinity='nearest_neighbors',
                                  n_neighbors=10).fit(X)
        labels = spec.labels_

        accm.update([
            ARI(true_labels, labels),
Esempio n. 5
0
from utils.plots import scatter, scatter_mog
import matplotlib.pyplot as plt
import time

# Benchmark script: evaluate a variational-Bayes mixture-of-Gaussians baseline
# (VBMOG) on a saved benchmark file.
# NOTE(review): this snippet is truncated — the per-batch metric accumulation
# after `ari += ...` is cut off by the extraction.
parser = argparse.ArgumentParser()
parser.add_argument('--benchmarkfile', type=str, default='mog_10_1000_4.tar')
parser.add_argument('--k_max', type=int, default=6)
parser.add_argument('--filename', type=str, default='test.log')

# parse_known_args so unrelated flags from a shared launcher are ignored.
args, _ = parser.parse_known_args()
print(str(args))

benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))

vbmog = VBMOG(args.k_max)
accm = Accumulator('model ll', 'oracle ll', 'ARI', 'NMI', 'k-MAE', 'et')
for dataset in tqdm(benchmark):
    # Ground-truth labels are stored one-hot; argmax recovers class ids.
    true_labels = to_numpy(dataset['labels'].argmax(-1))
    X = to_numpy(dataset['X'])
    # Per-dataset running sums over the B batches inside this dataset.
    ll = 0
    ari = 0
    nmi = 0
    mae = 0
    et = 0
    for b in range(len(X)):
        # Time only the VBMOG fit itself.
        tick = time.time()
        vbmog.run(X[b], verbose=False)
        et += time.time() - tick
        ll += vbmog.loglikel(X[b])
        labels = vbmog.labels()
        ari += ARI(true_labels[b], labels)
Esempio n. 6
0
File: run.py Progetto: mlzxy/dac
def train():
    """Train the network, logging evaluation results and saving checkpoints.

    Relies on module-level globals: `model`, `net`, `train_loader`, `args`,
    `save_dir`, `exp_id`, and the sibling `test()` helper.
    """
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    model.load_from_ckpt()

    # Save hyperparameters so the run can be reproduced.
    with open(os.path.join(save_dir, 'args.json'), 'w') as f:
        json.dump(args.__dict__, f, sort_keys=True, indent=4)

    optimizer, scheduler = model.build_optimizer()
    logger = get_logger(exp_id, os.path.join(save_dir,
        'train_' + time.strftime('%Y%m%d-%H%M') + '.log'))
    accm = Accumulator(*model.metrics)
    train_accm = Accumulator('loss')

    # FIX: removed dead `tick = time.time()` — it was assigned but never read.
    for t, batch in enumerate(train_loader, 1):
        net.train()
        optimizer.zero_grad()
        loss = model.loss_fn(batch)
        loss.backward()
        # Clip gradient norm to stabilize training.
        nn.utils.clip_grad_norm_(net.parameters(), args.clip)
        optimizer.step()
        scheduler.step()
        train_accm.update(loss.item())

        # Periodic evaluation: one log line with lr, train loss, test metrics.
        if t % args.test_freq == 0:
            line = 'step {}, lr {:.3e}, train loss {:.4f}, '.format(
                    t, optimizer.param_groups[0]['lr'], train_accm.get('loss'))
            line += test(accm=accm, verbose=False)
            logger.info(line)
            accm.reset()
            train_accm.reset()

        # Periodic checkpoint; optionally keep a step-tagged copy of each.
        if t % args.save_freq == 0:
            if args.save_all:
                torch.save(net.state_dict(),
                        os.path.join(save_dir, 'model{}.tar'.format(t)))
            torch.save(net.state_dict(), os.path.join(save_dir, 'model.tar'))

    # Final checkpoint once the loader is exhausted.
    torch.save(net.state_dict(), os.path.join(save_dir, 'model.tar'))
Esempio n. 7
0
from utils.log import Accumulator
from utils.paths import benchmarks_path, datasets_path
from utils.tensor import to_numpy

from data.kkanji import KKanji
from data.clustered_dataset import get_saved_cluster_loader
import torchvision.transforms as tvt

# Baseline script: cluster saved KKanji benchmark batches with KMeans and
# accumulate ARI / k-MAE.
# NOTE(review): this snippet appears truncated — `accm` is created but never
# updated inside the visible loop body.
transform = tvt.Normalize(mean=[0.2170], std=[0.3787])  # dataset-specific normalization constants
dataset = KKanji(os.path.join(datasets_path, 'kkanji'),
                 train=False,
                 transform=transform)
filename = os.path.join(benchmarks_path, 'kkanji_10_300_12.tar')
# Held-out classes 700-812 form the evaluation split.
loader = get_saved_cluster_loader(dataset, filename, classes=range(700, 813))
accm = Accumulator('ari', 'k-mae')

for batch in tqdm(loader):
    # Each batch holds B independent clustering problems.
    B = batch['X'].shape[0]
    for b in range(B):
        # Flatten each image to a 784-dim vector for sklearn clustering.
        X = to_numpy(batch['X'][b]).reshape(-1, 784)
        # Ground-truth labels are stored one-hot; argmax recovers class ids.
        true_labels = to_numpy(batch['labels'][b].argmax(-1))
        true_K = len(np.unique(true_labels))

        # KMeans with the oracle number of clusters.
        kmeans = KMeans(n_clusters=true_K).fit(X)
        labels = kmeans.labels_

        # Spectral
        #spec = SpectralClustering(n_clusters=true_K).fit(X)
        #labels = spec.labels_
Esempio n. 8
0
    res = res.clamp_min_(1e-30).sqrt_()
    return res


# Evaluation script: load a fully-trained network and run the model's
# amortized clustering over the test loader.
# NOTE(review): this snippet is truncated — the body of the `for b in
# range(len(labels)):` loop at the bottom is cut off by the extraction.
if not hasattr(model, 'cluster'):
    raise ValueError('Model is not for clustering')

save_dir = os.path.join(results_path, module_name, args.run_name)
net = model.net.cuda()

# net.load_state_dict(torch.load(os.path.join(save_dir, 'model.tar')))
net.load_state_dict(
    torch.load(os.path.join(save_dir, 'originalDAC_fullytrained.tar')))
net.eval()
test_loader = model.get_test_loader(filename=model.clusterfile)
accm = Accumulator('model ll', 'oracle ll', 'ARI', 'NMI', 'k-MAE')
num_failure = 0
logger = get_logger('{}_{}'.format(module_name, args.run_name),
                    os.path.join(save_dir, args.filename))
all_correct_counts = []
all_distances = []
for batch in tqdm(test_loader):
    # `check=True` makes model.cluster also report convergence failures.
    params, labels, ll, fail = model.cluster(batch['X'].cuda(),
                                             max_iter=args.max_iter,
                                             verbose=False,
                                             check=True)
    # Ground-truth labels are stored one-hot; argmax recovers class ids.
    true_labels = to_numpy(batch['labels'].argmax(-1))
    # Per-batch running sums over the clustered datasets.
    ari = 0
    nmi = 0
    mae = 0
    for b in range(len(labels)):
Esempio n. 9
0
File: run_maf.py Progetto: mlzxy/dac
        loss = -flow.log_prob(X).mean()
        loss.backward()
        nn.utils.clip_grad_norm_(flow.parameters(), args.clip)

        if i % 1000 == 0:
            print('iter {}, lr {:.3e}, ll {}'.format(
                i, optimizer.param_groups[0]['lr'], -loss.item()))

        optimizer.step()
        scheduler.step()

    return flow.log_prob(X).mean()


# Baseline script: fit a MAF per dataset slice and accumulate log-likelihood.
benchmark = torch.load(os.path.join(benchmarks_path, args.benchmarkfile))
accm = Accumulator('ll')
save_dir = os.path.join(results_path, 'baselines', 'maf')
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
logger = get_logger('baseline_maf', os.path.join(save_dir, args.filename))

# Only the first 10 datasets of the benchmark are evaluated.
for idx, ds in enumerate(benchmark[:10], 1):
    X = ds['X'].cuda()
    for xb in X:
        # Train a fresh flow on each slice and record its final log-likelihood.
        accm.update(train_maf(xb))
        print()
    print('dataset {} done, avg ll {}'.format(idx, accm.get('ll')))
    print()
    logger.info(accm.info())
logger.info(accm.info())