def is_derailed(model, load_model_for_check=False):
    """Tell whether a model is marked as derailed (a 'derailed' file in its
    directory) or, optionally, fails a dummy forward pass.

    model can be a result dict (with a 'dir' key), a directory path or a
    loaded model.
    """
    from cvae import ClassificationVariationalNetwork

    if isinstance(model, dict):
        directory = model['dir']
    elif isinstance(model, str):
        directory = model
    else:
        directory = model.saved_dir

    if os.path.exists(os.path.join(directory, 'derailed')):
        return True

    elif load_model_for_check:
        try:
            model = ClassificationVariationalNetwork.load(directory)
            if torch.cuda.is_available():
                model.to('cuda')
            x = torch.zeros(1, *model.input_shape, device=model.device)
            model.evaluate(x)
        except ValueError:
            return True

    return False

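# A minimal usage sketch for is_derailed; the directory below is hypothetical.
# The helper accepts a result dict with a 'dir' key, a directory path or an
# already loaded model, and with load_model_for_check=True it also reloads the
# model and runs a dummy forward pass.
def _example_is_derailed_usage(directory='/tmp/000033'):
    if is_derailed(directory, load_model_for_check=True):
        logging.warning('%s looks derailed, skipping it', directory)
        return False
    return True
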
def collect_models(directory, wanted_epoch='last', load_state=True, tpr_for_max=0.95,
                   **default_load_parameters):
    from cvae import ClassificationVariationalNetwork

    if 'dump' in directory:
        return

    assert wanted_epoch == 'last' or not load_state

    try:
        logging.debug(f'Loading net in: {directory}')
        model = ClassificationVariationalNetwork.load(directory,
                                                      load_state=load_state,
                                                      **default_load_parameters)
        return make_dict_from_model(model, directory, tpr=tpr_for_max, wanted_epoch=wanted_epoch)

    except (FileNotFoundError, PermissionError, NoModelError):
        pass

    except RuntimeError as e:
        logging.warning(f'Load error in {directory}, see log file')
        logging.debug(f'Load error: {e}')

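# A hedged sketch of collect_models over a directory tree; the root path and
# the os.walk scan are assumptions, not part of the original helper (which
# handles a single directory and returns None when nothing can be loaded).
def _example_collect_models_usage(root='./jobs'):
    collected = []
    for directory, _, files in os.walk(root):
        if 'params.json' in files:
            d = collect_models(directory, wanted_epoch='last', load_state=False)
            if d is not None:
                collected.append(d)
    return collected
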
def needed_remote_files(*mdirs, epoch='last', which_rec='all', state=False):
    r"""List missing recorders to be fetched on a remote.

    -- mdirs: list of directories

    -- epoch: 'last', 'min-loss' or an int

    -- which_rec: either 'none', 'ind' or 'all'

    -- state: whether to include state.pth

    Returns a generator of (directory, needed file path) pairs.
    """
    assert not state or epoch == 'last'

    from cvae import ClassificationVariationalNetwork as M

    for d in mdirs:
        m = M.load(d, load_net=False)
        epoch_ = epoch
        if epoch_ == 'min-loss':
            epoch_ = m.training_parameters.get('early-min-loss', 'last')
        if epoch_ == 'last':
            epoch_ = max(m.testing)
        if isinstance(epoch_, int):
            epoch_ = '{:04d}'.format(epoch_)

        testset = m.training_parameters['set']

        sets = []
        recs_to_exclude = which_rec.split('-')[1:]
        which_rec_ = which_rec.split('-')[0]

        if which_rec_ in ('all', 'ind'):
            sets.append(testset)
            if which_rec_ == 'all':
                sets += get_same_size_by_name(testset)

        for _ in [_ for _ in recs_to_exclude if _ in sets]:
            sets.remove(_)

        for s in sets:
            sdir = os.path.join(d, 'samples', epoch_, 'record-{}.pth'.format(s))
            if not os.path.exists(sdir):
                yield d, sdir

        if state:
            sdir = os.path.join(d, 'state.pth')
            if not os.path.exists(sdir):
                yield d, sdir

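# A hedged sketch of how this generator can feed the rsync call used further
# below: write the missing recorder paths of a few (hypothetical) model
# directories into /tmp/files, one path per line.
def _example_needed_remote_files_usage(mdirs=('/tmp/000033', '/tmp/000186')):
    with open('/tmp/files', 'w') as f:
        for _, missing_path in needed_remote_files(*mdirs, epoch='last', which_rec='all'):
            f.write(missing_path + '\n')
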
def gather_registered_models(mdict, filter, tpr_for_max=0.95, wanted_epoch='last', **kw):
    from cvae import ClassificationVariationalNetwork

    mlist = []
    for _ in mdict:
        if filter is None or filter.filter(mdict[_]):
            m = ClassificationVariationalNetwork.load(_, **kw)
            mlist.append(make_dict_from_model(m, _, tpr=tpr_for_max, wanted_epoch=wanted_epoch))

    return mlist

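# A hedged usage sketch: mdict is assumed to map saved directories to the
# registered model dicts, and filter (if not None) to expose a
# .filter(model_dict) -> bool method; the names below are illustrative only.
#
#   gathered = gather_registered_models(registered_models, None, load_state=False)
#   print(len(gathered), 'models gathered')
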
def load(cls, dir_name, *a, **kw):
    architecture = save_load.load_json(dir_name, 'params.json')
    models = [architecture[str(_)] for _ in range(len(architecture))]

    m = cls(*[M.load(_, *a, **kw) for _ in models])

    try:
        m.testing = save_load.load_json(dir_name, 'test.json', presumed_type=int)
    except FileNotFoundError:
        pass

    try:
        m.ood_results = save_load.load_json(dir_name, 'ood.json', presumed_type=int)
    except FileNotFoundError:
        pass

    m.saved_dir = dir_name

    return m

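# A hedged usage sketch, assuming this load is the classmethod of the
# IteratedModels wrapper used in the script below (the directory name is
# hypothetical):
#
#   model = IteratedModels.load('/tmp/iterated-model', load_state=False)
#   print(model.saved_dir)
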
logging.info('{} model{} over {}'.format(len(mdirs), 's' if len(mdirs) > 1 else '', total_models))

if removed:
    logging.error('Exiting, load files')
    logging.error('E.g.: %s', '$ rsync -avP --files-from=/tmp/files remote:dir/joint-vae .')
    logging.error(' Or: %s', '$ . /tmp/rsync-files remote:dir/joint-vae')
    with open('/tmp/rsync-files', 'w') as f:
        f.write('#!/bin/bash\n')
        f.write('rsync -avP --files-from=/tmp/files $1 .\n')
    sys.exit(1)

models = [M.load(d, load_state=True) for d in mdirs]
model = IteratedModels(*models)

device = args.device
model.to(device)
logging.debug('Model sent to {} (device wanted: {})'.format(next(iter(model.parameters())).device, device))

testset = model.training_parameters['set']
allsets = [testset]
allsets.extend(get_same_size_by_name(testset))

transformer = model.training_parameters['transformer']

n_by_rep = dict(hsv=0, rgb=0)
as_in = {'ind': {}, 'correct': {}}
pr = {'ind': {}, 'correct': {}}
y_classif = {}
classif_acc = {}
agreement = {}
distribution = {}
y_true = None

for mdir in mdirs:
    model = M.load(mdir, load_net=False)
    rep = model.architecture['representation']
    name = rep.upper() + str(n_by_rep[rep])
    name = str(model.job_number)  # overrides the representation-based name above
    n_by_rep[rep] += 1

    current_testset = model.training_parameters['set']
    if testset and current_testset != testset:
        continue
    else:
        testset = current_testset

    if n_by_rep[rep] > max_per_rep:
        continue

    if args.when == 'min-loss':
from cvae import ClassificationVariationalNetwork as M
from utils.save_load import available_results, make_dict_from_model, LossRecorder, find_by_job_number, load_json
import logging

logging.getLogger().setLevel(logging.DEBUG)

# successive scratch overrides: only the last mdir is actually used
mdir = '/tmp/000033'
mdir = '/tmp/000186'
mdir = '/tmp/151320'
mdir = '/tmp/151024'
mdir = '/tmp/151409'
mdir = '/tmp/151020'
mdir = '/tmp/148722'

print('Loading')
model = M.load(mdir, load_state=False)
new = M((3, 32, 32), 10, type_of_net='vib')
print('Loaded')

"""
acc = {}
acc = {_: model.accuracy(wygiwyu=True, wanted_epoch=_) for _ in (0, 10, 200, 'last')}
print(acc)
"""

# model.trained = 2000
# model.testing[2000].pop('iws')
# model.ood_results.pop(2000)

args.add_argument('--soft', choices=['kl', 'iws'], default='default')
args.add_argument('--hard', choices=['kl', 'iws'])
args.add_argument('--entropy', '-H', action='store_true')
args.add_argument('--elbo', action='store_true')
args.add_argument('--baseline', action='store_true')
args.add_argument('--2s', action='store_true', dest='two_sided')
args.add_argument('--print', action='store_true')

a = args.parse_args()

j = a.j
ood_tpr = a.ood_tpr / 100
mis_tpr = a.mis_tpr / 100

reload = False

if a.direct_load:
    net = Model.load(a.direct_load, load_state=False)
    print(net.job_number, 'loaded')

try:
    reload = net.job_number != j and not a.direct_load
except NameError:
    reload = True

if reload:
    net = find_by_job_number(j, load_state=False)['net']

dir_path = os.path.join(net.saved_dir, 'samples', 'last')

testset = net.training_parameters['set']

if net.type == 'vib':
    pass  # a.plot = False

try:
    for job_number in job_numbers:
        jobs[job_number]

except (NameError, KeyError):
    print('Loading jobs')
    reload = True
    recompute = True

if reload:
    jobs = find_by_job_number(search_dir, *job_numbers, load_state=False)  # , json_file='networks-lss.json'

    to_be_removed = []
    for job_number in jobs:
        try:
            jobs[job_number]['net'] = ClassificationVariationalNetwork.load(jobs[job_number]['dir'])
        except RuntimeError:
            print(f'Error loading {job_number}')
            to_be_removed.append(job_number)

    for job_number in to_be_removed:
        jobs.pop(job_number)

fgrid = {}
fexamples = {}
food = {}
foodexamples = {}
fmuvar = {}
fhist = {}
fx_ = {}

from matplotlib import pyplot as plt

logging.getLogger().setLevel(logging.WARNING)

compute = False
compute = True

if compute:
    j = 107495
    j = 108183
    j = 37

    load_dir = find_by_job_number('./jobs', j, load_net=False)[j]['dir']

    print('Load net', end='')
    net = ClassificationVariationalNetwork.load(load_dir, load_state=True)
    print(' to gpu')
    net.to('cuda')
    net.latent_sampling = 16

    print('Getting sets')
    trainset_name = net.training['set']
    trainset, testset = dl.get_dataset(trainset_name, transformer=net.training['transformer'])
    oodsets = [dl.get_dataset(n)[1] for n in testset.same_size]
    oodset = oodsets[0]

    batch_size = 200
    n_batch = 1000 // batch_size

    loader = torch.utils.data.DataLoader(testset,
from itertools import product

C = 2
D = (1, 28, 28)
K = 9
L = int(1e4)
N = (4,)

x = torch.randn(*N, *D)
y = torch.randint(0, C, N)

type_ = 'vae'
beta = 1e-2

net = Net(D, C, latent_dim=K, latent_sampling=L, beta=beta, type_of_net=type_)

_x_, logit_, mu_z, lv_z, z_ = net.forward(x, y)
print(logit_.shape)
y_ = F.softmax(logit_, -1)
loss_ = net.loss(x, y, _x_, y_, mu_z, lv_z, return_all_losses=True)

_x, logit, loss = net.evaluate(x, return_all_losses=True)
print(logit.shape)

if type_ != 'vae':
    y_pred = net.predict_after_evaluate(logit, loss)

oodsets = test_dataset.same_size

for o in oodsets:
    _, ood_dataset = tl.get_dataset(o, transformer=transformer)
    x[o], y[o] = tl.get_batch(ood_dataset, device=device, batch_size=max(z_sample, N))

if not L:
    L = args.total_width // (1 + len(x))

for n in list_of_nets:
    logging.info('loading state of %s', n['job'])
    model = Net.load(n['dir'])
    model.to(device)
    logging.info('done')

    logging.info('Compute max batch size')
    batch_size = min(m, model.compute_max_batch_size(batch_size=m, which='test'))
    logging.info(f'done ({batch_size})')

    for s in x:
        logging.info('sampling %s', s)
        if N:
            list_of_images = sample(model, x[s][:N],
y_coded = True
y_coded = False

if y_coded:
    types = ('jvae', 'xvae')

for ntype in types:
    print('TYPE:', ntype)
    n = Net(D, C,
            type_of_net=ntype,
            y_is_coded=y_coded and ntype not in ('vib', 'vae'),
            batch_norm='encoder',
            features='vgg16',
            encoder_layer_sizes=[],
            decoder_layer_sizes=[],
            classifier_layer_sizes=cls_cvae if ntype == 'cvae' else [20, 10],
            sigma=0,
            gamma=gamma,
            force_cross_y=0,
            latent_sampling=L,
            latent_dim=K)

    n.to(d)
    nets[ntype] = n
    # n.compute_max_batch_size(batch_size=1024)
    # print(n.max_batch_sizes)

    if n.y_is_coded:
        pass

    if ntype != 'vae':
for s in archs:
    archs[s] = {n['model']['arch'] for n in models_to_be_kept if n['model']['set'] == s}

for m_ in models_to_be_kept:
    m = m_['model']
    epoch = m_['epoch']
    plan = m_['plan']

    if plan['recorders'] or plan['compute']:
        print('Computing rates of job {} of type {} at epoch {}'.format(m['job'], m['type'], epoch))
        logging.debug('Plan for {}; {}'.format(m['job'], plan))

        model = CVNet.load(m['dir'], load_state=plan['compute'])

        if plan['compute']:
            device = args.device or 'cuda'
        else:
            device = args.device or 'cpu'

        logging.debug('Will work on {}'.format(device))
        model.to(device)

        with torch.no_grad():
            print('OOD')
            model.ood_detection_rates(
                epoch=epoch,
                from_where=where,
                sample_dirs=[os.path.join(m['dir'], 'samples', '{:04d}'.format(epoch))
            if done_epochs == 0:
                verb = 'will start from scratch.'
            elif done_epochs < args.epochs:
                verb = f'will resume from {done_epochs}.'
            else:
                verb = 'is already done.'
            log.info(f'Training {verb}')

        except NoModelError:
            log.error(f'model #{job_TBR_num} not found!')
            sys.exit(1)

    else:
        try:
            resumed_from = resume
            log.info('Loading network in %s', resume)
            jvae = CVNet.load(args.resume, load_state=True)
            log.debug('Network loaded in {}'.format(resumed_from))
            done_epochs = jvae.trained

            if done_epochs == 0:
                verb = 'will start from scratch.'
            elif done_epochs < args.epochs:
                verb = f'will resume from {done_epochs}.'
            else:
                verb = 'is already done.'
            log.info(f'Training {verb}')

        except (FileNotFoundError, NameError):
            log.error(f'network not found in {resume}')
            sys.exit(1)

else: