Example #1
def is_derailed(model, load_model_for_check=False):
    """Tell whether a model is derailed.

    model can be a results dict (with a 'dir' key), a directory path or a
    loaded network. If load_model_for_check is True, the model is also loaded
    and a dummy forward pass is run to catch saved states that cannot be
    evaluated.
    """
    import os
    import torch
    from cvae import ClassificationVariationalNetwork

    if isinstance(model, dict):
        directory = model['dir']

    elif isinstance(model, str):
        directory = model

    else:
        directory = model.saved_dir

    if os.path.exists(os.path.join(directory, 'derailed')):
        return True
    
    elif load_model_for_check:
        try:
            model = ClassificationVariationalNetwork.load(directory)
            if torch.cuda.is_available():
                model.to('cuda')
            x = torch.zeros(1, *model.input_shape, device=model.device)
            model.evaluate(x)
        except ValueError:
            return True

    return False
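
A minimal usage sketch for is_derailed, assuming './jobs/000042' and './jobs/000043' are hypothetical directories holding saved models; it illustrates the two checking modes (marker file only, or marker file plus a dummy forward pass).

# Sketch (hypothetical directories holding saved models).
for d in ('./jobs/000042', './jobs/000043'):
    # cheap check: only looks for a 'derailed' marker file
    if is_derailed(d):
        print(d, 'is flagged as derailed, skipping')
    # stricter check: also loads the model and runs a dummy forward pass
    elif is_derailed(d, load_model_for_check=True):
        print(d, 'cannot be evaluated, skipping')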
Example #2
def collect_models(directory,
                   wanted_epoch='last',
                   load_state=True, tpr_for_max=0.95, **default_load_parameters):

    from cvae import ClassificationVariationalNetwork

    if 'dump' in directory:
        return

    assert wanted_epoch == 'last' or not load_state
    
    try:
        logging.debug(f'Loading net in: {directory}')
        model = ClassificationVariationalNetwork.load(directory,
                                                      load_state=load_state,
                                                      **default_load_parameters)

        return make_dict_from_model(model, directory, tpr=tpr_for_max, wanted_epoch=wanted_epoch)

    except (FileNotFoundError, PermissionError, NoModelError):
        pass

    except RuntimeError as e:
        logging.warning(f'Load error in {directory}, see log file')
        logging.debug(f'Load error: {e}')
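
A sketch of applying collect_models over a whole tree of saved models; the './jobs' root and the use of params.json as a marker of a saved model directory are assumptions.

import os

# Sketch (assumptions: './jobs' is the root of the saved models and every
# saved model directory contains a params.json file).
collected = []
for root, dirs, files in os.walk('./jobs'):
    if 'params.json' in files:
        entry = collect_models(root, load_state=False)
        if entry is not None:  # None for skipped or unloadable directories
            collected.append(entry)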
Example #3
def needed_remote_files(*mdirs, epoch='last', which_rec='all', state=False):
    r""" list missing recorders to be fetched on a remote

    -- mdirs: list of directories

    -- epoch: last or min-loss or int

    -- which_rec: either 'none' 'ind' or 'all'

    -- state: wehter to include state.pth

    returns generator of needed files paths

    """

    assert not state or epoch == 'last'

    import os

    from cvae import ClassificationVariationalNetwork as M

    for d in mdirs:

        m = M.load(d, load_net=False)
        epoch_ = epoch
        if epoch_ == 'min-loss':
            epoch_ = m.training_parameters.get('early-min-loss', 'last')
        if epoch_ == 'last':
            epoch_ = max(m.testing)
            
        if isinstance(epoch_, int):
            epoch_ = '{:04d}'.format(epoch_)

        testset = m.training_parameters['set']

        sets = []

        recs_to_exclude = which_rec.split('-')[1:]
        which_rec_ = which_rec.split('-')[0]

        if which_rec_ in ('all', 'ind'):
            sets.append(testset)
            if which_rec_ == 'all':
                sets += get_same_size_by_name(testset)
                for excluded in [_ for _ in recs_to_exclude if _ in sets]:
                    sets.remove(excluded)
                    
        for s in sets:
            sdir = os.path.join(d, 'samples', epoch_, 'record-{}.pth'.format(s))
            if not os.path.exists(sdir):
                yield d, sdir

        if state:
            sdir = os.path.join(d, 'state.pth')
            if not os.path.exists(sdir):
                yield d, sdir
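
The generator pairs with the rsync commands logged in Example #6; below is a sketch, with hypothetical model directories, that writes the missing paths to /tmp/files.

# Sketch: collect missing recorder files into /tmp/files so that they can be
# fetched with, e.g., rsync -avP --files-from=/tmp/files (the directories
# below are hypothetical).
mdirs = ['./jobs/000042', './jobs/000043']
with open('/tmp/files', 'w') as f:
    for mdir, missing in needed_remote_files(*mdirs, epoch='last', which_rec='all'):
        f.write(missing + '\n')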
Example #4
def gather_registered_models(mdict, filter, tpr_for_max=0.95, wanted_epoch='last', **kw):
    """Load the model saved in each directory of mdict whose registered entry
    passes the (optional) filter."""

    from cvae import ClassificationVariationalNetwork

    mlist = []
    for d in mdict:
        if filter is None or filter.filter(mdict[d]):
            m = ClassificationVariationalNetwork.load(d, **kw)
            mlist.append(make_dict_from_model(m, d, tpr=tpr_for_max, wanted_epoch=wanted_epoch))

    return mlist
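
A sketch of calling gather_registered_models without a filter; the registry dict below is hypothetical, and extra keyword arguments are forwarded to the load method.

# Sketch (hypothetical registry mapping saved directories to their entries):
# with filter=None every entry is loaded; load_state=False is forwarded to load().
registered = {'./jobs/000042': {'set': 'cifar10'},
              './jobs/000043': {'set': 'cifar10'}}
models = gather_registered_models(registered, None, load_state=False)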
Example #5
    @classmethod
    def load(cls, dir_name, *a, **kw):

        architecture = save_load.load_json(dir_name, 'params.json')
        models = [architecture[str(_)] for _ in range(len(architecture))]

        m = cls(*[M.load(_, *a, **kw) for _ in models])

        try:
            m.testing = save_load.load_json(dir_name,
                                            'test.json',
                                            presumed_type=int)
        except FileNotFoundError:
            pass

        try:
            m.ood_results = save_load.load_json(dir_name,
                                                'ood.json',
                                                presumed_type=int)
        except FileNotFoundError:
            pass

        m.saved_dir = dir_name

        return m
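
For orientation, a sketch of the params.json structure the loader above expects; the sub-model directories are hypothetical.

# Hypothetical params.json content: string indices mapping to the saved
# directories of the sub-models the composite model is built from.
example_params = {
    "0": "./jobs/000042",
    "1": "./jobs/000043",
}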
Example #6
    logging.info('{} model{} over {}'.format(len(mdirs),
                                             's' if len(mdirs) > 1 else '',
                                             total_models))

    if removed:
        logging.error('Exiting: missing files have to be fetched first')
        logging.error(
            'E.g: %s',
            '$ rsync -avP --files-from=/tmp/files remote:dir/joint-vae .')
        logging.error(' Or: %s', '$ . /tmp/rsync-files remote:dir/joint-vae')
        with open('/tmp/rsync-files', 'w') as f:
            f.write('#!/bin/bash\n')
            f.write('rsync -avP --files-from=/tmp/files $1 .\n')
        sys.exit(1)

    models = [M.load(d, load_state=True) for d in mdirs]
    model = IteratedModels(*models)

    device = args.device

    model.to(device)

    logging.debug('Model sent to {} (device wanted: {})'.format(
        next(iter(model.parameters())).device, device))

    testset = model.training_parameters['set']
    allsets = [testset]
    allsets.extend(get_same_size_by_name(testset))

    transformer = model.training_parameters['transformer']
Example #7
    n_by_rep = dict(hsv=0, rgb=0)

    as_in = {'ind': {}, 'correct': {}}
    pr = {'ind': {}, 'correct': {}}

    y_classif = {}
    classif_acc = {}
    agreement = {}
    distribution = {}

    y_true = None

    for mdir in mdirs:

        model = M.load(mdir, load_net=False)
        rep = model.architecture['representation']
        name = rep.upper() + str(n_by_rep[rep])
        # the job number overrides the per-representation label
        name = str(model.job_number)

        n_by_rep[rep] += 1
        current_testset = model.training_parameters['set']
        if testset and current_testset != testset:
            continue
        else:
            testset = current_testset

        if n_by_rep[rep] > max_per_rep:
            continue

        if args.when == 'min-loss':
Example #8
from cvae import ClassificationVariationalNetwork as M
from utils.save_load import available_results, make_dict_from_model, LossRecorder, find_by_job_number, load_json
import logging

logging.getLogger().setLevel(logging.DEBUG)

# Candidate job directories (scratch style: the last assignment wins).
mdir = '/tmp/000033'
mdir = '/tmp/000186'
mdir = '/tmp/151320'
mdir = '/tmp/151024'
mdir = '/tmp/151409'
mdir = '/tmp/151020'
mdir = '/tmp/148722'

print('Loading')
model = M.load(mdir, load_state=False)

new = M((3, 32, 32), 10, type_of_net='vib')

print('Loaded')
"""
acc = {}
acc = {_: model.accuracy(wygiwyu=True, wanted_epoch=_) for _ in (0, 10, 200, 'last')}

print(acc)
"""
# model.trained = 2000

# model.testing[2000].pop('iws')
# model.ood_results.pop(2000)
Example #9
args.add_argument('--soft', choices=['kl', 'iws'], default='default')
args.add_argument('--hard', choices=['kl', 'iws'])
args.add_argument('--entropy', '-H', action='store_true')
args.add_argument('--elbo', action='store_true')
args.add_argument('--baseline', action='store_true')
args.add_argument('--2s', action='store_true', dest='two_sided')
args.add_argument('--print', action='store_true')

a = args.parse_args()
j = a.j
ood_tpr = a.ood_tpr / 100
mis_tpr = a.mis_tpr / 100

reload = False
if a.direct_load:
    net = Model.load(a.direct_load, load_state=False)
    print(net.job_number, 'loaded')
try:
    reload = net.job_number != j and not a.direct_load
except NameError:
    reload = True

if reload:
    net = find_by_job_number(j, load_state=False)['net']

dir_path = os.path.join(net.saved_dir, 'samples', 'last')
testset = net.training_parameters['set']

if net.type == 'vib':
    pass  # a.plot = False
Example #10
try:
    # Probe the cached jobs dict; reload everything if it is missing or incomplete.
    for job_number in job_numbers:
        jobs[job_number]

except (NameError, KeyError):
    print('Loading jobs')
    reload = True
    recompute = True

if reload:
    jobs = find_by_job_number(search_dir, *job_numbers, load_state=False)
    #, json_file='networks-lss.json')

    to_be_removed = []
    for job_number in jobs:
        try:
            jobs[job_number]['net'] = ClassificationVariationalNetwork.load(
                jobs[job_number]['dir'])
        except RuntimeError:
            print(f'Error loading {job_number}')
            to_be_removed.append(job_number)

    for job_number in to_be_removed:
        jobs.pop(job_number)

fgrid = {}
fexamples = {}
food = {}
foodexamples = {}
fmuvar = {}
fhist = {}
fx_ = {}
Example #11
from matplotlib import pyplot as plt

logging.getLogger().setLevel(logging.WARNING)

compute = False
compute = True

if compute:
    j = 107495
    j = 108183
    j = 37

    load_dir = find_by_job_number('./jobs', j, load_net=False)[j]['dir']

    print('Load net', end='')
    net = ClassificationVariationalNetwork.load(load_dir, load_state=True)
    print(' to gpu')
    net.to('cuda')
    net.latent_sampling = 16

    print('Getting sets')
    trainset_name = net.training['set']
    trainset, testset = dl.get_dataset(trainset_name,
                                       transformer=net.training['transformer'])
    oodsets = [dl.get_dataset(n)[1] for n in testset.same_size]
    oodset = oodsets[0]

    batch_size = 200
    n_batch = 1000 // batch_size

    loader = torch.utils.data.DataLoader(testset,
Example #12
from itertools import product

import torch
import torch.nn.functional as F

# Assumed alias for the model class instantiated below:
# from cvae import ClassificationVariationalNetwork as Net

C = 2
D = (1, 28, 28)
K = 9
L = int(1e4)

N = (4,)


x = torch.randn(*N, *D)
y = torch.randint(0, C, N)


type_ = 'vae'
beta = 1e-2
net = Net(D, C, latent_dim=K, latent_sampling=L, beta=beta, type_of_net=type_)

_x_, logit_, mu_z, lv_z, z_ = net.forward(x, y)
print(logit_.shape)

y_ = F.softmax(logit_, -1)

loss_ = net.loss(x, y, _x_, y_, mu_z, lv_z, return_all_losses=True)

_x, logit, loss = net.evaluate(x, return_all_losses=True)
print(logit.shape)

if type_ != 'vae':
    y_pred = net.predict_after_evaluate(logit, loss)
Example #13
        oodsets = test_dataset.same_size

        for o in oodsets:
            _, ood_dataset = tl.get_dataset(o, transformer=transformer)
            x[o], y[o] = tl.get_batch(ood_dataset,
                                      device=device,
                                      batch_size=max(z_sample, N))

        if not L:
            L = args.total_width // (1 + len(x))

        for n in list_of_nets:

            logging.info('loading state of %s', n['job'])

            model = Net.load(n['dir'])
            model.to(device)
            logging.info('done')
            logging.info('Compute max batch size')

            batch_size = min(
                m, model.compute_max_batch_size(batch_size=m, which='test'))
            logging.info(f'done ({batch_size})')

            for s in x:

                logging.info('sampling %s', s)

                if N:
                    list_of_images = sample(model,
                                            x[s][:N],
Example #14
y_coded = True
y_coded = False

if y_coded:
    types = ('jvae', 'xvae')

for ntype in types:

    print('TYPE:', ntype)
    n = Net(D, C,
            type_of_net=ntype,
            y_is_coded=y_coded and ntype not in ('vib', 'vae'),
            batch_norm='encoder',
            features='vgg16',
            encoder_layer_sizes=[],
            decoder_layer_sizes=[],
            classifier_layer_sizes=cls_cvae if ntype == 'cvae' else [20, 10],
            sigma=0,
            gamma=gamma,
            force_cross_y=0,
            latent_sampling=L,
            latent_dim=K)
    n.to(d)
    nets[ntype] = n
    # n.compute_max_batch_size(batch_size=1024)
    # print(n.max_batch_sizes)
    
    if n.y_is_coded:
        pass

    if ntype != 'vae':
Example #15
    for s in archs:
        archs[s] = {
            n['model']['arch']
            for n in models_to_be_kept if n['model']['set'] == s
        }

    for m_ in models_to_be_kept:
        m = m_['model']
        epoch = m_['epoch']
        plan = m_['plan']

        if plan['recorders'] or plan['compute']:
            print('Computing rates of job {} of type {} at epoch {}'.format(
                m['job'], m['type'], epoch))
            logging.debug('Plan for {}; {}'.format(m['job'], plan))
            model = CVNet.load(m['dir'], load_state=plan['compute'])
            if plan['compute']:
                device = args.device or 'cuda'
            else:
                device = args.device or 'cpu'

            logging.debug('Will work on {}'.format(device))
            model.to(device)
            with torch.no_grad():
                print('OOD')
                model.ood_detection_rates(
                    epoch=epoch,
                    from_where=where,
                    sample_dirs=[
                        os.path.join(m['dir'], 'samples',
                                     '{:04d}'.format(epoch))
Example #16
                if done_epochs == 0:
                    verb = 'will start from scratch.'
                elif done_epochs < args.epochs:
                    verb = f'will resume from {done_epochs}.'
                else:
                    verb = 'is already done.'
                log.info(f'Training {verb}')
            except NoModelError:
                log.error(f'model #{job_TBR_num} not found!')
                sys.exit(1)

        else:
            try:
                resumed_from = resume
                log.info('Loading network in %s', resume)
                jvae = CVNet.load(args.resume, load_state=True)
                log.debug('Network loaded in {}'.format(resumed_from))
                done_epochs = jvae.trained
                if done_epochs == 0:
                    verb = 'will start from scratch.'
                elif done_epochs < args.epochs:
                    verb = f'will resume from {done_epochs}.'
                else:
                    verb = 'is already done.'
                log.info(f'Training {verb}')

            except (FileNotFoundError, NameError):
                log.error(f'network not found in {resume}')
                sys.exit(1)

    else: