def compress_experiment_run_dir(flags) -> None:
    """
    Zip the experiment run directory into dir_experiment and delete the uncompressed copy.

    The target directory is the ``dir_experiment`` entry of the json config found via
    ``get_config_path(flags=flags)``. The archive is written as
    ``<dir_experiment>/<flags.experiment_uid>.zip``.

    Parameters
    ----------
    flags : object providing ``dir_experiment_run`` (the directory to compress) and
        ``experiment_uid`` (used as the archive name).

    Raises
    ------
    AssertionError
        If the archive is missing after zipping. The run directory is left untouched in that case.
    """
    dir_experiment = Path(json2dict(get_config_path(flags=flags))['dir_experiment']).expanduser()
    dir_experiment.mkdir(exist_ok=True)

    # shutil.make_archive APPENDS '.zip' to the base name. Path.with_suffix would instead REPLACE
    # whatever follows the last dot of the uid, so it must not be used to predict the archive name.
    archive_base = dir_experiment / flags.experiment_uid

    # zip dir_experiment_run
    log.info(f'zipping {flags.dir_experiment_run} to {archive_base}.zip.')
    # make_archive returns the path of the archive it actually created.
    archive_path = Path(
        shutil.make_archive(str(archive_base), 'zip', flags.dir_experiment_run, verbose=True))

    # Explicit check instead of an `assert` statement: asserts are stripped under `python -O`, which
    # would let the rmtree below run without the archive having been verified.
    if not archive_path.exists():
        raise AssertionError(f'{archive_path} does not exist. '
                             f'Zipping of dir_experiment_run failed.')

    # delete not compressed experiment dir
    shutil.rmtree(str(flags.dir_experiment_run))
def df_maker_nbr_mods_comp(exp_uids: dict, method: str, data_dir: Path):
    """
    Build a dataframe with the final-epoch scores of every run, one row per modality setting.

    Parameters
    ----------
    exp_uids : mapping ``method -> {modality setting -> iterable of experiment uids}``.
    method : key into ``exp_uids`` and name of the method sub directory.
    data_dir : directory containing ``experiments/polymnist/<method>/<uid>/epoch_results``.

    Returns
    -------
    Dataframe with a ``nbr_mods`` column plus ``lr_eval_score_<uid>`` and
    ``coherence_score_<uid>`` columns for every run whose epoch results directory exists.
    """
    uids_per_setting = exp_uids[method]

    df = pd.DataFrame()
    df['nbr_mods'] = list(uids_per_setting)

    for nbr_mods, run_ids in uids_per_setting.items():
        for run_id in run_ids:
            epoch_results_dir = data_dir / 'experiments' / 'polymnist' / method / run_id / 'epoch_results'
            if not epoch_results_dir.exists():
                # runs without results are skipped silently
                continue

            # results of the last training epoch (epochs are counted from 0)
            last_epoch = config['max_epoch']['polymnist'] - 1
            test_results = json2dict(epoch_results_dir / f'{last_epoch}.json')['test_results']

            accuracies = [entry['accuracy'] for entry in test_results['lr_eval_q0'].values()]
            coherences = list(flatten_dict(test_results['gen_eval']).values())

            row_mask = df['nbr_mods'] == nbr_mods
            df.loc[row_mask, f'lr_eval_score_{run_id}'] = np.mean(accuracies)
            df.loc[row_mask, f'coherence_score_{run_id}'] = np.mean(coherences)

    return df
def upload_one(exp_path: Path):
    """
    Upload one experiment to the database and delete it locally afterwards.

    The epoch results, the network checkpoints of the highest checkpoint epoch, the tensorboard
    logs, the url of the experiment visualisation and the first ``*.log`` file found are uploaded.
    A notification message is sent once the upload has finished.

    Parameters
    ----------
    exp_path : experiment directory, or a ``.zip`` archive of it. Archives are unpacked to a
        temporary directory first.
    """
    is_zip = exp_path.suffix == '.zip'

    with tempfile.TemporaryDirectory() as tmpdirname:
        tmpdir = Path(tmpdirname) / exp_path.stem
        tmpdir.mkdir()

        exp_dir = exp_path
        if is_zip:
            # unpack zip into tmpdir and work on the unpacked copy
            log.info(f'Unpacking {exp_path} to {tmpdir}.')
            with zipfile.ZipFile(exp_path) as archive:
                archive.extractall(tmpdir)
            exp_dir = tmpdir

        flags = torch.load(exp_dir / 'flags.rar')
        db = MongoDatabase(training=True, flags=flags)

        # collect the results of all epochs, keyed by the epoch number as string
        epoch_results_dir = exp_dir / 'epoch_results'
        results = {'epoch_results': {}}
        for epoch in sorted(int(result_file.stem) for result_file in epoch_results_dir.iterdir()):
            results['epoch_results'][str(epoch)] = json2dict(epoch_results_dir / f'{epoch}.json')
        db.insert_dict(results)

        # `epoch` still holds the last (highest) epoch of the loop above. Keys of log_probs
        # without an underscore are taken as the names of the single modalities.
        log_probs = results['epoch_results'][str(epoch)]['train_results']['log_probs']
        modalities = [mod_str for mod_str in log_probs if '_' not in mod_str]

        dir_checkpoints = exp_dir / 'checkpoints'
        last_checkpoint_epoch = max(int(ckpt.name) for ckpt in dir_checkpoints.iterdir())
        db.save_networks_to_db(
            dir_checkpoints=dir_checkpoints,
            epoch=last_checkpoint_epoch,
            modalities=modalities,
        )

        db.upload_tensorbardlogs(exp_dir / 'logs')

        pdf_path = run_notebook_convert(exp_dir)
        expvis_url = ppb.upload(pdf_path, plain=True)
        db.insert_dict({'expvis_url': expvis_url})

        # only the first logfile found is uploaded
        log_files = glob.glob(str(exp_dir) + '/*.log')
        if log_files:
            db.upload_logfile(Path(log_files[0]))

        send_msg(f'Uploading of experiment {flags.experiment_uid} has finished. The experiment visualisation can be '
                 f'found here: {expvis_url}')

    # delete exp_path
    if is_zip:
        exp_path.unlink()
    else:
        shutil.rmtree(exp_path)
def launch_mimic_training():
    """
    Launch one mimic training job per method and record the experiment uids.

    The uids are stored in ``data/thesis/experiment_uids.json`` (relative to the parent of this
    file's directory) under ``[dataset][method]['3_mods']``. An existing uid list for that entry
    is replaced by the newly launched run.
    """
    experiment_uids_path = Path(__file__).parent.parent / 'data/thesis/experiment_uids.json'
    dataset = 'mimic'
    num_mods = 3

    if experiment_uids_path.exists():
        exp_uids = json2dict(experiment_uids_path)
    else:
        experiment_uids_path.parent.mkdir(exist_ok=True, parents=True)
        exp_uids = {}

    # The uid file may already exist without an entry for this dataset (e.g. when it only holds
    # the uids of another dataset). Create the entry instead of failing with a KeyError.
    dataset_uids = exp_uids.setdefault(dataset, {})

    for params in [mopoe_mimic_args, mofop_mimic_args, mopgfm_mimic_args]:
        method = params['method']

        experiment_uid = get_experiment_uid(dataset, method=method)
        params["experiment_uid"] = experiment_uid
        dataset_uids.setdefault(method, {})[f'{num_mods}_mods'] = [experiment_uid]

        launch_leomed_jobs(which_dataset=dataset, params=params)

    dict2json(experiment_uids_path, d=exp_uids)
def plot_comparisons(which: str, metric: str):
    """
    Plot the mean of a metric for every method, with a shaded band of +/- one std.

    Parameters
    ----------
    which : 'nbr_mods_comp' (x axis: number of modalities) or 'epoch_comp' (x axis: epochs).
    metric : 'coherence_eval' or 'lat_eval'. The entries ``d[method][metric]`` and
        ``d[method][f'{metric}_std']`` of the loaded comparison file are plotted.

    Raises
    ------
    ValueError
        If ``which`` is not one of the supported comparisons.
    KeyError
        If ``metric`` has no entry in the title mapping.
    """
    title_mapping = {
        'coherence_eval': 'Evaluation of the generation coherence',
        'lat_eval': 'Evaluation of the separability of the latent representation',
    }
    y_label_mapping = {
        'coherence_eval': 'Generation Coherence Accuracy',
        'lat_eval': 'Linear Classification Accuracy',
    }

    config = json2dict(Path('prepare_thesis/conf.json'))
    # unknown metrics get no y label
    y_label = y_label_mapping.get(metric)

    if which == 'nbr_mods_comp':
        d = dict_elements_to_array(json2dict(Path('data/thesis/nbr_mods_comp.json')), exclude='nbr_mods')
        x_steps = [e.replace('_mods', '') for e in d['nbr_mods']]
        methods = list(config['methods'])
        x_label = 'Number of modalities'
    elif which == 'epoch_comp':
        d = dict_elements_to_array(json2dict(Path('data/thesis/epoch_comp.json')), exclude='epochs')
        x_steps = d['epochs']
        methods = config['methods']
        x_label = 'Epochs'
    else:
        raise ValueError(f'{which} not implemented for plot_comparisons.')

    markers = _cycle(['o', 's', 'v', 'p', '*', 'h'])

    for method in methods:
        mean = d[method][metric]
        std = d[method][f'{metric}_std']
        # Label the line itself. Passing only a list of labels to plt.legend pairs them with the
        # artists by order, and the fill_between collections are interleaved with the lines, so
        # the labels could end up on the wrong artist.
        plt.plot(x_steps, mean, marker=next(markers), label=method)
        plt.fill_between(x_steps, mean - std, mean + std, alpha=0.2, linewidth=1)

    plt.title(title_mapping[metric])
    plt.xlabel(x_label)
    plt.ylabel(ylabel=y_label)
    # only labelled artists (the lines) are shown in the legend
    plt.legend(ncol=2)
def test_connectdb():
    """Test connection to the mongodb database."""
    dbconfig = json2dict(Path(__file__).parent.parent / 'configs/mmvae_db.json')
    client = MongoClient(dbconfig['mongodb_URI'])
    experiments = client.mmvae.experiments
    # Actually run a query: printing the bound method `find_one` (without calling it) never
    # contacts the server, so a broken connection would go unnoticed.
    print(experiments.find_one())
def update_flags_with_config(p, config_path: Path, additional_args: dict = None, testing=False):
    """
    Parse the flags, using the json config and the additional arguments as initial values.

    Entries of additional_args take precedence over entries of the json config.
    If testing is true, no cli arguments will be read.

    Parameters
    ----------
    p : parser to be updated.
    config_path : path to the json config file.
    additional_args : optional additional arguments to be passed as dict.
    testing : if true, an empty argument list is parsed instead of the cli arguments.

    Returns
    -------
    The namespace returned by ``p.parse_args``.
    """
    overrides = additional_args or {}
    preset = {**json2dict(config_path), **overrides}

    namespace = argparse.Namespace()
    vars(namespace).update(preset)

    # an explicit empty list keeps argparse from reading sys.argv; None means "read the cli"
    cli_args = [] if testing else None
    return p.parse_args(cli_args, namespace=namespace)
def df_maker_epoch_comp(exp_uids: dict, method: str, data_dir: Path):
    """
    Build a dataframe with the evaluation scores per epoch for all 3-modality runs of a method.

    Parameters
    ----------
    exp_uids : mapping ``method -> {'3_mods': iterable of experiment uids, ...}``.
    method : key into ``exp_uids`` and name of the method sub directory.
    data_dir : directory containing ``experiments/polymnist/<method>/<uid>/epoch_results``.

    Returns
    -------
    Dataframe with an ``epoch`` column plus ``lr_eval_score_<uid>`` and ``coherence_score_<uid>``
    columns. Rows containing missing values are dropped.
    """
    df = pd.DataFrame()

    # Epochs (0-based) at which the model is expected to have been evaluated.
    # NOTE: the evaluation frequency is hard-coded to 100 here, it is not read from the flags of
    # the individual runs. The list does not depend on the run, so it is built only once.
    eval_epochs = [i - 1 for i in range(1, config['max_epoch']['polymnist'] + 1) if i % 100 == 0]

    for _id in exp_uids[method]['3_mods']:
        epoch_results_dir = data_dir / 'experiments' / 'polymnist' / method / _id / 'epoch_results'
        if not epoch_results_dir.exists():
            continue

        if 'epoch' not in df.columns:
            # the first run with results defines the rows of the dataframe
            df['epoch'] = [int(i.stem) for i in epoch_results_dir.iterdir()]

        for epoch in eval_epochs:
            test_results = json2dict(epoch_results_dir / f'{epoch}.json')['test_results']
            if not (test_results and 'lr_eval_q0' in test_results):
                continue

            lr_eval_score = np.mean([v['accuracy'] for v in test_results['lr_eval_q0'].values()])
            coherence_score = np.mean(list(flatten_dict(test_results['gen_eval']).values()))

            row_mask = df['epoch'] == epoch
            df.loc[row_mask, f'lr_eval_score_{_id}'] = lr_eval_score
            df.loc[row_mask, f'coherence_score_{_id}'] = coherence_score

    return df.dropna()
def launch_polymnist_training():
    """
    Launch the polymnist training jobs for all methods and record the experiment uids.

    For every method, ``nbr_repeats`` jobs are launched for each of 1 to 4 modalities. The uids
    are stored in ``data/thesis/experiment_uids.json`` (relative to the parent of this file's
    directory) under ``[dataset][method]['<num_mods>_mods']``. Existing uid lists for these
    entries are replaced.
    """
    experiment_uids_path = Path(__file__).parent.parent / 'data/thesis/experiment_uids.json'
    dataset = 'polymnist'

    if experiment_uids_path.exists():
        exp_uids = json2dict(experiment_uids_path)
    else:
        experiment_uids_path.parent.mkdir(exist_ok=True, parents=True)
        exp_uids = {}

    # The uid file may already exist without an entry for this dataset (e.g. when it only holds
    # the uids of another dataset). Create the entry instead of failing with a KeyError.
    dataset_uids = exp_uids.setdefault(dataset, {})

    for params in [
            mopoe_args, poe_args, moe_args, mopgfm_args, mofop_args, iwmogfm2_args,
            mogfm_amortized_args
    ]:
        method = params['method']
        method_uids = dataset_uids.setdefault(method, {})

        for num_mods in range(1, 5):
            # Work on a copy: writing the overrides into the shared `params` dict would make the
            # eval_freq of the 3-modality runs leak into the runs with 4 modalities.
            run_params = {**params, 'num_mods': num_mods}

            # more evaluation steps are needed for 3 mods
            if num_mods == 3:
                run_params['eval_freq'] = 100

            uids = []
            method_uids[f'{num_mods}_mods'] = uids
            for _ in range(nbr_repeats):
                experiment_uid = get_experiment_uid(dataset, method=method)
                run_params['experiment_uid'] = experiment_uid
                uids.append(experiment_uid)
                launch_leomed_jobs(which_dataset=dataset, params=run_params)

    dict2json(experiment_uids_path, d=exp_uids)
def test_static_results_2mods(method: str):
    """
    Check that a short two-modality training run still reproduces the stored results.

    A failing assertion means that the model or the evaluation has changed, perhaps
    involuntarily. The expected values are read from the 'static_results_2mod' entry of
    static_results.json and compared after rounding.
    """
    expected = json2dict(Path('static_results.json'))['static_results_2mod'][method]

    run_attributes = {
        'num_flows': 0,
        'num_mods': 2,
        'deterministic': True,
        'device': 'cpu',
        'steps_per_training_epoch': 1,
        'factorized_representation': False
    }

    with tempfile.TemporaryDirectory() as tmpdirname:
        mst = set_me_up(tmpdirname, method=method, attributes=run_attributes)
        test_results = PolymnistTrainer(mst).run_epochs()

        # divergences and likelihoods: compared up to one decimal
        assert np.round(test_results.joint_div, 1) == np.round(expected['joint_div'], 1)
        assert np.round(test_results.klds['m0'], 1) == np.round(expected['klds'], 1)
        assert np.round(test_results.lhoods['m0']['m0'], 1) == np.round(expected['lhoods'], 1)

        # log probabilities and total loss: compared as rounded integers
        assert np.round(test_results.log_probs['m0'], 0) == np.round(expected['log_probs'], 0)
        assert np.round(test_results.total_loss, 0) == np.round(expected['total_loss'], 0)

        # classification accuracy and latent mean: compared up to two decimals
        assert np.round(test_results.lr_eval['m0']['accuracy'], 2) == np.round(expected['lr_eval'], 2)
        assert np.round(test_results.latents['m0']['latents_class']['mu'], 2) == np.round(
            expected['latents_class']['mu'], 2)
from pathlib import Path from mmvae_hub.leomed_utils.launch_jobs import launch_leomed_jobs from mmvae_hub.utils.setup.filehandling import get_experiment_uid from mmvae_hub.utils.utils import json2dict, dict2json conf = json2dict(Path(__file__).parent / 'conf.json') end_epoch = conf['max_epoch']['polymnist'] eval_freq = 500 nbr_repeats = 5 poe_args = { 'method': 'poe', "initial_learning_rate": 0.0005, 'class_dim': 512, "min_beta": 0, "max_beta": 2., "beta_warmup": 0, "num_mods": 3, "end_epoch": end_epoch, "eval_freq": eval_freq, } moe_args = { 'method': 'moe', "initial_learning_rate": 0.0005, 'class_dim': 1280, "min_beta": 0, "max_beta": 2.0, "beta_warmup": 0, "num_mods": 3,
if __name__ == "__main__": # parser = argparse.ArgumentParser() # parser.add_argument('--seed', type=int, default=42) # parser.add_argument('--num-modalities', type=int, default=5) # parser.add_argument('--savepath-train', type=str, required=True) # parser.add_argument('--savepath-test', type=str, required=True) # parser.add_argument('--backgroundimagepath', type=str, required=True) # args = parser.parse_args() # use vars to convert args into a dict # print("\nARGS:\n", args) from dataclasses import dataclass from pathlib import Path config = json2dict(get_config_path()) @dataclass class Args: savepath_train: Path = Path(config['dir_data']) / 'train' savepath_test: Path = Path(config['dir_data']) / 'test' backgroundimagepath: Path = Path(__file__).parent / 'polymnist_background_images' num_modalities: int = 5 args = Args() # create dataset PolymnistDataset.create_polymnist_dataset(args.savepath_train, args.backgroundimagepath, args.num_modalities, train=True) PolymnistDataset.create_polymnist_dataset(args.savepath_test, args.backgroundimagepath, args.num_modalities,
for l, line in enumerate(lines): width, height = font.getsize(line) draw.text((0, (h / 2) - (num_lines / 2 - l) * height), line, (0, 0, 0), font=font) y_text += height if imgsize[0] == 3: text_pil = transforms.ToTensor()(pil_img.resize( (imgsize[1], imgsize[2]), Image.ANTIALIAS)) else: text_pil = transforms.ToTensor()(pil_img.resize( (imgsize[1], imgsize[2]), Image.ANTIALIAS).convert('L')) return text_pil if __name__ == '__main__': config = json2dict(get_config_path(dataset='celeba')) img_clf_path = Path(config['dir_clf']).expanduser() / 'clf_celeba_text.pth' img_clf_path.parent.mkdir(exist_ok=True, parents=True) if not img_clf_path.exists(): print( f'text clf not found under {img_clf_path}. Parent folder contains: {list(img_clf_path.parent.iterdir())}' ) download_from_url( url= 'https://www.dropbox.com/sh/lx8669lyok9ois6/AACaBy1YNNq3ebh149k_EXrca/trained_classifiers/trained_clfs_celeba/clf_m2?dl=1', dest_path=img_clf_path, verbose=True)
def test_static_results_1mod(method: str, update_static_results=False):
    """
    Check that a short single-modality training run still reproduces the stored results.

    A failing assertion means that the model or the evaluation has changed, perhaps
    involuntarily. The expected values live in the 'static_results_1mod' entry of the
    static_results.json next to this file; an empty entry is created for unknown methods.

    Parameters
    ----------
    method : name of the method to test.
    update_static_results : if true, the stored values are overwritten with the results of this
        run before the comparison.
    """
    jsonfile = Path(__file__).parent / 'static_results.json'
    json_key = f'static_results_1mod.{method}'

    all_static_results = json2dict(jsonfile)['static_results_1mod']
    if method not in all_static_results:
        write_to_jsonfile(jsonfile, [(json_key, {})])
        all_static_results[method] = {}
    static_results = all_static_results[method]

    def rounded_equal(actual, expected, decimals):
        # compare two values after rounding both to the same number of decimals
        return np.round(actual, decimals) == np.round(expected, decimals)

    run_attributes = {
        'num_flows': 0,
        'num_mods': 1,
        'deterministic': True,
        'device': 'cpu',
        'steps_per_training_epoch': 1,
        'factorized_representation': False,
        'calc_nll': False
    }

    with tempfile.TemporaryDirectory() as tmpdirname:
        mst = set_me_up(tmpdirname, dataset='polymnist', method=method, attributes=run_attributes)
        test_results = PolymnistTrainer(mst).run_epochs()

        if update_static_results:
            # 'lhoods' and 'lr_eval' are currently neither stored nor compared
            static_results['joint_div'] = test_results.joint_div
            static_results['klds'] = test_results.klds['m0']
            static_results['log_probs'] = test_results.log_probs['m0']
            static_results['total_loss'] = test_results.total_loss
            static_results['latents_class'] = {'mu': test_results.latents['m0']['latents_class']['mu']}
            write_to_jsonfile(jsonfile, [(json_key, static_results)])

        are_they_equal = {
            'joint_div': rounded_equal(test_results.joint_div, static_results['joint_div'], 5),
            'klds': rounded_equal(test_results.klds['m0'], static_results['klds'], 5),
            # log probabilities and total loss have to match exactly
            'log_probs': test_results.log_probs['m0'] == static_results['log_probs'],
            'total_loss': test_results.total_loss == static_results['total_loss'],
            'latents_class_mu': rounded_equal(test_results.latents['m0']['latents_class']['mu'],
                                              static_results['latents_class']['mu'], 8),
        }

        assert all(are_they_equal.values()), f'Some results changed: {are_they_equal}'
def load_flags(dir_path: Path):
    """
    Load the flags of an experiment from its directory.

    'flags.rar' (loaded with torch.load) is preferred over 'flags.json' (converted to an object
    named 'flags'). Returns None if the directory contains neither of the two files.
    """
    rar_file = dir_path / 'flags.rar'
    if rar_file.exists():
        return torch.load(rar_file)

    json_file = dir_path / 'flags.json'
    if json_file.exists():
        return dict2pyobject(json2dict(json_file), 'flags')

    return None
"""Save the dicts containing data for the epoch comparison and nbr_mods comparison.""" from pathlib import Path import numpy as np import pandas as pd import torch from mmvae_hub.utils.utils import json2dict, dict2json from modun.dict_utils import flatten_dict, dict2pyobject config = json2dict(Path('conf.json')) data_dir = Path(__file__).parent.parent / 'data/thesis' experiment_uids_path = data_dir / ('experiment_uids.json') exp_uids = json2dict(experiment_uids_path)['polymnist'] methods = config['methods'] def load_flags(dir_path: Path): if (dir_path / 'flags.rar').exists(): return torch.load(dir_path / 'flags.rar') elif (dir_path / 'flags.json').exists(): return dict2pyobject(json2dict(dir_path / 'flags.json'), 'flags') def df_maker_epoch_comp(exp_uids: dict, method: str, data_dir: Path): df = pd.DataFrame() for _id in exp_uids[method]['3_mods']: exp_dir = data_dir / 'experiments' / 'polymnist' / method / _id epoch_results_dir = exp_dir / 'epoch_results'
from utils import * from mmvae_hub.utils.utils import json2dict config = json2dict(Path(__file__).parent / 'config.json') def get_best_mopoe_plot(df): method = 'mopoe' if config[method] == 'best': df = df.loc[(df['method'].str.startswith(f'joint_elbo')) & (df['num_mods'] == 3)] _id = df[df.score == df.score.max()]._id.item() else: _id = config[method] save_path = Path(__file__).parent.parent / f'data/{method}/cond_gen_plots' save_path.mkdir(parents=True, exist_ok=True) save_plots(_id, save_path, method) def get_best_pgfm_plot(df): method = 'pgfm' if config[method] == 'best': df = df.loc[(df['method'].str.startswith(f'joint_elbo')) & (df['num_mods'] == 3)] _id = df[df.score == df.score.max()]._id.item() else:
def get_lr_score(method: str):
    """Return the last 'lat_eval' entry of the method in epoch_comp.json, formatted by float_to_tex."""
    lat_eval_scores = json2dict(Path('data/thesis/epoch_comp.json'))[method]['lat_eval']
    return float_to_tex(lat_eval_scores[-1])
"""Get the experiment dirs from leomed"""
import os
from pathlib import Path

from mmvae_hub.experiment_vis.utils import get_exp_dir
from mmvae_hub.utils.MongoDB import MongoDatabase
from mmvae_hub.utils.utils import json2dict
from modun.zip_utils import unzip_to

# Local target directory, resolved relative to this file and created if missing.
data_dir = Path(__file__).parent.parent / 'data/thesis'
data_dir.mkdir(exist_ok=True, parents=True)

# NOTE: 'conf.json' is resolved relative to the current working directory, not relative to this
# file like data_dir.
conf = json2dict(Path('conf.json'))

# Collect the ids of all experiments stored in the database, if the database is used.
if conf['use_db']:
    exp_db = MongoDatabase(training=False)
    experiments = exp_db.connect()
    db_uids = {exp['_id'] for exp in experiments.find({})}
else:
    # NOTE(review): an empty list here, but a set in the branch above.
    db_uids = []

# get experiment_uids from leomed
leomed_path = Path(conf['data_dir_leomed']) / 'experiment_uids.json'
experiment_uids_path = data_dir / ('experiment_uids.json')

# remove existing experiment_uids
# if experiment_uids_path.exists():
#     os.remove(experiment_uids_path)

# Command that copies the uid file from the 'ethsec' host to the local data directory.
rsync_command = f'rsync -avP ethsec:{leomed_path} {experiment_uids_path}'
from pathlib import Path from matplotlib import pyplot as plt from mmvae_hub.experiment_vis.utils import save_cond_gen, show_generated_figs from mmvae_hub.utils.utils import json2dict data_dir = Path(__file__).parent.parent / 'data/thesis' d = json2dict(data_dir / 'experiment_uids.json') config = json2dict(Path('conf.json')) def save_plots(_id, experiment_dir: Path, save_path, method: str): cond_gen_plots = show_generated_figs(_id=_id, experiment_dir=experiment_dir, return_plots=True, nbr_samples_x=10, nbr_samples_y=1) for p_key, ps in cond_gen_plots.items(): for name, fig in ps.items(): plt.figure(figsize=(10, 10)) plt.imshow(fig) title = fr'\textbf{{{method}}}: ' + name.replace( '__', r'$\rightarrow$') plt.title(title) plt.axis('off') plt.savefig(save_path / name, bbox_inches='tight', pad_inches=0) plt.close() if __name__ == '__main__':
def load_old_flags(self, flags_path: Path = None, _id: str = None, is_dict: bool = False, add_args: dict = None):
    """
    Load flags from old experiments, either from a directory or from the db.

    Add parameters for backwards compatibility and adapt paths for current system.
    If flags_path is None, flags will be loaded from the db using the _id.

    Parameters
    ----------
    flags_path : path to the flags file. A json file if is_dict is true, otherwise a file that
        can be read with torch.load.
    _id : id of the experiment in the db, only used if flags_path is None.
    is_dict : set to true if flags_path points to a json file.
    add_args : optional additional values to set on the flags. The entries 'device', 'prior' and
        'qz_x' are always set by this method and take precedence over entries of add_args.

    Returns
    -------
    The flags as an object with attribute access.
    """
    # `add_args` defaults to None, which does not support the dict union operator. The fixed
    # entries are the right operand, so they win over entries of add_args with the same key.
    add_args = (add_args or {}) | {
        'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
        'prior': 'normal',
        'qz_x': 'normal'
    }

    if is_dict or flags_path is None:
        if flags_path is None:
            # get flags from db
            db = MongoDatabase(_id=_id)
            flags = db.get_experiment_dict()['flags']
        else:
            # load flags from jsonfile
            flags = json2dict(flags_path)

        flags = self.set_paths_with_config(json2dict(self.config_path), flags, True)

        # get defaults from newer parameters that might not be defined in old flags
        flags = self.get_defaults(flags, is_dict=True)

        flags.update(add_args)

        # old experiments only define a constant `beta`
        if 'min_beta' not in flags:
            flags['min_beta'] = flags['beta']
            flags['max_beta'] = flags['beta']

        if 'num_gfm_flows' not in flags:
            flags['num_gfm_flows'] = flags['num_flows']

        # becomes immutable..
        return dict2pyobject(flags, 'flags')

    # load flags from .rar file
    # NOTE(review): torch.load unpickles the file, only use it on trusted flags files.
    flags = torch.load(flags_path)
    flags = self.set_paths_with_config(json2dict(self.config_path), flags, False)

    # get defaults from newer parameters that might not be defined in old flags
    flags = self.get_defaults(flags, is_dict=False)

    for key, value in add_args.items():
        setattr(flags, key, value)

    # old experiments only define a constant `beta`
    if not hasattr(flags, 'min_beta'):
        flags.min_beta = flags.beta
        flags.max_beta = flags.beta

    if not hasattr(flags, 'num_gfm_flows'):
        flags.num_gfm_flows = flags.num_flows

    return flags
def get_mongodb_uri():
    """Return the 'mongodb_URI' entry of the json config at ~/.config/mmvaedb.json."""
    dbconfig_path = Path('~/.config/mmvaedb.json').expanduser()
    return json2dict(dbconfig_path)['mongodb_URI']